fix(git-sync): review #4404 batch — sanitize-title echo, per-space gate, move-echo, merge-agreement, fence-aware conflict scan, e2e asserts

Addresses reviewer comment #4404 (critical + blocking):

- Critical #2: renamePage skips the echo where the incoming title equals
  sanitizeTitle(current title) — a Docmost title with FS-hostile chars (: / " |,
  newlines, double-space, >120) was pulled to a sanitized stem then written back,
  permanently corrupting the real title. (datasource)
- Blocking #3: runOnce enforces per-space settings.gitSync.enabled (the event
  path bypassed opt-in; any edited space would git-init + export). (orchestrator)
- Blocking #6: movePage no-ops the position-less same-parent echo that clobbered
  the user's chosen sibling order. (datasource)
- Blocking #9: hasConflictMarkers is fence-aware — '<<<<<<< HEAD' inside a code
  block (git-tutorial page) no longer trips the all-or-nothing gate that froze
  the whole space's refs. (push.ts)
- Blocking #11: three-way tryMergeRegion short-circuits when live==target (diff3
  agreement) instead of logging a false 'same-block conflict resolved to git' —
  the echo noise that masked real data-loss signals. (three-way-merge)
- Blocking #12/#13: e2e-advanced — drop the delete-cap block (no such feature;
  failed with a scary '(data loss!)'); non-member assert now expects 404 (existence
  not leaked), not 403.

Verified on stand: sanitized-title rename preserves DB title (vault file
sanitized); non-enabled space creates no vault; fenced conflict markers ingest
without jamming; build clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
claude-stand
2026-07-02 17:52:07 +03:00
parent f2d12fd2cd
commit c838fdeebe
6 changed files with 117 additions and 30 deletions
@@ -125,6 +125,16 @@ function tryMergeRegion(
a: string[],
b: string[],
): LocalPick[] | null {
// Agreement short-circuit (review #11). When live (a) and target (b) are
// identical, both sides converged on the SAME result — diff3 "agreement", NOT
// a conflict. This is the dominant echo case (live == target != base) that
// otherwise trips the overlap check below and is logged as a false "N same-block
// conflict(s) resolved to the git version", masking REAL data-loss signals.
// Emit the region straight from live (which equals target); no conflict.
if (a.length === b.length && a.every((v, i) => v === b[i])) {
return a.map((_v, i) => ({ src: 'live', local: i }) as LocalPick);
}
const aHunks = buildHunks(o, a);
const bHunks = buildHunks(o, b);
@@ -6,6 +6,7 @@ import type {
runCycle as runCycleFn,
parseDocmostMarkdown as parseDocmostMarkdownFn,
markdownToProseMirror as markdownToProseMirrorFn,
sanitizeTitle as sanitizeTitleFn,
} from '@docmost/git-sync';
/**
@@ -20,6 +21,7 @@ interface GitSyncModule {
runCycle: typeof runCycleFn;
parseDocmostMarkdown: typeof parseDocmostMarkdownFn;
markdownToProseMirror: typeof markdownToProseMirrorFn;
sanitizeTitle: typeof sanitizeTitleFn;
}
// The CJS->ESM dynamic-import bridge lives in one shared helper
@@ -57,6 +57,7 @@ export interface GitSyncRunStatus {
| 'in-progress'
| 'disabled'
| 'no-service-user'
| 'space-not-enabled'
| 'merge-in-progress';
pull?: { written: number; deleted: number; conflict: boolean };
push?: { mode: string; failures: number };
@@ -118,6 +119,24 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
.execute();
}
/**
* Authoritative per-space opt-in check used by the EVENT path (runOnce), which
* — unlike the poll path — is not pre-filtered by enabledSpaces(). Same STRICT
* `settings.gitSync.enabled === 'true'` semantics; a missing/soft-deleted space
* or any non-'true' value returns false (review #3).
*/
private async isSpaceGitSyncEnabled(spaceId: string): Promise<boolean> {
const row = await this.db
.selectFrom('spaces')
.select(
sql<boolean>`settings->'gitSync'->>'enabled' = 'true'`.as('enabled'),
)
.where('id', '=', spaceId)
.where('deletedAt', 'is', null)
.executeTakeFirst();
return row?.enabled ?? false;
}
// --- one sync cycle for a space -------------------------------
/**
@@ -187,6 +206,16 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
);
return { spaceId, ran: false, skipped: 'no-service-user' };
}
// Per-space opt-in gate (review #3). The global GIT_SYNC_ENABLED env switch is
// NOT sufficient — sync must also be turned on for THIS space
// (`space.settings.gitSync.enabled === true`). The poll path filters via
// enabledSpaces(), but the event path (page-change listener) calls runOnce
// directly; without this check an edit in ANY space would `git init` a vault
// and export that space's whole history, violating the module's opt-in
// contract (orchestrator docstring §79-86). STRICT: anything but 'true' skips.
if (!(await this.isSpaceGitSyncEnabled(spaceId))) {
return { spaceId, ran: false, skipped: 'space-not-enabled' };
}
// Run the full cycle under the per-space lock. withSpaceLock owns the
// in-process mutex (no overlapping cycles on this instance) AND the Redis
@@ -362,6 +362,17 @@ export class GitmostDataSourceService {
throw new NotFoundException(`Page ${pageId} not found`);
}
// GS-MOVE-ECHO guard (review #6). A drag-move in Docmost echoes back through
// git-sync as movePage(pageId, sameParent) WITHOUT a position. Recomputing a
// position here would append the page to the end of its sibling list,
// clobbering the position the user just chose. If the parent is unchanged and
// no explicit position was provided, there is nothing to reparent — skip, so
// the user's ordering is preserved. A real reparent (parent differs) or an
// explicit position still proceeds.
if (position == null && parentPageId === (page.parentPageId ?? null)) {
return { id: pageId, skipped: 'no-op-move-echo' };
}
const resolvedPosition =
position ?? (await this.computeMovePosition(page.spaceId, parentPageId));
@@ -429,6 +440,26 @@ export class GitmostDataSourceService {
page.slugId && title.endsWith(suffix)
? title.slice(0, -suffix.length)
: title;
// GS-TITLE-SANITIZE guard (review Critical #2). A rename in Docmost to a
// title with filename-hostile chars (`:` `/` `"` `|`, newlines, double
// spaces, >120 chars) is pulled to a SANITIZED file stem; the same cycle then
// sees an R(ename) line and would call renamePage with that sanitized stem,
// PERMANENTLY replacing the real title with its sanitized form (e.g.
// "Project: Plan" -> "Project- Plan"). In that echo the incoming title equals
// `sanitizeTitle(current title)`, so skip the write — the sanitized stem is a
// local filesystem artifact, never the page's real Docmost title. A genuine
// retitle does NOT equal the sanitized current title, so it still applies.
const { sanitizeTitle } = await loadGitSync();
if (
page.title &&
cleanTitle !== page.title &&
sanitizeTitle(page.title) === cleanTitle
) {
this.logger.log(
`git-sync: skip rename of page ${pageId}: incoming title is the sanitized form of current title (filesystem artifact; real title preserved)`,
);
return { id: pageId };
}
// PageService.update takes a User; the git-sync service user is the
// responsible author. Only the id is read off it for lastUpdatedById.
// `pageId` satisfies the UpdatePageDto type; PageService.update reads the
+10 -27
View File
@@ -158,11 +158,13 @@ RBASIC=$(basicfor "e2e-adv-reader@test.local")
&& ok "reader push (receive-pack) -> 403" || bad "reader push not 403"
# ===========================================================================
say "authz: a NON-member of an enabled space -> 403 (NOT 404)"
say "authz: a NON-member of an enabled space -> 404 (NOT 403; existence not leaked)"
OUTSIDER_ID=$(make_user "e2e-adv-outsider@test.local" none)
OBASIC=$(basicfor "e2e-adv-outsider@test.local")
c=$(code -H "Authorization: Basic $OBASIC" "$GIT_URL/info/refs?service=git-upload-pack")
[ "$c" = "403" ] && ok "non-member fetch -> 403 (existence revealed only to members)" || bad "non-member got $c (contract is 403)"
# The gate deliberately returns 404 (not 403) for a non-member so the 403<->404
# split cannot be used to probe which private spaces exist (git-http.helpers.spec.ts).
[ "$c" = "404" ] && ok "non-member fetch -> 404 (space existence not leaked)" || bad "non-member got $c (contract is 404)"
# ===========================================================================
say "concurrency: a push while the per-space lock is held -> 503 + Retry-After"
@@ -194,31 +196,12 @@ m2=$(vault_sha main); lp2=$(vault_sha refs/docmost/last-pushed)
# deterministically by the engine unit suite — packages/git-sync/test/
# classify-rename-moves.test.ts and node-ops.test.ts.)
# ===========================================================================
say "data-loss guard: deleting MORE than the cap is HELD, not dropped"
# Create cap+2 sibling pages, sync, then git rm all of them in one push.
CAP=$(api "$SERVER/api/git-sync/status" | grep -o '"maxDeletesPerCycle":[0-9]*' | grep -o '[0-9]*')
CAP=${CAP:-5}
N=$((CAP+2))
ids=""
for i in $(seq 1 $N); do
id=$(api -X POST "$SERVER/api/pages/create" -H 'Content-Type: application/json' -d "{\"spaceId\":\"$SPACE_ID\",\"title\":\"E2E-ADV-Del-$i-$RANDOM\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
ids="$ids $id"
done
sync_now
lp_before=$(vault_sha refs/docmost/last-pushed)
rm -rf "$WORK/cd"; gitc clone -q "$GIT_URL" "$WORK/cd" 2>/dev/null
cd "$WORK/cd"; git config user.email e2e@test; git config user.name e2e
for id in $ids; do f=$(grep -rl "$id" --include='*.md' . | head -1); [ -n "$f" ] && git rm -q "$f"; done
git commit -qm "rm $N pages (over cap $CAP)"
gpush
cd "$WORK"
sleep 2
trashed=$(psqlq "select count(*) from pages where space_id='$SPACE_ID' and deleted_at is not null and ($(echo $ids | sed "s/ \?\([0-9a-f-]\+\)/ or id='\1'/g; s/^ or //"));")
lp_after=$(vault_sha refs/docmost/last-pushed)
[ "${trashed:-0}" = "0" ] && ok "none of the $N over-cap deletes were applied (held)" || bad "$trashed pages trashed despite over-cap (data loss!)"
[ "$lp_before" = "$lp_after" ] && ok "last-pushed ref did NOT advance past the delete commit (retry-safe)" || bad "last-pushed advanced over suppressed deletes ($lp_before -> $lp_after)"
# cleanup these pages (hard-delete; they are E2E-ADV-* so teardown also catches them)
# NOTE (review #12): the former "delete cap" block was removed. There is NO
# delete cap in the orchestrator ("There is no delete cap") and /status does not
# expose maxDeletesPerCycle, so the block failed deterministically with a scary
# "(data loss!)". The real contract — every git-rm'd page soft-deletes to Trash
# (recoverable) — is exercised by the git-side-delete scenario elsewhere and by
# the engine unit suite.
# ===========================================================================
say "data-loss guard #2: untitled pages + retitle must NOT trash other pages"
+35 -3
View File
@@ -1159,15 +1159,47 @@ export function isPageFile(path: string): boolean {
* `|||||||` and `=======`): the base text is neither side's current content, so
* keeping it would inject obsolete lines AND leak a raw `|||||||` marker.
*/
const CONFLICT_BEGIN_RE = /^<{7}/m;
const CONFLICT_END_RE = /^>{7}/m;
const CONFLICT_BEGIN_LINE_RE = /^<{7}/;
const CONFLICT_BASE_LINE_RE = /^\|{7}/;
const CONFLICT_SEP_LINE_RE = /^={7}/;
const CONFLICT_END_LINE_RE = /^>{7}/;
// A code-fence open/close line (``` or ~~~), matching markdown-to-prosemirror's
// CODE_FENCE_RE. Used to make conflict-marker detection fence-aware.
const CONFLICT_CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
export function hasConflictMarkers(body: string): boolean {
return CONFLICT_BEGIN_RE.test(body) && CONFLICT_END_RE.test(body);
// Fence-aware scan (review #9). A page documenting git (a ```-fenced block that
// literally contains `<<<<<<< HEAD` ... `>>>>>>>`) must NOT be flagged as a
// conflict: with autoMergeConflicts OFF the all-or-nothing gate would then jam
// the ENTIRE space's refs forever, re-failing the batch every cycle. Only count
// begin/end marker lines that live OUTSIDE a fenced code block.
let inFence = false;
let fenceMarker = "";
let sawBegin = false;
for (const line of body.split("\n")) {
const fence = line.match(CONFLICT_CODE_FENCE_RE);
if (inFence) {
// Close on a fence line of the same marker char, length >= the opener.
if (
fence &&
fence[2][0] === fenceMarker[0] &&
fence[2].length >= fenceMarker.length
) {
inFence = false;
fenceMarker = "";
}
continue; // everything inside a fence is inert
}
if (fence) {
inFence = true;
fenceMarker = fence[2];
continue;
}
if (CONFLICT_BEGIN_LINE_RE.test(line)) sawBegin = true;
else if (sawBegin && CONFLICT_END_LINE_RE.test(line)) return true;
}
return false;
}
function stripConflictMarkers(body: string): string {