From c838fdeebe27d8c82dae3c416098cdacfb0555bf Mon Sep 17 00:00:00 2001 From: claude-stand Date: Thu, 2 Jul 2026 17:52:07 +0300 Subject: [PATCH] =?UTF-8?q?fix(git-sync):=20review=20#4404=20batch=20?= =?UTF-8?q?=E2=80=94=20sanitize-title=20echo,=20per-space=20gate,=20move-e?= =?UTF-8?q?cho,=20merge-agreement,=20fence-aware=20conflict=20scan,=20e2e?= =?UTF-8?q?=20asserts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses reviewer comment #4404 (critical + blocking): - Critical #2: renamePage skips the echo where the incoming title equals sanitizeTitle(current title) — a Docmost title with FS-hostile chars (: / " |, newlines, double-space, >120) was pulled to a sanitized stem then written back, permanently corrupting the real title. (datasource) - Blocking #3: runOnce enforces per-space settings.gitSync.enabled (the event path bypassed opt-in; any edited space would git-init + export). (orchestrator) - Blocking #6: movePage no-ops the position-less same-parent echo that clobbered the user's chosen sibling order. (datasource) - Blocking #9: hasConflictMarkers is fence-aware — '<<<<<<< HEAD' inside a code block (git-tutorial page) no longer trips the all-or-nothing gate that froze the whole space's refs. (push.ts) - Blocking #11: three-way tryMergeRegion short-circuits when live==target (diff3 agreement) instead of logging a false 'same-block conflict resolved to git' — the echo noise that masked real data-loss signals. (three-way-merge) - Blocking #12/#13: e2e-advanced — drop the delete-cap block (no such feature; failed with a scary '(data loss!)'); non-member assert now expects 404 (existence not leaked), not 403. Verified on stand: sanitized-title rename preserves DB title (vault file sanitized); non-enabled space creates no vault; fenced conflict markers ingest without jamming; build clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../collaboration/merge/three-way-merge.ts | 10 +++++ .../integrations/git-sync/git-sync.loader.ts | 2 + .../services/git-sync.orchestrator.ts | 29 ++++++++++++++ .../services/gitmost-datasource.service.ts | 31 +++++++++++++++ apps/server/test/git-sync-e2e-advanced.sh | 37 +++++------------- packages/git-sync/src/engine/push.ts | 38 +++++++++++++++++-- 6 files changed, 117 insertions(+), 30 deletions(-) diff --git a/apps/server/src/collaboration/merge/three-way-merge.ts b/apps/server/src/collaboration/merge/three-way-merge.ts index 032e0ee4..0031df86 100644 --- a/apps/server/src/collaboration/merge/three-way-merge.ts +++ b/apps/server/src/collaboration/merge/three-way-merge.ts @@ -125,6 +125,16 @@ function tryMergeRegion( a: string[], b: string[], ): LocalPick[] | null { + // Agreement short-circuit (review #11). When live (a) and target (b) are + // identical, both sides converged on the SAME result — diff3 "agreement", NOT + // a conflict. This is the dominant echo case (live == target != base) that + // otherwise trips the overlap check below and is logged as a false "N same-block + // conflict(s) resolved to the git version", masking REAL data-loss signals. + // Emit the region straight from live (which equals target); no conflict. + if (a.length === b.length && a.every((v, i) => v === b[i])) { + return a.map((_v, i) => ({ src: 'live', local: i }) as LocalPick); + } + const aHunks = buildHunks(o, a); const bHunks = buildHunks(o, b); diff --git a/apps/server/src/integrations/git-sync/git-sync.loader.ts b/apps/server/src/integrations/git-sync/git-sync.loader.ts index 6cebbed0..bc025021 100644 --- a/apps/server/src/integrations/git-sync/git-sync.loader.ts +++ b/apps/server/src/integrations/git-sync/git-sync.loader.ts @@ -6,6 +6,7 @@ import type { runCycle as runCycleFn, parseDocmostMarkdown as parseDocmostMarkdownFn, markdownToProseMirror as markdownToProseMirrorFn, + sanitizeTitle as sanitizeTitleFn, } from '@docmost/git-sync'; /** @@ -20,6 +21,7 @@ interface GitSyncModule { runCycle: typeof runCycleFn; parseDocmostMarkdown: typeof parseDocmostMarkdownFn; markdownToProseMirror: typeof markdownToProseMirrorFn; + sanitizeTitle: typeof sanitizeTitleFn; } // The CJS->ESM dynamic-import bridge lives in one shared helper diff --git a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts index 2d31d90a..55157302 100644 --- a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts +++ b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts @@ -57,6 +57,7 @@ export interface GitSyncRunStatus { | 'in-progress' | 'disabled' | 'no-service-user' + | 'space-not-enabled' | 'merge-in-progress'; pull?: { written: number; deleted: number; conflict: boolean }; push?: { mode: string; failures: number }; @@ -118,6 +119,24 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { .execute(); } + /** + * Authoritative per-space opt-in check used by the EVENT path (runOnce), which + * — unlike the poll path — is not pre-filtered by enabledSpaces(). Same STRICT + * `settings.gitSync.enabled === 'true'` semantics; a missing/soft-deleted space + * or any non-'true' value returns false (review #3). + */ + private async isSpaceGitSyncEnabled(spaceId: string): Promise { + const row = await this.db + .selectFrom('spaces') + .select( + sql`settings->'gitSync'->>'enabled' = 'true'`.as('enabled'), + ) + .where('id', '=', spaceId) + .where('deletedAt', 'is', null) + .executeTakeFirst(); + return row?.enabled ?? false; + } + // --- one sync cycle for a space ------------------------------- /** @@ -187,6 +206,16 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { ); return { spaceId, ran: false, skipped: 'no-service-user' }; } + // Per-space opt-in gate (review #3). The global GIT_SYNC_ENABLED env switch is + // NOT sufficient — sync must also be turned on for THIS space + // (`space.settings.gitSync.enabled === true`). The poll path filters via + // enabledSpaces(), but the event path (page-change listener) calls runOnce + // directly; without this check an edit in ANY space would `git init` a vault + // and export that space's whole history, violating the module's opt-in + // contract (orchestrator docstring §79-86). STRICT: anything but 'true' skips. + if (!(await this.isSpaceGitSyncEnabled(spaceId))) { + return { spaceId, ran: false, skipped: 'space-not-enabled' }; + } // Run the full cycle under the per-space lock. withSpaceLock owns the // in-process mutex (no overlapping cycles on this instance) AND the Redis diff --git a/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts b/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts index ce7c738d..e9f3807f 100644 --- a/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts +++ b/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts @@ -362,6 +362,17 @@ export class GitmostDataSourceService { throw new NotFoundException(`Page ${pageId} not found`); } + // GS-MOVE-ECHO guard (review #6). A drag-move in Docmost echoes back through + // git-sync as movePage(pageId, sameParent) WITHOUT a position. Recomputing a + // position here would append the page to the end of its sibling list, + // clobbering the position the user just chose. If the parent is unchanged and + // no explicit position was provided, there is nothing to reparent — skip, so + // the user's ordering is preserved. A real reparent (parent differs) or an + // explicit position still proceeds. + if (position == null && parentPageId === (page.parentPageId ?? null)) { + return { id: pageId, skipped: 'no-op-move-echo' }; + } + const resolvedPosition = position ?? (await this.computeMovePosition(page.spaceId, parentPageId)); @@ -429,6 +440,26 @@ export class GitmostDataSourceService { page.slugId && title.endsWith(suffix) ? title.slice(0, -suffix.length) : title; + // GS-TITLE-SANITIZE guard (review Critical #2). A rename in Docmost to a + // title with filename-hostile chars (`:` `/` `"` `|`, newlines, double + // spaces, >120 chars) is pulled to a SANITIZED file stem; the same cycle then + // sees an R(ename) line and would call renamePage with that sanitized stem, + // PERMANENTLY replacing the real title with its sanitized form (e.g. + // "Project: Plan" -> "Project- Plan"). In that echo the incoming title equals + // `sanitizeTitle(current title)`, so skip the write — the sanitized stem is a + // local filesystem artifact, never the page's real Docmost title. A genuine + // retitle does NOT equal the sanitized current title, so it still applies. + const { sanitizeTitle } = await loadGitSync(); + if ( + page.title && + cleanTitle !== page.title && + sanitizeTitle(page.title) === cleanTitle + ) { + this.logger.log( + `git-sync: skip rename of page ${pageId}: incoming title is the sanitized form of current title (filesystem artifact; real title preserved)`, + ); + return { id: pageId }; + } // PageService.update takes a User; the git-sync service user is the // responsible author. Only the id is read off it for lastUpdatedById. // `pageId` satisfies the UpdatePageDto type; PageService.update reads the diff --git a/apps/server/test/git-sync-e2e-advanced.sh b/apps/server/test/git-sync-e2e-advanced.sh index e8f7abc4..b952a154 100755 --- a/apps/server/test/git-sync-e2e-advanced.sh +++ b/apps/server/test/git-sync-e2e-advanced.sh @@ -158,11 +158,13 @@ RBASIC=$(basicfor "e2e-adv-reader@test.local") && ok "reader push (receive-pack) -> 403" || bad "reader push not 403" # =========================================================================== -say "authz: a NON-member of an enabled space -> 403 (NOT 404)" +say "authz: a NON-member of an enabled space -> 404 (NOT 403; existence not leaked)" OUTSIDER_ID=$(make_user "e2e-adv-outsider@test.local" none) OBASIC=$(basicfor "e2e-adv-outsider@test.local") c=$(code -H "Authorization: Basic $OBASIC" "$GIT_URL/info/refs?service=git-upload-pack") -[ "$c" = "403" ] && ok "non-member fetch -> 403 (existence revealed only to members)" || bad "non-member got $c (contract is 403)" +# The gate deliberately returns 404 (not 403) for a non-member so the 403<->404 +# split cannot be used to probe which private spaces exist (git-http.helpers.spec.ts). +[ "$c" = "404" ] && ok "non-member fetch -> 404 (space existence not leaked)" || bad "non-member got $c (contract is 404)" # =========================================================================== say "concurrency: a push while the per-space lock is held -> 503 + Retry-After" @@ -194,31 +196,12 @@ m2=$(vault_sha main); lp2=$(vault_sha refs/docmost/last-pushed) # deterministically by the engine unit suite — packages/git-sync/test/ # classify-rename-moves.test.ts and node-ops.test.ts.) -# =========================================================================== -say "data-loss guard: deleting MORE than the cap is HELD, not dropped" -# Create cap+2 sibling pages, sync, then git rm all of them in one push. -CAP=$(api "$SERVER/api/git-sync/status" | grep -o '"maxDeletesPerCycle":[0-9]*' | grep -o '[0-9]*') -CAP=${CAP:-5} -N=$((CAP+2)) -ids="" -for i in $(seq 1 $N); do - id=$(api -X POST "$SERVER/api/pages/create" -H 'Content-Type: application/json' -d "{\"spaceId\":\"$SPACE_ID\",\"title\":\"E2E-ADV-Del-$i-$RANDOM\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4) - ids="$ids $id" -done -sync_now -lp_before=$(vault_sha refs/docmost/last-pushed) -rm -rf "$WORK/cd"; gitc clone -q "$GIT_URL" "$WORK/cd" 2>/dev/null -cd "$WORK/cd"; git config user.email e2e@test; git config user.name e2e -for id in $ids; do f=$(grep -rl "$id" --include='*.md' . | head -1); [ -n "$f" ] && git rm -q "$f"; done -git commit -qm "rm $N pages (over cap $CAP)" -gpush -cd "$WORK" -sleep 2 -trashed=$(psqlq "select count(*) from pages where space_id='$SPACE_ID' and deleted_at is not null and ($(echo $ids | sed "s/ \?\([0-9a-f-]\+\)/ or id='\1'/g; s/^ or //"));") -lp_after=$(vault_sha refs/docmost/last-pushed) -[ "${trashed:-0}" = "0" ] && ok "none of the $N over-cap deletes were applied (held)" || bad "$trashed pages trashed despite over-cap (data loss!)" -[ "$lp_before" = "$lp_after" ] && ok "last-pushed ref did NOT advance past the delete commit (retry-safe)" || bad "last-pushed advanced over suppressed deletes ($lp_before -> $lp_after)" -# cleanup these pages (hard-delete; they are E2E-ADV-* so teardown also catches them) +# NOTE (review #12): the former "delete cap" block was removed. There is NO +# delete cap in the orchestrator ("There is no delete cap") and /status does not +# expose maxDeletesPerCycle, so the block failed deterministically with a scary +# "(data loss!)". The real contract — every git-rm'd page soft-deletes to Trash +# (recoverable) — is exercised by the git-side-delete scenario elsewhere and by +# the engine unit suite. # =========================================================================== say "data-loss guard #2: untitled pages + retitle must NOT trash other pages" diff --git a/packages/git-sync/src/engine/push.ts b/packages/git-sync/src/engine/push.ts index e8c2d8d5..f5f22247 100644 --- a/packages/git-sync/src/engine/push.ts +++ b/packages/git-sync/src/engine/push.ts @@ -1159,15 +1159,47 @@ export function isPageFile(path: string): boolean { * `|||||||` and `=======`): the base text is neither side's current content, so * keeping it would inject obsolete lines AND leak a raw `|||||||` marker. */ -const CONFLICT_BEGIN_RE = /^<{7}/m; -const CONFLICT_END_RE = /^>{7}/m; const CONFLICT_BEGIN_LINE_RE = /^<{7}/; const CONFLICT_BASE_LINE_RE = /^\|{7}/; const CONFLICT_SEP_LINE_RE = /^={7}/; const CONFLICT_END_LINE_RE = /^>{7}/; +// A code-fence open/close line (``` or ~~~), matching markdown-to-prosemirror's +// CODE_FENCE_RE. Used to make conflict-marker detection fence-aware. +const CONFLICT_CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/; + export function hasConflictMarkers(body: string): boolean { - return CONFLICT_BEGIN_RE.test(body) && CONFLICT_END_RE.test(body); + // Fence-aware scan (review #9). A page documenting git (a ```-fenced block that + // literally contains `<<<<<<< HEAD` ... `>>>>>>>`) must NOT be flagged as a + // conflict: with autoMergeConflicts OFF the all-or-nothing gate would then jam + // the ENTIRE space's refs forever, re-failing the batch every cycle. Only count + // begin/end marker lines that live OUTSIDE a fenced code block. + let inFence = false; + let fenceMarker = ""; + let sawBegin = false; + for (const line of body.split("\n")) { + const fence = line.match(CONFLICT_CODE_FENCE_RE); + if (inFence) { + // Close on a fence line of the same marker char, length >= the opener. + if ( + fence && + fence[2][0] === fenceMarker[0] && + fence[2].length >= fenceMarker.length + ) { + inFence = false; + fenceMarker = ""; + } + continue; // everything inside a fence is inert + } + if (fence) { + inFence = true; + fenceMarker = fence[2]; + continue; + } + if (CONFLICT_BEGIN_LINE_RE.test(line)) sawBegin = true; + else if (sawBegin && CONFLICT_END_LINE_RE.test(line)) return true; + } + return false; } function stripConflictMarkers(body: string): string {