diff --git a/apps/server/src/collaboration/collaboration.handler.ts b/apps/server/src/collaboration/collaboration.handler.ts index 8490c2aa..f4e4d34a 100644 --- a/apps/server/src/collaboration/collaboration.handler.ts +++ b/apps/server/src/collaboration/collaboration.handler.ts @@ -10,7 +10,7 @@ import * as Y from 'yjs'; import { User } from '@docmost/db/types/entity.types'; import { mergeXmlFragments, - mergeXmlFragments3Way, + mergeXmlFragments3WayWithStats, } from './merge/yjs-body-merge'; export type CollabEventHandlers = ReturnType< @@ -168,11 +168,24 @@ export class CollaborationHandler { const liveFrag = doc.getXmlFragment('default'); const targetFrag = targetDoc.getXmlFragment('default'); if (baseDoc) { - mergeXmlFragments3Way( + const { conflicts } = mergeXmlFragments3WayWithStats( liveFrag, targetFrag, baseDoc.getXmlFragment('default'), ); + // SAME-BLOCK conflict contract (SPEC §9): a block both the human + // and git changed resolves to GIT (deterministic). Make that + // OBSERVABLE rather than silent — log it. The losing human content + // is NOT destroyed: the persistence extension's boundary snapshot + // pins the pre-merge page state to history on this user->git-sync + // transition, so it stays recoverable. + if (conflicts > 0) { + this.logger.warn( + `git-sync merge for ${documentName}: ${conflicts} same-block ` + + `conflict(s) resolved to the git version; the prior page ` + + `state is preserved in page history (recoverable).`, + ); + } } else { mergeXmlFragments(liveFrag, targetFrag); } diff --git a/apps/server/src/collaboration/extensions/persistence.extension.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.spec.ts index b74ae7ca..d7c99db4 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.spec.ts @@ -170,17 +170,32 @@ describe('PersistenceExtension.onStoreDocument — provenance precedence (#2)', expect(sourceOf(pageRepo)).toBe('agent'); }); - // --- negative: a git-sync store must NOT pin a boundary history snapshot ---- - // The boundary-snapshot branch only fires when the resolved source is 'agent' - // AND the prior persisted source is not 'agent'. A git-sync store resolves to - // 'git-sync', so saveHistory must NOT be called. - it('does NOT write a boundary history snapshot for a git-sync store', async () => { + // --- boundary snapshot for a git-sync store over a HUMAN baseline ----------- + // SPEC §9 observable-loss guard (bug #2): a git-sync body write is a block-level + // 3-way merge whose same-block rule is "git wins". To keep a concurrent human + // edit RECOVERABLE rather than silently overwritten, a git-sync store over a + // prior NON-git-sync baseline pins that prior state to page history first — + // exactly like the agent path. So saveHistory MUST be called here. + it('DOES pin a boundary snapshot for a git-sync store over a prior human state', async () => { const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' }); await ext.onStoreDocument( makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }), ); + expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1); + }); + + // --- negative: a git-sync store over a git-sync baseline does NOT re-pin ----- + // The boundary is pinned once on the transition INTO git-sync; a subsequent + // git-sync store over an already-git-sync baseline must not churn history. + it('does NOT re-pin a boundary snapshot for a git-sync store over a git-sync baseline', async () => { + const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'git-sync' }); + + await ext.onStoreDocument( + makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }), + ); + expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); }); diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 52ecadf0..248bf64b 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -274,21 +274,30 @@ export class PersistenceExtension implements Extension { //this.logger.debug('Contributors error:' + err?.['message']); } - // Approach A — boundary snapshot before the agent's first edit. - // When this store is the agent's and the page's currently persisted - // state was authored by a human, pin that human state as its own - // history version BEFORE the agent overwrites it. `page` still holds - // the OLD content/provenance here, so saveHistory(page) captures the - // pre-agent state tagged 'user'. The agent's new content is - // snapshotted later by the debounced PAGE_HISTORY job ('agent'). Skip - // if the prior state is already agent-authored (boundary already - // pinned on the user->agent transition), if the page is effectively - // empty, or if the latest existing snapshot already equals this human - // state (avoid duplicates). - if ( - lastUpdatedSource === 'agent' && - page.lastUpdatedSource !== 'agent' - ) { + // Approach A — boundary snapshot before a MACHINE write overwrites a + // human (or other-source) baseline. When this store is from a machine + // source — the AGENT or GIT-SYNC — and the page's currently persisted + // state was authored by a DIFFERENT source, pin that prior state as its + // own history version BEFORE the machine write overwrites it. `page` + // still holds the OLD content/provenance here, so saveHistory(page) + // captures the pre-write state. The machine's new content is snapshotted + // later by the debounced PAGE_HISTORY job. + // + // For GIT-SYNC this is the OBSERVABLE-LOSS guard (SPEC §9 conflict + // contract): a git-sync body write is a block-level 3-way merge whose + // same-block rule is "git wins". Without this pin, a concurrent human + // edit to a block git also changed would be overwritten with NO trace. + // Pinning the pre-merge state here means the human's content is always + // RECOVERABLE via page history rather than silently lost — git still + // wins the live doc deterministically, but nothing is destroyed. + // + // Skip if the prior state was already authored by THIS machine source + // (boundary already pinned on the transition into it), if the page is + // effectively empty, or if the latest existing snapshot already equals + // the prior state (avoid duplicates). + const isMachineWrite = + lastUpdatedSource === 'agent' || lastUpdatedSource === 'git-sync'; + if (isMachineWrite && page.lastUpdatedSource !== lastUpdatedSource) { const lastHistory = await this.pageHistoryRepo.findPageLastHistory( pageId, { includeContent: true, trx }, diff --git a/apps/server/src/collaboration/merge/three-way-merge.spec.ts b/apps/server/src/collaboration/merge/three-way-merge.spec.ts index faef167a..bef2fdfb 100644 --- a/apps/server/src/collaboration/merge/three-way-merge.spec.ts +++ b/apps/server/src/collaboration/merge/three-way-merge.spec.ts @@ -1,4 +1,8 @@ -import { diff3Plan, type Pick } from './three-way-merge'; +import { + diff3Plan, + diff3PlanWithConflicts, + type Pick, +} from './three-way-merge'; // Materialize a plan into the merged key sequence for assertion. function apply(plan: Pick[], live: string[], target: string[]): string[] { @@ -31,6 +35,49 @@ describe('diff3Plan (block-level three-way merge)', () => { ]); }); + // Bug #2 observability: diff3PlanWithConflicts reports SAME-BLOCK conflicts so + // the caller can surface the "git wins" loss (log + history pin) instead of + // dropping the human side silently. + describe('diff3PlanWithConflicts (same-block conflict reporting)', () => { + it('reports 0 conflicts when sides changed DIFFERENT blocks (clean merge)', () => { + const r = diff3PlanWithConflicts( + ['1', '2', '3'], + ['H', '2', '3'], + ['1', '2', 'G'], + ); + expect(r.conflicts).toBe(0); + expect(apply(r.picks, ['H', '2', '3'], ['1', '2', 'G'])).toEqual([ + 'H', + '2', + 'G', + ]); + }); + + it('reports 1 conflict and git wins when BOTH rewrote the SAME block', () => { + const r = diff3PlanWithConflicts( + ['1', '2', '3'], + ['1', 'H', '3'], // human rewrote block 2 + ['1', 'G', '3'], // git rewrote block 2 + ); + expect(r.conflicts).toBe(1); + // Git wins the contested block; the human 'H' is NOT in the picks. + expect(apply(r.picks, ['1', 'H', '3'], ['1', 'G', '3'])).toEqual([ + '1', + 'G', + '3', + ]); + }); + + it('does NOT count a git-only region (no human content to lose) as a conflict', () => { + const r = diff3PlanWithConflicts( + ['1', '2', '3'], + ['1', '2', '3'], // human unchanged + ['1', '9', '3'], // git rewrote block 2 + ); + expect(r.conflicts).toBe(0); + }); + }); + it('human and git changed DIFFERENT blocks -> both preserved', () => { // human rewrote block 1, git rewrote block 3. expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([ diff --git a/apps/server/src/collaboration/merge/three-way-merge.ts b/apps/server/src/collaboration/merge/three-way-merge.ts index ce83491f..032e0ee4 100644 --- a/apps/server/src/collaboration/merge/three-way-merge.ts +++ b/apps/server/src/collaboration/merge/three-way-merge.ts @@ -183,15 +183,46 @@ export interface Pick { index: number; } +/** + * The merged block order PLUS how many regions resolved as a genuine SAME-BLOCK + * conflict (both sides rewrote the same base block — `tryMergeRegion` returned + * null and git won the whole region, so the live/human version of those blocks + * is NOT in `picks`). `conflicts > 0` is the OBSERVABLE signal the caller uses to + * surface "git won a concurrent same-block edit" (log it + pin the human + * baseline to page history) instead of dropping the human side silently. + */ +export interface Diff3Result { + picks: Pick[]; + conflicts: number; +} + /** * Three-way merge of base `o`, live `a`, target `b` (arrays of block keys). - * Returns the merged block order as picks from live/target. + * Returns the merged block order as picks from live/target. Thin wrapper over + * `diff3PlanWithConflicts` (kept for the existing pure-array callers/tests). */ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] { + return diff3PlanWithConflicts(o, a, b).picks; +} + +/** + * Like `diff3Plan` but also reports the SAME-BLOCK conflict count (see + * `Diff3Result`). A region where both the human and git rewrote the same base + * block cannot be merged automatically; the rule is deterministic — GIT WINS the + * whole region — but the human's version of those blocks is then absent from the + * picks, so we count it so the caller can make the loss observable/recoverable + * rather than silent (the documented conflict contract). + */ +export function diff3PlanWithConflicts( + o: string[], + a: string[], + b: string[], +): Diff3Result { const oToA = matchMap(lcsPairs(o, a)); const oToB = matchMap(lcsPairs(o, b)); const res: Pick[] = []; + let conflicts = 0; let oi = 0; let ai = 0; let bi = 0; @@ -223,6 +254,10 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] { ); } } else { + // SAME-BLOCK CONFLICT: count it ONLY when the human side actually had + // content in this region that git's win discards (live region non-empty). + // A region only git rewrote (live region empty) is not a human loss. + if (aEnd > ai) conflicts++; for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k }); } @@ -235,5 +270,5 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] { oi = anchor + 1; } - return res; + return { picks: res, conflicts }; } diff --git a/apps/server/src/collaboration/merge/yjs-body-merge.spec.ts b/apps/server/src/collaboration/merge/yjs-body-merge.spec.ts index 48d13d19..d219e2e1 100644 --- a/apps/server/src/collaboration/merge/yjs-body-merge.spec.ts +++ b/apps/server/src/collaboration/merge/yjs-body-merge.spec.ts @@ -3,6 +3,7 @@ import * as Y from 'yjs'; import { mergeXmlFragments, mergeXmlFragments3Way, + mergeXmlFragments3WayWithStats, cloneXmlNode, diffBlocks, } from './yjs-body-merge'; @@ -179,6 +180,40 @@ describe('yjs-body-merge', () => { expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']); }); + // Bug #2 observability: the stats variant reports the same-block conflict so + // the handler can log it + the persistence layer can pin the human baseline. + it('reports the same-block conflict count via mergeXmlFragments3WayWithStats', () => { + const base = new Y.Doc(); + const live = new Y.Doc(); + const target = new Y.Doc(); + const baseFrag = buildFragment(base, ['a', 'b', 'c']); + const liveFrag = buildFragment(live, ['a', 'HUMAN', 'c']); + const targetFrag = buildFragment(target, ['a', 'GIT', 'c']); + + let result!: { applied: number; conflicts: number }; + live.transact(() => { + result = mergeXmlFragments3WayWithStats(liveFrag, targetFrag, baseFrag); + }); + expect(result.conflicts).toBe(1); + expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']); + }); + + it('reports 0 conflicts for a clean different-block 3-way merge', () => { + const base = new Y.Doc(); + const live = new Y.Doc(); + const target = new Y.Doc(); + const baseFrag = buildFragment(base, ['a', 'b', 'c']); + const liveFrag = buildFragment(live, ['HUMAN', 'b', 'c']); + const targetFrag = buildFragment(target, ['a', 'b', 'GIT']); + + let result!: { applied: number; conflicts: number }; + live.transact(() => { + result = mergeXmlFragments3WayWithStats(liveFrag, targetFrag, baseFrag); + }); + expect(result.conflicts).toBe(0); + expect(texts(liveFrag)).toEqual(['HUMAN', 'b', 'GIT']); + }); + it('git change with no concurrent human edit (live == base) applies cleanly', () => { const base = new Y.Doc(); const live = new Y.Doc(); diff --git a/apps/server/src/collaboration/merge/yjs-body-merge.ts b/apps/server/src/collaboration/merge/yjs-body-merge.ts index e21ccdae..1ec64ced 100644 --- a/apps/server/src/collaboration/merge/yjs-body-merge.ts +++ b/apps/server/src/collaboration/merge/yjs-body-merge.ts @@ -3,7 +3,7 @@ import { getSchema } from '@tiptap/core'; import type { Schema } from '@tiptap/pm/model'; import { tiptapExtensions } from '../collaboration.util'; -import { diff3Plan } from './three-way-merge'; +import { diff3PlanWithConflicts } from './three-way-merge'; import { buildLcsTable } from './lcs'; /** @@ -295,6 +295,20 @@ export function mergeXmlFragments( return applied; } +/** Outcome of a 3-way block merge: ops applied + same-block conflict count. */ +export interface Merge3WayResult { + /** Number of block insert/delete operations spliced into `live`. */ + applied: number; + /** + * Regions where the human AND git rewrote the SAME base block. The rule is + * deterministic (GIT WINS the region), so the human's version of those blocks + * is dropped from the live doc. `conflicts > 0` is the OBSERVABLE signal the + * caller uses to LOG the loss and pin the human baseline to page history (so it + * is recoverable), instead of the edit vanishing silently. + */ + conflicts: number; +} + /** * THREE-WAY block merge: reconcile `live` toward `target` using `base` (the * last-synced common ancestor) so a block only the human changed is KEPT and a @@ -305,20 +319,40 @@ export function mergeXmlFragments( * target); we materialize that as a virtual target fragment and reuse the 2-way * `mergeXmlFragments` to splice it into `live` minimally (so untouched live block * instances — and their in-flight edits — stay put). MUST be called inside a Yjs - * transaction. Returns the number of block operations applied. + * transaction. Returns the number of block operations applied. (Use + * `mergeXmlFragments3WayWithStats` when the SAME-BLOCK conflict count is needed.) */ export function mergeXmlFragments3Way( live: Y.XmlFragment, target: Y.XmlFragment, base: Y.XmlFragment, ): number { + return mergeXmlFragments3WayWithStats(live, target, base).applied; +} + +/** + * As `mergeXmlFragments3Way`, but also returns the SAME-BLOCK conflict count so + * the caller can make a "git won a concurrent same-block edit" event OBSERVABLE + * (the documented conflict contract: git wins deterministically, but the losing + * human content is never destroyed silently — it is logged and recoverable via + * page history). + */ +export function mergeXmlFragments3WayWithStats( + live: Y.XmlFragment, + target: Y.XmlFragment, + base: Y.XmlFragment, +): Merge3WayResult { const liveKids = live.toArray(); const targetKids = target.toArray(); const liveKeys = liveKids.map(key); const targetKeys = targetKids.map(key); const baseKeys = base.toArray().map(key); - const plan = diff3Plan(baseKeys, liveKeys, targetKeys); + const { picks: plan, conflicts } = diff3PlanWithConflicts( + baseKeys, + liveKeys, + targetKeys, + ); // Build the merged block sequence in a throwaway doc, cloning from whichever // side each pick came from, then 2-way merge it back into the live fragment. @@ -331,5 +365,5 @@ export function mergeXmlFragments3Way( ); if (nodes.length) mergedFrag.insert(0, nodes); - return mergeXmlFragments(live, mergedFrag); + return { applied: mergeXmlFragments(live, mergedFrag), conflicts }; } diff --git a/apps/server/src/integrations/git-sync/git-sync.constants.ts b/apps/server/src/integrations/git-sync/git-sync.constants.ts index 75c4fd4f..208f444a 100644 --- a/apps/server/src/integrations/git-sync/git-sync.constants.ts +++ b/apps/server/src/integrations/git-sync/git-sync.constants.ts @@ -39,3 +39,24 @@ export const GIT_SYNC_LOCK_PREFIX = 'git-sync:lock:'; * and the Redis lock prevents two instances racing the same space. */ export const GIT_SYNC_LOCK_TTL_MS = 5 * 60 * 1000; + +/** + * Bounded retry budget for ACQUIRING the per-space lock on the PUSH (external + * receive-pack) path. The poll cycle holds the single-writer lock while it + * processes a whole space, so a legitimate `git push` that arrives during a + * cycle would otherwise IMMEDIATELY 503 (GitSyncLockHeldError) even though the + * cycle is about to release the lock in well under a second for most spaces. + * Under continuous polling that made a majority of pushes 503 non- + * deterministically. So the push path retries the acquire with a small capped + * backoff for up to ~`TOTAL_MS` BEFORE giving up — a transient overlap with a + * cycle no longer fails the push, while a genuinely stuck/long cycle still + * surfaces a 503 after the bound (git then retries the whole push, which is + * safe: the receive-pack only runs ONCE the lock is held, so a 503 never leaves + * a half-applied ref). The POLL cycle itself does NOT retry (it just skips and + * the next tick reconciles), so this is push-only — the smaller blast radius. + */ +export const GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS = 5_000; +/** First backoff between push lock-acquire attempts (ms); doubles, capped. */ +export const GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS = 100; +/** Cap on the per-attempt push lock-acquire backoff (ms). */ +export const GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS = 500; diff --git a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts index db95cc54..b00cf58c 100644 --- a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts +++ b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts @@ -22,6 +22,11 @@ import { EnvironmentService } from '../../environment/environment.service'; import { GitmostDataSourceService } from './gitmost-datasource.service'; import { VaultRegistryService } from './vault-registry.service'; import { SpaceLockService } from './space-lock.service'; +import { + GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS, + GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS, + GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS, +} from '../git-sync.constants'; /** A space the poll loop should reconcile: its id + the workspace it lives in. */ interface EnabledSpace { @@ -244,7 +249,9 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { } const serviceUserId = this.environmentService.getGitSyncServiceUserId(); - const result = await this.spaceLock.withSpaceLock(spaceId, async (signal) => { + const result = await this.spaceLock.withSpaceLock( + spaceId, + async (signal) => { // 1) Stream the receive-pack to the client (durable commits land on main). // Pass the lost-lock signal so the receive-pack child is killed if the lock // lapses mid-write (no concurrent working-tree writer across replicas). @@ -273,7 +280,23 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { ); } return; - }); + }, + // BOUNDED retry-acquire (push path only): a push that briefly overlaps a + // poll cycle waits a moment (capped backoff up to the budget) instead of + // immediately 503-ing — the cycle releases the lock in well under a second + // for most spaces, so this turns a transient overlap into a SUCCESS rather + // than a spurious failure. A genuinely long/stuck cycle still skips after + // the bound -> GitSyncLockHeldError -> 503, and git retries the whole push + // (the receive-pack only runs once the lock is held, so there is never a + // half-applied ref on a 503). + { + acquireRetry: { + timeoutMs: GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS, + baseMs: GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS, + maxMs: GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS, + }, + }, + ); // The lock was held (in-progress or another replica) — surface to the caller // so the HTTP handler can answer 503 and let git retry. diff --git a/apps/server/src/integrations/git-sync/services/space-lock.service.spec.ts b/apps/server/src/integrations/git-sync/services/space-lock.service.spec.ts index 5526162e..0c5bfe96 100644 --- a/apps/server/src/integrations/git-sync/services/space-lock.service.spec.ts +++ b/apps/server/src/integrations/git-sync/services/space-lock.service.spec.ts @@ -123,6 +123,65 @@ describe('SpaceLockService', () => { }); }); + // Bug #1 (push 503 starvation): the PUSH path passes a bounded acquireRetry so a + // transient overlap with a poll cycle is retried (and succeeds) instead of an + // immediate 503. A genuinely stuck lock still skips after the bound. The poll + // cycle passes NO retry (immediate skip), so only the push path waits. + describe('bounded acquire-retry (push path)', () => { + const retry = { timeoutMs: 5_000, baseMs: 100, maxMs: 500 }; + + it('retries the acquire and SUCCEEDS when the lock is briefly held then released', async () => { + const { service, redis } = build(); + // First acquire attempt fails (lock briefly held by a cycle), the next + // succeeds — the bounded retry must turn this into a SUCCESS, not a skip. + redis.set + .mockResolvedValueOnce(null) // attempt 1: held + .mockResolvedValueOnce(null) // attempt 2: still held + .mockResolvedValue('OK'); // attempt 3+: released -> acquired + const fn = jest.fn(async () => 'pushed'); + + const result = await service.withSpaceLock('space-1', fn, { + acquireRetry: retry, + }); + + expect(result).toBe('pushed'); + expect(fn).toHaveBeenCalledTimes(1); + expect(redis.set.mock.calls.length).toBeGreaterThanOrEqual(3); + // The acquired lock is released in finally (DEL-CAS eval). + expect(redis.eval).toHaveBeenCalledTimes(1); + expect(redis.eval.mock.calls[0][0]).toContain('del'); + }); + + it('still skips (lock-held) after the bound when the lock stays stuck — and never runs fn', async () => { + const { service, redis } = build(); + redis.set.mockResolvedValue(null); // permanently held + const fn = jest.fn(async () => 'pushed'); + + const result = await service.withSpaceLock('space-1', fn, { + acquireRetry: { timeoutMs: 300, baseMs: 50, maxMs: 100 }, + }); + + expect(result).toEqual({ skipped: 'lock-held' }); + expect(fn).not.toHaveBeenCalled(); + // It retried more than once before giving up (bound > one interval). + expect(redis.set.mock.calls.length).toBeGreaterThan(1); + // Never acquired -> never released. + expect(redis.eval).not.toHaveBeenCalled(); + }); + + it('without acquireRetry (poll path) a held lock skips IMMEDIATELY (single attempt)', async () => { + const { service, redis } = build(); + redis.set.mockResolvedValue(null); + const fn = jest.fn(async () => 'cycle'); + + const result = await service.withSpaceLock('space-1', fn); + + expect(result).toEqual({ skipped: 'lock-held' }); + expect(redis.set).toHaveBeenCalledTimes(1); // no retry + expect(fn).not.toHaveBeenCalled(); + }); + }); + describe('fn throwing', () => { it('propagates the throw AND still releases (eval) in finally', async () => { const { service, redis } = build(); diff --git a/apps/server/src/integrations/git-sync/services/space-lock.service.ts b/apps/server/src/integrations/git-sync/services/space-lock.service.ts index 0c9c4a52..6bbd3c84 100644 --- a/apps/server/src/integrations/git-sync/services/space-lock.service.ts +++ b/apps/server/src/integrations/git-sync/services/space-lock.service.ts @@ -120,41 +120,57 @@ export class SpaceLockService { } /** - * Run `fn` under the per-space lock: the in-process mutex (no overlapping - * cycles on this instance) AND the Redis leader lock (single writer across - * replicas). Returns `fn`'s result, or a skip sentinel when the lock could not - * be acquired — `{ skipped: 'in-progress' }` (this instance is mid-cycle) or - * `{ skipped: 'lock-held' }` (another replica holds the Redis lock). The mutex - * + Redis lock are always released in a `finally`, even when `fn` throws (the - * throw propagates to the caller). This is the single reusable wrapper shared - * by `runOnce` (the poll/admin cycle) and `ingestExternalPush` (a push from a - * git client over HTTP) so both serialize against each other identically. + * Options for `withSpaceLock`. `acquireRetry` (PUSH path only) bounds a + * retry-acquire loop: if the lock cannot be entered on the first try, keep + * retrying with a capped exponential backoff until `timeoutMs` elapses before + * returning the skip sentinel. The poll cycle holds the lock while it + * processes a whole space, so a legitimate external push that briefly overlaps + * a cycle should WAIT a moment rather than immediately 503 (bug: ~60% of + * pushes 503'd under continuous polling). The poll cycle passes NO retry (it + * just skips and the next tick reconciles). */ async withSpaceLock( spaceId: string, fn: (signal: AbortSignal) => Promise, + options?: { + acquireRetry?: { timeoutMs: number; baseMs: number; maxMs: number }; + }, ): Promise { - if (this.running.has(spaceId)) { - return { skipped: 'in-progress' }; - } - // Cross-instance, same-process single-writer guard: another live holder (a - // different SpaceLockService in this process) is mid-cycle for this space. - // This survives a swallowed heartbeat / Redis TTL lapse, so a second writer - // in the process cannot race the working tree — it is rejected 'lock-held'. - if (SpaceLockService.liveLocks.has(spaceId)) { - return { skipped: 'lock-held' }; - } - // Reserve the in-process slot synchronously (before any await) so two - // concurrent same-space calls on THIS instance cannot both pass the guard and - // race acquire(). Redis NX is already authoritative across replicas; this just - // closes the in-process TOCTOU window. Released in the outer finally on every - // path (acquire-failure, fn-throw, normal completion). - this.running.add(spaceId); - SpaceLockService.liveLocks.set(spaceId, this.instanceId); - try { - if (!(await this.acquire(spaceId))) { + const retry = options?.acquireRetry; + // Deadline for the bounded retry-acquire (push path). `Date.now()` once so a + // slow first attempt does not over-extend the budget. + const deadline = retry ? Date.now() + retry.timeoutMs : 0; + let attempt = 0; + for (;;) { + // Reserve the in-process slot synchronously (before any await) so two + // concurrent same-space calls on THIS instance cannot both pass the guard + // and race acquire(). On any failure this is released before we retry/skip. + const reservation = this.tryReserveInProcess(spaceId); + if (reservation) { + // Could not even reserve in-process (this instance mid-cycle, or another + // live holder in the process). Retry within the bound, else skip. + if (retry && Date.now() < deadline) { + await this.sleep(this.nextBackoff(attempt++, retry, deadline)); + continue; + } + return reservation; + } + // Reserved in-process — now contend for the Redis leader lock. Release the + // in-process slot on EVERY non-running path so a retry/skip leaves no leak. + let acquired = false; + try { + acquired = await this.acquire(spaceId); + } finally { + if (!acquired) this.releaseInProcess(spaceId); + } + if (!acquired) { + if (retry && Date.now() < deadline) { + await this.sleep(this.nextBackoff(attempt++, retry, deadline)); + continue; + } return { skipped: 'lock-held' }; } + // Both locks held — run `fn` under the heartbeat, releasing in `finally`. // Lost-lock signal: a failed/CAS-missed heartbeat refresh aborts this so the // protected fn can stop instead of writing blind after our lock lapsed. const controller = new AbortController(); @@ -172,10 +188,64 @@ export class SpaceLockService { } finally { clearInterval(heartbeat); await this.release(spaceId); + this.releaseInProcess(spaceId); } - } finally { - this.running.delete(spaceId); - SpaceLockService.liveLocks.delete(spaceId); } } + + /** + * Synchronously try to reserve the in-process single-writer slot for a space. + * Returns a skip sentinel when another holder is live (this instance mid-cycle + * -> 'in-progress'; another SpaceLockService in this process -> 'lock-held'), + * or `null` when the slot was reserved (caller MUST `releaseInProcess` later). + * Both checks + the reservation happen with NO await between them so two + * concurrent same-space calls cannot both pass. + */ + private tryReserveInProcess( + spaceId: string, + ): { skipped: 'lock-held' | 'in-progress' } | null { + if (this.running.has(spaceId)) { + return { skipped: 'in-progress' }; + } + // Cross-instance, same-process single-writer guard: another live holder (a + // different SpaceLockService in this process) is mid-cycle for this space. + // This survives a swallowed heartbeat / Redis TTL lapse, so a second writer + // in the process cannot race the working tree — it is rejected 'lock-held'. + if (SpaceLockService.liveLocks.has(spaceId)) { + return { skipped: 'lock-held' }; + } + this.running.add(spaceId); + SpaceLockService.liveLocks.set(spaceId, this.instanceId); + return null; + } + + /** Release the in-process single-writer slot reserved by tryReserveInProcess. */ + private releaseInProcess(spaceId: string): void { + this.running.delete(spaceId); + SpaceLockService.liveLocks.delete(spaceId); + } + + /** + * Backoff (ms) before the next push lock-acquire attempt: capped exponential + * (`baseMs * 2^attempt`, ceilinged at `maxMs`) clamped so it never overshoots + * the retry `deadline`. Deterministic (no jitter) so the bound is testable. + */ + private nextBackoff( + attempt: number, + retry: { baseMs: number; maxMs: number }, + deadline: number, + ): number { + const exp = retry.baseMs * 2 ** attempt; + const capped = Math.min(exp, retry.maxMs); + const remaining = Math.max(0, deadline - Date.now()); + return Math.max(0, Math.min(capped, remaining)); + } + + /** Promise-based delay (extracted so tests can reason about the retry loop). */ + private sleep(ms: number): Promise { + return new Promise((resolve) => { + const t = setTimeout(resolve, ms); + t.unref?.(); + }); + } } diff --git a/packages/git-sync/src/engine/git.ts b/packages/git-sync/src/engine/git.ts index f334263d..9c029235 100644 --- a/packages/git-sync/src/engine/git.ts +++ b/packages/git-sync/src/engine/git.ts @@ -220,6 +220,13 @@ export class VaultGit { // that core.autocrlf=false does not cover). POSIX-only path, which is // fine: the daemon runs on Linux (Docker) / macOS. A system // /etc/gitattributes remains the host admin's domain (out of scope). + // - merge.conflictStyle=merge — CRITICAL (SPEC §9, conflict-marker leak): + // a global `merge.conflictStyle=diff3`/`zdiff3` makes a conflicting merge + // emit an EXTRA `|||||||` base-marker section. The conflict-marker + // scrub on the push side (`stripConflictMarkers`) handles `|||||||` too, + // but pinning the classic `merge` style keeps the markers the engine + // produces to the canonical three (`<<<<<<<`/`=======`/`>>>>>>>`) so + // behavior is deterministic regardless of the operator's global config. // NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a // human running git by hand in the vault must inherit the same neutralized // behavior; a transient `-c` would not persist. (core.quotepath, by @@ -230,6 +237,7 @@ export class VaultGit { await this.run(["config", "core.safecrlf", "false"]); await this.run(["config", "commit.gpgsign", "false"]); await this.run(["config", "core.attributesFile", "/dev/null"]); + await this.run(["config", "merge.conflictStyle", "merge"]); } catch (err: unknown) { const detail = err instanceof Error ? err.message : String(err); throw new Error( diff --git a/packages/git-sync/src/engine/push.ts b/packages/git-sync/src/engine/push.ts index 61083ff4..d3963b67 100644 --- a/packages/git-sync/src/engine/push.ts +++ b/packages/git-sync/src/engine/push.ts @@ -699,19 +699,39 @@ export async function applyPushActions( }); continue; } + const conflicted = hasConflictMarkers(rawBody); const body = stripConflictMarkers(rawBody); // The last-synced version of this file (pre-image) is the common ancestor // for a 3-way merge against the live page, so concurrent human edits are // not clobbered (review #5). Null when the file is new at last-pushed. Its - // body is stripped the SAME way so the merge compares body-to-body. + // body is stripped the SAME way (frontmatter AND conflict markers) so the + // merge compares clean body-to-body: a base that itself carried markers + // (from a prior conflict commit) must never reintroduce marker syntax or a + // stale diff3 base region into the 3-way merge. const baseFull = await deps.git.showFileAtRef(LAST_PUSHED_REF, u.path); - const baseMarkdown = baseFull === null ? null : parsePageFile(baseFull).body; + const baseMarkdown = + baseFull === null + ? null + : stripConflictMarkers(parsePageFile(baseFull).body); const result = await client.importPageMarkdown( u.pageId, body, baseMarkdown, ); updated++; + // CONFLICT VAULT-CLEAN (autoMergeConflicts ON, SPEC §9 marker leak). On ON + // a conflicted page is auto-merged INTO Docmost (the clean `body` above), + // but the file on `main` still carries the raw `<<<<<<<`/`>>>>>>>` markers + // the pull-side `commitMerge` committed. Left as-is they would (1) stay in + // the PUBLISHED vault forever (external clones see raw markers) and (2) + // re-conflict every cycle. So write the CLEAN body back to the vault file + // and record it in `writtenBack` — `runPush` step 7a commits it on `main` + // and re-advances the refs, so the published vault converges to the merged + // content. Only conflicted files are rewritten (no churn for clean updates). + if (conflicted) { + await deps.writeFile(u.path, serializePageFile(u.pageId, body)); + writtenBack.push({ path: u.path, pageId: u.pageId }); + } // §10 loop-guard data: hash the BODY we pushed + capture `updatedAt`. pushed.push({ pageId: u.pageId, @@ -1083,13 +1103,23 @@ export function isPageFile(path: string): boolean { * Docmost). A body is treated as conflicted only when it carries BOTH a begin * (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown setext * heading underline (`=======`) is not mistaken for a conflict. When conflicted, - * the three marker line types are removed while BOTH sides' content is preserved - * (no data loss): the marker SYNTAX never reaches Docmost, but the human's content - * does — where the conflict is visible and fixable rather than silently dropped. + * every marker line type is removed while the human-visible content is preserved + * (no data loss): the marker SYNTAX never reaches Docmost, but the content does — + * where the conflict is visible and fixable rather than silently dropped. + * + * `diff3`/`zdiff3` style: a conflict in that style adds a `|||||||` base section + * (`|||||||` line + the merge-BASE content + `=======`). `ensureRepo` pins + * `merge.conflictStyle=merge` so the engine never produces it, but a vault that + * predates the pin — or content arriving via an external push that a human + * committed in diff3 style — could still carry it. So we ALSO recognize the + * `|||||||` marker and DROP the stale base region it introduces (between + * `|||||||` and `=======`): the base text is neither side's current content, so + * keeping it would inject obsolete lines AND leak a raw `|||||||` marker. */ const CONFLICT_BEGIN_RE = /^<{7}/m; const CONFLICT_END_RE = /^>{7}/m; const CONFLICT_BEGIN_LINE_RE = /^<{7}/; +const CONFLICT_BASE_LINE_RE = /^\|{7}/; const CONFLICT_SEP_LINE_RE = /^={7}/; const CONFLICT_END_LINE_RE = /^>{7}/; @@ -1099,23 +1129,37 @@ export function hasConflictMarkers(body: string): boolean { function stripConflictMarkers(body: string): string { if (!hasConflictMarkers(body)) return body; - // Remove ONLY the three marker line types, and treat a `=======` line as a - // conflict separator ONLY when we are between a `<<<<<<<` begin and a `>>>>>>>` - // end — so a legitimate Markdown setext heading underline (`=======`) outside a - // conflict block is preserved (review finding). Both conflict sides' content is - // kept; only the marker SYNTAX is dropped. - let inBlock = false; + // Track where we are inside a conflict block so a `=======` line is treated as + // a conflict separator ONLY between a `<<<<<<<` begin and a `>>>>>>>` end — a + // legitimate Markdown setext heading underline (`=======`) outside a conflict + // block is preserved (review finding). State machine over the block: + // 'no' — outside any conflict block. + // 'ours' — after `<<<<<<<`, before `|||||||`/`=======` (our side: KEEP). + // 'base' — after `|||||||`, before `=======` (diff3 base region: DROP). + // 'theirs' — after `=======`, before `>>>>>>>` (their side: KEEP). + // Every marker LINE itself is dropped; only the base region's content is also + // dropped (it is stale and not part of either current side). + let state: "no" | "ours" | "base" | "theirs" = "no"; const out: string[] = []; for (const line of body.split("\n")) { if (CONFLICT_BEGIN_LINE_RE.test(line)) { - inBlock = true; + state = "ours"; continue; } - if (CONFLICT_END_LINE_RE.test(line)) { - inBlock = false; + if (state !== "no" && CONFLICT_END_LINE_RE.test(line)) { + state = "no"; continue; } - if (inBlock && CONFLICT_SEP_LINE_RE.test(line)) { + if (state === "ours" && CONFLICT_BASE_LINE_RE.test(line)) { + state = "base"; + continue; + } + if ((state === "ours" || state === "base") && CONFLICT_SEP_LINE_RE.test(line)) { + state = "theirs"; + continue; + } + // Drop the diff3 base region's content (stale, neither current side). + if (state === "base") { continue; } out.push(line); diff --git a/packages/git-sync/test/git.test.ts b/packages/git-sync/test/git.test.ts index 1a914b2b..a3255862 100644 --- a/packages/git-sync/test/git.test.ts +++ b/packages/git-sync/test/git.test.ts @@ -162,6 +162,10 @@ describe('VaultGit (integration; temp repo)', () => { expect(await localConfig('commit.gpgsign')).toBe('false'); expect(await localConfig('core.safecrlf')).toBe('false'); expect(await localConfig('core.attributesFile')).toBe('/dev/null'); + // merge.conflictStyle=merge keeps conflict markers to the canonical three + // (no diff3 `|||||||` base section) regardless of the operator's global + // config (bug #2 marker-leak determinism, SPEC §9). + expect(await localConfig('merge.conflictStyle')).toBe('merge'); // Idempotent: a second run leaves the same single values (no duplicates). await git.ensureRepo(); diff --git a/packages/git-sync/test/redteam-push-cycle.test.ts b/packages/git-sync/test/redteam-push-cycle.test.ts index d0880c80..860ba771 100644 --- a/packages/git-sync/test/redteam-push-cycle.test.ts +++ b/packages/git-sync/test/redteam-push-cycle.test.ts @@ -145,6 +145,79 @@ describe('#13 conflict markers reach Docmost', () => { expect(pushedBody).toContain('their line'); }); + it('autoMergeConflicts on: rewrites the vault file with the CLEAN body so raw markers do not stay in the published vault (bug #2 marker-leak)', async () => { + // Previously the UPDATE path stripped markers for the body SENT to Docmost but + // left the file on `main` carrying raw `<<<<<<<`/`>>>>>>>` forever — the + // published vault external clients clone kept the markers and the page + // re-conflicted every cycle. The fix writes the cleaned body back + records it + // in writtenBack so runPush commits it on `main`. + const { deps, importPageMarkdown } = makeConflictDeps({ + ...makeSettings(), + autoMergeConflicts: true, + }); + + const res = await runPush(deps, { dryRun: false }); + expect(res.mode).toBe('apply'); + + // The clean body was imported into Docmost (no markers). + const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any; + expect(pushedBody).not.toMatch(/[<>=]{7}/); + + // The vault file was rewritten with the cleaned content (no raw markers). + const writeCalls = (deps.writeFile as any).mock.calls as [string, string][]; + const docWrite = writeCalls.find(([p]) => p === 'Doc.md'); + expect(docWrite).toBeDefined(); + expect(docWrite![1]).not.toMatch(/[<>=]{7}/); + expect(docWrite![1]).toContain('my line'); + expect(docWrite![1]).toContain('their line'); + + // It is recorded for the follow-up commit so `main` converges to clean bytes. + expect(res.applied?.writtenBack).toEqual( + expect.arrayContaining([ + expect.objectContaining({ path: 'Doc.md', pageId: 'p-1' }), + ]), + ); + }); + + it('autoMergeConflicts on: strips diff3-style ||||||| base markers + base content (defense-in-depth)', async () => { + // A vault created before `merge.conflictStyle=merge` was pinned (or content a + // human committed in diff3 style) can carry a `||||||| base` section. The + // scrub must drop the `|||||||` marker AND the stale base region, keeping only + // the two live sides — otherwise `|||||||` + obsolete base lines leak into the + // Docmost page. + const diff3Body = + '<<<<<<< HEAD\nmy line\n||||||| base\nold base line\n=======\ntheir line\n>>>>>>> feature\n'; + const file = serializePageFile('p-1', diff3Body); + const { git } = makePushGit({ changes: [{ status: 'M', path: 'Doc.md' }] }); + const importPageMarkdown = vi.fn(async () => ({ success: true })); + const client = { + listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })), + importPageMarkdown, + createPage: vi.fn(), + deletePage: vi.fn(), + movePage: vi.fn(), + renamePage: vi.fn(), + }; + const deps: PushDeps = { + settings: { ...makeSettings(), autoMergeConflicts: true }, + git, + makeClient: () => client as any, + readFile: vi.fn(async (p: string) => { + if (p === 'Doc.md') return file; + throw new Error(`no such file: ${p}`); + }), + writeFile: vi.fn(async () => {}), + log: () => {}, + }; + + await runPush(deps, { dryRun: false }); + const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any; + expect(pushedBody).not.toContain('|||||||'); + expect(pushedBody).not.toContain('old base line'); // stale base dropped + expect(pushedBody).toContain('my line'); + expect(pushedBody).toContain('their line'); + }); + it('CREATE branch (autoMergeConflicts off): does NOT create a page from a conflicted NEW file; records a create failure', async () => { // The conflict-markers guard is DUPLICATED on the CREATE path (a brand-new // .md with NO gitmost_id, status 'A') and was previously untested — only the