fix(git-sync): push 503 starvation + concurrent-edit marker leak/silent loss

Bug #1 (push 503 starvation): an external receive-pack that briefly overlapped
a poll cycle immediately 503'd because the per-space single-writer lock was
held. Add a BOUNDED retry-acquire on the PUSH path only (SpaceLockService
.withSpaceLock acquireRetry: capped exponential backoff up to ~5s); a transient
overlap now waits and succeeds, a genuinely stuck cycle still 503s after the
bound. The poll cycle passes no retry (immediate skip). Push result stays
deterministic: the receive-pack only runs once the lock is held, so a 503 never
leaves a half-applied ref.

Bug #2 (concurrent-edit marker leak + silent same-block loss):
- Marker leak (a): the push UPDATE path stripped markers for the body sent to
  Docmost but left raw <<<<<<</>>>>>>> committed on the published `main` vault
  forever (autoMergeConflicts ON). Now the cleaned body is written back to the
  vault file + recorded in writtenBack so runPush commits it on `main` and the
  vault converges to clean bytes.
- Marker leak (b): pin merge.conflictStyle=merge in ensureRepo and teach
  stripConflictMarkers/hasConflictMarkers about the diff3 `|||||||` base section
  (drop the marker AND the stale base region) so diff3/zdiff3 conflicts can
  never leak `|||||||` + base content into a page. Also scrub the 3-way merge
  BASE markdown.
- Silent same-block loss: the block 3-way merge still resolves same-block
  conflicts deterministically to git, but it is no longer silent: diff3Plan now
  reports a conflict count (mergeXmlFragments3WayWithStats), gitSyncWriteBody
  logs it, and the persistence boundary-snapshot now fires for git-sync writes
  over a non-git-sync baseline so the human's pre-merge content is preserved in
  page history (recoverable). Full both-preserved persisted-conflict UI remains
  the deferred redesign.

Tests: space-lock bounded-retry (success/stuck/poll-immediate); push vault-clean
+ diff3 |||||||  strip; ensureRepo conflictStyle pin; diff3Plan/3-way conflict
counts; persistence git-sync boundary snapshot. Server tsc clean; git-sync
vitest + server collaboration/git-sync jest all green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-28 20:03:21 +03:00
parent 906733b5c8
commit b7e5cb6970
15 changed files with 567 additions and 77 deletions

View File

@@ -10,7 +10,7 @@ import * as Y from 'yjs';
import { User } from '@docmost/db/types/entity.types'; import { User } from '@docmost/db/types/entity.types';
import { import {
mergeXmlFragments, mergeXmlFragments,
mergeXmlFragments3Way, mergeXmlFragments3WayWithStats,
} from './merge/yjs-body-merge'; } from './merge/yjs-body-merge';
export type CollabEventHandlers = ReturnType< export type CollabEventHandlers = ReturnType<
@@ -168,11 +168,24 @@ export class CollaborationHandler {
const liveFrag = doc.getXmlFragment('default'); const liveFrag = doc.getXmlFragment('default');
const targetFrag = targetDoc.getXmlFragment('default'); const targetFrag = targetDoc.getXmlFragment('default');
if (baseDoc) { if (baseDoc) {
mergeXmlFragments3Way( const { conflicts } = mergeXmlFragments3WayWithStats(
liveFrag, liveFrag,
targetFrag, targetFrag,
baseDoc.getXmlFragment('default'), baseDoc.getXmlFragment('default'),
); );
// SAME-BLOCK conflict contract (SPEC §9): a block both the human
// and git changed resolves to GIT (deterministic). Make that
// OBSERVABLE rather than silent — log it. The losing human content
// is NOT destroyed: the persistence extension's boundary snapshot
// pins the pre-merge page state to history on this user->git-sync
// transition, so it stays recoverable.
if (conflicts > 0) {
this.logger.warn(
`git-sync merge for ${documentName}: ${conflicts} same-block ` +
`conflict(s) resolved to the git version; the prior page ` +
`state is preserved in page history (recoverable).`,
);
}
} else { } else {
mergeXmlFragments(liveFrag, targetFrag); mergeXmlFragments(liveFrag, targetFrag);
} }

View File

@@ -170,17 +170,32 @@ describe('PersistenceExtension.onStoreDocument — provenance precedence (#2)',
expect(sourceOf(pageRepo)).toBe('agent'); expect(sourceOf(pageRepo)).toBe('agent');
}); });
// --- negative: a git-sync store must NOT pin a boundary history snapshot ---- // --- boundary snapshot for a git-sync store over a HUMAN baseline -----------
// The boundary-snapshot branch only fires when the resolved source is 'agent' // SPEC §9 observable-loss guard (bug #2): a git-sync body write is a block-level
// AND the prior persisted source is not 'agent'. A git-sync store resolves to // 3-way merge whose same-block rule is "git wins". To keep a concurrent human
// 'git-sync', so saveHistory must NOT be called. // edit RECOVERABLE rather than silently overwritten, a git-sync store over a
it('does NOT write a boundary history snapshot for a git-sync store', async () => { // prior NON-git-sync baseline pins that prior state to page history first —
// exactly like the agent path. So saveHistory MUST be called here.
it('DOES pin a boundary snapshot for a git-sync store over a prior human state', async () => {
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' }); const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });
await ext.onStoreDocument( await ext.onStoreDocument(
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }), makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
); );
expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1);
});
// --- negative: a git-sync store over a git-sync baseline does NOT re-pin -----
// The boundary is pinned once on the transition INTO git-sync; a subsequent
// git-sync store over an already-git-sync baseline must not churn history.
it('does NOT re-pin a boundary snapshot for a git-sync store over a git-sync baseline', async () => {
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'git-sync' });
await ext.onStoreDocument(
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
);
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
}); });

View File

@@ -274,21 +274,30 @@ export class PersistenceExtension implements Extension {
//this.logger.debug('Contributors error:' + err?.['message']); //this.logger.debug('Contributors error:' + err?.['message']);
} }
// Approach A — boundary snapshot before the agent's first edit. // Approach A — boundary snapshot before a MACHINE write overwrites a
// When this store is the agent's and the page's currently persisted // human (or other-source) baseline. When this store is from a machine
// state was authored by a human, pin that human state as its own // source — the AGENT or GIT-SYNC — and the page's currently persisted
// history version BEFORE the agent overwrites it. `page` still holds // state was authored by a DIFFERENT source, pin that prior state as its
// the OLD content/provenance here, so saveHistory(page) captures the // own history version BEFORE the machine write overwrites it. `page`
// pre-agent state tagged 'user'. The agent's new content is // still holds the OLD content/provenance here, so saveHistory(page)
// snapshotted later by the debounced PAGE_HISTORY job ('agent'). Skip // captures the pre-write state. The machine's new content is snapshotted
// if the prior state is already agent-authored (boundary already // later by the debounced PAGE_HISTORY job.
// pinned on the user->agent transition), if the page is effectively //
// empty, or if the latest existing snapshot already equals this human // For GIT-SYNC this is the OBSERVABLE-LOSS guard (SPEC §9 conflict
// state (avoid duplicates). // contract): a git-sync body write is a block-level 3-way merge whose
if ( // same-block rule is "git wins". Without this pin, a concurrent human
lastUpdatedSource === 'agent' && // edit to a block git also changed would be overwritten with NO trace.
page.lastUpdatedSource !== 'agent' // Pinning the pre-merge state here means the human's content is always
) { // RECOVERABLE via page history rather than silently lost — git still
// wins the live doc deterministically, but nothing is destroyed.
//
// Skip if the prior state was already authored by THIS machine source
// (boundary already pinned on the transition into it), if the page is
// effectively empty, or if the latest existing snapshot already equals
// the prior state (avoid duplicates).
const isMachineWrite =
lastUpdatedSource === 'agent' || lastUpdatedSource === 'git-sync';
if (isMachineWrite && page.lastUpdatedSource !== lastUpdatedSource) {
const lastHistory = await this.pageHistoryRepo.findPageLastHistory( const lastHistory = await this.pageHistoryRepo.findPageLastHistory(
pageId, pageId,
{ includeContent: true, trx }, { includeContent: true, trx },

View File

@@ -1,4 +1,8 @@
import { diff3Plan, type Pick } from './three-way-merge'; import {
diff3Plan,
diff3PlanWithConflicts,
type Pick,
} from './three-way-merge';
// Materialize a plan into the merged key sequence for assertion. // Materialize a plan into the merged key sequence for assertion.
function apply(plan: Pick[], live: string[], target: string[]): string[] { function apply(plan: Pick[], live: string[], target: string[]): string[] {
@@ -31,6 +35,49 @@ describe('diff3Plan (block-level three-way merge)', () => {
]); ]);
}); });
// Bug #2 observability: diff3PlanWithConflicts reports SAME-BLOCK conflicts so
// the caller can surface the "git wins" loss (log + history pin) instead of
// dropping the human side silently.
describe('diff3PlanWithConflicts (same-block conflict reporting)', () => {
it('reports 0 conflicts when sides changed DIFFERENT blocks (clean merge)', () => {
const r = diff3PlanWithConflicts(
['1', '2', '3'],
['H', '2', '3'],
['1', '2', 'G'],
);
expect(r.conflicts).toBe(0);
expect(apply(r.picks, ['H', '2', '3'], ['1', '2', 'G'])).toEqual([
'H',
'2',
'G',
]);
});
it('reports 1 conflict and git wins when BOTH rewrote the SAME block', () => {
const r = diff3PlanWithConflicts(
['1', '2', '3'],
['1', 'H', '3'], // human rewrote block 2
['1', 'G', '3'], // git rewrote block 2
);
expect(r.conflicts).toBe(1);
// Git wins the contested block; the human 'H' is NOT in the picks.
expect(apply(r.picks, ['1', 'H', '3'], ['1', 'G', '3'])).toEqual([
'1',
'G',
'3',
]);
});
it('does NOT count a git-only region (no human content to lose) as a conflict', () => {
const r = diff3PlanWithConflicts(
['1', '2', '3'],
['1', '2', '3'], // human unchanged
['1', '9', '3'], // git rewrote block 2
);
expect(r.conflicts).toBe(0);
});
});
it('human and git changed DIFFERENT blocks -> both preserved', () => { it('human and git changed DIFFERENT blocks -> both preserved', () => {
// human rewrote block 1, git rewrote block 3. // human rewrote block 1, git rewrote block 3.
expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([ expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([

View File

@@ -183,15 +183,46 @@ export interface Pick {
index: number; index: number;
} }
/**
* The merged block order PLUS how many regions resolved as a genuine SAME-BLOCK
* conflict (both sides rewrote the same base block — `tryMergeRegion` returned
* null and git won the whole region, so the live/human version of those blocks
* is NOT in `picks`). `conflicts > 0` is the OBSERVABLE signal the caller uses to
* surface "git won a concurrent same-block edit" (log it + pin the human
* baseline to page history) instead of dropping the human side silently.
*/
export interface Diff3Result {
picks: Pick[];
conflicts: number;
}
/** /**
* Three-way merge of base `o`, live `a`, target `b` (arrays of block keys). * Three-way merge of base `o`, live `a`, target `b` (arrays of block keys).
* Returns the merged block order as picks from live/target. * Returns the merged block order as picks from live/target. Thin wrapper over
* `diff3PlanWithConflicts` (kept for the existing pure-array callers/tests).
*/ */
export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] { export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
return diff3PlanWithConflicts(o, a, b).picks;
}
/**
* Like `diff3Plan` but also reports the SAME-BLOCK conflict count (see
* `Diff3Result`). A region where both the human and git rewrote the same base
* block cannot be merged automatically; the rule is deterministic — GIT WINS the
* whole region — but the human's version of those blocks is then absent from the
* picks, so we count it so the caller can make the loss observable/recoverable
* rather than silent (the documented conflict contract).
*/
export function diff3PlanWithConflicts(
o: string[],
a: string[],
b: string[],
): Diff3Result {
const oToA = matchMap(lcsPairs(o, a)); const oToA = matchMap(lcsPairs(o, a));
const oToB = matchMap(lcsPairs(o, b)); const oToB = matchMap(lcsPairs(o, b));
const res: Pick[] = []; const res: Pick[] = [];
let conflicts = 0;
let oi = 0; let oi = 0;
let ai = 0; let ai = 0;
let bi = 0; let bi = 0;
@@ -223,6 +254,10 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
); );
} }
} else { } else {
// SAME-BLOCK CONFLICT: count it ONLY when the human side actually had
// content in this region that git's win discards (live region non-empty).
// A region only git rewrote (live region empty) is not a human loss.
if (aEnd > ai) conflicts++;
for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k }); for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k });
} }
@@ -235,5 +270,5 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
oi = anchor + 1; oi = anchor + 1;
} }
return res; return { picks: res, conflicts };
} }

View File

@@ -3,6 +3,7 @@ import * as Y from 'yjs';
import { import {
mergeXmlFragments, mergeXmlFragments,
mergeXmlFragments3Way, mergeXmlFragments3Way,
mergeXmlFragments3WayWithStats,
cloneXmlNode, cloneXmlNode,
diffBlocks, diffBlocks,
} from './yjs-body-merge'; } from './yjs-body-merge';
@@ -179,6 +180,40 @@ describe('yjs-body-merge', () => {
expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']); expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
}); });
// Bug #2 observability: the stats variant reports the same-block conflict so
// the handler can log it + the persistence layer can pin the human baseline.
it('reports the same-block conflict count via mergeXmlFragments3WayWithStats', () => {
const base = new Y.Doc();
const live = new Y.Doc();
const target = new Y.Doc();
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
const liveFrag = buildFragment(live, ['a', 'HUMAN', 'c']);
const targetFrag = buildFragment(target, ['a', 'GIT', 'c']);
let result!: { applied: number; conflicts: number };
live.transact(() => {
result = mergeXmlFragments3WayWithStats(liveFrag, targetFrag, baseFrag);
});
expect(result.conflicts).toBe(1);
expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
});
it('reports 0 conflicts for a clean different-block 3-way merge', () => {
const base = new Y.Doc();
const live = new Y.Doc();
const target = new Y.Doc();
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
const liveFrag = buildFragment(live, ['HUMAN', 'b', 'c']);
const targetFrag = buildFragment(target, ['a', 'b', 'GIT']);
let result!: { applied: number; conflicts: number };
live.transact(() => {
result = mergeXmlFragments3WayWithStats(liveFrag, targetFrag, baseFrag);
});
expect(result.conflicts).toBe(0);
expect(texts(liveFrag)).toEqual(['HUMAN', 'b', 'GIT']);
});
it('git change with no concurrent human edit (live == base) applies cleanly', () => { it('git change with no concurrent human edit (live == base) applies cleanly', () => {
const base = new Y.Doc(); const base = new Y.Doc();
const live = new Y.Doc(); const live = new Y.Doc();

View File

@@ -3,7 +3,7 @@ import { getSchema } from '@tiptap/core';
import type { Schema } from '@tiptap/pm/model'; import type { Schema } from '@tiptap/pm/model';
import { tiptapExtensions } from '../collaboration.util'; import { tiptapExtensions } from '../collaboration.util';
import { diff3Plan } from './three-way-merge'; import { diff3PlanWithConflicts } from './three-way-merge';
import { buildLcsTable } from './lcs'; import { buildLcsTable } from './lcs';
/** /**
@@ -295,6 +295,20 @@ export function mergeXmlFragments(
return applied; return applied;
} }
/** Outcome of a 3-way block merge: ops applied + same-block conflict count. */
export interface Merge3WayResult {
/** Number of block insert/delete operations spliced into `live`. */
applied: number;
/**
* Regions where the human AND git rewrote the SAME base block. The rule is
* deterministic (GIT WINS the region), so the human's version of those blocks
* is dropped from the live doc. `conflicts > 0` is the OBSERVABLE signal the
* caller uses to LOG the loss and pin the human baseline to page history (so it
* is recoverable), instead of the edit vanishing silently.
*/
conflicts: number;
}
/** /**
* THREE-WAY block merge: reconcile `live` toward `target` using `base` (the * THREE-WAY block merge: reconcile `live` toward `target` using `base` (the
* last-synced common ancestor) so a block only the human changed is KEPT and a * last-synced common ancestor) so a block only the human changed is KEPT and a
@@ -305,20 +319,40 @@ export function mergeXmlFragments(
* target); we materialize that as a virtual target fragment and reuse the 2-way * target); we materialize that as a virtual target fragment and reuse the 2-way
* `mergeXmlFragments` to splice it into `live` minimally (so untouched live block * `mergeXmlFragments` to splice it into `live` minimally (so untouched live block
* instances — and their in-flight edits — stay put). MUST be called inside a Yjs * instances — and their in-flight edits — stay put). MUST be called inside a Yjs
* transaction. Returns the number of block operations applied. * transaction. Returns the number of block operations applied. (Use
* `mergeXmlFragments3WayWithStats` when the SAME-BLOCK conflict count is needed.)
*/ */
export function mergeXmlFragments3Way( export function mergeXmlFragments3Way(
live: Y.XmlFragment, live: Y.XmlFragment,
target: Y.XmlFragment, target: Y.XmlFragment,
base: Y.XmlFragment, base: Y.XmlFragment,
): number { ): number {
return mergeXmlFragments3WayWithStats(live, target, base).applied;
}
/**
* As `mergeXmlFragments3Way`, but also returns the SAME-BLOCK conflict count so
* the caller can make a "git won a concurrent same-block edit" event OBSERVABLE
* (the documented conflict contract: git wins deterministically, but the losing
* human content is never destroyed silently — it is logged and recoverable via
* page history).
*/
export function mergeXmlFragments3WayWithStats(
live: Y.XmlFragment,
target: Y.XmlFragment,
base: Y.XmlFragment,
): Merge3WayResult {
const liveKids = live.toArray(); const liveKids = live.toArray();
const targetKids = target.toArray(); const targetKids = target.toArray();
const liveKeys = liveKids.map(key); const liveKeys = liveKids.map(key);
const targetKeys = targetKids.map(key); const targetKeys = targetKids.map(key);
const baseKeys = base.toArray().map(key); const baseKeys = base.toArray().map(key);
const plan = diff3Plan(baseKeys, liveKeys, targetKeys); const { picks: plan, conflicts } = diff3PlanWithConflicts(
baseKeys,
liveKeys,
targetKeys,
);
// Build the merged block sequence in a throwaway doc, cloning from whichever // Build the merged block sequence in a throwaway doc, cloning from whichever
// side each pick came from, then 2-way merge it back into the live fragment. // side each pick came from, then 2-way merge it back into the live fragment.
@@ -331,5 +365,5 @@ export function mergeXmlFragments3Way(
); );
if (nodes.length) mergedFrag.insert(0, nodes); if (nodes.length) mergedFrag.insert(0, nodes);
return mergeXmlFragments(live, mergedFrag); return { applied: mergeXmlFragments(live, mergedFrag), conflicts };
} }

View File

@@ -39,3 +39,24 @@ export const GIT_SYNC_LOCK_PREFIX = 'git-sync:lock:';
* and the Redis lock prevents two instances racing the same space. * and the Redis lock prevents two instances racing the same space.
*/ */
export const GIT_SYNC_LOCK_TTL_MS = 5 * 60 * 1000; export const GIT_SYNC_LOCK_TTL_MS = 5 * 60 * 1000;
/**
* Bounded retry budget for ACQUIRING the per-space lock on the PUSH (external
* receive-pack) path. The poll cycle holds the single-writer lock while it
* processes a whole space, so a legitimate `git push` that arrives during a
* cycle would otherwise IMMEDIATELY 503 (GitSyncLockHeldError) even though the
* cycle is about to release the lock in well under a second for most spaces.
* Under continuous polling that made a majority of pushes 503 non-
* deterministically. So the push path retries the acquire with a small capped
* backoff for up to ~`TOTAL_MS` BEFORE giving up — a transient overlap with a
* cycle no longer fails the push, while a genuinely stuck/long cycle still
* surfaces a 503 after the bound (git then retries the whole push, which is
* safe: the receive-pack only runs ONCE the lock is held, so a 503 never leaves
* a half-applied ref). The POLL cycle itself does NOT retry (it just skips and
* the next tick reconciles), so this is push-only — the smaller blast radius.
*/
export const GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS = 5_000;
/** First backoff between push lock-acquire attempts (ms); doubles, capped. */
export const GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS = 100;
/** Cap on the per-attempt push lock-acquire backoff (ms). */
export const GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS = 500;

View File

@@ -22,6 +22,11 @@ import { EnvironmentService } from '../../environment/environment.service';
import { GitmostDataSourceService } from './gitmost-datasource.service'; import { GitmostDataSourceService } from './gitmost-datasource.service';
import { VaultRegistryService } from './vault-registry.service'; import { VaultRegistryService } from './vault-registry.service';
import { SpaceLockService } from './space-lock.service'; import { SpaceLockService } from './space-lock.service';
import {
GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS,
GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS,
GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS,
} from '../git-sync.constants';
/** A space the poll loop should reconcile: its id + the workspace it lives in. */ /** A space the poll loop should reconcile: its id + the workspace it lives in. */
interface EnabledSpace { interface EnabledSpace {
@@ -244,7 +249,9 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
} }
const serviceUserId = this.environmentService.getGitSyncServiceUserId(); const serviceUserId = this.environmentService.getGitSyncServiceUserId();
const result = await this.spaceLock.withSpaceLock(spaceId, async (signal) => { const result = await this.spaceLock.withSpaceLock(
spaceId,
async (signal) => {
// 1) Stream the receive-pack to the client (durable commits land on main). // 1) Stream the receive-pack to the client (durable commits land on main).
// Pass the lost-lock signal so the receive-pack child is killed if the lock // Pass the lost-lock signal so the receive-pack child is killed if the lock
// lapses mid-write (no concurrent working-tree writer across replicas). // lapses mid-write (no concurrent working-tree writer across replicas).
@@ -273,7 +280,23 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
); );
} }
return; return;
}); },
// BOUNDED retry-acquire (push path only): a push that briefly overlaps a
// poll cycle waits a moment (capped backoff up to the budget) instead of
// immediately 503-ing — the cycle releases the lock in well under a second
// for most spaces, so this turns a transient overlap into a SUCCESS rather
// than a spurious failure. A genuinely long/stuck cycle still skips after
// the bound -> GitSyncLockHeldError -> 503, and git retries the whole push
// (the receive-pack only runs once the lock is held, so there is never a
// half-applied ref on a 503).
{
acquireRetry: {
timeoutMs: GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS,
baseMs: GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS,
maxMs: GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS,
},
},
);
// The lock was held (in-progress or another replica) — surface to the caller // The lock was held (in-progress or another replica) — surface to the caller
// so the HTTP handler can answer 503 and let git retry. // so the HTTP handler can answer 503 and let git retry.

View File

@@ -123,6 +123,65 @@ describe('SpaceLockService', () => {
}); });
}); });
// Bug #1 (push 503 starvation): the PUSH path passes a bounded acquireRetry so a
// transient overlap with a poll cycle is retried (and succeeds) instead of an
// immediate 503. A genuinely stuck lock still skips after the bound. The poll
// cycle passes NO retry (immediate skip), so only the push path waits.
describe('bounded acquire-retry (push path)', () => {
const retry = { timeoutMs: 5_000, baseMs: 100, maxMs: 500 };
it('retries the acquire and SUCCEEDS when the lock is briefly held then released', async () => {
const { service, redis } = build();
// First acquire attempt fails (lock briefly held by a cycle), the next
// succeeds — the bounded retry must turn this into a SUCCESS, not a skip.
redis.set
.mockResolvedValueOnce(null) // attempt 1: held
.mockResolvedValueOnce(null) // attempt 2: still held
.mockResolvedValue('OK'); // attempt 3+: released -> acquired
const fn = jest.fn(async () => 'pushed');
const result = await service.withSpaceLock('space-1', fn, {
acquireRetry: retry,
});
expect(result).toBe('pushed');
expect(fn).toHaveBeenCalledTimes(1);
expect(redis.set.mock.calls.length).toBeGreaterThanOrEqual(3);
// The acquired lock is released in finally (DEL-CAS eval).
expect(redis.eval).toHaveBeenCalledTimes(1);
expect(redis.eval.mock.calls[0][0]).toContain('del');
});
it('still skips (lock-held) after the bound when the lock stays stuck — and never runs fn', async () => {
const { service, redis } = build();
redis.set.mockResolvedValue(null); // permanently held
const fn = jest.fn(async () => 'pushed');
const result = await service.withSpaceLock('space-1', fn, {
acquireRetry: { timeoutMs: 300, baseMs: 50, maxMs: 100 },
});
expect(result).toEqual({ skipped: 'lock-held' });
expect(fn).not.toHaveBeenCalled();
// It retried more than once before giving up (bound > one interval).
expect(redis.set.mock.calls.length).toBeGreaterThan(1);
// Never acquired -> never released.
expect(redis.eval).not.toHaveBeenCalled();
});
it('without acquireRetry (poll path) a held lock skips IMMEDIATELY (single attempt)', async () => {
const { service, redis } = build();
redis.set.mockResolvedValue(null);
const fn = jest.fn(async () => 'cycle');
const result = await service.withSpaceLock('space-1', fn);
expect(result).toEqual({ skipped: 'lock-held' });
expect(redis.set).toHaveBeenCalledTimes(1); // no retry
expect(fn).not.toHaveBeenCalled();
});
});
describe('fn throwing', () => { describe('fn throwing', () => {
it('propagates the throw AND still releases (eval) in finally', async () => { it('propagates the throw AND still releases (eval) in finally', async () => {
const { service, redis } = build(); const { service, redis } = build();

View File

@@ -120,41 +120,57 @@ export class SpaceLockService {
} }
/** /**
* Run `fn` under the per-space lock: the in-process mutex (no overlapping * Options for `withSpaceLock`. `acquireRetry` (PUSH path only) bounds a
* cycles on this instance) AND the Redis leader lock (single writer across * retry-acquire loop: if the lock cannot be entered on the first try, keep
* replicas). Returns `fn`'s result, or a skip sentinel when the lock could not * retrying with a capped exponential backoff until `timeoutMs` elapses before
* be acquired — `{ skipped: 'in-progress' }` (this instance is mid-cycle) or * returning the skip sentinel. The poll cycle holds the lock while it
* `{ skipped: 'lock-held' }` (another replica holds the Redis lock). The mutex * processes a whole space, so a legitimate external push that briefly overlaps
* + Redis lock are always released in a `finally`, even when `fn` throws (the * a cycle should WAIT a moment rather than immediately 503 (bug: ~60% of
* throw propagates to the caller). This is the single reusable wrapper shared * pushes 503'd under continuous polling). The poll cycle passes NO retry (it
* by `runOnce` (the poll/admin cycle) and `ingestExternalPush` (a push from a * just skips and the next tick reconciles).
* git client over HTTP) so both serialize against each other identically.
*/ */
async withSpaceLock<T>( async withSpaceLock<T>(
spaceId: string, spaceId: string,
fn: (signal: AbortSignal) => Promise<T>, fn: (signal: AbortSignal) => Promise<T>,
options?: {
acquireRetry?: { timeoutMs: number; baseMs: number; maxMs: number };
},
): Promise<T | { skipped: 'lock-held' | 'in-progress' }> { ): Promise<T | { skipped: 'lock-held' | 'in-progress' }> {
if (this.running.has(spaceId)) { const retry = options?.acquireRetry;
return { skipped: 'in-progress' }; // Deadline for the bounded retry-acquire (push path). `Date.now()` once so a
} // slow first attempt does not over-extend the budget.
// Cross-instance, same-process single-writer guard: another live holder (a const deadline = retry ? Date.now() + retry.timeoutMs : 0;
// different SpaceLockService in this process) is mid-cycle for this space. let attempt = 0;
// This survives a swallowed heartbeat / Redis TTL lapse, so a second writer for (;;) {
// in the process cannot race the working tree — it is rejected 'lock-held'. // Reserve the in-process slot synchronously (before any await) so two
if (SpaceLockService.liveLocks.has(spaceId)) { // concurrent same-space calls on THIS instance cannot both pass the guard
return { skipped: 'lock-held' }; // and race acquire(). On any failure this is released before we retry/skip.
} const reservation = this.tryReserveInProcess(spaceId);
// Reserve the in-process slot synchronously (before any await) so two if (reservation) {
// concurrent same-space calls on THIS instance cannot both pass the guard and // Could not even reserve in-process (this instance mid-cycle, or another
// race acquire(). Redis NX is already authoritative across replicas; this just // live holder in the process). Retry within the bound, else skip.
// closes the in-process TOCTOU window. Released in the outer finally on every if (retry && Date.now() < deadline) {
// path (acquire-failure, fn-throw, normal completion). await this.sleep(this.nextBackoff(attempt++, retry, deadline));
this.running.add(spaceId); continue;
SpaceLockService.liveLocks.set(spaceId, this.instanceId); }
try { return reservation;
if (!(await this.acquire(spaceId))) { }
// Reserved in-process — now contend for the Redis leader lock. Release the
// in-process slot on EVERY non-running path so a retry/skip leaves no leak.
let acquired = false;
try {
acquired = await this.acquire(spaceId);
} finally {
if (!acquired) this.releaseInProcess(spaceId);
}
if (!acquired) {
if (retry && Date.now() < deadline) {
await this.sleep(this.nextBackoff(attempt++, retry, deadline));
continue;
}
return { skipped: 'lock-held' }; return { skipped: 'lock-held' };
} }
// Both locks held — run `fn` under the heartbeat, releasing in `finally`.
// Lost-lock signal: a failed/CAS-missed heartbeat refresh aborts this so the // Lost-lock signal: a failed/CAS-missed heartbeat refresh aborts this so the
// protected fn can stop instead of writing blind after our lock lapsed. // protected fn can stop instead of writing blind after our lock lapsed.
const controller = new AbortController(); const controller = new AbortController();
@@ -172,10 +188,64 @@ export class SpaceLockService {
} finally { } finally {
clearInterval(heartbeat); clearInterval(heartbeat);
await this.release(spaceId); await this.release(spaceId);
this.releaseInProcess(spaceId);
} }
} finally {
this.running.delete(spaceId);
SpaceLockService.liveLocks.delete(spaceId);
} }
} }
/**
* Synchronously try to reserve the in-process single-writer slot for a space.
* Returns a skip sentinel when another holder is live (this instance mid-cycle
* -> 'in-progress'; another SpaceLockService in this process -> 'lock-held'),
* or `null` when the slot was reserved (caller MUST `releaseInProcess` later).
* Both checks + the reservation happen with NO await between them so two
* concurrent same-space calls cannot both pass.
*/
private tryReserveInProcess(
spaceId: string,
): { skipped: 'lock-held' | 'in-progress' } | null {
if (this.running.has(spaceId)) {
return { skipped: 'in-progress' };
}
// Cross-instance, same-process single-writer guard: another live holder (a
// different SpaceLockService in this process) is mid-cycle for this space.
// This survives a swallowed heartbeat / Redis TTL lapse, so a second writer
// in the process cannot race the working tree — it is rejected 'lock-held'.
if (SpaceLockService.liveLocks.has(spaceId)) {
return { skipped: 'lock-held' };
}
this.running.add(spaceId);
SpaceLockService.liveLocks.set(spaceId, this.instanceId);
return null;
}
/** Release the in-process single-writer slot reserved by tryReserveInProcess. */
private releaseInProcess(spaceId: string): void {
this.running.delete(spaceId);
SpaceLockService.liveLocks.delete(spaceId);
}
/**
* Backoff (ms) before the next push lock-acquire attempt: capped exponential
* (`baseMs * 2^attempt`, ceilinged at `maxMs`) clamped so it never overshoots
* the retry `deadline`. Deterministic (no jitter) so the bound is testable.
*/
private nextBackoff(
attempt: number,
retry: { baseMs: number; maxMs: number },
deadline: number,
): number {
const exp = retry.baseMs * 2 ** attempt;
const capped = Math.min(exp, retry.maxMs);
const remaining = Math.max(0, deadline - Date.now());
return Math.max(0, Math.min(capped, remaining));
}
/** Promise-based delay (extracted so tests can reason about the retry loop). */
private sleep(ms: number): Promise<void> {
return new Promise((resolve) => {
const t = setTimeout(resolve, ms);
t.unref?.();
});
}
} }

View File

@@ -220,6 +220,13 @@ export class VaultGit {
// that core.autocrlf=false does not cover). POSIX-only path, which is // that core.autocrlf=false does not cover). POSIX-only path, which is
// fine: the daemon runs on Linux (Docker) / macOS. A system // fine: the daemon runs on Linux (Docker) / macOS. A system
// /etc/gitattributes remains the host admin's domain (out of scope). // /etc/gitattributes remains the host admin's domain (out of scope).
// - merge.conflictStyle=merge — CRITICAL (SPEC §9, conflict-marker leak):
// a global `merge.conflictStyle=diff3`/`zdiff3` makes a conflicting merge
// emit an EXTRA `|||||||` base-marker section. The conflict-marker
// scrub on the push side (`stripConflictMarkers`) handles `|||||||` too,
// but pinning the classic `merge` style keeps the markers the engine
// produces to the canonical three (`<<<<<<<`/`=======`/`>>>>>>>`) so
// behavior is deterministic regardless of the operator's global config.
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a // NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
// human running git by hand in the vault must inherit the same neutralized // human running git by hand in the vault must inherit the same neutralized
// behavior; a transient `-c` would not persist. (core.quotepath, by // behavior; a transient `-c` would not persist. (core.quotepath, by
@@ -230,6 +237,7 @@ export class VaultGit {
await this.run(["config", "core.safecrlf", "false"]); await this.run(["config", "core.safecrlf", "false"]);
await this.run(["config", "commit.gpgsign", "false"]); await this.run(["config", "commit.gpgsign", "false"]);
await this.run(["config", "core.attributesFile", "/dev/null"]); await this.run(["config", "core.attributesFile", "/dev/null"]);
await this.run(["config", "merge.conflictStyle", "merge"]);
} catch (err: unknown) { } catch (err: unknown) {
const detail = err instanceof Error ? err.message : String(err); const detail = err instanceof Error ? err.message : String(err);
throw new Error( throw new Error(

View File

@@ -699,19 +699,39 @@ export async function applyPushActions(
}); });
continue; continue;
} }
const conflicted = hasConflictMarkers(rawBody);
const body = stripConflictMarkers(rawBody); const body = stripConflictMarkers(rawBody);
// The last-synced version of this file (pre-image) is the common ancestor // The last-synced version of this file (pre-image) is the common ancestor
// for a 3-way merge against the live page, so concurrent human edits are // for a 3-way merge against the live page, so concurrent human edits are
// not clobbered (review #5). Null when the file is new at last-pushed. Its // not clobbered (review #5). Null when the file is new at last-pushed. Its
// body is stripped the SAME way so the merge compares body-to-body. // body is stripped the SAME way (frontmatter AND conflict markers) so the
// merge compares clean body-to-body: a base that itself carried markers
// (from a prior conflict commit) must never reintroduce marker syntax or a
// stale diff3 base region into the 3-way merge.
const baseFull = await deps.git.showFileAtRef(LAST_PUSHED_REF, u.path); const baseFull = await deps.git.showFileAtRef(LAST_PUSHED_REF, u.path);
const baseMarkdown = baseFull === null ? null : parsePageFile(baseFull).body; const baseMarkdown =
baseFull === null
? null
: stripConflictMarkers(parsePageFile(baseFull).body);
const result = await client.importPageMarkdown( const result = await client.importPageMarkdown(
u.pageId, u.pageId,
body, body,
baseMarkdown, baseMarkdown,
); );
updated++; updated++;
// CONFLICT VAULT-CLEAN (autoMergeConflicts ON, SPEC §9 marker leak). On ON
// a conflicted page is auto-merged INTO Docmost (the clean `body` above),
// but the file on `main` still carries the raw `<<<<<<<`/`>>>>>>>` markers
// the pull-side `commitMerge` committed. Left as-is they would (1) stay in
// the PUBLISHED vault forever (external clones see raw markers) and (2)
// re-conflict every cycle. So write the CLEAN body back to the vault file
// and record it in `writtenBack` — `runPush` step 7a commits it on `main`
// and re-advances the refs, so the published vault converges to the merged
// content. Only conflicted files are rewritten (no churn for clean updates).
if (conflicted) {
await deps.writeFile(u.path, serializePageFile(u.pageId, body));
writtenBack.push({ path: u.path, pageId: u.pageId });
}
// §10 loop-guard data: hash the BODY we pushed + capture `updatedAt`. // §10 loop-guard data: hash the BODY we pushed + capture `updatedAt`.
pushed.push({ pushed.push({
pageId: u.pageId, pageId: u.pageId,
@@ -1083,13 +1103,23 @@ export function isPageFile(path: string): boolean {
* Docmost). A body is treated as conflicted only when it carries BOTH a begin * Docmost). A body is treated as conflicted only when it carries BOTH a begin
* (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown setext * (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown setext
* heading underline (`=======`) is not mistaken for a conflict. When conflicted, * heading underline (`=======`) is not mistaken for a conflict. When conflicted,
* the three marker line types are removed while BOTH sides' content is preserved * every marker line type is removed while the human-visible content is preserved
* (no data loss): the marker SYNTAX never reaches Docmost, but the human's content * (no data loss): the marker SYNTAX never reaches Docmost, but the content does —
* does — where the conflict is visible and fixable rather than silently dropped. * where the conflict is visible and fixable rather than silently dropped.
*
* `diff3`/`zdiff3` style: a conflict in that style adds a `|||||||` base section
* (`|||||||` line + the merge-BASE content + `=======`). `ensureRepo` pins
* `merge.conflictStyle=merge` so the engine never produces it, but a vault that
* predates the pin — or content arriving via an external push that a human
* committed in diff3 style — could still carry it. So we ALSO recognize the
* `|||||||` marker and DROP the stale base region it introduces (between
* `|||||||` and `=======`): the base text is neither side's current content, so
* keeping it would inject obsolete lines AND leak a raw `|||||||` marker.
*/ */
const CONFLICT_BEGIN_RE = /^<{7}/m; const CONFLICT_BEGIN_RE = /^<{7}/m;
const CONFLICT_END_RE = /^>{7}/m; const CONFLICT_END_RE = /^>{7}/m;
const CONFLICT_BEGIN_LINE_RE = /^<{7}/; const CONFLICT_BEGIN_LINE_RE = /^<{7}/;
const CONFLICT_BASE_LINE_RE = /^\|{7}/;
const CONFLICT_SEP_LINE_RE = /^={7}/; const CONFLICT_SEP_LINE_RE = /^={7}/;
const CONFLICT_END_LINE_RE = /^>{7}/; const CONFLICT_END_LINE_RE = /^>{7}/;
@@ -1099,23 +1129,37 @@ export function hasConflictMarkers(body: string): boolean {
function stripConflictMarkers(body: string): string { function stripConflictMarkers(body: string): string {
if (!hasConflictMarkers(body)) return body; if (!hasConflictMarkers(body)) return body;
// Remove ONLY the three marker line types, and treat a `=======` line as a // Track where we are inside a conflict block so a `=======` line is treated as
// conflict separator ONLY when we are between a `<<<<<<<` begin and a `>>>>>>>` // a conflict separator ONLY between a `<<<<<<<` begin and a `>>>>>>>` end — a
// end — so a legitimate Markdown setext heading underline (`=======`) outside a // legitimate Markdown setext heading underline (`=======`) outside a conflict
// conflict block is preserved (review finding). Both conflict sides' content is // block is preserved (review finding). State machine over the block:
// kept; only the marker SYNTAX is dropped. // 'no' — outside any conflict block.
let inBlock = false; // 'ours' — after `<<<<<<<`, before `|||||||`/`=======` (our side: KEEP).
// 'base' — after `|||||||`, before `=======` (diff3 base region: DROP).
// 'theirs' — after `=======`, before `>>>>>>>` (their side: KEEP).
// Every marker LINE itself is dropped; only the base region's content is also
// dropped (it is stale and not part of either current side).
let state: "no" | "ours" | "base" | "theirs" = "no";
const out: string[] = []; const out: string[] = [];
for (const line of body.split("\n")) { for (const line of body.split("\n")) {
if (CONFLICT_BEGIN_LINE_RE.test(line)) { if (CONFLICT_BEGIN_LINE_RE.test(line)) {
inBlock = true; state = "ours";
continue; continue;
} }
if (CONFLICT_END_LINE_RE.test(line)) { if (state !== "no" && CONFLICT_END_LINE_RE.test(line)) {
inBlock = false; state = "no";
continue; continue;
} }
if (inBlock && CONFLICT_SEP_LINE_RE.test(line)) { if (state === "ours" && CONFLICT_BASE_LINE_RE.test(line)) {
state = "base";
continue;
}
if ((state === "ours" || state === "base") && CONFLICT_SEP_LINE_RE.test(line)) {
state = "theirs";
continue;
}
// Drop the diff3 base region's content (stale, neither current side).
if (state === "base") {
continue; continue;
} }
out.push(line); out.push(line);

View File

@@ -162,6 +162,10 @@ describe('VaultGit (integration; temp repo)', () => {
expect(await localConfig('commit.gpgsign')).toBe('false'); expect(await localConfig('commit.gpgsign')).toBe('false');
expect(await localConfig('core.safecrlf')).toBe('false'); expect(await localConfig('core.safecrlf')).toBe('false');
expect(await localConfig('core.attributesFile')).toBe('/dev/null'); expect(await localConfig('core.attributesFile')).toBe('/dev/null');
// merge.conflictStyle=merge keeps conflict markers to the canonical three
// (no diff3 `|||||||` base section) regardless of the operator's global
// config (bug #2 marker-leak determinism, SPEC §9).
expect(await localConfig('merge.conflictStyle')).toBe('merge');
// Idempotent: a second run leaves the same single values (no duplicates). // Idempotent: a second run leaves the same single values (no duplicates).
await git.ensureRepo(); await git.ensureRepo();

View File

@@ -145,6 +145,79 @@ describe('#13 conflict markers reach Docmost', () => {
expect(pushedBody).toContain('their line'); expect(pushedBody).toContain('their line');
}); });
it('autoMergeConflicts on: rewrites the vault file with the CLEAN body so raw markers do not stay in the published vault (bug #2 marker-leak)', async () => {
// Previously the UPDATE path stripped markers for the body SENT to Docmost but
// left the file on `main` carrying raw `<<<<<<<`/`>>>>>>>` forever — the
// published vault external clients clone kept the markers and the page
// re-conflicted every cycle. The fix writes the cleaned body back + records it
// in writtenBack so runPush commits it on `main`.
const { deps, importPageMarkdown } = makeConflictDeps({
...makeSettings(),
autoMergeConflicts: true,
});
const res = await runPush(deps, { dryRun: false });
expect(res.mode).toBe('apply');
// The clean body was imported into Docmost (no markers).
const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any;
expect(pushedBody).not.toMatch(/[<>=]{7}/);
// The vault file was rewritten with the cleaned content (no raw markers).
const writeCalls = (deps.writeFile as any).mock.calls as [string, string][];
const docWrite = writeCalls.find(([p]) => p === 'Doc.md');
expect(docWrite).toBeDefined();
expect(docWrite![1]).not.toMatch(/[<>=]{7}/);
expect(docWrite![1]).toContain('my line');
expect(docWrite![1]).toContain('their line');
// It is recorded for the follow-up commit so `main` converges to clean bytes.
expect(res.applied?.writtenBack).toEqual(
expect.arrayContaining([
expect.objectContaining({ path: 'Doc.md', pageId: 'p-1' }),
]),
);
});
it('autoMergeConflicts on: strips diff3-style ||||||| base markers + base content (defense-in-depth)', async () => {
// A vault created before `merge.conflictStyle=merge` was pinned (or content a
// human committed in diff3 style) can carry a `||||||| base` section. The
// scrub must drop the `|||||||` marker AND the stale base region, keeping only
// the two live sides — otherwise `|||||||` + obsolete base lines leak into the
// Docmost page.
const diff3Body =
'<<<<<<< HEAD\nmy line\n||||||| base\nold base line\n=======\ntheir line\n>>>>>>> feature\n';
const file = serializePageFile('p-1', diff3Body);
const { git } = makePushGit({ changes: [{ status: 'M', path: 'Doc.md' }] });
const importPageMarkdown = vi.fn(async () => ({ success: true }));
const client = {
listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
importPageMarkdown,
createPage: vi.fn(),
deletePage: vi.fn(),
movePage: vi.fn(),
renamePage: vi.fn(),
};
const deps: PushDeps = {
settings: { ...makeSettings(), autoMergeConflicts: true },
git,
makeClient: () => client as any,
readFile: vi.fn(async (p: string) => {
if (p === 'Doc.md') return file;
throw new Error(`no such file: ${p}`);
}),
writeFile: vi.fn(async () => {}),
log: () => {},
};
await runPush(deps, { dryRun: false });
const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any;
expect(pushedBody).not.toContain('|||||||');
expect(pushedBody).not.toContain('old base line'); // stale base dropped
expect(pushedBody).toContain('my line');
expect(pushedBody).toContain('their line');
});
it('CREATE branch (autoMergeConflicts off): does NOT create a page from a conflicted NEW file; records a create failure', async () => { it('CREATE branch (autoMergeConflicts off): does NOT create a page from a conflicted NEW file; records a create failure', async () => {
// The conflict-markers guard is DUPLICATED on the CREATE path (a brand-new // The conflict-markers guard is DUPLICATED on the CREATE path (a brand-new
// .md with NO gitmost_id, status 'A') and was previously untested — only the // .md with NO gitmost_id, status 'A') and was previously untested — only the