fix(git-sync): push 503 starvation + concurrent-edit marker leak/silent loss
Bug #1 (push 503 starvation): an external receive-pack that briefly overlapped a poll cycle immediately 503'd because the per-space single-writer lock was held. Add a BOUNDED retry-acquire on the PUSH path only (SpaceLockService .withSpaceLock acquireRetry: capped exponential backoff up to ~5s); a transient overlap now waits and succeeds, a genuinely stuck cycle still 503s after the bound. The poll cycle passes no retry (immediate skip). Push result stays deterministic: the receive-pack only runs once the lock is held, so a 503 never leaves a half-applied ref. Bug #2 (concurrent-edit marker leak + silent same-block loss): - Marker leak (a): the push UPDATE path stripped markers for the body sent to Docmost but left raw <<<<<<</>>>>>>> committed on the published `main` vault forever (autoMergeConflicts ON). Now the cleaned body is written back to the vault file + recorded in writtenBack so runPush commits it on `main` and the vault converges to clean bytes. - Marker leak (b): pin merge.conflictStyle=merge in ensureRepo and teach stripConflictMarkers/hasConflictMarkers about the diff3 `|||||||` base section (drop the marker AND the stale base region) so diff3/zdiff3 conflicts can never leak `|||||||` + base content into a page. Also scrub the 3-way merge BASE markdown. - Silent same-block loss: the block 3-way merge still resolves same-block conflicts deterministically to git, but it is no longer silent: diff3Plan now reports a conflict count (mergeXmlFragments3WayWithStats), gitSyncWriteBody logs it, and the persistence boundary-snapshot now fires for git-sync writes over a non-git-sync baseline so the human's pre-merge content is preserved in page history (recoverable). Full both-preserved persisted-conflict UI remains the deferred redesign. Tests: space-lock bounded-retry (success/stuck/poll-immediate); push vault-clean + diff3 ||||||| strip; ensureRepo conflictStyle pin; diff3Plan/3-way conflict counts; persistence git-sync boundary snapshot. Server tsc clean; git-sync vitest + server collaboration/git-sync jest all green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -10,7 +10,7 @@ import * as Y from 'yjs';
|
|||||||
import { User } from '@docmost/db/types/entity.types';
|
import { User } from '@docmost/db/types/entity.types';
|
||||||
import {
|
import {
|
||||||
mergeXmlFragments,
|
mergeXmlFragments,
|
||||||
mergeXmlFragments3Way,
|
mergeXmlFragments3WayWithStats,
|
||||||
} from './merge/yjs-body-merge';
|
} from './merge/yjs-body-merge';
|
||||||
|
|
||||||
export type CollabEventHandlers = ReturnType<
|
export type CollabEventHandlers = ReturnType<
|
||||||
@@ -168,11 +168,24 @@ export class CollaborationHandler {
|
|||||||
const liveFrag = doc.getXmlFragment('default');
|
const liveFrag = doc.getXmlFragment('default');
|
||||||
const targetFrag = targetDoc.getXmlFragment('default');
|
const targetFrag = targetDoc.getXmlFragment('default');
|
||||||
if (baseDoc) {
|
if (baseDoc) {
|
||||||
mergeXmlFragments3Way(
|
const { conflicts } = mergeXmlFragments3WayWithStats(
|
||||||
liveFrag,
|
liveFrag,
|
||||||
targetFrag,
|
targetFrag,
|
||||||
baseDoc.getXmlFragment('default'),
|
baseDoc.getXmlFragment('default'),
|
||||||
);
|
);
|
||||||
|
// SAME-BLOCK conflict contract (SPEC §9): a block both the human
|
||||||
|
// and git changed resolves to GIT (deterministic). Make that
|
||||||
|
// OBSERVABLE rather than silent — log it. The losing human content
|
||||||
|
// is NOT destroyed: the persistence extension's boundary snapshot
|
||||||
|
// pins the pre-merge page state to history on this user->git-sync
|
||||||
|
// transition, so it stays recoverable.
|
||||||
|
if (conflicts > 0) {
|
||||||
|
this.logger.warn(
|
||||||
|
`git-sync merge for ${documentName}: ${conflicts} same-block ` +
|
||||||
|
`conflict(s) resolved to the git version; the prior page ` +
|
||||||
|
`state is preserved in page history (recoverable).`,
|
||||||
|
);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
mergeXmlFragments(liveFrag, targetFrag);
|
mergeXmlFragments(liveFrag, targetFrag);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -170,17 +170,32 @@ describe('PersistenceExtension.onStoreDocument — provenance precedence (#2)',
|
|||||||
expect(sourceOf(pageRepo)).toBe('agent');
|
expect(sourceOf(pageRepo)).toBe('agent');
|
||||||
});
|
});
|
||||||
|
|
||||||
// --- negative: a git-sync store must NOT pin a boundary history snapshot ----
|
// --- boundary snapshot for a git-sync store over a HUMAN baseline -----------
|
||||||
// The boundary-snapshot branch only fires when the resolved source is 'agent'
|
// SPEC §9 observable-loss guard (bug #2): a git-sync body write is a block-level
|
||||||
// AND the prior persisted source is not 'agent'. A git-sync store resolves to
|
// 3-way merge whose same-block rule is "git wins". To keep a concurrent human
|
||||||
// 'git-sync', so saveHistory must NOT be called.
|
// edit RECOVERABLE rather than silently overwritten, a git-sync store over a
|
||||||
it('does NOT write a boundary history snapshot for a git-sync store', async () => {
|
// prior NON-git-sync baseline pins that prior state to page history first —
|
||||||
|
// exactly like the agent path. So saveHistory MUST be called here.
|
||||||
|
it('DOES pin a boundary snapshot for a git-sync store over a prior human state', async () => {
|
||||||
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });
|
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });
|
||||||
|
|
||||||
await ext.onStoreDocument(
|
await ext.onStoreDocument(
|
||||||
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
|
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
// --- negative: a git-sync store over a git-sync baseline does NOT re-pin -----
|
||||||
|
// The boundary is pinned once on the transition INTO git-sync; a subsequent
|
||||||
|
// git-sync store over an already-git-sync baseline must not churn history.
|
||||||
|
it('does NOT re-pin a boundary snapshot for a git-sync store over a git-sync baseline', async () => {
|
||||||
|
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'git-sync' });
|
||||||
|
|
||||||
|
await ext.onStoreDocument(
|
||||||
|
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
|
||||||
|
);
|
||||||
|
|
||||||
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -274,21 +274,30 @@ export class PersistenceExtension implements Extension {
|
|||||||
//this.logger.debug('Contributors error:' + err?.['message']);
|
//this.logger.debug('Contributors error:' + err?.['message']);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Approach A — boundary snapshot before the agent's first edit.
|
// Approach A — boundary snapshot before a MACHINE write overwrites a
|
||||||
// When this store is the agent's and the page's currently persisted
|
// human (or other-source) baseline. When this store is from a machine
|
||||||
// state was authored by a human, pin that human state as its own
|
// source — the AGENT or GIT-SYNC — and the page's currently persisted
|
||||||
// history version BEFORE the agent overwrites it. `page` still holds
|
// state was authored by a DIFFERENT source, pin that prior state as its
|
||||||
// the OLD content/provenance here, so saveHistory(page) captures the
|
// own history version BEFORE the machine write overwrites it. `page`
|
||||||
// pre-agent state tagged 'user'. The agent's new content is
|
// still holds the OLD content/provenance here, so saveHistory(page)
|
||||||
// snapshotted later by the debounced PAGE_HISTORY job ('agent'). Skip
|
// captures the pre-write state. The machine's new content is snapshotted
|
||||||
// if the prior state is already agent-authored (boundary already
|
// later by the debounced PAGE_HISTORY job.
|
||||||
// pinned on the user->agent transition), if the page is effectively
|
//
|
||||||
// empty, or if the latest existing snapshot already equals this human
|
// For GIT-SYNC this is the OBSERVABLE-LOSS guard (SPEC §9 conflict
|
||||||
// state (avoid duplicates).
|
// contract): a git-sync body write is a block-level 3-way merge whose
|
||||||
if (
|
// same-block rule is "git wins". Without this pin, a concurrent human
|
||||||
lastUpdatedSource === 'agent' &&
|
// edit to a block git also changed would be overwritten with NO trace.
|
||||||
page.lastUpdatedSource !== 'agent'
|
// Pinning the pre-merge state here means the human's content is always
|
||||||
) {
|
// RECOVERABLE via page history rather than silently lost — git still
|
||||||
|
// wins the live doc deterministically, but nothing is destroyed.
|
||||||
|
//
|
||||||
|
// Skip if the prior state was already authored by THIS machine source
|
||||||
|
// (boundary already pinned on the transition into it), if the page is
|
||||||
|
// effectively empty, or if the latest existing snapshot already equals
|
||||||
|
// the prior state (avoid duplicates).
|
||||||
|
const isMachineWrite =
|
||||||
|
lastUpdatedSource === 'agent' || lastUpdatedSource === 'git-sync';
|
||||||
|
if (isMachineWrite && page.lastUpdatedSource !== lastUpdatedSource) {
|
||||||
const lastHistory = await this.pageHistoryRepo.findPageLastHistory(
|
const lastHistory = await this.pageHistoryRepo.findPageLastHistory(
|
||||||
pageId,
|
pageId,
|
||||||
{ includeContent: true, trx },
|
{ includeContent: true, trx },
|
||||||
|
|||||||
@@ -1,4 +1,8 @@
|
|||||||
import { diff3Plan, type Pick } from './three-way-merge';
|
import {
|
||||||
|
diff3Plan,
|
||||||
|
diff3PlanWithConflicts,
|
||||||
|
type Pick,
|
||||||
|
} from './three-way-merge';
|
||||||
|
|
||||||
// Materialize a plan into the merged key sequence for assertion.
|
// Materialize a plan into the merged key sequence for assertion.
|
||||||
function apply(plan: Pick[], live: string[], target: string[]): string[] {
|
function apply(plan: Pick[], live: string[], target: string[]): string[] {
|
||||||
@@ -31,6 +35,49 @@ describe('diff3Plan (block-level three-way merge)', () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Bug #2 observability: diff3PlanWithConflicts reports SAME-BLOCK conflicts so
|
||||||
|
// the caller can surface the "git wins" loss (log + history pin) instead of
|
||||||
|
// dropping the human side silently.
|
||||||
|
describe('diff3PlanWithConflicts (same-block conflict reporting)', () => {
|
||||||
|
it('reports 0 conflicts when sides changed DIFFERENT blocks (clean merge)', () => {
|
||||||
|
const r = diff3PlanWithConflicts(
|
||||||
|
['1', '2', '3'],
|
||||||
|
['H', '2', '3'],
|
||||||
|
['1', '2', 'G'],
|
||||||
|
);
|
||||||
|
expect(r.conflicts).toBe(0);
|
||||||
|
expect(apply(r.picks, ['H', '2', '3'], ['1', '2', 'G'])).toEqual([
|
||||||
|
'H',
|
||||||
|
'2',
|
||||||
|
'G',
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reports 1 conflict and git wins when BOTH rewrote the SAME block', () => {
|
||||||
|
const r = diff3PlanWithConflicts(
|
||||||
|
['1', '2', '3'],
|
||||||
|
['1', 'H', '3'], // human rewrote block 2
|
||||||
|
['1', 'G', '3'], // git rewrote block 2
|
||||||
|
);
|
||||||
|
expect(r.conflicts).toBe(1);
|
||||||
|
// Git wins the contested block; the human 'H' is NOT in the picks.
|
||||||
|
expect(apply(r.picks, ['1', 'H', '3'], ['1', 'G', '3'])).toEqual([
|
||||||
|
'1',
|
||||||
|
'G',
|
||||||
|
'3',
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does NOT count a git-only region (no human content to lose) as a conflict', () => {
|
||||||
|
const r = diff3PlanWithConflicts(
|
||||||
|
['1', '2', '3'],
|
||||||
|
['1', '2', '3'], // human unchanged
|
||||||
|
['1', '9', '3'], // git rewrote block 2
|
||||||
|
);
|
||||||
|
expect(r.conflicts).toBe(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it('human and git changed DIFFERENT blocks -> both preserved', () => {
|
it('human and git changed DIFFERENT blocks -> both preserved', () => {
|
||||||
// human rewrote block 1, git rewrote block 3.
|
// human rewrote block 1, git rewrote block 3.
|
||||||
expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([
|
expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([
|
||||||
|
|||||||
@@ -183,15 +183,46 @@ export interface Pick {
|
|||||||
index: number;
|
index: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The merged block order PLUS how many regions resolved as a genuine SAME-BLOCK
|
||||||
|
* conflict (both sides rewrote the same base block — `tryMergeRegion` returned
|
||||||
|
* null and git won the whole region, so the live/human version of those blocks
|
||||||
|
* is NOT in `picks`). `conflicts > 0` is the OBSERVABLE signal the caller uses to
|
||||||
|
* surface "git won a concurrent same-block edit" (log it + pin the human
|
||||||
|
* baseline to page history) instead of dropping the human side silently.
|
||||||
|
*/
|
||||||
|
export interface Diff3Result {
|
||||||
|
picks: Pick[];
|
||||||
|
conflicts: number;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Three-way merge of base `o`, live `a`, target `b` (arrays of block keys).
|
* Three-way merge of base `o`, live `a`, target `b` (arrays of block keys).
|
||||||
* Returns the merged block order as picks from live/target.
|
* Returns the merged block order as picks from live/target. Thin wrapper over
|
||||||
|
* `diff3PlanWithConflicts` (kept for the existing pure-array callers/tests).
|
||||||
*/
|
*/
|
||||||
export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
|
export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
|
||||||
|
return diff3PlanWithConflicts(o, a, b).picks;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like `diff3Plan` but also reports the SAME-BLOCK conflict count (see
|
||||||
|
* `Diff3Result`). A region where both the human and git rewrote the same base
|
||||||
|
* block cannot be merged automatically; the rule is deterministic — GIT WINS the
|
||||||
|
* whole region — but the human's version of those blocks is then absent from the
|
||||||
|
* picks, so we count it so the caller can make the loss observable/recoverable
|
||||||
|
* rather than silent (the documented conflict contract).
|
||||||
|
*/
|
||||||
|
export function diff3PlanWithConflicts(
|
||||||
|
o: string[],
|
||||||
|
a: string[],
|
||||||
|
b: string[],
|
||||||
|
): Diff3Result {
|
||||||
const oToA = matchMap(lcsPairs(o, a));
|
const oToA = matchMap(lcsPairs(o, a));
|
||||||
const oToB = matchMap(lcsPairs(o, b));
|
const oToB = matchMap(lcsPairs(o, b));
|
||||||
|
|
||||||
const res: Pick[] = [];
|
const res: Pick[] = [];
|
||||||
|
let conflicts = 0;
|
||||||
let oi = 0;
|
let oi = 0;
|
||||||
let ai = 0;
|
let ai = 0;
|
||||||
let bi = 0;
|
let bi = 0;
|
||||||
@@ -223,6 +254,10 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// SAME-BLOCK CONFLICT: count it ONLY when the human side actually had
|
||||||
|
// content in this region that git's win discards (live region non-empty).
|
||||||
|
// A region only git rewrote (live region empty) is not a human loss.
|
||||||
|
if (aEnd > ai) conflicts++;
|
||||||
for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k });
|
for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k });
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -235,5 +270,5 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
|
|||||||
oi = anchor + 1;
|
oi = anchor + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return { picks: res, conflicts };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import * as Y from 'yjs';
|
|||||||
import {
|
import {
|
||||||
mergeXmlFragments,
|
mergeXmlFragments,
|
||||||
mergeXmlFragments3Way,
|
mergeXmlFragments3Way,
|
||||||
|
mergeXmlFragments3WayWithStats,
|
||||||
cloneXmlNode,
|
cloneXmlNode,
|
||||||
diffBlocks,
|
diffBlocks,
|
||||||
} from './yjs-body-merge';
|
} from './yjs-body-merge';
|
||||||
@@ -179,6 +180,40 @@ describe('yjs-body-merge', () => {
|
|||||||
expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
|
expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Bug #2 observability: the stats variant reports the same-block conflict so
|
||||||
|
// the handler can log it + the persistence layer can pin the human baseline.
|
||||||
|
it('reports the same-block conflict count via mergeXmlFragments3WayWithStats', () => {
|
||||||
|
const base = new Y.Doc();
|
||||||
|
const live = new Y.Doc();
|
||||||
|
const target = new Y.Doc();
|
||||||
|
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
|
||||||
|
const liveFrag = buildFragment(live, ['a', 'HUMAN', 'c']);
|
||||||
|
const targetFrag = buildFragment(target, ['a', 'GIT', 'c']);
|
||||||
|
|
||||||
|
let result!: { applied: number; conflicts: number };
|
||||||
|
live.transact(() => {
|
||||||
|
result = mergeXmlFragments3WayWithStats(liveFrag, targetFrag, baseFrag);
|
||||||
|
});
|
||||||
|
expect(result.conflicts).toBe(1);
|
||||||
|
expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reports 0 conflicts for a clean different-block 3-way merge', () => {
|
||||||
|
const base = new Y.Doc();
|
||||||
|
const live = new Y.Doc();
|
||||||
|
const target = new Y.Doc();
|
||||||
|
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
|
||||||
|
const liveFrag = buildFragment(live, ['HUMAN', 'b', 'c']);
|
||||||
|
const targetFrag = buildFragment(target, ['a', 'b', 'GIT']);
|
||||||
|
|
||||||
|
let result!: { applied: number; conflicts: number };
|
||||||
|
live.transact(() => {
|
||||||
|
result = mergeXmlFragments3WayWithStats(liveFrag, targetFrag, baseFrag);
|
||||||
|
});
|
||||||
|
expect(result.conflicts).toBe(0);
|
||||||
|
expect(texts(liveFrag)).toEqual(['HUMAN', 'b', 'GIT']);
|
||||||
|
});
|
||||||
|
|
||||||
it('git change with no concurrent human edit (live == base) applies cleanly', () => {
|
it('git change with no concurrent human edit (live == base) applies cleanly', () => {
|
||||||
const base = new Y.Doc();
|
const base = new Y.Doc();
|
||||||
const live = new Y.Doc();
|
const live = new Y.Doc();
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ import { getSchema } from '@tiptap/core';
|
|||||||
import type { Schema } from '@tiptap/pm/model';
|
import type { Schema } from '@tiptap/pm/model';
|
||||||
|
|
||||||
import { tiptapExtensions } from '../collaboration.util';
|
import { tiptapExtensions } from '../collaboration.util';
|
||||||
import { diff3Plan } from './three-way-merge';
|
import { diff3PlanWithConflicts } from './three-way-merge';
|
||||||
import { buildLcsTable } from './lcs';
|
import { buildLcsTable } from './lcs';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -295,6 +295,20 @@ export function mergeXmlFragments(
|
|||||||
return applied;
|
return applied;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Outcome of a 3-way block merge: ops applied + same-block conflict count. */
|
||||||
|
export interface Merge3WayResult {
|
||||||
|
/** Number of block insert/delete operations spliced into `live`. */
|
||||||
|
applied: number;
|
||||||
|
/**
|
||||||
|
* Regions where the human AND git rewrote the SAME base block. The rule is
|
||||||
|
* deterministic (GIT WINS the region), so the human's version of those blocks
|
||||||
|
* is dropped from the live doc. `conflicts > 0` is the OBSERVABLE signal the
|
||||||
|
* caller uses to LOG the loss and pin the human baseline to page history (so it
|
||||||
|
* is recoverable), instead of the edit vanishing silently.
|
||||||
|
*/
|
||||||
|
conflicts: number;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* THREE-WAY block merge: reconcile `live` toward `target` using `base` (the
|
* THREE-WAY block merge: reconcile `live` toward `target` using `base` (the
|
||||||
* last-synced common ancestor) so a block only the human changed is KEPT and a
|
* last-synced common ancestor) so a block only the human changed is KEPT and a
|
||||||
@@ -305,20 +319,40 @@ export function mergeXmlFragments(
|
|||||||
* target); we materialize that as a virtual target fragment and reuse the 2-way
|
* target); we materialize that as a virtual target fragment and reuse the 2-way
|
||||||
* `mergeXmlFragments` to splice it into `live` minimally (so untouched live block
|
* `mergeXmlFragments` to splice it into `live` minimally (so untouched live block
|
||||||
* instances — and their in-flight edits — stay put). MUST be called inside a Yjs
|
* instances — and their in-flight edits — stay put). MUST be called inside a Yjs
|
||||||
* transaction. Returns the number of block operations applied.
|
* transaction. Returns the number of block operations applied. (Use
|
||||||
|
* `mergeXmlFragments3WayWithStats` when the SAME-BLOCK conflict count is needed.)
|
||||||
*/
|
*/
|
||||||
export function mergeXmlFragments3Way(
|
export function mergeXmlFragments3Way(
|
||||||
live: Y.XmlFragment,
|
live: Y.XmlFragment,
|
||||||
target: Y.XmlFragment,
|
target: Y.XmlFragment,
|
||||||
base: Y.XmlFragment,
|
base: Y.XmlFragment,
|
||||||
): number {
|
): number {
|
||||||
|
return mergeXmlFragments3WayWithStats(live, target, base).applied;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As `mergeXmlFragments3Way`, but also returns the SAME-BLOCK conflict count so
|
||||||
|
* the caller can make a "git won a concurrent same-block edit" event OBSERVABLE
|
||||||
|
* (the documented conflict contract: git wins deterministically, but the losing
|
||||||
|
* human content is never destroyed silently — it is logged and recoverable via
|
||||||
|
* page history).
|
||||||
|
*/
|
||||||
|
export function mergeXmlFragments3WayWithStats(
|
||||||
|
live: Y.XmlFragment,
|
||||||
|
target: Y.XmlFragment,
|
||||||
|
base: Y.XmlFragment,
|
||||||
|
): Merge3WayResult {
|
||||||
const liveKids = live.toArray();
|
const liveKids = live.toArray();
|
||||||
const targetKids = target.toArray();
|
const targetKids = target.toArray();
|
||||||
const liveKeys = liveKids.map(key);
|
const liveKeys = liveKids.map(key);
|
||||||
const targetKeys = targetKids.map(key);
|
const targetKeys = targetKids.map(key);
|
||||||
const baseKeys = base.toArray().map(key);
|
const baseKeys = base.toArray().map(key);
|
||||||
|
|
||||||
const plan = diff3Plan(baseKeys, liveKeys, targetKeys);
|
const { picks: plan, conflicts } = diff3PlanWithConflicts(
|
||||||
|
baseKeys,
|
||||||
|
liveKeys,
|
||||||
|
targetKeys,
|
||||||
|
);
|
||||||
|
|
||||||
// Build the merged block sequence in a throwaway doc, cloning from whichever
|
// Build the merged block sequence in a throwaway doc, cloning from whichever
|
||||||
// side each pick came from, then 2-way merge it back into the live fragment.
|
// side each pick came from, then 2-way merge it back into the live fragment.
|
||||||
@@ -331,5 +365,5 @@ export function mergeXmlFragments3Way(
|
|||||||
);
|
);
|
||||||
if (nodes.length) mergedFrag.insert(0, nodes);
|
if (nodes.length) mergedFrag.insert(0, nodes);
|
||||||
|
|
||||||
return mergeXmlFragments(live, mergedFrag);
|
return { applied: mergeXmlFragments(live, mergedFrag), conflicts };
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,3 +39,24 @@ export const GIT_SYNC_LOCK_PREFIX = 'git-sync:lock:';
|
|||||||
* and the Redis lock prevents two instances racing the same space.
|
* and the Redis lock prevents two instances racing the same space.
|
||||||
*/
|
*/
|
||||||
export const GIT_SYNC_LOCK_TTL_MS = 5 * 60 * 1000;
|
export const GIT_SYNC_LOCK_TTL_MS = 5 * 60 * 1000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bounded retry budget for ACQUIRING the per-space lock on the PUSH (external
|
||||||
|
* receive-pack) path. The poll cycle holds the single-writer lock while it
|
||||||
|
* processes a whole space, so a legitimate `git push` that arrives during a
|
||||||
|
* cycle would otherwise IMMEDIATELY 503 (GitSyncLockHeldError) even though the
|
||||||
|
* cycle is about to release the lock in well under a second for most spaces.
|
||||||
|
* Under continuous polling that made a majority of pushes 503 non-
|
||||||
|
* deterministically. So the push path retries the acquire with a small capped
|
||||||
|
* backoff for up to ~`TOTAL_MS` BEFORE giving up — a transient overlap with a
|
||||||
|
* cycle no longer fails the push, while a genuinely stuck/long cycle still
|
||||||
|
* surfaces a 503 after the bound (git then retries the whole push, which is
|
||||||
|
* safe: the receive-pack only runs ONCE the lock is held, so a 503 never leaves
|
||||||
|
* a half-applied ref). The POLL cycle itself does NOT retry (it just skips and
|
||||||
|
* the next tick reconciles), so this is push-only — the smaller blast radius.
|
||||||
|
*/
|
||||||
|
export const GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS = 5_000;
|
||||||
|
/** First backoff between push lock-acquire attempts (ms); doubles, capped. */
|
||||||
|
export const GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS = 100;
|
||||||
|
/** Cap on the per-attempt push lock-acquire backoff (ms). */
|
||||||
|
export const GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS = 500;
|
||||||
|
|||||||
@@ -22,6 +22,11 @@ import { EnvironmentService } from '../../environment/environment.service';
|
|||||||
import { GitmostDataSourceService } from './gitmost-datasource.service';
|
import { GitmostDataSourceService } from './gitmost-datasource.service';
|
||||||
import { VaultRegistryService } from './vault-registry.service';
|
import { VaultRegistryService } from './vault-registry.service';
|
||||||
import { SpaceLockService } from './space-lock.service';
|
import { SpaceLockService } from './space-lock.service';
|
||||||
|
import {
|
||||||
|
GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS,
|
||||||
|
GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS,
|
||||||
|
GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS,
|
||||||
|
} from '../git-sync.constants';
|
||||||
|
|
||||||
/** A space the poll loop should reconcile: its id + the workspace it lives in. */
|
/** A space the poll loop should reconcile: its id + the workspace it lives in. */
|
||||||
interface EnabledSpace {
|
interface EnabledSpace {
|
||||||
@@ -244,7 +249,9 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
|||||||
}
|
}
|
||||||
const serviceUserId = this.environmentService.getGitSyncServiceUserId();
|
const serviceUserId = this.environmentService.getGitSyncServiceUserId();
|
||||||
|
|
||||||
const result = await this.spaceLock.withSpaceLock(spaceId, async (signal) => {
|
const result = await this.spaceLock.withSpaceLock(
|
||||||
|
spaceId,
|
||||||
|
async (signal) => {
|
||||||
// 1) Stream the receive-pack to the client (durable commits land on main).
|
// 1) Stream the receive-pack to the client (durable commits land on main).
|
||||||
// Pass the lost-lock signal so the receive-pack child is killed if the lock
|
// Pass the lost-lock signal so the receive-pack child is killed if the lock
|
||||||
// lapses mid-write (no concurrent working-tree writer across replicas).
|
// lapses mid-write (no concurrent working-tree writer across replicas).
|
||||||
@@ -273,7 +280,23 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
});
|
},
|
||||||
|
// BOUNDED retry-acquire (push path only): a push that briefly overlaps a
|
||||||
|
// poll cycle waits a moment (capped backoff up to the budget) instead of
|
||||||
|
// immediately 503-ing — the cycle releases the lock in well under a second
|
||||||
|
// for most spaces, so this turns a transient overlap into a SUCCESS rather
|
||||||
|
// than a spurious failure. A genuinely long/stuck cycle still skips after
|
||||||
|
// the bound -> GitSyncLockHeldError -> 503, and git retries the whole push
|
||||||
|
// (the receive-pack only runs once the lock is held, so there is never a
|
||||||
|
// half-applied ref on a 503).
|
||||||
|
{
|
||||||
|
acquireRetry: {
|
||||||
|
timeoutMs: GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS,
|
||||||
|
baseMs: GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS,
|
||||||
|
maxMs: GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
// The lock was held (in-progress or another replica) — surface to the caller
|
// The lock was held (in-progress or another replica) — surface to the caller
|
||||||
// so the HTTP handler can answer 503 and let git retry.
|
// so the HTTP handler can answer 503 and let git retry.
|
||||||
|
|||||||
@@ -123,6 +123,65 @@ describe('SpaceLockService', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Bug #1 (push 503 starvation): the PUSH path passes a bounded acquireRetry so a
|
||||||
|
// transient overlap with a poll cycle is retried (and succeeds) instead of an
|
||||||
|
// immediate 503. A genuinely stuck lock still skips after the bound. The poll
|
||||||
|
// cycle passes NO retry (immediate skip), so only the push path waits.
|
||||||
|
describe('bounded acquire-retry (push path)', () => {
|
||||||
|
const retry = { timeoutMs: 5_000, baseMs: 100, maxMs: 500 };
|
||||||
|
|
||||||
|
it('retries the acquire and SUCCEEDS when the lock is briefly held then released', async () => {
|
||||||
|
const { service, redis } = build();
|
||||||
|
// First acquire attempt fails (lock briefly held by a cycle), the next
|
||||||
|
// succeeds — the bounded retry must turn this into a SUCCESS, not a skip.
|
||||||
|
redis.set
|
||||||
|
.mockResolvedValueOnce(null) // attempt 1: held
|
||||||
|
.mockResolvedValueOnce(null) // attempt 2: still held
|
||||||
|
.mockResolvedValue('OK'); // attempt 3+: released -> acquired
|
||||||
|
const fn = jest.fn(async () => 'pushed');
|
||||||
|
|
||||||
|
const result = await service.withSpaceLock('space-1', fn, {
|
||||||
|
acquireRetry: retry,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toBe('pushed');
|
||||||
|
expect(fn).toHaveBeenCalledTimes(1);
|
||||||
|
expect(redis.set.mock.calls.length).toBeGreaterThanOrEqual(3);
|
||||||
|
// The acquired lock is released in finally (DEL-CAS eval).
|
||||||
|
expect(redis.eval).toHaveBeenCalledTimes(1);
|
||||||
|
expect(redis.eval.mock.calls[0][0]).toContain('del');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('still skips (lock-held) after the bound when the lock stays stuck — and never runs fn', async () => {
|
||||||
|
const { service, redis } = build();
|
||||||
|
redis.set.mockResolvedValue(null); // permanently held
|
||||||
|
const fn = jest.fn(async () => 'pushed');
|
||||||
|
|
||||||
|
const result = await service.withSpaceLock('space-1', fn, {
|
||||||
|
acquireRetry: { timeoutMs: 300, baseMs: 50, maxMs: 100 },
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toEqual({ skipped: 'lock-held' });
|
||||||
|
expect(fn).not.toHaveBeenCalled();
|
||||||
|
// It retried more than once before giving up (bound > one interval).
|
||||||
|
expect(redis.set.mock.calls.length).toBeGreaterThan(1);
|
||||||
|
// Never acquired -> never released.
|
||||||
|
expect(redis.eval).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('without acquireRetry (poll path) a held lock skips IMMEDIATELY (single attempt)', async () => {
|
||||||
|
const { service, redis } = build();
|
||||||
|
redis.set.mockResolvedValue(null);
|
||||||
|
const fn = jest.fn(async () => 'cycle');
|
||||||
|
|
||||||
|
const result = await service.withSpaceLock('space-1', fn);
|
||||||
|
|
||||||
|
expect(result).toEqual({ skipped: 'lock-held' });
|
||||||
|
expect(redis.set).toHaveBeenCalledTimes(1); // no retry
|
||||||
|
expect(fn).not.toHaveBeenCalled();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('fn throwing', () => {
|
describe('fn throwing', () => {
|
||||||
it('propagates the throw AND still releases (eval) in finally', async () => {
|
it('propagates the throw AND still releases (eval) in finally', async () => {
|
||||||
const { service, redis } = build();
|
const { service, redis } = build();
|
||||||
|
|||||||
@@ -120,41 +120,57 @@ export class SpaceLockService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run `fn` under the per-space lock: the in-process mutex (no overlapping
|
* Options for `withSpaceLock`. `acquireRetry` (PUSH path only) bounds a
|
||||||
* cycles on this instance) AND the Redis leader lock (single writer across
|
* retry-acquire loop: if the lock cannot be entered on the first try, keep
|
||||||
* replicas). Returns `fn`'s result, or a skip sentinel when the lock could not
|
* retrying with a capped exponential backoff until `timeoutMs` elapses before
|
||||||
* be acquired — `{ skipped: 'in-progress' }` (this instance is mid-cycle) or
|
* returning the skip sentinel. The poll cycle holds the lock while it
|
||||||
* `{ skipped: 'lock-held' }` (another replica holds the Redis lock). The mutex
|
* processes a whole space, so a legitimate external push that briefly overlaps
|
||||||
* + Redis lock are always released in a `finally`, even when `fn` throws (the
|
* a cycle should WAIT a moment rather than immediately 503 (bug: ~60% of
|
||||||
* throw propagates to the caller). This is the single reusable wrapper shared
|
* pushes 503'd under continuous polling). The poll cycle passes NO retry (it
|
||||||
* by `runOnce` (the poll/admin cycle) and `ingestExternalPush` (a push from a
|
* just skips and the next tick reconciles).
|
||||||
* git client over HTTP) so both serialize against each other identically.
|
|
||||||
*/
|
*/
|
||||||
async withSpaceLock<T>(
|
async withSpaceLock<T>(
|
||||||
spaceId: string,
|
spaceId: string,
|
||||||
fn: (signal: AbortSignal) => Promise<T>,
|
fn: (signal: AbortSignal) => Promise<T>,
|
||||||
|
options?: {
|
||||||
|
acquireRetry?: { timeoutMs: number; baseMs: number; maxMs: number };
|
||||||
|
},
|
||||||
): Promise<T | { skipped: 'lock-held' | 'in-progress' }> {
|
): Promise<T | { skipped: 'lock-held' | 'in-progress' }> {
|
||||||
if (this.running.has(spaceId)) {
|
const retry = options?.acquireRetry;
|
||||||
return { skipped: 'in-progress' };
|
// Deadline for the bounded retry-acquire (push path). `Date.now()` once so a
|
||||||
}
|
// slow first attempt does not over-extend the budget.
|
||||||
// Cross-instance, same-process single-writer guard: another live holder (a
|
const deadline = retry ? Date.now() + retry.timeoutMs : 0;
|
||||||
// different SpaceLockService in this process) is mid-cycle for this space.
|
let attempt = 0;
|
||||||
// This survives a swallowed heartbeat / Redis TTL lapse, so a second writer
|
for (;;) {
|
||||||
// in the process cannot race the working tree — it is rejected 'lock-held'.
|
// Reserve the in-process slot synchronously (before any await) so two
|
||||||
if (SpaceLockService.liveLocks.has(spaceId)) {
|
// concurrent same-space calls on THIS instance cannot both pass the guard
|
||||||
return { skipped: 'lock-held' };
|
// and race acquire(). On any failure this is released before we retry/skip.
|
||||||
}
|
const reservation = this.tryReserveInProcess(spaceId);
|
||||||
// Reserve the in-process slot synchronously (before any await) so two
|
if (reservation) {
|
||||||
// concurrent same-space calls on THIS instance cannot both pass the guard and
|
// Could not even reserve in-process (this instance mid-cycle, or another
|
||||||
// race acquire(). Redis NX is already authoritative across replicas; this just
|
// live holder in the process). Retry within the bound, else skip.
|
||||||
// closes the in-process TOCTOU window. Released in the outer finally on every
|
if (retry && Date.now() < deadline) {
|
||||||
// path (acquire-failure, fn-throw, normal completion).
|
await this.sleep(this.nextBackoff(attempt++, retry, deadline));
|
||||||
this.running.add(spaceId);
|
continue;
|
||||||
SpaceLockService.liveLocks.set(spaceId, this.instanceId);
|
}
|
||||||
try {
|
return reservation;
|
||||||
if (!(await this.acquire(spaceId))) {
|
}
|
||||||
|
// Reserved in-process — now contend for the Redis leader lock. Release the
|
||||||
|
// in-process slot on EVERY non-running path so a retry/skip leaves no leak.
|
||||||
|
let acquired = false;
|
||||||
|
try {
|
||||||
|
acquired = await this.acquire(spaceId);
|
||||||
|
} finally {
|
||||||
|
if (!acquired) this.releaseInProcess(spaceId);
|
||||||
|
}
|
||||||
|
if (!acquired) {
|
||||||
|
if (retry && Date.now() < deadline) {
|
||||||
|
await this.sleep(this.nextBackoff(attempt++, retry, deadline));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
return { skipped: 'lock-held' };
|
return { skipped: 'lock-held' };
|
||||||
}
|
}
|
||||||
|
// Both locks held — run `fn` under the heartbeat, releasing in `finally`.
|
||||||
// Lost-lock signal: a failed/CAS-missed heartbeat refresh aborts this so the
|
// Lost-lock signal: a failed/CAS-missed heartbeat refresh aborts this so the
|
||||||
// protected fn can stop instead of writing blind after our lock lapsed.
|
// protected fn can stop instead of writing blind after our lock lapsed.
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
@@ -172,10 +188,64 @@ export class SpaceLockService {
|
|||||||
} finally {
|
} finally {
|
||||||
clearInterval(heartbeat);
|
clearInterval(heartbeat);
|
||||||
await this.release(spaceId);
|
await this.release(spaceId);
|
||||||
|
this.releaseInProcess(spaceId);
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
this.running.delete(spaceId);
|
|
||||||
SpaceLockService.liveLocks.delete(spaceId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Synchronously try to reserve the in-process single-writer slot for a space.
|
||||||
|
* Returns a skip sentinel when another holder is live (this instance mid-cycle
|
||||||
|
* -> 'in-progress'; another SpaceLockService in this process -> 'lock-held'),
|
||||||
|
* or `null` when the slot was reserved (caller MUST `releaseInProcess` later).
|
||||||
|
* Both checks + the reservation happen with NO await between them so two
|
||||||
|
* concurrent same-space calls cannot both pass.
|
||||||
|
*/
|
||||||
|
private tryReserveInProcess(
|
||||||
|
spaceId: string,
|
||||||
|
): { skipped: 'lock-held' | 'in-progress' } | null {
|
||||||
|
if (this.running.has(spaceId)) {
|
||||||
|
return { skipped: 'in-progress' };
|
||||||
|
}
|
||||||
|
// Cross-instance, same-process single-writer guard: another live holder (a
|
||||||
|
// different SpaceLockService in this process) is mid-cycle for this space.
|
||||||
|
// This survives a swallowed heartbeat / Redis TTL lapse, so a second writer
|
||||||
|
// in the process cannot race the working tree — it is rejected 'lock-held'.
|
||||||
|
if (SpaceLockService.liveLocks.has(spaceId)) {
|
||||||
|
return { skipped: 'lock-held' };
|
||||||
|
}
|
||||||
|
this.running.add(spaceId);
|
||||||
|
SpaceLockService.liveLocks.set(spaceId, this.instanceId);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Release the in-process single-writer slot reserved by tryReserveInProcess. */
|
||||||
|
private releaseInProcess(spaceId: string): void {
|
||||||
|
this.running.delete(spaceId);
|
||||||
|
SpaceLockService.liveLocks.delete(spaceId);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Backoff (ms) before the next push lock-acquire attempt: capped exponential
|
||||||
|
* (`baseMs * 2^attempt`, ceilinged at `maxMs`) clamped so it never overshoots
|
||||||
|
* the retry `deadline`. Deterministic (no jitter) so the bound is testable.
|
||||||
|
*/
|
||||||
|
private nextBackoff(
|
||||||
|
attempt: number,
|
||||||
|
retry: { baseMs: number; maxMs: number },
|
||||||
|
deadline: number,
|
||||||
|
): number {
|
||||||
|
const exp = retry.baseMs * 2 ** attempt;
|
||||||
|
const capped = Math.min(exp, retry.maxMs);
|
||||||
|
const remaining = Math.max(0, deadline - Date.now());
|
||||||
|
return Math.max(0, Math.min(capped, remaining));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Promise-based delay (extracted so tests can reason about the retry loop). */
|
||||||
|
private sleep(ms: number): Promise<void> {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const t = setTimeout(resolve, ms);
|
||||||
|
t.unref?.();
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -220,6 +220,13 @@ export class VaultGit {
|
|||||||
// that core.autocrlf=false does not cover). POSIX-only path, which is
|
// that core.autocrlf=false does not cover). POSIX-only path, which is
|
||||||
// fine: the daemon runs on Linux (Docker) / macOS. A system
|
// fine: the daemon runs on Linux (Docker) / macOS. A system
|
||||||
// /etc/gitattributes remains the host admin's domain (out of scope).
|
// /etc/gitattributes remains the host admin's domain (out of scope).
|
||||||
|
// - merge.conflictStyle=merge — CRITICAL (SPEC §9, conflict-marker leak):
|
||||||
|
// a global `merge.conflictStyle=diff3`/`zdiff3` makes a conflicting merge
|
||||||
|
// emit an EXTRA `|||||||` base-marker section. The conflict-marker
|
||||||
|
// scrub on the push side (`stripConflictMarkers`) handles `|||||||` too,
|
||||||
|
// but pinning the classic `merge` style keeps the markers the engine
|
||||||
|
// produces to the canonical three (`<<<<<<<`/`=======`/`>>>>>>>`) so
|
||||||
|
// behavior is deterministic regardless of the operator's global config.
|
||||||
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
|
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
|
||||||
// human running git by hand in the vault must inherit the same neutralized
|
// human running git by hand in the vault must inherit the same neutralized
|
||||||
// behavior; a transient `-c` would not persist. (core.quotepath, by
|
// behavior; a transient `-c` would not persist. (core.quotepath, by
|
||||||
@@ -230,6 +237,7 @@ export class VaultGit {
|
|||||||
await this.run(["config", "core.safecrlf", "false"]);
|
await this.run(["config", "core.safecrlf", "false"]);
|
||||||
await this.run(["config", "commit.gpgsign", "false"]);
|
await this.run(["config", "commit.gpgsign", "false"]);
|
||||||
await this.run(["config", "core.attributesFile", "/dev/null"]);
|
await this.run(["config", "core.attributesFile", "/dev/null"]);
|
||||||
|
await this.run(["config", "merge.conflictStyle", "merge"]);
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const detail = err instanceof Error ? err.message : String(err);
|
const detail = err instanceof Error ? err.message : String(err);
|
||||||
throw new Error(
|
throw new Error(
|
||||||
|
|||||||
@@ -699,19 +699,39 @@ export async function applyPushActions(
|
|||||||
});
|
});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
const conflicted = hasConflictMarkers(rawBody);
|
||||||
const body = stripConflictMarkers(rawBody);
|
const body = stripConflictMarkers(rawBody);
|
||||||
// The last-synced version of this file (pre-image) is the common ancestor
|
// The last-synced version of this file (pre-image) is the common ancestor
|
||||||
// for a 3-way merge against the live page, so concurrent human edits are
|
// for a 3-way merge against the live page, so concurrent human edits are
|
||||||
// not clobbered (review #5). Null when the file is new at last-pushed. Its
|
// not clobbered (review #5). Null when the file is new at last-pushed. Its
|
||||||
// body is stripped the SAME way so the merge compares body-to-body.
|
// body is stripped the SAME way (frontmatter AND conflict markers) so the
|
||||||
|
// merge compares clean body-to-body: a base that itself carried markers
|
||||||
|
// (from a prior conflict commit) must never reintroduce marker syntax or a
|
||||||
|
// stale diff3 base region into the 3-way merge.
|
||||||
const baseFull = await deps.git.showFileAtRef(LAST_PUSHED_REF, u.path);
|
const baseFull = await deps.git.showFileAtRef(LAST_PUSHED_REF, u.path);
|
||||||
const baseMarkdown = baseFull === null ? null : parsePageFile(baseFull).body;
|
const baseMarkdown =
|
||||||
|
baseFull === null
|
||||||
|
? null
|
||||||
|
: stripConflictMarkers(parsePageFile(baseFull).body);
|
||||||
const result = await client.importPageMarkdown(
|
const result = await client.importPageMarkdown(
|
||||||
u.pageId,
|
u.pageId,
|
||||||
body,
|
body,
|
||||||
baseMarkdown,
|
baseMarkdown,
|
||||||
);
|
);
|
||||||
updated++;
|
updated++;
|
||||||
|
// CONFLICT VAULT-CLEAN (autoMergeConflicts ON, SPEC §9 marker leak). On ON
|
||||||
|
// a conflicted page is auto-merged INTO Docmost (the clean `body` above),
|
||||||
|
// but the file on `main` still carries the raw `<<<<<<<`/`>>>>>>>` markers
|
||||||
|
// the pull-side `commitMerge` committed. Left as-is they would (1) stay in
|
||||||
|
// the PUBLISHED vault forever (external clones see raw markers) and (2)
|
||||||
|
// re-conflict every cycle. So write the CLEAN body back to the vault file
|
||||||
|
// and record it in `writtenBack` — `runPush` step 7a commits it on `main`
|
||||||
|
// and re-advances the refs, so the published vault converges to the merged
|
||||||
|
// content. Only conflicted files are rewritten (no churn for clean updates).
|
||||||
|
if (conflicted) {
|
||||||
|
await deps.writeFile(u.path, serializePageFile(u.pageId, body));
|
||||||
|
writtenBack.push({ path: u.path, pageId: u.pageId });
|
||||||
|
}
|
||||||
// §10 loop-guard data: hash the BODY we pushed + capture `updatedAt`.
|
// §10 loop-guard data: hash the BODY we pushed + capture `updatedAt`.
|
||||||
pushed.push({
|
pushed.push({
|
||||||
pageId: u.pageId,
|
pageId: u.pageId,
|
||||||
@@ -1083,13 +1103,23 @@ export function isPageFile(path: string): boolean {
|
|||||||
* Docmost). A body is treated as conflicted only when it carries BOTH a begin
|
* Docmost). A body is treated as conflicted only when it carries BOTH a begin
|
||||||
* (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown setext
|
* (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown setext
|
||||||
* heading underline (`=======`) is not mistaken for a conflict. When conflicted,
|
* heading underline (`=======`) is not mistaken for a conflict. When conflicted,
|
||||||
* the three marker line types are removed while BOTH sides' content is preserved
|
* every marker line type is removed while the human-visible content is preserved
|
||||||
* (no data loss): the marker SYNTAX never reaches Docmost, but the human's content
|
* (no data loss): the marker SYNTAX never reaches Docmost, but the content does —
|
||||||
* does — where the conflict is visible and fixable rather than silently dropped.
|
* where the conflict is visible and fixable rather than silently dropped.
|
||||||
|
*
|
||||||
|
* `diff3`/`zdiff3` style: a conflict in that style adds a `|||||||` base section
|
||||||
|
* (`|||||||` line + the merge-BASE content + `=======`). `ensureRepo` pins
|
||||||
|
* `merge.conflictStyle=merge` so the engine never produces it, but a vault that
|
||||||
|
* predates the pin — or content arriving via an external push that a human
|
||||||
|
* committed in diff3 style — could still carry it. So we ALSO recognize the
|
||||||
|
* `|||||||` marker and DROP the stale base region it introduces (between
|
||||||
|
* `|||||||` and `=======`): the base text is neither side's current content, so
|
||||||
|
* keeping it would inject obsolete lines AND leak a raw `|||||||` marker.
|
||||||
*/
|
*/
|
||||||
const CONFLICT_BEGIN_RE = /^<{7}/m;
|
const CONFLICT_BEGIN_RE = /^<{7}/m;
|
||||||
const CONFLICT_END_RE = /^>{7}/m;
|
const CONFLICT_END_RE = /^>{7}/m;
|
||||||
const CONFLICT_BEGIN_LINE_RE = /^<{7}/;
|
const CONFLICT_BEGIN_LINE_RE = /^<{7}/;
|
||||||
|
const CONFLICT_BASE_LINE_RE = /^\|{7}/;
|
||||||
const CONFLICT_SEP_LINE_RE = /^={7}/;
|
const CONFLICT_SEP_LINE_RE = /^={7}/;
|
||||||
const CONFLICT_END_LINE_RE = /^>{7}/;
|
const CONFLICT_END_LINE_RE = /^>{7}/;
|
||||||
|
|
||||||
@@ -1099,23 +1129,37 @@ export function hasConflictMarkers(body: string): boolean {
|
|||||||
|
|
||||||
function stripConflictMarkers(body: string): string {
|
function stripConflictMarkers(body: string): string {
|
||||||
if (!hasConflictMarkers(body)) return body;
|
if (!hasConflictMarkers(body)) return body;
|
||||||
// Remove ONLY the three marker line types, and treat a `=======` line as a
|
// Track where we are inside a conflict block so a `=======` line is treated as
|
||||||
// conflict separator ONLY when we are between a `<<<<<<<` begin and a `>>>>>>>`
|
// a conflict separator ONLY between a `<<<<<<<` begin and a `>>>>>>>` end — a
|
||||||
// end — so a legitimate Markdown setext heading underline (`=======`) outside a
|
// legitimate Markdown setext heading underline (`=======`) outside a conflict
|
||||||
// conflict block is preserved (review finding). Both conflict sides' content is
|
// block is preserved (review finding). State machine over the block:
|
||||||
// kept; only the marker SYNTAX is dropped.
|
// 'no' — outside any conflict block.
|
||||||
let inBlock = false;
|
// 'ours' — after `<<<<<<<`, before `|||||||`/`=======` (our side: KEEP).
|
||||||
|
// 'base' — after `|||||||`, before `=======` (diff3 base region: DROP).
|
||||||
|
// 'theirs' — after `=======`, before `>>>>>>>` (their side: KEEP).
|
||||||
|
// Every marker LINE itself is dropped; only the base region's content is also
|
||||||
|
// dropped (it is stale and not part of either current side).
|
||||||
|
let state: "no" | "ours" | "base" | "theirs" = "no";
|
||||||
const out: string[] = [];
|
const out: string[] = [];
|
||||||
for (const line of body.split("\n")) {
|
for (const line of body.split("\n")) {
|
||||||
if (CONFLICT_BEGIN_LINE_RE.test(line)) {
|
if (CONFLICT_BEGIN_LINE_RE.test(line)) {
|
||||||
inBlock = true;
|
state = "ours";
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (CONFLICT_END_LINE_RE.test(line)) {
|
if (state !== "no" && CONFLICT_END_LINE_RE.test(line)) {
|
||||||
inBlock = false;
|
state = "no";
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (inBlock && CONFLICT_SEP_LINE_RE.test(line)) {
|
if (state === "ours" && CONFLICT_BASE_LINE_RE.test(line)) {
|
||||||
|
state = "base";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if ((state === "ours" || state === "base") && CONFLICT_SEP_LINE_RE.test(line)) {
|
||||||
|
state = "theirs";
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Drop the diff3 base region's content (stale, neither current side).
|
||||||
|
if (state === "base") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
out.push(line);
|
out.push(line);
|
||||||
|
|||||||
@@ -162,6 +162,10 @@ describe('VaultGit (integration; temp repo)', () => {
|
|||||||
expect(await localConfig('commit.gpgsign')).toBe('false');
|
expect(await localConfig('commit.gpgsign')).toBe('false');
|
||||||
expect(await localConfig('core.safecrlf')).toBe('false');
|
expect(await localConfig('core.safecrlf')).toBe('false');
|
||||||
expect(await localConfig('core.attributesFile')).toBe('/dev/null');
|
expect(await localConfig('core.attributesFile')).toBe('/dev/null');
|
||||||
|
// merge.conflictStyle=merge keeps conflict markers to the canonical three
|
||||||
|
// (no diff3 `|||||||` base section) regardless of the operator's global
|
||||||
|
// config (bug #2 marker-leak determinism, SPEC §9).
|
||||||
|
expect(await localConfig('merge.conflictStyle')).toBe('merge');
|
||||||
|
|
||||||
// Idempotent: a second run leaves the same single values (no duplicates).
|
// Idempotent: a second run leaves the same single values (no duplicates).
|
||||||
await git.ensureRepo();
|
await git.ensureRepo();
|
||||||
|
|||||||
@@ -145,6 +145,79 @@ describe('#13 conflict markers reach Docmost', () => {
|
|||||||
expect(pushedBody).toContain('their line');
|
expect(pushedBody).toContain('their line');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('autoMergeConflicts on: rewrites the vault file with the CLEAN body so raw markers do not stay in the published vault (bug #2 marker-leak)', async () => {
|
||||||
|
// Previously the UPDATE path stripped markers for the body SENT to Docmost but
|
||||||
|
// left the file on `main` carrying raw `<<<<<<<`/`>>>>>>>` forever — the
|
||||||
|
// published vault external clients clone kept the markers and the page
|
||||||
|
// re-conflicted every cycle. The fix writes the cleaned body back + records it
|
||||||
|
// in writtenBack so runPush commits it on `main`.
|
||||||
|
const { deps, importPageMarkdown } = makeConflictDeps({
|
||||||
|
...makeSettings(),
|
||||||
|
autoMergeConflicts: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const res = await runPush(deps, { dryRun: false });
|
||||||
|
expect(res.mode).toBe('apply');
|
||||||
|
|
||||||
|
// The clean body was imported into Docmost (no markers).
|
||||||
|
const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any;
|
||||||
|
expect(pushedBody).not.toMatch(/[<>=]{7}/);
|
||||||
|
|
||||||
|
// The vault file was rewritten with the cleaned content (no raw markers).
|
||||||
|
const writeCalls = (deps.writeFile as any).mock.calls as [string, string][];
|
||||||
|
const docWrite = writeCalls.find(([p]) => p === 'Doc.md');
|
||||||
|
expect(docWrite).toBeDefined();
|
||||||
|
expect(docWrite![1]).not.toMatch(/[<>=]{7}/);
|
||||||
|
expect(docWrite![1]).toContain('my line');
|
||||||
|
expect(docWrite![1]).toContain('their line');
|
||||||
|
|
||||||
|
// It is recorded for the follow-up commit so `main` converges to clean bytes.
|
||||||
|
expect(res.applied?.writtenBack).toEqual(
|
||||||
|
expect.arrayContaining([
|
||||||
|
expect.objectContaining({ path: 'Doc.md', pageId: 'p-1' }),
|
||||||
|
]),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('autoMergeConflicts on: strips diff3-style ||||||| base markers + base content (defense-in-depth)', async () => {
|
||||||
|
// A vault created before `merge.conflictStyle=merge` was pinned (or content a
|
||||||
|
// human committed in diff3 style) can carry a `||||||| base` section. The
|
||||||
|
// scrub must drop the `|||||||` marker AND the stale base region, keeping only
|
||||||
|
// the two live sides — otherwise `|||||||` + obsolete base lines leak into the
|
||||||
|
// Docmost page.
|
||||||
|
const diff3Body =
|
||||||
|
'<<<<<<< HEAD\nmy line\n||||||| base\nold base line\n=======\ntheir line\n>>>>>>> feature\n';
|
||||||
|
const file = serializePageFile('p-1', diff3Body);
|
||||||
|
const { git } = makePushGit({ changes: [{ status: 'M', path: 'Doc.md' }] });
|
||||||
|
const importPageMarkdown = vi.fn(async () => ({ success: true }));
|
||||||
|
const client = {
|
||||||
|
listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
|
||||||
|
importPageMarkdown,
|
||||||
|
createPage: vi.fn(),
|
||||||
|
deletePage: vi.fn(),
|
||||||
|
movePage: vi.fn(),
|
||||||
|
renamePage: vi.fn(),
|
||||||
|
};
|
||||||
|
const deps: PushDeps = {
|
||||||
|
settings: { ...makeSettings(), autoMergeConflicts: true },
|
||||||
|
git,
|
||||||
|
makeClient: () => client as any,
|
||||||
|
readFile: vi.fn(async (p: string) => {
|
||||||
|
if (p === 'Doc.md') return file;
|
||||||
|
throw new Error(`no such file: ${p}`);
|
||||||
|
}),
|
||||||
|
writeFile: vi.fn(async () => {}),
|
||||||
|
log: () => {},
|
||||||
|
};
|
||||||
|
|
||||||
|
await runPush(deps, { dryRun: false });
|
||||||
|
const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any;
|
||||||
|
expect(pushedBody).not.toContain('|||||||');
|
||||||
|
expect(pushedBody).not.toContain('old base line'); // stale base dropped
|
||||||
|
expect(pushedBody).toContain('my line');
|
||||||
|
expect(pushedBody).toContain('their line');
|
||||||
|
});
|
||||||
|
|
||||||
it('CREATE branch (autoMergeConflicts off): does NOT create a page from a conflicted NEW file; records a create failure', async () => {
|
it('CREATE branch (autoMergeConflicts off): does NOT create a page from a conflicted NEW file; records a create failure', async () => {
|
||||||
// The conflict-markers guard is DUPLICATED on the CREATE path (a brand-new
|
// The conflict-markers guard is DUPLICATED on the CREATE path (a brand-new
|
||||||
// .md with NO gitmost_id, status 'A') and was previously untested — only the
|
// .md with NO gitmost_id, status 'A') and was previously untested — only the
|
||||||
|
|||||||
Reference in New Issue
Block a user