A 10-agent red-team pass on the two-way Docmost<->git sync surfaced 16 ranked findings (9 others triaged out as already-defended). Wrote a reproduction test per finding (each asserts the CORRECT behavior, so it fails on the bug), then fixed the production code so every repro goes green. All confirmed bugs: Round-trip data loss (markdown-converter.ts + docmost-schema.ts mirror): - #1 editor-ext node types silently dropped on export — ported the 8 missing canon nodes (footnoteReference/footnotesList/footnoteDefinition, htmlEmbed, status, pageEmbed, transclusionSource/Reference) into the git-sync schema mirror and added converter cases that emit their schema-matching HTML instead of flattening unknown nodes to '' (this was the critical data-loss flagged in review #1679: footnotes/htmlEmbed lost on sync). Snapshot surface updated. - #2 top-level image lost width/height/align/attachmentId — now emits an HTML <img> (like video/diagrams) when it carries layout attrs; bare images stay . Image node parses width/height as strings so they re-import. - #3 code block containing a ``` fence corrupted on round-trip — outer fence is now widened to (longest-inner-backtick-run + 1). - #16 deep nesting threw RangeError (page never synced) — added a depth guard (MAX_NODE_DEPTH=400) so the converter never overflows the stack. Push/layout/cycle (engine): - #4 disambiguation ' ~slugId' suffix corrupted Docmost titles + order-dependent layout — deterministic, order-independent sibling disambiguation; suffix is stripped from a path-derived title ONLY when the new name is exactly the old title plus the suffix (never a genuine retitle ending in ' ~token'). - #6 retry-adopt by (parent,title) clobbered the wrong duplicate-title sibling — ambiguous (parent,title) is no longer adopted (falls back to fresh create). - #12 a new child under a new parent was created at ROOT — creates are ordered parent-before-child with an in-memory created-id map for parent resolution. - #13 git conflict markers could reach Docmost — bodies are scanned and the marker lines stripped (a '=======' line is only treated as a conflict separator inside a <<<<<<< ... >>>>>>> block, so setext headings are safe). - #15 a divergent `docmost` mirror was escalated by runPush but dropped by runCycle — RunCycleResult now forwards divergentDocmost to the orchestrator. Server (merge / lock / provenance): - #9 3-way merge lost a human's block edit when git inserted an adjacent block — finer-grained diff3 region merge (via lcs) preserves non-overlapping human edits; genuine same-block conflicts still resolve git-wins. - #10 single-writer race — module-static liveLocks closes the same-process TOCTOU window, and a heartbeat refresh that cannot confirm the lock now aborts the cycle at its next write checkpoint (cooperative AbortSignal threaded through runCycle). Cross-process fencing tokens remain a follow-up. - #14 sticky-agent provenance overrode an explicit actor='git-sync' write, blinding the listener loop-guard — resolveSource now lets an explicit actor win over the sticky-agent fallback (explicit agent still wins). Verified: git-sync vitest 617 pass (+1 expected-fail), server unit jest 1541 pass, server tsc clean. A review pass over the fixes caught and corrected a title-suffix over-strip, an inert abort signal, a document-wide conflict-marker strip, and two leaf-atom content-holes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
233 lines
7.7 KiB
TypeScript
233 lines
7.7 KiB
TypeScript
/**
|
|
* Pure block-level THREE-WAY merge planner (diff3) over arrays of opaque block
|
|
* keys. Used by the git-sync body write to merge an incoming git body into the
|
|
* live page using the last-synced version as the common ancestor (review #5):
|
|
*
|
|
* - a block only the human changed (live != base, git == base) -> keep LIVE
|
|
* - a block only git changed (git != base, live == base) -> take GIT
|
|
* - a block both sides changed (a real conflict) -> GIT wins
|
|
* - inserts/deletes from either side are preserved when unambiguous
|
|
*
|
|
* Content-agnostic: it works on string keys and returns the merged block order as
|
|
* picks ({ src: 'live'|'target', index }) — the caller (the Yjs applier)
|
|
* materializes them — so the whole algorithm is unit-testable on plain arrays.
|
|
*
|
|
* Algorithm: anchor on base blocks present (unchanged) in BOTH live and target
|
|
* (their LCS-with-base intersection). Between consecutive anchors lies one region
|
|
* the human and/or git rewrote; resolve each region three-way. Stable anchor
|
|
* blocks are emitted from LIVE so the applier keeps the existing Yjs block
|
|
* instances (and the human's in-flight edits) in place.
|
|
*/
|
|
|
|
import { buildLcsTable } from './lcs';
|
|
|
|
/** Matched index pairs of the longest common subsequence of `a` and `b`. */
|
|
function lcsPairs(a: string[], b: string[]): Array<[number, number]> {
|
|
const n = a.length;
|
|
const m = b.length;
|
|
const dp = buildLcsTable(a, b);
|
|
const pairs: Array<[number, number]> = [];
|
|
let i = 0;
|
|
let j = 0;
|
|
while (i < n && j < m) {
|
|
if (a[i] === b[j]) {
|
|
pairs.push([i, j]);
|
|
i++;
|
|
j++;
|
|
} else if (dp[i + 1][j] >= dp[i][j + 1]) {
|
|
i++;
|
|
} else {
|
|
j++;
|
|
}
|
|
}
|
|
return pairs;
|
|
}
|
|
|
|
/** o-index -> matched index in the other side (only for LCS-matched blocks). */
|
|
function matchMap(pairs: Array<[number, number]>): Map<number, number> {
|
|
const m = new Map<number, number>();
|
|
for (const [o, x] of pairs) m.set(o, x);
|
|
return m;
|
|
}
|
|
|
|
/**
|
|
* One change `side` made to `base` within a region: base blocks `[oStart,oEnd)`
|
|
* were replaced by the side's blocks listed in `content` (region-local indices).
|
|
* A pure insert has `oStart === oEnd`; a pure delete has empty `content`.
|
|
*/
|
|
interface Hunk {
|
|
oStart: number;
|
|
oEnd: number;
|
|
content: number[];
|
|
}
|
|
|
|
/**
|
|
* Diff `o` against one side as a list of non-overlapping hunks (the base spans
|
|
* the side rewrote/inserted/deleted), derived from their LCS alignment.
|
|
*/
|
|
function buildHunks(o: string[], side: string[]): Hunk[] {
|
|
const pairs = lcsPairs(o, side); // [oIdx, sideIdx] kept (unchanged) blocks
|
|
const hunks: Hunk[] = [];
|
|
let prevO = -1;
|
|
let prevS = -1;
|
|
const flush = (curO: number, curS: number): void => {
|
|
const oStart = prevO + 1;
|
|
const oEnd = curO;
|
|
const content: number[] = [];
|
|
for (let s = prevS + 1; s < curS; s++) content.push(s);
|
|
if (oEnd > oStart || content.length > 0) hunks.push({ oStart, oEnd, content });
|
|
};
|
|
for (const [oIdx, sIdx] of pairs) {
|
|
flush(oIdx, sIdx);
|
|
prevO = oIdx;
|
|
prevS = sIdx;
|
|
}
|
|
flush(o.length, side.length);
|
|
return hunks;
|
|
}
|
|
|
|
/**
|
|
* Do two hunks (one per side) touch the same base region? Pure inserts only
|
|
* collide when nested strictly inside the other hunk's base span (or, for two
|
|
* inserts, at the same gap); changes sitting at a shared boundary do not.
|
|
*/
|
|
function hunksOverlap(a: Hunk, b: Hunk): boolean {
|
|
const aIns = a.oStart === a.oEnd;
|
|
const bIns = b.oStart === b.oEnd;
|
|
if (aIns && bIns) return a.oStart === b.oStart;
|
|
if (aIns) return b.oStart < a.oStart && a.oStart < b.oEnd;
|
|
if (bIns) return a.oStart < b.oStart && b.oStart < a.oEnd;
|
|
return Math.max(a.oStart, b.oStart) < Math.min(a.oEnd, b.oEnd);
|
|
}
|
|
|
|
interface LocalPick {
|
|
src: 'live' | 'target';
|
|
local: number;
|
|
}
|
|
|
|
/**
|
|
* Fine-grained three-way merge of ONE inter-anchor region. Combines the human's
|
|
* and git's NON-overlapping hunks (e.g. a human edit to one block plus a git
|
|
* insert/delete of OTHER blocks in the same region) so neither change is lost.
|
|
* Returns the merged region as region-local picks, or `null` when the two sides
|
|
* changed the SAME base block — a genuine conflict the caller resolves by the
|
|
* original all-or-nothing rule (git wins the whole region).
|
|
*/
|
|
function tryMergeRegion(
|
|
o: string[],
|
|
a: string[],
|
|
b: string[],
|
|
): LocalPick[] | null {
|
|
const aHunks = buildHunks(o, a);
|
|
const bHunks = buildHunks(o, b);
|
|
|
|
// Any overlap between a human hunk and a git hunk is a real conflict; bail so
|
|
// the caller falls back to git-wins (preserving the original behavior).
|
|
for (const ah of aHunks) {
|
|
for (const bh of bHunks) {
|
|
if (hunksOverlap(ah, bh)) return null;
|
|
}
|
|
}
|
|
|
|
// Disjoint: live index of each base block that BOTH sides kept (stable).
|
|
const aKept = matchMap(lcsPairs(o, a)); // base index -> live index
|
|
|
|
const out: LocalPick[] = [];
|
|
let pa = 0;
|
|
let pb = 0;
|
|
let oi = 0;
|
|
while (oi < o.length || pa < aHunks.length || pb < bHunks.length) {
|
|
const ah = pa < aHunks.length ? aHunks[pa] : null;
|
|
const bh = pb < bHunks.length ? bHunks[pb] : null;
|
|
const nextStart = Math.min(
|
|
ah ? ah.oStart : o.length,
|
|
bh ? bh.oStart : o.length,
|
|
);
|
|
|
|
// Emit stable base blocks (kept by both) until the next hunk, from LIVE.
|
|
while (oi < nextStart) {
|
|
out.push({ src: 'live', local: aKept.get(oi) as number });
|
|
oi++;
|
|
}
|
|
if (!ah && !bh) break;
|
|
|
|
// Apply the hunk at oi. When both sides act here they are disjoint, so the
|
|
// pure-insert (oEnd === oi) is emitted before the side that consumes base oi.
|
|
const aHere = ah !== null && ah.oStart === oi;
|
|
const bHere = bh !== null && bh.oStart === oi;
|
|
let useA: boolean;
|
|
if (aHere && bHere) {
|
|
useA = ah!.oEnd === oi; // insert side first; otherwise either order is fine
|
|
} else {
|
|
useA = aHere;
|
|
}
|
|
const h = (useA ? ah : bh) as Hunk;
|
|
const src: 'live' | 'target' = useA ? 'live' : 'target';
|
|
for (const idx of h.content) out.push({ src, local: idx });
|
|
oi = h.oEnd;
|
|
if (useA) pa++;
|
|
else pb++;
|
|
}
|
|
return out;
|
|
}
|
|
|
|
export interface Pick {
|
|
src: 'live' | 'target';
|
|
index: number;
|
|
}
|
|
|
|
/**
|
|
* Three-way merge of base `o`, live `a`, target `b` (arrays of block keys).
|
|
* Returns the merged block order as picks from live/target.
|
|
*/
|
|
export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
|
|
const oToA = matchMap(lcsPairs(o, a));
|
|
const oToB = matchMap(lcsPairs(o, b));
|
|
|
|
const res: Pick[] = [];
|
|
let oi = 0;
|
|
let ai = 0;
|
|
let bi = 0;
|
|
|
|
for (;;) {
|
|
// Next anchor: a base block present (unchanged) in BOTH live and target.
|
|
let anchor = oi;
|
|
while (anchor < o.length && !(oToA.has(anchor) && oToB.has(anchor))) {
|
|
anchor++;
|
|
}
|
|
const aEnd = anchor < o.length ? (oToA.get(anchor) as number) : a.length;
|
|
const bEnd = anchor < o.length ? (oToB.get(anchor) as number) : b.length;
|
|
|
|
// Resolve the region [oi,anchor) that one or both sides rewrote/inserted.
|
|
// Try a fine-grained three-way merge first so a human block-edit survives a
|
|
// git insert/delete of OTHER blocks in the same region; only a genuine
|
|
// same-block conflict (null) falls back to the original git-wins rule.
|
|
const merged = tryMergeRegion(
|
|
o.slice(oi, anchor),
|
|
a.slice(ai, aEnd),
|
|
b.slice(bi, bEnd),
|
|
);
|
|
if (merged) {
|
|
for (const p of merged) {
|
|
res.push(
|
|
p.src === 'live'
|
|
? { src: 'live', index: ai + p.local }
|
|
: { src: 'target', index: bi + p.local },
|
|
);
|
|
}
|
|
} else {
|
|
for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k });
|
|
}
|
|
|
|
if (anchor >= o.length) break;
|
|
|
|
// Emit the stable anchor block from LIVE, then advance past it on all sides.
|
|
res.push({ src: 'live', index: aEnd });
|
|
ai = aEnd + 1;
|
|
bi = bEnd + 1;
|
|
oi = anchor + 1;
|
|
}
|
|
|
|
return res;
|
|
}
|