From 857a0064f714a61192a9b1f3c701e005863890f3 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Fri, 26 Jun 2026 15:00:31 +0300 Subject: [PATCH] =?UTF-8?q?fix(git-sync):=20make=20reconcile=20import=20tr?= =?UTF-8?q?uly=20idempotent=20=E2=80=94=20stop=20runaway=20whole-body=20du?= =?UTF-8?q?plication?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The live Yjs document materializes the editor-schema default `indent: 0` on every paragraph/heading (and on the paragraph inside every list item, callout, and table cell), but a body re-imported from git — parsed from clean markdown — carries no indent attribute. So every live block's merge key differed from the same block coming back from git: the three-way merge could anchor on nothing, and the trailing unit that git's export already contained (but the merge could not match against the byte-identical live tail) was re-appended on every reconcile cycle. Each grown export then diverged from the last-pushed base by one more unit — a self-sustaining, unbounded whole-body duplication loop with no client connected. The prior fix (0c7b73f7) excluded the volatile block `id` from the key, which was necessary but not sufficient: `indent: 0` is a CONTENT attribute the editor stamps as a default, so it was never stripped. Normalize editor-materialized schema defaults (`indent: 0`) out of the block key — only the default value, so a genuine `indent: 2` still diffs and lands — so a live block compares equal to its git-round-tripped twin and the resync is a true no-op. Regression test (yjs-body-merge.idempotency.spec.ts) encodes the invariant on a body of byte-identical units (heading + paragraph + callout + table with empty cells): a live fragment carrying indent:0 + ids merged against the git-derived fragment (neither) with a stale-by-one base applies 0 ops and does not grow — RED before, GREEN after. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../yjs-body-merge.idempotency.spec.ts | 190 ++++++++++++++++++ .../git-sync/services/yjs-body-merge.ts | 43 +++- 2 files changed, 230 insertions(+), 3 deletions(-) create mode 100644 apps/server/src/integrations/git-sync/services/yjs-body-merge.idempotency.spec.ts diff --git a/apps/server/src/integrations/git-sync/services/yjs-body-merge.idempotency.spec.ts b/apps/server/src/integrations/git-sync/services/yjs-body-merge.idempotency.spec.ts new file mode 100644 index 00000000..03e33377 --- /dev/null +++ b/apps/server/src/integrations/git-sync/services/yjs-body-merge.idempotency.spec.ts @@ -0,0 +1,190 @@ +import * as Y from 'yjs'; + +import { + mergeXmlFragments, + mergeXmlFragments3Way, +} from './yjs-body-merge'; + +/** + * Regression for the HIGH-severity runaway whole-body duplication: a page body + * was RE-APPENDED in full on every git-sync reconcile cycle, unbounded, with NO + * client connected. + * + * ROOT CAUSE (confirmed in-process against the real failing page): the LIVE Yjs + * document materializes the editor-schema default `indent: 0` on every + * paragraph/heading (and on the paragraph inside every list item, callout, and + * table cell), but a body re-imported from git — parsed from clean markdown — + * carries NO indent attribute. So every live block's comparison key differed from + * the same block coming back from git; the three-way merge could anchor on + * NOTHING, and the trailing unit that git's export already contained (but the + * merge could not match against the byte-identical live tail) was re-appended + * each cycle. Each grown export then diverged from the last-pushed base by one + * more unit — a self-sustaining loop. + * + * The fix normalizes the materialized default (`indent: 0`) out of the block key + * (see `DEFAULT_KEY_ATTRS` in yjs-body-merge.ts), so a live block compares equal + * to its git-round-tripped twin and the resync is a true no-op. + * + * These tests model that EXACTLY at the Yjs level: a LIVE fragment whose blocks + * carry `indent: 0` + block ids, versus a git-derived fragment of the SAME + * content with neither — for a body built from BYTE-IDENTICAL units that each + * contain a heading, a paragraph, a callout, and a table with empty cells (the + * trigger). RED before the fix (the merge applies > 0 ops and the body grows), + * GREEN after (0 ops, no growth). + */ + +type Attrs = Record; + +function el(name: string, attrs: Attrs, children: (Y.XmlElement | Y.XmlText)[]) { + const e = new Y.XmlElement(name); + for (const [k, v] of Object.entries(attrs)) e.setAttribute(k, v as string); + if (children.length) e.insert(0, children); + return e; +} + +function text(s: string): Y.XmlText { + const t = new Y.XmlText(); + if (s) t.insert(0, s); + return t; +} + +/** + * One byte-identical content unit (heading / paragraph / callout / table-with- + * empty-cells). `live` toggles the two things that exist ONLY in the live Yjs + * doc and NOT in a git round-trip: the materialized `indent: 0` default and the + * per-block `id`. `n` makes each unit's ids unique (as the editor would stamp) + * while keeping the visible CONTENT byte-identical across units. + */ +function unit(live: boolean, n: number, headingText = 'Big Heading'): Y.XmlElement[] { + const ind: Attrs = live ? { indent: 0 } : {}; + const id = (base: string): Attrs => (live ? { id: `${base}${n}` } : {}); + const para = (attrs: Attrs, s: string) => + el('paragraph', { ...attrs, ...ind }, [text(s)]); + + const cell = (name: string) => + el(name, { colspan: 1, rowspan: 1 }, [para({}, '')]); + + return [ + el('heading', { ...id('h'), level: 1, ...ind }, [text(headingText)]), + para(id('p'), 'Para with the same words'), + el('callout', { type: 'info' }, [para(id('c'), 'CalloutText here')]), + el('table', {}, [ + el('tableRow', {}, [cell('tableHeader'), cell('tableHeader')]), + el('tableRow', {}, [cell('tableCell'), cell('tableCell')]), + ]), + ]; +} + +function fragmentOf(units: Y.XmlElement[][]): { + doc: Y.Doc; + frag: Y.XmlFragment; +} { + const doc = new Y.Doc(); + const frag = doc.getXmlFragment('default'); + const blocks = units.flat(); + if (blocks.length) frag.insert(0, blocks); + return { doc, frag }; +} + +const blockCount = (frag: Y.XmlFragment): number => frag.toArray().length; + +describe('git-sync reconcile import is idempotent (no whole-body duplication)', () => { + const UNITS = 3; + + it('3-way: identical content, live carries indent:0, base stale-by-one -> 0 ops, no growth', () => { + // LIVE: the editor-stamped Yjs doc (indent:0 + ids on every block). + const { doc: liveDoc, frag: live } = fragmentOf( + Array.from({ length: UNITS }, (_, i) => unit(true, i)), + ); + // INCOMING (git export -> re-import): same content, NO indent / ids. + const { frag: incoming } = fragmentOf( + Array.from({ length: UNITS }, (_, i) => unit(false, i)), + ); + // BASE = last-pushed file, lagging by ONE unit (the realistic divergence + // that drives the trailing insert-vs-insert). + const { frag: base } = fragmentOf( + Array.from({ length: UNITS - 1 }, (_, i) => unit(false, i)), + ); + + const before = blockCount(live); + let applied = -1; + liveDoc.transact(() => { + applied = mergeXmlFragments3Way(live, incoming, base); + }); + + expect(applied).toBe(0); + expect(blockCount(live)).toBe(before); + }); + + it('3-way is a fixpoint across repeated cycles (does not grow)', () => { + const { doc: liveDoc, frag: live } = fragmentOf( + Array.from({ length: UNITS }, (_, i) => unit(true, i)), + ); + const incomingUnits = () => + fragmentOf(Array.from({ length: UNITS }, (_, i) => unit(false, i))).frag; + const baseUnits = () => + fragmentOf(Array.from({ length: UNITS - 1 }, (_, i) => unit(false, i))) + .frag; + + const before = blockCount(live); + for (let cycle = 0; cycle < 5; cycle++) { + let applied = -1; + liveDoc.transact(() => { + applied = mergeXmlFragments3Way(live, incomingUnits(), baseUnits()); + }); + expect(applied).toBe(0); + expect(blockCount(live)).toBe(before); + } + }); + + it('2-way: identical content, live carries indent:0 -> 0 ops, no growth', () => { + const { doc: liveDoc, frag: live } = fragmentOf( + Array.from({ length: UNITS }, (_, i) => unit(true, i)), + ); + const { frag: incoming } = fragmentOf( + Array.from({ length: UNITS }, (_, i) => unit(false, i)), + ); + + const before = blockCount(live); + let applied = -1; + liveDoc.transact(() => { + applied = mergeXmlFragments(live, incoming); + }); + + expect(applied).toBe(0); + expect(blockCount(live)).toBe(before); + }); + + it('does NOT regress real edits: a git change to one block still lands', () => { + const { doc: liveDoc, frag: live } = fragmentOf( + Array.from({ length: UNITS }, (_, i) => unit(true, i)), + ); + const base = fragmentOf( + Array.from({ length: UNITS }, (_, i) => unit(false, i)), + ).frag; + // git edits the heading text of the LAST unit. + const incoming = fragmentOf( + Array.from({ length: UNITS }, (_, i) => + unit(false, i, i === UNITS - 1 ? 'EDITED Heading' : 'Big Heading'), + ), + ).frag; + + const before = blockCount(live); + liveDoc.transact(() => { + mergeXmlFragments3Way(live, incoming, base); + }); + + // The edit landed, and the body did NOT grow (one block changed in place). + const headings = live + .toArray() + .filter((b) => (b as Y.XmlElement).nodeName === 'heading') + .map((b) => + (b as Y.XmlElement) + .toArray() + .map((c) => (c as Y.XmlText).toString()) + .join(''), + ); + expect(headings).toContain('EDITED Heading'); + expect(blockCount(live)).toBe(before); + }); +}); diff --git a/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts b/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts index db8a8c5c..19493b14 100644 --- a/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts +++ b/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts @@ -58,13 +58,47 @@ type XmlNode = Y.XmlElement | Y.XmlText | Y.XmlHook; */ const VOLATILE_KEY_ATTRS = new Set(['id']); +/** + * Editor-schema attribute DEFAULTS that the live Yjs document MATERIALIZES on + * every block but a git round-trip does NOT carry — so they must be normalized + * out of the block key, otherwise an unchanged block fails to compare equal + * across `DB doc -> markdown (export) -> ProseMirror (re-import)`. + * + * `indent: 0` is the one that bites in practice (HIGH-severity runaway whole-body + * duplication, see below). The editor's indent extension declares + * `indent.default = 0` (`packages/editor-ext/src/lib/indent.ts`), and + * `TiptapTransformer.toYdoc` STAMPS that default onto every `paragraph`/`heading` + * Yjs node — so a body that originated in the UI carries `indent: 0` on every + * block (and on the paragraph inside every list item, callout, and table cell). + * `markdownToProseMirror`, parsing clean markdown, produces NO indent attribute + * (the extension's `renderHTML` even omits it when `<= min`), so a re-imported + * body has `a: {}` where the live body has `a: { indent: 0 }`. + * + * Without this normalization EVERY live block's key differs from the same block + * re-imported from git, so the three-way merge can anchor on NOTHING: the whole + * body becomes one unanchored region, and any trailing unit that git's export + * already contains (but the merge can't match against the identical live tail) + * is RE-APPENDED on every reconcile cycle — an unbounded, self-sustaining + * whole-body duplication loop with no client connected (each grown export + * diverges from the last-pushed base by one more block). Dropping the default + * makes a live `indent: 0` block compare equal to its git-round-tripped twin, so + * the body anchors and the resync is a true no-op. + * + * Only the DEFAULT value is dropped: a genuine `indent: 2` is content and stays + * in the key (so a real indentation edit still diffs and lands). + */ +const DEFAULT_KEY_ATTRS: ReadonlyArray = [ + ['indent', 0], +]; + /** * Canonical, comparable serialization of a Yjs XML node (structure + text + * marks + attributes), with attribute keys sorted so equal blocks always produce * an identical string regardless of attribute insertion order. The volatile - * block `id` (see `VOLATILE_KEY_ATTRS`) is excluded at every level so a block - * compares equal by CONTENT across the git round-trip (which carries no ids) — - * keeping the merge anchor-able and idempotent. + * block `id` (see `VOLATILE_KEY_ATTRS`) and editor-materialized schema defaults + * (see `DEFAULT_KEY_ATTRS`) are excluded at every level so a block compares equal + * by CONTENT across the git round-trip (which carries neither) — keeping the + * merge anchor-able and idempotent. */ export function serializeXmlNode(node: unknown): unknown { if (node instanceof Y.XmlText) { @@ -75,6 +109,9 @@ export function serializeXmlNode(node: unknown): unknown { const sorted: Record = {}; for (const k of Object.keys(attrs).sort()) { if (VOLATILE_KEY_ATTRS.has(k)) continue; + if (DEFAULT_KEY_ATTRS.some(([dk, dv]) => dk === k && attrs[k] === dv)) { + continue; + } sorted[k] = attrs[k]; } return {