From 2594828758945b6907fd1000e3a21867acbdea12 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Fri, 26 Jun 2026 07:54:36 +0300 Subject: [PATCH] =?UTF-8?q?fix(git-sync):=20idempotent=20first-block=20rec?= =?UTF-8?q?onciliation=20=E2=80=94=20stop=20start-of-doc=20content=20dupli?= =?UTF-8?q?cating=20every=20sync=20cycle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The block-level body merge keyed each block by its full attribute set, including the per-block UniqueID the editor stamps on every heading/paragraph. A body arriving from git is parsed from clean markdown and carries no block ids, so a live block (id present) never matched the same block coming from git (no id). The three-way merge's LCS could not anchor on it, and an incoming block with no matching anchor — content inserted at the TOP of the page — was re-added on every push/pull cycle: a non-convergent, unbounded duplication loop. Exclude the volatile 'id' attribute from the block comparison key (serializeXmlNode) so blocks compare by content across the git round-trip. The merge keeps the live block INSTANCE (and its id, and any in-flight edit) for an anchor — picks are by index, not key — so identity is preserved while reconciliation becomes idempotent. Mirrors canonicalize.ts, which already strips the regenerated block id from the round-trip idempotency comparison. Adds a RED-before-fix repro modelling the live-id vs git-no-id asymmetry and asserting no block growth across cycles. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../git-sync/services/yjs-body-merge.spec.ts | 119 ++++++++++++++++++ .../git-sync/services/yjs-body-merge.ts | 36 +++++- 2 files changed, 153 insertions(+), 2 deletions(-) diff --git a/apps/server/src/integrations/git-sync/services/yjs-body-merge.spec.ts b/apps/server/src/integrations/git-sync/services/yjs-body-merge.spec.ts index 10f894af..48d13d19 100644 --- a/apps/server/src/integrations/git-sync/services/yjs-body-merge.spec.ts +++ b/apps/server/src/integrations/git-sync/services/yjs-body-merge.spec.ts @@ -194,6 +194,125 @@ describe('yjs-body-merge', () => { }); }); + // Regression: start-of-document content duplicating on every two-way sync. + // + // The LIVE Docmost doc stamps a per-block UniqueID on every heading/paragraph; + // a body arriving FROM git is parsed from clean markdown and carries NO block + // ids. If the merge comparison key includes that `id`, an unchanged live block + // never matches the SAME block coming from git, so the three-way merge cannot + // anchor on it — and an incoming block with no anchor (content inserted at the + // TOP of the page) is RE-ADDED on every cycle, an unbounded duplication loop. + // These tests model that exact id-asymmetry and assert the reconciliation is + // IDEMPOTENT (no block growth). They are RED before excluding `id` from the + // key in `serializeXmlNode`. + describe('idempotent reconciliation with live block ids (start-of-doc dup)', () => { + // Build a fragment from block specs. `id` is set only when provided, mirroring + // the live doc (ids present) vs a git-parsed body (ids absent). + type Spec = { tag: 'heading' | 'paragraph'; text: string; id?: string }; + function buildDoc(doc: Y.Doc, specs: Spec[]): Y.XmlFragment { + const frag = doc.getXmlFragment('default'); + const blocks = specs.map((s) => { + const el = new Y.XmlElement(s.tag); + if (s.id) el.setAttribute('id', s.id); + if (s.tag === 'heading') el.setAttribute('level', '2'); + const t = new Y.XmlText(); + if (s.text) t.insert(0, s.text); + el.insert(0, [t]); + return el; + }); + if (blocks.length) frag.insert(0, blocks); + return frag; + } + const textsOf = (frag: Y.XmlFragment): string[] => + frag.toArray().map((el) => + (el as Y.XmlElement) + .toArray() + .map((c) => (c as Y.XmlText).toString()) + .join(''), + ); + + it('re-merging the SAME git body does NOT re-add the top block (idempotent)', () => { + // last-synced base (from git markdown): NO block ids. + const base = new Y.Doc(); + const baseFrag = buildDoc(base, [ + { tag: 'heading', text: 'Title' }, + { tag: 'paragraph', text: 'Some paragraph.' }, + { tag: 'paragraph', text: 'End block.' }, + ]); + // live Docmost doc: SAME content, but every block carries a UniqueID. + const live = new Y.Doc(); + const liveFrag = buildDoc(live, [ + { tag: 'heading', text: 'Title', id: 'ida' }, + { tag: 'paragraph', text: 'Some paragraph.', id: 'idb' }, + { tag: 'paragraph', text: 'End block.', id: 'idc' }, + ]); + // incoming git body: the user inserted a heading at the very TOP. + const buildTarget = (): Y.XmlFragment => + buildDoc(new Y.Doc(), [ + { tag: 'heading', text: 'TOPDUP' }, + { tag: 'heading', text: 'Title' }, + { tag: 'paragraph', text: 'Some paragraph.' }, + { tag: 'paragraph', text: 'End block.' }, + ]); + + // First sync: the top block is added once. + live.transact(() => + mergeXmlFragments3Way(liveFrag, buildTarget(), baseFrag), + ); + expect(textsOf(liveFrag)).toEqual([ + 'TOPDUP', + 'Title', + 'Some paragraph.', + 'End block.', + ]); + + // Subsequent sync of the SAME git body against the SAME base must be a + // NO-OP — not a second copy of the top block. Before the fix this re-adds + // 'TOPDUP', growing the doc on every cycle. + live.transact(() => + mergeXmlFragments3Way(liveFrag, buildTarget(), baseFrag), + ); + expect(textsOf(liveFrag)).toEqual([ + 'TOPDUP', + 'Title', + 'Some paragraph.', + 'End block.', + ]); + expect(textsOf(liveFrag).filter((t) => t === 'TOPDUP')).toHaveLength(1); + }); + + it('an unchanged git body (live ids, none in git) is a complete no-op', () => { + // base == git body (no pending git change); live is the same content with + // ids. With `id` in the key the whole body looks rewritten; the merge must + // still leave live byte-identical (block instances untouched). + const base = new Y.Doc(); + const baseFrag = buildDoc(base, [ + { tag: 'heading', text: 'Title' }, + { tag: 'paragraph', text: 'Body.' }, + ]); + const live = new Y.Doc(); + const liveFrag = buildDoc(live, [ + { tag: 'heading', text: 'Title', id: 'ida' }, + { tag: 'paragraph', text: 'Body.', id: 'idb' }, + ]); + const before = liveFrag.toArray(); + let applied = -1; + live.transact(() => { + applied = mergeXmlFragments3Way( + liveFrag, + buildDoc(new Y.Doc(), [ + { tag: 'heading', text: 'Title' }, + { tag: 'paragraph', text: 'Body.' }, + ]), + baseFrag, + ); + }); + expect(applied).toBe(0); + // Same live block instances (ids preserved) — nothing recreated. + expect(liveFrag.toArray()).toEqual(before); + }); + }); + describe('cloneXmlNode', () => { it('preserves text marks (XmlText delta) across docs', () => { const src = new Y.Doc(); diff --git a/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts b/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts index 2c459505..62decbca 100644 --- a/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts +++ b/apps/server/src/integrations/git-sync/services/yjs-body-merge.ts @@ -26,10 +26,39 @@ import { buildLcsTable } from './lcs'; type XmlNode = Y.XmlElement | Y.XmlText | Y.XmlHook; +/** + * Node attributes that are VOLATILE identity (not content) and so must be + * excluded from the block comparison key. + * + * `id` is the per-block UniqueID the editor stamps on every heading/paragraph + * (and transclusionSource). It exists ONLY in the live Yjs document — a body + * arriving from git is parsed from clean markdown, which carries no block ids + * (`markdownToProseMirror` materializes `id: null`, which the Yjs transform then + * drops). If `id` were part of the key, an UNCHANGED live block (id "abc123") + * would never match the SAME block coming from git (no id), so the three-way + * merge's LCS could not anchor on it. The merge would then treat every live + * block as deleted-and-reinserted and, when an incoming block has no matching + * anchor (e.g. content inserted at the very TOP of the page), RE-ADD a copy of + * it on every sync cycle — a non-convergent, unbounded duplication loop + * (start-of-document content duplicating each push/pull cycle). + * + * Excluding `id` makes blocks compare by CONTENT, so an unchanged block matches + * across the git round-trip and the reconciliation is idempotent. Block identity + * is still preserved in the merged output: `diff3Plan` keeps the LIVE block + * INSTANCE (with its id) for an anchor — picks are by index, not by key — so the + * stable Yjs block (and any in-flight human edit on it) stays put. This mirrors + * `canonicalize.ts`, which already strips the regenerated block `id` from the + * round-trip idempotency comparison for exactly the same reason. + */ +const VOLATILE_KEY_ATTRS = new Set(['id']); + /** * Canonical, comparable serialization of a Yjs XML node (structure + text + * marks + attributes), with attribute keys sorted so equal blocks always produce - * an identical string regardless of attribute insertion order. + * an identical string regardless of attribute insertion order. The volatile + * block `id` (see `VOLATILE_KEY_ATTRS`) is excluded at every level so a block + * compares equal by CONTENT across the git round-trip (which carries no ids) — + * keeping the merge anchor-able and idempotent. */ export function serializeXmlNode(node: unknown): unknown { if (node instanceof Y.XmlText) { @@ -38,7 +67,10 @@ export function serializeXmlNode(node: unknown): unknown { if (node instanceof Y.XmlElement) { const attrs = node.getAttributes() as Record; const sorted: Record = {}; - for (const k of Object.keys(attrs).sort()) sorted[k] = attrs[k]; + for (const k of Object.keys(attrs).sort()) { + if (VOLATILE_KEY_ATTRS.has(k)) continue; + sorted[k] = attrs[k]; + } return { n: node.nodeName, a: sorted,