fix(git-sync): make reconcile import truly idempotent — stop runaway whole-body duplication
The live Yjs document materializes the editor-schema default `indent: 0` on
every paragraph/heading (and on the paragraph inside every list item, callout,
and table cell), but a body re-imported from git — parsed from clean markdown —
carries no indent attribute. So every live block's merge key differed from the
same block coming back from git: the three-way merge could anchor on nothing,
and the trailing unit that git's export already contained (but the merge could
not match against the byte-identical live tail) was re-appended on every
reconcile cycle. Each grown export then diverged from the last-pushed base by one
more unit — a self-sustaining, unbounded whole-body duplication loop with no
client connected.
The prior fix (0c7b73f7) excluded the volatile block `id` from the key, which
was necessary but not sufficient: `indent: 0` is a CONTENT attribute the editor
stamps as a default, so it was never stripped. Normalize editor-materialized
schema defaults (`indent: 0`) out of the block key — only the default value, so a
genuine `indent: 2` still diffs and lands — so a live block compares equal to its
git-round-tripped twin and the resync is a true no-op.
Regression test (yjs-body-merge.idempotency.spec.ts) encodes the invariant on a
body of byte-identical units (heading + paragraph + callout + table with empty
cells): a live fragment carrying indent:0 + ids merged against the git-derived
fragment (neither) with a stale-by-one base applies 0 ops and does not grow — RED
before, GREEN after.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,190 @@
|
|||||||
|
import * as Y from 'yjs';
|
||||||
|
|
||||||
|
import {
|
||||||
|
mergeXmlFragments,
|
||||||
|
mergeXmlFragments3Way,
|
||||||
|
} from './yjs-body-merge';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Regression for the HIGH-severity runaway whole-body duplication: a page body
|
||||||
|
* was RE-APPENDED in full on every git-sync reconcile cycle, unbounded, with NO
|
||||||
|
* client connected.
|
||||||
|
*
|
||||||
|
* ROOT CAUSE (confirmed in-process against the real failing page): the LIVE Yjs
|
||||||
|
* document materializes the editor-schema default `indent: 0` on every
|
||||||
|
* paragraph/heading (and on the paragraph inside every list item, callout, and
|
||||||
|
* table cell), but a body re-imported from git — parsed from clean markdown —
|
||||||
|
* carries NO indent attribute. So every live block's comparison key differed from
|
||||||
|
* the same block coming back from git; the three-way merge could anchor on
|
||||||
|
* NOTHING, and the trailing unit that git's export already contained (but the
|
||||||
|
* merge could not match against the byte-identical live tail) was re-appended
|
||||||
|
* each cycle. Each grown export then diverged from the last-pushed base by one
|
||||||
|
* more unit — a self-sustaining loop.
|
||||||
|
*
|
||||||
|
* The fix normalizes the materialized default (`indent: 0`) out of the block key
|
||||||
|
* (see `DEFAULT_KEY_ATTRS` in yjs-body-merge.ts), so a live block compares equal
|
||||||
|
* to its git-round-tripped twin and the resync is a true no-op.
|
||||||
|
*
|
||||||
|
* These tests model that EXACTLY at the Yjs level: a LIVE fragment whose blocks
|
||||||
|
* carry `indent: 0` + block ids, versus a git-derived fragment of the SAME
|
||||||
|
* content with neither — for a body built from BYTE-IDENTICAL units that each
|
||||||
|
* contain a heading, a paragraph, a callout, and a table with empty cells (the
|
||||||
|
* trigger). RED before the fix (the merge applies > 0 ops and the body grows),
|
||||||
|
* GREEN after (0 ops, no growth).
|
||||||
|
*/
|
||||||
|
|
||||||
|
type Attrs = Record<string, string | number>;
|
||||||
|
|
||||||
|
function el(name: string, attrs: Attrs, children: (Y.XmlElement | Y.XmlText)[]) {
|
||||||
|
const e = new Y.XmlElement(name);
|
||||||
|
for (const [k, v] of Object.entries(attrs)) e.setAttribute(k, v as string);
|
||||||
|
if (children.length) e.insert(0, children);
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
function text(s: string): Y.XmlText {
|
||||||
|
const t = new Y.XmlText();
|
||||||
|
if (s) t.insert(0, s);
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* One byte-identical content unit (heading / paragraph / callout / table-with-
|
||||||
|
* empty-cells). `live` toggles the two things that exist ONLY in the live Yjs
|
||||||
|
* doc and NOT in a git round-trip: the materialized `indent: 0` default and the
|
||||||
|
* per-block `id`. `n` makes each unit's ids unique (as the editor would stamp)
|
||||||
|
* while keeping the visible CONTENT byte-identical across units.
|
||||||
|
*/
|
||||||
|
function unit(live: boolean, n: number, headingText = 'Big Heading'): Y.XmlElement[] {
|
||||||
|
const ind: Attrs = live ? { indent: 0 } : {};
|
||||||
|
const id = (base: string): Attrs => (live ? { id: `${base}${n}` } : {});
|
||||||
|
const para = (attrs: Attrs, s: string) =>
|
||||||
|
el('paragraph', { ...attrs, ...ind }, [text(s)]);
|
||||||
|
|
||||||
|
const cell = (name: string) =>
|
||||||
|
el(name, { colspan: 1, rowspan: 1 }, [para({}, '')]);
|
||||||
|
|
||||||
|
return [
|
||||||
|
el('heading', { ...id('h'), level: 1, ...ind }, [text(headingText)]),
|
||||||
|
para(id('p'), 'Para with the same words'),
|
||||||
|
el('callout', { type: 'info' }, [para(id('c'), 'CalloutText here')]),
|
||||||
|
el('table', {}, [
|
||||||
|
el('tableRow', {}, [cell('tableHeader'), cell('tableHeader')]),
|
||||||
|
el('tableRow', {}, [cell('tableCell'), cell('tableCell')]),
|
||||||
|
]),
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
function fragmentOf(units: Y.XmlElement[][]): {
|
||||||
|
doc: Y.Doc;
|
||||||
|
frag: Y.XmlFragment;
|
||||||
|
} {
|
||||||
|
const doc = new Y.Doc();
|
||||||
|
const frag = doc.getXmlFragment('default');
|
||||||
|
const blocks = units.flat();
|
||||||
|
if (blocks.length) frag.insert(0, blocks);
|
||||||
|
return { doc, frag };
|
||||||
|
}
|
||||||
|
|
||||||
|
const blockCount = (frag: Y.XmlFragment): number => frag.toArray().length;
|
||||||
|
|
||||||
|
describe('git-sync reconcile import is idempotent (no whole-body duplication)', () => {
|
||||||
|
const UNITS = 3;
|
||||||
|
|
||||||
|
it('3-way: identical content, live carries indent:0, base stale-by-one -> 0 ops, no growth', () => {
|
||||||
|
// LIVE: the editor-stamped Yjs doc (indent:0 + ids on every block).
|
||||||
|
const { doc: liveDoc, frag: live } = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) => unit(true, i)),
|
||||||
|
);
|
||||||
|
// INCOMING (git export -> re-import): same content, NO indent / ids.
|
||||||
|
const { frag: incoming } = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) => unit(false, i)),
|
||||||
|
);
|
||||||
|
// BASE = last-pushed file, lagging by ONE unit (the realistic divergence
|
||||||
|
// that drives the trailing insert-vs-insert).
|
||||||
|
const { frag: base } = fragmentOf(
|
||||||
|
Array.from({ length: UNITS - 1 }, (_, i) => unit(false, i)),
|
||||||
|
);
|
||||||
|
|
||||||
|
const before = blockCount(live);
|
||||||
|
let applied = -1;
|
||||||
|
liveDoc.transact(() => {
|
||||||
|
applied = mergeXmlFragments3Way(live, incoming, base);
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(applied).toBe(0);
|
||||||
|
expect(blockCount(live)).toBe(before);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('3-way is a fixpoint across repeated cycles (does not grow)', () => {
|
||||||
|
const { doc: liveDoc, frag: live } = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) => unit(true, i)),
|
||||||
|
);
|
||||||
|
const incomingUnits = () =>
|
||||||
|
fragmentOf(Array.from({ length: UNITS }, (_, i) => unit(false, i))).frag;
|
||||||
|
const baseUnits = () =>
|
||||||
|
fragmentOf(Array.from({ length: UNITS - 1 }, (_, i) => unit(false, i)))
|
||||||
|
.frag;
|
||||||
|
|
||||||
|
const before = blockCount(live);
|
||||||
|
for (let cycle = 0; cycle < 5; cycle++) {
|
||||||
|
let applied = -1;
|
||||||
|
liveDoc.transact(() => {
|
||||||
|
applied = mergeXmlFragments3Way(live, incomingUnits(), baseUnits());
|
||||||
|
});
|
||||||
|
expect(applied).toBe(0);
|
||||||
|
expect(blockCount(live)).toBe(before);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('2-way: identical content, live carries indent:0 -> 0 ops, no growth', () => {
|
||||||
|
const { doc: liveDoc, frag: live } = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) => unit(true, i)),
|
||||||
|
);
|
||||||
|
const { frag: incoming } = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) => unit(false, i)),
|
||||||
|
);
|
||||||
|
|
||||||
|
const before = blockCount(live);
|
||||||
|
let applied = -1;
|
||||||
|
liveDoc.transact(() => {
|
||||||
|
applied = mergeXmlFragments(live, incoming);
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(applied).toBe(0);
|
||||||
|
expect(blockCount(live)).toBe(before);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does NOT regress real edits: a git change to one block still lands', () => {
|
||||||
|
const { doc: liveDoc, frag: live } = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) => unit(true, i)),
|
||||||
|
);
|
||||||
|
const base = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) => unit(false, i)),
|
||||||
|
).frag;
|
||||||
|
// git edits the heading text of the LAST unit.
|
||||||
|
const incoming = fragmentOf(
|
||||||
|
Array.from({ length: UNITS }, (_, i) =>
|
||||||
|
unit(false, i, i === UNITS - 1 ? 'EDITED Heading' : 'Big Heading'),
|
||||||
|
),
|
||||||
|
).frag;
|
||||||
|
|
||||||
|
const before = blockCount(live);
|
||||||
|
liveDoc.transact(() => {
|
||||||
|
mergeXmlFragments3Way(live, incoming, base);
|
||||||
|
});
|
||||||
|
|
||||||
|
// The edit landed, and the body did NOT grow (one block changed in place).
|
||||||
|
const headings = live
|
||||||
|
.toArray()
|
||||||
|
.filter((b) => (b as Y.XmlElement).nodeName === 'heading')
|
||||||
|
.map((b) =>
|
||||||
|
(b as Y.XmlElement)
|
||||||
|
.toArray()
|
||||||
|
.map((c) => (c as Y.XmlText).toString())
|
||||||
|
.join(''),
|
||||||
|
);
|
||||||
|
expect(headings).toContain('EDITED Heading');
|
||||||
|
expect(blockCount(live)).toBe(before);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -58,13 +58,47 @@ type XmlNode = Y.XmlElement | Y.XmlText | Y.XmlHook;
|
|||||||
*/
|
*/
|
||||||
const VOLATILE_KEY_ATTRS = new Set(['id']);
|
const VOLATILE_KEY_ATTRS = new Set(['id']);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Editor-schema attribute DEFAULTS that the live Yjs document MATERIALIZES on
|
||||||
|
* every block but a git round-trip does NOT carry — so they must be normalized
|
||||||
|
* out of the block key, otherwise an unchanged block fails to compare equal
|
||||||
|
* across `DB doc -> markdown (export) -> ProseMirror (re-import)`.
|
||||||
|
*
|
||||||
|
* `indent: 0` is the one that bites in practice (HIGH-severity runaway whole-body
|
||||||
|
* duplication, see below). The editor's indent extension declares
|
||||||
|
* `indent.default = 0` (`packages/editor-ext/src/lib/indent.ts`), and
|
||||||
|
* `TiptapTransformer.toYdoc` STAMPS that default onto every `paragraph`/`heading`
|
||||||
|
* Yjs node — so a body that originated in the UI carries `indent: 0` on every
|
||||||
|
* block (and on the paragraph inside every list item, callout, and table cell).
|
||||||
|
* `markdownToProseMirror`, parsing clean markdown, produces NO indent attribute
|
||||||
|
* (the extension's `renderHTML` even omits it when `<= min`), so a re-imported
|
||||||
|
* body has `a: {}` where the live body has `a: { indent: 0 }`.
|
||||||
|
*
|
||||||
|
* Without this normalization EVERY live block's key differs from the same block
|
||||||
|
* re-imported from git, so the three-way merge can anchor on NOTHING: the whole
|
||||||
|
* body becomes one unanchored region, and any trailing unit that git's export
|
||||||
|
* already contains (but the merge can't match against the identical live tail)
|
||||||
|
* is RE-APPENDED on every reconcile cycle — an unbounded, self-sustaining
|
||||||
|
* whole-body duplication loop with no client connected (each grown export
|
||||||
|
* diverges from the last-pushed base by one more block). Dropping the default
|
||||||
|
* makes a live `indent: 0` block compare equal to its git-round-tripped twin, so
|
||||||
|
* the body anchors and the resync is a true no-op.
|
||||||
|
*
|
||||||
|
* Only the DEFAULT value is dropped: a genuine `indent: 2` is content and stays
|
||||||
|
* in the key (so a real indentation edit still diffs and lands).
|
||||||
|
*/
|
||||||
|
const DEFAULT_KEY_ATTRS: ReadonlyArray<readonly [string, unknown]> = [
|
||||||
|
['indent', 0],
|
||||||
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Canonical, comparable serialization of a Yjs XML node (structure + text +
|
* Canonical, comparable serialization of a Yjs XML node (structure + text +
|
||||||
* marks + attributes), with attribute keys sorted so equal blocks always produce
|
* marks + attributes), with attribute keys sorted so equal blocks always produce
|
||||||
* an identical string regardless of attribute insertion order. The volatile
|
* an identical string regardless of attribute insertion order. The volatile
|
||||||
* block `id` (see `VOLATILE_KEY_ATTRS`) is excluded at every level so a block
|
* block `id` (see `VOLATILE_KEY_ATTRS`) and editor-materialized schema defaults
|
||||||
* compares equal by CONTENT across the git round-trip (which carries no ids) —
|
* (see `DEFAULT_KEY_ATTRS`) are excluded at every level so a block compares equal
|
||||||
* keeping the merge anchor-able and idempotent.
|
* by CONTENT across the git round-trip (which carries neither) — keeping the
|
||||||
|
* merge anchor-able and idempotent.
|
||||||
*/
|
*/
|
||||||
export function serializeXmlNode(node: unknown): unknown {
|
export function serializeXmlNode(node: unknown): unknown {
|
||||||
if (node instanceof Y.XmlText) {
|
if (node instanceof Y.XmlText) {
|
||||||
@@ -75,6 +109,9 @@ export function serializeXmlNode(node: unknown): unknown {
|
|||||||
const sorted: Record<string, unknown> = {};
|
const sorted: Record<string, unknown> = {};
|
||||||
for (const k of Object.keys(attrs).sort()) {
|
for (const k of Object.keys(attrs).sort()) {
|
||||||
if (VOLATILE_KEY_ATTRS.has(k)) continue;
|
if (VOLATILE_KEY_ATTRS.has(k)) continue;
|
||||||
|
if (DEFAULT_KEY_ATTRS.some(([dk, dv]) => dk === k && attrs[k] === dv)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
sorted[k] = attrs[k];
|
sorted[k] = attrs[k];
|
||||||
}
|
}
|
||||||
return {
|
return {
|
||||||
|
|||||||
Reference in New Issue
Block a user