From ceee2a76cacdee50edc138bfb7e5758b62abe2ec Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 13:47:10 +0300 Subject: [PATCH] fix(footnotes): survive duplicate-id definitions without collab divergence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Release-cycle red-team found two same-id footnoteDefinition nodes (trivially produced by markdown import [^d]: first / [^d]: second, or paste/duplicate) caused silent data loss: scan() used a last-wins Map and the sync rebuild (addToHistory:false, propagated via Yjs, un-undoable) dropped all but the last. Fix resolves collisions so BOTH survive, with a DETERMINISTIC id scheme so collaborators converge: - deriveFootnoteId(originalId, occurrence, taken): the k-th (k>=2) occurrence of id X becomes X__k, bumped with a deterministic alpha suffix only against the doc's own id set — a pure function of document state. No Math.random/Date.now on the sync or import paths (random uuid stays only in setFootnote, where a single user originates a brand-new id). - footnote-sync.resolveCollisions walks refs+defs in document order, re-ids duplicate references via setNodeMarkup and pairs them 1:1 with definitions; single SYNC_META-tagged transaction, returns null when canonical (terminates). - Markdown import (footnote.marked) + MCP mirror (collaboration.ts) dedup with the same deterministic scheme + marker rewrite; packages/mcp/build regenerated. - Paste plugin remaps colliding pasted ids against the current doc. Tests: two independent editors resolving the same duplicate-id doc produce IDENTICAL ids (the cross-client determinism guard that the random version would fail); both definitions survive the first edit; import dedup is deterministic. Co-Authored-By: Claude Opus 4.8 --- .../lib/footnote/footnote-markdown.test.ts | 84 +++++ .../src/lib/footnote/footnote-reference.ts | 5 +- .../src/lib/footnote/footnote-sync.ts | 349 ++++++++++++++++-- .../src/lib/footnote/footnote-util.ts | 55 +++ .../src/lib/footnote/footnote.test.ts | 154 ++++++++ .../src/lib/markdown/utils/footnote.marked.ts | 57 ++- packages/mcp/build/lib/collaboration.js | 72 +++- packages/mcp/src/lib/collaboration.ts | 80 +++- packages/mcp/test/unit/footnotes.test.mjs | 33 ++ 9 files changed, 864 insertions(+), 25 deletions(-) diff --git a/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts index a6f3d4ab..844134f6 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect } from "vitest"; import { htmlToMarkdown } from "../markdown/utils/turndown.utils"; import { markdownToHtml } from "../markdown/utils/marked.utils"; +import { extractFootnoteDefinitions } from "../markdown/utils/footnote.marked"; // HTML the editor-ext nodes render (sup[data-footnote-ref], section/div). const HTML = @@ -53,4 +54,87 @@ describe("footnote markdown round-trip", () => { expect(html).not.toContain("data-footnotes"); expect(html).not.toContain("data-footnote-def"); }); + + it("extractFootnoteDefinitions de-duplicates colliding ids and rewrites markers", () => { + // Two definitions share id `d`, and the body has two `[^d]` markers. The + // output must keep BOTH definitions with DISTINCT ids and rewrite the second + // marker so the (reference, definition) pairing stays 1:1. + const md = [ + "See here[^d] and there[^d].", + "", + "[^d]: first", + "[^d]: second", + ].join("\n"); + + const { body, section } = extractFootnoteDefinitions(md); + + // Pull out the def ids from the section in order. + const defIds = Array.from( + section.matchAll(/data-footnote-def data-id="([^"]+)"/g), + ).map((m) => m[1]); + expect(defIds.length).toBe(2); + expect(new Set(defIds).size).toBe(2); // distinct + expect(defIds[0]).toBe("d"); // first definition keeps the id + + // Both definition texts survive. + expect(section).toContain("first"); + expect(section).toContain("second"); + + // The body still has two markers, now pointing at the two distinct ids. + const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map( + (m) => m[1], + ); + expect(refIds.length).toBe(2); + expect(refIds.sort()).toEqual(defIds.sort()); + }); + + it("extractFootnoteDefinitions dedups DETERMINISTICALLY (same input -> same ids)", () => { + // The derived id must be a pure function of the input markdown so importing + // the same source twice (or via the editor and the MCP mirror) yields + // identical ids — never random/time-based. + const md = [ + "See[^d] one[^d] two[^d].", + "", + "[^d]: first", + "[^d]: second", + "[^d]: third", + ].join("\n"); + + const run = () => { + const { body, section } = extractFootnoteDefinitions(md); + const defIds = Array.from( + section.matchAll(/data-footnote-def data-id="([^"]+)"/g), + ).map((m) => m[1]); + const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map( + (m) => m[1], + ); + return { defIds, refIds }; + }; + + const a = run(); + const b = run(); + // Identical across runs (this is what would FAIL on the random-id version). + expect(a.defIds).toEqual(b.defIds); + expect(a.refIds).toEqual(b.refIds); + // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3". + expect(a.defIds).toEqual(["d", "d__2", "d__3"]); + expect(a.refIds.sort()).toEqual(a.defIds.sort()); + }); + + it("markdownToHtml with duplicate ids renders two distinct footnote defs", async () => { + const md = [ + "See here[^d] and there[^d].", + "", + "[^d]: first", + "[^d]: second", + ].join("\n"); + const html = await markdownToHtml(md); + const defIds = Array.from( + html.matchAll(/data-footnote-def data-id="([^"]+)"/g), + ).map((m) => m[1]); + expect(defIds.length).toBe(2); + expect(new Set(defIds).size).toBe(2); + expect(html).toContain("first"); + expect(html).toContain("second"); + }); }); diff --git a/packages/editor-ext/src/lib/footnote/footnote-reference.ts b/packages/editor-ext/src/lib/footnote/footnote-reference.ts index 90f5e109..7b47617d 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-reference.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-reference.ts @@ -8,7 +8,7 @@ import { generateFootnoteId, } from "./footnote-util"; import { footnoteNumberingPlugin } from "./footnote-numbering"; -import { footnoteSyncPlugin } from "./footnote-sync"; +import { footnoteSyncPlugin, footnotePastePlugin } from "./footnote-sync"; export interface FootnoteReferenceOptions { HTMLAttributes: Record; @@ -88,6 +88,9 @@ export const FootnoteReference = Node.create({ // doc is never mutated. if (this.options.enableSync !== false) { plugins.push(footnoteSyncPlugin(this.options.isRemoteTransaction)); + // Regenerate colliding footnote ids on paste so a pasted reference+ + // definition pair never clobbers/merges with an existing footnote. + plugins.push(footnotePastePlugin()); } return plugins; }, diff --git a/packages/editor-ext/src/lib/footnote/footnote-sync.ts b/packages/editor-ext/src/lib/footnote/footnote-sync.ts index ffd2e136..33258590 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-sync.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-sync.ts @@ -1,48 +1,215 @@ import { Plugin, PluginKey, Transaction } from "@tiptap/pm/state"; -import { Node as ProseMirrorNode, Fragment } from "@tiptap/pm/model"; +import { Node as ProseMirrorNode, Fragment, Slice } from "@tiptap/pm/model"; import { FOOTNOTE_DEFINITION_NAME, FOOTNOTE_REFERENCE_NAME, FOOTNOTES_LIST_NAME, + deriveFootnoteId, } from "./footnote-util"; export const footnoteSyncPluginKey = new PluginKey("footnoteSync"); const SYNC_META = "footnoteSyncApplied"; +interface RefOccurrence { + /** Position of the reference node in the document. */ + pos: number; + /** The id the reference currently carries. */ + id: string; + node: ProseMirrorNode; +} + +interface DefOccurrence { + /** Position of the definition node in the document. */ + pos: number; + /** The id the definition currently carries. */ + id: string; + node: ProseMirrorNode; +} + interface FootnoteScan { - /** Reference ids in document order, first occurrence only, de-duplicated. */ - referenceIds: string[]; - /** definition id -> node (last occurrence wins, matching scan order). */ - definitions: Map; + /** + * Every reference occurrence in document order (NOT de-duplicated). Needed so + * that duplicate ids — which would otherwise be silently collapsed — can be + * detected and (together with their definitions) re-id'd instead of dropped. + */ + refOccurrences: RefOccurrence[]; + /** + * Every definition occurrence in document order (NOT de-duplicated). The old + * implementation used a last-wins Map here, which is exactly what caused + * silent data loss: two definitions sharing an id collapsed to one. + */ + defOccurrences: DefOccurrence[]; /** Every top-level footnotesList node, in document order. */ lists: Array<{ pos: number; node: ProseMirrorNode }>; } function scan(doc: ProseMirrorNode): FootnoteScan { - const referenceIds: string[] = []; - const seenRefs = new Set(); - const definitions = new Map(); + const refOccurrences: RefOccurrence[] = []; + const defOccurrences: DefOccurrence[] = []; const lists: Array<{ pos: number; node: ProseMirrorNode }> = []; doc.descendants((node, pos) => { if (node.type.name === FOOTNOTE_REFERENCE_NAME) { const id = node.attrs.id; - if (id && !seenRefs.has(id)) { - seenRefs.add(id); - referenceIds.push(id); - } + if (id) refOccurrences.push({ pos, id, node }); } if (node.type.name === FOOTNOTE_DEFINITION_NAME) { const id = node.attrs.id; - if (id) definitions.set(id, node); + if (id) defOccurrences.push({ pos, id, node }); } if (node.type.name === FOOTNOTES_LIST_NAME) { lists.push({ pos, node }); } }); - return { referenceIds, definitions, lists }; + return { refOccurrences, defOccurrences, lists }; +} + +/** + * Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the + * concrete reference re-id edits that must be applied to the body so the doc no + * longer contains two footnotes sharing a single id. + * + * The overriding invariant is that NO definition is ever dropped here: every + * definition occurrence ends up with a unique id and therefore survives the + * canonical rebuild. Duplicate references are likewise re-id'd (and paired with + * a duplicate definition when one exists) so importing/pasting `[^d]` twice with + * two `[^d]:` definitions yields TWO distinct footnotes rather than one. + */ +interface CollisionPlan { + /** + * Reference ids in document order, de-duplicated AFTER re-id. This is the + * source of truth for definition order/numbering, exactly as before — only + * now collisions have been resolved so it no longer hides duplicates. + */ + referenceIds: string[]; + /** id -> definition node, after duplicates were re-id'd. One entry per id. */ + definitions: Map; + /** + * Body reference re-id edits to apply (position of a reference node -> the + * fresh id it must carry). Empty when there are no colliding references. + */ + refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>; + /** True when any collision required a re-id (refs and/or defs). */ + changed: boolean; +} + +/** + * Resolve duplicate-id collisions among references and definitions WITHOUT ever + * dropping a definition. + * + * Strategy: + * - Walk references in document order. The FIRST reference for an id keeps it. + * Any later reference sharing that id is a duplicate and gets a fresh unique + * id; if a still-unclaimed duplicate definition with the original id exists, + * it is re-id'd to the SAME fresh id so the (ref, def) pair stays matched. + * - Walk definitions in document order. The FIRST definition for an id keeps + * it; later duplicates that were not already claimed by a duplicate reference + * get their own fresh unique id (surviving as a distinct footnote/orphan). + * + * Re-id determinism: every fresh id is DERIVED from document state via + * deriveFootnoteId (e.g. `X__2`, `X__3`, collision-bumped against the set of ids + * already present) — NEVER random/time-based. Because the sync plugin runs + * identically on every collaborating client, a deterministic re-id is the only + * way they can converge on the SAME ids; a random id (the previous + * implementation) made two clients editing the same duplicate-id document mint + * DIFFERENT ids for the same duplicate, causing permanent Yjs divergence. + */ +function resolveCollisions(scan: FootnoteScan): CollisionPlan { + const definitions = new Map(); + const refReids: Array<{ + pos: number; + node: ProseMirrorNode; + newId: string; + }> = []; + const referenceIds: string[] = []; + const seenRefIds = new Set(); + let changed = false; + + // `taken` is the set of every id that must be avoided when minting a derived + // id: all original reference + definition ids in the document PLUS every id we + // mint during this pass. It is pure document state, so the derivation stays + // deterministic across clients. Per-original occurrence counters make the k-th + // duplicate of `X` deterministically become `X__2`, `X__3`, ... + const taken = new Set(); + for (const occ of scan.refOccurrences) taken.add(occ.id); + for (const occ of scan.defOccurrences) taken.add(occ.id); + const occurrenceOf = new Map(); + // Mint a deterministic unique id for a duplicate of `originalId`. The first + // duplicate is occurrence 2 (the keeper is occurrence 1), then 3, 4, ... + const mintId = (originalId: string): string => { + const next = (occurrenceOf.get(originalId) ?? 1) + 1; + occurrenceOf.set(originalId, next); + const id = deriveFootnoteId(originalId, next, taken); + taken.add(id); + return id; + }; + + // Bucket definition occurrences by their original id so a duplicate reference + // can claim a matching (as-yet-unclaimed) duplicate definition and re-id the + // pair together. defByOriginalId[id] is consumed front-to-back. + const defByOriginalId = new Map(); + for (const occ of scan.defOccurrences) { + const arr = defByOriginalId.get(occ.id); + if (arr) arr.push(occ); + else defByOriginalId.set(occ.id, [occ]); + } + // The FIRST definition for each id is the canonical keeper of that id. + const claimed = new Set(); + + for (const ref of scan.refOccurrences) { + if (!seenRefIds.has(ref.id)) { + // First reference with this id keeps it. + seenRefIds.add(ref.id); + referenceIds.push(ref.id); + continue; + } + // Duplicate reference: assign a deterministic derived id. Pair it with the + // next unclaimed duplicate definition (NOT the first keeper) carrying the + // same original id, if one exists, so the (ref, def) pairing is preserved + // 1:1. + const newId = mintId(ref.id); + refReids.push({ pos: ref.pos, node: ref.node, newId }); + seenRefIds.add(newId); + referenceIds.push(newId); + changed = true; + + const candidates = defByOriginalId.get(ref.id) ?? []; + // Skip the first occurrence (it keeps the original id); pick the first + // duplicate not already claimed. + for (let i = 1; i < candidates.length; i++) { + const cand = candidates[i]; + if (!claimed.has(cand)) { + claimed.add(cand); + definitions.set(newId, cand.node); + break; + } + } + } + + // Now place every definition under a unique id. The first occurrence of each + // original id keeps it; remaining duplicates either were paired with a + // duplicate reference above (already placed) or get a fresh standalone id. + const seenDefIds = new Set(); + for (const occ of scan.defOccurrences) { + if (claimed.has(occ)) continue; // already placed against a duplicate ref id + if (!seenDefIds.has(occ.id)) { + seenDefIds.add(occ.id); + definitions.set(occ.id, occ.node); + } else { + // Duplicate definition with no duplicate reference to pair with: keep it + // with a deterministic derived id so it is NEVER silently dropped. (It + // becomes an orphan and is then subject to the normal orphan policy — but + // only ever because it has no matching reference, never because it + // collided.) + const newId = mintId(occ.id); + definitions.set(newId, occ.node); + changed = true; + } + } + + return { referenceIds, definitions, refReids, changed }; } /** @@ -78,9 +245,14 @@ function scan(doc: ProseMirrorNode): FootnoteScan { * ping-pong forever (list moved to end -> trailing paragraph appended -> list * no longer last -> moved again ...). * - * Paste id-collision regeneration is left to the paste handler / v2; the common - * cases (orphans, missing definitions, multiple/empty/misplaced lists) are - * covered here. + * Duplicate-id collisions (two references and/or two definitions sharing one + * id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a + * reference+definition pair) are resolved up front by resolveCollisions(): the + * duplicates are re-id'd to fresh unique ids so BOTH survive as distinct + * footnotes. This guarantees the overriding invariant — no footnoteDefinition is + * ever silently deleted by this automatic (addToHistory:false) transaction. A + * definition is only ever removed when it has NO matching reference (orphan + * policy), never because its id collided with another. */ export function footnoteSyncPlugin( isRemoteTransaction?: (tr: Transaction) => boolean, @@ -111,12 +283,33 @@ export function footnoteSyncPlugin( const info = scan(doc); + // 0) Resolve duplicate-id collisions (two references and/or two + // definitions sharing one id) by re-id'ing duplicates to fresh unique + // ids. This is the critical defense: the old last-wins Map silently + // dropped all but the last definition for a shared id; here EVERY + // definition survives with a unique id, and duplicate references are + // paired with duplicate definitions so two same-id imports/pastes yield + // two distinct footnotes instead of one. + const plan = resolveCollisions(info); + const referenceIds = plan.referenceIds; + // 1) Desired definitions: one per referenced id, in reference order, // reusing existing definition nodes (preserving their content) and // synthesizing empty ones for references that lack a definition. - const desiredDefs: ProseMirrorNode[] = info.referenceIds.map((id) => { - const existing = info.definitions.get(id); - if (existing) return existing; + // Definitions whose id has no matching reference (true orphans) are + // dropped per the existing orphan policy — but a collision is NEVER the + // cause of a drop, because collisions were re-id'd above. + const desiredDefs: ProseMirrorNode[] = referenceIds.map((id) => { + const existing = plan.definitions.get(id); + if (existing) { + // A definition paired to a re-id'd reference keeps its CONTENT but + // must carry the new id. Rewrite the id attr when it differs (cheap + // no-op when it already matches). + if (existing.attrs.id !== id) { + return defType.create({ id }, existing.content); + } + return existing; + } return defType.create({ id }, paragraphType.create()); }); @@ -129,7 +322,12 @@ export function footnoteSyncPlugin( node.type === paragraphType && node.content.size === 0; let alreadyCanonical = false; - if (!hasRefs) { + if (plan.changed) { + // A collision was detected (duplicate ids among refs/defs). The doc must + // be rewritten (re-id'd references + rebuilt list); it is never already + // canonical in this case. + alreadyCanonical = false; + } else if (!hasRefs) { // Canonical when there is no footnotesList at all. alreadyCanonical = info.lists.length === 0; } else if (info.lists.length === 1) { @@ -158,6 +356,17 @@ export function footnoteSyncPlugin( // 3) Rebuild: produce exactly ONE transaction that reaches the end-state. const tr = newState.tr; + // 3a) Re-id colliding body references FIRST. A footnoteReference is an + // inline atom, so setNodeMarkup changes only its attrs (not its size), + // leaving every other position valid for the list deletions/insert + // that follow. + for (const reid of plan.refReids) { + tr.setNodeMarkup(reid.pos, undefined, { + ...reid.node.attrs, + id: reid.newId, + }); + } + // Delete every existing footnotesList (from the end so earlier positions // stay valid while we mutate). [...info.lists] @@ -195,3 +404,101 @@ export function footnoteSyncPlugin( }, }); } + +export const footnotePastePluginKey = new PluginKey("footnotePaste"); + +/** + * Paste id-collision guard. When pasted content carries footnote reference or + * definition ids that ALREADY EXIST in the current document, regenerate those + * ids (consistently across the pasted slice, so a pasted reference and its + * definition keep pointing at each other) BEFORE the slice is inserted. + * + * Without this, pasting a reference+definition pair copied from elsewhere — or + * duplicating one in place — would merge with (or clobber) the existing footnote + * of the same id. The schema-sync plugin already guarantees no definition is + * ever silently deleted after the fact (it re-id's collisions), but regenerating + * at paste time keeps the pasted footnote cleanly separate from the start and + * avoids any transient merge. + * + * Only COLLIDING ids are remapped: a self-paste of a lone reference whose id is + * not present elsewhere is left untouched (so it still resolves to its existing + * definition). + */ +export function footnotePastePlugin(): Plugin { + return new Plugin({ + key: footnotePastePluginKey, + props: { + transformPasted(slice, view) { + // Collect ids already present in the current document. + const existing = new Set(); + view.state.doc.descendants((node) => { + if ( + node.type.name === FOOTNOTE_REFERENCE_NAME || + node.type.name === FOOTNOTE_DEFINITION_NAME + ) { + const id = node.attrs.id; + if (id) existing.add(id); + } + }); + if (existing.size === 0) return slice; + + // Build a remap (old id -> fresh id) for every COLLIDING id found in the + // pasted slice, shared by references and definitions so a pasted pair + // stays matched. A paste is a distinct local user action (not a + // shared-state convergence point), so determinism is not strictly + // required here — but we derive the new id deterministically anyway + // (deriveFootnoteId against the current doc's id set) for consistency + // with the sync/import paths and to keep Math.random off this code path. + const remap = new Map(); + const collectColliding = (node: ProseMirrorNode) => { + if ( + node.type.name === FOOTNOTE_REFERENCE_NAME || + node.type.name === FOOTNOTE_DEFINITION_NAME + ) { + const id = node.attrs.id; + if (id && existing.has(id) && !remap.has(id)) { + const newId = deriveFootnoteId(id, 2, existing); + remap.set(id, newId); + // Reserve it so a second colliding id deriving to the same base + // bumps instead of clashing. + existing.add(newId); + } + } + node.descendants(collectColliding); + }; + slice.content.descendants(collectColliding); + if (remap.size === 0) return slice; + + // Rewrite the colliding ids throughout the slice. + const rewrite = (fragment: Fragment): Fragment => { + const nodes: ProseMirrorNode[] = []; + fragment.forEach((node) => { + const isFootnote = + node.type.name === FOOTNOTE_REFERENCE_NAME || + node.type.name === FOOTNOTE_DEFINITION_NAME; + const newId = isFootnote ? remap.get(node.attrs.id) : undefined; + const newContent = node.content.size + ? rewrite(node.content) + : node.content; + if (newId) { + nodes.push( + node.type.create( + { ...node.attrs, id: newId }, + newContent, + node.marks, + ), + ); + } else if (newContent !== node.content) { + nodes.push(node.copy(newContent)); + } else { + nodes.push(node); + } + }); + return Fragment.fromArray(nodes); + }; + + return new Slice(rewrite(slice.content), slice.openStart, slice.openEnd); + }, + }, + }); +} diff --git a/packages/editor-ext/src/lib/footnote/footnote-util.ts b/packages/editor-ext/src/lib/footnote/footnote-util.ts index 41698686..7896595d 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-util.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-util.ts @@ -43,6 +43,61 @@ export function generateFootnoteId(): string { ); } +/** + * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of + * an original id `X` during collision resolution. The result is a pure function + * of (`originalId`, `occurrence`, `taken`) so that every collaborating client — + * and every import path — computes the SAME new id for the same input document. + * + * CRITICAL: this MUST NOT use Math.random()/Date.now()/uuid. Two clients that + * each make a local edit on the same duplicate-id document have to converge on + * identical ids; a random id would diverge permanently over Yjs. + * + * Scheme: the base candidate is `${originalId}__${occurrence}` (e.g. `X__2`, + * `X__3`). If that candidate already exists in `taken` (an existing footnote id, + * or one we already minted in this pass), a stable alphabetic suffix is appended + * and bumped — `X__2b`, `X__2c`, ... — until the candidate is unique. `taken` is + * itself part of the document state, so the whole walk stays deterministic. + * + * `taken` is consulted but NOT mutated here; the caller adds the returned id to + * its own seen-set before requesting the next derived id. + * + * NOTE: this implementation is intentionally duplicated in + * packages/mcp/src/lib/collaboration.ts (deriveFootnoteId) + * and MUST stay in sync with it so markdown imported through either path yields + * identical ids. + */ +export function deriveFootnoteId( + originalId: string, + occurrence: number, + taken: Set | ReadonlySet, +): string { + let candidate = `${originalId}__${occurrence}`; + // Deterministic suffix bump: b, c, d, ... then aa, ab, ... if ever exhausted. + let n = 0; + while (taken.has(candidate)) { + n += 1; + candidate = `${originalId}__${occurrence}${suffix(n)}`; + } + return candidate; +} + +/** + * Map 1 -> "b", 2 -> "c", ... 25 -> "z", 26 -> "ba", ... (base-25 over b..z, + * skipping "a" so the first bump is visibly distinct from the un-bumped base). + * Purely deterministic. + */ +function suffix(n: number): string { + let out = ""; + let x = n; + while (x > 0) { + const rem = (x - 1) % 25; + out = String.fromCharCode(98 + rem) + out; // 98 = 'b' + x = Math.floor((x - 1) / 25); + } + return out; +} + /** * Collect every `footnoteReference` id in document order. This is the single * source of truth for numbering and ordering — a pure function of the document diff --git a/packages/editor-ext/src/lib/footnote/footnote.test.ts b/packages/editor-ext/src/lib/footnote/footnote.test.ts index a68685a3..5dfc666c 100644 --- a/packages/editor-ext/src/lib/footnote/footnote.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote.test.ts @@ -304,6 +304,160 @@ describe("footnote sync plugin (orphans)", () => { editor.destroy(); }); + it("two definitions sharing an id (with two matching references) BOTH survive the first edit (no data loss)", () => { + // Reproduces the verified data-loss bug: two footnoteDefinition nodes share + // id "d", and there are two references with id "d". The OLD code built the + // definitions Map last-wins and emitted exactly one definition for the + // de-duplicated reference, so the very first keystroke's sync transaction + // deleted the whole list and rebuilt it from one definition — silently + // destroying "first" and keeping only "second". + const editor = makeEditor({ + type: "doc", + content: [ + { + type: "paragraph", + content: [ + { type: "text", text: "a" }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } }, + { type: "text", text: "b" }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } }, + ], + }, + { + type: FOOTNOTES_LIST_NAME, + content: [ + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "d" }, + content: [ + { type: "paragraph", content: [{ type: "text", text: "first" }] }, + ], + }, + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "d" }, + content: [ + { type: "paragraph", content: [{ type: "text", text: "second" }] }, + ], + }, + ], + }, + ], + }); + // The first local keystroke fires the sync plugin's appendTransaction. + editor.commands.insertContentAt(1, " "); + + const doc = editor.state.doc; + // BOTH definitions survive. + expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2); + const defTexts: string[] = []; + const defIds: string[] = []; + doc.descendants((node) => { + if (node.type.name === FOOTNOTE_DEFINITION_NAME) { + defIds.push(node.attrs.id); + defTexts.push(node.textContent); + } + }); + // No content was lost: both "first" and "second" are still present. + expect(defTexts.sort()).toEqual(["first", "second"]); + // The colliding ids were made distinct. + expect(new Set(defIds).size).toBe(2); + // Each definition's id matches exactly one reference (1:1 pairing). + const refIds: string[] = []; + doc.descendants((node) => { + if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id); + }); + expect(refIds.sort()).toEqual(defIds.sort()); + editor.destroy(); + }); + + it("re-ids colliding duplicates DETERMINISTICALLY (two clients converge to identical ids)", () => { + // Cross-client determinism guard. Two collaborating clients each see the + // SAME duplicate-id document and each make a local edit. The sync plugin + // runs identically on every client, so it MUST mint the SAME new ids on both + // — otherwise the two clients diverge permanently over Yjs (duplicated + // footnotes). This is exactly the blocker the previous random-id + // (generateFootnoteId / Math.random) implementation caused: it would mint + // DIFFERENT ids on each client and this assertion would fail. + const duplicateDoc = { + type: "doc", + content: [ + { + type: "paragraph", + content: [ + { type: "text", text: "a" }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } }, + { type: "text", text: "b" }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } }, + { type: "text", text: "c" }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } }, + ], + }, + { + type: FOOTNOTES_LIST_NAME, + content: [ + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "d" }, + content: [ + { type: "paragraph", content: [{ type: "text", text: "one" }] }, + ], + }, + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "d" }, + content: [ + { type: "paragraph", content: [{ type: "text", text: "two" }] }, + ], + }, + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "d" }, + content: [ + { + type: "paragraph", + content: [{ type: "text", text: "three" }], + }, + ], + }, + ], + }, + ], + }; + + const idsAfterLocalEdit = () => { + // A fresh editor instance = an independent "client" running the same + // plugin pipeline on the same starting document. + const editor = makeEditor(structuredClone(duplicateDoc)); + editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs + const refIds: string[] = []; + const defIds: string[] = []; + editor.state.doc.descendants((node) => { + if (node.type.name === FOOTNOTE_REFERENCE_NAME) + refIds.push(node.attrs.id); + if (node.type.name === FOOTNOTE_DEFINITION_NAME) + defIds.push(node.attrs.id); + }); + editor.destroy(); + return { refIds, defIds }; + }; + + const clientA = idsAfterLocalEdit(); + const clientB = idsAfterLocalEdit(); + + // Both clients computed IDENTICAL ids (the property that makes Yjs converge). + expect(clientA.refIds).toEqual(clientB.refIds); + expect(clientA.defIds).toEqual(clientB.defIds); + + // And the ids are deterministic-derived (not random uuid-style): the keeper + // keeps "d", the duplicates become "d__2", "d__3". + expect(new Set(clientA.refIds)).toEqual(new Set(["d", "d__2", "d__3"])); + // Every definition survived with a unique id, 1:1 with the references. + expect(clientA.defIds.length).toBe(3); + expect(new Set(clientA.defIds).size).toBe(3); + expect([...clientA.refIds].sort()).toEqual([...clientA.defIds].sort()); + }); + it("removes an orphan definition with no matching reference", () => { const editor = makeEditor({ type: "doc", diff --git a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts index ad47cc52..b47cf4a4 100644 --- a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts +++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts @@ -1,4 +1,5 @@ import { marked } from "marked"; +import { deriveFootnoteId } from "../../footnote/footnote-util"; /** * Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline. @@ -52,6 +53,10 @@ function escapeAttr(value: string): string { return String(value).replace(/&/g, "&").replace(/"/g, """); } +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + /** * Extract `[^id]: text` definition lines from the markdown body, returning the * cleaned body plus a rendered
(empty string when no @@ -96,6 +101,56 @@ export function extractFootnoteDefinitions(markdown: string): { return { body: markdown, section: "" }; } + // De-duplicate colliding definition ids. Two definitions sharing an id (e.g. + // `[^d]: first` / `[^d]: second`) would otherwise collapse into one footnote + // downstream (the editor's last-wins sync). Rename each colliding id to a + // DETERMINISTIC derived one AND rewrite the corresponding `[^id]` reference + // marker so the (reference, definition) pairing stays 1:1. The FIRST + // definition keeps the id and pairs with the FIRST `[^id]` marker; the Nth + // duplicate gets the derived id `${id}__${N}` and rewrites the Nth `[^id]` + // marker. If there are fewer markers than definitions, the surplus definition + // keeps a derived (orphan) id so it is never silently merged away. + // + // The id is derived (deriveFootnoteId), NOT random: importing the same + // markdown through two paths (here and the MCP mirror) must yield identical + // ids, and re-importing the same markdown twice must be stable. + let dedupedBody = bodyLines.join("\n"); + // Every original definition id is reserved up front so a derived id can never + // collide with an unrelated original id present in the document. + const taken = new Set(definitions.map((d) => d.id)); + const seenDefIds = new Map(); // original id -> how many seen + for (const def of definitions) { + const originalId = def.id; + const count = seenDefIds.get(originalId) ?? 0; + seenDefIds.set(originalId, count + 1); + if (count === 0) continue; // first definition keeps its id + + // count is the 0-based number of PRIOR occurrences; this is occurrence + // (count + 1), i.e. 2 for the first duplicate, 3 for the next, ... + const newId = deriveFootnoteId(originalId, count + 1, taken); + taken.add(newId); + def.id = newId; + + // Rewrite the NEXT still-unrewritten `[^originalId]` marker that does not + // belong to the keeper definition. After a prior duplicate rewrote its + // marker (to `[^someNewId]`), it no longer matches `[^originalId]`, so the + // remaining matches are: index 0 = the keeper's marker (left alone), index 1 + // = this duplicate's marker. Rewrite index 1. + let occurrence = 0; + let rewritten = false; + const re = new RegExp(`\\[\\^${escapeRegExp(originalId)}\\]`, "g"); + dedupedBody = dedupedBody.replace(re, (match) => { + const idx = occurrence++; + if (!rewritten && idx === 1) { + rewritten = true; + return `[^${newId}]`; + } + return match; + }); + // If there was no second marker (more definitions than references), the + // duplicate simply survives as an orphan with its fresh id — no body change. + } + const defsHtml = definitions .map((d) => { // Render the definition text as inline markdown so emphasis/links inside @@ -109,7 +164,7 @@ export function extractFootnoteDefinitions(markdown: string): { .join(""); return { - body: bodyLines.join("\n"), + body: dedupedBody, section: `
${defsHtml}
`, }; } diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index d5e68a21..5140acee 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -271,6 +271,44 @@ const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/; function escapeFootnoteAttr(value) { return String(value).replace(/&/g, "&").replace(/"/g, """); } +function escapeFootnoteRegExp(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} +/** + * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of + * an original id `X` during definition dedup. + * + * EXACT MIRROR of editor-ext `deriveFootnoteId` + * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST + * STAY IN SYNC: the same markdown imported through the editor and through this + * MCP path has to produce identical ids, and the sync plugin (which re-ids on + * every collaborating client) relies on the same scheme to converge. NEVER use + * Math.random()/Date.now()/uuid here — a random id would diverge across clients. + * + * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped + * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in + * `taken` (the set of ids already present / already minted — pure doc state). + */ +function deriveFootnoteId(originalId, occurrence, taken) { + let candidate = `${originalId}__${occurrence}`; + let n = 0; + while (taken.has(candidate)) { + n += 1; + candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`; + } + return candidate; +} +/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */ +function footnoteSuffix(n) { + let out = ""; + let x = n; + while (x > 0) { + const rem = (x - 1) % 25; + out = String.fromCharCode(98 + rem) + out; // 98 = 'b' + x = Math.floor((x - 1) / 25); + } + return out; +} const footnoteRefMarkedExtension = { name: "footnoteRef", level: "inline", @@ -319,11 +357,43 @@ function extractFootnotes(markdown) { } if (defs.length === 0) return { body: markdown, section: "" }; + // De-duplicate colliding definition ids (mirror of editor-ext + // extractFootnoteDefinitions). Two definitions sharing an id would otherwise + // collapse into one footnote downstream; rename each colliding id to a + // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]` + // marker so the (reference, definition) pairing stays 1:1. Determinism lets + // the same markdown imported here and via the editor produce identical ids. + let dedupedBody = bodyLines.join("\n"); + const taken = new Set(defs.map((d) => d.id)); + const seenDefIds = new Map(); + for (const def of defs) { + const originalId = def.id; + const count = seenDefIds.get(originalId) ?? 0; + seenDefIds.set(originalId, count + 1); + if (count === 0) + continue; // first definition keeps its id + const newId = deriveFootnoteId(originalId, count + 1, taken); + taken.add(newId); + def.id = newId; + // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone), + // index 1 = this duplicate's marker. Rewrite index 1. + let occurrence = 0; + let rewritten = false; + const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g"); + dedupedBody = dedupedBody.replace(re, (match) => { + const idx = occurrence++; + if (!rewritten && idx === 1) { + rewritten = true; + return `[^${newId}]`; + } + return match; + }); + } const inner = defs .map((d) => `

${marked.parseInline(d.text || "")}

`) .join(""); return { - body: bodyLines.join("\n"), + body: dedupedBody, section: `
${inner}
`, }; } diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index 0e6e80a3..6f0ad011 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -306,6 +306,51 @@ function escapeFootnoteAttr(value: string): string { return String(value).replace(/&/g, "&").replace(/"/g, """); } +function escapeFootnoteRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +/** + * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of + * an original id `X` during definition dedup. + * + * EXACT MIRROR of editor-ext `deriveFootnoteId` + * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST + * STAY IN SYNC: the same markdown imported through the editor and through this + * MCP path has to produce identical ids, and the sync plugin (which re-ids on + * every collaborating client) relies on the same scheme to converge. NEVER use + * Math.random()/Date.now()/uuid here — a random id would diverge across clients. + * + * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped + * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in + * `taken` (the set of ids already present / already minted — pure doc state). + */ +function deriveFootnoteId( + originalId: string, + occurrence: number, + taken: Set, +): string { + let candidate = `${originalId}__${occurrence}`; + let n = 0; + while (taken.has(candidate)) { + n += 1; + candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`; + } + return candidate; +} + +/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */ +function footnoteSuffix(n: number): string { + let out = ""; + let x = n; + while (x > 0) { + const rem = (x - 1) % 25; + out = String.fromCharCode(98 + rem) + out; // 98 = 'b' + x = Math.floor((x - 1) / 25); + } + return out; +} + const footnoteRefMarkedExtension = { name: "footnoteRef", level: "inline" as const, @@ -356,6 +401,39 @@ function extractFootnotes(markdown: string): { else bodyLines.push(line); } if (defs.length === 0) return { body: markdown, section: "" }; + + // De-duplicate colliding definition ids (mirror of editor-ext + // extractFootnoteDefinitions). Two definitions sharing an id would otherwise + // collapse into one footnote downstream; rename each colliding id to a + // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]` + // marker so the (reference, definition) pairing stays 1:1. Determinism lets + // the same markdown imported here and via the editor produce identical ids. + let dedupedBody = bodyLines.join("\n"); + const taken = new Set(defs.map((d) => d.id)); + const seenDefIds = new Map(); + for (const def of defs) { + const originalId = def.id; + const count = seenDefIds.get(originalId) ?? 0; + seenDefIds.set(originalId, count + 1); + if (count === 0) continue; // first definition keeps its id + const newId = deriveFootnoteId(originalId, count + 1, taken); + taken.add(newId); + def.id = newId; + // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone), + // index 1 = this duplicate's marker. Rewrite index 1. + let occurrence = 0; + let rewritten = false; + const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g"); + dedupedBody = dedupedBody.replace(re, (match) => { + const idx = occurrence++; + if (!rewritten && idx === 1) { + rewritten = true; + return `[^${newId}]`; + } + return match; + }); + } + const inner = defs .map( (d) => @@ -365,7 +443,7 @@ function extractFootnotes(markdown: string): { ) .join(""); return { - body: bodyLines.join("\n"), + body: dedupedBody, section: `
${inner}
`, }; } diff --git a/packages/mcp/test/unit/footnotes.test.mjs b/packages/mcp/test/unit/footnotes.test.mjs index 4b1ee6ab..df45a7b9 100644 --- a/packages/mcp/test/unit/footnotes.test.mjs +++ b/packages/mcp/test/unit/footnotes.test.mjs @@ -90,6 +90,39 @@ test("JSON -> MD -> JSON preserves footnote ids and text", async () => { assert.match(md2, /\[\^fn2\]: Second note\./); }); +test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => { + // The MCP import must derive duplicate ids deterministically (NOT random) so + // the same markdown imported here and via the editor produces identical ids, + // and re-importing is stable. This is the test that would FAIL on the old + // Math.random()/Date.now() implementation. + const md = [ + "See[^d] one[^d] two[^d].", + "", + "[^d]: first", + "[^d]: second", + "[^d]: third", + ].join("\n"); + + const idsOf = async () => { + const json = await markdownToProseMirror(md); + const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id); + const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id); + return { refs, defs }; + }; + + const a = await idsOf(); + const b = await idsOf(); + + // Identical across runs. + assert.deepEqual(a.refs, b.refs); + assert.deepEqual(a.defs, b.defs); + // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3". + assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]); + // 1:1 reference <-> definition pairing, all distinct. + assert.equal(new Set(a.defs).size, 3); + assert.deepEqual([...a.refs].sort(), [...a.defs].sort()); +}); + test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => { // Markdown that DOCUMENTS footnote syntax inside a code fence. The example // definition line must be preserved verbatim inside the code block and not