fix(footnotes): survive duplicate-id definitions without collab divergence

Release-cycle red-team found two same-id footnoteDefinition nodes (trivially
produced by markdown import [^d]: first / [^d]: second, or paste/duplicate)
caused silent data loss: scan() used a last-wins Map and the sync rebuild
(addToHistory:false, propagated via Yjs, un-undoable) dropped all but the last.

Fix resolves collisions so BOTH survive, with a DETERMINISTIC id scheme so
collaborators converge:
- deriveFootnoteId(originalId, occurrence, taken): the k-th (k>=2) occurrence of
  id X becomes X__k, bumped with a deterministic alpha suffix only against the
  doc's own id set — a pure function of document state. No Math.random/Date.now
  on the sync or import paths (random uuid stays only in setFootnote, where a
  single user originates a brand-new id).
- footnote-sync.resolveCollisions walks refs+defs in document order, re-ids
  duplicate references via setNodeMarkup and pairs them 1:1 with definitions;
  single SYNC_META-tagged transaction, returns null when canonical (terminates).
- Markdown import (footnote.marked) + MCP mirror (collaboration.ts) dedup with
  the same deterministic scheme + marker rewrite; packages/mcp/build regenerated.
- Paste plugin remaps colliding pasted ids against the current doc.

Tests: two independent editors resolving the same duplicate-id doc produce
IDENTICAL ids (the cross-client determinism guard that the random version would
fail); both definitions survive the first edit; import dedup is deterministic.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-20 13:47:10 +03:00
parent 1c83a8ae15
commit ceee2a76ca
9 changed files with 864 additions and 25 deletions

View File

@@ -1,6 +1,7 @@
import { describe, it, expect } from "vitest"; import { describe, it, expect } from "vitest";
import { htmlToMarkdown } from "../markdown/utils/turndown.utils"; import { htmlToMarkdown } from "../markdown/utils/turndown.utils";
import { markdownToHtml } from "../markdown/utils/marked.utils"; import { markdownToHtml } from "../markdown/utils/marked.utils";
import { extractFootnoteDefinitions } from "../markdown/utils/footnote.marked";
// HTML the editor-ext nodes render (sup[data-footnote-ref], section/div). // HTML the editor-ext nodes render (sup[data-footnote-ref], section/div).
const HTML = const HTML =
@@ -53,4 +54,87 @@ describe("footnote markdown round-trip", () => {
expect(html).not.toContain("data-footnotes"); expect(html).not.toContain("data-footnotes");
expect(html).not.toContain("data-footnote-def"); expect(html).not.toContain("data-footnote-def");
}); });
it("extractFootnoteDefinitions de-duplicates colliding ids and rewrites markers", () => {
// Two definitions share id `d`, and the body has two `[^d]` markers. The
// output must keep BOTH definitions with DISTINCT ids and rewrite the second
// marker so the (reference, definition) pairing stays 1:1.
const md = [
"See here[^d] and there[^d].",
"",
"[^d]: first",
"[^d]: second",
].join("\n");
const { body, section } = extractFootnoteDefinitions(md);
// Pull out the def ids from the section in order.
const defIds = Array.from(
section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
).map((m) => m[1]);
expect(defIds.length).toBe(2);
expect(new Set(defIds).size).toBe(2); // distinct
expect(defIds[0]).toBe("d"); // first definition keeps the id
// Both definition texts survive.
expect(section).toContain("first");
expect(section).toContain("second");
// The body still has two markers, now pointing at the two distinct ids.
const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
(m) => m[1],
);
expect(refIds.length).toBe(2);
expect(refIds.sort()).toEqual(defIds.sort());
});
it("extractFootnoteDefinitions dedups DETERMINISTICALLY (same input -> same ids)", () => {
// The derived id must be a pure function of the input markdown so importing
// the same source twice (or via the editor and the MCP mirror) yields
// identical ids — never random/time-based.
const md = [
"See[^d] one[^d] two[^d].",
"",
"[^d]: first",
"[^d]: second",
"[^d]: third",
].join("\n");
const run = () => {
const { body, section } = extractFootnoteDefinitions(md);
const defIds = Array.from(
section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
).map((m) => m[1]);
const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
(m) => m[1],
);
return { defIds, refIds };
};
const a = run();
const b = run();
// Identical across runs (this is what would FAIL on the random-id version).
expect(a.defIds).toEqual(b.defIds);
expect(a.refIds).toEqual(b.refIds);
// Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
expect(a.defIds).toEqual(["d", "d__2", "d__3"]);
expect(a.refIds.sort()).toEqual(a.defIds.sort());
});
it("markdownToHtml with duplicate ids renders two distinct footnote defs", async () => {
const md = [
"See here[^d] and there[^d].",
"",
"[^d]: first",
"[^d]: second",
].join("\n");
const html = await markdownToHtml(md);
const defIds = Array.from(
html.matchAll(/data-footnote-def data-id="([^"]+)"/g),
).map((m) => m[1]);
expect(defIds.length).toBe(2);
expect(new Set(defIds).size).toBe(2);
expect(html).toContain("first");
expect(html).toContain("second");
});
}); });

View File

@@ -8,7 +8,7 @@ import {
generateFootnoteId, generateFootnoteId,
} from "./footnote-util"; } from "./footnote-util";
import { footnoteNumberingPlugin } from "./footnote-numbering"; import { footnoteNumberingPlugin } from "./footnote-numbering";
import { footnoteSyncPlugin } from "./footnote-sync"; import { footnoteSyncPlugin, footnotePastePlugin } from "./footnote-sync";
export interface FootnoteReferenceOptions { export interface FootnoteReferenceOptions {
HTMLAttributes: Record<string, any>; HTMLAttributes: Record<string, any>;
@@ -88,6 +88,9 @@ export const FootnoteReference = Node.create<FootnoteReferenceOptions>({
// doc is never mutated. // doc is never mutated.
if (this.options.enableSync !== false) { if (this.options.enableSync !== false) {
plugins.push(footnoteSyncPlugin(this.options.isRemoteTransaction)); plugins.push(footnoteSyncPlugin(this.options.isRemoteTransaction));
// Regenerate colliding footnote ids on paste so a pasted reference+
// definition pair never clobbers/merges with an existing footnote.
plugins.push(footnotePastePlugin());
} }
return plugins; return plugins;
}, },

View File

@@ -1,48 +1,215 @@
import { Plugin, PluginKey, Transaction } from "@tiptap/pm/state"; import { Plugin, PluginKey, Transaction } from "@tiptap/pm/state";
import { Node as ProseMirrorNode, Fragment } from "@tiptap/pm/model"; import { Node as ProseMirrorNode, Fragment, Slice } from "@tiptap/pm/model";
import { import {
FOOTNOTE_DEFINITION_NAME, FOOTNOTE_DEFINITION_NAME,
FOOTNOTE_REFERENCE_NAME, FOOTNOTE_REFERENCE_NAME,
FOOTNOTES_LIST_NAME, FOOTNOTES_LIST_NAME,
deriveFootnoteId,
} from "./footnote-util"; } from "./footnote-util";
export const footnoteSyncPluginKey = new PluginKey("footnoteSync"); export const footnoteSyncPluginKey = new PluginKey("footnoteSync");
const SYNC_META = "footnoteSyncApplied"; const SYNC_META = "footnoteSyncApplied";
interface RefOccurrence {
/** Position of the reference node in the document. */
pos: number;
/** The id the reference currently carries. */
id: string;
node: ProseMirrorNode;
}
interface DefOccurrence {
/** Position of the definition node in the document. */
pos: number;
/** The id the definition currently carries. */
id: string;
node: ProseMirrorNode;
}
interface FootnoteScan { interface FootnoteScan {
/** Reference ids in document order, first occurrence only, de-duplicated. */ /**
referenceIds: string[]; * Every reference occurrence in document order (NOT de-duplicated). Needed so
/** definition id -> node (last occurrence wins, matching scan order). */ * that duplicate ids — which would otherwise be silently collapsed — can be
definitions: Map<string, ProseMirrorNode>; * detected and (together with their definitions) re-id'd instead of dropped.
*/
refOccurrences: RefOccurrence[];
/**
* Every definition occurrence in document order (NOT de-duplicated). The old
* implementation used a last-wins Map here, which is exactly what caused
* silent data loss: two definitions sharing an id collapsed to one.
*/
defOccurrences: DefOccurrence[];
/** Every top-level footnotesList node, in document order. */ /** Every top-level footnotesList node, in document order. */
lists: Array<{ pos: number; node: ProseMirrorNode }>; lists: Array<{ pos: number; node: ProseMirrorNode }>;
} }
function scan(doc: ProseMirrorNode): FootnoteScan { function scan(doc: ProseMirrorNode): FootnoteScan {
const referenceIds: string[] = []; const refOccurrences: RefOccurrence[] = [];
const seenRefs = new Set<string>(); const defOccurrences: DefOccurrence[] = [];
const definitions = new Map<string, ProseMirrorNode>();
const lists: Array<{ pos: number; node: ProseMirrorNode }> = []; const lists: Array<{ pos: number; node: ProseMirrorNode }> = [];
doc.descendants((node, pos) => { doc.descendants((node, pos) => {
if (node.type.name === FOOTNOTE_REFERENCE_NAME) { if (node.type.name === FOOTNOTE_REFERENCE_NAME) {
const id = node.attrs.id; const id = node.attrs.id;
if (id && !seenRefs.has(id)) { if (id) refOccurrences.push({ pos, id, node });
seenRefs.add(id);
referenceIds.push(id);
}
} }
if (node.type.name === FOOTNOTE_DEFINITION_NAME) { if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
const id = node.attrs.id; const id = node.attrs.id;
if (id) definitions.set(id, node); if (id) defOccurrences.push({ pos, id, node });
} }
if (node.type.name === FOOTNOTES_LIST_NAME) { if (node.type.name === FOOTNOTES_LIST_NAME) {
lists.push({ pos, node }); lists.push({ pos, node });
} }
}); });
return { referenceIds, definitions, lists }; return { refOccurrences, defOccurrences, lists };
}
/**
* Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the
* concrete reference re-id edits that must be applied to the body so the doc no
* longer contains two footnotes sharing a single id.
*
* The overriding invariant is that NO definition is ever dropped here: every
* definition occurrence ends up with a unique id and therefore survives the
* canonical rebuild. Duplicate references are likewise re-id'd (and paired with
* a duplicate definition when one exists) so importing/pasting `[^d]` twice with
* two `[^d]:` definitions yields TWO distinct footnotes rather than one.
*/
interface CollisionPlan {
/**
* Reference ids in document order, de-duplicated AFTER re-id. This is the
* source of truth for definition order/numbering, exactly as before — only
* now collisions have been resolved so it no longer hides duplicates.
*/
referenceIds: string[];
/** id -> definition node, after duplicates were re-id'd. One entry per id. */
definitions: Map<string, ProseMirrorNode>;
/**
* Body reference re-id edits to apply (position of a reference node -> the
* fresh id it must carry). Empty when there are no colliding references.
*/
refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>;
/** True when any collision required a re-id (refs and/or defs). */
changed: boolean;
}
/**
* Resolve duplicate-id collisions among references and definitions WITHOUT ever
* dropping a definition.
*
* Strategy:
* - Walk references in document order. The FIRST reference for an id keeps it.
* Any later reference sharing that id is a duplicate and gets a fresh unique
* id; if a still-unclaimed duplicate definition with the original id exists,
* it is re-id'd to the SAME fresh id so the (ref, def) pair stays matched.
* - Walk definitions in document order. The FIRST definition for an id keeps
* it; later duplicates that were not already claimed by a duplicate reference
* get their own fresh unique id (surviving as a distinct footnote/orphan).
*
* Re-id determinism: every fresh id is DERIVED from document state via
* deriveFootnoteId (e.g. `X__2`, `X__3`, collision-bumped against the set of ids
* already present) — NEVER random/time-based. Because the sync plugin runs
* identically on every collaborating client, a deterministic re-id is the only
* way they can converge on the SAME ids; a random id (the previous
* implementation) made two clients editing the same duplicate-id document mint
* DIFFERENT ids for the same duplicate, causing permanent Yjs divergence.
*/
function resolveCollisions(scan: FootnoteScan): CollisionPlan {
const definitions = new Map<string, ProseMirrorNode>();
const refReids: Array<{
pos: number;
node: ProseMirrorNode;
newId: string;
}> = [];
const referenceIds: string[] = [];
const seenRefIds = new Set<string>();
let changed = false;
// `taken` is the set of every id that must be avoided when minting a derived
// id: all original reference + definition ids in the document PLUS every id we
// mint during this pass. It is pure document state, so the derivation stays
// deterministic across clients. Per-original occurrence counters make the k-th
// duplicate of `X` deterministically become `X__2`, `X__3`, ...
const taken = new Set<string>();
for (const occ of scan.refOccurrences) taken.add(occ.id);
for (const occ of scan.defOccurrences) taken.add(occ.id);
const occurrenceOf = new Map<string, number>();
// Mint a deterministic unique id for a duplicate of `originalId`. The first
// duplicate is occurrence 2 (the keeper is occurrence 1), then 3, 4, ...
const mintId = (originalId: string): string => {
const next = (occurrenceOf.get(originalId) ?? 1) + 1;
occurrenceOf.set(originalId, next);
const id = deriveFootnoteId(originalId, next, taken);
taken.add(id);
return id;
};
// Bucket definition occurrences by their original id so a duplicate reference
// can claim a matching (as-yet-unclaimed) duplicate definition and re-id the
// pair together. defByOriginalId[id] is consumed front-to-back.
const defByOriginalId = new Map<string, DefOccurrence[]>();
for (const occ of scan.defOccurrences) {
const arr = defByOriginalId.get(occ.id);
if (arr) arr.push(occ);
else defByOriginalId.set(occ.id, [occ]);
}
// The FIRST definition for each id is the canonical keeper of that id.
const claimed = new Set<DefOccurrence>();
for (const ref of scan.refOccurrences) {
if (!seenRefIds.has(ref.id)) {
// First reference with this id keeps it.
seenRefIds.add(ref.id);
referenceIds.push(ref.id);
continue;
}
// Duplicate reference: assign a deterministic derived id. Pair it with the
// next unclaimed duplicate definition (NOT the first keeper) carrying the
// same original id, if one exists, so the (ref, def) pairing is preserved
// 1:1.
const newId = mintId(ref.id);
refReids.push({ pos: ref.pos, node: ref.node, newId });
seenRefIds.add(newId);
referenceIds.push(newId);
changed = true;
const candidates = defByOriginalId.get(ref.id) ?? [];
// Skip the first occurrence (it keeps the original id); pick the first
// duplicate not already claimed.
for (let i = 1; i < candidates.length; i++) {
const cand = candidates[i];
if (!claimed.has(cand)) {
claimed.add(cand);
definitions.set(newId, cand.node);
break;
}
}
}
// Now place every definition under a unique id. The first occurrence of each
// original id keeps it; remaining duplicates either were paired with a
// duplicate reference above (already placed) or get a fresh standalone id.
const seenDefIds = new Set<string>();
for (const occ of scan.defOccurrences) {
if (claimed.has(occ)) continue; // already placed against a duplicate ref id
if (!seenDefIds.has(occ.id)) {
seenDefIds.add(occ.id);
definitions.set(occ.id, occ.node);
} else {
// Duplicate definition with no duplicate reference to pair with: keep it
// with a deterministic derived id so it is NEVER silently dropped. (It
// becomes an orphan and is then subject to the normal orphan policy — but
// only ever because it has no matching reference, never because it
// collided.)
const newId = mintId(occ.id);
definitions.set(newId, occ.node);
changed = true;
}
}
return { referenceIds, definitions, refReids, changed };
} }
/** /**
@@ -78,9 +245,14 @@ function scan(doc: ProseMirrorNode): FootnoteScan {
* ping-pong forever (list moved to end -> trailing paragraph appended -> list * ping-pong forever (list moved to end -> trailing paragraph appended -> list
* no longer last -> moved again ...). * no longer last -> moved again ...).
* *
* Paste id-collision regeneration is left to the paste handler / v2; the common * Duplicate-id collisions (two references and/or two definitions sharing one
* cases (orphans, missing definitions, multiple/empty/misplaced lists) are * id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a
* covered here. * reference+definition pair) are resolved up front by resolveCollisions(): the
* duplicates are re-id'd to fresh unique ids so BOTH survive as distinct
* footnotes. This guarantees the overriding invariant — no footnoteDefinition is
* ever silently deleted by this automatic (addToHistory:false) transaction. A
* definition is only ever removed when it has NO matching reference (orphan
* policy), never because its id collided with another.
*/ */
export function footnoteSyncPlugin( export function footnoteSyncPlugin(
isRemoteTransaction?: (tr: Transaction) => boolean, isRemoteTransaction?: (tr: Transaction) => boolean,
@@ -111,12 +283,33 @@ export function footnoteSyncPlugin(
const info = scan(doc); const info = scan(doc);
// 0) Resolve duplicate-id collisions (two references and/or two
// definitions sharing one id) by re-id'ing duplicates to fresh unique
// ids. This is the critical defense: the old last-wins Map silently
// dropped all but the last definition for a shared id; here EVERY
// definition survives with a unique id, and duplicate references are
// paired with duplicate definitions so two same-id imports/pastes yield
// two distinct footnotes instead of one.
const plan = resolveCollisions(info);
const referenceIds = plan.referenceIds;
// 1) Desired definitions: one per referenced id, in reference order, // 1) Desired definitions: one per referenced id, in reference order,
// reusing existing definition nodes (preserving their content) and // reusing existing definition nodes (preserving their content) and
// synthesizing empty ones for references that lack a definition. // synthesizing empty ones for references that lack a definition.
const desiredDefs: ProseMirrorNode[] = info.referenceIds.map((id) => { // Definitions whose id has no matching reference (true orphans) are
const existing = info.definitions.get(id); // dropped per the existing orphan policy — but a collision is NEVER the
if (existing) return existing; // cause of a drop, because collisions were re-id'd above.
const desiredDefs: ProseMirrorNode[] = referenceIds.map((id) => {
const existing = plan.definitions.get(id);
if (existing) {
// A definition paired to a re-id'd reference keeps its CONTENT but
// must carry the new id. Rewrite the id attr when it differs (cheap
// no-op when it already matches).
if (existing.attrs.id !== id) {
return defType.create({ id }, existing.content);
}
return existing;
}
return defType.create({ id }, paragraphType.create()); return defType.create({ id }, paragraphType.create());
}); });
@@ -129,7 +322,12 @@ export function footnoteSyncPlugin(
node.type === paragraphType && node.content.size === 0; node.type === paragraphType && node.content.size === 0;
let alreadyCanonical = false; let alreadyCanonical = false;
if (!hasRefs) { if (plan.changed) {
// A collision was detected (duplicate ids among refs/defs). The doc must
// be rewritten (re-id'd references + rebuilt list); it is never already
// canonical in this case.
alreadyCanonical = false;
} else if (!hasRefs) {
// Canonical when there is no footnotesList at all. // Canonical when there is no footnotesList at all.
alreadyCanonical = info.lists.length === 0; alreadyCanonical = info.lists.length === 0;
} else if (info.lists.length === 1) { } else if (info.lists.length === 1) {
@@ -158,6 +356,17 @@ export function footnoteSyncPlugin(
// 3) Rebuild: produce exactly ONE transaction that reaches the end-state. // 3) Rebuild: produce exactly ONE transaction that reaches the end-state.
const tr = newState.tr; const tr = newState.tr;
// 3a) Re-id colliding body references FIRST. A footnoteReference is an
// inline atom, so setNodeMarkup changes only its attrs (not its size),
// leaving every other position valid for the list deletions/insert
// that follow.
for (const reid of plan.refReids) {
tr.setNodeMarkup(reid.pos, undefined, {
...reid.node.attrs,
id: reid.newId,
});
}
// Delete every existing footnotesList (from the end so earlier positions // Delete every existing footnotesList (from the end so earlier positions
// stay valid while we mutate). // stay valid while we mutate).
[...info.lists] [...info.lists]
@@ -195,3 +404,101 @@ export function footnoteSyncPlugin(
}, },
}); });
} }
export const footnotePastePluginKey = new PluginKey("footnotePaste");
/**
* Paste id-collision guard. When pasted content carries footnote reference or
* definition ids that ALREADY EXIST in the current document, regenerate those
* ids (consistently across the pasted slice, so a pasted reference and its
* definition keep pointing at each other) BEFORE the slice is inserted.
*
* Without this, pasting a reference+definition pair copied from elsewhere — or
* duplicating one in place — would merge with (or clobber) the existing footnote
* of the same id. The schema-sync plugin already guarantees no definition is
* ever silently deleted after the fact (it re-id's collisions), but regenerating
* at paste time keeps the pasted footnote cleanly separate from the start and
* avoids any transient merge.
*
* Only COLLIDING ids are remapped: a self-paste of a lone reference whose id is
* not present elsewhere is left untouched (so it still resolves to its existing
* definition).
*/
export function footnotePastePlugin(): Plugin {
return new Plugin({
key: footnotePastePluginKey,
props: {
transformPasted(slice, view) {
// Collect ids already present in the current document.
const existing = new Set<string>();
view.state.doc.descendants((node) => {
if (
node.type.name === FOOTNOTE_REFERENCE_NAME ||
node.type.name === FOOTNOTE_DEFINITION_NAME
) {
const id = node.attrs.id;
if (id) existing.add(id);
}
});
if (existing.size === 0) return slice;
// Build a remap (old id -> fresh id) for every COLLIDING id found in the
// pasted slice, shared by references and definitions so a pasted pair
// stays matched. A paste is a distinct local user action (not a
// shared-state convergence point), so determinism is not strictly
// required here — but we derive the new id deterministically anyway
// (deriveFootnoteId against the current doc's id set) for consistency
// with the sync/import paths and to keep Math.random off this code path.
const remap = new Map<string, string>();
const collectColliding = (node: ProseMirrorNode) => {
if (
node.type.name === FOOTNOTE_REFERENCE_NAME ||
node.type.name === FOOTNOTE_DEFINITION_NAME
) {
const id = node.attrs.id;
if (id && existing.has(id) && !remap.has(id)) {
const newId = deriveFootnoteId(id, 2, existing);
remap.set(id, newId);
// Reserve it so a second colliding id deriving to the same base
// bumps instead of clashing.
existing.add(newId);
}
}
node.descendants(collectColliding);
};
slice.content.descendants(collectColliding);
if (remap.size === 0) return slice;
// Rewrite the colliding ids throughout the slice.
const rewrite = (fragment: Fragment): Fragment => {
const nodes: ProseMirrorNode[] = [];
fragment.forEach((node) => {
const isFootnote =
node.type.name === FOOTNOTE_REFERENCE_NAME ||
node.type.name === FOOTNOTE_DEFINITION_NAME;
const newId = isFootnote ? remap.get(node.attrs.id) : undefined;
const newContent = node.content.size
? rewrite(node.content)
: node.content;
if (newId) {
nodes.push(
node.type.create(
{ ...node.attrs, id: newId },
newContent,
node.marks,
),
);
} else if (newContent !== node.content) {
nodes.push(node.copy(newContent));
} else {
nodes.push(node);
}
});
return Fragment.fromArray(nodes);
};
return new Slice(rewrite(slice.content), slice.openStart, slice.openEnd);
},
},
});
}

View File

@@ -43,6 +43,61 @@ export function generateFootnoteId(): string {
); );
} }
/**
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
* an original id `X` during collision resolution. The result is a pure function
* of (`originalId`, `occurrence`, `taken`) so that every collaborating client —
* and every import path — computes the SAME new id for the same input document.
*
* CRITICAL: this MUST NOT use Math.random()/Date.now()/uuid. Two clients that
* each make a local edit on the same duplicate-id document have to converge on
* identical ids; a random id would diverge permanently over Yjs.
*
* Scheme: the base candidate is `${originalId}__${occurrence}` (e.g. `X__2`,
* `X__3`). If that candidate already exists in `taken` (an existing footnote id,
* or one we already minted in this pass), a stable alphabetic suffix is appended
* and bumped — `X__2b`, `X__2c`, ... — until the candidate is unique. `taken` is
* itself part of the document state, so the whole walk stays deterministic.
*
* `taken` is consulted but NOT mutated here; the caller adds the returned id to
* its own seen-set before requesting the next derived id.
*
* NOTE: this implementation is intentionally duplicated in
* packages/mcp/src/lib/collaboration.ts (deriveFootnoteId)
* and MUST stay in sync with it so markdown imported through either path yields
* identical ids.
*/
export function deriveFootnoteId(
originalId: string,
occurrence: number,
taken: Set<string> | ReadonlySet<string>,
): string {
let candidate = `${originalId}__${occurrence}`;
// Deterministic suffix bump: b, c, d, ... then aa, ab, ... if ever exhausted.
let n = 0;
while (taken.has(candidate)) {
n += 1;
candidate = `${originalId}__${occurrence}${suffix(n)}`;
}
return candidate;
}
/**
* Map 1 -> "b", 2 -> "c", ... 25 -> "z", 26 -> "ba", ... (base-25 over b..z,
* skipping "a" so the first bump is visibly distinct from the un-bumped base).
* Purely deterministic.
*/
function suffix(n: number): string {
let out = "";
let x = n;
while (x > 0) {
const rem = (x - 1) % 25;
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
x = Math.floor((x - 1) / 25);
}
return out;
}
/** /**
* Collect every `footnoteReference` id in document order. This is the single * Collect every `footnoteReference` id in document order. This is the single
* source of truth for numbering and ordering — a pure function of the document * source of truth for numbering and ordering — a pure function of the document

View File

@@ -304,6 +304,160 @@ describe("footnote sync plugin (orphans)", () => {
editor.destroy(); editor.destroy();
}); });
it("two definitions sharing an id (with two matching references) BOTH survive the first edit (no data loss)", () => {
// Reproduces the verified data-loss bug: two footnoteDefinition nodes share
// id "d", and there are two references with id "d". The OLD code built the
// definitions Map last-wins and emitted exactly one definition for the
// de-duplicated reference, so the very first keystroke's sync transaction
// deleted the whole list and rebuilt it from one definition — silently
// destroying "first" and keeping only "second".
const editor = makeEditor({
type: "doc",
content: [
{
type: "paragraph",
content: [
{ type: "text", text: "a" },
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
{ type: "text", text: "b" },
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
],
},
{
type: FOOTNOTES_LIST_NAME,
content: [
{
type: FOOTNOTE_DEFINITION_NAME,
attrs: { id: "d" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "first" }] },
],
},
{
type: FOOTNOTE_DEFINITION_NAME,
attrs: { id: "d" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "second" }] },
],
},
],
},
],
});
// The first local keystroke fires the sync plugin's appendTransaction.
editor.commands.insertContentAt(1, " ");
const doc = editor.state.doc;
// BOTH definitions survive.
expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2);
const defTexts: string[] = [];
const defIds: string[] = [];
doc.descendants((node) => {
if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
defIds.push(node.attrs.id);
defTexts.push(node.textContent);
}
});
// No content was lost: both "first" and "second" are still present.
expect(defTexts.sort()).toEqual(["first", "second"]);
// The colliding ids were made distinct.
expect(new Set(defIds).size).toBe(2);
// Each definition's id matches exactly one reference (1:1 pairing).
const refIds: string[] = [];
doc.descendants((node) => {
if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id);
});
expect(refIds.sort()).toEqual(defIds.sort());
editor.destroy();
});
it("re-ids colliding duplicates DETERMINISTICALLY (two clients converge to identical ids)", () => {
// Cross-client determinism guard. Two collaborating clients each see the
// SAME duplicate-id document and each make a local edit. The sync plugin
// runs identically on every client, so it MUST mint the SAME new ids on both
// — otherwise the two clients diverge permanently over Yjs (duplicated
// footnotes). This is exactly the blocker the previous random-id
// (generateFootnoteId / Math.random) implementation caused: it would mint
// DIFFERENT ids on each client and this assertion would fail.
const duplicateDoc = {
type: "doc",
content: [
{
type: "paragraph",
content: [
{ type: "text", text: "a" },
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
{ type: "text", text: "b" },
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
{ type: "text", text: "c" },
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
],
},
{
type: FOOTNOTES_LIST_NAME,
content: [
{
type: FOOTNOTE_DEFINITION_NAME,
attrs: { id: "d" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "one" }] },
],
},
{
type: FOOTNOTE_DEFINITION_NAME,
attrs: { id: "d" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "two" }] },
],
},
{
type: FOOTNOTE_DEFINITION_NAME,
attrs: { id: "d" },
content: [
{
type: "paragraph",
content: [{ type: "text", text: "three" }],
},
],
},
],
},
],
};
const idsAfterLocalEdit = () => {
// A fresh editor instance = an independent "client" running the same
// plugin pipeline on the same starting document.
const editor = makeEditor(structuredClone(duplicateDoc));
editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs
const refIds: string[] = [];
const defIds: string[] = [];
editor.state.doc.descendants((node) => {
if (node.type.name === FOOTNOTE_REFERENCE_NAME)
refIds.push(node.attrs.id);
if (node.type.name === FOOTNOTE_DEFINITION_NAME)
defIds.push(node.attrs.id);
});
editor.destroy();
return { refIds, defIds };
};
const clientA = idsAfterLocalEdit();
const clientB = idsAfterLocalEdit();
// Both clients computed IDENTICAL ids (the property that makes Yjs converge).
expect(clientA.refIds).toEqual(clientB.refIds);
expect(clientA.defIds).toEqual(clientB.defIds);
// And the ids are deterministic-derived (not random uuid-style): the keeper
// keeps "d", the duplicates become "d__2", "d__3".
expect(new Set(clientA.refIds)).toEqual(new Set(["d", "d__2", "d__3"]));
// Every definition survived with a unique id, 1:1 with the references.
expect(clientA.defIds.length).toBe(3);
expect(new Set(clientA.defIds).size).toBe(3);
expect([...clientA.refIds].sort()).toEqual([...clientA.defIds].sort());
});
it("removes an orphan definition with no matching reference", () => { it("removes an orphan definition with no matching reference", () => {
const editor = makeEditor({ const editor = makeEditor({
type: "doc", type: "doc",

View File

@@ -1,4 +1,5 @@
import { marked } from "marked"; import { marked } from "marked";
import { deriveFootnoteId } from "../../footnote/footnote-util";
/** /**
* Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline. * Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline.
@@ -52,6 +53,10 @@ function escapeAttr(value: string): string {
return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;"); return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
} }
function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/** /**
* Extract `[^id]: text` definition lines from the markdown body, returning the * Extract `[^id]: text` definition lines from the markdown body, returning the
* cleaned body plus a rendered <section data-footnotes> (empty string when no * cleaned body plus a rendered <section data-footnotes> (empty string when no
@@ -96,6 +101,56 @@ export function extractFootnoteDefinitions(markdown: string): {
return { body: markdown, section: "" }; return { body: markdown, section: "" };
} }
// De-duplicate colliding definition ids. Two definitions sharing an id (e.g.
// `[^d]: first` / `[^d]: second`) would otherwise collapse into one footnote
// downstream (the editor's last-wins sync). Rename each colliding id to a
// DETERMINISTIC derived one AND rewrite the corresponding `[^id]` reference
// marker so the (reference, definition) pairing stays 1:1. The FIRST
// definition keeps the id and pairs with the FIRST `[^id]` marker; the Nth
// duplicate gets the derived id `${id}__${N}` and rewrites the Nth `[^id]`
// marker. If there are fewer markers than definitions, the surplus definition
// keeps a derived (orphan) id so it is never silently merged away.
//
// The id is derived (deriveFootnoteId), NOT random: importing the same
// markdown through two paths (here and the MCP mirror) must yield identical
// ids, and re-importing the same markdown twice must be stable.
let dedupedBody = bodyLines.join("\n");
// Every original definition id is reserved up front so a derived id can never
// collide with an unrelated original id present in the document.
const taken = new Set<string>(definitions.map((d) => d.id));
const seenDefIds = new Map<string, number>(); // original id -> how many seen
for (const def of definitions) {
const originalId = def.id;
const count = seenDefIds.get(originalId) ?? 0;
seenDefIds.set(originalId, count + 1);
if (count === 0) continue; // first definition keeps its id
// count is the 0-based number of PRIOR occurrences; this is occurrence
// (count + 1), i.e. 2 for the first duplicate, 3 for the next, ...
const newId = deriveFootnoteId(originalId, count + 1, taken);
taken.add(newId);
def.id = newId;
// Rewrite the NEXT still-unrewritten `[^originalId]` marker that does not
// belong to the keeper definition. After a prior duplicate rewrote its
// marker (to `[^someNewId]`), it no longer matches `[^originalId]`, so the
// remaining matches are: index 0 = the keeper's marker (left alone), index 1
// = this duplicate's marker. Rewrite index 1.
let occurrence = 0;
let rewritten = false;
const re = new RegExp(`\\[\\^${escapeRegExp(originalId)}\\]`, "g");
dedupedBody = dedupedBody.replace(re, (match) => {
const idx = occurrence++;
if (!rewritten && idx === 1) {
rewritten = true;
return `[^${newId}]`;
}
return match;
});
// If there was no second marker (more definitions than references), the
// duplicate simply survives as an orphan with its fresh id — no body change.
}
const defsHtml = definitions const defsHtml = definitions
.map((d) => { .map((d) => {
// Render the definition text as inline markdown so emphasis/links inside // Render the definition text as inline markdown so emphasis/links inside
@@ -109,7 +164,7 @@ export function extractFootnoteDefinitions(markdown: string): {
.join(""); .join("");
return { return {
body: bodyLines.join("\n"), body: dedupedBody,
section: `<section data-footnotes>${defsHtml}</section>`, section: `<section data-footnotes>${defsHtml}</section>`,
}; };
} }

View File

@@ -271,6 +271,44 @@ const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
function escapeFootnoteAttr(value) { function escapeFootnoteAttr(value) {
return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;"); return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
} }
function escapeFootnoteRegExp(value) {
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
* an original id `X` during definition dedup.
*
* EXACT MIRROR of editor-ext `deriveFootnoteId`
* (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
* STAY IN SYNC: the same markdown imported through the editor and through this
* MCP path has to produce identical ids, and the sync plugin (which re-ids on
* every collaborating client) relies on the same scheme to converge. NEVER use
* Math.random()/Date.now()/uuid here — a random id would diverge across clients.
*
* Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
* with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
* `taken` (the set of ids already present / already minted — pure doc state).
*/
function deriveFootnoteId(originalId, occurrence, taken) {
let candidate = `${originalId}__${occurrence}`;
let n = 0;
while (taken.has(candidate)) {
n += 1;
candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
}
return candidate;
}
/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
function footnoteSuffix(n) {
let out = "";
let x = n;
while (x > 0) {
const rem = (x - 1) % 25;
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
x = Math.floor((x - 1) / 25);
}
return out;
}
const footnoteRefMarkedExtension = { const footnoteRefMarkedExtension = {
name: "footnoteRef", name: "footnoteRef",
level: "inline", level: "inline",
@@ -319,11 +357,43 @@ function extractFootnotes(markdown) {
} }
if (defs.length === 0) if (defs.length === 0)
return { body: markdown, section: "" }; return { body: markdown, section: "" };
// De-duplicate colliding definition ids (mirror of editor-ext
// extractFootnoteDefinitions). Two definitions sharing an id would otherwise
// collapse into one footnote downstream; rename each colliding id to a
// DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
// marker so the (reference, definition) pairing stays 1:1. Determinism lets
// the same markdown imported here and via the editor produce identical ids.
let dedupedBody = bodyLines.join("\n");
const taken = new Set(defs.map((d) => d.id));
const seenDefIds = new Map();
for (const def of defs) {
const originalId = def.id;
const count = seenDefIds.get(originalId) ?? 0;
seenDefIds.set(originalId, count + 1);
if (count === 0)
continue; // first definition keeps its id
const newId = deriveFootnoteId(originalId, count + 1, taken);
taken.add(newId);
def.id = newId;
// Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
// index 1 = this duplicate's marker. Rewrite index 1.
let occurrence = 0;
let rewritten = false;
const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
dedupedBody = dedupedBody.replace(re, (match) => {
const idx = occurrence++;
if (!rewritten && idx === 1) {
rewritten = true;
return `[^${newId}]`;
}
return match;
});
}
const inner = defs const inner = defs
.map((d) => `<div data-footnote-def data-id="${escapeFootnoteAttr(d.id)}"><p>${marked.parseInline(d.text || "")}</p></div>`) .map((d) => `<div data-footnote-def data-id="${escapeFootnoteAttr(d.id)}"><p>${marked.parseInline(d.text || "")}</p></div>`)
.join(""); .join("");
return { return {
body: bodyLines.join("\n"), body: dedupedBody,
section: `<section data-footnotes>${inner}</section>`, section: `<section data-footnotes>${inner}</section>`,
}; };
} }

View File

@@ -306,6 +306,51 @@ function escapeFootnoteAttr(value: string): string {
return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;"); return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
} }
function escapeFootnoteRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
* an original id `X` during definition dedup.
*
* EXACT MIRROR of editor-ext `deriveFootnoteId`
* (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
* STAY IN SYNC: the same markdown imported through the editor and through this
* MCP path has to produce identical ids, and the sync plugin (which re-ids on
* every collaborating client) relies on the same scheme to converge. NEVER use
* Math.random()/Date.now()/uuid here — a random id would diverge across clients.
*
* Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
* with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
* `taken` (the set of ids already present / already minted — pure doc state).
*/
function deriveFootnoteId(
originalId: string,
occurrence: number,
taken: Set<string>,
): string {
let candidate = `${originalId}__${occurrence}`;
let n = 0;
while (taken.has(candidate)) {
n += 1;
candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
}
return candidate;
}
/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
function footnoteSuffix(n: number): string {
let out = "";
let x = n;
while (x > 0) {
const rem = (x - 1) % 25;
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
x = Math.floor((x - 1) / 25);
}
return out;
}
const footnoteRefMarkedExtension = { const footnoteRefMarkedExtension = {
name: "footnoteRef", name: "footnoteRef",
level: "inline" as const, level: "inline" as const,
@@ -356,6 +401,39 @@ function extractFootnotes(markdown: string): {
else bodyLines.push(line); else bodyLines.push(line);
} }
if (defs.length === 0) return { body: markdown, section: "" }; if (defs.length === 0) return { body: markdown, section: "" };
// De-duplicate colliding definition ids (mirror of editor-ext
// extractFootnoteDefinitions). Two definitions sharing an id would otherwise
// collapse into one footnote downstream; rename each colliding id to a
// DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
// marker so the (reference, definition) pairing stays 1:1. Determinism lets
// the same markdown imported here and via the editor produce identical ids.
let dedupedBody = bodyLines.join("\n");
const taken = new Set<string>(defs.map((d) => d.id));
const seenDefIds = new Map<string, number>();
for (const def of defs) {
const originalId = def.id;
const count = seenDefIds.get(originalId) ?? 0;
seenDefIds.set(originalId, count + 1);
if (count === 0) continue; // first definition keeps its id
const newId = deriveFootnoteId(originalId, count + 1, taken);
taken.add(newId);
def.id = newId;
// Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
// index 1 = this duplicate's marker. Rewrite index 1.
let occurrence = 0;
let rewritten = false;
const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
dedupedBody = dedupedBody.replace(re, (match) => {
const idx = occurrence++;
if (!rewritten && idx === 1) {
rewritten = true;
return `[^${newId}]`;
}
return match;
});
}
const inner = defs const inner = defs
.map( .map(
(d) => (d) =>
@@ -365,7 +443,7 @@ function extractFootnotes(markdown: string): {
) )
.join(""); .join("");
return { return {
body: bodyLines.join("\n"), body: dedupedBody,
section: `<section data-footnotes>${inner}</section>`, section: `<section data-footnotes>${inner}</section>`,
}; };
} }

View File

@@ -90,6 +90,39 @@ test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
assert.match(md2, /\[\^fn2\]: Second note\./); assert.match(md2, /\[\^fn2\]: Second note\./);
}); });
test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => {
// The MCP import must derive duplicate ids deterministically (NOT random) so
// the same markdown imported here and via the editor produces identical ids,
// and re-importing is stable. This is the test that would FAIL on the old
// Math.random()/Date.now() implementation.
const md = [
"See[^d] one[^d] two[^d].",
"",
"[^d]: first",
"[^d]: second",
"[^d]: third",
].join("\n");
const idsOf = async () => {
const json = await markdownToProseMirror(md);
const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
return { refs, defs };
};
const a = await idsOf();
const b = await idsOf();
// Identical across runs.
assert.deepEqual(a.refs, b.refs);
assert.deepEqual(a.defs, b.defs);
// Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]);
// 1:1 reference <-> definition pairing, all distinct.
assert.equal(new Set(a.defs).size, 3);
assert.deepEqual([...a.refs].sort(), [...a.defs].sort());
});
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => { test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
// Markdown that DOCUMENTS footnote syntax inside a code fence. The example // Markdown that DOCUMENTS footnote syntax inside a code fence. The example
// definition line must be preserved verbatim inside the code block and not // definition line must be preserved verbatim inside the code block and not