fix(footnotes): survive duplicate-id definitions without collab divergence
Release-cycle red-team found two same-id footnoteDefinition nodes (trivially produced by markdown import [^d]: first / [^d]: second, or paste/duplicate) caused silent data loss: scan() used a last-wins Map and the sync rebuild (addToHistory:false, propagated via Yjs, un-undoable) dropped all but the last. Fix resolves collisions so BOTH survive, with a DETERMINISTIC id scheme so collaborators converge: - deriveFootnoteId(originalId, occurrence, taken): the k-th (k>=2) occurrence of id X becomes X__k, bumped with a deterministic alpha suffix only against the doc's own id set — a pure function of document state. No Math.random/Date.now on the sync or import paths (random uuid stays only in setFootnote, where a single user originates a brand-new id). - footnote-sync.resolveCollisions walks refs+defs in document order, re-ids duplicate references via setNodeMarkup and pairs them 1:1 with definitions; single SYNC_META-tagged transaction, returns null when canonical (terminates). - Markdown import (footnote.marked) + MCP mirror (collaboration.ts) dedup with the same deterministic scheme + marker rewrite; packages/mcp/build regenerated. - Paste plugin remaps colliding pasted ids against the current doc. Tests: two independent editors resolving the same duplicate-id doc produce IDENTICAL ids (the cross-client determinism guard that the random version would fail); both definitions survive the first edit; import dedup is deterministic. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { htmlToMarkdown } from "../markdown/utils/turndown.utils";
|
||||
import { markdownToHtml } from "../markdown/utils/marked.utils";
|
||||
import { extractFootnoteDefinitions } from "../markdown/utils/footnote.marked";
|
||||
|
||||
// HTML the editor-ext nodes render (sup[data-footnote-ref], section/div).
|
||||
const HTML =
|
||||
@@ -53,4 +54,87 @@ describe("footnote markdown round-trip", () => {
|
||||
expect(html).not.toContain("data-footnotes");
|
||||
expect(html).not.toContain("data-footnote-def");
|
||||
});
|
||||
|
||||
it("extractFootnoteDefinitions de-duplicates colliding ids and rewrites markers", () => {
|
||||
// Two definitions share id `d`, and the body has two `[^d]` markers. The
|
||||
// output must keep BOTH definitions with DISTINCT ids and rewrite the second
|
||||
// marker so the (reference, definition) pairing stays 1:1.
|
||||
const md = [
|
||||
"See here[^d] and there[^d].",
|
||||
"",
|
||||
"[^d]: first",
|
||||
"[^d]: second",
|
||||
].join("\n");
|
||||
|
||||
const { body, section } = extractFootnoteDefinitions(md);
|
||||
|
||||
// Pull out the def ids from the section in order.
|
||||
const defIds = Array.from(
|
||||
section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
|
||||
).map((m) => m[1]);
|
||||
expect(defIds.length).toBe(2);
|
||||
expect(new Set(defIds).size).toBe(2); // distinct
|
||||
expect(defIds[0]).toBe("d"); // first definition keeps the id
|
||||
|
||||
// Both definition texts survive.
|
||||
expect(section).toContain("first");
|
||||
expect(section).toContain("second");
|
||||
|
||||
// The body still has two markers, now pointing at the two distinct ids.
|
||||
const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
|
||||
(m) => m[1],
|
||||
);
|
||||
expect(refIds.length).toBe(2);
|
||||
expect(refIds.sort()).toEqual(defIds.sort());
|
||||
});
|
||||
|
||||
it("extractFootnoteDefinitions dedups DETERMINISTICALLY (same input -> same ids)", () => {
|
||||
// The derived id must be a pure function of the input markdown so importing
|
||||
// the same source twice (or via the editor and the MCP mirror) yields
|
||||
// identical ids — never random/time-based.
|
||||
const md = [
|
||||
"See[^d] one[^d] two[^d].",
|
||||
"",
|
||||
"[^d]: first",
|
||||
"[^d]: second",
|
||||
"[^d]: third",
|
||||
].join("\n");
|
||||
|
||||
const run = () => {
|
||||
const { body, section } = extractFootnoteDefinitions(md);
|
||||
const defIds = Array.from(
|
||||
section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
|
||||
).map((m) => m[1]);
|
||||
const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
|
||||
(m) => m[1],
|
||||
);
|
||||
return { defIds, refIds };
|
||||
};
|
||||
|
||||
const a = run();
|
||||
const b = run();
|
||||
// Identical across runs (this is what would FAIL on the random-id version).
|
||||
expect(a.defIds).toEqual(b.defIds);
|
||||
expect(a.refIds).toEqual(b.refIds);
|
||||
// Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
|
||||
expect(a.defIds).toEqual(["d", "d__2", "d__3"]);
|
||||
expect(a.refIds.sort()).toEqual(a.defIds.sort());
|
||||
});
|
||||
|
||||
it("markdownToHtml with duplicate ids renders two distinct footnote defs", async () => {
|
||||
const md = [
|
||||
"See here[^d] and there[^d].",
|
||||
"",
|
||||
"[^d]: first",
|
||||
"[^d]: second",
|
||||
].join("\n");
|
||||
const html = await markdownToHtml(md);
|
||||
const defIds = Array.from(
|
||||
html.matchAll(/data-footnote-def data-id="([^"]+)"/g),
|
||||
).map((m) => m[1]);
|
||||
expect(defIds.length).toBe(2);
|
||||
expect(new Set(defIds).size).toBe(2);
|
||||
expect(html).toContain("first");
|
||||
expect(html).toContain("second");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
generateFootnoteId,
|
||||
} from "./footnote-util";
|
||||
import { footnoteNumberingPlugin } from "./footnote-numbering";
|
||||
import { footnoteSyncPlugin } from "./footnote-sync";
|
||||
import { footnoteSyncPlugin, footnotePastePlugin } from "./footnote-sync";
|
||||
|
||||
export interface FootnoteReferenceOptions {
|
||||
HTMLAttributes: Record<string, any>;
|
||||
@@ -88,6 +88,9 @@ export const FootnoteReference = Node.create<FootnoteReferenceOptions>({
|
||||
// doc is never mutated.
|
||||
if (this.options.enableSync !== false) {
|
||||
plugins.push(footnoteSyncPlugin(this.options.isRemoteTransaction));
|
||||
// Regenerate colliding footnote ids on paste so a pasted reference+
|
||||
// definition pair never clobbers/merges with an existing footnote.
|
||||
plugins.push(footnotePastePlugin());
|
||||
}
|
||||
return plugins;
|
||||
},
|
||||
|
||||
@@ -1,48 +1,215 @@
|
||||
import { Plugin, PluginKey, Transaction } from "@tiptap/pm/state";
|
||||
import { Node as ProseMirrorNode, Fragment } from "@tiptap/pm/model";
|
||||
import { Node as ProseMirrorNode, Fragment, Slice } from "@tiptap/pm/model";
|
||||
import {
|
||||
FOOTNOTE_DEFINITION_NAME,
|
||||
FOOTNOTE_REFERENCE_NAME,
|
||||
FOOTNOTES_LIST_NAME,
|
||||
deriveFootnoteId,
|
||||
} from "./footnote-util";
|
||||
|
||||
export const footnoteSyncPluginKey = new PluginKey("footnoteSync");
|
||||
|
||||
const SYNC_META = "footnoteSyncApplied";
|
||||
|
||||
interface RefOccurrence {
|
||||
/** Position of the reference node in the document. */
|
||||
pos: number;
|
||||
/** The id the reference currently carries. */
|
||||
id: string;
|
||||
node: ProseMirrorNode;
|
||||
}
|
||||
|
||||
interface DefOccurrence {
|
||||
/** Position of the definition node in the document. */
|
||||
pos: number;
|
||||
/** The id the definition currently carries. */
|
||||
id: string;
|
||||
node: ProseMirrorNode;
|
||||
}
|
||||
|
||||
interface FootnoteScan {
|
||||
/** Reference ids in document order, first occurrence only, de-duplicated. */
|
||||
referenceIds: string[];
|
||||
/** definition id -> node (last occurrence wins, matching scan order). */
|
||||
definitions: Map<string, ProseMirrorNode>;
|
||||
/**
|
||||
* Every reference occurrence in document order (NOT de-duplicated). Needed so
|
||||
* that duplicate ids — which would otherwise be silently collapsed — can be
|
||||
* detected and (together with their definitions) re-id'd instead of dropped.
|
||||
*/
|
||||
refOccurrences: RefOccurrence[];
|
||||
/**
|
||||
* Every definition occurrence in document order (NOT de-duplicated). The old
|
||||
* implementation used a last-wins Map here, which is exactly what caused
|
||||
* silent data loss: two definitions sharing an id collapsed to one.
|
||||
*/
|
||||
defOccurrences: DefOccurrence[];
|
||||
/** Every top-level footnotesList node, in document order. */
|
||||
lists: Array<{ pos: number; node: ProseMirrorNode }>;
|
||||
}
|
||||
|
||||
function scan(doc: ProseMirrorNode): FootnoteScan {
|
||||
const referenceIds: string[] = [];
|
||||
const seenRefs = new Set<string>();
|
||||
const definitions = new Map<string, ProseMirrorNode>();
|
||||
const refOccurrences: RefOccurrence[] = [];
|
||||
const defOccurrences: DefOccurrence[] = [];
|
||||
const lists: Array<{ pos: number; node: ProseMirrorNode }> = [];
|
||||
|
||||
doc.descendants((node, pos) => {
|
||||
if (node.type.name === FOOTNOTE_REFERENCE_NAME) {
|
||||
const id = node.attrs.id;
|
||||
if (id && !seenRefs.has(id)) {
|
||||
seenRefs.add(id);
|
||||
referenceIds.push(id);
|
||||
}
|
||||
if (id) refOccurrences.push({ pos, id, node });
|
||||
}
|
||||
if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
|
||||
const id = node.attrs.id;
|
||||
if (id) definitions.set(id, node);
|
||||
if (id) defOccurrences.push({ pos, id, node });
|
||||
}
|
||||
if (node.type.name === FOOTNOTES_LIST_NAME) {
|
||||
lists.push({ pos, node });
|
||||
}
|
||||
});
|
||||
|
||||
return { referenceIds, definitions, lists };
|
||||
return { refOccurrences, defOccurrences, lists };
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the
|
||||
* concrete reference re-id edits that must be applied to the body so the doc no
|
||||
* longer contains two footnotes sharing a single id.
|
||||
*
|
||||
* The overriding invariant is that NO definition is ever dropped here: every
|
||||
* definition occurrence ends up with a unique id and therefore survives the
|
||||
* canonical rebuild. Duplicate references are likewise re-id'd (and paired with
|
||||
* a duplicate definition when one exists) so importing/pasting `[^d]` twice with
|
||||
* two `[^d]:` definitions yields TWO distinct footnotes rather than one.
|
||||
*/
|
||||
interface CollisionPlan {
|
||||
/**
|
||||
* Reference ids in document order, de-duplicated AFTER re-id. This is the
|
||||
* source of truth for definition order/numbering, exactly as before — only
|
||||
* now collisions have been resolved so it no longer hides duplicates.
|
||||
*/
|
||||
referenceIds: string[];
|
||||
/** id -> definition node, after duplicates were re-id'd. One entry per id. */
|
||||
definitions: Map<string, ProseMirrorNode>;
|
||||
/**
|
||||
* Body reference re-id edits to apply (position of a reference node -> the
|
||||
* fresh id it must carry). Empty when there are no colliding references.
|
||||
*/
|
||||
refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>;
|
||||
/** True when any collision required a re-id (refs and/or defs). */
|
||||
changed: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve duplicate-id collisions among references and definitions WITHOUT ever
|
||||
* dropping a definition.
|
||||
*
|
||||
* Strategy:
|
||||
* - Walk references in document order. The FIRST reference for an id keeps it.
|
||||
* Any later reference sharing that id is a duplicate and gets a fresh unique
|
||||
* id; if a still-unclaimed duplicate definition with the original id exists,
|
||||
* it is re-id'd to the SAME fresh id so the (ref, def) pair stays matched.
|
||||
* - Walk definitions in document order. The FIRST definition for an id keeps
|
||||
* it; later duplicates that were not already claimed by a duplicate reference
|
||||
* get their own fresh unique id (surviving as a distinct footnote/orphan).
|
||||
*
|
||||
* Re-id determinism: every fresh id is DERIVED from document state via
|
||||
* deriveFootnoteId (e.g. `X__2`, `X__3`, collision-bumped against the set of ids
|
||||
* already present) — NEVER random/time-based. Because the sync plugin runs
|
||||
* identically on every collaborating client, a deterministic re-id is the only
|
||||
* way they can converge on the SAME ids; a random id (the previous
|
||||
* implementation) made two clients editing the same duplicate-id document mint
|
||||
* DIFFERENT ids for the same duplicate, causing permanent Yjs divergence.
|
||||
*/
|
||||
function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
||||
const definitions = new Map<string, ProseMirrorNode>();
|
||||
const refReids: Array<{
|
||||
pos: number;
|
||||
node: ProseMirrorNode;
|
||||
newId: string;
|
||||
}> = [];
|
||||
const referenceIds: string[] = [];
|
||||
const seenRefIds = new Set<string>();
|
||||
let changed = false;
|
||||
|
||||
// `taken` is the set of every id that must be avoided when minting a derived
|
||||
// id: all original reference + definition ids in the document PLUS every id we
|
||||
// mint during this pass. It is pure document state, so the derivation stays
|
||||
// deterministic across clients. Per-original occurrence counters make the k-th
|
||||
// duplicate of `X` deterministically become `X__2`, `X__3`, ...
|
||||
const taken = new Set<string>();
|
||||
for (const occ of scan.refOccurrences) taken.add(occ.id);
|
||||
for (const occ of scan.defOccurrences) taken.add(occ.id);
|
||||
const occurrenceOf = new Map<string, number>();
|
||||
// Mint a deterministic unique id for a duplicate of `originalId`. The first
|
||||
// duplicate is occurrence 2 (the keeper is occurrence 1), then 3, 4, ...
|
||||
const mintId = (originalId: string): string => {
|
||||
const next = (occurrenceOf.get(originalId) ?? 1) + 1;
|
||||
occurrenceOf.set(originalId, next);
|
||||
const id = deriveFootnoteId(originalId, next, taken);
|
||||
taken.add(id);
|
||||
return id;
|
||||
};
|
||||
|
||||
// Bucket definition occurrences by their original id so a duplicate reference
|
||||
// can claim a matching (as-yet-unclaimed) duplicate definition and re-id the
|
||||
// pair together. defByOriginalId[id] is consumed front-to-back.
|
||||
const defByOriginalId = new Map<string, DefOccurrence[]>();
|
||||
for (const occ of scan.defOccurrences) {
|
||||
const arr = defByOriginalId.get(occ.id);
|
||||
if (arr) arr.push(occ);
|
||||
else defByOriginalId.set(occ.id, [occ]);
|
||||
}
|
||||
// The FIRST definition for each id is the canonical keeper of that id.
|
||||
const claimed = new Set<DefOccurrence>();
|
||||
|
||||
for (const ref of scan.refOccurrences) {
|
||||
if (!seenRefIds.has(ref.id)) {
|
||||
// First reference with this id keeps it.
|
||||
seenRefIds.add(ref.id);
|
||||
referenceIds.push(ref.id);
|
||||
continue;
|
||||
}
|
||||
// Duplicate reference: assign a deterministic derived id. Pair it with the
|
||||
// next unclaimed duplicate definition (NOT the first keeper) carrying the
|
||||
// same original id, if one exists, so the (ref, def) pairing is preserved
|
||||
// 1:1.
|
||||
const newId = mintId(ref.id);
|
||||
refReids.push({ pos: ref.pos, node: ref.node, newId });
|
||||
seenRefIds.add(newId);
|
||||
referenceIds.push(newId);
|
||||
changed = true;
|
||||
|
||||
const candidates = defByOriginalId.get(ref.id) ?? [];
|
||||
// Skip the first occurrence (it keeps the original id); pick the first
|
||||
// duplicate not already claimed.
|
||||
for (let i = 1; i < candidates.length; i++) {
|
||||
const cand = candidates[i];
|
||||
if (!claimed.has(cand)) {
|
||||
claimed.add(cand);
|
||||
definitions.set(newId, cand.node);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now place every definition under a unique id. The first occurrence of each
|
||||
// original id keeps it; remaining duplicates either were paired with a
|
||||
// duplicate reference above (already placed) or get a fresh standalone id.
|
||||
const seenDefIds = new Set<string>();
|
||||
for (const occ of scan.defOccurrences) {
|
||||
if (claimed.has(occ)) continue; // already placed against a duplicate ref id
|
||||
if (!seenDefIds.has(occ.id)) {
|
||||
seenDefIds.add(occ.id);
|
||||
definitions.set(occ.id, occ.node);
|
||||
} else {
|
||||
// Duplicate definition with no duplicate reference to pair with: keep it
|
||||
// with a deterministic derived id so it is NEVER silently dropped. (It
|
||||
// becomes an orphan and is then subject to the normal orphan policy — but
|
||||
// only ever because it has no matching reference, never because it
|
||||
// collided.)
|
||||
const newId = mintId(occ.id);
|
||||
definitions.set(newId, occ.node);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
return { referenceIds, definitions, refReids, changed };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -78,9 +245,14 @@ function scan(doc: ProseMirrorNode): FootnoteScan {
|
||||
* ping-pong forever (list moved to end -> trailing paragraph appended -> list
|
||||
* no longer last -> moved again ...).
|
||||
*
|
||||
* Paste id-collision regeneration is left to the paste handler / v2; the common
|
||||
* cases (orphans, missing definitions, multiple/empty/misplaced lists) are
|
||||
* covered here.
|
||||
* Duplicate-id collisions (two references and/or two definitions sharing one
|
||||
* id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a
|
||||
* reference+definition pair) are resolved up front by resolveCollisions(): the
|
||||
* duplicates are re-id'd to fresh unique ids so BOTH survive as distinct
|
||||
* footnotes. This guarantees the overriding invariant — no footnoteDefinition is
|
||||
* ever silently deleted by this automatic (addToHistory:false) transaction. A
|
||||
* definition is only ever removed when it has NO matching reference (orphan
|
||||
* policy), never because its id collided with another.
|
||||
*/
|
||||
export function footnoteSyncPlugin(
|
||||
isRemoteTransaction?: (tr: Transaction) => boolean,
|
||||
@@ -111,12 +283,33 @@ export function footnoteSyncPlugin(
|
||||
|
||||
const info = scan(doc);
|
||||
|
||||
// 0) Resolve duplicate-id collisions (two references and/or two
|
||||
// definitions sharing one id) by re-id'ing duplicates to fresh unique
|
||||
// ids. This is the critical defense: the old last-wins Map silently
|
||||
// dropped all but the last definition for a shared id; here EVERY
|
||||
// definition survives with a unique id, and duplicate references are
|
||||
// paired with duplicate definitions so two same-id imports/pastes yield
|
||||
// two distinct footnotes instead of one.
|
||||
const plan = resolveCollisions(info);
|
||||
const referenceIds = plan.referenceIds;
|
||||
|
||||
// 1) Desired definitions: one per referenced id, in reference order,
|
||||
// reusing existing definition nodes (preserving their content) and
|
||||
// synthesizing empty ones for references that lack a definition.
|
||||
const desiredDefs: ProseMirrorNode[] = info.referenceIds.map((id) => {
|
||||
const existing = info.definitions.get(id);
|
||||
if (existing) return existing;
|
||||
// Definitions whose id has no matching reference (true orphans) are
|
||||
// dropped per the existing orphan policy — but a collision is NEVER the
|
||||
// cause of a drop, because collisions were re-id'd above.
|
||||
const desiredDefs: ProseMirrorNode[] = referenceIds.map((id) => {
|
||||
const existing = plan.definitions.get(id);
|
||||
if (existing) {
|
||||
// A definition paired to a re-id'd reference keeps its CONTENT but
|
||||
// must carry the new id. Rewrite the id attr when it differs (cheap
|
||||
// no-op when it already matches).
|
||||
if (existing.attrs.id !== id) {
|
||||
return defType.create({ id }, existing.content);
|
||||
}
|
||||
return existing;
|
||||
}
|
||||
return defType.create({ id }, paragraphType.create());
|
||||
});
|
||||
|
||||
@@ -129,7 +322,12 @@ export function footnoteSyncPlugin(
|
||||
node.type === paragraphType && node.content.size === 0;
|
||||
|
||||
let alreadyCanonical = false;
|
||||
if (!hasRefs) {
|
||||
if (plan.changed) {
|
||||
// A collision was detected (duplicate ids among refs/defs). The doc must
|
||||
// be rewritten (re-id'd references + rebuilt list); it is never already
|
||||
// canonical in this case.
|
||||
alreadyCanonical = false;
|
||||
} else if (!hasRefs) {
|
||||
// Canonical when there is no footnotesList at all.
|
||||
alreadyCanonical = info.lists.length === 0;
|
||||
} else if (info.lists.length === 1) {
|
||||
@@ -158,6 +356,17 @@ export function footnoteSyncPlugin(
|
||||
// 3) Rebuild: produce exactly ONE transaction that reaches the end-state.
|
||||
const tr = newState.tr;
|
||||
|
||||
// 3a) Re-id colliding body references FIRST. A footnoteReference is an
|
||||
// inline atom, so setNodeMarkup changes only its attrs (not its size),
|
||||
// leaving every other position valid for the list deletions/insert
|
||||
// that follow.
|
||||
for (const reid of plan.refReids) {
|
||||
tr.setNodeMarkup(reid.pos, undefined, {
|
||||
...reid.node.attrs,
|
||||
id: reid.newId,
|
||||
});
|
||||
}
|
||||
|
||||
// Delete every existing footnotesList (from the end so earlier positions
|
||||
// stay valid while we mutate).
|
||||
[...info.lists]
|
||||
@@ -195,3 +404,101 @@ export function footnoteSyncPlugin(
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
export const footnotePastePluginKey = new PluginKey("footnotePaste");
|
||||
|
||||
/**
|
||||
* Paste id-collision guard. When pasted content carries footnote reference or
|
||||
* definition ids that ALREADY EXIST in the current document, regenerate those
|
||||
* ids (consistently across the pasted slice, so a pasted reference and its
|
||||
* definition keep pointing at each other) BEFORE the slice is inserted.
|
||||
*
|
||||
* Without this, pasting a reference+definition pair copied from elsewhere — or
|
||||
* duplicating one in place — would merge with (or clobber) the existing footnote
|
||||
* of the same id. The schema-sync plugin already guarantees no definition is
|
||||
* ever silently deleted after the fact (it re-id's collisions), but regenerating
|
||||
* at paste time keeps the pasted footnote cleanly separate from the start and
|
||||
* avoids any transient merge.
|
||||
*
|
||||
* Only COLLIDING ids are remapped: a self-paste of a lone reference whose id is
|
||||
* not present elsewhere is left untouched (so it still resolves to its existing
|
||||
* definition).
|
||||
*/
|
||||
export function footnotePastePlugin(): Plugin {
|
||||
return new Plugin({
|
||||
key: footnotePastePluginKey,
|
||||
props: {
|
||||
transformPasted(slice, view) {
|
||||
// Collect ids already present in the current document.
|
||||
const existing = new Set<string>();
|
||||
view.state.doc.descendants((node) => {
|
||||
if (
|
||||
node.type.name === FOOTNOTE_REFERENCE_NAME ||
|
||||
node.type.name === FOOTNOTE_DEFINITION_NAME
|
||||
) {
|
||||
const id = node.attrs.id;
|
||||
if (id) existing.add(id);
|
||||
}
|
||||
});
|
||||
if (existing.size === 0) return slice;
|
||||
|
||||
// Build a remap (old id -> fresh id) for every COLLIDING id found in the
|
||||
// pasted slice, shared by references and definitions so a pasted pair
|
||||
// stays matched. A paste is a distinct local user action (not a
|
||||
// shared-state convergence point), so determinism is not strictly
|
||||
// required here — but we derive the new id deterministically anyway
|
||||
// (deriveFootnoteId against the current doc's id set) for consistency
|
||||
// with the sync/import paths and to keep Math.random off this code path.
|
||||
const remap = new Map<string, string>();
|
||||
const collectColliding = (node: ProseMirrorNode) => {
|
||||
if (
|
||||
node.type.name === FOOTNOTE_REFERENCE_NAME ||
|
||||
node.type.name === FOOTNOTE_DEFINITION_NAME
|
||||
) {
|
||||
const id = node.attrs.id;
|
||||
if (id && existing.has(id) && !remap.has(id)) {
|
||||
const newId = deriveFootnoteId(id, 2, existing);
|
||||
remap.set(id, newId);
|
||||
// Reserve it so a second colliding id deriving to the same base
|
||||
// bumps instead of clashing.
|
||||
existing.add(newId);
|
||||
}
|
||||
}
|
||||
node.descendants(collectColliding);
|
||||
};
|
||||
slice.content.descendants(collectColliding);
|
||||
if (remap.size === 0) return slice;
|
||||
|
||||
// Rewrite the colliding ids throughout the slice.
|
||||
const rewrite = (fragment: Fragment): Fragment => {
|
||||
const nodes: ProseMirrorNode[] = [];
|
||||
fragment.forEach((node) => {
|
||||
const isFootnote =
|
||||
node.type.name === FOOTNOTE_REFERENCE_NAME ||
|
||||
node.type.name === FOOTNOTE_DEFINITION_NAME;
|
||||
const newId = isFootnote ? remap.get(node.attrs.id) : undefined;
|
||||
const newContent = node.content.size
|
||||
? rewrite(node.content)
|
||||
: node.content;
|
||||
if (newId) {
|
||||
nodes.push(
|
||||
node.type.create(
|
||||
{ ...node.attrs, id: newId },
|
||||
newContent,
|
||||
node.marks,
|
||||
),
|
||||
);
|
||||
} else if (newContent !== node.content) {
|
||||
nodes.push(node.copy(newContent));
|
||||
} else {
|
||||
nodes.push(node);
|
||||
}
|
||||
});
|
||||
return Fragment.fromArray(nodes);
|
||||
};
|
||||
|
||||
return new Slice(rewrite(slice.content), slice.openStart, slice.openEnd);
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
@@ -43,6 +43,61 @@ export function generateFootnoteId(): string {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
|
||||
* an original id `X` during collision resolution. The result is a pure function
|
||||
* of (`originalId`, `occurrence`, `taken`) so that every collaborating client —
|
||||
* and every import path — computes the SAME new id for the same input document.
|
||||
*
|
||||
* CRITICAL: this MUST NOT use Math.random()/Date.now()/uuid. Two clients that
|
||||
* each make a local edit on the same duplicate-id document have to converge on
|
||||
* identical ids; a random id would diverge permanently over Yjs.
|
||||
*
|
||||
* Scheme: the base candidate is `${originalId}__${occurrence}` (e.g. `X__2`,
|
||||
* `X__3`). If that candidate already exists in `taken` (an existing footnote id,
|
||||
* or one we already minted in this pass), a stable alphabetic suffix is appended
|
||||
* and bumped — `X__2b`, `X__2c`, ... — until the candidate is unique. `taken` is
|
||||
* itself part of the document state, so the whole walk stays deterministic.
|
||||
*
|
||||
* `taken` is consulted but NOT mutated here; the caller adds the returned id to
|
||||
* its own seen-set before requesting the next derived id.
|
||||
*
|
||||
* NOTE: this implementation is intentionally duplicated in
|
||||
* packages/mcp/src/lib/collaboration.ts (deriveFootnoteId)
|
||||
* and MUST stay in sync with it so markdown imported through either path yields
|
||||
* identical ids.
|
||||
*/
|
||||
export function deriveFootnoteId(
|
||||
originalId: string,
|
||||
occurrence: number,
|
||||
taken: Set<string> | ReadonlySet<string>,
|
||||
): string {
|
||||
let candidate = `${originalId}__${occurrence}`;
|
||||
// Deterministic suffix bump: b, c, d, ... then aa, ab, ... if ever exhausted.
|
||||
let n = 0;
|
||||
while (taken.has(candidate)) {
|
||||
n += 1;
|
||||
candidate = `${originalId}__${occurrence}${suffix(n)}`;
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
/**
|
||||
* Map 1 -> "b", 2 -> "c", ... 25 -> "z", 26 -> "ba", ... (base-25 over b..z,
|
||||
* skipping "a" so the first bump is visibly distinct from the un-bumped base).
|
||||
* Purely deterministic.
|
||||
*/
|
||||
function suffix(n: number): string {
|
||||
let out = "";
|
||||
let x = n;
|
||||
while (x > 0) {
|
||||
const rem = (x - 1) % 25;
|
||||
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
|
||||
x = Math.floor((x - 1) / 25);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect every `footnoteReference` id in document order. This is the single
|
||||
* source of truth for numbering and ordering — a pure function of the document
|
||||
|
||||
@@ -304,6 +304,160 @@ describe("footnote sync plugin (orphans)", () => {
|
||||
editor.destroy();
|
||||
});
|
||||
|
||||
it("two definitions sharing an id (with two matching references) BOTH survive the first edit (no data loss)", () => {
|
||||
// Reproduces the verified data-loss bug: two footnoteDefinition nodes share
|
||||
// id "d", and there are two references with id "d". The OLD code built the
|
||||
// definitions Map last-wins and emitted exactly one definition for the
|
||||
// de-duplicated reference, so the very first keystroke's sync transaction
|
||||
// deleted the whole list and rebuilt it from one definition — silently
|
||||
// destroying "first" and keeping only "second".
|
||||
const editor = makeEditor({
|
||||
type: "doc",
|
||||
content: [
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [
|
||||
{ type: "text", text: "a" },
|
||||
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
|
||||
{ type: "text", text: "b" },
|
||||
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: FOOTNOTES_LIST_NAME,
|
||||
content: [
|
||||
{
|
||||
type: FOOTNOTE_DEFINITION_NAME,
|
||||
attrs: { id: "d" },
|
||||
content: [
|
||||
{ type: "paragraph", content: [{ type: "text", text: "first" }] },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: FOOTNOTE_DEFINITION_NAME,
|
||||
attrs: { id: "d" },
|
||||
content: [
|
||||
{ type: "paragraph", content: [{ type: "text", text: "second" }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
// The first local keystroke fires the sync plugin's appendTransaction.
|
||||
editor.commands.insertContentAt(1, " ");
|
||||
|
||||
const doc = editor.state.doc;
|
||||
// BOTH definitions survive.
|
||||
expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2);
|
||||
const defTexts: string[] = [];
|
||||
const defIds: string[] = [];
|
||||
doc.descendants((node) => {
|
||||
if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
|
||||
defIds.push(node.attrs.id);
|
||||
defTexts.push(node.textContent);
|
||||
}
|
||||
});
|
||||
// No content was lost: both "first" and "second" are still present.
|
||||
expect(defTexts.sort()).toEqual(["first", "second"]);
|
||||
// The colliding ids were made distinct.
|
||||
expect(new Set(defIds).size).toBe(2);
|
||||
// Each definition's id matches exactly one reference (1:1 pairing).
|
||||
const refIds: string[] = [];
|
||||
doc.descendants((node) => {
|
||||
if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id);
|
||||
});
|
||||
expect(refIds.sort()).toEqual(defIds.sort());
|
||||
editor.destroy();
|
||||
});
|
||||
|
||||
it("re-ids colliding duplicates DETERMINISTICALLY (two clients converge to identical ids)", () => {
|
||||
// Cross-client determinism guard. Two collaborating clients each see the
|
||||
// SAME duplicate-id document and each make a local edit. The sync plugin
|
||||
// runs identically on every client, so it MUST mint the SAME new ids on both
|
||||
// — otherwise the two clients diverge permanently over Yjs (duplicated
|
||||
// footnotes). This is exactly the blocker the previous random-id
|
||||
// (generateFootnoteId / Math.random) implementation caused: it would mint
|
||||
// DIFFERENT ids on each client and this assertion would fail.
|
||||
const duplicateDoc = {
|
||||
type: "doc",
|
||||
content: [
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [
|
||||
{ type: "text", text: "a" },
|
||||
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
|
||||
{ type: "text", text: "b" },
|
||||
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
|
||||
{ type: "text", text: "c" },
|
||||
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: FOOTNOTES_LIST_NAME,
|
||||
content: [
|
||||
{
|
||||
type: FOOTNOTE_DEFINITION_NAME,
|
||||
attrs: { id: "d" },
|
||||
content: [
|
||||
{ type: "paragraph", content: [{ type: "text", text: "one" }] },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: FOOTNOTE_DEFINITION_NAME,
|
||||
attrs: { id: "d" },
|
||||
content: [
|
||||
{ type: "paragraph", content: [{ type: "text", text: "two" }] },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: FOOTNOTE_DEFINITION_NAME,
|
||||
attrs: { id: "d" },
|
||||
content: [
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [{ type: "text", text: "three" }],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const idsAfterLocalEdit = () => {
|
||||
// A fresh editor instance = an independent "client" running the same
|
||||
// plugin pipeline on the same starting document.
|
||||
const editor = makeEditor(structuredClone(duplicateDoc));
|
||||
editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs
|
||||
const refIds: string[] = [];
|
||||
const defIds: string[] = [];
|
||||
editor.state.doc.descendants((node) => {
|
||||
if (node.type.name === FOOTNOTE_REFERENCE_NAME)
|
||||
refIds.push(node.attrs.id);
|
||||
if (node.type.name === FOOTNOTE_DEFINITION_NAME)
|
||||
defIds.push(node.attrs.id);
|
||||
});
|
||||
editor.destroy();
|
||||
return { refIds, defIds };
|
||||
};
|
||||
|
||||
const clientA = idsAfterLocalEdit();
|
||||
const clientB = idsAfterLocalEdit();
|
||||
|
||||
// Both clients computed IDENTICAL ids (the property that makes Yjs converge).
|
||||
expect(clientA.refIds).toEqual(clientB.refIds);
|
||||
expect(clientA.defIds).toEqual(clientB.defIds);
|
||||
|
||||
// And the ids are deterministic-derived (not random uuid-style): the keeper
|
||||
// keeps "d", the duplicates become "d__2", "d__3".
|
||||
expect(new Set(clientA.refIds)).toEqual(new Set(["d", "d__2", "d__3"]));
|
||||
// Every definition survived with a unique id, 1:1 with the references.
|
||||
expect(clientA.defIds.length).toBe(3);
|
||||
expect(new Set(clientA.defIds).size).toBe(3);
|
||||
expect([...clientA.refIds].sort()).toEqual([...clientA.defIds].sort());
|
||||
});
|
||||
|
||||
it("removes an orphan definition with no matching reference", () => {
|
||||
const editor = makeEditor({
|
||||
type: "doc",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { marked } from "marked";
|
||||
import { deriveFootnoteId } from "../../footnote/footnote-util";
|
||||
|
||||
/**
|
||||
* Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline.
|
||||
@@ -52,6 +53,10 @@ function escapeAttr(value: string): string {
|
||||
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
||||
}
|
||||
|
||||
function escapeRegExp(value: string): string {
|
||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract `[^id]: text` definition lines from the markdown body, returning the
|
||||
* cleaned body plus a rendered <section data-footnotes> (empty string when no
|
||||
@@ -96,6 +101,56 @@ export function extractFootnoteDefinitions(markdown: string): {
|
||||
return { body: markdown, section: "" };
|
||||
}
|
||||
|
||||
// De-duplicate colliding definition ids. Two definitions sharing an id (e.g.
|
||||
// `[^d]: first` / `[^d]: second`) would otherwise collapse into one footnote
|
||||
// downstream (the editor's last-wins sync). Rename each colliding id to a
|
||||
// DETERMINISTIC derived one AND rewrite the corresponding `[^id]` reference
|
||||
// marker so the (reference, definition) pairing stays 1:1. The FIRST
|
||||
// definition keeps the id and pairs with the FIRST `[^id]` marker; the Nth
|
||||
// duplicate gets the derived id `${id}__${N}` and rewrites the Nth `[^id]`
|
||||
// marker. If there are fewer markers than definitions, the surplus definition
|
||||
// keeps a derived (orphan) id so it is never silently merged away.
|
||||
//
|
||||
// The id is derived (deriveFootnoteId), NOT random: importing the same
|
||||
// markdown through two paths (here and the MCP mirror) must yield identical
|
||||
// ids, and re-importing the same markdown twice must be stable.
|
||||
let dedupedBody = bodyLines.join("\n");
|
||||
// Every original definition id is reserved up front so a derived id can never
|
||||
// collide with an unrelated original id present in the document.
|
||||
const taken = new Set<string>(definitions.map((d) => d.id));
|
||||
const seenDefIds = new Map<string, number>(); // original id -> how many seen
|
||||
for (const def of definitions) {
|
||||
const originalId = def.id;
|
||||
const count = seenDefIds.get(originalId) ?? 0;
|
||||
seenDefIds.set(originalId, count + 1);
|
||||
if (count === 0) continue; // first definition keeps its id
|
||||
|
||||
// count is the 0-based number of PRIOR occurrences; this is occurrence
|
||||
// (count + 1), i.e. 2 for the first duplicate, 3 for the next, ...
|
||||
const newId = deriveFootnoteId(originalId, count + 1, taken);
|
||||
taken.add(newId);
|
||||
def.id = newId;
|
||||
|
||||
// Rewrite the NEXT still-unrewritten `[^originalId]` marker that does not
|
||||
// belong to the keeper definition. After a prior duplicate rewrote its
|
||||
// marker (to `[^someNewId]`), it no longer matches `[^originalId]`, so the
|
||||
// remaining matches are: index 0 = the keeper's marker (left alone), index 1
|
||||
// = this duplicate's marker. Rewrite index 1.
|
||||
let occurrence = 0;
|
||||
let rewritten = false;
|
||||
const re = new RegExp(`\\[\\^${escapeRegExp(originalId)}\\]`, "g");
|
||||
dedupedBody = dedupedBody.replace(re, (match) => {
|
||||
const idx = occurrence++;
|
||||
if (!rewritten && idx === 1) {
|
||||
rewritten = true;
|
||||
return `[^${newId}]`;
|
||||
}
|
||||
return match;
|
||||
});
|
||||
// If there was no second marker (more definitions than references), the
|
||||
// duplicate simply survives as an orphan with its fresh id — no body change.
|
||||
}
|
||||
|
||||
const defsHtml = definitions
|
||||
.map((d) => {
|
||||
// Render the definition text as inline markdown so emphasis/links inside
|
||||
@@ -109,7 +164,7 @@ export function extractFootnoteDefinitions(markdown: string): {
|
||||
.join("");
|
||||
|
||||
return {
|
||||
body: bodyLines.join("\n"),
|
||||
body: dedupedBody,
|
||||
section: `<section data-footnotes>${defsHtml}</section>`,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -271,6 +271,44 @@ const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
||||
function escapeFootnoteAttr(value) {
|
||||
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
||||
}
|
||||
function escapeFootnoteRegExp(value) {
|
||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
/**
|
||||
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
|
||||
* an original id `X` during definition dedup.
|
||||
*
|
||||
* EXACT MIRROR of editor-ext `deriveFootnoteId`
|
||||
* (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
|
||||
* STAY IN SYNC: the same markdown imported through the editor and through this
|
||||
* MCP path has to produce identical ids, and the sync plugin (which re-ids on
|
||||
* every collaborating client) relies on the same scheme to converge. NEVER use
|
||||
* Math.random()/Date.now()/uuid here — a random id would diverge across clients.
|
||||
*
|
||||
* Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
|
||||
* with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
|
||||
* `taken` (the set of ids already present / already minted — pure doc state).
|
||||
*/
|
||||
function deriveFootnoteId(originalId, occurrence, taken) {
|
||||
let candidate = `${originalId}__${occurrence}`;
|
||||
let n = 0;
|
||||
while (taken.has(candidate)) {
|
||||
n += 1;
|
||||
candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
|
||||
function footnoteSuffix(n) {
|
||||
let out = "";
|
||||
let x = n;
|
||||
while (x > 0) {
|
||||
const rem = (x - 1) % 25;
|
||||
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
|
||||
x = Math.floor((x - 1) / 25);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
const footnoteRefMarkedExtension = {
|
||||
name: "footnoteRef",
|
||||
level: "inline",
|
||||
@@ -319,11 +357,43 @@ function extractFootnotes(markdown) {
|
||||
}
|
||||
if (defs.length === 0)
|
||||
return { body: markdown, section: "" };
|
||||
// De-duplicate colliding definition ids (mirror of editor-ext
|
||||
// extractFootnoteDefinitions). Two definitions sharing an id would otherwise
|
||||
// collapse into one footnote downstream; rename each colliding id to a
|
||||
// DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
|
||||
// marker so the (reference, definition) pairing stays 1:1. Determinism lets
|
||||
// the same markdown imported here and via the editor produce identical ids.
|
||||
let dedupedBody = bodyLines.join("\n");
|
||||
const taken = new Set(defs.map((d) => d.id));
|
||||
const seenDefIds = new Map();
|
||||
for (const def of defs) {
|
||||
const originalId = def.id;
|
||||
const count = seenDefIds.get(originalId) ?? 0;
|
||||
seenDefIds.set(originalId, count + 1);
|
||||
if (count === 0)
|
||||
continue; // first definition keeps its id
|
||||
const newId = deriveFootnoteId(originalId, count + 1, taken);
|
||||
taken.add(newId);
|
||||
def.id = newId;
|
||||
// Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
|
||||
// index 1 = this duplicate's marker. Rewrite index 1.
|
||||
let occurrence = 0;
|
||||
let rewritten = false;
|
||||
const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
|
||||
dedupedBody = dedupedBody.replace(re, (match) => {
|
||||
const idx = occurrence++;
|
||||
if (!rewritten && idx === 1) {
|
||||
rewritten = true;
|
||||
return `[^${newId}]`;
|
||||
}
|
||||
return match;
|
||||
});
|
||||
}
|
||||
const inner = defs
|
||||
.map((d) => `<div data-footnote-def data-id="${escapeFootnoteAttr(d.id)}"><p>${marked.parseInline(d.text || "")}</p></div>`)
|
||||
.join("");
|
||||
return {
|
||||
body: bodyLines.join("\n"),
|
||||
body: dedupedBody,
|
||||
section: `<section data-footnotes>${inner}</section>`,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -306,6 +306,51 @@ function escapeFootnoteAttr(value: string): string {
|
||||
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
||||
}
|
||||
|
||||
function escapeFootnoteRegExp(value: string): string {
|
||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
|
||||
/**
|
||||
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
|
||||
* an original id `X` during definition dedup.
|
||||
*
|
||||
* EXACT MIRROR of editor-ext `deriveFootnoteId`
|
||||
* (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
|
||||
* STAY IN SYNC: the same markdown imported through the editor and through this
|
||||
* MCP path has to produce identical ids, and the sync plugin (which re-ids on
|
||||
* every collaborating client) relies on the same scheme to converge. NEVER use
|
||||
* Math.random()/Date.now()/uuid here — a random id would diverge across clients.
|
||||
*
|
||||
* Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
|
||||
* with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
|
||||
* `taken` (the set of ids already present / already minted — pure doc state).
|
||||
*/
|
||||
function deriveFootnoteId(
|
||||
originalId: string,
|
||||
occurrence: number,
|
||||
taken: Set<string>,
|
||||
): string {
|
||||
let candidate = `${originalId}__${occurrence}`;
|
||||
let n = 0;
|
||||
while (taken.has(candidate)) {
|
||||
n += 1;
|
||||
candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
|
||||
function footnoteSuffix(n: number): string {
|
||||
let out = "";
|
||||
let x = n;
|
||||
while (x > 0) {
|
||||
const rem = (x - 1) % 25;
|
||||
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
|
||||
x = Math.floor((x - 1) / 25);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
const footnoteRefMarkedExtension = {
|
||||
name: "footnoteRef",
|
||||
level: "inline" as const,
|
||||
@@ -356,6 +401,39 @@ function extractFootnotes(markdown: string): {
|
||||
else bodyLines.push(line);
|
||||
}
|
||||
if (defs.length === 0) return { body: markdown, section: "" };
|
||||
|
||||
// De-duplicate colliding definition ids (mirror of editor-ext
|
||||
// extractFootnoteDefinitions). Two definitions sharing an id would otherwise
|
||||
// collapse into one footnote downstream; rename each colliding id to a
|
||||
// DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
|
||||
// marker so the (reference, definition) pairing stays 1:1. Determinism lets
|
||||
// the same markdown imported here and via the editor produce identical ids.
|
||||
let dedupedBody = bodyLines.join("\n");
|
||||
const taken = new Set<string>(defs.map((d) => d.id));
|
||||
const seenDefIds = new Map<string, number>();
|
||||
for (const def of defs) {
|
||||
const originalId = def.id;
|
||||
const count = seenDefIds.get(originalId) ?? 0;
|
||||
seenDefIds.set(originalId, count + 1);
|
||||
if (count === 0) continue; // first definition keeps its id
|
||||
const newId = deriveFootnoteId(originalId, count + 1, taken);
|
||||
taken.add(newId);
|
||||
def.id = newId;
|
||||
// Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
|
||||
// index 1 = this duplicate's marker. Rewrite index 1.
|
||||
let occurrence = 0;
|
||||
let rewritten = false;
|
||||
const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
|
||||
dedupedBody = dedupedBody.replace(re, (match) => {
|
||||
const idx = occurrence++;
|
||||
if (!rewritten && idx === 1) {
|
||||
rewritten = true;
|
||||
return `[^${newId}]`;
|
||||
}
|
||||
return match;
|
||||
});
|
||||
}
|
||||
|
||||
const inner = defs
|
||||
.map(
|
||||
(d) =>
|
||||
@@ -365,7 +443,7 @@ function extractFootnotes(markdown: string): {
|
||||
)
|
||||
.join("");
|
||||
return {
|
||||
body: bodyLines.join("\n"),
|
||||
body: dedupedBody,
|
||||
section: `<section data-footnotes>${inner}</section>`,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -90,6 +90,39 @@ test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
|
||||
assert.match(md2, /\[\^fn2\]: Second note\./);
|
||||
});
|
||||
|
||||
test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => {
|
||||
// The MCP import must derive duplicate ids deterministically (NOT random) so
|
||||
// the same markdown imported here and via the editor produces identical ids,
|
||||
// and re-importing is stable. This is the test that would FAIL on the old
|
||||
// Math.random()/Date.now() implementation.
|
||||
const md = [
|
||||
"See[^d] one[^d] two[^d].",
|
||||
"",
|
||||
"[^d]: first",
|
||||
"[^d]: second",
|
||||
"[^d]: third",
|
||||
].join("\n");
|
||||
|
||||
const idsOf = async () => {
|
||||
const json = await markdownToProseMirror(md);
|
||||
const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
|
||||
const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
|
||||
return { refs, defs };
|
||||
};
|
||||
|
||||
const a = await idsOf();
|
||||
const b = await idsOf();
|
||||
|
||||
// Identical across runs.
|
||||
assert.deepEqual(a.refs, b.refs);
|
||||
assert.deepEqual(a.defs, b.defs);
|
||||
// Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
|
||||
assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]);
|
||||
// 1:1 reference <-> definition pairing, all distinct.
|
||||
assert.equal(new Set(a.defs).size, 3);
|
||||
assert.deepEqual([...a.refs].sort(), [...a.defs].sort());
|
||||
});
|
||||
|
||||
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
|
||||
// Markdown that DOCUMENTS footnote syntax inside a code fence. The example
|
||||
// definition line must be preserved verbatim inside the code block and not
|
||||
|
||||
Reference in New Issue
Block a user