diff --git a/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts index 844134f6..6c87f2d6 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts @@ -55,10 +55,11 @@ describe("footnote markdown round-trip", () => { expect(html).not.toContain("data-footnote-def"); }); - it("extractFootnoteDefinitions de-duplicates colliding ids and rewrites markers", () => { - // Two definitions share id `d`, and the body has two `[^d]` markers. The - // output must keep BOTH definitions with DISTINCT ids and rewrite the second - // marker so the (reference, definition) pairing stays 1:1. + it("extractFootnoteDefinitions keeps the FIRST duplicate definition and reuses markers", () => { + // Two definitions share id `d`, and the body has two `[^d]` markers. Under + // the import model (#166) duplicate definition ids are FIRST-WINS: only the + // first definition is kept; markers are NEVER rewritten, so the two `[^d]` + // references reuse the single footnote. const md = [ "See here[^d] and there[^d].", "", @@ -68,30 +69,23 @@ describe("footnote markdown round-trip", () => { const { body, section } = extractFootnoteDefinitions(md); - // Pull out the def ids from the section in order. const defIds = Array.from( section.matchAll(/data-footnote-def data-id="([^"]+)"/g), ).map((m) => m[1]); - expect(defIds.length).toBe(2); - expect(new Set(defIds).size).toBe(2); // distinct - expect(defIds[0]).toBe("d"); // first definition keeps the id - - // Both definition texts survive. + expect(defIds).toEqual(["d"]); // first-wins: one definition expect(section).toContain("first"); - expect(section).toContain("second"); + expect(section).not.toContain("second"); // duplicate dropped - // The body still has two markers, now pointing at the two distinct ids. + // Both markers stay `[^d]` (reuse) — no `d__2` minting. const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map( (m) => m[1], ); - expect(refIds.length).toBe(2); - expect(refIds.sort()).toEqual(defIds.sort()); + expect(refIds).toEqual(["d", "d"]); }); - it("extractFootnoteDefinitions dedups DETERMINISTICALLY (same input -> same ids)", () => { - // The derived id must be a pure function of the input markdown so importing - // the same source twice (or via the editor and the MCP mirror) yields - // identical ids — never random/time-based. + it("extractFootnoteDefinitions is DETERMINISTIC and stable (same input -> same output)", () => { + // The output must be a pure function of the input markdown so importing the + // same source twice (or via the editor and the MCP mirror) is identical. const md = [ "See[^d] one[^d] two[^d].", "", @@ -113,15 +107,13 @@ describe("footnote markdown round-trip", () => { const a = run(); const b = run(); - // Identical across runs (this is what would FAIL on the random-id version). - expect(a.defIds).toEqual(b.defIds); - expect(a.refIds).toEqual(b.refIds); - // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3". - expect(a.defIds).toEqual(["d", "d__2", "d__3"]); - expect(a.refIds.sort()).toEqual(a.defIds.sort()); + expect(a).toEqual(b); + // First-wins: one kept definition `d`; all three reuse markers stay `d`. + expect(a.defIds).toEqual(["d"]); + expect(a.refIds).toEqual(["d", "d", "d"]); }); - it("markdownToHtml with duplicate ids renders two distinct footnote defs", async () => { + it("markdownToHtml with a reused id renders ONE shared footnote def", async () => { const md = [ "See here[^d] and there[^d].", "", @@ -132,9 +124,8 @@ describe("footnote markdown round-trip", () => { const defIds = Array.from( html.matchAll(/data-footnote-def data-id="([^"]+)"/g), ).map((m) => m[1]); - expect(defIds.length).toBe(2); - expect(new Set(defIds).size).toBe(2); + expect(defIds).toEqual(["d"]); // one shared definition expect(html).toContain("first"); - expect(html).toContain("second"); + expect(html).not.toContain("second"); }); }); diff --git a/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts new file mode 100644 index 00000000..5790faf8 --- /dev/null +++ b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts @@ -0,0 +1,162 @@ +import { describe, it, expect } from "vitest"; +import { Editor } from "@tiptap/core"; +import { Document } from "@tiptap/extension-document"; +import { Paragraph } from "@tiptap/extension-paragraph"; +import { Text } from "@tiptap/extension-text"; +import { Node as PMNode, Fragment, Slice } from "@tiptap/pm/model"; +import { FootnoteReference } from "./footnote-reference"; +import { FootnotesList } from "./footnotes-list"; +import { FootnoteDefinition } from "./footnote-definition"; +import { footnotePastePlugin } from "./footnote-sync"; +import { + FOOTNOTE_REFERENCE_NAME, + FOOTNOTE_DEFINITION_NAME, + FOOTNOTES_LIST_NAME, +} from "./footnote-util"; + +// transformPasted reuse semantics (#166): a pasted reference to an id that +// already exists must KEEP the id (reuse → resolves to the existing footnote); +// only a pasted DEFINITION that collides is re-id'd (it would otherwise clobber +// the existing definition's text), and its paired references follow it. + +const extensions = [ + Document, + Paragraph, + Text, + FootnoteReference, + FootnotesList, + FootnoteDefinition, +]; + +/** An editor whose doc already contains footnote "a" (ref + definition). */ +function makeEditorWithFootnoteA() { + return new Editor({ + extensions, + content: { + type: "doc", + content: [ + { + type: "paragraph", + content: [ + { type: "text", text: "x" }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } }, + ], + }, + { + type: FOOTNOTES_LIST_NAME, + content: [ + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "a" }, + content: [ + { type: "paragraph", content: [{ type: "text", text: "note A" }] }, + ], + }, + ], + }, + ], + }, + }); +} + +/** Run footnotePastePlugin's transformPasted against the editor's current doc. */ +function paste(editor: Editor, slice: Slice): Slice { + const plugin = footnotePastePlugin(); + return plugin.props!.transformPasted!(slice, editor.view); +} + +/** Collect the ids of footnote refs/defs in a slice, in order (single DFS). */ +function sliceFootnoteIds(slice: Slice): Array<{ kind: string; id: string }> { + const out: Array<{ kind: string; id: string }> = []; + const walk = (frag: Fragment) => { + frag.forEach((node: PMNode) => { + if (node.type.name === FOOTNOTE_REFERENCE_NAME) + out.push({ kind: "ref", id: node.attrs.id }); + if (node.type.name === FOOTNOTE_DEFINITION_NAME) + out.push({ kind: "def", id: node.attrs.id }); + walk(node.content); + }); + }; + walk(slice.content); + return out; +} + +describe("footnotePastePlugin — reuse-aware id remap", () => { + it("keeps a pasted lone reference to an existing id (reuse, no remap)", () => { + const editor = makeEditorWithFootnoteA(); + const { schema } = editor; + // Paste: a paragraph containing only a reference to the existing id "a". + const slice = new Slice( + Fragment.from( + schema.nodes.paragraph.create(null, [ + schema.text("see "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + ]), + ), + 0, + 0, + ); + const out = paste(editor, slice); + // The reference keeps id "a" so it reuses the existing footnote. + expect(sliceFootnoteIds(out)).toEqual([{ kind: "ref", id: "a" }]); + editor.destroy(); + }); + + it("re-ids a pasted DEFINITION (and its paired reference) that collides", () => { + const editor = makeEditorWithFootnoteA(); + const { schema } = editor; + // Paste: a reference AND a definition both carrying the existing id "a". The + // definition would clobber the existing one, so both are remapped together. + const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.text("dup "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [ + schema.nodes.paragraph.create(null, [schema.text("pasted note")]), + ]), + ]), + ]), + 0, + 0, + ); + const out = paste(editor, slice); + const ids = sliceFootnoteIds(out); + // Both the pasted ref and def were remapped to the SAME fresh id (paired), + // and it is the deterministic derived id (not "a"). + const remappedIds = new Set(ids.map((x) => x.id)); + expect(remappedIds.size).toBe(1); + expect(remappedIds.has("a")).toBe(false); + expect([...remappedIds][0]).toBe("a__2"); + editor.destroy(); + }); + + it("leaves the slice untouched when no pasted definition collides", () => { + const editor = makeEditorWithFootnoteA(); + const { schema } = editor; + // A pasted reference+definition for a BRAND-NEW id "b" — no collision. + const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.text("new "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [ + schema.nodes.paragraph.create(null, [schema.text("note B")]), + ]), + ]), + ]), + 0, + 0, + ); + const out = paste(editor, slice); + expect(sliceFootnoteIds(out)).toEqual([ + { kind: "ref", id: "b" }, + { kind: "def", id: "b" }, + ]); + editor.destroy(); + }); +}); diff --git a/packages/editor-ext/src/lib/footnote/footnote-sync.ts b/packages/editor-ext/src/lib/footnote/footnote-sync.ts index 505a60d0..e861ed0e 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-sync.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-sync.ts @@ -73,51 +73,58 @@ function scan(doc: ProseMirrorNode): FootnoteScan { * * The overriding invariant is that NO definition is ever dropped here: every * definition occurrence ends up with a unique id and therefore survives the - * canonical rebuild. Duplicate references are likewise re-id'd (and paired with - * a duplicate definition when one exists) so importing/pasting `[^d]` twice with - * two `[^d]:` definitions yields TWO distinct footnotes rather than one. + * canonical rebuild. Repeated references that share an id are REUSE (one + * footnote) and are left untouched; only duplicate DEFINITIONS are re-id'd, so a + * pasted/merged second `[^d]:` survives as its own (then orphaned) footnote. */ interface CollisionPlan { /** - * Reference ids in document order, de-duplicated AFTER re-id. This is the - * source of truth for definition order/numbering, exactly as before — only - * now collisions have been resolved so it no longer hides duplicates. + * Distinct reference ids in document order (first appearance). Repeated ids + * are reuse and collapse to a single entry. Source of truth for definition + * order/numbering. */ referenceIds: string[]; - /** id -> definition node, after duplicates were re-id'd. One entry per id. */ + /** id -> definition node, after duplicate definitions were re-id'd. One per id. */ definitions: Map; /** - * Body reference re-id edits to apply (position of a reference node -> the - * fresh id it must carry). Empty when there are no colliding references. + * Body reference re-id edits. ALWAYS EMPTY under reuse semantics (references + * are never re-id'd); retained so the downstream consumer stays a harmless + * no-op rather than needing removal. */ refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>; - /** True when any collision required a re-id (refs and/or defs). */ + /** True when a duplicate definition required a re-id. */ changed: boolean; } /** - * Resolve duplicate-id collisions among references and definitions WITHOUT ever - * dropping a definition. + * Resolve the footnote id topology WITHOUT ever dropping a definition. * - * Strategy: - * - Walk references in document order. The FIRST reference for an id keeps it. - * Any later reference sharing that id is a duplicate and gets a fresh unique - * id; if a still-unclaimed duplicate definition with the original id exists, - * it is re-id'd to the SAME fresh id so the (ref, def) pair stays matched. - * - Walk definitions in document order. The FIRST definition for an id keeps - * it; later duplicates that were not already claimed by a duplicate reference - * get their own fresh unique id (surviving as a distinct footnote/orphan). + * Reference REUSE (Pandoc semantics, #166): repeated `[^a]` references that share + * an id are the SAME footnote — they get one number and one definition and are + * NEVER re-id'd. So the reference walk only records the FIRST occurrence of each + * id (de-duplicating in document order); later occurrences are reuse and produce + * no mutation at all. * - * Re-id determinism: every fresh id is DERIVED from document state via - * deriveFootnoteId (e.g. `X__2`, `X__3`, collision-bumped against the set of ids - * already present) — NEVER random/time-based. Because the sync plugin runs - * identically on every collaborating client, a deterministic re-id is the only - * way they can converge on the SAME ids; a random id (the previous - * implementation) made two clients editing the same duplicate-id document mint - * DIFFERENT ids for the same duplicate, causing permanent Yjs divergence. + * Duplicate DEFINITIONS (two `[^d]:` nodes sharing an id reaching the LIVE editor + * via paste/collab merge) keep the never-lose policy: the first keeps the id, and + * each later duplicate is re-id'd to a DETERMINISTIC fresh id (deriveFootnoteId: + * `X__2`, `X__3`, collision-bumped) so it survives as a distinct footnote — which, + * having no matching reference, then falls under the normal orphan policy. It is + * only ever dropped for lacking a reference, never for colliding. The IMPORT + * paths (footnote.marked.ts / MCP extractFootnotes) instead apply first-wins + + * drop + warn for duplicate definitions; that divergence is intentional — import + * is an agent-authored artifact we sanitize, the editor is live user data we must + * not lose. + * + * Re-id determinism: every fresh id is DERIVED from document state, NEVER + * random/time-based, because the sync plugin runs identically on every + * collaborating client and a random id would make two clients mint DIFFERENT ids + * for the same duplicate, causing permanent Yjs divergence. */ function resolveCollisions(scan: FootnoteScan): CollisionPlan { const definitions = new Map(); + // References are never re-id'd under reuse semantics, so this stays empty; it + // is retained so the CollisionPlan shape (and its no-op consumer) is unchanged. const refReids: Array<{ pos: number; node: ProseMirrorNode; @@ -127,17 +134,14 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { const seenRefIds = new Set(); let changed = false; - // `taken` is the set of every id that must be avoided when minting a derived - // id: all original reference + definition ids in the document PLUS every id we - // mint during this pass. It is pure document state, so the derivation stays - // deterministic across clients. Per-original occurrence counters make the k-th - // duplicate of `X` deterministically become `X__2`, `X__3`, ... + // `taken` is the set of every id to avoid when minting a derived id for a + // duplicate definition: all original reference + definition ids PLUS every id + // minted in this pass. Pure document state, so the derivation is deterministic + // across clients. const taken = new Set(); for (const occ of scan.refOccurrences) taken.add(occ.id); for (const occ of scan.defOccurrences) taken.add(occ.id); const occurrenceOf = new Map(); - // Mint a deterministic unique id for a duplicate of `originalId`. The first - // duplicate is occurrence 2 (the keeper is occurrence 1), then 3, 4, ... const mintId = (originalId: string): string => { const next = (occurrenceOf.get(originalId) ?? 1) + 1; occurrenceOf.set(originalId, next); @@ -146,63 +150,23 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { return id; }; - // Bucket definition occurrences by their original id so a duplicate reference - // can claim a matching (as-yet-unclaimed) duplicate definition and re-id the - // pair together. defByOriginalId[id] is consumed front-to-back. - const defByOriginalId = new Map(); - for (const occ of scan.defOccurrences) { - const arr = defByOriginalId.get(occ.id); - if (arr) arr.push(occ); - else defByOriginalId.set(occ.id, [occ]); - } - // The FIRST definition for each id is the canonical keeper of that id. - const claimed = new Set(); - + // References: record each DISTINCT id once, in first-appearance order. Repeated + // ids are reuse — nothing to mint, nothing to re-id. for (const ref of scan.refOccurrences) { if (!seenRefIds.has(ref.id)) { - // First reference with this id keeps it. seenRefIds.add(ref.id); referenceIds.push(ref.id); - continue; - } - // Duplicate reference: assign a deterministic derived id. Pair it with the - // next unclaimed duplicate definition (NOT the first keeper) carrying the - // same original id, if one exists, so the (ref, def) pairing is preserved - // 1:1. - const newId = mintId(ref.id); - refReids.push({ pos: ref.pos, node: ref.node, newId }); - seenRefIds.add(newId); - referenceIds.push(newId); - changed = true; - - const candidates = defByOriginalId.get(ref.id) ?? []; - // Skip the first occurrence (it keeps the original id); pick the first - // duplicate not already claimed. - for (let i = 1; i < candidates.length; i++) { - const cand = candidates[i]; - if (!claimed.has(cand)) { - claimed.add(cand); - definitions.set(newId, cand.node); - break; - } } } - // Now place every definition under a unique id. The first occurrence of each - // original id keeps it; remaining duplicates either were paired with a - // duplicate reference above (already placed) or get a fresh standalone id. + // Definitions: the first occurrence of each id keeps it; a later duplicate is + // re-id'd deterministically so it is never silently dropped (never-lose). const seenDefIds = new Set(); for (const occ of scan.defOccurrences) { - if (claimed.has(occ)) continue; // already placed against a duplicate ref id if (!seenDefIds.has(occ.id)) { seenDefIds.add(occ.id); definitions.set(occ.id, occ.node); } else { - // Duplicate definition with no duplicate reference to pair with: keep it - // with a deterministic derived id so it is NEVER silently dropped. (It - // becomes an orphan and is then subject to the normal orphan policy — but - // only ever because it has no matching reference, never because it - // collided.) const newId = mintId(occ.id); definitions.set(newId, occ.node); changed = true; @@ -546,13 +510,17 @@ export const footnotePastePluginKey = new PluginKey("footnotePaste"); * Without this, pasting a reference+definition pair copied from elsewhere — or * duplicating one in place — would merge with (or clobber) the existing footnote * of the same id. The schema-sync plugin already guarantees no definition is - * ever silently deleted after the fact (it re-id's collisions), but regenerating - * at paste time keeps the pasted footnote cleanly separate from the start and - * avoids any transient merge. + * ever silently deleted after the fact (it re-id's duplicate definitions), but + * regenerating at paste time keeps the pasted footnote cleanly separate from the + * start and avoids any transient merge. * - * Only COLLIDING ids are remapped: a self-paste of a lone reference whose id is - * not present elsewhere is left untouched (so it still resolves to its existing - * definition). + * REUSE-aware (#166): only a colliding DEFINITION forces a remap. Pasting a lone + * reference whose id already exists is REUSE — it must keep the id so it resolves + * to the existing footnote (one number, shared definition). So we remap an id + * only when the pasted slice itself carries a `footnoteDefinition` for it (which + * would otherwise clobber the existing definition's text); the matching pasted + * references are remapped along with it to stay paired. A self-paste of just a + * reference is left untouched. */ export function footnotePastePlugin(): Plugin { return new Plugin({ @@ -572,31 +540,35 @@ export function footnotePastePlugin(): Plugin { }); if (existing.size === 0) return slice; - // Build a remap (old id -> fresh id) for every COLLIDING id found in the - // pasted slice, shared by references and definitions so a pasted pair - // stays matched. A paste is a distinct local user action (not a - // shared-state convergence point), so determinism is not strictly - // required here — but we derive the new id deterministically anyway - // (deriveFootnoteId against the current doc's id set) for consistency - // with the sync/import paths and to keep Math.random off this code path. - const remap = new Map(); - const collectColliding = (node: ProseMirrorNode) => { - if ( - node.type.name === FOOTNOTE_REFERENCE_NAME || - node.type.name === FOOTNOTE_DEFINITION_NAME - ) { + // Ids the pasted slice DEFINES (carries a footnoteDefinition for). Only + // these can clobber an existing footnote's text, so only these force a + // remap; a pasted reference to an already-existing id is reuse and keeps + // its id. + const sliceDefIds = new Set(); + const collectDefIds = (node: ProseMirrorNode) => { + if (node.type.name === FOOTNOTE_DEFINITION_NAME) { const id = node.attrs.id; - if (id && existing.has(id) && !remap.has(id)) { - const newId = deriveFootnoteId(id, 2, existing); - remap.set(id, newId); - // Reserve it so a second colliding id deriving to the same base - // bumps instead of clashing. - existing.add(newId); - } + if (id) sliceDefIds.add(id); } - node.descendants(collectColliding); + node.descendants(collectDefIds); }; - slice.content.descendants(collectColliding); + slice.content.descendants(collectDefIds); + + // Build a remap (old id -> fresh id) for every colliding id the slice + // DEFINES, shared by references and definitions so a pasted pair stays + // matched. The new id is derived deterministically (deriveFootnoteId + // against the current doc's id set) for consistency with the sync/import + // paths and to keep Math.random off this code path. + const remap = new Map(); + for (const id of sliceDefIds) { + if (existing.has(id) && !remap.has(id)) { + const newId = deriveFootnoteId(id, 2, existing); + remap.set(id, newId); + // Reserve it so a second colliding id deriving to the same base + // bumps instead of clashing. + existing.add(newId); + } + } if (remap.size === 0) return slice; // Rewrite the colliding ids throughout the slice. diff --git a/packages/editor-ext/src/lib/footnote/footnote-util.ts b/packages/editor-ext/src/lib/footnote/footnote-util.ts index 7896595d..56813288 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-util.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-util.ts @@ -62,10 +62,11 @@ export function generateFootnoteId(): string { * `taken` is consulted but NOT mutated here; the caller adds the returned id to * its own seen-set before requesting the next derived id. * - * NOTE: this implementation is intentionally duplicated in - * packages/mcp/src/lib/collaboration.ts (deriveFootnoteId) - * and MUST stay in sync with it so markdown imported through either path yields - * identical ids. + * Used only inside editor-ext now (resolveCollisions for a re-id'd duplicate + * DEFINITION, and footnotePastePlugin). The MCP/marked import paths no longer + * derive ids — duplicate definitions there are first-wins-dropped (#166) — so + * there is no cross-package copy to keep in sync. The golden table in + * footnote-util.derive-id.test.ts pins the scheme. */ export function deriveFootnoteId( originalId: string, diff --git a/packages/editor-ext/src/lib/footnote/footnote.test.ts b/packages/editor-ext/src/lib/footnote/footnote.test.ts index 9ecf9a55..ff4e1625 100644 --- a/packages/editor-ext/src/lib/footnote/footnote.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote.test.ts @@ -307,13 +307,12 @@ describe("footnote sync plugin (orphans)", () => { editor.destroy(); }); - it("two definitions sharing an id (with two matching references) BOTH survive the first edit (no data loss)", () => { - // Reproduces the verified data-loss bug: two footnoteDefinition nodes share - // id "d", and there are two references with id "d". The OLD code built the - // definitions Map last-wins and emitted exactly one definition for the - // de-duplicated reference, so the very first keystroke's sync transaction - // deleted the whole list and rebuilt it from one definition — silently - // destroying "first" and keeping only "second". + it("repeated references REUSE one footnote; a duplicate definition is dropped (first-wins)", () => { + // Reuse semantics (#166): two references with id "d" are the SAME footnote + // (one number, shared definition) — they are NEVER re-id'd. Two definitions + // sharing id "d" are first-wins: the first keeps "d", the second is re-id'd + // to a deterministic orphan id and then dropped by the orphan policy (it has + // no matching reference). So the result is ONE reused footnote on "first". const editor = makeEditor({ type: "doc", content: [ @@ -351,8 +350,8 @@ describe("footnote sync plugin (orphans)", () => { editor.commands.insertContentAt(1, " "); const doc = editor.state.doc; - // BOTH definitions survive. - expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2); + // One shared definition survives (first-wins); the duplicate is dropped. + expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(1); const defTexts: string[] = []; const defIds: string[] = []; doc.descendants((node) => { @@ -361,27 +360,23 @@ describe("footnote sync plugin (orphans)", () => { defTexts.push(node.textContent); } }); - // No content was lost: both "first" and "second" are still present. - expect(defTexts.sort()).toEqual(["first", "second"]); - // The colliding ids were made distinct. - expect(new Set(defIds).size).toBe(2); - // Each definition's id matches exactly one reference (1:1 pairing). + expect(defTexts).toEqual(["first"]); + expect(defIds).toEqual(["d"]); + // Both references keep id "d" (reuse — not re-id'd). const refIds: string[] = []; doc.descendants((node) => { if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id); }); - expect(refIds.sort()).toEqual(defIds.sort()); + expect(refIds).toEqual(["d", "d"]); editor.destroy(); }); - it("re-ids colliding duplicates DETERMINISTICALLY (two clients converge to identical ids)", () => { + it("reuse outcome is DETERMINISTIC across clients (Yjs convergence)", () => { // Cross-client determinism guard. Two collaborating clients each see the - // SAME duplicate-id document and each make a local edit. The sync plugin - // runs identically on every client, so it MUST mint the SAME new ids on both - // — otherwise the two clients diverge permanently over Yjs (duplicated - // footnotes). This is exactly the blocker the previous random-id - // (generateFootnoteId / Math.random) implementation caused: it would mint - // DIFFERENT ids on each client and this assertion would fail. + // SAME document and make a local edit; the sync plugin runs identically, so + // the resolved state MUST be identical (else they diverge over Yjs). Under + // reuse the three "d" references collapse to one footnote and the duplicate + // definitions are dropped (first-wins) — deterministically on every client. const duplicateDoc = { type: "doc", content: [ @@ -435,30 +430,28 @@ describe("footnote sync plugin (orphans)", () => { editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs const refIds: string[] = []; const defIds: string[] = []; + const defTexts: string[] = []; editor.state.doc.descendants((node) => { if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id); - if (node.type.name === FOOTNOTE_DEFINITION_NAME) + if (node.type.name === FOOTNOTE_DEFINITION_NAME) { defIds.push(node.attrs.id); + defTexts.push(node.textContent); + } }); editor.destroy(); - return { refIds, defIds }; + return { refIds, defIds, defTexts }; }; const clientA = idsAfterLocalEdit(); const clientB = idsAfterLocalEdit(); - // Both clients computed IDENTICAL ids (the property that makes Yjs converge). - expect(clientA.refIds).toEqual(clientB.refIds); - expect(clientA.defIds).toEqual(clientB.defIds); - - // And the ids are deterministic-derived (not random uuid-style): the keeper - // keeps "d", the duplicates become "d__2", "d__3". - expect(new Set(clientA.refIds)).toEqual(new Set(["d", "d__2", "d__3"])); - // Every definition survived with a unique id, 1:1 with the references. - expect(clientA.defIds.length).toBe(3); - expect(new Set(clientA.defIds).size).toBe(3); - expect([...clientA.refIds].sort()).toEqual([...clientA.defIds].sort()); + // Both clients resolved to IDENTICAL state (the Yjs-convergence property). + expect(clientA).toEqual(clientB); + // Reuse: the three references stay "d"; one definition survives (first-wins). + expect(clientA.refIds).toEqual(["d", "d", "d"]); + expect(clientA.defIds).toEqual(["d"]); + expect(clientA.defTexts).toEqual(["one"]); }); it("removes an orphan definition with no matching reference", () => { diff --git a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts index be955793..5834c1d5 100644 --- a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts +++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts @@ -13,36 +13,33 @@ function bodyMarkers(body: string): string[] { return [...body.matchAll(/\[\^([^\]\s]+)\]/g)].map((m) => m[1]); } -describe("extractFootnoteDefinitions: more definitions than markers (orphans)", () => { - // Body has ONE `[^d]` reference marker but THREE `[^d]:` definitions. The - // surplus definitions have no marker to pair with — they must NOT be silently - // merged into one footnote (the editor's last-wins sync would otherwise drop - // two of them). The dedup gives each colliding definition a deterministic - // derived id so all three survive as distinct footnoteDefinition nodes. +describe("extractFootnoteDefinitions: duplicate definition ids (first-wins)", () => { + // Body has ONE `[^d]` reference but THREE `[^d]:` definitions. Under the + // import model (#166) a duplicate definition id is FIRST-WINS: only the first + // definition is kept; the rest are DROPPED (and surfaced by analyzeFootnotes, + // not silently re-id'd into orphan footnotes as before). Reference markers are + // never rewritten, so repeated references would reuse the single footnote. const md = ["See[^d].", "", "[^d]: a", "[^d]: b", "[^d]: c"].join("\n"); - it("emits 3 DISTINCT definition ids: d, d__2, d__3 (derived scheme, in order)", () => { + it("keeps only the FIRST definition for the id (first-wins)", () => { const { section } = extractFootnoteDefinitions(md); const ids = defIds(section); - expect(ids).toEqual(["d", "d__2", "d__3"]); - // All distinct: nothing was merged away. - expect(new Set(ids).size).toBe(3); + expect(ids).toEqual(["d"]); }); - it("preserves each definition's text against its (possibly derived) id", () => { + it("keeps the first definition's text and drops the duplicates", () => { const { section } = extractFootnoteDefinitions(md); - // First definition keeps the original id and its text. expect(section).toContain('data-footnote-def data-id="d">

a

'); - // The two surplus definitions survive as orphans with derived ids. - expect(section).toContain('data-footnote-def data-id="d__2">

b

'); - expect(section).toContain('data-footnote-def data-id="d__3">

c

'); + // No derived `d__2` / `d__3` ids are emitted anymore. + expect(section).not.toContain("d__2"); + expect(section).not.toContain("d__3"); + // The dropped duplicate texts are not in the section. + expect(section).not.toContain("

b

"); + expect(section).not.toContain("

c

"); }); - it("leaves the SINGLE body marker as [^d] (no surplus marker to rewrite)", () => { + it("leaves the SINGLE body marker as [^d] (markers are never rewritten)", () => { const { body } = extractFootnoteDefinitions(md); - // There is exactly one reference marker and it is untouched: the keeper - // definition pairs with it. The orphan defs have no marker, so the body is - // unchanged except for the stripped definition lines. expect(bodyMarkers(body)).toEqual(["d"]); expect(body).toContain("See[^d]."); // The definition lines themselves were pulled OUT of the body. @@ -55,9 +52,21 @@ describe("extractFootnoteDefinitions: more definitions than markers (orphans)", const { section } = extractFootnoteDefinitions(md); expect(section.startsWith("
")).toBe(true); expect(section.endsWith("
")).toBe(true); - // Exactly three definition divs. - expect( - [...section.matchAll(/
{ + // Pandoc semantics: many `[^a]` references + one `[^a]:` definition = one + // footnote, shared. Markers are left intact so the editor numbers them as one. + const md = ["A[^a] B[^a] C[^a].", "", "[^a]: shared note"].join("\n"); + + it("emits exactly one definition and leaves every reference marker as [^a]", () => { + const { section, body } = extractFootnoteDefinitions(md); + expect(defIds(section)).toEqual(["a"]); + expect(section).toContain('data-footnote-def data-id="a">

shared note

'); + // All three reference markers stay `a` (no `a__2`/`a__3` minting). + expect(bodyMarkers(body)).toEqual(["a", "a", "a"]); }); }); diff --git a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts index b47cf4a4..58dd27d7 100644 --- a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts +++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts @@ -1,5 +1,4 @@ import { marked } from "marked"; -import { deriveFootnoteId } from "../../footnote/footnote-util"; /** * Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline. @@ -53,10 +52,6 @@ function escapeAttr(value: string): string { return String(value).replace(/&/g, "&").replace(/"/g, """); } -function escapeRegExp(value: string): string { - return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} - /** * Extract `[^id]: text` definition lines from the markdown body, returning the * cleaned body plus a rendered
(empty string when no @@ -101,70 +96,32 @@ export function extractFootnoteDefinitions(markdown: string): { return { body: markdown, section: "" }; } - // De-duplicate colliding definition ids. Two definitions sharing an id (e.g. - // `[^d]: first` / `[^d]: second`) would otherwise collapse into one footnote - // downstream (the editor's last-wins sync). Rename each colliding id to a - // DETERMINISTIC derived one AND rewrite the corresponding `[^id]` reference - // marker so the (reference, definition) pairing stays 1:1. The FIRST - // definition keeps the id and pairs with the FIRST `[^id]` marker; the Nth - // duplicate gets the derived id `${id}__${N}` and rewrites the Nth `[^id]` - // marker. If there are fewer markers than definitions, the surplus definition - // keeps a derived (orphan) id so it is never silently merged away. - // - // The id is derived (deriveFootnoteId), NOT random: importing the same - // markdown through two paths (here and the MCP mirror) must yield identical - // ids, and re-importing the same markdown twice must be stable. - let dedupedBody = bodyLines.join("\n"); - // Every original definition id is reserved up front so a derived id can never - // collide with an unrelated original id present in the document. - const taken = new Set(definitions.map((d) => d.id)); - const seenDefIds = new Map(); // original id -> how many seen + // Duplicate definition ids (e.g. `[^d]: first` / `[^d]: second`): FIRST WINS, + // the rest are DROPPED. Reference markers are left UNTOUCHED so repeated `[^a]` + // references reuse the single footnote (Pandoc semantics, #166). This differs + // from the live editor's never-lose policy (resolveCollisions re-ids a + // duplicate definition into an orphan) on purpose: an import is an + // agent-authored artifact we sanitize, and the dropped duplicate is surfaced + // to the caller via analyzeFootnotes' `duplicateDefinitions` warning instead. + const firstById = new Map(); // id -> first definition text for (const def of definitions) { - const originalId = def.id; - const count = seenDefIds.get(originalId) ?? 0; - seenDefIds.set(originalId, count + 1); - if (count === 0) continue; // first definition keeps its id - - // count is the 0-based number of PRIOR occurrences; this is occurrence - // (count + 1), i.e. 2 for the first duplicate, 3 for the next, ... - const newId = deriveFootnoteId(originalId, count + 1, taken); - taken.add(newId); - def.id = newId; - - // Rewrite the NEXT still-unrewritten `[^originalId]` marker that does not - // belong to the keeper definition. After a prior duplicate rewrote its - // marker (to `[^someNewId]`), it no longer matches `[^originalId]`, so the - // remaining matches are: index 0 = the keeper's marker (left alone), index 1 - // = this duplicate's marker. Rewrite index 1. - let occurrence = 0; - let rewritten = false; - const re = new RegExp(`\\[\\^${escapeRegExp(originalId)}\\]`, "g"); - dedupedBody = dedupedBody.replace(re, (match) => { - const idx = occurrence++; - if (!rewritten && idx === 1) { - rewritten = true; - return `[^${newId}]`; - } - return match; - }); - // If there was no second marker (more definitions than references), the - // duplicate simply survives as an orphan with its fresh id — no body change. + if (!firstById.has(def.id)) firstById.set(def.id, def.text); } - const defsHtml = definitions - .map((d) => { + const defsHtml = [...firstById.entries()] + .map(([id, text]) => { // Render the definition text as inline markdown so emphasis/links inside // a footnote survive the round-trip; wrap in a paragraph (the node's // content is paragraph+). - const inner = marked.parseInline(d.text || ""); + const inner = marked.parseInline(text || ""); return `

${inner}

`; }) .join(""); return { - body: dedupedBody, + body: bodyLines.join("\n"), section: `
${defsHtml}
`, }; } diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index a825dd03..28e5438e 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -9,6 +9,7 @@ import WebSocket from "ws"; import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js"; import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js"; import { docmostExtensions } from "./lib/docmost-schema.js"; +import { analyzeFootnotes } from "./lib/footnote-analyze.js"; import { buildPageTree } from "./lib/tree.js"; import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js"; import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js"; @@ -566,7 +567,9 @@ export class DocmostClient { // Always fetch subpages to provide context to the agent let subpages = []; try { - subpages = await this.listSidebarPages(resultData.spaceId, pageId); + // `pageId` may be a slugId, but the sidebar-pages endpoint requires the + // UUID; `resultData.id` holds the resolved UUID returned by getPageRaw. + subpages = await this.listSidebarPages(resultData.spaceId, resultData.id); } catch (e) { console.warn("Failed to fetch subpages:", e); @@ -814,7 +817,11 @@ export class DocmostClient { if (title) { await this.client.post("/pages/update", { pageId: newPageId, title }); } - return this.getPage(newPageId); + const page = await this.getPage(newPageId); + // Surface non-fatal footnote problems (dangling refs, empty/duplicate + // definitions, markers in tables) so the agent can fix its markup (#166). + const { warnings } = analyzeFootnotes(content); + return warnings.length > 0 ? { ...page, footnoteWarnings: warnings } : page; } /** * Update a page's content from markdown and optionally its title. @@ -844,12 +851,15 @@ export class DocmostClient { } throw new Error(`Failed to update page content: ${error.message}`); } + const { warnings } = analyzeFootnotes(content); return { success: true, modified: true, message: "Page updated successfully.", pageId: pageId, verify: mutation.verify, + // Non-fatal footnote diagnostics (#166); omitted when there are none. + ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}), }; } /** @@ -1119,6 +1129,11 @@ export class DocmostClient { if (meta?.pageId && meta.pageId !== pageId) { result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`; } + // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions + // and references live there, not in the front-matter/comments sections). + const { warnings } = analyzeFootnotes(body); + if (warnings.length > 0) + result.footnoteWarnings = warnings; return result; } /** @@ -2422,9 +2437,9 @@ export class DocmostClient { const raw = await this.getPageRaw(pageId); const current = raw.content || { type: "doc", content: [] }; runTransform(current); - // Exercise the same Yjs encoder the apply path uses, so the preview - // fails with the SAME descriptive error when the doc is not encodable - // instead of returning a misleadingly-green diff. + // Run an independent Yjs-encodability check (same sanitize + schema as the + // apply path), so the preview fails with the same descriptive error when + // the doc is not encodable instead of returning a misleadingly-green diff. assertYjsEncodable(newDoc); return { pushed: false, diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index fc72bbf3..dc4ef79c 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -285,44 +285,6 @@ const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/; function escapeFootnoteAttr(value) { return String(value).replace(/&/g, "&").replace(/"/g, """); } -function escapeFootnoteRegExp(value) { - return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} -/** - * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of - * an original id `X` during definition dedup. - * - * EXACT MIRROR of editor-ext `deriveFootnoteId` - * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST - * STAY IN SYNC: the same markdown imported through the editor and through this - * MCP path has to produce identical ids, and the sync plugin (which re-ids on - * every collaborating client) relies on the same scheme to converge. NEVER use - * Math.random()/Date.now()/uuid here — a random id would diverge across clients. - * - * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped - * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in - * `taken` (the set of ids already present / already minted — pure doc state). - */ -function deriveFootnoteId(originalId, occurrence, taken) { - let candidate = `${originalId}__${occurrence}`; - let n = 0; - while (taken.has(candidate)) { - n += 1; - candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`; - } - return candidate; -} -/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */ -function footnoteSuffix(n) { - let out = ""; - let x = n; - while (x > 0) { - const rem = (x - 1) % 25; - out = String.fromCharCode(98 + rem) + out; // 98 = 'b' - x = Math.floor((x - 1) / 25); - } - return out; -} const footnoteRefMarkedExtension = { name: "footnoteRef", level: "inline", @@ -371,43 +333,22 @@ function extractFootnotes(markdown) { } if (defs.length === 0) return { body: markdown, section: "" }; - // De-duplicate colliding definition ids (mirror of editor-ext - // extractFootnoteDefinitions). Two definitions sharing an id would otherwise - // collapse into one footnote downstream; rename each colliding id to a - // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]` - // marker so the (reference, definition) pairing stays 1:1. Determinism lets - // the same markdown imported here and via the editor produce identical ids. - let dedupedBody = bodyLines.join("\n"); - const taken = new Set(defs.map((d) => d.id)); - const seenDefIds = new Map(); + // Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of + // editor-ext extractFootnoteDefinitions). Reference markers are left untouched + // so repeated `[^a]` references reuse the single footnote (Pandoc semantics, + // #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes + // (`duplicateDefinitions`), not silently lost. MUST stay in sync with the + // editor-ext mirror. + const firstById = new Map(); // id -> first definition text for (const def of defs) { - const originalId = def.id; - const count = seenDefIds.get(originalId) ?? 0; - seenDefIds.set(originalId, count + 1); - if (count === 0) - continue; // first definition keeps its id - const newId = deriveFootnoteId(originalId, count + 1, taken); - taken.add(newId); - def.id = newId; - // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone), - // index 1 = this duplicate's marker. Rewrite index 1. - let occurrence = 0; - let rewritten = false; - const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g"); - dedupedBody = dedupedBody.replace(re, (match) => { - const idx = occurrence++; - if (!rewritten && idx === 1) { - rewritten = true; - return `[^${newId}]`; - } - return match; - }); + if (!firstById.has(def.id)) + firstById.set(def.id, def.text); } - const inner = defs - .map((d) => `

${marked.parseInline(d.text || "")}

`) + const inner = [...firstById.entries()] + .map(([id, text]) => `

${marked.parseInline(text || "")}

`) .join(""); return { - body: dedupedBody, + body: bodyLines.join("\n"), section: `
${inner}
`, }; } diff --git a/packages/mcp/build/lib/footnote-analyze.js b/packages/mcp/build/lib/footnote-analyze.js new file mode 100644 index 00000000..919674b3 --- /dev/null +++ b/packages/mcp/build/lib/footnote-analyze.js @@ -0,0 +1,115 @@ +/** + * Footnote diagnostics for imported Markdown (issue #166). + * + * A PURE, fence-aware text scan (independent of the Markdown->ProseMirror + * conversion path, so it reports the same problems for `create_page`, + * `update_page` and `import_page_markdown`). It never changes the document — the + * importer still creates the page; this only surfaces footnote problems to the + * caller so an agent can fix its own markup instead of shipping broken footnotes. + * + * Detected problems: + * - danglingReferences: a `[^id]` reference with no `[^id]:` definition. + * - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace. + * - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the + * first is kept on import — first-wins; see extractFootnotes). + * - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic: + * the line, trimmed, starts with `|`) — footnotes in table cells often do not + * render as expected. + */ +/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */ +const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ +const REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ +function forEachReference(line, onRef) { + REF_RE_G.lastIndex = 0; + let m; + while ((m = REF_RE_G.exec(line)) !== null) + onRef(m[1]); +} +/** + * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. + */ +export function analyzeFootnotes(markdown) { + const lines = markdown.split("\n"); + // Distinct reference ids in first-appearance order, plus the set of ids seen + // inside a table row. + const refIds = []; + const refIdSet = new Set(); + const referencesInTables = new Set(); + const addRef = (id, inTable) => { + if (!refIdSet.has(id)) { + refIdSet.add(id); + refIds.push(id); + } + if (inTable) + referencesInTables.add(id); + }; + // Definition texts per id, in first-appearance order of the id. + const defTextsById = new Map(); + let fence = null; + for (const line of lines) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) + fence = marker; + else if (marker === fence) + fence = null; + continue; + } + // Footnote syntax shown inside a code fence is not real markup. + if (fence !== null) + continue; + const defM = DEF_RE.exec(line); + if (defM) { + const id = defM[1]; + const text = defM[2]; + const arr = defTextsById.get(id); + if (arr) + arr.push(text); + else + defTextsById.set(id, [text]); + // A definition's TEXT can itself reference another footnote (`[^a]: see + // [^b]`); count those so such a `[^b]` is not falsely reported dangling. + forEachReference(text, (rid) => addRef(rid, false)); + continue; + } + const inTable = line.trimStart().startsWith("|"); + forEachReference(line, (id) => addRef(id, inTable)); + } + const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); + const duplicateDefinitions = []; + const emptyDefinitions = []; + for (const [id, texts] of defTextsById) { + if (texts.length >= 2) + duplicateDefinitions.push(id); + // First-wins: the kept definition is the first one; flag it if it is blank. + if ((texts[0] ?? "").trim().length === 0) + emptyDefinitions.push(id); + } + const tableRefs = [...referencesInTables]; + const warnings = []; + const list = (ids) => ids.map((id) => `[^${id}]`).join(", "); + if (danglingReferences.length > 0) { + warnings.push(`Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`); + } + if (emptyDefinitions.length > 0) { + warnings.push(`Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`); + } + if (duplicateDefinitions.length > 0) { + warnings.push(`Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`); + } + if (tableRefs.length > 0) { + warnings.push(`Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`); + } + return { + danglingReferences, + emptyDefinitions, + duplicateDefinitions, + referencesInTables: tableRefs, + warnings, + }; +} diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index bd891fc9..36ee85b6 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -23,6 +23,7 @@ import { MutationResult, } from "./lib/collaboration.js"; import { docmostExtensions } from "./lib/docmost-schema.js"; +import { analyzeFootnotes } from "./lib/footnote-analyze.js"; import { buildPageTree } from "./lib/tree.js"; import { serializeDocmostMarkdown, @@ -1054,7 +1055,11 @@ export class DocmostClient { await this.client.post("/pages/update", { pageId: newPageId, title }); } - return this.getPage(newPageId); + const page = await this.getPage(newPageId); + // Surface non-fatal footnote problems (dangling refs, empty/duplicate + // definitions, markers in tables) so the agent can fix its markup (#166). + const { warnings } = analyzeFootnotes(content); + return warnings.length > 0 ? { ...page, footnoteWarnings: warnings } : page; } /** @@ -1095,12 +1100,15 @@ export class DocmostClient { throw new Error(`Failed to update page content: ${error.message}`); } + const { warnings } = analyzeFootnotes(content); return { success: true, modified: true, message: "Page updated successfully.", pageId: pageId, verify: mutation.verify, + // Non-fatal footnote diagnostics (#166); omitted when there are none. + ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}), }; } @@ -1416,6 +1424,10 @@ export class DocmostClient { if (meta?.pageId && meta.pageId !== pageId) { result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`; } + // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions + // and references live there, not in the front-matter/comments sections). + const { warnings } = analyzeFootnotes(body); + if (warnings.length > 0) result.footnoteWarnings = warnings; return result; } diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index efc7bf17..178ff71b 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -323,51 +323,6 @@ function escapeFootnoteAttr(value: string): string { return String(value).replace(/&/g, "&").replace(/"/g, """); } -function escapeFootnoteRegExp(value: string): string { - return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} - -/** - * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of - * an original id `X` during definition dedup. - * - * EXACT MIRROR of editor-ext `deriveFootnoteId` - * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST - * STAY IN SYNC: the same markdown imported through the editor and through this - * MCP path has to produce identical ids, and the sync plugin (which re-ids on - * every collaborating client) relies on the same scheme to converge. NEVER use - * Math.random()/Date.now()/uuid here — a random id would diverge across clients. - * - * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped - * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in - * `taken` (the set of ids already present / already minted — pure doc state). - */ -function deriveFootnoteId( - originalId: string, - occurrence: number, - taken: Set, -): string { - let candidate = `${originalId}__${occurrence}`; - let n = 0; - while (taken.has(candidate)) { - n += 1; - candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`; - } - return candidate; -} - -/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */ -function footnoteSuffix(n: number): string { - let out = ""; - let x = n; - while (x > 0) { - const rem = (x - 1) % 25; - out = String.fromCharCode(98 + rem) + out; // 98 = 'b' - x = Math.floor((x - 1) / 25); - } - return out; -} - const footnoteRefMarkedExtension = { name: "footnoteRef", level: "inline" as const, @@ -419,48 +374,27 @@ function extractFootnotes(markdown: string): { } if (defs.length === 0) return { body: markdown, section: "" }; - // De-duplicate colliding definition ids (mirror of editor-ext - // extractFootnoteDefinitions). Two definitions sharing an id would otherwise - // collapse into one footnote downstream; rename each colliding id to a - // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]` - // marker so the (reference, definition) pairing stays 1:1. Determinism lets - // the same markdown imported here and via the editor produce identical ids. - let dedupedBody = bodyLines.join("\n"); - const taken = new Set(defs.map((d) => d.id)); - const seenDefIds = new Map(); + // Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of + // editor-ext extractFootnoteDefinitions). Reference markers are left untouched + // so repeated `[^a]` references reuse the single footnote (Pandoc semantics, + // #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes + // (`duplicateDefinitions`), not silently lost. MUST stay in sync with the + // editor-ext mirror. + const firstById = new Map(); // id -> first definition text for (const def of defs) { - const originalId = def.id; - const count = seenDefIds.get(originalId) ?? 0; - seenDefIds.set(originalId, count + 1); - if (count === 0) continue; // first definition keeps its id - const newId = deriveFootnoteId(originalId, count + 1, taken); - taken.add(newId); - def.id = newId; - // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone), - // index 1 = this duplicate's marker. Rewrite index 1. - let occurrence = 0; - let rewritten = false; - const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g"); - dedupedBody = dedupedBody.replace(re, (match) => { - const idx = occurrence++; - if (!rewritten && idx === 1) { - rewritten = true; - return `[^${newId}]`; - } - return match; - }); + if (!firstById.has(def.id)) firstById.set(def.id, def.text); } - const inner = defs + const inner = [...firstById.entries()] .map( - (d) => + ([id, text]) => `

${marked.parseInline(d.text || "")}

`, + id, + )}">

${marked.parseInline(text || "")}

`, ) .join(""); return { - body: dedupedBody, + body: bodyLines.join("\n"), section: `
${inner}
`, }; } diff --git a/packages/mcp/src/lib/footnote-analyze.ts b/packages/mcp/src/lib/footnote-analyze.ts new file mode 100644 index 00000000..97264dbc --- /dev/null +++ b/packages/mcp/src/lib/footnote-analyze.ts @@ -0,0 +1,138 @@ +/** + * Footnote diagnostics for imported Markdown (issue #166). + * + * A PURE, fence-aware text scan (independent of the Markdown->ProseMirror + * conversion path, so it reports the same problems for `create_page`, + * `update_page` and `import_page_markdown`). It never changes the document — the + * importer still creates the page; this only surfaces footnote problems to the + * caller so an agent can fix its own markup instead of shipping broken footnotes. + * + * Detected problems: + * - danglingReferences: a `[^id]` reference with no `[^id]:` definition. + * - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace. + * - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the + * first is kept on import — first-wins; see extractFootnotes). + * - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic: + * the line, trimmed, starts with `|`) — footnotes in table cells often do not + * render as expected. + */ + +/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */ +const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ +const REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; + +export interface FootnoteDiagnostics { + /** Reference ids (distinct, document order) with no matching definition. */ + danglingReferences: string[]; + /** Definition ids whose first (kept) text is empty/whitespace. */ + emptyDefinitions: string[]; + /** Ids defined by two or more `[^id]:` lines (only the first is kept). */ + duplicateDefinitions: string[]; + /** Reference ids found inside a GFM table row (heuristic). */ + referencesInTables: string[]; + /** Human-readable warning lines for the tool result (one per problem class). */ + warnings: string[]; +} + +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ +function forEachReference(line: string, onRef: (id: string) => void): void { + REF_RE_G.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = REF_RE_G.exec(line)) !== null) onRef(m[1]); +} + +/** + * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. + */ +export function analyzeFootnotes(markdown: string): FootnoteDiagnostics { + const lines = markdown.split("\n"); + + // Distinct reference ids in first-appearance order, plus the set of ids seen + // inside a table row. + const refIds: string[] = []; + const refIdSet = new Set(); + const referencesInTables = new Set(); + const addRef = (id: string, inTable: boolean) => { + if (!refIdSet.has(id)) { + refIdSet.add(id); + refIds.push(id); + } + if (inTable) referencesInTables.add(id); + }; + + // Definition texts per id, in first-appearance order of the id. + const defTextsById = new Map(); + + let fence: string | null = null; + for (const line of lines) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) fence = marker; + else if (marker === fence) fence = null; + continue; + } + // Footnote syntax shown inside a code fence is not real markup. + if (fence !== null) continue; + + const defM = DEF_RE.exec(line); + if (defM) { + const id = defM[1]; + const text = defM[2]; + const arr = defTextsById.get(id); + if (arr) arr.push(text); + else defTextsById.set(id, [text]); + // A definition's TEXT can itself reference another footnote (`[^a]: see + // [^b]`); count those so such a `[^b]` is not falsely reported dangling. + forEachReference(text, (rid) => addRef(rid, false)); + continue; + } + + const inTable = line.trimStart().startsWith("|"); + forEachReference(line, (id) => addRef(id, inTable)); + } + + const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); + const duplicateDefinitions: string[] = []; + const emptyDefinitions: string[] = []; + for (const [id, texts] of defTextsById) { + if (texts.length >= 2) duplicateDefinitions.push(id); + // First-wins: the kept definition is the first one; flag it if it is blank. + if ((texts[0] ?? "").trim().length === 0) emptyDefinitions.push(id); + } + const tableRefs = [...referencesInTables]; + + const warnings: string[] = []; + const list = (ids: string[]) => ids.map((id) => `[^${id}]`).join(", "); + if (danglingReferences.length > 0) { + warnings.push( + `Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`, + ); + } + if (emptyDefinitions.length > 0) { + warnings.push( + `Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`, + ); + } + if (duplicateDefinitions.length > 0) { + warnings.push( + `Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`, + ); + } + if (tableRefs.length > 0) { + warnings.push( + `Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`, + ); + } + + return { + danglingReferences, + emptyDefinitions, + duplicateDefinitions, + referencesInTables: tableRefs, + warnings, + }; +} diff --git a/packages/mcp/test/unit/derive-id-parity.test.mjs b/packages/mcp/test/unit/derive-id-parity.test.mjs deleted file mode 100644 index cb74bc6f..00000000 --- a/packages/mcp/test/unit/derive-id-parity.test.mjs +++ /dev/null @@ -1,134 +0,0 @@ -import { test } from "node:test"; -import assert from "node:assert/strict"; - -import { markdownToProseMirror } from "../../build/lib/collaboration.js"; - -/** - * CROSS-PACKAGE DRIFT GUARD for the footnote id derivation scheme. - * - * `deriveFootnoteId` is duplicated in two places that MUST behave identically: - * - packages/editor-ext/src/lib/footnote/footnote-util.ts (exported) - * - packages/mcp/src/lib/collaboration.ts (internal helper) - * so the same markdown imported through the editor and through the MCP path - * derives identical footnote ids. - * - * The mcp copy is NOT exported from the compiled build (it is an internal helper - * of collaboration.js), and production source must not be modified to export it. - * So this test exercises the REAL compiled `deriveFootnoteId` *indirectly*, the - * same way production does: through `markdownToProseMirror`, which runs - * extractFootnotes -> deriveFootnoteId during duplicate-id dedup. We craft the - * `taken` set via literal pre-existing definition ids and read back the derived - * footnoteDefinition ids. - * - * GOLDEN below mirrors DERIVE_GOLDEN in - * packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts - * (asserted there by a DIRECT call). Same (originalId, occurrence, taken) -> - * same expected id. If the two copies drift, one of the two suites goes red. - */ - -/** The 25 single-letter suffixes the scheme uses (n=1..25): b, c, ..., z. */ -function singleLetterSuffixes() { - return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i)); -} - -// Identical matrix + expected values to the editor-ext golden table. -const GOLDEN = [ - { originalId: "d", occurrence: 2, taken: [], expected: "d__2" }, - { originalId: "d", occurrence: 3, taken: [], expected: "d__3" }, - { originalId: "d", occurrence: 2, taken: ["d__2"], expected: "d__2b" }, - { originalId: "d", occurrence: 2, taken: ["d__2", "d__2b"], expected: "d__2c" }, - { - originalId: "d", - occurrence: 2, - taken: ["d__2", "d__2b", "d__2c", "d__2d"], - expected: "d__2e", - }, - { - originalId: "d", - occurrence: 2, - taken: ["d__2", ...singleLetterSuffixes().map((s) => `d__2${s}`)], - expected: "d__2bb", - }, -]; - -/** Recursively collect every node of `type`. */ -function findAll(node, type, acc = []) { - if (!node || typeof node !== "object") return acc; - if (node.type === type) acc.push(node); - if (Array.isArray(node.content)) for (const c of node.content) findAll(c, type, acc); - return acc; -} - -/** - * Build markdown that drives the real `deriveFootnoteId(originalId, occurrence, - * taken)`: - * - `occurrence` duplicate definitions of `[^originalId]` so the dedup walk - * reaches the requested occurrence (occurrence=2 -> 1 keeper + 1 duplicate; - * occurrence=3 -> keeper + 2 duplicates, of which the LAST is the one whose - * id we read); - * - one literal pre-existing definition for every id in `taken`, each with its - * own reference marker so it is a real (non-orphan) definition. Those ids are - * reserved up-front in the dedup `taken` set, exactly forcing the bump. - * - * Returns the derived id of the FINAL duplicate of `originalId`. - */ -async function deriveViaMarkdown(originalId, occurrence, takenIds) { - // References: one [^originalId] per definition (keeper + duplicates) so each - // duplicate has a marker to pair with, plus one marker per taken id. - const dupCount = occurrence; // keeper + (occurrence-1) duplicates = `occurrence` defs - const refMarkers = []; - for (let i = 0; i < dupCount; i++) refMarkers.push(`[^${originalId}]`); - for (const id of takenIds) refMarkers.push(`[^${id}]`); - const refLine = `Body ${refMarkers.join(" ")}.`; - - // Definitions: `occurrence` copies of [^originalId]: ... then the taken ids. - const defLines = []; - for (let i = 0; i < dupCount; i++) { - defLines.push(`[^${originalId}]: copy ${i}`); - } - for (const id of takenIds) { - defLines.push(`[^${id}]: reserved ${id}`); - } - - const md = [refLine, "", ...defLines].join("\n"); - const json = await markdownToProseMirror(md); - const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id); - - // The derived id we want is the one that is neither the keeper (originalId), - // nor any reserved taken id, nor a lower-occurrence derived id. For - // occurrence=2 that is the single bumped id; for occurrence=3 it is the - // highest `${originalId}__3...` id. Compute it generically: among the def ids - // that start with `${originalId}__${occurrence}`, the expected one is present. - return { defIds, json }; -} - -for (const row of GOLDEN) { - test(`parity: derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) -> "${row.expected}"`, async () => { - const { defIds } = await deriveViaMarkdown( - row.originalId, - row.occurrence, - row.taken, - ); - // The real compiled deriveFootnoteId must have minted exactly the golden id. - assert.ok( - defIds.includes(row.expected), - `expected derived id "${row.expected}" among def ids ${JSON.stringify(defIds)}`, - ); - // And every id is distinct: nothing collapsed. - assert.equal(new Set(defIds).size, defIds.length, "all def ids distinct"); - }); -} - -test("parity: the simple keeper+two-duplicate case mints d, d__2, d__3", async () => { - // The canonical no-collision path, asserted as a whole set for clarity. - const md = [ - "See[^d] one[^d] two[^d].", - "", - "[^d]: first", - "[^d]: second", - "[^d]: third", - ].join("\n"); - const json = await markdownToProseMirror(md); - const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id); - assert.deepEqual([...defIds].sort(), ["d", "d__2", "d__3"]); -}); diff --git a/packages/mcp/test/unit/footnote-analyze.test.mjs b/packages/mcp/test/unit/footnote-analyze.test.mjs new file mode 100644 index 00000000..b2de1787 --- /dev/null +++ b/packages/mcp/test/unit/footnote-analyze.test.mjs @@ -0,0 +1,106 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { analyzeFootnotes } from "../../build/lib/footnote-analyze.js"; + +test("clean footnotes produce no diagnostics", () => { + const md = ["A[^a] and B[^b].", "", "[^a]: first", "[^b]: second"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, []); + assert.deepEqual(d.emptyDefinitions, []); + assert.deepEqual(d.duplicateDefinitions, []); + assert.deepEqual(d.referencesInTables, []); + assert.deepEqual(d.warnings, []); +}); + +test("reuse (repeated references to one definition) is NOT a warning", () => { + const md = ["A[^a] B[^a] C[^a].", "", "[^a]: shared"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, []); + assert.deepEqual(d.warnings, []); +}); + +test("dangling reference (no definition) is reported", () => { + const md = ["See[^missing] and[^a].", "", "[^a]: defined"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, ["missing"]); + assert.equal(d.warnings.length, 1); + assert.match(d.warnings[0], /no matching definition/); + assert.match(d.warnings[0], /\[\^missing\]/); +}); + +test("empty definition text is reported", () => { + const md = ["See[^a].", "", "[^a]: "].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.emptyDefinitions, ["a"]); + assert.match(d.warnings.join("\n"), /empty text/); +}); + +test("duplicate definition id is reported (first-wins)", () => { + const md = ["See[^d].", "", "[^d]: first", "[^d]: second"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.duplicateDefinitions, ["d"]); + assert.match(d.warnings.join("\n"), /defined more than once/); +}); + +test("reference inside a GFM table row is reported (heuristic)", () => { + const md = [ + "| Col |", + "| --- |", + "| cell[^t] |", + "", + "[^t]: table note", + ].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.referencesInTables, ["t"]); + assert.match(d.warnings.join("\n"), /table/); + // It is defined, so it is NOT also dangling. + assert.deepEqual(d.danglingReferences, []); +}); + +test("footnote syntax inside a code fence is ignored", () => { + const md = [ + "Intro.", + "", + "```", + "Example[^demo]", + "[^demo]: not a real definition", + "```", + "", + "Outro[^a].", + "", + "[^a]: real", + ].join("\n"); + const d = analyzeFootnotes(md); + // `[^demo]` lives only in the fenced block, so it is neither a reference nor a + // dangling one, and `[^demo]:` is not counted as a definition. + assert.deepEqual(d.danglingReferences, []); + assert.deepEqual(d.duplicateDefinitions, []); + assert.deepEqual(d.warnings, []); +}); + +test("a reference that only appears inside a definition's text is not dangling", () => { + // `[^b]` is referenced from within [^a]'s text and has its own definition. + const md = ["See[^a].", "", "[^a]: see also [^b]", "[^b]: the other"].join( + "\n", + ); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, []); +}); + +test("multiple problem classes accumulate distinct warnings", () => { + const md = [ + "Ref[^x] and[^dup].", + "", + "[^dup]: one", + "[^dup]: two", + "[^empty]:", + ].join("\n"); + const d = analyzeFootnotes(md); + // x has no definition; dup is defined twice; empty is empty AND has no ref. + assert.ok(d.danglingReferences.includes("x")); + assert.deepEqual(d.duplicateDefinitions, ["dup"]); + assert.deepEqual(d.emptyDefinitions, ["empty"]); + // One warning line per problem class present. + assert.ok(d.warnings.length >= 3); +}); diff --git a/packages/mcp/test/unit/footnotes.test.mjs b/packages/mcp/test/unit/footnotes.test.mjs index df45a7b9..67ec9bc5 100644 --- a/packages/mcp/test/unit/footnotes.test.mjs +++ b/packages/mcp/test/unit/footnotes.test.mjs @@ -90,11 +90,10 @@ test("JSON -> MD -> JSON preserves footnote ids and text", async () => { assert.match(md2, /\[\^fn2\]: Second note\./); }); -test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => { - // The MCP import must derive duplicate ids deterministically (NOT random) so - // the same markdown imported here and via the editor produces identical ids, - // and re-importing is stable. This is the test that would FAIL on the old - // Math.random()/Date.now() implementation. +test("repeated references REUSE one footnote; duplicate definitions are first-wins (#166)", async () => { + // Reuse semantics: many `[^d]` references + several `[^d]:` definitions import + // as ONE footnote — the references all keep id "d" (reuse), and only the FIRST + // definition is kept (first-wins). Deterministic and stable across re-imports. const md = [ "See[^d] one[^d] two[^d].", "", @@ -106,21 +105,26 @@ test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", const idsOf = async () => { const json = await markdownToProseMirror(md); const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id); - const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id); - return { refs, defs }; + const defs = findAll(json, "footnoteDefinition"); + return { + refs, + defIds: defs.map((d) => d.attrs.id), + defText: defs + .map((d) => JSON.stringify(d).match(/"text":"([^"]*)"/)?.[1]) + .join("|"), + }; }; const a = await idsOf(); const b = await idsOf(); - // Identical across runs. - assert.deepEqual(a.refs, b.refs); - assert.deepEqual(a.defs, b.defs); - // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3". - assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]); - // 1:1 reference <-> definition pairing, all distinct. - assert.equal(new Set(a.defs).size, 3); - assert.deepEqual([...a.refs].sort(), [...a.defs].sort()); + // Stable across runs. + assert.deepEqual(a, b); + // Reuse: all three reference markers stay "d". + assert.deepEqual(a.refs, ["d", "d", "d"]); + // First-wins: a single definition "d" with the FIRST text. + assert.deepEqual(a.defIds, ["d"]); + assert.equal(a.defText, "first"); }); test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {