feat(footnotes): reuse semantics + import diagnostics (#166)

Footnotes were strict 1:1: a repeated `[^a]` reference was treated as a collision and re-id'd to `a__2`, and a reference with no definition synthesized its own empty one — so an agent-authored article with reused labels produced dozens of empty `kowiki__N` footnotes. Move to Pandoc REUSE semantics and add non-fatal import diagnostics. Reuse (core): - resolveCollisions (footnote-sync): repeated references sharing an id are REUSE (recorded once in document order, never re-id'd) — one number, one shared definition. Only a duplicate DEFINITION is re-id'd deterministically and, with no matching reference, dropped by the existing orphan policy (first-wins). CollisionPlan.refReids is now always empty (harmless no-op downstream). - extractFootnoteDefinitions (marked) and extractFootnotes (MCP): duplicate definition ids are FIRST-WINS (keep first, drop rest); reference markers are never rewritten. Removed the marker-rewriting and the now-dead deriveFootnoteId mirror + helpers from the MCP path. Import diagnostics: - New analyzeFootnotes() (MCP): fence-aware pure scan reporting dangling references, empty/duplicate definitions and `[^id]` markers inside table rows. - createPage / updatePage / importPageMarkdown now attach `footnoteWarnings` (only when non-empty) so an agent can fix its markup; the page is still created. Paste-reuse: - footnotePastePlugin remaps only ids the pasted slice DEFINES (a colliding definition); a pasted lone reference to an existing id keeps it (reuse). Tests: reuse/first-wins rewrites of footnote.test, footnote-markdown.test, footnote.marked.orphan.test and the MCP footnotes.test; new footnote-paste.test (editor-ext) and footnote-analyze.test (MCP). Deleted derive-id-parity.test.mjs (the MCP no longer derives ids; editor-ext's deriveFootnoteId keeps its own golden test). editor-ext 128, MCP 299, server roundtrip 2, client views 3, client+server tsc clean. Two review suggestions applied: corrected a stale "duplicated in MCP" comment and the dangling-reference warning wording. Note: the multi-backlink editor UI (a reused definition linking back to each of its references) is deferred to a follow-up — this PR delivers the data-integrity core (reuse + warnings + paste-reuse). Forward links and numbering already reuse correctly; the backlink currently targets the first reference. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 15:34:41 +03:00
parent 6566d2153c
commit 17e683a311
16 changed files with 774 additions and 558 deletions
--- a/packages/mcp/build/client.js
+++ b/packages/mcp/build/client.js
@@ -9,6 +9,7 @@ import WebSocket from "ws";
 import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
 import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js";
 import { docmostExtensions } from "./lib/docmost-schema.js";
+import { analyzeFootnotes } from "./lib/footnote-analyze.js";
 import { buildPageTree } from "./lib/tree.js";
 import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js";
 import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js";
@@ -566,7 +567,9 @@ export class DocmostClient {
        // Always fetch subpages to provide context to the agent
        let subpages = [];
        try {
-            subpages = await this.listSidebarPages(resultData.spaceId, pageId);
+            // `pageId` may be a slugId, but the sidebar-pages endpoint requires the
+            // UUID; `resultData.id` holds the resolved UUID returned by getPageRaw.
+            subpages = await this.listSidebarPages(resultData.spaceId, resultData.id);
        }
        catch (e) {
            console.warn("Failed to fetch subpages:", e);
@@ -814,7 +817,11 @@ export class DocmostClient {
        if (title) {
            await this.client.post("/pages/update", { pageId: newPageId, title });
        }
-        return this.getPage(newPageId);
+        const page = await this.getPage(newPageId);
+        // Surface non-fatal footnote problems (dangling refs, empty/duplicate
+        // definitions, markers in tables) so the agent can fix its markup (#166).
+        const { warnings } = analyzeFootnotes(content);
+        return warnings.length > 0 ? { ...page, footnoteWarnings: warnings } : page;
    }
    /**
     * Update a page's content from markdown and optionally its title.
@@ -844,12 +851,15 @@ export class DocmostClient {
            }
            throw new Error(`Failed to update page content: ${error.message}`);
        }
+        const { warnings } = analyzeFootnotes(content);
        return {
            success: true,
            modified: true,
            message: "Page updated successfully.",
            pageId: pageId,
            verify: mutation.verify,
+            // Non-fatal footnote diagnostics (#166); omitted when there are none.
+            ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}),
        };
    }
    /**
@@ -1119,6 +1129,11 @@ export class DocmostClient {
        if (meta?.pageId && meta.pageId !== pageId) {
            result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
        }
+        // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions
+        // and references live there, not in the front-matter/comments sections).
+        const { warnings } = analyzeFootnotes(body);
+        if (warnings.length > 0)
+            result.footnoteWarnings = warnings;
        return result;
    }
    /**
@@ -2422,9 +2437,9 @@ export class DocmostClient {
            const raw = await this.getPageRaw(pageId);
            const current = raw.content || { type: "doc", content: [] };
            runTransform(current);
-            // Exercise the same Yjs encoder the apply path uses, so the preview
-            // fails with the SAME descriptive error when the doc is not encodable
-            // instead of returning a misleadingly-green diff.
+            // Run an independent Yjs-encodability check (same sanitize + schema as the
+            // apply path), so the preview fails with the same descriptive error when
+            // the doc is not encodable instead of returning a misleadingly-green diff.
            assertYjsEncodable(newDoc);
            return {
                pushed: false,
--- a/packages/mcp/build/lib/collaboration.js
+++ b/packages/mcp/build/lib/collaboration.js
@@ -285,44 +285,6 @@ const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
 function escapeFootnoteAttr(value) {
    return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
 }
-function escapeFootnoteRegExp(value) {
-    return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-}
-/**
- * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
- * an original id `X` during definition dedup.
- *
- * EXACT MIRROR of editor-ext `deriveFootnoteId`
- * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
- * STAY IN SYNC: the same markdown imported through the editor and through this
- * MCP path has to produce identical ids, and the sync plugin (which re-ids on
- * every collaborating client) relies on the same scheme to converge. NEVER use
- * Math.random()/Date.now()/uuid here — a random id would diverge across clients.
- *
- * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
- * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
- * `taken` (the set of ids already present / already minted — pure doc state).
- */
-function deriveFootnoteId(originalId, occurrence, taken) {
-    let candidate = `${originalId}__${occurrence}`;
-    let n = 0;
-    while (taken.has(candidate)) {
-        n += 1;
-        candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
-    }
-    return candidate;
-}
-/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
-function footnoteSuffix(n) {
-    let out = "";
-    let x = n;
-    while (x > 0) {
-        const rem = (x - 1) % 25;
-        out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
-        x = Math.floor((x - 1) / 25);
-    }
-    return out;
-}
 const footnoteRefMarkedExtension = {
    name: "footnoteRef",
    level: "inline",
@@ -371,43 +333,22 @@ function extractFootnotes(markdown) {
    }
    if (defs.length === 0)
        return { body: markdown, section: "" };
-    // De-duplicate colliding definition ids (mirror of editor-ext
-    // extractFootnoteDefinitions). Two definitions sharing an id would otherwise
-    // collapse into one footnote downstream; rename each colliding id to a
-    // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
-    // marker so the (reference, definition) pairing stays 1:1. Determinism lets
-    // the same markdown imported here and via the editor produce identical ids.
-    let dedupedBody = bodyLines.join("\n");
-    const taken = new Set(defs.map((d) => d.id));
-    const seenDefIds = new Map();
+    // Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of
+    // editor-ext extractFootnoteDefinitions). Reference markers are left untouched
+    // so repeated `[^a]` references reuse the single footnote (Pandoc semantics,
+    // #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes
+    // (`duplicateDefinitions`), not silently lost. MUST stay in sync with the
+    // editor-ext mirror.
+    const firstById = new Map(); // id -> first definition text
    for (const def of defs) {
-        const originalId = def.id;
-        const count = seenDefIds.get(originalId) ?? 0;
-        seenDefIds.set(originalId, count + 1);
-        if (count === 0)
-            continue; // first definition keeps its id
-        const newId = deriveFootnoteId(originalId, count + 1, taken);
-        taken.add(newId);
-        def.id = newId;
-        // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
-        // index 1 = this duplicate's marker. Rewrite index 1.
-        let occurrence = 0;
-        let rewritten = false;
-        const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
-        dedupedBody = dedupedBody.replace(re, (match) => {
-            const idx = occurrence++;
-            if (!rewritten && idx === 1) {
-                rewritten = true;
-                return `[^${newId}]`;
-            }
-            return match;
-        });
+        if (!firstById.has(def.id))
+            firstById.set(def.id, def.text);
    }
-    const inner = defs
-        .map((d) => `<div data-footnote-def data-id="${escapeFootnoteAttr(d.id)}"><p>${marked.parseInline(d.text || "")}</p></div>`)
+    const inner = [...firstById.entries()]
+        .map(([id, text]) => `<div data-footnote-def data-id="${escapeFootnoteAttr(id)}"><p>${marked.parseInline(text || "")}</p></div>`)
        .join("");
    return {
-        body: dedupedBody,
+        body: bodyLines.join("\n"),
        section: `<section data-footnotes>${inner}</section>`,
    };
 }
--- a/packages/mcp/build/lib/footnote-analyze.js
+++ b/packages/mcp/build/lib/footnote-analyze.js
@@ -0,0 +1,115 @@
+/**
+ * Footnote diagnostics for imported Markdown (issue #166).
+ *
+ * A PURE, fence-aware text scan (independent of the Markdown->ProseMirror
+ * conversion path, so it reports the same problems for `create_page`,
+ * `update_page` and `import_page_markdown`). It never changes the document — the
+ * importer still creates the page; this only surfaces footnote problems to the
+ * caller so an agent can fix its own markup instead of shipping broken footnotes.
+ *
+ * Detected problems:
+ *  - danglingReferences: a `[^id]` reference with no `[^id]:` definition.
+ *  - emptyDefinitions:   a `[^id]:` whose (kept) text is empty/whitespace.
+ *  - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the
+ *    first is kept on import — first-wins; see extractFootnotes).
+ *  - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic:
+ *    the line, trimmed, starts with `|`) — footnotes in table cells often do not
+ *    render as expected.
+ */
+/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */
+const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
+/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */
+const REF_RE_G = /\[\^([^\]\s]+)\]/g;
+/** Opening/closing fence marker (``` or ~~~). */
+const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
+/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
+function forEachReference(line, onRef) {
+    REF_RE_G.lastIndex = 0;
+    let m;
+    while ((m = REF_RE_G.exec(line)) !== null)
+        onRef(m[1]);
+}
+/**
+ * Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
+ */
+export function analyzeFootnotes(markdown) {
+    const lines = markdown.split("\n");
+    // Distinct reference ids in first-appearance order, plus the set of ids seen
+    // inside a table row.
+    const refIds = [];
+    const refIdSet = new Set();
+    const referencesInTables = new Set();
+    const addRef = (id, inTable) => {
+        if (!refIdSet.has(id)) {
+            refIdSet.add(id);
+            refIds.push(id);
+        }
+        if (inTable)
+            referencesInTables.add(id);
+    };
+    // Definition texts per id, in first-appearance order of the id.
+    const defTextsById = new Map();
+    let fence = null;
+    for (const line of lines) {
+        const fenceMatch = FENCE_RE.exec(line);
+        if (fenceMatch) {
+            const marker = fenceMatch[2][0];
+            if (fence === null)
+                fence = marker;
+            else if (marker === fence)
+                fence = null;
+            continue;
+        }
+        // Footnote syntax shown inside a code fence is not real markup.
+        if (fence !== null)
+            continue;
+        const defM = DEF_RE.exec(line);
+        if (defM) {
+            const id = defM[1];
+            const text = defM[2];
+            const arr = defTextsById.get(id);
+            if (arr)
+                arr.push(text);
+            else
+                defTextsById.set(id, [text]);
+            // A definition's TEXT can itself reference another footnote (`[^a]: see
+            // [^b]`); count those so such a `[^b]` is not falsely reported dangling.
+            forEachReference(text, (rid) => addRef(rid, false));
+            continue;
+        }
+        const inTable = line.trimStart().startsWith("|");
+        forEachReference(line, (id) => addRef(id, inTable));
+    }
+    const danglingReferences = refIds.filter((id) => !defTextsById.has(id));
+    const duplicateDefinitions = [];
+    const emptyDefinitions = [];
+    for (const [id, texts] of defTextsById) {
+        if (texts.length >= 2)
+            duplicateDefinitions.push(id);
+        // First-wins: the kept definition is the first one; flag it if it is blank.
+        if ((texts[0] ?? "").trim().length === 0)
+            emptyDefinitions.push(id);
+    }
+    const tableRefs = [...referencesInTables];
+    const warnings = [];
+    const list = (ids) => ids.map((id) => `[^${id}]`).join(", ");
+    if (danglingReferences.length > 0) {
+        warnings.push(`Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`);
+    }
+    if (emptyDefinitions.length > 0) {
+        warnings.push(`Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`);
+    }
+    if (duplicateDefinitions.length > 0) {
+        warnings.push(`Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`);
+    }
+    if (tableRefs.length > 0) {
+        warnings.push(`Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`);
+    }
+    return {
+        danglingReferences,
+        emptyDefinitions,
+        duplicateDefinitions,
+        referencesInTables: tableRefs,
+        warnings,
+    };
+}
--- a/packages/mcp/src/client.ts
+++ b/packages/mcp/src/client.ts
@@ -23,6 +23,7 @@ import {
  MutationResult,
 } from "./lib/collaboration.js";
 import { docmostExtensions } from "./lib/docmost-schema.js";
+import { analyzeFootnotes } from "./lib/footnote-analyze.js";
 import { buildPageTree } from "./lib/tree.js";
 import {
  serializeDocmostMarkdown,
@@ -1054,7 +1055,11 @@ export class DocmostClient {
      await this.client.post("/pages/update", { pageId: newPageId, title });
    }

-    return this.getPage(newPageId);
+    const page = await this.getPage(newPageId);
+    // Surface non-fatal footnote problems (dangling refs, empty/duplicate
+    // definitions, markers in tables) so the agent can fix its markup (#166).
+    const { warnings } = analyzeFootnotes(content);
+    return warnings.length > 0 ? { ...page, footnoteWarnings: warnings } : page;
  }

  /**
@@ -1095,12 +1100,15 @@ export class DocmostClient {
      throw new Error(`Failed to update page content: ${error.message}`);
    }

+    const { warnings } = analyzeFootnotes(content);
    return {
      success: true,
      modified: true,
      message: "Page updated successfully.",
      pageId: pageId,
      verify: mutation.verify,
+      // Non-fatal footnote diagnostics (#166); omitted when there are none.
+      ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}),
    };
  }

@@ -1416,6 +1424,10 @@ export class DocmostClient {
    if (meta?.pageId && meta.pageId !== pageId) {
      result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
    }
+    // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions
+    // and references live there, not in the front-matter/comments sections).
+    const { warnings } = analyzeFootnotes(body);
+    if (warnings.length > 0) result.footnoteWarnings = warnings;
    return result;
  }

--- a/packages/mcp/src/lib/collaboration.ts
+++ b/packages/mcp/src/lib/collaboration.ts
@@ -323,51 +323,6 @@ function escapeFootnoteAttr(value: string): string {
  return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
 }

-function escapeFootnoteRegExp(value: string): string {
-  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-}
-
-/**
- * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
- * an original id `X` during definition dedup.
- *
- * EXACT MIRROR of editor-ext `deriveFootnoteId`
- * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
- * STAY IN SYNC: the same markdown imported through the editor and through this
- * MCP path has to produce identical ids, and the sync plugin (which re-ids on
- * every collaborating client) relies on the same scheme to converge. NEVER use
- * Math.random()/Date.now()/uuid here — a random id would diverge across clients.
- *
- * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
- * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
- * `taken` (the set of ids already present / already minted — pure doc state).
- */
-function deriveFootnoteId(
-  originalId: string,
-  occurrence: number,
-  taken: Set<string>,
-): string {
-  let candidate = `${originalId}__${occurrence}`;
-  let n = 0;
-  while (taken.has(candidate)) {
-    n += 1;
-    candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
-  }
-  return candidate;
-}
-
-/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
-function footnoteSuffix(n: number): string {
-  let out = "";
-  let x = n;
-  while (x > 0) {
-    const rem = (x - 1) % 25;
-    out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
-    x = Math.floor((x - 1) / 25);
-  }
-  return out;
-}
-
 const footnoteRefMarkedExtension = {
  name: "footnoteRef",
  level: "inline" as const,
@@ -419,48 +374,27 @@ function extractFootnotes(markdown: string): {
  }
  if (defs.length === 0) return { body: markdown, section: "" };

-  // De-duplicate colliding definition ids (mirror of editor-ext
-  // extractFootnoteDefinitions). Two definitions sharing an id would otherwise
-  // collapse into one footnote downstream; rename each colliding id to a
-  // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
-  // marker so the (reference, definition) pairing stays 1:1. Determinism lets
-  // the same markdown imported here and via the editor produce identical ids.
-  let dedupedBody = bodyLines.join("\n");
-  const taken = new Set<string>(defs.map((d) => d.id));
-  const seenDefIds = new Map<string, number>();
+  // Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of
+  // editor-ext extractFootnoteDefinitions). Reference markers are left untouched
+  // so repeated `[^a]` references reuse the single footnote (Pandoc semantics,
+  // #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes
+  // (`duplicateDefinitions`), not silently lost. MUST stay in sync with the
+  // editor-ext mirror.
+  const firstById = new Map<string, string>(); // id -> first definition text
  for (const def of defs) {
-    const originalId = def.id;
-    const count = seenDefIds.get(originalId) ?? 0;
-    seenDefIds.set(originalId, count + 1);
-    if (count === 0) continue; // first definition keeps its id
-    const newId = deriveFootnoteId(originalId, count + 1, taken);
-    taken.add(newId);
-    def.id = newId;
-    // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
-    // index 1 = this duplicate's marker. Rewrite index 1.
-    let occurrence = 0;
-    let rewritten = false;
-    const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
-    dedupedBody = dedupedBody.replace(re, (match) => {
-      const idx = occurrence++;
-      if (!rewritten && idx === 1) {
-        rewritten = true;
-        return `[^${newId}]`;
-      }
-      return match;
-    });
+    if (!firstById.has(def.id)) firstById.set(def.id, def.text);
  }

-  const inner = defs
+  const inner = [...firstById.entries()]
    .map(
-      (d) =>
+      ([id, text]) =>
        `<div data-footnote-def data-id="${escapeFootnoteAttr(
-          d.id,
-        )}"><p>${marked.parseInline(d.text || "")}</p></div>`,
+          id,
+        )}"><p>${marked.parseInline(text || "")}</p></div>`,
    )
    .join("");
  return {
-    body: dedupedBody,
+    body: bodyLines.join("\n"),
    section: `<section data-footnotes>${inner}</section>`,
  };
 }
--- a/packages/mcp/src/lib/footnote-analyze.ts
+++ b/packages/mcp/src/lib/footnote-analyze.ts
@@ -0,0 +1,138 @@
+/**
+ * Footnote diagnostics for imported Markdown (issue #166).
+ *
+ * A PURE, fence-aware text scan (independent of the Markdown->ProseMirror
+ * conversion path, so it reports the same problems for `create_page`,
+ * `update_page` and `import_page_markdown`). It never changes the document — the
+ * importer still creates the page; this only surfaces footnote problems to the
+ * caller so an agent can fix its own markup instead of shipping broken footnotes.
+ *
+ * Detected problems:
+ *  - danglingReferences: a `[^id]` reference with no `[^id]:` definition.
+ *  - emptyDefinitions:   a `[^id]:` whose (kept) text is empty/whitespace.
+ *  - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the
+ *    first is kept on import — first-wins; see extractFootnotes).
+ *  - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic:
+ *    the line, trimmed, starts with `|`) — footnotes in table cells often do not
+ *    render as expected.
+ */
+
+/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */
+const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
+/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */
+const REF_RE_G = /\[\^([^\]\s]+)\]/g;
+/** Opening/closing fence marker (``` or ~~~). */
+const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
+
+export interface FootnoteDiagnostics {
+  /** Reference ids (distinct, document order) with no matching definition. */
+  danglingReferences: string[];
+  /** Definition ids whose first (kept) text is empty/whitespace. */
+  emptyDefinitions: string[];
+  /** Ids defined by two or more `[^id]:` lines (only the first is kept). */
+  duplicateDefinitions: string[];
+  /** Reference ids found inside a GFM table row (heuristic). */
+  referencesInTables: string[];
+  /** Human-readable warning lines for the tool result (one per problem class). */
+  warnings: string[];
+}
+
+/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
+function forEachReference(line: string, onRef: (id: string) => void): void {
+  REF_RE_G.lastIndex = 0;
+  let m: RegExpExecArray | null;
+  while ((m = REF_RE_G.exec(line)) !== null) onRef(m[1]);
+}
+
+/**
+ * Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
+ */
+export function analyzeFootnotes(markdown: string): FootnoteDiagnostics {
+  const lines = markdown.split("\n");
+
+  // Distinct reference ids in first-appearance order, plus the set of ids seen
+  // inside a table row.
+  const refIds: string[] = [];
+  const refIdSet = new Set<string>();
+  const referencesInTables = new Set<string>();
+  const addRef = (id: string, inTable: boolean) => {
+    if (!refIdSet.has(id)) {
+      refIdSet.add(id);
+      refIds.push(id);
+    }
+    if (inTable) referencesInTables.add(id);
+  };
+
+  // Definition texts per id, in first-appearance order of the id.
+  const defTextsById = new Map<string, string[]>();
+
+  let fence: string | null = null;
+  for (const line of lines) {
+    const fenceMatch = FENCE_RE.exec(line);
+    if (fenceMatch) {
+      const marker = fenceMatch[2][0];
+      if (fence === null) fence = marker;
+      else if (marker === fence) fence = null;
+      continue;
+    }
+    // Footnote syntax shown inside a code fence is not real markup.
+    if (fence !== null) continue;
+
+    const defM = DEF_RE.exec(line);
+    if (defM) {
+      const id = defM[1];
+      const text = defM[2];
+      const arr = defTextsById.get(id);
+      if (arr) arr.push(text);
+      else defTextsById.set(id, [text]);
+      // A definition's TEXT can itself reference another footnote (`[^a]: see
+      // [^b]`); count those so such a `[^b]` is not falsely reported dangling.
+      forEachReference(text, (rid) => addRef(rid, false));
+      continue;
+    }
+
+    const inTable = line.trimStart().startsWith("|");
+    forEachReference(line, (id) => addRef(id, inTable));
+  }
+
+  const danglingReferences = refIds.filter((id) => !defTextsById.has(id));
+  const duplicateDefinitions: string[] = [];
+  const emptyDefinitions: string[] = [];
+  for (const [id, texts] of defTextsById) {
+    if (texts.length >= 2) duplicateDefinitions.push(id);
+    // First-wins: the kept definition is the first one; flag it if it is blank.
+    if ((texts[0] ?? "").trim().length === 0) emptyDefinitions.push(id);
+  }
+  const tableRefs = [...referencesInTables];
+
+  const warnings: string[] = [];
+  const list = (ids: string[]) => ids.map((id) => `[^${id}]`).join(", ");
+  if (danglingReferences.length > 0) {
+    warnings.push(
+      `Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`,
+    );
+  }
+  if (emptyDefinitions.length > 0) {
+    warnings.push(
+      `Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`,
+    );
+  }
+  if (duplicateDefinitions.length > 0) {
+    warnings.push(
+      `Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`,
+    );
+  }
+  if (tableRefs.length > 0) {
+    warnings.push(
+      `Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`,
+    );
+  }
+
+  return {
+    danglingReferences,
+    emptyDefinitions,
+    duplicateDefinitions,
+    referencesInTables: tableRefs,
+    warnings,
+  };
+}
--- a/packages/mcp/test/unit/derive-id-parity.test.mjs
+++ b/packages/mcp/test/unit/derive-id-parity.test.mjs
@@ -1,134 +0,0 @@
-import { test } from "node:test";
-import assert from "node:assert/strict";
-
-import { markdownToProseMirror } from "../../build/lib/collaboration.js";
-
-/**
- * CROSS-PACKAGE DRIFT GUARD for the footnote id derivation scheme.
- *
- * `deriveFootnoteId` is duplicated in two places that MUST behave identically:
- *   - packages/editor-ext/src/lib/footnote/footnote-util.ts (exported)
- *   - packages/mcp/src/lib/collaboration.ts                  (internal helper)
- * so the same markdown imported through the editor and through the MCP path
- * derives identical footnote ids.
- *
- * The mcp copy is NOT exported from the compiled build (it is an internal helper
- * of collaboration.js), and production source must not be modified to export it.
- * So this test exercises the REAL compiled `deriveFootnoteId` *indirectly*, the
- * same way production does: through `markdownToProseMirror`, which runs
- * extractFootnotes -> deriveFootnoteId during duplicate-id dedup. We craft the
- * `taken` set via literal pre-existing definition ids and read back the derived
- * footnoteDefinition ids.
- *
- * GOLDEN below mirrors DERIVE_GOLDEN in
- *   packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts
- * (asserted there by a DIRECT call). Same (originalId, occurrence, taken) ->
- * same expected id. If the two copies drift, one of the two suites goes red.
- */
-
-/** The 25 single-letter suffixes the scheme uses (n=1..25): b, c, ..., z. */
-function singleLetterSuffixes() {
-  return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i));
-}
-
-// Identical matrix + expected values to the editor-ext golden table.
-const GOLDEN = [
-  { originalId: "d", occurrence: 2, taken: [], expected: "d__2" },
-  { originalId: "d", occurrence: 3, taken: [], expected: "d__3" },
-  { originalId: "d", occurrence: 2, taken: ["d__2"], expected: "d__2b" },
-  { originalId: "d", occurrence: 2, taken: ["d__2", "d__2b"], expected: "d__2c" },
-  {
-    originalId: "d",
-    occurrence: 2,
-    taken: ["d__2", "d__2b", "d__2c", "d__2d"],
-    expected: "d__2e",
-  },
-  {
-    originalId: "d",
-    occurrence: 2,
-    taken: ["d__2", ...singleLetterSuffixes().map((s) => `d__2${s}`)],
-    expected: "d__2bb",
-  },
-];
-
-/** Recursively collect every node of `type`. */
-function findAll(node, type, acc = []) {
-  if (!node || typeof node !== "object") return acc;
-  if (node.type === type) acc.push(node);
-  if (Array.isArray(node.content)) for (const c of node.content) findAll(c, type, acc);
-  return acc;
-}
-
-/**
- * Build markdown that drives the real `deriveFootnoteId(originalId, occurrence,
- * taken)`:
- *  - `occurrence` duplicate definitions of `[^originalId]` so the dedup walk
- *    reaches the requested occurrence (occurrence=2 -> 1 keeper + 1 duplicate;
- *    occurrence=3 -> keeper + 2 duplicates, of which the LAST is the one whose
- *    id we read);
- *  - one literal pre-existing definition for every id in `taken`, each with its
- *    own reference marker so it is a real (non-orphan) definition. Those ids are
- *    reserved up-front in the dedup `taken` set, exactly forcing the bump.
- *
- * Returns the derived id of the FINAL duplicate of `originalId`.
- */
-async function deriveViaMarkdown(originalId, occurrence, takenIds) {
-  // References: one [^originalId] per definition (keeper + duplicates) so each
-  // duplicate has a marker to pair with, plus one marker per taken id.
-  const dupCount = occurrence; // keeper + (occurrence-1) duplicates = `occurrence` defs
-  const refMarkers = [];
-  for (let i = 0; i < dupCount; i++) refMarkers.push(`[^${originalId}]`);
-  for (const id of takenIds) refMarkers.push(`[^${id}]`);
-  const refLine = `Body ${refMarkers.join(" ")}.`;
-
-  // Definitions: `occurrence` copies of [^originalId]: ... then the taken ids.
-  const defLines = [];
-  for (let i = 0; i < dupCount; i++) {
-    defLines.push(`[^${originalId}]: copy ${i}`);
-  }
-  for (const id of takenIds) {
-    defLines.push(`[^${id}]: reserved ${id}`);
-  }
-
-  const md = [refLine, "", ...defLines].join("\n");
-  const json = await markdownToProseMirror(md);
-  const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
-
-  // The derived id we want is the one that is neither the keeper (originalId),
-  // nor any reserved taken id, nor a lower-occurrence derived id. For
-  // occurrence=2 that is the single bumped id; for occurrence=3 it is the
-  // highest `${originalId}__3...` id. Compute it generically: among the def ids
-  // that start with `${originalId}__${occurrence}`, the expected one is present.
-  return { defIds, json };
-}
-
-for (const row of GOLDEN) {
-  test(`parity: derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) -> "${row.expected}"`, async () => {
-    const { defIds } = await deriveViaMarkdown(
-      row.originalId,
-      row.occurrence,
-      row.taken,
-    );
-    // The real compiled deriveFootnoteId must have minted exactly the golden id.
-    assert.ok(
-      defIds.includes(row.expected),
-      `expected derived id "${row.expected}" among def ids ${JSON.stringify(defIds)}`,
-    );
-    // And every id is distinct: nothing collapsed.
-    assert.equal(new Set(defIds).size, defIds.length, "all def ids distinct");
-  });
-}
-
-test("parity: the simple keeper+two-duplicate case mints d, d__2, d__3", async () => {
-  // The canonical no-collision path, asserted as a whole set for clarity.
-  const md = [
-    "See[^d] one[^d] two[^d].",
-    "",
-    "[^d]: first",
-    "[^d]: second",
-    "[^d]: third",
-  ].join("\n");
-  const json = await markdownToProseMirror(md);
-  const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
-  assert.deepEqual([...defIds].sort(), ["d", "d__2", "d__3"]);
-});
--- a/packages/mcp/test/unit/footnote-analyze.test.mjs
+++ b/packages/mcp/test/unit/footnote-analyze.test.mjs
@@ -0,0 +1,106 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+
+import { analyzeFootnotes } from "../../build/lib/footnote-analyze.js";
+
+test("clean footnotes produce no diagnostics", () => {
+  const md = ["A[^a] and B[^b].", "", "[^a]: first", "[^b]: second"].join("\n");
+  const d = analyzeFootnotes(md);
+  assert.deepEqual(d.danglingReferences, []);
+  assert.deepEqual(d.emptyDefinitions, []);
+  assert.deepEqual(d.duplicateDefinitions, []);
+  assert.deepEqual(d.referencesInTables, []);
+  assert.deepEqual(d.warnings, []);
+});
+
+test("reuse (repeated references to one definition) is NOT a warning", () => {
+  const md = ["A[^a] B[^a] C[^a].", "", "[^a]: shared"].join("\n");
+  const d = analyzeFootnotes(md);
+  assert.deepEqual(d.danglingReferences, []);
+  assert.deepEqual(d.warnings, []);
+});
+
+test("dangling reference (no definition) is reported", () => {
+  const md = ["See[^missing] and[^a].", "", "[^a]: defined"].join("\n");
+  const d = analyzeFootnotes(md);
+  assert.deepEqual(d.danglingReferences, ["missing"]);
+  assert.equal(d.warnings.length, 1);
+  assert.match(d.warnings[0], /no matching definition/);
+  assert.match(d.warnings[0], /\[\^missing\]/);
+});
+
+test("empty definition text is reported", () => {
+  const md = ["See[^a].", "", "[^a]:   "].join("\n");
+  const d = analyzeFootnotes(md);
+  assert.deepEqual(d.emptyDefinitions, ["a"]);
+  assert.match(d.warnings.join("\n"), /empty text/);
+});
+
+test("duplicate definition id is reported (first-wins)", () => {
+  const md = ["See[^d].", "", "[^d]: first", "[^d]: second"].join("\n");
+  const d = analyzeFootnotes(md);
+  assert.deepEqual(d.duplicateDefinitions, ["d"]);
+  assert.match(d.warnings.join("\n"), /defined more than once/);
+});
+
+test("reference inside a GFM table row is reported (heuristic)", () => {
+  const md = [
+    "| Col |",
+    "| --- |",
+    "| cell[^t] |",
+    "",
+    "[^t]: table note",
+  ].join("\n");
+  const d = analyzeFootnotes(md);
+  assert.deepEqual(d.referencesInTables, ["t"]);
+  assert.match(d.warnings.join("\n"), /table/);
+  // It is defined, so it is NOT also dangling.
+  assert.deepEqual(d.danglingReferences, []);
+});
+
+test("footnote syntax inside a code fence is ignored", () => {
+  const md = [
+    "Intro.",
+    "",
+    "```",
+    "Example[^demo]",
+    "[^demo]: not a real definition",
+    "```",
+    "",
+    "Outro[^a].",
+    "",
+    "[^a]: real",
+  ].join("\n");
+  const d = analyzeFootnotes(md);
+  // `[^demo]` lives only in the fenced block, so it is neither a reference nor a
+  // dangling one, and `[^demo]:` is not counted as a definition.
+  assert.deepEqual(d.danglingReferences, []);
+  assert.deepEqual(d.duplicateDefinitions, []);
+  assert.deepEqual(d.warnings, []);
+});
+
+test("a reference that only appears inside a definition's text is not dangling", () => {
+  // `[^b]` is referenced from within [^a]'s text and has its own definition.
+  const md = ["See[^a].", "", "[^a]: see also [^b]", "[^b]: the other"].join(
+    "\n",
+  );
+  const d = analyzeFootnotes(md);
+  assert.deepEqual(d.danglingReferences, []);
+});
+
+test("multiple problem classes accumulate distinct warnings", () => {
+  const md = [
+    "Ref[^x] and[^dup].",
+    "",
+    "[^dup]: one",
+    "[^dup]: two",
+    "[^empty]:",
+  ].join("\n");
+  const d = analyzeFootnotes(md);
+  // x has no definition; dup is defined twice; empty is empty AND has no ref.
+  assert.ok(d.danglingReferences.includes("x"));
+  assert.deepEqual(d.duplicateDefinitions, ["dup"]);
+  assert.deepEqual(d.emptyDefinitions, ["empty"]);
+  // One warning line per problem class present.
+  assert.ok(d.warnings.length >= 3);
+});
--- a/packages/mcp/test/unit/footnotes.test.mjs
+++ b/packages/mcp/test/unit/footnotes.test.mjs
@@ -90,11 +90,10 @@ test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
  assert.match(md2, /\[\^fn2\]: Second note\./);
 });

-test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => {
-  // The MCP import must derive duplicate ids deterministically (NOT random) so
-  // the same markdown imported here and via the editor produces identical ids,
-  // and re-importing is stable. This is the test that would FAIL on the old
-  // Math.random()/Date.now() implementation.
+test("repeated references REUSE one footnote; duplicate definitions are first-wins (#166)", async () => {
+  // Reuse semantics: many `[^d]` references + several `[^d]:` definitions import
+  // as ONE footnote — the references all keep id "d" (reuse), and only the FIRST
+  // definition is kept (first-wins). Deterministic and stable across re-imports.
  const md = [
    "See[^d] one[^d] two[^d].",
    "",
@@ -106,21 +105,26 @@ test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)",
  const idsOf = async () => {
    const json = await markdownToProseMirror(md);
    const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
-    const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
-    return { refs, defs };
+    const defs = findAll(json, "footnoteDefinition");
+    return {
+      refs,
+      defIds: defs.map((d) => d.attrs.id),
+      defText: defs
+        .map((d) => JSON.stringify(d).match(/"text":"([^"]*)"/)?.[1])
+        .join("|"),
+    };
  };

  const a = await idsOf();
  const b = await idsOf();

-  // Identical across runs.
-  assert.deepEqual(a.refs, b.refs);
-  assert.deepEqual(a.defs, b.defs);
-  // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
-  assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]);
-  // 1:1 reference <-> definition pairing, all distinct.
-  assert.equal(new Set(a.defs).size, 3);
-  assert.deepEqual([...a.refs].sort(), [...a.defs].sort());
+  // Stable across runs.
+  assert.deepEqual(a, b);
+  // Reuse: all three reference markers stay "d".
+  assert.deepEqual(a.refs, ["d", "d", "d"]);
+  // First-wins: a single definition "d" with the FIRST text.
+  assert.deepEqual(a.defIds, ["d"]);
+  assert.equal(a.defText, "first");
 });

 test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {