fix(footnotes): guard insert against nested/bare definitions, skip definitions-only paste, doc + reorder fixes (#228)

Must-fix: - insertInlineFootnote could glue a footnoteReference inside an EXISTING definition (nested footnotesList, or a bare footnoteDefinition with no list wrapper), which canonicalize then dropped as an orphan — silently losing the definition's prose. Now: (a) the body/notes boundary is computed from the first top-level block that IS or CONTAINS (recursively) a footnotesList/ footnoteDefinition, not just a top-level list; and (b) the insertNodesAfterAnchor core skips footnotesList/footnoteDefinition subtrees entirely (skipSubtreeTypes), so an anchor whose only match is inside a definition -> inserted:false (clean abort, no write). Added tests: nested-definition, bare-definition, and body-before-nested-list-still-inserts. - editor-ext footnote-canonicalize header listed `markdownToProseMirror` among the canonicalizing MCP paths; it is the NON-canonicalizing primitive. Replaced with `markdownToProseMirrorCanonical` (+ note that the plain primitive is for comment bodies) and added copy_page_content. - Client paste: canonicalizePastedFootnotes now skips a definitions-ONLY paste (no footnoteReference anywhere) — canonicalizing it would strip the reference-less list and yield an EMPTY paste. Added a test. Suggestions: - docmost_transform now runs validateDocStructure/validateDocUrls on the RAW transform output BEFORE canonicalizeFootnotes (mirrors updatePageJson), so a too-deep doc gives the intended max-depth error instead of a stack overflow. - docmost_transform tool description now states the RESULT is footnote-canonical (dryRun diff may show tidy-ups; idempotent after first run). - insertFootnote: dropped the dead `result ? … : undefined` ternaries and the `as any` casts (result is always set by the time we return; the not-found path throws and aborts mutatePage). `const r = result!;`. Tests / architecture: - Added a LIVE-plugin golden case: the real footnoteSyncPlugin leaves a list with non-empty content after it in place, and canonicalize agrees (placement parity is now a driven property, not a hand-set expected). - Added generateFootnoteId uuidv7 shape + uniqueness test. - Item 9: added the ENFORCEMENT-RULE comments at the server parseProsemirrorContent and the MCP canonicalizer header (any NEW full-doc persist path MUST canonicalize; fragments/append/prepend and comment bodies MUST NOT). Kept per-call-site over a brittle grep CI test (the replace-vs-fragment + comment-vs-page nuance makes a single wrapper unsafe). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-27 23:40:28 +03:00
parent 3fd66b4245
commit 9c1f952b2f
14 changed files with 305 additions and 38 deletions
--- a/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts
+++ b/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts
@@ -117,6 +117,32 @@ describe("canonicalizePastedFootnotes", () => {
    editor.destroy();
  });
  it("leaves a definitions-ONLY paste untouched (no references -> no empty paste)", () => {
    // A whole-block paste of ONLY definitions (a footnotesList with no matching
    // footnoteReference anywhere in the selection). Canonicalizing it would strip
    // the reference-less list -> an EMPTY paste, losing the pasted text. The hook
    // must leave such a block untouched.
    const { editor, schema } = makeSchema();
    const slice = new Slice(
      Fragment.fromArray([
        schema.nodes[FOOTNOTES_LIST_NAME].create(null, [
          schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [
            schema.nodes.paragraph.create(null, [schema.text("note A")]),
          ]),
          schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [
            schema.nodes.paragraph.create(null, [schema.text("note B")]),
          ]),
        ]),
      ]),
      0,
      0,
    );
    const out = canonicalizePastedFootnotes(slice, schema);
    expect(out).toBe(slice); // returned unchanged (same reference, content kept)
    expect(listIds(out)).toEqual(["a", "b"]);
    editor.destroy();
  });
  it("leaves an open (partial) slice untouched even if it carries a list", () => {
    // An open slice (openStart/openEnd > 0) is a partial selection, not a
    // standalone block, so it is returned as-is BEFORE any footnote handling.
--- a/apps/client/src/features/editor/extensions/markdown-clipboard.ts
+++ b/apps/client/src/features/editor/extensions/markdown-clipboard.ts
@@ -8,6 +8,7 @@ import {
  htmlToMarkdown,
  canonicalizeFootnotes,
  FOOTNOTES_LIST_NAME,
  FOOTNOTE_REFERENCE_NAME,
 } from "@docmost/editor-ext";
 import type { Schema } from "@tiptap/pm/model";
@@ -165,15 +166,28 @@ export const MarkdownClipboard = Extension.create({
 * for the paste/sync plugins to merge. Residual: when the pasted block is merged
 * into a doc that already has footnotes, ordering RELATIVE to the pre-existing
 * footnotes is still governed by the sync plugin (which does not reorder).
 *
 * Also requires at least one footnoteReference in the selection: a definitions-ONLY
 * paste (`[^a]: …` with no `[^a]` reference in the same block) has no references,
 * so canonicalizeFootnotes would drop the whole list and the paste would come out
 * EMPTY — losing the pasted text. Such a block is left as-is for the sync plugin.
 */
 export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice {
  if (slice.openStart !== 0 || slice.openEnd !== 0) return slice;
  let hasFootnotesList = false;
  let hasReference = false;
  slice.content.forEach((node) => {
    if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true;
    if (node.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true;
    node.descendants((child) => {
      if (child.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true;
    });
  });
  if (!hasFootnotesList) return slice;
  // No reference anywhere -> a definitions-only paste; canonicalizing would strip
  // the reference-less list (empty paste). Leave it untouched.
  if (!hasReference) return slice;
  const content = slice.content.toJSON();
  if (!Array.isArray(content)) return slice;
--- a/apps/server/src/core/page/services/page.service.ts
+++ b/apps/server/src/core/page/services/page.service.ts
@@ -1328,6 +1328,12 @@ export class PageService {
    // (Future consolidation, architecture B: the import services persist via a
    // different path; folding all of these into one "prepare JSON for persist"
    // helper would centralize the canonicalize call — left as follow-up.)
    //
    // ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
    // `canonicalizeFootnotes(json)` before writing (see createPage and
    // updatePageContent 'replace'); append/prepend FRAGMENT writes MUST NOT (it
    // would drop or duplicate footnotes — that is exactly why this is per-call-site
    // rather than a single wrapper here).
    try {
      jsonToNode(prosemirrorJson);
    } catch (err) {
--- a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.test.ts
+++ b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.test.ts
@@ -308,6 +308,31 @@ describe('canonicalizeFootnotes golden parity with footnoteSyncPlugin', () => {
    });
  }
  it('placement parity: the LIVE plugin leaves a list with NON-EMPTY content after it in place, and canonicalize agrees', () => {
    // Drives the real footnoteSyncPlugin (not a hand-authored expected): a single
    // canonical list with body content AFTER it must NOT be repositioned by the
    // plugin, and the server canonicalizer must agree (step-6 placement parity).
    const content = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'a' }, ref('x')),
        list(def('x', 'X')),
        para({ type: 'text', text: 'epilogue' }),
      ],
    };
    const steady = pluginSteadyState(content);
    // The plugin did NOT move the list to the end: a non-empty paragraph follows it.
    const types = steady.content.map((n: any) => n.type);
    const listPos = types.indexOf(FOOTNOTES_LIST_NAME);
    expect(listPos).toBeGreaterThanOrEqual(0);
    expect(listPos).toBeLessThan(types.length - 1);
    const after = steady.content[listPos + 1];
    expect(after.type).toBe('paragraph');
    expect(JSON.stringify(after)).toContain('epilogue');
    // The canonicalizer is a byte-for-byte no-op on that steady state (parity).
    expect(canonicalizeFootnotes(steady)).toEqual(steady);
  });
  it('the canonicalizer and the editor agree on reference order and definition set', () => {
    const content = {
      type: 'doc',
--- a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts
+++ b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts
@@ -18,9 +18,11 @@ import {
 * `PageService` create/update (`parseProsemirrorContent` for the JSON/markdown/
 * HTML REST write paths), and the client markdown PASTE path
 * (`markdown-clipboard.ts`). (The MCP package mirrors this canonicalizer in
- * `packages/mcp/src/lib/footnote-canonicalize.ts` for its own write paths —
+ * `packages/mcp/src/lib/footnote-canonicalize.ts` for its own FULL-document write
- * `markdownToProseMirror`, `update_page_json`, `docmost_transform`,
+ * paths — `markdownToProseMirrorCanonical` (the page markdown-import path; the
- * `insert_footnote` — see that file's header.) All of these are the root cause
+ * plain `markdownToProseMirror` primitive used for COMMENT bodies does NOT
 * canonicalize), `update_page_json`, `docmost_transform`, `insert_footnote`,
 * `copy_page_content` — see that file's header.) All of these are the root cause
 * of the symptom in the issue: footnotes rendered out of order (`1, 4, 2, 3, …`),
 * a raw trailing `[^id]: …` block, and orphan definitions, all of which are
 * simply the result of content written PAST the canonicalizer.
--- a/packages/mcp/build/client.js
+++ b/packages/mcp/build/client.js
@@ -1118,13 +1118,16 @@ export class DocmostClient {
            result = { footnoteId: r.footnoteId, reused: r.reused };
            return r.doc;
        });
        // The not-found path throws inside the transform (aborting mutatePage), so by
        // here `result` is always set.
        const r = result;
        return {
            success: true,
            modified: true,
            pageId,
-            footnoteId: result ? result.footnoteId : undefined,
+            footnoteId: r.footnoteId,
-            reused: result ? result.reused : undefined,
+            reused: r.reused,
-            message: result && result.reused
+            message: r.reused
                ? "Footnote inserted (reused an existing same-content definition)."
                : "Footnote inserted.",
            verify: mutation.verify,
@@ -2534,14 +2537,18 @@ export class DocmostClient {
                !Array.isArray(raw.content)) {
                throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })');
            }
            // Validate the RAW transform output FIRST (structure — including the
            // MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer
            // recurses without a depth limiter, so validating after it would turn a
            // too-deep doc into an opaque "Maximum call stack size exceeded" instead of
            // the intended "nesting exceeds the maximum depth" error.
            this.validateDocStructure(raw);
            this.validateDocUrls(raw);
            // Auto-canonicalize footnotes after the transform (idempotent): no write
            // path can leave footnotes out of order / orphaned / in a raw `[^id]`
            // block. In a dryRun preview this may surface footnote edits the script
            // author did not write (the canonicalizer tidied them) — that is expected.
            const result = canonicalizeFootnotes(raw);
            // Validate the returned doc before it can be written.
            this.validateDocStructure(result);
            this.validateDocUrls(result);
            newDoc = result;
            return result;
        };
--- a/packages/mcp/build/index.js
+++ b/packages/mcp/build/index.js
@@ -640,7 +640,10 @@ export function createDocmostMcpServer(config) {
            "commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
            "comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
            "footnote numbering + the single bottom list from reference order, drop " +
-            "orphans/duplicates — runs automatically after every transform too), and " +
+            "orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " +
            "applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " +
            "diff may therefore show footnote tidy-ups your script did not make, and " +
            "it is idempotent after the first run), and " +
            "insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
            "marker + dedup'd definition, list derived). Footnote convention: markers are " +
            "plain '[N]' text in the body; the notes are an orderedList under a " +
--- a/packages/mcp/build/lib/footnote-canonicalize.js
+++ b/packages/mcp/build/lib/footnote-canonicalize.js
@@ -23,6 +23,15 @@
 * was never enforced. Running this at the end of every write path closes that
 * gap; because it is idempotent, it is a no-op when the footnotes are already
 * canonical (no spurious mutations / git-sync churn).
 *
 * ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
 * `canonicalizeFootnotes(doc)` before writing — the current callers are
 * `markdownToProseMirrorCanonical` (page markdown import/update; the plain
 * `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
 * reference-less definition), `update_page_json`, `docmost_transform`,
 * `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
 * NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
 * and comment-vs-page nuances make a single naive wrapper unsafe).
 */
 const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
 const FOOTNOTES_LIST_NAME = "footnotesList";
--- a/packages/mcp/build/lib/transforms.js
+++ b/packages/mcp/build/lib/transforms.js
@@ -76,6 +76,27 @@ export function getList(doc, predicate) {
 * footnote inserter excludes via `beforeBlock`.)
 */
 const INLINE_ATOM_FORBIDDEN_BLOCKS = new Set(["codeBlock"]);
 /**
 * Footnote-notes subtrees the inline footnote inserter must never split into (at
 * any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring
 * a reference inside one of these would later be dropped as an orphan by the
 * canonicalizer, taking the existing definition's text with it.
 */
 const FOOTNOTE_NOTES_SUBTREES = new Set([
    "footnotesList",
    "footnoteDefinition",
 ]);
 /** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */
 function containsFootnoteNotes(node) {
    if (!isObject(node))
        return false;
    if (FOOTNOTE_NOTES_SUBTREES.has(node.type))
        return true;
    if (Array.isArray(node.content)) {
        return node.content.some((c) => containsFootnoteNotes(c));
    }
    return false;
 }
 /**
 * Insert `marker` as a PLAIN (unmarked) text run right after the first
 * occurrence of `anchor`.
@@ -136,6 +157,13 @@ function insertNodesAfterAnchor(doc, anchor, makeMiddle, opts = {}) {
            if (inserted || !isObject(container) || !Array.isArray(container.content)) {
                return;
            }
            // Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition):
            // never split into it, but keep `offset` aligned for any sibling text after
            // it within this block.
            if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) {
                offset += blockPlainText(container).length;
                return;
            }
            const inline = container.content;
            // Detect whether this array is an inline array (contains text nodes).
            const hasText = inline.some((n) => isObject(n) && n.type === "text");
@@ -553,18 +581,26 @@ export function insertInlineFootnote(doc, opts) {
    if (footnoteId == null)
        footnoteId = generateFootnoteId();
    // Insert the footnoteReference node directly after the anchor (mark-safe
-    // split); it hugs the preceding word with no leading space. The search is
+    // split); it hugs the preceding word with no leading space. Two guards keep the
-    // bounded to the BODY (before the first footnotesList) and refuses codeBlocks,
+    // inline atom out of the notes section and out of blocks that cannot hold it:
-    // so the inline atom can never be spliced into a footnote definition or a code
+    //  - beforeBlock bounds the search to the BODY, before the first top-level block
-    // block — which would persist a schema-invalid doc (insert_footnote skips
+    //    that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so
-    // validateDocStructure). When the only match is in such a place the insert is
+    //    a NESTED list or a bare definition also bounds the search, not just a
-    // refused and the write aborts cleanly (inserted:false).
+    //    top-level list;
-    const listIdx = Array.isArray(doc?.content)
+    //  - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition
-        ? doc.content.findIndex((n) => isObject(n) && n.type === "footnotesList")
+    //    subtree, so a reference is never glued inside an existing definition (which
    //    the canonicalizer would then drop as an orphan, losing that definition's
    //    prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a
    //    schema-invalid doc; insert_footnote skips validateDocStructure).
    // When the only anchor match is in such a place, the insert is refused and the
    // write aborts cleanly (inserted:false) instead of destroying content.
    const boundaryIdx = Array.isArray(doc?.content)
        ? doc.content.findIndex((n) => containsFootnoteNotes(n))
        : -1;
    const r = insertNodesAfterAnchor(doc, (opts.anchorText ?? "").trimEnd(), () => [{ type: "footnoteReference", attrs: { id: footnoteId } }], {
-        ...(listIdx >= 0 ? { beforeBlock: listIdx } : {}),
+        ...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}),
        forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
        skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
    });
    if (!r.inserted) {
        return { doc: clone(doc), inserted: false, footnoteId, reused };
--- a/packages/mcp/src/client.ts
+++ b/packages/mcp/src/client.ts
@@ -1420,13 +1420,16 @@ export class DocmostClient {
        return r.doc;
      },
    );
    // The not-found path throws inside the transform (aborting mutatePage), so by
    // here `result` is always set.
    const r = result!;
    return {
      success: true,
      modified: true,
      pageId,
-      footnoteId: result ? (result as any).footnoteId : undefined,
+      footnoteId: r.footnoteId,
-      reused: result ? (result as any).reused : undefined,
+      reused: r.reused,
-      message: result && (result as any).reused
+      message: r.reused
        ? "Footnote inserted (reused an existing same-content definition)."
        : "Footnote inserted.",
      verify: mutation.verify,
@@ -3130,14 +3133,18 @@ export class DocmostClient {
          'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })',
        );
      }
      // Validate the RAW transform output FIRST (structure — including the
      // MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer
      // recurses without a depth limiter, so validating after it would turn a
      // too-deep doc into an opaque "Maximum call stack size exceeded" instead of
      // the intended "nesting exceeds the maximum depth" error.
      this.validateDocStructure(raw);
      this.validateDocUrls(raw);
      // Auto-canonicalize footnotes after the transform (idempotent): no write
      // path can leave footnotes out of order / orphaned / in a raw `[^id]`
      // block. In a dryRun preview this may surface footnote edits the script
      // author did not write (the canonicalizer tidied them) — that is expected.
      const result = canonicalizeFootnotes(raw);
      // Validate the returned doc before it can be written.
      this.validateDocStructure(result);
      this.validateDocUrls(result);
      newDoc = result;
      return result;
    };
--- a/packages/mcp/src/index.ts
+++ b/packages/mcp/src/index.ts
@@ -895,7 +895,10 @@ server.registerTool(
      "commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
      "comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
      "footnote numbering + the single bottom list from reference order, drop " +
-      "orphans/duplicates — runs automatically after every transform too), and " +
+      "orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " +
      "applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " +
      "diff may therefore show footnote tidy-ups your script did not make, and " +
      "it is idempotent after the first run), and " +
      "insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
      "marker + dedup'd definition, list derived). Footnote convention: markers are " +
      "plain '[N]' text in the body; the notes are an orderedList under a " +
--- a/packages/mcp/src/lib/footnote-canonicalize.ts
+++ b/packages/mcp/src/lib/footnote-canonicalize.ts
@@ -23,6 +23,15 @@
 * was never enforced. Running this at the end of every write path closes that
 * gap; because it is idempotent, it is a no-op when the footnotes are already
 * canonical (no spurious mutations / git-sync churn).
 *
 * ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
 * `canonicalizeFootnotes(doc)` before writing — the current callers are
 * `markdownToProseMirrorCanonical` (page markdown import/update; the plain
 * `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
 * reference-less definition), `update_page_json`, `docmost_transform`,
 * `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
 * NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
 * and comment-vs-page nuances make a single naive wrapper unsafe).
 */
 const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
--- a/packages/mcp/src/lib/transforms.ts
+++ b/packages/mcp/src/lib/transforms.ts
@@ -96,6 +96,15 @@ export interface InsertMarkerOptions {
   * markers leave this unset (text is valid inside a codeBlock).
   */
  forbidBlockTypes?: ReadonlySet<string>;
  /**
   * Node types whose ENTIRE subtree is skipped during the walk (never split into,
   * at any depth). Used to keep the footnote inserter out of the notes section:
   * splitting text inside an existing `footnoteDefinition` would glue a reference
   * into a definition, which the canonicalizer then drops as an orphan together
   * with the definition's prose — silent loss of an existing footnote. Skipped
   * subtrees still advance the running offset so sibling text stays aligned.
   */
  skipSubtreeTypes?: ReadonlySet<string>;
 }
 /**
@@ -108,6 +117,27 @@ export interface InsertMarkerOptions {
 */
 const INLINE_ATOM_FORBIDDEN_BLOCKS: ReadonlySet<string> = new Set(["codeBlock"]);
 /**
 * Footnote-notes subtrees the inline footnote inserter must never split into (at
 * any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring
 * a reference inside one of these would later be dropped as an orphan by the
 * canonicalizer, taking the existing definition's text with it.
 */
 const FOOTNOTE_NOTES_SUBTREES: ReadonlySet<string> = new Set([
  "footnotesList",
  "footnoteDefinition",
 ]);
 /** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */
 function containsFootnoteNotes(node: any): boolean {
  if (!isObject(node)) return false;
  if (FOOTNOTE_NOTES_SUBTREES.has(node.type)) return true;
  if (Array.isArray(node.content)) {
    return node.content.some((c: any) => containsFootnoteNotes(c));
  }
  return false;
 }
 /**
 * Insert `marker` as a PLAIN (unmarked) text run right after the first
 * occurrence of `anchor`.
@@ -187,6 +217,13 @@ function insertNodesAfterAnchor(
      if (inserted || !isObject(container) || !Array.isArray(container.content)) {
        return;
      }
      // Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition):
      // never split into it, but keep `offset` aligned for any sibling text after
      // it within this block.
      if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) {
        offset += blockPlainText(container).length;
        return;
      }
      const inline = container.content;
      // Detect whether this array is an inline array (contains text nodes).
      const hasText = inline.some(
@@ -692,24 +729,30 @@ export function insertInlineFootnote(
  if (footnoteId == null) footnoteId = generateFootnoteId();
  // Insert the footnoteReference node directly after the anchor (mark-safe
-  // split); it hugs the preceding word with no leading space. The search is
+  // split); it hugs the preceding word with no leading space. Two guards keep the
-  // bounded to the BODY (before the first footnotesList) and refuses codeBlocks,
+  // inline atom out of the notes section and out of blocks that cannot hold it:
-  // so the inline atom can never be spliced into a footnote definition or a code
+  //  - beforeBlock bounds the search to the BODY, before the first top-level block
-  // block — which would persist a schema-invalid doc (insert_footnote skips
+  //    that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so
-  // validateDocStructure). When the only match is in such a place the insert is
+  //    a NESTED list or a bare definition also bounds the search, not just a
-  // refused and the write aborts cleanly (inserted:false).
+  //    top-level list;
-  const listIdx = Array.isArray(doc?.content)
+  //  - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition
-    ? doc.content.findIndex(
+  //    subtree, so a reference is never glued inside an existing definition (which
-        (n: any) => isObject(n) && n.type === "footnotesList",
+  //    the canonicalizer would then drop as an orphan, losing that definition's
-      )
+  //    prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a
  //    schema-invalid doc; insert_footnote skips validateDocStructure).
  // When the only anchor match is in such a place, the insert is refused and the
  // write aborts cleanly (inserted:false) instead of destroying content.
  const boundaryIdx = Array.isArray(doc?.content)
    ? doc.content.findIndex((n: any) => containsFootnoteNotes(n))
    : -1;
  const r = insertNodesAfterAnchor(
    doc,
    (opts.anchorText ?? "").trimEnd(),
    () => [{ type: "footnoteReference", attrs: { id: footnoteId } }],
    {
-      ...(listIdx >= 0 ? { beforeBlock: listIdx } : {}),
+      ...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}),
      forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
      skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
    },
  );
  if (!r.inserted) {
--- a/packages/mcp/test/unit/footnote-canonicalize.test.mjs
+++ b/packages/mcp/test/unit/footnote-canonicalize.test.mjs
@@ -2,7 +2,10 @@ import { test } from "node:test";
 import assert from "node:assert/strict";
 import { canonicalizeFootnotes } from "../../build/lib/footnote-canonicalize.js";
-import { footnoteContentKey } from "../../build/lib/footnote-authoring.js";
+import {
  footnoteContentKey,
  generateFootnoteId,
 } from "../../build/lib/footnote-authoring.js";
 import { insertInlineFootnote } from "../../build/lib/transforms.js";
 import { markdownToProseMirrorCanonical } from "../../build/lib/collaboration.js";
@@ -190,6 +193,66 @@ test("insertInlineFootnote: codeBlock match is skipped, a later body paragraph s
  assert.equal(findAll(r.doc, "footnoteReference").length, 1);
 });
 test("insertInlineFootnote: anchor only inside a NESTED definition -> refused, definition preserved", () => {
  // The footnotesList is nested in a callout (not top level) and the anchor text
  // appears ONLY inside that definition. The search must be bounded past the
  // notes subtree (recursive boundary) AND refuse to descend into the definition,
  // so it aborts cleanly instead of gluing a reference into the definition (which
  // canonicalize would then drop as an orphan, losing the definition's prose).
  const doc = {
    type: "doc",
    content: [
      para({ type: "text", text: "Body text here." }, ref("a")),
      {
        type: "callout",
        content: [list(def("a", "the unique anchor lives here"))],
      },
    ],
  };
  const r = insertInlineFootnote(doc, {
    anchorText: "unique anchor",
    text: "new note",
  });
  assert.equal(r.inserted, false);
  // The existing definition (and its text) is preserved untouched.
  assert.equal(findAll(r.doc, "footnoteDefinition").length, 1);
  assert.match(JSON.stringify(r.doc), /the unique anchor lives here/);
  assert.equal(findAll(r.doc, "footnoteReference").length, 1); // only the original
 });
 test("insertInlineFootnote: anchor only inside a BARE definition (no list wrapper) -> refused", () => {
  const doc = {
    type: "doc",
    content: [
      para({ type: "text", text: "Some body." }),
      {
        type: "footnoteDefinition",
        attrs: { id: "a" },
        content: [{ type: "paragraph", content: [{ type: "text", text: "orphan anchor text" }] }],
      },
    ],
  };
  const r = insertInlineFootnote(doc, { anchorText: "orphan anchor", text: "x" });
  assert.equal(r.inserted, false);
  assert.equal(findAll(r.doc, "footnoteDefinition").length, 1);
  assert.match(JSON.stringify(r.doc), /orphan anchor text/);
 });
 test("insertInlineFootnote: anchor in body BEFORE a nested list still inserts", () => {
  const doc = {
    type: "doc",
    content: [
      para({ type: "text", text: "The sky is blue." }, ref("a")),
      { type: "callout", content: [list(def("a", "note a"))] },
    ],
  };
  const r = insertInlineFootnote(doc, { anchorText: "blue", text: "Rayleigh." });
  assert.equal(r.inserted, true);
  // The new reference plus the original = two references; a single canonical list.
  assert.equal(findAll(r.doc, "footnoteReference").length, 2);
  assert.equal(findAll(r.doc, "footnotesList").length, 1);
 });
 test("markdown import (page path): out-of-order definitions render as a reference-ordered list", async () => {
  // References appear b, a, c in the body; definitions are written in a, b, c
  // order (the import order). The PAGE import path (markdownToProseMirrorCanonical)
@@ -207,3 +270,17 @@ test("markdown import (page path): out-of-order definitions render as a referenc
  assert.deepEqual(defIds(json), ["b", "a", "c"]);
  assert.equal(findAll(json, "footnotesList").length, 1);
 });
 test("generateFootnoteId: valid uuidv7 shape (version 7, variant 8..b) and unique", () => {
  // version nibble = 7; variant nibble in [8,9,a,b]; otherwise lowercase hex.
  const re =
    /^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/;
  const ids = new Set();
  for (let i = 0; i < 50; i++) {
    const id = generateFootnoteId();
    assert.match(id, re, `not a uuidv7: ${id}`);
    ids.add(id);
  }
  // Distinct across calls (random component makes collisions astronomically rare).
  assert.equal(ids.size, 50, "generated ids must be unique");
 });