diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts index 65d10481..e4d83288 100644 --- a/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts @@ -117,6 +117,32 @@ describe("canonicalizePastedFootnotes", () => { editor.destroy(); }); + it("leaves a definitions-ONLY paste untouched (no references -> no empty paste)", () => { + // A whole-block paste of ONLY definitions (a footnotesList with no matching + // footnoteReference anywhere in the selection). Canonicalizing it would strip + // the reference-less list -> an EMPTY paste, losing the pasted text. The hook + // must leave such a block untouched. + const { editor, schema } = makeSchema(); + const slice = new Slice( + Fragment.fromArray([ + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [ + schema.nodes.paragraph.create(null, [schema.text("note A")]), + ]), + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [ + schema.nodes.paragraph.create(null, [schema.text("note B")]), + ]), + ]), + ]), + 0, + 0, + ); + const out = canonicalizePastedFootnotes(slice, schema); + expect(out).toBe(slice); // returned unchanged (same reference, content kept) + expect(listIds(out)).toEqual(["a", "b"]); + editor.destroy(); + }); + it("leaves an open (partial) slice untouched even if it carries a list", () => { // An open slice (openStart/openEnd > 0) is a partial selection, not a // standalone block, so it is returned as-is BEFORE any footnote handling. diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.ts index 89b7c22e..c28d2690 100644 --- a/apps/client/src/features/editor/extensions/markdown-clipboard.ts +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.ts @@ -8,6 +8,7 @@ import { htmlToMarkdown, canonicalizeFootnotes, FOOTNOTES_LIST_NAME, + FOOTNOTE_REFERENCE_NAME, } from "@docmost/editor-ext"; import type { Schema } from "@tiptap/pm/model"; @@ -165,15 +166,28 @@ export const MarkdownClipboard = Extension.create({ * for the paste/sync plugins to merge. Residual: when the pasted block is merged * into a doc that already has footnotes, ordering RELATIVE to the pre-existing * footnotes is still governed by the sync plugin (which does not reorder). + * + * Also requires at least one footnoteReference in the selection: a definitions-ONLY + * paste (`[^a]: …` with no `[^a]` reference in the same block) has no references, + * so canonicalizeFootnotes would drop the whole list and the paste would come out + * EMPTY — losing the pasted text. Such a block is left as-is for the sync plugin. */ export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice { if (slice.openStart !== 0 || slice.openEnd !== 0) return slice; let hasFootnotesList = false; + let hasReference = false; slice.content.forEach((node) => { if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true; + if (node.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true; + node.descendants((child) => { + if (child.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true; + }); }); if (!hasFootnotesList) return slice; + // No reference anywhere -> a definitions-only paste; canonicalizing would strip + // the reference-less list (empty paste). Leave it untouched. + if (!hasReference) return slice; const content = slice.content.toJSON(); if (!Array.isArray(content)) return slice; diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts index 44382d8a..c6ee150d 100644 --- a/apps/server/src/core/page/services/page.service.ts +++ b/apps/server/src/core/page/services/page.service.ts @@ -1328,6 +1328,12 @@ export class PageService { // (Future consolidation, architecture B: the import services persist via a // different path; folding all of these into one "prepare JSON for persist" // helper would centralize the canonicalize call — left as follow-up.) + // + // ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call + // `canonicalizeFootnotes(json)` before writing (see createPage and + // updatePageContent 'replace'); append/prepend FRAGMENT writes MUST NOT (it + // would drop or duplicate footnotes — that is exactly why this is per-call-site + // rather than a single wrapper here). try { jsonToNode(prosemirrorJson); } catch (err) { diff --git a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.test.ts b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.test.ts index 80b56874..4bc17bb6 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.test.ts @@ -308,6 +308,31 @@ describe('canonicalizeFootnotes golden parity with footnoteSyncPlugin', () => { }); } + it('placement parity: the LIVE plugin leaves a list with NON-EMPTY content after it in place, and canonicalize agrees', () => { + // Drives the real footnoteSyncPlugin (not a hand-authored expected): a single + // canonical list with body content AFTER it must NOT be repositioned by the + // plugin, and the server canonicalizer must agree (step-6 placement parity). + const content = { + type: 'doc', + content: [ + para({ type: 'text', text: 'a' }, ref('x')), + list(def('x', 'X')), + para({ type: 'text', text: 'epilogue' }), + ], + }; + const steady = pluginSteadyState(content); + // The plugin did NOT move the list to the end: a non-empty paragraph follows it. + const types = steady.content.map((n: any) => n.type); + const listPos = types.indexOf(FOOTNOTES_LIST_NAME); + expect(listPos).toBeGreaterThanOrEqual(0); + expect(listPos).toBeLessThan(types.length - 1); + const after = steady.content[listPos + 1]; + expect(after.type).toBe('paragraph'); + expect(JSON.stringify(after)).toContain('epilogue'); + // The canonicalizer is a byte-for-byte no-op on that steady state (parity). + expect(canonicalizeFootnotes(steady)).toEqual(steady); + }); + it('the canonicalizer and the editor agree on reference order and definition set', () => { const content = { type: 'doc', diff --git a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts index 3d52ea5f..21719eba 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts @@ -18,9 +18,11 @@ import { * `PageService` create/update (`parseProsemirrorContent` for the JSON/markdown/ * HTML REST write paths), and the client markdown PASTE path * (`markdown-clipboard.ts`). (The MCP package mirrors this canonicalizer in - * `packages/mcp/src/lib/footnote-canonicalize.ts` for its own write paths — - * `markdownToProseMirror`, `update_page_json`, `docmost_transform`, - * `insert_footnote` — see that file's header.) All of these are the root cause + * `packages/mcp/src/lib/footnote-canonicalize.ts` for its own FULL-document write + * paths — `markdownToProseMirrorCanonical` (the page markdown-import path; the + * plain `markdownToProseMirror` primitive used for COMMENT bodies does NOT + * canonicalize), `update_page_json`, `docmost_transform`, `insert_footnote`, + * `copy_page_content` — see that file's header.) All of these are the root cause * of the symptom in the issue: footnotes rendered out of order (`1, 4, 2, 3, …`), * a raw trailing `[^id]: …` block, and orphan definitions, all of which are * simply the result of content written PAST the canonicalizer. diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index 7b8fc936..422b0e6c 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -1118,13 +1118,16 @@ export class DocmostClient { result = { footnoteId: r.footnoteId, reused: r.reused }; return r.doc; }); + // The not-found path throws inside the transform (aborting mutatePage), so by + // here `result` is always set. + const r = result; return { success: true, modified: true, pageId, - footnoteId: result ? result.footnoteId : undefined, - reused: result ? result.reused : undefined, - message: result && result.reused + footnoteId: r.footnoteId, + reused: r.reused, + message: r.reused ? "Footnote inserted (reused an existing same-content definition)." : "Footnote inserted.", verify: mutation.verify, @@ -2534,14 +2537,18 @@ export class DocmostClient { !Array.isArray(raw.content)) { throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })'); } + // Validate the RAW transform output FIRST (structure — including the + // MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer + // recurses without a depth limiter, so validating after it would turn a + // too-deep doc into an opaque "Maximum call stack size exceeded" instead of + // the intended "nesting exceeds the maximum depth" error. + this.validateDocStructure(raw); + this.validateDocUrls(raw); // Auto-canonicalize footnotes after the transform (idempotent): no write // path can leave footnotes out of order / orphaned / in a raw `[^id]` // block. In a dryRun preview this may surface footnote edits the script // author did not write (the canonicalizer tidied them) — that is expected. const result = canonicalizeFootnotes(raw); - // Validate the returned doc before it can be written. - this.validateDocStructure(result); - this.validateDocUrls(result); newDoc = result; return result; }; diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js index 58197d09..edcad9e6 100644 --- a/packages/mcp/build/index.js +++ b/packages/mcp/build/index.js @@ -640,7 +640,10 @@ export function createDocmostMcpServer(config) { "commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " + "comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " + "footnote numbering + the single bottom list from reference order, drop " + - "orphans/duplicates — runs automatically after every transform too), and " + + "orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " + + "applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " + + "diff may therefore show footnote tidy-ups your script did not make, and " + + "it is idempotent after the first run), and " + "insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " + "marker + dedup'd definition, list derived). Footnote convention: markers are " + "plain '[N]' text in the body; the notes are an orderedList under a " + diff --git a/packages/mcp/build/lib/footnote-canonicalize.js b/packages/mcp/build/lib/footnote-canonicalize.js index b6673082..1df76154 100644 --- a/packages/mcp/build/lib/footnote-canonicalize.js +++ b/packages/mcp/build/lib/footnote-canonicalize.js @@ -23,6 +23,15 @@ * was never enforced. Running this at the end of every write path closes that * gap; because it is idempotent, it is a no-op when the footnotes are already * canonical (no spurious mutations / git-sync churn). + * + * ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call + * `canonicalizeFootnotes(doc)` before writing — the current callers are + * `markdownToProseMirrorCanonical` (page markdown import/update; the plain + * `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a + * reference-less definition), `update_page_json`, `docmost_transform`, + * `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST + * NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment + * and comment-vs-page nuances make a single naive wrapper unsafe). */ const FOOTNOTE_REFERENCE_NAME = "footnoteReference"; const FOOTNOTES_LIST_NAME = "footnotesList"; diff --git a/packages/mcp/build/lib/transforms.js b/packages/mcp/build/lib/transforms.js index 935ff33d..c1b822ba 100644 --- a/packages/mcp/build/lib/transforms.js +++ b/packages/mcp/build/lib/transforms.js @@ -76,6 +76,27 @@ export function getList(doc, predicate) { * footnote inserter excludes via `beforeBlock`.) */ const INLINE_ATOM_FORBIDDEN_BLOCKS = new Set(["codeBlock"]); +/** + * Footnote-notes subtrees the inline footnote inserter must never split into (at + * any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring + * a reference inside one of these would later be dropped as an orphan by the + * canonicalizer, taking the existing definition's text with it. + */ +const FOOTNOTE_NOTES_SUBTREES = new Set([ + "footnotesList", + "footnoteDefinition", +]); +/** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */ +function containsFootnoteNotes(node) { + if (!isObject(node)) + return false; + if (FOOTNOTE_NOTES_SUBTREES.has(node.type)) + return true; + if (Array.isArray(node.content)) { + return node.content.some((c) => containsFootnoteNotes(c)); + } + return false; +} /** * Insert `marker` as a PLAIN (unmarked) text run right after the first * occurrence of `anchor`. @@ -136,6 +157,13 @@ function insertNodesAfterAnchor(doc, anchor, makeMiddle, opts = {}) { if (inserted || !isObject(container) || !Array.isArray(container.content)) { return; } + // Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition): + // never split into it, but keep `offset` aligned for any sibling text after + // it within this block. + if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) { + offset += blockPlainText(container).length; + return; + } const inline = container.content; // Detect whether this array is an inline array (contains text nodes). const hasText = inline.some((n) => isObject(n) && n.type === "text"); @@ -553,18 +581,26 @@ export function insertInlineFootnote(doc, opts) { if (footnoteId == null) footnoteId = generateFootnoteId(); // Insert the footnoteReference node directly after the anchor (mark-safe - // split); it hugs the preceding word with no leading space. The search is - // bounded to the BODY (before the first footnotesList) and refuses codeBlocks, - // so the inline atom can never be spliced into a footnote definition or a code - // block — which would persist a schema-invalid doc (insert_footnote skips - // validateDocStructure). When the only match is in such a place the insert is - // refused and the write aborts cleanly (inserted:false). - const listIdx = Array.isArray(doc?.content) - ? doc.content.findIndex((n) => isObject(n) && n.type === "footnotesList") + // split); it hugs the preceding word with no leading space. Two guards keep the + // inline atom out of the notes section and out of blocks that cannot hold it: + // - beforeBlock bounds the search to the BODY, before the first top-level block + // that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so + // a NESTED list or a bare definition also bounds the search, not just a + // top-level list; + // - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition + // subtree, so a reference is never glued inside an existing definition (which + // the canonicalizer would then drop as an orphan, losing that definition's + // prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a + // schema-invalid doc; insert_footnote skips validateDocStructure). + // When the only anchor match is in such a place, the insert is refused and the + // write aborts cleanly (inserted:false) instead of destroying content. + const boundaryIdx = Array.isArray(doc?.content) + ? doc.content.findIndex((n) => containsFootnoteNotes(n)) : -1; const r = insertNodesAfterAnchor(doc, (opts.anchorText ?? "").trimEnd(), () => [{ type: "footnoteReference", attrs: { id: footnoteId } }], { - ...(listIdx >= 0 ? { beforeBlock: listIdx } : {}), + ...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}), forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS, + skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES, }); if (!r.inserted) { return { doc: clone(doc), inserted: false, footnoteId, reused }; diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 2228b34e..85a5a3a4 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1420,13 +1420,16 @@ export class DocmostClient { return r.doc; }, ); + // The not-found path throws inside the transform (aborting mutatePage), so by + // here `result` is always set. + const r = result!; return { success: true, modified: true, pageId, - footnoteId: result ? (result as any).footnoteId : undefined, - reused: result ? (result as any).reused : undefined, - message: result && (result as any).reused + footnoteId: r.footnoteId, + reused: r.reused, + message: r.reused ? "Footnote inserted (reused an existing same-content definition)." : "Footnote inserted.", verify: mutation.verify, @@ -3130,14 +3133,18 @@ export class DocmostClient { 'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })', ); } + // Validate the RAW transform output FIRST (structure — including the + // MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer + // recurses without a depth limiter, so validating after it would turn a + // too-deep doc into an opaque "Maximum call stack size exceeded" instead of + // the intended "nesting exceeds the maximum depth" error. + this.validateDocStructure(raw); + this.validateDocUrls(raw); // Auto-canonicalize footnotes after the transform (idempotent): no write // path can leave footnotes out of order / orphaned / in a raw `[^id]` // block. In a dryRun preview this may surface footnote edits the script // author did not write (the canonicalizer tidied them) — that is expected. const result = canonicalizeFootnotes(raw); - // Validate the returned doc before it can be written. - this.validateDocStructure(result); - this.validateDocUrls(result); newDoc = result; return result; }; diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index d439229a..db29f143 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -895,7 +895,10 @@ server.registerTool( "commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " + "comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " + "footnote numbering + the single bottom list from reference order, drop " + - "orphans/duplicates — runs automatically after every transform too), and " + + "orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " + + "applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " + + "diff may therefore show footnote tidy-ups your script did not make, and " + + "it is idempotent after the first run), and " + "insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " + "marker + dedup'd definition, list derived). Footnote convention: markers are " + "plain '[N]' text in the body; the notes are an orderedList under a " + diff --git a/packages/mcp/src/lib/footnote-canonicalize.ts b/packages/mcp/src/lib/footnote-canonicalize.ts index 1e544a92..b4ae6e03 100644 --- a/packages/mcp/src/lib/footnote-canonicalize.ts +++ b/packages/mcp/src/lib/footnote-canonicalize.ts @@ -23,6 +23,15 @@ * was never enforced. Running this at the end of every write path closes that * gap; because it is idempotent, it is a no-op when the footnotes are already * canonical (no spurious mutations / git-sync churn). + * + * ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call + * `canonicalizeFootnotes(doc)` before writing — the current callers are + * `markdownToProseMirrorCanonical` (page markdown import/update; the plain + * `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a + * reference-less definition), `update_page_json`, `docmost_transform`, + * `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST + * NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment + * and comment-vs-page nuances make a single naive wrapper unsafe). */ const FOOTNOTE_REFERENCE_NAME = "footnoteReference"; diff --git a/packages/mcp/src/lib/transforms.ts b/packages/mcp/src/lib/transforms.ts index 2bb96393..e3ab0cff 100644 --- a/packages/mcp/src/lib/transforms.ts +++ b/packages/mcp/src/lib/transforms.ts @@ -96,6 +96,15 @@ export interface InsertMarkerOptions { * markers leave this unset (text is valid inside a codeBlock). */ forbidBlockTypes?: ReadonlySet; + /** + * Node types whose ENTIRE subtree is skipped during the walk (never split into, + * at any depth). Used to keep the footnote inserter out of the notes section: + * splitting text inside an existing `footnoteDefinition` would glue a reference + * into a definition, which the canonicalizer then drops as an orphan together + * with the definition's prose — silent loss of an existing footnote. Skipped + * subtrees still advance the running offset so sibling text stays aligned. + */ + skipSubtreeTypes?: ReadonlySet; } /** @@ -108,6 +117,27 @@ export interface InsertMarkerOptions { */ const INLINE_ATOM_FORBIDDEN_BLOCKS: ReadonlySet = new Set(["codeBlock"]); +/** + * Footnote-notes subtrees the inline footnote inserter must never split into (at + * any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring + * a reference inside one of these would later be dropped as an orphan by the + * canonicalizer, taking the existing definition's text with it. + */ +const FOOTNOTE_NOTES_SUBTREES: ReadonlySet = new Set([ + "footnotesList", + "footnoteDefinition", +]); + +/** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */ +function containsFootnoteNotes(node: any): boolean { + if (!isObject(node)) return false; + if (FOOTNOTE_NOTES_SUBTREES.has(node.type)) return true; + if (Array.isArray(node.content)) { + return node.content.some((c: any) => containsFootnoteNotes(c)); + } + return false; +} + /** * Insert `marker` as a PLAIN (unmarked) text run right after the first * occurrence of `anchor`. @@ -187,6 +217,13 @@ function insertNodesAfterAnchor( if (inserted || !isObject(container) || !Array.isArray(container.content)) { return; } + // Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition): + // never split into it, but keep `offset` aligned for any sibling text after + // it within this block. + if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) { + offset += blockPlainText(container).length; + return; + } const inline = container.content; // Detect whether this array is an inline array (contains text nodes). const hasText = inline.some( @@ -692,24 +729,30 @@ export function insertInlineFootnote( if (footnoteId == null) footnoteId = generateFootnoteId(); // Insert the footnoteReference node directly after the anchor (mark-safe - // split); it hugs the preceding word with no leading space. The search is - // bounded to the BODY (before the first footnotesList) and refuses codeBlocks, - // so the inline atom can never be spliced into a footnote definition or a code - // block — which would persist a schema-invalid doc (insert_footnote skips - // validateDocStructure). When the only match is in such a place the insert is - // refused and the write aborts cleanly (inserted:false). - const listIdx = Array.isArray(doc?.content) - ? doc.content.findIndex( - (n: any) => isObject(n) && n.type === "footnotesList", - ) + // split); it hugs the preceding word with no leading space. Two guards keep the + // inline atom out of the notes section and out of blocks that cannot hold it: + // - beforeBlock bounds the search to the BODY, before the first top-level block + // that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so + // a NESTED list or a bare definition also bounds the search, not just a + // top-level list; + // - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition + // subtree, so a reference is never glued inside an existing definition (which + // the canonicalizer would then drop as an orphan, losing that definition's + // prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a + // schema-invalid doc; insert_footnote skips validateDocStructure). + // When the only anchor match is in such a place, the insert is refused and the + // write aborts cleanly (inserted:false) instead of destroying content. + const boundaryIdx = Array.isArray(doc?.content) + ? doc.content.findIndex((n: any) => containsFootnoteNotes(n)) : -1; const r = insertNodesAfterAnchor( doc, (opts.anchorText ?? "").trimEnd(), () => [{ type: "footnoteReference", attrs: { id: footnoteId } }], { - ...(listIdx >= 0 ? { beforeBlock: listIdx } : {}), + ...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}), forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS, + skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES, }, ); if (!r.inserted) { diff --git a/packages/mcp/test/unit/footnote-canonicalize.test.mjs b/packages/mcp/test/unit/footnote-canonicalize.test.mjs index 3052aad2..e626b316 100644 --- a/packages/mcp/test/unit/footnote-canonicalize.test.mjs +++ b/packages/mcp/test/unit/footnote-canonicalize.test.mjs @@ -2,7 +2,10 @@ import { test } from "node:test"; import assert from "node:assert/strict"; import { canonicalizeFootnotes } from "../../build/lib/footnote-canonicalize.js"; -import { footnoteContentKey } from "../../build/lib/footnote-authoring.js"; +import { + footnoteContentKey, + generateFootnoteId, +} from "../../build/lib/footnote-authoring.js"; import { insertInlineFootnote } from "../../build/lib/transforms.js"; import { markdownToProseMirrorCanonical } from "../../build/lib/collaboration.js"; @@ -190,6 +193,66 @@ test("insertInlineFootnote: codeBlock match is skipped, a later body paragraph s assert.equal(findAll(r.doc, "footnoteReference").length, 1); }); +test("insertInlineFootnote: anchor only inside a NESTED definition -> refused, definition preserved", () => { + // The footnotesList is nested in a callout (not top level) and the anchor text + // appears ONLY inside that definition. The search must be bounded past the + // notes subtree (recursive boundary) AND refuse to descend into the definition, + // so it aborts cleanly instead of gluing a reference into the definition (which + // canonicalize would then drop as an orphan, losing the definition's prose). + const doc = { + type: "doc", + content: [ + para({ type: "text", text: "Body text here." }, ref("a")), + { + type: "callout", + content: [list(def("a", "the unique anchor lives here"))], + }, + ], + }; + const r = insertInlineFootnote(doc, { + anchorText: "unique anchor", + text: "new note", + }); + assert.equal(r.inserted, false); + // The existing definition (and its text) is preserved untouched. + assert.equal(findAll(r.doc, "footnoteDefinition").length, 1); + assert.match(JSON.stringify(r.doc), /the unique anchor lives here/); + assert.equal(findAll(r.doc, "footnoteReference").length, 1); // only the original +}); + +test("insertInlineFootnote: anchor only inside a BARE definition (no list wrapper) -> refused", () => { + const doc = { + type: "doc", + content: [ + para({ type: "text", text: "Some body." }), + { + type: "footnoteDefinition", + attrs: { id: "a" }, + content: [{ type: "paragraph", content: [{ type: "text", text: "orphan anchor text" }] }], + }, + ], + }; + const r = insertInlineFootnote(doc, { anchorText: "orphan anchor", text: "x" }); + assert.equal(r.inserted, false); + assert.equal(findAll(r.doc, "footnoteDefinition").length, 1); + assert.match(JSON.stringify(r.doc), /orphan anchor text/); +}); + +test("insertInlineFootnote: anchor in body BEFORE a nested list still inserts", () => { + const doc = { + type: "doc", + content: [ + para({ type: "text", text: "The sky is blue." }, ref("a")), + { type: "callout", content: [list(def("a", "note a"))] }, + ], + }; + const r = insertInlineFootnote(doc, { anchorText: "blue", text: "Rayleigh." }); + assert.equal(r.inserted, true); + // The new reference plus the original = two references; a single canonical list. + assert.equal(findAll(r.doc, "footnoteReference").length, 2); + assert.equal(findAll(r.doc, "footnotesList").length, 1); +}); + test("markdown import (page path): out-of-order definitions render as a reference-ordered list", async () => { // References appear b, a, c in the body; definitions are written in a, b, c // order (the import order). The PAGE import path (markdownToProseMirrorCanonical) @@ -207,3 +270,17 @@ test("markdown import (page path): out-of-order definitions render as a referenc assert.deepEqual(defIds(json), ["b", "a", "c"]); assert.equal(findAll(json, "footnotesList").length, 1); }); + +test("generateFootnoteId: valid uuidv7 shape (version 7, variant 8..b) and unique", () => { + // version nibble = 7; variant nibble in [8,9,a,b]; otherwise lowercase hex. + const re = + /^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/; + const ids = new Set(); + for (let i = 0; i < 50; i++) { + const id = generateFootnoteId(); + assert.match(id, re, `not a uuidv7: ${id}`); + ids.add(id); + } + // Distinct across calls (random component makes collisions astronomically rare). + assert.equal(ids.size, 50, "generated ids must be unique"); +});