fix(footnotes): guard insert against nested/bare definitions, skip definitions-only paste, doc + reorder fixes (#228)

Must-fix:
- insertInlineFootnote could glue a footnoteReference inside an EXISTING
  definition (nested footnotesList, or a bare footnoteDefinition with no list
  wrapper), which canonicalize then dropped as an orphan — silently losing the
  definition's prose. Now: (a) the body/notes boundary is computed from the first
  top-level block that IS or CONTAINS (recursively) a footnotesList/
  footnoteDefinition, not just a top-level list; and (b) the insertNodesAfterAnchor
  core skips footnotesList/footnoteDefinition subtrees entirely (skipSubtreeTypes),
  so an anchor whose only match is inside a definition -> inserted:false (clean
  abort, no write). Added tests: nested-definition, bare-definition, and
  body-before-nested-list-still-inserts.
- editor-ext footnote-canonicalize header listed `markdownToProseMirror` among the
  canonicalizing MCP paths; it is the NON-canonicalizing primitive. Replaced with
  `markdownToProseMirrorCanonical` (+ note that the plain primitive is for comment
  bodies) and added copy_page_content.
- Client paste: canonicalizePastedFootnotes now skips a definitions-ONLY paste
  (no footnoteReference anywhere) — canonicalizing it would strip the
  reference-less list and yield an EMPTY paste. Added a test.

Suggestions:
- docmost_transform now runs validateDocStructure/validateDocUrls on the RAW
  transform output BEFORE canonicalizeFootnotes (mirrors updatePageJson), so a
  too-deep doc gives the intended max-depth error instead of a stack overflow.
- docmost_transform tool description now states the RESULT is footnote-canonical
  (dryRun diff may show tidy-ups; idempotent after first run).
- insertFootnote: dropped the dead `result ? … : undefined` ternaries and the
  `as any` casts (result is always set by the time we return; the not-found path
  throws and aborts mutatePage). `const r = result!;`.

Tests / architecture:
- Added a LIVE-plugin golden case: the real footnoteSyncPlugin leaves a list with
  non-empty content after it in place, and canonicalize agrees (placement parity
  is now a driven property, not a hand-set expected).
- Added generateFootnoteId uuidv7 shape + uniqueness test.
- Item 9: added the ENFORCEMENT-RULE comments at the server parseProsemirrorContent
  and the MCP canonicalizer header (any NEW full-doc persist path MUST canonicalize;
  fragments/append/prepend and comment bodies MUST NOT). Kept per-call-site over a
  brittle grep CI test (the replace-vs-fragment + comment-vs-page nuance makes a
  single wrapper unsafe).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
a
2026-06-27 23:40:28 +03:00
parent 3fd66b4245
commit 9c1f952b2f
14 changed files with 305 additions and 38 deletions

View File

@@ -117,6 +117,32 @@ describe("canonicalizePastedFootnotes", () => {
editor.destroy(); editor.destroy();
}); });
it("leaves a definitions-ONLY paste untouched (no references -> no empty paste)", () => {
// A whole-block paste of ONLY definitions (a footnotesList with no matching
// footnoteReference anywhere in the selection). Canonicalizing it would strip
// the reference-less list -> an EMPTY paste, losing the pasted text. The hook
// must leave such a block untouched.
const { editor, schema } = makeSchema();
const slice = new Slice(
Fragment.fromArray([
schema.nodes[FOOTNOTES_LIST_NAME].create(null, [
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [
schema.nodes.paragraph.create(null, [schema.text("note A")]),
]),
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [
schema.nodes.paragraph.create(null, [schema.text("note B")]),
]),
]),
]),
0,
0,
);
const out = canonicalizePastedFootnotes(slice, schema);
expect(out).toBe(slice); // returned unchanged (same reference, content kept)
expect(listIds(out)).toEqual(["a", "b"]);
editor.destroy();
});
it("leaves an open (partial) slice untouched even if it carries a list", () => { it("leaves an open (partial) slice untouched even if it carries a list", () => {
// An open slice (openStart/openEnd > 0) is a partial selection, not a // An open slice (openStart/openEnd > 0) is a partial selection, not a
// standalone block, so it is returned as-is BEFORE any footnote handling. // standalone block, so it is returned as-is BEFORE any footnote handling.

View File

@@ -8,6 +8,7 @@ import {
htmlToMarkdown, htmlToMarkdown,
canonicalizeFootnotes, canonicalizeFootnotes,
FOOTNOTES_LIST_NAME, FOOTNOTES_LIST_NAME,
FOOTNOTE_REFERENCE_NAME,
} from "@docmost/editor-ext"; } from "@docmost/editor-ext";
import type { Schema } from "@tiptap/pm/model"; import type { Schema } from "@tiptap/pm/model";
@@ -165,15 +166,28 @@ export const MarkdownClipboard = Extension.create({
* for the paste/sync plugins to merge. Residual: when the pasted block is merged * for the paste/sync plugins to merge. Residual: when the pasted block is merged
* into a doc that already has footnotes, ordering RELATIVE to the pre-existing * into a doc that already has footnotes, ordering RELATIVE to the pre-existing
* footnotes is still governed by the sync plugin (which does not reorder). * footnotes is still governed by the sync plugin (which does not reorder).
*
* Also requires at least one footnoteReference in the selection: a definitions-ONLY
* paste (`[^a]: …` with no `[^a]` reference in the same block) has no references,
* so canonicalizeFootnotes would drop the whole list and the paste would come out
* EMPTY — losing the pasted text. Such a block is left as-is for the sync plugin.
*/ */
export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice { export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice {
if (slice.openStart !== 0 || slice.openEnd !== 0) return slice; if (slice.openStart !== 0 || slice.openEnd !== 0) return slice;
let hasFootnotesList = false; let hasFootnotesList = false;
let hasReference = false;
slice.content.forEach((node) => { slice.content.forEach((node) => {
if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true; if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true;
if (node.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true;
node.descendants((child) => {
if (child.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true;
});
}); });
if (!hasFootnotesList) return slice; if (!hasFootnotesList) return slice;
// No reference anywhere -> a definitions-only paste; canonicalizing would strip
// the reference-less list (empty paste). Leave it untouched.
if (!hasReference) return slice;
const content = slice.content.toJSON(); const content = slice.content.toJSON();
if (!Array.isArray(content)) return slice; if (!Array.isArray(content)) return slice;

View File

@@ -1328,6 +1328,12 @@ export class PageService {
// (Future consolidation, architecture B: the import services persist via a // (Future consolidation, architecture B: the import services persist via a
// different path; folding all of these into one "prepare JSON for persist" // different path; folding all of these into one "prepare JSON for persist"
// helper would centralize the canonicalize call — left as follow-up.) // helper would centralize the canonicalize call — left as follow-up.)
//
// ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
// `canonicalizeFootnotes(json)` before writing (see createPage and
// updatePageContent 'replace'); append/prepend FRAGMENT writes MUST NOT (it
// would drop or duplicate footnotes — that is exactly why this is per-call-site
// rather than a single wrapper here).
try { try {
jsonToNode(prosemirrorJson); jsonToNode(prosemirrorJson);
} catch (err) { } catch (err) {

View File

@@ -308,6 +308,31 @@ describe('canonicalizeFootnotes golden parity with footnoteSyncPlugin', () => {
}); });
} }
it('placement parity: the LIVE plugin leaves a list with NON-EMPTY content after it in place, and canonicalize agrees', () => {
// Drives the real footnoteSyncPlugin (not a hand-authored expected): a single
// canonical list with body content AFTER it must NOT be repositioned by the
// plugin, and the server canonicalizer must agree (step-6 placement parity).
const content = {
type: 'doc',
content: [
para({ type: 'text', text: 'a' }, ref('x')),
list(def('x', 'X')),
para({ type: 'text', text: 'epilogue' }),
],
};
const steady = pluginSteadyState(content);
// The plugin did NOT move the list to the end: a non-empty paragraph follows it.
const types = steady.content.map((n: any) => n.type);
const listPos = types.indexOf(FOOTNOTES_LIST_NAME);
expect(listPos).toBeGreaterThanOrEqual(0);
expect(listPos).toBeLessThan(types.length - 1);
const after = steady.content[listPos + 1];
expect(after.type).toBe('paragraph');
expect(JSON.stringify(after)).toContain('epilogue');
// The canonicalizer is a byte-for-byte no-op on that steady state (parity).
expect(canonicalizeFootnotes(steady)).toEqual(steady);
});
it('the canonicalizer and the editor agree on reference order and definition set', () => { it('the canonicalizer and the editor agree on reference order and definition set', () => {
const content = { const content = {
type: 'doc', type: 'doc',

View File

@@ -18,9 +18,11 @@ import {
* `PageService` create/update (`parseProsemirrorContent` for the JSON/markdown/ * `PageService` create/update (`parseProsemirrorContent` for the JSON/markdown/
* HTML REST write paths), and the client markdown PASTE path * HTML REST write paths), and the client markdown PASTE path
* (`markdown-clipboard.ts`). (The MCP package mirrors this canonicalizer in * (`markdown-clipboard.ts`). (The MCP package mirrors this canonicalizer in
* `packages/mcp/src/lib/footnote-canonicalize.ts` for its own write paths — * `packages/mcp/src/lib/footnote-canonicalize.ts` for its own FULL-document write
* `markdownToProseMirror`, `update_page_json`, `docmost_transform`, * paths — `markdownToProseMirrorCanonical` (the page markdown-import path; the
* `insert_footnote` — see that file's header.) All of these are the root cause * plain `markdownToProseMirror` primitive used for COMMENT bodies does NOT
* canonicalize), `update_page_json`, `docmost_transform`, `insert_footnote`,
* `copy_page_content` — see that file's header.) All of these are the root cause
* of the symptom in the issue: footnotes rendered out of order (`1, 4, 2, 3, …`), * of the symptom in the issue: footnotes rendered out of order (`1, 4, 2, 3, …`),
* a raw trailing `[^id]: …` block, and orphan definitions, all of which are * a raw trailing `[^id]: …` block, and orphan definitions, all of which are
* simply the result of content written PAST the canonicalizer. * simply the result of content written PAST the canonicalizer.

View File

@@ -1118,13 +1118,16 @@ export class DocmostClient {
result = { footnoteId: r.footnoteId, reused: r.reused }; result = { footnoteId: r.footnoteId, reused: r.reused };
return r.doc; return r.doc;
}); });
// The not-found path throws inside the transform (aborting mutatePage), so by
// here `result` is always set.
const r = result;
return { return {
success: true, success: true,
modified: true, modified: true,
pageId, pageId,
footnoteId: result ? result.footnoteId : undefined, footnoteId: r.footnoteId,
reused: result ? result.reused : undefined, reused: r.reused,
message: result && result.reused message: r.reused
? "Footnote inserted (reused an existing same-content definition)." ? "Footnote inserted (reused an existing same-content definition)."
: "Footnote inserted.", : "Footnote inserted.",
verify: mutation.verify, verify: mutation.verify,
@@ -2534,14 +2537,18 @@ export class DocmostClient {
!Array.isArray(raw.content)) { !Array.isArray(raw.content)) {
throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })'); throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })');
} }
// Validate the RAW transform output FIRST (structure — including the
// MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer
// recurses without a depth limiter, so validating after it would turn a
// too-deep doc into an opaque "Maximum call stack size exceeded" instead of
// the intended "nesting exceeds the maximum depth" error.
this.validateDocStructure(raw);
this.validateDocUrls(raw);
// Auto-canonicalize footnotes after the transform (idempotent): no write // Auto-canonicalize footnotes after the transform (idempotent): no write
// path can leave footnotes out of order / orphaned / in a raw `[^id]` // path can leave footnotes out of order / orphaned / in a raw `[^id]`
// block. In a dryRun preview this may surface footnote edits the script // block. In a dryRun preview this may surface footnote edits the script
// author did not write (the canonicalizer tidied them) — that is expected. // author did not write (the canonicalizer tidied them) — that is expected.
const result = canonicalizeFootnotes(raw); const result = canonicalizeFootnotes(raw);
// Validate the returned doc before it can be written.
this.validateDocStructure(result);
this.validateDocUrls(result);
newDoc = result; newDoc = result;
return result; return result;
}; };

View File

@@ -640,7 +640,10 @@ export function createDocmostMcpServer(config) {
"commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " + "commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
"comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " + "comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
"footnote numbering + the single bottom list from reference order, drop " + "footnote numbering + the single bottom list from reference order, drop " +
"orphans/duplicates — runs automatically after every transform too), and " + "orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " +
"applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " +
"diff may therefore show footnote tidy-ups your script did not make, and " +
"it is idempotent after the first run), and " +
"insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " + "insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
"marker + dedup'd definition, list derived). Footnote convention: markers are " + "marker + dedup'd definition, list derived). Footnote convention: markers are " +
"plain '[N]' text in the body; the notes are an orderedList under a " + "plain '[N]' text in the body; the notes are an orderedList under a " +

View File

@@ -23,6 +23,15 @@
* was never enforced. Running this at the end of every write path closes that * was never enforced. Running this at the end of every write path closes that
* gap; because it is idempotent, it is a no-op when the footnotes are already * gap; because it is idempotent, it is a no-op when the footnotes are already
* canonical (no spurious mutations / git-sync churn). * canonical (no spurious mutations / git-sync churn).
*
* ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
* `canonicalizeFootnotes(doc)` before writing — the current callers are
* `markdownToProseMirrorCanonical` (page markdown import/update; the plain
* `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
* reference-less definition), `update_page_json`, `docmost_transform`,
* `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
* NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
* and comment-vs-page nuances make a single naive wrapper unsafe).
*/ */
const FOOTNOTE_REFERENCE_NAME = "footnoteReference"; const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
const FOOTNOTES_LIST_NAME = "footnotesList"; const FOOTNOTES_LIST_NAME = "footnotesList";

View File

@@ -76,6 +76,27 @@ export function getList(doc, predicate) {
* footnote inserter excludes via `beforeBlock`.) * footnote inserter excludes via `beforeBlock`.)
*/ */
const INLINE_ATOM_FORBIDDEN_BLOCKS = new Set(["codeBlock"]); const INLINE_ATOM_FORBIDDEN_BLOCKS = new Set(["codeBlock"]);
/**
* Footnote-notes subtrees the inline footnote inserter must never split into (at
* any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring
* a reference inside one of these would later be dropped as an orphan by the
* canonicalizer, taking the existing definition's text with it.
*/
const FOOTNOTE_NOTES_SUBTREES = new Set([
"footnotesList",
"footnoteDefinition",
]);
/** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */
function containsFootnoteNotes(node) {
if (!isObject(node))
return false;
if (FOOTNOTE_NOTES_SUBTREES.has(node.type))
return true;
if (Array.isArray(node.content)) {
return node.content.some((c) => containsFootnoteNotes(c));
}
return false;
}
/** /**
* Insert `marker` as a PLAIN (unmarked) text run right after the first * Insert `marker` as a PLAIN (unmarked) text run right after the first
* occurrence of `anchor`. * occurrence of `anchor`.
@@ -136,6 +157,13 @@ function insertNodesAfterAnchor(doc, anchor, makeMiddle, opts = {}) {
if (inserted || !isObject(container) || !Array.isArray(container.content)) { if (inserted || !isObject(container) || !Array.isArray(container.content)) {
return; return;
} }
// Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition):
// never split into it, but keep `offset` aligned for any sibling text after
// it within this block.
if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) {
offset += blockPlainText(container).length;
return;
}
const inline = container.content; const inline = container.content;
// Detect whether this array is an inline array (contains text nodes). // Detect whether this array is an inline array (contains text nodes).
const hasText = inline.some((n) => isObject(n) && n.type === "text"); const hasText = inline.some((n) => isObject(n) && n.type === "text");
@@ -553,18 +581,26 @@ export function insertInlineFootnote(doc, opts) {
if (footnoteId == null) if (footnoteId == null)
footnoteId = generateFootnoteId(); footnoteId = generateFootnoteId();
// Insert the footnoteReference node directly after the anchor (mark-safe // Insert the footnoteReference node directly after the anchor (mark-safe
// split); it hugs the preceding word with no leading space. The search is // split); it hugs the preceding word with no leading space. Two guards keep the
// bounded to the BODY (before the first footnotesList) and refuses codeBlocks, // inline atom out of the notes section and out of blocks that cannot hold it:
// so the inline atom can never be spliced into a footnote definition or a code // - beforeBlock bounds the search to the BODY, before the first top-level block
// block — which would persist a schema-invalid doc (insert_footnote skips // that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so
// validateDocStructure). When the only match is in such a place the insert is // a NESTED list or a bare definition also bounds the search, not just a
// refused and the write aborts cleanly (inserted:false). // top-level list;
const listIdx = Array.isArray(doc?.content) // - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition
? doc.content.findIndex((n) => isObject(n) && n.type === "footnotesList") // subtree, so a reference is never glued inside an existing definition (which
// the canonicalizer would then drop as an orphan, losing that definition's
// prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a
// schema-invalid doc; insert_footnote skips validateDocStructure).
// When the only anchor match is in such a place, the insert is refused and the
// write aborts cleanly (inserted:false) instead of destroying content.
const boundaryIdx = Array.isArray(doc?.content)
? doc.content.findIndex((n) => containsFootnoteNotes(n))
: -1; : -1;
const r = insertNodesAfterAnchor(doc, (opts.anchorText ?? "").trimEnd(), () => [{ type: "footnoteReference", attrs: { id: footnoteId } }], { const r = insertNodesAfterAnchor(doc, (opts.anchorText ?? "").trimEnd(), () => [{ type: "footnoteReference", attrs: { id: footnoteId } }], {
...(listIdx >= 0 ? { beforeBlock: listIdx } : {}), ...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}),
forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS, forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
}); });
if (!r.inserted) { if (!r.inserted) {
return { doc: clone(doc), inserted: false, footnoteId, reused }; return { doc: clone(doc), inserted: false, footnoteId, reused };

View File

@@ -1420,13 +1420,16 @@ export class DocmostClient {
return r.doc; return r.doc;
}, },
); );
// The not-found path throws inside the transform (aborting mutatePage), so by
// here `result` is always set.
const r = result!;
return { return {
success: true, success: true,
modified: true, modified: true,
pageId, pageId,
footnoteId: result ? (result as any).footnoteId : undefined, footnoteId: r.footnoteId,
reused: result ? (result as any).reused : undefined, reused: r.reused,
message: result && (result as any).reused message: r.reused
? "Footnote inserted (reused an existing same-content definition)." ? "Footnote inserted (reused an existing same-content definition)."
: "Footnote inserted.", : "Footnote inserted.",
verify: mutation.verify, verify: mutation.verify,
@@ -3130,14 +3133,18 @@ export class DocmostClient {
'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })', 'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })',
); );
} }
// Validate the RAW transform output FIRST (structure — including the
// MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer
// recurses without a depth limiter, so validating after it would turn a
// too-deep doc into an opaque "Maximum call stack size exceeded" instead of
// the intended "nesting exceeds the maximum depth" error.
this.validateDocStructure(raw);
this.validateDocUrls(raw);
// Auto-canonicalize footnotes after the transform (idempotent): no write // Auto-canonicalize footnotes after the transform (idempotent): no write
// path can leave footnotes out of order / orphaned / in a raw `[^id]` // path can leave footnotes out of order / orphaned / in a raw `[^id]`
// block. In a dryRun preview this may surface footnote edits the script // block. In a dryRun preview this may surface footnote edits the script
// author did not write (the canonicalizer tidied them) — that is expected. // author did not write (the canonicalizer tidied them) — that is expected.
const result = canonicalizeFootnotes(raw); const result = canonicalizeFootnotes(raw);
// Validate the returned doc before it can be written.
this.validateDocStructure(result);
this.validateDocUrls(result);
newDoc = result; newDoc = result;
return result; return result;
}; };

View File

@@ -895,7 +895,10 @@ server.registerTool(
"commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " + "commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
"comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " + "comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
"footnote numbering + the single bottom list from reference order, drop " + "footnote numbering + the single bottom list from reference order, drop " +
"orphans/duplicates — runs automatically after every transform too), and " + "orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " +
"applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " +
"diff may therefore show footnote tidy-ups your script did not make, and " +
"it is idempotent after the first run), and " +
"insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " + "insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
"marker + dedup'd definition, list derived). Footnote convention: markers are " + "marker + dedup'd definition, list derived). Footnote convention: markers are " +
"plain '[N]' text in the body; the notes are an orderedList under a " + "plain '[N]' text in the body; the notes are an orderedList under a " +

View File

@@ -23,6 +23,15 @@
* was never enforced. Running this at the end of every write path closes that * was never enforced. Running this at the end of every write path closes that
* gap; because it is idempotent, it is a no-op when the footnotes are already * gap; because it is idempotent, it is a no-op when the footnotes are already
* canonical (no spurious mutations / git-sync churn). * canonical (no spurious mutations / git-sync churn).
*
* ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
* `canonicalizeFootnotes(doc)` before writing — the current callers are
* `markdownToProseMirrorCanonical` (page markdown import/update; the plain
* `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
* reference-less definition), `update_page_json`, `docmost_transform`,
* `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
* NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
* and comment-vs-page nuances make a single naive wrapper unsafe).
*/ */
const FOOTNOTE_REFERENCE_NAME = "footnoteReference"; const FOOTNOTE_REFERENCE_NAME = "footnoteReference";

View File

@@ -96,6 +96,15 @@ export interface InsertMarkerOptions {
* markers leave this unset (text is valid inside a codeBlock). * markers leave this unset (text is valid inside a codeBlock).
*/ */
forbidBlockTypes?: ReadonlySet<string>; forbidBlockTypes?: ReadonlySet<string>;
/**
* Node types whose ENTIRE subtree is skipped during the walk (never split into,
* at any depth). Used to keep the footnote inserter out of the notes section:
* splitting text inside an existing `footnoteDefinition` would glue a reference
* into a definition, which the canonicalizer then drops as an orphan together
* with the definition's prose — silent loss of an existing footnote. Skipped
* subtrees still advance the running offset so sibling text stays aligned.
*/
skipSubtreeTypes?: ReadonlySet<string>;
} }
/** /**
@@ -108,6 +117,27 @@ export interface InsertMarkerOptions {
*/ */
const INLINE_ATOM_FORBIDDEN_BLOCKS: ReadonlySet<string> = new Set(["codeBlock"]); const INLINE_ATOM_FORBIDDEN_BLOCKS: ReadonlySet<string> = new Set(["codeBlock"]);
/**
* Footnote-notes subtrees the inline footnote inserter must never split into (at
* any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring
* a reference inside one of these would later be dropped as an orphan by the
* canonicalizer, taking the existing definition's text with it.
*/
const FOOTNOTE_NOTES_SUBTREES: ReadonlySet<string> = new Set([
"footnotesList",
"footnoteDefinition",
]);
/** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */
function containsFootnoteNotes(node: any): boolean {
if (!isObject(node)) return false;
if (FOOTNOTE_NOTES_SUBTREES.has(node.type)) return true;
if (Array.isArray(node.content)) {
return node.content.some((c: any) => containsFootnoteNotes(c));
}
return false;
}
/** /**
* Insert `marker` as a PLAIN (unmarked) text run right after the first * Insert `marker` as a PLAIN (unmarked) text run right after the first
* occurrence of `anchor`. * occurrence of `anchor`.
@@ -187,6 +217,13 @@ function insertNodesAfterAnchor(
if (inserted || !isObject(container) || !Array.isArray(container.content)) { if (inserted || !isObject(container) || !Array.isArray(container.content)) {
return; return;
} }
// Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition):
// never split into it, but keep `offset` aligned for any sibling text after
// it within this block.
if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) {
offset += blockPlainText(container).length;
return;
}
const inline = container.content; const inline = container.content;
// Detect whether this array is an inline array (contains text nodes). // Detect whether this array is an inline array (contains text nodes).
const hasText = inline.some( const hasText = inline.some(
@@ -692,24 +729,30 @@ export function insertInlineFootnote(
if (footnoteId == null) footnoteId = generateFootnoteId(); if (footnoteId == null) footnoteId = generateFootnoteId();
// Insert the footnoteReference node directly after the anchor (mark-safe // Insert the footnoteReference node directly after the anchor (mark-safe
// split); it hugs the preceding word with no leading space. The search is // split); it hugs the preceding word with no leading space. Two guards keep the
// bounded to the BODY (before the first footnotesList) and refuses codeBlocks, // inline atom out of the notes section and out of blocks that cannot hold it:
// so the inline atom can never be spliced into a footnote definition or a code // - beforeBlock bounds the search to the BODY, before the first top-level block
// block — which would persist a schema-invalid doc (insert_footnote skips // that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so
// validateDocStructure). When the only match is in such a place the insert is // a NESTED list or a bare definition also bounds the search, not just a
// refused and the write aborts cleanly (inserted:false). // top-level list;
const listIdx = Array.isArray(doc?.content) // - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition
? doc.content.findIndex( // subtree, so a reference is never glued inside an existing definition (which
(n: any) => isObject(n) && n.type === "footnotesList", // the canonicalizer would then drop as an orphan, losing that definition's
) // prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a
// schema-invalid doc; insert_footnote skips validateDocStructure).
// When the only anchor match is in such a place, the insert is refused and the
// write aborts cleanly (inserted:false) instead of destroying content.
const boundaryIdx = Array.isArray(doc?.content)
? doc.content.findIndex((n: any) => containsFootnoteNotes(n))
: -1; : -1;
const r = insertNodesAfterAnchor( const r = insertNodesAfterAnchor(
doc, doc,
(opts.anchorText ?? "").trimEnd(), (opts.anchorText ?? "").trimEnd(),
() => [{ type: "footnoteReference", attrs: { id: footnoteId } }], () => [{ type: "footnoteReference", attrs: { id: footnoteId } }],
{ {
...(listIdx >= 0 ? { beforeBlock: listIdx } : {}), ...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}),
forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS, forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
}, },
); );
if (!r.inserted) { if (!r.inserted) {

View File

@@ -2,7 +2,10 @@ import { test } from "node:test";
import assert from "node:assert/strict"; import assert from "node:assert/strict";
import { canonicalizeFootnotes } from "../../build/lib/footnote-canonicalize.js"; import { canonicalizeFootnotes } from "../../build/lib/footnote-canonicalize.js";
import { footnoteContentKey } from "../../build/lib/footnote-authoring.js"; import {
footnoteContentKey,
generateFootnoteId,
} from "../../build/lib/footnote-authoring.js";
import { insertInlineFootnote } from "../../build/lib/transforms.js"; import { insertInlineFootnote } from "../../build/lib/transforms.js";
import { markdownToProseMirrorCanonical } from "../../build/lib/collaboration.js"; import { markdownToProseMirrorCanonical } from "../../build/lib/collaboration.js";
@@ -190,6 +193,66 @@ test("insertInlineFootnote: codeBlock match is skipped, a later body paragraph s
assert.equal(findAll(r.doc, "footnoteReference").length, 1); assert.equal(findAll(r.doc, "footnoteReference").length, 1);
}); });
test("insertInlineFootnote: anchor only inside a NESTED definition -> refused, definition preserved", () => {
// The footnotesList is nested in a callout (not top level) and the anchor text
// appears ONLY inside that definition. The search must be bounded past the
// notes subtree (recursive boundary) AND refuse to descend into the definition,
// so it aborts cleanly instead of gluing a reference into the definition (which
// canonicalize would then drop as an orphan, losing the definition's prose).
const doc = {
type: "doc",
content: [
para({ type: "text", text: "Body text here." }, ref("a")),
{
type: "callout",
content: [list(def("a", "the unique anchor lives here"))],
},
],
};
const r = insertInlineFootnote(doc, {
anchorText: "unique anchor",
text: "new note",
});
assert.equal(r.inserted, false);
// The existing definition (and its text) is preserved untouched.
assert.equal(findAll(r.doc, "footnoteDefinition").length, 1);
assert.match(JSON.stringify(r.doc), /the unique anchor lives here/);
assert.equal(findAll(r.doc, "footnoteReference").length, 1); // only the original
});
test("insertInlineFootnote: anchor only inside a BARE definition (no list wrapper) -> refused", () => {
const doc = {
type: "doc",
content: [
para({ type: "text", text: "Some body." }),
{
type: "footnoteDefinition",
attrs: { id: "a" },
content: [{ type: "paragraph", content: [{ type: "text", text: "orphan anchor text" }] }],
},
],
};
const r = insertInlineFootnote(doc, { anchorText: "orphan anchor", text: "x" });
assert.equal(r.inserted, false);
assert.equal(findAll(r.doc, "footnoteDefinition").length, 1);
assert.match(JSON.stringify(r.doc), /orphan anchor text/);
});
test("insertInlineFootnote: anchor in body BEFORE a nested list still inserts", () => {
const doc = {
type: "doc",
content: [
para({ type: "text", text: "The sky is blue." }, ref("a")),
{ type: "callout", content: [list(def("a", "note a"))] },
],
};
const r = insertInlineFootnote(doc, { anchorText: "blue", text: "Rayleigh." });
assert.equal(r.inserted, true);
// The new reference plus the original = two references; a single canonical list.
assert.equal(findAll(r.doc, "footnoteReference").length, 2);
assert.equal(findAll(r.doc, "footnotesList").length, 1);
});
test("markdown import (page path): out-of-order definitions render as a reference-ordered list", async () => { test("markdown import (page path): out-of-order definitions render as a reference-ordered list", async () => {
// References appear b, a, c in the body; definitions are written in a, b, c // References appear b, a, c in the body; definitions are written in a, b, c
// order (the import order). The PAGE import path (markdownToProseMirrorCanonical) // order (the import order). The PAGE import path (markdownToProseMirrorCanonical)
@@ -207,3 +270,17 @@ test("markdown import (page path): out-of-order definitions render as a referenc
assert.deepEqual(defIds(json), ["b", "a", "c"]); assert.deepEqual(defIds(json), ["b", "a", "c"]);
assert.equal(findAll(json, "footnotesList").length, 1); assert.equal(findAll(json, "footnotesList").length, 1);
}); });
test("generateFootnoteId: valid uuidv7 shape (version 7, variant 8..b) and unique", () => {
// version nibble = 7; variant nibble in [8,9,a,b]; otherwise lowercase hex.
const re =
/^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/;
const ids = new Set();
for (let i = 0; i < 50; i++) {
const id = generateFootnoteId();
assert.match(id, re, `not a uuidv7: ${id}`);
ids.add(id);
}
// Distinct across calls (random component makes collisions astronomically rare).
assert.equal(ids.size, 50, "generated ids must be unique");
});