fix(footnotes): guard insert against nested/bare definitions, skip definitions-only paste, doc + reorder fixes (#228)
Must-fix: - insertInlineFootnote could glue a footnoteReference inside an EXISTING definition (nested footnotesList, or a bare footnoteDefinition with no list wrapper), which canonicalize then dropped as an orphan — silently losing the definition's prose. Now: (a) the body/notes boundary is computed from the first top-level block that IS or CONTAINS (recursively) a footnotesList/ footnoteDefinition, not just a top-level list; and (b) the insertNodesAfterAnchor core skips footnotesList/footnoteDefinition subtrees entirely (skipSubtreeTypes), so an anchor whose only match is inside a definition -> inserted:false (clean abort, no write). Added tests: nested-definition, bare-definition, and body-before-nested-list-still-inserts. - editor-ext footnote-canonicalize header listed `markdownToProseMirror` among the canonicalizing MCP paths; it is the NON-canonicalizing primitive. Replaced with `markdownToProseMirrorCanonical` (+ note that the plain primitive is for comment bodies) and added copy_page_content. - Client paste: canonicalizePastedFootnotes now skips a definitions-ONLY paste (no footnoteReference anywhere) — canonicalizing it would strip the reference-less list and yield an EMPTY paste. Added a test. Suggestions: - docmost_transform now runs validateDocStructure/validateDocUrls on the RAW transform output BEFORE canonicalizeFootnotes (mirrors updatePageJson), so a too-deep doc gives the intended max-depth error instead of a stack overflow. - docmost_transform tool description now states the RESULT is footnote-canonical (dryRun diff may show tidy-ups; idempotent after first run). - insertFootnote: dropped the dead `result ? … : undefined` ternaries and the `as any` casts (result is always set by the time we return; the not-found path throws and aborts mutatePage). `const r = result!;`. Tests / architecture: - Added a LIVE-plugin golden case: the real footnoteSyncPlugin leaves a list with non-empty content after it in place, and canonicalize agrees (placement parity is now a driven property, not a hand-set expected). - Added generateFootnoteId uuidv7 shape + uniqueness test. - Item 9: added the ENFORCEMENT-RULE comments at the server parseProsemirrorContent and the MCP canonicalizer header (any NEW full-doc persist path MUST canonicalize; fragments/append/prepend and comment bodies MUST NOT). Kept per-call-site over a brittle grep CI test (the replace-vs-fragment + comment-vs-page nuance makes a single wrapper unsafe). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -117,6 +117,32 @@ describe("canonicalizePastedFootnotes", () => {
|
||||
editor.destroy();
|
||||
});
|
||||
|
||||
it("leaves a definitions-ONLY paste untouched (no references -> no empty paste)", () => {
|
||||
// A whole-block paste of ONLY definitions (a footnotesList with no matching
|
||||
// footnoteReference anywhere in the selection). Canonicalizing it would strip
|
||||
// the reference-less list -> an EMPTY paste, losing the pasted text. The hook
|
||||
// must leave such a block untouched.
|
||||
const { editor, schema } = makeSchema();
|
||||
const slice = new Slice(
|
||||
Fragment.fromArray([
|
||||
schema.nodes[FOOTNOTES_LIST_NAME].create(null, [
|
||||
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [
|
||||
schema.nodes.paragraph.create(null, [schema.text("note A")]),
|
||||
]),
|
||||
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [
|
||||
schema.nodes.paragraph.create(null, [schema.text("note B")]),
|
||||
]),
|
||||
]),
|
||||
]),
|
||||
0,
|
||||
0,
|
||||
);
|
||||
const out = canonicalizePastedFootnotes(slice, schema);
|
||||
expect(out).toBe(slice); // returned unchanged (same reference, content kept)
|
||||
expect(listIds(out)).toEqual(["a", "b"]);
|
||||
editor.destroy();
|
||||
});
|
||||
|
||||
it("leaves an open (partial) slice untouched even if it carries a list", () => {
|
||||
// An open slice (openStart/openEnd > 0) is a partial selection, not a
|
||||
// standalone block, so it is returned as-is BEFORE any footnote handling.
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
htmlToMarkdown,
|
||||
canonicalizeFootnotes,
|
||||
FOOTNOTES_LIST_NAME,
|
||||
FOOTNOTE_REFERENCE_NAME,
|
||||
} from "@docmost/editor-ext";
|
||||
import type { Schema } from "@tiptap/pm/model";
|
||||
|
||||
@@ -165,15 +166,28 @@ export const MarkdownClipboard = Extension.create({
|
||||
* for the paste/sync plugins to merge. Residual: when the pasted block is merged
|
||||
* into a doc that already has footnotes, ordering RELATIVE to the pre-existing
|
||||
* footnotes is still governed by the sync plugin (which does not reorder).
|
||||
*
|
||||
* Also requires at least one footnoteReference in the selection: a definitions-ONLY
|
||||
* paste (`[^a]: …` with no `[^a]` reference in the same block) has no references,
|
||||
* so canonicalizeFootnotes would drop the whole list and the paste would come out
|
||||
* EMPTY — losing the pasted text. Such a block is left as-is for the sync plugin.
|
||||
*/
|
||||
export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice {
|
||||
if (slice.openStart !== 0 || slice.openEnd !== 0) return slice;
|
||||
|
||||
let hasFootnotesList = false;
|
||||
let hasReference = false;
|
||||
slice.content.forEach((node) => {
|
||||
if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true;
|
||||
if (node.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true;
|
||||
node.descendants((child) => {
|
||||
if (child.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true;
|
||||
});
|
||||
});
|
||||
if (!hasFootnotesList) return slice;
|
||||
// No reference anywhere -> a definitions-only paste; canonicalizing would strip
|
||||
// the reference-less list (empty paste). Leave it untouched.
|
||||
if (!hasReference) return slice;
|
||||
|
||||
const content = slice.content.toJSON();
|
||||
if (!Array.isArray(content)) return slice;
|
||||
|
||||
@@ -1328,6 +1328,12 @@ export class PageService {
|
||||
// (Future consolidation, architecture B: the import services persist via a
|
||||
// different path; folding all of these into one "prepare JSON for persist"
|
||||
// helper would centralize the canonicalize call — left as follow-up.)
|
||||
//
|
||||
// ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
|
||||
// `canonicalizeFootnotes(json)` before writing (see createPage and
|
||||
// updatePageContent 'replace'); append/prepend FRAGMENT writes MUST NOT (it
|
||||
// would drop or duplicate footnotes — that is exactly why this is per-call-site
|
||||
// rather than a single wrapper here).
|
||||
try {
|
||||
jsonToNode(prosemirrorJson);
|
||||
} catch (err) {
|
||||
|
||||
@@ -308,6 +308,31 @@ describe('canonicalizeFootnotes golden parity with footnoteSyncPlugin', () => {
|
||||
});
|
||||
}
|
||||
|
||||
it('placement parity: the LIVE plugin leaves a list with NON-EMPTY content after it in place, and canonicalize agrees', () => {
|
||||
// Drives the real footnoteSyncPlugin (not a hand-authored expected): a single
|
||||
// canonical list with body content AFTER it must NOT be repositioned by the
|
||||
// plugin, and the server canonicalizer must agree (step-6 placement parity).
|
||||
const content = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
para({ type: 'text', text: 'a' }, ref('x')),
|
||||
list(def('x', 'X')),
|
||||
para({ type: 'text', text: 'epilogue' }),
|
||||
],
|
||||
};
|
||||
const steady = pluginSteadyState(content);
|
||||
// The plugin did NOT move the list to the end: a non-empty paragraph follows it.
|
||||
const types = steady.content.map((n: any) => n.type);
|
||||
const listPos = types.indexOf(FOOTNOTES_LIST_NAME);
|
||||
expect(listPos).toBeGreaterThanOrEqual(0);
|
||||
expect(listPos).toBeLessThan(types.length - 1);
|
||||
const after = steady.content[listPos + 1];
|
||||
expect(after.type).toBe('paragraph');
|
||||
expect(JSON.stringify(after)).toContain('epilogue');
|
||||
// The canonicalizer is a byte-for-byte no-op on that steady state (parity).
|
||||
expect(canonicalizeFootnotes(steady)).toEqual(steady);
|
||||
});
|
||||
|
||||
it('the canonicalizer and the editor agree on reference order and definition set', () => {
|
||||
const content = {
|
||||
type: 'doc',
|
||||
|
||||
@@ -18,9 +18,11 @@ import {
|
||||
* `PageService` create/update (`parseProsemirrorContent` for the JSON/markdown/
|
||||
* HTML REST write paths), and the client markdown PASTE path
|
||||
* (`markdown-clipboard.ts`). (The MCP package mirrors this canonicalizer in
|
||||
* `packages/mcp/src/lib/footnote-canonicalize.ts` for its own write paths —
|
||||
* `markdownToProseMirror`, `update_page_json`, `docmost_transform`,
|
||||
* `insert_footnote` — see that file's header.) All of these are the root cause
|
||||
* `packages/mcp/src/lib/footnote-canonicalize.ts` for its own FULL-document write
|
||||
* paths — `markdownToProseMirrorCanonical` (the page markdown-import path; the
|
||||
* plain `markdownToProseMirror` primitive used for COMMENT bodies does NOT
|
||||
* canonicalize), `update_page_json`, `docmost_transform`, `insert_footnote`,
|
||||
* `copy_page_content` — see that file's header.) All of these are the root cause
|
||||
* of the symptom in the issue: footnotes rendered out of order (`1, 4, 2, 3, …`),
|
||||
* a raw trailing `[^id]: …` block, and orphan definitions, all of which are
|
||||
* simply the result of content written PAST the canonicalizer.
|
||||
|
||||
@@ -1118,13 +1118,16 @@ export class DocmostClient {
|
||||
result = { footnoteId: r.footnoteId, reused: r.reused };
|
||||
return r.doc;
|
||||
});
|
||||
// The not-found path throws inside the transform (aborting mutatePage), so by
|
||||
// here `result` is always set.
|
||||
const r = result;
|
||||
return {
|
||||
success: true,
|
||||
modified: true,
|
||||
pageId,
|
||||
footnoteId: result ? result.footnoteId : undefined,
|
||||
reused: result ? result.reused : undefined,
|
||||
message: result && result.reused
|
||||
footnoteId: r.footnoteId,
|
||||
reused: r.reused,
|
||||
message: r.reused
|
||||
? "Footnote inserted (reused an existing same-content definition)."
|
||||
: "Footnote inserted.",
|
||||
verify: mutation.verify,
|
||||
@@ -2534,14 +2537,18 @@ export class DocmostClient {
|
||||
!Array.isArray(raw.content)) {
|
||||
throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })');
|
||||
}
|
||||
// Validate the RAW transform output FIRST (structure — including the
|
||||
// MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer
|
||||
// recurses without a depth limiter, so validating after it would turn a
|
||||
// too-deep doc into an opaque "Maximum call stack size exceeded" instead of
|
||||
// the intended "nesting exceeds the maximum depth" error.
|
||||
this.validateDocStructure(raw);
|
||||
this.validateDocUrls(raw);
|
||||
// Auto-canonicalize footnotes after the transform (idempotent): no write
|
||||
// path can leave footnotes out of order / orphaned / in a raw `[^id]`
|
||||
// block. In a dryRun preview this may surface footnote edits the script
|
||||
// author did not write (the canonicalizer tidied them) — that is expected.
|
||||
const result = canonicalizeFootnotes(raw);
|
||||
// Validate the returned doc before it can be written.
|
||||
this.validateDocStructure(result);
|
||||
this.validateDocUrls(result);
|
||||
newDoc = result;
|
||||
return result;
|
||||
};
|
||||
|
||||
@@ -640,7 +640,10 @@ export function createDocmostMcpServer(config) {
|
||||
"commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
|
||||
"comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
|
||||
"footnote numbering + the single bottom list from reference order, drop " +
|
||||
"orphans/duplicates — runs automatically after every transform too), and " +
|
||||
"orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " +
|
||||
"applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " +
|
||||
"diff may therefore show footnote tidy-ups your script did not make, and " +
|
||||
"it is idempotent after the first run), and " +
|
||||
"insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
|
||||
"marker + dedup'd definition, list derived). Footnote convention: markers are " +
|
||||
"plain '[N]' text in the body; the notes are an orderedList under a " +
|
||||
|
||||
@@ -23,6 +23,15 @@
|
||||
* was never enforced. Running this at the end of every write path closes that
|
||||
* gap; because it is idempotent, it is a no-op when the footnotes are already
|
||||
* canonical (no spurious mutations / git-sync churn).
|
||||
*
|
||||
* ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
|
||||
* `canonicalizeFootnotes(doc)` before writing — the current callers are
|
||||
* `markdownToProseMirrorCanonical` (page markdown import/update; the plain
|
||||
* `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
|
||||
* reference-less definition), `update_page_json`, `docmost_transform`,
|
||||
* `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
|
||||
* NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
|
||||
* and comment-vs-page nuances make a single naive wrapper unsafe).
|
||||
*/
|
||||
const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
|
||||
const FOOTNOTES_LIST_NAME = "footnotesList";
|
||||
|
||||
@@ -76,6 +76,27 @@ export function getList(doc, predicate) {
|
||||
* footnote inserter excludes via `beforeBlock`.)
|
||||
*/
|
||||
const INLINE_ATOM_FORBIDDEN_BLOCKS = new Set(["codeBlock"]);
|
||||
/**
|
||||
* Footnote-notes subtrees the inline footnote inserter must never split into (at
|
||||
* any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring
|
||||
* a reference inside one of these would later be dropped as an orphan by the
|
||||
* canonicalizer, taking the existing definition's text with it.
|
||||
*/
|
||||
const FOOTNOTE_NOTES_SUBTREES = new Set([
|
||||
"footnotesList",
|
||||
"footnoteDefinition",
|
||||
]);
|
||||
/** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */
|
||||
function containsFootnoteNotes(node) {
|
||||
if (!isObject(node))
|
||||
return false;
|
||||
if (FOOTNOTE_NOTES_SUBTREES.has(node.type))
|
||||
return true;
|
||||
if (Array.isArray(node.content)) {
|
||||
return node.content.some((c) => containsFootnoteNotes(c));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/**
|
||||
* Insert `marker` as a PLAIN (unmarked) text run right after the first
|
||||
* occurrence of `anchor`.
|
||||
@@ -136,6 +157,13 @@ function insertNodesAfterAnchor(doc, anchor, makeMiddle, opts = {}) {
|
||||
if (inserted || !isObject(container) || !Array.isArray(container.content)) {
|
||||
return;
|
||||
}
|
||||
// Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition):
|
||||
// never split into it, but keep `offset` aligned for any sibling text after
|
||||
// it within this block.
|
||||
if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) {
|
||||
offset += blockPlainText(container).length;
|
||||
return;
|
||||
}
|
||||
const inline = container.content;
|
||||
// Detect whether this array is an inline array (contains text nodes).
|
||||
const hasText = inline.some((n) => isObject(n) && n.type === "text");
|
||||
@@ -553,18 +581,26 @@ export function insertInlineFootnote(doc, opts) {
|
||||
if (footnoteId == null)
|
||||
footnoteId = generateFootnoteId();
|
||||
// Insert the footnoteReference node directly after the anchor (mark-safe
|
||||
// split); it hugs the preceding word with no leading space. The search is
|
||||
// bounded to the BODY (before the first footnotesList) and refuses codeBlocks,
|
||||
// so the inline atom can never be spliced into a footnote definition or a code
|
||||
// block — which would persist a schema-invalid doc (insert_footnote skips
|
||||
// validateDocStructure). When the only match is in such a place the insert is
|
||||
// refused and the write aborts cleanly (inserted:false).
|
||||
const listIdx = Array.isArray(doc?.content)
|
||||
? doc.content.findIndex((n) => isObject(n) && n.type === "footnotesList")
|
||||
// split); it hugs the preceding word with no leading space. Two guards keep the
|
||||
// inline atom out of the notes section and out of blocks that cannot hold it:
|
||||
// - beforeBlock bounds the search to the BODY, before the first top-level block
|
||||
// that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so
|
||||
// a NESTED list or a bare definition also bounds the search, not just a
|
||||
// top-level list;
|
||||
// - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition
|
||||
// subtree, so a reference is never glued inside an existing definition (which
|
||||
// the canonicalizer would then drop as an orphan, losing that definition's
|
||||
// prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a
|
||||
// schema-invalid doc; insert_footnote skips validateDocStructure).
|
||||
// When the only anchor match is in such a place, the insert is refused and the
|
||||
// write aborts cleanly (inserted:false) instead of destroying content.
|
||||
const boundaryIdx = Array.isArray(doc?.content)
|
||||
? doc.content.findIndex((n) => containsFootnoteNotes(n))
|
||||
: -1;
|
||||
const r = insertNodesAfterAnchor(doc, (opts.anchorText ?? "").trimEnd(), () => [{ type: "footnoteReference", attrs: { id: footnoteId } }], {
|
||||
...(listIdx >= 0 ? { beforeBlock: listIdx } : {}),
|
||||
...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}),
|
||||
forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
|
||||
skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
|
||||
});
|
||||
if (!r.inserted) {
|
||||
return { doc: clone(doc), inserted: false, footnoteId, reused };
|
||||
|
||||
@@ -1420,13 +1420,16 @@ export class DocmostClient {
|
||||
return r.doc;
|
||||
},
|
||||
);
|
||||
// The not-found path throws inside the transform (aborting mutatePage), so by
|
||||
// here `result` is always set.
|
||||
const r = result!;
|
||||
return {
|
||||
success: true,
|
||||
modified: true,
|
||||
pageId,
|
||||
footnoteId: result ? (result as any).footnoteId : undefined,
|
||||
reused: result ? (result as any).reused : undefined,
|
||||
message: result && (result as any).reused
|
||||
footnoteId: r.footnoteId,
|
||||
reused: r.reused,
|
||||
message: r.reused
|
||||
? "Footnote inserted (reused an existing same-content definition)."
|
||||
: "Footnote inserted.",
|
||||
verify: mutation.verify,
|
||||
@@ -3130,14 +3133,18 @@ export class DocmostClient {
|
||||
'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })',
|
||||
);
|
||||
}
|
||||
// Validate the RAW transform output FIRST (structure — including the
|
||||
// MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer
|
||||
// recurses without a depth limiter, so validating after it would turn a
|
||||
// too-deep doc into an opaque "Maximum call stack size exceeded" instead of
|
||||
// the intended "nesting exceeds the maximum depth" error.
|
||||
this.validateDocStructure(raw);
|
||||
this.validateDocUrls(raw);
|
||||
// Auto-canonicalize footnotes after the transform (idempotent): no write
|
||||
// path can leave footnotes out of order / orphaned / in a raw `[^id]`
|
||||
// block. In a dryRun preview this may surface footnote edits the script
|
||||
// author did not write (the canonicalizer tidied them) — that is expected.
|
||||
const result = canonicalizeFootnotes(raw);
|
||||
// Validate the returned doc before it can be written.
|
||||
this.validateDocStructure(result);
|
||||
this.validateDocUrls(result);
|
||||
newDoc = result;
|
||||
return result;
|
||||
};
|
||||
|
||||
@@ -895,7 +895,10 @@ server.registerTool(
|
||||
"commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
|
||||
"comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
|
||||
"footnote numbering + the single bottom list from reference order, drop " +
|
||||
"orphans/duplicates — runs automatically after every transform too), and " +
|
||||
"orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " +
|
||||
"applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " +
|
||||
"diff may therefore show footnote tidy-ups your script did not make, and " +
|
||||
"it is idempotent after the first run), and " +
|
||||
"insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
|
||||
"marker + dedup'd definition, list derived). Footnote convention: markers are " +
|
||||
"plain '[N]' text in the body; the notes are an orderedList under a " +
|
||||
|
||||
@@ -23,6 +23,15 @@
|
||||
* was never enforced. Running this at the end of every write path closes that
|
||||
* gap; because it is idempotent, it is a no-op when the footnotes are already
|
||||
* canonical (no spurious mutations / git-sync churn).
|
||||
*
|
||||
* ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
|
||||
* `canonicalizeFootnotes(doc)` before writing — the current callers are
|
||||
* `markdownToProseMirrorCanonical` (page markdown import/update; the plain
|
||||
* `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
|
||||
* reference-less definition), `update_page_json`, `docmost_transform`,
|
||||
* `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
|
||||
* NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
|
||||
* and comment-vs-page nuances make a single naive wrapper unsafe).
|
||||
*/
|
||||
|
||||
const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
|
||||
|
||||
@@ -96,6 +96,15 @@ export interface InsertMarkerOptions {
|
||||
* markers leave this unset (text is valid inside a codeBlock).
|
||||
*/
|
||||
forbidBlockTypes?: ReadonlySet<string>;
|
||||
/**
|
||||
* Node types whose ENTIRE subtree is skipped during the walk (never split into,
|
||||
* at any depth). Used to keep the footnote inserter out of the notes section:
|
||||
* splitting text inside an existing `footnoteDefinition` would glue a reference
|
||||
* into a definition, which the canonicalizer then drops as an orphan together
|
||||
* with the definition's prose — silent loss of an existing footnote. Skipped
|
||||
* subtrees still advance the running offset so sibling text stays aligned.
|
||||
*/
|
||||
skipSubtreeTypes?: ReadonlySet<string>;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -108,6 +117,27 @@ export interface InsertMarkerOptions {
|
||||
*/
|
||||
const INLINE_ATOM_FORBIDDEN_BLOCKS: ReadonlySet<string> = new Set(["codeBlock"]);
|
||||
|
||||
/**
|
||||
* Footnote-notes subtrees the inline footnote inserter must never split into (at
|
||||
* any depth): a `footnotesList` and the `footnoteDefinition`s it holds. Anchoring
|
||||
* a reference inside one of these would later be dropped as an orphan by the
|
||||
* canonicalizer, taking the existing definition's text with it.
|
||||
*/
|
||||
const FOOTNOTE_NOTES_SUBTREES: ReadonlySet<string> = new Set([
|
||||
"footnotesList",
|
||||
"footnoteDefinition",
|
||||
]);
|
||||
|
||||
/** True if `node` IS, or contains at any depth, a footnotesList/footnoteDefinition. */
|
||||
function containsFootnoteNotes(node: any): boolean {
|
||||
if (!isObject(node)) return false;
|
||||
if (FOOTNOTE_NOTES_SUBTREES.has(node.type)) return true;
|
||||
if (Array.isArray(node.content)) {
|
||||
return node.content.some((c: any) => containsFootnoteNotes(c));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert `marker` as a PLAIN (unmarked) text run right after the first
|
||||
* occurrence of `anchor`.
|
||||
@@ -187,6 +217,13 @@ function insertNodesAfterAnchor(
|
||||
if (inserted || !isObject(container) || !Array.isArray(container.content)) {
|
||||
return;
|
||||
}
|
||||
// Skip a forbidden subtree entirely (e.g. footnotesList/footnoteDefinition):
|
||||
// never split into it, but keep `offset` aligned for any sibling text after
|
||||
// it within this block.
|
||||
if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) {
|
||||
offset += blockPlainText(container).length;
|
||||
return;
|
||||
}
|
||||
const inline = container.content;
|
||||
// Detect whether this array is an inline array (contains text nodes).
|
||||
const hasText = inline.some(
|
||||
@@ -692,24 +729,30 @@ export function insertInlineFootnote(
|
||||
if (footnoteId == null) footnoteId = generateFootnoteId();
|
||||
|
||||
// Insert the footnoteReference node directly after the anchor (mark-safe
|
||||
// split); it hugs the preceding word with no leading space. The search is
|
||||
// bounded to the BODY (before the first footnotesList) and refuses codeBlocks,
|
||||
// so the inline atom can never be spliced into a footnote definition or a code
|
||||
// block — which would persist a schema-invalid doc (insert_footnote skips
|
||||
// validateDocStructure). When the only match is in such a place the insert is
|
||||
// refused and the write aborts cleanly (inserted:false).
|
||||
const listIdx = Array.isArray(doc?.content)
|
||||
? doc.content.findIndex(
|
||||
(n: any) => isObject(n) && n.type === "footnotesList",
|
||||
)
|
||||
// split); it hugs the preceding word with no leading space. Two guards keep the
|
||||
// inline atom out of the notes section and out of blocks that cannot hold it:
|
||||
// - beforeBlock bounds the search to the BODY, before the first top-level block
|
||||
// that IS or CONTAINS (at any depth) a footnotesList/footnoteDefinition — so
|
||||
// a NESTED list or a bare definition also bounds the search, not just a
|
||||
// top-level list;
|
||||
// - skipSubtreeTypes refuses to descend into any footnotesList/footnoteDefinition
|
||||
// subtree, so a reference is never glued inside an existing definition (which
|
||||
// the canonicalizer would then drop as an orphan, losing that definition's
|
||||
// prose); and forbidBlockTypes refuses codeBlocks (an inline atom there is a
|
||||
// schema-invalid doc; insert_footnote skips validateDocStructure).
|
||||
// When the only anchor match is in such a place, the insert is refused and the
|
||||
// write aborts cleanly (inserted:false) instead of destroying content.
|
||||
const boundaryIdx = Array.isArray(doc?.content)
|
||||
? doc.content.findIndex((n: any) => containsFootnoteNotes(n))
|
||||
: -1;
|
||||
const r = insertNodesAfterAnchor(
|
||||
doc,
|
||||
(opts.anchorText ?? "").trimEnd(),
|
||||
() => [{ type: "footnoteReference", attrs: { id: footnoteId } }],
|
||||
{
|
||||
...(listIdx >= 0 ? { beforeBlock: listIdx } : {}),
|
||||
...(boundaryIdx >= 0 ? { beforeBlock: boundaryIdx } : {}),
|
||||
forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
|
||||
skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
|
||||
},
|
||||
);
|
||||
if (!r.inserted) {
|
||||
|
||||
@@ -2,7 +2,10 @@ import { test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import { canonicalizeFootnotes } from "../../build/lib/footnote-canonicalize.js";
|
||||
import { footnoteContentKey } from "../../build/lib/footnote-authoring.js";
|
||||
import {
|
||||
footnoteContentKey,
|
||||
generateFootnoteId,
|
||||
} from "../../build/lib/footnote-authoring.js";
|
||||
import { insertInlineFootnote } from "../../build/lib/transforms.js";
|
||||
import { markdownToProseMirrorCanonical } from "../../build/lib/collaboration.js";
|
||||
|
||||
@@ -190,6 +193,66 @@ test("insertInlineFootnote: codeBlock match is skipped, a later body paragraph s
|
||||
assert.equal(findAll(r.doc, "footnoteReference").length, 1);
|
||||
});
|
||||
|
||||
test("insertInlineFootnote: anchor only inside a NESTED definition -> refused, definition preserved", () => {
|
||||
// The footnotesList is nested in a callout (not top level) and the anchor text
|
||||
// appears ONLY inside that definition. The search must be bounded past the
|
||||
// notes subtree (recursive boundary) AND refuse to descend into the definition,
|
||||
// so it aborts cleanly instead of gluing a reference into the definition (which
|
||||
// canonicalize would then drop as an orphan, losing the definition's prose).
|
||||
const doc = {
|
||||
type: "doc",
|
||||
content: [
|
||||
para({ type: "text", text: "Body text here." }, ref("a")),
|
||||
{
|
||||
type: "callout",
|
||||
content: [list(def("a", "the unique anchor lives here"))],
|
||||
},
|
||||
],
|
||||
};
|
||||
const r = insertInlineFootnote(doc, {
|
||||
anchorText: "unique anchor",
|
||||
text: "new note",
|
||||
});
|
||||
assert.equal(r.inserted, false);
|
||||
// The existing definition (and its text) is preserved untouched.
|
||||
assert.equal(findAll(r.doc, "footnoteDefinition").length, 1);
|
||||
assert.match(JSON.stringify(r.doc), /the unique anchor lives here/);
|
||||
assert.equal(findAll(r.doc, "footnoteReference").length, 1); // only the original
|
||||
});
|
||||
|
||||
test("insertInlineFootnote: anchor only inside a BARE definition (no list wrapper) -> refused", () => {
|
||||
const doc = {
|
||||
type: "doc",
|
||||
content: [
|
||||
para({ type: "text", text: "Some body." }),
|
||||
{
|
||||
type: "footnoteDefinition",
|
||||
attrs: { id: "a" },
|
||||
content: [{ type: "paragraph", content: [{ type: "text", text: "orphan anchor text" }] }],
|
||||
},
|
||||
],
|
||||
};
|
||||
const r = insertInlineFootnote(doc, { anchorText: "orphan anchor", text: "x" });
|
||||
assert.equal(r.inserted, false);
|
||||
assert.equal(findAll(r.doc, "footnoteDefinition").length, 1);
|
||||
assert.match(JSON.stringify(r.doc), /orphan anchor text/);
|
||||
});
|
||||
|
||||
test("insertInlineFootnote: anchor in body BEFORE a nested list still inserts", () => {
|
||||
const doc = {
|
||||
type: "doc",
|
||||
content: [
|
||||
para({ type: "text", text: "The sky is blue." }, ref("a")),
|
||||
{ type: "callout", content: [list(def("a", "note a"))] },
|
||||
],
|
||||
};
|
||||
const r = insertInlineFootnote(doc, { anchorText: "blue", text: "Rayleigh." });
|
||||
assert.equal(r.inserted, true);
|
||||
// The new reference plus the original = two references; a single canonical list.
|
||||
assert.equal(findAll(r.doc, "footnoteReference").length, 2);
|
||||
assert.equal(findAll(r.doc, "footnotesList").length, 1);
|
||||
});
|
||||
|
||||
test("markdown import (page path): out-of-order definitions render as a reference-ordered list", async () => {
|
||||
// References appear b, a, c in the body; definitions are written in a, b, c
|
||||
// order (the import order). The PAGE import path (markdownToProseMirrorCanonical)
|
||||
@@ -207,3 +270,17 @@ test("markdown import (page path): out-of-order definitions render as a referenc
|
||||
assert.deepEqual(defIds(json), ["b", "a", "c"]);
|
||||
assert.equal(findAll(json, "footnotesList").length, 1);
|
||||
});
|
||||
|
||||
test("generateFootnoteId: valid uuidv7 shape (version 7, variant 8..b) and unique", () => {
|
||||
// version nibble = 7; variant nibble in [8,9,a,b]; otherwise lowercase hex.
|
||||
const re =
|
||||
/^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/;
|
||||
const ids = new Set();
|
||||
for (let i = 0; i < 50; i++) {
|
||||
const id = generateFootnoteId();
|
||||
assert.match(id, re, `not a uuidv7: ${id}`);
|
||||
ids.add(id);
|
||||
}
|
||||
// Distinct across calls (random component makes collisions astronomically rare).
|
||||
assert.equal(ids.size, 50, "generated ids must be unique");
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user