Must-fix: - insertInlineFootnote could glue a footnoteReference inside an EXISTING definition (nested footnotesList, or a bare footnoteDefinition with no list wrapper), which canonicalize then dropped as an orphan — silently losing the definition's prose. Now: (a) the body/notes boundary is computed from the first top-level block that IS or CONTAINS (recursively) a footnotesList/ footnoteDefinition, not just a top-level list; and (b) the insertNodesAfterAnchor core skips footnotesList/footnoteDefinition subtrees entirely (skipSubtreeTypes), so an anchor whose only match is inside a definition -> inserted:false (clean abort, no write). Added tests: nested-definition, bare-definition, and body-before-nested-list-still-inserts. - editor-ext footnote-canonicalize header listed `markdownToProseMirror` among the canonicalizing MCP paths; it is the NON-canonicalizing primitive. Replaced with `markdownToProseMirrorCanonical` (+ note that the plain primitive is for comment bodies) and added copy_page_content. - Client paste: canonicalizePastedFootnotes now skips a definitions-ONLY paste (no footnoteReference anywhere) — canonicalizing it would strip the reference-less list and yield an EMPTY paste. Added a test. Suggestions: - docmost_transform now runs validateDocStructure/validateDocUrls on the RAW transform output BEFORE canonicalizeFootnotes (mirrors updatePageJson), so a too-deep doc gives the intended max-depth error instead of a stack overflow. - docmost_transform tool description now states the RESULT is footnote-canonical (dryRun diff may show tidy-ups; idempotent after first run). - insertFootnote: dropped the dead `result ? … : undefined` ternaries and the `as any` casts (result is always set by the time we return; the not-found path throws and aborts mutatePage). `const r = result!;`. 
Tests / architecture: - Added a LIVE-plugin golden case: the real footnoteSyncPlugin leaves a list with non-empty content after it in place, and canonicalize agrees (placement parity is now a driven property, not a hand-set expected). - Added generateFootnoteId uuidv7 shape + uniqueness test. - Item 9: added the ENFORCEMENT-RULE comments at the server parseProsemirrorContent and the MCP canonicalizer header (any NEW full-doc persist path MUST canonicalize; fragments/append/prepend and comment bodies MUST NOT). Kept per-call-site over a brittle grep CI test (the replace-vs-fragment + comment-vs-page nuance makes a single wrapper unsafe). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
632 lines
28 KiB
JavaScript
632 lines
28 KiB
JavaScript
/**
 * Pure, network-free transform primitives for a ProseMirror/TipTap document
 * tree, plus the higher-level orchestrations built on them
 * (commentsToFootnotes, insertInlineFootnote).
 *
 * A ProseMirror node here is a plain JSON object of the shape produced by
 * Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
 * `content` array; callouts, tables, lists all hold their children in
 * `content`, so a single recursive walk reaches them all.
 *
 * Conventions (matching node-ops.ts):
 *  - functions that produce a new document deep-clone their input and return a
 *    `{ doc, ... }` object; the caller's objects are never mutated.
 *  - functions are defensively null-safe.
 *  - `marks` arrays are preserved verbatim when fragments are split/reordered.
 */
|
|
import { blockPlainText } from "./node-ops.js";
|
|
import { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
|
import { footnoteContentKey, makeFootnoteDefinition, generateFootnoteId, } from "./footnote-authoring.js";
|
|
export { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
|
/**
 * Produce a deep copy of a JSON-serializable value; the input is never mutated.
 *
 * Uses the platform `structuredClone` when the runtime provides it, and falls
 * back to a JSON round-trip otherwise.
 */
function clone(value) {
  const hasNativeClone = typeof structuredClone === "function";
  return hasNativeClone
    ? structuredClone(value)
    : JSON.parse(JSON.stringify(value));
}
|
|
/** Report whether `value` is an object that is neither null/undefined nor an array. */
function isObject(value) {
  if (value == null) {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
|
/**
 * Plain text of a node. Thin re-export of node-ops' `blockPlainText`, so that
 * transform authors only need this module as their import surface.
 */
export function blockText(node) {
  const text = blockPlainText(node);
  return text;
}
|
|
/**
 * Pre-order, depth-first traversal: `fn` is invoked once for the root and once
 * for every node reachable through nested `content` arrays (callouts, tables,
 * lists, ...). Anything that is not a non-null, non-array object is skipped,
 * so a nullish root or stray non-object children are harmless.
 */
export function walk(node, fn) {
  if (isObject(node)) {
    fn(node);
    // `content` is read only after `fn` ran, so children are whatever the
    // node holds at that point.
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        walk(child, fn);
      }
    }
  }
}
|
|
/**
 * Find the FIRST node (pre-order, depth-first) for which `predicate` returns a
 * truthy value, anywhere in the tree rooted at `doc` (the root itself included).
 *
 * The raw tree is searched, so this works even when nodes carry no `attrs.id`.
 * The search stops at the first hit: nodes after it are neither visited nor
 * passed to `predicate`.
 *
 * @param {object} doc - root node to search; non-object values yield null.
 * @param {(node: object) => boolean} predicate - match test, called per node.
 * @returns {object|null} the live node reference inside `doc` (NOT a clone),
 *   or null when nothing matches. Typical use:
 *   `getList(doc, n => n.type === "orderedList")`.
 */
export function getList(doc, predicate) {
  const search = (node) => {
    if (!isObject(node)) {
      return null;
    }
    if (predicate(node)) {
      return node;
    }
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        const hit = search(child);
        if (hit != null) {
          return hit;
        }
      }
    }
    return null;
  };
  return search(doc);
}
|
|
/**
 * Textblock types that hold raw text but must NOT receive an inline atom node.
 *
 * A `footnoteReference` is `group:"inline", atom:true`, whereas `codeBlock` is
 * `content:"text*"` (text only); splicing a footnoteReference into a codeBlock
 * therefore produces an invalid document. paragraph/heading/detailsSummary are
 * `inline*` and DO accept it. Footnote definitions are kept out of reach of the
 * footnote inserter by other means (its `beforeBlock` bound), so they are not
 * listed here.
 */
const INLINE_ATOM_FORBIDDEN_BLOCKS = new Set(["codeBlock"]);
|
|
/**
 * Node types that make up the footnote-notes area: a `footnotesList` and the
 * `footnoteDefinition`s it holds. The inline footnote inserter must never split
 * into one of these subtrees, at any depth. Per the notes that accompany this
 * code, a reference anchored inside a definition would later be dropped as an
 * orphan by the canonicalizer, taking the existing definition's text with it.
 */
const FOOTNOTE_NOTES_SUBTREES = new Set(["footnotesList", "footnoteDefinition"]);
|
|
/**
 * Report whether `node` IS a footnotesList/footnoteDefinition, or holds one
 * anywhere below it (searched recursively through `content`). Non-object input
 * yields false.
 */
function containsFootnoteNotes(node) {
  if (!isObject(node)) {
    return false;
  }
  if (FOOTNOTE_NOTES_SUBTREES.has(node.type)) {
    return true;
  }
  const children = node.content;
  return Array.isArray(children)
    ? children.some((child) => containsFootnoteNotes(child))
    : false;
}
|
|
/**
 * Insert `marker` as a PLAIN (unmarked) text run directly after the first
 * occurrence of `anchor`.
 *
 * The work is done by `insertNodesAfterAnchor`: the text run holding the END of
 * the anchor is split there, so marks (links, bold, ...) on the surrounding
 * text survive, while the inserted run carries none. The run is the marker
 * prefixed with one space (`" " + marker`) so it stands apart from the
 * preceding word.
 *
 * `opts` is forwarded unchanged to `insertNodesAfterAnchor` (e.g. `beforeBlock`
 * to bound the search to the leading top-level blocks).
 *
 * @returns {{ doc: object, inserted: boolean }} a clone of `doc` with the
 *   marker spliced in; `inserted` is false when the anchor text was not found
 *   in any in-scope block.
 */
export function insertMarkerAfter(doc, anchor, marker, opts = {}) {
  const makeMarkerRun = () => {
    const run = { type: "text", text: " " + marker };
    return [run];
  };
  return insertNodesAfterAnchor(doc, anchor, makeMarkerRun, opts);
}
|
|
/**
 * Mark-safe insertion CORE shared by `insertMarkerAfter` (plain text marker)
 * and `insertInlineFootnote` (a `footnoteReference` node).
 *
 * For each in-scope top-level block whose plain text contains `anchor`, the
 * text run that holds the END of the anchor's first occurrence is split at
 * that point and the nodes returned by `makeMiddle()` are spliced in. The two
 * halves keep the original run's properties and a copy of its marks; the
 * inserted nodes are used exactly as `makeMiddle` returns them.
 *
 * The split point is located by walking the block while keeping a running
 * character offset into `blockPlainText(block)`. Every node that is passed
 * over advances that offset by its own plain-text length, including skipped or
 * forbidden subtrees and childless nodes reached while descending through
 * non-inline containers. (If `blockPlainText` reports text for a childless
 * node, e.g. an inline atom sitting alone in a paragraph, not counting it
 * would shift the split to the wrong character.)
 *
 * @param {object} doc - document root; never mutated (a clone is edited).
 * @param {string} anchor - text to insert after; empty/falsy means no-op.
 * @param {() => object[]} makeMiddle - produces the nodes to splice in.
 * @param {object} [opts]
 * @param {number} [opts.beforeBlock] - only top-level blocks with an index
 *   below this are searched.
 * @param {Set<string>} [opts.skipSubtreeTypes] - node types whose whole
 *   subtree is never split into.
 * @param {Set<string>} [opts.forbidBlockTypes] - textblock types that must not
 *   receive the inserted nodes (e.g. `codeBlock` for an inline atom).
 * @returns {{ doc: object, inserted: boolean }} the edited clone; `inserted`
 *   is false (and the clone is unmodified) when no split point was found.
 */
function insertNodesAfterAnchor(doc, anchor, makeMiddle, opts = {}) {
  const options = opts ?? {};
  const out = clone(doc);
  if (!isObject(out) || !Array.isArray(out.content) || !anchor) {
    return { doc: out, inserted: false };
  }
  const limit =
    typeof options.beforeBlock === "number"
      ? Math.min(options.beforeBlock, out.content.length)
      : out.content.length;
  const skipTypes = options.skipSubtreeTypes;
  const forbidTypes = options.forbidBlockTypes;

  for (let b = 0; b < limit; b++) {
    const block = out.content[b];
    if (!isObject(block)) {
      continue;
    }
    // Compute the block's plain text once; it serves both as the quick reject
    // and as the coordinate system for the split position.
    const plain = blockPlainText(block);
    const anchorStart = plain.indexOf(anchor);
    if (anchorStart < 0) {
      continue;
    }
    const anchorEnd = anchorStart + anchor.length;

    let inserted = false;
    let offset = 0; // characters of plain text passed so far in this block

    const visit = (container) => {
      if (inserted || !isObject(container)) {
        return;
      }
      const children = container.content;
      if (!Array.isArray(children)) {
        // Childless node (atom, rule, ...): nothing to split, but whatever
        // plain text it contributes must still be counted.
        offset += blockPlainText(container).length;
        return;
      }
      // Never split inside a skipped subtree; keep `offset` aligned for the
      // siblings that follow it within this block.
      if (skipTypes && skipTypes.has(container.type)) {
        offset += blockPlainText(container).length;
        return;
      }
      // An array holding at least one text node is treated as inline content.
      const isInlineArray = children.some((n) => isObject(n) && n.type === "text");
      if (!isInlineArray) {
        // Structural container (e.g. callout -> paragraph): descend.
        for (const child of children) {
          visit(child);
          if (inserted) {
            return;
          }
        }
        return;
      }
      // A textblock whose content spec cannot hold the inserted nodes is
      // passed over; the search continues with later siblings / blocks.
      if (forbidTypes && forbidTypes.has(container.type)) {
        offset += blockPlainText(container).length;
        return;
      }
      for (let i = 0; i < children.length; i++) {
        const n = children[i];
        const len = isObject(n) ? blockPlainText(n).length : 0;
        const runStart = offset;
        const runEnd = offset + len;
        // Split the text run in which the anchor ends:
        // runStart < anchorEnd <= runEnd.
        if (
          isObject(n) &&
          n.type === "text" &&
          typeof n.text === "string" &&
          anchorEnd > runStart &&
          anchorEnd <= runEnd
        ) {
          const cut = anchorEnd - runStart; // split index within this run
          const before = n.text.slice(0, cut);
          const after = n.text.slice(cut);
          const marks = Array.isArray(n.marks) ? n.marks : [];
          const parts = [];
          if (before.length > 0) {
            parts.push({ ...n, text: before, marks: [...marks] });
          }
          // Caller-decided nodes; they receive no copied marks.
          parts.push(...makeMiddle());
          if (after.length > 0) {
            parts.push({ ...n, text: after, marks: [...marks] });
          }
          children.splice(i, 1, ...parts);
          inserted = true;
          return;
        }
        offset = runEnd;
      }
    };

    visit(block);
    if (inserted) {
      return { doc: out, inserted: true };
    }
    // The block matched in plain text but offered no valid split point (e.g.
    // the anchor ends inside an atom or a forbidden block): try the next block.
  }
  return { doc: out, inserted: false };
}
|
|
/**
 * In callouts, rewrite a `[1]…[K]` footnote-range marker to `[1]…[n]`.
 *
 * Docmost translations use a disclaimer callout that states the footnote
 * range, e.g. "[1]…[5]". When the number of notes changes, this rewrites the
 * trailing number of every `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis)
 * occurrence found in text nodes located inside a callout, at any depth.
 *
 * Each text node is examined exactly once, so a text node inside NESTED
 * callouts is counted once in `changed`.
 *
 * @param {object} doc - document root; never mutated (a clone is edited).
 * @param {number|string} n - value written as the new upper bound; inserted
 *   literally (it is not interpreted as a replacement pattern).
 * @returns {{ doc: object, changed: number }} the edited clone and the number
 *   of text nodes that contained at least one range marker.
 */
export function setCalloutRange(doc, n) {
  const out = clone(doc);
  let changed = 0;
  // "[1]" + (… or ...) + "[<digits>]"; only the digits are replaced.
  const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g;

  const visit = (node, insideCallout) => {
    if (!isObject(node)) {
      return;
    }
    const within = insideCallout || node.type === "callout";
    if (within && node.type === "text" && typeof node.text === "string") {
      let hits = 0;
      // A replacer function avoids `$`-pattern pitfalls in the replacement
      // text, and `replace` handles the global regex's lastIndex by itself.
      const rewritten = node.text.replace(rangeRe, (_match, head, tail) => {
        hits++;
        return `${head}${n}${tail}`;
      });
      if (hits > 0) {
        node.text = rewritten;
        changed++;
      }
    }
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        visit(child, within);
      }
    }
  };

  visit(out, false);
  return { doc: out, changed };
}
|
|
/**
 * Produce a short random id for a new block's `attrs.id`.
 *
 * Docmost itself uses nanoid; here two base36-encoded `Math.random()` draws are
 * concatenated (up to 10 + 4 characters), which is enough for uniqueness
 * within a single document. Not suitable for anything security-sensitive.
 */
function freshId() {
  const head = Math.random().toString(36).slice(2, 12);
  const tail = Math.random().toString(36).slice(2, 6);
  return head + tail;
}
|
|
/**
 * Build a list item around inline ProseMirror nodes:
 *   { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content: inlineNodes }] }
 *
 * The paragraph gets a fresh random block id. The inline nodes are deep-cloned
 * so the result shares no references with the caller's array; a non-array
 * argument yields an empty paragraph.
 */
export function noteItem(inlineNodes) {
  const inline = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
  const paragraph = {
    type: "paragraph",
    attrs: { id: freshId() },
    content: inline,
  };
  return { type: "listItem", content: [paragraph] };
}
|
|
/**
 * Build a real footnoteDefinition node keyed by `id` around inline nodes:
 *   { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", content }] }
 * (mirrors the editor-ext / docmost-schema FootnoteDefinition node).
 *
 * The node itself comes from the shared `makeFootnoteDefinition` factory
 * (footnote-authoring.ts), so the definition shape lives in one place. The
 * only addition made here is a fresh block id stamped onto the definition's
 * first child (its paragraph), replacing that child's `attrs`.
 */
export function footnoteDefinition(id, inlineNodes) {
  const definition = makeFootnoteDefinition(id, inlineNodes);
  const innerParagraph = definition.content[0];
  innerParagraph.attrs = { id: freshId() };
  return definition;
}
|
|
/**
 * Swap every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder found in
 * the text runs of an inline content array for a real `footnoteReference`
 * node, in reading order.
 *
 * For each marker `onMarker({ oldNum, phIdx })` is called (`oldNum` is the
 * number of a `[N]` marker, `phIdx` the index of a placeholder; the other one
 * is undefined) and must return the footnote id to put on the inserted node.
 * Text around a marker stays in runs that keep the original run's properties
 * and a copy of its marks. Runs without markers and non-text nodes are left
 * untouched.
 *
 * Mutates `inline` in place; returns nothing.
 */
function replaceMarkersWithReferences(inline, onMarker) {
  const markerRe = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
  let i = 0;
  while (i < inline.length) {
    const run = inline[i];
    const isTextRun =
      isObject(run) && run.type === "text" && typeof run.text === "string";
    const matches = isTextRun ? [...run.text.matchAll(markerRe)] : [];
    if (matches.length === 0) {
      i += 1;
      continue;
    }
    const marks = Array.isArray(run.marks) ? run.marks : [];
    const textPiece = (text) => ({ ...run, text, marks: [...marks] });
    const pieces = [];
    let cursor = 0;
    for (const match of matches) {
      const [whole, bracketNum, placeholderNum] = match;
      if (match.index > cursor) {
        pieces.push(textPiece(run.text.slice(cursor, match.index)));
      }
      const oldNum = bracketNum != null ? Number(bracketNum) : undefined;
      const phIdx = placeholderNum != null ? Number(placeholderNum) : undefined;
      const fnId = onMarker({ oldNum, phIdx });
      pieces.push({ type: "footnoteReference", attrs: { id: fnId } });
      cursor = match.index + whole.length;
    }
    if (cursor < run.text.length) {
      pieces.push(textPiece(run.text.slice(cursor)));
    }
    // Only non-empty slices were pushed, so no zero-length runs can occur.
    inline.splice(i, 1, ...pieces);
    // Continue after the pieces that replaced this run.
    i += pieces.length;
  }
}
|
|
/**
 * Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
 * ProseMirror text nodes.
 *
 * Steps:
 *  1. a leading `комментарий: ` prefix (case-insensitive) and then a leading
 *     `N. ` numeric prefix are stripped, and the result is trimmed;
 *  2. every `**bold**` span, whether it is the leading "lead" or appears later
 *     in the text, becomes a text node with a bold mark; everything else
 *     becomes plain text nodes (via `splitInlineBold`).
 *
 * Only bold is interpreted; this keeps the conversion synchronous (the
 * transform sandbox runs synchronously) and dependency-free. The async
 * markdownToProseMirror is intentionally NOT used here.
 *
 * @param {string} markdown - comment body; non-string input counts as empty.
 * @returns {object[]} inline text nodes; `[]` when nothing is left after
 *   stripping.
 */
export function mdToInlineNodes(markdown) {
  const source = typeof markdown === "string" ? markdown : "";
  const body = source
    .replace(/^\s*комментарий\s*:\s*/i, "")
    .replace(/^\s*\d+\.\s+/, "")
    .trim();
  if (body === "") {
    return [];
  }
  // One code path for lead and non-lead text: a leading `**lead**` is simply
  // the first bold span, and later spans no longer leak literal asterisks.
  return splitInlineBold(body);
}
|
|
/**
 * Break a string containing inline `**bold**` spans into text nodes: each span
 * becomes a node with a bold mark (without the asterisks), and the text between
 * spans becomes plain nodes. When no node was produced at all, the whole input
 * is returned as a single plain text node. Used by mdToInlineNodes.
 */
function splitInlineBold(text) {
  const plainNode = (value) => ({ type: "text", text: value });
  const boldRe = /\*\*([^*]+)\*\*/g;
  const pieces = [];
  let cursor = 0;
  for (let m = boldRe.exec(text); m !== null; m = boldRe.exec(text)) {
    const [span, inner] = m;
    if (m.index > cursor) {
      pieces.push(plainNode(text.slice(cursor, m.index)));
    }
    pieces.push({ type: "text", text: inner, marks: [{ type: "bold" }] });
    cursor = m.index + span.length;
  }
  if (cursor < text.length) {
    pieces.push(plainNode(text.slice(cursor)));
  }
  if (pieces.length === 0) {
    return [plainNode(text)];
  }
  return pieces;
}
|
|
/**
 * Turn inline comments into real footnotes.
 *
 * For each inline comment that carries a string `selection`:
 *   1. a transient, NUL-delimited placeholder ("\u0000FN<i>\u0000") is inserted
 *      right after the selection text in the BODY, i.e. in the top-level
 *      blocks before the notes heading;
 *   2. the comment's markdown `content` is converted to inline nodes that will
 *      become the footnote's definition.
 *
 * The body is then walked in reading order. Every existing `[N]` marker and
 * every placeholder is replaced by a `footnoteReference` node carrying a fresh
 * id, and a matching `footnoteDefinition` is collected: for `[N]` from the
 * N-th item of the notes list, for a placeholder from its comment. The notes
 * list, which is the first top-level `orderedList` AT OR AFTER the notes
 * heading, is replaced by a `footnotesList` of those definitions, or removed
 * when there are none. A callout whose text contains a `[1]…[K]` range is not
 * scanned for markers; its range is synced to the new note count at the end.
 *
 * @param {object} doc - document root; never mutated (a clone is edited).
 * @param {Array<{id: *, selection?: string, content?: string}>} comments
 * @param {{ notesHeading?: string }} [opts] - text of the notes heading
 *   (default "Примечания переводчика").
 * @returns {{ doc: object, consumed: Array }} the new document and the ids of
 *   the comments whose selection was found and anchored.
 * @throws {Error} when the notes heading or the notes orderedList is missing,
 *   or when a body `[N]` marker has no matching item in the notes list.
 */
export function commentsToFootnotes(doc, comments, opts = {}) {
  let working = clone(doc);
  const notesHeading = opts?.notesHeading ?? "Примечания переводчика";

  const isNotesHeading = (n) =>
    isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading;
  // Locate the notes heading and the notes list. The list is searched from
  // the heading onward, so an ordered list in the body is never mistaken for
  // the notes.
  const locateNotes = (root) => {
    const top = isObject(root) && Array.isArray(root.content) ? root.content : [];
    const headingIdx = top.findIndex(isNotesHeading);
    const listIdx =
      headingIdx < 0
        ? -1
        : top.findIndex(
            (n, i) => i >= headingIdx && isObject(n) && n.type === "orderedList",
          );
    return { top, headingIdx, listIdx };
  };

  const initial = locateNotes(working);
  if (initial.headingIdx < 0) {
    throw new Error(`heading "${notesHeading}" not found`);
  }
  if (initial.listIdx < 0) {
    throw new Error("notes orderedList not found");
  }

  const consumed = [];
  const noteInlineByPh = new Map();
  (Array.isArray(comments) ? comments : []).forEach((c, i) => {
    if (!c || typeof c.selection !== "string" || c.selection === "") {
      return;
    }
    // Collision-proof sentinel delimited by NUL control chars, so the marker
    // regex cannot mistake ordinary body text (e.g. "Press F1", "FN2") for a
    // placeholder. It is transient: inserted here, replaced by a
    // footnoteReference node below.
    const ph = `\u0000FN${i}\u0000`;
    // insertMarkerAfter returns a NEW cloned doc; reassign `working`.
    const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
      beforeBlock: initial.headingIdx,
    });
    if (!r.inserted) {
      return;
    }
    working = r.doc;
    noteInlineByPh.set(ph, mdToInlineNodes(c.content));
    consumed.push(c.id);
  });

  // Re-resolve positions inside the (possibly re-cloned) working doc.
  const { top, headingIdx, listIdx } = locateNotes(working);
  if (listIdx < 0) {
    throw new Error("notes orderedList not found");
  }
  const notesList = top[listIdx];

  // Inline content of each existing note (listItem -> paragraph -> inline).
  const oldNoteInline = (
    Array.isArray(notesList.content) ? notesList.content : []
  ).map((item) => {
    const para =
      isObject(item) && Array.isArray(item.content)
        ? item.content.find((c) => isObject(c) && c.type === "paragraph")
        : null;
    return para && Array.isArray(para.content) ? para.content : [];
  });

  // Definitions are collected in the reading order of their references.
  const definitions = [];
  const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;

  // Resolve the definition content for one marker and hand back its new id.
  const onMarker = ({ oldNum, phIdx }) => {
    const fnId = freshId();
    if (oldNum != null) {
      const inline = oldNoteInline[oldNum - 1];
      // Every existing body marker MUST map to a real note. An out-of-range
      // marker means the document is internally inconsistent; fail loudly.
      if (inline === undefined) {
        throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`);
      }
      definitions.push(footnoteDefinition(fnId, inline));
    } else {
      const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
      definitions.push(footnoteDefinition(fnId, inline));
    }
    return fnId;
  };

  // Recursively visit inline arrays inside a block (paragraph, heading,
  // callout child paragraphs, table cells, ...), preserving reading order.
  const visitInlineArrays = (container) => {
    if (!isObject(container) || !Array.isArray(container.content)) {
      return;
    }
    const hasText = container.content.some(
      (n) => isObject(n) && n.type === "text",
    );
    if (hasText) {
      replaceMarkersWithReferences(container.content, onMarker);
      return;
    }
    for (const child of container.content) {
      visitInlineArrays(child);
    }
  };

  for (let i = 0; i < headingIdx; i++) {
    // Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a
    // footnote marker and is synced separately by setCalloutRange.
    if (
      isObject(top[i]) &&
      top[i].type === "callout" &&
      disclaimerRangeRe.test(blockText(top[i]))
    ) {
      continue;
    }
    visitInlineArrays(top[i]);
  }

  // Replace the notes orderedList with a footnotesList of the collected
  // definitions; with no definitions the list is dropped.
  if (definitions.length > 0) {
    top[listIdx] = {
      type: "footnotesList",
      content: definitions,
    };
  } else {
    top.splice(listIdx, 1);
  }

  // Sync the disclaimer callout range to the new note count.
  const synced = setCalloutRange(working, definitions.length);
  return { doc: synced.doc, consumed };
}
|
|
/**
 * AUTHOR-INLINE footnote insertion. The caller says WHERE (`opts.anchorText`)
 * and WHAT (`opts.text`, markdown); numbering and the bottom list are derived
 * by `canonicalizeFootnotes`, so the caller never handles `footnotesList` or
 * numbers directly.
 *
 * Content DEDUP: when an existing `footnoteDefinition` with a non-empty string
 * id has the SAME content key (`footnoteContentKey`) as the new text, its id is
 * REUSED: the new reference points at it and no definition is added.
 * Otherwise a new id is minted with `generateFootnoteId` and a new definition
 * is appended. An empty content key never dedups.
 *
 * Mechanics: a `footnoteReference` node is inserted directly after the anchor
 * through the shared mark-safe `insertNodesAfterAnchor` core (no leading space,
 * no text sentinel), then the whole document is canonicalized.
 *
 * @returns {{ doc: object, inserted: boolean, footnoteId: string, reused: boolean }}
 *   Always a clone of `doc`. When the anchor has no valid insertion point the
 *   clone is unchanged and `inserted` is false.
 */
export function insertInlineFootnote(doc, opts) {
  const inline = mdToInlineNodes(opts.text ?? "");
  // footnoteContentKey is handed a bare `{ content }` wrapper instead of a
  // throwaway definition node.
  const key = footnoteContentKey({ content: inline });

  // Look for the first existing definition whose content key matches.
  let footnoteId = null;
  if (key !== "") {
    walk(doc, (n) => {
      if (footnoteId != null) {
        return;
      }
      if (!isObject(n) || n.type !== "footnoteDefinition") {
        return;
      }
      const id = n.attrs ? n.attrs.id : undefined;
      if (typeof id !== "string" || id === "") {
        return;
      }
      if (footnoteContentKey(n) === key) {
        footnoteId = id;
      }
    });
  }
  const reused = footnoteId != null;
  if (!reused) {
    footnoteId = generateFootnoteId();
  }

  // Guards that keep the inline atom out of the notes area and out of blocks
  // that cannot hold it:
  //  - beforeBlock: search only the BODY, i.e. the top-level blocks before the
  //    first one that IS or CONTAINS (at any depth) a footnotesList or
  //    footnoteDefinition, so a nested list or a bare definition bounds the
  //    search too;
  //  - skipSubtreeTypes: never descend into a footnotesList/footnoteDefinition
  //    subtree, so a reference is not glued inside an existing definition;
  //  - forbidBlockTypes: refuse codeBlocks, which cannot contain an inline atom.
  // If the anchor only matches in such a place the insert is refused
  // (inserted:false) rather than damaging content.
  const boundaryIdx = Array.isArray(doc?.content)
    ? doc.content.findIndex((n) => containsFootnoteNotes(n))
    : -1;
  const searchOpts = {
    forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
    skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
  };
  if (boundaryIdx >= 0) {
    searchOpts.beforeBlock = boundaryIdx;
  }
  const anchor = (opts.anchorText ?? "").trimEnd();
  const makeReference = () => [
    { type: "footnoteReference", attrs: { id: footnoteId } },
  ];
  const r = insertNodesAfterAnchor(doc, anchor, makeReference, searchOpts);
  if (!r.inserted) {
    return { doc: clone(doc), inserted: false, footnoteId, reused };
  }

  // A reused id shares the existing definition; only a new id needs one.
  // Ordering/placement of the definition is left to the canonicalizer.
  if (!reused) {
    appendDefinition(r.doc, makeFootnoteDefinition(footnoteId, inline));
  }
  const canonical = canonicalizeFootnotes(r.doc);
  return { doc: canonical, inserted: true, footnoteId, reused };
}
|
|
/**
 * Add a definition node to `doc` (in place) so the canonicalizer can later
 * order/place it. It goes at the end of the first `footnotesList` found
 * anywhere in the tree, provided that list has a `content` array; otherwise a
 * new `footnotesList` holding just this definition is appended to the
 * document's top-level content. Nothing happens when the document has no
 * top-level `content` array either.
 */
function appendDefinition(doc, defNode) {
  const firstList = getList(
    doc,
    (n) => isObject(n) && n.type === "footnotesList",
  );
  const target =
    firstList && Array.isArray(firstList.content) ? firstList.content : null;
  if (target) {
    target.push(defNode);
    return;
  }
  if (Array.isArray(doc.content)) {
    doc.content.push({ type: "footnotesList", content: [defNode] });
  }
}
|