feat(editor): footnotes (reference + definitions model)

Adds footnotes: a superscript marker in the text linked to an editable
definition in a Footnotes section at the end of the page, with auto-numbering
and a read-only hover popover. Chose the reference+definitions model (3 plain
nodes) over an inline atom with a sub-editor specifically for collaboration
safety.

editor-ext (packages/editor-ext/src/lib/footnote/):
- footnoteReference (inline atom, id), footnotesList (block, last child),
  footnoteDefinition (paragraph+, id). renderHTML emits sup[data-footnote-ref]
  / section[data-footnotes] / div[data-footnote-def]; parse-rule priority makes
  the empty reference win over the Superscript mark (else it is dropped on the
  server save).
- numbering: a decoration-only plugin (pure function of doc order) -> every
  client computes identical numbers, no document mutation, Yjs-safe.
- sync plugin: single-pass, always SYNC_META-tagged and skipping remote txns
  (terminates, no loop), idempotent; canonicalizes to one trailing footnotesList
  (merging duplicates), creates missing definitions, drops orphans, and
  coexists with TrailingNode. Disabled in read-only.
- commands setFootnote (one tx: reference + definition at the matching index +
  focus) / removeFootnote (cascade, one undo) / scrollTo*. slash /footnote.

client: superscript NodeView + floating-ui read-only popover; bottom-list and
definition NodeViews; registered in mainExtensions.

server: the three nodes registered in tiptapExtensions so collab/save/export
keep them. Round-trip regression spec guards the Superscript parse-priority.

markdown: turndown/marked round-trip to pandoc/GFM [^id] (+ a code-fence guard
so footnote-like lines inside code blocks are not extracted).

MCP mirror: schema + markdown-converter + commentsToFootnotes rewritten to real
footnote nodes + diff marker counting; NUL sentinels written as \u0000 escapes.

v2 follow-ups (per plan): definition reordering on reference move, id-collision
regeneration on paste, multiple references to one footnote.

Implements docs/footnotes-plan.md (variant B).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-20 11:39:00 +03:00
parent c8af637654
commit 4d17befb0d
38 changed files with 2906 additions and 151 deletions

View File

@@ -263,10 +263,75 @@ function bridgeTaskLists(html) {
}
return document.body.innerHTML;
}
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
// definition lines are collected into a single <section data-footnotes>.
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
function escapeFootnoteAttr(value) {
return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
}
const footnoteRefMarkedExtension = {
name: "footnoteRef",
level: "inline",
start(src) {
return src.match(/\[\^/)?.index ?? -1;
},
tokenizer(src) {
const match = FOOTNOTE_REF_RE.exec(src);
if (match && match.index === 0) {
return { type: "footnoteRef", raw: match[0], id: match[1] };
}
return undefined;
},
renderer(token) {
return `<sup data-footnote-ref data-id="${escapeFootnoteAttr(token.id)}"></sup>`;
},
};
marked.use({ extensions: [footnoteRefMarkedExtension] });
/**
* Pull `[^id]: text` definition lines out of the body and render a single
* <section data-footnotes> for them (or "" when there are none).
*/
function extractFootnotes(markdown) {
const lines = markdown.split("\n");
const bodyLines = [];
const defs = [];
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
// block is preserved verbatim and not treated as a footnote definition.
let fence = null;
for (const line of lines) {
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
if (fenceMatch) {
const marker = fenceMatch[2][0];
if (fence === null)
fence = marker;
else if (marker === fence)
fence = null;
bodyLines.push(line);
continue;
}
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
if (m)
defs.push({ id: m[1], text: m[2] });
else
bodyLines.push(line);
}
if (defs.length === 0)
return { body: markdown, section: "" };
const inner = defs
.map((d) => `<div data-footnote-def data-id="${escapeFootnoteAttr(d.id)}"><p>${marked.parseInline(d.text || "")}</p></div>`)
.join("");
return {
body: bodyLines.join("\n"),
section: `<section data-footnotes>${inner}</section>`,
};
}
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export async function markdownToProseMirror(markdownContent) {
const withCallouts = await preprocessCallouts(markdownContent);
const html = await marked.parse(withCallouts);
const { body, section } = extractFootnotes(withCallouts);
const html = (await marked.parse(body)) + section;
const bridged = bridgeTaskLists(html);
return generateJSON(bridged, docmostExtensions);
}

View File

@@ -79,10 +79,26 @@ function countUniqueLinks(doc) {
visit(doc);
return hrefs.size;
}
/** Count footnoteReference nodes anywhere under a node (reading order). */
function countFootnoteRefs(node) {
if (!node || typeof node !== "object")
return 0;
let n = node.type === "footnoteReference" ? 1 : 0;
if (Array.isArray(node.content)) {
for (const child of node.content)
n += countFootnoteRefs(child);
}
return n;
}
/**
* Parse the ordered list of integers from `[N]` footnote markers found in the
* BODY only (every top-level block before the first "Примечания..." notes
* heading; if no such heading, the whole doc). Returned in reading order.
* Ordered list of footnote marker numbers found in the BODY only (every
* top-level block before the first "Примечания..." notes heading; if no such
* heading, the whole doc), in reading order.
*
* Supports BOTH representations:
* - real `footnoteReference` nodes (the current footnote feature) — numbered
* 1..n by reading position, since their visible number is derived;
* - legacy `[N]` text markers (older translated docs) — the literal N.
*/
function footnoteMarkers(doc, notesHeading) {
const top = Array.isArray(doc?.content) ? doc.content : [];
@@ -90,6 +106,15 @@ function footnoteMarkers(doc, notesHeading) {
n.type === "heading" &&
plainText(n).trim() === notesHeading);
const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top;
// Real footnoteReference nodes take precedence: when present, number them by
// reading position (their displayed number is not stored).
let refCount = 0;
for (const block of bodyBlocks)
refCount += countFootnoteRefs(block);
if (refCount > 0) {
return Array.from({ length: refCount }, (_, i) => i + 1);
}
// Fallback: legacy `[N]` text markers.
const markers = [];
const re = /\[(\d+)\]/g;
for (const block of bodyBlocks) {

View File

@@ -342,6 +342,78 @@ const Mention = Node.create({
return ["span", { "data-type": "mention", ...HTMLAttributes }, 0];
},
});
/**
* Footnote feature (mirror of packages/editor-ext/src/lib/footnote). Three
* nodes connected by `id`:
* - FootnoteReference: inline atom marker in the body (<sup data-footnote-ref>);
* - FootnotesList: a single bottom container (<section data-footnotes>);
* - FootnoteDefinition: one editable note keyed by id (<div data-footnote-def>).
* The visible number is not stored; it is derived from reference order.
*
* priority 101 so this node's <sup> parse rule beats the Superscript mark's
* <sup> rule (otherwise an empty reference is parsed as an empty superscript
* mark and dropped). Keep in sync with editor-ext.
*/
const FootnoteReference = Node.create({
name: "footnoteReference",
priority: 101,
group: "inline",
inline: true,
atom: true,
selectable: true,
draggable: false,
addAttributes() {
return {
id: {
default: null,
parseHTML: (el) => el.getAttribute("data-id"),
renderHTML: (attrs) => attrs.id ? { "data-id": attrs.id } : {},
},
};
},
parseHTML() {
return [{ tag: "sup[data-footnote-ref]", priority: 100 }];
},
renderHTML({ HTMLAttributes }) {
return ["sup", { "data-footnote-ref": "", ...HTMLAttributes }];
},
});
const FootnotesList = Node.create({
name: "footnotesList",
group: "block",
content: "footnoteDefinition+",
isolating: true,
selectable: false,
defining: true,
parseHTML() {
return [{ tag: "section[data-footnotes]" }];
},
renderHTML({ HTMLAttributes }) {
return ["section", { "data-footnotes": "", ...HTMLAttributes }, 0];
},
});
const FootnoteDefinition = Node.create({
name: "footnoteDefinition",
content: "paragraph+",
defining: true,
isolating: true,
selectable: false,
addAttributes() {
return {
id: {
default: null,
parseHTML: (el) => el.getAttribute("data-id"),
renderHTML: (attrs) => attrs.id ? { "data-id": attrs.id } : {},
},
};
},
parseHTML() {
return [{ tag: "div[data-footnote-def]" }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-footnote-def": "", ...HTMLAttributes }, 0];
},
});
/** Inline KaTeX expression. Carries the LaTeX source in `text`. */
const MathInline = Node.create({
name: "mathInline",
@@ -978,6 +1050,9 @@ export const docmostExtensions = [
TableCell,
TableHeader,
Mention,
FootnoteReference,
FootnotesList,
FootnoteDefinition,
MathInline,
MathBlock,
Details,

View File

@@ -388,6 +388,27 @@ export function convertProseMirrorToMarkdown(content) {
// carry the real values), so escape it for the text context, not attrs.
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
}
case "footnoteReference": {
// Pandoc/GFM inline marker. The number is derived (not stored), so the
// id is the stable anchor.
const fnId = node.attrs?.id || "";
return fnId ? `[^${fnId}]` : "";
}
case "footnotesList":
// The container renders its definitions, each on its own `[^id]: ...`
// line. A blank line separates the body from the notes block.
return nodeContent.map(processNode).join("\n");
case "footnoteDefinition": {
const defId = node.attrs?.id || "";
// Collapse the definition's paragraphs into a single line; multi-line
// footnotes are a v2 refinement.
const defText = nodeContent
.map(processNode)
.join(" ")
.replace(/\s*\n+\s*/g, " ")
.trim();
return defId ? `[^${defId}]: ${defText}` : "";
}
case "attachment": {
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
// the schema stores name/url (plus mime/size/attachmentId). Emit the

View File

@@ -223,6 +223,59 @@ export function noteItem(inlineNodes) {
],
};
}
/**
* Wrap inline ProseMirror nodes in a real footnoteDefinition node keyed by id:
* { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", content }] }
* (mirrors the editor-ext / docmost-schema FootnoteDefinition node).
*/
export function footnoteDefinition(id, inlineNodes) {
const content = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
return {
type: "footnoteDefinition",
attrs: { id },
content: [{ type: "paragraph", attrs: { id: freshId() }, content }],
};
}
/**
* Replace every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder in
* an inline content array with a real `footnoteReference` node, in reading
* order. `onMarker` is called for each replaced marker (with the original `[N]`
* number or the placeholder index) and returns the fresh footnote id to attach
* to the inserted node. Mutates `inline` in place.
*/
function replaceMarkersWithReferences(inline, onMarker) {
const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
for (let i = 0; i < inline.length; i++) {
const n = inline[i];
if (!isObject(n) || n.type !== "text" || typeof n.text !== "string") {
continue;
}
if (!re.test(n.text))
continue;
re.lastIndex = 0;
const marks = Array.isArray(n.marks) ? n.marks : [];
const parts = [];
let last = 0;
let m;
while ((m = re.exec(n.text)) !== null) {
if (m.index > last) {
parts.push({ ...n, text: n.text.slice(last, m.index), marks: [...marks] });
}
const oldNum = m[1] != null ? Number(m[1]) : undefined;
const phIdx = m[2] != null ? Number(m[2]) : undefined;
const fnId = onMarker({ oldNum, phIdx });
parts.push({ type: "footnoteReference", attrs: { id: fnId } });
last = m.index + m[0].length;
}
if (last < n.text.length) {
parts.push({ ...n, text: n.text.slice(last), marks: [...marks] });
}
// Drop any zero-length text runs the slicing may have produced.
const cleaned = parts.filter((p) => p.type !== "text" || (typeof p.text === "string" && p.text.length > 0));
inline.splice(i, 1, ...cleaned);
i += cleaned.length - 1;
}
}
/**
* Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
* ProseMirror nodes.
@@ -321,85 +374,100 @@ export function commentsToFootnotes(doc, comments, opts = {}) {
throw new Error("notes orderedList not found");
}
const consumed = [];
const noteByPh = new Map();
const noteInlineByPh = new Map();
(Array.isArray(comments) ? comments : []).forEach((c, i) => {
if (!c || !c.selection)
return;
// Collision-proof sentinel delimited by NUL control chars, which never occur
// in real Docmost prose so the renumber regex below cannot mistake any body
// text (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
// transient: the placeholder round-trips within this function (insertMarkerAfter
// inserts it, the renumber pass replaces it with "[N]"), so it never persists
// in a returned/pushed document.
// in real Docmost prose - so the marker regex cannot mistake any body text
// (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
// transient: the placeholder is inserted here and replaced by a
// footnoteReference node below; it never persists in a returned document.
const ph = `\u0000FN${i}\u0000`;
// insertMarkerAfter returns a NEW cloned doc; reassign `working` and refresh
// the `top` / `notesList` references that point into it.
// insertMarkerAfter returns a NEW cloned doc; reassign `working`.
const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
beforeBlock: notesIdx,
});
if (!r.inserted)
return;
working = r.doc;
noteByPh.set(ph, noteItem(mdToInlineNodes(c.content)));
noteInlineByPh.set(ph, mdToInlineNodes(c.content));
consumed.push(c.id);
});
// Re-resolve references into the (possibly re-cloned) working doc.
const top2 = Array.isArray(working.content) ? working.content : [];
const notesList2 = top2
.slice(notesIdx)
.find((n) => isObject(n) && n.type === "orderedList");
const notesIdx2 = top2.findIndex((n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading);
const oldListIndex = top2.findIndex((n) => isObject(n) && n.type === "orderedList");
const notesList2 = oldListIndex >= 0 ? top2[oldListIndex] : null;
if (!notesList2) {
throw new Error("notes orderedList not found");
}
const oldNotes = Array.isArray(notesList2.content)
// Inline content of each existing note (listItem -> paragraph -> inline).
const oldNoteInline = (Array.isArray(notesList2.content)
? notesList2.content
: [];
const newNotes = [];
let seq = 0;
// Match either an existing "[N]" marker or a NUL-delimited "\u0000FN<i>\u0000"
// placeholder, in reading order across the body (blocks before the notes heading).
const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
// Same range regex setCalloutRange uses to detect the disclaimer callout's
// "[1]…[K]" range; used here to decide whether a top-level callout is the
// disclaimer (skip) or an ordinary callout (renumber normally).
: []).map((item) => {
const para = isObject(item) && Array.isArray(item.content)
? item.content.find((c) => isObject(c) && c.type === "paragraph")
: null;
return para && Array.isArray(para.content) ? para.content : [];
});
// Walk the body in reading order, turning each "[N]" / placeholder marker into
// a real footnoteReference node and collecting its definition inline content.
const definitions = [];
const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
for (let i = 0; i < notesIdx; i++) {
// Skip ONLY the disclaimer callout: its "[1]…[K]" range is NOT a footnote
// marker and is synced separately by setCalloutRange. Renumbering it here
// would consume note slots and corrupt the sequence. Other top-level
// callouts may carry legitimate "[N]" body markers and are renumbered.
// Recursively visit inline arrays inside a block (paragraph, heading, callout
// child paragraphs, table cells, ...), preserving document reading order.
const visitInlineArrays = (container) => {
if (!isObject(container) || !Array.isArray(container.content))
return;
const hasText = container.content.some((n) => isObject(n) && n.type === "text");
if (hasText) {
replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
const fnId = freshId();
if (oldNum != null) {
const inline = oldNoteInline[oldNum - 1];
// Every existing body marker MUST map to a real note. An out-of-range
// marker means the document is internally inconsistent; fail loudly.
if (inline === undefined) {
throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`);
}
definitions.push(footnoteDefinition(fnId, inline));
}
else {
const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
definitions.push(footnoteDefinition(fnId, inline));
}
return fnId;
});
}
else {
for (const child of container.content)
visitInlineArrays(child);
}
};
const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
for (let i = 0; i < notesBoundary; i++) {
// Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a footnote
// marker and is synced separately by setCalloutRange.
if (isObject(top2[i]) &&
top2[i].type === "callout" &&
disclaimerRangeRe.test(blockText(top2[i]))) {
continue;
}
walk(top2[i], (node) => {
if (node.type !== "text" || typeof node.text !== "string")
return;
node.text = node.text.replace(re, (_m, oldNum, phIdx) => {
if (oldNum != null) {
const note = oldNotes[Number(oldNum) - 1];
// Every existing body marker MUST map to a real note. An out-of-range
// marker means the document is internally inconsistent; fail loudly
// rather than silently dropping the note and desyncing the callout.
if (note === undefined) {
throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNotes.length} items); document is inconsistent`);
}
newNotes.push(note);
}
else {
newNotes.push(noteByPh.get(`\u0000FN${phIdx}\u0000`));
}
return `[${++seq}]`;
});
});
visitInlineArrays(top2[i]);
}
// Reorder the notes list IN PLACE on `working` first, THEN sync the callout
// range. setCalloutRange clones `working`, so the reordered notes (mutated
// before the clone) are carried into its result automatically. No null-filter
// here: marker count and note count must stay exactly equal (the out-of-range
// guard above guarantees no undefined entry is ever pushed).
notesList2.content = newNotes;
const synced = setCalloutRange(working, notesList2.content.length);
// Replace the old orderedList with a real footnotesList of the collected
// definitions (reading order). If there are no definitions, drop the list.
if (definitions.length > 0) {
top2[oldListIndex] = {
type: "footnotesList",
content: definitions,
};
}
else {
top2.splice(oldListIndex, 1);
}
// Sync the disclaimer callout range to the new note count.
const synced = setCalloutRange(working, definitions.length);
return { doc: synced.doc, consumed };
}

View File

@@ -296,12 +296,87 @@ function bridgeTaskLists(html: string): string {
return document.body.innerHTML;
}
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
// definition lines are collected into a single <section data-footnotes>.
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
function escapeFootnoteAttr(value: string): string {
return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
}
const footnoteRefMarkedExtension = {
name: "footnoteRef",
level: "inline" as const,
start(src: string) {
return src.match(/\[\^/)?.index ?? -1;
},
tokenizer(src: string) {
const match = FOOTNOTE_REF_RE.exec(src);
if (match && match.index === 0) {
return { type: "footnoteRef", raw: match[0], id: match[1] };
}
return undefined;
},
renderer(token: any) {
return `<sup data-footnote-ref data-id="${escapeFootnoteAttr(
token.id,
)}"></sup>`;
},
};
marked.use({ extensions: [footnoteRefMarkedExtension] });
/**
* Pull `[^id]: text` definition lines out of the body and render a single
* <section data-footnotes> for them (or "" when there are none).
*/
function extractFootnotes(markdown: string): {
body: string;
section: string;
} {
const lines = markdown.split("\n");
const bodyLines: string[] = [];
const defs: Array<{ id: string; text: string }> = [];
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
// block is preserved verbatim and not treated as a footnote definition.
let fence: string | null = null;
for (const line of lines) {
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
if (fenceMatch) {
const marker = fenceMatch[2][0];
if (fence === null) fence = marker;
else if (marker === fence) fence = null;
bodyLines.push(line);
continue;
}
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
if (m) defs.push({ id: m[1], text: m[2] });
else bodyLines.push(line);
}
if (defs.length === 0) return { body: markdown, section: "" };
const inner = defs
.map(
(d) =>
`<div data-footnote-def data-id="${escapeFootnoteAttr(
d.id,
)}"><p>${marked.parseInline(d.text || "")}</p></div>`,
)
.join("");
return {
body: bodyLines.join("\n"),
section: `<section data-footnotes>${inner}</section>`,
};
}
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export async function markdownToProseMirror(
markdownContent: string,
): Promise<any> {
const withCallouts = await preprocessCallouts(markdownContent);
const html = await marked.parse(withCallouts);
const { body, section } = extractFootnotes(withCallouts);
const html = (await marked.parse(body)) + section;
const bridged = bridgeTaskLists(html);
return generateJSON(bridged, docmostExtensions);
}

View File

@@ -101,10 +101,25 @@ function countUniqueLinks(doc: any): number {
return hrefs.size;
}
/** Count footnoteReference nodes anywhere under a node (reading order). */
function countFootnoteRefs(node: any): number {
if (!node || typeof node !== "object") return 0;
let n = node.type === "footnoteReference" ? 1 : 0;
if (Array.isArray(node.content)) {
for (const child of node.content) n += countFootnoteRefs(child);
}
return n;
}
/**
* Parse the ordered list of integers from `[N]` footnote markers found in the
* BODY only (every top-level block before the first "Примечания..." notes
* heading; if no such heading, the whole doc). Returned in reading order.
* Ordered list of footnote marker numbers found in the BODY only (every
* top-level block before the first "Примечания..." notes heading; if no such
* heading, the whole doc), in reading order.
*
* Supports BOTH representations:
* - real `footnoteReference` nodes (the current footnote feature) — numbered
* 1..n by reading position, since their visible number is derived;
* - legacy `[N]` text markers (older translated docs) — the literal N.
*/
function footnoteMarkers(doc: any, notesHeading: string): number[] {
const top: any[] = Array.isArray(doc?.content) ? doc.content : [];
@@ -115,6 +130,16 @@ function footnoteMarkers(doc: any, notesHeading: string): number[] {
plainText(n).trim() === notesHeading,
);
const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top;
// Real footnoteReference nodes take precedence: when present, number them by
// reading position (their displayed number is not stored).
let refCount = 0;
for (const block of bodyBlocks) refCount += countFootnoteRefs(block);
if (refCount > 0) {
return Array.from({ length: refCount }, (_, i) => i + 1);
}
// Fallback: legacy `[N]` text markers.
const markers: number[] = [];
const re = /\[(\d+)\]/g;
for (const block of bodyBlocks) {

View File

@@ -378,6 +378,83 @@ const Mention = Node.create({
},
});
/**
* Footnote feature (mirror of packages/editor-ext/src/lib/footnote). Three
* nodes connected by `id`:
* - FootnoteReference: inline atom marker in the body (<sup data-footnote-ref>);
* - FootnotesList: a single bottom container (<section data-footnotes>);
* - FootnoteDefinition: one editable note keyed by id (<div data-footnote-def>).
* The visible number is not stored; it is derived from reference order.
*
* priority 101 so this node's <sup> parse rule beats the Superscript mark's
* <sup> rule (otherwise an empty reference is parsed as an empty superscript
* mark and dropped). Keep in sync with editor-ext.
*/
const FootnoteReference = Node.create({
name: "footnoteReference",
priority: 101,
group: "inline",
inline: true,
atom: true,
selectable: true,
draggable: false,
addAttributes() {
return {
id: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
renderHTML: (attrs: Record<string, any>) =>
attrs.id ? { "data-id": attrs.id } : {},
},
};
},
parseHTML() {
return [{ tag: "sup[data-footnote-ref]", priority: 100 }];
},
renderHTML({ HTMLAttributes }) {
return ["sup", { "data-footnote-ref": "", ...HTMLAttributes }];
},
});
const FootnotesList = Node.create({
name: "footnotesList",
group: "block",
content: "footnoteDefinition+",
isolating: true,
selectable: false,
defining: true,
parseHTML() {
return [{ tag: "section[data-footnotes]" }];
},
renderHTML({ HTMLAttributes }) {
return ["section", { "data-footnotes": "", ...HTMLAttributes }, 0];
},
});
const FootnoteDefinition = Node.create({
name: "footnoteDefinition",
content: "paragraph+",
defining: true,
isolating: true,
selectable: false,
addAttributes() {
return {
id: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
renderHTML: (attrs: Record<string, any>) =>
attrs.id ? { "data-id": attrs.id } : {},
},
};
},
parseHTML() {
return [{ tag: "div[data-footnote-def]" }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-footnote-def": "", ...HTMLAttributes }, 0];
},
});
/** Inline KaTeX expression. Carries the LaTeX source in `text`. */
const MathInline = Node.create({
name: "mathInline",
@@ -1069,6 +1146,9 @@ export const docmostExtensions = [
TableCell,
TableHeader,
Mention,
FootnoteReference,
FootnotesList,
FootnoteDefinition,
MathInline,
MathBlock,
Details,

View File

@@ -430,6 +430,30 @@ export function convertProseMirrorToMarkdown(content: any): string {
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
}
case "footnoteReference": {
// Pandoc/GFM inline marker. The number is derived (not stored), so the
// id is the stable anchor.
const fnId = node.attrs?.id || "";
return fnId ? `[^${fnId}]` : "";
}
case "footnotesList":
// The container renders its definitions, each on its own `[^id]: ...`
// line. A blank line separates the body from the notes block.
return nodeContent.map(processNode).join("\n");
case "footnoteDefinition": {
const defId = node.attrs?.id || "";
// Collapse the definition's paragraphs into a single line; multi-line
// footnotes are a v2 refinement.
const defText = nodeContent
.map(processNode)
.join(" ")
.replace(/\s*\n+\s*/g, " ")
.trim();
return defId ? `[^${defId}]: ${defText}` : "";
}
case "attachment": {
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
// the schema stores name/url (plus mime/size/attachmentId). Emit the

View File

@@ -264,6 +264,66 @@ export function noteItem(inlineNodes: any[]): any {
};
}
/**
* Wrap inline ProseMirror nodes in a real footnoteDefinition node keyed by id:
* { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", content }] }
* (mirrors the editor-ext / docmost-schema FootnoteDefinition node).
*/
export function footnoteDefinition(id: string, inlineNodes: any[]): any {
const content = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
return {
type: "footnoteDefinition",
attrs: { id },
content: [{ type: "paragraph", attrs: { id: freshId() }, content }],
};
}
/**
* Replace every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder in
* an inline content array with a real `footnoteReference` node, in reading
* order. `onMarker` is called for each replaced marker (with the original `[N]`
* number or the placeholder index) and returns the fresh footnote id to attach
* to the inserted node. Mutates `inline` in place.
*/
function replaceMarkersWithReferences(
inline: any[],
onMarker: (info: { oldNum?: number; phIdx?: number }) => string,
): void {
const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
for (let i = 0; i < inline.length; i++) {
const n = inline[i];
if (!isObject(n) || n.type !== "text" || typeof n.text !== "string") {
continue;
}
if (!re.test(n.text)) continue;
re.lastIndex = 0;
const marks = Array.isArray(n.marks) ? n.marks : [];
const parts: any[] = [];
let last = 0;
let m: RegExpExecArray | null;
while ((m = re.exec(n.text)) !== null) {
if (m.index > last) {
parts.push({ ...n, text: n.text.slice(last, m.index), marks: [...marks] });
}
const oldNum = m[1] != null ? Number(m[1]) : undefined;
const phIdx = m[2] != null ? Number(m[2]) : undefined;
const fnId = onMarker({ oldNum, phIdx });
parts.push({ type: "footnoteReference", attrs: { id: fnId } });
last = m.index + m[0].length;
}
if (last < n.text.length) {
parts.push({ ...n, text: n.text.slice(last), marks: [...marks] });
}
// Drop any zero-length text runs the slicing may have produced.
const cleaned = parts.filter(
(p) => p.type !== "text" || (typeof p.text === "string" && p.text.length > 0),
);
inline.splice(i, 1, ...cleaned);
i += cleaned.length - 1;
}
}
/**
* Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
* ProseMirror nodes.
@@ -388,54 +448,91 @@ export function commentsToFootnotes(
}
const consumed: string[] = [];
const noteByPh = new Map<string, any>();
const noteInlineByPh = new Map<string, any[]>();
(Array.isArray(comments) ? comments : []).forEach((c, i) => {
if (!c || !c.selection) return;
// Collision-proof sentinel delimited by NUL control chars, which never occur
// in real Docmost prose so the renumber regex below cannot mistake any body
// text (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
// transient: the placeholder round-trips within this function (insertMarkerAfter
// inserts it, the renumber pass replaces it with "[N]"), so it never persists
// in a returned/pushed document.
// in real Docmost prose - so the marker regex cannot mistake any body text
// (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
// transient: the placeholder is inserted here and replaced by a
// footnoteReference node below; it never persists in a returned document.
const ph = `\u0000FN${i}\u0000`;
// insertMarkerAfter returns a NEW cloned doc; reassign `working` and refresh
// the `top` / `notesList` references that point into it.
// insertMarkerAfter returns a NEW cloned doc; reassign `working`.
const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
beforeBlock: notesIdx,
});
if (!r.inserted) return;
working = r.doc;
noteByPh.set(ph, noteItem(mdToInlineNodes(c.content)));
noteInlineByPh.set(ph, mdToInlineNodes(c.content));
consumed.push(c.id);
});
// Re-resolve references into the (possibly re-cloned) working doc.
const top2: any[] = Array.isArray(working.content) ? working.content : [];
const notesList2 = top2
.slice(notesIdx)
.find((n) => isObject(n) && n.type === "orderedList");
const notesIdx2 = top2.findIndex(
(n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading,
);
const oldListIndex = top2.findIndex(
(n) => isObject(n) && n.type === "orderedList",
);
const notesList2 = oldListIndex >= 0 ? top2[oldListIndex] : null;
if (!notesList2) {
throw new Error("notes orderedList not found");
}
const oldNotes: any[] = Array.isArray(notesList2.content)
// Inline content of each existing note (listItem -> paragraph -> inline).
const oldNoteInline = (Array.isArray(notesList2.content)
? notesList2.content
: [];
const newNotes: any[] = [];
let seq = 0;
// Match either an existing "[N]" marker or a NUL-delimited "\u0000FN<i>\u0000"
// placeholder, in reading order across the body (blocks before the notes heading).
const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
// Same range regex setCalloutRange uses to detect the disclaimer callout's
// "[1]…[K]" range; used here to decide whether a top-level callout is the
// disclaimer (skip) or an ordinary callout (renumber normally).
: []
).map((item: any) => {
const para =
isObject(item) && Array.isArray(item.content)
? item.content.find((c: any) => isObject(c) && c.type === "paragraph")
: null;
return para && Array.isArray(para.content) ? para.content : [];
});
// Walk the body in reading order, turning each "[N]" / placeholder marker into
// a real footnoteReference node and collecting its definition inline content.
const definitions: any[] = [];
const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
for (let i = 0; i < notesIdx; i++) {
// Skip ONLY the disclaimer callout: its "[1]…[K]" range is NOT a footnote
// marker and is synced separately by setCalloutRange. Renumbering it here
// would consume note slots and corrupt the sequence. Other top-level
// callouts may carry legitimate "[N]" body markers and are renumbered.
// Recursively visit inline arrays inside a block (paragraph, heading, callout
// child paragraphs, table cells, ...), preserving document reading order.
const visitInlineArrays = (container: any): void => {
if (!isObject(container) || !Array.isArray(container.content)) return;
const hasText = container.content.some(
(n: any) => isObject(n) && n.type === "text",
);
if (hasText) {
replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
const fnId = freshId();
if (oldNum != null) {
const inline = oldNoteInline[oldNum - 1];
// Every existing body marker MUST map to a real note. An out-of-range
// marker means the document is internally inconsistent; fail loudly.
if (inline === undefined) {
throw new Error(
`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`,
);
}
definitions.push(footnoteDefinition(fnId, inline));
} else {
const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
definitions.push(footnoteDefinition(fnId, inline));
}
return fnId;
});
} else {
for (const child of container.content) visitInlineArrays(child);
}
};
const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
for (let i = 0; i < notesBoundary; i++) {
// Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a footnote
// marker and is synced separately by setCalloutRange.
if (
isObject(top2[i]) &&
top2[i].type === "callout" &&
@@ -443,35 +540,22 @@ export function commentsToFootnotes(
) {
continue;
}
walk(top2[i], (node) => {
if (node.type !== "text" || typeof node.text !== "string") return;
node.text = node.text.replace(re, (_m: string, oldNum: string, phIdx: string) => {
if (oldNum != null) {
const note = oldNotes[Number(oldNum) - 1];
// Every existing body marker MUST map to a real note. An out-of-range
// marker means the document is internally inconsistent; fail loudly
// rather than silently dropping the note and desyncing the callout.
if (note === undefined) {
throw new Error(
`footnote [${oldNum}] has no matching note (notes list has ${oldNotes.length} items); document is inconsistent`,
);
}
newNotes.push(note);
} else {
newNotes.push(noteByPh.get(`\u0000FN${phIdx}\u0000`));
}
return `[${++seq}]`;
});
});
visitInlineArrays(top2[i]);
}
// Reorder the notes list IN PLACE on `working` first, THEN sync the callout
// range. setCalloutRange clones `working`, so the reordered notes (mutated
// before the clone) are carried into its result automatically. No null-filter
// here: marker count and note count must stay exactly equal (the out-of-range
// guard above guarantees no undefined entry is ever pushed).
notesList2.content = newNotes;
const synced = setCalloutRange(working, notesList2.content.length);
// Replace the old orderedList with a real footnotesList of the collected
// definitions (reading order). If there are no definitions, drop the list.
if (definitions.length > 0) {
top2[oldListIndex] = {
type: "footnotesList",
content: definitions,
};
} else {
top2.splice(oldListIndex, 1);
}
// Sync the disclaimer callout range to the new note count.
const synced = setCalloutRange(working, definitions.length);
return { doc: synced.doc, consumed };
}

View File

@@ -0,0 +1,120 @@
import { test } from "node:test";
import assert from "node:assert/strict";
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
/** Recursively collect every node of `type`. */
function findAll(node, type, acc = []) {
if (!node || typeof node !== "object") return acc;
if (node.type === type) acc.push(node);
if (Array.isArray(node.content)) {
for (const c of node.content) findAll(c, type, acc);
}
return acc;
}
const footnoteDoc = {
type: "doc",
content: [
{
type: "paragraph",
content: [
{ type: "text", text: "Water" },
{ type: "footnoteReference", attrs: { id: "fn1" } },
{ type: "text", text: " and clay" },
{ type: "footnoteReference", attrs: { id: "fn2" } },
{ type: "text", text: "." },
],
},
{
type: "footnotesList",
content: [
{
type: "footnoteDefinition",
attrs: { id: "fn1" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "First note." }] },
],
},
{
type: "footnoteDefinition",
attrs: { id: "fn2" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "Second note." }] },
],
},
],
},
],
};
test("JSON -> Markdown emits pandoc footnote syntax", () => {
const md = convertProseMirrorToMarkdown(footnoteDoc);
assert.match(md, /\[\^fn1\]/);
assert.match(md, /\[\^fn2\]/);
assert.match(md, /\[\^fn1\]: First note\./);
assert.match(md, /\[\^fn2\]: Second note\./);
});
test("Markdown -> JSON rebuilds footnote nodes", async () => {
const md = convertProseMirrorToMarkdown(footnoteDoc);
const json = await markdownToProseMirror(md);
const refs = findAll(json, "footnoteReference");
const list = findAll(json, "footnotesList");
const defs = findAll(json, "footnoteDefinition");
assert.equal(refs.length, 2);
assert.deepEqual(
refs.map((r) => r.attrs.id),
["fn1", "fn2"],
);
assert.equal(list.length, 1);
assert.equal(defs.length, 2);
assert.deepEqual(
defs.map((d) => d.attrs.id),
["fn1", "fn2"],
);
});
test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
const md = convertProseMirrorToMarkdown(footnoteDoc);
const json = await markdownToProseMirror(md);
const md2 = convertProseMirrorToMarkdown(json);
// The second markdown serialization carries the same markers + definitions.
assert.match(md2, /\[\^fn1\]/);
assert.match(md2, /\[\^fn2\]/);
assert.match(md2, /\[\^fn1\]: First note\./);
assert.match(md2, /\[\^fn2\]: Second note\./);
});
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
// Markdown that DOCUMENTS footnote syntax inside a code fence. The example
// definition line must be preserved verbatim inside the code block and not
// pulled out into a real footnotesList / footnoteDefinition.
const md = [
"Intro text.",
"",
"```markdown",
"Body[^demo]",
"",
"[^demo]: example definition",
"```",
"",
"Outro.",
].join("\n");
const json = await markdownToProseMirror(md);
// No real footnote nodes were extracted from the code block.
assert.equal(findAll(json, "footnotesList").length, 0);
assert.equal(findAll(json, "footnoteDefinition").length, 0);
// The example definition line survives somewhere in the code block text.
const codeBlocks = findAll(json, "codeBlock");
assert.ok(codeBlocks.length >= 1, "code block present");
const codeText = JSON.stringify(json);
assert.match(codeText, /\[\^demo\]: example definition/);
});

View File

@@ -34,6 +34,18 @@ const li = (text) => ({
const doc = (...children) => ({ type: "doc", content: children });
const snapshot = (v) => JSON.parse(JSON.stringify(v));
// Collect every footnoteReference id under a node, in reading order.
const collectRefIds = (node, acc = []) => {
if (!node || typeof node !== "object") return acc;
if (node.type === "footnoteReference") acc.push(node.attrs?.id);
if (Array.isArray(node.content)) {
for (const c of node.content) collectRefIds(c, acc);
}
return acc;
};
// Plain text of a footnoteDefinition.
const defText = (def) => blockText(def);
// ---------------------------------------------------------------------------
// blockText / walk / getList
// ---------------------------------------------------------------------------
@@ -173,21 +185,30 @@ test("commentsToFootnotes anchors comments and renumbers by position", () => {
const { doc: out, consumed } = commentsToFootnotes(d, comments);
assert.deepEqual(consumed.sort(), ["cA", "cB"]);
// Markers in reading order: p1 "apple"->[1], p2 existing->[2], p3 "banana"->[3]
assert.match(blockText(out.content[1]), /\[1\]/);
assert.match(blockText(out.content[2]), /\[2\]/);
assert.match(blockText(out.content[3]), /\[3\]/);
// Real footnoteReference nodes were inserted at p1 (apple), p2 (existing),
// p3 (banana), in reading order — the old `[N]` text markers are gone.
const refIds = collectRefIds(out);
assert.equal(refIds.length, 3);
// Body paragraphs p1..p3 no longer carry literal [N] text markers.
assert.doesNotMatch(blockText(out.content[1]), /\[\d+\]/);
assert.doesNotMatch(blockText(out.content[2]), /\[\d+\]/);
assert.doesNotMatch(blockText(out.content[3]), /\[\d+\]/);
// No stray placeholders remain.
const allText = blockText(out);
assert.doesNotMatch(allText, / F\d+ /);
// No stray NUL placeholders remain.
assert.doesNotMatch(blockText(out), /\u0000/);
// Notes list reordered to [apple, existing, banana] (reading order).
const list = out.content.find((n) => n.type === "orderedList");
// The bottom footnotesList holds the definitions in reading order, each keyed
// by the matching reference id.
const list = out.content.find((n) => n.type === "footnotesList");
assert.ok(list, "footnotesList present");
assert.equal(list.content.length, 3);
assert.equal(blockText(list.content[0]), "apple note");
assert.equal(blockText(list.content[1]), "existing note one");
assert.equal(blockText(list.content[2]), "banana note");
assert.deepEqual(
list.content.map((d) => d.attrs.id),
refIds,
);
assert.equal(defText(list.content[0]), "apple note");
assert.equal(defText(list.content[1]), "existing note one");
assert.equal(defText(list.content[2]), "banana note");
// Callout range synced to 3 notes.
assert.match(blockText(out.content[0]), /\[1\]…\[3\]/);
@@ -224,15 +245,16 @@ test("commentsToFootnotes leaves literal 'F1'/'FN2'/'F12' body text untouched",
// The literal "F1"/"FN2"/"F12" prose is preserved verbatim (no bogus
// footnotes, no eaten spaces around them).
assert.match(bodyText, /Press F1 for help, model FN2 and F12 for tools/);
// Exactly one real footnote marker was produced, at the anchored word.
const markerCount = (bodyText.match(/\[\d+\]/g) || []).length;
assert.equal(markerCount, 1);
assert.match(bodyText, /apple \[1\]/);
// Exactly one real footnoteReference node was produced, at the anchored word.
const refIds = collectRefIds(out);
assert.equal(refIds.length, 1);
// Exactly one note in the list — "F1"/"FN2"/"F12" did not spawn extra notes.
const list = out.content.find((n) => n.type === "orderedList");
const list = out.content.find((n) => n.type === "footnotesList");
assert.ok(list, "footnotesList present");
assert.equal(list.content.length, 1);
assert.equal(blockText(list.content[0]), "apple note");
assert.equal(list.content[0].attrs.id, refIds[0]);
assert.equal(defText(list.content[0]), "apple note");
// No stray placeholder sentinel remains anywhere: the NUL-delimited sentinel
// is fully consumed by the renumber pass, so no raw NUL control char persists
@@ -287,17 +309,25 @@ test("commentsToFootnotes renumbers body callouts but skips the disclaimer range
assert.deepEqual(consumed, []);
// The disclaimer's "[1]…[K]" range is NOT treated as body markers: it stays
// a range and is synced to the note count (2), not renumbered into [1],[2].
// a range and is synced to the note count (2), not turned into references.
assert.match(blockText(out.content[0]), /\[1\]…\[2\]/);
// The body callout's [1] is renumbered as a real reading-order marker.
assert.match(blockText(out.content[1]), /noted \[1\] above/);
// The following paragraph's [2] keeps reading order.
assert.match(blockText(out.content[2]), /with \[2\] too/);
// The body callout's [1] and the paragraph's [2] became footnoteReference
// nodes in reading order (the literal text markers are gone).
const refIds = collectRefIds(out);
assert.equal(refIds.length, 2);
assert.match(blockText(out.content[1]), /noted +above/); // [1] -> node, no text
assert.match(blockText(out.content[2]), /with +too/); // [2] -> node, no text
// Notes list still has the two original notes in order.
const list = out.content.find((n) => n.type === "orderedList");
// The footnotesList holds the two original notes in reading order, keyed to
// the new reference ids.
const list = out.content.find((n) => n.type === "footnotesList");
assert.ok(list, "footnotesList present");
assert.equal(list.content.length, 2);
assert.equal(blockText(list.content[0]), "first note");
assert.equal(blockText(list.content[1]), "second note");
assert.deepEqual(
list.content.map((d) => d.attrs.id),
refIds,
);
assert.equal(defText(list.content[0]), "first note");
assert.equal(defText(list.content[1]), "second note");
});