feat(editor): footnotes (reference + definitions model)

Adds footnotes: a superscript marker in the text linked to an editable
definition in a Footnotes section at the end of the page, with auto-numbering
and a read-only hover popover. Chose the reference+definitions model (3 plain
nodes) over an inline atom with a sub-editor specifically for collaboration
safety.

editor-ext (packages/editor-ext/src/lib/footnote/):
- footnoteReference (inline atom, id), footnotesList (block, last child),
  footnoteDefinition (paragraph+, id). renderHTML emits sup[data-footnote-ref]
  / section[data-footnotes] / div[data-footnote-def]; a higher parse-rule
  priority makes the empty reference win over the Superscript mark (otherwise
  the reference is dropped on server save).
- numbering: a decoration-only plugin (pure function of doc order) -> every
  client computes identical numbers, no document mutation, Yjs-safe.
- sync plugin: single-pass, always SYNC_META-tagged and skipping remote txns
  (terminates, no loop), idempotent; canonicalizes to one trailing footnotesList
  (merging duplicates), creates missing definitions, drops orphans, and
  coexists with TrailingNode. Disabled in read-only.
- commands setFootnote (one tx: reference + definition at the matching index +
  focus) / removeFootnote (cascade, one undo) / scrollTo*. slash /footnote.

client: superscript NodeView + floating-ui read-only popover; bottom-list and
definition NodeViews; registered in mainExtensions.

server: the three nodes registered in tiptapExtensions so collab/save/export
keep them. Round-trip regression spec guards the Superscript parse-priority.

markdown: turndown/marked round-trip to pandoc/GFM [^id] (+ a code-fence guard
so footnote-like lines inside code blocks are not extracted).

MCP mirror: schema + markdown-converter + commentsToFootnotes rewritten to real
footnote nodes + diff marker counting; NUL sentinels written as \u0000 escapes.

v2 follow-ups (per plan): definition reordering on reference move, id-collision
regeneration on paste, multiple references to one footnote.

Implements docs/footnotes-plan.md (variant B).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-20 11:39:00 +03:00
parent c8af637654
commit 4d17befb0d
38 changed files with 2906 additions and 151 deletions

View File

@@ -33,4 +33,5 @@ export * from "./lib/status";
export * from "./lib/pdf";
export * from "./lib/page-break";
export * from "./lib/resizable-nodeview";
export * from "./lib/footnote";

View File

@@ -0,0 +1,72 @@
import { mergeAttributes, Node } from "@tiptap/core";
import { ReactNodeViewRenderer } from "@tiptap/react";
import { FOOTNOTE_DEFINITION_NAME } from "./footnote-util";
/** Options accepted by the `FootnoteDefinition` node extension. */
export interface FootnoteDefinitionOptions {
/** Extra HTML attributes merged into the rendered `div[data-footnote-def]`. */
HTMLAttributes: Record<string, any>;
/**
 * Component passed to `ReactNodeViewRenderer`. When falsy (the default is
 * `null`) the extension registers no node view.
 */
view: any;
}
/**
 * Block node that holds the editable text of one footnote. Each definition
 * carries the `id` of the reference it belongs to and is meant to live inside
 * the `footnotesList` node.
 */
export const FootnoteDefinition = Node.create<FootnoteDefinitionOptions>({
  name: FOOTNOTE_DEFINITION_NAME,

  // One or more paragraphs. The schema alone does NOT rule out a nested
  // footnote reference: references are inline nodes and these paragraphs take
  // inline content. Nesting is prevented at the command level instead
  // (setFootnote refuses to insert a reference inside a definition).
  content: "paragraph+",
  defining: true,
  isolating: true,
  selectable: false,

  addOptions() {
    const defaults: FootnoteDefinitionOptions = {
      HTMLAttributes: {},
      view: null,
    };
    return defaults;
  },

  addAttributes() {
    return {
      // Link to the matching reference; serialized as `data-id`.
      id: {
        default: null,
        parseHTML: (element) => element.getAttribute("data-id"),
        // Emit nothing when the id is missing so no empty attribute is written.
        renderHTML: (attributes) =>
          attributes.id ? { "data-id": attributes.id } : {},
      },
    };
  },

  parseHTML() {
    return [{ tag: "div[data-footnote-def]" }];
  },

  renderHTML({ HTMLAttributes }) {
    const attrs = mergeAttributes(
      { "data-footnote-def": "", class: "footnote-def" },
      this.options.HTMLAttributes,
      HTMLAttributes,
    );
    // `0` is the content hole where the definition's paragraphs are rendered.
    return ["div", attrs, 0];
  },

  addNodeView() {
    const { view } = this.options;
    if (!view) return null;
    // NOTE(review): flags the editor as initialized before creating the React
    // node view — presumably so the renderer mounts immediately; confirm
    // against the React renderer's behavior.
    this.editor.isInitialized = true;
    return ReactNodeViewRenderer(view);
  },
});

View File

@@ -0,0 +1,56 @@
import { describe, it, expect } from "vitest";
import { htmlToMarkdown } from "../markdown/utils/turndown.utils";
import { markdownToHtml } from "../markdown/utils/marked.utils";
// Fixture: the markup the editor-ext footnote nodes render — two
// sup[data-footnote-ref] markers in a paragraph, followed by the
// section[data-footnotes] holding one div[data-footnote-def] per marker.
const HTML = [
  `<p>Water<sup data-footnote-ref data-id="fn1"></sup> and clay<sup data-footnote-ref data-id="fn2"></sup>.</p>`,
  `<section data-footnotes>`,
  `<div data-footnote-def data-id="fn1"><p>First note.</p></div>`,
  `<div data-footnote-def data-id="fn2"><p>Second note.</p></div>`,
  `</section>`,
].join("");
describe("footnote markdown round-trip", () => {
  it("HTML -> Markdown produces pandoc footnote syntax", () => {
    const markdown = htmlToMarkdown(HTML);
    // Inline markers first, then the definition lines.
    const expectedSnippets = [
      "[^fn1]",
      "[^fn2]",
      "[^fn1]: First note.",
      "[^fn2]: Second note.",
    ];
    for (const snippet of expectedSnippets) {
      expect(markdown).toContain(snippet);
    }
  });

  it("Markdown -> HTML rebuilds the footnote nodes' HTML", async () => {
    const rebuilt = await markdownToHtml(htmlToMarkdown(HTML));
    const expectedFragments = [
      'data-footnote-ref data-id="fn1"',
      'data-footnote-ref data-id="fn2"',
      "data-footnotes",
      'data-footnote-def data-id="fn1"',
      "First note.",
      "Second note.",
    ];
    for (const fragment of expectedFragments) {
      expect(rebuilt).toContain(fragment);
    }
  });

  it("preserves a [^id]: line shown inside a fenced code block (not a definition)", async () => {
    // The document below only DOCUMENTS footnote syntax: its `[^demo]: ...`
    // line sits inside a code fence, so it is sample text and has to come out
    // of the Markdown -> HTML conversion untouched.
    const lines = [
      "Here is how footnotes look:",
      "",
      "```markdown",
      "Some text[^demo]",
      "",
      "[^demo]: this is the definition",
      "```",
      "",
      "End of doc.",
    ];
    const rendered = await markdownToHtml(lines.join("\n"));

    // Still present verbatim (inside the rendered code block) ...
    expect(rendered).toContain("[^demo]: this is the definition");
    // ... and not extracted into a real footnotes section.
    for (const marker of ["data-footnotes", "data-footnote-def"]) {
      expect(rendered).not.toContain(marker);
    }
  });
});

View File

@@ -0,0 +1,75 @@
import { Plugin, PluginKey } from "@tiptap/pm/state";
import { Decoration, DecorationSet } from "@tiptap/pm/view";
import { Node as ProseMirrorNode } from "@tiptap/pm/model";
import {
FOOTNOTE_DEFINITION_NAME,
FOOTNOTE_REFERENCE_NAME,
computeFootnoteNumbers,
} from "./footnote-util";
/** Key identifying the footnote-numbering plugin (used as the plugin's `key`). */
export const footnoteNumberingPluginKey = new PluginKey("footnoteNumbering");
/**
 * Compute the numbering decorations for a document.
 *
 * The display numbers come from `computeFootnoteNumbers(doc)`; every footnote
 * reference and every footnote definition whose id has a number receives a
 * node decoration exposing it both as a `data-footnote-number` attribute and
 * as the `--footnote-number` CSS variable. Nodes whose id has no number are
 * left undecorated. The result depends on the document only and the document
 * is never modified.
 *
 * @param doc Document to decorate.
 * @returns Decoration set covering the numbered reference/definition nodes.
 */
export function buildFootnoteDecorations(doc: ProseMirrorNode): DecorationSet {
  const numberById = computeFootnoteNumbers(doc);
  const numbered: Decoration[] = [];

  doc.descendants((node, pos) => {
    const typeName = node.type.name;
    const isFootnoteNode =
      typeName === FOOTNOTE_REFERENCE_NAME ||
      typeName === FOOTNOTE_DEFINITION_NAME;
    if (!isFootnoteNode) return;

    const displayNumber = numberById.get(node.attrs.id);
    if (displayNumber == null) return;

    // References and definitions get the exact same decoration attributes.
    numbered.push(
      Decoration.node(pos, pos + node.nodeSize, {
        "data-footnote-number": String(displayNumber),
        style: `--footnote-number: "${displayNumber}";`,
      }),
    );
  });

  return DecorationSet.create(doc, numbered);
}
/**
 * Create the plugin that shows footnote numbers via decorations.
 *
 * The plugin state is the decoration set built by `buildFootnoteDecorations`.
 * It is rebuilt from the new document whenever a transaction changes the
 * document and reused untouched otherwise. The plugin only supplies
 * decorations; it never produces a transaction of its own.
 */
export function footnoteNumberingPlugin(): Plugin {
  return new Plugin({
    key: footnoteNumberingPluginKey,
    state: {
      init: (_config, { doc }) => buildFootnoteDecorations(doc),
      apply: (tr, previous) =>
        tr.docChanged ? buildFootnoteDecorations(tr.doc) : previous,
    },
    props: {
      decorations(state) {
        // This plugin is registered under footnoteNumberingPluginKey, so the
        // key resolves to this plugin's own state.
        return footnoteNumberingPluginKey.getState(state);
      },
    },
  });
}

View File

@@ -0,0 +1,328 @@
import { mergeAttributes, Node } from "@tiptap/core";
import { TextSelection, Transaction } from "@tiptap/pm/state";
import { ReactNodeViewRenderer } from "@tiptap/react";
import {
FOOTNOTE_DEFINITION_NAME,
FOOTNOTE_REFERENCE_NAME,
FOOTNOTES_LIST_NAME,
generateFootnoteId,
} from "./footnote-util";
import { footnoteNumberingPlugin } from "./footnote-numbering";
import { footnoteSyncPlugin } from "./footnote-sync";
/** Options accepted by the `FootnoteReference` node extension. */
export interface FootnoteReferenceOptions {
/** Extra HTML attributes merged into the rendered `sup[data-footnote-ref]`. */
HTMLAttributes: Record<string, any>;
/**
 * Component passed to `ReactNodeViewRenderer`. When falsy (the default is
 * `null`) the extension registers no node view.
 */
view: any;
/**
* Optional predicate identifying remote/collaboration transactions so the
* sync plugin skips them (orphan cleanup must run only on local changes).
*/
isRemoteTransaction?: (tr: Transaction) => boolean;
/**
* When false, the footnote sync/integrity plugin is fully disabled — it never
* appends a transaction. Numbering decorations stay active. Set this in
* read-only / share editors so a viewer's doc is decorated (numbered) but
* never mutated (e.g. by a programmatic setContent). Defaults to true.
*/
enableSync?: boolean;
}
// Registers the footnote commands on Tiptap's `Commands` interface so they are
// typed on `editor.commands` / `editor.chain()`.
declare module "@tiptap/core" {
interface Commands<ReturnType> {
footnote: {
/**
* Insert a footnote reference at the cursor and create the matching
* (empty) definition in the bottom footnotes list, in one transaction.
* When dispatched, the cursor is moved into the new definition.
*/
setFootnote: () => ReturnType;
/**
* Remove a footnote reference and cascade-delete its definition (one
* transaction so a single undo restores both).
*/
removeFootnote: (id: string) => ReturnType;
/**
* Smooth-scroll the rendered footnote definition with this id into view.
* Only scrolls the DOM element; the editor selection is not changed.
* Returns false when the id is empty or no such element is rendered.
*/
scrollToFootnote: (id: string) => ReturnType;
/**
* Smooth-scroll the rendered footnote reference with this id into view.
* Only scrolls the DOM element; the editor selection is not changed.
* Returns false when the id is empty or no such element is rendered.
*/
scrollToReference: (id: string) => ReturnType;
};
}
}
/** Document range scheduled for deletion (`from` inclusive, `to` exclusive). */
type FootnoteRange = { from: number; to: number };

/**
 * Return the first element under `root` that matches `selector` and whose
 * `data-id` attribute equals `id`, or null when there is none.
 *
 * The id is compared in JavaScript rather than interpolated into the selector:
 * ids can originate from pasted or imported content, and an id containing a
 * quote or backslash would otherwise make `querySelector` throw.
 */
function findFootnoteElement(
  root: Element,
  selector: string,
  id: string,
): HTMLElement | null {
  const candidates = Array.from(root.querySelectorAll<HTMLElement>(selector));
  return candidates.find((el) => el.getAttribute("data-id") === id) ?? null;
}

/**
 * Inline atom that marks a footnote reference in the body text. It holds only
 * an `id` linking it to its `footnoteDefinition`; the visible number is NOT
 * stored — it is rendered by the numbering plugin as a decoration.
 *
 * References are refused inside code blocks, footnote definitions and the
 * footnotes list (no nested footnotes). That restriction is enforced by the
 * `setFootnote` command rather than by schema content expressions, since an
 * inline group node cannot express "not inside X" declaratively.
 */
export const FootnoteReference = Node.create<FootnoteReferenceOptions>({
  name: FOOTNOTE_REFERENCE_NAME,

  // Higher than the default extension priority (100) so this extension's parse
  // rule is considered before the Superscript mark's <sup> rule.
  priority: 101,

  group: "inline",
  inline: true,
  atom: true,
  selectable: true,
  draggable: false,

  addOptions() {
    return {
      HTMLAttributes: {},
      view: null,
      isRemoteTransaction: undefined,
      enableSync: true,
    };
  },

  addProseMirrorPlugins() {
    // Numbering always runs (decoration-only). The sync/integrity plugin is
    // skipped entirely when sync is disabled (read-only / share) so the
    // viewer's doc is never mutated.
    const plugins = [footnoteNumberingPlugin()];
    if (this.options.enableSync !== false) {
      plugins.push(footnoteSyncPlugin(this.options.isRemoteTransaction));
    }
    return plugins;
  },

  addAttributes() {
    return {
      // Link to the matching definition; serialized as `data-id`.
      id: {
        default: null,
        parseHTML: (element) => element.getAttribute("data-id"),
        renderHTML: (attributes) => {
          if (!attributes.id) return {};
          return { "data-id": attributes.id };
        },
      },
    };
  },

  parseHTML() {
    return [
      {
        // Rule priority above ProseMirror's default (50) so the Superscript
        // mark (which also matches <sup>) does not claim a footnote reference
        // and drop it as empty content.
        tag: "sup[data-footnote-ref]",
        priority: 100,
      },
    ];
  },

  renderHTML({ HTMLAttributes }) {
    return [
      "sup",
      mergeAttributes(
        { "data-footnote-ref": "", class: "footnote-ref" },
        this.options.HTMLAttributes,
        HTMLAttributes,
      ),
    ];
  },

  // Plain-text representation (used by generateText / markdown text fallbacks).
  renderText({ node }) {
    return `[^${node.attrs.id ?? ""}]`;
  },

  addNodeView() {
    if (!this.options.view) return null;
    // Force the react node view to render immediately using flush sync.
    this.editor.isInitialized = true;
    return ReactNodeViewRenderer(this.options.view);
  },

  addCommands() {
    return {
      /**
       * Insert a reference at the selection start plus its empty definition,
       * in one transaction. Returns false (touching nothing) when the schema
       * lacks a required node type or the position does not accept a
       * reference.
       */
      setFootnote:
        () =>
        ({ state, tr, dispatch }) => {
          const { schema, selection } = state;
          const refType = schema.nodes[FOOTNOTE_REFERENCE_NAME];
          const listType = schema.nodes[FOOTNOTES_LIST_NAME];
          const defType = schema.nodes[FOOTNOTE_DEFINITION_NAME];
          const paragraphType = schema.nodes.paragraph;
          if (!refType || !listType || !defType || !paragraphType) {
            return false;
          }

          const { $from } = selection;

          // Forbid references inside code blocks and inside the footnotes
          // section (no nested footnotes).
          for (let depth = $from.depth; depth > 0; depth--) {
            const ancestor = $from.node(depth);
            if (
              ancestor.type.spec.code ||
              ancestor.type.name === FOOTNOTE_DEFINITION_NAME ||
              ancestor.type.name === FOOTNOTES_LIST_NAME
            ) {
              return false;
            }
          }

          // Ask the schema whether a reference may be placed here, instead of
          // sniffing the parent's content-expression string.
          const index = $from.index();
          if (
            !$from.parent.inlineContent ||
            !$from.parent.canReplaceWith(index, index, refType)
          ) {
            return false;
          }

          const insertPos = selection.from;
          const id = generateFootnoteId();

          // 1) Count the DISTINCT referenced ids before the insertion point.
          //    The footnotes list holds one definition per id, so the raw
          //    reference count would overshoot when an id is referenced more
          //    than once (or a reference has no id at all).
          const idsBefore = new Set<string>();
          state.doc.nodesBetween(0, insertPos, (node) => {
            if (node.type.name === FOOTNOTE_REFERENCE_NAME && node.attrs.id) {
              idsBefore.add(node.attrs.id);
            }
          });
          const refsBefore = idsBefore.size;

          // 2) Insert the reference at the cursor.
          tr.insert(insertPos, refType.create({ id }));

          // 3) Locate the footnotes list in the updated doc. A plain loop is
          //    used so TypeScript can track the assignment. When several
          //    top-level lists exist, the last one wins.
          const definition = defType.create({ id }, paragraphType.create());
          let existingList: { pos: number; node: typeof tr.doc } | null = null;
          let offset = 0;
          for (let i = 0; i < tr.doc.childCount; i++) {
            const child = tr.doc.child(i);
            if (child.type.name === FOOTNOTES_LIST_NAME) {
              existingList = { pos: offset, node: child };
            }
            offset += child.nodeSize;
          }

          let defInsidePos: number;
          if (existingList === null) {
            // No list yet: create one at the very end of the document.
            const end = tr.doc.content.size;
            tr.insert(end, listType.create(null, definition));
            // end -> list open, +1 definition open, +1 paragraph open.
            defInsidePos = end + 3;
          } else {
            // Skip the first `refsBefore` definitions (or all of them when the
            // list is shorter) and insert right after them.
            let pos = existingList.pos + 1; // position of the first definition
            const skip = Math.min(refsBefore, existingList.node.childCount);
            for (let i = 0; i < skip; i++) {
              pos += existingList.node.child(i).nodeSize;
            }
            tr.insert(pos, definition);
            defInsidePos = pos + 2; // +1 enter definition, +1 enter paragraph
          }

          if (dispatch) {
            // Move the cursor into the new definition's paragraph so the user
            // can immediately type the footnote text.
            try {
              const resolved = tr.doc.resolve(
                Math.min(defInsidePos, tr.doc.content.size),
              );
              tr.setSelection(TextSelection.near(resolved));
            } catch {
              // Selection placement is best-effort; ignore failures.
            }
            tr.scrollIntoView();
            dispatch(tr);
          }
          return true;
        },

      /**
       * Delete every reference and definition carrying `id` in one
       * transaction. A footnotes list that would be left without children is
       * deleted as a whole. Returns false when `id` is empty or nothing with
       * that id exists.
       */
      removeFootnote:
        (id: string) =>
        ({ state, tr, dispatch }) => {
          if (!id) return false;

          const refRanges: FootnoteRange[] = [];
          // Matching definitions found outside any list.
          const looseDefRanges: FootnoteRange[] = [];
          // Every list, with the matching definitions that are its direct
          // children. Tracking the owner list matters: with more than one list
          // in the doc, only the list that really contains the definition may
          // be emptied or removed.
          const lists: Array<
            FootnoteRange & { childCount: number; matching: FootnoteRange[] }
          > = [];

          state.doc.descendants((node, pos, parent) => {
            const range: FootnoteRange = { from: pos, to: pos + node.nodeSize };
            const typeName = node.type.name;
            if (typeName === FOOTNOTES_LIST_NAME) {
              lists.push({ ...range, childCount: node.childCount, matching: [] });
            } else if (typeName === FOOTNOTE_REFERENCE_NAME) {
              if (node.attrs.id === id) refRanges.push(range);
            } else if (
              typeName === FOOTNOTE_DEFINITION_NAME &&
              node.attrs.id === id
            ) {
              // `descendants` visits a list before its children, so the most
              // recently recorded list is the candidate owner.
              const owner = lists[lists.length - 1];
              const isDirectChild =
                parent != null &&
                parent.type.name === FOOTNOTES_LIST_NAME &&
                owner !== undefined &&
                range.from > owner.from &&
                range.to < owner.to;
              if (isDirectChild && owner) owner.matching.push(range);
              else looseDefRanges.push(range);
            }
          });

          const hasDefinition =
            looseDefRanges.length > 0 ||
            lists.some((list) => list.matching.length > 0);
          if (refRanges.length === 0 && !hasDefinition) return false;

          const ranges: FootnoteRange[] = [...refRanges, ...looseDefRanges];
          for (const list of lists) {
            if (list.matching.length === 0) continue;
            if (list.matching.length >= list.childCount) {
              // Removing these definitions would empty the list; an empty
              // footnotesList is invalid, so drop the whole list instead.
              ranges.push({ from: list.from, to: list.to });
            } else {
              ranges.push(...list.matching);
            }
          }

          // Delete from the end so earlier positions stay valid. Positions are
          // also mapped through the steps added by THIS command, so a range
          // nested inside another one (already shrunk or removed) is handled.
          // Steps added earlier in a command chain are excluded from the
          // mapping because `state.doc` already reflects them.
          const firstOwnStep = tr.steps.length;
          ranges
            .sort((a, b) => b.from - a.from)
            .forEach(({ from, to }) => {
              const own = tr.mapping.slice(firstOwnStep);
              tr.delete(own.map(from), own.map(to));
            });

          if (dispatch) dispatch(tr);
          return true;
        },

      /** Smooth-scroll the rendered definition with this id into view. */
      scrollToFootnote:
        (id: string) =>
        ({ editor }) => {
          if (!id) return false;
          const dom = findFootnoteElement(
            editor.view.dom,
            "[data-footnote-def]",
            id,
          );
          if (!dom) return false;
          dom.scrollIntoView({ behavior: "smooth", block: "center" });
          return true;
        },

      /** Smooth-scroll the rendered reference with this id into view. */
      scrollToReference:
        (id: string) =>
        ({ editor }) => {
          if (!id) return false;
          const dom = findFootnoteElement(
            editor.view.dom,
            "sup[data-footnote-ref]",
            id,
          );
          if (!dom) return false;
          dom.scrollIntoView({ behavior: "smooth", block: "center" });
          return true;
        },
    };
  },
});

View File

@@ -0,0 +1,197 @@
import { Plugin, PluginKey, Transaction } from "@tiptap/pm/state";
import { Node as ProseMirrorNode, Fragment } from "@tiptap/pm/model";
import {
FOOTNOTE_DEFINITION_NAME,
FOOTNOTE_REFERENCE_NAME,
FOOTNOTES_LIST_NAME,
} from "./footnote-util";
/** Key identifying the footnote sync/integrity plugin. */
export const footnoteSyncPluginKey = new PluginKey("footnoteSync");
// Transaction meta flag set on every transaction the sync plugin appends. The
// plugin returns early when it sees this flag, so it never reacts to its own
// output.
const SYNC_META = "footnoteSyncApplied";
/** Result of one walk over the document, as produced by `scan`. */
interface FootnoteScan {
/** Reference ids in document order, first occurrence only, de-duplicated. */
referenceIds: string[];
/** definition id -> node (last occurrence wins, matching scan order). */
definitions: Map<string, ProseMirrorNode>;
/**
 * Every footnotesList node found, in document order, with its absolute
 * position. NOTE(review): the list is expected to be a top-level block, but
 * the scan walks all descendants and does not enforce that.
 */
lists: Array<{ pos: number; node: ProseMirrorNode }>;
}
/**
 * Walk the whole document once and gather the footnote-related nodes:
 * the distinct reference ids (in order of first appearance), the definition
 * node for each definition id (a later definition with the same id replaces an
 * earlier one), and every footnotes list together with its position.
 * References and definitions without an id are ignored.
 */
function scan(doc: ProseMirrorNode): FootnoteScan {
  const result: FootnoteScan = {
    referenceIds: [],
    definitions: new Map<string, ProseMirrorNode>(),
    lists: [],
  };
  const knownReferenceIds = new Set<string>();

  doc.descendants((node, pos) => {
    const typeName = node.type.name;
    if (typeName === FOOTNOTE_REFERENCE_NAME) {
      const id = node.attrs.id;
      if (id && !knownReferenceIds.has(id)) {
        knownReferenceIds.add(id);
        result.referenceIds.push(id);
      }
    } else if (typeName === FOOTNOTE_DEFINITION_NAME) {
      const id = node.attrs.id;
      if (id) result.definitions.set(id, node);
    } else if (typeName === FOOTNOTES_LIST_NAME) {
      result.lists.push({ pos, node });
    }
  });

  return result;
}
/**
 * Integrity plugin that keeps the footnotes section consistent with the
 * references in the body.
 *
 * It reacts only to batches that changed the document, that do not contain a
 * transaction it produced itself (tagged with SYNC_META) and — when a
 * predicate is supplied — that contain no remote transaction. Skipping its own
 * output is what guarantees termination.
 *
 * Per invocation it works on the current document and appends at most one
 * transaction (tagged SYNC_META, `addToHistory: false`):
 *
 *  1. The distinct reference ids, in document order, define which definitions
 *     must exist and in which order.
 *  2. The desired definitions reuse the existing definition node for an id
 *     when there is one and are otherwise a fresh definition with one empty
 *     paragraph. Definitions without a reference are not carried over.
 *  3. End-state: no references -> no footnotes list at all; otherwise exactly
 *     one list containing exactly the desired definitions, followed by
 *     nothing but empty paragraphs.
 *  4. If the document already is in that end-state, nothing is emitted.
 *     Otherwise all existing lists are deleted and one canonical list is
 *     inserted after the last top-level block that is not an empty paragraph.
 *
 * Tolerating (and inserting before) trailing empty paragraphs is deliberate:
 * it lets the plugin coexist with a trailing-node plugin that keeps an empty
 * paragraph at the end of the doc, instead of the two moving the list back
 * and forth forever.
 *
 * Regenerating colliding ids on paste is out of scope here.
 *
 * @param isRemoteTransaction Optional predicate; when any transaction of the
 *   batch satisfies it the plugin does nothing.
 */
export function footnoteSyncPlugin(
  isRemoteTransaction?: (tr: Transaction) => boolean,
): Plugin {
  return new Plugin({
    key: footnoteSyncPluginKey,

    appendTransaction(transactions, _oldState, newState) {
      // Guard 1: nothing to do unless the document changed.
      const docChanged = transactions.some((t) => t.docChanged);
      if (!docChanged) return null;

      // Guard 2: never react to our own appended transaction (termination).
      const containsOwnOutput = transactions.some((t) => t.getMeta(SYNC_META));
      if (containsOwnOutput) return null;

      // Guard 3: leave remote/collaboration changes alone.
      if (
        isRemoteTransaction &&
        transactions.some((t) => isRemoteTransaction(t))
      ) {
        return null;
      }

      const { doc, schema } = newState;
      const defType = schema.nodes[FOOTNOTE_DEFINITION_NAME];
      const listType = schema.nodes[FOOTNOTES_LIST_NAME];
      const paragraphType = schema.nodes.paragraph;
      if (!(defType && listType && paragraphType)) return null;

      const { referenceIds, definitions, lists } = scan(doc);

      // One definition per referenced id, in reference order. Existing nodes
      // are reused as-is (same object), which both preserves their content and
      // makes the identity comparison below meaningful.
      const desiredDefs: ProseMirrorNode[] = referenceIds.map(
        (id) =>
          definitions.get(id) ?? defType.create({ id }, paragraphType.create()),
      );
      const hasRefs = desiredDefs.length > 0;

      const isEmptyParagraph = (node: ProseMirrorNode) =>
        node.type === paragraphType && node.content.size === 0;

      // Does the document already match the desired end-state?
      const isCanonical = (): boolean => {
        if (!hasRefs) return lists.length === 0;
        if (lists.length !== 1) return false;

        const { pos, node: list } = lists[0];
        const sameDefs =
          list.childCount === desiredDefs.length &&
          desiredDefs.every((def, i) => list.child(i) === def);

        // Only empty paragraphs may follow the list.
        const listEnd = pos + list.nodeSize;
        let onlyEmptyParasAfter = true;
        doc.nodesBetween(listEnd, doc.content.size, (child, childPos) => {
          const startsAfterList = childPos >= listEnd && child !== list;
          if (startsAfterList && !isEmptyParagraph(child)) {
            onlyEmptyParasAfter = false;
          }
          return false; // top-level children only
        });

        return sameDefs && onlyEmptyParasAfter;
      };
      if (isCanonical()) return null;

      // Rebuild in a single transaction. Lists are removed back to front so
      // the recorded positions of the earlier ones remain valid.
      const tr = newState.tr;
      const listsLastFirst = [...lists].sort((a, b) => b.pos - a.pos);
      for (const { pos, node } of listsLastFirst) {
        tr.delete(pos, pos + node.nodeSize);
      }

      if (hasRefs) {
        // Walk back over trailing empty paragraphs so the list is inserted in
        // front of them.
        const current = tr.doc;
        let insertPos = current.content.size;
        let i = current.childCount - 1;
        while (i >= 0 && isEmptyParagraph(current.child(i))) {
          insertPos -= current.child(i).nodeSize;
          i--;
        }
        tr.insert(
          insertPos,
          listType.create(null, Fragment.fromArray(desiredDefs)),
        );
      }

      if (!tr.docChanged) return null;
      tr.setMeta(SYNC_META, true);
      tr.setMeta("addToHistory", false);
      return tr;
    },
  });
}

View File

@@ -0,0 +1,77 @@
import { Node as ProseMirrorNode } from "@tiptap/pm/model";
/**
 * Node type names for the footnote feature. Centralized so every part of the
 * feature (nodes, plugins, commands) references the same string.
 */
// Inline marker placed in the body text.
export const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
// Block that contains the footnote definitions.
export const FOOTNOTES_LIST_NAME = "footnotesList";
// One definition, linked to its reference by id.
export const FOOTNOTE_DEFINITION_NAME = "footnoteDefinition";
/**
 * Create a new footnote id shaped like a version-7 UUID
 * (`tttttttt-tttt-7rrr-vrrr-rrrrrrrrrrrr`): the first 12 hex digits are the
 * current `Date.now()` in milliseconds (zero-padded), `7` is the version
 * nibble, `v` is a variant nibble in 8..b and every `r` is a random hex digit
 * from `Math.random()`. Built by hand so no `uuid` dependency is needed.
 *
 * @returns The id as a lowercase, dash-separated hex string.
 */
export function generateFootnoteId(): string {
  // `digits` random hex characters, one Math.random() draw per character.
  const randomHex = (digits: number): string => {
    let hex = "";
    while (hex.length < digits) {
      hex += Math.floor(Math.random() * 16).toString(16);
    }
    return hex;
  };

  const timestampHex = Date.now().toString(16).padStart(12, "0");
  const versionGroup = "7" + randomHex(3);
  // 8 + (0..3) yields the variant nibble 8, 9, a or b.
  const variantGroup =
    (8 + Math.floor(Math.random() * 4)).toString(16) + randomHex(3);

  return [
    timestampHex.slice(0, 8),
    timestampHex.slice(8, 12),
    versionGroup,
    variantGroup,
    randomHex(12),
  ].join("-");
}
/**
 * List the id of every `footnoteReference` node in document order. An id that
 * is referenced several times appears several times; references without an id
 * are skipped. The result depends on the document only.
 *
 * @param doc Document to walk.
 * @returns Reference ids in the order the references occur.
 */
export function collectReferenceIds(doc: ProseMirrorNode): string[] {
  const orderedIds: string[] = [];
  doc.descendants((node) => {
    if (node.type.name !== FOOTNOTE_REFERENCE_NAME) return;
    const { id } = node.attrs;
    if (id) orderedIds.push(id);
  });
  return orderedIds;
}
/**
 * Map each referenced id to its 1-based display number. Numbers follow the
 * order in which ids first appear in the document; a repeated id keeps the
 * number of its first occurrence.
 *
 * @param doc Document to number.
 * @returns Map from reference id to display number.
 */
export function computeFootnoteNumbers(
  doc: ProseMirrorNode,
): Map<string, number> {
  const numberById = new Map<string, number>();
  collectReferenceIds(doc).forEach((id) => {
    if (numberById.has(id)) return;
    // Ids are only ever added, so size + 1 is the next free number.
    numberById.set(id, numberById.size + 1);
  });
  return numberById;
}

View File

@@ -0,0 +1,536 @@
import { describe, it, expect } from "vitest";
import { Editor, Extension, getSchema } from "@tiptap/core";
import { Document } from "@tiptap/extension-document";
import { Paragraph } from "@tiptap/extension-paragraph";
import { Text } from "@tiptap/extension-text";
import { Superscript } from "@tiptap/extension-superscript";
import { Plugin, PluginKey } from "@tiptap/pm/state";
import { Node as PMNode } from "@tiptap/pm/model";
import { FootnoteReference } from "./footnote-reference";
import { FootnotesList } from "./footnotes-list";
import { FootnoteDefinition } from "./footnote-definition";
import { TrailingNode } from "../trailing-node";
import {
computeFootnoteNumbers,
collectReferenceIds,
FOOTNOTE_REFERENCE_NAME,
FOOTNOTES_LIST_NAME,
FOOTNOTE_DEFINITION_NAME,
} from "./footnote-util";
// Minimal extension set shared by the editors/schemas built in this spec:
// document, paragraph and text plus the three footnote nodes.
const extensions = [
Document,
Paragraph,
Text,
FootnoteReference,
FootnotesList,
FootnoteDefinition,
];
/**
 * Create an editor with the shared `extensions`. Without `content` the editor
 * starts with a document holding one empty paragraph.
 */
function makeEditor(content?: any) {
  const emptyDoc = { type: "doc", content: [{ type: "paragraph" }] };
  return new Editor({
    extensions,
    content: content ?? emptyDoc,
  });
}
/** Count the descendants of `doc` whose node type is called `name`. */
function countType(doc: PMNode, name: string): number {
  let matches = 0;
  doc.descendants((node) => {
    matches += node.type.name === name ? 1 : 0;
  });
  return matches;
}
describe("footnote numbering (pure function)", () => {
  it("numbers references in document order", () => {
    // Small JSON builders to keep the fixture readable.
    const text = (value: string) => ({ type: "text", text: value });
    const reference = (id: string) => ({
      type: FOOTNOTE_REFERENCE_NAME,
      attrs: { id },
    });
    const emptyDefinition = (id: string) => ({
      type: FOOTNOTE_DEFINITION_NAME,
      attrs: { id },
      content: [{ type: "paragraph" }],
    });

    // Body "a[x]b[y]" followed by the footnotes list with both definitions.
    const doc = PMNode.fromJSON(getSchema(extensions), {
      type: "doc",
      content: [
        {
          type: "paragraph",
          content: [text("a"), reference("x"), text("b"), reference("y")],
        },
        {
          type: FOOTNOTES_LIST_NAME,
          content: [emptyDefinition("x"), emptyDefinition("y")],
        },
      ],
    });

    expect(collectReferenceIds(doc)).toEqual(["x", "y"]);

    const numbers = computeFootnoteNumbers(doc);
    expect(numbers.get("x")).toBe(1);
    expect(numbers.get("y")).toBe(2);
  });
});
describe("setFootnote command", () => {
  /** The `id` attribute of every node of the given type, in document order. */
  const idsOf = (doc: PMNode, typeName: string): string[] => {
    const ids: string[] = [];
    doc.descendants((node) => {
      if (node.type.name === typeName) ids.push(node.attrs.id);
    });
    return ids;
  };

  /** JSON for a paragraph containing a single text node. */
  const paragraphOf = (text: string) => ({
    type: "paragraph",
    content: [{ type: "text", text }],
  });

  it("inserts a reference and a matching definition in the footnotes list", () => {
    const editor = makeEditor({ type: "doc", content: [paragraphOf("Hello")] });

    // Put the cursor right after "Hello" and insert.
    editor.commands.setTextSelection(6);
    expect(editor.commands.setFootnote()).toBe(true);

    const { doc } = editor.state;
    const footnoteTypes = [
      FOOTNOTE_REFERENCE_NAME,
      FOOTNOTES_LIST_NAME,
      FOOTNOTE_DEFINITION_NAME,
    ];
    for (const typeName of footnoteTypes) {
      expect(countType(doc, typeName)).toBe(1);
    }

    // Reference and definition must share one id.
    const refIds = idsOf(doc, FOOTNOTE_REFERENCE_NAME);
    const defIds = idsOf(doc, FOOTNOTE_DEFINITION_NAME);
    const refId = refIds[refIds.length - 1] ?? null;
    const defId = defIds[defIds.length - 1] ?? null;
    expect(refId).toBeTruthy();
    expect(refId).toBe(defId);

    editor.destroy();
  });

  it("inserts the definition at the correct position matching reference order", () => {
    const editor = makeEditor({
      type: "doc",
      content: [paragraphOf("AAAA"), paragraphOf("BBBB")],
    });

    // Footnote #1 goes at the end of the SECOND paragraph ("BBBB") ...
    editor.commands.setTextSelection(11);
    editor.commands.setFootnote();
    // ... footnote #2 at the end of the FIRST paragraph ("AAAA"), i.e. ahead
    // of the earlier reference in document order.
    editor.commands.setTextSelection(5);
    editor.commands.setFootnote();

    const { doc } = editor.state;
    const refOrder = collectReferenceIds(doc);
    const defOrder = idsOf(doc, FOOTNOTE_DEFINITION_NAME);

    // The list must follow the order of the references.
    expect(defOrder).toEqual(refOrder);
    expect(defOrder.length).toBe(2);

    editor.destroy();
  });
});
describe("removeFootnote command (cascade)", () => {
  it("removes both the reference and its definition, and drops the empty list", () => {
    const editor = makeEditor({
      type: "doc",
      content: [
        { type: "paragraph", content: [{ type: "text", text: "Hello" }] },
      ],
    });

    // Create one footnote after "Hello".
    editor.commands.setTextSelection(6);
    editor.commands.setFootnote();

    // Read back the generated id from the inserted reference.
    const referenceIds: Array<string | null> = [];
    editor.state.doc.descendants((node) => {
      if (node.type.name === FOOTNOTE_REFERENCE_NAME) {
        referenceIds.push(node.attrs.id);
      }
    });
    const id = referenceIds[referenceIds.length - 1] ?? null;
    expect(id).toBeTruthy();

    editor.commands.removeFootnote(id!);

    // Reference, definition and the (now empty) list are all gone.
    const { doc } = editor.state;
    expect(countType(doc, FOOTNOTE_REFERENCE_NAME)).toBe(0);
    expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(0);
    expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(0);

    editor.destroy();
  });
});
describe("footnote sync plugin (orphans)", () => {
  // --- Fixture builders: plain JSON nodes for the documents used below. ---
  const textNode = (text: string) => ({ type: "text", text });
  const referenceNode = (id: string) => ({
    type: FOOTNOTE_REFERENCE_NAME,
    attrs: { id },
  });
  const paragraphOf = (...content: object[]) => ({ type: "paragraph", content });
  const definitionNode = (id: string, content: object[]) => ({
    type: FOOTNOTE_DEFINITION_NAME,
    attrs: { id },
    content,
  });
  const listNode = (...definitions: object[]) => ({
    type: FOOTNOTES_LIST_NAME,
    content: definitions,
  });

  // Ids of every footnote definition, in document order.
  const definitionIds = (doc: PMNode): string[] => {
    const ids: string[] = [];
    doc.descendants((node) => {
      if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
        ids.push(node.attrs.id);
      }
    });
    return ids;
  };

  // The top-level node that currently sits last in the document.
  const lastTopLevelNode = (doc: PMNode) => doc.child(doc.childCount - 1);

  it("creates an empty definition for a reference pasted without one", () => {
    const editor = makeEditor({
      type: "doc",
      content: [paragraphOf(textNode("x"), referenceNode("orphan-ref"))],
    });

    // Any doc change makes appendTransaction (and so the sync pass) run.
    editor.commands.insertContentAt(1, " ");

    const hasDefinition = definitionIds(editor.state.doc).includes("orphan-ref");
    expect(hasDefinition).toBe(true);
    editor.destroy();
  });

  it("merges multiple footnotesList nodes into one, preserving all definitions, as the last child", () => {
    const editor = makeEditor({
      type: "doc",
      content: [
        paragraphOf(
          textNode("a"),
          referenceNode("x"),
          textNode("b"),
          referenceNode("y"),
        ),
        // A stray list in the middle of the doc, e.g. from a paste/collab merge.
        listNode(definitionNode("x", [paragraphOf(textNode("X note"))])),
        paragraphOf(textNode("tail")),
        // The "real" trailing list.
        listNode(definitionNode("y", [paragraphOf(textNode("Y note"))])),
      ],
    });

    // A local doc change makes appendTransaction run.
    editor.commands.insertContentAt(1, " ");
    const doc = editor.state.doc;

    // Exactly one list survives ...
    expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
    // ... it carries both definitions ...
    expect(definitionIds(doc).sort()).toEqual(["x", "y"]);
    // ... and it is the document's last child.
    expect(lastTopLevelNode(doc).type.name).toBe(FOOTNOTES_LIST_NAME);
    editor.destroy();
  });

  it("leaves a correct doc (single trailing list) unchanged — no merge loop", () => {
    const editor = makeEditor({
      type: "doc",
      content: [
        paragraphOf(textNode("a"), referenceNode("x")),
        listNode(definitionNode("x", [paragraphOf(textNode("X note"))])),
      ],
    });
    const findList = (json: any) =>
      json.content.find((n: any) => n.type === FOOTNOTES_LIST_NAME);
    const before = editor.state.doc.toJSON();

    // An edit that leaves the footnote structure alone.
    editor.commands.insertContentAt(1, "z");
    const doc = editor.state.doc;

    // Still a single list in last position.
    expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
    expect(lastTopLevelNode(doc).type.name).toBe(FOOTNOTES_LIST_NAME);

    // The list subtree was not rewritten: same JSON as before the edit.
    const beforeList = findList(before);
    const afterList = findList(doc.toJSON());
    expect(afterList).toEqual(beforeList);
    editor.destroy();
  });

  it("removes an orphan definition with no matching reference", () => {
    const editor = makeEditor({
      type: "doc",
      content: [
        paragraphOf(textNode("x")),
        listNode(definitionNode("orphan-def", [{ type: "paragraph" }])),
      ],
    });

    editor.commands.insertContentAt(1, "y");

    // The definition is dropped, and with it the list that became empty.
    const doc = editor.state.doc;
    expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(0);
    expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(0);
    editor.destroy();
  });
});
/**
* Live-editor regression tests for the sync-plugin infinite loop (the hard
* freeze when activating /footnote). These drive a REAL Tiptap editor through
* the same plugin pipeline the browser uses — including the TrailingNode plugin,
* which is what turned the "move list to the end" pass into an infinite
* ping-pong (list moved last -> trailing paragraph appended after it -> list no
* longer last -> moved again -> ...).
*
* If the loop regresses, ProseMirror's appendTransaction round loop never
* terminates and these tests HANG (the vitest timeout fails them). The
* transaction counter additionally fails fast with a bounded iteration cap, so
* a regression surfaces as an explicit error instead of only a slow timeout.
*/
describe("footnote sync plugin (no infinite loop — live editor)", () => {
// Hard cap on how many doc-changing appendTransaction rounds we tolerate for a
// single user action. Convergence takes a couple of rounds at most; anything
// approaching this means the plugins are oscillating.
const MAX_ROUNDS = 50;
// The production editor wires FootnoteReference alongside TrailingNode and
// Superscript; both participate in the loop the bug exhibited, so we mirror
// that here.
//
// Returns the editor plus two accessors over the guard's counter: getRounds()
// reads it and resetRounds() sets it back to 0. `content` defaults to a doc
// holding one empty paragraph.
function makeLiveEditor(content?: any) {
// Shared between the guard plugin below and the accessors returned to the test.
let rounds = 0;
// A guard plugin that counts doc-changing appendTransaction rounds and
// throws if they exceed the cap, converting a would-be infinite loop into a
// deterministic failure instead of a wall-clock hang.
const LoopGuard = Extension.create({
name: "footnoteLoopGuard",
// Run last so it observes every other plugin's appended transaction.
priority: -1000,
addProseMirrorPlugins() {
return [
new Plugin({
key: new PluginKey("footnoteLoopGuard"),
appendTransaction(transactions) {
// Only invocations that see at least one doc-changing transaction count.
if (transactions.some((t) => t.docChanged)) {
rounds += 1;
if (rounds > MAX_ROUNDS) {
throw new Error(
`footnote sync did not converge: exceeded ${MAX_ROUNDS} appendTransaction rounds (infinite loop)`,
);
}
}
// The guard never contributes a transaction of its own.
return null;
},
}),
];
},
});
const editor = new Editor({
extensions: [
Document,
Paragraph,
Text,
Superscript,
TrailingNode,
LoopGuard,
FootnoteReference,
FootnotesList,
FootnoteDefinition,
],
content: content ?? { type: "doc", content: [{ type: "paragraph" }] },
});
return { editor, getRounds: () => rounds, resetRounds: () => (rounds = 0) };
}
// True when the doc has a footnotes list and nothing but empty paragraphs
// follows the last one; false when there is no list at all.
function lastFootnotesListIsTrailing(doc: PMNode): boolean {
// Canonical placement: the list is the last meaningful block — only empty
// paragraphs (the trailing-node) may follow it.
let listIndex = -1;
// Keeps the index of the LAST top-level list if there are several.
for (let i = 0; i < doc.childCount; i++) {
if (doc.child(i).type.name === FOOTNOTES_LIST_NAME) listIndex = i;
}
if (listIndex === -1) return false;
for (let i = listIndex + 1; i < doc.childCount; i++) {
const child = doc.child(i);
if (!(child.type.name === "paragraph" && child.content.size === 0)) {
return false;
}
}
return true;
}
it("setFootnote() RETURNS (no hang) and produces one ref + one def in a trailing list", () => {
const { editor } = makeLiveEditor({
type: "doc",
content: [{ type: "paragraph", content: [{ type: "text", text: "Hi" }] }],
});
// Position 3 is the end of "Hi".
editor.commands.setTextSelection(3);
const ok = editor.commands.setFootnote();
expect(ok).toBe(true);
const doc = editor.state.doc;
expect(countType(doc, FOOTNOTE_REFERENCE_NAME)).toBe(1);
expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(1);
expect(lastFootnotesListIsTrailing(doc)).toBe(true);
editor.destroy();
});
it("a second setFootnote() does not hang: two refs + two defs in one list", () => {
const { editor } = makeLiveEditor({
type: "doc",
content: [{ type: "paragraph", content: [{ type: "text", text: "Hi" }] }],
});
// Both footnotes are inserted with the selection set to the same position.
editor.commands.setTextSelection(3);
editor.commands.setFootnote();
editor.commands.setTextSelection(3);
editor.commands.setFootnote();
const doc = editor.state.doc;
expect(countType(doc, FOOTNOTE_REFERENCE_NAME)).toBe(2);
expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2);
expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
expect(lastFootnotesListIsTrailing(doc)).toBe(true);
editor.destroy();
});
it("converges and stabilizes: an unrelated edit does not keep producing transactions", () => {
const { editor, getRounds, resetRounds } = makeLiveEditor({
type: "doc",
content: [{ type: "paragraph", content: [{ type: "text", text: "Hi" }] }],
});
editor.commands.setTextSelection(3);
editor.commands.setFootnote();
// Now the doc is canonical. Dispatch an unrelated edit (insert text) and
// assert the sync plugin converges in a bounded number of rounds and the
// document is stable (one ref/def/list, list trailing).
resetRounds();
editor.commands.insertContentAt(1, "Z");
const afterFirst = editor.state.doc.toJSON();
const roundsAfterEdit = getRounds();
expect(roundsAfterEdit).toBeLessThan(MAX_ROUNDS);
// A follow-up no-op-ish edit must not re-trigger structural rewrites: the
// footnotes section is identical before and after a further unrelated edit.
editor.commands.insertContentAt(2, "Y");
const afterSecond = editor.state.doc.toJSON();
// First top-level footnotes list in a doc JSON (undefined when absent).
const listOf = (json: any) =>
json.content.find((n: any) => n.type === FOOTNOTES_LIST_NAME);
expect(listOf(afterSecond)).toEqual(listOf(afterFirst));
expect(countType(editor.state.doc, FOOTNOTES_LIST_NAME)).toBe(1);
editor.destroy();
});
it("two footnotesList nodes converge to one (merge) without looping", () => {
// Fixture: two references (x, y) whose definitions live in two separate
// lists, with an ordinary paragraph between the lists.
const { editor } = makeLiveEditor({
type: "doc",
content: [
{
type: "paragraph",
content: [
{ type: "text", text: "a" },
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "x" } },
{ type: "text", text: "b" },
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "y" } },
],
},
{
type: FOOTNOTES_LIST_NAME,
content: [
{
type: FOOTNOTE_DEFINITION_NAME,
attrs: { id: "x" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "X" }] },
],
},
],
},
{ type: "paragraph", content: [{ type: "text", text: "tail" }] },
{
type: FOOTNOTES_LIST_NAME,
content: [
{
type: FOOTNOTE_DEFINITION_NAME,
attrs: { id: "y" },
content: [
{ type: "paragraph", content: [{ type: "text", text: "Y" }] },
],
},
],
},
],
});
// Trigger a local doc change so appendTransaction runs (must not hang).
editor.commands.insertContentAt(1, " ");
const doc = editor.state.doc;
expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
const defIds: string[] = [];
doc.descendants((node) => {
if (node.type.name === FOOTNOTE_DEFINITION_NAME)
defIds.push(node.attrs.id);
});
// Sorted so the assertion does not depend on definition order in the list.
expect(defIds.sort()).toEqual(["x", "y"]);
expect(lastFootnotesListIsTrailing(doc)).toBe(true);
editor.destroy();
});
});

View File

@@ -0,0 +1,56 @@
import { mergeAttributes, Node } from "@tiptap/core";
import { ReactNodeViewRenderer } from "@tiptap/react";
import { FOOTNOTES_LIST_NAME } from "./footnote-util";
/** Options accepted by the FootnotesList node extension. */
export interface FootnotesListOptions {
// Extra HTML attributes merged into the rendered <section> element.
HTMLAttributes: Record<string, any>;
// Component handed to ReactNodeViewRenderer; when falsy, no node view is
// registered for the list.
view: any;
}
/**
 * Block-level container for the document's footnote definitions.
 *
 * The intended invariant — one list per document, placed as the doc's last
 * child — is maintained by the sync plugin, not by this node. The node itself
 * only declares the schema (one or more footnoteDefinition children), the
 * HTML mapping (`<section data-footnotes class="footnotes">`) and an optional
 * React node view. Modeled on the callout block node.
 */
export const FootnotesList = Node.create<FootnotesListOptions>({
  name: FOOTNOTES_LIST_NAME,

  // Schema flags.
  group: "block",
  content: "footnoteDefinition+",
  defining: true,
  isolating: true,
  selectable: false,

  addOptions() {
    return { HTMLAttributes: {}, view: null };
  },

  // Only a <section> carrying the data-footnotes attribute parses to this node.
  parseHTML() {
    return [{ tag: "section[data-footnotes]" }];
  },

  // Fixed marker attributes first, then extension-level, then per-node ones.
  renderHTML({ HTMLAttributes }) {
    const sectionAttrs = mergeAttributes(
      { "data-footnotes": "", class: "footnotes" },
      this.options.HTMLAttributes,
      HTMLAttributes,
    );
    return ["section", sectionAttrs, 0];
  },

  addNodeView() {
    // Without a configured view component there is no node view.
    if (!this.options.view) {
      return null;
    }
    // NOTE(review): flips the editor's isInitialized flag before creating the
    // React node view — presumably so the view renders immediately on first
    // mount; confirm against the callout node this was modeled on.
    this.editor.isInitialized = true;
    return ReactNodeViewRenderer(this.options.view);
  },
});

View File

@@ -0,0 +1,6 @@
export * from "./footnote-util";
export * from "./footnote-reference";
export * from "./footnotes-list";
export * from "./footnote-definition";
export * from "./footnote-numbering";
export * from "./footnote-sync";

View File

@@ -0,0 +1,115 @@
import { marked } from "marked";
/**
* Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline.
*
* Two pieces:
* - an INLINE tokenizer for `[^id]` references -> <sup data-footnote-ref
* data-id="id"> (matches the editor-ext FootnoteReference renderHTML);
* - a document hook (`preprocess`/`walkTokens` is awkward for collecting +
* removing definitions, so we use a regex preprocessing step instead) that
* pulls every `[^id]: text` definition line out of the body and appends a
* single <section data-footnotes> with one <div data-footnote-def> per
* definition, so the round-trip rebuilds footnotesList + footnoteDefinition.
*
* Every `[^id]: text` definition found outside a fenced code block is emitted;
* definitions are not matched against references here. Reconciling the two
* (creating missing definitions, dropping orphans) is left to the sync plugin
* on the editor side.
*/
// Matches a whole definition line `[^id]: text`. Capture 1 is the id (one or
// more characters that are neither `]` nor whitespace); capture 2 is the rest
// of the line after any spaces/tabs following the colon.
const DEFINITION_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
// Matches an inline reference `[^id]`. Unanchored, so callers that need a match
// at a specific position must check the match index. Capture 1 is the id.
const REFERENCE_RE = /\[\^([^\]\s]+)\]/;
/** Inline token produced by the `[^id]` tokenizer and consumed by its renderer. */
interface FootnoteRefToken {
// Token discriminator; same value as the extension's `name`.
type: "footnoteRef";
// The exact source text that was matched (the full `[^id]` marker).
raw: string;
// The footnote id captured from between `[^` and `]`.
id: string;
}
/**
 * marked inline extension for footnote references: turns `[^id]` into an
 * empty `<sup data-footnote-ref data-id="id"></sup>` element.
 */
export const footnoteReferenceExtension = {
  name: "footnoteRef",
  level: "inline" as const,

  /** Index of the first `[^` in `src`, or -1 when there is none. */
  start(src: string) {
    return src.indexOf("[^");
  },

  /**
   * Produces a token only when a reference sits at the very start of the
   * remaining inline source; otherwise returns undefined.
   */
  tokenizer(src: string): FootnoteRefToken | undefined {
    const found = REFERENCE_RE.exec(src);
    if (!found || found.index !== 0) {
      return undefined;
    }
    const [raw, id] = found;
    return { type: "footnoteRef", raw, id };
  },

  /** Renders the token as an empty <sup> whose data-id is the escaped id. */
  renderer(token: FootnoteRefToken) {
    const id = escapeAttr(token.id);
    return `<sup data-footnote-ref data-id="${id}"></sup>`;
  },
};
/**
 * Escapes a value for use inside a double-quoted HTML attribute.
 *
 * Only `&` and `"` are rewritten (to `&amp;` and `&quot;`); every other
 * character is passed through unchanged.
 */
function escapeAttr(value: string): string {
  return String(value).replace(/[&"]/g, (ch) =>
    ch === "&" ? "&amp;" : "&quot;",
  );
}
/**
 * Extract `[^id]: text` definition lines from the markdown body, returning the
 * cleaned body plus a rendered <section data-footnotes> (empty string when no
 * definitions). Call this BEFORE marked.parse and append the section to the
 * resulting HTML.
 *
 * Lines inside a fenced code block (``` or ~~~) are never treated as
 * definitions. Fence handling follows the CommonMark rules that matter here:
 * a fence is closed only by a line made of the same fence character, at least
 * as long as the opening run, with nothing but whitespace after it; and a
 * backtick line whose trailing text contains a backtick does not open a fence.
 * An unclosed fence extends to the end of the input.
 *
 * @param markdown Markdown source; LF and CRLF line endings are both accepted.
 * @returns `body` — the markdown without the definition lines (the input is
 *   returned untouched when there are no definitions; otherwise lines are
 *   re-joined with "\n"), and `section` — the HTML for all definitions in
 *   source order, or "" when there are none.
 */
export function extractFootnoteDefinitions(markdown: string): {
  body: string;
  section: string;
} {
  // Split on CRLF as well as LF: a stray "\r" at the end of a line would stop
  // DEFINITION_RE from matching (its `.*$` cannot cross a line terminator).
  const lines = markdown.split(/\r?\n/);
  const bodyLines: string[] = [];
  const definitions: Array<{ id: string; text: string }> = [];

  // Hoisted so the patterns are not rebuilt for every line.
  const openingFenceRe = /^\s*(`{3,}|~{3,})(.*)$/;
  const closingFenceRe = /^\s*(`{3,}|~{3,})\s*$/;

  // The currently open fence (delimiter character + run length), if any.
  let fence: { char: string; length: number } | null = null;

  for (const line of lines) {
    if (fence !== null) {
      // Everything inside a fence is body content, verbatim.
      bodyLines.push(line);
      const closing = closingFenceRe.exec(line);
      if (
        closing &&
        closing[1][0] === fence.char &&
        closing[1].length >= fence.length
      ) {
        fence = null;
      }
      continue;
    }

    const opening = openingFenceRe.exec(line);
    // A backtick fence may not have a backtick in its info string; such a
    // line is inline code (e.g. "```x```"), not a fence opener.
    if (opening && !(opening[1][0] === "`" && opening[2].includes("`"))) {
      fence = { char: opening[1][0], length: opening[1].length };
      bodyLines.push(line);
      continue;
    }

    const m = DEFINITION_RE.exec(line);
    if (m) {
      definitions.push({ id: m[1], text: m[2] });
    } else {
      bodyLines.push(line);
    }
  }

  if (definitions.length === 0) {
    return { body: markdown, section: "" };
  }

  const defsHtml = definitions
    .map((d) => {
      // Render the definition text as inline markdown so emphasis/links inside
      // a footnote survive the round-trip; wrap in a paragraph (the node's
      // content is paragraph+).
      // NOTE(review): assumes marked runs synchronously here; if the async
      // option were enabled, parseInline would hand back a promise — confirm.
      const inner = marked.parseInline(d.text || "");
      return `<div data-footnote-def data-id="${escapeAttr(
        d.id,
      )}"><p>${inner}</p></div>`;
    })
    .join("");

  return {
    body: bodyLines.join("\n"),
    section: `<section data-footnotes>${defsHtml}</section>`,
  };
}

View File

@@ -2,6 +2,10 @@ import { marked } from "marked";
import { calloutExtension } from "./callout.marked";
import { mathBlockExtension } from "./math-block.marked";
import { mathInlineExtension } from "./math-inline.marked";
import {
footnoteReferenceExtension,
extractFootnoteDefinitions,
} from "./footnote.marked";
marked.use({
renderer: {
@@ -34,7 +38,12 @@ marked.use({
});
marked.use({
extensions: [calloutExtension, mathBlockExtension, mathInlineExtension],
extensions: [
calloutExtension,
mathBlockExtension,
mathInlineExtension,
footnoteReferenceExtension,
],
});
marked.setOptions({ breaks: true });
@@ -48,5 +57,16 @@ export function markdownToHtml(
.replace(YAML_FONT_MATTER_REGEX, "")
.trimStart();
return marked.parse(markdown).toString();
// Pull `[^id]: ...` definition lines out of the body, render the body, then
// append a single <section data-footnotes> so the round-trip rebuilds the
// footnotesList + footnoteDefinition nodes.
const { body, section } = extractFootnoteDefinitions(markdown);
const parsed = marked.parse(body);
if (!section) return parsed;
if (typeof parsed === "string") {
return parsed + section;
}
return parsed.then((html) => html + section);
}

View File

@@ -12,12 +12,44 @@ function sanitizeMdLinkText(value: string): string {
.replace(/[\r\n]+/g, ' ');
}
// Element names treated as void (self-closing). The list mirrors turndown's
// built-in one; footnote references are handled separately in isVoidNode.
const TURNDOWN_VOID_ELEMENTS = [
  'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
  'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR',
];

/**
 * Void-element predicate: true for the standard void tags and for
 * `<sup data-footnote-ref>`.
 *
 * A footnote reference renders as an empty <sup> whose meaning lives entirely
 * in its data-id, so it is classed as void here to keep it from being dropped
 * as a blank node before the footnote rule can emit `[^id]`.
 *
 * @param node Any value; anything without a usable `nodeName` yields false.
 */
function isVoidNode(node: any): boolean {
  const tagName = node?.nodeName?.toUpperCase?.();
  if (!tagName) {
    return false;
  }
  const isFootnoteRef =
    tagName === 'SUP' && Boolean(node.hasAttribute?.('data-footnote-ref'));
  return isFootnoteRef || TURNDOWN_VOID_ELEMENTS.includes(tagName);
}
/**
* Makes empty footnote-reference <sup> elements non-empty before conversion.
*
* An empty <sup data-footnote-ref> is "blank" to turndown, which removes blank
* inline nodes (RootNode/Node use a module-level isVoid the options cannot
* override). To survive, each matching element is given the placeholder text
* from the replacement template below so the node is non-blank; the
* footnoteReference rule then reads data-id and emits `[^id]`.
* NOTE(review): the placeholder looks like a single invisible character rather
* than the id itself — confirm which character it is.
*
* @param html HTML string to scan.
* @returns The HTML in which every `<sup … data-footnote-ref …>` containing
* nothing but whitespace now contains the placeholder; its attributes are
* copied through unchanged.
*/
function fillEmptyFootnoteRefs(html: string): string {
return html.replace(
// Capture 1 is the element's full attribute string, re-emitted verbatim.
/<sup\b([^>]*\bdata-footnote-ref\b[^>]*)>\s*<\/sup>/gi,
(_m, attrs) => `<sup${attrs}>​</sup>`,
);
}
export function htmlToMarkdown(html: string): string {
const turndownService = new TurndownService({
headingStyle: 'atx',
codeBlockStyle: 'fenced',
hr: '---',
bulletListMarker: '-',
isVoid: isVoidNode,
});
turndownService.use([
@@ -34,8 +66,12 @@ export function htmlToMarkdown(html: string): string {
iframeEmbed,
image,
video,
footnoteReference,
footnotesList,
]);
return turndownService.turndown(html).replaceAll('<br>', ' ');
return turndownService
.turndown(fillEmptyFootnoteRefs(html))
.replaceAll('<br>', ' ');
}
function listParagraph(turndownService: _TurndownService) {
@@ -203,6 +239,57 @@ function image(turndownService: _TurndownService) {
});
}
/**
 * Registers the turndown rule that converts a footnote reference
 * (`<sup data-footnote-ref data-id="…">`) into the pandoc/GFM marker `[^id]`.
 *
 * The displayed number is derived rather than stored, so the id is what gets
 * written. A reference without a data-id produces no output.
 */
function footnoteReference(turndownService: _TurndownService) {
  const isFootnoteRef = (node: HTMLInputElement) =>
    node.nodeName === 'SUP' && node.hasAttribute('data-footnote-ref');

  // The element's content is ignored; only data-id matters.
  const toMarker = (_content: string, node: HTMLInputElement) => {
    const id = node.getAttribute('data-id');
    if (!id) {
      return '';
    }
    return `[^${id}]`;
  };

  turndownService.addRule('footnoteReference', {
    filter: isFootnoteRef,
    replacement: toMarker,
  });
}
/**
 * Registers the two turndown rules for the footnotes section:
 *
 * - `footnoteDefinition`: a `<div data-footnote-def data-id="…">` becomes one
 *   `[^id]: text` line (a definition without an id is dropped);
 * - `footnotesList`: the enclosing `<section data-footnotes>` emits its
 *   converted content, trimmed, after a blank line.
 */
function footnotesList(turndownService: _TurndownService) {
  const hasTagAndAttr =
    (tag: string, attr: string) => (node: HTMLInputElement) =>
      node.nodeName === tag && node.hasAttribute(attr);

  turndownService.addRule('footnoteDefinition', {
    filter: hasTagAndAttr('DIV', 'data-footnote-def'),
    replacement: (content: string, node: HTMLInputElement) => {
      const id = node.getAttribute('data-id');
      if (!id) {
        return '';
      }
      // Fold every run of newlines (plus surrounding whitespace) into a single
      // space so the definition stays on one markdown line; continuation
      // lines are a v2 refinement.
      const singleLine = content.replace(/\s*\n+\s*/g, ' ').trim();
      return `\n[^${id}]: ${singleLine}\n`;
    },
  });

  turndownService.addRule('footnotesList', {
    filter: hasTagAndAttr('SECTION', 'data-footnotes'),
    replacement: (content: string) => `\n\n${content.trim()}\n`,
  });
}
function video(turndownService: _TurndownService) {
turndownService.addRule('video', {
filter: function (node: HTMLInputElement) {