Footnotes were strict 1:1: a repeated `[^a]` reference was treated as a collision and re-id'd to `a__2`, and a reference with no definition synthesized its own empty one — so an agent-authored article with reused labels produced dozens of empty `kowiki__N` footnotes. Move to Pandoc REUSE semantics and add non-fatal import diagnostics.

Reuse (core):
- resolveCollisions (footnote-sync): repeated references sharing an id are REUSE (recorded once in document order, never re-id'd) — one number, one shared definition. Only a duplicate DEFINITION is re-id'd deterministically and, with no matching reference, dropped by the existing orphan policy (first-wins). CollisionPlan.refReids is now always empty (harmless no-op downstream).
- extractFootnoteDefinitions (marked) and extractFootnotes (MCP): duplicate definition ids are FIRST-WINS (keep first, drop rest); reference markers are never rewritten. Removed the marker rewriting and the now-dead deriveFootnoteId mirror + helpers from the MCP path.

Import diagnostics:
- New analyzeFootnotes() (MCP): fence-aware pure scan reporting dangling references, empty/duplicate definitions and `[^id]` markers inside table rows.
- createPage / updatePage / importPageMarkdown now attach `footnoteWarnings` (only when non-empty) so an agent can fix its markup; the page is still created.

Paste-reuse:
- footnotePastePlugin remaps only ids the pasted slice DEFINES (a colliding definition); a pasted lone reference to an existing id keeps it (reuse).

Tests: reuse/first-wins rewrites of footnote.test, footnote-markdown.test, footnote.marked.orphan.test and the MCP footnotes.test; new footnote-paste.test (editor-ext) and footnote-analyze.test (MCP). Deleted derive-id-parity.test.mjs (the MCP no longer derives ids; editor-ext's deriveFootnoteId keeps its own golden test).

editor-ext 128, MCP 299, server roundtrip 2, client views 3, client+server tsc clean.
Two review suggestions applied: corrected a stale "duplicated in MCP" comment and the dangling-reference warning wording.

Note: the multi-backlink editor UI (a reused definition linking back to each of its references) is deferred to a follow-up — this PR delivers the data-integrity core (reuse + warnings + paste-reuse). Forward links and numbering already reuse correctly; the backlink currently targets the first reference.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
158 lines
4.9 KiB
JavaScript
158 lines
4.9 KiB
JavaScript
import { test } from "node:test";
|
|
import assert from "node:assert/strict";
|
|
|
|
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
|
|
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
|
|
|
|
/**
 * Recursively collect every node whose `type` equals the requested one.
 *
 * Nodes are visited in pre-order (a node before its children, children in
 * array order), so the result preserves document order. A matching node's
 * own `content` is still searched, so nested matches are collected too.
 *
 * @param {unknown} node - Root of the (sub)tree to search; anything that is
 *   not a non-null object is ignored.
 * @param {string} type - Value compared against each node's `type` property.
 * @param {object[]} [acc=[]] - Accumulator the matches are pushed onto.
 * @returns {object[]} The same `acc` array, containing every match found.
 */
function findAll(node, type, acc = []) {
  // `typeof null === "object"`, so the falsy check has to come first.
  if (!node || typeof node !== "object") {
    return acc;
  }

  if (node.type === type) {
    acc.push(node);
  }

  // Leaf nodes have no `content` array; nothing further to descend into.
  if (!Array.isArray(node.content)) {
    return acc;
  }

  for (const child of node.content) {
    findAll(child, type, acc);
  }
  return acc;
}
|
|
|
|
/**
 * Shared fixture: a document in ProseMirror JSON form with one paragraph that
 * carries two inline footnote references (`fn1`, `fn2`), followed by a
 * `footnotesList` holding the matching definitions in the same order.
 */
const footnoteDoc = {
  type: "doc",
  content: [
    // Body paragraph: "Water[fn1] and clay[fn2]."
    {
      type: "paragraph",
      content: [
        { type: "text", text: "Water" },
        { type: "footnoteReference", attrs: { id: "fn1" } },
        { type: "text", text: " and clay" },
        { type: "footnoteReference", attrs: { id: "fn2" } },
        { type: "text", text: "." },
      ],
    },
    // Definitions, one per reference id used above.
    {
      type: "footnotesList",
      content: [
        {
          type: "footnoteDefinition",
          attrs: { id: "fn1" },
          content: [
            { type: "paragraph", content: [{ type: "text", text: "First note." }] },
          ],
        },
        {
          type: "footnoteDefinition",
          attrs: { id: "fn2" },
          content: [
            { type: "paragraph", content: [{ type: "text", text: "Second note." }] },
          ],
        },
      ],
    },
  ],
};
|
|
|
|
// Serializing the fixture must produce both the inline `[^id]` markers and the
// `[^id]: text` definition lines.
test("JSON -> Markdown emits pandoc footnote syntax", () => {
  const md = convertProseMirrorToMarkdown(footnoteDoc);

  // Inline reference markers. The `(?!:)` lookahead rejects `[^id]:` so that a
  // definition line alone cannot satisfy these two assertions.
  assert.match(md, /\[\^fn1\](?!:)/);
  assert.match(md, /\[\^fn2\](?!:)/);

  // Definition lines with their text.
  assert.match(md, /\[\^fn1\]: First note\./);
  assert.match(md, /\[\^fn2\]: Second note\./);
});
|
|
|
|
// Parsing the serialized fixture back must rebuild the reference nodes, a
// single footnotes list and the definition nodes, with ids in document order.
test("Markdown -> JSON rebuilds footnote nodes", async () => {
  const markdown = convertProseMirrorToMarkdown(footnoteDoc);
  const doc = await markdownToProseMirror(markdown);

  const idsOf = (nodes) => nodes.map((n) => n.attrs.id);
  const references = findAll(doc, "footnoteReference");
  const lists = findAll(doc, "footnotesList");
  const definitions = findAll(doc, "footnoteDefinition");

  assert.equal(references.length, 2);
  assert.deepEqual(idsOf(references), ["fn1", "fn2"]);

  // All definitions live in exactly one list node.
  assert.equal(lists.length, 1);

  assert.equal(definitions.length, 2);
  assert.deepEqual(idsOf(definitions), ["fn1", "fn2"]);
});
|
|
|
|
// Full round trip: serialize the fixture, parse it, serialize the result
// again, and check the second Markdown still carries the markers + definitions.
test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
  const md = convertProseMirrorToMarkdown(footnoteDoc);
  const json = await markdownToProseMirror(md);
  const md2 = convertProseMirrorToMarkdown(json);

  // Inline reference markers. The `(?!:)` lookahead rejects `[^id]:` so that a
  // definition line alone cannot satisfy these two assertions.
  assert.match(md2, /\[\^fn1\](?!:)/);
  assert.match(md2, /\[\^fn2\](?!:)/);

  // Definition lines with their original text.
  assert.match(md2, /\[\^fn1\]: First note\./);
  assert.match(md2, /\[\^fn2\]: Second note\./);
});
|
|
|
|
test("repeated references REUSE one footnote; duplicate definitions are first-wins (#166)", async () => {
  // Reuse semantics: many `[^d]` references + several `[^d]:` definitions import
  // as ONE footnote — the references all keep id "d" (reuse), and only the FIRST
  // definition is kept (first-wins). Deterministic and stable across re-imports.
  const md = [
    "See[^d] one[^d] two[^d].",
    "",
    "[^d]: first",
    "[^d]: second",
    "[^d]: third",
  ].join("\n");

  // Import `md` and summarize the footnote nodes it produced.
  const idsOf = async () => {
    const json = await markdownToProseMirror(md);
    const defs = findAll(json, "footnoteDefinition");
    return {
      refs: findAll(json, "footnoteReference").map((r) => r.attrs.id),
      defIds: defs.map((d) => d.attrs.id),
      // Read the text from the definition's text nodes rather than regex-matching
      // its JSON string, which would mis-parse any text containing a quote.
      defText: defs
        .map((d) =>
          findAll(d, "text")
            .map((t) => t.text)
            .join(""),
        )
        .join("|"),
    };
  };

  const a = await idsOf();
  const b = await idsOf();

  // Stable across runs.
  assert.deepEqual(a, b);
  // Reuse: all three reference markers stay "d".
  assert.deepEqual(a.refs, ["d", "d", "d"]);
  // First-wins: a single definition "d" with the FIRST text.
  assert.deepEqual(a.defIds, ["d"]);
  assert.equal(a.defText, "first");
});
|
|
|
|
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
  // Markdown that DOCUMENTS footnote syntax inside a code fence. The example
  // definition line must be preserved verbatim inside the code block and not
  // pulled out into a real footnotesList / footnoteDefinition.
  const md = [
    "Intro text.",
    "",
    "```markdown",
    "Body[^demo]",
    "",
    "[^demo]: example definition",
    "```",
    "",
    "Outro.",
  ].join("\n");

  const json = await markdownToProseMirror(md);

  // No real footnote nodes were extracted from the code block.
  assert.equal(findAll(json, "footnotesList").length, 0);
  assert.equal(findAll(json, "footnoteDefinition").length, 0);

  // The example definition line survives inside a code block. Only the code
  // blocks' own text is inspected, so the line leaking into a paragraph
  // elsewhere in the document would not satisfy this check.
  const codeBlocks = findAll(json, "codeBlock");
  assert.ok(codeBlocks.length >= 1, "code block present");
  const codeText = codeBlocks
    .map((block) =>
      findAll(block, "text")
        .map((t) => t.text)
        .join(""),
    )
    .join("\n");
  assert.match(codeText, /\[\^demo\]: example definition/);
});
|