Files
gitmost/packages/mcp/test/unit/footnotes.test.mjs
claude code agent 227 17e683a311 feat(footnotes): reuse semantics + import diagnostics (#166)
Footnotes were strict 1:1: a repeated `[^a]` reference was treated as a
collision and re-id'd to `a__2`, and a reference with no definition synthesized
its own empty one — so an agent-authored article with reused labels produced
dozens of empty `kowiki__N` footnotes. Move to Pandoc REUSE semantics and add
non-fatal import diagnostics.

Reuse (core):
- resolveCollisions (footnote-sync): repeated references sharing an id are REUSE
  (recorded once in document order, never re-id'd) — one number, one shared
  definition. Only a duplicate DEFINITION is re-id'd deterministically and, with
  no matching reference, dropped by the existing orphan policy (first-wins).
  CollisionPlan.refReids is now always empty (harmless no-op downstream).
- extractFootnoteDefinitions (marked) and extractFootnotes (MCP): duplicate
  definition ids are FIRST-WINS (keep first, drop rest); reference markers are
  never rewritten. Removed the marker-rewriting and the now-dead deriveFootnoteId
  mirror + helpers from the MCP path.

Import diagnostics:
- New analyzeFootnotes() (MCP): fence-aware pure scan reporting dangling
  references, empty/duplicate definitions and `[^id]` markers inside table rows.
- createPage / updatePage / importPageMarkdown now attach `footnoteWarnings`
  (only when non-empty) so an agent can fix its markup; the page is still created.

Paste-reuse:
- footnotePastePlugin remaps only ids the pasted slice DEFINES (a colliding
  definition); a pasted lone reference to an existing id keeps it (reuse).

Tests: reuse/first-wins rewrites of footnote.test, footnote-markdown.test,
footnote.marked.orphan.test and the MCP footnotes.test; new footnote-paste.test
(editor-ext) and footnote-analyze.test (MCP). Deleted derive-id-parity.test.mjs
(the MCP no longer derives ids; editor-ext's deriveFootnoteId keeps its own
golden test). editor-ext 128, MCP 299, server roundtrip 2, client views 3,
client+server tsc clean.

Two review suggestions applied: corrected a stale "duplicated in MCP" comment and
the dangling-reference warning wording.

Note: the multi-backlink editor UI (a reused definition linking back to each of
its references) is deferred to a follow-up — this PR delivers the data-integrity
core (reuse + warnings + paste-reuse). Forward links and numbering already reuse
correctly; the backlink currently targets the first reference.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 15:34:41 +03:00

158 lines
4.9 KiB
JavaScript

import { test } from "node:test";
import assert from "node:assert/strict";
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
/**
 * Depth-first walk over a JSON node tree that gathers every node whose `type`
 * matches, in pre-order (a parent is recorded before its children).
 *
 * @param {unknown} node - Root of the (sub)tree; anything that is not a non-null object is ignored.
 * @param {string} type - Value compared against each node's `type` property.
 * @param {object[]} [acc=[]] - Array the matches are appended to (mutated in place).
 * @returns {object[]} The same `acc` array that was passed in (or the fresh default).
 */
function findAll(node, type, acc = []) {
  const isTreeNode = node !== null && typeof node === "object";
  if (!isTreeNode) {
    return acc;
  }
  if (node.type === type) {
    acc.push(node);
  }
  // Only array-valued `content` is descended into; anything else is a leaf.
  const children = Array.isArray(node.content) ? node.content : [];
  children.forEach((child) => {
    findAll(child, type, acc);
  });
  return acc;
}
/**
 * Shared fixture: a document with one paragraph carrying two footnote
 * references (`fn1`, `fn2`) followed by a footnotes list that holds the two
 * matching definitions ("First note." / "Second note.").
 */
const footnoteDoc = (() => {
  // Small local factories keep the fixture readable; key order matches the
  // hand-written literal (type, attrs, content).
  const text = (value) => ({ type: "text", text: value });
  const reference = (id) => ({ type: "footnoteReference", attrs: { id } });
  const definition = (id, note) => ({
    type: "footnoteDefinition",
    attrs: { id },
    content: [{ type: "paragraph", content: [text(note)] }],
  });

  const body = {
    type: "paragraph",
    content: [
      text("Water"),
      reference("fn1"),
      text(" and clay"),
      reference("fn2"),
      text("."),
    ],
  };
  const notes = {
    type: "footnotesList",
    content: [
      definition("fn1", "First note."),
      definition("fn2", "Second note."),
    ],
  };

  return { type: "doc", content: [body, notes] };
})();
// Serializing the fixture must yield Pandoc-style footnotes: an inline `[^id]`
// marker for each reference and a `[^id]: text` line for each definition.
test("JSON -> Markdown emits pandoc footnote syntax", () => {
  const md = convertProseMirrorToMarkdown(footnoteDoc);
  // `(?!:)` limits these two checks to inline reference markers. Without it
  // the definition lines below (`[^fn1]: ...`) would satisfy them on their
  // own, and a serializer that dropped the references would still pass.
  assert.match(md, /\[\^fn1\](?!:)/);
  assert.match(md, /\[\^fn2\](?!:)/);
  // Each definition is emitted with its id and its text.
  assert.match(md, /\[\^fn1\]: First note\./);
  assert.match(md, /\[\^fn2\]: Second note\./);
});
// Serialize the fixture to markdown, parse it back, and verify the footnote
// nodes reappear: two references, a single list, two definitions, ids in order.
test("Markdown -> JSON rebuilds footnote nodes", async () => {
  const json = await markdownToProseMirror(
    convertProseMirrorToMarkdown(footnoteDoc),
  );
  const [references, lists, definitions] = [
    "footnoteReference",
    "footnotesList",
    "footnoteDefinition",
  ].map((kind) => findAll(json, kind));
  const idOf = (node) => node.attrs.id;
  const expectedIds = ["fn1", "fn2"];

  assert.equal(references.length, 2);
  assert.deepEqual(references.map(idOf), expectedIds);
  assert.equal(lists.length, 1);
  assert.equal(definitions.length, 2);
  assert.deepEqual(definitions.map(idOf), expectedIds);
});
// Full round trip (JSON -> markdown -> JSON -> markdown): the second
// serialization must still carry the same reference markers and definitions.
test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
  const md = convertProseMirrorToMarkdown(footnoteDoc);
  const json = await markdownToProseMirror(md);
  const md2 = convertProseMirrorToMarkdown(json);
  // Inline reference markers: `(?!:)` excludes the `[^id]:` definition lines,
  // which would otherwise make these two checks pass even if the references
  // were lost during the round trip.
  assert.match(md2, /\[\^fn1\](?!:)/);
  assert.match(md2, /\[\^fn2\](?!:)/);
  // Definitions keep both their ids and their text.
  assert.match(md2, /\[\^fn1\]: First note\./);
  assert.match(md2, /\[\^fn2\]: Second note\./);
});
test("repeated references REUSE one footnote; duplicate definitions are first-wins (#166)", async () => {
  // Reuse semantics: many `[^d]` references + several `[^d]:` definitions import
  // as ONE footnote — the references all keep id "d" (reuse), and only the FIRST
  // definition is kept (first-wins). Deterministic and stable across re-imports.
  const md = [
    "See[^d] one[^d] two[^d].",
    "",
    "[^d]: first",
    "[^d]: second",
    "[^d]: third",
  ].join("\n");

  // Import `md` and summarize what matters: reference ids in document order,
  // definition ids, and the first text node of each definition joined by "|".
  const idsOf = async () => {
    const json = await markdownToProseMirror(md);
    const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
    const defs = findAll(json, "footnoteDefinition");
    return {
      refs,
      defIds: defs.map((d) => d.attrs.id),
      // Walk the node tree rather than regex-scraping the serialized JSON:
      // a regex over `JSON.stringify` matches any `"text"` key and truncates
      // at escaped quotes.
      defText: defs.map((d) => findAll(d, "text")[0]?.text).join("|"),
    };
  };

  // Two independent imports of the same markdown.
  const a = await idsOf();
  const b = await idsOf();
  // Stable across runs.
  assert.deepEqual(a, b);
  // Reuse: all three reference markers stay "d".
  assert.deepEqual(a.refs, ["d", "d", "d"]);
  // First-wins: a single definition "d" with the FIRST text.
  assert.deepEqual(a.defIds, ["d"]);
  assert.equal(a.defText, "first");
});
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
  // Markdown that DOCUMENTS footnote syntax inside a code fence. The example
  // definition line must be preserved verbatim inside the code block and not
  // pulled out into a real footnotesList / footnoteDefinition.
  const md = [
    "Intro text.",
    "",
    "```markdown",
    "Body[^demo]",
    "",
    "[^demo]: example definition",
    "```",
    "",
    "Outro.",
  ].join("\n");
  const json = await markdownToProseMirror(md);
  // No real footnote nodes were extracted from the code block.
  assert.equal(findAll(json, "footnotesList").length, 0);
  assert.equal(findAll(json, "footnoteDefinition").length, 0);
  // The example definition line survives inside the code block itself.
  const codeBlocks = findAll(json, "codeBlock");
  assert.ok(codeBlocks.length >= 1, "code block present");
  // Serialize ONLY the code block nodes: stringifying the whole document would
  // also accept the line if it had leaked into an ordinary paragraph.
  const codeText = JSON.stringify(codeBlocks);
  assert.match(codeText, /\[\^demo\]: example definition/);
});