Files
gitmost/packages/mcp/test/unit/footnotes.test.mjs
T
claude code agent 227 124f5a45a2 refactor(mcp): consume @docmost/prosemirror-markdown, drop the drifted converter copy (#293/#326 step 5)
mcp had its OWN drifted copy of the converter (markdown-converter.ts ~900 lines,
docmost-schema.ts ~1270 lines, markdown-document.ts) — older than the shared
package, missing the git-sync fixes AND the #293 canon. This switches mcp's
converter CORE to @docmost/prosemirror-markdown, so mcp jumps straight to the
canonical format and the drift-generating second copy is gone.

- markdown-converter.ts / markdown-document.ts / docmost-schema.ts become thin
  re-export shims of the package (convertProseMirrorToMarkdown, the docmost:meta
  envelope, docmostExtensions + docmostSchema=getSchema(docmostExtensions)). The
  mcp-only helpers clampCalloutType/sanitizeCssColor are preserved verbatim in
  the schema shim (the package doesn't expose them via its barrel). ~2170 lines
  of the drifted converter/schema bodies deleted.
- collaboration.ts drops its own ~360-line marked pipeline (preprocessCallouts,
  bridgeTaskLists, extractFootnotes, the footnoteRef extension) and re-points to
  the package's markdownToProseMirror, keeping markdownToProseMirrorCanonical and
  all the yjs/collab write glue. footnote-lex/analyze doc comments updated (they
  now describe advisory legacy-syntax diagnostics, not an importer).

Schema parity verified: the package schema is a strict SUPERSET of mcp's old
schema — every node and attr mcp declared is present (the package only adds
status/pageEmbed/transclusion*/subpages.recursive/etc.), so nothing is silently
dropped on the switch. The switch actually FIXES two pre-existing mcp data-loss
bugs its own tests documented: htmlEmbed and pageBreak now round-trip (were
dropped by the old mcp converter).

Footnotes: the package assembles inline ^[body] footnotes on import (sequential
fn-N ids, identical bodies merged), so running mcp's canonicalizeFootnotes
afterwards is a no-op (verified). Legacy reference-style footnotes ([^id]
markers and [^id]: definitions) are treated as inert literal text (canon #2,
no backward compatibility) — this is lossless: the text survives verbatim.

Build hygiene: packages/mcp/build/ is now gitignored and untracked, matching the
git-sync/prosemirror-markdown convention (private package, rebuilt in CI/Docker,
so src and prod can never silently diverge). This also removes a dead untracked
build/_vendored_editor_ext/ artifact that a broad `git add` would otherwise
commit.

Dependency: packages/mcp/package.json gains @docmost/prosemirror-markdown
(workspace:*); pnpm-lock.yaml gets the matching link importer (mirrors git-sync).

mcp tests updated deliberately to the canonical forms (highlight ==, math $…$,
image ![](src)<!--img-->, drawio/media discriminators, subpages/pageBreak
comments, textAlign, inline ^[…] footnotes) with strict assertions; 4 structural
safety-net round-trip tests added.

mcp: node --test 454 passed; tsc clean. package: 657 passed. git-sync: 268 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 11:16:09 +03:00

159 lines
5.4 KiB
JavaScript

import { test } from "node:test";
import assert from "node:assert/strict";
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
/**
 * Collect every node of a given type from a ProseMirror-style JSON tree.
 *
 * Nodes are visited depth-first in document order (a parent before its
 * `content` children), so the result order mirrors reading order.
 *
 * @param {unknown} node - Root to search; falsy or non-object values match nothing.
 * @param {string} type - Value compared strictly against each node's `type`.
 * @param {object[]} [acc=[]] - Accumulator that matches are appended to.
 * @returns {object[]} The same `acc` array, holding every match found.
 */
function findAll(node, type, acc = []) {
  // Explicit work stack instead of recursion; children are pushed in reverse
  // so they are popped (and therefore visited) in their original order.
  const pending = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    const isNode = Boolean(current) && typeof current === "object";
    if (!isNode) continue;
    if (current.type === type) acc.push(current);
    const children = current.content;
    if (!Array.isArray(children)) continue;
    for (let i = children.length - 1; i >= 0; i -= 1) {
      pending.push(children[i]);
    }
  }
  return acc;
}
/**
 * Fixture: a document with two footnotes. A single paragraph carries two
 * inline `footnoteReference` nodes (ids "fn1" and "fn2"), followed by a
 * `footnotesList` holding the matching `footnoteDefinition` nodes, each of
 * which wraps one paragraph of body text.
 */
const footnoteDoc = (() => {
  // Tiny node builders, local to this fixture; key order matches the
  // hand-written literal form (type, attrs, content).
  const text = (value) => ({ type: "text", text: value });
  const reference = (id) => ({ type: "footnoteReference", attrs: { id } });
  const paragraph = (...children) => ({ type: "paragraph", content: children });
  const definition = (id, body) => ({
    type: "footnoteDefinition",
    attrs: { id },
    content: [paragraph(text(body))],
  });

  return {
    type: "doc",
    content: [
      paragraph(
        text("Water"),
        reference("fn1"),
        text(" and clay"),
        reference("fn2"),
        text("."),
      ),
      {
        type: "footnotesList",
        content: [
          definition("fn1", "First note."),
          definition("fn2", "Second note."),
        ],
      },
    ],
  };
})();
test("JSON -> Markdown emits canonical inline footnote syntax (#293 canon #2)", () => {
  // The canonical markdown form uses Pandoc/Obsidian INLINE footnotes: each
  // note body is written where it is referenced, as `^[body]`. Neither a
  // `[^id]` reference marker nor a trailing `[^id]: …` definition list may
  // appear, so the schema id never reaches the markdown.
  const markdown = convertProseMirrorToMarkdown(footnoteDoc);

  // Both bodies must be present in inline form.
  const required = [/\^\[First note\.\]/, /\^\[Second note\.\]/];
  // No reference-style markers, and no bottom definition lines.
  const forbidden = [/\[\^/, /^\[\^.+\]:/m];

  for (const pattern of required) {
    assert.match(markdown, pattern);
  }
  for (const pattern of forbidden) {
    assert.doesNotMatch(markdown, pattern);
  }
});
test("Markdown -> JSON rebuilds footnote nodes with sequential fn-N ids", async () => {
  const json = await markdownToProseMirror(
    convertProseMirrorToMarkdown(footnoteDoc),
  );

  // The structure survives the trip, but the concrete id is never written to
  // markdown: the importer derives ids sequentially in first-reference order
  // (fn-1, fn-2, …).
  const idOf = (footnoteNode) => footnoteNode.attrs.id;
  const expectedIds = ["fn-1", "fn-2"];

  const references = findAll(json, "footnoteReference");
  assert.equal(references.length, 2);
  assert.deepEqual(references.map(idOf), expectedIds);

  const lists = findAll(json, "footnotesList");
  assert.equal(lists.length, 1);

  const definitions = findAll(json, "footnoteDefinition");
  assert.equal(definitions.length, 2);
  assert.deepEqual(definitions.map(idOf), expectedIds);
});
test("JSON -> MD -> JSON is byte-stable and preserves footnote body text", async () => {
  const firstPass = convertProseMirrorToMarkdown(footnoteDoc);
  const reimported = await markdownToProseMirror(firstPass);
  const secondPass = convertProseMirrorToMarkdown(reimported);

  // Ids are never written to markdown, so whatever id the importer assigns
  // cannot perturb the second export: the two passes must match exactly.
  assert.equal(secondPass, firstPass);

  // The footnote bodies themselves survive the round trip.
  for (const bodyPattern of [/\^\[First note\.\]/, /\^\[Second note\.\]/]) {
    assert.match(secondPass, bodyPattern);
  }
});
test("identical footnote bodies MERGE to one shared definition (#293 canon #2)", async () => {
  // Two references whose bodies are byte-identical import as ONE definition
  // shared by both references (dedup on the exact body text). Two DIFFERENT
  // bodies stay distinct. Deterministic and stable across re-imports.
  const md = "See^[same] and^[same], but^[other].";

  /**
   * Import `md` and summarize the footnote structure.
   * @returns {Promise<{refs: string[], defIds: string[], defText: string}>}
   *   Reference ids in document order, definition ids in document order, and
   *   the definitions' body texts joined with "|".
   */
  const idsOf = async () => {
    const json = await markdownToProseMirror(md);
    const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
    const defs = findAll(json, "footnoteDefinition");
    // Read each body structurally from its text nodes. Pattern-matching the
    // serialized JSON would capture only the first text node and would stop
    // at any escaped quote inside the body.
    const bodyOf = (def) =>
      findAll(def, "text")
        .map((t) => t.text)
        .join("");
    return {
      refs,
      defIds: defs.map((d) => d.attrs.id),
      defText: defs.map(bodyOf).join("|"),
    };
  };

  const a = await idsOf();
  const b = await idsOf();
  // Stable across runs.
  assert.deepEqual(a, b);
  // Merge: the two "same" references share fn-1; the "other" reference is fn-2.
  assert.deepEqual(a.refs, ["fn-1", "fn-1", "fn-2"]);
  // One definition per unique body, in first-reference order.
  assert.deepEqual(a.defIds, ["fn-1", "fn-2"]);
  assert.equal(a.defText, "same|other");
});
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
  // Markdown that DOCUMENTS footnote syntax inside a code fence. The example
  // definition line must be preserved verbatim inside the code block and not
  // pulled out into a real footnotesList / footnoteDefinition.
  const md = [
    "Intro text.",
    "",
    "```markdown",
    "Body[^demo]",
    "",
    "[^demo]: example definition",
    "```",
    "",
    "Outro.",
  ].join("\n");
  const json = await markdownToProseMirror(md);

  // No real footnote nodes were extracted from the code block.
  assert.equal(findAll(json, "footnotesList").length, 0);
  assert.equal(findAll(json, "footnoteDefinition").length, 0);

  const codeBlocks = findAll(json, "codeBlock");
  assert.ok(codeBlocks.length >= 1, "code block present");

  // The example definition line must survive INSIDE a code block. Only the
  // code block nodes are serialized here: matching against the whole document
  // would also pass if the line had leaked out of the fence as plain
  // paragraph text.
  const codeText = JSON.stringify(codeBlocks);
  assert.match(codeText, /\[\^demo\]: example definition/);
});