Files
gitmost/packages/mcp/test/unit/roundtrip.test.mjs
T
claude code agent 227 124f5a45a2 refactor(mcp): consume @docmost/prosemirror-markdown, drop the drifted converter copy (#293/#326 step 5)
mcp had its OWN drifted copy of the converter (markdown-converter.ts ~900 lines,
docmost-schema.ts ~1270 lines, markdown-document.ts) — older than the shared
package, missing the git-sync fixes AND the #293 canon. This switches mcp's
converter CORE to @docmost/prosemirror-markdown, so mcp jumps straight to the
canonical format and the drift-generating second copy is gone.

- markdown-converter.ts / markdown-document.ts / docmost-schema.ts become thin
  re-export shims of the package (convertProseMirrorToMarkdown, the docmost:meta
  envelope, docmostExtensions + docmostSchema=getSchema(docmostExtensions)). The
  mcp-only helpers clampCalloutType/sanitizeCssColor are preserved verbatim in
  the schema shim (the package doesn't expose them via its barrel). ~2170 lines
  of the drifted converter/schema bodies deleted.
- collaboration.ts drops its own ~360-line marked pipeline (preprocessCallouts,
  bridgeTaskLists, extractFootnotes, the footnoteRef extension) and re-points to
  the package's markdownToProseMirror, keeping markdownToProseMirrorCanonical and
  all the yjs/collab write glue. footnote-lex/analyze doc comments updated (they
  now describe advisory legacy-syntax diagnostics, not an importer).

Schema parity verified: the package schema is a strict SUPERSET of mcp's old
schema — every node and attr mcp declared is present (the package only adds
status/pageEmbed/transclusion*/subpages.recursive/etc.), so nothing is silently
dropped on the switch. The switch actually FIXES two pre-existing mcp data-loss
bugs its own tests documented: htmlEmbed and pageBreak now round-trip (were
dropped by the old mcp converter).

Footnotes: the package assembles inline ^[body] footnotes on import (sequential
fn-N ids, identical bodies merged), so mcp's canonicalizeFootnotes is now an
idempotent no-op after it (verified). Legacy reference footnotes [^id]/[^id]:
are inert literal text (canon #2 no-backward-compat) — lossless, the text
survives verbatim.

Build hygiene: packages/mcp/build/ is now gitignored and untracked, matching the
git-sync/prosemirror-markdown convention (private package, rebuilt in CI/Docker,
so src and prod can never silently diverge). This also removes a dead untracked
build/_vendored_editor_ext/ artifact that a broad `git add` would otherwise
commit.

Dependency: packages/mcp/package.json gains @docmost/prosemirror-markdown
(workspace:*); pnpm-lock.yaml gets the matching link importer (mirrors git-sync).

mcp tests updated deliberately to the canonical forms (highlight ==, math $…$,
image ![](src)<!--img-->, drawio/media discriminators, subpages/pageBreak
comments, textAlign, inline ^[…] footnotes) with strict assertions; 4 structural
safety-net round-trip tests added.

mcp: node --test 454 passed; tsc clean. package: 657 passed. git-sync: 268 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 11:16:09 +03:00

232 lines
11 KiB
JavaScript

// Round-trip regression tests: PM -> markdown -> PM must preserve rich nodes.
// These lock in the converter/schema fixes (math, mention, attachment, columns,
// nested blocks, text color) and the attribute-escaping idempotency fix.
import { test } from "node:test";
import assert from "node:assert/strict";
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
/**
 * Build a `doc` root node in ProseMirror-JSON shape.
 * @param {...object} content - Child nodes, kept in the order given.
 * @returns {{type: string, content: object[]}} The `doc` node wrapping the children.
 */
const doc = (...content) => {
  return { type: "doc", content };
};
/**
 * Build a `paragraph` node in ProseMirror-JSON shape.
 * @param {...object} content - Inline child nodes, kept in the order given.
 * @returns {{type: string, content: object[]}} The `paragraph` node wrapping the children.
 */
const para = (...content) => {
  return { type: "paragraph", content };
};
/**
 * Build a `text` node in ProseMirror-JSON shape.
 * @param {string} t - The text content.
 * @param {object[]} [marks] - Marks to attach; the `marks` key is omitted
 *   entirely when this argument is falsy.
 * @returns {object} The text node.
 */
const text = (t, marks) => {
  const node = { type: "text", text: t };
  if (marks) {
    node.marks = marks;
  }
  return node;
};
/**
 * Collect every node of the given type from a node tree, depth-first,
 * parents before their children.
 * @param {object|null|undefined} node - Root of the tree; falsy nodes are skipped.
 * @param {string} type - The `type` value to match.
 * @param {object[]} [acc] - Array that matches are appended to (and returned).
 * @returns {object[]} `acc`, containing the matching nodes in visit order.
 */
const findNodes = (node, type, acc = []) => {
  const visit = (current) => {
    if (!current) return;
    if (current.type === type) {
      acc.push(current);
    }
    const children = current.content || [];
    for (const child of children) {
      visit(child);
    }
  };
  visit(node);
  return acc;
};
/**
 * Collect the distinct mark type names found anywhere in a node tree.
 * @param {object|null|undefined} node - Root of the tree; falsy nodes are skipped.
 * @param {Set<string>} [acc] - Set that mark types are added to (and returned).
 * @returns {Set<string>} `acc`, holding the `type` of every mark encountered.
 */
const markTypes = (node, acc = new Set()) => {
  const visit = (current) => {
    if (!current) return;
    const marks = current.marks || [];
    for (const mark of marks) {
      acc.add(mark.type);
    }
    const children = current.content || [];
    for (const child of children) {
      visit(child);
    }
  };
  visit(node);
  return acc;
};
/**
 * Serialize a ProseMirror-JSON document to markdown and parse that markdown
 * straight back, yielding whatever the importer produces for it.
 * @param {object} pmDoc - The document to push through the converter pair.
 * @returns {Promise<object>} The re-imported document.
 */
const roundtrip = async (pmDoc) => {
  const markdown = convertProseMirrorToMarkdown(pmDoc);
  return markdownToProseMirror(markdown);
};
test("round-trip: text color (textStyle mark) survives", async () => {
  // One paragraph whose only text run carries a red textStyle mark.
  const redMark = { type: "textStyle", attrs: { color: "red" } };
  const restored = await roundtrip(doc(para(text("colored", [redMark]))));

  // Gather every textStyle mark found on any text node of the result.
  const styleMarks = [];
  for (const textNode of findNodes(restored, "text")) {
    for (const mark of textNode.marks || []) {
      if (mark.type === "textStyle") styleMarks.push(mark);
    }
  }

  assert.ok(styleMarks.length >= 1, "textStyle mark should survive");
  assert.equal(styleMarks[0].attrs?.color, "red");
});
test("round-trip: mathInline with '<' survives and is idempotent", async () => {
  const latex = "a < b \\leq c";
  const source = doc(para(text("x"), { type: "mathInline", attrs: { text: latex } }));

  // Export once, re-import, export again: both markdown strings must match.
  const firstPass = convertProseMirrorToMarkdown(source);
  const reimported = await markdownToProseMirror(firstPass);
  const secondPass = convertProseMirrorToMarkdown(reimported);
  assert.equal(firstPass, secondPass, "markdown must be idempotent across a round-trip (no escape accumulation)");

  // The re-imported document must hold exactly one math node with the same LaTeX.
  const restored = await markdownToProseMirror(firstPass);
  const mathNodes = findNodes(restored, "mathInline");
  assert.equal(mathNodes.length, 1, "mathInline node should survive");
  assert.equal(mathNodes[0].attrs?.text, latex, "LaTeX (incl. '<') preserved exactly");
});
test("round-trip: mathBlock survives", async () => {
  // A document consisting of a single block-level math node.
  const formula = "E = mc^2";
  const restored = await roundtrip(doc({ type: "mathBlock", attrs: { text: formula } }));

  const mathBlocks = findNodes(restored, "mathBlock");
  assert.equal(mathBlocks.length, 1);
  assert.equal(mathBlocks[0].attrs?.text, formula);
});
test("round-trip: mention node survives (not flattened to @text)", async () => {
  // A user mention placed after a plain text run inside one paragraph.
  const mention = {
    type: "mention",
    attrs: { id: "u1", label: "Alice", entityType: "user", entityId: "u1" },
  };
  const restored = await roundtrip(doc(para(text("hi "), mention)));

  const mentions = findNodes(restored, "mention");
  assert.equal(mentions.length, 1, "mention node should survive");
});
test("round-trip: attachment node survives with url + name", async () => {
  const url = "/api/files/x/report.pdf";
  const name = "report.pdf";
  const source = doc({ type: "attachment", attrs: { url, name, mime: "application/pdf" } });

  const restored = await roundtrip(source);

  // Exactly one attachment must come back, still pointing at the same file.
  const attachments = findNodes(restored, "attachment");
  assert.equal(attachments.length, 1, "attachment node should survive");
  const [attachment] = attachments;
  assert.equal(attachment.attrs?.url, url);
  assert.equal(attachment.attrs?.name, name);
});
test("round-trip: image inside a column survives as an image node (not literal markdown)", async () => {
  // Two-column layout; the left column holds a paragraph followed by an image.
  const picture = { type: "image", attrs: { src: "/api/files/a/p.png", alt: "pic" } };
  const leftColumn = { type: "column", content: [para(text("left")), picture] };
  const rightColumn = { type: "column", content: [para(text("right"))] };

  const restored = await roundtrip(doc({ type: "columns", content: [leftColumn, rightColumn] }));

  assert.equal(findNodes(restored, "image").length, 1, "image inside a column must survive");
  // The image syntax must not show up anywhere in the result as plain text.
  const serialized = JSON.stringify(restored);
  assert.ok(!serialized.includes("![pic]"), "image must not become literal markdown text");
});
test("round-trip: captioned image inside a column preserves its caption (imageToHtml branch)", async () => {
  // Per the test's intent, a captioned image nested in a column is exported
  // through the imageToHtml helper (a raw HTML container) rather than the
  // top-level image path. The caption deliberately contains `&` and `"` so
  // attribute escaping is exercised in both directions.
  const caption = 'Tom & "Jerry"';
  const captioned = { type: "image", attrs: { src: "/api/files/a/p.png", alt: "pic", caption } };
  const source = doc({
    type: "columns",
    content: [
      { type: "column", content: [captioned] },
      { type: "column", content: [para(text("right"))] },
    ],
  });

  const restored = await roundtrip(source);

  const images = findNodes(restored, "image");
  assert.equal(images.length, 1, "captioned image inside a column must survive");
  assert.equal(images[0].attrs?.caption, caption, "caption (incl. special chars) must be preserved");
});
test("round-trip: blockquote inside a column survives as a blockquote node", async () => {
  // Two-column layout; the left column contains only a blockquote.
  const quote = { type: "blockquote", content: [para(text("quoted"))] };
  const columns = {
    type: "columns",
    content: [
      { type: "column", content: [quote] },
      { type: "column", content: [para(text("r"))] },
    ],
  };

  const restored = await roundtrip(doc(columns));

  const quotes = findNodes(restored, "blockquote");
  assert.equal(quotes.length, 1, "blockquote inside a column must survive");
});
test("round-trip: table cell with colspan>1 keeps the grid (HTML fallback)", async () => {
  // Shared builder for both cell kinds: a single paragraph of text inside.
  const cellOf = (type, label, attrs = {}) => ({ type, attrs, content: [para(text(label))] });

  // Header row with two cells, then a body row whose single cell spans both.
  const headerRow = {
    type: "tableRow",
    content: [cellOf("tableHeader", "A"), cellOf("tableHeader", "B")],
  };
  const bodyRow = {
    type: "tableRow",
    content: [cellOf("tableCell", "wide", { colspan: 2 })],
  };

  const restored = await roundtrip(doc({ type: "table", content: [headerRow, bodyRow] }));

  assert.equal(findNodes(restored, "table").length, 1, "table should survive");
  // A missing colspan attribute counts as 1, i.e. not spanned.
  const isSpanned = (cellNode) => (cellNode.attrs?.colspan ?? 1) > 1;
  const spannedCell = findNodes(restored, "tableCell").find(isSpanned);
  assert.ok(spannedCell, "colspan>1 cell should be preserved via the HTML fallback");
});
test("import: an unsafe highlight color (raw data-color) is sanitized to null (no style breakout)", async () => {
  // Per the test's intent, data-color is taken verbatim (no CSSOM isolation)
  // and is therefore the injection surface: a value carrying quotes or
  // semicolons has to come back as null.
  const hostile = '<mark data-color="red&quot;; background:url(x)">hi</mark>';
  const imported = await markdownToProseMirror(hostile);

  // Collect every highlight mark found on any text node.
  const highlights = [];
  for (const textNode of findNodes(imported, "text")) {
    for (const mark of textNode.marks || []) {
      if (mark.type === "highlight") highlights.push(mark);
    }
  }

  assert.ok(highlights.length >= 1, "highlight mark present");
  const color = highlights[0].attrs?.color ?? null;
  assert.equal(color, null, "unsafe color must be clamped to null");
});
test("import: a safe highlight color is preserved", async () => {
  // A plain hex background colour should pass through the importer unchanged.
  const imported = await markdownToProseMirror('<mark style="background-color: #ff0000">hi</mark>');

  // Collect every highlight mark found on any text node.
  const highlights = [];
  for (const textNode of findNodes(imported, "text")) {
    for (const mark of textNode.marks || []) {
      if (mark.type === "highlight") highlights.push(mark);
    }
  }

  assert.ok(highlights.length >= 1);
  assert.equal(highlights[0].attrs?.color, "#ff0000");
});
test("round-trip: attribute value with an apostrophe is idempotent (no &amp; accumulation)", async () => {
  const fileName = "o'brien's file.pdf";
  const source = doc({
    type: "attachment",
    attrs: { url: "/api/files/x/o'brien's file.pdf", name: fileName },
  });

  // Export, re-import, export again: the two markdown strings must be equal.
  const firstPass = convertProseMirrorToMarkdown(source);
  const reimported = await markdownToProseMirror(firstPass);
  const secondPass = convertProseMirrorToMarkdown(reimported);
  assert.equal(firstPass, secondPass, "apostrophe in an attribute value must not accumulate escapes across round-trips");

  // A fresh import of the first export must still carry the original name.
  const restored = await markdownToProseMirror(firstPass);
  const attachments = findNodes(restored, "attachment");
  assert.equal(attachments.length, 1);
  assert.equal(attachments[0].attrs?.name, fileName, "apostrophe preserved verbatim");
});
test("import: a colored span that is also a comment keeps the comment mark", async () => {
  // One span carrying both a comment id and an inline text colour.
  const imported = await markdownToProseMirror('<span data-comment-id="c1" style="color: red">x</span>');

  // True when any text node in the result carries a mark of type "comment".
  const hasCommentMark = findNodes(imported, "text").some((textNode) =>
    (textNode.marks || []).some((mark) => mark.type === "comment"),
  );
  assert.ok(hasCommentMark, "comment mark must survive (textStyle must not steal the span)");
});
test("import: a colored mention span keeps the mention node", async () => {
  // A mention span that additionally carries an inline text colour.
  const html = '<span data-type="mention" data-id="u1" data-label="Alice" style="color: blue">@Alice</span>';
  const imported = await markdownToProseMirror(html);

  const mentions = findNodes(imported, "mention");
  assert.equal(mentions.length, 1, "mention node must survive a colored span");
});
// ---------------------------------------------------------------------------
// #293 STEP 5 canon safety net. These assert STRUCTURE/content preservation
// (format-agnostic: the node/mark and its value survive PM -> markdown -> PM,
// and the markdown is idempotent), NOT the exact markdown bytes — so they stay
// valid regardless of the concrete canonical spelling. They cover the node/mark
// types whose canonical markdown form changed in #293 (highlight-without-color,
// textAlign, subpages, inline footnotes) and complement the existing math /
// media / mention / column round-trips above.
// ---------------------------------------------------------------------------
test("round-trip: highlight WITHOUT a color survives as a highlight mark (==)", async () => {
  const colorless = { type: "highlight", attrs: { color: null } };
  const source = doc(para(text("hi", [colorless])));
  const exported = convertProseMirrorToMarkdown(source);

  const restored = await roundtrip(source);

  // Look for the original text run still carrying a highlight mark.
  const isHighlightedHi = (node) => {
    if (node.text !== "hi") return false;
    return (node.marks || []).some((mark) => mark.type === "highlight");
  };
  const highlighted = findNodes(restored, "text").find(isHighlightedHi);
  assert.ok(highlighted, "the highlight mark must survive a color-less round-trip");

  // Exporting the restored document must reproduce the first export.
  assert.equal(convertProseMirrorToMarkdown(restored), exported);
});
test("round-trip: paragraph textAlign survives via the attached-comment directive", async () => {
  // Built by hand because the para() helper does not set attrs.
  const centered = { type: "paragraph", attrs: { textAlign: "center" }, content: [text("mid")] };
  const source = doc(centered);
  const exported = convertProseMirrorToMarkdown(source);

  const restored = await roundtrip(source);

  const centeredAgain = findNodes(restored, "paragraph").find(
    (paragraph) => paragraph.attrs?.textAlign === "center",
  );
  assert.ok(centeredAgain, "textAlign must be restored on the paragraph");
  assert.equal(convertProseMirrorToMarkdown(restored), exported, "textAlign round-trip is idempotent");
});
test("round-trip: subpages atom survives", async () => {
  // A document whose only child is a bare subpages node (no attrs, no content).
  const restored = await roundtrip(doc({ type: "subpages" }));

  const subpages = findNodes(restored, "subpages");
  assert.equal(subpages.length, 1, "subpages node must survive");
});
test("round-trip: inline footnote survives with body text (canonical structure)", async () => {
  // A paragraph referencing footnote "fnA", plus the list holding its definition.
  const footnoteId = "fnA";
  const claim = para(text("Claim"), { type: "footnoteReference", attrs: { id: footnoteId } });
  const definition = {
    type: "footnoteDefinition",
    attrs: { id: footnoteId },
    content: [para(text("the evidence"))],
  };
  const source = doc(claim, { type: "footnotesList", content: [definition] });
  const exported = convertProseMirrorToMarkdown(source);

  const restored = await roundtrip(source);

  // Each structural piece must come back exactly once.
  assert.equal(findNodes(restored, "footnoteReference").length, 1);
  assert.equal(findNodes(restored, "footnotesList").length, 1);
  assert.equal(findNodes(restored, "footnoteDefinition").length, 1);
  assert.match(JSON.stringify(restored), /the evidence/, "footnote body survives");
  // Re-exporting must be byte-stable; the test's premise is that the schema
  // id is never written to markdown.
  assert.equal(convertProseMirrorToMarkdown(restored), exported);
});