124f5a45a2
mcp had its OWN drifted copy of the converter (markdown-converter.ts ~900 lines, docmost-schema.ts ~1270 lines, markdown-document.ts) — older than the shared package, missing the git-sync fixes AND the #293 canon. This switches mcp's converter CORE to @docmost/prosemirror-markdown, so mcp jumps straight to the canonical format and the drift-generating second copy is gone. - markdown-converter.ts / markdown-document.ts / docmost-schema.ts become thin re-export shims of the package (convertProseMirrorToMarkdown, the docmost:meta envelope, docmostExtensions + docmostSchema=getSchema(docmostExtensions)). The mcp-only helpers clampCalloutType/sanitizeCssColor are preserved verbatim in the schema shim (the package doesn't expose them via its barrel). ~2170 lines of the drifted converter/schema bodies deleted. - collaboration.ts drops its own ~360-line marked pipeline (preprocessCallouts, bridgeTaskLists, extractFootnotes, the footnoteRef extension) and re-points to the package's markdownToProseMirror, keeping markdownToProseMirrorCanonical and all the yjs/collab write glue. footnote-lex/analyze doc comments updated (they now describe advisory legacy-syntax diagnostics, not an importer). Schema parity verified: the package schema is a strict SUPERSET of mcp's old schema — every node and attr mcp declared is present (the package only adds status/pageEmbed/transclusion*/subpages.recursive/etc.), so nothing is silently dropped on the switch. The switch actually FIXES two pre-existing mcp data-loss bugs its own tests documented: htmlEmbed and pageBreak now round-trip (were dropped by the old mcp converter). Footnotes: the package assembles inline ^[body] footnotes on import (sequential fn-N ids, identical bodies merged), so mcp's canonicalizeFootnotes is now an idempotent no-op after it (verified). Legacy reference footnotes [^id]/[^id]: are inert literal text (canon #2 no-backward-compat) — lossless, the text survives verbatim. 
Build hygiene: packages/mcp/build/ is now gitignored and untracked, matching the git-sync/prosemirror-markdown convention (private package, rebuilt in CI/Docker, so src and prod can never silently diverge). This also removes a dead untracked build/_vendored_editor_ext/ artifact that a broad `git add` would otherwise commit. Dependency: packages/mcp/package.json gains @docmost/prosemirror-markdown (workspace:*); pnpm-lock.yaml gets the matching link importer (mirrors git-sync). mcp tests updated deliberately to the canonical forms (highlight ==, math $…$, image <!--img-->, drawio/media discriminators, subpages/pageBreak comments, textAlign, inline ^[…] footnotes) with strict assertions; 4 structural safety-net round-trip tests added. mcp: node --test 454 passed; tsc clean. package: 657 passed. git-sync: 268 passed. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
232 lines
11 KiB
JavaScript
232 lines
11 KiB
JavaScript
// Round-trip regression tests: PM -> markdown -> PM must preserve rich nodes.
// These lock in the converter/schema fixes (math, mention, attachment, columns,
// nested blocks, text color) and the attribute-escaping idempotency fix.
|
|
import { test } from "node:test";
|
|
import assert from "node:assert/strict";
|
|
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
|
|
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
|
|
|
|
/** Build a ProseMirror-style `doc` node whose content is the given child nodes. */
const doc = (...children) => {
  return { type: "doc", content: children };
};
|
|
/** Build a `paragraph` node whose content is the given inline nodes. */
const para = (...inlines) => {
  return { type: "paragraph", content: inlines };
};
|
|
/**
 * Build a `text` node.
 *
 * @param {string} t - The text content.
 * @param {Array<object>} [marks] - Marks to attach; the `marks` key is only
 *   present on the result when this argument is truthy.
 * @returns {{type: "text", text: string, marks?: Array<object>}}
 */
const text = (t, marks) => {
  const node = { type: "text", text: t };
  if (marks) {
    node.marks = marks;
  }
  return node;
};
|
|
|
|
// Recursively collect nodes of a given type.
|
|
/**
 * Collect every node of the given type from a node tree, in pre-order
 * (a node is recorded before its `content` children are visited).
 *
 * @param {object|null|undefined} node - Root of the tree; falsy roots and
 *   falsy children are skipped.
 * @param {string} type - Node type to match against `node.type`.
 * @param {Array<object>} [acc] - Accumulator the matches are pushed onto.
 * @returns {Array<object>} The accumulator.
 */
const findNodes = (node, type, acc = []) => {
  const visit = (current) => {
    if (!current) {
      return;
    }
    if (current.type === type) {
      acc.push(current);
    }
    const children = current.content || [];
    for (const child of children) {
      visit(child);
    }
  };
  visit(node);
  return acc;
};
|
|
// Recursively collect the set of mark types present.
|
|
/**
 * Collect the set of mark type names found anywhere in a node tree.
 *
 * @param {object|null|undefined} node - Root of the tree; falsy nodes are skipped.
 * @param {Set<string>} [acc] - Set the mark types are added to.
 * @returns {Set<string>} The accumulator set.
 */
const markTypes = (node, acc = new Set()) => {
  if (node) {
    const ownMarks = node.marks || [];
    for (const mark of ownMarks) {
      acc.add(mark.type);
    }
    const children = node.content || [];
    for (const child of children) {
      markTypes(child, acc);
    }
  }
  return acc;
};
|
|
/**
 * Serialize a ProseMirror document to markdown and import that markdown back.
 *
 * @param {object} pmDoc - ProseMirror document JSON.
 * @returns {Promise<object>} Whatever `markdownToProseMirror` yields for the
 *   serialized markdown.
 */
const roundtrip = async (pmDoc) => {
  const markdown = convertProseMirrorToMarkdown(pmDoc);
  return markdownToProseMirror(markdown);
};
|
|
|
|
test("round-trip: text color (textStyle mark) survives", async () => {
  // One paragraph whose only text run carries a red textStyle mark.
  const coloredRun = text("colored", [{ type: "textStyle", attrs: { color: "red" } }]);
  const out = await roundtrip(doc(para(coloredRun)));

  // Gather every textStyle mark found on any text node after the round-trip.
  const ts = [];
  for (const node of findNodes(out, "text")) {
    for (const mark of node.marks || []) {
      if (mark.type === "textStyle") {
        ts.push(mark);
      }
    }
  }

  assert.ok(ts.length >= 1, "textStyle mark should survive");
  assert.equal(ts[0].attrs?.color, "red");
});
|
|
|
|
test("round-trip: mathInline with '<' survives and is idempotent", async () => {
  // LaTeX containing '<' — the character that must not be re-escaped on each pass.
  const latex = "a < b \\leq c";
  const input = doc(para(text("x"), { type: "mathInline", attrs: { text: latex } }));

  // Serialize, re-import, serialize again: both markdown strings must match.
  const md1 = convertProseMirrorToMarkdown(input);
  const reimported = await markdownToProseMirror(md1);
  const md2 = convertProseMirrorToMarkdown(reimported);
  assert.equal(md1, md2, "markdown must be idempotent across a round-trip (no escape accumulation)");

  // The node itself must come back with the exact LaTeX source.
  const out = await markdownToProseMirror(md1);
  const math = findNodes(out, "mathInline");
  assert.equal(math.length, 1, "mathInline node should survive");
  assert.equal(math[0].attrs?.text, latex, "LaTeX (incl. '<') preserved exactly");
});
|
|
|
|
test("round-trip: mathBlock survives", async () => {
  const formula = "E = mc^2";
  const out = await roundtrip(doc({ type: "mathBlock", attrs: { text: formula } }));

  // Exactly one mathBlock must come back, carrying the same source text.
  const math = findNodes(out, "mathBlock");
  assert.equal(math.length, 1);
  assert.equal(math[0].attrs?.text, formula);
});
|
|
|
|
test("round-trip: mention node survives (not flattened to @text)", async () => {
  const mention = {
    type: "mention",
    attrs: { id: "u1", label: "Alice", entityType: "user", entityId: "u1" },
  };
  const out = await roundtrip(doc(para(text("hi "), mention)));

  // The mention must still be a dedicated node after the round-trip.
  const mentions = findNodes(out, "mention");
  assert.equal(mentions.length, 1, "mention node should survive");
});
|
|
|
|
test("round-trip: attachment node survives with url + name", async () => {
  const url = "/api/files/x/report.pdf";
  const name = "report.pdf";
  const input = doc({ type: "attachment", attrs: { url, name, mime: "application/pdf" } });

  const out = await roundtrip(input);

  // One attachment node must come back with its url and name intact.
  const att = findNodes(out, "attachment");
  assert.equal(att.length, 1, "attachment node should survive");
  assert.equal(att[0].attrs?.url, url);
  assert.equal(att[0].attrs?.name, name);
});
|
|
|
|
test("round-trip: image inside a column survives as an image node (not literal markdown)", async () => {
  // Two columns; the left one holds a paragraph followed by an image.
  const picture = { type: "image", attrs: { src: "/api/files/a/p.png", alt: "pic" } };
  const leftColumn = { type: "column", content: [para(text("left")), picture] };
  const rightColumn = { type: "column", content: [para(text("right"))] };
  const input = doc({ type: "columns", content: [leftColumn, rightColumn] });

  const out = await roundtrip(input);

  const images = findNodes(out, "image");
  assert.equal(images.length, 1, "image inside a column must survive");

  // and it must NOT leak as literal markdown text
  const serialized = JSON.stringify(out);
  assert.ok(!serialized.includes("![pic]"), "image must not become literal markdown text");
});
|
|
|
|
test("round-trip: captioned image inside a column preserves its caption (imageToHtml branch)", async () => {
  // Per the original author's note, a captioned image in a column is emitted
  // via the imageToHtml helper (raw HTML container), a different path from the
  // top-level image case. The '&' and '"' in the caption exercise attribute
  // escaping on the way out and in.
  const caption = 'Tom & "Jerry"';
  const captionedImage = {
    type: "image",
    attrs: { src: "/api/files/a/p.png", alt: "pic", caption },
  };
  const columns = {
    type: "columns",
    content: [
      { type: "column", content: [captionedImage] },
      { type: "column", content: [para(text("right"))] },
    ],
  };

  const out = await roundtrip(doc(columns));

  const imgs = findNodes(out, "image");
  assert.equal(imgs.length, 1, "captioned image inside a column must survive");
  assert.equal(imgs[0].attrs?.caption, caption, "caption (incl. special chars) must be preserved");
});
|
|
|
|
test("round-trip: blockquote inside a column survives as a blockquote node", async () => {
  // Left column wraps a blockquote; right column is a plain paragraph.
  const quote = { type: "blockquote", content: [para(text("quoted"))] };
  const columns = {
    type: "columns",
    content: [
      { type: "column", content: [quote] },
      { type: "column", content: [para(text("r"))] },
    ],
  };

  const out = await roundtrip(doc(columns));

  const quotes = findNodes(out, "blockquote");
  assert.equal(quotes.length, 1, "blockquote inside a column must survive");
});
|
|
|
|
test("round-trip: table cell with colspan>1 keeps the grid (HTML fallback)", async () => {
  // Shared builder: a table cell of the given node type wrapping one paragraph.
  const wrapInCell = (type, label, attrs) => ({ type, attrs, content: [para(text(label))] });
  const cell = (label, attrs = {}) => wrapInCell("tableCell", label, attrs);
  const header = (label) => wrapInCell("tableHeader", label, {});

  // Two header cells on the first row, one cell spanning both columns below.
  const headerRow = { type: "tableRow", content: [header("A"), header("B")] };
  const spanningRow = { type: "tableRow", content: [cell("wide", { colspan: 2 })] };
  const input = doc({ type: "table", content: [headerRow, spanningRow] });

  const out = await roundtrip(input);

  const tables = findNodes(out, "table");
  assert.equal(tables.length, 1, "table should survive");

  // A missing colspan attribute counts as 1, i.e. not spanned.
  const spansColumns = (node) => (node.attrs?.colspan ?? 1) > 1;
  const spanned = findNodes(out, "tableCell").find(spansColumns);
  assert.ok(spanned, "colspan>1 cell should be preserved via the HTML fallback");
});
|
|
|
|
test("import: an unsafe highlight color (raw data-color) is sanitized to null (no style breakout)", async () => {
  // data-color is read verbatim (no CSSOM isolation), so it is the real
  // injection surface; a value with quotes/semicolons must be clamped to null.
  //
  // The embedded quote is written as &quot; so that it stays INSIDE the
  // attribute value: under HTML attribute parsing the decoded data-color is
  // `red"; background:url(x)`. A raw `"` at that position would terminate the
  // attribute right after `red`, leaving the harmless value "red" and turning
  // this into a test that never feeds the sanitizer an unsafe value.
  const unsafeMarkup = '<mark data-color="red&quot;; background:url(x)">hi</mark>';
  const out = await markdownToProseMirror(unsafeMarkup);

  // Collect every highlight mark found on any text node of the import result.
  const hl = findNodes(out, "text")
    .flatMap((n) => n.marks || [])
    .filter((m) => m.type === "highlight");

  assert.ok(hl.length >= 1, "highlight mark present");
  // A missing color attribute is treated the same as an explicit null.
  assert.equal(hl[0].attrs?.color ?? null, null, "unsafe color must be clamped to null");
});
|
|
|
|
test("import: a safe highlight color is preserved", async () => {
  const markup = '<mark style="background-color: #ff0000">hi</mark>';
  const out = await markdownToProseMirror(markup);

  // Gather every highlight mark found on any text node of the import result.
  const hl = [];
  for (const node of findNodes(out, "text")) {
    for (const mark of node.marks || []) {
      if (mark.type === "highlight") {
        hl.push(mark);
      }
    }
  }

  assert.ok(hl.length >= 1);
  assert.equal(hl[0].attrs?.color, "#ff0000");
});
|
|
|
|
test("round-trip: attribute value with an apostrophe is idempotent (no & accumulation)", async () => {
  // Both the url and the name carry apostrophes.
  const fileName = "o'brien's file.pdf";
  const input = doc({
    type: "attachment",
    attrs: { url: `/api/files/x/${fileName}`, name: fileName },
  });

  // Serialize, re-import, serialize again: the markdown must not change.
  const md1 = convertProseMirrorToMarkdown(input);
  const reimported = await markdownToProseMirror(md1);
  const md2 = convertProseMirrorToMarkdown(reimported);
  assert.equal(md1, md2, "apostrophe in an attribute value must not accumulate escapes across round-trips");

  // The attachment must come back with the name unchanged.
  const imported = await markdownToProseMirror(md1);
  const att = findNodes(imported, "attachment");
  assert.equal(att.length, 1);
  assert.equal(att[0].attrs?.name, fileName, "apostrophe preserved verbatim");
});
|
|
|
|
test("import: a colored span that is also a comment keeps the comment mark", async () => {
  const markup = '<span data-comment-id="c1" style="color: red">x</span>';
  const out = await markdownToProseMirror(markup);

  // List the type of every mark found on any text node.
  const marks = [];
  for (const node of findNodes(out, "text")) {
    for (const mark of node.marks || []) {
      marks.push(mark.type);
    }
  }

  assert.ok(marks.includes("comment"), "comment mark must survive (textStyle must not steal the span)");
});
|
|
|
|
test("import: a colored mention span keeps the mention node", async () => {
  const markup = '<span data-type="mention" data-id="u1" data-label="Alice" style="color: blue">@Alice</span>';
  const out = await markdownToProseMirror(markup);

  // The inline color style must not prevent the span from importing as a mention.
  const mentions = findNodes(out, "mention");
  assert.equal(mentions.length, 1, "mention node must survive a colored span");
});
|
|
|
|
// ---------------------------------------------------------------------------
// #293 STEP 5 canon safety net. These assert STRUCTURE/content preservation
// (format-agnostic: the node/mark and its value survive PM -> markdown -> PM,
// and the markdown is idempotent), NOT the exact markdown bytes — so they stay
// valid regardless of the concrete canonical spelling. They cover the node/mark
// types whose canonical markdown form changed in #293 (highlight-without-color,
// textAlign, subpages, inline footnotes) and complement the existing math /
// media / mention / column round-trips above.
// ---------------------------------------------------------------------------
|
|
test("round-trip: highlight WITHOUT a color survives as a highlight mark (==)", async () => {
  const highlighted = text("hi", [{ type: "highlight", attrs: { color: null } }]);
  const input = doc(para(highlighted));

  const md = convertProseMirrorToMarkdown(input);
  const out = await roundtrip(input);

  // Look for the "hi" text run that still carries a highlight mark.
  const hasHighlight = (node) => (node.marks || []).some((mark) => mark.type === "highlight");
  const hit = findNodes(out, "text").find((node) => node.text === "hi" && hasHighlight(node));
  assert.ok(hit, "the highlight mark must survive a color-less round-trip");

  // Idempotent markdown.
  const mdAgain = convertProseMirrorToMarkdown(out);
  assert.equal(mdAgain, md);
});
|
|
|
|
test("round-trip: paragraph textAlign survives via the attached-comment directive", async () => {
  // Built by hand because the para() helper does not set attrs.
  const centered = {
    type: "paragraph",
    attrs: { textAlign: "center" },
    content: [text("mid")],
  };
  const input = doc(centered);

  const md = convertProseMirrorToMarkdown(input);
  const out = await roundtrip(input);

  // Some paragraph in the result must carry the center alignment again.
  const isCentered = (node) => node.attrs?.textAlign === "center";
  const p = findNodes(out, "paragraph").find(isCentered);
  assert.ok(p, "textAlign must be restored on the paragraph");

  const mdAgain = convertProseMirrorToMarkdown(out);
  assert.equal(mdAgain, md, "textAlign round-trip is idempotent");
});
|
|
|
|
test("round-trip: subpages atom survives", async () => {
  // A document consisting solely of an attribute-less subpages node.
  const out = await roundtrip(doc({ type: "subpages" }));

  const subpages = findNodes(out, "subpages");
  assert.equal(subpages.length, 1, "subpages node must survive");
});
|
|
|
|
test("round-trip: inline footnote survives with body text (canonical structure)", async () => {
  // A reference in the body paragraph plus its matching definition, linked by id.
  const footnoteId = "fnA";
  const reference = { type: "footnoteReference", attrs: { id: footnoteId } };
  const definition = {
    type: "footnoteDefinition",
    attrs: { id: footnoteId },
    content: [para(text("the evidence"))],
  };
  const input = doc(
    para(text("Claim"), reference),
    { type: "footnotesList", content: [definition] },
  );

  const md = convertProseMirrorToMarkdown(input);
  const out = await roundtrip(input);

  // Each structural piece of the footnote must be present exactly once.
  for (const nodeType of ["footnoteReference", "footnotesList", "footnoteDefinition"]) {
    assert.equal(findNodes(out, nodeType).length, 1);
  }
  assert.match(JSON.stringify(out), /the evidence/, "footnote body survives");

  // Byte-stable (the schema id is never written to markdown).
  const mdAgain = convertProseMirrorToMarkdown(out);
  assert.equal(mdAgain, md);
});
|