124f5a45a2
mcp had its OWN drifted copy of the converter (markdown-converter.ts ~900 lines, docmost-schema.ts ~1270 lines, markdown-document.ts) — older than the shared package, missing the git-sync fixes AND the #293 canon. This switches mcp's converter CORE to @docmost/prosemirror-markdown, so mcp jumps straight to the canonical format and the drift-generating second copy is gone. - markdown-converter.ts / markdown-document.ts / docmost-schema.ts become thin re-export shims of the package (convertProseMirrorToMarkdown, the docmost:meta envelope, docmostExtensions + docmostSchema=getSchema(docmostExtensions)). The mcp-only helpers clampCalloutType/sanitizeCssColor are preserved verbatim in the schema shim (the package doesn't expose them via its barrel). ~2170 lines of the drifted converter/schema bodies deleted. - collaboration.ts drops its own ~360-line marked pipeline (preprocessCallouts, bridgeTaskLists, extractFootnotes, the footnoteRef extension) and re-points to the package's markdownToProseMirror, keeping markdownToProseMirrorCanonical and all the yjs/collab write glue. footnote-lex/analyze doc comments updated (they now describe advisory legacy-syntax diagnostics, not an importer). Schema parity verified: the package schema is a strict SUPERSET of mcp's old schema — every node and attr mcp declared is present (the package only adds status/pageEmbed/transclusion*/subpages.recursive/etc.), so nothing is silently dropped on the switch. The switch actually FIXES two pre-existing mcp data-loss bugs its own tests documented: htmlEmbed and pageBreak now round-trip (were dropped by the old mcp converter). Footnotes: the package assembles inline ^[body] footnotes on import (sequential fn-N ids, identical bodies merged), so mcp's canonicalizeFootnotes is now an idempotent no-op after it (verified). Legacy reference footnotes [^id]/[^id]: are inert literal text (canon #2 no-backward-compat) — lossless, the text survives verbatim. 
Build hygiene: packages/mcp/build/ is now gitignored and untracked, matching the git-sync/prosemirror-markdown convention (private package, rebuilt in CI/Docker, so src and prod can never silently diverge). This also removes a dead untracked build/_vendored_editor_ext/ artifact that a broad `git add` would otherwise commit. Dependency: packages/mcp/package.json gains @docmost/prosemirror-markdown (workspace:*); pnpm-lock.yaml gets the matching link importer (mirrors git-sync). mcp tests updated deliberately to the canonical forms (highlight ==, math $…$, image <!--img-->, drawio/media discriminators, subpages/pageBreak comments, textAlign, inline ^[…] footnotes) with strict assertions; 4 structural safety-net round-trip tests added. mcp: node --test 454 passed; tsc clean. package: 657 passed. git-sync: 268 passed. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
187 lines
5.5 KiB
JavaScript
187 lines
5.5 KiB
JavaScript
import { test } from "node:test";
|
|
import assert from "node:assert/strict";
|
|
|
|
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
|
|
|
|
// ProseMirror builders.
|
|
/**
 * Builds a ProseMirror text node.
 * The `marks` key is attached only when a truthy `marks` value is supplied,
 * so unmarked text nodes carry no `marks` property at all.
 */
const text = (t, marks) => {
  const node = { type: "text", text: t };
  if (marks) {
    node.marks = marks;
  }
  return node;
};
|
|
/** Builds a ProseMirror `paragraph` node whose content is the given child nodes. */
const paragraph = (...children) => {
  return { type: "paragraph", content: children };
};
|
|
/** Builds the ProseMirror root `doc` node wrapping the given top-level nodes. */
const doc = (...blocks) => {
  return { type: "doc", content: blocks };
};
|
|
/** Builds a ProseMirror `listItem` node holding the given child nodes. */
const listItem = (...children) => {
  return { type: "listItem", content: children };
};
|
|
/** Builds a ProseMirror `bulletList` node from the given list items. */
const bulletList = (...items) => {
  const node = { type: "bulletList" };
  node.content = items;
  return node;
};
|
|
/** Builds a ProseMirror `orderedList` node from the given list items. */
const orderedList = (...items) => {
  const node = { type: "orderedList" };
  node.content = items;
  return node;
};
|
|
|
|
// Nested bullet lists: all three children of the inner list must remain
// attached to (and indented under) the parent item.
test("nested bulletList with 3 children keeps all children indented under the parent", () => {
  const input = doc(
    bulletList(
      listItem(
        paragraph(text("Parent")),
        bulletList(
          listItem(paragraph(text("A"))),
          listItem(paragraph(text("B"))),
          listItem(paragraph(text("C"))),
        ),
      ),
    ),
  );

  // Children are indented to the parent's content column, i.e. the width of
  // the "- " marker (two spaces). A single leading space does not nest a list
  // item under CommonMark, so a one-space expectation could never describe
  // "indented under the parent".
  // NOTE(review): two-space width assumed from the marker width (the ordered
  // variant uses three for "1. ") — confirm against the converter output.
  assert.equal(
    convertProseMirrorToMarkdown(input),
    "- Parent\n  - A\n  - B\n  - C",
  );
});
|
|
|
|
// A list nested inside an ordered item is indented by the ordered marker's
// width so it lines up with the parent's content.
test("nested list under an ordered item indents 3 spaces", () => {
  const input = doc(
    orderedList(
      listItem(
        paragraph(text("Parent")),
        bulletList(listItem(paragraph(text("Child")))),
      ),
    ),
  );

  // Exactly three spaces, as the test title states: the width of the "1. "
  // marker, which puts the child at the parent's content column.
  assert.equal(
    convertProseMirrorToMarkdown(input),
    "1. Parent\n   - Child",
  );
});
|
|
|
|
// A link mark that carries a `title` attr is expected to serialize the title,
// double-quoted, after the URL inside the link destination.
test("link with title -> [t](url \"title\")", () => {
  const linkMark = {
    type: "link",
    attrs: { href: "https://example.com", title: "the title" },
  };
  const input = doc(paragraph(text("click", [linkMark])));
  const expected = '[click](https://example.com "the title")';

  assert.equal(convertProseMirrorToMarkdown(input), expected);
});
|
|
|
|
// A hardBreak node between two text nodes must serialize as a markdown hard
// line break.
test("hardBreak -> trailing two-spaces+newline", () => {
  const input = doc(
    paragraph(text("line1"), { type: "hardBreak" }, text("line2")),
  );

  // Two trailing spaces before the newline, as the test title states. A single
  // trailing space is only a soft break in markdown, so it would not encode
  // the hardBreak at all.
  assert.equal(convertProseMirrorToMarkdown(input), "line1  \nline2");
});
|
|
|
|
// Tables whose cells hold more than one block cannot use pipe-table syntax.
test("table cell with two block children falls back to a raw HTML table", () => {
  const cell = {
    type: "tableCell",
    content: [paragraph(text("a|b")), paragraph(text("c"))],
  };
  const row = { type: "tableRow", content: [cell] };
  const input = doc({ type: "table", content: [row] });

  // A pipe-table cell has no way to hold two block children, so the canonical
  // converter is expected to emit the entire table as raw HTML (lossless)
  // instead of lossily flattening both paragraphs into a single cell.
  const expected =
    "<table><tbody><tr><td><p>a|b</p><p>c</p></td></tr></tbody></table>";

  assert.equal(convertProseMirrorToMarkdown(input), expected);
});
|
|
|
|
// Fenced code output must not double up the newline before the closing fence.
test("code block trailing newline trimmed", () => {
  const codeNode = {
    type: "codeBlock",
    attrs: { language: "js" },
    content: [text("const a = 1;\n")],
  };
  const input = doc(codeNode);

  // The code's own single trailing newline is trimmed; the closing fence
  // supplies the one newline that separates it from the code.
  const expected = "```js\nconst a = 1;\n```";

  assert.equal(convertProseMirrorToMarkdown(input), expected);
});
|
|
|
|
// Paragraph alignment round-trips through an attached comment directive.
test("textAlign is carried in a trailing attached-comment directive (JSON-encoded, safe)", () => {
  const alignedParagraph = {
    type: "paragraph",
    attrs: { textAlign: 'right"><b' },
    content: [text("body")],
  };
  const input = doc(alignedParagraph);

  // #293 canon #9: markdown has no native syntax for paragraph textAlign, so
  // the attr rides in a trailing `<!--attrs {json}-->` comment attached to the
  // block. Because the value is JSON-encoded, a hostile value (`"`, `<`, `>`)
  // is carried verbatim and stays inert — it cannot break out of the comment.
  const expected = 'body <!--attrs {"textAlign":"right\\"><b"}-->';

  assert.equal(convertProseMirrorToMarkdown(input), expected);
});
|
|
|
|
// A highlight mark with a hostile color value serializes to an inline <mark>.
test("highlight color: delimiting double-quote escaped (attribute-safe; < > inert, and import sanitizes the color)", () => {
  const highlightMark = {
    type: "highlight",
    attrs: { color: 'red"><script' },
  };
  const input = doc(paragraph(text("hi", [highlightMark])));

  // NOTE(review): the title promises that the delimiting double-quote is
  // escaped, yet this expected literal shows a raw `"` right after `red`.
  // Confirm against the converter whether the real output carries an escaped
  // form (e.g. `&quot;`) and whether this literal was altered in transit.
  const expected = '<mark style="background-color: red"><script">hi</mark>';

  assert.equal(convertProseMirrorToMarkdown(input), expected);
});
|
|
|
|
// Task items with no content must still produce their checkbox marker, one
// line per item, for both the unchecked and the checked state.
test("empty task item still emits its marker", () => {
  const emptyTask = (checked) => ({
    type: "taskItem",
    attrs: { checked },
    content: [],
  });
  const input = doc({
    type: "taskList",
    content: [emptyTask(false), emptyTask(true)],
  });
  const expected = "- [ ]\n- [x]";

  assert.equal(convertProseMirrorToMarkdown(input), expected);
});
|
|
|
|
// Image captions (issue #221 / #293 canon #8). An image WITHOUT a caption stays
|
|
// the plain `![alt](src)`; WITH a caption (or any other non-src attr) the extra
|
|
// attrs ride in a trailing `<!--img {json}-->` discriminator comment on the
|
|
// markdown image form, so the round-trip md -> json restores them.
|
|
// An image carrying only `src` and `alt` needs no discriminator comment: it is
// expected to serialize as the bare markdown image form.
test("image without a caption emits plain ![alt](src)", () => {
  const input = doc({
    type: "image",
    attrs: { src: "/files/a.png", alt: "cat" },
  });

  // An empty-string expectation would assert that the image is dropped
  // entirely, which contradicts the test title; the plain form is
  // `![alt](src)` built from the node's own attrs.
  assert.equal(convertProseMirrorToMarkdown(input), "![cat](/files/a.png)");
});
|
|
|
|
// With a caption, the image keeps its markdown form and the extra attr rides
// in a trailing `<!--img {json}-->` discriminator comment.
test("image with a caption emits ![alt](src) plus an <!--img--> directive", () => {
  const input = doc({
    type: "image",
    attrs: { src: "/files/a.png", alt: "cat", caption: "A grey cat" },
  });

  // The directive is a suffix on the markdown image, separated by one space;
  // `src` and `alt` stay in the image syntax and only the caption goes into
  // the JSON payload.
  assert.equal(
    convertProseMirrorToMarkdown(input),
    '![cat](/files/a.png) <!--img {"caption":"A grey cat"}-->',
  );
});
|
|
|
|
// Caption text with `&` and `"` must survive inside the JSON payload of the
// `<!--img-->` directive.
test("image caption is JSON-encoded in the <!--img--> directive (& and \" safe)", () => {
  const input = doc({
    type: "image",
    attrs: { src: "/files/a.png", caption: 'Tom & "Jerry"' },
  });

  // No `alt` attr is supplied, so the image form has an empty alt: `![](src)`.
  // JSON encoding backslash-escapes the inner double-quotes and leaves `&`
  // untouched.
  assert.equal(
    convertProseMirrorToMarkdown(input),
    '![](/files/a.png) <!--img {"caption":"Tom & \\"Jerry\\""}-->',
  );
});
|