Files
gitmost/packages/mcp/test/unit/markdown-converter.test.mjs
T
claude code agent 227 124f5a45a2 refactor(mcp): consume @docmost/prosemirror-markdown, drop the drifted converter copy (#293/#326 step 5)
mcp had its OWN drifted copy of the converter (markdown-converter.ts ~900 lines,
docmost-schema.ts ~1270 lines, markdown-document.ts) — older than the shared
package, missing the git-sync fixes AND the #293 canon. This switches mcp's
converter CORE to @docmost/prosemirror-markdown, so mcp jumps straight to the
canonical format and the drift-generating second copy is gone.

- markdown-converter.ts / markdown-document.ts / docmost-schema.ts become thin
  re-export shims of the package (convertProseMirrorToMarkdown, the docmost:meta
  envelope, docmostExtensions + docmostSchema=getSchema(docmostExtensions)). The
  mcp-only helpers clampCalloutType/sanitizeCssColor are preserved verbatim in
  the schema shim (the package doesn't expose them via its barrel). ~2170 lines
  of the drifted converter/schema bodies deleted.
- collaboration.ts drops its own ~360-line marked pipeline (preprocessCallouts,
  bridgeTaskLists, extractFootnotes, the footnoteRef extension) and re-points to
  the package's markdownToProseMirror, keeping markdownToProseMirrorCanonical and
  all the yjs/collab write glue. footnote-lex/analyze doc comments updated (they
  now describe advisory legacy-syntax diagnostics, not an importer).

Schema parity verified: the package schema is a strict SUPERSET of mcp's old
schema — every node and attr mcp declared is present (the package only adds
status/pageEmbed/transclusion*/subpages.recursive/etc.), so nothing is silently
dropped on the switch. The switch actually FIXES two pre-existing mcp data-loss
bugs its own tests documented: htmlEmbed and pageBreak now round-trip (were
dropped by the old mcp converter).

Footnotes: the package assembles inline ^[body] footnotes on import (sequential
fn-N ids, identical bodies merged), so mcp's canonicalizeFootnotes is now an
idempotent no-op after it (verified). Legacy reference footnotes [^id]/[^id]:
are inert literal text (canon #2 no-backward-compat) — lossless, the text
survives verbatim.

Build hygiene: packages/mcp/build/ is now gitignored and untracked, matching the
git-sync/prosemirror-markdown convention (private package, rebuilt in CI/Docker,
so src and prod can never silently diverge). This also removes a dead untracked
build/_vendored_editor_ext/ artifact that a broad `git add` would otherwise
commit.

Dependency: packages/mcp/package.json gains @docmost/prosemirror-markdown
(workspace:*); pnpm-lock.yaml gets the matching link importer (mirrors git-sync).

mcp tests updated deliberately to the canonical forms (highlight ==, math $…$,
image ![](src)<!--img-->, drawio/media discriminators, subpages/pageBreak
comments, textAlign, inline ^[…] footnotes) with strict assertions; 4 structural
safety-net round-trip tests added.

mcp: node --test 454 passed; tsc clean. package: 657 passed. git-sync: 268 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 11:16:09 +03:00

187 lines
5.5 KiB
JavaScript

import { test } from "node:test";
import assert from "node:assert/strict";
import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
// Minimal ProseMirror JSON node builders used by the tests below.

/** Text node; the `marks` key is only present when a truthy value is passed. */
function text(t, marks) {
  const node = { type: "text", text: t };
  if (marks) {
    node.marks = marks;
  }
  return node;
}

/** Paragraph node wrapping the given inline children. */
function paragraph(...content) {
  return { type: "paragraph", content };
}

/** Root document node wrapping the given block children. */
function doc(...content) {
  return { type: "doc", content };
}

/** List item node wrapping the given block children. */
function listItem(...content) {
  return { type: "listItem", content };
}

/** Bullet (unordered) list node holding the given items. */
function bulletList(...items) {
  return { type: "bulletList", content: items };
}

/** Ordered list node holding the given items. */
function orderedList(...items) {
  return { type: "orderedList", content: items };
}
test("nested bulletList with 3 children keeps all children indented under the parent", () => {
  // One parent item whose body is a paragraph followed by a nested
  // three-item bullet list.
  const input = doc(
    bulletList(
      listItem(
        paragraph(text("Parent")),
        bulletList(
          listItem(paragraph(text("A"))),
          listItem(paragraph(text("B"))),
          listItem(paragraph(text("C"))),
        ),
      ),
    ),
  );
  // Children must be indented by the width of the parent's "- " marker
  // (two columns). With a single leading space CommonMark would parse them
  // as siblings of "Parent" rather than as a nested list.
  assert.equal(
    convertProseMirrorToMarkdown(input),
    "- Parent\n  - A\n  - B\n  - C",
  );
});
test("nested list under an ordered item indents 3 spaces", () => {
  // An ordered item whose body is a paragraph followed by a nested bullet list.
  const input = doc(
    orderedList(
      listItem(
        paragraph(text("Parent")),
        bulletList(listItem(paragraph(text("Child")))),
      ),
    ),
  );
  // The "1. " marker is three columns wide, so the nested list is indented by
  // exactly three spaces (as the test name states) to stay inside the item.
  assert.equal(
    convertProseMirrorToMarkdown(input),
    "1. Parent\n   - Child",
  );
});
test("link with title -> [t](url \"title\")", () => {
  // A link mark carrying both an href and a title attribute.
  const linkMark = {
    type: "link",
    attrs: { href: "https://example.com", title: "the title" },
  };
  const tree = doc(paragraph(text("click", [linkMark])));

  const markdown = convertProseMirrorToMarkdown(tree);

  // The title is expected double-quoted after the URL, inside the parentheses.
  assert.equal(markdown, '[click](https://example.com "the title")');
});
test("hardBreak -> trailing two-spaces+newline", () => {
  // Two text runs in one paragraph, separated by a hardBreak node.
  const input = doc(
    paragraph(text("line1"), { type: "hardBreak" }, text("line2")),
  );
  // A markdown hard line break is TWO trailing spaces followed by a newline;
  // a single trailing space would only be a soft break.
  assert.equal(convertProseMirrorToMarkdown(input), "line1  \nline2");
});
test("table cell with two block children falls back to a raw HTML table", () => {
  // One-row, one-cell table whose only cell holds two paragraphs.
  const cell = {
    type: "tableCell",
    content: [paragraph(text("a|b")), paragraph(text("c"))],
  };
  const row = { type: "tableRow", content: [cell] };
  const tree = doc({ type: "table", content: [row] });

  // Two block children cannot be expressed in a pipe-table cell, so the
  // canonical converter is expected to emit the entire table as raw HTML
  // (lossless) instead of lossily merging the paragraphs into one cell.
  const expectedHtml =
    "<table><tbody><tr><td><p>a|b</p><p>c</p></td></tr></tbody></table>";
  assert.equal(convertProseMirrorToMarkdown(tree), expectedHtml);
});
test("code block trailing newline trimmed", () => {
  // A js code block whose text content ends with a newline.
  const codeNode = {
    type: "codeBlock",
    attrs: { language: "js" },
    content: [text("const a = 1;\n")],
  };

  const markdown = convertProseMirrorToMarkdown(doc(codeNode));

  // The code's own single trailing newline is trimmed; the fences supply one.
  assert.equal(markdown, "```js\nconst a = 1;\n```");
});
test("textAlign is carried in a trailing attached-comment directive (JSON-encoded, safe)", () => {
  // A deliberately hostile alignment value containing `"`, `<` and `>`.
  const hostileAlign = 'right"><b';
  const tree = doc({
    type: "paragraph",
    attrs: { textAlign: hostileAlign },
    content: [text("body")],
  });

  // #293 canon #9: markdown has no native syntax for paragraph textAlign, so
  // it rides in a trailing `<!--attrs {json}-->` comment attached to the block.
  // Because the value is JSON-encoded, the hostile characters are carried
  // verbatim and stay inert — they cannot break out of the comment.
  const expected = 'body <!--attrs {"textAlign":"right\\"><b"}-->';
  assert.equal(convertProseMirrorToMarkdown(tree), expected);
});
test("highlight color: delimiting double-quote escaped (attribute-safe; < > inert, and import sanitizes the color)", () => {
  // Highlight mark whose color tries to break out of the style attribute.
  const highlightMark = { type: "highlight", attrs: { color: 'red"><script' } };
  const tree = doc(paragraph(text("hi", [highlightMark])));

  const markdown = convertProseMirrorToMarkdown(tree);

  // Only the attribute-delimiting double quote is expected to be escaped.
  assert.equal(
    markdown,
    '<mark style="background-color: red&quot;><script">hi</mark>',
  );
});
test("empty task item still emits its marker", () => {
  // Builds a task item with no content and the given checked state.
  const emptyTaskItem = (checked) => ({
    type: "taskItem",
    attrs: { checked },
    content: [],
  });
  const tree = doc({
    type: "taskList",
    content: [emptyTaskItem(false), emptyTaskItem(true)],
  });

  const markdown = convertProseMirrorToMarkdown(tree);

  // One marker line per item: unchecked first, then checked.
  assert.equal(markdown, "- [ ]\n- [x]");
});
// Image captions (issue #221 / #293 canon #8). An image WITHOUT a caption stays
// the plain `![alt](src)`; WITH a caption (or any other non-src attr) the extra
// attrs ride in a trailing `<!--img {json}-->` discriminator comment on the
// markdown image form, so the round-trip md -> json restores them.
test("image without a caption emits plain ![alt](src)", () => {
  // Image node carrying only src and alt.
  const imageNode = {
    type: "image",
    attrs: { src: "/files/a.png", alt: "cat" },
  };

  const markdown = convertProseMirrorToMarkdown(doc(imageNode));

  assert.equal(markdown, "![cat](/files/a.png)");
});
test("image with a caption emits ![alt](src) plus an <!--img--> directive", () => {
  // Image node carrying src, alt and a caption.
  const imageNode = {
    type: "image",
    attrs: { src: "/files/a.png", alt: "cat", caption: "A grey cat" },
  };

  const markdown = convertProseMirrorToMarkdown(doc(imageNode));

  // The caption is expected in a trailing JSON discriminator comment.
  assert.equal(
    markdown,
    '![cat](/files/a.png) <!--img {"caption":"A grey cat"}-->',
  );
});
test("image caption is JSON-encoded in the <!--img--> directive (& and \" safe)", () => {
  // Image node with no alt and a caption containing `&` and double quotes.
  const imageNode = {
    type: "image",
    attrs: { src: "/files/a.png", caption: 'Tom & "Jerry"' },
  };

  const markdown = convertProseMirrorToMarkdown(doc(imageNode));

  // Quotes are expected backslash-escaped by the JSON encoding; `&` is untouched.
  assert.equal(
    markdown,
    '![](/files/a.png) <!--img {"caption":"Tom & \\"Jerry\\""}-->',
  );
});