Files
gitmost/packages/prosemirror-markdown/test/serializer-contract.test.ts
T
claude code agent 227 b751852425 fix(prosemirror-markdown): converter inventory bugs — spoiler/link-title in raw-HTML, contract test, codeCombined dead code (#293)
The four bugs found during the #293 HTML-emission inventory, fixed in the package:

1. Spoiler mark was silently lost in the raw-HTML path: inlineToHtml (columns /
   spanned cells) had no `case "spoiler"`, so spoilered text there dropped the
   mark on round-trip. Now emits `<span data-spoiler="true">` — the same form the
   top-level serializer uses and exactly what the schema's Spoiler mark parses.

2. Link `title` was dropped in the raw-HTML path: inlineToHtml's link case
   emitted `<a href>` without the title. The schema's link mark carries a
   `title` global attr (DocmostAttributes), so a titled link inside a column now
   round-trips via `<a href … title=…>`.

3. Serializer contract test: emoji/date/toc were flagged as possibly caseless
   inline atoms. Verified they exist in NEITHER the package schema NOR
   editor-ext, so no node handling is needed today. Added
   serializer-contract.test.ts, which derives every node type from the live
   schema (getSchema(docmostExtensions)) and asserts each has an explicit
   serializer `case` — all 45 current node types are covered and present, and a
   future node added without a case will fail this test loudly.

4. codeCombined dead code: `const codeCombined = false` was hardcoded, so every
   `codeCombined ? <html> : <markdown>` ternary always took the markdown branch.
   Removed the variable and the dead HTML-alternative branches (bold/italic/code/
   link/strike). Pure cleanup — output is byte-identical (goldens + full suite
   pass unchanged). The `hasCode` early-return (code excludes other marks) stays.

Tests: spoiler-inside-column and link-title-inside-column round-trips, the
serializer contract test + inline-atom non-empty behavioral checks.

package vitest: 657 passed; tsc clean. git-sync: 268 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 10:37:35 +03:00

195 lines
7.6 KiB
TypeScript

import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { describe, expect, it } from "vitest";
import { getSchema } from "@tiptap/core";
import { docmostExtensions } from "../src/lib/docmost-schema.js";
import { convertProseMirrorToMarkdown } from "../src/lib/markdown-converter.js";
import { markdownToProseMirror } from "../src/lib/markdown-to-prosemirror.js";
/**
* SERIALIZER-CONTRACT GUARD (#293 canon #inventory, bug 3).
*
* The markdown serializer (`convertProseMirrorToMarkdown`) dispatches on
* `node.type` in a big `switch`. Any node type that reaches the `default` arm
* is NOT serialized as itself — it silently collapses to its children's text
* (or, for an ATOM node with no children, to the empty string). The canon
* inventory flagged exactly this class: had the editor schema declared inline
* atoms like `emoji`/`date`/`toc`, a document could carry one and the converter
* would drop it with no case and no error (a git-sync data-loss on the data
* path).
*
* INVARIANT: every node type declared in the package schema
* (`docmostExtensions`) has an EXPLICIT serializer case. This test derives the
* node-type set from the live schema and asserts a `case "<name>":` exists in
* the serializer source for each. A future node added to the schema WITHOUT a
* serializer case (the emoji/date/toc failure mode) fails here loudly.
*
* We scan the SOURCE (not behavioral output) because it is the only formulation
* that reliably catches a missing case for EVERY node kind: a missing case on a
* *container* node still emits its children via `default` (non-empty output, so
* a behavioral non-empty check would pass while structure was lost), whereas the
* source scan catches the drop regardless of whether the node is an atom or a
* container. A complementary behavioral check for the atom case follows.
*/
// Full text of the serializer module, read once when this test file loads.
// The location is resolved against this file's own URL (import.meta.url), not
// against the process working directory.
const serializerSourceUrl = new URL(
  "../src/lib/markdown-converter.ts",
  import.meta.url,
);
const SERIALIZER_SOURCE = readFileSync(
  fileURLToPath(serializerSourceUrl),
  "utf8",
);
/**
 * Lists the name of every node type in the schema that `getSchema` builds
 * from `docmostExtensions`.
 *
 * @returns the keys of `schema.nodes`, sorted with the default `Array.sort`
 *   ordering so the generated test list is stable.
 */
function schemaNodeNames(): string[] {
  const { nodes } = getSchema(docmostExtensions as never);
  const names = Object.keys(nodes);
  names.sort();
  return names;
}
describe("serializer contract: every schema node type has a serializer case", () => {
  // Build the schema once; the sanity check and the generated per-node tests
  // below both iterate this same list.
  const nodeNames = schemaNodeNames();

  // Escapes regex metacharacters so a node name is always matched literally
  // when it is interpolated into the `case` pattern.
  const escapeRegExp = (text: string): string =>
    text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  it("covers a known, non-trivial set of node types", () => {
    // Sanity: the schema really does expose the full Docmost node surface, so
    // this test is not vacuously iterating an empty/tiny list.
    expect(nodeNames.length).toBeGreaterThanOrEqual(40);
    // Representative atoms that would silently drop without a case.
    expect(nodeNames).toContain("status");
    expect(nodeNames).toContain("mention");
  });

  for (const name of nodeNames) {
    it(`serializer has an explicit case for node type "${name}"`, () => {
      // The scan is file-wide: it relies on node names and mark names not
      // colliding, so a `case "<node>"` anywhere in the serializer is taken to
      // be that node's case. Either quote style and any whitespace after
      // `case` are accepted so a formatter change cannot cause a false failure.
      const pattern = new RegExp(
        `case\\s+(["'])${escapeRegExp(name)}\\1\\s*:`,
      );
      expect(
        pattern.test(SERIALIZER_SOURCE),
        `Node type "${name}" is declared in the package schema but has no ` +
          `case "${name}": in convertProseMirrorToMarkdown — it would fall ` +
          `through to the default arm and be silently dropped on git-sync ` +
          `export. Add a lossless serializer case (see mention/status).`,
      ).toBe(true);
    });
  }
});
/**
 * Runtime counterpart to the source scan. An INLINE ATOM without a serializer
 * case would collapse to "" through the default arm (the emoji/date/toc risk).
 * These tests serialize a paragraph holding a single `mention` or `status`
 * node and check that the output is non-empty and carries the node's
 * `data-type` marker, i.e. the default-drop path was not taken for them.
 */
describe("serializer contract: inline atoms are not dropped to empty", () => {
  // Wraps the given inline nodes in one paragraph inside a doc node.
  const docWithParagraph = (...inline: any[]) => ({
    type: "doc",
    content: [{ type: "paragraph", content: inline }],
  });

  it("mention serializes to non-empty output", () => {
    const mentionNode = { type: "mention", attrs: { id: "u1", label: "Bob" } };
    const markdown = convertProseMirrorToMarkdown(docWithParagraph(mentionNode));

    expect(markdown.trim()).not.toBe("");
    expect(markdown).toContain('data-type="mention"');
  });

  it("status serializes to non-empty output", () => {
    const statusNode = { type: "status", attrs: { text: "Done", color: "green" } };
    const markdown = convertProseMirrorToMarkdown(docWithParagraph(statusNode));

    expect(markdown.trim()).not.toBe("");
    expect(markdown).toContain('data-type="status"');
  });
});
/**
* Raw-HTML path (columns) round-trips for the two marks fixed alongside the
* contract test. A column renders its inline content via `inlineToHtml`, whose
* mark switch previously lacked a `spoiler` case (bug 1) and dropped a link's
* `title` (bug 2).
*/
/**
 * Searches a ProseMirror JSON tree depth-first, in document order, for the
 * first text node that carries a mark of the given type.
 *
 * @param n root of the tree to search; values that are not objects are ignored
 * @param markType the mark `type` to look for in a text node's `marks` array
 * @returns the first matching text node, or `undefined` when there is none
 */
function findMarkedText(n: any, markType: string): any {
  // Explicit stack in place of recursion; children are pushed right-to-left so
  // they are popped (and therefore visited) left-to-right.
  const pending: any[] = [n];

  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") continue;

    const carriesMark =
      node.type === "text" &&
      Array.isArray(node.marks) &&
      node.marks.some((m: any) => m?.type === markType);
    if (carriesMark) return node;

    if (Array.isArray(node.content)) {
      for (let i = node.content.length - 1; i >= 0; i--) {
        pending.push(node.content[i]);
      }
    }
  }

  return undefined;
}
describe("raw-HTML path (columns): spoiler + link title round-trip", () => {
  const paragraphOf = (...inline: any[]) => ({
    type: "paragraph",
    content: inline,
  });
  const columnOf = (...blocks: any[]) => ({
    type: "column",
    attrs: { width: "50%" },
    content: blocks,
  });
  // Doc with one two-column layout: the first column holds the text node under
  // test, the second holds unmarked filler text.
  const twoColumnDoc = (markedText: any) => ({
    type: "doc",
    content: [
      {
        type: "columns",
        content: [
          columnOf(paragraphOf(markedText)),
          columnOf(paragraphOf({ type: "text", text: "plain" })),
        ],
      },
    ],
  });

  it("bug 1: a spoiler mark inside a column survives the round trip", async () => {
    const original = twoColumnDoc({
      type: "text",
      text: "hidden",
      marks: [{ type: "spoiler" }],
    });

    const md = convertProseMirrorToMarkdown(original);
    // The raw-HTML path must emit the schema's spoiler span (RED before bug 1
    // fix: inlineToHtml had no spoiler case, so the mark was dropped and the
    // text emitted bare).
    expect(md).toContain('data-spoiler="true"');
    expect(md).toContain("<span data-spoiler=\"true\">hidden</span>");

    // Parse the markdown back and confirm the mark is still on the same text.
    const back = await markdownToProseMirror(md);
    const spoilered = findMarkedText(back, "spoiler");
    expect(spoilered).toBeDefined();
    expect(spoilered.text).toBe("hidden");
  });

  it("bug 2: a link with a title inside a column keeps its title", async () => {
    const titledLink = {
      type: "link",
      attrs: { href: "https://example.com", title: "Example Title" },
    };
    const original = twoColumnDoc({
      type: "text",
      text: "site",
      marks: [titledLink],
    });

    const md = convertProseMirrorToMarkdown(original);
    // The raw-HTML anchor must carry the title (RED before bug 2 fix:
    // inlineToHtml emitted <a href> with no title).
    expect(md).toContain('title="Example Title"');
    expect(md).toContain('href="https://example.com"');

    const back = await markdownToProseMirror(md);
    const linked = findMarkedText(back, "link");
    expect(linked).toBeDefined();
    const linkMark = linked.marks.find((m: any) => m.type === "link");
    expect(linkMark.attrs?.href).toBe("https://example.com");
    // The schema's link mark carries `title`; it must round-trip through the
    // raw-HTML column path.
    expect(linkMark.attrs?.title).toBe("Example Title");
  });
});