b751852425
The four bugs found during the #293 HTML-emission inventory, fixed in the package: 1. Spoiler mark was silently lost in the raw-HTML path: inlineToHtml (columns / spanned cells) had no `case "spoiler"`, so spoilered text there dropped the mark on round-trip. Now emits `<span data-spoiler="true">` — the same form the top-level serializer uses and exactly what the schema's Spoiler mark parses. 2. Link `title` was dropped in the raw-HTML path: inlineToHtml's link case emitted `<a href>` without the title. The schema's link mark carries a `title` global attr (DocmostAttributes), so a titled link inside a column now round-trips via `<a href … title=…>`. 3. Serializer contract test: emoji/date/toc were flagged as possibly caseless inline atoms. Verified they exist in NEITHER the package schema NOR editor-ext, so no node handling is needed today. Added serializer-contract.test.ts, which derives every node type from the live schema (getSchema(docmostExtensions)) and asserts each has an explicit serializer `case` — all 45 current node types are covered and present, and a future node added without a case will fail this test loudly. 4. codeCombined dead code: `const codeCombined = false` was hardcoded, so every `codeCombined ? <html> : <markdown>` ternary always took the markdown branch. Removed the variable and the dead HTML-alternative branches (bold/italic/code/ link/strike). Pure cleanup — output is byte-identical (goldens + full suite pass unchanged). The `hasCode` early-return (code excludes other marks) stays. Tests: spoiler-inside-column and link-title-inside-column round-trips, the serializer contract test + inline-atom non-empty behavioral checks. package vitest: 657 passed; tsc clean. git-sync: 268 passed. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
195 lines
7.6 KiB
TypeScript
195 lines
7.6 KiB
TypeScript
import { readFileSync } from "node:fs";
|
|
import { fileURLToPath } from "node:url";
|
|
|
|
import { describe, expect, it } from "vitest";
|
|
import { getSchema } from "@tiptap/core";
|
|
|
|
import { docmostExtensions } from "../src/lib/docmost-schema.js";
|
|
import { convertProseMirrorToMarkdown } from "../src/lib/markdown-converter.js";
|
|
import { markdownToProseMirror } from "../src/lib/markdown-to-prosemirror.js";
|
|
|
|
/**
 * SERIALIZER-CONTRACT GUARD (#293 canon #inventory, bug 3).
 *
 * The markdown serializer (`convertProseMirrorToMarkdown`) dispatches on
 * `node.type` in a big `switch`. Any node type that reaches the `default` arm
 * is NOT serialized as itself — it silently collapses to its children's text
 * (or, for an ATOM node with no children, to the empty string). The canon
 * inventory flagged exactly this class: had the editor schema declared inline
 * atoms like `emoji`/`date`/`toc`, a document could carry one and the converter
 * would drop it with no case and no error (a git-sync data-loss on the data
 * path).
 *
 * INVARIANT: every node type declared in the package schema
 * (`docmostExtensions`) has an EXPLICIT serializer case. This test derives the
 * node-type set from the live schema and asserts a `case "<name>":` exists in
 * the serializer source for each. A future node added to the schema WITHOUT a
 * serializer case (the emoji/date/toc failure mode) fails here loudly.
 *
 * We scan the SOURCE (not behavioral output) because it is the only formulation
 * that reliably catches a missing case for EVERY node kind: a missing case on a
 * *container* node still emits its children via `default` (non-empty output, so
 * a behavioral non-empty check would pass while structure was lost), whereas the
 * source scan catches the drop regardless of whether the node is an atom or a
 * container. A complementary behavioral check for the atom case follows.
 */
|
|
|
|
// Full text of the serializer module, read once at load time. The path is
// resolved against this module's own URL rather than the working directory.
const SERIALIZER_SOURCE: string = readFileSync(
  fileURLToPath(new URL("../src/lib/markdown-converter.ts", import.meta.url)),
  { encoding: "utf8" },
);
|
|
|
|
/**
 * Lists the node type names of the schema built from `docmostExtensions`.
 *
 * @returns the keys of the schema's `nodes` map, sorted with the default
 *   `Array.prototype.sort` ordering.
 */
function schemaNodeNames(): string[] {
  const { nodes } = getSchema(docmostExtensions as never);
  const names = Object.keys(nodes);
  names.sort();
  return names;
}
|
|
|
|
describe("serializer contract: every schema node type has a serializer case", () => {
  // Built once and shared by the sanity check and the generated per-node tests,
  // so both look at the same list and the schema is not constructed twice.
  const nodeNames = schemaNodeNames();

  // Escapes regex metacharacters so a node name is always matched literally
  // when it is interpolated into the `case "<name>":` pattern below.
  const escapeRegExp = (text: string): string =>
    text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  it("covers a known, non-trivial set of node types", () => {
    // Sanity: the schema really does expose the full Docmost node surface, so
    // this test is not vacuously iterating an empty/tiny list.
    expect(nodeNames.length).toBeGreaterThanOrEqual(40);
    // A representative atom that would silently drop without a case.
    expect(nodeNames).toContain("status");
    expect(nodeNames).toContain("mention");
  });

  // One generated test per node type, so a failure names the exact node that
  // is missing its serializer case.
  for (const name of nodeNames) {
    it(`serializer has an explicit case for node type "${name}"`, () => {
      // Node names and mark names never collide, so a `case "<node>"` anywhere
      // in the serializer is that node's case (marks have distinct names).
      const pattern = new RegExp(`case "${escapeRegExp(name)}"\\s*:`);
      expect(
        pattern.test(SERIALIZER_SOURCE),
        `Node type "${name}" is declared in the package schema but has no ` +
          `case "${name}": in convertProseMirrorToMarkdown — it would fall ` +
          `through to the default arm and be silently dropped on git-sync ` +
          `export. Add a lossless serializer case (see mention/status).`,
      ).toBe(true);
    });
  }
});
|
|
|
|
/**
 * Runtime counterpart to the source scan: an inline atom that has no
 * serializer case would come out of the default arm as "" (the emoji/date/toc
 * risk). For the two inline atoms the schema declares — mention and status —
 * check that the serialized output is non-empty and carries the atom's
 * `data-type` marker, i.e. the default-drop path is not what handled them.
 */
describe("serializer contract: inline atoms are not dropped to empty", () => {
  // One entry per inline atom: the node type and the attrs it is created with.
  const inlineAtoms: any[] = [
    { type: "mention", attrs: { id: "u1", label: "Bob" } },
    { type: "status", attrs: { text: "Done", color: "green" } },
  ];

  // Wraps a single inline node as doc > paragraph > node.
  const docWith = (inline: any): { type: string; content: any[] } => ({
    type: "doc",
    content: [{ type: "paragraph", content: [inline] }],
  });

  for (const atom of inlineAtoms) {
    it(`${atom.type} serializes to non-empty output`, () => {
      const markdown = convertProseMirrorToMarkdown(docWith(atom));

      expect(markdown.trim()).not.toBe("");
      expect(markdown).toContain(`data-type="${atom.type}"`);
    });
  }
});
|
|
|
|
/**
 * Raw-HTML path (columns) round-trips for the two marks fixed alongside the
 * contract test. A column renders its inline content via `inlineToHtml`, whose
 * mark switch previously lacked a `spoiler` case (bug 1) and dropped a link's
 * `title` (bug 2).
 */
|
|
|
|
/**
 * Searches a ProseMirror-style JSON tree, in document order, for the first
 * `text` node carrying a mark of the requested type.
 *
 * @param n - root of the tree to search; non-object values yield `undefined`.
 * @param markType - the mark `type` string to look for.
 * @returns the first matching text node, or `undefined` when none is found.
 */
function findMarkedText(n: any, markType: string): any {
  // Explicit stack for a pre-order walk; children are pushed in reverse so the
  // left-most child is popped (and therefore visited) first.
  const pending: any[] = [n];

  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") continue;

    const isMarkedText =
      node.type === "text" &&
      Array.isArray(node.marks) &&
      node.marks.some((mark: any) => mark?.type === markType);
    if (isMarkedText) return node;

    if (Array.isArray(node.content)) {
      for (let i = node.content.length - 1; i >= 0; i -= 1) {
        pending.push(node.content[i]);
      }
    }
  }

  return undefined;
}
|
|
|
|
describe("raw-HTML path (columns): spoiler + link title round-trip", () => {
  // Builds doc > columns > [column, column]. The first column holds a paragraph
  // with the inline node under test; the second holds a paragraph of unmarked
  // text. Both columns are given a width of "50%".
  const twoColumnDoc = (inline: any): { type: string; content: any[] } => {
    const columnOf = (inlineNode: any) => ({
      type: "column",
      attrs: { width: "50%" },
      content: [{ type: "paragraph", content: [inlineNode] }],
    });
    return {
      type: "doc",
      content: [
        {
          type: "columns",
          content: [columnOf(inline), columnOf({ type: "text", text: "plain" })],
        },
      ],
    };
  };

  it("bug 1: a spoiler mark inside a column survives the round trip", async () => {
    const spoilerText = {
      type: "text",
      text: "hidden",
      marks: [{ type: "spoiler" }],
    };
    const markdown = convertProseMirrorToMarkdown(twoColumnDoc(spoilerText));

    // Export side: the column's raw HTML must contain the spoiler span. Before
    // the bug 1 fix `inlineToHtml` had no spoiler case, so the mark was dropped
    // and the text was emitted bare.
    expect(markdown).toContain('data-spoiler="true"');
    expect(markdown).toContain('<span data-spoiler="true">hidden</span>');

    // Import side: parsing the markdown back must yield spoiler-marked text.
    const reparsed = await markdownToProseMirror(markdown);
    const spoilerNode = findMarkedText(reparsed, "spoiler");
    expect(spoilerNode).toBeDefined();
    expect(spoilerNode.text).toBe("hidden");
  });

  it("bug 2: a link with a title inside a column keeps its title", async () => {
    const href = "https://example.com";
    const title = "Example Title";
    const linkedText = {
      type: "text",
      text: "site",
      marks: [{ type: "link", attrs: { href, title } }],
    };
    const markdown = convertProseMirrorToMarkdown(twoColumnDoc(linkedText));

    // Export side: the raw-HTML anchor must carry the title. Before the bug 2
    // fix `inlineToHtml` emitted <a href> with no title.
    expect(markdown).toContain(`title="${title}"`);
    expect(markdown).toContain(`href="${href}"`);

    // Import side: the link mark must come back with both href and title, i.e.
    // the title round-trips through the raw-HTML column path.
    const reparsed = await markdownToProseMirror(markdown);
    const linkNode = findMarkedText(reparsed, "link");
    expect(linkNode).toBeDefined();
    const [linkMark] = linkNode.marks.filter((mark: any) => mark.type === "link");
    expect(linkMark.attrs?.href).toBe(href);
    expect(linkMark.attrs?.title).toBe(title);
  });
});
|