import { describe, expect, it } from 'vitest';

// Import the converter DIRECTLY from src (NOT the docmost-client barrel, which
// pulls in collaboration.ts and mutates the global DOM at import time), matching
// the other converter unit tests. markdownToProseMirror is imported for the
// round-trip cases; loading it mutates the global DOM via jsdom (required for
// @tiptap/html's generateJSON under Node) — this is expected.
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';

// Wrap one or more nodes in a minimal ProseMirror doc. The top-level converter
// joins doc children with "\n\n" then .trim()s, so a single-node doc yields
// exactly that node's rendered (trimmed) string.
const doc = (...nodes: any[]) => ({ type: 'doc', content: nodes });

// Build a plain text node carrying the string `t` (no marks).
const text = (t: string) => ({ type: 'text', text: t });

// Build a paragraph node whose content is the given inline nodes, in order.
const para = (...inline: any[]) => ({ type: 'paragraph', content: inline });

// Run a full export -> import -> export cycle and return both markdown strings
// plus the intermediate ProseMirror doc (mirrors the property test's helper).
//
//   node - a single ProseMirror node; it is wrapped with doc() before export.
//   md1  - markdown produced by the first export of doc(node).
//   doc2 - whatever markdownToProseMirror(md1) resolves to.
//   md2  - markdown produced by exporting doc2 again.
async function roundTrip(node: any): Promise<{ md1: string; doc2: any; md2: string }> {
  const md1 = convertProseMirrorToMarkdown(doc(node));
  const doc2 = await markdownToProseMirror(md1);
  const md2 = convertProseMirrorToMarkdown(doc2);
  return { md1, doc2, md2 };
}

// NOTE(review): the section header that follows still describes pageBreak
// export as missing and refers to an `it.fails` repro, while the tests in the
// section carry "FIXED" comments and use plain `it` — the header looks stale;
// confirm against markdown-converter.ts and update the wording.
// ---------------------------------------------------------------------------
// 1. pageBreak DATA LOSS (markdown-converter.ts has NO `case "pageBreak"`).
//
// The schema declares a `pageBreak` block atom (docmost-schema.ts ~L1009), so a
// real document CAN legally contain one. The converter's switch has no branch
// for it, so it falls through to `default`, which renders only the node's
// children — and a pageBreak atom has NONE.
It therefore exports to "" and the // node silently disappears: an exported markdown file can never carry a page // break, and a round-trip cannot reconstruct it. We pin this as a known // divergence with an `it.fails` round-trip repro (mirroring the package's two // existing documented `it.fails` bugs in markdown-roundtrip.property.test.ts). // --------------------------------------------------------------------------- describe('pageBreak data loss (no converter case — SPEC §11 divergence)', () => { it('exports a pageBreak node to the schema-matching block div', () => { // FIXED: a standalone pageBreak now emits the block-level HTML div so the // node survives instead of being erased to "". expect(convertProseMirrorToMarkdown(doc({ type: 'pageBreak' }))).toBe( '
', ); }); it('keeps a pageBreak sitting BETWEEN two paragraphs on export', () => { // FIXED: with surrounding content the divider is emitted as its own block // between the two paragraphs (joined by the doc "\n\n"), no longer dropped. const out = convertProseMirrorToMarkdown( doc(para(text('before')), { type: 'pageBreak' }, para(text('after'))), ); expect(out).toBe( 'before\n\n
\n\nafter', ); expect(out).toContain('pageBreak'); }); // FIXED: a pageBreak node now survives an export -> import -> export cycle // because the FIRST export emits the schema-matching block div, which marked // passes through and generateJSON rebuilds into a pageBreak node again. it('a pageBreak node round-trips (export -> import yields a pageBreak)', async () => { const { md1, doc2 } = await roundTrip({ type: 'pageBreak' }); expect(md1).not.toBe(''); const types = (doc2.content || []).map((n: any) => n.type); expect(types).toContain('pageBreak'); }); }); // --------------------------------------------------------------------------- // 2. subpages LOSSY round-trip (`case "subpages"` emits `{{SUBPAGES}}`). // // The golden test only pins the EMISSION string. The token has no markdown or // HTML meaning, so on re-import marked treats `{{SUBPAGES}}` as ordinary text: // the subpages BLOCK comes back as a plain PARAGRAPH carrying that literal // string, NOT a `subpages` node. The export is "lossy but legible" by design; // this test pins the actual lossy round-trip behavior. // --------------------------------------------------------------------------- describe('subpages lossy round-trip ({{SUBPAGES}} placeholder)', () => { it('emits {{SUBPAGES}} which re-imports as a paragraph, not a subpages node', async () => { const { md1, doc2 } = await roundTrip({ type: 'subpages' }); expect(md1).toBe('{{SUBPAGES}}'); // The re-imported doc has a single paragraph holding the literal token. const top = doc2.content || []; expect(top).toHaveLength(1); expect(top[0].type).toBe('paragraph'); expect(top[0].content?.[0]).toMatchObject({ type: 'text', text: '{{SUBPAGES}}' }); // The subpages node itself is gone: nothing in the doc is a subpages node. const allTypes = top.map((n: any) => n.type); expect(allTypes).not.toContain('subpages'); }); }); // --------------------------------------------------------------------------- // 3. 
column.width number<->string drift (`case "column"` + width parseHTML). // // The converter emits the width verbatim into `data-width="..."` (a STRING in // the HTML, as all HTML attributes are). On import the schema's `column.width` // parseHTML does `parseFloat(value)`, so the attribute always comes back as a // NUMBER. A document authored/stored with a STRING fractional width therefore // DRIFTS to a number across a round-trip at the ProseMirror-doc level — even // though the emitted MARKDOWN stays byte-stable (the number prints the same). // Pinned here as a documented attribute-type divergence (SPEC §11). // --------------------------------------------------------------------------- describe('column.width number<->string drift (schema parseFloat — SPEC §11)', () => { const columnsWith = (width: any) => ({ type: 'columns', attrs: { layout: 'two' }, content: [ { type: 'column', attrs: { width }, content: [para(text('L'))] }, { type: 'column', content: [para(text('R'))] }, ], }); it('a STRING fractional width drifts to a NUMBER across the round-trip', async () => { const { md1, doc2, md2 } = await roundTrip(columnsWith('33.3')); // The emitted markdown carries the value as an HTML attribute string and is // byte-stable across the cycle (the divergence is at the doc level only). expect(md1).toContain('data-width="33.3"'); expect(md2).toBe(md1); // But the doc attribute type changed: authored as string "33.3", it comes // back as the number 33.3 (schema's parseFloat). This is the drift. 
const rtWidth = doc2.content?.[0]?.content?.[0]?.attrs?.width; expect(typeof rtWidth).toBe('number'); expect(rtWidth).toBe(33.3); }); it('a NUMBER fractional width keeps its value (no precision loss) and is byte-stable', async () => { const { md1, doc2, md2 } = await roundTrip(columnsWith(33.333333)); expect(md1).toContain('data-width="33.333333"'); expect(md2).toBe(md1); const rtWidth = doc2.content?.[0]?.content?.[0]?.attrs?.width; expect(typeof rtWidth).toBe('number'); expect(rtWidth).toBe(33.333333); }); }); // --------------------------------------------------------------------------- // 5b. EMPTY detailsContent (`case "details"` with an empty body). // // detailsContent's schema content is `block*` (docmost-schema.ts ~L474), so an // empty details body is legal. The converter must handle a `detailsContent` // with no children without crashing and without emitting invalid output that // breaks the round-trip. This pins that an empty details body exports cleanly // and re-imports as a valid `details` whose body is an empty `detailsContent`. // --------------------------------------------------------------------------- describe('empty detailsContent (schema allows block*)', () => { const emptyDetails = doc({ type: 'details', content: [ { type: 'detailsSummary', content: [text('Summary')] }, { type: 'detailsContent', content: [] }, ], }); it('exports an empty details body without crashing or producing junk', () => { const md = convertProseMirrorToMarkdown(emptyDetails); // The summary survives and the
wrapper closes; the empty body adds // no content of its own. expect(md).toContain('Summary'); expect(md).toContain('
'); expect(md).not.toContain('undefined'); expect(md).not.toContain('null'); }); it('round-trips to a valid details with an empty detailsContent body', async () => { const md1 = convertProseMirrorToMarkdown(emptyDetails); const doc2 = await markdownToProseMirror(md1); const md2 = convertProseMirrorToMarkdown(doc2); // Export is byte-stable (no growth / no junk on the second pass). expect(md2).toBe(md1); // The re-imported tree is a details with summary + an empty content body. const details = doc2.content?.[0]; expect(details?.type).toBe('details'); const childTypes = (details?.content || []).map((c: any) => c.type); expect(childTypes).toEqual(['detailsSummary', 'detailsContent']); const detailsContent = details.content.find( (c: any) => c.type === 'detailsContent', ); // block* — an empty body has no (or empty) content, which is valid. expect(detailsContent.content == null || detailsContent.content.length === 0).toBe( true, ); }); }); // =========================================================================== // CONVERTER GAP COVERAGE (specs 1–29) // // These describe the converter's exact emission for under-tested branches and, // for the round-trip cases, pin export byte-stability and/or documented data // loss. docsCanonicallyEqual is imported here (not at the top) to keep the // existing block's imports untouched. heading/col are local helpers; doc/text/ // para are reused from the top of the file. // =========================================================================== import { docsCanonicallyEqual } from '../src/lib/canonicalize.js'; const heading = (level: number, ...inline: any[]) => ({ type: 'heading', attrs: { level }, content: inline, }); // A two-layout columns block carrying a single column with exactly one child — // the shared shape for the raw-HTML-container round-trip specs (15, 17–29). 
const oneColumn = (child: any) => ({ type: 'columns', attrs: { layout: 'two' }, content: [{ type: 'column', content: [child] }], }); // Extract the single column's single child node from a round-tripped doc. const colChildOf = (doc2: any) => doc2?.content?.[0]?.content?.[0]?.content?.[0]; describe('converter gap coverage — emission branches (specs 1–11)', () => { // 1. orderedList renders index+1 and DROPS the start attribute. it('orderedList start:5 restarts numbering at 1 (start attr ignored)', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'orderedList', attrs: { start: 5 }, content: [ { type: 'listItem', content: [para(text('a'))] }, { type: 'listItem', content: [para(text('b'))] }, ], }), ); expect(out).toBe('1. a\n2. b'); }); // 2. An empty paragraph contributes an empty segment between two "\n\n" joins. it('an empty paragraph between two paragraphs yields doubled blank lines', () => { const out = convertProseMirrorToMarkdown( doc(para(text('a')), { type: 'paragraph' }, para(text('b'))), ); expect(out).toBe('a\n\n\n\nb'); }); // 3. A code block inside a blockquote: every physical line gets "> ". it('a codeBlock inside a blockquote prefixes every fence/code line with "> "', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'blockquote', content: [ { type: 'codeBlock', attrs: { language: 'js' }, content: [text('a\nb')], }, ], }), ); expect(out).toBe('> ```js\n> a\n> b\n> ```'); }); // 4. A GFM body cell with TWO block children (paragraph + bulletList): joined // by a space, the list's newline collapsed so the row stays intact. 
it('a GFM body cell with paragraph+list joins them by a space (no "p1- a")', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'table', content: [ { type: 'tableRow', content: [{ type: 'tableHeader', content: [para(text('h'))] }], }, { type: 'tableRow', content: [ { type: 'tableCell', content: [ para(text('p1')), { type: 'bulletList', content: [{ type: 'listItem', content: [para(text('a'))] }], }, ], }, ], }, ], }), ); expect(out).toBe('| h |\n| --- |\n| p1 - a |'); }); // 5. code + link co-occur: the schema's `code` mark excludes all other marks // (including link), so the link cannot survive import. The lossless, // byte-stable behavior is to emit ONLY the backtick code span (code wins). it('a code+link run emits the backtick code form (code wins, link dropped)', () => { const out = convertProseMirrorToMarkdown( doc( para({ type: 'text', text: 'x', marks: [ { type: 'code' }, { type: 'link', attrs: { href: 'http://a?b&c"d' } }, ], }), ), ); expect(out).toBe('`x`'); }); // 6. hardBreak inside a heading: prefix applied once, " \n" between a and b. it('a hardBreak inside an h2 heading produces "## a \\nb"', () => { const out = convertProseMirrorToMarkdown( doc(heading(2, text('a'), { type: 'hardBreak' }, text('b'))), ); expect(out).toBe('## a \nb'); }); // 7. encodeMdUrl's non-space whitespace sub-path: a newline -> %0A. it('an image src containing a newline percent-encodes it to %0A', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'image', attrs: { alt: 'cap', src: '/a\nb.png' } }), ); expect(out).toBe('![cap](/a%0Ab.png)'); }); // 8. spanned-table HTML fallback: rowspan>1 AND align cell-attr branches, . it('a spanned cell with rowspan+align emits in that order', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'table', content: [ { type: 'tableRow', content: [ { type: 'tableCell', attrs: { rowspan: 2, align: 'center' }, content: [para(text('m'))], }, ], }, ], }), ); expect(out).toBe( '

m

', ); }); // 9. taskItem fixed indent width of 2 (NOT prefix.length+1) for a nested sublist. it('a task item with a nested bullet sublist indents the sublist by 2 columns', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'taskList', content: [ { type: 'taskItem', attrs: { checked: false }, content: [ para(text('top')), { type: 'bulletList', content: [ { type: 'listItem', content: [para(text('child'))] }, ], }, ], }, ], }), ); expect(out).toBe('- [ ] top\n - child'); }); // 10. A bulletList inside a blockquote: each list line independently prefixed. it('a bulletList inside a blockquote prefixes every list line with "> "', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'blockquote', content: [ { type: 'bulletList', content: [ { type: 'listItem', content: [para(text('x'))] }, { type: 'listItem', content: [para(text('y'))] }, ], }, ], }), ); expect(out).toBe('> - x\n> - y'); }); // 11. GFM (non-spanned) cell: multi-block space-join + pipe-escape + newline-collapse. it('a GFM cell escapes a literal pipe and collapses newlines across two paragraphs', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'table', content: [ { type: 'tableRow', content: [{ type: 'tableHeader', content: [para(text('h'))] }], }, { type: 'tableRow', content: [ { type: 'tableCell', content: [para(text('a|b')), para(text('c'))], }, ], }, ], }), ); expect(out).toBe('| h |\n| --- |\n| a\\|b c |'); }); }); describe('converter gap coverage — documented round-trip data loss (specs 12–14)', () => { // 12. A 3-backtick fence inside a codeBlock body is now lengthened: the outer // fence widens to (longest inner run + 1) backticks per CommonMark, so the // inner ``` is treated as content and the block survives as ONE node. 
it('a triple-backtick fence inside a codeBlock body round-trips via a widened fence', async () => { const d = doc({ type: 'codeBlock', attrs: { language: 'js' }, content: [{ type: 'text', text: '```\ninner\n```' }], }); const md1 = convertProseMirrorToMarkdown(d); // Outer fence widened to 4 backticks; the inner 3-backtick fence is content. expect(md1).toBe('````js\n```\ninner\n```\n````'); const doc2 = await markdownToProseMirror(md1); // The block survives as a SINGLE code block (no premature split). const top = doc2.content || []; expect(top).toHaveLength(1); expect(top[0].type).toBe('codeBlock'); expect(top[0].attrs?.language).toBe('js'); expect(top[0].content?.[0]?.text).toContain('```\ninner\n```'); const md2 = convertProseMirrorToMarkdown(doc2); expect(md2).toBe(md1); // byte-stable // Canonically the re-imported code text gains a single trailing newline // (marked re-adds it; the exporter strips it back, hence byte stability). // The fence is no longer lossy: the inner fence and content fully survive. expect(docsCanonicallyEqual(d, doc2)).toBe(false); }); // 13. A leading ordered-list marker in paragraph text is NOT escaped, so a // plain paragraph silently becomes an orderedList on re-import. it('a paragraph starting with "1. " is promoted to an orderedList on re-import', async () => { const d = doc({ type: 'paragraph', content: [{ type: 'text', text: '1. not a list' }], }); const md1 = convertProseMirrorToMarkdown(d); expect(md1).toBe('1. not a list'); // no backslash escape const doc2 = await markdownToProseMirror(md1); expect(doc2.content?.[0]?.type).toBe('orderedList'); const li = doc2.content[0].content?.[0]; expect(li?.type).toBe('listItem'); expect(li.content?.[0]?.content?.[0]).toMatchObject({ type: 'text', text: 'not a list', // the "1. " was consumed as a list marker }); expect(docsCanonicallyEqual(d, doc2)).toBe(false); }); // 14. The image emitter drops the title attribute (silently lost on round-trip). 
it('an image title attribute is dropped on export and lost on re-import', async () => { const d = doc({ type: 'image', attrs: { src: '/i.png', alt: 'a', title: 't"q' }, }); const md1 = convertProseMirrorToMarkdown(d); expect(md1).toBe('![a](/i.png)'); // no title, no quotes const doc2 = await markdownToProseMirror(md1); const img = (doc2.content || []).find((n: any) => n.type === 'image'); expect(img).toBeTruthy(); expect(img.attrs?.title).toBeNull(); // the original 't"q' was dropped expect(img.attrs?.src).toBe('/i.png'); expect(img.attrs?.alt).toBe('a'); expect(docsCanonicallyEqual(d, doc2)).toBe(false); }); }); describe('converter gap coverage — raw-HTML container round-trips (specs 15–29)', () => { // 15. image inside a column: imageToHtml width+align arms; byte-stable; no // literal-markdown text node leaks. it('an image in a column emits (width/align arms) and round-trips byte-stable', async () => { const { md1, doc2, md2 } = await roundTrip( oneColumn({ type: 'image', attrs: { src: '/i.png', alt: 'cap', width: 320, align: 'center' }, }), ); expect(md1).toBe( '
cap
', ); expect(md2).toBe(md1); expect(colChildOf(doc2)?.type).toBe('image'); }); // 16. image inside a SPANNED table cell (the other raw-HTML container). it('an image in a spanned table cell emits (width arm) and round-trips byte-stable', async () => { const { md1, md2 } = await roundTrip({ type: 'table', content: [ { type: 'tableRow', content: [ { type: 'tableCell', attrs: { colspan: 2 }, content: [ { type: 'image', attrs: { src: '/i.png', alt: 'x', width: 100 }, }, ], }, ], }, ], }); expect(md1).toBe( '
x
', ); expect(md2).toBe(md1); }); // 17. callout inside a column: calloutToHtml lower-cases the type; byte-stable. it('a callout in a column emits the HTML div (type lower-cased) and round-trips', async () => { const { md1, doc2, md2 } = await roundTrip( oneColumn({ type: 'callout', attrs: { type: 'WARNING' }, content: [para(text('a'))], }), ); expect(md1).toBe( '

a

', ); expect(md2).toBe(md1); expect(colChildOf(doc2)?.type).toBe('callout'); }); // 18. details tree inside a column: summary via inlineToHtml, content via blockToHtml. it('a details tree in a column emits
//
and round-trips', async () => { const { md1, doc2, md2 } = await roundTrip( oneColumn({ type: 'details', content: [ { type: 'detailsSummary', content: [text('S')] }, { type: 'detailsContent', content: [para(text('body'))] }, ], }), ); expect(md1).toBe( '
S

body

', ); expect(md2).toBe(md1); expect(colChildOf(doc2)?.type).toBe('details'); }); // 19. taskList inside a column: BOTH checked:true and checked:false arms. it('a taskList in a column emits both data-checked arms and round-trips', async () => { const { md1, doc2, md2 } = await roundTrip( oneColumn({ type: 'taskList', content: [ { type: 'taskItem', attrs: { checked: true }, content: [para(text('done'))], }, { type: 'taskItem', attrs: { checked: false }, content: [para(text('todo'))], }, ], }), ); expect(md1).toBe( '
  • done

  • todo

', ); expect(md2).toBe(md1); expect(colChildOf(doc2)?.type).toBe('taskList'); }); // 20. bare taskItem (no wrapping taskList) inside a column self-wraps. it('a bare taskItem in a column self-wraps in a single-item taskList and round-trips', async () => { const { md1, doc2, md2 } = await roundTrip( oneColumn({ type: 'taskItem', attrs: { checked: false }, content: [para(text('lone'))], }), ); expect(md1).toBe( '
  • lone

', ); expect(md2).toBe(md1); expect(colChildOf(doc2)?.type).toBe('taskList'); }); // 21. blockquote inside a column: real
, not markdown "> q". it('a blockquote in a column emits
and round-trips', async () => { const { md1, doc2, md2 } = await roundTrip( oneColumn({ type: 'blockquote', content: [para(text('q'))] }), ); expect(md1).toBe( '

q

', ); expect(md2).toBe(md1); expect(colChildOf(doc2)?.type).toBe('blockquote'); }); // 22. horizontalRule inside a column: literal
, not markdown "---". it('a horizontalRule in a column emits
and round-trips', async () => { const { md1, doc2, md2 } = await roundTrip( oneColumn({ type: 'horizontalRule' }), ); expect(md1).toBe( '

', ); expect(md2).toBe(md1); expect(colChildOf(doc2)?.type).toBe('horizontalRule'); }); // 23. Unknown block type with NON-text block children ->
-wrap of children. it('an unknown block with block children wraps them in
(no markdown leak)', () => { const md1 = convertProseMirrorToMarkdown( doc( oneColumn({ type: 'someFutureBlock', content: [para(text('a')), para(text('b'))], }), ), ); expect(md1).toContain('

a

b

'); // No markdown paragraph separator survives inside the raw-HTML column. expect(md1).toBe( '

a

b

', ); }); // 24. Unknown block with ONLY inline/text children ->
inlineToHtml
. it('an unknown block with only inline children renders inline as HTML (marks not markdown)', () => { const md1 = convertProseMirrorToMarkdown( doc( oneColumn({ type: 'someInlineOnlyBlock', content: [text('hi'), { type: 'text', text: '!', marks: [{ type: 'bold' }] }], }), ), ); expect(md1).toContain('
hi!
'); }); // 25. mathBlock inside a column delegates through processNode (NOT $$ fence). it('a mathBlock in a column delegates to processNode (HTML div, no $$ fence)', () => { const md1 = convertProseMirrorToMarkdown( doc(oneColumn({ type: 'mathBlock', attrs: { text: 'a^2+b^2' } })), ); expect(md1).toContain( '
', ); expect(md1).not.toContain('$$'); }); // 26. SPANNED table inside a column delegates to processNode -> raw . it('a spanned table in a column delegates to raw
HTML (no GFM pipes)', () => { const md1 = convertProseMirrorToMarkdown( doc( oneColumn({ type: 'table', content: [ { type: 'tableRow', content: [ { type: 'tableCell', attrs: { colspan: 2 }, content: [para(text('x'))], }, ], }, ], }), ), ); expect(md1).toContain(' blockChildrenToHtml. it('a list item with paragraph+codeBlock in a column emits both blocks as HTML', () => { const md1 = convertProseMirrorToMarkdown( doc( oneColumn({ type: 'bulletList', content: [ { type: 'listItem', content: [ para(text('p')), { type: 'codeBlock', attrs: { language: 'js' }, content: [text('a\nb')], }, ], }, ], }), ), ); expect(md1).toContain('

p

'); expect(md1).toContain('
a\nb
'); // The two blocks appear sequentially inside the same
  • . expect(md1).toContain( '
  • p

    a\nb
  • ', ); }); // 28. ordered list item whose 2nd block child is a NESTED bulletList. it('an ordered list item with a nested bulletList in a column emits nested
      HTML', () => { const md1 = convertProseMirrorToMarkdown( doc( oneColumn({ type: 'orderedList', content: [ { type: 'listItem', content: [ para(text('p1')), { type: 'bulletList', content: [ { type: 'listItem', content: [para(text('nested'))] }, ], }, ], }, ], }), ), ); // NOTE(review): the spec's expected literal said '
      • nested
      ', // but blockChildrenToHtml renders the nested listItem's paragraph child as a // real

      , so the actual (correct) emission is '

      • nested

      '. expect(md1).toContain( '
      1. p1

        • nested

      ', ); // No markdown list markers leaked into the raw-HTML column. expect(md1).not.toContain('1. '); expect(md1).not.toContain('- nested'); }); // 29. mathInline atom inside a column paragraph -> inlineToHtml delegates via processNode. it('a mathInline atom in a column paragraph emits schema HTML (no $...$ fence)', () => { const md1 = convertProseMirrorToMarkdown( doc(oneColumn(para(text('eq: '), { type: 'mathInline', attrs: { text: 'x_i' } }))), ); expect(md1).toContain( '

      eq:

      ', ); expect(md1).not.toContain('$x_i$'); }); });