Files
gitmost/packages/git-sync/test/media-roundtrip.test.ts
claude code agent 227 e5607cb1d2 test(git-sync): exhaustive converter coverage + fix 3 round-trip data-loss bugs
Coder↔reviewer design loop (9 rounds, reviewer verdict: exhaustive) produced
92 specs; implemented +123 tests (465 -> 588 passing). The new round-trip
coverage exposed three genuine data-loss bugs in the Markdown<->ProseMirror
converter, all now FIXED (round-trip is lossless for these):

1. pageBreak was lost on export (no converter case -> rendered to "" and the
   node vanished). Now emits <div data-type="pageBreak"></div>, which the schema
   parses back -> round-trips.
2. A block image between blocks left an empty <p> artifact after import-hoisting,
   producing a phantom blank-gap diff on every sync. markdownToProseMirror now
   strips content-less paragraphs after generateJSON — with a schema-validity
   guard that keeps the obligatory single empty paragraph in `content: "block+"`
   containers (tableCell/tableHeader/blockquote/column/callout/doc), so empty
   cells/quotes never become an invalid `content: []`.
3. The `code` mark combined with another mark was not byte-stable (emitted nested
   HTML that the schema's `code` `excludes:"_"` collapsed on import). The
   converter now emits code-only when `code` co-occurs, matching the editor.

New coverage spans media/diagram/details/columns/math/mention attribute
round-trips, converter emission branches, git error paths, and engine decision
branches. A dedicated test pins the empty-container schema validity (the review
catch on the bug-2 fix).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 00:17:24 +03:00

276 lines
11 KiB
TypeScript

import { describe, expect, it } from 'vitest';
import {
convertProseMirrorToMarkdown,
markdownToProseMirror,
docsCanonicallyEqual,
} from 'docmost-client';
// ---------------------------------------------------------------------------
// Media / atom node round-trip coverage (audio, video, pdf, attachment, embed,
// youtube). The existing specs (corpus + property test) exercise the EXPORT
// direction of these nodes only; their parseHTML branches (the INVERSE parse of
// the exported HTML) are otherwise unprotected. Each test runs the full
// export -> import -> export pipeline and pins:
// - the exact md1 byte string the converter emits,
// - whether md2 is byte-stable (md2 === md1) or grows by a materialized
// schema default on the first import,
// - the re-parsed doc2 attrs (NOTE: parseHTML reads via getAttribute and so
// returns STRINGS for numeric attrs, which is what breaks naive canonical
// equality), and
// - docsCanonicallyEqual(doc, doc2) where the spec asserts a specific result.
//
// `convertProseMirrorToMarkdown` requires a full doc ({type:'doc', content:[]}),
// so each spec's `doc=[...]` content array is wrapped via mkDoc().
// ---------------------------------------------------------------------------
/**
 * Build a top-level document node around a list of child nodes.
 *
 * @param content - The child nodes, exactly as the specs write their `doc` array.
 * @returns An object of the form `{ type: 'doc', content }`; the array is passed
 *   through by reference, not copied.
 */
function mkDoc(content: any[]) {
  return { type: 'doc', content };
}
/**
 * Run the export -> import -> export pipeline on a document.
 *
 * @param doc - The document handed to the first export.
 * @returns `md1` (first export), `md2` (export of the re-imported document) and
 *   `doc2` (the document produced by importing `md1`).
 */
async function roundTrip(doc: any) {
  const firstExport = convertProseMirrorToMarkdown(doc);
  const reimported = await markdownToProseMirror(firstExport);
  const secondExport = convertProseMirrorToMarkdown(reimported);
  return {
    md1: firstExport,
    md2: secondExport,
    doc2: reimported,
  };
}
/**
 * Depth-first, pre-order search for a node of the requested type.
 *
 * @param node - Root of the (sub)tree to search; a falsy value is treated as an
 *   empty tree.
 * @param type - The `type` value to look for.
 * @returns The first matching node (the root itself if it matches), or `null`
 *   when nothing in the tree matches.
 */
const findFirst = (node: any, type: string): any => {
  const matchesHere = Boolean(node) && node.type === type;
  if (matchesHere) {
    return node;
  }
  // A missing or falsy `content` is treated as "no children".
  const children = node?.content || [];
  for (const candidate of children) {
    const found = findFirst(candidate, type);
    if (found) {
      return found;
    }
  }
  return null;
};
describe('media atom round-trip (audio/video/pdf/attachment/embed/youtube)', () => {
// Case 1: audio carrying every optional attribute ---------------------------
it('audio with src+attachmentId+size: byte-stable, size re-parses to the STRING "9001"', async () => {
  const audioAttrs = { src: '/a.mp3', attachmentId: 'att-7', size: 9001 };
  const expectedMarkdown =
    '<div><audio src="/a.mp3" data-attachment-id="att-7" data-size="9001"></audio></div>';

  const doc = mkDoc([{ type: 'audio', attrs: audioAttrs }]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedMarkdown);
  // The second export must reproduce the first byte for byte.
  expect(md2).toBe(md1);

  const reparsedAudio = findFirst(doc2, 'audio');
  expect(reparsedAudio).not.toBeNull();
  expect(reparsedAudio.type).toBe('audio');

  const { attrs } = reparsedAudio;
  expect(attrs.src).toBe('/a.mp3');
  expect(attrs.attachmentId).toBe('att-7');
  // The input number 9001 is pinned as the STRING '9001' after re-import: the
  // schema's data-size parseHTML reads the value via getAttribute().
  expect(attrs.size).toBe('9001');
});
// Case 2: video with every attribute populated ------------------------------
it('video with all attrs: byte-stable; numeric attrs re-parse to STRINGS; canonical equality FALSE', async () => {
  const videoNode = {
    type: 'video',
    attrs: {
      src: '/v.mp4',
      alt: 'clip',
      attachmentId: 'att-1',
      width: 640,
      height: 480,
      size: 1234,
      align: 'center',
      aspectRatio: 1.777,
    },
  };
  const expectedMarkdown =
    '<div><video src="/v.mp4" aria-label="clip" data-attachment-id="att-1" width="640" height="480" data-size="1234" data-align="center" data-aspect-ratio="1.777"></video></div>';

  const doc = mkDoc([videoNode]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedMarkdown);
  expect(md2).toBe(md1);

  const video = findFirst(doc2, 'video');
  expect(video).not.toBeNull();
  expect(video.attrs.alt).toBe('clip');

  // Every numeric input attr is pinned as a STRING after re-import (read back
  // via getAttribute()).
  const stringifiedAttrs: Array<[string, string]> = [
    ['width', '640'],
    ['height', '480'],
    ['size', '1234'],
    ['aspectRatio', '1.777'],
  ];
  for (const [attrName, expected] of stringifiedAttrs) {
    expect(video.attrs[attrName]).toBe(expected);
  }

  // The export is byte-stable, yet the documents are NOT canonically equal:
  // width/height/size/aspectRatio changed from numbers to strings, and only
  // align:'center' is normalized away by the canonical comparison.
  expect(docsCanonicallyEqual(doc, doc2)).toBe(false);
});
// Case 3: video with nothing but src -----------------------------------------
it('minimal video (src only): NOT byte-stable (gains data-align="center") but canonically equal', async () => {
  const expectedFirstExport = '<div><video src="/v.mp4"></video></div>';
  const expectedSecondExport = '<div><video src="/v.mp4" data-align="center"></video></div>';

  const doc = mkDoc([{ type: 'video', attrs: { src: '/v.mp4' } }]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedFirstExport);
  // video.align carries a non-null schema default ('center') that appears on
  // import. The converter emits data-align only when the attr is set, so the
  // second export gains data-align="center" exactly once (the documented
  // one-time asymmetry).
  expect(md2).toBe(expectedSecondExport);
  expect(md2).not.toBe(md1);
  // KNOWN_DEFAULTS.video normalizes align:'center' away, so the byte growth does
  // not prevent canonical equality.
  expect(docsCanonicallyEqual(doc, doc2)).toBe(true);
});
// Case 4: pdf without numeric attrs (the positive control) -------------------
it('pdf with src+name+attachmentId (no numerics): byte- AND canonically-stable', async () => {
  const pdfAttrs = { src: '/d.pdf', name: 'd.pdf', attachmentId: 'att-9' };
  const expectedMarkdown =
    '<div data-type="pdf" src="/d.pdf" data-name="d.pdf" data-attachment-id="att-9"></div>';

  const doc = mkDoc([{ type: 'pdf', attrs: pdfAttrs }]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedMarkdown);
  expect(md2).toBe(md1);

  const pdf = findFirst(doc2, 'pdf');
  expect(pdf).not.toBeNull();
  const expectedAttrs: Array<[string, string]> = [
    ['src', '/d.pdf'],
    ['name', 'd.pdf'],
    ['attachmentId', 'att-9'],
  ];
  for (const [attrName, expected] of expectedAttrs) {
    expect(pdf.attrs[attrName]).toBe(expected);
  }

  // With no numeric attr to turn into a string, the round-trip is stable both
  // byte-wise and canonically — the control for the numeric-divergence cases.
  expect(docsCanonicallyEqual(doc, doc2)).toBe(true);
});
// Case 5: attachment carrying a numeric size ---------------------------------
it('attachment with url+name+mime+size+attachmentId: byte-stable; size STRING; canonical FALSE', async () => {
  const attachmentNode = {
    type: 'attachment',
    attrs: {
      url: '/f.zip',
      name: 'f.zip',
      mime: 'application/zip',
      size: 512,
      attachmentId: 'att-3',
    },
  };
  const expectedMarkdown =
    '<div data-type="attachment" data-attachment-url="/f.zip" data-attachment-name="f.zip" data-attachment-mime="application/zip" data-attachment-size="512" data-attachment-id="att-3"></div>';

  const doc = mkDoc([attachmentNode]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedMarkdown);
  expect(md2).toBe(md1);

  const attachment = findFirst(doc2, 'attachment');
  expect(attachment).not.toBeNull();
  // String attrs survive unchanged; the final entry pins size as the STRING
  // '512' (data-attachment-size is read back via getAttribute()).
  const expectedAttrs: Array<[string, string]> = [
    ['url', '/f.zip'],
    ['name', 'f.zip'],
    ['mime', 'application/zip'],
    ['attachmentId', 'att-3'],
    ['size', '512'],
  ];
  for (const [attrName, expected] of expectedAttrs) {
    expect(attachment.attrs[attrName]).toBe(expected);
  }

  // size went from number to string, which breaks canonical equality.
  expect(docsCanonicallyEqual(doc, doc2)).toBe(false);
});
// Case 6: embed with width/height/align given explicitly (byte-stable) -------
it('embed with explicit src+provider+align+width+height: byte-stable; width/height STRINGS', async () => {
  const embedNode = {
    type: 'embed',
    attrs: {
      src: 'https://x.com/e',
      provider: 'iframe',
      align: 'left',
      width: 600,
      height: 400,
    },
  };
  const expectedMarkdown =
    '<div data-type="embed" data-src="https://x.com/e" data-provider="iframe" data-align="left" data-width="600" data-height="400"></div>';

  const doc = mkDoc([embedNode]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedMarkdown);
  expect(md2).toBe(md1);

  const embed = findFirst(doc2, 'embed');
  expect(embed).not.toBeNull();
  // width/height are pinned as STRINGS: data-width / data-height are read back
  // via getAttribute().
  const expectedAttrs: Array<[string, string]> = [
    ['src', 'https://x.com/e'],
    ['provider', 'iframe'],
    ['align', 'left'],
    ['width', '600'],
    ['height', '400'],
  ];
  for (const [attrName, expected] of expectedAttrs) {
    expect(embed.attrs[attrName]).toBe(expected);
  }
});
// Case 7: embed with only src + provider --------------------------------------
it('minimal embed (src+provider): NOT byte-stable; defaults width/height materialize as NUMBERS 800/600', async () => {
  const expectedFirstExport =
    '<div data-type="embed" data-src="https://x.com/e" data-provider="iframe"></div>';
  const expectedSecondExport =
    '<div data-type="embed" data-src="https://x.com/e" data-provider="iframe" data-align="center" data-width="800" data-height="600"></div>';

  const embedAttrs = { src: 'https://x.com/e', provider: 'iframe' };
  const doc = mkDoc([{ type: 'embed', attrs: embedAttrs }]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedFirstExport);
  // embed's non-null schema defaults (align='center', width=800, height=600)
  // are absent from the first export but appear on import, so the second
  // export gains three data-* attrs — a one-time divergence.
  expect(md2).toBe(expectedSecondExport);
  expect(md2).not.toBe(md1);

  const embed = findFirst(doc2, 'embed');
  expect(embed).not.toBeNull();
  expect(embed.attrs.align).toBe('center');
  // These values come from the addAttributes default rather than parseHTML
  // (which runs only when the attribute is present on the imported element),
  // so on the FIRST import they are the NUMBERS 800/600, not strings.
  expect(embed.attrs.width).toBe(800);
  expect(embed.attrs.height).toBe(600);
});
// Case 8: youtube with src + width + height + align ---------------------------
it('youtube with src+width+height+align(right): byte-stable; width/height STRINGS; canonical FALSE', async () => {
  const youtubeNode = {
    type: 'youtube',
    attrs: {
      src: 'https://youtu.be/abc',
      width: 560,
      height: 315,
      align: 'right',
    },
  };
  const expectedMarkdown =
    '<div data-type="youtube" data-src="https://youtu.be/abc" data-width="560" data-height="315" data-align="right"></div>';

  const doc = mkDoc([youtubeNode]);
  const { md1, md2, doc2 } = await roundTrip(doc);

  expect(md1).toBe(expectedMarkdown);
  expect(md2).toBe(md1);

  const youtube = findFirst(doc2, 'youtube');
  expect(youtube).not.toBeNull();
  // width/height are pinned as STRINGS: data-width / data-height are read back
  // via getAttribute().
  const expectedAttrs: Array<[string, string]> = [
    ['src', 'https://youtu.be/abc'],
    ['align', 'right'],
    ['width', '560'],
    ['height', '315'],
  ];
  for (const [attrName, expected] of expectedAttrs) {
    expect(youtube.attrs[attrName]).toBe(expected);
  }

  // width/height changed from numbers to strings, and align='right' is a
  // non-default value that KNOWN_DEFAULTS.youtube's normalization keeps, so
  // the documents are not canonically equal.
  expect(docsCanonicallyEqual(doc, doc2)).toBe(false);
});
});