From 8b14a2e7b1f419ba468392a47b6d53e537d443e4 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Tue, 23 Jun 2026 06:50:20 +0300 Subject: [PATCH] test(git-sync): exhaustive converter coverage + fix 3 round-trip data-loss bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coder↔reviewer design loop (9 rounds, reviewer verdict: exhaustive) produced 92 specs; implemented +123 tests (465 -> 588 passing). The new round-trip coverage exposed three genuine data-loss bugs in the Markdown<->ProseMirror converter, all now FIXED (round-trip is lossless for these): 1. pageBreak was lost on export (no converter case -> rendered to "" and the node vanished). Now emits
, which the schema parses back -> round-trips. 2. A block image between blocks left an empty

artifact after import-hoisting, producing a phantom blank-gap diff on every sync. markdownToProseMirror now strips content-less paragraphs after generateJSON — with a schema-validity guard that keeps the obligatory single empty paragraph in `content: "block+"` containers (tableCell/tableHeader/blockquote/column/callout/doc), so empty cells/quotes never become an invalid `content: []`. 3. The `code` mark combined with another mark was not byte-stable (emitted nested HTML that the schema's `code` `excludes:"_"` collapsed on import). The converter now emits code-only when `code` co-occurs, matching the editor. New coverage spans media/diagram/details/columns/math/mention attribute round-trips, converter emission branches, git error paths, and engine decision branches. A dedicated test pins the empty-container schema validity (the review catch on the bug-2 fix). Co-Authored-By: Claude Opus 4.8 --- .../git-sync/src/lib/markdown-converter.ts | 33 +- .../src/lib/markdown-to-prosemirror.ts | 41 +- .../git-sync/test/apply-pull-actions.test.ts | 247 +++++++ .../git-sync/test/diagram-roundtrip.test.ts | 109 +++ .../test/docmost-schema-attrs.test.ts | 75 ++ .../git-sync/test/git-error-paths.test.ts | 198 ++++++ .../test/git-integration-gaps.test.ts | 89 +++ .../test/markdown-converter-gaps.test.ts | 638 +++++++++++++++++- .../test/markdown-converter-golden.test.ts | 163 +++++ .../markdown-converter-html-marks.test.ts | 223 ++++++ .../git-sync/test/markdown-converter.test.ts | 154 ++++- .../test/markdown-roundtrip.property.test.ts | 6 +- .../test/markdown-to-prosemirror-gaps.test.ts | 296 ++++++++ .../git-sync/test/media-roundtrip.test.ts | 275 ++++++++ packages/git-sync/test/roundtrip.test.ts | 139 ++++ packages/git-sync/test/run-push.test.ts | 136 ++++ packages/git-sync/test/sanitize.test.ts | 73 ++ .../strip-empty-paragraphs-validity.test.ts | 57 ++ 18 files changed, 2902 insertions(+), 50 deletions(-) create mode 100644 packages/git-sync/test/diagram-roundtrip.test.ts create mode 100644 packages/git-sync/test/docmost-schema-attrs.test.ts create mode 100644 packages/git-sync/test/git-error-paths.test.ts create mode 100644 packages/git-sync/test/markdown-converter-html-marks.test.ts create mode 100644 packages/git-sync/test/media-roundtrip.test.ts create mode 100644 packages/git-sync/test/strip-empty-paragraphs-validity.test.ts diff --git a/packages/git-sync/src/lib/markdown-converter.ts b/packages/git-sync/src/lib/markdown-converter.ts index cbaa7042..7cd4b320 100644 --- a/packages/git-sync/src/lib/markdown-converter.ts +++ b/packages/git-sync/src/lib/markdown-converter.ts @@ -68,21 +68,21 @@ export function convertProseMirrorToMarkdown(content: any): string { let textContent = node.text || ""; // Apply marks (bold, italic, code, etc.) if (node.marks) { - // Markdown code spans (`...`) cannot carry inner formatting, so when a - // run has the `code` mark alongside ANY other mark, backtick syntax - // would leak literal ** / []() into the code text. In that case emit - // nested HTML ( innermost, the other marks wrapping it as HTML) - // so the output is at least well-formed and re-parseable. - // - // NOTE: this does NOT round-trip both marks. The schema's `code` mark - // has `excludes: "_"` (it excludes every other mark), so on import the - // co-occurring mark is always dropped — the run comes back as `code` - // only. We keep the emission simple and accept that the other mark is - // lost; preserving both is impossible while `code` excludes them. - // Only use the backtick form when `code` is the sole mark. + // The schema's `code` mark declares `excludes: "_"` — it excludes every + // other inline mark — so the editor can NEVER produce a text run that + // carries `code` together with another mark, and on import any + // co-occurring mark is always dropped (the run comes back as code-only). + // The lossless, byte-stable behavior is therefore: when a run has the + // `code` mark, emit ONLY the backtick code span and ignore every other + // mark, so md1 is already code-only and md2 === md1. Runs WITHOUT a code + // mark are rendered exactly as before. const markTypes = node.marks.map((m: any) => m.type); const hasCode = markTypes.includes("code"); - const codeCombined = hasCode && markTypes.length > 1; + if (hasCode) { + textContent = `\`${textContent}\``; + return textContent; + } + const codeCombined = false; for (const mark of node.marks) { switch (mark.type) { case "bold": @@ -571,6 +571,13 @@ export function convertProseMirrorToMarkdown(content: any): string { return `

${inner}
`; } + case "pageBreak": + // Emit the schema-matching div[data-type="pageBreak"] so marked passes + // it through as a block and generateJSON rebuilds the pageBreak atom. + // Without this case the node fell through to `default` and rendered "" + // (the divider silently disappeared and could not round-trip). + return `
`; + case "subpages": return "{{SUBPAGES}}"; diff --git a/packages/git-sync/src/lib/markdown-to-prosemirror.ts b/packages/git-sync/src/lib/markdown-to-prosemirror.ts index 82a765d7..a9305902 100644 --- a/packages/git-sync/src/lib/markdown-to-prosemirror.ts +++ b/packages/git-sync/src/lib/markdown-to-prosemirror.ts @@ -337,6 +337,44 @@ function bridgeTaskLists(html: string): string { return document.body.innerHTML; } +/** + * Recursively strip content-less paragraph nodes from a generated doc. + * + * A block-level atom whose markdown form is INLINE (e.g. the block `image`'s + * `![](url)`, or a bare media element) is wrapped by marked in a

; the schema + * then HOISTS the block atom out of that paragraph, leaving an EMPTY paragraph + * sibling. On the next export that empty `

` renders to "" and the doc "\n\n" + * join injects a phantom blank gap, so the markdown is not byte-stable. + * + * Markdown blank lines are separators, never content, so generateJSON only ever + * produces an empty paragraph as such a hoist artifact — removing them is safe + * and general (it also subsumes the

-wrapper workaround the `video` case + * uses). We remove ONLY `type === 'paragraph'` nodes whose `content` is absent + * or an empty array; every other node (including atoms without `content`) is + * preserved, and we recurse into the content of any node that has children. + */ +function stripEmptyParagraphs(node: any): any { + if (!node || !Array.isArray(node.content)) { + // Atom / leaf node (no children to recurse into): keep as-is. + return node; + } + const mapped = node.content.map((child: any) => stripEmptyParagraphs(child)); + const isEmptyParagraph = (child: any): boolean => + !!child && + child.type === "paragraph" && + (!Array.isArray(child.content) || child.content.length === 0); + const filtered = mapped.filter((child: any) => !isEmptyParagraph(child)); + // Schema-validity guard: several nodes require NON-empty block content + // (`content: "block+"` — tableCell, tableHeader, blockquote, column, callout, + // and the doc root). For an empty one of those, generateJSON materializes a + // single empty paragraph as its OBLIGATORY content — that is not a hoist + // artifact. If stripping would empty the container, keep ONE empty paragraph + // so the result stays schema-valid (an empty cell/quote must not become `[]`). + const cleaned = + filtered.length === 0 && mapped.length > 0 ? [mapped[0]] : filtered; + return { ...node, content: cleaned }; +} + /** Convert markdown to a ProseMirror doc using the full Docmost schema. */ export async function markdownToProseMirror( markdownContent: string, @@ -345,5 +383,6 @@ export async function markdownToProseMirror( const withCallouts = await preprocessCallouts(markdownContent); const html = await marked.parse(withCallouts); const bridged = bridgeTaskLists(html); - return generateJSON(bridged, docmostExtensions); + const doc = generateJSON(bridged, docmostExtensions); + return stripEmptyParagraphs(doc); } diff --git a/packages/git-sync/test/apply-pull-actions.test.ts b/packages/git-sync/test/apply-pull-actions.test.ts index 1b7276fd..4af46451 100644 --- a/packages/git-sync/test/apply-pull-actions.test.ts +++ b/packages/git-sync/test/apply-pull-actions.test.ts @@ -415,3 +415,250 @@ describe('applyPullActions — merge result is surfaced, not swallowed', () => { expect(res.merge.conflict).toBe(false); }); }); + +// =========================================================================== +// R-Pull-2 coverage gaps (review-driven): the suppression warning FORKS for +// `empty-live` and `mass-delete` reasons (pull.ts 278-290), and the +// fault-tolerant `removePath` catch branch (pull.ts 354-364) where `deps.rm` +// REJECTS. The existing block above only exercises the `incomplete-fetch` +// reason and an rm that always succeeds. +// +// Helper: build a deps object whose `rm` rejects for a chosen set of absolute +// paths and resolves otherwise. We override the recording fs's `rm` (a vi.fn) +// in place so `fs.rms` still records the SUCCESSFUL calls only (a rejecting rm +// throws before pushing), matching the real `node:fs/promises` semantics where +// a thrown rm performed no removal. +function makeFsWithRejectingRm(rejectFor: Set) { + const base = makeFs(); + base.fs.rm = vi.fn(async (abs: string) => { + if (rejectFor.has(abs)) { + throw new Error(`rm failed for ${abs}`); + } + base.rms.push(abs); + }); + return base; +} + +describe('applyPullActions — suppression warning forks (empty-live / mass-delete)', () => { + it('emits the empty-live warning (with existingCount) and performs no removals', async () => { + // SPEC §8 empty-live fork: live fetch returned 0 pages but files are + // tracked. Mirrors the incomplete-fetch suppression test, but the message + // text + its `existingCount` interpolation are a DISTINCT branch. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'p1', relPath: 'A.md' }], + toDelete: [], // suppressed -> already empty + deletionDecision: { apply: false, reason: 'empty-live' }, + plannedDeleteCount: 3, + existingCount: 4, + }), + VAULT, + ); + + expect(res.deleted).toBe(0); + expect(fs.rms).toEqual([]); + // The empty-live message names the tracked-file count and "deletions + // suppressed". + expect(console.warn).toHaveBeenCalledWith( + expect.stringMatching(/live fetch returned 0 pages but 4 file\(s\) are tracked/), + ); + expect(console.warn).toHaveBeenCalledWith( + expect.stringMatching(/deletions suppressed/), + ); + }); + + it('emits the mass-delete guard warning (with planned AND existing counts) and performs no removals', async () => { + // SPEC §8 mass-delete fork (the final else branch): the message + // interpolates BOTH plannedDeleteCount and existingCount ("would delete N + // of M"), distinct from the other two suppression messages. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'p1', relPath: 'A.md' }], + toDelete: [], + deletionDecision: { apply: false, reason: 'mass-delete' }, + plannedDeleteCount: 5, + existingCount: 6, + }), + VAULT, + ); + + expect(res.deleted).toBe(0); + expect(fs.rms).toEqual([]); + expect(console.warn).toHaveBeenCalledWith( + expect.stringMatching(/plan would delete 5 of 6 tracked file\(s\) \(mass-delete guard\)/), + ); + expect(console.warn).toHaveBeenCalledWith( + expect.stringMatching(/deletions suppressed/), + ); + }); +}); + +describe('applyPullActions — removePath fault tolerance (rm REJECTS)', () => { + it('does NOT reject, logs the failure, and does not count the failed removal', async () => { + // pull.ts 354-364: when `deps.rm` throws, removePath logs via console.error + // and returns false; the run continues. Existing delete tests use an rm + // that always succeeds, leaving this catch branch uncovered. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFsWithRejectingRm(new Set(['/vault/Dead.md'])); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [], + toDelete: ['Dead.md'], + deletionDecision: APPLY, + plannedDeleteCount: 1, + existingCount: 1, + }), + VAULT, + ); + + // Resolved (not rejected) — the pull is fault-tolerant. + expect(res.deleted).toBe(0); + // removePath's catch logs "pull: failed to delete Dead.md: ...". + expect(console.error).toHaveBeenCalledWith( + expect.stringMatching(/failed to .* Dead\.md/), + expect.anything(), + ); + // The (would-be) removal never succeeded, so nothing was recorded. + expect(fs.rms).toEqual([]); + }); + + it('counts ONLY successful removals on a partial-failure delete batch (1 reject of 3)', async () => { + // pull.ts 388-391 increments `deleted` only when removePath returns true. + // Here Dead1/Dead3 succeed and Dead2 rejects -> deleted === 2, and the + // deleted>0 subject branch (399-400) fires with written=0. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFsWithRejectingRm(new Set(['/vault/Dead2.md'])); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [], + moved: [], + toDelete: ['Dead1.md', 'Dead2.md', 'Dead3.md'], + deletionDecision: APPLY, + plannedDeleteCount: 3, + existingCount: 5, + }), + VAULT, + ); + + expect(res.deleted).toBe(2); + expect(fs.rms).toEqual(['/vault/Dead1.md', '/vault/Dead3.md']); + expect(g.committedSubject).toBe('docmost: sync 0 page(s), 2 deleted'); + // Exactly one rejection was logged (Dead2.md). + expect(console.error).toHaveBeenCalledTimes(1); + expect(console.error).toHaveBeenCalledWith( + expect.stringMatching(/failed to .* Dead2\.md/), + expect.anything(), + ); + // The run still reached commit + checkout + merge. + expect(g.order).toEqual([ + 'stageAll', + 'commit:docmost: sync 0 page(s), 2 deleted', + 'checkout:main', + 'merge', + ]); + }); +}); + +describe('applyPullActions — move old-path removal rejects vs move-write fails', () => { + it('a move old-path rm REJECTION does not increment movedApplied but an independent delete still succeeds', async () => { + // pull.ts 383 increments movedApplied only when removePath of the old path + // succeeds. Here the new-path write SUCCEEDS (so the page is not in + // failedPageIds and the move loop proceeds to rm) but the old-path rm + // REJECTS — distinct from the move-write-failure guard at 376. An absence + // delete in the same run must still succeed independently. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFsWithRejectingRm(new Set(['/vault/Old/M.md'])); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + toDelete: ['Dead.md'], + deletionDecision: APPLY, + plannedDeleteCount: 1, + existingCount: 3, + }), + VAULT, + ); + + expect(res.written).toBe(1); + expect(res.movedApplied).toBe(0); // old-path rm failed -> not counted + expect(res.deleted).toBe(1); // independent absence delete still succeeded + expect(fs.rms).toEqual(['/vault/Dead.md']); // Old/M.md rm threw, not recorded + expect(g.committedSubject).toBe('docmost: sync 1 page(s), 1 deleted'); + // The failure log named the moved old path. + expect(console.error).toHaveBeenCalledWith( + expect.stringMatching(/failed to .* Old\/M\.md/), + expect.anything(), + ); + }); + + it('a move-write FAILURE keeps the old path: rm is never attempted for it (data-loss guard, 374-383)', async () => { + // Distinct branch from the move-old-path rm rejection above: here the + // new-path WRITE itself fails, so `m` enters failedPageIds and the move + // loop short-circuits at line 376 BEFORE calling rm — emitting a + // console.warn and PRESERVING the old path (the only copy). + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs({ failWriteFor: new Set(['/vault/New/M.md']) }); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + toDelete: [], + deletionDecision: APPLY, + plannedDeleteCount: 0, + existingCount: 1, + }), + VAULT, + ); + + expect(res.written).toBe(0); + expect(res.movedApplied).toBe(0); + // The old path was NEVER removed (rm not even attempted for it). + expect(fs.fs.rm).not.toHaveBeenCalledWith('/vault/Old/M.md'); + expect(fs.rms).toEqual([]); + // The "keeping old path" warning fired exactly once for `m`. + const warnCalls = (console.warn as unknown as ReturnType).mock.calls + .map((c: unknown[]) => String(c[0])) + .filter((m: string) => m.includes('move write for m failed')); + expect(warnCalls.length).toBe(1); + expect(warnCalls[0]).toContain('keeping old path Old/M.md'); + // deleted === 0 -> no ", N deleted" suffix. + expect(g.committedSubject).toBe('docmost: sync 0 page(s)'); + }); +}); diff --git a/packages/git-sync/test/diagram-roundtrip.test.ts b/packages/git-sync/test/diagram-roundtrip.test.ts new file mode 100644 index 00000000..d8ffc5fe --- /dev/null +++ b/packages/git-sync/test/diagram-roundtrip.test.ts @@ -0,0 +1,109 @@ +import { describe, expect, it } from 'vitest'; +import { + convertProseMirrorToMarkdown, + markdownToProseMirror, + docsCanonicallyEqual, +} from 'docmost-client'; + +// Helper mirroring the convention in markdown-converter.test.ts: wrap atoms in +// a top-level doc node so convertProseMirrorToMarkdown (which requires +// content.content) walks them. +const doc = (...nodes: any[]) => ({ type: 'doc', content: nodes }); + +describe('diagram round-trip (docmost-schema diagramAttributes)', () => { + // SPEC case 1: drawio carrying the full numeric-attr surface + // (data-width/data-height/data-size/data-aspect-ratio) that it shares with + // audio/video/pdf but which no fixture exercises on a diagram node. + it('drawio round-trips numeric attrs, coercing number -> string via getAttribute', async () => { + const input = doc({ + type: 'drawio', + attrs: { + src: '/d.drawio', + attachmentId: 'att-1', + width: 640, + height: 480, + size: 1234, + aspectRatio: 1.777, + align: 'center', + }, + }); + + const md1 = convertProseMirrorToMarkdown(input); + const doc2 = await markdownToProseMirror(md1); + const md2 = convertProseMirrorToMarkdown(doc2); + + // Exact serialized form: numbers render as bare data-* values; attribute + // order follows the converter's emit order (src, then width/height/size/ + // aspect-ratio/align, then attachment-id). + expect(md1).toBe( + '
', + ); + + // A second export reproduces the first byte-for-byte (drawio align default + // is already "center", so nothing new materializes on import). + expect(md2).toBe(md1); + + // Re-import coerces every numeric attr to a STRING because parseHTML reads + // them via getAttribute(). This is the gap the reviewer flagged: the + // number -> string coercion on a diagram node is otherwise untested. + const attrs2 = doc2.content[0].attrs; + expect(attrs2.width).toBe('640'); + expect(attrs2.height).toBe('480'); + expect(attrs2.size).toBe('1234'); + expect(attrs2.aspectRatio).toBe('1.777'); + expect(typeof attrs2.width).toBe('string'); + expect(typeof attrs2.aspectRatio).toBe('string'); + // String attrs pass through unchanged. + expect(attrs2.align).toBe('center'); + expect(attrs2.attachmentId).toBe('att-1'); + + // Canonically NOT equal: the numeric -> string coercion survives + // canonicalization (only align='center' is normalized away via + // KNOWN_DEFAULTS.drawio), so 640 !== '640' makes the docs differ. + expect(docsCanonicallyEqual(input, doc2)).toBe(false); + }); + + // SPEC case 2: minimal excalidraw atom with ONLY string attrs (no align, no + // numeric attrs). Locks the one-time export divergence (align='center' + // default materializes only on import) plus escapeAttr of title/alt through + // the data-title/data-alt path. + it('excalidraw materializes align default only on import and escapes title/alt', async () => { + const input = doc({ + type: 'excalidraw', + attrs: { + src: '/e.excalidraw', + title: 'My "Diagram"', + alt: 'a&b', + }, + }); + + const md1 = convertProseMirrorToMarkdown(input); + const doc2 = await markdownToProseMirror(md1); + const md2 = convertProseMirrorToMarkdown(doc2); + + // First export: no align emitted (the input doc carries no align), and the + // " in title becomes ", the & in alt becomes & via escapeAttr. + expect(md1).toBe( + '
', + ); + + // Second export: align='center' has now materialized (the schema's + // diagramAttributes default), so md2 gains a data-align="center" suffix and + // is NOT byte-equal to md1. This one-time divergence is the diagram quirk. + expect(md2).toBe( + '
', + ); + expect(md2).not.toBe(md1); + + // Re-import decodes the escaped entities back to the original characters. + const attrs2 = doc2.content[0].attrs; + expect(attrs2.title).toBe('My "Diagram"'); + expect(attrs2.alt).toBe('a&b'); + expect(attrs2.align).toBe('center'); + + // Canonically EQUAL: align='center' is normalized away via + // KNOWN_DEFAULTS.excalidraw, and title/alt are non-default strings that + // survive on both sides, so the docs are semantically equal. + expect(docsCanonicallyEqual(input, doc2)).toBe(true); + }); +}); diff --git a/packages/git-sync/test/docmost-schema-attrs.test.ts b/packages/git-sync/test/docmost-schema-attrs.test.ts new file mode 100644 index 00000000..6b9f5d8d --- /dev/null +++ b/packages/git-sync/test/docmost-schema-attrs.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, it } from 'vitest'; +import { + sanitizeCssColor, + clampCalloutType, +} from '../src/lib/docmost-schema.js'; + +// These tests pin the two security/normalization helpers that Docmost +// interpolates into inline style and the callout banner type on re-render. +// They are the allowlist guard (XSS/style-breakout boundary) and the +// case-insensitive callout normalizer, both otherwise only exercised +// indirectly through parseHTML/renderHTML. + +describe('sanitizeCssColor', () => { + it('accepts a plain named color unchanged', () => { + expect(sanitizeCssColor('red')).toBe('red'); + }); + + it('accepts 3-digit and 6-digit hex colors unchanged', () => { + expect(sanitizeCssColor('#abc')).toBe('#abc'); + expect(sanitizeCssColor('#aabbcc')).toBe('#aabbcc'); + }); + + it('accepts well-formed functional notation unchanged', () => { + expect(sanitizeCssColor('rgb(1,2,3)')).toBe('rgb(1,2,3)'); + expect(sanitizeCssColor('rgba(0,0,0,0.5)')).toBe('rgba(0,0,0,0.5)'); + expect(sanitizeCssColor('hsl(120,50%,50%)')).toBe('hsl(120,50%,50%)'); + }); + + it('trims surrounding whitespace before matching', () => { + // ' blue ' trims to 'blue', which is a valid named color. + expect(sanitizeCssColor(' blue ')).toBe('blue'); + }); + + it('rejects a style-injection payload (returns null)', () => { + expect(sanitizeCssColor('red; --x: url(x)')).toBeNull(); + }); + + it('rejects an attribute-breakout payload (returns null)', () => { + expect(sanitizeCssColor('red">