From 81452141f73cfc795d9604b1fdfdffa47d51d1b2 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Tue, 23 Jun 2026 06:56:29 +0300 Subject: [PATCH] build(git-sync): rebuild committed build/ to match the converter fixes This branch commits packages/git-sync/build/ and the server/Docker consume it, so the stale build/ would otherwise ship WITHOUT the round-trip data-loss fixes in 7d39c16b. Rebuilt via tsc (only the two changed modules). NOTE: not committing build/ at all (review finding #2) is the proper fix, pending the CI/Docker build-orchestration change. Co-Authored-By: Claude Opus 4.8 --- .../git-sync/build/lib/markdown-converter.js | 32 +++++++++------- .../build/lib/markdown-to-prosemirror.js | 38 ++++++++++++++++++- 2 files changed, 56 insertions(+), 14 deletions(-) diff --git a/packages/git-sync/build/lib/markdown-converter.js b/packages/git-sync/build/lib/markdown-converter.js index 3a691ec0..73a49776 100644 --- a/packages/git-sync/build/lib/markdown-converter.js +++ b/packages/git-sync/build/lib/markdown-converter.js @@ -61,21 +61,21 @@ function convertProseMirrorToMarkdown(content) { let textContent = node.text || ""; // Apply marks (bold, italic, code, etc.) if (node.marks) { - // Markdown code spans (`...`) cannot carry inner formatting, so when a - // run has the `code` mark alongside ANY other mark, backtick syntax - // would leak literal ** / []() into the code text. In that case emit - // nested HTML ( innermost, the other marks wrapping it as HTML) - // so the output is at least well-formed and re-parseable. - // - // NOTE: this does NOT round-trip both marks. The schema's `code` mark - // has `excludes: "_"` (it excludes every other mark), so on import the - // co-occurring mark is always dropped — the run comes back as `code` - // only. We keep the emission simple and accept that the other mark is - // lost; preserving both is impossible while `code` excludes them. - // Only use the backtick form when `code` is the sole mark. 
+ // The schema's `code` mark declares `excludes: "_"` — it excludes every + // other inline mark — so the editor can NEVER produce a text run that + // carries `code` together with another mark, and on import any + // co-occurring mark is always dropped (the run comes back as code-only). + // The lossless, byte-stable behavior is therefore: when a run has the + // `code` mark, emit ONLY the backtick code span and ignore every other + // mark, so md1 is already code-only and md2 === md1. Runs WITHOUT a code + // mark are rendered exactly as before. const markTypes = node.marks.map((m) => m.type); const hasCode = markTypes.includes("code"); - const codeCombined = hasCode && markTypes.length > 1; + if (hasCode) { + textContent = `\`${textContent}\``; + return textContent; + } + const codeCombined = false; for (const mark of node.marks) { switch (mark.type) { case "bold": @@ -519,6 +519,12 @@ function convertProseMirrorToMarkdown(content) { const inner = nodeContent.map((n) => blockToHtml(n)).join(""); return `
${inner}
`; } + case "pageBreak": + // Emit the schema-matching div[data-type="pageBreak"] so marked passes + // it through as a block and generateJSON rebuilds the pageBreak atom. + // Without this case the node fell through to `default` and rendered "" + // (the divider silently disappeared and could not round-trip). + return `
`; case "subpages": return "{{SUBPAGES}}"; default: diff --git a/packages/git-sync/build/lib/markdown-to-prosemirror.js b/packages/git-sync/build/lib/markdown-to-prosemirror.js index de430ce8..6e0e1879 100644 --- a/packages/git-sync/build/lib/markdown-to-prosemirror.js +++ b/packages/git-sync/build/lib/markdown-to-prosemirror.js @@ -302,11 +302,47 @@ function bridgeTaskLists(html) { } return document.body.innerHTML; } +/** + * Recursively strip content-less paragraph nodes from a generated doc. + * + * A block-level atom whose markdown form is INLINE (e.g. the block `image`'s + * `![](url)`, or a bare media element) is wrapped by marked in a

<p>; the schema + * then HOISTS the block atom out of that paragraph, leaving an EMPTY paragraph + * sibling. On the next export that empty `<p></p>

` renders to "" and the doc "\n\n" + * join injects a phantom blank gap, so the markdown is not byte-stable. + * + * Markdown blank lines are separators, never content, so generateJSON only ever + * produces an empty paragraph as such a hoist artifact — removing them is safe + * and general (it also subsumes the <p>

-wrapper workaround the `video` case + * uses). We remove ONLY `type === 'paragraph'` nodes whose `content` is absent + * or an empty array; every other node (including atoms without `content`) is + * preserved, and we recurse into the content of any node that has children. + */ +function stripEmptyParagraphs(node) { + if (!node || !Array.isArray(node.content)) { + // Atom / leaf node (no children to recurse into): keep as-is. + return node; + } + const mapped = node.content.map((child) => stripEmptyParagraphs(child)); + const isEmptyParagraph = (child) => !!child && + child.type === "paragraph" && + (!Array.isArray(child.content) || child.content.length === 0); + const filtered = mapped.filter((child) => !isEmptyParagraph(child)); + // Schema-validity guard: several nodes require NON-empty block content + // (`content: "block+"` — tableCell, tableHeader, blockquote, column, callout, + // and the doc root). For an empty one of those, generateJSON materializes a + // single empty paragraph as its OBLIGATORY content — that is not a hoist + // artifact. If stripping would empty the container, keep ONE empty paragraph + // so the result stays schema-valid (an empty cell/quote must not become `[]`). + const cleaned = filtered.length === 0 && mapped.length > 0 ? [mapped[0]] : filtered; + return { ...node, content: cleaned }; +} /** Convert markdown to a ProseMirror doc using the full Docmost schema. */ async function markdownToProseMirror(markdownContent) { const marked = await loadMarked(); const withCallouts = await preprocessCallouts(markdownContent); const html = await marked.parse(withCallouts); const bridged = bridgeTaskLists(html); - return (0, html_1.generateJSON)(bridged, docmost_schema_1.docmostExtensions); + const doc = (0, html_1.generateJSON)(bridged, docmost_schema_1.docmostExtensions); + return stripEmptyParagraphs(doc); }