From 78953cf775b15f5655a553379e20e6c59101defd Mon Sep 17 00:00:00 2001 From: claude_code Date: Sun, 28 Jun 2026 14:48:31 +0300 Subject: [PATCH 1/5] fix(#244 Part A): close two HIGH data-loss bugs PR #230 only documented MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mdrt-2 (markdown export): add lossless turndown rules for the custom nodes that had no rule — transclusionReference, pageBreak, mention, status. Each re-emits the node as inert raw HTML carrying every data-* attribute instead of being silently dropped (childless atom divs) or collapsed to bare text (mention/status losing data-id/data-color). Empty atom blocks are made non-blank before turndown's blank-rule strips them (mirrors the footnote-ref fix). markdownToHtml passes the raw HTML through and each node's parseHTML rebuilds it, so the form round-trips. Flips the it.fails cases to passing and adds export + import round-trip coverage. persist-6 (collab store): add a store-side empty-guard in onStoreDocument. Before updatePage, if the serialized live doc is an empty paragraph doc AND the persisted page is non-empty, skip the write and log — unless an explicit context.intentionalClear signal is present (deliberate select-all+delete). New/empty pages and unchanged docs are unaffected. Flips the it.failing case to passing and adds escape-hatch + empty-over-empty coverage. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../extensions/persistence-store.spec.ts | 84 ++++++++---- .../extensions/persistence.extension.ts | 27 ++++ .../markdown/utils/turndown.dataloss.test.ts | 120 ++++++++++------- .../src/lib/markdown/utils/turndown.utils.ts | 121 +++++++++++++++++- 4 files changed, 285 insertions(+), 67 deletions(-) diff --git a/apps/server/src/collaboration/extensions/persistence-store.spec.ts b/apps/server/src/collaboration/extensions/persistence-store.spec.ts index 8bc713bf..c8ac68b9 100644 --- a/apps/server/src/collaboration/extensions/persistence-store.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence-store.spec.ts @@ -205,31 +205,69 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot' expect(historyQueue.add).toHaveBeenCalledTimes(1); }); - // #206 persist-6 — RED (it.failing): a momentarily-empty live Y.Doc must not - // overwrite non-empty persisted content. `onStoreDocument` empty-guards the - // LOAD path but not the STORE path, so today an empty doc (a client/agent - // glitch, a bad merge, an emptying transclusion) is written straight over the - // page and the content is wiped silently. A store-side empty-guard is a real - // behaviour change (a deliberate "select-all + delete" is also empty), so it - // is left UNFIXED pending a product decision; this documents the data-loss - // path and flips to a normal passing test the moment the guard lands. - it.failing( - 'does NOT overwrite non-empty content with a momentarily-empty live doc (persist-6)', - async () => { - const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; - const document = ydocFor(emptyDoc); - pageRepo.findById.mockResolvedValue({ - ...persistedHumanPage('IGNORED'), - content: doc('IMPORTANT RICH CONTENT'), - }); + // #206 persist-6 — FIXED: a momentarily-empty live Y.Doc must not overwrite + // non-empty persisted content. `onStoreDocument` empty-guarded the LOAD path + // but not the STORE path, so an empty doc (a client/agent glitch, a bad + // merge, an emptying transclusion) was written straight over the page and the + // content was wiped silently. The store-side empty-guard now skips the write + // when the incoming doc is empty and the stored page is non-empty, unless an + // explicit intentional-clear signal is present. + it('does NOT overwrite non-empty content with a momentarily-empty live doc (persist-6)', async () => { + const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; + const document = ydocFor(emptyDoc); + pageRepo.findById.mockResolvedValue({ + ...persistedHumanPage('IGNORED'), + content: doc('IMPORTANT RICH CONTENT'), + }); - await ext.onStoreDocument(buildData(document, 'user') as any); + await ext.onStoreDocument(buildData(document, 'user') as any); - // Desired contract: the empty incoming doc is rejected and the rich page - // survives. Today updatePage is called with the empty content (data loss). - expect(pageRepo.updatePage).not.toHaveBeenCalled(); - }, - ); + // The empty incoming doc is rejected and the rich page survives. + expect(pageRepo.updatePage).not.toHaveBeenCalled(); + // No false-success side effects for a write that never happened. + expect((document as any).broadcastStateless).not.toHaveBeenCalled(); + expect(historyQueue.add).not.toHaveBeenCalled(); + }); + + // persist-6 — a legitimate clear is NOT broken: with the explicit + // intentional-clear signal, emptying a non-empty page still persists. + it('persists an intentional clear of a non-empty page (persist-6 escape hatch)', async () => { + const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; + const document = ydocFor(emptyDoc); + pageRepo.findById.mockResolvedValue({ + ...persistedHumanPage('IGNORED'), + content: doc('IMPORTANT RICH CONTENT'), + }); + + await ext.onStoreDocument({ + documentName: `page.${PAGE_ID}`, + document, + context: { + user: { id: USER_ID, name: 'Alice' }, + actor: 'user', + intentionalClear: true, + }, + } as any); + + expect(pageRepo.updatePage).toHaveBeenCalledTimes(1); + }); + + // persist-6 — a brand-new / already-empty page is unaffected: an empty store + // over empty stored content is not blocked (it short-circuits as unchanged). + it('does not block an empty store over an already-empty page (persist-6)', async () => { + const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; + const document = ydocFor(emptyDoc); + const normalized = TiptapTransformer.fromYdoc(document, 'default'); + pageRepo.findById.mockResolvedValue({ + ...persistedHumanPage('IGNORED'), + content: normalized, + }); + + await ext.onStoreDocument(buildData(document, 'user') as any); + + // Unchanged empty-over-empty: short-circuits, no spurious write, no error. + expect(pageRepo.updatePage).not.toHaveBeenCalled(); + }); // persist-1 — when every attempt fails the hook must NOT report a phantom // success: no "page.updated" badge broadcast and no history snapshot for diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index f802f229..47fadf8f 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -210,6 +210,33 @@ export class PersistenceExtension implements Extension { return; } + // #206 persist-6 — store-side empty-guard. A momentarily-empty live + // Y.Doc (a client/agent glitch, a bad merge, a transclusion that + // emptied) must NOT overwrite non-empty persisted content. The LOAD + // path already guards emptiness (onLoadDocument only hydrates from db + // when the live doc isEmpty); the STORE path did not, so an empty + // serialization was written straight over the page, wiping it + // silently. Skip the write when the incoming doc is an empty + // paragraph doc AND the stored page is non-empty — unless the writer + // sends an explicit intentional-clear signal (a deliberate + // select-all + delete), the one case where emptying is the user's + // intent. New/empty pages are unaffected (stored content is already + // empty), and an unchanged doc was already short-circuited above. + const intentionalClear = context?.intentionalClear === true; + if ( + !intentionalClear && + isEmptyParagraphDoc(tiptapJson as any) && + page.content && + !isEmptyParagraphDoc(page.content as any) + ) { + this.logger.warn( + `Skipping store for ${pageId}: empty live doc would overwrite ` + + `non-empty persisted content (no intentional-clear signal)`, + ); + page = null; + return; + } + let contributorIds = undefined; try { const existingContributors = page.contributorIds || []; diff --git a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts index 431c92f2..98fdc03a 100644 --- a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts +++ b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts @@ -1,77 +1,111 @@ import { describe, it, expect } from "vitest"; import { htmlToMarkdown } from "./turndown.utils"; +import { markdownToHtml } from "./marked.utils"; /** - * #206 mdrt-2 — Markdown export must never SILENTLY drop a block. + * #206 mdrt-2 — Markdown export must never SILENTLY drop a block. (FIXED) * - * `htmlToMarkdown` (turndown) only registers rules for a fixed set of custom - * nodes (callout, taskItem, details, math, iframe, htmlEmbed, image, video, - * footnote). Any other custom node — `transclusionReference`, `pageBreak`, - * `mention`, `status` — falls through to turndown's default handling: an empty - * wrapper is "blank" and removed, so the block disappears from the exported - * Markdown with no trace. The invariant "never silently lose a block" is broken. + * `htmlToMarkdown` (turndown) historically only registered rules for a fixed + * set of custom nodes (callout, taskItem, details, math, iframe, htmlEmbed, + * image, video, footnote). Any other custom node — `transclusionReference`, + * `pageBreak`, `mention`, `status` — fell through to turndown's default + * handling: an empty wrapper is "blank" and removed, so the block disappeared + * from the exported Markdown with no trace, and `mention`/`status` collapsed to + * bare text, losing their identity (data-id / data-color). The invariant + * "never silently lose a block" was broken. * - * The `it.fails` cases assert the DESIRED contract (the block survives export in - * SOME form) and are RED today: they document the unfixed data loss and flip to - * green the moment a turndown rule (real syntax or a lossless HTML-comment - * placeholder) is added. A normal characterization `it` pins the exact current - * lossy output so the regression is unambiguous. + * The fix adds lossless turndown rules that re-emit each of these nodes as raw + * HTML carrying every `data-*` attribute. Plain-Markdown viewers ignore the + * inert tag; the import path round-trips it (`markdownToHtml` passes the raw + * HTML through and each node's `parseHTML` rebuilds the ProseMirror node). These + * tests assert the surviving contract (the block is preserved AND its identity + * round-trips back through import). */ -describe("htmlToMarkdown — custom nodes without a turndown rule (#206 mdrt-2)", () => { - const wrap = (inner: string) => - `

before

${inner}

after

`; +describe("htmlToMarkdown — custom nodes are preserved losslessly (#206 mdrt-2)", () => { + const wrap = (inner: string) => `

before

${inner}

after

`; - it("CURRENTLY drops a pageBreak entirely (data loss)", () => { + it("preserves a pageBreak block on Markdown export", () => { const md = htmlToMarkdown( wrap('
'), ); - // The page break vanishes: only the two paragraphs remain, nothing between. expect(md).toContain("before"); expect(md).toContain("after"); - expect(md).not.toMatch(/page-?break/i); - expect(md).not.toContain("---"); // not even a horizontal-rule fallback + // The break survives as an inert raw-HTML tag, not silently dropped. + expect(md).toMatch(/data-type="pageBreak"/); + expect(md).toMatch(/page-?break/i); }); - it("CURRENTLY drops a transclusionReference entirely (data loss)", () => { + it("preserves a transclusionReference's identity on Markdown export", () => { const md = htmlToMarkdown( wrap('
'), ); expect(md).toContain("before"); expect(md).toContain("after"); - // The data-id (the only thing that gives the reference identity) is gone. - expect(md).not.toContain("abc"); + // The data-id (the only thing that gives the reference identity) survives. + expect(md).toContain("abc"); + expect(md).toMatch(/data-type="transclusionReference"/); }); - it.fails( - "should NOT lose a pageBreak block on Markdown export", - () => { + it("preserves a mention's data-id (stable identity) on Markdown export", () => { + const md = htmlToMarkdown( + '

hi @Bob there

', + ); + // The mention keeps its stable identity (data-id), not just the text. + expect(md).toContain("u1"); + expect(md).toContain("Bob"); + expect(md).toMatch(/data-type="mention"/); + }); + + it("preserves a status chip's color on Markdown export", () => { + const md = htmlToMarkdown( + '

s Done

', + ); + // The chip's color (its identity) survives, not just the visible text. + expect(md).toContain("green"); + expect(md).toContain("Done"); + expect(md).toMatch(/data-type="status"/); + }); + + // The export form is only lossless if the import path can rebuild it. These + // assert the full MD -> HTML round-trip restores the node + its attributes, + // which is the marker <-> node contract each `parseHTML` relies on. + describe("import round-trip (markdownToHtml restores the node)", () => { + it("round-trips a pageBreak through export + import", async () => { const md = htmlToMarkdown( wrap('
'), ); - // Desired: the break survives in some form (e.g. a `---` rule or marker). - expect(md).toMatch(/(-{3,}|page-?break)/i); - }, - ); + const html = await markdownToHtml(md); + expect(html).toMatch(/]*data-type="pageBreak"[^>]*>/); + expect(html).toContain("before"); + expect(html).toContain("after"); + }); - it.fails( - "should NOT lose a transclusionReference's identity on Markdown export", - () => { + it("round-trips a transclusionReference (keeps data-id)", async () => { const md = htmlToMarkdown( wrap('
'), ); - // Desired: the referenced id survives so the block can be rebuilt. - expect(md).toContain("abc"); - }, - ); + const html = await markdownToHtml(md); + expect(html).toMatch(/]*data-type="transclusionReference"[^>]*>/); + expect(html).toContain("abc"); + }); - it.fails( - "should NOT lose a mention's data-id on Markdown export", - () => { + it("round-trips a mention (keeps data-id + data-label)", async () => { const md = htmlToMarkdown( '

hi @Bob there

', ); - // Desired: the mention keeps its stable identity (data-id), not just text. - expect(md).toContain("u1"); - }, - ); + const html = await markdownToHtml(md); + expect(html).toMatch(/]*data-type="mention"[^>]*>/); + expect(html).toContain("u1"); + expect(html).toContain("Bob"); + }); + + it("round-trips a status chip (keeps data-color)", async () => { + const md = htmlToMarkdown( + '

s Done

', + ); + const html = await markdownToHtml(md); + expect(html).toMatch(/]*data-type="status"[^>]*>/); + expect(html).toContain("green"); + }); + }); }); diff --git a/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts b/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts index 172786a3..6bb8e456 100644 --- a/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts +++ b/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts @@ -43,6 +43,54 @@ function fillEmptyFootnoteRefs(html: string): string { ); } +/** + * `pageBreak` and `transclusionReference` are childless atom
s. Like an + * empty footnote ref (see above), turndown treats a childless block as "blank" + * and replaces it with the blankRule BEFORE any custom rule can fire — so the + * node disappears from the export with no trace (#206 mdrt-2). Inject a + * zero-width space so the node is non-blank and our lossless rule runs; the + * rule rebuilds the tag from the element's attributes, so the injected char + * never reaches the output. + */ +function fillEmptyAtomBlocks(html: string): string { + return html.replace( + /]*\bdata-type="(?:pageBreak|transclusionReference)"[^>]*)>\s*<\/div>/gi, + (_m, attrs) => `
`, + ); +} + +/** HTML-escape an attribute value so a re-emitted raw-HTML tag is well-formed. */ +function escapeHtmlAttr(value: string): string { + return value + .replace(/&/g, '&') + .replace(/"/g, '"') + .replace(//g, '>'); +} + +/** HTML-escape text placed inside a re-emitted raw-HTML element. */ +function escapeHtmlText(value: string): string { + return value + .replace(/&/g, '&') + .replace(//g, '>'); +} + +/** + * Serialize ALL of an element's attributes back to a raw-HTML attribute string + * (leading space included). Generic on purpose: a custom node's identity lives + * entirely in its `data-*` attributes (data-id, data-color, data-source-page-id, + * data-transclusion-id, …), and serializing every attribute keeps the export + * lossless regardless of which attributes a given node carries. + */ +function serializeAttrs(node: any): string { + const attrs = node?.attributes; + if (!attrs) return ''; + return Array.from(attrs as ArrayLike<{ name: string; value: string }>) + .map((attr) => ` ${attr.name}="${escapeHtmlAttr(attr.value ?? '')}"`) + .join(''); +} + export function htmlToMarkdown(html: string): string { const turndownService = new TurndownService({ headingStyle: 'atx', @@ -69,12 +117,83 @@ export function htmlToMarkdown(html: string): string { video, footnoteReference, footnotesList, + pageBreak, + transclusionReference, + mention, + status, ]); return turndownService - .turndown(fillEmptyFootnoteRefs(html)) + .turndown(fillEmptyAtomBlocks(fillEmptyFootnoteRefs(html))) .replaceAll('
', ' '); } +/** + * Lossless export rules for custom nodes that have NO native Markdown syntax + * (#206 mdrt-2). Markdown cannot represent a page break, a transclusion + * reference, a mention's stable id, or a status chip's color — so rather than + * letting turndown silently drop them, each rule re-emits the node as raw HTML + * carrying every `data-*` attribute. Plain-Markdown viewers ignore the inert + * tag, and the import path round-trips it: `markdownToHtml` passes raw HTML + * through and each node's `parseHTML` (`div[data-type="…"]`, `span[…]`) rebuilds + * the ProseMirror node with its attributes intact. + */ +function pageBreak(turndownService: _TurndownService) { + turndownService.addRule('pageBreak', { + filter: function (node: HTMLInputElement) { + return ( + node.nodeName === 'DIV' && + node.getAttribute('data-type') === 'pageBreak' + ); + }, + replacement: function (_content: string, node: HTMLInputElement) { + return `\n\n\n\n`; + }, + }); +} + +function transclusionReference(turndownService: _TurndownService) { + turndownService.addRule('transclusionReference', { + filter: function (node: HTMLInputElement) { + return ( + node.nodeName === 'DIV' && + node.getAttribute('data-type') === 'transclusionReference' + ); + }, + replacement: function (_content: string, node: HTMLInputElement) { + return `\n\n\n\n`; + }, + }); +} + +function mention(turndownService: _TurndownService) { + turndownService.addRule('mention', { + filter: function (node: HTMLInputElement) { + return ( + node.nodeName === 'SPAN' && + node.getAttribute('data-type') === 'mention' + ); + }, + replacement: function (_content: string, node: HTMLInputElement) { + const text = escapeHtmlText(node.textContent || ''); + return `${text}`; + }, + }); +} + +function status(turndownService: _TurndownService) { + turndownService.addRule('status', { + filter: function (node: HTMLInputElement) { + return ( + node.nodeName === 'SPAN' && node.getAttribute('data-type') === 'status' + ); + }, + replacement: function (_content: string, node: HTMLInputElement) { + const text = escapeHtmlText(node.textContent || ''); + return `${text}`; + }, + }); +} + /** * Serialize the `htmlEmbed` node to Markdown. * -- 2.49.1 From 78cc0194928085d45e2905381972347b2d0eb791 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sun, 28 Jun 2026 23:45:45 +0300 Subject: [PATCH 2/5] fix(#248 F1): remove dead intentional-clear escape hatch from empty-guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-1 review F1 (maintainer decision: variant A). The store-side empty-guard's `context?.intentionalClear === true` branch was dead: `intentionalClear` is never set in production (connection context is {user, actor, aiChatId}); it appeared only in the guard and a hand-injected spec, so the guard already blocked empty-over-non-empty unconditionally. - persistence.extension.ts: drop the dead branch; the guard now simply skips empty-over-non-empty, full stop. Reference issue #251 (real intentional-clear UX) in the comment where the branch was. - persistence-store.spec.ts: remove the misleading "persists an intentional clear" escape-hatch test (false coverage — green only because the flag was injected by hand). Real guard tests (empty-over-empty allowed, empty-over-non-empty blocked, etc.) kept. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../extensions/persistence-store.spec.ts | 27 ++----------------- .../extensions/persistence.extension.ts | 18 +++++++------ 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/apps/server/src/collaboration/extensions/persistence-store.spec.ts b/apps/server/src/collaboration/extensions/persistence-store.spec.ts index c8ac68b9..8cf5fd0d 100644 --- a/apps/server/src/collaboration/extensions/persistence-store.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence-store.spec.ts @@ -210,8 +210,8 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot' // but not the STORE path, so an empty doc (a client/agent glitch, a bad // merge, an emptying transclusion) was written straight over the page and the // content was wiped silently. The store-side empty-guard now skips the write - // when the incoming doc is empty and the stored page is non-empty, unless an - // explicit intentional-clear signal is present. + // when the incoming doc is empty and the stored page is non-empty. A real + // intentional-clear UX is tracked separately in issue #251. it('does NOT overwrite non-empty content with a momentarily-empty live doc (persist-6)', async () => { const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; const document = ydocFor(emptyDoc); @@ -229,29 +229,6 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot' expect(historyQueue.add).not.toHaveBeenCalled(); }); - // persist-6 — a legitimate clear is NOT broken: with the explicit - // intentional-clear signal, emptying a non-empty page still persists. - it('persists an intentional clear of a non-empty page (persist-6 escape hatch)', async () => { - const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; - const document = ydocFor(emptyDoc); - pageRepo.findById.mockResolvedValue({ - ...persistedHumanPage('IGNORED'), - content: doc('IMPORTANT RICH CONTENT'), - }); - - await ext.onStoreDocument({ - documentName: `page.${PAGE_ID}`, - document, - context: { - user: { id: USER_ID, name: 'Alice' }, - actor: 'user', - intentionalClear: true, - }, - } as any); - - expect(pageRepo.updatePage).toHaveBeenCalledTimes(1); - }); - // persist-6 — a brand-new / already-empty page is unaffected: an empty store // over empty stored content is not blocked (it short-circuits as unchanged). it('does not block an empty store over an already-empty page (persist-6)', async () => { diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 47fadf8f..0739a8c6 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -217,21 +217,23 @@ export class PersistenceExtension implements Extension { // when the live doc isEmpty); the STORE path did not, so an empty // serialization was written straight over the page, wiping it // silently. Skip the write when the incoming doc is an empty - // paragraph doc AND the stored page is non-empty — unless the writer - // sends an explicit intentional-clear signal (a deliberate - // select-all + delete), the one case where emptying is the user's - // intent. New/empty pages are unaffected (stored content is already - // empty), and an unchanged doc was already short-circuited above. - const intentionalClear = context?.intentionalClear === true; + // paragraph doc AND the stored page is non-empty. New/empty pages are + // unaffected (stored content is already empty), and an unchanged doc + // was already short-circuited above. + // + // This unconditionally blocks empty-over-non-empty: a deliberate + // select-all + delete is currently indistinguishable from a glitch at + // this layer, so data-loss prevention wins. A real intentional-clear + // UX (a distinct signal threaded from the client) is tracked in issue + // #251; do not re-add an escape hatch here without that signal. if ( - !intentionalClear && isEmptyParagraphDoc(tiptapJson as any) && page.content && !isEmptyParagraphDoc(page.content as any) ) { this.logger.warn( `Skipping store for ${pageId}: empty live doc would overwrite ` + - `non-empty persisted content (no intentional-clear signal)`, + `non-empty persisted content`, ); page = null; return; -- 2.49.1 From 5308f2fb65c3cf643514d1c3408daa16ec71dd5c Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sun, 28 Jun 2026 23:45:53 +0300 Subject: [PATCH 3/5] test(#248 F2): cover HTML-escaping of attrs/text in lossless raw-HTML export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-1 review F2. The escapeHtmlAttr (&,",<,>) and escapeHtmlText (&,<,>) helpers in turndown.utils were untested — every existing round-trip case used alphanumeric values, so no escape branch ran. A mention/status carrying HTML special chars would re-emit malformed HTML that import's parseHTML can't restore → the same data loss this PR fixes, uncaught. Add a round-trip case to turndown.dataloss.test.ts: a mention with `&` and `"` in both data-label and visible text. Assert (a) the exported Markdown carries the correctly-escaped, well-formed tag (data-label="A & "B"", text escapes &), not the raw malformed form; and (b) markdownToHtml restores the original unescaped values (attribute `A & "B"`, text `@A & "B"`). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../markdown/utils/turndown.dataloss.test.ts | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts index 98fdc03a..dd5a8950 100644 --- a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts +++ b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts @@ -107,5 +107,35 @@ describe("htmlToMarkdown — custom nodes are preserved losslessly (#206 mdrt-2) expect(html).toMatch(/]*data-type="status"[^>]*>/); expect(html).toContain("green"); }); + + // HTML special chars in an attribute value or in a node's text must be + // ESCAPED when re-emitted as raw HTML, otherwise the exported tag is + // malformed and `markdownToHtml`'s parser cannot restore the original value + // (the same silent data loss this PR fixes). This exercises the escape + // branches of escapeHtmlAttr (&, ", <, >) and escapeHtmlText (&, <, >) that + // the alphanumeric-only cases above never hit. The mention's data-label and + // visible text both carry `&` and `"`. + it("escapes HTML special chars in attrs + text and round-trips them", async () => { + const md = htmlToMarkdown( + `

hi @A & "B" there

`, + ); + + // (a) The exported Markdown carries a WELL-FORMED, correctly-escaped tag: + // the attribute escapes both `&` and `"`; the text escapes `&` (a `"` + // inside text content is legal, so it stays literal). + expect(md).toContain('data-label="A & "B""'); + expect(md).toContain('>@A & "B"'); + // And NOT the raw, malformed form that would break the attribute. + expect(md).not.toContain('data-label="A & "B""'); + + // (b) Import restores the ORIGINAL (unescaped) values, attribute and text. + const html = await markdownToHtml(md); + const dom = new DOMParser().parseFromString(html as string, "text/html"); + const span = dom.querySelector('span[data-type="mention"]'); + expect(span).not.toBeNull(); + expect(span!.getAttribute("data-id")).toBe("u1"); + expect(span!.getAttribute("data-label")).toBe('A & "B"'); + expect(span!.textContent).toBe('@A & "B"'); + }); }); }); -- 2.49.1 From 04fda0c0b28a6e553eff0fa6fd57f41b37e03cfe Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Mon, 29 Jun 2026 00:04:56 +0300 Subject: [PATCH 4/5] test(#248 F2): exercise <,> escape branches in raw-HTML export round-trip The escaping round-trip test's data (A & "B") only contained & and ", so the <,> branches of escapeHtmlAttr (&,",<,>) and escapeHtmlText (&,<,>) were never exercised; a regression dropping <,> escaping would still pass. Extend the data to A & "C" in both the data-label attribute and the visible text so both functions' <,> branches are genuinely covered. Assert the well-formed escaped tag (attr: A & <B> "C", text: A & <B> "C"), explicitly reject the raw tag-corrupting forms, and confirm markdownToHtml restores the originals. Comment updated to match. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../markdown/utils/turndown.dataloss.test.ts | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts index dd5a8950..9bec4e78 100644 --- a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts +++ b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts @@ -111,22 +111,28 @@ describe("htmlToMarkdown — custom nodes are preserved losslessly (#206 mdrt-2) // HTML special chars in an attribute value or in a node's text must be // ESCAPED when re-emitted as raw HTML, otherwise the exported tag is // malformed and `markdownToHtml`'s parser cannot restore the original value - // (the same silent data loss this PR fixes). This exercises the escape - // branches of escapeHtmlAttr (&, ", <, >) and escapeHtmlText (&, <, >) that - // the alphanumeric-only cases above never hit. The mention's data-label and - // visible text both carry `&` and `"`. - it("escapes HTML special chars in attrs + text and round-trips them", async () => { + // (the same silent data loss this PR fixes). Dropping `<`/`>` escaping is the + // dangerous regression: a stray `<` or `>` corrupts the tag (or injects new + // markup), so the test data carries ALL of `&`, `"`, `<`, `>` in BOTH the + // data-label attribute and the visible text. That fully exercises + // escapeHtmlAttr's `&,",<,>` branches and escapeHtmlText's `&,<,>` branches + // (escapeHtmlText leaves `"` literal); the alphanumeric-only cases above hit + // none of them. + it("escapes HTML special chars (& \" < >) in attrs + text and round-trips them", async () => { const md = htmlToMarkdown( - `

hi @A & "B" there

`, + `

hi @A & <B> "C" there

`, ); // (a) The exported Markdown carries a WELL-FORMED, correctly-escaped tag: - // the attribute escapes both `&` and `"`; the text escapes `&` (a `"` - // inside text content is legal, so it stays literal). - expect(md).toContain('data-label="A & "B""'); - expect(md).toContain('>@A & "B"'); - // And NOT the raw, malformed form that would break the attribute. - expect(md).not.toContain('data-label="A & "B""'); + // the attribute escapes `&`, `<`, `>` AND `"`; the text escapes `&`, `<`, + // `>` (a `"` inside text content is legal, so it stays literal). + expect(md).toContain('data-label="A & <B> "C""'); + expect(md).toContain('>@A & <B> "C"'); + // And explicitly NOT the raw, tag-corrupting forms: a literal `` (would + // mean `<`/`>` escaping was dropped in either the attr or the text)... + expect(md).not.toContain(""); + // ...nor the malformed attribute that an unescaped `"` would produce. + expect(md).not.toContain('data-label="A & <B> "C""'); // (b) Import restores the ORIGINAL (unescaped) values, attribute and text. const html = await markdownToHtml(md); @@ -134,8 +140,8 @@ describe("htmlToMarkdown — custom nodes are preserved losslessly (#206 mdrt-2) const span = dom.querySelector('span[data-type="mention"]'); expect(span).not.toBeNull(); expect(span!.getAttribute("data-id")).toBe("u1"); - expect(span!.getAttribute("data-label")).toBe('A & "B"'); - expect(span!.textContent).toBe('@A & "B"'); + expect(span!.getAttribute("data-label")).toBe('A & "C"'); + expect(span!.textContent).toBe('@A & "C"'); }); }); }); -- 2.49.1 From 90a3fa012d65654477d35b20f7ad6fe285f67dc1 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Mon, 29 Jun 2026 01:56:00 +0300 Subject: [PATCH 5/5] test(#248 F3): make empty-over-empty test actually reach the store empty-guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "does not block an empty store over an already-empty page" test set the stored page.content to TiptapTransformer.fromYdoc(document,'default') — exactly the value tiptapJson is computed from — so isDeepStrictEqual(tiptapJson, page.content) was TRUE and onStoreDocument RETURNED at the unchanged short-circuit before ever reaching the empty-guard. It exercised the old short-circuit, not the new guard's `!isEmptyParagraphDoc(page.content)` branch (the only NEW branch protecting empty existing pages from over-blocking); the condition could be removed and the test would still pass (false coverage). Set stored content to an empty paragraph with `content: []` — empty per isEmptyParagraphDoc but NOT deep-equal to the live doc (which normalizes to a paragraph with `attrs: { indent: 0 }` and no content key). Execution now skips the short-circuit and enters the guard; reorient the assertion to "the write is NOT blocked" (updatePage IS called). Verified the test now FAILS if the `!isEmptyParagraphDoc(page.content)` condition is removed. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../extensions/persistence-store.spec.ts | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/apps/server/src/collaboration/extensions/persistence-store.spec.ts b/apps/server/src/collaboration/extensions/persistence-store.spec.ts index 8cf5fd0d..519abc5b 100644 --- a/apps/server/src/collaboration/extensions/persistence-store.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence-store.spec.ts @@ -229,21 +229,36 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot' expect(historyQueue.add).not.toHaveBeenCalled(); }); - // persist-6 — a brand-new / already-empty page is unaffected: an empty store - // over empty stored content is not blocked (it short-circuits as unchanged). + // persist-6 — a legitimately-empty existing page must still be writable when + // the empty live doc actually DIFFERS from the stored content (so the + // unchanged short-circuit does NOT fire and execution reaches the empty-guard). + // This exercises the guard's third condition `!isEmptyParagraphDoc(page.content)`: + // because the stored page is ALSO empty, the guard must NOT block the write. + // The live doc normalizes to a paragraph carrying `attrs: { indent: 0 }` and no + // `content` key; the stored page is an empty paragraph with `content: []` — + // both empty per `isEmptyParagraphDoc`, but NOT `isDeepStrictEqual`, so the + // store passes the short-circuit (~line 208) and genuinely enters the guard + // (~line 229). If the `!isEmptyParagraphDoc(page.content)` condition were + // removed, the guard would block this write and updatePage would never run, + // failing this test. it('does not block an empty store over an already-empty page (persist-6)', async () => { - const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; - const document = ydocFor(emptyDoc); - const normalized = TiptapTransformer.fromYdoc(document, 'default'); + const liveEmptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] }; + const document = ydocFor(liveEmptyDoc); + // Stored content is empty per isEmptyParagraphDoc (paragraph with content:[]) + // but structurally NOT deep-equal to the normalized live doc — so execution + // skips the unchanged short-circuit and reaches the empty-guard. + const storedEmptyDoc = { type: 'doc', content: [{ type: 'paragraph', content: [] }] }; pageRepo.findById.mockResolvedValue({ ...persistedHumanPage('IGNORED'), - content: normalized, + content: storedEmptyDoc, }); await ext.onStoreDocument(buildData(document, 'user') as any); - // Unchanged empty-over-empty: short-circuits, no spurious write, no error. - expect(pageRepo.updatePage).not.toHaveBeenCalled(); + // Empty-over-empty reaches the guard, which must let the write through + // (the stored page is empty, so the empty-overwrite protection does not + // apply). updatePage IS called — proving `!isEmptyParagraphDoc(page.content)`. + expect(pageRepo.updatePage).toHaveBeenCalledTimes(1); }); // persist-1 — when every attempt fails the hook must NOT report a phantom -- 2.49.1