From 5308f2fb65c3cf643514d1c3408daa16ec71dd5c Mon Sep 17 00:00:00 2001
From: claude code agent 227
Date: Sun, 28 Jun 2026 23:45:53 +0300
Subject: [PATCH] test(#248 F2): cover HTML-escaping of attrs/text in lossless raw-HTML export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-1 review F2. The escapeHtmlAttr (&,",<,>) and escapeHtmlText
(&,<,>) helpers in turndown.utils were untested — every existing
round-trip case used alphanumeric values, so no escape branch ran. A
mention/status carrying HTML special chars would re-emit malformed HTML
that import's parseHTML can't restore → the same data loss this PR
fixes, uncaught.

Add a round-trip case to turndown.dataloss.test.ts: a mention with `&`
and `"` in both data-label and visible text. Assert (a) the exported
Markdown carries the correctly-escaped, well-formed tag
(data-label="A &amp; &quot;B&quot;", text escapes & as &amp;), not the
raw malformed form; and (b) markdownToHtml restores the original
unescaped values (attribute `A & "B"`, text `@A & "B"`).

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .../markdown/utils/turndown.dataloss.test.ts | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts
index 98fdc03a..dd5a8950 100644
--- a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts
+++ b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts
@@ -107,5 +107,35 @@ describe("htmlToMarkdown — custom nodes are preserved losslessly (#206 mdrt-2)
       expect(html).toMatch(/<span[^>]*data-type="status"[^>]*>/);
       expect(html).toContain("green");
     });
+
+    // HTML special chars in an attribute value or in a node's text must be
+    // ESCAPED when re-emitted as raw HTML, otherwise the exported tag is
+    // malformed and `markdownToHtml`'s parser cannot restore the original value
+    // (the same silent data loss this PR fixes). This exercises the escape
+    // branches of escapeHtmlAttr (&, ", <, >) and escapeHtmlText (&, <, >) that
+    // the alphanumeric-only cases above never hit. The mention's data-label and
+    // visible text both carry `&` and `"`.
+    it("escapes HTML special chars in attrs + text and round-trips them", async () => {
+      const md = htmlToMarkdown(
+        `<p>hi <span data-type="mention" data-id="u1" data-label="A &amp; &quot;B&quot;">@A &amp; "B"</span> there</p>`,
+      );
+
+      // (a) The exported Markdown carries a WELL-FORMED, correctly-escaped tag:
+      //     the attribute escapes both `&` and `"`; the text escapes `&` (a `"`
+      //     inside text content is legal, so it stays literal).
+      expect(md).toContain('data-label="A &amp; &quot;B&quot;"');
+      expect(md).toContain('>@A &amp; "B"</span>');
+      // And NOT the raw, malformed form that would break the attribute.
+      expect(md).not.toContain('data-label="A & "B""');
+
+      // (b) Import restores the ORIGINAL (unescaped) values, attribute and text.
+      const html = await markdownToHtml(md);
+      const dom = new DOMParser().parseFromString(html as string, "text/html");
+      const span = dom.querySelector('span[data-type="mention"]');
+      expect(span).not.toBeNull();
+      expect(span!.getAttribute("data-id")).toBe("u1");
+      expect(span!.getAttribute("data-label")).toBe('A & "B"');
+      expect(span!.textContent).toBe('@A & "B"');
+    });
   });
 });