From afbc6b220200bf5c5936a4f9638b4438193e5c25 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sun, 21 Jun 2026 03:17:37 +0300 Subject: [PATCH] docs(html-embed): correct the encode-catch comment (returns '', not raw) (#78) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The encode catch comment promised 'fall back to raw' but the code returns ''; returning raw source wouldn't help anyway (un-encoded markup can't be atob-decoded downstream, so decode would yield '' regardless), and a raw value in data-source breaks the inert-storage guarantee. '' is the correct decode-symmetric failure — fix the misleading comment to say so. Adds a codec test for the encode-throw path. Co-Authored-By: Claude Opus 4.8 --- .../src/lib/html-embed/html-embed-codec.spec.ts | 15 +++++++++++++++ .../editor-ext/src/lib/html-embed/html-embed.ts | 10 +++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts b/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts index fbee45d2..917f1d51 100644 --- a/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts +++ b/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts @@ -103,6 +103,21 @@ describe("html-embed codec — Node Buffer fallback branch", () => { }); }); +describe("html-embed codec — encode failure fallback", () => { + it("returns '' (not raw source) when encoding throws", () => { + // Force the catch branch: a btoa that throws (e.g. simulating the + // Latin1-boundary error). The codec must NOT return the raw source — + // raw markup in data-source would fail to decode and undermine inert + // storage — it drops to "" symmetrically with the decode side. + const src = "<p>hello</p>"; + // @ts-expect-error — stub btoa with a throwing impl for this test.
+ globalThis.btoa = () => { + throw new Error("boom"); + }; + expect(encodeHtmlEmbedSource(src)).toBe(""); + }); +}); + describe("html-embed codec — decode of malformed input (browser branch)", () => { it("returns '' for input atob rejects (catch branch)", () => { // atob throws on characters outside the base64 alphabet; the codec catches diff --git a/packages/editor-ext/src/lib/html-embed/html-embed.ts b/packages/editor-ext/src/lib/html-embed/html-embed.ts index d3d004a1..2a47eb43 100644 --- a/packages/editor-ext/src/lib/html-embed/html-embed.ts +++ b/packages/editor-ext/src/lib/html-embed/html-embed.ts @@ -39,7 +39,15 @@ export function encodeHtmlEmbedSource(source: string): string { // Node fallback (server-side schema parsing has no global btoa). return Buffer.from(encodeURIComponent(source), "utf-8").toString("base64"); } catch { - // Never swallow silently in a way that loses data: fall back to raw. + // On an encoding error we drop to "" rather than returning the raw source. + // Returning raw markup here is NOT a safe fallback: the value is stored in + // the `data-source` attribute and read back through decodeHtmlEmbedSource, + // which base64-decodes it — raw (un-encoded) HTML would make atob/ + // decodeURIComponent throw and decode to "" anyway, and an un-encoded value + // sitting in the attribute defeats the inert-storage guarantee (it could + // become an injection vector). So "" is the correct, decode-symmetric + // failure mode. btoa itself cannot throw here (encodeURIComponent output is + // ASCII), but encodeURIComponent throws URIError on a lone surrogate. return ""; } }