diff --git a/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts b/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts
index fbee45d2..917f1d51 100644
--- a/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts
+++ b/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts
@@ -103,6 +103,39 @@ describe("html-embed codec — Node Buffer fallback branch", () => {
   });
 });
 
+describe("html-embed codec — encode failure fallback", () => {
+  it("returns '' (not raw source) when encoding throws", () => {
+    // Force the catch branch: a btoa that throws (e.g. simulating the
+    // Latin1-boundary error). The codec must NOT return the raw source —
+    // raw markup in data-source would fail to decode and undermine inert
+    // storage — it drops to "" symmetrically with the decode side.
+    // The source is deliberately non-empty: with "" a raw-source fallback
+    // would also produce "" and the assertion could not tell them apart.
+    const src = "<p>hello</p>";
+    // Snapshot the property so the stub cannot leak into later tests.
+    const savedBtoa = Object.getOwnPropertyDescriptor(globalThis, "btoa");
+    // @ts-expect-error — stub btoa with a throwing impl for this test.
+    globalThis.btoa = () => {
+      throw new Error("boom");
+    };
+    try {
+      expect(encodeHtmlEmbedSource(src)).toBe("");
+    } finally {
+      if (savedBtoa) {
+        Object.defineProperty(globalThis, "btoa", savedBtoa);
+      } else {
+        Reflect.deleteProperty(globalThis, "btoa");
+      }
+    }
+  });
+
+  it("returns '' when the source contains a lone surrogate", () => {
+    // No stub needed: encodeURIComponent throws URIError on a lone
+    // surrogate, which is expected to land in the same catch branch.
+    expect(encodeHtmlEmbedSource("\uD800")).toBe("");
+  });
+});
+
 describe("html-embed codec — decode of malformed input (browser branch)", () => {
   it("returns '' for input atob rejects (catch branch)", () => {
     // atob throws on characters outside the base64 alphabet; the codec catches
diff --git a/packages/editor-ext/src/lib/html-embed/html-embed.ts b/packages/editor-ext/src/lib/html-embed/html-embed.ts
index d3d004a1..2a47eb43 100644
--- a/packages/editor-ext/src/lib/html-embed/html-embed.ts
+++ b/packages/editor-ext/src/lib/html-embed/html-embed.ts
@@ -39,7 +39,16 @@ export function encodeHtmlEmbedSource(source: string): string {
     // Node fallback (server-side schema parsing has no global btoa).
     return Buffer.from(encodeURIComponent(source), "utf-8").toString("base64");
   } catch {
-    // Never swallow silently in a way that loses data: fall back to raw.
+    // On an encoding error we drop to "" rather than returning the raw source.
+    // Returning raw markup here is NOT a safe fallback: the value is stored in
+    // the `data-source` attribute and read back through decodeHtmlEmbedSource,
+    // which base64-decodes it — raw (un-encoded) HTML would make atob/
+    // decodeURIComponent throw and decode to "" anyway, and an un-encoded value
+    // sitting in the attribute defeats the inert-storage guarantee (it could
+    // become an injection vector). So "" is the correct, decode-symmetric
+    // failure mode. This branch is rare but reachable: btoa only ever sees the
+    // Latin1-safe ASCII output of encodeURIComponent, yet encodeURIComponent
+    // itself throws URIError when the source contains a lone surrogate.
     return "";
   }
 }