test(integrations/client/packages): batch 2-4 unit coverage + zip-slip guard extraction

Batch 2-4 of the test-strategy rollout. Test-only except one minimal, behaviour-preserving extraction in file.utils.ts. All suites green: server 82 suites/836+1todo, editor-ext 86, mcp 270, client (new files) 86. integrations (server): - file.utils.ts: extract pure `isEntryPathSafe(entryName, targetDir)` from extractZipInternal so the zip-slip/path-traversal guard is unit-testable; call site rerouted, behaviour identical (only a warn-message string merged). - file.utils.zip-safety.spec.ts: traversal/strip/__MACOSX/prefix-confusion cases (mutation-resistant: fails if containment loses the path.sep). - import-formatter / import.utils / table-utils / export utils / import.service extractTitleAndRemoveHeading: pure import/export transforms, Notion/XWiki formatting, table colspan widths (idempotent), slug/link rewriting. client: - safeRedirectPath: open-redirect guard, every reject branch independently. - buildChatMarkdown (fence anti-breakout), label-colors, normalize-label, share tree build, page URL builders, notification time-grouping (fake clock). packages: - editor-ext: deriveFootnoteId golden table, parseHtmlEmbedHeight crafted values, orphan footnote extraction. - mcp: deriveFootnoteId parity (drift guard vs editor-ext), applyTextEdits idempotency + cross-block replaceAll, diffDocs/summarizeChange on reorder. Reviewed (APPROVE): extraction behaviour-preserving, assertions mutation-resistant. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-21 18:22:15 +03:00
parent f8e8ada581
commit 0b2af34029
20 changed files with 2495 additions and 17 deletions
--- a/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts
+++ b/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts
@@ -0,0 +1,91 @@
+import { describe, it, expect } from "vitest";
+import { deriveFootnoteId } from "./footnote-util";
+
+/**
+ * GOLDEN TABLE for `deriveFootnoteId` (and its private alphabetic `suffix`).
+ *
+ * deriveFootnoteId is DELIBERATELY duplicated in
+ *   packages/mcp/src/lib/collaboration.ts
+ * and the two copies MUST stay byte-for-byte equivalent in behavior so the same
+ * markdown imported through the editor and through the MCP path yields identical
+ * footnote ids. This table is the SHARED contract: the parity test
+ *   packages/mcp/test/unit/derive-id-parity.test.mjs
+ * pins the exact SAME (input -> expected) pairs against the COMPILED mcp build.
+ * If either copy drifts, one of the two tests goes red.
+ *
+ * Keep this constant in sync with GOLDEN in the mcp parity test.
+ */
+export const DERIVE_GOLDEN: Array<{
+  originalId: string;
+  occurrence: number;
+  taken: string[];
+  expected: string;
+  why: string;
+}> = [
+  // Base candidate `${id}__${occurrence}` when nothing collides.
+  { originalId: "d", occurrence: 2, taken: [], expected: "d__2", why: "plain base, second occurrence" },
+  { originalId: "d", occurrence: 3, taken: [], expected: "d__3", why: "plain base, third occurrence" },
+  // The base is taken -> first alphabetic bump is "b" (NOT "a": suffix starts at 'b').
+  { originalId: "d", occurrence: 2, taken: ["d__2"], expected: "d__2b", why: "base taken -> first bump 'b'" },
+  // Base + first bump taken -> "c".
+  { originalId: "d", occurrence: 2, taken: ["d__2", "d__2b"], expected: "d__2c", why: "base+b taken -> 'c'" },
+  // A non-contiguous taken set still walks deterministically to the first free slot.
+  {
+    originalId: "d",
+    occurrence: 2,
+    taken: ["d__2", "d__2b", "d__2c", "d__2d"],
+    expected: "d__2e",
+    why: "base + b,c,d taken -> 'e'",
+  },
+  // >25 bump: base + b..z (the 25 single-letter suffixes) all taken -> "bb".
+  // suffix(26) === "bb" (base-25 over b..z, carrying to a two-letter suffix).
+  {
+    originalId: "d",
+    occurrence: 2,
+    taken: ["d__2", ...singleLetterSuffixes().map((s) => `d__2${s}`)],
+    expected: "d__2bb",
+    why: ">25 collisions -> two-letter suffix 'bb'",
+  },
+];
+
+/** The 25 single-letter suffixes the scheme uses: b, c, ..., z (n = 1..25). */
+function singleLetterSuffixes(): string[] {
+  // Mirror of the production suffix() for n in 1..25 (all single letters).
+  // n=1 -> 'b' ... n=25 -> 'z'. Used only to BUILD the taken-set for the
+  // >25 row; the EXPECTED value (d__2bb) is asserted against the real function.
+  return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i));
+}
+
+describe("deriveFootnoteId golden table (cross-package drift guard)", () => {
+  for (const row of DERIVE_GOLDEN) {
+    it(`derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) === "${row.expected}" — ${row.why}`, () => {
+      const got = deriveFootnoteId(
+        row.originalId,
+        row.occurrence,
+        new Set(row.taken),
+      );
+      expect(got).toBe(row.expected);
+    });
+  }
+
+  it("the >25 row's taken-set really contains b..z (25 single letters) plus the base", () => {
+    // Sanity-pin the construction so a typo in singleLetterSuffixes() cannot make
+    // the >25 assertion pass for the wrong reason.
+    const letters = singleLetterSuffixes();
+    expect(letters).toHaveLength(25);
+    expect(letters[0]).toBe("b");
+    expect(letters[24]).toBe("z");
+  });
+
+  it("is a PURE function: it never mutates the taken set it is given", () => {
+    const taken = new Set(["d__2"]);
+    const before = [...taken];
+    deriveFootnoteId("d", 2, taken);
+    expect([...taken]).toEqual(before);
+  });
+
+  it("is deterministic: same input -> same output across calls", () => {
+    const mk = () => new Set(["d__2", "d__2b"]);
+    expect(deriveFootnoteId("d", 2, mk())).toBe(deriveFootnoteId("d", 2, mk()));
+  });
+});
--- a/packages/editor-ext/src/lib/html-embed/html-embed.height.test.ts
+++ b/packages/editor-ext/src/lib/html-embed/html-embed.height.test.ts
@@ -0,0 +1,63 @@
+import { describe, it, expect } from "vitest";
+import {
+  parseHtmlEmbedHeight,
+  renderHtmlEmbedHeight,
+} from "./html-embed";
+
+/**
+ * PIN the CURRENT behavior of `parseHtmlEmbedHeight` for crafted/corrupt
+ * `data-height` attribute values. The function is a thin parseInt + Number.isFinite
+ * guard; these tests document EXACTLY what it does today (including the cases
+ * where today's behavior is arguably wrong) so any future change is a conscious
+ * one and shows up as a failing test rather than a silent regression.
+ */
+describe("parseHtmlEmbedHeight: crafted / corrupt data-height", () => {
+  it('"-5" passes through as -5 (DOCUMENTED QUIRK: negative height is not rejected)', () => {
+    // Number.isFinite(-5) is true, so the guard does NOT catch it. A negative
+    // fixed height is almost certainly wrong downstream (it disables auto-resize
+    // and yields a negative/clamped iframe height), but the function as written
+    // returns it verbatim. This asserts the REAL behavior, not the ideal one.
+    expect(parseHtmlEmbedHeight("-5")).toBe(-5);
+  });
+
+  it('"0" returns 0 (NOT null) — note: renderHtmlEmbedHeight treats 0 as auto-resize, so parse/render are asymmetric at 0', () => {
+    // parseInt("0") === 0 and Number.isFinite(0) is true, so parse keeps 0.
+    expect(parseHtmlEmbedHeight("0")).toBe(0);
+    // But the render side treats a falsy 0 as "auto-resize" => emits NO attribute.
+    // So a stored height of 0 does not round-trip back to data-height="0".
+    expect(renderHtmlEmbedHeight(0)).toEqual({});
+  });
+
+  it('" 300 " (surrounding whitespace) parses to 300 — parseInt trims leading space', () => {
+    expect(parseHtmlEmbedHeight(" 300 ")).toBe(300);
+  });
+
+  it('"3.9" truncates to 3 — parseInt drops the fractional part', () => {
+    expect(parseHtmlEmbedHeight("3.9")).toBe(3);
+  });
+
+  it('a huge "99999999999" passes through unclamped (finite => no upper bound here)', () => {
+    // The guard only rejects NaN/Infinity; it does not clamp magnitude. Any
+    // clamping is a downstream concern, NOT this function's job.
+    expect(parseHtmlEmbedHeight("99999999999")).toBe(99999999999);
+  });
+
+  it('"12px" parses the leading integer (12) — parseInt stops at the first non-digit', () => {
+    expect(parseHtmlEmbedHeight("12px")).toBe(12);
+  });
+
+  it("null / empty / whitespace-only / non-numeric => null (the auto-resize sentinel)", () => {
+    expect(parseHtmlEmbedHeight(null)).toBeNull();
+    expect(parseHtmlEmbedHeight("")).toBeNull();
+    expect(parseHtmlEmbedHeight("   ")).toBeNull();
+    expect(parseHtmlEmbedHeight("abc")).toBeNull();
+  });
+
+  it("never returns NaN for a non-numeric value (the Number.isFinite guard's point)", () => {
+    // NaN is typeof "number" and would slip past a naive `typeof n === number`
+    // check; the guard must map it to null. This is the core invariant.
+    const out = parseHtmlEmbedHeight("not-a-number");
+    expect(out).toBeNull();
+    expect(Number.isNaN(out as unknown as number)).toBe(false);
+  });
+});
--- a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts
+++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts
@@ -0,0 +1,63 @@
+import { describe, it, expect } from "vitest";
+import { extractFootnoteDefinitions } from "./footnote.marked";
+
+/** Pull the ordered list of `data-footnote-def` ids out of the rendered section. */
+function defIds(section: string): string[] {
+  return [...section.matchAll(/data-footnote-def data-id="([^"]+)"/g)].map(
+    (m) => m[1],
+  );
+}
+
+/** Pull the ordered list of `[^id]` markers that remain in the body. */
+function bodyMarkers(body: string): string[] {
+  return [...body.matchAll(/\[\^([^\]\s]+)\]/g)].map((m) => m[1]);
+}
+
+describe("extractFootnoteDefinitions: more definitions than markers (orphans)", () => {
+  // Body has ONE `[^d]` reference marker but THREE `[^d]:` definitions. The
+  // surplus definitions have no marker to pair with — they must NOT be silently
+  // merged into one footnote (the editor's last-wins sync would otherwise drop
+  // two of them). The dedup gives each colliding definition a deterministic
+  // derived id so all three survive as distinct footnoteDefinition nodes.
+  const md = ["See[^d].", "", "[^d]: a", "[^d]: b", "[^d]: c"].join("\n");
+
+  it("emits 3 DISTINCT definition ids: d, d__2, d__3 (derived scheme, in order)", () => {
+    const { section } = extractFootnoteDefinitions(md);
+    const ids = defIds(section);
+    expect(ids).toEqual(["d", "d__2", "d__3"]);
+    // All distinct: nothing was merged away.
+    expect(new Set(ids).size).toBe(3);
+  });
+
+  it("preserves each definition's text against its (possibly derived) id", () => {
+    const { section } = extractFootnoteDefinitions(md);
+    // First definition keeps the original id and its text.
+    expect(section).toContain('data-footnote-def data-id="d"><p>a</p>');
+    // The two surplus definitions survive as orphans with derived ids.
+    expect(section).toContain('data-footnote-def data-id="d__2"><p>b</p>');
+    expect(section).toContain('data-footnote-def data-id="d__3"><p>c</p>');
+  });
+
+  it("leaves the SINGLE body marker as [^d] (no surplus marker to rewrite)", () => {
+    const { body } = extractFootnoteDefinitions(md);
+    // There is exactly one reference marker and it is untouched: the keeper
+    // definition pairs with it. The orphan defs have no marker, so the body is
+    // unchanged except for the stripped definition lines.
+    expect(bodyMarkers(body)).toEqual(["d"]);
+    expect(body).toContain("See[^d].");
+    // The definition lines themselves were pulled OUT of the body.
+    expect(body).not.toContain("[^d]: a");
+    expect(body).not.toContain("[^d]: b");
+    expect(body).not.toContain("[^d]: c");
+  });
+
+  it("does not crash and produces a well-formed footnotes section", () => {
+    const { section } = extractFootnoteDefinitions(md);
+    expect(section.startsWith("<section data-footnotes>")).toBe(true);
+    expect(section.endsWith("</section>")).toBe(true);
+    // Exactly three definition divs.
+    expect(
+      [...section.matchAll(/<div data-footnote-def/g)],
+    ).toHaveLength(3);
+  });
+});