From 3d4ad664b39bf235893e54370737c94684a69a08 Mon Sep 17 00:00:00 2001
From: claude_code <claude_code@vvzvlad.xyz>
Date: Sun, 21 Jun 2026 19:10:27 +0300
Subject: [PATCH] test(refactor-tail): extract pure cores + cover
 collab/share/ai-chat/client gate

Batches 6-9: behaviour-preserving extractions of testable pure cores plus the
tests they unblock, and a fix for the broken client test environment.
Full suites green: server 113 suites / 1117 + 1 todo, client 30 files / 338.

client (R0 infra):
- vitest.setup.ts: in-memory localStorage/sessionStorage Storage stub wired via
  setupFiles. Unblocks menu-items.gating.test.ts (was 9 failing) -> client suite
  fully green. + menu-items.suggestions.test.ts (getSuggestionItems filter/sort).

share:
- extract buildShareMetaHtml (share-seo.util.ts) from the SEO controller; tests
  for reflected-XSS escaping in <title>/og/twitter meta, noindex, truncation;
  extractPageSlugId; updateAttachmentAttr; prepareContentForShare comment-strip
  (anonymous-viewer metadata-leak guard).

ai-chat (security extractions):
- selectAccessibleHits: CASL post-filter for semantic search (restricted page in
  an accessible space must NOT leak to the agent).
- validateResolvedAddresses: SSRF connect-time guard (block if ANY resolved
  address is private).
- resolveAudioFormat: mime whitelist (dead `?? 'webm'` fallback dropped, set
  unchanged). + mcp-servers toView header-leak guard, MCP tool namespacing.

collaboration (data-loss area):
- extract computeHistoryJob (pins the "agent delay MUST stay 0" invariant) and
  resolveSource. Integration: onAuthenticate read-only matrix (collab auth
  bypass), HistoryProcessor (contributor restore on save failure), onStoreDocument
  Approach-A boundary snapshot (human revision pinned before agent overwrite).

Reviewed (APPROVE WITH SUGGESTIONS): extractions behaviour-preserving, security
tests mutation-resistant.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../slash-menu/menu-items.suggestions.test.ts |  84 +++++++
 apps/client/vitest.config.ts                  |   2 +-
 apps/client/vitest.setup.ts                   |  51 +++++
 .../authentication.extension.spec.ts          | 211 ++++++++++++++++++
 .../extensions/compute-history-job.spec.ts    | 105 +++++++++
 .../extensions/persistence-store.spec.ts      | 185 +++++++++++++++
 .../extensions/persistence.extension.ts       |  78 +++++--
 .../processors/history.processor.spec.ts      | 200 +++++++++++++++++
 .../src/core/ai-chat/ai-chat.controller.ts    |  73 +++---
 .../external-mcp/mcp-clients.service.ts       |  45 ++--
 .../external-mcp/mcp-namespacing.spec.ts      | 136 +++++++++++
 .../external-mcp/mcp-servers-to-view.spec.ts  |  85 +++++++
 .../validate-resolved-addresses.spec.ts       |  67 ++++++
 .../core/ai-chat/resolve-audio-format.spec.ts |  53 +++++
 .../ai-chat/tools/ai-chat-tools.service.ts    |  55 +++--
 .../tools/select-accessible-hits.spec.ts      |  96 ++++++++
 .../core/share/share-comment-strip.spec.ts    | 176 +++++++++++++++
 .../share-seo.controller.extract-slug.spec.ts |  41 ++++
 .../src/core/share/share-seo.controller.ts    |  23 +-
 .../src/core/share/share-seo.meta.spec.ts     | 126 +++++++++++
 apps/server/src/core/share/share-seo.util.ts  |  40 ++++
 apps/server/src/core/share/share.util.spec.ts |  62 +++++
 22 files changed, 1893 insertions(+), 101 deletions(-)
 create mode 100644 apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts
 create mode 100644 apps/client/vitest.setup.ts
 create mode 100644 apps/server/src/collaboration/extensions/authentication.extension.spec.ts
 create mode 100644 apps/server/src/collaboration/extensions/compute-history-job.spec.ts
 create mode 100644 apps/server/src/collaboration/extensions/persistence-store.spec.ts
 create mode 100644 apps/server/src/collaboration/processors/history.processor.spec.ts
 create mode 100644 apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts
 create mode 100644 apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts
 create mode 100644 apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts
 create mode 100644 apps/server/src/core/ai-chat/resolve-audio-format.spec.ts
 create mode 100644 apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts
 create mode 100644 apps/server/src/core/share/share-comment-strip.spec.ts
 create mode 100644 apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts
 create mode 100644 apps/server/src/core/share/share-seo.meta.spec.ts
 create mode 100644 apps/server/src/core/share/share-seo.util.ts
 create mode 100644 apps/server/src/core/share/share.util.spec.ts
diff --git a/apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts b/apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts
new file mode 100644
index 00000000..b20c4270
--- /dev/null
+++ b/apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts
@@ -0,0 +1,84 @@
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { getSuggestionItems } from "./menu-items";
+
+// Coverage for the filter/sort half of `getSuggestionItems` (distinct from the
+// HTML-embed gating suite). A slash query is matched against each item three
+// ways — fuzzy on the title, substring on the description, and substring on the
+// searchTerms — and matched items are sorted so title-substring hits float to
+// the top of their group. We also cover `excludeItems`.
+//
+// `getSuggestionItems` -> `isHtmlEmbedFeatureEnabled` reads the persisted
+// `currentUser` localStorage entry, so a working in-memory Storage stub is a
+// prerequisite (installed by vitest.setup.ts). We persist a `currentUser` with
+// the HTML-embed toggle OFF (the production default) so the gated "HTML embed"
+// item never leaks into these non-HTML queries.
+
+const KEY = "currentUser";
+
+function flatTitles(groups: ReturnType<typeof getSuggestionItems>): string[] {
+  return Object.values(groups)
+    .flat()
+    .map((item) => item.title);
+}
+
+beforeEach(() => {
+  // Default workspace state: HTML-embed feature OFF (matches production default).
+  localStorage.setItem(KEY, JSON.stringify({ workspace: { settings: {} } }));
+});
+
+afterEach(() => {
+  localStorage.clear();
+});
+
+describe("getSuggestionItems — filter and sort", () => {
+  it("fuzzy-matches a title (non-contiguous characters)", () => {
+    // "tdo" is not a substring of "to-do list" but matches fuzzily (t..d..o).
+    const titles = flatTitles(getSuggestionItems({ query: "tdo" }));
+    expect(titles).toContain("To-do list");
+  });
+
+  it("matches via the description when the title does not match", () => {
+    // "numbering" only appears in the description "Create a list with numbering.",
+    // not in the "Numbered list" title nor its searchTerms.
+    const titles = flatTitles(getSuggestionItems({ query: "numbering" }));
+    expect(titles).toContain("Numbered list");
+  });
+
+  it("matches via searchTerms when title and description do not match", () => {
+    // "blockquote" is only present in the "Quote" item's searchTerms.
+    const titles = flatTitles(getSuggestionItems({ query: "blockquote" }));
+    expect(titles).toContain("Quote");
+  });
+
+  it("sorts title-substring matches before non-title (description) matches", () => {
+    // For "page": several titles contain "page" (e.g. "Page break"), while
+    // "Synced block" matches only through its description (".. across pages.").
+    // The sort tie-break must place every title hit ahead of the non-title hit.
+    const titles = flatTitles(getSuggestionItems({ query: "page" }));
+
+    const syncedIndex = titles.indexOf("Synced block");
+    const pageBreakIndex = titles.indexOf("Page break");
+
+    // Sanity: both items survived the filter for this query.
+    expect(syncedIndex).toBeGreaterThanOrEqual(0);
+    expect(pageBreakIndex).toBeGreaterThanOrEqual(0);
+
+    // The title match ("Page break") sorts before the description-only match.
+    expect(pageBreakIndex).toBeLessThan(syncedIndex);
+  });
+
+  it("removes a named item via excludeItems", () => {
+    const withBullet = flatTitles(getSuggestionItems({ query: "list" }));
+    expect(withBullet).toContain("Bullet list");
+
+    const withoutBullet = flatTitles(
+      getSuggestionItems({
+        query: "list",
+        excludeItems: new Set(["Bullet list"]),
+      }),
+    );
+    expect(withoutBullet).not.toContain("Bullet list");
+    // Other "list" matches remain unaffected by the exclusion.
+    expect(withoutBullet).toContain("Numbered list");
+  });
+});
diff --git a/apps/client/vitest.config.ts b/apps/client/vitest.config.ts
index 5cde717a..334f6226 100644
--- a/apps/client/vitest.config.ts
+++ b/apps/client/vitest.config.ts
@@ -12,6 +12,6 @@ export default defineConfig({
   test: {
     environment: 'jsdom',
     globals: true,
-    setupFiles: [],
+    setupFiles: ['./vitest.setup.ts'],
   },
 });
diff --git a/apps/client/vitest.setup.ts b/apps/client/vitest.setup.ts
new file mode 100644
index 00000000..33d3d142
--- /dev/null
+++ b/apps/client/vitest.setup.ts
@@ -0,0 +1,51 @@
+// Vitest global setup (test-infra only — no production app source).
+//
+// Under Node 25 / jsdom 25 / vitest 4 the jsdom `localStorage` exposed on the
+// global is not a usable Storage: its methods (`setItem`/`getItem`/...) are not
+// callable, so any code touching `localStorage` throws `... is not a function`.
+// Production code such as `isHtmlEmbedFeatureEnabled()` reads
+// `localStorage.getItem("currentUser")`, which made dependent tests fail.
+//
+// We install a correct in-memory Storage stub on the global BEFORE tests run so
+// the Web Storage contract holds: string coercion of keys/values, `null` for
+// missing keys, working `length`/`key(index)`, and `clear()`.
+import { vi } from "vitest";
+
+// Minimal, spec-faithful in-memory implementation of the Web Storage API.
+function createStorage(): Storage {
+  let store = new Map<string, string>();
+
+  const storage: Storage = {
+    get length(): number {
+      return store.size;
+    },
+    clear(): void {
+      store = new Map<string, string>();
+    },
+    getItem(key: string): string | null {
+      // Missing keys must return `null`, not `undefined`.
+      const value = store.get(String(key));
+      return value === undefined ? null : value;
+    },
+    setItem(key: string, value: string): void {
+      // Web Storage coerces both key and value to strings.
+      store.set(String(key), String(value));
+    },
+    removeItem(key: string): void {
+      store.delete(String(key));
+    },
+    key(index: number): string | null {
+      // Insertion order matches Map iteration order; out-of-range => null.
+      const keys = Array.from(store.keys());
+      return index >= 0 && index < keys.length ? keys[index] : null;
+    },
+  };
+
+  return storage;
+}
+
+// Install on the jsdom global. `vi.stubGlobal` also reflects onto `window`
+// (jsdom shares `globalThis` and `window`), so both `localStorage` and
+// `window.localStorage` resolve to the same working stub.
+vi.stubGlobal("localStorage", createStorage());
+vi.stubGlobal("sessionStorage", createStorage());
diff --git a/apps/server/src/collaboration/extensions/authentication.extension.spec.ts b/apps/server/src/collaboration/extensions/authentication.extension.spec.ts
new file mode 100644
index 00000000..19c727ec
--- /dev/null
+++ b/apps/server/src/collaboration/extensions/authentication.extension.spec.ts
@@ -0,0 +1,211 @@
+import {
+  NotFoundException,
+  UnauthorizedException,
+} from '@nestjs/common';
+import { AuthenticationExtension } from './authentication.extension';
+import { SpaceRole } from '../../common/helpers/types/permission';
+import { JwtType } from '../../core/auth/dto/jwt-payload';
+
+/**
+ * Unit tests for the collab read-only downgrade matrix in
+ * `AuthenticationExtension.onAuthenticate`. This is a security boundary: a wrong
+ * branch here is either a collab-auth bypass (writer on a page they may only
+ * read) or a denial. We mock every repo and inspect both the thrown exception
+ * type and the `connectionConfig.readOnly` flag the extension mutates.
+ */
+
+const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
+const USER_ID = 'user-1';
+const WORKSPACE_ID = 'ws-1';
+const SPACE_ID = 'space-1';
+
+const buildUser = (overrides: Partial<any> = {}) => ({
+  id: USER_ID,
+  workspaceId: WORKSPACE_ID,
+  deactivatedAt: null,
+  deletedAt: null,
+  name: 'Alice',
+  avatarUrl: null,
+  ...overrides,
+});
+
+const buildPage = (overrides: Partial<any> = {}) => ({
+  id: PAGE_ID,
+  spaceId: SPACE_ID,
+  workspaceId: WORKSPACE_ID,
+  deletedAt: null,
+  ...overrides,
+});
+
+// Default jwt payload: a plain human collab token (no agent provenance claims).
+const buildJwt = (overrides: Partial<any> = {}) => ({
+  sub: USER_ID,
+  workspaceId: WORKSPACE_ID,
+  type: JwtType.COLLAB,
+  ...overrides,
+});
+
+describe('AuthenticationExtension.onAuthenticate', () => {
+  let ext: AuthenticationExtension;
+  let tokenService: { verifyJwt: jest.Mock };
+  let userRepo: { findById: jest.Mock };
+  let pageRepo: { findById: jest.Mock };
+  let spaceMemberRepo: { getUserSpaceRoles: jest.Mock };
+  let pagePermissionRepo: { canUserEditPage: jest.Mock };
+
+  // Build the hocuspocus onAuthenticate payload. connectionConfig.readOnly
+  // starts false; the extension flips it to true on a read-only downgrade.
+  const buildData = (token = 'tok') => ({
+    documentName: `page.${PAGE_ID}`,
+    token,
+    connectionConfig: { readOnly: false },
+  });
+
+  beforeEach(() => {
+    tokenService = { verifyJwt: jest.fn().mockResolvedValue(buildJwt()) };
+    userRepo = { findById: jest.fn().mockResolvedValue(buildUser()) };
+    pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) };
+    spaceMemberRepo = {
+      getUserSpaceRoles: jest
+        .fn()
+        .mockResolvedValue([{ userId: USER_ID, role: SpaceRole.WRITER }]),
+    };
+    pagePermissionRepo = {
+      // No page-level restriction by default → defer to space role.
+      canUserEditPage: jest.fn().mockResolvedValue({
+        hasAnyRestriction: false,
+        canAccess: true,
+        canEdit: true,
+      }),
+    };
+
+    ext = new AuthenticationExtension(
+      tokenService as any,
+      userRepo as any,
+      pageRepo as any,
+      spaceMemberRepo as any,
+      pagePermissionRepo as any,
+    );
+    // Silence the extension's logger (it warns/debugs on denial branches).
+    jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined);
+    jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined);
+  });
+
+  it('invalid token → UnauthorizedException (no repo lookups happen)', async () => {
+    tokenService.verifyJwt.mockRejectedValue(new Error('bad sig'));
+
+    await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
+      UnauthorizedException,
+    );
+    expect(userRepo.findById).not.toHaveBeenCalled();
+  });
+
+  it('user not found → Unauthorized', async () => {
+    userRepo.findById.mockResolvedValue(null);
+    await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
+      UnauthorizedException,
+    );
+  });
+
+  it('user disabled (deactivatedAt set) → Unauthorized', async () => {
+    userRepo.findById.mockResolvedValue(
+      buildUser({ deactivatedAt: new Date() }),
+    );
+    await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
+      UnauthorizedException,
+    );
+  });
+
+  it('page not found → NotFoundException', async () => {
+    pageRepo.findById.mockResolvedValue(null);
+    await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
+      NotFoundException,
+    );
+  });
+
+  it('no space role → Unauthorized', async () => {
+    spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([]);
+    await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
+      UnauthorizedException,
+    );
+  });
+
+  it('page-level restriction canAccess=false → Unauthorized (restricted-page denial)', async () => {
+    pagePermissionRepo.canUserEditPage.mockResolvedValue({
+      hasAnyRestriction: true,
+      canAccess: false,
+      canEdit: false,
+    });
+    await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
+      UnauthorizedException,
+    );
+  });
+
+  it('restriction canAccess=true & canEdit=false → readOnly (no restricted-page write)', async () => {
+    pagePermissionRepo.canUserEditPage.mockResolvedValue({
+      hasAnyRestriction: true,
+      canAccess: true,
+      canEdit: false,
+    });
+    const data = buildData();
+    const ctx = await ext.onAuthenticate(data as any);
+
+    expect(data.connectionConfig.readOnly).toBe(true);
+    expect(ctx.actor).toBe('user');
+  });
+
+  it('restriction canAccess=true & canEdit=true → writable (readOnly stays false)', async () => {
+    pagePermissionRepo.canUserEditPage.mockResolvedValue({
+      hasAnyRestriction: true,
+      canAccess: true,
+      canEdit: true,
+    });
+    const data = buildData();
+    await ext.onAuthenticate(data as any);
+
+    expect(data.connectionConfig.readOnly).toBe(false);
+  });
+
+  it('no restriction + space READER → readOnly', async () => {
+    spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([
+      { userId: USER_ID, role: SpaceRole.READER },
+    ]);
+    const data = buildData();
+    await ext.onAuthenticate(data as any);
+
+    expect(data.connectionConfig.readOnly).toBe(true);
+  });
+
+  it('no restriction + space WRITER → writable', async () => {
+    const data = buildData();
+    await ext.onAuthenticate(data as any);
+    expect(data.connectionConfig.readOnly).toBe(false);
+  });
+
+  it('soft-deleted page (deletedAt set) → readOnly even for a WRITER', async () => {
+    // A writer must NOT be able to mutate a page in the trash via collab.
+    pageRepo.findById.mockResolvedValue(buildPage({ deletedAt: new Date() }));
+    const data = buildData();
+    await ext.onAuthenticate(data as any);
+
+    expect(data.connectionConfig.readOnly).toBe(true);
+  });
+
+  it('agent JWT (actor=agent + aiChatId) propagates into the connection context', async () => {
+    tokenService.verifyJwt.mockResolvedValue(
+      buildJwt({ actor: 'agent', aiChatId: 'chat-7' }),
+    );
+    const ctx = await ext.onAuthenticate(buildData() as any);
+
+    expect(ctx.actor).toBe('agent');
+    expect(ctx.aiChatId).toBe('chat-7');
+    expect(ctx.user.id).toBe(USER_ID);
+  });
+
+  it('human JWT (no provenance claims) → actor=user, aiChatId=null', async () => {
+    const ctx = await ext.onAuthenticate(buildData() as any);
+
+    expect(ctx.actor).toBe('user');
+    expect(ctx.aiChatId).toBeNull();
+  });
+});
diff --git a/apps/server/src/collaboration/extensions/compute-history-job.spec.ts b/apps/server/src/collaboration/extensions/compute-history-job.spec.ts
new file mode 100644
index 00000000..aa21f14f
--- /dev/null
+++ b/apps/server/src/collaboration/extensions/compute-history-job.spec.ts
@@ -0,0 +1,105 @@
+import {
+  computeHistoryJob,
+  resolveSource,
+} from './persistence.extension';
+import {
+  HISTORY_FAST_INTERVAL,
+  HISTORY_FAST_THRESHOLD,
+  HISTORY_INTERVAL,
+} from '../constants';
+
+// A fixed clock + fixed createdAt make pageAge deterministic.
+const NOW = 1_700_000_000_000;
+const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
+
+// Build a minimal page whose age (NOW - createdAt) is exactly `ageMs`.
+const pageAged = (ageMs: number) => ({
+  id: PAGE_ID,
+  createdAt: new Date(NOW - ageMs),
+});
+
+describe('computeHistoryJob', () => {
+  it('agent edit → delay MUST be 0 and job id is source-keyed', () => {
+    // INVARIANT (§15 H2 / persistence.extension): the agent delay MUST stay 0.
+    // The worker re-reads the page row at run time, so any non-zero delay risks
+    // snapshotting content a later human edit has already overwritten. This is
+    // the load-bearing assertion of this spec — do not relax it.
+    const { jobId, delay } = computeHistoryJob(pageAged(0), 'agent', NOW);
+    expect(delay).toBe(0);
+    expect(jobId).toBe(`${PAGE_ID}-agent`);
+  });
+
+  it('agent edit on an OLD page is still delay 0 (age never applies to agents)', () => {
+    // Even when the page is far older than the fast threshold, the agent path
+    // must short-circuit to 0 — age-based debounce is a human-only concern.
+    const { jobId, delay } = computeHistoryJob(
+      pageAged(HISTORY_FAST_THRESHOLD + 60_000),
+      'agent',
+      NOW,
+    );
+    expect(delay).toBe(0);
+    expect(jobId).toBe(`${PAGE_ID}-agent`);
+  });
+
+  it('human edit on a YOUNG page (age < threshold) → fast interval, bare job id', () => {
+    const { jobId, delay } = computeHistoryJob(
+      pageAged(HISTORY_FAST_THRESHOLD - 1),
+      'user',
+      NOW,
+    );
+    expect(delay).toBe(HISTORY_FAST_INTERVAL);
+    expect(jobId).toBe(PAGE_ID);
+  });
+
+  it('human edit on an OLD page (age > threshold) → standard interval', () => {
+    const { jobId, delay } = computeHistoryJob(
+      pageAged(HISTORY_FAST_THRESHOLD + 1),
+      'user',
+      NOW,
+    );
+    expect(delay).toBe(HISTORY_INTERVAL);
+    expect(jobId).toBe(PAGE_ID);
+  });
+
+  it('boundary: pageAge EXACTLY === threshold takes the slow branch (the `<` is strict)', () => {
+    // Off-by-one guard: the condition is `pageAge < HISTORY_FAST_THRESHOLD`, so
+    // an age of exactly the threshold is NOT "fast" — it must use HISTORY_INTERVAL.
+    const { delay } = computeHistoryJob(
+      pageAged(HISTORY_FAST_THRESHOLD),
+      'user',
+      NOW,
+    );
+    expect(delay).toBe(HISTORY_INTERVAL);
+  });
+
+  it('treats any non-"agent" source string as human', () => {
+    // resolveSource only ever yields 'agent' | 'user', but guard the contract:
+    // the agent branch keys strictly on === 'agent'.
+    const { jobId, delay } = computeHistoryJob(pageAged(0), 'user', NOW);
+    expect(delay).toBe(HISTORY_FAST_INTERVAL);
+    expect(jobId).toBe(PAGE_ID);
+  });
+});
+
+describe('resolveSource (truth table)', () => {
+  // (sticky, actor) → expected. Marker is OR of the sticky flag and actor==='agent'.
+  it('sticky=false, actor=user → user', () => {
+    expect(resolveSource(false, 'user')).toBe('user');
+  });
+
+  it('sticky=true, actor=user → agent (sticky wins)', () => {
+    expect(resolveSource(true, 'user')).toBe('agent');
+  });
+
+  it('sticky=false, actor=agent → agent (current writer is the agent)', () => {
+    expect(resolveSource(false, 'agent')).toBe('agent');
+  });
+
+  it('sticky=true, actor=agent → agent', () => {
+    expect(resolveSource(true, 'agent')).toBe('agent');
+  });
+
+  it('sticky=false, actor=undefined → user (human collab path omits the claim)', () => {
+    expect(resolveSource(false, undefined)).toBe('user');
+  });
+});
diff --git a/apps/server/src/collaboration/extensions/persistence-store.spec.ts b/apps/server/src/collaboration/extensions/persistence-store.spec.ts
new file mode 100644
index 00000000..4262d77f
--- /dev/null
+++ b/apps/server/src/collaboration/extensions/persistence-store.spec.ts
@@ -0,0 +1,185 @@
+import { TiptapTransformer } from '@hocuspocus/transformer';
+import { PersistenceExtension } from './persistence.extension';
+import { tiptapExtensions } from '../collaboration.util';
+
+/**
+ * Integration test for `onStoreDocument`'s Approach-A boundary snapshot.
+ *
+ * The data-loss risk: when an AGENT store lands over a page whose persisted
+ * state was authored by a HUMAN, the agent overwrites that human content. If we
+ * do not pin the human revision as its own history version BEFORE the agent's
+ * updatePage, the last human edit is lost. This test pins the ordering
+ * (saveHistory(oldHumanPage) strictly before updatePage) and the idempotency
+ * skip when content is unchanged.
+ *
+ * We pass a REAL Y.Doc as the `document` arg (so TiptapTransformer.fromYdoc
+ * yields real content) and stub repos/queues + an executeTx-compatible db whose
+ * transaction().execute() invokes the callback with a trx stub.
+ */
+
+const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
+const USER_ID = 'human-1';
+
+// Build a real Y.Doc carrying the given tiptap JSON in the 'default' fragment.
+// hocuspocus augments the live document with broadcastStateless(); the bare
+// Y.Doc lacks it, so stub it for the post-store broadcast.
+const ydocFor = (json: any) => {
+  const ydoc = TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
+  (ydoc as any).broadcastStateless = jest.fn();
+  return ydoc;
+};
+
+const doc = (text: string) => ({
+  type: 'doc',
+  content: [{ type: 'paragraph', content: [{ type: 'text', text }] }],
+});
+
+describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot', () => {
+  let ext: PersistenceExtension;
+  let pageRepo: { findById: jest.Mock; updatePage: jest.Mock };
+  let pageHistoryRepo: {
+    saveHistory: jest.Mock;
+    findPageLastHistory: jest.Mock;
+  };
+  let aiQueue: { add: jest.Mock };
+  let historyQueue: { add: jest.Mock };
+  let notificationQueue: { add: jest.Mock };
+  let collabHistory: { addContributors: jest.Mock };
+  let transclusionService: {
+    syncPageTransclusions: jest.Mock;
+    syncPageReferences: jest.Mock;
+    syncPageTemplateReferences: jest.Mock;
+  };
+  let callOrder: string[];
+
+  // db whose transaction().execute(fn) runs fn with a trx stub — this lets the
+  // real executeTx() helper drive the callback without a database.
+  const trxStub = { __trx: true };
+  const db = {
+    transaction: () => ({
+      execute: (fn: (trx: any) => Promise<any>) => fn(trxStub),
+    }),
+  };
+
+  // The persisted page row the transaction reads (OLD, human-authored state).
+  const persistedHumanPage = (newAgentText: string) => ({
+    id: PAGE_ID,
+    slugId: 'slug-1',
+    spaceId: 'space-1',
+    workspaceId: 'ws-1',
+    creatorId: 'creator-1',
+    contributorIds: ['creator-1'],
+    createdAt: new Date('2020-01-01T00:00:00Z'),
+    lastUpdatedSource: 'user', // prior revision was human
+    // content differs from the new agent doc so the update branch runs.
+    content: doc('OLD HUMAN'),
+    _newAgentText: newAgentText,
+  });
+
+  const buildData = (document: any, actor: 'user' | 'agent') => ({
+    documentName: `page.${PAGE_ID}`,
+    document,
+    context: { user: { id: USER_ID, name: 'Alice' }, actor },
+  });
+
+  beforeEach(() => {
+    callOrder = [];
+    pageRepo = {
+      findById: jest.fn(),
+      updatePage: jest.fn().mockImplementation(async () => {
+        callOrder.push('updatePage');
+      }),
+    };
+    pageHistoryRepo = {
+      saveHistory: jest.fn().mockImplementation(async () => {
+        callOrder.push('saveHistory');
+      }),
+      findPageLastHistory: jest.fn().mockResolvedValue(null),
+    };
+    aiQueue = { add: jest.fn().mockResolvedValue(undefined) };
+    historyQueue = { add: jest.fn().mockResolvedValue(undefined) };
+    notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
+    collabHistory = { addContributors: jest.fn().mockResolvedValue(undefined) };
+    transclusionService = {
+      syncPageTransclusions: jest.fn().mockResolvedValue(undefined),
+      syncPageReferences: jest.fn().mockResolvedValue(undefined),
+      syncPageTemplateReferences: jest.fn().mockResolvedValue(undefined),
+    };
+
+    ext = new PersistenceExtension(
+      pageRepo as any,
+      pageHistoryRepo as any,
+      db as any,
+      aiQueue as any,
+      historyQueue as any,
+      notificationQueue as any,
+      collabHistory as any,
+      transclusionService as any,
+    );
+    jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined);
+    jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined);
+    jest.spyOn(ext['logger'], 'error').mockImplementation(() => undefined);
+  });
+
+  it('agent store over a human page pins saveHistory(oldHumanPage) BEFORE updatePage', async () => {
+    const document = ydocFor(doc('NEW AGENT CONTENT'));
+    pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT'));
+    // No human baseline snapshot exists yet → boundary snapshot must run.
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
+
+    await ext.onStoreDocument(buildData(document, 'agent') as any);
+
+    // Boundary snapshot fired, and strictly before the agent overwrite.
+    expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1);
+    const saved = pageHistoryRepo.saveHistory.mock.calls[0][0];
+    expect(saved.content).toEqual(doc('OLD HUMAN')); // the OLD human revision
+    expect(callOrder).toEqual(['saveHistory', 'updatePage']);
+
+    // The agent's new content is tagged 'agent' on the update.
+    const update = pageRepo.updatePage.mock.calls[0][0];
+    expect(update.lastUpdatedSource).toBe('agent');
+  });
+
+  it('skips the boundary snapshot when the human baseline is already pinned', async () => {
+    const document = ydocFor(doc('NEW AGENT CONTENT'));
+    pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT'));
+    // Latest history already equals the current human state → no duplicate.
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue({
+      content: doc('OLD HUMAN'),
+    });
+
+    await ext.onStoreDocument(buildData(document, 'agent') as any);
+
+    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
+    expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
+  });
+
+  it('human store does NOT trigger the boundary snapshot (no source transition)', async () => {
+    const document = ydocFor(doc('NEW HUMAN CONTENT'));
+    pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW HUMAN CONTENT'));
+
+    await ext.onStoreDocument(buildData(document, 'user') as any);
+
+    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
+    expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
+    expect(pageRepo.updatePage.mock.calls[0][0].lastUpdatedSource).toBe('user');
+  });
+
+  it('idempotency: unchanged content → no updatePage, no history, no queues', async () => {
+    // The Y.Doc content equals the persisted content deeply → early skip.
+    // A Y.Doc round-trip normalizes attrs (e.g. paragraph indent), so derive
+    // the persisted content from fromYdoc to make the deep-equal skip genuine.
+    const document = ydocFor(doc('SAME CONTENT'));
+    const normalized = TiptapTransformer.fromYdoc(document, 'default');
+    pageRepo.findById.mockResolvedValue({
+      ...persistedHumanPage('SAME CONTENT'),
+      content: normalized,
+    });
+
+    await ext.onStoreDocument(buildData(document, 'agent') as any);
+
+    expect(pageRepo.updatePage).not.toHaveBeenCalled();
+    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
+    expect(historyQueue.add).not.toHaveBeenCalled();
+  });
+});
diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts
index da8421c3..dd462877 100644
--- a/apps/server/src/collaboration/extensions/persistence.extension.ts
+++ b/apps/server/src/collaboration/extensions/persistence.extension.ts
@@ -40,6 +40,52 @@ import {
 } from '../constants';
 import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
 
+/**
+ * Resolve the provenance source for a coalesced snapshot.
+ *
+ * The snapshot is tagged 'agent' if any agent edit landed in the coalescing
+ * window (sticky marker) OR if the current writer is the agent; otherwise
+ * 'user'. Pure so the §15 H2 marker logic is unit-testable in isolation.
+ */
+export function resolveSource(
+  stickyTouched: boolean,
+  contextActor?: string,
+): 'agent' | 'user' {
+  return stickyTouched || contextActor === 'agent' ? 'agent' : 'user';
+}
+
+/**
+ * Compute the BullMQ job id + delay for a page-history snapshot job. Pure so
+ * the data-loss-sensitive timing arithmetic is unit-testable; `now` is injected
+ * (caller passes `Date.now()`) for determinism.
+ *
+ * - Agent edits: delay 0 and a source-keyed job id `${page.id}-agent`. The
+ *   delay MUST stay 0 — the worker re-reads the page row at run time, so any
+ *   delay risks reading content a later human edit has already overwritten
+ *   (mis-tagged snapshot). 0 minimizes that window. The `-agent` suffix keeps
+ *   the job from coalescing with the bare-page.id human job.
+ * - Human edits: age-based debounce so rapid human edits coalesce into one
+ *   snapshot; job id is the bare `page.id`.
+ *
+ * BullMQ forbids ':' in custom job ids (Redis key separator), so '-' is used;
+ * page.id is a UUID, so `${page.id}-agent` cannot collide with a human job.
+ */
+export function computeHistoryJob(
+  page: Pick<Page, 'id' | 'createdAt'>,
+  source: string,
+  now: number,
+): { jobId: string; delay: number } {
+  const isAgent = source === 'agent';
+  const pageAge = now - new Date(page.createdAt).getTime();
+  const delay = isAgent
+    ? 0
+    : pageAge < HISTORY_FAST_THRESHOLD
+      ? HISTORY_FAST_INTERVAL
+      : HISTORY_INTERVAL;
+  const jobId = isAgent ? `${page.id}-agent` : page.id;
+  return { jobId, delay };
+}
+
 @Injectable()
 export class PersistenceExtension implements Extension {
   private readonly logger = new Logger(PersistenceExtension.name);
@@ -129,9 +175,10 @@ export class PersistenceExtension implements Extension {
     // Sticky agent marker: 'agent' if any agent edit landed in this window, OR
     // if the current writer is the agent (covers a store with no prior onChange
     // agent event in the same window). §15 H2.
-    const agentTouched =
-      this.consumeAgentTouched(documentName) || context?.actor === 'agent';
-    const lastUpdatedSource = agentTouched ? 'agent' : 'user';
+    const lastUpdatedSource = resolveSource(
+      this.consumeAgentTouched(documentName),
+      context?.actor,
+    );
 
     try {
       await executeTx(this.db, async (trx) => {
@@ -311,24 +358,13 @@ export class PersistenceExtension implements Extension {
     page: Page,
     lastUpdatedSource: string,
   ): Promise<void> {
-    // Agent edits get an immediate, source-keyed history job: they snapshot
-    // deterministically as 'agent' and a later human edit (jobId = page.id)
-    // cannot coalesce/retag them. Human edits keep the age-based debounce so
-    // rapid human edits still coalesce into one snapshot.
-    // NOTE: the agent delay MUST stay 0 — the worker re-reads the page row at
-    // run time, so any delay would risk reading content a later human edit has
-    // already overwritten (mis-tagged snapshot). 0 minimizes that window.
-    const isAgent = lastUpdatedSource === 'agent';
-    const pageAge = Date.now() - new Date(page.createdAt).getTime();
-    const delay = isAgent
-      ? 0
-      : pageAge < HISTORY_FAST_THRESHOLD
-        ? HISTORY_FAST_INTERVAL
-        : HISTORY_INTERVAL;
-    // BullMQ forbids ':' in custom job IDs (it is the Redis key separator), so
-    // use '-' here. page.id is a UUID, so `${page.id}-agent` cannot collide with
-    // any human job whose id is a bare page.id.
-    const jobId = isAgent ? `${page.id}-agent` : page.id;
+    // Job id + delay arithmetic lives in the pure `computeHistoryJob` (see its
+    // doc comment for the agent-delay-0 / age-based-debounce invariants).
+    const { jobId, delay } = computeHistoryJob(
+      page,
+      lastUpdatedSource,
+      Date.now(),
+    );
 
     await this.historyQueue.add(
       QueueJob.PAGE_HISTORY,
diff --git a/apps/server/src/collaboration/processors/history.processor.spec.ts b/apps/server/src/collaboration/processors/history.processor.spec.ts
new file mode 100644
index 00000000..bdcf846e
--- /dev/null
+++ b/apps/server/src/collaboration/processors/history.processor.spec.ts
@@ -0,0 +1,200 @@
+import { Job } from 'bullmq';
+import { HistoryProcessor } from './history.processor';
+import { QueueJob } from '../../integrations/queue/constants';
+
+/**
+ * Unit tests for `HistoryProcessor.process`. This worker is the last line of
+ * defense for the page-history snapshot, so we pin the data-loss-sensitive
+ * paths: duplicate/empty history skipping (isDeepStrictEqual), and — critically
+ * — that a saveHistory failure RESTORES the contributors it popped (otherwise
+ * the contributor set is silently lost) before rethrowing.
+ */
+
+const PAGE_ID = 'page-1';
+const SPACE_ID = 'space-1';
+const WORKSPACE_ID = 'ws-1';
+
+// A non-empty content doc (distinct from the empty-paragraph doc).
+const filledContent = {
+  type: 'doc',
+  content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
+};
+const emptyContent = { type: 'doc', content: [{ type: 'paragraph' }] };
+
+const buildPage = (overrides: Partial<any> = {}) => ({
+  id: PAGE_ID,
+  spaceId: SPACE_ID,
+  workspaceId: WORKSPACE_ID,
+  content: filledContent,
+  ...overrides,
+});
+
+const buildJob = (overrides: Partial<any> = {}) =>
+  ({
+    name: QueueJob.PAGE_HISTORY,
+    data: { pageId: PAGE_ID },
+    ...overrides,
+  }) as unknown as Job<any, void>;
+
+describe('HistoryProcessor.process', () => {
+  let proc: HistoryProcessor;
+  let pageHistoryRepo: { findPageLastHistory: jest.Mock; saveHistory: jest.Mock };
+  let pageRepo: { findById: jest.Mock };
+  let collabHistory: {
+    clearContributors: jest.Mock;
+    popContributors: jest.Mock;
+    addContributors: jest.Mock;
+  };
+  let watcherService: { addPageWatchers: jest.Mock };
+  let notificationQueue: { add: jest.Mock };
+  let generalQueue: { add: jest.Mock };
+
+  beforeEach(() => {
+    pageHistoryRepo = {
+      findPageLastHistory: jest.fn().mockResolvedValue(null),
+      saveHistory: jest.fn().mockResolvedValue(undefined),
+    };
+    pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) };
+    collabHistory = {
+      clearContributors: jest.fn().mockResolvedValue(undefined),
+      popContributors: jest.fn().mockResolvedValue(['u1', 'u2']),
+      addContributors: jest.fn().mockResolvedValue(undefined),
+    };
+    watcherService = {
+      addPageWatchers: jest.fn().mockResolvedValue(undefined),
+    };
+    notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
+    generalQueue = { add: jest.fn().mockResolvedValue(undefined) };
+
+    // WorkerHost's constructor reads `this.worker`; passing repos positionally
+    // matches the constructor and avoids the Nest DI container.
+    proc = new HistoryProcessor(
+      pageHistoryRepo as any,
+      pageRepo as any,
+      collabHistory as any,
+      watcherService as any,
+      notificationQueue as any,
+      generalQueue as any,
+    );
+    jest.spyOn(proc['logger'], 'debug').mockImplementation(() => undefined);
+    jest.spyOn(proc['logger'], 'warn').mockImplementation(() => undefined);
+    jest.spyOn(proc['logger'], 'error').mockImplementation(() => undefined);
+  });
+
+  it('ignores jobs whose name is not PAGE_HISTORY (no page lookup)', async () => {
+    await proc.process(buildJob({ name: 'some.other.job' }));
+    expect(pageRepo.findById).not.toHaveBeenCalled();
+  });
+
+  it('page not found → clearContributors and return (no save)', async () => {
+    pageRepo.findById.mockResolvedValue(null);
+
+    await proc.process(buildJob());
+
+    expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID);
+    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
+    expect(collabHistory.popContributors).not.toHaveBeenCalled();
+  });
+
+  it('first history + empty content → skip and clear contributors (no save)', async () => {
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
+    pageRepo.findById.mockResolvedValue(buildPage({ content: emptyContent }));
+
+    await proc.process(buildJob());
+
+    expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID);
+    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
+  });
+
+  it('content unchanged vs last history → no save (isDeepStrictEqual skip)', async () => {
+    // Last history holds a deep-equal-but-distinct copy of current content.
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue({
+      content: JSON.parse(JSON.stringify(filledContent)),
+    });
+
+    await proc.process(buildJob());
+
+    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
+    expect(collabHistory.popContributors).not.toHaveBeenCalled();
+  });
+
+  it('content changed → addPageWatchers + saveHistory + backlinks queue', async () => {
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue({
+      content: { type: 'doc', content: [] },
+    });
+
+    await proc.process(buildJob());
+
+    expect(collabHistory.popContributors).toHaveBeenCalledWith(PAGE_ID);
+    expect(watcherService.addPageWatchers).toHaveBeenCalledWith(
+      ['u1', 'u2'],
+      PAGE_ID,
+      SPACE_ID,
+      WORKSPACE_ID,
+    );
+    expect(pageHistoryRepo.saveHistory).toHaveBeenCalledWith(
+      expect.objectContaining({ id: PAGE_ID }),
+      { contributorIds: ['u1', 'u2'] },
+    );
+    expect(generalQueue.add).toHaveBeenCalledWith(
+      QueueJob.PAGE_BACKLINKS,
+      expect.objectContaining({ pageId: PAGE_ID, workspaceId: WORKSPACE_ID }),
+    );
+  });
+
+  it('first history (lastHistory null) with non-empty content → saves, no PAGE_UPDATED notification', async () => {
+    // popContributors yields users, but lastHistory?.content is falsy so the
+    // notification branch (needs a prior version) must be skipped.
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
+
+    await proc.process(buildJob());
+
+    expect(pageHistoryRepo.saveHistory).toHaveBeenCalled();
+    expect(notificationQueue.add).not.toHaveBeenCalled();
+  });
+
+  it('changed content WITH prior history + contributors → queues PAGE_UPDATED notification', async () => {
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue({
+      content: { type: 'doc', content: [] },
+    });
+
+    await proc.process(buildJob());
+
+    expect(notificationQueue.add).toHaveBeenCalledWith(
+      QueueJob.PAGE_UPDATED,
+      expect.objectContaining({
+        pageId: PAGE_ID,
+        actorIds: ['u1', 'u2'],
+      }),
+    );
+  });
+
+  it('saveHistory throws → contributors RESTORED (addContributors) AND error rethrown', async () => {
+    // The data-loss guard: if the snapshot save fails after popContributors,
+    // the popped ids MUST be returned to the pending set, then the error
+    // propagates so BullMQ retries. Assert BOTH halves.
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue({
+      content: { type: 'doc', content: [] },
+    });
+    const boom = new Error('db down');
+    pageHistoryRepo.saveHistory.mockRejectedValue(boom);
+
+    await expect(proc.process(buildJob())).rejects.toThrow('db down');
+    expect(collabHistory.addContributors).toHaveBeenCalledWith(PAGE_ID, [
+      'u1',
+      'u2',
+    ]);
+  });
+
+  it('backlinks + notification queue failures are swallowed (history still committed)', async () => {
+    pageHistoryRepo.findPageLastHistory.mockResolvedValue({
+      content: { type: 'doc', content: [] },
+    });
+    generalQueue.add.mockRejectedValue(new Error('redis backlinks down'));
+    notificationQueue.add.mockRejectedValue(new Error('redis notif down'));
+
+    // The downstream queue failures are caught internally; process resolves.
+    await expect(proc.process(buildJob())).resolves.toBeUndefined();
+    expect(pageHistoryRepo.saveHistory).toHaveBeenCalled();
+  });
+});
diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.ts b/apps/server/src/core/ai-chat/ai-chat.controller.ts
index 11c43af5..02a98971 100644
--- a/apps/server/src/core/ai-chat/ai-chat.controller.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts
@@ -228,25 +228,14 @@ export class AiChatController {
     }
     if (!file) throw new BadRequestException('No audio uploaded');
 
-    // Whitelist audio container types produced by browser MediaRecorder
-    // (Chrome/FF: webm/opus, Safari: mp4) plus common STT-accepted formats.
-    const allowedMime = new Set([
-      'audio/webm',
-      'audio/ogg',
-      'audio/mp4',
-      'audio/mpeg',
-      'audio/wav',
-      'audio/x-wav',
-      'audio/wave',
-      'audio/m4a',
-      'audio/x-m4a',
-    ]);
-    // MediaRecorder mimetypes carry parameters (e.g. "audio/webm;codecs=opus");
-    // compare only the base type.
-    const baseMime = file.mimetype.split(';')[0].trim().toLowerCase();
-    if (!allowedMime.has(baseMime)) {
+    // Resolve + whitelist the upload's container type (MediaRecorder mimetypes
+    // carry parameters, e.g. "audio/webm;codecs=opus"). A non-whitelisted type
+    // is rejected; an allowed one yields the STT container-format hint.
+    const resolved = resolveAudioFormat(file.mimetype);
+    if (!resolved.ok) {
       throw new BadRequestException('Unsupported audio format');
     }
+    const { format } = resolved;
 
     let buf: Buffer;
     try {
@@ -259,20 +248,6 @@ export class AiChatController {
       }
       throw err;
     }
-    // Container hint for JSON-style STT providers (e.g. OpenRouter); multipart
-    // endpoints ignore it.
-    const formatMap: Record<string, string> = {
-      'audio/webm': 'webm',
-      'audio/ogg': 'ogg',
-      'audio/mp4': 'mp4',
-      'audio/mpeg': 'mp3',
-      'audio/wav': 'wav',
-      'audio/x-wav': 'wav',
-      'audio/wave': 'wav',
-      'audio/m4a': 'm4a',
-      'audio/x-m4a': 'm4a',
-    };
-    const format = formatMap[baseMime] ?? 'webm';
     let text: string;
     try {
       text = await this.aiTranscription.transcribe(workspace.id, buf, format);
@@ -302,3 +277,39 @@ export class AiChatController {
     }
   }
 }
+
+/**
+ * Whitelist audio container types produced by browser MediaRecorder (Chrome/FF:
+ * webm/opus, Safari: mp4) plus common STT-accepted formats. The value maps each
+ * allowed base mime to the container-format hint passed to JSON-style STT
+ * providers (e.g. OpenRouter); multipart endpoints ignore the hint.
+ */
+const AUDIO_FORMAT_MAP: Record<string, string> = {
+  'audio/webm': 'webm',
+  'audio/ogg': 'ogg',
+  'audio/mp4': 'mp4',
+  'audio/mpeg': 'mp3',
+  'audio/wav': 'wav',
+  'audio/x-wav': 'wav',
+  'audio/wave': 'wav',
+  'audio/m4a': 'm4a',
+  'audio/x-m4a': 'm4a',
+};
+
+/**
+ * Resolve and whitelist an uploaded clip's mimetype. MediaRecorder mimetypes
+ * carry parameters (e.g. "audio/webm;codecs=opus"), so the base type is split
+ * out (lowercased, trimmed) before the whitelist check. Returns ok=false for a
+ * non-whitelisted container; otherwise the base mime and its STT format hint.
+ * Pure — the caller throws BadRequestException on !ok.
+ */
+export function resolveAudioFormat(
+  mimetype: string,
+): { ok: true; baseMime: string; format: string } | { ok: false } {
+  const baseMime = mimetype.split(';')[0].trim().toLowerCase();
+  const format = AUDIO_FORMAT_MAP[baseMime];
+  if (format === undefined) {
+    return { ok: false };
+  }
+  return { ok: true, baseMime, format };
+}
diff --git a/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts b/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts
index 330a23cc..30e94dc0 100644
--- a/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts
+++ b/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts
@@ -367,6 +367,28 @@ export class McpClientsService {
   }
 }
 
+/**
+ * Apply the SSRF connect-time rule to a set of DNS-resolved addresses: block if
+ * ANY resolved address is disallowed by `isIpAllowed`, and block an EMPTY set
+ * (nothing safe to connect to). Only an all-public, non-empty set is allowed.
+ *
+ * This is the connect-time half of the DNS-rebinding defense: the dispatcher's
+ * lookup hands net/tls.connect ONLY a set that passed this check, so the kernel
+ * can never connect to an address that did not pass the guard. Pure — no I/O.
+ */
+export function validateResolvedAddresses(
+  addrs: readonly LookupAddress[],
+): { ok: boolean; blockedHost?: string } {
+  if (addrs.length === 0) {
+    return { ok: false };
+  }
+  const blocked = addrs.find((a) => !isIpAllowed(a.address).ok);
+  if (blocked) {
+    return { ok: false, blockedHost: blocked.address };
+  }
+  return { ok: true };
+}
+
 /**
  * Build the SSRF-pinned undici dispatcher. Its custom connect.lookup resolves
  * the host, validates EVERY resolved address with the same ssrf-guard, and
@@ -388,22 +410,15 @@ function buildPinnedDispatcher(): Agent {
             return;
           }
           const addrs = addresses as LookupAddress[];
-          if (addrs.length === 0) {
-            callback(
-              new Error(`No address resolved for ${hostname}`),
-              '',
-              0,
-            );
-            return;
-          }
-          const blocked = addrs.find((a) => !isIpAllowed(a.address).ok);
-          if (blocked) {
+          const verdict = validateResolvedAddresses(addrs);
+          if (!verdict.ok) {
             // Refuse the connection: net/tls.connect never sees this address.
-            callback(
-              new Error(`Blocked address for ${hostname}`),
-              '',
-              0,
-            );
+            // An empty set is treated as blocked (nothing safe to connect to).
+            const reason =
+              addrs.length === 0
+                ? `No address resolved for ${hostname}`
+                : `Blocked address for ${hostname}`;
+            callback(new Error(reason), '', 0);
             return;
           }
           // undici/net invoke this lookup with `all: true`, so the callback
diff --git a/apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts b/apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts
new file mode 100644
index 00000000..e5fee47d
--- /dev/null
+++ b/apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts
@@ -0,0 +1,136 @@
+import { type Tool } from 'ai';
+import { McpClientsService } from './mcp-clients.service';
+
+/**
+ * Tool-name namespacing / collision tests.
+ *
+ * REACHABILITY NOTE: the helpers `namespace` / `sanitizeName` / `capName` /
+ * `disambiguate` are module-private (not exported) and `mergeNamespaced` is a
+ * PRIVATE method. The smallest reachable public path that exercises all of them
+ * is `toolsFor()` -> getOrBuildEntry -> buildEntry -> connect/tools() ->
+ * mergeNamespaced. We drive that path: stub the repo's `listEnabled` to return
+ * fake servers and spy on the private `connect` to return fake MCP clients whose
+ * `tools()` we control. We then inspect the merged tool KEYS on the returned
+ * toolset — the observable result of namespacing.
+ *
+ * What we assert (all SECURITY/correctness-relevant):
+ *  - two servers each exposing a tool `search` -> BOTH survive under distinct
+ *    namespaced keys (no silent overwrite);
+ *  - a tool name with spaces/unicode -> sanitized to ^[a-zA-Z0-9_-]+;
+ *  - an over-long name -> capped to the provider limit (<= 64);
+ *  - duplicate names WITHIN one server (collide after sanitize/truncate) ->
+ *    disambiguated, so the second is not overwritten.
+ */
+const MAX_TOOL_NAME_LENGTH = 64;
+
+function fakeTool(): Tool {
+  return { description: 'x', inputSchema: undefined } as unknown as Tool;
+}
+
+interface FakeServer {
+  id: string;
+  name: string;
+  transport: string;
+  url: string;
+  headersEnc: string | null;
+  toolAllowlist: string[] | null;
+}
+
+function server(over: Partial<FakeServer> & { id: string; name: string }): FakeServer {
+  return {
+    transport: 'http',
+    url: 'https://example.com/mcp',
+    headersEnc: null,
+    toolAllowlist: null,
+    ...over,
+  };
+}
+
+/**
+ * Build a service whose repo returns `servers` and whose `connect` returns a
+ * fake client exposing `toolsByServerId[server.id]` from tools(). Returns the
+ * merged keys produced by toolsFor.
+ */
+async function mergedKeysFor(
+  servers: FakeServer[],
+  toolsByServerId: Record<string, Record<string, Tool>>,
+): Promise<string[]> {
+  const repoStub = {
+    listEnabled: jest.fn().mockResolvedValue(servers),
+  };
+  const service = new McpClientsService(repoStub as never, {} as never);
+
+  // Map each connect() call (by server identity) to a fake client. connect is
+  // private; spy on it via a typed any-cast.
+  jest
+    .spyOn(service as unknown as { connect: (s: FakeServer) => unknown }, 'connect')
+    .mockImplementation((s: FakeServer) =>
+      Promise.resolve({
+        tools: () => Promise.resolve(toolsByServerId[s.id] ?? {}),
+        close: () => Promise.resolve(),
+      }),
+    );
+
+  const toolset = await service.toolsFor('ws-1');
+  // Release the lease so the service does not hold the fake clients open.
+  await Promise.all(toolset.clients.map((c) => c.close()));
+  return Object.keys(toolset.tools);
+}
+
+describe('external MCP tool-name namespacing (via toolsFor)', () => {
+  afterEach(() => jest.restoreAllMocks());
+
+  it('keeps tools from two servers that both expose `search` (no overwrite)', async () => {
+    const keys = await mergedKeysFor(
+      [
+        server({ id: 'id-alpha', name: 'alpha' }),
+        server({ id: 'id-beta', name: 'beta' }),
+      ],
+      {
+        'id-alpha': { search: fakeTool() },
+        'id-beta': { search: fakeTool() },
+      },
+    );
+
+    // Two distinct keys survive -> no silent overwrite.
+    expect(keys).toHaveLength(2);
+    expect(new Set(keys).size).toBe(2);
+    // The server name is prefixed onto each tool.
+    expect(keys).toContain('alpha_search');
+    expect(keys.some((k) => k !== 'alpha_search')).toBe(true);
+  });
+
+  it('sanitizes spaces/unicode in names to the allowed charset', async () => {
+    const keys = await mergedKeysFor(
+      [server({ id: 'id-1', name: 'My Server!' })],
+      { 'id-1': { 'search the wiki ✨': fakeTool() } },
+    );
+
+    expect(keys).toHaveLength(1);
+    // Only ^[a-zA-Z0-9_-]+ characters remain (no spaces, no unicode).
+    expect(keys[0]).toMatch(/^[a-zA-Z0-9_-]+$/);
+  });
+
+  it('caps an over-long name to the provider length limit', async () => {
+    const longName = 'a'.repeat(200);
+    const keys = await mergedKeysFor(
+      [server({ id: 'id-1', name: 'svr' })],
+      { 'id-1': { [longName]: fakeTool() } },
+    );
+
+    expect(keys).toHaveLength(1);
+    expect(keys[0].length).toBeLessThanOrEqual(MAX_TOOL_NAME_LENGTH);
+  });
+
+  it('disambiguates two names that collide after sanitize/truncate within one server', async () => {
+    // Both names sanitize to the same value ("a_b") -> the second must be
+    // suffix-disambiguated, not overwritten.
+    const keys = await mergedKeysFor(
+      [server({ id: 'id-1', name: 'svr' })],
+      { 'id-1': { 'a b': fakeTool(), 'a@b': fakeTool() } },
+    );
+
+    expect(keys).toHaveLength(2);
+    expect(new Set(keys).size).toBe(2);
+  });
+});
diff --git a/apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts b/apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts
new file mode 100644
index 00000000..4c6a1afc
--- /dev/null
+++ b/apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts
@@ -0,0 +1,85 @@
+import { McpServersService } from './mcp-servers.service';
+import { AiMcpServer } from '@docmost/db/types/entity.types';
+
+/**
+ * Encrypted-header leak guard for the admin-facing view (§8.10): `toView` is
+ * private, so we drive it through the public `list()` (which maps every row
+ * with toView). The contract: a row with `headersEnc` set surfaces ONLY
+ * `hasHeaders:true` and NEVER the `headersEnc` blob; a row without headers
+ * surfaces `hasHeaders:false`. The blob must never reach an admin response.
+ */
+function row(overrides: Partial<AiMcpServer>): AiMcpServer {
+  return {
+    id: 'srv-1',
+    name: 'Tavily',
+    transport: 'http',
+    url: 'https://example.com/mcp',
+    enabled: true,
+    toolAllowlist: null,
+    headersEnc: null,
+    ...overrides,
+  } as unknown as AiMcpServer;
+}
+
+describe('McpServersService.toView (via list) — encrypted-header leak guard', () => {
+  function buildService(rows: AiMcpServer[]): McpServersService {
+    const repoStub = {
+      listByWorkspace: jest.fn().mockResolvedValue(rows),
+    };
+    // secretBox + clients are unused by the list/toView path; pass stubs to
+    // satisfy the constructor.
+    return new McpServersService(
+      repoStub as never,
+      {} as never,
+      {} as never,
+    );
+  }
+
+  it('exposes hasHeaders:true and NO headersEnc when auth headers are set', async () => {
+    const service = buildService([
+      row({ headersEnc: 'ENCRYPTED-SECRET-BLOB' }),
+    ]);
+
+    const [view] = await service.list('ws-1');
+
+    expect(view.hasHeaders).toBe(true);
+    // The encrypted blob must NEVER appear in the view, under any key.
+    expect('headersEnc' in view).toBe(false);
+    expect(Object.values(view)).not.toContain('ENCRYPTED-SECRET-BLOB');
+  });
+
+  it('exposes hasHeaders:false when no auth headers are set', async () => {
+    const service = buildService([row({ headersEnc: null })]);
+
+    const [view] = await service.list('ws-1');
+
+    expect(view.hasHeaders).toBe(false);
+    expect('headersEnc' in view).toBe(false);
+  });
+
+  it('projects only the public fields', async () => {
+    const service = buildService([
+      row({
+        id: 'srv-9',
+        name: 'My MCP',
+        transport: 'sse',
+        url: 'https://mcp.example.com/',
+        enabled: false,
+        toolAllowlist: ['search'],
+        headersEnc: 'BLOB',
+      }),
+    ]);
+
+    const [view] = await service.list('ws-1');
+
+    expect(view).toEqual({
+      id: 'srv-9',
+      name: 'My MCP',
+      transport: 'sse',
+      url: 'https://mcp.example.com/',
+      enabled: false,
+      toolAllowlist: ['search'],
+      hasHeaders: true,
+    });
+  });
+});
diff --git a/apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts b/apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts
new file mode 100644
index 00000000..3f4be947
--- /dev/null
+++ b/apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts
@@ -0,0 +1,67 @@
+import { type LookupAddress } from 'node:dns';
+import { validateResolvedAddresses } from './mcp-clients.service';
+
+/**
+ * Unit tests for validateResolvedAddresses — the connect-time half of the SSRF
+ * DNS-rebinding defense. It applies the REAL `isIpAllowed` rule (imported
+ * transitively via the service) and must block if ANY resolved address is
+ * disallowed, treat an EMPTY set as blocked, and unwrap IPv4-mapped IPv6.
+ *
+ * These tests intentionally use real public/private literals (no DNS, no mock)
+ * so they exercise the actual ssrf-guard classification.
+ */
+function addr(address: string, family = 4): LookupAddress {
+  return { address, family };
+}
+
+describe('validateResolvedAddresses', () => {
+  it('allows an all-public set', () => {
+    const res = validateResolvedAddresses([
+      addr('8.8.8.8'),
+      addr('1.1.1.1'),
+      addr('2001:4860:4860::8888', 6),
+    ]);
+    expect(res.ok).toBe(true);
+  });
+
+  it('blocks when ONE address among many is private (any-private-blocks)', () => {
+    const res = validateResolvedAddresses([
+      addr('8.8.8.8'),
+      addr('1.1.1.1'),
+      addr('10.0.0.5'), // private 10/8 hidden among public addresses
+      addr('1.0.0.1'),
+    ]);
+    expect(res.ok).toBe(false);
+    expect(res.blockedHost).toBe('10.0.0.5');
+  });
+
+  it('blocks an empty set (nothing safe to connect to)', () => {
+    expect(validateResolvedAddresses([]).ok).toBe(false);
+  });
+
+  it('blocks an IPv4-mapped IPv6 private address', () => {
+    const res = validateResolvedAddresses([addr('::ffff:10.0.0.1', 6)]);
+    expect(res.ok).toBe(false);
+  });
+
+  it('blocks the cloud metadata link-local address', () => {
+    const res = validateResolvedAddresses([
+      addr('8.8.8.8'),
+      addr('169.254.169.254'),
+    ]);
+    expect(res.ok).toBe(false);
+  });
+
+  /**
+   * Regression sentinel: if the "any private blocks" rule were weakened to
+   * "all private blocks" / "first address wins", this mixed set (public first,
+   * private second) would wrongly pass. The assertion below FAILS in that case.
+   */
+  it('FAILS if the any-private rule is weakened (sentinel)', () => {
+    const res = validateResolvedAddresses([
+      addr('8.8.8.8'), // public first
+      addr('192.168.1.1'), // private second — must still block the whole set
+    ]);
+    expect(res.ok).toBe(false);
+  });
+});
diff --git a/apps/server/src/core/ai-chat/resolve-audio-format.spec.ts b/apps/server/src/core/ai-chat/resolve-audio-format.spec.ts
new file mode 100644
index 00000000..eb8cb631
--- /dev/null
+++ b/apps/server/src/core/ai-chat/resolve-audio-format.spec.ts
@@ -0,0 +1,53 @@
+import { resolveAudioFormat } from './ai-chat.controller';
+
+/**
+ * Unit tests for resolveAudioFormat — the transcribe-endpoint mime whitelist.
+ * It splits the base mime off any MediaRecorder parameters, lowercases/trims it,
+ * checks it against the whitelist, and maps it to the STT container-format hint.
+ * A non-whitelisted container yields { ok: false } (the controller then throws
+ * BadRequestException).
+ */
+describe('resolveAudioFormat', () => {
+  it('strips MediaRecorder parameters to the base mime (audio/webm;codecs=opus)', () => {
+    const res = resolveAudioFormat('audio/webm;codecs=opus');
+    expect(res).toEqual({ ok: true, baseMime: 'audio/webm', format: 'webm' });
+  });
+
+  it('normalizes uppercase / surrounding whitespace', () => {
+    const res = resolveAudioFormat('  AUDIO/MP4 ; codecs=mp4a  ');
+    expect(res).toEqual({ ok: true, baseMime: 'audio/mp4', format: 'mp4' });
+  });
+
+  it('handles the Safari/iOS audio/x-m4a container', () => {
+    expect(resolveAudioFormat('audio/x-m4a')).toEqual({
+      ok: true,
+      baseMime: 'audio/x-m4a',
+      format: 'm4a',
+    });
+  });
+
+  it('rejects a disallowed container (audio/aiff)', () => {
+    expect(resolveAudioFormat('audio/aiff')).toEqual({ ok: false });
+  });
+
+  it('maps every whitelisted container to its STT format hint', () => {
+    const cases: Array<[string, string]> = [
+      ['audio/webm', 'webm'],
+      ['audio/ogg', 'ogg'],
+      ['audio/mp4', 'mp4'],
+      ['audio/mpeg', 'mp3'],
+      ['audio/wav', 'wav'],
+      ['audio/x-wav', 'wav'],
+      ['audio/wave', 'wav'],
+      ['audio/m4a', 'm4a'],
+      ['audio/x-m4a', 'm4a'],
+    ];
+    for (const [mime, format] of cases) {
+      expect(resolveAudioFormat(mime)).toEqual({
+        ok: true,
+        baseMime: mime,
+        format,
+      });
+    }
+  });
+});
diff --git a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts
index 74ac66b8..afe0404f 100644
--- a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts
+++ b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts
@@ -199,21 +199,8 @@ export class AiChatToolsService {
           const accessibleSet = new Set(accessibleIds);
 
           // Keep the best (first — hits are ordered by fused score desc) chunk
-          // per page, capped to `cap`.
-          const seen = new Set<string>();
-          const results: { id: string; title: string; snippet: string }[] = [];
-          for (const hit of hits) {
-            if (!accessibleSet.has(hit.pageId)) continue;
-            if (seen.has(hit.pageId)) continue;
-            seen.add(hit.pageId);
-            results.push({
-              id: hit.pageId,
-              title: hit.title ?? '',
-              snippet: snippet(hit.content),
-            });
-            if (results.length >= cap) break;
-          }
-          return results;
+          // per page, dropping any page the user cannot access, capped to `cap`.
+          return selectAccessibleHits(hits, accessibleSet, cap);
         },
       }),
 
@@ -960,6 +947,44 @@ export class AiChatToolsService {
   }
 }
 
+/** A single hybrid-search hit: the minimal shape selectAccessibleHits needs. */
+export interface SearchHitLike {
+  pageId: string;
+  title: string | null;
+  content: string;
+}
+
+/**
+ * Post-filter hybrid-search hits into the agent-facing result list. This is the
+ * CASL leak guard for the in-process hybrid search: the hits come from a direct
+ * pgvector + full-text query that does NOT get CASL for free, so an accessible
+ * SPACE does not imply every page in it is accessible (restricted pages).
+ *
+ * Given `hits` (ordered by fused score desc), the `accessibleSet` of page ids
+ * the user may read, and `cap`, it keeps the BEST (first) chunk per page, drops
+ * any page not in `accessibleSet`, and caps the output at `cap`. Pure — no I/O.
+ */
+export function selectAccessibleHits(
+  hits: readonly SearchHitLike[],
+  accessibleSet: Set<string>,
+  cap: number,
+): { id: string; title: string; snippet: string }[] {
+  const seen = new Set<string>();
+  const results: { id: string; title: string; snippet: string }[] = [];
+  for (const hit of hits) {
+    if (!accessibleSet.has(hit.pageId)) continue;
+    if (seen.has(hit.pageId)) continue;
+    seen.add(hit.pageId);
+    results.push({
+      id: hit.pageId,
+      title: hit.title ?? '',
+      snippet: snippet(hit.content),
+    });
+    if (results.length >= cap) break;
+  }
+  return results;
+}
+
 /**
  * Trim a search highlight/snippet to a token-efficient length. The highlight
  * may contain `<b>` markers from the search backend; they are harmless to the
diff --git a/apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts b/apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts
new file mode 100644
index 00000000..21ac46ac
--- /dev/null
+++ b/apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts
@@ -0,0 +1,96 @@
+import {
+  selectAccessibleHits,
+  type SearchHitLike,
+} from './ai-chat-tools.service';
+
+/**
+ * Unit tests for selectAccessibleHits — the CASL leak guard for the in-process
+ * hybrid search. The hybrid query runs over pgvector + full-text WITHOUT CASL,
+ * so this post-filter is the ONLY thing that drops pages the user cannot read.
+ *
+ * Core invariant: a hit on a page that is NOT in `accessibleSet` is dropped,
+ * even when that page lives in an otherwise-accessible space. Plus: only the
+ * best chunk per page survives (dedupe), results are capped, and an empty
+ * accessibleSet yields nothing.
+ */
+function hit(pageId: string, title: string | null, content: string): SearchHitLike {
+  return { pageId, title, content };
+}
+
+describe('selectAccessibleHits', () => {
+  it('drops a hit on a page NOT in accessibleSet (the core leak guard)', () => {
+    const hits = [
+      hit('public-page', 'Public', 'visible body'),
+      // restricted-page is in an accessible space but NOT page-accessible.
+      hit('restricted-page', 'Secret', 'leaked body'),
+    ];
+    const accessibleSet = new Set(['public-page']);
+
+    const out = selectAccessibleHits(hits, accessibleSet, 10);
+
+    expect(out).toEqual([
+      { id: 'public-page', title: 'Public', snippet: 'visible body' },
+    ]);
+    // The restricted page must NEVER appear in the output.
+    expect(out.some((r) => r.id === 'restricted-page')).toBe(false);
+  });
+
+  it('keeps only the best (first) chunk per page when a page has duplicates', () => {
+    const hits = [
+      hit('p1', 'Page One', 'best chunk'),
+      hit('p1', 'Page One', 'lower-ranked chunk'),
+      hit('p2', 'Page Two', 'p2 chunk'),
+    ];
+    const accessibleSet = new Set(['p1', 'p2']);
+
+    const out = selectAccessibleHits(hits, accessibleSet, 10);
+
+    expect(out).toEqual([
+      { id: 'p1', title: 'Page One', snippet: 'best chunk' },
+      { id: 'p2', title: 'Page Two', snippet: 'p2 chunk' },
+    ]);
+  });
+
+  it('caps the number of results at `cap`', () => {
+    const hits = [
+      hit('p1', 't1', 'c1'),
+      hit('p2', 't2', 'c2'),
+      hit('p3', 't3', 'c3'),
+      hit('p4', 't4', 'c4'),
+    ];
+    const accessibleSet = new Set(['p1', 'p2', 'p3', 'p4']);
+
+    const out = selectAccessibleHits(hits, accessibleSet, 2);
+
+    expect(out).toHaveLength(2);
+    expect(out.map((r) => r.id)).toEqual(['p1', 'p2']);
+  });
+
+  it('returns an empty list when accessibleSet is empty', () => {
+    const hits = [hit('p1', 't1', 'c1'), hit('p2', 't2', 'c2')];
+
+    expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]);
+  });
+
+  it('defaults a null title to an empty string', () => {
+    const out = selectAccessibleHits(
+      [hit('p1', null, 'body')],
+      new Set(['p1']),
+      10,
+    );
+    expect(out).toEqual([{ id: 'p1', title: '', snippet: 'body' }]);
+  });
+
+  /**
+   * Regression sentinel for the leak guard: if the access intersection
+   * (`accessibleSet.has(hit.pageId)` filter) were removed, the restricted page
+   * would slip into the output and THIS assertion would fail. Documents that
+   * the filter — not the dedupe/cap — is what enforces page-level access.
+   */
+  it('FAILS if the access intersection is removed (sentinel)', () => {
+    const hits = [hit('restricted', 'Secret', 'leaked')];
+    // Page is NOT accessible -> output MUST be empty. Without the intersection
+    // check the function would return the restricted hit and break this test.
+    expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]);
+  });
+});
diff --git a/apps/server/src/core/share/share-comment-strip.spec.ts b/apps/server/src/core/share/share-comment-strip.spec.ts
new file mode 100644
index 00000000..19befdc1
--- /dev/null
+++ b/apps/server/src/core/share/share-comment-strip.spec.ts
@@ -0,0 +1,176 @@
+import { ShareService } from './share.service';
+
+// Exercises the REAL ShareService comment-mark stripping for shared content via
+// the smallest reachable seam: updatePublicAttachments -> prepareContentForShare
+// -> removeMarkTypeFromDoc(doc, 'comment'). This is a documented threat-model
+// item: `comment` marks are internal-team metadata (existence, location, count,
+// resolved state, and the comment ids themselves) and MUST NOT leak to anonymous
+// public-share viewers.
+//
+// prepareContentForShare is private and the page-load path (getSharedPage) needs
+// a full DB-backed resolveReadableSharePage; updatePublicAttachments is the
+// smallest public seam that runs the exact same sanitization on a doc we control.
+// Only the workspace toggle (workspaceRepo.findById) and token service are
+// touched, both mocked — no DB setup required.
+
+const WS = 'ws-1';
+const PAGE = 'page-1';
+
+function buildService() {
+  const shareRepo = { findById: jest.fn() };
+  const pageRepo = { findById: jest.fn() };
+  const pagePermissionRepo = {
+    hasRestrictedAncestor: jest.fn(async () => false),
+  };
+  const tokenService = {
+    generateAttachmentToken: jest.fn(async () => 'tok'),
+  };
+  // htmlEmbed toggle ON so the embed strip is a no-op and we isolate the
+  // comment-mark strip behaviour.
+  const workspaceRepo = {
+    findById: jest.fn(async () => ({ id: WS, settings: { htmlEmbed: true } })),
+  };
+
+  return new ShareService(
+    shareRepo as any,
+    pageRepo as any,
+    pagePermissionRepo as any,
+    {} as any, // db (unused on this path)
+    tokenService as any,
+    {} as any, // transclusionService (unused)
+    workspaceRepo as any,
+  );
+}
+
+// A paragraph whose text carries a `comment` mark with a comment id.
+function commentedText(text: string, commentId: string) {
+  return {
+    type: 'text',
+    text,
+    marks: [{ type: 'comment', attrs: { commentId, resolved: false } }],
+  };
+}
+
+async function sanitize(content: any) {
+  const service = buildService();
+  return service.updatePublicAttachments({
+    id: PAGE,
+    workspaceId: WS,
+    content,
+  } as any);
+}
+
+function countCommentMarks(doc: any): number {
+  let count = 0;
+  const walk = (node: any) => {
+    if (!node || typeof node !== 'object') return;
+    if (Array.isArray(node.marks)) {
+      for (const mark of node.marks) {
+        if (mark?.type === 'comment') count++;
+      }
+    }
+    if (Array.isArray(node.content)) node.content.forEach(walk);
+  };
+  walk(doc);
+  return count;
+}
+
+describe('ShareService comment-mark stripping for public shares (real code)', () => {
+  it('strips a top-level comment mark and preserves the visible text', async () => {
+    const content = {
+      type: 'doc',
+      content: [
+        {
+          type: 'paragraph',
+          content: [commentedText('secret-reviewed body', 'cmt-top-1')],
+        },
+      ],
+    };
+
+    const out = await sanitize(content);
+
+    expect(countCommentMarks(out)).toBe(0);
+    // The text itself survives; only the internal mark is removed.
+    expect(JSON.stringify(out)).toContain('secret-reviewed body');
+    // The comment id must not appear anywhere in the serialized output.
+    expect(JSON.stringify(out)).not.toContain('cmt-top-1');
+  });
+
+  it('strips comment marks nested inside columns and callouts', async () => {
+    const content = {
+      type: 'doc',
+      content: [
+        {
+          type: 'columns',
+          content: [
+            {
+              type: 'column',
+              content: [
+                {
+                  type: 'paragraph',
+                  content: [commentedText('col body', 'cmt-col-1')],
+                },
+              ],
+            },
+            {
+              type: 'column',
+              content: [
+                {
+                  type: 'callout',
+                  content: [
+                    {
+                      type: 'paragraph',
+                      content: [commentedText('callout body', 'cmt-callout-1')],
+                    },
+                  ],
+                },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+
+    const out = await sanitize(content);
+
+    expect(countCommentMarks(out)).toBe(0);
+    const serialized = JSON.stringify(out);
+    // Visible content of both nested branches survives.
+    expect(serialized).toContain('col body');
+    expect(serialized).toContain('callout body');
+    // No nested comment id leaks.
+    expect(serialized).not.toContain('cmt-col-1');
+    expect(serialized).not.toContain('cmt-callout-1');
+  });
+
+  it('strips every comment mark when multiple coexist (count goes to zero)', async () => {
+    const content = {
+      type: 'doc',
+      content: [
+        {
+          type: 'paragraph',
+          content: [
+            commentedText('a', 'cmt-a'),
+            { type: 'text', text: ' plain ' },
+            commentedText('b', 'cmt-b'),
+          ],
+        },
+        {
+          type: 'paragraph',
+          content: [commentedText('c', 'cmt-c')],
+        },
+      ],
+    };
+
+    // Sanity: the input genuinely has 3 comment marks before sanitization.
+    expect(countCommentMarks(content)).toBe(3);
+
+    const out = await sanitize(content);
+
+    expect(countCommentMarks(out)).toBe(0);
+    const serialized = JSON.stringify(out);
+    for (const id of ['cmt-a', 'cmt-b', 'cmt-c']) {
+      expect(serialized).not.toContain(id);
+    }
+  });
+});
diff --git a/apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts b/apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts
new file mode 100644
index 00000000..33836811
--- /dev/null
+++ b/apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts
@@ -0,0 +1,41 @@
+import { ShareSeoController } from './share-seo.controller';
+
+// Pins ShareSeoController.extractPageSlugId — the slug→pageId resolver used to
+// look up a shared page from the public URL. A full UUID must pass through
+// untouched; a "title-slug-<id>" must yield the trailing token; a single token
+// is returned as-is; falsy input yields undefined. The method does not touch
+// `this`, so the controller can be constructed with null collaborators.
+
+function buildController(): ShareSeoController {
+  return new ShareSeoController(null as any, null as any, null as any);
+}
+
+describe('ShareSeoController.extractPageSlugId', () => {
+  const controller = buildController();
+
+  it('returns a full UUID unchanged', () => {
+    const uuid = '550e8400-e29b-41d4-a716-446655440000';
+    expect(controller.extractPageSlugId(uuid)).toBe(uuid);
+  });
+
+  it('returns the trailing token of a title-slug-id form', () => {
+    expect(controller.extractPageSlugId('my-page-title-abc123')).toBe('abc123');
+  });
+
+  it('returns a single token (no hyphen) as-is', () => {
+    expect(controller.extractPageSlugId('abc123')).toBe('abc123');
+  });
+
+  it('returns the last segment for a two-token slug', () => {
+    expect(controller.extractPageSlugId('hello-world')).toBe('world');
+  });
+
+  it('returns undefined for an empty string (falsy guard)', () => {
+    expect(controller.extractPageSlugId('')).toBeUndefined();
+  });
+
+  it('returns undefined for null/undefined input', () => {
+    expect(controller.extractPageSlugId(undefined as any)).toBeUndefined();
+    expect(controller.extractPageSlugId(null as any)).toBeUndefined();
+  });
+});
diff --git a/apps/server/src/core/share/share-seo.controller.ts b/apps/server/src/core/share/share-seo.controller.ts
index db20ef0a..ad50e904 100644
--- a/apps/server/src/core/share/share-seo.controller.ts
+++ b/apps/server/src/core/share/share-seo.controller.ts
@@ -7,8 +7,8 @@ import { validate as isValidUUID } from 'uuid';
 import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
 import { EnvironmentService } from '../../integrations/environment/environment.service';
 import { Workspace } from '@docmost/db/types/entity.types';
-import { htmlEscape } from '../../common/helpers/html-escaper';
 import { injectTrackerHead } from './inject-tracker-head.util';
+import { buildShareMetaHtml } from './share-seo.util';
 
 @Controller('share')
 export class ShareSeoController {
@@ -72,24 +72,11 @@ export class ShareSeoController {
         return this.sendIndex(indexFilePath, res);
       }
 
-      const rawTitle = htmlEscape(share?.sharedPage.title ?? 'untitled');
-      const metaTitle =
-        rawTitle.length > 80 ? `${rawTitle.slice(0, 77)}…` : rawTitle;
-
-      const metaTagVar = '<!--meta-tags-->';
-
-      const metaTags = [
-        `<meta property="og:title" content="${metaTitle}" />`,
-        `<meta property="twitter:title" content="${metaTitle}" />`,
-        !share.searchIndexing ? `<meta name="robots" content="noindex" />` : '',
-      ]
-        .filter(Boolean)
-        .join('\n    ');
-
       const html = fs.readFileSync(indexFilePath, 'utf8');
-      let transformedHtml = html
-        .replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
-        .replace(metaTagVar, metaTags);
+      let transformedHtml = buildShareMetaHtml(html, {
+        title: share?.sharedPage.title,
+        searchIndexing: share.searchIndexing,
+      });
 
       // Deliberate same-origin tracker surface: this is the ONE place where an
       // admin-authored analytics/tracker snippet (settings.trackerHead) is
diff --git a/apps/server/src/core/share/share-seo.meta.spec.ts b/apps/server/src/core/share/share-seo.meta.spec.ts
new file mode 100644
index 00000000..25d51b28
--- /dev/null
+++ b/apps/server/src/core/share/share-seo.meta.spec.ts
@@ -0,0 +1,126 @@
+import { buildShareMetaHtml } from './share-seo.util';
+
+// Pins the SEO meta-HTML builder for public share pages (extracted verbatim from
+// ShareSeoController.getShare). The shared page title is attacker-influenceable,
+// so the security-critical invariant is that it is htmlEscape'd before being
+// interpolated into BOTH the <title> element and the content="..." attributes of
+// the og:/twitter: meta tags. The XSS tests below MUST fail if the htmlEscape
+// step is ever removed.
+
+// A minimal index.html shell carrying the two placeholders the builder rewrites:
+// the <title> element and the <!--meta-tags--> marker.
+const INDEX =
+  '<html><head><title>App</title>\n    <!--meta-tags--></head><body>x</body></html>';
+
+describe('buildShareMetaHtml', () => {
+  describe('XSS: title escaping', () => {
+    it('fully htmlEscapes a </title><script> breakout in BOTH <title> and og:/twitter: meta', () => {
+      const out = buildShareMetaHtml(INDEX, {
+        title: '</title><script>alert(1)</script>',
+        searchIndexing: true,
+      });
+
+      // The raw script tag must NEVER appear anywhere in the output — it would
+      // execute in the share origin. This assertion fails if htmlEscape is removed.
+      expect(out).not.toContain('<script>');
+      expect(out).not.toContain('</title><script>');
+      // The dangerous chars are escaped to entities instead.
+      expect(out).toContain('&lt;script&gt;alert(1)&lt;/script&gt;');
+      // og:title and twitter:title both carry the escaped (not raw) value.
+      expect(out).toContain(
+        '<meta property="og:title" content="&lt;/title&gt;&lt;script&gt;alert(1)&lt;/script&gt;" />',
+      );
+      expect(out).toContain(
+        '<meta property="twitter:title" content="&lt;/title&gt;&lt;script&gt;alert(1)&lt;/script&gt;" />',
+      );
+    });
+
+    it('escapes a double quote to &quot; so the content="..." attribute cannot be broken', () => {
+      const out = buildShareMetaHtml(INDEX, {
+        title: 'a"onmouseover="alert(1)',
+        searchIndexing: true,
+      });
+
+      // A raw `"` would close the content attribute and inject a new attribute.
+      expect(out).not.toContain('content="a"onmouseover=');
+      expect(out).toContain('&quot;');
+      expect(out).toContain(
+        '<meta property="og:title" content="a&quot;onmouseover=&quot;alert(1)" />',
+      );
+    });
+  });
+
+  describe('title truncation (limit 80, applied AFTER escaping)', () => {
+    it('leaves a title of exactly 80 chars untouched (no ellipsis)', () => {
+      const title = 'a'.repeat(80);
+      const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
+      expect(out).toContain(`<title>${title}</title>`);
+      expect(out).not.toContain('…');
+    });
+
+    it('truncates a >80 char title to 77 chars + an ellipsis (78 total)', () => {
+      const title = 'b'.repeat(100);
+      const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
+      const expected = `${'b'.repeat(77)}…`;
+      expect(out).toContain(`<title>${expected}</title>`);
+      // 77 visible chars + the single ellipsis glyph.
+      expect(expected.length).toBe(78);
+      expect(out).toContain(
+        `<meta property="og:title" content="${expected}" />`,
+      );
+    });
+
+    it('truncation acts on the ESCAPED string: each < becomes &lt; first, then slice(0,77)', () => {
+      // 100 "<" chars escape to 100 * "&lt;" = 400 chars, then truncate to 77 + …
+      const title = '<'.repeat(100);
+      const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
+      const escaped = '&lt;'.repeat(100);
+      const expected = `${escaped.slice(0, 77)}…`;
+      expect(out).toContain(`<title>${expected}</title>`);
+      // No raw "<" from the title leaks through.
+      expect(out).not.toContain('<<');
+    });
+  });
+
+  describe('robots noindex meta', () => {
+    it('searchIndexing=false emits <meta name="robots" content="noindex">', () => {
+      const out = buildShareMetaHtml(INDEX, {
+        title: 'page',
+        searchIndexing: false,
+      });
+      expect(out).toContain('<meta name="robots" content="noindex" />');
+    });
+
+    it('searchIndexing=true emits NO robots tag', () => {
+      const out = buildShareMetaHtml(INDEX, {
+        title: 'page',
+        searchIndexing: true,
+      });
+      expect(out).not.toContain('robots');
+      expect(out).not.toContain('noindex');
+    });
+  });
+
+  describe('null / missing title fallback', () => {
+    it('falls back to "untitled" when title is null', () => {
+      const out = buildShareMetaHtml(INDEX, {
+        title: null as unknown as string,
+        searchIndexing: true,
+      });
+      expect(out).toContain('<title>untitled</title>');
+      expect(out).toContain('<meta property="og:title" content="untitled" />');
+    });
+  });
+
+  describe('placeholder replacement', () => {
+    it('replaces the original <title> and the <!--meta-tags--> marker', () => {
+      const out = buildShareMetaHtml(INDEX, {
+        title: 'Hello',
+        searchIndexing: true,
+      });
+      expect(out).not.toContain('<!--meta-tags-->');
+      expect(out).not.toContain('<title>App</title>');
+      expect(out).toContain('<title>Hello</title>');
+    });
+  });
+});
diff --git a/apps/server/src/core/share/share-seo.util.ts b/apps/server/src/core/share/share-seo.util.ts
new file mode 100644
index 00000000..e240dd3d
--- /dev/null
+++ b/apps/server/src/core/share/share-seo.util.ts
@@ -0,0 +1,40 @@
+import { htmlEscape } from '../../common/helpers/html-escaper';
+
+/**
+ * Build the SEO-enriched index HTML for a publicly shared page.
+ *
+ * This is the pure, side-effect-free core of ShareSeoController.getShare: given
+ * the raw index.html and the share's title + searchIndexing flag, it returns the
+ * transformed HTML with the <title> replaced and the og:/twitter:/robots meta
+ * tags injected at the <!--meta-tags--> marker.
+ *
+ * SECURITY: the title is attacker-influenceable (it is the shared page title),
+ * so it MUST be htmlEscape'd before being interpolated into both the <title>
+ * element and the content="..." attributes of the meta tags. Removing the
+ * escaping would allow a page title to break out of the attribute / element and
+ * inject markup into the share origin.
+ */
+export function buildShareMetaHtml(
+  indexHtml: string,
+  opts: { title: string | null; searchIndexing: boolean },
+): string {
+  // Escape FIRST, then truncate, so the truncation acts on the safe string and
+  // can never split a multi-char HTML entity (matches the original controller).
+  const rawTitle = htmlEscape(opts.title ?? 'untitled');
+  const metaTitle =
+    rawTitle.length > 80 ? `${rawTitle.slice(0, 77)}…` : rawTitle;
+
+  const metaTagVar = '<!--meta-tags-->';
+
+  const metaTags = [
+    `<meta property="og:title" content="${metaTitle}" />`,
+    `<meta property="twitter:title" content="${metaTitle}" />`,
+    !opts.searchIndexing ? `<meta name="robots" content="noindex" />` : '',
+  ]
+    .filter(Boolean)
+    .join('\n    ');
+
+  return indexHtml
+    .replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
+    .replace(metaTagVar, metaTags);
+}
diff --git a/apps/server/src/core/share/share.util.spec.ts b/apps/server/src/core/share/share.util.spec.ts
new file mode 100644
index 00000000..8de1e727
--- /dev/null
+++ b/apps/server/src/core/share/share.util.spec.ts
@@ -0,0 +1,62 @@
+import { updateAttachmentAttr } from './share.util';
+
+// Pins updateAttachmentAttr — the per-attachment URL rewriter used when serving
+// shared page content. Internal attachment paths (/files… and /api/files…) must
+// be rewritten to the public form with a scoped jwt appended; anything else
+// (external URLs, null) must be left untouched so a public viewer's signed token
+// is never attached to a foreign origin. The function only reads/writes
+// node.attrs[attr], so a plain object stands in for the real ProseMirror Node.
+
+function fakeNode(attrs: Record<string, any>) {
+  return { attrs } as any;
+}
+
+const JWT = 'TOK';
+
+describe('updateAttachmentAttr', () => {
+  it('rewrites a /files path to /files/public/ with ?jwt=', () => {
+    const node = fakeNode({ src: '/files/x.png' });
+    updateAttachmentAttr(node, 'src', JWT);
+    expect(node.attrs.src).toBe(`/files/public/x.png?jwt=${JWT}`);
+  });
+
+  it('rewrites an /api/files path (keeps the /api prefix, inserts public)', () => {
+    const node = fakeNode({ src: '/api/files/y.png' });
+    updateAttachmentAttr(node, 'src', JWT);
+    expect(node.attrs.src).toBe(`/api/files/public/y.png?jwt=${JWT}`);
+  });
+
+  it('uses &jwt= when the src already carries a query string', () => {
+    const node = fakeNode({ src: '/files/x.png?w=100' });
+    updateAttachmentAttr(node, 'src', JWT);
+    expect(node.attrs.src).toBe(`/files/public/x.png?w=100&jwt=${JWT}`);
+  });
+
+  it('leaves an external https URL untouched (no token leak to a foreign origin)', () => {
+    const external = 'https://example.com/x.png';
+    const node = fakeNode({ src: external });
+    updateAttachmentAttr(node, 'src', JWT);
+    expect(node.attrs.src).toBe(external);
+  });
+
+  it('leaves a null src untouched', () => {
+    const node = fakeNode({ src: null });
+    updateAttachmentAttr(node, 'src', JWT);
+    expect(node.attrs.src).toBeNull();
+  });
+
+  it('rewrites the `url` attr variant the same way', () => {
+    const node = fakeNode({ url: '/files/doc.pdf' });
+    updateAttachmentAttr(node, 'url', JWT);
+    expect(node.attrs.url).toBe(`/files/public/doc.pdf?jwt=${JWT}`);
+  });
+
+  it('only touches the requested attr, leaving the other attr alone', () => {
+    const external = 'https://cdn.example.com/a.png';
+    const node = fakeNode({ src: '/files/a.png', url: external });
+    updateAttachmentAttr(node, 'src', JWT);
+    expect(node.attrs.src).toBe(`/files/public/a.png?jwt=${JWT}`);
+    // `url` was not requested, so it is unchanged.
+    expect(node.attrs.url).toBe(external);
+  });
+});