From 3d4ad664b39bf235893e54370737c94684a69a08 Mon Sep 17 00:00:00 2001 From: claude_code Date: Sun, 21 Jun 2026 19:10:27 +0300 Subject: [PATCH] test(refactor-tail): extract pure cores + cover collab/share/ai-chat/client gate Batches 6-9: behaviour-preserving extractions of testable pure cores plus the tests they unblock, and a fix for the broken client test environment. Full suites green: server 113 suites / 1117 + 1 todo, client 30 files / 338. client (R0 infra): - vitest.setup.ts: in-memory localStorage/sessionStorage Storage stub wired via setupFiles. Unblocks menu-items.gating.test.ts (was 9 failing) -> client suite fully green. + menu-items.suggestions.test.ts (getSuggestionItems filter/sort). share: - extract buildShareMetaHtml (share-seo.util.ts) from the SEO controller; tests for reflected-XSS escaping in /og/twitter meta, noindex, truncation; extractPageSlugId; updateAttachmentAttr; prepareContentForShare comment-strip (anonymous-viewer metadata-leak guard). ai-chat (security extractions): - selectAccessibleHits: CASL post-filter for semantic search (restricted page in an accessible space must NOT leak to the agent). - validateResolvedAddresses: SSRF connect-time guard (block if ANY resolved address is private). - resolveAudioFormat: mime whitelist (dead `?? 'webm'` fallback dropped, set unchanged). + mcp-servers toView header-leak guard, MCP tool namespacing. collaboration (data-loss area): - extract computeHistoryJob (pins the "agent delay MUST stay 0" invariant) and resolveSource. Integration: onAuthenticate read-only matrix (collab auth bypass), HistoryProcessor (contributor restore on save failure), onStoreDocument Approach-A boundary snapshot (human revision pinned before agent overwrite). Reviewed (APPROVE WITH SUGGESTIONS): extractions behaviour-preserving, security tests mutation-resistant. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --- .../slash-menu/menu-items.suggestions.test.ts | 84 +++++++ apps/client/vitest.config.ts | 2 +- apps/client/vitest.setup.ts | 51 +++++ .../authentication.extension.spec.ts | 211 ++++++++++++++++++ .../extensions/compute-history-job.spec.ts | 105 +++++++++ .../extensions/persistence-store.spec.ts | 185 +++++++++++++++ .../extensions/persistence.extension.ts | 78 +++++-- .../processors/history.processor.spec.ts | 200 +++++++++++++++++ .../src/core/ai-chat/ai-chat.controller.ts | 73 +++--- .../external-mcp/mcp-clients.service.ts | 45 ++-- .../external-mcp/mcp-namespacing.spec.ts | 136 +++++++++++ .../external-mcp/mcp-servers-to-view.spec.ts | 85 +++++++ .../validate-resolved-addresses.spec.ts | 67 ++++++ .../core/ai-chat/resolve-audio-format.spec.ts | 53 +++++ .../ai-chat/tools/ai-chat-tools.service.ts | 55 +++-- .../tools/select-accessible-hits.spec.ts | 96 ++++++++ .../core/share/share-comment-strip.spec.ts | 176 +++++++++++++++ .../share-seo.controller.extract-slug.spec.ts | 41 ++++ .../src/core/share/share-seo.controller.ts | 23 +- .../src/core/share/share-seo.meta.spec.ts | 126 +++++++++++ apps/server/src/core/share/share-seo.util.ts | 40 ++++ apps/server/src/core/share/share.util.spec.ts | 62 +++++ 22 files changed, 1893 insertions(+), 101 deletions(-) create mode 100644 apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts create mode 100644 apps/client/vitest.setup.ts create mode 100644 apps/server/src/collaboration/extensions/authentication.extension.spec.ts create mode 100644 apps/server/src/collaboration/extensions/compute-history-job.spec.ts create mode 100644 apps/server/src/collaboration/extensions/persistence-store.spec.ts create mode 100644 apps/server/src/collaboration/processors/history.processor.spec.ts create mode 100644 apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts create mode 100644 apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts create mode 100644 apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts create mode 100644 apps/server/src/core/ai-chat/resolve-audio-format.spec.ts create mode 100644 apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts create mode 100644 apps/server/src/core/share/share-comment-strip.spec.ts create mode 100644 apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts create mode 100644 apps/server/src/core/share/share-seo.meta.spec.ts create mode 100644 apps/server/src/core/share/share-seo.util.ts create mode 100644 apps/server/src/core/share/share.util.spec.ts diff --git a/apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts b/apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts new file mode 100644 index 00000000..b20c4270 --- /dev/null +++ b/apps/client/src/features/editor/components/slash-menu/menu-items.suggestions.test.ts @@ -0,0 +1,84 @@ +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { getSuggestionItems } from "./menu-items"; + +// Coverage for the filter/sort half of `getSuggestionItems` (distinct from the +// HTML-embed gating suite). A slash query is matched against each item three +// ways — fuzzy on the title, substring on the description, and substring on the +// searchTerms — and matched items are sorted so title-substring hits float to +// the top of their group. We also cover `excludeItems`. +// +// `getSuggestionItems` -> `isHtmlEmbedFeatureEnabled` reads the persisted +// `currentUser` localStorage entry, so a working in-memory Storage stub is a +// prerequisite (installed by vitest.setup.ts). We persist a `currentUser` with +// the HTML-embed toggle OFF (the production default) so the gated "HTML embed" +// item never leaks into these non-HTML queries. + +const KEY = "currentUser"; + +function flatTitles(groups: ReturnType<typeof getSuggestionItems>): string[] { + return Object.values(groups) + .flat() + .map((item) => item.title); +} + +beforeEach(() => { + // Default workspace state: HTML-embed feature OFF (matches production default). + localStorage.setItem(KEY, JSON.stringify({ workspace: { settings: {} } })); +}); + +afterEach(() => { + localStorage.clear(); +}); + +describe("getSuggestionItems — filter and sort", () => { + it("fuzzy-matches a title (non-contiguous characters)", () => { + // "tdo" is not a substring of "to-do list" but matches fuzzily (t..d..o). + const titles = flatTitles(getSuggestionItems({ query: "tdo" })); + expect(titles).toContain("To-do list"); + }); + + it("matches via the description when the title does not match", () => { + // "numbering" only appears in the description "Create a list with numbering.", + // not in the "Numbered list" title nor its searchTerms. + const titles = flatTitles(getSuggestionItems({ query: "numbering" })); + expect(titles).toContain("Numbered list"); + }); + + it("matches via searchTerms when title and description do not match", () => { + // "blockquote" is only present in the "Quote" item's searchTerms. + const titles = flatTitles(getSuggestionItems({ query: "blockquote" })); + expect(titles).toContain("Quote"); + }); + + it("sorts title-substring matches before non-title (description) matches", () => { + // For "page": several titles contain "page" (e.g. "Page break"), while + // "Synced block" matches only through its description (".. across pages."). + // The sort tie-break must place every title hit ahead of the non-title hit. + const titles = flatTitles(getSuggestionItems({ query: "page" })); + + const syncedIndex = titles.indexOf("Synced block"); + const pageBreakIndex = titles.indexOf("Page break"); + + // Sanity: both items survived the filter for this query. + expect(syncedIndex).toBeGreaterThanOrEqual(0); + expect(pageBreakIndex).toBeGreaterThanOrEqual(0); + + // The title match ("Page break") sorts before the description-only match. + expect(pageBreakIndex).toBeLessThan(syncedIndex); + }); + + it("removes a named item via excludeItems", () => { + const withBullet = flatTitles(getSuggestionItems({ query: "list" })); + expect(withBullet).toContain("Bullet list"); + + const withoutBullet = flatTitles( + getSuggestionItems({ + query: "list", + excludeItems: new Set(["Bullet list"]), + }), + ); + expect(withoutBullet).not.toContain("Bullet list"); + // Other "list" matches remain unaffected by the exclusion. + expect(withoutBullet).toContain("Numbered list"); + }); +}); diff --git a/apps/client/vitest.config.ts b/apps/client/vitest.config.ts index 5cde717a..334f6226 100644 --- a/apps/client/vitest.config.ts +++ b/apps/client/vitest.config.ts @@ -12,6 +12,6 @@ export default defineConfig({ test: { environment: 'jsdom', globals: true, - setupFiles: [], + setupFiles: ['./vitest.setup.ts'], }, }); diff --git a/apps/client/vitest.setup.ts b/apps/client/vitest.setup.ts new file mode 100644 index 00000000..33d3d142 --- /dev/null +++ b/apps/client/vitest.setup.ts @@ -0,0 +1,51 @@ +// Vitest global setup (test-infra only — no production app source). +// +// Under Node 25 / jsdom 25 / vitest 4 the jsdom `localStorage` exposed on the +// global is not a usable Storage: its methods (`setItem`/`getItem`/...) are not +// callable, so any code touching `localStorage` throws `... is not a function`. +// Production code such as `isHtmlEmbedFeatureEnabled()` reads +// `localStorage.getItem("currentUser")`, which made dependent tests fail. +// +// We install a correct in-memory Storage stub on the global BEFORE tests run so +// the Web Storage contract holds: string coercion of keys/values, `null` for +// missing keys, working `length`/`key(index)`, and `clear()`. +import { vi } from "vitest"; + +// Minimal, spec-faithful in-memory implementation of the Web Storage API. +function createStorage(): Storage { + let store = new Map<string, string>(); + + const storage: Storage = { + get length(): number { + return store.size; + }, + clear(): void { + store = new Map<string, string>(); + }, + getItem(key: string): string | null { + // Missing keys must return `null`, not `undefined`. + const value = store.get(String(key)); + return value === undefined ? null : value; + }, + setItem(key: string, value: string): void { + // Web Storage coerces both key and value to strings. + store.set(String(key), String(value)); + }, + removeItem(key: string): void { + store.delete(String(key)); + }, + key(index: number): string | null { + // Insertion order matches Map iteration order; out-of-range => null. + const keys = Array.from(store.keys()); + return index >= 0 && index < keys.length ? keys[index] : null; + }, + }; + + return storage; +} + +// Install on the jsdom global. `vi.stubGlobal` also reflects onto `window` +// (jsdom shares `globalThis` and `window`), so both `localStorage` and +// `window.localStorage` resolve to the same working stub. +vi.stubGlobal("localStorage", createStorage()); +vi.stubGlobal("sessionStorage", createStorage()); diff --git a/apps/server/src/collaboration/extensions/authentication.extension.spec.ts b/apps/server/src/collaboration/extensions/authentication.extension.spec.ts new file mode 100644 index 00000000..19c727ec --- /dev/null +++ b/apps/server/src/collaboration/extensions/authentication.extension.spec.ts @@ -0,0 +1,211 @@ +import { + NotFoundException, + UnauthorizedException, +} from '@nestjs/common'; +import { AuthenticationExtension } from './authentication.extension'; +import { SpaceRole } from '../../common/helpers/types/permission'; +import { JwtType } from '../../core/auth/dto/jwt-payload'; + +/** + * Unit tests for the collab read-only downgrade matrix in + * `AuthenticationExtension.onAuthenticate`. This is a security boundary: a wrong + * branch here is either a collab-auth bypass (writer on a page they may only + * read) or a denial. We mock every repo and inspect both the thrown exception + * type and the `connectionConfig.readOnly` flag the extension mutates. + */ + +const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000'; +const USER_ID = 'user-1'; +const WORKSPACE_ID = 'ws-1'; +const SPACE_ID = 'space-1'; + +const buildUser = (overrides: Partial<any> = {}) => ({ + id: USER_ID, + workspaceId: WORKSPACE_ID, + deactivatedAt: null, + deletedAt: null, + name: 'Alice', + avatarUrl: null, + ...overrides, +}); + +const buildPage = (overrides: Partial<any> = {}) => ({ + id: PAGE_ID, + spaceId: SPACE_ID, + workspaceId: WORKSPACE_ID, + deletedAt: null, + ...overrides, +}); + +// Default jwt payload: a plain human collab token (no agent provenance claims). +const buildJwt = (overrides: Partial<any> = {}) => ({ + sub: USER_ID, + workspaceId: WORKSPACE_ID, + type: JwtType.COLLAB, + ...overrides, +}); + +describe('AuthenticationExtension.onAuthenticate', () => { + let ext: AuthenticationExtension; + let tokenService: { verifyJwt: jest.Mock }; + let userRepo: { findById: jest.Mock }; + let pageRepo: { findById: jest.Mock }; + let spaceMemberRepo: { getUserSpaceRoles: jest.Mock }; + let pagePermissionRepo: { canUserEditPage: jest.Mock }; + + // Build the hocuspocus onAuthenticate payload. connectionConfig.readOnly + // starts false; the extension flips it to true on a read-only downgrade. + const buildData = (token = 'tok') => ({ + documentName: `page.${PAGE_ID}`, + token, + connectionConfig: { readOnly: false }, + }); + + beforeEach(() => { + tokenService = { verifyJwt: jest.fn().mockResolvedValue(buildJwt()) }; + userRepo = { findById: jest.fn().mockResolvedValue(buildUser()) }; + pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) }; + spaceMemberRepo = { + getUserSpaceRoles: jest + .fn() + .mockResolvedValue([{ userId: USER_ID, role: SpaceRole.WRITER }]), + }; + pagePermissionRepo = { + // No page-level restriction by default → defer to space role. + canUserEditPage: jest.fn().mockResolvedValue({ + hasAnyRestriction: false, + canAccess: true, + canEdit: true, + }), + }; + + ext = new AuthenticationExtension( + tokenService as any, + userRepo as any, + pageRepo as any, + spaceMemberRepo as any, + pagePermissionRepo as any, + ); + // Silence the extension's logger (it warns/debugs on denial branches). + jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined); + jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined); + }); + + it('invalid token → UnauthorizedException (no repo lookups happen)', async () => { + tokenService.verifyJwt.mockRejectedValue(new Error('bad sig')); + + await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow( + UnauthorizedException, + ); + expect(userRepo.findById).not.toHaveBeenCalled(); + }); + + it('user not found → Unauthorized', async () => { + userRepo.findById.mockResolvedValue(null); + await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow( + UnauthorizedException, + ); + }); + + it('user disabled (deactivatedAt set) → Unauthorized', async () => { + userRepo.findById.mockResolvedValue( + buildUser({ deactivatedAt: new Date() }), + ); + await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow( + UnauthorizedException, + ); + }); + + it('page not found → NotFoundException', async () => { + pageRepo.findById.mockResolvedValue(null); + await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow( + NotFoundException, + ); + }); + + it('no space role → Unauthorized', async () => { + spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([]); + await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow( + UnauthorizedException, + ); + }); + + it('page-level restriction canAccess=false → Unauthorized (restricted-page denial)', async () => { + pagePermissionRepo.canUserEditPage.mockResolvedValue({ + hasAnyRestriction: true, + canAccess: false, + canEdit: false, + }); + await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow( + UnauthorizedException, + ); + }); + + it('restriction canAccess=true & canEdit=false → readOnly (no restricted-page write)', async () => { + pagePermissionRepo.canUserEditPage.mockResolvedValue({ + hasAnyRestriction: true, + canAccess: true, + canEdit: false, + }); + const data = buildData(); + const ctx = await ext.onAuthenticate(data as any); + + expect(data.connectionConfig.readOnly).toBe(true); + expect(ctx.actor).toBe('user'); + }); + + it('restriction canAccess=true & canEdit=true → writable (readOnly stays false)', async () => { + pagePermissionRepo.canUserEditPage.mockResolvedValue({ + hasAnyRestriction: true, + canAccess: true, + canEdit: true, + }); + const data = buildData(); + await ext.onAuthenticate(data as any); + + expect(data.connectionConfig.readOnly).toBe(false); + }); + + it('no restriction + space READER → readOnly', async () => { + spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([ + { userId: USER_ID, role: SpaceRole.READER }, + ]); + const data = buildData(); + await ext.onAuthenticate(data as any); + + expect(data.connectionConfig.readOnly).toBe(true); + }); + + it('no restriction + space WRITER → writable', async () => { + const data = buildData(); + await ext.onAuthenticate(data as any); + expect(data.connectionConfig.readOnly).toBe(false); + }); + + it('soft-deleted page (deletedAt set) → readOnly even for a WRITER', async () => { + // A writer must NOT be able to mutate a page in the trash via collab. + pageRepo.findById.mockResolvedValue(buildPage({ deletedAt: new Date() })); + const data = buildData(); + await ext.onAuthenticate(data as any); + + expect(data.connectionConfig.readOnly).toBe(true); + }); + + it('agent JWT (actor=agent + aiChatId) propagates into the connection context', async () => { + tokenService.verifyJwt.mockResolvedValue( + buildJwt({ actor: 'agent', aiChatId: 'chat-7' }), + ); + const ctx = await ext.onAuthenticate(buildData() as any); + + expect(ctx.actor).toBe('agent'); + expect(ctx.aiChatId).toBe('chat-7'); + expect(ctx.user.id).toBe(USER_ID); + }); + + it('human JWT (no provenance claims) → actor=user, aiChatId=null', async () => { + const ctx = await ext.onAuthenticate(buildData() as any); + + expect(ctx.actor).toBe('user'); + expect(ctx.aiChatId).toBeNull(); + }); +}); diff --git a/apps/server/src/collaboration/extensions/compute-history-job.spec.ts b/apps/server/src/collaboration/extensions/compute-history-job.spec.ts new file mode 100644 index 00000000..aa21f14f --- /dev/null +++ b/apps/server/src/collaboration/extensions/compute-history-job.spec.ts @@ -0,0 +1,105 @@ +import { + computeHistoryJob, + resolveSource, +} from './persistence.extension'; +import { + HISTORY_FAST_INTERVAL, + HISTORY_FAST_THRESHOLD, + HISTORY_INTERVAL, +} from '../constants'; + +// A fixed clock + fixed createdAt make pageAge deterministic. +const NOW = 1_700_000_000_000; +const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000'; + +// Build a minimal page whose age (NOW - createdAt) is exactly `ageMs`. +const pageAged = (ageMs: number) => ({ + id: PAGE_ID, + createdAt: new Date(NOW - ageMs), +}); + +describe('computeHistoryJob', () => { + it('agent edit → delay MUST be 0 and job id is source-keyed', () => { + // INVARIANT (§15 H2 / persistence.extension): the agent delay MUST stay 0. + // The worker re-reads the page row at run time, so any non-zero delay risks + // snapshotting content a later human edit has already overwritten. This is + // the load-bearing assertion of this spec — do not relax it. + const { jobId, delay } = computeHistoryJob(pageAged(0), 'agent', NOW); + expect(delay).toBe(0); + expect(jobId).toBe(`${PAGE_ID}-agent`); + }); + + it('agent edit on an OLD page is still delay 0 (age never applies to agents)', () => { + // Even when the page is far older than the fast threshold, the agent path + // must short-circuit to 0 — age-based debounce is a human-only concern. + const { jobId, delay } = computeHistoryJob( + pageAged(HISTORY_FAST_THRESHOLD + 60_000), + 'agent', + NOW, + ); + expect(delay).toBe(0); + expect(jobId).toBe(`${PAGE_ID}-agent`); + }); + + it('human edit on a YOUNG page (age < threshold) → fast interval, bare job id', () => { + const { jobId, delay } = computeHistoryJob( + pageAged(HISTORY_FAST_THRESHOLD - 1), + 'user', + NOW, + ); + expect(delay).toBe(HISTORY_FAST_INTERVAL); + expect(jobId).toBe(PAGE_ID); + }); + + it('human edit on an OLD page (age > threshold) → standard interval', () => { + const { jobId, delay } = computeHistoryJob( + pageAged(HISTORY_FAST_THRESHOLD + 1), + 'user', + NOW, + ); + expect(delay).toBe(HISTORY_INTERVAL); + expect(jobId).toBe(PAGE_ID); + }); + + it('boundary: pageAge EXACTLY === threshold takes the slow branch (the `<` is strict)', () => { + // Off-by-one guard: the condition is `pageAge < HISTORY_FAST_THRESHOLD`, so + // an age of exactly the threshold is NOT "fast" — it must use HISTORY_INTERVAL. + const { delay } = computeHistoryJob( + pageAged(HISTORY_FAST_THRESHOLD), + 'user', + NOW, + ); + expect(delay).toBe(HISTORY_INTERVAL); + }); + + it('treats any non-"agent" source string as human', () => { + // resolveSource only ever yields 'agent' | 'user', but guard the contract: + // the agent branch keys strictly on === 'agent'. + const { jobId, delay } = computeHistoryJob(pageAged(0), 'user', NOW); + expect(delay).toBe(HISTORY_FAST_INTERVAL); + expect(jobId).toBe(PAGE_ID); + }); +}); + +describe('resolveSource (truth table)', () => { + // (sticky, actor) → expected. Marker is OR of the sticky flag and actor==='agent'. + it('sticky=false, actor=user → user', () => { + expect(resolveSource(false, 'user')).toBe('user'); + }); + + it('sticky=true, actor=user → agent (sticky wins)', () => { + expect(resolveSource(true, 'user')).toBe('agent'); + }); + + it('sticky=false, actor=agent → agent (current writer is the agent)', () => { + expect(resolveSource(false, 'agent')).toBe('agent'); + }); + + it('sticky=true, actor=agent → agent', () => { + expect(resolveSource(true, 'agent')).toBe('agent'); + }); + + it('sticky=false, actor=undefined → user (human collab path omits the claim)', () => { + expect(resolveSource(false, undefined)).toBe('user'); + }); +}); diff --git a/apps/server/src/collaboration/extensions/persistence-store.spec.ts b/apps/server/src/collaboration/extensions/persistence-store.spec.ts new file mode 100644 index 00000000..4262d77f --- /dev/null +++ b/apps/server/src/collaboration/extensions/persistence-store.spec.ts @@ -0,0 +1,185 @@ +import { TiptapTransformer } from '@hocuspocus/transformer'; +import { PersistenceExtension } from './persistence.extension'; +import { tiptapExtensions } from '../collaboration.util'; + +/** + * Integration test for `onStoreDocument`'s Approach-A boundary snapshot. + * + * The data-loss risk: when an AGENT store lands over a page whose persisted + * state was authored by a HUMAN, the agent overwrites that human content. If we + * do not pin the human revision as its own history version BEFORE the agent's + * updatePage, the last human edit is lost. This test pins the ordering + * (saveHistory(oldHumanPage) strictly before updatePage) and the idempotency + * skip when content is unchanged. + * + * We pass a REAL Y.Doc as the `document` arg (so TiptapTransformer.fromYdoc + * yields real content) and stub repos/queues + an executeTx-compatible db whose + * transaction().execute() invokes the callback with a trx stub. + */ + +const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000'; +const USER_ID = 'human-1'; + +// Build a real Y.Doc carrying the given tiptap JSON in the 'default' fragment. +// hocuspocus augments the live document with broadcastStateless(); the bare +// Y.Doc lacks it, so stub it for the post-store broadcast. +const ydocFor = (json: any) => { + const ydoc = TiptapTransformer.toYdoc(json, 'default', tiptapExtensions); + (ydoc as any).broadcastStateless = jest.fn(); + return ydoc; +}; + +const doc = (text: string) => ({ + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text }] }], +}); + +describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot', () => { + let ext: PersistenceExtension; + let pageRepo: { findById: jest.Mock; updatePage: jest.Mock }; + let pageHistoryRepo: { + saveHistory: jest.Mock; + findPageLastHistory: jest.Mock; + }; + let aiQueue: { add: jest.Mock }; + let historyQueue: { add: jest.Mock }; + let notificationQueue: { add: jest.Mock }; + let collabHistory: { addContributors: jest.Mock }; + let transclusionService: { + syncPageTransclusions: jest.Mock; + syncPageReferences: jest.Mock; + syncPageTemplateReferences: jest.Mock; + }; + let callOrder: string[]; + + // db whose transaction().execute(fn) runs fn with a trx stub — this lets the + // real executeTx() helper drive the callback without a database. + const trxStub = { __trx: true }; + const db = { + transaction: () => ({ + execute: (fn: (trx: any) => Promise<any>) => fn(trxStub), + }), + }; + + // The persisted page row the transaction reads (OLD, human-authored state). + const persistedHumanPage = (newAgentText: string) => ({ + id: PAGE_ID, + slugId: 'slug-1', + spaceId: 'space-1', + workspaceId: 'ws-1', + creatorId: 'creator-1', + contributorIds: ['creator-1'], + createdAt: new Date('2020-01-01T00:00:00Z'), + lastUpdatedSource: 'user', // prior revision was human + // content differs from the new agent doc so the update branch runs. + content: doc('OLD HUMAN'), + _newAgentText: newAgentText, + }); + + const buildData = (document: any, actor: 'user' | 'agent') => ({ + documentName: `page.${PAGE_ID}`, + document, + context: { user: { id: USER_ID, name: 'Alice' }, actor }, + }); + + beforeEach(() => { + callOrder = []; + pageRepo = { + findById: jest.fn(), + updatePage: jest.fn().mockImplementation(async () => { + callOrder.push('updatePage'); + }), + }; + pageHistoryRepo = { + saveHistory: jest.fn().mockImplementation(async () => { + callOrder.push('saveHistory'); + }), + findPageLastHistory: jest.fn().mockResolvedValue(null), + }; + aiQueue = { add: jest.fn().mockResolvedValue(undefined) }; + historyQueue = { add: jest.fn().mockResolvedValue(undefined) }; + notificationQueue = { add: jest.fn().mockResolvedValue(undefined) }; + collabHistory = { addContributors: jest.fn().mockResolvedValue(undefined) }; + transclusionService = { + syncPageTransclusions: jest.fn().mockResolvedValue(undefined), + syncPageReferences: jest.fn().mockResolvedValue(undefined), + syncPageTemplateReferences: jest.fn().mockResolvedValue(undefined), + }; + + ext = new PersistenceExtension( + pageRepo as any, + pageHistoryRepo as any, + db as any, + aiQueue as any, + historyQueue as any, + notificationQueue as any, + collabHistory as any, + transclusionService as any, + ); + jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined); + jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined); + jest.spyOn(ext['logger'], 'error').mockImplementation(() => undefined); + }); + + it('agent store over a human page pins saveHistory(oldHumanPage) BEFORE updatePage', async () => { + const document = ydocFor(doc('NEW AGENT CONTENT')); + pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT')); + // No human baseline snapshot exists yet → boundary snapshot must run. + pageHistoryRepo.findPageLastHistory.mockResolvedValue(null); + + await ext.onStoreDocument(buildData(document, 'agent') as any); + + // Boundary snapshot fired, and strictly before the agent overwrite. + expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1); + const saved = pageHistoryRepo.saveHistory.mock.calls[0][0]; + expect(saved.content).toEqual(doc('OLD HUMAN')); // the OLD human revision + expect(callOrder).toEqual(['saveHistory', 'updatePage']); + + // The agent's new content is tagged 'agent' on the update. + const update = pageRepo.updatePage.mock.calls[0][0]; + expect(update.lastUpdatedSource).toBe('agent'); + }); + + it('skips the boundary snapshot when the human baseline is already pinned', async () => { + const document = ydocFor(doc('NEW AGENT CONTENT')); + pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT')); + // Latest history already equals the current human state → no duplicate. + pageHistoryRepo.findPageLastHistory.mockResolvedValue({ + content: doc('OLD HUMAN'), + }); + + await ext.onStoreDocument(buildData(document, 'agent') as any); + + expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); + expect(pageRepo.updatePage).toHaveBeenCalledTimes(1); + }); + + it('human store does NOT trigger the boundary snapshot (no source transition)', async () => { + const document = ydocFor(doc('NEW HUMAN CONTENT')); + pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW HUMAN CONTENT')); + + await ext.onStoreDocument(buildData(document, 'user') as any); + + expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); + expect(pageRepo.updatePage).toHaveBeenCalledTimes(1); + expect(pageRepo.updatePage.mock.calls[0][0].lastUpdatedSource).toBe('user'); + }); + + it('idempotency: unchanged content → no updatePage, no history, no queues', async () => { + // The Y.Doc content equals the persisted content deeply → early skip. + // A Y.Doc round-trip normalizes attrs (e.g. paragraph indent), so derive + // the persisted content from fromYdoc to make the deep-equal skip genuine. + const document = ydocFor(doc('SAME CONTENT')); + const normalized = TiptapTransformer.fromYdoc(document, 'default'); + pageRepo.findById.mockResolvedValue({ + ...persistedHumanPage('SAME CONTENT'), + content: normalized, + }); + + await ext.onStoreDocument(buildData(document, 'agent') as any); + + expect(pageRepo.updatePage).not.toHaveBeenCalled(); + expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); + expect(historyQueue.add).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index da8421c3..dd462877 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -40,6 +40,52 @@ import { } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; +/** + * Resolve the provenance source for a coalesced snapshot. + * + * The snapshot is tagged 'agent' if any agent edit landed in the coalescing + * window (sticky marker) OR if the current writer is the agent; otherwise + * 'user'. Pure so the §15 H2 marker logic is unit-testable in isolation. + */ +export function resolveSource( + stickyTouched: boolean, + contextActor?: string, +): 'agent' | 'user' { + return stickyTouched || contextActor === 'agent' ? 'agent' : 'user'; +} + +/** + * Compute the BullMQ job id + delay for a page-history snapshot job. Pure so + * the data-loss-sensitive timing arithmetic is unit-testable; `now` is injected + * (caller passes `Date.now()`) for determinism. + * + * - Agent edits: delay 0 and a source-keyed job id `${page.id}-agent`. The + * delay MUST stay 0 — the worker re-reads the page row at run time, so any + * delay risks reading content a later human edit has already overwritten + * (mis-tagged snapshot). 0 minimizes that window. The `-agent` suffix keeps + * the job from coalescing with the bare-page.id human job. + * - Human edits: age-based debounce so rapid human edits coalesce into one + * snapshot; job id is the bare `page.id`. + * + * BullMQ forbids ':' in custom job ids (Redis key separator), so '-' is used; + * page.id is a UUID, so `${page.id}-agent` cannot collide with a human job. + */ +export function computeHistoryJob( + page: Pick<Page, 'id' | 'createdAt'>, + source: string, + now: number, +): { jobId: string; delay: number } { + const isAgent = source === 'agent'; + const pageAge = now - new Date(page.createdAt).getTime(); + const delay = isAgent + ? 0 + : pageAge < HISTORY_FAST_THRESHOLD + ? HISTORY_FAST_INTERVAL + : HISTORY_INTERVAL; + const jobId = isAgent ? `${page.id}-agent` : page.id; + return { jobId, delay }; +} + @Injectable() export class PersistenceExtension implements Extension { private readonly logger = new Logger(PersistenceExtension.name); @@ -129,9 +175,10 @@ export class PersistenceExtension implements Extension { // Sticky agent marker: 'agent' if any agent edit landed in this window, OR // if the current writer is the agent (covers a store with no prior onChange // agent event in the same window). §15 H2. - const agentTouched = - this.consumeAgentTouched(documentName) || context?.actor === 'agent'; - const lastUpdatedSource = agentTouched ? 'agent' : 'user'; + const lastUpdatedSource = resolveSource( + this.consumeAgentTouched(documentName), + context?.actor, + ); try { await executeTx(this.db, async (trx) => { @@ -311,24 +358,13 @@ export class PersistenceExtension implements Extension { page: Page, lastUpdatedSource: string, ): Promise<void> { - // Agent edits get an immediate, source-keyed history job: they snapshot - // deterministically as 'agent' and a later human edit (jobId = page.id) - // cannot coalesce/retag them. Human edits keep the age-based debounce so - // rapid human edits still coalesce into one snapshot. - // NOTE: the agent delay MUST stay 0 — the worker re-reads the page row at - // run time, so any delay would risk reading content a later human edit has - // already overwritten (mis-tagged snapshot). 0 minimizes that window. - const isAgent = lastUpdatedSource === 'agent'; - const pageAge = Date.now() - new Date(page.createdAt).getTime(); - const delay = isAgent - ? 0 - : pageAge < HISTORY_FAST_THRESHOLD - ? HISTORY_FAST_INTERVAL - : HISTORY_INTERVAL; - // BullMQ forbids ':' in custom job IDs (it is the Redis key separator), so - // use '-' here. page.id is a UUID, so `${page.id}-agent` cannot collide with - // any human job whose id is a bare page.id. - const jobId = isAgent ? `${page.id}-agent` : page.id; + // Job id + delay arithmetic lives in the pure `computeHistoryJob` (see its + // doc comment for the agent-delay-0 / age-based-debounce invariants). + const { jobId, delay } = computeHistoryJob( + page, + lastUpdatedSource, + Date.now(), + ); await this.historyQueue.add( QueueJob.PAGE_HISTORY, diff --git a/apps/server/src/collaboration/processors/history.processor.spec.ts b/apps/server/src/collaboration/processors/history.processor.spec.ts new file mode 100644 index 00000000..bdcf846e --- /dev/null +++ b/apps/server/src/collaboration/processors/history.processor.spec.ts @@ -0,0 +1,200 @@ +import { Job } from 'bullmq'; +import { HistoryProcessor } from './history.processor'; +import { QueueJob } from '../../integrations/queue/constants'; + +/** + * Unit tests for `HistoryProcessor.process`. This worker is the last line of + * defense for the page-history snapshot, so we pin the data-loss-sensitive + * paths: duplicate/empty history skipping (isDeepStrictEqual), and — critically + * — that a saveHistory failure RESTORES the contributors it popped (otherwise + * the contributor set is silently lost) before rethrowing. + */ + +const PAGE_ID = 'page-1'; +const SPACE_ID = 'space-1'; +const WORKSPACE_ID = 'ws-1'; + +// A non-empty content doc (distinct from the empty-paragraph doc). +const filledContent = { + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }], +}; +const emptyContent = { type: 'doc', content: [{ type: 'paragraph' }] }; + +const buildPage = (overrides: Partial<any> = {}) => ({ + id: PAGE_ID, + spaceId: SPACE_ID, + workspaceId: WORKSPACE_ID, + content: filledContent, + ...overrides, +}); + +const buildJob = (overrides: Partial<any> = {}) => + ({ + name: QueueJob.PAGE_HISTORY, + data: { pageId: PAGE_ID }, + ...overrides, + }) as unknown as Job<any, void>; + +describe('HistoryProcessor.process', () => { + let proc: HistoryProcessor; + let pageHistoryRepo: { findPageLastHistory: jest.Mock; saveHistory: jest.Mock }; + let pageRepo: { findById: jest.Mock }; + let collabHistory: { + clearContributors: jest.Mock; + popContributors: jest.Mock; + addContributors: jest.Mock; + }; + let watcherService: { addPageWatchers: jest.Mock }; + let notificationQueue: { add: jest.Mock }; + let generalQueue: { add: jest.Mock }; + + beforeEach(() => { + pageHistoryRepo = { + findPageLastHistory: jest.fn().mockResolvedValue(null), + saveHistory: jest.fn().mockResolvedValue(undefined), + }; + pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) }; + collabHistory = { + clearContributors: jest.fn().mockResolvedValue(undefined), + popContributors: jest.fn().mockResolvedValue(['u1', 'u2']), + addContributors: jest.fn().mockResolvedValue(undefined), + }; + watcherService = { + addPageWatchers: jest.fn().mockResolvedValue(undefined), + }; + notificationQueue = { add: jest.fn().mockResolvedValue(undefined) }; + generalQueue = { add: jest.fn().mockResolvedValue(undefined) }; + + // WorkerHost's constructor reads `this.worker`; passing repos positionally + // matches the constructor and avoids the Nest DI container. + proc = new HistoryProcessor( + pageHistoryRepo as any, + pageRepo as any, + collabHistory as any, + watcherService as any, + notificationQueue as any, + generalQueue as any, + ); + jest.spyOn(proc['logger'], 'debug').mockImplementation(() => undefined); + jest.spyOn(proc['logger'], 'warn').mockImplementation(() => undefined); + jest.spyOn(proc['logger'], 'error').mockImplementation(() => undefined); + }); + + it('ignores jobs whose name is not PAGE_HISTORY (no page lookup)', async () => { + await proc.process(buildJob({ name: 'some.other.job' })); + expect(pageRepo.findById).not.toHaveBeenCalled(); + }); + + it('page not found → clearContributors and return (no save)', async () => { + pageRepo.findById.mockResolvedValue(null); + + await proc.process(buildJob()); + + expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID); + expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); + expect(collabHistory.popContributors).not.toHaveBeenCalled(); + }); + + it('first history + empty content → skip and clear contributors (no save)', async () => { + pageHistoryRepo.findPageLastHistory.mockResolvedValue(null); + pageRepo.findById.mockResolvedValue(buildPage({ content: emptyContent })); + + await proc.process(buildJob()); + + expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID); + expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); + }); + + it('content unchanged vs last history → no save (isDeepStrictEqual skip)', async () => { + // Last history holds a deep-equal-but-distinct copy of current content. + pageHistoryRepo.findPageLastHistory.mockResolvedValue({ + content: JSON.parse(JSON.stringify(filledContent)), + }); + + await proc.process(buildJob()); + + expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled(); + expect(collabHistory.popContributors).not.toHaveBeenCalled(); + }); + + it('content changed → addPageWatchers + saveHistory + backlinks queue', async () => { + pageHistoryRepo.findPageLastHistory.mockResolvedValue({ + content: { type: 'doc', content: [] }, + }); + + await proc.process(buildJob()); + + expect(collabHistory.popContributors).toHaveBeenCalledWith(PAGE_ID); + expect(watcherService.addPageWatchers).toHaveBeenCalledWith( + ['u1', 'u2'], + PAGE_ID, + SPACE_ID, + WORKSPACE_ID, + ); + expect(pageHistoryRepo.saveHistory).toHaveBeenCalledWith( + expect.objectContaining({ id: PAGE_ID }), + { contributorIds: ['u1', 'u2'] }, + ); + expect(generalQueue.add).toHaveBeenCalledWith( + QueueJob.PAGE_BACKLINKS, + expect.objectContaining({ pageId: PAGE_ID, workspaceId: WORKSPACE_ID }), + ); + }); + + it('first history (lastHistory null) with non-empty content → saves, no PAGE_UPDATED notification', async () => { + // popContributors yields users, but lastHistory?.content is falsy so the + // notification branch (needs a prior version) must be skipped. + pageHistoryRepo.findPageLastHistory.mockResolvedValue(null); + + await proc.process(buildJob()); + + expect(pageHistoryRepo.saveHistory).toHaveBeenCalled(); + expect(notificationQueue.add).not.toHaveBeenCalled(); + }); + + it('changed content WITH prior history + contributors → queues PAGE_UPDATED notification', async () => { + pageHistoryRepo.findPageLastHistory.mockResolvedValue({ + content: { type: 'doc', content: [] }, + }); + + await proc.process(buildJob()); + + expect(notificationQueue.add).toHaveBeenCalledWith( + QueueJob.PAGE_UPDATED, + expect.objectContaining({ + pageId: PAGE_ID, + actorIds: ['u1', 'u2'], + }), + ); + }); + + it('saveHistory throws → contributors RESTORED (addContributors) AND error rethrown', async () => { + // The data-loss guard: if the snapshot save fails after popContributors, + // the popped ids MUST be returned to the pending set, then the error + // propagates so BullMQ retries. Assert BOTH halves. + pageHistoryRepo.findPageLastHistory.mockResolvedValue({ + content: { type: 'doc', content: [] }, + }); + const boom = new Error('db down'); + pageHistoryRepo.saveHistory.mockRejectedValue(boom); + + await expect(proc.process(buildJob())).rejects.toThrow('db down'); + expect(collabHistory.addContributors).toHaveBeenCalledWith(PAGE_ID, [ + 'u1', + 'u2', + ]); + }); + + it('backlinks + notification queue failures are swallowed (history still committed)', async () => { + pageHistoryRepo.findPageLastHistory.mockResolvedValue({ + content: { type: 'doc', content: [] }, + }); + generalQueue.add.mockRejectedValue(new Error('redis backlinks down')); + notificationQueue.add.mockRejectedValue(new Error('redis notif down')); + + // The downstream queue failures are caught internally; process resolves. + await expect(proc.process(buildJob())).resolves.toBeUndefined(); + expect(pageHistoryRepo.saveHistory).toHaveBeenCalled(); + }); +}); diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.ts b/apps/server/src/core/ai-chat/ai-chat.controller.ts index 11c43af5..02a98971 100644 --- a/apps/server/src/core/ai-chat/ai-chat.controller.ts +++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts @@ -228,25 +228,14 @@ export class AiChatController { } if (!file) throw new BadRequestException('No audio uploaded'); - // Whitelist audio container types produced by browser MediaRecorder - // (Chrome/FF: webm/opus, Safari: mp4) plus common STT-accepted formats. - const allowedMime = new Set([ - 'audio/webm', - 'audio/ogg', - 'audio/mp4', - 'audio/mpeg', - 'audio/wav', - 'audio/x-wav', - 'audio/wave', - 'audio/m4a', - 'audio/x-m4a', - ]); - // MediaRecorder mimetypes carry parameters (e.g. "audio/webm;codecs=opus"); - // compare only the base type. - const baseMime = file.mimetype.split(';')[0].trim().toLowerCase(); - if (!allowedMime.has(baseMime)) { + // Resolve + whitelist the upload's container type (MediaRecorder mimetypes + // carry parameters, e.g. "audio/webm;codecs=opus"). A non-whitelisted type + // is rejected; an allowed one yields the STT container-format hint. + const resolved = resolveAudioFormat(file.mimetype); + if (!resolved.ok) { throw new BadRequestException('Unsupported audio format'); } + const { format } = resolved; let buf: Buffer; try { @@ -259,20 +248,6 @@ export class AiChatController { } throw err; } - // Container hint for JSON-style STT providers (e.g. OpenRouter); multipart - // endpoints ignore it. - const formatMap: Record<string, string> = { - 'audio/webm': 'webm', - 'audio/ogg': 'ogg', - 'audio/mp4': 'mp4', - 'audio/mpeg': 'mp3', - 'audio/wav': 'wav', - 'audio/x-wav': 'wav', - 'audio/wave': 'wav', - 'audio/m4a': 'm4a', - 'audio/x-m4a': 'm4a', - }; - const format = formatMap[baseMime] ?? 'webm'; let text: string; try { text = await this.aiTranscription.transcribe(workspace.id, buf, format); @@ -302,3 +277,39 @@ export class AiChatController { } } } + +/** + * Whitelist audio container types produced by browser MediaRecorder (Chrome/FF: + * webm/opus, Safari: mp4) plus common STT-accepted formats. The value maps each + * allowed base mime to the container-format hint passed to JSON-style STT + * providers (e.g. OpenRouter); multipart endpoints ignore the hint. + */ +const AUDIO_FORMAT_MAP: Record<string, string> = { + 'audio/webm': 'webm', + 'audio/ogg': 'ogg', + 'audio/mp4': 'mp4', + 'audio/mpeg': 'mp3', + 'audio/wav': 'wav', + 'audio/x-wav': 'wav', + 'audio/wave': 'wav', + 'audio/m4a': 'm4a', + 'audio/x-m4a': 'm4a', +}; + +/** + * Resolve and whitelist an uploaded clip's mimetype. MediaRecorder mimetypes + * carry parameters (e.g. "audio/webm;codecs=opus"), so the base type is split + * out (lowercased, trimmed) before the whitelist check. Returns ok=false for a + * non-whitelisted container; otherwise the base mime and its STT format hint. + * Pure — the caller throws BadRequestException on !ok. + */ +export function resolveAudioFormat( + mimetype: string, +): { ok: true; baseMime: string; format: string } | { ok: false } { + const baseMime = mimetype.split(';')[0].trim().toLowerCase(); + const format = AUDIO_FORMAT_MAP[baseMime]; + if (format === undefined) { + return { ok: false }; + } + return { ok: true, baseMime, format }; +} diff --git a/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts b/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts index 330a23cc..30e94dc0 100644 --- a/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts +++ b/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts @@ -367,6 +367,28 @@ export class McpClientsService { } } +/** + * Apply the SSRF connect-time rule to a set of DNS-resolved addresses: block if + * ANY resolved address is disallowed by `isIpAllowed`, and block an EMPTY set + * (nothing safe to connect to). Only an all-public, non-empty set is allowed. + * + * This is the connect-time half of the DNS-rebinding defense: the dispatcher's + * lookup hands net/tls.connect ONLY a set that passed this check, so the kernel + * can never connect to an address that did not pass the guard. Pure — no I/O. + */ +export function validateResolvedAddresses( + addrs: readonly LookupAddress[], +): { ok: boolean; blockedHost?: string } { + if (addrs.length === 0) { + return { ok: false }; + } + const blocked = addrs.find((a) => !isIpAllowed(a.address).ok); + if (blocked) { + return { ok: false, blockedHost: blocked.address }; + } + return { ok: true }; +} + /** * Build the SSRF-pinned undici dispatcher. Its custom connect.lookup resolves * the host, validates EVERY resolved address with the same ssrf-guard, and @@ -388,22 +410,15 @@ function buildPinnedDispatcher(): Agent { return; } const addrs = addresses as LookupAddress[]; - if (addrs.length === 0) { - callback( - new Error(`No address resolved for ${hostname}`), - '', - 0, - ); - return; - } - const blocked = addrs.find((a) => !isIpAllowed(a.address).ok); - if (blocked) { + const verdict = validateResolvedAddresses(addrs); + if (!verdict.ok) { // Refuse the connection: net/tls.connect never sees this address. - callback( - new Error(`Blocked address for ${hostname}`), - '', - 0, - ); + // An empty set is treated as blocked (nothing safe to connect to). + const reason = + addrs.length === 0 + ? `No address resolved for ${hostname}` + : `Blocked address for ${hostname}`; + callback(new Error(reason), '', 0); return; } // undici/net invoke this lookup with `all: true`, so the callback diff --git a/apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts b/apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts new file mode 100644 index 00000000..e5fee47d --- /dev/null +++ b/apps/server/src/core/ai-chat/external-mcp/mcp-namespacing.spec.ts @@ -0,0 +1,136 @@ +import { type Tool } from 'ai'; +import { McpClientsService } from './mcp-clients.service'; + +/** + * Tool-name namespacing / collision tests. + * + * REACHABILITY NOTE: the helpers `namespace` / `sanitizeName` / `capName` / + * `disambiguate` are module-private (not exported) and `mergeNamespaced` is a + * PRIVATE method. The smallest reachable public path that exercises all of them + * is `toolsFor()` -> getOrBuildEntry -> buildEntry -> connect/tools() -> + * mergeNamespaced. We drive that path: stub the repo's `listEnabled` to return + * fake servers and spy on the private `connect` to return fake MCP clients whose + * `tools()` we control. We then inspect the merged tool KEYS on the returned + * toolset — the observable result of namespacing. + * + * What we assert (all SECURITY/correctness-relevant): + * - two servers each exposing a tool `search` -> BOTH survive under distinct + * namespaced keys (no silent overwrite); + * - a tool name with spaces/unicode -> sanitized to ^[a-zA-Z0-9_-]+; + * - an over-long name -> capped to the provider limit (<= 64); + * - duplicate names WITHIN one server (collide after sanitize/truncate) -> + * disambiguated, so the second is not overwritten. + */ +const MAX_TOOL_NAME_LENGTH = 64; + +function fakeTool(): Tool { + return { description: 'x', inputSchema: undefined } as unknown as Tool; +} + +interface FakeServer { + id: string; + name: string; + transport: string; + url: string; + headersEnc: string | null; + toolAllowlist: string[] | null; +} + +function server(over: Partial<FakeServer> & { id: string; name: string }): FakeServer { + return { + transport: 'http', + url: 'https://example.com/mcp', + headersEnc: null, + toolAllowlist: null, + ...over, + }; +} + +/** + * Build a service whose repo returns `servers` and whose `connect` returns a + * fake client exposing `toolsByServerId[server.id]` from tools(). Returns the + * merged keys produced by toolsFor. + */ +async function mergedKeysFor( + servers: FakeServer[], + toolsByServerId: Record<string, Record<string, Tool>>, +): Promise<string[]> { + const repoStub = { + listEnabled: jest.fn().mockResolvedValue(servers), + }; + const service = new McpClientsService(repoStub as never, {} as never); + + // Map each connect() call (by server identity) to a fake client. connect is + // private; spy on it via a typed any-cast. + jest + .spyOn(service as unknown as { connect: (s: FakeServer) => unknown }, 'connect') + .mockImplementation((s: FakeServer) => + Promise.resolve({ + tools: () => Promise.resolve(toolsByServerId[s.id] ?? {}), + close: () => Promise.resolve(), + }), + ); + + const toolset = await service.toolsFor('ws-1'); + // Release the lease so the service does not hold the fake clients open. + await Promise.all(toolset.clients.map((c) => c.close())); + return Object.keys(toolset.tools); +} + +describe('external MCP tool-name namespacing (via toolsFor)', () => { + afterEach(() => jest.restoreAllMocks()); + + it('keeps tools from two servers that both expose `search` (no overwrite)', async () => { + const keys = await mergedKeysFor( + [ + server({ id: 'id-alpha', name: 'alpha' }), + server({ id: 'id-beta', name: 'beta' }), + ], + { + 'id-alpha': { search: fakeTool() }, + 'id-beta': { search: fakeTool() }, + }, + ); + + // Two distinct keys survive -> no silent overwrite. + expect(keys).toHaveLength(2); + expect(new Set(keys).size).toBe(2); + // The server name is prefixed onto each tool. + expect(keys).toContain('alpha_search'); + expect(keys.some((k) => k !== 'alpha_search')).toBe(true); + }); + + it('sanitizes spaces/unicode in names to the allowed charset', async () => { + const keys = await mergedKeysFor( + [server({ id: 'id-1', name: 'My Server!' })], + { 'id-1': { 'search the wiki ✨': fakeTool() } }, + ); + + expect(keys).toHaveLength(1); + // Only ^[a-zA-Z0-9_-]+ characters remain (no spaces, no unicode). + expect(keys[0]).toMatch(/^[a-zA-Z0-9_-]+$/); + }); + + it('caps an over-long name to the provider length limit', async () => { + const longName = 'a'.repeat(200); + const keys = await mergedKeysFor( + [server({ id: 'id-1', name: 'svr' })], + { 'id-1': { [longName]: fakeTool() } }, + ); + + expect(keys).toHaveLength(1); + expect(keys[0].length).toBeLessThanOrEqual(MAX_TOOL_NAME_LENGTH); + }); + + it('disambiguates two names that collide after sanitize/truncate within one server', async () => { + // Both names sanitize to the same value ("a_b") -> the second must be + // suffix-disambiguated, not overwritten. + const keys = await mergedKeysFor( + [server({ id: 'id-1', name: 'svr' })], + { 'id-1': { 'a b': fakeTool(), 'a@b': fakeTool() } }, + ); + + expect(keys).toHaveLength(2); + expect(new Set(keys).size).toBe(2); + }); +}); diff --git a/apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts b/apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts new file mode 100644 index 00000000..4c6a1afc --- /dev/null +++ b/apps/server/src/core/ai-chat/external-mcp/mcp-servers-to-view.spec.ts @@ -0,0 +1,85 @@ +import { McpServersService } from './mcp-servers.service'; +import { AiMcpServer } from '@docmost/db/types/entity.types'; + +/** + * Encrypted-header leak guard for the admin-facing view (§8.10): `toView` is + * private, so we drive it through the public `list()` (which maps every row + * with toView). The contract: a row with `headersEnc` set surfaces ONLY + * `hasHeaders:true` and NEVER the `headersEnc` blob; a row without headers + * surfaces `hasHeaders:false`. The blob must never reach an admin response. + */ +function row(overrides: Partial<AiMcpServer>): AiMcpServer { + return { + id: 'srv-1', + name: 'Tavily', + transport: 'http', + url: 'https://example.com/mcp', + enabled: true, + toolAllowlist: null, + headersEnc: null, + ...overrides, + } as unknown as AiMcpServer; +} + +describe('McpServersService.toView (via list) — encrypted-header leak guard', () => { + function buildService(rows: AiMcpServer[]): McpServersService { + const repoStub = { + listByWorkspace: jest.fn().mockResolvedValue(rows), + }; + // secretBox + clients are unused by the list/toView path; pass stubs to + // satisfy the constructor. + return new McpServersService( + repoStub as never, + {} as never, + {} as never, + ); + } + + it('exposes hasHeaders:true and NO headersEnc when auth headers are set', async () => { + const service = buildService([ + row({ headersEnc: 'ENCRYPTED-SECRET-BLOB' }), + ]); + + const [view] = await service.list('ws-1'); + + expect(view.hasHeaders).toBe(true); + // The encrypted blob must NEVER appear in the view, under any key. + expect('headersEnc' in view).toBe(false); + expect(Object.values(view)).not.toContain('ENCRYPTED-SECRET-BLOB'); + }); + + it('exposes hasHeaders:false when no auth headers are set', async () => { + const service = buildService([row({ headersEnc: null })]); + + const [view] = await service.list('ws-1'); + + expect(view.hasHeaders).toBe(false); + expect('headersEnc' in view).toBe(false); + }); + + it('projects only the public fields', async () => { + const service = buildService([ + row({ + id: 'srv-9', + name: 'My MCP', + transport: 'sse', + url: 'https://mcp.example.com/', + enabled: false, + toolAllowlist: ['search'], + headersEnc: 'BLOB', + }), + ]); + + const [view] = await service.list('ws-1'); + + expect(view).toEqual({ + id: 'srv-9', + name: 'My MCP', + transport: 'sse', + url: 'https://mcp.example.com/', + enabled: false, + toolAllowlist: ['search'], + hasHeaders: true, + }); + }); +}); diff --git a/apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts b/apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts new file mode 100644 index 00000000..3f4be947 --- /dev/null +++ b/apps/server/src/core/ai-chat/external-mcp/validate-resolved-addresses.spec.ts @@ -0,0 +1,67 @@ +import { type LookupAddress } from 'node:dns'; +import { validateResolvedAddresses } from './mcp-clients.service'; + +/** + * Unit tests for validateResolvedAddresses — the connect-time half of the SSRF + * DNS-rebinding defense. It applies the REAL `isIpAllowed` rule (imported + * transitively via the service) and must block if ANY resolved address is + * disallowed, treat an EMPTY set as blocked, and unwrap IPv4-mapped IPv6. + * + * These tests intentionally use real public/private literals (no DNS, no mock) + * so they exercise the actual ssrf-guard classification. + */ +function addr(address: string, family = 4): LookupAddress { + return { address, family }; +} + +describe('validateResolvedAddresses', () => { + it('allows an all-public set', () => { + const res = validateResolvedAddresses([ + addr('8.8.8.8'), + addr('1.1.1.1'), + addr('2001:4860:4860::8888', 6), + ]); + expect(res.ok).toBe(true); + }); + + it('blocks when ONE address among many is private (any-private-blocks)', () => { + const res = validateResolvedAddresses([ + addr('8.8.8.8'), + addr('1.1.1.1'), + addr('10.0.0.5'), // private 10/8 hidden among public addresses + addr('1.0.0.1'), + ]); + expect(res.ok).toBe(false); + expect(res.blockedHost).toBe('10.0.0.5'); + }); + + it('blocks an empty set (nothing safe to connect to)', () => { + expect(validateResolvedAddresses([]).ok).toBe(false); + }); + + it('blocks an IPv4-mapped IPv6 private address', () => { + const res = validateResolvedAddresses([addr('::ffff:10.0.0.1', 6)]); + expect(res.ok).toBe(false); + }); + + it('blocks the cloud metadata link-local address', () => { + const res = validateResolvedAddresses([ + addr('8.8.8.8'), + addr('169.254.169.254'), + ]); + expect(res.ok).toBe(false); + }); + + /** + * Regression sentinel: if the "any private blocks" rule were weakened to + * "all private blocks" / "first address wins", this mixed set (public first, + * private second) would wrongly pass. The assertion below FAILS in that case. + */ + it('FAILS if the any-private rule is weakened (sentinel)', () => { + const res = validateResolvedAddresses([ + addr('8.8.8.8'), // public first + addr('192.168.1.1'), // private second — must still block the whole set + ]); + expect(res.ok).toBe(false); + }); +}); diff --git a/apps/server/src/core/ai-chat/resolve-audio-format.spec.ts b/apps/server/src/core/ai-chat/resolve-audio-format.spec.ts new file mode 100644 index 00000000..eb8cb631 --- /dev/null +++ b/apps/server/src/core/ai-chat/resolve-audio-format.spec.ts @@ -0,0 +1,53 @@ +import { resolveAudioFormat } from './ai-chat.controller'; + +/** + * Unit tests for resolveAudioFormat — the transcribe-endpoint mime whitelist. + * It splits the base mime off any MediaRecorder parameters, lowercases/trims it, + * checks it against the whitelist, and maps it to the STT container-format hint. + * A non-whitelisted container yields { ok: false } (the controller then throws + * BadRequestException). + */ +describe('resolveAudioFormat', () => { + it('strips MediaRecorder parameters to the base mime (audio/webm;codecs=opus)', () => { + const res = resolveAudioFormat('audio/webm;codecs=opus'); + expect(res).toEqual({ ok: true, baseMime: 'audio/webm', format: 'webm' }); + }); + + it('normalizes uppercase / surrounding whitespace', () => { + const res = resolveAudioFormat(' AUDIO/MP4 ; codecs=mp4a '); + expect(res).toEqual({ ok: true, baseMime: 'audio/mp4', format: 'mp4' }); + }); + + it('handles the Safari/iOS audio/x-m4a container', () => { + expect(resolveAudioFormat('audio/x-m4a')).toEqual({ + ok: true, + baseMime: 'audio/x-m4a', + format: 'm4a', + }); + }); + + it('rejects a disallowed container (audio/aiff)', () => { + expect(resolveAudioFormat('audio/aiff')).toEqual({ ok: false }); + }); + + it('maps every whitelisted container to its STT format hint', () => { + const cases: Array<[string, string]> = [ + ['audio/webm', 'webm'], + ['audio/ogg', 'ogg'], + ['audio/mp4', 'mp4'], + ['audio/mpeg', 'mp3'], + ['audio/wav', 'wav'], + ['audio/x-wav', 'wav'], + ['audio/wave', 'wav'], + ['audio/m4a', 'm4a'], + ['audio/x-m4a', 'm4a'], + ]; + for (const [mime, format] of cases) { + expect(resolveAudioFormat(mime)).toEqual({ + ok: true, + baseMime: mime, + format, + }); + } + }); +}); diff --git a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts index 74ac66b8..afe0404f 100644 --- a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts +++ b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts @@ -199,21 +199,8 @@ export class AiChatToolsService { const accessibleSet = new Set(accessibleIds); // Keep the best (first — hits are ordered by fused score desc) chunk - // per page, capped to `cap`. - const seen = new Set<string>(); - const results: { id: string; title: string; snippet: string }[] = []; - for (const hit of hits) { - if (!accessibleSet.has(hit.pageId)) continue; - if (seen.has(hit.pageId)) continue; - seen.add(hit.pageId); - results.push({ - id: hit.pageId, - title: hit.title ?? '', - snippet: snippet(hit.content), - }); - if (results.length >= cap) break; - } - return results; + // per page, dropping any page the user cannot access, capped to `cap`. + return selectAccessibleHits(hits, accessibleSet, cap); }, }), @@ -960,6 +947,44 @@ export class AiChatToolsService { } } +/** A single hybrid-search hit: the minimal shape selectAccessibleHits needs. */ +export interface SearchHitLike { + pageId: string; + title: string | null; + content: string; +} + +/** + * Post-filter hybrid-search hits into the agent-facing result list. This is the + * CASL leak guard for the in-process hybrid search: the hits come from a direct + * pgvector + full-text query that does NOT get CASL for free, so an accessible + * SPACE does not imply every page in it is accessible (restricted pages). + * + * Given `hits` (ordered by fused score desc), the `accessibleSet` of page ids + * the user may read, and `cap`, it keeps the BEST (first) chunk per page, drops + * any page not in `accessibleSet`, and caps the output at `cap`. Pure — no I/O. + */ +export function selectAccessibleHits( + hits: readonly SearchHitLike[], + accessibleSet: Set<string>, + cap: number, +): { id: string; title: string; snippet: string }[] { + const seen = new Set<string>(); + const results: { id: string; title: string; snippet: string }[] = []; + for (const hit of hits) { + if (!accessibleSet.has(hit.pageId)) continue; + if (seen.has(hit.pageId)) continue; + seen.add(hit.pageId); + results.push({ + id: hit.pageId, + title: hit.title ?? '', + snippet: snippet(hit.content), + }); + if (results.length >= cap) break; + } + return results; +} + /** * Trim a search highlight/snippet to a token-efficient length. The highlight * may contain `<b>` markers from the search backend; they are harmless to the diff --git a/apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts b/apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts new file mode 100644 index 00000000..21ac46ac --- /dev/null +++ b/apps/server/src/core/ai-chat/tools/select-accessible-hits.spec.ts @@ -0,0 +1,96 @@ +import { + selectAccessibleHits, + type SearchHitLike, +} from './ai-chat-tools.service'; + +/** + * Unit tests for selectAccessibleHits — the CASL leak guard for the in-process + * hybrid search. The hybrid query runs over pgvector + full-text WITHOUT CASL, + * so this post-filter is the ONLY thing that drops pages the user cannot read. + * + * Core invariant: a hit on a page that is NOT in `accessibleSet` is dropped, + * even when that page lives in an otherwise-accessible space. Plus: only the + * best chunk per page survives (dedupe), results are capped, and an empty + * accessibleSet yields nothing. + */ +function hit(pageId: string, title: string | null, content: string): SearchHitLike { + return { pageId, title, content }; +} + +describe('selectAccessibleHits', () => { + it('drops a hit on a page NOT in accessibleSet (the core leak guard)', () => { + const hits = [ + hit('public-page', 'Public', 'visible body'), + // restricted-page is in an accessible space but NOT page-accessible. + hit('restricted-page', 'Secret', 'leaked body'), + ]; + const accessibleSet = new Set(['public-page']); + + const out = selectAccessibleHits(hits, accessibleSet, 10); + + expect(out).toEqual([ + { id: 'public-page', title: 'Public', snippet: 'visible body' }, + ]); + // The restricted page must NEVER appear in the output. + expect(out.some((r) => r.id === 'restricted-page')).toBe(false); + }); + + it('keeps only the best (first) chunk per page when a page has duplicates', () => { + const hits = [ + hit('p1', 'Page One', 'best chunk'), + hit('p1', 'Page One', 'lower-ranked chunk'), + hit('p2', 'Page Two', 'p2 chunk'), + ]; + const accessibleSet = new Set(['p1', 'p2']); + + const out = selectAccessibleHits(hits, accessibleSet, 10); + + expect(out).toEqual([ + { id: 'p1', title: 'Page One', snippet: 'best chunk' }, + { id: 'p2', title: 'Page Two', snippet: 'p2 chunk' }, + ]); + }); + + it('caps the number of results at `cap`', () => { + const hits = [ + hit('p1', 't1', 'c1'), + hit('p2', 't2', 'c2'), + hit('p3', 't3', 'c3'), + hit('p4', 't4', 'c4'), + ]; + const accessibleSet = new Set(['p1', 'p2', 'p3', 'p4']); + + const out = selectAccessibleHits(hits, accessibleSet, 2); + + expect(out).toHaveLength(2); + expect(out.map((r) => r.id)).toEqual(['p1', 'p2']); + }); + + it('returns an empty list when accessibleSet is empty', () => { + const hits = [hit('p1', 't1', 'c1'), hit('p2', 't2', 'c2')]; + + expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]); + }); + + it('defaults a null title to an empty string', () => { + const out = selectAccessibleHits( + [hit('p1', null, 'body')], + new Set(['p1']), + 10, + ); + expect(out).toEqual([{ id: 'p1', title: '', snippet: 'body' }]); + }); + + /** + * Regression sentinel for the leak guard: if the access intersection + * (`accessibleSet.has(hit.pageId)` filter) were removed, the restricted page + * would slip into the output and THIS assertion would fail. Documents that + * the filter — not the dedupe/cap — is what enforces page-level access. + */ + it('FAILS if the access intersection is removed (sentinel)', () => { + const hits = [hit('restricted', 'Secret', 'leaked')]; + // Page is NOT accessible -> output MUST be empty. Without the intersection + // check the function would return the restricted hit and break this test. + expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]); + }); +}); diff --git a/apps/server/src/core/share/share-comment-strip.spec.ts b/apps/server/src/core/share/share-comment-strip.spec.ts new file mode 100644 index 00000000..19befdc1 --- /dev/null +++ b/apps/server/src/core/share/share-comment-strip.spec.ts @@ -0,0 +1,176 @@ +import { ShareService } from './share.service'; + +// Exercises the REAL ShareService comment-mark stripping for shared content via +// the smallest reachable seam: updatePublicAttachments -> prepareContentForShare +// -> removeMarkTypeFromDoc(doc, 'comment'). This is a documented threat-model +// item: `comment` marks are internal-team metadata (existence, location, count, +// resolved state, and the comment ids themselves) and MUST NOT leak to anonymous +// public-share viewers. +// +// prepareContentForShare is private and the page-load path (getSharedPage) needs +// a full DB-backed resolveReadableSharePage; updatePublicAttachments is the +// smallest public seam that runs the exact same sanitization on a doc we control. +// Only the workspace toggle (workspaceRepo.findById) and token service are +// touched, both mocked — no DB setup required. + +const WS = 'ws-1'; +const PAGE = 'page-1'; + +function buildService() { + const shareRepo = { findById: jest.fn() }; + const pageRepo = { findById: jest.fn() }; + const pagePermissionRepo = { + hasRestrictedAncestor: jest.fn(async () => false), + }; + const tokenService = { + generateAttachmentToken: jest.fn(async () => 'tok'), + }; + // htmlEmbed toggle ON so the embed strip is a no-op and we isolate the + // comment-mark strip behaviour. + const workspaceRepo = { + findById: jest.fn(async () => ({ id: WS, settings: { htmlEmbed: true } })), + }; + + return new ShareService( + shareRepo as any, + pageRepo as any, + pagePermissionRepo as any, + {} as any, // db (unused on this path) + tokenService as any, + {} as any, // transclusionService (unused) + workspaceRepo as any, + ); +} + +// A paragraph whose text carries a `comment` mark with a comment id. +function commentedText(text: string, commentId: string) { + return { + type: 'text', + text, + marks: [{ type: 'comment', attrs: { commentId, resolved: false } }], + }; +} + +async function sanitize(content: any) { + const service = buildService(); + return service.updatePublicAttachments({ + id: PAGE, + workspaceId: WS, + content, + } as any); +} + +function countCommentMarks(doc: any): number { + let count = 0; + const walk = (node: any) => { + if (!node || typeof node !== 'object') return; + if (Array.isArray(node.marks)) { + for (const mark of node.marks) { + if (mark?.type === 'comment') count++; + } + } + if (Array.isArray(node.content)) node.content.forEach(walk); + }; + walk(doc); + return count; +} + +describe('ShareService comment-mark stripping for public shares (real code)', () => { + it('strips a top-level comment mark and preserves the visible text', async () => { + const content = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [commentedText('secret-reviewed body', 'cmt-top-1')], + }, + ], + }; + + const out = await sanitize(content); + + expect(countCommentMarks(out)).toBe(0); + // The text itself survives; only the internal mark is removed. + expect(JSON.stringify(out)).toContain('secret-reviewed body'); + // The comment id must not appear anywhere in the serialized output. + expect(JSON.stringify(out)).not.toContain('cmt-top-1'); + }); + + it('strips comment marks nested inside columns and callouts', async () => { + const content = { + type: 'doc', + content: [ + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { + type: 'paragraph', + content: [commentedText('col body', 'cmt-col-1')], + }, + ], + }, + { + type: 'column', + content: [ + { + type: 'callout', + content: [ + { + type: 'paragraph', + content: [commentedText('callout body', 'cmt-callout-1')], + }, + ], + }, + ], + }, + ], + }, + ], + }; + + const out = await sanitize(content); + + expect(countCommentMarks(out)).toBe(0); + const serialized = JSON.stringify(out); + // Visible content of both nested branches survives. + expect(serialized).toContain('col body'); + expect(serialized).toContain('callout body'); + // No nested comment id leaks. + expect(serialized).not.toContain('cmt-col-1'); + expect(serialized).not.toContain('cmt-callout-1'); + }); + + it('strips every comment mark when multiple coexist (count goes to zero)', async () => { + const content = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + commentedText('a', 'cmt-a'), + { type: 'text', text: ' plain ' }, + commentedText('b', 'cmt-b'), + ], + }, + { + type: 'paragraph', + content: [commentedText('c', 'cmt-c')], + }, + ], + }; + + // Sanity: the input genuinely has 3 comment marks before sanitization. + expect(countCommentMarks(content)).toBe(3); + + const out = await sanitize(content); + + expect(countCommentMarks(out)).toBe(0); + const serialized = JSON.stringify(out); + for (const id of ['cmt-a', 'cmt-b', 'cmt-c']) { + expect(serialized).not.toContain(id); + } + }); +}); diff --git a/apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts b/apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts new file mode 100644 index 00000000..33836811 --- /dev/null +++ b/apps/server/src/core/share/share-seo.controller.extract-slug.spec.ts @@ -0,0 +1,41 @@ +import { ShareSeoController } from './share-seo.controller'; + +// Pins ShareSeoController.extractPageSlugId — the slug→pageId resolver used to +// look up a shared page from the public URL. A full UUID must pass through +// untouched; a "title-slug-<id>" must yield the trailing token; a single token +// is returned as-is; falsy input yields undefined. The method does not touch +// `this`, so the controller can be constructed with null collaborators. + +function buildController(): ShareSeoController { + return new ShareSeoController(null as any, null as any, null as any); +} + +describe('ShareSeoController.extractPageSlugId', () => { + const controller = buildController(); + + it('returns a full UUID unchanged', () => { + const uuid = '550e8400-e29b-41d4-a716-446655440000'; + expect(controller.extractPageSlugId(uuid)).toBe(uuid); + }); + + it('returns the trailing token of a title-slug-id form', () => { + expect(controller.extractPageSlugId('my-page-title-abc123')).toBe('abc123'); + }); + + it('returns a single token (no hyphen) as-is', () => { + expect(controller.extractPageSlugId('abc123')).toBe('abc123'); + }); + + it('returns the last segment for a two-token slug', () => { + expect(controller.extractPageSlugId('hello-world')).toBe('world'); + }); + + it('returns undefined for an empty string (falsy guard)', () => { + expect(controller.extractPageSlugId('')).toBeUndefined(); + }); + + it('returns undefined for null/undefined input', () => { + expect(controller.extractPageSlugId(undefined as any)).toBeUndefined(); + expect(controller.extractPageSlugId(null as any)).toBeUndefined(); + }); +}); diff --git a/apps/server/src/core/share/share-seo.controller.ts b/apps/server/src/core/share/share-seo.controller.ts index db20ef0a..ad50e904 100644 --- a/apps/server/src/core/share/share-seo.controller.ts +++ b/apps/server/src/core/share/share-seo.controller.ts @@ -7,8 +7,8 @@ import { validate as isValidUUID } from 'uuid'; import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; import { EnvironmentService } from '../../integrations/environment/environment.service'; import { Workspace } from '@docmost/db/types/entity.types'; -import { htmlEscape } from '../../common/helpers/html-escaper'; import { injectTrackerHead } from './inject-tracker-head.util'; +import { buildShareMetaHtml } from './share-seo.util'; @Controller('share') export class ShareSeoController { @@ -72,24 +72,11 @@ export class ShareSeoController { return this.sendIndex(indexFilePath, res); } - const rawTitle = htmlEscape(share?.sharedPage.title ?? 'untitled'); - const metaTitle = - rawTitle.length > 80 ? `${rawTitle.slice(0, 77)}…` : rawTitle; - - const metaTagVar = '<!--meta-tags-->'; - - const metaTags = [ - `<meta property="og:title" content="${metaTitle}" />`, - `<meta property="twitter:title" content="${metaTitle}" />`, - !share.searchIndexing ? `<meta name="robots" content="noindex" />` : '', - ] - .filter(Boolean) - .join('\n '); - const html = fs.readFileSync(indexFilePath, 'utf8'); - let transformedHtml = html - .replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}`) - .replace(metaTagVar, metaTags); + let transformedHtml = buildShareMetaHtml(html, { + title: share?.sharedPage.title, + searchIndexing: share.searchIndexing, + }); // Deliberate same-origin tracker surface: this is the ONE place where an // admin-authored analytics/tracker snippet (settings.trackerHead) is diff --git a/apps/server/src/core/share/share-seo.meta.spec.ts b/apps/server/src/core/share/share-seo.meta.spec.ts new file mode 100644 index 00000000..25d51b28 --- /dev/null +++ b/apps/server/src/core/share/share-seo.meta.spec.ts @@ -0,0 +1,126 @@ +import { buildShareMetaHtml } from './share-seo.util'; + +// Pins the SEO meta-HTML builder for public share pages (extracted verbatim from +// ShareSeoController.getShare). The shared page title is attacker-influenceable, +// so the security-critical invariant is that it is htmlEscape'd before being +// interpolated into BOTH the element and the content="..." attributes of +// the og:/twitter: meta tags. The XSS tests below MUST fail if the htmlEscape +// step is ever removed. + +// A minimal index.html shell carrying the two placeholders the builder rewrites: +// the <title> element and the <!--meta-tags--> marker. +const INDEX = + '<html><head><title>App\n x'; + +describe('buildShareMetaHtml', () => { + describe('XSS: title escaping', () => { + it('fully htmlEscapes a ', + searchIndexing: true, + }); + + // The raw script tag must NEVER appear anywhere in the output — it would + // execute in the share origin. This assertion fails if htmlEscape is removed. + expect(out).not.toContain('