test(refactor-tail): extract pure cores + cover collab/share/ai-chat/client gate

Batches 6-9: behaviour-preserving extractions of testable pure cores plus the
tests they unblock, and a fix for the broken client test environment.
Full suites green: server 113 suites / 1117 + 1 todo, client 30 files / 338.

client (R0 infra):
- vitest.setup.ts: in-memory localStorage/sessionStorage Storage stub wired via
  setupFiles. Unblocks menu-items.gating.test.ts (was 9 failing) -> client suite
  fully green. + menu-items.suggestions.test.ts (getSuggestionItems filter/sort).

share:
- extract buildShareMetaHtml (share-seo.util.ts) from the SEO controller; tests
  for reflected-XSS escaping in <title>/og/twitter meta, noindex, truncation;
  extractPageSlugId; updateAttachmentAttr; prepareContentForShare comment-strip
  (anonymous-viewer metadata-leak guard).

ai-chat (security extractions):
- selectAccessibleHits: CASL post-filter for semantic search (restricted page in
  an accessible space must NOT leak to the agent).
- validateResolvedAddresses: SSRF connect-time guard (block if ANY resolved
  address is private).
- resolveAudioFormat: mime whitelist (dead `?? 'webm'` fallback dropped, set
  unchanged). + mcp-servers toView header-leak guard, MCP tool namespacing.

collaboration (data-loss area):
- extract computeHistoryJob (pins the "agent delay MUST stay 0" invariant) and
  resolveSource. Integration: onAuthenticate read-only matrix (collab auth
  bypass), HistoryProcessor (contributor restore on save failure), onStoreDocument
  Approach-A boundary snapshot (human revision pinned before agent overwrite).

Reviewed (APPROVE WITH SUGGESTIONS): extractions behaviour-preserving, security
tests mutation-resistant.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-21 19:10:27 +03:00
parent 0cfc3c8f89
commit 3d4ad664b3
22 changed files with 1893 additions and 101 deletions

View File

@@ -0,0 +1,84 @@
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { getSuggestionItems } from "./menu-items";
// Coverage for the filter/sort half of `getSuggestionItems` (distinct from the
// HTML-embed gating suite). A slash query is matched against each item three
// ways — fuzzy on the title, substring on the description, and substring on the
// searchTerms — and matched items are sorted so title-substring hits float to
// the top of their group. We also cover `excludeItems`.
//
// `getSuggestionItems` -> `isHtmlEmbedFeatureEnabled` reads the persisted
// `currentUser` localStorage entry, so a working in-memory Storage stub is a
// prerequisite (installed by vitest.setup.ts). We persist a `currentUser` with
// the HTML-embed toggle OFF (the production default) so the gated "HTML embed"
// item never leaks into these non-HTML queries.
const KEY = "currentUser";
function flatTitles(groups: ReturnType<typeof getSuggestionItems>): string[] {
return Object.values(groups)
.flat()
.map((item) => item.title);
}
beforeEach(() => {
// Default workspace state: HTML-embed feature OFF (matches production default).
localStorage.setItem(KEY, JSON.stringify({ workspace: { settings: {} } }));
});
afterEach(() => {
localStorage.clear();
});
describe("getSuggestionItems — filter and sort", () => {
it("fuzzy-matches a title (non-contiguous characters)", () => {
// "tdo" is not a substring of "to-do list" but matches fuzzily (t..d..o).
const titles = flatTitles(getSuggestionItems({ query: "tdo" }));
expect(titles).toContain("To-do list");
});
it("matches via the description when the title does not match", () => {
// "numbering" only appears in the description "Create a list with numbering.",
// not in the "Numbered list" title nor its searchTerms.
const titles = flatTitles(getSuggestionItems({ query: "numbering" }));
expect(titles).toContain("Numbered list");
});
it("matches via searchTerms when title and description do not match", () => {
// "blockquote" is only present in the "Quote" item's searchTerms.
const titles = flatTitles(getSuggestionItems({ query: "blockquote" }));
expect(titles).toContain("Quote");
});
it("sorts title-substring matches before non-title (description) matches", () => {
// For "page": several titles contain "page" (e.g. "Page break"), while
// "Synced block" matches only through its description (".. across pages.").
// The sort tie-break must place every title hit ahead of the non-title hit.
const titles = flatTitles(getSuggestionItems({ query: "page" }));
const syncedIndex = titles.indexOf("Synced block");
const pageBreakIndex = titles.indexOf("Page break");
// Sanity: both items survived the filter for this query.
expect(syncedIndex).toBeGreaterThanOrEqual(0);
expect(pageBreakIndex).toBeGreaterThanOrEqual(0);
// The title match ("Page break") sorts before the description-only match.
expect(pageBreakIndex).toBeLessThan(syncedIndex);
});
it("removes a named item via excludeItems", () => {
const withBullet = flatTitles(getSuggestionItems({ query: "list" }));
expect(withBullet).toContain("Bullet list");
const withoutBullet = flatTitles(
getSuggestionItems({
query: "list",
excludeItems: new Set(["Bullet list"]),
}),
);
expect(withoutBullet).not.toContain("Bullet list");
// Other "list" matches remain unaffected by the exclusion.
expect(withoutBullet).toContain("Numbered list");
});
});

View File

@@ -12,6 +12,6 @@ export default defineConfig({
test: {
environment: 'jsdom',
globals: true,
setupFiles: [],
setupFiles: ['./vitest.setup.ts'],
},
});

View File

@@ -0,0 +1,51 @@
// Vitest global setup (test-infra only — no production app source).
//
// Under Node 25 / jsdom 25 / vitest 4 the jsdom `localStorage` exposed on the
// global is not a usable Storage: its methods (`setItem`/`getItem`/...) are not
// callable, so any code touching `localStorage` throws `... is not a function`.
// Production code such as `isHtmlEmbedFeatureEnabled()` reads
// `localStorage.getItem("currentUser")`, which made dependent tests fail.
//
// We install a correct in-memory Storage stub on the global BEFORE tests run so
// the Web Storage contract holds: string coercion of keys/values, `null` for
// missing keys, working `length`/`key(index)`, and `clear()`.
import { vi } from "vitest";
// Minimal, spec-faithful in-memory implementation of the Web Storage API.
function createStorage(): Storage {
let store = new Map<string, string>();
const storage: Storage = {
get length(): number {
return store.size;
},
clear(): void {
store = new Map<string, string>();
},
getItem(key: string): string | null {
// Missing keys must return `null`, not `undefined`.
const value = store.get(String(key));
return value === undefined ? null : value;
},
setItem(key: string, value: string): void {
// Web Storage coerces both key and value to strings.
store.set(String(key), String(value));
},
removeItem(key: string): void {
store.delete(String(key));
},
key(index: number): string | null {
// Insertion order matches Map iteration order; out-of-range => null.
const keys = Array.from(store.keys());
return index >= 0 && index < keys.length ? keys[index] : null;
},
};
return storage;
}
// Install on the jsdom global. `vi.stubGlobal` also reflects onto `window`
// (jsdom shares `globalThis` and `window`), so both `localStorage` and
// `window.localStorage` resolve to the same working stub.
vi.stubGlobal("localStorage", createStorage());
vi.stubGlobal("sessionStorage", createStorage());

View File

@@ -0,0 +1,211 @@
import {
NotFoundException,
UnauthorizedException,
} from '@nestjs/common';
import { AuthenticationExtension } from './authentication.extension';
import { SpaceRole } from '../../common/helpers/types/permission';
import { JwtType } from '../../core/auth/dto/jwt-payload';
/**
* Unit tests for the collab read-only downgrade matrix in
* `AuthenticationExtension.onAuthenticate`. This is a security boundary: a wrong
* branch here is either a collab-auth bypass (writer on a page they may only
* read) or a denial. We mock every repo and inspect both the thrown exception
* type and the `connectionConfig.readOnly` flag the extension mutates.
*/
const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
const USER_ID = 'user-1';
const WORKSPACE_ID = 'ws-1';
const SPACE_ID = 'space-1';
const buildUser = (overrides: Partial<any> = {}) => ({
id: USER_ID,
workspaceId: WORKSPACE_ID,
deactivatedAt: null,
deletedAt: null,
name: 'Alice',
avatarUrl: null,
...overrides,
});
const buildPage = (overrides: Partial<any> = {}) => ({
id: PAGE_ID,
spaceId: SPACE_ID,
workspaceId: WORKSPACE_ID,
deletedAt: null,
...overrides,
});
// Default jwt payload: a plain human collab token (no agent provenance claims).
const buildJwt = (overrides: Partial<any> = {}) => ({
sub: USER_ID,
workspaceId: WORKSPACE_ID,
type: JwtType.COLLAB,
...overrides,
});
describe('AuthenticationExtension.onAuthenticate', () => {
let ext: AuthenticationExtension;
let tokenService: { verifyJwt: jest.Mock };
let userRepo: { findById: jest.Mock };
let pageRepo: { findById: jest.Mock };
let spaceMemberRepo: { getUserSpaceRoles: jest.Mock };
let pagePermissionRepo: { canUserEditPage: jest.Mock };
// Build the hocuspocus onAuthenticate payload. connectionConfig.readOnly
// starts false; the extension flips it to true on a read-only downgrade.
const buildData = (token = 'tok') => ({
documentName: `page.${PAGE_ID}`,
token,
connectionConfig: { readOnly: false },
});
beforeEach(() => {
tokenService = { verifyJwt: jest.fn().mockResolvedValue(buildJwt()) };
userRepo = { findById: jest.fn().mockResolvedValue(buildUser()) };
pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) };
spaceMemberRepo = {
getUserSpaceRoles: jest
.fn()
.mockResolvedValue([{ userId: USER_ID, role: SpaceRole.WRITER }]),
};
pagePermissionRepo = {
// No page-level restriction by default → defer to space role.
canUserEditPage: jest.fn().mockResolvedValue({
hasAnyRestriction: false,
canAccess: true,
canEdit: true,
}),
};
ext = new AuthenticationExtension(
tokenService as any,
userRepo as any,
pageRepo as any,
spaceMemberRepo as any,
pagePermissionRepo as any,
);
// Silence the extension's logger (it warns/debugs on denial branches).
jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined);
jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined);
});
it('invalid token → UnauthorizedException (no repo lookups happen)', async () => {
tokenService.verifyJwt.mockRejectedValue(new Error('bad sig'));
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
UnauthorizedException,
);
expect(userRepo.findById).not.toHaveBeenCalled();
});
it('user not found → Unauthorized', async () => {
userRepo.findById.mockResolvedValue(null);
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
UnauthorizedException,
);
});
it('user disabled (deactivatedAt set) → Unauthorized', async () => {
userRepo.findById.mockResolvedValue(
buildUser({ deactivatedAt: new Date() }),
);
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
UnauthorizedException,
);
});
it('page not found → NotFoundException', async () => {
pageRepo.findById.mockResolvedValue(null);
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
NotFoundException,
);
});
it('no space role → Unauthorized', async () => {
spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([]);
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
UnauthorizedException,
);
});
it('page-level restriction canAccess=false → Unauthorized (restricted-page denial)', async () => {
pagePermissionRepo.canUserEditPage.mockResolvedValue({
hasAnyRestriction: true,
canAccess: false,
canEdit: false,
});
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
UnauthorizedException,
);
});
it('restriction canAccess=true & canEdit=false → readOnly (no restricted-page write)', async () => {
pagePermissionRepo.canUserEditPage.mockResolvedValue({
hasAnyRestriction: true,
canAccess: true,
canEdit: false,
});
const data = buildData();
const ctx = await ext.onAuthenticate(data as any);
expect(data.connectionConfig.readOnly).toBe(true);
expect(ctx.actor).toBe('user');
});
it('restriction canAccess=true & canEdit=true → writable (readOnly stays false)', async () => {
pagePermissionRepo.canUserEditPage.mockResolvedValue({
hasAnyRestriction: true,
canAccess: true,
canEdit: true,
});
const data = buildData();
await ext.onAuthenticate(data as any);
expect(data.connectionConfig.readOnly).toBe(false);
});
it('no restriction + space READER → readOnly', async () => {
spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([
{ userId: USER_ID, role: SpaceRole.READER },
]);
const data = buildData();
await ext.onAuthenticate(data as any);
expect(data.connectionConfig.readOnly).toBe(true);
});
it('no restriction + space WRITER → writable', async () => {
const data = buildData();
await ext.onAuthenticate(data as any);
expect(data.connectionConfig.readOnly).toBe(false);
});
it('soft-deleted page (deletedAt set) → readOnly even for a WRITER', async () => {
// A writer must NOT be able to mutate a page in the trash via collab.
pageRepo.findById.mockResolvedValue(buildPage({ deletedAt: new Date() }));
const data = buildData();
await ext.onAuthenticate(data as any);
expect(data.connectionConfig.readOnly).toBe(true);
});
it('agent JWT (actor=agent + aiChatId) propagates into the connection context', async () => {
tokenService.verifyJwt.mockResolvedValue(
buildJwt({ actor: 'agent', aiChatId: 'chat-7' }),
);
const ctx = await ext.onAuthenticate(buildData() as any);
expect(ctx.actor).toBe('agent');
expect(ctx.aiChatId).toBe('chat-7');
expect(ctx.user.id).toBe(USER_ID);
});
it('human JWT (no provenance claims) → actor=user, aiChatId=null', async () => {
const ctx = await ext.onAuthenticate(buildData() as any);
expect(ctx.actor).toBe('user');
expect(ctx.aiChatId).toBeNull();
});
});

View File

@@ -0,0 +1,105 @@
import {
computeHistoryJob,
resolveSource,
} from './persistence.extension';
import {
HISTORY_FAST_INTERVAL,
HISTORY_FAST_THRESHOLD,
HISTORY_INTERVAL,
} from '../constants';
// A fixed clock + fixed createdAt make pageAge deterministic.
const NOW = 1_700_000_000_000;
const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
// Build a minimal page whose age (NOW - createdAt) is exactly `ageMs`.
const pageAged = (ageMs: number) => ({
id: PAGE_ID,
createdAt: new Date(NOW - ageMs),
});
describe('computeHistoryJob', () => {
it('agent edit → delay MUST be 0 and job id is source-keyed', () => {
// INVARIANT (§15 H2 / persistence.extension): the agent delay MUST stay 0.
// The worker re-reads the page row at run time, so any non-zero delay risks
// snapshotting content a later human edit has already overwritten. This is
// the load-bearing assertion of this spec — do not relax it.
const { jobId, delay } = computeHistoryJob(pageAged(0), 'agent', NOW);
expect(delay).toBe(0);
expect(jobId).toBe(`${PAGE_ID}-agent`);
});
it('agent edit on an OLD page is still delay 0 (age never applies to agents)', () => {
// Even when the page is far older than the fast threshold, the agent path
// must short-circuit to 0 — age-based debounce is a human-only concern.
const { jobId, delay } = computeHistoryJob(
pageAged(HISTORY_FAST_THRESHOLD + 60_000),
'agent',
NOW,
);
expect(delay).toBe(0);
expect(jobId).toBe(`${PAGE_ID}-agent`);
});
it('human edit on a YOUNG page (age < threshold) → fast interval, bare job id', () => {
const { jobId, delay } = computeHistoryJob(
pageAged(HISTORY_FAST_THRESHOLD - 1),
'user',
NOW,
);
expect(delay).toBe(HISTORY_FAST_INTERVAL);
expect(jobId).toBe(PAGE_ID);
});
it('human edit on an OLD page (age > threshold) → standard interval', () => {
const { jobId, delay } = computeHistoryJob(
pageAged(HISTORY_FAST_THRESHOLD + 1),
'user',
NOW,
);
expect(delay).toBe(HISTORY_INTERVAL);
expect(jobId).toBe(PAGE_ID);
});
it('boundary: pageAge EXACTLY === threshold takes the slow branch (the `<` is strict)', () => {
// Off-by-one guard: the condition is `pageAge < HISTORY_FAST_THRESHOLD`, so
// an age of exactly the threshold is NOT "fast" — it must use HISTORY_INTERVAL.
const { delay } = computeHistoryJob(
pageAged(HISTORY_FAST_THRESHOLD),
'user',
NOW,
);
expect(delay).toBe(HISTORY_INTERVAL);
});
it('treats any non-"agent" source string as human', () => {
// resolveSource only ever yields 'agent' | 'user', but guard the contract:
// the agent branch keys strictly on === 'agent'.
const { jobId, delay } = computeHistoryJob(pageAged(0), 'user', NOW);
expect(delay).toBe(HISTORY_FAST_INTERVAL);
expect(jobId).toBe(PAGE_ID);
});
});
describe('resolveSource (truth table)', () => {
// (sticky, actor) → expected. Marker is OR of the sticky flag and actor==='agent'.
it('sticky=false, actor=user → user', () => {
expect(resolveSource(false, 'user')).toBe('user');
});
it('sticky=true, actor=user → agent (sticky wins)', () => {
expect(resolveSource(true, 'user')).toBe('agent');
});
it('sticky=false, actor=agent → agent (current writer is the agent)', () => {
expect(resolveSource(false, 'agent')).toBe('agent');
});
it('sticky=true, actor=agent → agent', () => {
expect(resolveSource(true, 'agent')).toBe('agent');
});
it('sticky=false, actor=undefined → user (human collab path omits the claim)', () => {
expect(resolveSource(false, undefined)).toBe('user');
});
});

View File

@@ -0,0 +1,185 @@
import { TiptapTransformer } from '@hocuspocus/transformer';
import { PersistenceExtension } from './persistence.extension';
import { tiptapExtensions } from '../collaboration.util';
/**
* Integration test for `onStoreDocument`'s Approach-A boundary snapshot.
*
* The data-loss risk: when an AGENT store lands over a page whose persisted
* state was authored by a HUMAN, the agent overwrites that human content. If we
* do not pin the human revision as its own history version BEFORE the agent's
* updatePage, the last human edit is lost. This test pins the ordering
* (saveHistory(oldHumanPage) strictly before updatePage) and the idempotency
* skip when content is unchanged.
*
* We pass a REAL Y.Doc as the `document` arg (so TiptapTransformer.fromYdoc
* yields real content) and stub repos/queues + an executeTx-compatible db whose
* transaction().execute() invokes the callback with a trx stub.
*/
const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
const USER_ID = 'human-1';
// Build a real Y.Doc carrying the given tiptap JSON in the 'default' fragment.
// hocuspocus augments the live document with broadcastStateless(); the bare
// Y.Doc lacks it, so stub it for the post-store broadcast.
const ydocFor = (json: any) => {
const ydoc = TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
(ydoc as any).broadcastStateless = jest.fn();
return ydoc;
};
const doc = (text: string) => ({
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text }] }],
});
describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot', () => {
let ext: PersistenceExtension;
let pageRepo: { findById: jest.Mock; updatePage: jest.Mock };
let pageHistoryRepo: {
saveHistory: jest.Mock;
findPageLastHistory: jest.Mock;
};
let aiQueue: { add: jest.Mock };
let historyQueue: { add: jest.Mock };
let notificationQueue: { add: jest.Mock };
let collabHistory: { addContributors: jest.Mock };
let transclusionService: {
syncPageTransclusions: jest.Mock;
syncPageReferences: jest.Mock;
syncPageTemplateReferences: jest.Mock;
};
let callOrder: string[];
// db whose transaction().execute(fn) runs fn with a trx stub — this lets the
// real executeTx() helper drive the callback without a database.
const trxStub = { __trx: true };
const db = {
transaction: () => ({
execute: (fn: (trx: any) => Promise<any>) => fn(trxStub),
}),
};
// The persisted page row the transaction reads (OLD, human-authored state).
const persistedHumanPage = (newAgentText: string) => ({
id: PAGE_ID,
slugId: 'slug-1',
spaceId: 'space-1',
workspaceId: 'ws-1',
creatorId: 'creator-1',
contributorIds: ['creator-1'],
createdAt: new Date('2020-01-01T00:00:00Z'),
lastUpdatedSource: 'user', // prior revision was human
// content differs from the new agent doc so the update branch runs.
content: doc('OLD HUMAN'),
_newAgentText: newAgentText,
});
const buildData = (document: any, actor: 'user' | 'agent') => ({
documentName: `page.${PAGE_ID}`,
document,
context: { user: { id: USER_ID, name: 'Alice' }, actor },
});
beforeEach(() => {
callOrder = [];
pageRepo = {
findById: jest.fn(),
updatePage: jest.fn().mockImplementation(async () => {
callOrder.push('updatePage');
}),
};
pageHistoryRepo = {
saveHistory: jest.fn().mockImplementation(async () => {
callOrder.push('saveHistory');
}),
findPageLastHistory: jest.fn().mockResolvedValue(null),
};
aiQueue = { add: jest.fn().mockResolvedValue(undefined) };
historyQueue = { add: jest.fn().mockResolvedValue(undefined) };
notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
collabHistory = { addContributors: jest.fn().mockResolvedValue(undefined) };
transclusionService = {
syncPageTransclusions: jest.fn().mockResolvedValue(undefined),
syncPageReferences: jest.fn().mockResolvedValue(undefined),
syncPageTemplateReferences: jest.fn().mockResolvedValue(undefined),
};
ext = new PersistenceExtension(
pageRepo as any,
pageHistoryRepo as any,
db as any,
aiQueue as any,
historyQueue as any,
notificationQueue as any,
collabHistory as any,
transclusionService as any,
);
jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined);
jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined);
jest.spyOn(ext['logger'], 'error').mockImplementation(() => undefined);
});
it('agent store over a human page pins saveHistory(oldHumanPage) BEFORE updatePage', async () => {
const document = ydocFor(doc('NEW AGENT CONTENT'));
pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT'));
// No human baseline snapshot exists yet → boundary snapshot must run.
pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
await ext.onStoreDocument(buildData(document, 'agent') as any);
// Boundary snapshot fired, and strictly before the agent overwrite.
expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1);
const saved = pageHistoryRepo.saveHistory.mock.calls[0][0];
expect(saved.content).toEqual(doc('OLD HUMAN')); // the OLD human revision
expect(callOrder).toEqual(['saveHistory', 'updatePage']);
// The agent's new content is tagged 'agent' on the update.
const update = pageRepo.updatePage.mock.calls[0][0];
expect(update.lastUpdatedSource).toBe('agent');
});
it('skips the boundary snapshot when the human baseline is already pinned', async () => {
const document = ydocFor(doc('NEW AGENT CONTENT'));
pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT'));
// Latest history already equals the current human state → no duplicate.
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
content: doc('OLD HUMAN'),
});
await ext.onStoreDocument(buildData(document, 'agent') as any);
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
});
it('human store does NOT trigger the boundary snapshot (no source transition)', async () => {
const document = ydocFor(doc('NEW HUMAN CONTENT'));
pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW HUMAN CONTENT'));
await ext.onStoreDocument(buildData(document, 'user') as any);
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
expect(pageRepo.updatePage.mock.calls[0][0].lastUpdatedSource).toBe('user');
});
it('idempotency: unchanged content → no updatePage, no history, no queues', async () => {
// The Y.Doc content equals the persisted content deeply → early skip.
// A Y.Doc round-trip normalizes attrs (e.g. paragraph indent), so derive
// the persisted content from fromYdoc to make the deep-equal skip genuine.
const document = ydocFor(doc('SAME CONTENT'));
const normalized = TiptapTransformer.fromYdoc(document, 'default');
pageRepo.findById.mockResolvedValue({
...persistedHumanPage('SAME CONTENT'),
content: normalized,
});
await ext.onStoreDocument(buildData(document, 'agent') as any);
expect(pageRepo.updatePage).not.toHaveBeenCalled();
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
expect(historyQueue.add).not.toHaveBeenCalled();
});
});

View File

@@ -40,6 +40,52 @@ import {
} from '../constants';
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
/**
* Resolve the provenance source for a coalesced snapshot.
*
* The snapshot is tagged 'agent' if any agent edit landed in the coalescing
* window (sticky marker) OR if the current writer is the agent; otherwise
* 'user'. Pure so the §15 H2 marker logic is unit-testable in isolation.
*/
export function resolveSource(
stickyTouched: boolean,
contextActor?: string,
): 'agent' | 'user' {
return stickyTouched || contextActor === 'agent' ? 'agent' : 'user';
}
/**
* Compute the BullMQ job id + delay for a page-history snapshot job. Pure so
* the data-loss-sensitive timing arithmetic is unit-testable; `now` is injected
* (caller passes `Date.now()`) for determinism.
*
* - Agent edits: delay 0 and a source-keyed job id `${page.id}-agent`. The
* delay MUST stay 0 — the worker re-reads the page row at run time, so any
* delay risks reading content a later human edit has already overwritten
* (mis-tagged snapshot). 0 minimizes that window. The `-agent` suffix keeps
* the job from coalescing with the bare-page.id human job.
* - Human edits: age-based debounce so rapid human edits coalesce into one
* snapshot; job id is the bare `page.id`.
*
* BullMQ forbids ':' in custom job ids (Redis key separator), so '-' is used;
* page.id is a UUID, so `${page.id}-agent` cannot collide with a human job.
*/
export function computeHistoryJob(
page: Pick<Page, 'id' | 'createdAt'>,
source: string,
now: number,
): { jobId: string; delay: number } {
const isAgent = source === 'agent';
const pageAge = now - new Date(page.createdAt).getTime();
const delay = isAgent
? 0
: pageAge < HISTORY_FAST_THRESHOLD
? HISTORY_FAST_INTERVAL
: HISTORY_INTERVAL;
const jobId = isAgent ? `${page.id}-agent` : page.id;
return { jobId, delay };
}
@Injectable()
export class PersistenceExtension implements Extension {
private readonly logger = new Logger(PersistenceExtension.name);
@@ -129,9 +175,10 @@ export class PersistenceExtension implements Extension {
// Sticky agent marker: 'agent' if any agent edit landed in this window, OR
// if the current writer is the agent (covers a store with no prior onChange
// agent event in the same window). §15 H2.
const agentTouched =
this.consumeAgentTouched(documentName) || context?.actor === 'agent';
const lastUpdatedSource = agentTouched ? 'agent' : 'user';
const lastUpdatedSource = resolveSource(
this.consumeAgentTouched(documentName),
context?.actor,
);
try {
await executeTx(this.db, async (trx) => {
@@ -311,24 +358,13 @@ export class PersistenceExtension implements Extension {
page: Page,
lastUpdatedSource: string,
): Promise<void> {
// Agent edits get an immediate, source-keyed history job: they snapshot
// deterministically as 'agent' and a later human edit (jobId = page.id)
// cannot coalesce/retag them. Human edits keep the age-based debounce so
// rapid human edits still coalesce into one snapshot.
// NOTE: the agent delay MUST stay 0 — the worker re-reads the page row at
// run time, so any delay would risk reading content a later human edit has
// already overwritten (mis-tagged snapshot). 0 minimizes that window.
const isAgent = lastUpdatedSource === 'agent';
const pageAge = Date.now() - new Date(page.createdAt).getTime();
const delay = isAgent
? 0
: pageAge < HISTORY_FAST_THRESHOLD
? HISTORY_FAST_INTERVAL
: HISTORY_INTERVAL;
// BullMQ forbids ':' in custom job IDs (it is the Redis key separator), so
// use '-' here. page.id is a UUID, so `${page.id}-agent` cannot collide with
// any human job whose id is a bare page.id.
const jobId = isAgent ? `${page.id}-agent` : page.id;
// Job id + delay arithmetic lives in the pure `computeHistoryJob` (see its
// doc comment for the agent-delay-0 / age-based-debounce invariants).
const { jobId, delay } = computeHistoryJob(
page,
lastUpdatedSource,
Date.now(),
);
await this.historyQueue.add(
QueueJob.PAGE_HISTORY,

View File

@@ -0,0 +1,200 @@
import { Job } from 'bullmq';
import { HistoryProcessor } from './history.processor';
import { QueueJob } from '../../integrations/queue/constants';
/**
* Unit tests for `HistoryProcessor.process`. This worker is the last line of
* defense for the page-history snapshot, so we pin the data-loss-sensitive
* paths: duplicate/empty history skipping (isDeepStrictEqual), and — critically
* — that a saveHistory failure RESTORES the contributors it popped (otherwise
* the contributor set is silently lost) before rethrowing.
*/
const PAGE_ID = 'page-1';
const SPACE_ID = 'space-1';
const WORKSPACE_ID = 'ws-1';
// A non-empty content doc (distinct from the empty-paragraph doc).
const filledContent = {
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
};
const emptyContent = { type: 'doc', content: [{ type: 'paragraph' }] };
const buildPage = (overrides: Partial<any> = {}) => ({
id: PAGE_ID,
spaceId: SPACE_ID,
workspaceId: WORKSPACE_ID,
content: filledContent,
...overrides,
});
const buildJob = (overrides: Partial<any> = {}) =>
({
name: QueueJob.PAGE_HISTORY,
data: { pageId: PAGE_ID },
...overrides,
}) as unknown as Job<any, void>;
describe('HistoryProcessor.process', () => {
let proc: HistoryProcessor;
let pageHistoryRepo: { findPageLastHistory: jest.Mock; saveHistory: jest.Mock };
let pageRepo: { findById: jest.Mock };
let collabHistory: {
clearContributors: jest.Mock;
popContributors: jest.Mock;
addContributors: jest.Mock;
};
let watcherService: { addPageWatchers: jest.Mock };
let notificationQueue: { add: jest.Mock };
let generalQueue: { add: jest.Mock };
beforeEach(() => {
pageHistoryRepo = {
findPageLastHistory: jest.fn().mockResolvedValue(null),
saveHistory: jest.fn().mockResolvedValue(undefined),
};
pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) };
collabHistory = {
clearContributors: jest.fn().mockResolvedValue(undefined),
popContributors: jest.fn().mockResolvedValue(['u1', 'u2']),
addContributors: jest.fn().mockResolvedValue(undefined),
};
watcherService = {
addPageWatchers: jest.fn().mockResolvedValue(undefined),
};
notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
generalQueue = { add: jest.fn().mockResolvedValue(undefined) };
// WorkerHost's constructor reads `this.worker`; passing repos positionally
// matches the constructor and avoids the Nest DI container.
proc = new HistoryProcessor(
pageHistoryRepo as any,
pageRepo as any,
collabHistory as any,
watcherService as any,
notificationQueue as any,
generalQueue as any,
);
jest.spyOn(proc['logger'], 'debug').mockImplementation(() => undefined);
jest.spyOn(proc['logger'], 'warn').mockImplementation(() => undefined);
jest.spyOn(proc['logger'], 'error').mockImplementation(() => undefined);
});
it('ignores jobs whose name is not PAGE_HISTORY (no page lookup)', async () => {
await proc.process(buildJob({ name: 'some.other.job' }));
expect(pageRepo.findById).not.toHaveBeenCalled();
});
it('page not found → clearContributors and return (no save)', async () => {
pageRepo.findById.mockResolvedValue(null);
await proc.process(buildJob());
expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID);
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
expect(collabHistory.popContributors).not.toHaveBeenCalled();
});
it('first history + empty content → skip and clear contributors (no save)', async () => {
pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
pageRepo.findById.mockResolvedValue(buildPage({ content: emptyContent }));
await proc.process(buildJob());
expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID);
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
});
it('content unchanged vs last history → no save (isDeepStrictEqual skip)', async () => {
// Last history holds a deep-equal-but-distinct copy of current content.
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
content: JSON.parse(JSON.stringify(filledContent)),
});
await proc.process(buildJob());
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
expect(collabHistory.popContributors).not.toHaveBeenCalled();
});
it('content changed → addPageWatchers + saveHistory + backlinks queue', async () => {
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
content: { type: 'doc', content: [] },
});
await proc.process(buildJob());
expect(collabHistory.popContributors).toHaveBeenCalledWith(PAGE_ID);
expect(watcherService.addPageWatchers).toHaveBeenCalledWith(
['u1', 'u2'],
PAGE_ID,
SPACE_ID,
WORKSPACE_ID,
);
expect(pageHistoryRepo.saveHistory).toHaveBeenCalledWith(
expect.objectContaining({ id: PAGE_ID }),
{ contributorIds: ['u1', 'u2'] },
);
expect(generalQueue.add).toHaveBeenCalledWith(
QueueJob.PAGE_BACKLINKS,
expect.objectContaining({ pageId: PAGE_ID, workspaceId: WORKSPACE_ID }),
);
});
it('first history (lastHistory null) with non-empty content → saves, no PAGE_UPDATED notification', async () => {
// popContributors yields users, but lastHistory?.content is falsy so the
// notification branch (needs a prior version) must be skipped.
pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
await proc.process(buildJob());
expect(pageHistoryRepo.saveHistory).toHaveBeenCalled();
expect(notificationQueue.add).not.toHaveBeenCalled();
});
it('changed content WITH prior history + contributors → queues PAGE_UPDATED notification', async () => {
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
content: { type: 'doc', content: [] },
});
await proc.process(buildJob());
expect(notificationQueue.add).toHaveBeenCalledWith(
QueueJob.PAGE_UPDATED,
expect.objectContaining({
pageId: PAGE_ID,
actorIds: ['u1', 'u2'],
}),
);
});
it('saveHistory throws → contributors RESTORED (addContributors) AND error rethrown', async () => {
// The data-loss guard: if the snapshot save fails after popContributors,
// the popped ids MUST be returned to the pending set, then the error
// propagates so BullMQ retries. Assert BOTH halves.
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
content: { type: 'doc', content: [] },
});
const boom = new Error('db down');
pageHistoryRepo.saveHistory.mockRejectedValue(boom);
await expect(proc.process(buildJob())).rejects.toThrow('db down');
expect(collabHistory.addContributors).toHaveBeenCalledWith(PAGE_ID, [
'u1',
'u2',
]);
});
it('backlinks + notification queue failures are swallowed (history still committed)', async () => {
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
content: { type: 'doc', content: [] },
});
generalQueue.add.mockRejectedValue(new Error('redis backlinks down'));
notificationQueue.add.mockRejectedValue(new Error('redis notif down'));
// The downstream queue failures are caught internally; process resolves.
await expect(proc.process(buildJob())).resolves.toBeUndefined();
expect(pageHistoryRepo.saveHistory).toHaveBeenCalled();
});
});

View File

@@ -228,25 +228,14 @@ export class AiChatController {
}
if (!file) throw new BadRequestException('No audio uploaded');
// Whitelist audio container types produced by browser MediaRecorder
// (Chrome/FF: webm/opus, Safari: mp4) plus common STT-accepted formats.
const allowedMime = new Set([
'audio/webm',
'audio/ogg',
'audio/mp4',
'audio/mpeg',
'audio/wav',
'audio/x-wav',
'audio/wave',
'audio/m4a',
'audio/x-m4a',
]);
// MediaRecorder mimetypes carry parameters (e.g. "audio/webm;codecs=opus");
// compare only the base type.
const baseMime = file.mimetype.split(';')[0].trim().toLowerCase();
if (!allowedMime.has(baseMime)) {
// Resolve + whitelist the upload's container type (MediaRecorder mimetypes
// carry parameters, e.g. "audio/webm;codecs=opus"). A non-whitelisted type
// is rejected; an allowed one yields the STT container-format hint.
const resolved = resolveAudioFormat(file.mimetype);
if (!resolved.ok) {
throw new BadRequestException('Unsupported audio format');
}
const { format } = resolved;
let buf: Buffer;
try {
@@ -259,20 +248,6 @@ export class AiChatController {
}
throw err;
}
// Container hint for JSON-style STT providers (e.g. OpenRouter); multipart
// endpoints ignore it.
const formatMap: Record<string, string> = {
'audio/webm': 'webm',
'audio/ogg': 'ogg',
'audio/mp4': 'mp4',
'audio/mpeg': 'mp3',
'audio/wav': 'wav',
'audio/x-wav': 'wav',
'audio/wave': 'wav',
'audio/m4a': 'm4a',
'audio/x-m4a': 'm4a',
};
const format = formatMap[baseMime] ?? 'webm';
let text: string;
try {
text = await this.aiTranscription.transcribe(workspace.id, buf, format);
@@ -302,3 +277,39 @@ export class AiChatController {
}
}
}
/**
* Whitelist audio container types produced by browser MediaRecorder (Chrome/FF:
* webm/opus, Safari: mp4) plus common STT-accepted formats. The value maps each
* allowed base mime to the container-format hint passed to JSON-style STT
* providers (e.g. OpenRouter); multipart endpoints ignore the hint.
*/
const AUDIO_FORMAT_MAP: Record<string, string> = {
'audio/webm': 'webm',
'audio/ogg': 'ogg',
'audio/mp4': 'mp4',
'audio/mpeg': 'mp3',
'audio/wav': 'wav',
'audio/x-wav': 'wav',
'audio/wave': 'wav',
'audio/m4a': 'm4a',
'audio/x-m4a': 'm4a',
};
/**
* Resolve and whitelist an uploaded clip's mimetype. MediaRecorder mimetypes
* carry parameters (e.g. "audio/webm;codecs=opus"), so the base type is split
* out (lowercased, trimmed) before the whitelist check. Returns ok=false for a
* non-whitelisted container; otherwise the base mime and its STT format hint.
* Pure — the caller throws BadRequestException on !ok.
*/
export function resolveAudioFormat(
mimetype: string,
): { ok: true; baseMime: string; format: string } | { ok: false } {
const baseMime = mimetype.split(';')[0].trim().toLowerCase();
const format = AUDIO_FORMAT_MAP[baseMime];
if (format === undefined) {
return { ok: false };
}
return { ok: true, baseMime, format };
}

View File

@@ -367,6 +367,28 @@ export class McpClientsService {
}
}
/**
* Apply the SSRF connect-time rule to a set of DNS-resolved addresses: block if
* ANY resolved address is disallowed by `isIpAllowed`, and block an EMPTY set
* (nothing safe to connect to). Only an all-public, non-empty set is allowed.
*
* This is the connect-time half of the DNS-rebinding defense: the dispatcher's
* lookup hands net/tls.connect ONLY a set that passed this check, so the kernel
* can never connect to an address that did not pass the guard. Pure — no I/O.
*/
export function validateResolvedAddresses(
addrs: readonly LookupAddress[],
): { ok: boolean; blockedHost?: string } {
if (addrs.length === 0) {
return { ok: false };
}
const blocked = addrs.find((a) => !isIpAllowed(a.address).ok);
if (blocked) {
return { ok: false, blockedHost: blocked.address };
}
return { ok: true };
}
/**
* Build the SSRF-pinned undici dispatcher. Its custom connect.lookup resolves
* the host, validates EVERY resolved address with the same ssrf-guard, and
@@ -388,22 +410,15 @@ function buildPinnedDispatcher(): Agent {
return;
}
const addrs = addresses as LookupAddress[];
if (addrs.length === 0) {
callback(
new Error(`No address resolved for ${hostname}`),
'',
0,
);
return;
}
const blocked = addrs.find((a) => !isIpAllowed(a.address).ok);
if (blocked) {
const verdict = validateResolvedAddresses(addrs);
if (!verdict.ok) {
// Refuse the connection: net/tls.connect never sees this address.
callback(
new Error(`Blocked address for ${hostname}`),
'',
0,
);
// An empty set is treated as blocked (nothing safe to connect to).
const reason =
addrs.length === 0
? `No address resolved for ${hostname}`
: `Blocked address for ${hostname}`;
callback(new Error(reason), '', 0);
return;
}
// undici/net invoke this lookup with `all: true`, so the callback

View File

@@ -0,0 +1,136 @@
import { type Tool } from 'ai';
import { McpClientsService } from './mcp-clients.service';
/**
* Tool-name namespacing / collision tests.
*
* REACHABILITY NOTE: the helpers `namespace` / `sanitizeName` / `capName` /
* `disambiguate` are module-private (not exported) and `mergeNamespaced` is a
* PRIVATE method. The smallest reachable public path that exercises all of them
* is `toolsFor()` -> getOrBuildEntry -> buildEntry -> connect/tools() ->
* mergeNamespaced. We drive that path: stub the repo's `listEnabled` to return
* fake servers and spy on the private `connect` to return fake MCP clients whose
* `tools()` we control. We then inspect the merged tool KEYS on the returned
* toolset — the observable result of namespacing.
*
* What we assert (all SECURITY/correctness-relevant):
* - two servers each exposing a tool `search` -> BOTH survive under distinct
* namespaced keys (no silent overwrite);
* - a tool name with spaces/unicode -> sanitized to ^[a-zA-Z0-9_-]+;
* - an over-long name -> capped to the provider limit (<= 64);
* - duplicate names WITHIN one server (collide after sanitize/truncate) ->
* disambiguated, so the second is not overwritten.
*/
const MAX_TOOL_NAME_LENGTH = 64;
function fakeTool(): Tool {
return { description: 'x', inputSchema: undefined } as unknown as Tool;
}
interface FakeServer {
id: string;
name: string;
transport: string;
url: string;
headersEnc: string | null;
toolAllowlist: string[] | null;
}
function server(over: Partial<FakeServer> & { id: string; name: string }): FakeServer {
return {
transport: 'http',
url: 'https://example.com/mcp',
headersEnc: null,
toolAllowlist: null,
...over,
};
}
/**
* Build a service whose repo returns `servers` and whose `connect` returns a
* fake client exposing `toolsByServerId[server.id]` from tools(). Returns the
* merged keys produced by toolsFor.
*/
async function mergedKeysFor(
servers: FakeServer[],
toolsByServerId: Record<string, Record<string, Tool>>,
): Promise<string[]> {
const repoStub = {
listEnabled: jest.fn().mockResolvedValue(servers),
};
const service = new McpClientsService(repoStub as never, {} as never);
// Map each connect() call (by server identity) to a fake client. connect is
// private; spy on it via a typed any-cast.
jest
.spyOn(service as unknown as { connect: (s: FakeServer) => unknown }, 'connect')
.mockImplementation((s: FakeServer) =>
Promise.resolve({
tools: () => Promise.resolve(toolsByServerId[s.id] ?? {}),
close: () => Promise.resolve(),
}),
);
const toolset = await service.toolsFor('ws-1');
// Release the lease so the service does not hold the fake clients open.
await Promise.all(toolset.clients.map((c) => c.close()));
return Object.keys(toolset.tools);
}
describe('external MCP tool-name namespacing (via toolsFor)', () => {
afterEach(() => jest.restoreAllMocks());
it('keeps tools from two servers that both expose `search` (no overwrite)', async () => {
const keys = await mergedKeysFor(
[
server({ id: 'id-alpha', name: 'alpha' }),
server({ id: 'id-beta', name: 'beta' }),
],
{
'id-alpha': { search: fakeTool() },
'id-beta': { search: fakeTool() },
},
);
// Two distinct keys survive -> no silent overwrite.
expect(keys).toHaveLength(2);
expect(new Set(keys).size).toBe(2);
// The server name is prefixed onto each tool.
expect(keys).toContain('alpha_search');
expect(keys.some((k) => k !== 'alpha_search')).toBe(true);
});
it('sanitizes spaces/unicode in names to the allowed charset', async () => {
const keys = await mergedKeysFor(
[server({ id: 'id-1', name: 'My Server!' })],
{ 'id-1': { 'search the wiki ✨': fakeTool() } },
);
expect(keys).toHaveLength(1);
// Only ^[a-zA-Z0-9_-]+ characters remain (no spaces, no unicode).
expect(keys[0]).toMatch(/^[a-zA-Z0-9_-]+$/);
});
it('caps an over-long name to the provider length limit', async () => {
const longName = 'a'.repeat(200);
const keys = await mergedKeysFor(
[server({ id: 'id-1', name: 'svr' })],
{ 'id-1': { [longName]: fakeTool() } },
);
expect(keys).toHaveLength(1);
expect(keys[0].length).toBeLessThanOrEqual(MAX_TOOL_NAME_LENGTH);
});
it('disambiguates two names that collide after sanitize/truncate within one server', async () => {
// Both names sanitize to the same value ("a_b") -> the second must be
// suffix-disambiguated, not overwritten.
const keys = await mergedKeysFor(
[server({ id: 'id-1', name: 'svr' })],
{ 'id-1': { 'a b': fakeTool(), 'a@b': fakeTool() } },
);
expect(keys).toHaveLength(2);
expect(new Set(keys).size).toBe(2);
});
});

View File

@@ -0,0 +1,85 @@
import { McpServersService } from './mcp-servers.service';
import { AiMcpServer } from '@docmost/db/types/entity.types';
/**
* Encrypted-header leak guard for the admin-facing view (§8.10): `toView` is
* private, so we drive it through the public `list()` (which maps every row
* with toView). The contract: a row with `headersEnc` set surfaces ONLY
* `hasHeaders:true` and NEVER the `headersEnc` blob; a row without headers
* surfaces `hasHeaders:false`. The blob must never reach an admin response.
*/
function row(overrides: Partial<AiMcpServer>): AiMcpServer {
return {
id: 'srv-1',
name: 'Tavily',
transport: 'http',
url: 'https://example.com/mcp',
enabled: true,
toolAllowlist: null,
headersEnc: null,
...overrides,
} as unknown as AiMcpServer;
}
describe('McpServersService.toView (via list) — encrypted-header leak guard', () => {
function buildService(rows: AiMcpServer[]): McpServersService {
const repoStub = {
listByWorkspace: jest.fn().mockResolvedValue(rows),
};
// secretBox + clients are unused by the list/toView path; pass stubs to
// satisfy the constructor.
return new McpServersService(
repoStub as never,
{} as never,
{} as never,
);
}
it('exposes hasHeaders:true and NO headersEnc when auth headers are set', async () => {
const service = buildService([
row({ headersEnc: 'ENCRYPTED-SECRET-BLOB' }),
]);
const [view] = await service.list('ws-1');
expect(view.hasHeaders).toBe(true);
// The encrypted blob must NEVER appear in the view, under any key.
expect('headersEnc' in view).toBe(false);
expect(Object.values(view)).not.toContain('ENCRYPTED-SECRET-BLOB');
});
it('exposes hasHeaders:false when no auth headers are set', async () => {
const service = buildService([row({ headersEnc: null })]);
const [view] = await service.list('ws-1');
expect(view.hasHeaders).toBe(false);
expect('headersEnc' in view).toBe(false);
});
it('projects only the public fields', async () => {
const service = buildService([
row({
id: 'srv-9',
name: 'My MCP',
transport: 'sse',
url: 'https://mcp.example.com/',
enabled: false,
toolAllowlist: ['search'],
headersEnc: 'BLOB',
}),
]);
const [view] = await service.list('ws-1');
expect(view).toEqual({
id: 'srv-9',
name: 'My MCP',
transport: 'sse',
url: 'https://mcp.example.com/',
enabled: false,
toolAllowlist: ['search'],
hasHeaders: true,
});
});
});

View File

@@ -0,0 +1,67 @@
import { type LookupAddress } from 'node:dns';
import { validateResolvedAddresses } from './mcp-clients.service';
/**
* Unit tests for validateResolvedAddresses — the connect-time half of the SSRF
* DNS-rebinding defense. It applies the REAL `isIpAllowed` rule (imported
* transitively via the service) and must block if ANY resolved address is
* disallowed, treat an EMPTY set as blocked, and unwrap IPv4-mapped IPv6.
*
* These tests intentionally use real public/private literals (no DNS, no mock)
* so they exercise the actual ssrf-guard classification.
*/
function addr(address: string, family = 4): LookupAddress {
return { address, family };
}
describe('validateResolvedAddresses', () => {
it('allows an all-public set', () => {
const res = validateResolvedAddresses([
addr('8.8.8.8'),
addr('1.1.1.1'),
addr('2001:4860:4860::8888', 6),
]);
expect(res.ok).toBe(true);
});
it('blocks when ONE address among many is private (any-private-blocks)', () => {
const res = validateResolvedAddresses([
addr('8.8.8.8'),
addr('1.1.1.1'),
addr('10.0.0.5'), // private 10/8 hidden among public addresses
addr('1.0.0.1'),
]);
expect(res.ok).toBe(false);
expect(res.blockedHost).toBe('10.0.0.5');
});
it('blocks an empty set (nothing safe to connect to)', () => {
expect(validateResolvedAddresses([]).ok).toBe(false);
});
it('blocks an IPv4-mapped IPv6 private address', () => {
const res = validateResolvedAddresses([addr('::ffff:10.0.0.1', 6)]);
expect(res.ok).toBe(false);
});
it('blocks the cloud metadata link-local address', () => {
const res = validateResolvedAddresses([
addr('8.8.8.8'),
addr('169.254.169.254'),
]);
expect(res.ok).toBe(false);
});
/**
* Regression sentinel: if the "any private blocks" rule were weakened to
* "all private blocks" / "first address wins", this mixed set (public first,
* private second) would wrongly pass. The assertion below FAILS in that case.
*/
it('FAILS if the any-private rule is weakened (sentinel)', () => {
const res = validateResolvedAddresses([
addr('8.8.8.8'), // public first
addr('192.168.1.1'), // private second — must still block the whole set
]);
expect(res.ok).toBe(false);
});
});

View File

@@ -0,0 +1,53 @@
import { resolveAudioFormat } from './ai-chat.controller';
/**
* Unit tests for resolveAudioFormat — the transcribe-endpoint mime whitelist.
* It splits the base mime off any MediaRecorder parameters, lowercases/trims it,
* checks it against the whitelist, and maps it to the STT container-format hint.
* A non-whitelisted container yields { ok: false } (the controller then throws
* BadRequestException).
*/
describe('resolveAudioFormat', () => {
it('strips MediaRecorder parameters to the base mime (audio/webm;codecs=opus)', () => {
const res = resolveAudioFormat('audio/webm;codecs=opus');
expect(res).toEqual({ ok: true, baseMime: 'audio/webm', format: 'webm' });
});
it('normalizes uppercase / surrounding whitespace', () => {
const res = resolveAudioFormat(' AUDIO/MP4 ; codecs=mp4a ');
expect(res).toEqual({ ok: true, baseMime: 'audio/mp4', format: 'mp4' });
});
it('handles the Safari/iOS audio/x-m4a container', () => {
expect(resolveAudioFormat('audio/x-m4a')).toEqual({
ok: true,
baseMime: 'audio/x-m4a',
format: 'm4a',
});
});
it('rejects a disallowed container (audio/aiff)', () => {
expect(resolveAudioFormat('audio/aiff')).toEqual({ ok: false });
});
it('maps every whitelisted container to its STT format hint', () => {
const cases: Array<[string, string]> = [
['audio/webm', 'webm'],
['audio/ogg', 'ogg'],
['audio/mp4', 'mp4'],
['audio/mpeg', 'mp3'],
['audio/wav', 'wav'],
['audio/x-wav', 'wav'],
['audio/wave', 'wav'],
['audio/m4a', 'm4a'],
['audio/x-m4a', 'm4a'],
];
for (const [mime, format] of cases) {
expect(resolveAudioFormat(mime)).toEqual({
ok: true,
baseMime: mime,
format,
});
}
});
});

View File

@@ -199,21 +199,8 @@ export class AiChatToolsService {
const accessibleSet = new Set(accessibleIds);
// Keep the best (first — hits are ordered by fused score desc) chunk
// per page, capped to `cap`.
const seen = new Set<string>();
const results: { id: string; title: string; snippet: string }[] = [];
for (const hit of hits) {
if (!accessibleSet.has(hit.pageId)) continue;
if (seen.has(hit.pageId)) continue;
seen.add(hit.pageId);
results.push({
id: hit.pageId,
title: hit.title ?? '',
snippet: snippet(hit.content),
});
if (results.length >= cap) break;
}
return results;
// per page, dropping any page the user cannot access, capped to `cap`.
return selectAccessibleHits(hits, accessibleSet, cap);
},
}),
@@ -960,6 +947,44 @@ export class AiChatToolsService {
}
}
/** A single hybrid-search hit: the minimal shape selectAccessibleHits needs. */
export interface SearchHitLike {
pageId: string;
title: string | null;
content: string;
}
/**
* Post-filter hybrid-search hits into the agent-facing result list. This is the
* CASL leak guard for the in-process hybrid search: the hits come from a direct
* pgvector + full-text query that does NOT get CASL for free, so an accessible
* SPACE does not imply every page in it is accessible (restricted pages).
*
* Given `hits` (ordered by fused score desc), the `accessibleSet` of page ids
* the user may read, and `cap`, it keeps the BEST (first) chunk per page, drops
* any page not in `accessibleSet`, and caps the output at `cap`. Pure — no I/O.
*/
export function selectAccessibleHits(
hits: readonly SearchHitLike[],
accessibleSet: Set<string>,
cap: number,
): { id: string; title: string; snippet: string }[] {
const seen = new Set<string>();
const results: { id: string; title: string; snippet: string }[] = [];
for (const hit of hits) {
if (!accessibleSet.has(hit.pageId)) continue;
if (seen.has(hit.pageId)) continue;
seen.add(hit.pageId);
results.push({
id: hit.pageId,
title: hit.title ?? '',
snippet: snippet(hit.content),
});
if (results.length >= cap) break;
}
return results;
}
/**
* Trim a search highlight/snippet to a token-efficient length. The highlight
* may contain `<b>` markers from the search backend; they are harmless to the

View File

@@ -0,0 +1,96 @@
import {
selectAccessibleHits,
type SearchHitLike,
} from './ai-chat-tools.service';
/**
* Unit tests for selectAccessibleHits — the CASL leak guard for the in-process
* hybrid search. The hybrid query runs over pgvector + full-text WITHOUT CASL,
* so this post-filter is the ONLY thing that drops pages the user cannot read.
*
* Core invariant: a hit on a page that is NOT in `accessibleSet` is dropped,
* even when that page lives in an otherwise-accessible space. Plus: only the
* best chunk per page survives (dedupe), results are capped, and an empty
* accessibleSet yields nothing.
*/
function hit(pageId: string, title: string | null, content: string): SearchHitLike {
return { pageId, title, content };
}
describe('selectAccessibleHits', () => {
it('drops a hit on a page NOT in accessibleSet (the core leak guard)', () => {
const hits = [
hit('public-page', 'Public', 'visible body'),
// restricted-page is in an accessible space but NOT page-accessible.
hit('restricted-page', 'Secret', 'leaked body'),
];
const accessibleSet = new Set(['public-page']);
const out = selectAccessibleHits(hits, accessibleSet, 10);
expect(out).toEqual([
{ id: 'public-page', title: 'Public', snippet: 'visible body' },
]);
// The restricted page must NEVER appear in the output.
expect(out.some((r) => r.id === 'restricted-page')).toBe(false);
});
it('keeps only the best (first) chunk per page when a page has duplicates', () => {
const hits = [
hit('p1', 'Page One', 'best chunk'),
hit('p1', 'Page One', 'lower-ranked chunk'),
hit('p2', 'Page Two', 'p2 chunk'),
];
const accessibleSet = new Set(['p1', 'p2']);
const out = selectAccessibleHits(hits, accessibleSet, 10);
expect(out).toEqual([
{ id: 'p1', title: 'Page One', snippet: 'best chunk' },
{ id: 'p2', title: 'Page Two', snippet: 'p2 chunk' },
]);
});
it('caps the number of results at `cap`', () => {
const hits = [
hit('p1', 't1', 'c1'),
hit('p2', 't2', 'c2'),
hit('p3', 't3', 'c3'),
hit('p4', 't4', 'c4'),
];
const accessibleSet = new Set(['p1', 'p2', 'p3', 'p4']);
const out = selectAccessibleHits(hits, accessibleSet, 2);
expect(out).toHaveLength(2);
expect(out.map((r) => r.id)).toEqual(['p1', 'p2']);
});
it('returns an empty list when accessibleSet is empty', () => {
const hits = [hit('p1', 't1', 'c1'), hit('p2', 't2', 'c2')];
expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]);
});
it('defaults a null title to an empty string', () => {
const out = selectAccessibleHits(
[hit('p1', null, 'body')],
new Set(['p1']),
10,
);
expect(out).toEqual([{ id: 'p1', title: '', snippet: 'body' }]);
});
/**
* Regression sentinel for the leak guard: if the access intersection
* (`accessibleSet.has(hit.pageId)` filter) were removed, the restricted page
* would slip into the output and THIS assertion would fail. Documents that
* the filter — not the dedupe/cap — is what enforces page-level access.
*/
it('FAILS if the access intersection is removed (sentinel)', () => {
const hits = [hit('restricted', 'Secret', 'leaked')];
// Page is NOT accessible -> output MUST be empty. Without the intersection
// check the function would return the restricted hit and break this test.
expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]);
});
});

View File

@@ -0,0 +1,176 @@
import { ShareService } from './share.service';
// Exercises the REAL ShareService comment-mark stripping for shared content via
// the smallest reachable seam: updatePublicAttachments -> prepareContentForShare
// -> removeMarkTypeFromDoc(doc, 'comment'). This is a documented threat-model
// item: `comment` marks are internal-team metadata (existence, location, count,
// resolved state, and the comment ids themselves) and MUST NOT leak to anonymous
// public-share viewers.
//
// prepareContentForShare is private and the page-load path (getSharedPage) needs
// a full DB-backed resolveReadableSharePage; updatePublicAttachments is the
// smallest public seam that runs the exact same sanitization on a doc we control.
// Only the workspace toggle (workspaceRepo.findById) and token service are
// touched, both mocked — no DB setup required.
const WS = 'ws-1';
const PAGE = 'page-1';
function buildService() {
const shareRepo = { findById: jest.fn() };
const pageRepo = { findById: jest.fn() };
const pagePermissionRepo = {
hasRestrictedAncestor: jest.fn(async () => false),
};
const tokenService = {
generateAttachmentToken: jest.fn(async () => 'tok'),
};
// htmlEmbed toggle ON so the embed strip is a no-op and we isolate the
// comment-mark strip behaviour.
const workspaceRepo = {
findById: jest.fn(async () => ({ id: WS, settings: { htmlEmbed: true } })),
};
return new ShareService(
shareRepo as any,
pageRepo as any,
pagePermissionRepo as any,
{} as any, // db (unused on this path)
tokenService as any,
{} as any, // transclusionService (unused)
workspaceRepo as any,
);
}
// A paragraph whose text carries a `comment` mark with a comment id.
function commentedText(text: string, commentId: string) {
return {
type: 'text',
text,
marks: [{ type: 'comment', attrs: { commentId, resolved: false } }],
};
}
async function sanitize(content: any) {
const service = buildService();
return service.updatePublicAttachments({
id: PAGE,
workspaceId: WS,
content,
} as any);
}
function countCommentMarks(doc: any): number {
let count = 0;
const walk = (node: any) => {
if (!node || typeof node !== 'object') return;
if (Array.isArray(node.marks)) {
for (const mark of node.marks) {
if (mark?.type === 'comment') count++;
}
}
if (Array.isArray(node.content)) node.content.forEach(walk);
};
walk(doc);
return count;
}
describe('ShareService comment-mark stripping for public shares (real code)', () => {
it('strips a top-level comment mark and preserves the visible text', async () => {
const content = {
type: 'doc',
content: [
{
type: 'paragraph',
content: [commentedText('secret-reviewed body', 'cmt-top-1')],
},
],
};
const out = await sanitize(content);
expect(countCommentMarks(out)).toBe(0);
// The text itself survives; only the internal mark is removed.
expect(JSON.stringify(out)).toContain('secret-reviewed body');
// The comment id must not appear anywhere in the serialized output.
expect(JSON.stringify(out)).not.toContain('cmt-top-1');
});
it('strips comment marks nested inside columns and callouts', async () => {
const content = {
type: 'doc',
content: [
{
type: 'columns',
content: [
{
type: 'column',
content: [
{
type: 'paragraph',
content: [commentedText('col body', 'cmt-col-1')],
},
],
},
{
type: 'column',
content: [
{
type: 'callout',
content: [
{
type: 'paragraph',
content: [commentedText('callout body', 'cmt-callout-1')],
},
],
},
],
},
],
},
],
};
const out = await sanitize(content);
expect(countCommentMarks(out)).toBe(0);
const serialized = JSON.stringify(out);
// Visible content of both nested branches survives.
expect(serialized).toContain('col body');
expect(serialized).toContain('callout body');
// No nested comment id leaks.
expect(serialized).not.toContain('cmt-col-1');
expect(serialized).not.toContain('cmt-callout-1');
});
it('strips every comment mark when multiple coexist (count goes to zero)', async () => {
const content = {
type: 'doc',
content: [
{
type: 'paragraph',
content: [
commentedText('a', 'cmt-a'),
{ type: 'text', text: ' plain ' },
commentedText('b', 'cmt-b'),
],
},
{
type: 'paragraph',
content: [commentedText('c', 'cmt-c')],
},
],
};
// Sanity: the input genuinely has 3 comment marks before sanitization.
expect(countCommentMarks(content)).toBe(3);
const out = await sanitize(content);
expect(countCommentMarks(out)).toBe(0);
const serialized = JSON.stringify(out);
for (const id of ['cmt-a', 'cmt-b', 'cmt-c']) {
expect(serialized).not.toContain(id);
}
});
});

View File

@@ -0,0 +1,41 @@
import { ShareSeoController } from './share-seo.controller';
// Pins ShareSeoController.extractPageSlugId — the slug→pageId resolver used to
// look up a shared page from the public URL. A full UUID must pass through
// untouched; a "title-slug-<id>" must yield the trailing token; a single token
// is returned as-is; falsy input yields undefined. The method does not touch
// `this`, so the controller can be constructed with null collaborators.
function buildController(): ShareSeoController {
return new ShareSeoController(null as any, null as any, null as any);
}
describe('ShareSeoController.extractPageSlugId', () => {
const controller = buildController();
it('returns a full UUID unchanged', () => {
const uuid = '550e8400-e29b-41d4-a716-446655440000';
expect(controller.extractPageSlugId(uuid)).toBe(uuid);
});
it('returns the trailing token of a title-slug-id form', () => {
expect(controller.extractPageSlugId('my-page-title-abc123')).toBe('abc123');
});
it('returns a single token (no hyphen) as-is', () => {
expect(controller.extractPageSlugId('abc123')).toBe('abc123');
});
it('returns the last segment for a two-token slug', () => {
expect(controller.extractPageSlugId('hello-world')).toBe('world');
});
it('returns undefined for an empty string (falsy guard)', () => {
expect(controller.extractPageSlugId('')).toBeUndefined();
});
it('returns undefined for null/undefined input', () => {
expect(controller.extractPageSlugId(undefined as any)).toBeUndefined();
expect(controller.extractPageSlugId(null as any)).toBeUndefined();
});
});

View File

@@ -7,8 +7,8 @@ import { validate as isValidUUID } from 'uuid';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { EnvironmentService } from '../../integrations/environment/environment.service';
import { Workspace } from '@docmost/db/types/entity.types';
import { htmlEscape } from '../../common/helpers/html-escaper';
import { injectTrackerHead } from './inject-tracker-head.util';
import { buildShareMetaHtml } from './share-seo.util';
@Controller('share')
export class ShareSeoController {
@@ -72,24 +72,11 @@ export class ShareSeoController {
return this.sendIndex(indexFilePath, res);
}
const rawTitle = htmlEscape(share?.sharedPage.title ?? 'untitled');
const metaTitle =
rawTitle.length > 80 ? `${rawTitle.slice(0, 77)}` : rawTitle;
const metaTagVar = '<!--meta-tags-->';
const metaTags = [
`<meta property="og:title" content="${metaTitle}" />`,
`<meta property="twitter:title" content="${metaTitle}" />`,
!share.searchIndexing ? `<meta name="robots" content="noindex" />` : '',
]
.filter(Boolean)
.join('\n ');
const html = fs.readFileSync(indexFilePath, 'utf8');
let transformedHtml = html
.replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
.replace(metaTagVar, metaTags);
let transformedHtml = buildShareMetaHtml(html, {
title: share?.sharedPage.title,
searchIndexing: share.searchIndexing,
});
// Deliberate same-origin tracker surface: this is the ONE place where an
// admin-authored analytics/tracker snippet (settings.trackerHead) is

View File

@@ -0,0 +1,126 @@
import { buildShareMetaHtml } from './share-seo.util';
// Pins the SEO meta-HTML builder for public share pages (extracted verbatim from
// ShareSeoController.getShare). The shared page title is attacker-influenceable,
// so the security-critical invariant is that it is htmlEscape'd before being
// interpolated into BOTH the <title> element and the content="..." attributes of
// the og:/twitter: meta tags. The XSS tests below MUST fail if the htmlEscape
// step is ever removed.
// A minimal index.html shell carrying the two placeholders the builder rewrites:
// the <title> element and the <!--meta-tags--> marker.
const INDEX =
'<html><head><title>App</title>\n <!--meta-tags--></head><body>x</body></html>';
describe('buildShareMetaHtml', () => {
describe('XSS: title escaping', () => {
it('fully htmlEscapes a </title><script> breakout in BOTH <title> and og:/twitter: meta', () => {
const out = buildShareMetaHtml(INDEX, {
title: '</title><script>alert(1)</script>',
searchIndexing: true,
});
// The raw script tag must NEVER appear anywhere in the output — it would
// execute in the share origin. This assertion fails if htmlEscape is removed.
expect(out).not.toContain('<script>');
expect(out).not.toContain('</title><script>');
// The dangerous chars are escaped to entities instead.
expect(out).toContain('&lt;script&gt;alert(1)&lt;/script&gt;');
// og:title and twitter:title both carry the escaped (not raw) value.
expect(out).toContain(
'<meta property="og:title" content="&lt;/title&gt;&lt;script&gt;alert(1)&lt;/script&gt;" />',
);
expect(out).toContain(
'<meta property="twitter:title" content="&lt;/title&gt;&lt;script&gt;alert(1)&lt;/script&gt;" />',
);
});
it('escapes a double quote to &quot; so the content="..." attribute cannot be broken', () => {
const out = buildShareMetaHtml(INDEX, {
title: 'a"onmouseover="alert(1)',
searchIndexing: true,
});
// A raw `"` would close the content attribute and inject a new attribute.
expect(out).not.toContain('content="a"onmouseover=');
expect(out).toContain('&quot;');
expect(out).toContain(
'<meta property="og:title" content="a&quot;onmouseover=&quot;alert(1)" />',
);
});
});
describe('title truncation (limit 80, applied AFTER escaping)', () => {
it('leaves a title of exactly 80 chars untouched (no ellipsis)', () => {
const title = 'a'.repeat(80);
const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
expect(out).toContain(`<title>${title}</title>`);
expect(out).not.toContain('…');
});
it('truncates a >80 char title to 77 chars + an ellipsis (78 total)', () => {
const title = 'b'.repeat(100);
const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
const expected = `${'b'.repeat(77)}`;
expect(out).toContain(`<title>${expected}</title>`);
// 77 visible chars + the single ellipsis glyph.
expect(expected.length).toBe(78);
expect(out).toContain(
`<meta property="og:title" content="${expected}" />`,
);
});
it('truncation acts on the ESCAPED string: each < becomes &lt; first, then slice(0,77)', () => {
// 100 "<" chars escape to 100 * "&lt;" = 400 chars, then truncate to 77 + …
const title = '<'.repeat(100);
const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
const escaped = '&lt;'.repeat(100);
const expected = `${escaped.slice(0, 77)}`;
expect(out).toContain(`<title>${expected}</title>`);
// No raw "<" from the title leaks through.
expect(out).not.toContain('<<');
});
});
describe('robots noindex meta', () => {
it('searchIndexing=false emits <meta name="robots" content="noindex">', () => {
const out = buildShareMetaHtml(INDEX, {
title: 'page',
searchIndexing: false,
});
expect(out).toContain('<meta name="robots" content="noindex" />');
});
it('searchIndexing=true emits NO robots tag', () => {
const out = buildShareMetaHtml(INDEX, {
title: 'page',
searchIndexing: true,
});
expect(out).not.toContain('robots');
expect(out).not.toContain('noindex');
});
});
describe('null / missing title fallback', () => {
it('falls back to "untitled" when title is null', () => {
const out = buildShareMetaHtml(INDEX, {
title: null as unknown as string,
searchIndexing: true,
});
expect(out).toContain('<title>untitled</title>');
expect(out).toContain('<meta property="og:title" content="untitled" />');
});
});
describe('placeholder replacement', () => {
it('replaces the original <title> and the <!--meta-tags--> marker', () => {
const out = buildShareMetaHtml(INDEX, {
title: 'Hello',
searchIndexing: true,
});
expect(out).not.toContain('<!--meta-tags-->');
expect(out).not.toContain('<title>App</title>');
expect(out).toContain('<title>Hello</title>');
});
});
});

View File

@@ -0,0 +1,40 @@
import { htmlEscape } from '../../common/helpers/html-escaper';
/**
* Build the SEO-enriched index HTML for a publicly shared page.
*
* This is the pure, side-effect-free core of ShareSeoController.getShare: given
* the raw index.html and the share's title + searchIndexing flag, it returns the
* transformed HTML with the <title> replaced and the og:/twitter:/robots meta
* tags injected at the <!--meta-tags--> marker.
*
* SECURITY: the title is attacker-influenceable (it is the shared page title),
* so it MUST be htmlEscape'd before being interpolated into both the <title>
* element and the content="..." attributes of the meta tags. Removing the
* escaping would allow a page title to break out of the attribute / element and
* inject markup into the share origin.
*/
export function buildShareMetaHtml(
indexHtml: string,
opts: { title: string | null; searchIndexing: boolean },
): string {
// Escape FIRST, then truncate, so the truncation acts on the safe string and
// can never split a multi-char HTML entity (matches the original controller).
const rawTitle = htmlEscape(opts.title ?? 'untitled');
const metaTitle =
rawTitle.length > 80 ? `${rawTitle.slice(0, 77)}` : rawTitle;
const metaTagVar = '<!--meta-tags-->';
const metaTags = [
`<meta property="og:title" content="${metaTitle}" />`,
`<meta property="twitter:title" content="${metaTitle}" />`,
!opts.searchIndexing ? `<meta name="robots" content="noindex" />` : '',
]
.filter(Boolean)
.join('\n ');
return indexHtml
.replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
.replace(metaTagVar, metaTags);
}

View File

@@ -0,0 +1,62 @@
import { updateAttachmentAttr } from './share.util';
// Pins updateAttachmentAttr — the per-attachment URL rewriter used when serving
// shared page content. Internal attachment paths (/files… and /api/files…) must
// be rewritten to the public form with a scoped jwt appended; anything else
// (external URLs, null) must be left untouched so a public viewer's signed token
// is never attached to a foreign origin. The function only reads/writes
// node.attrs[attr], so a plain object stands in for the real ProseMirror Node.
function fakeNode(attrs: Record<string, any>) {
return { attrs } as any;
}
const JWT = 'TOK';
describe('updateAttachmentAttr', () => {
it('rewrites a /files path to /files/public/ with ?jwt=', () => {
const node = fakeNode({ src: '/files/x.png' });
updateAttachmentAttr(node, 'src', JWT);
expect(node.attrs.src).toBe(`/files/public/x.png?jwt=${JWT}`);
});
it('rewrites an /api/files path (keeps the /api prefix, inserts public)', () => {
const node = fakeNode({ src: '/api/files/y.png' });
updateAttachmentAttr(node, 'src', JWT);
expect(node.attrs.src).toBe(`/api/files/public/y.png?jwt=${JWT}`);
});
it('uses &jwt= when the src already carries a query string', () => {
const node = fakeNode({ src: '/files/x.png?w=100' });
updateAttachmentAttr(node, 'src', JWT);
expect(node.attrs.src).toBe(`/files/public/x.png?w=100&jwt=${JWT}`);
});
it('leaves an external https URL untouched (no token leak to a foreign origin)', () => {
const external = 'https://example.com/x.png';
const node = fakeNode({ src: external });
updateAttachmentAttr(node, 'src', JWT);
expect(node.attrs.src).toBe(external);
});
it('leaves a null src untouched', () => {
const node = fakeNode({ src: null });
updateAttachmentAttr(node, 'src', JWT);
expect(node.attrs.src).toBeNull();
});
it('rewrites the `url` attr variant the same way', () => {
const node = fakeNode({ url: '/files/doc.pdf' });
updateAttachmentAttr(node, 'url', JWT);
expect(node.attrs.url).toBe(`/files/public/doc.pdf?jwt=${JWT}`);
});
it('only touches the requested attr, leaving the other attr alone', () => {
const external = 'https://cdn.example.com/a.png';
const node = fakeNode({ src: '/files/a.png', url: external });
updateAttachmentAttr(node, 'src', JWT);
expect(node.attrs.src).toBe(`/files/public/a.png?jwt=${JWT}`);
// `url` was not requested, so it is unchanged.
expect(node.attrs.url).toBe(external);
});
});