test(refactor-tail): extract pure cores + cover collab/share/ai-chat/client gate
Batches 6-9: behaviour-preserving extractions of testable pure cores plus the tests they unblock, and a fix for the broken client test environment. Full suites green: server 113 suites / 1117 + 1 todo, client 30 files / 338. client (R0 infra): - vitest.setup.ts: in-memory localStorage/sessionStorage Storage stub wired via setupFiles. Unblocks menu-items.gating.test.ts (was 9 failing) -> client suite fully green. + menu-items.suggestions.test.ts (getSuggestionItems filter/sort). share: - extract buildShareMetaHtml (share-seo.util.ts) from the SEO controller; tests for reflected-XSS escaping in <title>/og/twitter meta, noindex, truncation; extractPageSlugId; updateAttachmentAttr; prepareContentForShare comment-strip (anonymous-viewer metadata-leak guard). ai-chat (security extractions): - selectAccessibleHits: CASL post-filter for semantic search (restricted page in an accessible space must NOT leak to the agent). - validateResolvedAddresses: SSRF connect-time guard (block if ANY resolved address is private). - resolveAudioFormat: mime whitelist (dead `?? 'webm'` fallback dropped, set unchanged). + mcp-servers toView header-leak guard, MCP tool namespacing. collaboration (data-loss area): - extract computeHistoryJob (pins the "agent delay MUST stay 0" invariant) and resolveSource. Integration: onAuthenticate read-only matrix (collab auth bypass), HistoryProcessor (contributor restore on save failure), onStoreDocument Approach-A boundary snapshot (human revision pinned before agent overwrite). Reviewed (APPROVE WITH SUGGESTIONS): extractions behaviour-preserving, security tests mutation-resistant. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { getSuggestionItems } from "./menu-items";
|
||||
|
||||
// Coverage for the filter/sort half of `getSuggestionItems` (distinct from the
|
||||
// HTML-embed gating suite). A slash query is matched against each item three
|
||||
// ways — fuzzy on the title, substring on the description, and substring on the
|
||||
// searchTerms — and matched items are sorted so title-substring hits float to
|
||||
// the top of their group. We also cover `excludeItems`.
|
||||
//
|
||||
// `getSuggestionItems` -> `isHtmlEmbedFeatureEnabled` reads the persisted
|
||||
// `currentUser` localStorage entry, so a working in-memory Storage stub is a
|
||||
// prerequisite (installed by vitest.setup.ts). We persist a `currentUser` with
|
||||
// the HTML-embed toggle OFF (the production default) so the gated "HTML embed"
|
||||
// item never leaks into these non-HTML queries.
|
||||
|
||||
const KEY = "currentUser";
|
||||
|
||||
function flatTitles(groups: ReturnType<typeof getSuggestionItems>): string[] {
|
||||
return Object.values(groups)
|
||||
.flat()
|
||||
.map((item) => item.title);
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
// Default workspace state: HTML-embed feature OFF (matches production default).
|
||||
localStorage.setItem(KEY, JSON.stringify({ workspace: { settings: {} } }));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
localStorage.clear();
|
||||
});
|
||||
|
||||
describe("getSuggestionItems — filter and sort", () => {
|
||||
it("fuzzy-matches a title (non-contiguous characters)", () => {
|
||||
// "tdo" is not a substring of "to-do list" but matches fuzzily (t..d..o).
|
||||
const titles = flatTitles(getSuggestionItems({ query: "tdo" }));
|
||||
expect(titles).toContain("To-do list");
|
||||
});
|
||||
|
||||
it("matches via the description when the title does not match", () => {
|
||||
// "numbering" only appears in the description "Create a list with numbering.",
|
||||
// not in the "Numbered list" title nor its searchTerms.
|
||||
const titles = flatTitles(getSuggestionItems({ query: "numbering" }));
|
||||
expect(titles).toContain("Numbered list");
|
||||
});
|
||||
|
||||
it("matches via searchTerms when title and description do not match", () => {
|
||||
// "blockquote" is only present in the "Quote" item's searchTerms.
|
||||
const titles = flatTitles(getSuggestionItems({ query: "blockquote" }));
|
||||
expect(titles).toContain("Quote");
|
||||
});
|
||||
|
||||
it("sorts title-substring matches before non-title (description) matches", () => {
|
||||
// For "page": several titles contain "page" (e.g. "Page break"), while
|
||||
// "Synced block" matches only through its description (".. across pages.").
|
||||
// The sort tie-break must place every title hit ahead of the non-title hit.
|
||||
const titles = flatTitles(getSuggestionItems({ query: "page" }));
|
||||
|
||||
const syncedIndex = titles.indexOf("Synced block");
|
||||
const pageBreakIndex = titles.indexOf("Page break");
|
||||
|
||||
// Sanity: both items survived the filter for this query.
|
||||
expect(syncedIndex).toBeGreaterThanOrEqual(0);
|
||||
expect(pageBreakIndex).toBeGreaterThanOrEqual(0);
|
||||
|
||||
// The title match ("Page break") sorts before the description-only match.
|
||||
expect(pageBreakIndex).toBeLessThan(syncedIndex);
|
||||
});
|
||||
|
||||
it("removes a named item via excludeItems", () => {
|
||||
const withBullet = flatTitles(getSuggestionItems({ query: "list" }));
|
||||
expect(withBullet).toContain("Bullet list");
|
||||
|
||||
const withoutBullet = flatTitles(
|
||||
getSuggestionItems({
|
||||
query: "list",
|
||||
excludeItems: new Set(["Bullet list"]),
|
||||
}),
|
||||
);
|
||||
expect(withoutBullet).not.toContain("Bullet list");
|
||||
// Other "list" matches remain unaffected by the exclusion.
|
||||
expect(withoutBullet).toContain("Numbered list");
|
||||
});
|
||||
});
|
||||
@@ -12,6 +12,6 @@ export default defineConfig({
|
||||
test: {
|
||||
environment: 'jsdom',
|
||||
globals: true,
|
||||
setupFiles: [],
|
||||
setupFiles: ['./vitest.setup.ts'],
|
||||
},
|
||||
});
|
||||
|
||||
51
apps/client/vitest.setup.ts
Normal file
51
apps/client/vitest.setup.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
// Vitest global setup (test-infra only — no production app source).
|
||||
//
|
||||
// Under Node 25 / jsdom 25 / vitest 4 the jsdom `localStorage` exposed on the
|
||||
// global is not a usable Storage: its methods (`setItem`/`getItem`/...) are not
|
||||
// callable, so any code touching `localStorage` throws `... is not a function`.
|
||||
// Production code such as `isHtmlEmbedFeatureEnabled()` reads
|
||||
// `localStorage.getItem("currentUser")`, which made dependent tests fail.
|
||||
//
|
||||
// We install a correct in-memory Storage stub on the global BEFORE tests run so
|
||||
// the Web Storage contract holds: string coercion of keys/values, `null` for
|
||||
// missing keys, working `length`/`key(index)`, and `clear()`.
|
||||
import { vi } from "vitest";
|
||||
|
||||
// Minimal, spec-faithful in-memory implementation of the Web Storage API.
|
||||
function createStorage(): Storage {
|
||||
let store = new Map<string, string>();
|
||||
|
||||
const storage: Storage = {
|
||||
get length(): number {
|
||||
return store.size;
|
||||
},
|
||||
clear(): void {
|
||||
store = new Map<string, string>();
|
||||
},
|
||||
getItem(key: string): string | null {
|
||||
// Missing keys must return `null`, not `undefined`.
|
||||
const value = store.get(String(key));
|
||||
return value === undefined ? null : value;
|
||||
},
|
||||
setItem(key: string, value: string): void {
|
||||
// Web Storage coerces both key and value to strings.
|
||||
store.set(String(key), String(value));
|
||||
},
|
||||
removeItem(key: string): void {
|
||||
store.delete(String(key));
|
||||
},
|
||||
key(index: number): string | null {
|
||||
// Insertion order matches Map iteration order; out-of-range => null.
|
||||
const keys = Array.from(store.keys());
|
||||
return index >= 0 && index < keys.length ? keys[index] : null;
|
||||
},
|
||||
};
|
||||
|
||||
return storage;
|
||||
}
|
||||
|
||||
// Install on the jsdom global. `vi.stubGlobal` also reflects onto `window`
|
||||
// (jsdom shares `globalThis` and `window`), so both `localStorage` and
|
||||
// `window.localStorage` resolve to the same working stub.
|
||||
vi.stubGlobal("localStorage", createStorage());
|
||||
vi.stubGlobal("sessionStorage", createStorage());
|
||||
@@ -0,0 +1,211 @@
|
||||
import {
|
||||
NotFoundException,
|
||||
UnauthorizedException,
|
||||
} from '@nestjs/common';
|
||||
import { AuthenticationExtension } from './authentication.extension';
|
||||
import { SpaceRole } from '../../common/helpers/types/permission';
|
||||
import { JwtType } from '../../core/auth/dto/jwt-payload';
|
||||
|
||||
/**
|
||||
* Unit tests for the collab read-only downgrade matrix in
|
||||
* `AuthenticationExtension.onAuthenticate`. This is a security boundary: a wrong
|
||||
* branch here is either a collab-auth bypass (writer on a page they may only
|
||||
* read) or a denial. We mock every repo and inspect both the thrown exception
|
||||
* type and the `connectionConfig.readOnly` flag the extension mutates.
|
||||
*/
|
||||
|
||||
const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
|
||||
const USER_ID = 'user-1';
|
||||
const WORKSPACE_ID = 'ws-1';
|
||||
const SPACE_ID = 'space-1';
|
||||
|
||||
const buildUser = (overrides: Partial<any> = {}) => ({
|
||||
id: USER_ID,
|
||||
workspaceId: WORKSPACE_ID,
|
||||
deactivatedAt: null,
|
||||
deletedAt: null,
|
||||
name: 'Alice',
|
||||
avatarUrl: null,
|
||||
...overrides,
|
||||
});
|
||||
|
||||
const buildPage = (overrides: Partial<any> = {}) => ({
|
||||
id: PAGE_ID,
|
||||
spaceId: SPACE_ID,
|
||||
workspaceId: WORKSPACE_ID,
|
||||
deletedAt: null,
|
||||
...overrides,
|
||||
});
|
||||
|
||||
// Default jwt payload: a plain human collab token (no agent provenance claims).
|
||||
const buildJwt = (overrides: Partial<any> = {}) => ({
|
||||
sub: USER_ID,
|
||||
workspaceId: WORKSPACE_ID,
|
||||
type: JwtType.COLLAB,
|
||||
...overrides,
|
||||
});
|
||||
|
||||
describe('AuthenticationExtension.onAuthenticate', () => {
|
||||
let ext: AuthenticationExtension;
|
||||
let tokenService: { verifyJwt: jest.Mock };
|
||||
let userRepo: { findById: jest.Mock };
|
||||
let pageRepo: { findById: jest.Mock };
|
||||
let spaceMemberRepo: { getUserSpaceRoles: jest.Mock };
|
||||
let pagePermissionRepo: { canUserEditPage: jest.Mock };
|
||||
|
||||
// Build the hocuspocus onAuthenticate payload. connectionConfig.readOnly
|
||||
// starts false; the extension flips it to true on a read-only downgrade.
|
||||
const buildData = (token = 'tok') => ({
|
||||
documentName: `page.${PAGE_ID}`,
|
||||
token,
|
||||
connectionConfig: { readOnly: false },
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
tokenService = { verifyJwt: jest.fn().mockResolvedValue(buildJwt()) };
|
||||
userRepo = { findById: jest.fn().mockResolvedValue(buildUser()) };
|
||||
pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) };
|
||||
spaceMemberRepo = {
|
||||
getUserSpaceRoles: jest
|
||||
.fn()
|
||||
.mockResolvedValue([{ userId: USER_ID, role: SpaceRole.WRITER }]),
|
||||
};
|
||||
pagePermissionRepo = {
|
||||
// No page-level restriction by default → defer to space role.
|
||||
canUserEditPage: jest.fn().mockResolvedValue({
|
||||
hasAnyRestriction: false,
|
||||
canAccess: true,
|
||||
canEdit: true,
|
||||
}),
|
||||
};
|
||||
|
||||
ext = new AuthenticationExtension(
|
||||
tokenService as any,
|
||||
userRepo as any,
|
||||
pageRepo as any,
|
||||
spaceMemberRepo as any,
|
||||
pagePermissionRepo as any,
|
||||
);
|
||||
// Silence the extension's logger (it warns/debugs on denial branches).
|
||||
jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined);
|
||||
jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined);
|
||||
});
|
||||
|
||||
it('invalid token → UnauthorizedException (no repo lookups happen)', async () => {
|
||||
tokenService.verifyJwt.mockRejectedValue(new Error('bad sig'));
|
||||
|
||||
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
|
||||
UnauthorizedException,
|
||||
);
|
||||
expect(userRepo.findById).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('user not found → Unauthorized', async () => {
|
||||
userRepo.findById.mockResolvedValue(null);
|
||||
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
|
||||
UnauthorizedException,
|
||||
);
|
||||
});
|
||||
|
||||
it('user disabled (deactivatedAt set) → Unauthorized', async () => {
|
||||
userRepo.findById.mockResolvedValue(
|
||||
buildUser({ deactivatedAt: new Date() }),
|
||||
);
|
||||
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
|
||||
UnauthorizedException,
|
||||
);
|
||||
});
|
||||
|
||||
it('page not found → NotFoundException', async () => {
|
||||
pageRepo.findById.mockResolvedValue(null);
|
||||
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
|
||||
NotFoundException,
|
||||
);
|
||||
});
|
||||
|
||||
it('no space role → Unauthorized', async () => {
|
||||
spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([]);
|
||||
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
|
||||
UnauthorizedException,
|
||||
);
|
||||
});
|
||||
|
||||
it('page-level restriction canAccess=false → Unauthorized (restricted-page denial)', async () => {
|
||||
pagePermissionRepo.canUserEditPage.mockResolvedValue({
|
||||
hasAnyRestriction: true,
|
||||
canAccess: false,
|
||||
canEdit: false,
|
||||
});
|
||||
await expect(ext.onAuthenticate(buildData() as any)).rejects.toThrow(
|
||||
UnauthorizedException,
|
||||
);
|
||||
});
|
||||
|
||||
it('restriction canAccess=true & canEdit=false → readOnly (no restricted-page write)', async () => {
|
||||
pagePermissionRepo.canUserEditPage.mockResolvedValue({
|
||||
hasAnyRestriction: true,
|
||||
canAccess: true,
|
||||
canEdit: false,
|
||||
});
|
||||
const data = buildData();
|
||||
const ctx = await ext.onAuthenticate(data as any);
|
||||
|
||||
expect(data.connectionConfig.readOnly).toBe(true);
|
||||
expect(ctx.actor).toBe('user');
|
||||
});
|
||||
|
||||
it('restriction canAccess=true & canEdit=true → writable (readOnly stays false)', async () => {
|
||||
pagePermissionRepo.canUserEditPage.mockResolvedValue({
|
||||
hasAnyRestriction: true,
|
||||
canAccess: true,
|
||||
canEdit: true,
|
||||
});
|
||||
const data = buildData();
|
||||
await ext.onAuthenticate(data as any);
|
||||
|
||||
expect(data.connectionConfig.readOnly).toBe(false);
|
||||
});
|
||||
|
||||
it('no restriction + space READER → readOnly', async () => {
|
||||
spaceMemberRepo.getUserSpaceRoles.mockResolvedValue([
|
||||
{ userId: USER_ID, role: SpaceRole.READER },
|
||||
]);
|
||||
const data = buildData();
|
||||
await ext.onAuthenticate(data as any);
|
||||
|
||||
expect(data.connectionConfig.readOnly).toBe(true);
|
||||
});
|
||||
|
||||
it('no restriction + space WRITER → writable', async () => {
|
||||
const data = buildData();
|
||||
await ext.onAuthenticate(data as any);
|
||||
expect(data.connectionConfig.readOnly).toBe(false);
|
||||
});
|
||||
|
||||
it('soft-deleted page (deletedAt set) → readOnly even for a WRITER', async () => {
|
||||
// A writer must NOT be able to mutate a page in the trash via collab.
|
||||
pageRepo.findById.mockResolvedValue(buildPage({ deletedAt: new Date() }));
|
||||
const data = buildData();
|
||||
await ext.onAuthenticate(data as any);
|
||||
|
||||
expect(data.connectionConfig.readOnly).toBe(true);
|
||||
});
|
||||
|
||||
it('agent JWT (actor=agent + aiChatId) propagates into the connection context', async () => {
|
||||
tokenService.verifyJwt.mockResolvedValue(
|
||||
buildJwt({ actor: 'agent', aiChatId: 'chat-7' }),
|
||||
);
|
||||
const ctx = await ext.onAuthenticate(buildData() as any);
|
||||
|
||||
expect(ctx.actor).toBe('agent');
|
||||
expect(ctx.aiChatId).toBe('chat-7');
|
||||
expect(ctx.user.id).toBe(USER_ID);
|
||||
});
|
||||
|
||||
it('human JWT (no provenance claims) → actor=user, aiChatId=null', async () => {
|
||||
const ctx = await ext.onAuthenticate(buildData() as any);
|
||||
|
||||
expect(ctx.actor).toBe('user');
|
||||
expect(ctx.aiChatId).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,105 @@
|
||||
import {
|
||||
computeHistoryJob,
|
||||
resolveSource,
|
||||
} from './persistence.extension';
|
||||
import {
|
||||
HISTORY_FAST_INTERVAL,
|
||||
HISTORY_FAST_THRESHOLD,
|
||||
HISTORY_INTERVAL,
|
||||
} from '../constants';
|
||||
|
||||
// A fixed clock + fixed createdAt make pageAge deterministic.
|
||||
const NOW = 1_700_000_000_000;
|
||||
const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
|
||||
|
||||
// Build a minimal page whose age (NOW - createdAt) is exactly `ageMs`.
|
||||
const pageAged = (ageMs: number) => ({
|
||||
id: PAGE_ID,
|
||||
createdAt: new Date(NOW - ageMs),
|
||||
});
|
||||
|
||||
describe('computeHistoryJob', () => {
|
||||
it('agent edit → delay MUST be 0 and job id is source-keyed', () => {
|
||||
// INVARIANT (§15 H2 / persistence.extension): the agent delay MUST stay 0.
|
||||
// The worker re-reads the page row at run time, so any non-zero delay risks
|
||||
// snapshotting content a later human edit has already overwritten. This is
|
||||
// the load-bearing assertion of this spec — do not relax it.
|
||||
const { jobId, delay } = computeHistoryJob(pageAged(0), 'agent', NOW);
|
||||
expect(delay).toBe(0);
|
||||
expect(jobId).toBe(`${PAGE_ID}-agent`);
|
||||
});
|
||||
|
||||
it('agent edit on an OLD page is still delay 0 (age never applies to agents)', () => {
|
||||
// Even when the page is far older than the fast threshold, the agent path
|
||||
// must short-circuit to 0 — age-based debounce is a human-only concern.
|
||||
const { jobId, delay } = computeHistoryJob(
|
||||
pageAged(HISTORY_FAST_THRESHOLD + 60_000),
|
||||
'agent',
|
||||
NOW,
|
||||
);
|
||||
expect(delay).toBe(0);
|
||||
expect(jobId).toBe(`${PAGE_ID}-agent`);
|
||||
});
|
||||
|
||||
it('human edit on a YOUNG page (age < threshold) → fast interval, bare job id', () => {
|
||||
const { jobId, delay } = computeHistoryJob(
|
||||
pageAged(HISTORY_FAST_THRESHOLD - 1),
|
||||
'user',
|
||||
NOW,
|
||||
);
|
||||
expect(delay).toBe(HISTORY_FAST_INTERVAL);
|
||||
expect(jobId).toBe(PAGE_ID);
|
||||
});
|
||||
|
||||
it('human edit on an OLD page (age > threshold) → standard interval', () => {
|
||||
const { jobId, delay } = computeHistoryJob(
|
||||
pageAged(HISTORY_FAST_THRESHOLD + 1),
|
||||
'user',
|
||||
NOW,
|
||||
);
|
||||
expect(delay).toBe(HISTORY_INTERVAL);
|
||||
expect(jobId).toBe(PAGE_ID);
|
||||
});
|
||||
|
||||
it('boundary: pageAge EXACTLY === threshold takes the slow branch (the `<` is strict)', () => {
|
||||
// Off-by-one guard: the condition is `pageAge < HISTORY_FAST_THRESHOLD`, so
|
||||
// an age of exactly the threshold is NOT "fast" — it must use HISTORY_INTERVAL.
|
||||
const { delay } = computeHistoryJob(
|
||||
pageAged(HISTORY_FAST_THRESHOLD),
|
||||
'user',
|
||||
NOW,
|
||||
);
|
||||
expect(delay).toBe(HISTORY_INTERVAL);
|
||||
});
|
||||
|
||||
it('treats any non-"agent" source string as human', () => {
|
||||
// resolveSource only ever yields 'agent' | 'user', but guard the contract:
|
||||
// the agent branch keys strictly on === 'agent'.
|
||||
const { jobId, delay } = computeHistoryJob(pageAged(0), 'user', NOW);
|
||||
expect(delay).toBe(HISTORY_FAST_INTERVAL);
|
||||
expect(jobId).toBe(PAGE_ID);
|
||||
});
|
||||
});
|
||||
|
||||
describe('resolveSource (truth table)', () => {
|
||||
// (sticky, actor) → expected. Marker is OR of the sticky flag and actor==='agent'.
|
||||
it('sticky=false, actor=user → user', () => {
|
||||
expect(resolveSource(false, 'user')).toBe('user');
|
||||
});
|
||||
|
||||
it('sticky=true, actor=user → agent (sticky wins)', () => {
|
||||
expect(resolveSource(true, 'user')).toBe('agent');
|
||||
});
|
||||
|
||||
it('sticky=false, actor=agent → agent (current writer is the agent)', () => {
|
||||
expect(resolveSource(false, 'agent')).toBe('agent');
|
||||
});
|
||||
|
||||
it('sticky=true, actor=agent → agent', () => {
|
||||
expect(resolveSource(true, 'agent')).toBe('agent');
|
||||
});
|
||||
|
||||
it('sticky=false, actor=undefined → user (human collab path omits the claim)', () => {
|
||||
expect(resolveSource(false, undefined)).toBe('user');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,185 @@
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { PersistenceExtension } from './persistence.extension';
|
||||
import { tiptapExtensions } from '../collaboration.util';
|
||||
|
||||
/**
|
||||
* Integration test for `onStoreDocument`'s Approach-A boundary snapshot.
|
||||
*
|
||||
* The data-loss risk: when an AGENT store lands over a page whose persisted
|
||||
* state was authored by a HUMAN, the agent overwrites that human content. If we
|
||||
* do not pin the human revision as its own history version BEFORE the agent's
|
||||
* updatePage, the last human edit is lost. This test pins the ordering
|
||||
* (saveHistory(oldHumanPage) strictly before updatePage) and the idempotency
|
||||
* skip when content is unchanged.
|
||||
*
|
||||
* We pass a REAL Y.Doc as the `document` arg (so TiptapTransformer.fromYdoc
|
||||
* yields real content) and stub repos/queues + an executeTx-compatible db whose
|
||||
* transaction().execute() invokes the callback with a trx stub.
|
||||
*/
|
||||
|
||||
const PAGE_ID = '550e8400-e29b-41d4-a716-446655440000';
|
||||
const USER_ID = 'human-1';
|
||||
|
||||
// Build a real Y.Doc carrying the given tiptap JSON in the 'default' fragment.
|
||||
// hocuspocus augments the live document with broadcastStateless(); the bare
|
||||
// Y.Doc lacks it, so stub it for the post-store broadcast.
|
||||
const ydocFor = (json: any) => {
|
||||
const ydoc = TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
|
||||
(ydoc as any).broadcastStateless = jest.fn();
|
||||
return ydoc;
|
||||
};
|
||||
|
||||
const doc = (text: string) => ({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text }] }],
|
||||
});
|
||||
|
||||
describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot', () => {
|
||||
let ext: PersistenceExtension;
|
||||
let pageRepo: { findById: jest.Mock; updatePage: jest.Mock };
|
||||
let pageHistoryRepo: {
|
||||
saveHistory: jest.Mock;
|
||||
findPageLastHistory: jest.Mock;
|
||||
};
|
||||
let aiQueue: { add: jest.Mock };
|
||||
let historyQueue: { add: jest.Mock };
|
||||
let notificationQueue: { add: jest.Mock };
|
||||
let collabHistory: { addContributors: jest.Mock };
|
||||
let transclusionService: {
|
||||
syncPageTransclusions: jest.Mock;
|
||||
syncPageReferences: jest.Mock;
|
||||
syncPageTemplateReferences: jest.Mock;
|
||||
};
|
||||
let callOrder: string[];
|
||||
|
||||
// db whose transaction().execute(fn) runs fn with a trx stub — this lets the
|
||||
// real executeTx() helper drive the callback without a database.
|
||||
const trxStub = { __trx: true };
|
||||
const db = {
|
||||
transaction: () => ({
|
||||
execute: (fn: (trx: any) => Promise<any>) => fn(trxStub),
|
||||
}),
|
||||
};
|
||||
|
||||
// The persisted page row the transaction reads (OLD, human-authored state).
|
||||
const persistedHumanPage = (newAgentText: string) => ({
|
||||
id: PAGE_ID,
|
||||
slugId: 'slug-1',
|
||||
spaceId: 'space-1',
|
||||
workspaceId: 'ws-1',
|
||||
creatorId: 'creator-1',
|
||||
contributorIds: ['creator-1'],
|
||||
createdAt: new Date('2020-01-01T00:00:00Z'),
|
||||
lastUpdatedSource: 'user', // prior revision was human
|
||||
// content differs from the new agent doc so the update branch runs.
|
||||
content: doc('OLD HUMAN'),
|
||||
_newAgentText: newAgentText,
|
||||
});
|
||||
|
||||
const buildData = (document: any, actor: 'user' | 'agent') => ({
|
||||
documentName: `page.${PAGE_ID}`,
|
||||
document,
|
||||
context: { user: { id: USER_ID, name: 'Alice' }, actor },
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
callOrder = [];
|
||||
pageRepo = {
|
||||
findById: jest.fn(),
|
||||
updatePage: jest.fn().mockImplementation(async () => {
|
||||
callOrder.push('updatePage');
|
||||
}),
|
||||
};
|
||||
pageHistoryRepo = {
|
||||
saveHistory: jest.fn().mockImplementation(async () => {
|
||||
callOrder.push('saveHistory');
|
||||
}),
|
||||
findPageLastHistory: jest.fn().mockResolvedValue(null),
|
||||
};
|
||||
aiQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
||||
historyQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
||||
notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
||||
collabHistory = { addContributors: jest.fn().mockResolvedValue(undefined) };
|
||||
transclusionService = {
|
||||
syncPageTransclusions: jest.fn().mockResolvedValue(undefined),
|
||||
syncPageReferences: jest.fn().mockResolvedValue(undefined),
|
||||
syncPageTemplateReferences: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
||||
ext = new PersistenceExtension(
|
||||
pageRepo as any,
|
||||
pageHistoryRepo as any,
|
||||
db as any,
|
||||
aiQueue as any,
|
||||
historyQueue as any,
|
||||
notificationQueue as any,
|
||||
collabHistory as any,
|
||||
transclusionService as any,
|
||||
);
|
||||
jest.spyOn(ext['logger'], 'debug').mockImplementation(() => undefined);
|
||||
jest.spyOn(ext['logger'], 'warn').mockImplementation(() => undefined);
|
||||
jest.spyOn(ext['logger'], 'error').mockImplementation(() => undefined);
|
||||
});
|
||||
|
||||
it('agent store over a human page pins saveHistory(oldHumanPage) BEFORE updatePage', async () => {
|
||||
const document = ydocFor(doc('NEW AGENT CONTENT'));
|
||||
pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT'));
|
||||
// No human baseline snapshot exists yet → boundary snapshot must run.
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
|
||||
|
||||
await ext.onStoreDocument(buildData(document, 'agent') as any);
|
||||
|
||||
// Boundary snapshot fired, and strictly before the agent overwrite.
|
||||
expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1);
|
||||
const saved = pageHistoryRepo.saveHistory.mock.calls[0][0];
|
||||
expect(saved.content).toEqual(doc('OLD HUMAN')); // the OLD human revision
|
||||
expect(callOrder).toEqual(['saveHistory', 'updatePage']);
|
||||
|
||||
// The agent's new content is tagged 'agent' on the update.
|
||||
const update = pageRepo.updatePage.mock.calls[0][0];
|
||||
expect(update.lastUpdatedSource).toBe('agent');
|
||||
});
|
||||
|
||||
it('skips the boundary snapshot when the human baseline is already pinned', async () => {
|
||||
const document = ydocFor(doc('NEW AGENT CONTENT'));
|
||||
pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT'));
|
||||
// Latest history already equals the current human state → no duplicate.
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
|
||||
content: doc('OLD HUMAN'),
|
||||
});
|
||||
|
||||
await ext.onStoreDocument(buildData(document, 'agent') as any);
|
||||
|
||||
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
||||
expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('human store does NOT trigger the boundary snapshot (no source transition)', async () => {
|
||||
const document = ydocFor(doc('NEW HUMAN CONTENT'));
|
||||
pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW HUMAN CONTENT'));
|
||||
|
||||
await ext.onStoreDocument(buildData(document, 'user') as any);
|
||||
|
||||
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
||||
expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
|
||||
expect(pageRepo.updatePage.mock.calls[0][0].lastUpdatedSource).toBe('user');
|
||||
});
|
||||
|
||||
it('idempotency: unchanged content → no updatePage, no history, no queues', async () => {
|
||||
// The Y.Doc content equals the persisted content deeply → early skip.
|
||||
// A Y.Doc round-trip normalizes attrs (e.g. paragraph indent), so derive
|
||||
// the persisted content from fromYdoc to make the deep-equal skip genuine.
|
||||
const document = ydocFor(doc('SAME CONTENT'));
|
||||
const normalized = TiptapTransformer.fromYdoc(document, 'default');
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
...persistedHumanPage('SAME CONTENT'),
|
||||
content: normalized,
|
||||
});
|
||||
|
||||
await ext.onStoreDocument(buildData(document, 'agent') as any);
|
||||
|
||||
expect(pageRepo.updatePage).not.toHaveBeenCalled();
|
||||
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
||||
expect(historyQueue.add).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -40,6 +40,52 @@ import {
|
||||
} from '../constants';
|
||||
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
||||
|
||||
/**
|
||||
* Resolve the provenance source for a coalesced snapshot.
|
||||
*
|
||||
* The snapshot is tagged 'agent' if any agent edit landed in the coalescing
|
||||
* window (sticky marker) OR if the current writer is the agent; otherwise
|
||||
* 'user'. Pure so the §15 H2 marker logic is unit-testable in isolation.
|
||||
*/
|
||||
export function resolveSource(
|
||||
stickyTouched: boolean,
|
||||
contextActor?: string,
|
||||
): 'agent' | 'user' {
|
||||
return stickyTouched || contextActor === 'agent' ? 'agent' : 'user';
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the BullMQ job id + delay for a page-history snapshot job. Pure so
|
||||
* the data-loss-sensitive timing arithmetic is unit-testable; `now` is injected
|
||||
* (caller passes `Date.now()`) for determinism.
|
||||
*
|
||||
* - Agent edits: delay 0 and a source-keyed job id `${page.id}-agent`. The
|
||||
* delay MUST stay 0 — the worker re-reads the page row at run time, so any
|
||||
* delay risks reading content a later human edit has already overwritten
|
||||
* (mis-tagged snapshot). 0 minimizes that window. The `-agent` suffix keeps
|
||||
* the job from coalescing with the bare-page.id human job.
|
||||
* - Human edits: age-based debounce so rapid human edits coalesce into one
|
||||
* snapshot; job id is the bare `page.id`.
|
||||
*
|
||||
* BullMQ forbids ':' in custom job ids (Redis key separator), so '-' is used;
|
||||
* page.id is a UUID, so `${page.id}-agent` cannot collide with a human job.
|
||||
*/
|
||||
export function computeHistoryJob(
|
||||
page: Pick<Page, 'id' | 'createdAt'>,
|
||||
source: string,
|
||||
now: number,
|
||||
): { jobId: string; delay: number } {
|
||||
const isAgent = source === 'agent';
|
||||
const pageAge = now - new Date(page.createdAt).getTime();
|
||||
const delay = isAgent
|
||||
? 0
|
||||
: pageAge < HISTORY_FAST_THRESHOLD
|
||||
? HISTORY_FAST_INTERVAL
|
||||
: HISTORY_INTERVAL;
|
||||
const jobId = isAgent ? `${page.id}-agent` : page.id;
|
||||
return { jobId, delay };
|
||||
}
|
||||
|
||||
@Injectable()
|
||||
export class PersistenceExtension implements Extension {
|
||||
private readonly logger = new Logger(PersistenceExtension.name);
|
||||
@@ -129,9 +175,10 @@ export class PersistenceExtension implements Extension {
|
||||
// Sticky agent marker: 'agent' if any agent edit landed in this window, OR
|
||||
// if the current writer is the agent (covers a store with no prior onChange
|
||||
// agent event in the same window). §15 H2.
|
||||
const agentTouched =
|
||||
this.consumeAgentTouched(documentName) || context?.actor === 'agent';
|
||||
const lastUpdatedSource = agentTouched ? 'agent' : 'user';
|
||||
const lastUpdatedSource = resolveSource(
|
||||
this.consumeAgentTouched(documentName),
|
||||
context?.actor,
|
||||
);
|
||||
|
||||
try {
|
||||
await executeTx(this.db, async (trx) => {
|
||||
@@ -311,24 +358,13 @@ export class PersistenceExtension implements Extension {
|
||||
page: Page,
|
||||
lastUpdatedSource: string,
|
||||
): Promise<void> {
|
||||
// Agent edits get an immediate, source-keyed history job: they snapshot
|
||||
// deterministically as 'agent' and a later human edit (jobId = page.id)
|
||||
// cannot coalesce/retag them. Human edits keep the age-based debounce so
|
||||
// rapid human edits still coalesce into one snapshot.
|
||||
// NOTE: the agent delay MUST stay 0 — the worker re-reads the page row at
|
||||
// run time, so any delay would risk reading content a later human edit has
|
||||
// already overwritten (mis-tagged snapshot). 0 minimizes that window.
|
||||
const isAgent = lastUpdatedSource === 'agent';
|
||||
const pageAge = Date.now() - new Date(page.createdAt).getTime();
|
||||
const delay = isAgent
|
||||
? 0
|
||||
: pageAge < HISTORY_FAST_THRESHOLD
|
||||
? HISTORY_FAST_INTERVAL
|
||||
: HISTORY_INTERVAL;
|
||||
// BullMQ forbids ':' in custom job IDs (it is the Redis key separator), so
|
||||
// use '-' here. page.id is a UUID, so `${page.id}-agent` cannot collide with
|
||||
// any human job whose id is a bare page.id.
|
||||
const jobId = isAgent ? `${page.id}-agent` : page.id;
|
||||
// Job id + delay arithmetic lives in the pure `computeHistoryJob` (see its
|
||||
// doc comment for the agent-delay-0 / age-based-debounce invariants).
|
||||
const { jobId, delay } = computeHistoryJob(
|
||||
page,
|
||||
lastUpdatedSource,
|
||||
Date.now(),
|
||||
);
|
||||
|
||||
await this.historyQueue.add(
|
||||
QueueJob.PAGE_HISTORY,
|
||||
|
||||
@@ -0,0 +1,200 @@
|
||||
import { Job } from 'bullmq';
|
||||
import { HistoryProcessor } from './history.processor';
|
||||
import { QueueJob } from '../../integrations/queue/constants';
|
||||
|
||||
/**
|
||||
* Unit tests for `HistoryProcessor.process`. This worker is the last line of
|
||||
* defense for the page-history snapshot, so we pin the data-loss-sensitive
|
||||
* paths: duplicate/empty history skipping (isDeepStrictEqual), and — critically
|
||||
* — that a saveHistory failure RESTORES the contributors it popped (otherwise
|
||||
* the contributor set is silently lost) before rethrowing.
|
||||
*/
|
||||
|
||||
const PAGE_ID = 'page-1';
|
||||
const SPACE_ID = 'space-1';
|
||||
const WORKSPACE_ID = 'ws-1';
|
||||
|
||||
// A non-empty content doc (distinct from the empty-paragraph doc).
|
||||
const filledContent = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
|
||||
};
|
||||
const emptyContent = { type: 'doc', content: [{ type: 'paragraph' }] };
|
||||
|
||||
const buildPage = (overrides: Partial<any> = {}) => ({
|
||||
id: PAGE_ID,
|
||||
spaceId: SPACE_ID,
|
||||
workspaceId: WORKSPACE_ID,
|
||||
content: filledContent,
|
||||
...overrides,
|
||||
});
|
||||
|
||||
const buildJob = (overrides: Partial<any> = {}) =>
|
||||
({
|
||||
name: QueueJob.PAGE_HISTORY,
|
||||
data: { pageId: PAGE_ID },
|
||||
...overrides,
|
||||
}) as unknown as Job<any, void>;
|
||||
|
||||
describe('HistoryProcessor.process', () => {
|
||||
let proc: HistoryProcessor;
|
||||
let pageHistoryRepo: { findPageLastHistory: jest.Mock; saveHistory: jest.Mock };
|
||||
let pageRepo: { findById: jest.Mock };
|
||||
let collabHistory: {
|
||||
clearContributors: jest.Mock;
|
||||
popContributors: jest.Mock;
|
||||
addContributors: jest.Mock;
|
||||
};
|
||||
let watcherService: { addPageWatchers: jest.Mock };
|
||||
let notificationQueue: { add: jest.Mock };
|
||||
let generalQueue: { add: jest.Mock };
|
||||
|
||||
beforeEach(() => {
|
||||
pageHistoryRepo = {
|
||||
findPageLastHistory: jest.fn().mockResolvedValue(null),
|
||||
saveHistory: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
pageRepo = { findById: jest.fn().mockResolvedValue(buildPage()) };
|
||||
collabHistory = {
|
||||
clearContributors: jest.fn().mockResolvedValue(undefined),
|
||||
popContributors: jest.fn().mockResolvedValue(['u1', 'u2']),
|
||||
addContributors: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
watcherService = {
|
||||
addPageWatchers: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
||||
generalQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
||||
|
||||
// WorkerHost's constructor reads `this.worker`; passing repos positionally
|
||||
// matches the constructor and avoids the Nest DI container.
|
||||
proc = new HistoryProcessor(
|
||||
pageHistoryRepo as any,
|
||||
pageRepo as any,
|
||||
collabHistory as any,
|
||||
watcherService as any,
|
||||
notificationQueue as any,
|
||||
generalQueue as any,
|
||||
);
|
||||
jest.spyOn(proc['logger'], 'debug').mockImplementation(() => undefined);
|
||||
jest.spyOn(proc['logger'], 'warn').mockImplementation(() => undefined);
|
||||
jest.spyOn(proc['logger'], 'error').mockImplementation(() => undefined);
|
||||
});
|
||||
|
||||
it('ignores jobs whose name is not PAGE_HISTORY (no page lookup)', async () => {
|
||||
await proc.process(buildJob({ name: 'some.other.job' }));
|
||||
expect(pageRepo.findById).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('page not found → clearContributors and return (no save)', async () => {
|
||||
pageRepo.findById.mockResolvedValue(null);
|
||||
|
||||
await proc.process(buildJob());
|
||||
|
||||
expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID);
|
||||
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
||||
expect(collabHistory.popContributors).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('first history + empty content → skip and clear contributors (no save)', async () => {
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
|
||||
pageRepo.findById.mockResolvedValue(buildPage({ content: emptyContent }));
|
||||
|
||||
await proc.process(buildJob());
|
||||
|
||||
expect(collabHistory.clearContributors).toHaveBeenCalledWith(PAGE_ID);
|
||||
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('content unchanged vs last history → no save (isDeepStrictEqual skip)', async () => {
|
||||
// Last history holds a deep-equal-but-distinct copy of current content.
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
|
||||
content: JSON.parse(JSON.stringify(filledContent)),
|
||||
});
|
||||
|
||||
await proc.process(buildJob());
|
||||
|
||||
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
||||
expect(collabHistory.popContributors).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('content changed → addPageWatchers + saveHistory + backlinks queue', async () => {
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
|
||||
content: { type: 'doc', content: [] },
|
||||
});
|
||||
|
||||
await proc.process(buildJob());
|
||||
|
||||
expect(collabHistory.popContributors).toHaveBeenCalledWith(PAGE_ID);
|
||||
expect(watcherService.addPageWatchers).toHaveBeenCalledWith(
|
||||
['u1', 'u2'],
|
||||
PAGE_ID,
|
||||
SPACE_ID,
|
||||
WORKSPACE_ID,
|
||||
);
|
||||
expect(pageHistoryRepo.saveHistory).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ id: PAGE_ID }),
|
||||
{ contributorIds: ['u1', 'u2'] },
|
||||
);
|
||||
expect(generalQueue.add).toHaveBeenCalledWith(
|
||||
QueueJob.PAGE_BACKLINKS,
|
||||
expect.objectContaining({ pageId: PAGE_ID, workspaceId: WORKSPACE_ID }),
|
||||
);
|
||||
});
|
||||
|
||||
it('first history (lastHistory null) with non-empty content → saves, no PAGE_UPDATED notification', async () => {
|
||||
// popContributors yields users, but lastHistory?.content is falsy so the
|
||||
// notification branch (needs a prior version) must be skipped.
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue(null);
|
||||
|
||||
await proc.process(buildJob());
|
||||
|
||||
expect(pageHistoryRepo.saveHistory).toHaveBeenCalled();
|
||||
expect(notificationQueue.add).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('changed content WITH prior history + contributors → queues PAGE_UPDATED notification', async () => {
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
|
||||
content: { type: 'doc', content: [] },
|
||||
});
|
||||
|
||||
await proc.process(buildJob());
|
||||
|
||||
expect(notificationQueue.add).toHaveBeenCalledWith(
|
||||
QueueJob.PAGE_UPDATED,
|
||||
expect.objectContaining({
|
||||
pageId: PAGE_ID,
|
||||
actorIds: ['u1', 'u2'],
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it('saveHistory throws → contributors RESTORED (addContributors) AND error rethrown', async () => {
|
||||
// The data-loss guard: if the snapshot save fails after popContributors,
|
||||
// the popped ids MUST be returned to the pending set, then the error
|
||||
// propagates so BullMQ retries. Assert BOTH halves.
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
|
||||
content: { type: 'doc', content: [] },
|
||||
});
|
||||
const boom = new Error('db down');
|
||||
pageHistoryRepo.saveHistory.mockRejectedValue(boom);
|
||||
|
||||
await expect(proc.process(buildJob())).rejects.toThrow('db down');
|
||||
expect(collabHistory.addContributors).toHaveBeenCalledWith(PAGE_ID, [
|
||||
'u1',
|
||||
'u2',
|
||||
]);
|
||||
});
|
||||
|
||||
it('backlinks + notification queue failures are swallowed (history still committed)', async () => {
|
||||
pageHistoryRepo.findPageLastHistory.mockResolvedValue({
|
||||
content: { type: 'doc', content: [] },
|
||||
});
|
||||
generalQueue.add.mockRejectedValue(new Error('redis backlinks down'));
|
||||
notificationQueue.add.mockRejectedValue(new Error('redis notif down'));
|
||||
|
||||
// The downstream queue failures are caught internally; process resolves.
|
||||
await expect(proc.process(buildJob())).resolves.toBeUndefined();
|
||||
expect(pageHistoryRepo.saveHistory).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -228,25 +228,14 @@ export class AiChatController {
|
||||
}
|
||||
if (!file) throw new BadRequestException('No audio uploaded');
|
||||
|
||||
// Whitelist audio container types produced by browser MediaRecorder
|
||||
// (Chrome/FF: webm/opus, Safari: mp4) plus common STT-accepted formats.
|
||||
const allowedMime = new Set([
|
||||
'audio/webm',
|
||||
'audio/ogg',
|
||||
'audio/mp4',
|
||||
'audio/mpeg',
|
||||
'audio/wav',
|
||||
'audio/x-wav',
|
||||
'audio/wave',
|
||||
'audio/m4a',
|
||||
'audio/x-m4a',
|
||||
]);
|
||||
// MediaRecorder mimetypes carry parameters (e.g. "audio/webm;codecs=opus");
|
||||
// compare only the base type.
|
||||
const baseMime = file.mimetype.split(';')[0].trim().toLowerCase();
|
||||
if (!allowedMime.has(baseMime)) {
|
||||
// Resolve + whitelist the upload's container type (MediaRecorder mimetypes
|
||||
// carry parameters, e.g. "audio/webm;codecs=opus"). A non-whitelisted type
|
||||
// is rejected; an allowed one yields the STT container-format hint.
|
||||
const resolved = resolveAudioFormat(file.mimetype);
|
||||
if (!resolved.ok) {
|
||||
throw new BadRequestException('Unsupported audio format');
|
||||
}
|
||||
const { format } = resolved;
|
||||
|
||||
let buf: Buffer;
|
||||
try {
|
||||
@@ -259,20 +248,6 @@ export class AiChatController {
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
// Container hint for JSON-style STT providers (e.g. OpenRouter); multipart
|
||||
// endpoints ignore it.
|
||||
const formatMap: Record<string, string> = {
|
||||
'audio/webm': 'webm',
|
||||
'audio/ogg': 'ogg',
|
||||
'audio/mp4': 'mp4',
|
||||
'audio/mpeg': 'mp3',
|
||||
'audio/wav': 'wav',
|
||||
'audio/x-wav': 'wav',
|
||||
'audio/wave': 'wav',
|
||||
'audio/m4a': 'm4a',
|
||||
'audio/x-m4a': 'm4a',
|
||||
};
|
||||
const format = formatMap[baseMime] ?? 'webm';
|
||||
let text: string;
|
||||
try {
|
||||
text = await this.aiTranscription.transcribe(workspace.id, buf, format);
|
||||
@@ -302,3 +277,39 @@ export class AiChatController {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Whitelist audio container types produced by browser MediaRecorder (Chrome/FF:
|
||||
* webm/opus, Safari: mp4) plus common STT-accepted formats. The value maps each
|
||||
* allowed base mime to the container-format hint passed to JSON-style STT
|
||||
* providers (e.g. OpenRouter); multipart endpoints ignore the hint.
|
||||
*/
|
||||
const AUDIO_FORMAT_MAP: Record<string, string> = {
|
||||
'audio/webm': 'webm',
|
||||
'audio/ogg': 'ogg',
|
||||
'audio/mp4': 'mp4',
|
||||
'audio/mpeg': 'mp3',
|
||||
'audio/wav': 'wav',
|
||||
'audio/x-wav': 'wav',
|
||||
'audio/wave': 'wav',
|
||||
'audio/m4a': 'm4a',
|
||||
'audio/x-m4a': 'm4a',
|
||||
};
|
||||
|
||||
/**
|
||||
* Resolve and whitelist an uploaded clip's mimetype. MediaRecorder mimetypes
|
||||
* carry parameters (e.g. "audio/webm;codecs=opus"), so the base type is split
|
||||
* out (lowercased, trimmed) before the whitelist check. Returns ok=false for a
|
||||
* non-whitelisted container; otherwise the base mime and its STT format hint.
|
||||
* Pure — the caller throws BadRequestException on !ok.
|
||||
*/
|
||||
export function resolveAudioFormat(
|
||||
mimetype: string,
|
||||
): { ok: true; baseMime: string; format: string } | { ok: false } {
|
||||
const baseMime = mimetype.split(';')[0].trim().toLowerCase();
|
||||
const format = AUDIO_FORMAT_MAP[baseMime];
|
||||
if (format === undefined) {
|
||||
return { ok: false };
|
||||
}
|
||||
return { ok: true, baseMime, format };
|
||||
}
|
||||
|
||||
@@ -367,6 +367,28 @@ export class McpClientsService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply the SSRF connect-time rule to a set of DNS-resolved addresses: block if
|
||||
* ANY resolved address is disallowed by `isIpAllowed`, and block an EMPTY set
|
||||
* (nothing safe to connect to). Only an all-public, non-empty set is allowed.
|
||||
*
|
||||
* This is the connect-time half of the DNS-rebinding defense: the dispatcher's
|
||||
* lookup hands net/tls.connect ONLY a set that passed this check, so the kernel
|
||||
* can never connect to an address that did not pass the guard. Pure — no I/O.
|
||||
*/
|
||||
export function validateResolvedAddresses(
|
||||
addrs: readonly LookupAddress[],
|
||||
): { ok: boolean; blockedHost?: string } {
|
||||
if (addrs.length === 0) {
|
||||
return { ok: false };
|
||||
}
|
||||
const blocked = addrs.find((a) => !isIpAllowed(a.address).ok);
|
||||
if (blocked) {
|
||||
return { ok: false, blockedHost: blocked.address };
|
||||
}
|
||||
return { ok: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the SSRF-pinned undici dispatcher. Its custom connect.lookup resolves
|
||||
* the host, validates EVERY resolved address with the same ssrf-guard, and
|
||||
@@ -388,22 +410,15 @@ function buildPinnedDispatcher(): Agent {
|
||||
return;
|
||||
}
|
||||
const addrs = addresses as LookupAddress[];
|
||||
if (addrs.length === 0) {
|
||||
callback(
|
||||
new Error(`No address resolved for ${hostname}`),
|
||||
'',
|
||||
0,
|
||||
);
|
||||
return;
|
||||
}
|
||||
const blocked = addrs.find((a) => !isIpAllowed(a.address).ok);
|
||||
if (blocked) {
|
||||
const verdict = validateResolvedAddresses(addrs);
|
||||
if (!verdict.ok) {
|
||||
// Refuse the connection: net/tls.connect never sees this address.
|
||||
callback(
|
||||
new Error(`Blocked address for ${hostname}`),
|
||||
'',
|
||||
0,
|
||||
);
|
||||
// An empty set is treated as blocked (nothing safe to connect to).
|
||||
const reason =
|
||||
addrs.length === 0
|
||||
? `No address resolved for ${hostname}`
|
||||
: `Blocked address for ${hostname}`;
|
||||
callback(new Error(reason), '', 0);
|
||||
return;
|
||||
}
|
||||
// undici/net invoke this lookup with `all: true`, so the callback
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
import { type Tool } from 'ai';
|
||||
import { McpClientsService } from './mcp-clients.service';
|
||||
|
||||
/**
|
||||
* Tool-name namespacing / collision tests.
|
||||
*
|
||||
* REACHABILITY NOTE: the helpers `namespace` / `sanitizeName` / `capName` /
|
||||
* `disambiguate` are module-private (not exported) and `mergeNamespaced` is a
|
||||
* PRIVATE method. The smallest reachable public path that exercises all of them
|
||||
* is `toolsFor()` -> getOrBuildEntry -> buildEntry -> connect/tools() ->
|
||||
* mergeNamespaced. We drive that path: stub the repo's `listEnabled` to return
|
||||
* fake servers and spy on the private `connect` to return fake MCP clients whose
|
||||
* `tools()` we control. We then inspect the merged tool KEYS on the returned
|
||||
* toolset — the observable result of namespacing.
|
||||
*
|
||||
* What we assert (all SECURITY/correctness-relevant):
|
||||
* - two servers each exposing a tool `search` -> BOTH survive under distinct
|
||||
* namespaced keys (no silent overwrite);
|
||||
* - a tool name with spaces/unicode -> sanitized to ^[a-zA-Z0-9_-]+;
|
||||
* - an over-long name -> capped to the provider limit (<= 64);
|
||||
* - duplicate names WITHIN one server (collide after sanitize/truncate) ->
|
||||
* disambiguated, so the second is not overwritten.
|
||||
*/
|
||||
const MAX_TOOL_NAME_LENGTH = 64;
|
||||
|
||||
function fakeTool(): Tool {
|
||||
return { description: 'x', inputSchema: undefined } as unknown as Tool;
|
||||
}
|
||||
|
||||
interface FakeServer {
|
||||
id: string;
|
||||
name: string;
|
||||
transport: string;
|
||||
url: string;
|
||||
headersEnc: string | null;
|
||||
toolAllowlist: string[] | null;
|
||||
}
|
||||
|
||||
function server(over: Partial<FakeServer> & { id: string; name: string }): FakeServer {
|
||||
return {
|
||||
transport: 'http',
|
||||
url: 'https://example.com/mcp',
|
||||
headersEnc: null,
|
||||
toolAllowlist: null,
|
||||
...over,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a service whose repo returns `servers` and whose `connect` returns a
|
||||
* fake client exposing `toolsByServerId[server.id]` from tools(). Returns the
|
||||
* merged keys produced by toolsFor.
|
||||
*/
|
||||
async function mergedKeysFor(
|
||||
servers: FakeServer[],
|
||||
toolsByServerId: Record<string, Record<string, Tool>>,
|
||||
): Promise<string[]> {
|
||||
const repoStub = {
|
||||
listEnabled: jest.fn().mockResolvedValue(servers),
|
||||
};
|
||||
const service = new McpClientsService(repoStub as never, {} as never);
|
||||
|
||||
// Map each connect() call (by server identity) to a fake client. connect is
|
||||
// private; spy on it via a typed any-cast.
|
||||
jest
|
||||
.spyOn(service as unknown as { connect: (s: FakeServer) => unknown }, 'connect')
|
||||
.mockImplementation((s: FakeServer) =>
|
||||
Promise.resolve({
|
||||
tools: () => Promise.resolve(toolsByServerId[s.id] ?? {}),
|
||||
close: () => Promise.resolve(),
|
||||
}),
|
||||
);
|
||||
|
||||
const toolset = await service.toolsFor('ws-1');
|
||||
// Release the lease so the service does not hold the fake clients open.
|
||||
await Promise.all(toolset.clients.map((c) => c.close()));
|
||||
return Object.keys(toolset.tools);
|
||||
}
|
||||
|
||||
describe('external MCP tool-name namespacing (via toolsFor)', () => {
|
||||
afterEach(() => jest.restoreAllMocks());
|
||||
|
||||
it('keeps tools from two servers that both expose `search` (no overwrite)', async () => {
|
||||
const keys = await mergedKeysFor(
|
||||
[
|
||||
server({ id: 'id-alpha', name: 'alpha' }),
|
||||
server({ id: 'id-beta', name: 'beta' }),
|
||||
],
|
||||
{
|
||||
'id-alpha': { search: fakeTool() },
|
||||
'id-beta': { search: fakeTool() },
|
||||
},
|
||||
);
|
||||
|
||||
// Two distinct keys survive -> no silent overwrite.
|
||||
expect(keys).toHaveLength(2);
|
||||
expect(new Set(keys).size).toBe(2);
|
||||
// The server name is prefixed onto each tool.
|
||||
expect(keys).toContain('alpha_search');
|
||||
expect(keys.some((k) => k !== 'alpha_search')).toBe(true);
|
||||
});
|
||||
|
||||
it('sanitizes spaces/unicode in names to the allowed charset', async () => {
|
||||
const keys = await mergedKeysFor(
|
||||
[server({ id: 'id-1', name: 'My Server!' })],
|
||||
{ 'id-1': { 'search the wiki ✨': fakeTool() } },
|
||||
);
|
||||
|
||||
expect(keys).toHaveLength(1);
|
||||
// Only ^[a-zA-Z0-9_-]+ characters remain (no spaces, no unicode).
|
||||
expect(keys[0]).toMatch(/^[a-zA-Z0-9_-]+$/);
|
||||
});
|
||||
|
||||
it('caps an over-long name to the provider length limit', async () => {
|
||||
const longName = 'a'.repeat(200);
|
||||
const keys = await mergedKeysFor(
|
||||
[server({ id: 'id-1', name: 'svr' })],
|
||||
{ 'id-1': { [longName]: fakeTool() } },
|
||||
);
|
||||
|
||||
expect(keys).toHaveLength(1);
|
||||
expect(keys[0].length).toBeLessThanOrEqual(MAX_TOOL_NAME_LENGTH);
|
||||
});
|
||||
|
||||
it('disambiguates two names that collide after sanitize/truncate within one server', async () => {
|
||||
// Both names sanitize to the same value ("a_b") -> the second must be
|
||||
// suffix-disambiguated, not overwritten.
|
||||
const keys = await mergedKeysFor(
|
||||
[server({ id: 'id-1', name: 'svr' })],
|
||||
{ 'id-1': { 'a b': fakeTool(), 'a@b': fakeTool() } },
|
||||
);
|
||||
|
||||
expect(keys).toHaveLength(2);
|
||||
expect(new Set(keys).size).toBe(2);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,85 @@
|
||||
import { McpServersService } from './mcp-servers.service';
|
||||
import { AiMcpServer } from '@docmost/db/types/entity.types';
|
||||
|
||||
/**
|
||||
* Encrypted-header leak guard for the admin-facing view (§8.10): `toView` is
|
||||
* private, so we drive it through the public `list()` (which maps every row
|
||||
* with toView). The contract: a row with `headersEnc` set surfaces ONLY
|
||||
* `hasHeaders:true` and NEVER the `headersEnc` blob; a row without headers
|
||||
* surfaces `hasHeaders:false`. The blob must never reach an admin response.
|
||||
*/
|
||||
function row(overrides: Partial<AiMcpServer>): AiMcpServer {
|
||||
return {
|
||||
id: 'srv-1',
|
||||
name: 'Tavily',
|
||||
transport: 'http',
|
||||
url: 'https://example.com/mcp',
|
||||
enabled: true,
|
||||
toolAllowlist: null,
|
||||
headersEnc: null,
|
||||
...overrides,
|
||||
} as unknown as AiMcpServer;
|
||||
}
|
||||
|
||||
describe('McpServersService.toView (via list) — encrypted-header leak guard', () => {
|
||||
function buildService(rows: AiMcpServer[]): McpServersService {
|
||||
const repoStub = {
|
||||
listByWorkspace: jest.fn().mockResolvedValue(rows),
|
||||
};
|
||||
// secretBox + clients are unused by the list/toView path; pass stubs to
|
||||
// satisfy the constructor.
|
||||
return new McpServersService(
|
||||
repoStub as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
);
|
||||
}
|
||||
|
||||
it('exposes hasHeaders:true and NO headersEnc when auth headers are set', async () => {
|
||||
const service = buildService([
|
||||
row({ headersEnc: 'ENCRYPTED-SECRET-BLOB' }),
|
||||
]);
|
||||
|
||||
const [view] = await service.list('ws-1');
|
||||
|
||||
expect(view.hasHeaders).toBe(true);
|
||||
// The encrypted blob must NEVER appear in the view, under any key.
|
||||
expect('headersEnc' in view).toBe(false);
|
||||
expect(Object.values(view)).not.toContain('ENCRYPTED-SECRET-BLOB');
|
||||
});
|
||||
|
||||
it('exposes hasHeaders:false when no auth headers are set', async () => {
|
||||
const service = buildService([row({ headersEnc: null })]);
|
||||
|
||||
const [view] = await service.list('ws-1');
|
||||
|
||||
expect(view.hasHeaders).toBe(false);
|
||||
expect('headersEnc' in view).toBe(false);
|
||||
});
|
||||
|
||||
it('projects only the public fields', async () => {
|
||||
const service = buildService([
|
||||
row({
|
||||
id: 'srv-9',
|
||||
name: 'My MCP',
|
||||
transport: 'sse',
|
||||
url: 'https://mcp.example.com/',
|
||||
enabled: false,
|
||||
toolAllowlist: ['search'],
|
||||
headersEnc: 'BLOB',
|
||||
}),
|
||||
]);
|
||||
|
||||
const [view] = await service.list('ws-1');
|
||||
|
||||
expect(view).toEqual({
|
||||
id: 'srv-9',
|
||||
name: 'My MCP',
|
||||
transport: 'sse',
|
||||
url: 'https://mcp.example.com/',
|
||||
enabled: false,
|
||||
toolAllowlist: ['search'],
|
||||
hasHeaders: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,67 @@
|
||||
import { type LookupAddress } from 'node:dns';
|
||||
import { validateResolvedAddresses } from './mcp-clients.service';
|
||||
|
||||
/**
|
||||
* Unit tests for validateResolvedAddresses — the connect-time half of the SSRF
|
||||
* DNS-rebinding defense. It applies the REAL `isIpAllowed` rule (imported
|
||||
* transitively via the service) and must block if ANY resolved address is
|
||||
* disallowed, treat an EMPTY set as blocked, and unwrap IPv4-mapped IPv6.
|
||||
*
|
||||
* These tests intentionally use real public/private literals (no DNS, no mock)
|
||||
* so they exercise the actual ssrf-guard classification.
|
||||
*/
|
||||
function addr(address: string, family = 4): LookupAddress {
|
||||
return { address, family };
|
||||
}
|
||||
|
||||
describe('validateResolvedAddresses', () => {
|
||||
it('allows an all-public set', () => {
|
||||
const res = validateResolvedAddresses([
|
||||
addr('8.8.8.8'),
|
||||
addr('1.1.1.1'),
|
||||
addr('2001:4860:4860::8888', 6),
|
||||
]);
|
||||
expect(res.ok).toBe(true);
|
||||
});
|
||||
|
||||
it('blocks when ONE address among many is private (any-private-blocks)', () => {
|
||||
const res = validateResolvedAddresses([
|
||||
addr('8.8.8.8'),
|
||||
addr('1.1.1.1'),
|
||||
addr('10.0.0.5'), // private 10/8 hidden among public addresses
|
||||
addr('1.0.0.1'),
|
||||
]);
|
||||
expect(res.ok).toBe(false);
|
||||
expect(res.blockedHost).toBe('10.0.0.5');
|
||||
});
|
||||
|
||||
it('blocks an empty set (nothing safe to connect to)', () => {
|
||||
expect(validateResolvedAddresses([]).ok).toBe(false);
|
||||
});
|
||||
|
||||
it('blocks an IPv4-mapped IPv6 private address', () => {
|
||||
const res = validateResolvedAddresses([addr('::ffff:10.0.0.1', 6)]);
|
||||
expect(res.ok).toBe(false);
|
||||
});
|
||||
|
||||
it('blocks the cloud metadata link-local address', () => {
|
||||
const res = validateResolvedAddresses([
|
||||
addr('8.8.8.8'),
|
||||
addr('169.254.169.254'),
|
||||
]);
|
||||
expect(res.ok).toBe(false);
|
||||
});
|
||||
|
||||
/**
|
||||
* Regression sentinel: if the "any private blocks" rule were weakened to
|
||||
* "all private blocks" / "first address wins", this mixed set (public first,
|
||||
* private second) would wrongly pass. The assertion below FAILS in that case.
|
||||
*/
|
||||
it('FAILS if the any-private rule is weakened (sentinel)', () => {
|
||||
const res = validateResolvedAddresses([
|
||||
addr('8.8.8.8'), // public first
|
||||
addr('192.168.1.1'), // private second — must still block the whole set
|
||||
]);
|
||||
expect(res.ok).toBe(false);
|
||||
});
|
||||
});
|
||||
53
apps/server/src/core/ai-chat/resolve-audio-format.spec.ts
Normal file
53
apps/server/src/core/ai-chat/resolve-audio-format.spec.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import { resolveAudioFormat } from './ai-chat.controller';
|
||||
|
||||
/**
|
||||
* Unit tests for resolveAudioFormat — the transcribe-endpoint mime whitelist.
|
||||
* It splits the base mime off any MediaRecorder parameters, lowercases/trims it,
|
||||
* checks it against the whitelist, and maps it to the STT container-format hint.
|
||||
* A non-whitelisted container yields { ok: false } (the controller then throws
|
||||
* BadRequestException).
|
||||
*/
|
||||
describe('resolveAudioFormat', () => {
|
||||
it('strips MediaRecorder parameters to the base mime (audio/webm;codecs=opus)', () => {
|
||||
const res = resolveAudioFormat('audio/webm;codecs=opus');
|
||||
expect(res).toEqual({ ok: true, baseMime: 'audio/webm', format: 'webm' });
|
||||
});
|
||||
|
||||
it('normalizes uppercase / surrounding whitespace', () => {
|
||||
const res = resolveAudioFormat(' AUDIO/MP4 ; codecs=mp4a ');
|
||||
expect(res).toEqual({ ok: true, baseMime: 'audio/mp4', format: 'mp4' });
|
||||
});
|
||||
|
||||
it('handles the Safari/iOS audio/x-m4a container', () => {
|
||||
expect(resolveAudioFormat('audio/x-m4a')).toEqual({
|
||||
ok: true,
|
||||
baseMime: 'audio/x-m4a',
|
||||
format: 'm4a',
|
||||
});
|
||||
});
|
||||
|
||||
it('rejects a disallowed container (audio/aiff)', () => {
|
||||
expect(resolveAudioFormat('audio/aiff')).toEqual({ ok: false });
|
||||
});
|
||||
|
||||
it('maps every whitelisted container to its STT format hint', () => {
|
||||
const cases: Array<[string, string]> = [
|
||||
['audio/webm', 'webm'],
|
||||
['audio/ogg', 'ogg'],
|
||||
['audio/mp4', 'mp4'],
|
||||
['audio/mpeg', 'mp3'],
|
||||
['audio/wav', 'wav'],
|
||||
['audio/x-wav', 'wav'],
|
||||
['audio/wave', 'wav'],
|
||||
['audio/m4a', 'm4a'],
|
||||
['audio/x-m4a', 'm4a'],
|
||||
];
|
||||
for (const [mime, format] of cases) {
|
||||
expect(resolveAudioFormat(mime)).toEqual({
|
||||
ok: true,
|
||||
baseMime: mime,
|
||||
format,
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -199,21 +199,8 @@ export class AiChatToolsService {
|
||||
const accessibleSet = new Set(accessibleIds);
|
||||
|
||||
// Keep the best (first — hits are ordered by fused score desc) chunk
|
||||
// per page, capped to `cap`.
|
||||
const seen = new Set<string>();
|
||||
const results: { id: string; title: string; snippet: string }[] = [];
|
||||
for (const hit of hits) {
|
||||
if (!accessibleSet.has(hit.pageId)) continue;
|
||||
if (seen.has(hit.pageId)) continue;
|
||||
seen.add(hit.pageId);
|
||||
results.push({
|
||||
id: hit.pageId,
|
||||
title: hit.title ?? '',
|
||||
snippet: snippet(hit.content),
|
||||
});
|
||||
if (results.length >= cap) break;
|
||||
}
|
||||
return results;
|
||||
// per page, dropping any page the user cannot access, capped to `cap`.
|
||||
return selectAccessibleHits(hits, accessibleSet, cap);
|
||||
},
|
||||
}),
|
||||
|
||||
@@ -960,6 +947,44 @@ export class AiChatToolsService {
|
||||
}
|
||||
}
|
||||
|
||||
/** A single hybrid-search hit: the minimal shape selectAccessibleHits needs. */
|
||||
export interface SearchHitLike {
|
||||
pageId: string;
|
||||
title: string | null;
|
||||
content: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Post-filter hybrid-search hits into the agent-facing result list. This is the
|
||||
* CASL leak guard for the in-process hybrid search: the hits come from a direct
|
||||
* pgvector + full-text query that does NOT get CASL for free, so an accessible
|
||||
* SPACE does not imply every page in it is accessible (restricted pages).
|
||||
*
|
||||
* Given `hits` (ordered by fused score desc), the `accessibleSet` of page ids
|
||||
* the user may read, and `cap`, it keeps the BEST (first) chunk per page, drops
|
||||
* any page not in `accessibleSet`, and caps the output at `cap`. Pure — no I/O.
|
||||
*/
|
||||
export function selectAccessibleHits(
|
||||
hits: readonly SearchHitLike[],
|
||||
accessibleSet: Set<string>,
|
||||
cap: number,
|
||||
): { id: string; title: string; snippet: string }[] {
|
||||
const seen = new Set<string>();
|
||||
const results: { id: string; title: string; snippet: string }[] = [];
|
||||
for (const hit of hits) {
|
||||
if (!accessibleSet.has(hit.pageId)) continue;
|
||||
if (seen.has(hit.pageId)) continue;
|
||||
seen.add(hit.pageId);
|
||||
results.push({
|
||||
id: hit.pageId,
|
||||
title: hit.title ?? '',
|
||||
snippet: snippet(hit.content),
|
||||
});
|
||||
if (results.length >= cap) break;
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim a search highlight/snippet to a token-efficient length. The highlight
|
||||
* may contain `<b>` markers from the search backend; they are harmless to the
|
||||
|
||||
@@ -0,0 +1,96 @@
|
||||
import {
|
||||
selectAccessibleHits,
|
||||
type SearchHitLike,
|
||||
} from './ai-chat-tools.service';
|
||||
|
||||
/**
|
||||
* Unit tests for selectAccessibleHits — the CASL leak guard for the in-process
|
||||
* hybrid search. The hybrid query runs over pgvector + full-text WITHOUT CASL,
|
||||
* so this post-filter is the ONLY thing that drops pages the user cannot read.
|
||||
*
|
||||
* Core invariant: a hit on a page that is NOT in `accessibleSet` is dropped,
|
||||
* even when that page lives in an otherwise-accessible space. Plus: only the
|
||||
* best chunk per page survives (dedupe), results are capped, and an empty
|
||||
* accessibleSet yields nothing.
|
||||
*/
|
||||
function hit(pageId: string, title: string | null, content: string): SearchHitLike {
|
||||
return { pageId, title, content };
|
||||
}
|
||||
|
||||
describe('selectAccessibleHits', () => {
|
||||
it('drops a hit on a page NOT in accessibleSet (the core leak guard)', () => {
|
||||
const hits = [
|
||||
hit('public-page', 'Public', 'visible body'),
|
||||
// restricted-page is in an accessible space but NOT page-accessible.
|
||||
hit('restricted-page', 'Secret', 'leaked body'),
|
||||
];
|
||||
const accessibleSet = new Set(['public-page']);
|
||||
|
||||
const out = selectAccessibleHits(hits, accessibleSet, 10);
|
||||
|
||||
expect(out).toEqual([
|
||||
{ id: 'public-page', title: 'Public', snippet: 'visible body' },
|
||||
]);
|
||||
// The restricted page must NEVER appear in the output.
|
||||
expect(out.some((r) => r.id === 'restricted-page')).toBe(false);
|
||||
});
|
||||
|
||||
it('keeps only the best (first) chunk per page when a page has duplicates', () => {
|
||||
const hits = [
|
||||
hit('p1', 'Page One', 'best chunk'),
|
||||
hit('p1', 'Page One', 'lower-ranked chunk'),
|
||||
hit('p2', 'Page Two', 'p2 chunk'),
|
||||
];
|
||||
const accessibleSet = new Set(['p1', 'p2']);
|
||||
|
||||
const out = selectAccessibleHits(hits, accessibleSet, 10);
|
||||
|
||||
expect(out).toEqual([
|
||||
{ id: 'p1', title: 'Page One', snippet: 'best chunk' },
|
||||
{ id: 'p2', title: 'Page Two', snippet: 'p2 chunk' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('caps the number of results at `cap`', () => {
|
||||
const hits = [
|
||||
hit('p1', 't1', 'c1'),
|
||||
hit('p2', 't2', 'c2'),
|
||||
hit('p3', 't3', 'c3'),
|
||||
hit('p4', 't4', 'c4'),
|
||||
];
|
||||
const accessibleSet = new Set(['p1', 'p2', 'p3', 'p4']);
|
||||
|
||||
const out = selectAccessibleHits(hits, accessibleSet, 2);
|
||||
|
||||
expect(out).toHaveLength(2);
|
||||
expect(out.map((r) => r.id)).toEqual(['p1', 'p2']);
|
||||
});
|
||||
|
||||
it('returns an empty list when accessibleSet is empty', () => {
|
||||
const hits = [hit('p1', 't1', 'c1'), hit('p2', 't2', 'c2')];
|
||||
|
||||
expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]);
|
||||
});
|
||||
|
||||
it('defaults a null title to an empty string', () => {
|
||||
const out = selectAccessibleHits(
|
||||
[hit('p1', null, 'body')],
|
||||
new Set(['p1']),
|
||||
10,
|
||||
);
|
||||
expect(out).toEqual([{ id: 'p1', title: '', snippet: 'body' }]);
|
||||
});
|
||||
|
||||
/**
|
||||
* Regression sentinel for the leak guard: if the access intersection
|
||||
* (`accessibleSet.has(hit.pageId)` filter) were removed, the restricted page
|
||||
* would slip into the output and THIS assertion would fail. Documents that
|
||||
* the filter — not the dedupe/cap — is what enforces page-level access.
|
||||
*/
|
||||
it('FAILS if the access intersection is removed (sentinel)', () => {
|
||||
const hits = [hit('restricted', 'Secret', 'leaked')];
|
||||
// Page is NOT accessible -> output MUST be empty. Without the intersection
|
||||
// check the function would return the restricted hit and break this test.
|
||||
expect(selectAccessibleHits(hits, new Set<string>(), 10)).toEqual([]);
|
||||
});
|
||||
});
|
||||
176
apps/server/src/core/share/share-comment-strip.spec.ts
Normal file
176
apps/server/src/core/share/share-comment-strip.spec.ts
Normal file
@@ -0,0 +1,176 @@
|
||||
import { ShareService } from './share.service';
|
||||
|
||||
// Exercises the REAL ShareService comment-mark stripping for shared content via
|
||||
// the smallest reachable seam: updatePublicAttachments -> prepareContentForShare
|
||||
// -> removeMarkTypeFromDoc(doc, 'comment'). This is a documented threat-model
|
||||
// item: `comment` marks are internal-team metadata (existence, location, count,
|
||||
// resolved state, and the comment ids themselves) and MUST NOT leak to anonymous
|
||||
// public-share viewers.
|
||||
//
|
||||
// prepareContentForShare is private and the page-load path (getSharedPage) needs
|
||||
// a full DB-backed resolveReadableSharePage; updatePublicAttachments is the
|
||||
// smallest public seam that runs the exact same sanitization on a doc we control.
|
||||
// Only the workspace toggle (workspaceRepo.findById) and token service are
|
||||
// touched, both mocked — no DB setup required.
|
||||
|
||||
const WS = 'ws-1';
|
||||
const PAGE = 'page-1';
|
||||
|
||||
function buildService() {
|
||||
const shareRepo = { findById: jest.fn() };
|
||||
const pageRepo = { findById: jest.fn() };
|
||||
const pagePermissionRepo = {
|
||||
hasRestrictedAncestor: jest.fn(async () => false),
|
||||
};
|
||||
const tokenService = {
|
||||
generateAttachmentToken: jest.fn(async () => 'tok'),
|
||||
};
|
||||
// htmlEmbed toggle ON so the embed strip is a no-op and we isolate the
|
||||
// comment-mark strip behaviour.
|
||||
const workspaceRepo = {
|
||||
findById: jest.fn(async () => ({ id: WS, settings: { htmlEmbed: true } })),
|
||||
};
|
||||
|
||||
return new ShareService(
|
||||
shareRepo as any,
|
||||
pageRepo as any,
|
||||
pagePermissionRepo as any,
|
||||
{} as any, // db (unused on this path)
|
||||
tokenService as any,
|
||||
{} as any, // transclusionService (unused)
|
||||
workspaceRepo as any,
|
||||
);
|
||||
}
|
||||
|
||||
// A paragraph whose text carries a `comment` mark with a comment id.
|
||||
function commentedText(text: string, commentId: string) {
|
||||
return {
|
||||
type: 'text',
|
||||
text,
|
||||
marks: [{ type: 'comment', attrs: { commentId, resolved: false } }],
|
||||
};
|
||||
}
|
||||
|
||||
async function sanitize(content: any) {
|
||||
const service = buildService();
|
||||
return service.updatePublicAttachments({
|
||||
id: PAGE,
|
||||
workspaceId: WS,
|
||||
content,
|
||||
} as any);
|
||||
}
|
||||
|
||||
function countCommentMarks(doc: any): number {
|
||||
let count = 0;
|
||||
const walk = (node: any) => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
if (Array.isArray(node.marks)) {
|
||||
for (const mark of node.marks) {
|
||||
if (mark?.type === 'comment') count++;
|
||||
}
|
||||
}
|
||||
if (Array.isArray(node.content)) node.content.forEach(walk);
|
||||
};
|
||||
walk(doc);
|
||||
return count;
|
||||
}
|
||||
|
||||
describe('ShareService comment-mark stripping for public shares (real code)', () => {
|
||||
it('strips a top-level comment mark and preserves the visible text', async () => {
|
||||
const content = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [commentedText('secret-reviewed body', 'cmt-top-1')],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const out = await sanitize(content);
|
||||
|
||||
expect(countCommentMarks(out)).toBe(0);
|
||||
// The text itself survives; only the internal mark is removed.
|
||||
expect(JSON.stringify(out)).toContain('secret-reviewed body');
|
||||
// The comment id must not appear anywhere in the serialized output.
|
||||
expect(JSON.stringify(out)).not.toContain('cmt-top-1');
|
||||
});
|
||||
|
||||
it('strips comment marks nested inside columns and callouts', async () => {
|
||||
const content = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [commentedText('col body', 'cmt-col-1')],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{
|
||||
type: 'callout',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [commentedText('callout body', 'cmt-callout-1')],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const out = await sanitize(content);
|
||||
|
||||
expect(countCommentMarks(out)).toBe(0);
|
||||
const serialized = JSON.stringify(out);
|
||||
// Visible content of both nested branches survives.
|
||||
expect(serialized).toContain('col body');
|
||||
expect(serialized).toContain('callout body');
|
||||
// No nested comment id leaks.
|
||||
expect(serialized).not.toContain('cmt-col-1');
|
||||
expect(serialized).not.toContain('cmt-callout-1');
|
||||
});
|
||||
|
||||
it('strips every comment mark when multiple coexist (count goes to zero)', async () => {
|
||||
const content = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [
|
||||
commentedText('a', 'cmt-a'),
|
||||
{ type: 'text', text: ' plain ' },
|
||||
commentedText('b', 'cmt-b'),
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [commentedText('c', 'cmt-c')],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
// Sanity: the input genuinely has 3 comment marks before sanitization.
|
||||
expect(countCommentMarks(content)).toBe(3);
|
||||
|
||||
const out = await sanitize(content);
|
||||
|
||||
expect(countCommentMarks(out)).toBe(0);
|
||||
const serialized = JSON.stringify(out);
|
||||
for (const id of ['cmt-a', 'cmt-b', 'cmt-c']) {
|
||||
expect(serialized).not.toContain(id);
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,41 @@
|
||||
import { ShareSeoController } from './share-seo.controller';
|
||||
|
||||
// Pins ShareSeoController.extractPageSlugId — the slug→pageId resolver used to
|
||||
// look up a shared page from the public URL. A full UUID must pass through
|
||||
// untouched; a "title-slug-<id>" must yield the trailing token; a single token
|
||||
// is returned as-is; falsy input yields undefined. The method does not touch
|
||||
// `this`, so the controller can be constructed with null collaborators.
|
||||
|
||||
function buildController(): ShareSeoController {
|
||||
return new ShareSeoController(null as any, null as any, null as any);
|
||||
}
|
||||
|
||||
describe('ShareSeoController.extractPageSlugId', () => {
|
||||
const controller = buildController();
|
||||
|
||||
it('returns a full UUID unchanged', () => {
|
||||
const uuid = '550e8400-e29b-41d4-a716-446655440000';
|
||||
expect(controller.extractPageSlugId(uuid)).toBe(uuid);
|
||||
});
|
||||
|
||||
it('returns the trailing token of a title-slug-id form', () => {
|
||||
expect(controller.extractPageSlugId('my-page-title-abc123')).toBe('abc123');
|
||||
});
|
||||
|
||||
it('returns a single token (no hyphen) as-is', () => {
|
||||
expect(controller.extractPageSlugId('abc123')).toBe('abc123');
|
||||
});
|
||||
|
||||
it('returns the last segment for a two-token slug', () => {
|
||||
expect(controller.extractPageSlugId('hello-world')).toBe('world');
|
||||
});
|
||||
|
||||
it('returns undefined for an empty string (falsy guard)', () => {
|
||||
expect(controller.extractPageSlugId('')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for null/undefined input', () => {
|
||||
expect(controller.extractPageSlugId(undefined as any)).toBeUndefined();
|
||||
expect(controller.extractPageSlugId(null as any)).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@@ -7,8 +7,8 @@ import { validate as isValidUUID } from 'uuid';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { EnvironmentService } from '../../integrations/environment/environment.service';
|
||||
import { Workspace } from '@docmost/db/types/entity.types';
|
||||
import { htmlEscape } from '../../common/helpers/html-escaper';
|
||||
import { injectTrackerHead } from './inject-tracker-head.util';
|
||||
import { buildShareMetaHtml } from './share-seo.util';
|
||||
|
||||
@Controller('share')
|
||||
export class ShareSeoController {
|
||||
@@ -72,24 +72,11 @@ export class ShareSeoController {
|
||||
return this.sendIndex(indexFilePath, res);
|
||||
}
|
||||
|
||||
const rawTitle = htmlEscape(share?.sharedPage.title ?? 'untitled');
|
||||
const metaTitle =
|
||||
rawTitle.length > 80 ? `${rawTitle.slice(0, 77)}…` : rawTitle;
|
||||
|
||||
const metaTagVar = '<!--meta-tags-->';
|
||||
|
||||
const metaTags = [
|
||||
`<meta property="og:title" content="${metaTitle}" />`,
|
||||
`<meta property="twitter:title" content="${metaTitle}" />`,
|
||||
!share.searchIndexing ? `<meta name="robots" content="noindex" />` : '',
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join('\n ');
|
||||
|
||||
const html = fs.readFileSync(indexFilePath, 'utf8');
|
||||
let transformedHtml = html
|
||||
.replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
|
||||
.replace(metaTagVar, metaTags);
|
||||
let transformedHtml = buildShareMetaHtml(html, {
|
||||
title: share?.sharedPage.title,
|
||||
searchIndexing: share.searchIndexing,
|
||||
});
|
||||
|
||||
// Deliberate same-origin tracker surface: this is the ONE place where an
|
||||
// admin-authored analytics/tracker snippet (settings.trackerHead) is
|
||||
|
||||
126
apps/server/src/core/share/share-seo.meta.spec.ts
Normal file
126
apps/server/src/core/share/share-seo.meta.spec.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
import { buildShareMetaHtml } from './share-seo.util';
|
||||
|
||||
// Pins the SEO meta-HTML builder for public share pages (extracted verbatim from
|
||||
// ShareSeoController.getShare). The shared page title is attacker-influenceable,
|
||||
// so the security-critical invariant is that it is htmlEscape'd before being
|
||||
// interpolated into BOTH the <title> element and the content="..." attributes of
|
||||
// the og:/twitter: meta tags. The XSS tests below MUST fail if the htmlEscape
|
||||
// step is ever removed.
|
||||
|
||||
// A minimal index.html shell carrying the two placeholders the builder rewrites:
|
||||
// the <title> element and the <!--meta-tags--> marker.
|
||||
const INDEX =
|
||||
'<html><head><title>App</title>\n <!--meta-tags--></head><body>x</body></html>';
|
||||
|
||||
describe('buildShareMetaHtml', () => {
|
||||
describe('XSS: title escaping', () => {
|
||||
it('fully htmlEscapes a </title><script> breakout in BOTH <title> and og:/twitter: meta', () => {
|
||||
const out = buildShareMetaHtml(INDEX, {
|
||||
title: '</title><script>alert(1)</script>',
|
||||
searchIndexing: true,
|
||||
});
|
||||
|
||||
// The raw script tag must NEVER appear anywhere in the output — it would
|
||||
// execute in the share origin. This assertion fails if htmlEscape is removed.
|
||||
expect(out).not.toContain('<script>');
|
||||
expect(out).not.toContain('</title><script>');
|
||||
// The dangerous chars are escaped to entities instead.
|
||||
expect(out).toContain('<script>alert(1)</script>');
|
||||
// og:title and twitter:title both carry the escaped (not raw) value.
|
||||
expect(out).toContain(
|
||||
'<meta property="og:title" content="</title><script>alert(1)</script>" />',
|
||||
);
|
||||
expect(out).toContain(
|
||||
'<meta property="twitter:title" content="</title><script>alert(1)</script>" />',
|
||||
);
|
||||
});
|
||||
|
||||
it('escapes a double quote to " so the content="..." attribute cannot be broken', () => {
|
||||
const out = buildShareMetaHtml(INDEX, {
|
||||
title: 'a"onmouseover="alert(1)',
|
||||
searchIndexing: true,
|
||||
});
|
||||
|
||||
// A raw `"` would close the content attribute and inject a new attribute.
|
||||
expect(out).not.toContain('content="a"onmouseover=');
|
||||
expect(out).toContain('"');
|
||||
expect(out).toContain(
|
||||
'<meta property="og:title" content="a"onmouseover="alert(1)" />',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('title truncation (limit 80, applied AFTER escaping)', () => {
|
||||
it('leaves a title of exactly 80 chars untouched (no ellipsis)', () => {
|
||||
const title = 'a'.repeat(80);
|
||||
const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
|
||||
expect(out).toContain(`<title>${title}</title>`);
|
||||
expect(out).not.toContain('…');
|
||||
});
|
||||
|
||||
it('truncates a >80 char title to 77 chars + an ellipsis (78 total)', () => {
|
||||
const title = 'b'.repeat(100);
|
||||
const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
|
||||
const expected = `${'b'.repeat(77)}…`;
|
||||
expect(out).toContain(`<title>${expected}</title>`);
|
||||
// 77 visible chars + the single ellipsis glyph.
|
||||
expect(expected.length).toBe(78);
|
||||
expect(out).toContain(
|
||||
`<meta property="og:title" content="${expected}" />`,
|
||||
);
|
||||
});
|
||||
|
||||
it('truncation acts on the ESCAPED string: each < becomes < first, then slice(0,77)', () => {
|
||||
// 100 "<" chars escape to 100 * "<" = 400 chars, then truncate to 77 + …
|
||||
const title = '<'.repeat(100);
|
||||
const out = buildShareMetaHtml(INDEX, { title, searchIndexing: true });
|
||||
const escaped = '<'.repeat(100);
|
||||
const expected = `${escaped.slice(0, 77)}…`;
|
||||
expect(out).toContain(`<title>${expected}</title>`);
|
||||
// No raw "<" from the title leaks through.
|
||||
expect(out).not.toContain('<<');
|
||||
});
|
||||
});
|
||||
|
||||
describe('robots noindex meta', () => {
|
||||
it('searchIndexing=false emits <meta name="robots" content="noindex">', () => {
|
||||
const out = buildShareMetaHtml(INDEX, {
|
||||
title: 'page',
|
||||
searchIndexing: false,
|
||||
});
|
||||
expect(out).toContain('<meta name="robots" content="noindex" />');
|
||||
});
|
||||
|
||||
it('searchIndexing=true emits NO robots tag', () => {
|
||||
const out = buildShareMetaHtml(INDEX, {
|
||||
title: 'page',
|
||||
searchIndexing: true,
|
||||
});
|
||||
expect(out).not.toContain('robots');
|
||||
expect(out).not.toContain('noindex');
|
||||
});
|
||||
});
|
||||
|
||||
describe('null / missing title fallback', () => {
|
||||
it('falls back to "untitled" when title is null', () => {
|
||||
const out = buildShareMetaHtml(INDEX, {
|
||||
title: null as unknown as string,
|
||||
searchIndexing: true,
|
||||
});
|
||||
expect(out).toContain('<title>untitled</title>');
|
||||
expect(out).toContain('<meta property="og:title" content="untitled" />');
|
||||
});
|
||||
});
|
||||
|
||||
describe('placeholder replacement', () => {
|
||||
it('replaces the original <title> and the <!--meta-tags--> marker', () => {
|
||||
const out = buildShareMetaHtml(INDEX, {
|
||||
title: 'Hello',
|
||||
searchIndexing: true,
|
||||
});
|
||||
expect(out).not.toContain('<!--meta-tags-->');
|
||||
expect(out).not.toContain('<title>App</title>');
|
||||
expect(out).toContain('<title>Hello</title>');
|
||||
});
|
||||
});
|
||||
});
|
||||
40
apps/server/src/core/share/share-seo.util.ts
Normal file
40
apps/server/src/core/share/share-seo.util.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { htmlEscape } from '../../common/helpers/html-escaper';
|
||||
|
||||
/**
|
||||
* Build the SEO-enriched index HTML for a publicly shared page.
|
||||
*
|
||||
* This is the pure, side-effect-free core of ShareSeoController.getShare: given
|
||||
* the raw index.html and the share's title + searchIndexing flag, it returns the
|
||||
* transformed HTML with the <title> replaced and the og:/twitter:/robots meta
|
||||
* tags injected at the <!--meta-tags--> marker.
|
||||
*
|
||||
* SECURITY: the title is attacker-influenceable (it is the shared page title),
|
||||
* so it MUST be htmlEscape'd before being interpolated into both the <title>
|
||||
* element and the content="..." attributes of the meta tags. Removing the
|
||||
* escaping would allow a page title to break out of the attribute / element and
|
||||
* inject markup into the share origin.
|
||||
*/
|
||||
export function buildShareMetaHtml(
|
||||
indexHtml: string,
|
||||
opts: { title: string | null; searchIndexing: boolean },
|
||||
): string {
|
||||
// Escape FIRST, then truncate, so the truncation acts on the safe string and
|
||||
// can never split a multi-char HTML entity (matches the original controller).
|
||||
const rawTitle = htmlEscape(opts.title ?? 'untitled');
|
||||
const metaTitle =
|
||||
rawTitle.length > 80 ? `${rawTitle.slice(0, 77)}…` : rawTitle;
|
||||
|
||||
const metaTagVar = '<!--meta-tags-->';
|
||||
|
||||
const metaTags = [
|
||||
`<meta property="og:title" content="${metaTitle}" />`,
|
||||
`<meta property="twitter:title" content="${metaTitle}" />`,
|
||||
!opts.searchIndexing ? `<meta name="robots" content="noindex" />` : '',
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join('\n ');
|
||||
|
||||
return indexHtml
|
||||
.replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
|
||||
.replace(metaTagVar, metaTags);
|
||||
}
|
||||
62
apps/server/src/core/share/share.util.spec.ts
Normal file
62
apps/server/src/core/share/share.util.spec.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import { updateAttachmentAttr } from './share.util';
|
||||
|
||||
// Pins updateAttachmentAttr — the per-attachment URL rewriter used when serving
|
||||
// shared page content. Internal attachment paths (/files… and /api/files…) must
|
||||
// be rewritten to the public form with a scoped jwt appended; anything else
|
||||
// (external URLs, null) must be left untouched so a public viewer's signed token
|
||||
// is never attached to a foreign origin. The function only reads/writes
|
||||
// node.attrs[attr], so a plain object stands in for the real ProseMirror Node.
|
||||
|
||||
function fakeNode(attrs: Record<string, any>) {
|
||||
return { attrs } as any;
|
||||
}
|
||||
|
||||
const JWT = 'TOK';
|
||||
|
||||
describe('updateAttachmentAttr', () => {
|
||||
it('rewrites a /files path to /files/public/ with ?jwt=', () => {
|
||||
const node = fakeNode({ src: '/files/x.png' });
|
||||
updateAttachmentAttr(node, 'src', JWT);
|
||||
expect(node.attrs.src).toBe(`/files/public/x.png?jwt=${JWT}`);
|
||||
});
|
||||
|
||||
it('rewrites an /api/files path (keeps the /api prefix, inserts public)', () => {
|
||||
const node = fakeNode({ src: '/api/files/y.png' });
|
||||
updateAttachmentAttr(node, 'src', JWT);
|
||||
expect(node.attrs.src).toBe(`/api/files/public/y.png?jwt=${JWT}`);
|
||||
});
|
||||
|
||||
it('uses &jwt= when the src already carries a query string', () => {
|
||||
const node = fakeNode({ src: '/files/x.png?w=100' });
|
||||
updateAttachmentAttr(node, 'src', JWT);
|
||||
expect(node.attrs.src).toBe(`/files/public/x.png?w=100&jwt=${JWT}`);
|
||||
});
|
||||
|
||||
it('leaves an external https URL untouched (no token leak to a foreign origin)', () => {
|
||||
const external = 'https://example.com/x.png';
|
||||
const node = fakeNode({ src: external });
|
||||
updateAttachmentAttr(node, 'src', JWT);
|
||||
expect(node.attrs.src).toBe(external);
|
||||
});
|
||||
|
||||
it('leaves a null src untouched', () => {
|
||||
const node = fakeNode({ src: null });
|
||||
updateAttachmentAttr(node, 'src', JWT);
|
||||
expect(node.attrs.src).toBeNull();
|
||||
});
|
||||
|
||||
it('rewrites the `url` attr variant the same way', () => {
|
||||
const node = fakeNode({ url: '/files/doc.pdf' });
|
||||
updateAttachmentAttr(node, 'url', JWT);
|
||||
expect(node.attrs.url).toBe(`/files/public/doc.pdf?jwt=${JWT}`);
|
||||
});
|
||||
|
||||
it('only touches the requested attr, leaving the other attr alone', () => {
|
||||
const external = 'https://cdn.example.com/a.png';
|
||||
const node = fakeNode({ src: '/files/a.png', url: external });
|
||||
updateAttachmentAttr(node, 'src', JWT);
|
||||
expect(node.attrs.src).toBe(`/files/public/a.png?jwt=${JWT}`);
|
||||
// `url` was not requested, so it is unchanged.
|
||||
expect(node.attrs.url).toBe(external);
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user