Merge remote-tracking branch 'gitea/develop' into HEAD

# Conflicts:
#	apps/server/src/app.module.ts
#	apps/server/src/integrations/environment/environment.service.spec.ts
#	apps/server/src/integrations/environment/environment.service.ts
#	apps/server/src/integrations/environment/environment.validation.ts
#	packages/mcp/build/client.js
#	packages/mcp/build/index.js
#	packages/mcp/build/tool-specs.js
This commit is contained in:
claude code agent 227
2026-06-29 18:56:40 +03:00
53 changed files with 3397 additions and 308 deletions
+2
View File
@@ -29,6 +29,7 @@ import { NoopAuditModule } from './integrations/audit/audit.module';
import { ThrottleModule } from './integrations/throttle/throttle.module';
import { McpModule } from './integrations/mcp/mcp.module';
import { GitSyncModule } from './integrations/git-sync/git-sync.module';
import { SandboxModule } from './integrations/sandbox/sandbox.module';
import { AiModule } from './integrations/ai/ai.module';
import { AiChatModule } from './core/ai-chat/ai-chat.module';
@@ -91,6 +92,7 @@ try {
ThrottleModule,
McpModule,
GitSyncModule,
SandboxModule,
AiModule,
AiChatModule,
...enterpriseModules,
@@ -187,7 +187,7 @@ export class AiAgentRolesService {
}
// -------------------------------------------------------------------------
// Catalog (admin-only). The catalog is curated, untrusted JSON fetched +
// Catalog (admin-only). The catalog is curated, untrusted YAML fetched +
// validated by AiAgentRolesCatalogProvider; this layer resolves localized
// text and reconciles a bundle against the workspace's existing roles.
// -------------------------------------------------------------------------
@@ -1,12 +1,23 @@
import { BadGatewayException, BadRequestException } from '@nestjs/common';
import { AiAgentRolesCatalogProvider } from './ai-agent-roles-catalog.provider';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';
import {
AiAgentRolesCatalogProvider,
isCatalogBundleFile,
isCatalogIndex,
isCatalogRole,
} from './ai-agent-roles-catalog.provider';
/**
* Provider tests against a mocked remote source (no network). They cover the
* happy read path (fetchIndex / fetchBundle), the malformed-shape rejection,
* rejection of non-http(s) sources (local sources are gone), and — most
* importantly — the `^[a-z0-9-]+$` path-traversal guard that runs BEFORE any
* path/URL is built.
* happy read path (fetchIndex / fetchBundle) over the YAML catalog format, the
* block-scalar `instructions` round-trip, the malformed-shape rejection, the
* malformed-YAML rejection, rejection of non-http(s) sources (local sources are
* gone), and — most importantly — the `^[a-z0-9-]+$` path-traversal guard that
* runs BEFORE any path/URL is built. Fixtures are serialized with the same
* `yaml` library the provider parses with (`stringifyYaml`), so the tests
* exercise real YAML, not the JSON subset.
*/
describe('AiAgentRolesCatalogProvider', () => {
function makeProvider(source: string) {
@@ -71,7 +82,7 @@ describe('AiAgentRolesCatalogProvider', () => {
}
it('fetchBundle remote happy path => parses + validates', async () => {
const json = JSON.stringify({
const yaml = stringifyYaml({
schemaVersion: 1,
language: 'en',
roles: [
@@ -82,7 +93,7 @@ describe('AiAgentRolesCatalogProvider', () => {
},
],
});
const body = streamOf([new TextEncoder().encode(json)]);
const body = streamOf([new TextEncoder().encode(yaml)]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
@@ -92,12 +103,12 @@ describe('AiAgentRolesCatalogProvider', () => {
});
it('fetchBundle remote malformed (role missing instructions) => BadGateway', async () => {
const json = JSON.stringify({
const yaml = stringifyYaml({
schemaVersion: 1,
language: 'fr',
roles: [{ slug: 'researcher', name: 'Chercheur' }],
});
const body = streamOf([new TextEncoder().encode(json)]);
const body = streamOf([new TextEncoder().encode(yaml)]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
@@ -153,8 +164,9 @@ describe('AiAgentRolesCatalogProvider', () => {
);
global.fetch = fetchMock as never;
const provider = makeProvider('https://catalog.example.com');
// Body shape is irrelevant; an empty stream parses to invalid JSON and
// throws, but the fetch call (with its init) still happened.
// Body shape is irrelevant; an empty stream parses to an empty YAML doc
// (null), fails the shape guard and throws, but the fetch call (with its
// init) still happened.
await expect(provider.fetchIndex()).rejects.toBeDefined();
expect(fetchMock).toHaveBeenCalledWith(
expect.any(String),
@@ -190,7 +202,7 @@ describe('AiAgentRolesCatalogProvider', () => {
});
it('small streamed body parses normally (cap not hit)', async () => {
const json = JSON.stringify({
const yaml = stringifyYaml({
schemaVersion: 1,
bundles: [
{
@@ -201,7 +213,7 @@ describe('AiAgentRolesCatalogProvider', () => {
},
],
});
const body = streamOf([new TextEncoder().encode(json)]);
const body = streamOf([new TextEncoder().encode(yaml)]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
@@ -227,7 +239,7 @@ describe('AiAgentRolesCatalogProvider', () => {
});
it('null body (no readable stream) => response.text() fallback parses', async () => {
const json = JSON.stringify({
const yaml = stringifyYaml({
schemaVersion: 1,
bundles: [
{
@@ -240,7 +252,7 @@ describe('AiAgentRolesCatalogProvider', () => {
});
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body: null, text: json })) as never;
.mockResolvedValue(mockResponse({ body: null, text: yaml })) as never;
const provider = makeProvider('https://catalog.example.com');
const index = await provider.fetchIndex();
expect(index.bundles[0].id).toBe('general');
@@ -259,8 +271,12 @@ describe('AiAgentRolesCatalogProvider', () => {
);
});
it('invalid JSON body => BadGateway (parse failure)', async () => {
const body = streamOf([new TextEncoder().encode('{not valid json')]);
it('invalid YAML body => BadGateway (parse failure)', async () => {
// An unterminated flow mapping is not valid YAML, so YAML.parse throws and
// the provider maps it to BadGateway (not a generic 500).
const body = streamOf([
new TextEncoder().encode('schemaVersion: {not: closed'),
]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
@@ -270,11 +286,28 @@ describe('AiAgentRolesCatalogProvider', () => {
);
});
it('malformed index.json (valid JSON, wrong shape) => BadGateway', async () => {
// Parses as JSON but fails isCatalogIndex (schemaVersion not a number).
it('YAML with a duplicate key (strict) => BadGateway (parse failure)', async () => {
// strict:true rejects duplicate mapping keys rather than last-wins coercing
// them — a defensive parse on untrusted input.
const body = streamOf([
new TextEncoder().encode(
JSON.stringify({ schemaVersion: 'x', bundles: [] }),
'schemaVersion: 1\nbundles: []\nschemaVersion: 2\n',
),
]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
const provider = makeProvider('https://catalog.example.com');
await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
BadGatewayException,
);
});
it('malformed index.yaml (valid YAML, wrong shape) => BadGateway', async () => {
// Parses as YAML but fails isCatalogIndex (schemaVersion not a number).
const body = streamOf([
new TextEncoder().encode(
stringifyYaml({ schemaVersion: 'x', bundles: [] }),
),
]);
global.fetch = jest
@@ -283,6 +316,36 @@ describe('AiAgentRolesCatalogProvider', () => {
const provider = makeProvider('https://catalog.example.com');
await expect(provider.fetchIndex()).rejects.toThrow(/malformed/i);
});
it('block-scalar instructions round-trips to the exact multi-line string', async () => {
  // Motivation for the YAML format: a long `instructions` prompt is written as
  // a literal block scalar (|-) so it diffs line by line. After parsing, the
  // provider must hand back the original multi-line string unchanged.
  const instructions = [
    'Line one of the prompt.',
    '',
    ' Indented bullet that must survive.',
    'Final line, no trailing newline.',
  ].join('\n');
  const bundleFixture = {
    schemaVersion: 1,
    language: 'en',
    roles: [{ slug: 'researcher', name: 'Researcher', instructions }],
  };
  const yaml = stringifyYaml(bundleFixture, { lineWidth: 0 });
  // Precondition: the serialized fixture must actually contain a literal
  // block scalar (`|`, possibly followed by an indentation indicator) rather
  // than a quoted/flow string — otherwise this test proves nothing.
  expect(yaml).toMatch(/instructions: \|/);

  const encoded = new TextEncoder().encode(yaml);
  const body = streamOf([encoded]);
  global.fetch = jest
    .fn()
    .mockResolvedValue(mockResponse({ body })) as never;

  const provider = makeProvider('https://catalog.example.com');
  const { roles } = await provider.fetchBundle('research', 'en');
  expect(roles[0].instructions).toBe(instructions);
});
});
describe('path-traversal / SSRF guard (^[a-z0-9-]+$)', () => {
@@ -304,4 +367,93 @@ describe('AiAgentRolesCatalogProvider', () => {
});
}
});
// ---------------------------------------------------------------------------
// Pin the REAL shipped catalog files (not synthetic fixtures). The JSON->YAML
// migration was a hand conversion, so the realistic failure is a hand-edit
// error in one of the 5 content YAML files (the index + the four per-bundle/
// lang files: index.yaml plus bundles/{editorial,research}/{en,ru}.yaml) — a
// quote/colon in a description, a broken
// emoji/arrow, a block-scalar indent slip that silently changes or drops
// instructions). Nothing else in CI parses these files — `scripts/check.mjs`
// is not wired into any turbo/husky/CI step — so this is the only automated
// guard over the shipped content. We read them straight off disk, parse with
// the SAME options the provider uses (strict + maxAliasCount, see parseYaml in
// the provider), and run them through the provider's own type guards. A future
// edit that breaks a real file fails here.
// ---------------------------------------------------------------------------
describe('real shipped catalog files (the YAML migration must not break them)', () => {
  // This spec sits in apps/server/src/core/ai-chat/roles/catalog/, while the
  // catalog itself is shipped in agent-roles-catalog/ at the repo root, i.e.
  // seven directories above this file.
  const CATALOG_DIR = join(
    __dirname,
    '../../../../../../../agent-roles-catalog',
  );
  // Untrusted-input parse options, kept identical to the provider's parseYaml.
  const PARSE_OPTS = { strict: true, maxAliasCount: 100 } as const;

  /** Reads one catalog file (path relative to CATALOG_DIR) and YAML-parses it. */
  function readCatalogYaml(rel: string): unknown {
    const text = readFileSync(join(CATALOG_DIR, rel), 'utf8');
    return parseYaml(text, PARSE_OPTS);
  }

  /**
   * Loads and validates the real index on demand. Because it only runs inside
   * a test body, a broken index.yaml fails just the tests in this describe and
   * does not abort collection of the whole spec file.
   */
  function loadRealIndex() {
    const parsed = readCatalogYaml('index.yaml');
    if (isCatalogIndex(parsed)) {
      return parsed;
    }
    throw new Error('Real index.yaml is not a valid catalog index');
  }

  it('index.yaml parses + validates with the provider guard', () => {
    const parsedIndex = readCatalogYaml('index.yaml');
    expect(isCatalogIndex(parsedIndex)).toBe(true);
  });

  it('editorial bundle still ships the fact-checker role', () => {
    const { bundles } = loadRealIndex();
    const editorial = bundles.find((b) => b.id === 'editorial');
    expect(editorial).toBeDefined();
    expect(editorial?.roles.map((r) => r.slug)).toContain('fact-checker');
  });

  // The real index drives this test (and is read lazily, inside the test):
  // each bundle/language pair it declares must exist as a file that parses,
  // passes the bundle guard, and carries exactly the declared role slugs —
  // nothing missing and nothing extra. Bundles or languages added to the index
  // later are therefore checked without touching this test.
  it('every declared bundle/language file is valid and in exact slug correspondence', () => {
    const { bundles } = loadRealIndex();
    // An empty index would make the loops below pass vacuously.
    expect(bundles.length).toBeGreaterThan(0);

    for (const { id, languages, roles } of bundles) {
      const declaredSlugs = roles.map((r) => r.slug);
      expect(languages.length).toBeGreaterThan(0);

      for (const lang of languages) {
        const parsedFile = readCatalogYaml(`bundles/${id}/${lang}.yaml`);
        expect(isCatalogBundleFile(parsedFile)).toBe(true);
        // Repeat the guard so TypeScript narrows `parsedFile` below.
        if (!isCatalogBundleFile(parsedFile)) continue;

        expect(parsedFile.language).toBe(lang);
        const fileSlugs = parsedFile.roles.map((r) => r.slug);

        // Direction 1: nothing declared in the index is missing from the file.
        declaredSlugs.forEach((slug) => {
          expect(fileSlugs).toContain(slug);
        });
        // Direction 2: the file holds no role the index does not declare, so
        // both slug lists must be equal once sorted. This flags a stray role
        // pasted into a bundle file by hand.
        expect([...fileSlugs].sort()).toEqual([...declaredSlugs].sort());

        expect(parsedFile.roles.length).toBeGreaterThan(0);
        for (const role of parsedFile.roles) {
          expect(isCatalogRole(role)).toBe(true);
          expect(typeof role.instructions).toBe('string');
          expect(role.instructions.trim().length).toBeGreaterThan(0);
          expect(role.name.trim().length).toBeGreaterThan(0);
        }
      }
    }
  });
});
});
@@ -4,6 +4,7 @@ import {
Injectable,
Logger,
} from '@nestjs/common';
import { parse as parseYamlDoc } from 'yaml';
import { EnvironmentService } from '../../../../integrations/environment/environment.service';
import {
CatalogBundleFile,
@@ -28,9 +29,11 @@ const MAX_BYTES = 1_000_000;
* base URL — REMOTE only; local-filesystem sources are no longer supported. The
* value is baked into the Docker image at build time (set per-branch in CI).
*
* The catalog is UNTRUSTED input: every file is JSON-parsed and run through a
* hand-written type guard before any field is exposed, and every dynamic path
* segment is validated against SEGMENT_RE up front (path-traversal + SSRF).
* The catalog is UNTRUSTED input: every file is YAML-parsed with a SAFE schema
* (standard JSON-compatible tags only — no custom `!!` tags / no code execution)
* and run through a hand-written type guard before any field is exposed, and
* every dynamic path segment is validated against SEGMENT_RE up front
* (path-traversal + SSRF).
*/
@Injectable()
export class AiAgentRolesCatalogProvider {
@@ -38,19 +41,19 @@ export class AiAgentRolesCatalogProvider {
constructor(private readonly environmentService: EnvironmentService) {}
/** Read + validate the top-level index (`index.json`). */
/** Read + validate the top-level index (`index.yaml`). */
async fetchIndex(): Promise<CatalogIndex> {
const raw = await this.readRelative('index.json');
const parsed = this.parseJson(raw, 'index.json');
const raw = await this.readRelative('index.yaml');
const parsed = this.parseYaml(raw, 'index.yaml');
if (!isCatalogIndex(parsed)) {
throw new BadGatewayException(
'Agent roles catalog index is malformed (index.json)',
'Agent roles catalog index is malformed (index.yaml)',
);
}
return parsed;
}
/** Read + validate one language file (`bundles/<bundleId>/<language>.json`). */
/** Read + validate one language file (`bundles/<bundleId>/<language>.yaml`). */
async fetchBundle(
bundleId: string,
language: string,
@@ -58,9 +61,9 @@ export class AiAgentRolesCatalogProvider {
// SECURITY: validate BEFORE building any path/URL (path-traversal + SSRF).
this.assertSegment(bundleId, 'bundleId');
this.assertSegment(language, 'language');
const rel = `bundles/${bundleId}/${language}.json`;
const rel = `bundles/${bundleId}/${language}.yaml`;
const raw = await this.readRelative(rel);
const parsed = this.parseJson(raw, rel);
const parsed = this.parseYaml(raw, rel);
if (!isCatalogBundleFile(parsed)) {
throw new BadGatewayException(
`Agent roles catalog bundle is malformed (${rel})`,
@@ -76,15 +79,29 @@ export class AiAgentRolesCatalogProvider {
}
}
/** JSON.parse with a clear BadGateway on malformed content. */
private parseJson(raw: string, rel: string): unknown {
/**
* Safe YAML parse with a clear BadGateway on malformed content. The catalog is
* untrusted, so we lean on the `yaml` library's default `core` schema, which
* only produces JSON-compatible values (objects/arrays/strings/numbers/
* booleans/null) and NEVER constructs arbitrary types or runs code — there is
* no `!!js`-style tag handling. `strict: true` rejects duplicate keys instead
* of silently coercing them. (Note: in yaml@2.8.x an unknown custom tag does
* NOT throw even under `strict` — the parser logs a warning and resolves the
* node to a plain scalar; the catalog stays safe because the default schema
* never builds arbitrary types from a tag and our hand-written type guards
* reject any value of the wrong shape.) The alias-expansion guard
* (`maxAliasCount`) bounds billion-laughs blow-ups (the 1 MB streaming
* cap already limits the input itself). JSON is a YAML subset, so a leftover
* `.json`-style body still parses here too.
*/
private parseYaml(raw: string, rel: string): unknown {
try {
return JSON.parse(raw);
return parseYamlDoc(raw, { strict: true, maxAliasCount: 100 });
} catch (err) {
const reason = shortError(err);
this.logger.error(`Agent roles catalog JSON parse failed (${rel}): ${reason}`);
this.logger.error(`Agent roles catalog YAML parse failed (${rel}): ${reason}`);
throw new BadGatewayException(
`Agent roles catalog file is not valid JSON (${rel}): ${reason}`,
`Agent roles catalog file is not valid YAML (${rel}): ${reason}`,
);
}
}
@@ -1,7 +1,8 @@
/**
* Catalog wire shapes. The catalog is curated, untrusted JSON (a GitHub repo or
* Catalog wire shapes. The catalog is curated, untrusted YAML (fetched from a
* remote http(s) URL), so every shape is validated by a hand-written type guard in
* the provider before any field is used — no zod / new deps on the server.
* the provider before any field is used — no zod on the server (YAML is parsed
* with the `yaml` library's safe, JSON-compatible schema).
*
* Localized fields (`name` / `description` at the bundle level) are
* `Record<language, string>` so one bundle serves many UI languages; per-role
@@ -22,7 +23,7 @@ export interface CatalogRole {
modelConfig?: Record<string, unknown> | null;
}
/** A single language file: `bundles/<id>/<language>.json`. */
/** A single language file: `bundles/<id>/<language>.yaml`. */
export interface CatalogBundleFile {
schemaVersion: number;
language: string;
@@ -40,7 +41,7 @@ export interface CatalogBundleMeta {
roles: { slug: string; version: number }[];
}
/** Top-level catalog index: `index.json`. */
/** Top-level catalog index: `index.yaml`. */
export interface CatalogIndex {
schemaVersion: number;
bundles: CatalogBundleMeta[];
@@ -63,6 +63,12 @@ describe('AiChatToolsService deletePage guardrail (H4)', () => {
{} as never,
{} as never,
{} as never,
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
// even though these tests never execute it — return a no-op sink so the
// tool wiring in forUser() succeeds.
{
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
} as never,
);
});
@@ -175,6 +181,12 @@ describe('AiChatToolsService expanded toolset guardrails', () => {
{} as never,
{} as never,
{} as never,
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
// even though these tests never execute it — return a no-op sink so the
// tool wiring in forUser() succeeds.
{
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
} as never,
);
});
@@ -290,6 +302,12 @@ describe('AiChatToolsService node-arg JSON-string coercion', () => {
{} as never,
{} as never,
{} as never,
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
// even though these tests never execute it — return a no-op sink so the
// tool wiring in forUser() succeeds.
{
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
} as never,
);
});
@@ -440,6 +458,12 @@ describe('AiChatToolsService model-friendly input validation (#190)', () => {
{} as never,
{} as never,
{} as never,
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
// even though these tests never execute it — return a no-op sink so the
// tool wiring in forUser() succeeds.
{
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
} as never,
);
});
@@ -16,6 +16,7 @@ import {
import { resolveCurrentPageResult } from './current-page.util';
import { parseNodeArg } from './parse-node-arg';
import { modelFriendlyInput } from './model-friendly-input';
import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
/**
* Per-user, per-request adapter that exposes Docmost READ operations to the
@@ -41,6 +42,8 @@ export class AiChatToolsService {
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
private readonly spaceMemberRepo: SpaceMemberRepo,
private readonly pagePermissionRepo: PagePermissionRepo,
// Shared singleton in-RAM blob store backing the stash tool.
private readonly sandboxStore: SandboxStore,
) {}
async forUser(
@@ -86,11 +89,17 @@ export class AiChatToolsService {
aiChatId,
});
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
// package needs to keep its mirror counts honest under FIFO eviction (the
// package never touches env or the store). asSink() centralizes the uri↔id
// mapping next to putAndLink, shared with the embedded-MCP wiring site.
const { DocmostClient, sharedToolSpecs } = await loadDocmostMcp();
const client: DocmostClientLike = new DocmostClient({
apiUrl,
getToken,
getCollabToken,
sandbox: this.sandboxStore.asSink(),
});
// Build an ai-SDK tool from a shared, zod-agnostic spec. The spec owns the
@@ -625,6 +634,14 @@ export class AiChatToolsService {
async ({ pageId, edits }) => await client.editPageText(pageId, edits),
),
// Returns ONLY the short link object — never the document body — so a
// large page can be handed to an external consumer without bloating
// context.
stashPage: sharedTool(
sharedToolSpecs.stashPage,
async ({ pageId }) => await client.stashPage(pageId),
),
patchNode: tool({
description:
'Replace a single content block (by id) with a new ProseMirror ' +
@@ -155,6 +155,14 @@ export interface DocmostClientLike {
commentId: string,
resolved: boolean,
): Promise<Record<string, unknown>>;
// Serialize a page + mirror its internal images into the blob sandbox; returns
// ONLY a short anonymous URL (the body never enters the model context).
stashPage(pageId: string): Promise<{
uri: string;
sha256: string;
size: number;
images: { mirrored: number; failed: number };
}>;
}
export type DocmostClientConfig = {
@@ -162,6 +170,18 @@ export type DocmostClientConfig = {
getToken: () => Promise<string>;
// Provenance collab-token provider for content mutations (signed agent claim).
getCollabToken?: () => Promise<string>;
// Optional blob-sandbox sink for the stash tool. `put` stores a blob in the
// host's in-RAM SandboxStore and returns the anonymous read URL + integrity.
// The optional `has`/`evict` probes let stashPage keep its mirror counts
// honest under the store's FIFO eviction (mirror of the package's sink type).
sandbox?: {
put: (
buf: Buffer,
mime: string,
) => { uri: string; sha256: string; size: number };
has?: (uri: string) => boolean;
evict?: (uri: string) => void;
};
};
export interface DocmostClientCtor {
@@ -172,4 +172,148 @@ describe('EnvironmentService', () => {
).toBe(false);
});
});
describe('getSandboxTtlMs', () => {
  const DEFAULT_TTL_MS = 3_600_000;

  // Builds a service over a minimal ConfigService stand-in. Its get(key, def)
  // answers with the supplied TTL for SANDBOX_TTL_MS (or `def` when none was
  // supplied) and with `def` for any other key, mirroring the (key, default)
  // call shape the service uses.
  function build(sandboxTtl?: string) {
    const configStub = {
      get(key: string, def?: string) {
        if (key !== 'SANDBOX_TTL_MS') {
          return def;
        }
        return sandboxTtl ?? def;
      },
    };
    return new EnvironmentService(configStub as any);
  }

  it.each(['0', '-5', 'abc'])(
    'falls back to the 3600000 default for invalid value %s',
    (value) => {
      const service = build(value);
      expect(service.getSandboxTtlMs()).toBe(DEFAULT_TTL_MS);
    },
  );

  it('returns the parsed value for a valid positive integer', () => {
    const service = build('120000');
    expect(service.getSandboxTtlMs()).toBe(120_000);
  });

  it('uses the 3600000 default when SANDBOX_TTL_MS is unset', () => {
    const service = build(undefined);
    expect(service.getSandboxTtlMs()).toBe(DEFAULT_TTL_MS);
  });
});
// All three byte-cap getters go through the same getPositiveIntEnv() helper as
// the TTL getter. An unusable value ('0', '-5', 'abc') therefore yields the
// documented default, while a valid positive integer comes back parsed. Since
// parsing uses parseInt, '1.5' is truncated to 1 — a valid positive integer —
// and is accepted rather than rejected, exactly like the TTL getter.
const byteCapCases = [
  {
    name: 'getSandboxMaxBytes',
    key: 'SANDBOX_MAX_BYTES',
    def: 8_388_608,
    getter: (s: EnvironmentService) => s.getSandboxMaxBytes(),
  },
  {
    name: 'getSandboxMaxImageBytes',
    key: 'SANDBOX_MAX_IMAGE_BYTES',
    def: 20_971_520,
    getter: (s: EnvironmentService) => s.getSandboxMaxImageBytes(),
  },
  {
    name: 'getSandboxMaxTotalBytes',
    key: 'SANDBOX_MAX_TOTAL_BYTES',
    def: 134_217_728,
    getter: (s: EnvironmentService) => s.getSandboxMaxTotalBytes(),
  },
];

describe.each(byteCapCases)('$name', ({ key, def, getter }) => {
  // ConfigService stand-in: the cap under test resolves to `value` (or to the
  // caller's default when `value` is undefined); every other key resolves to
  // the caller's default.
  function build(value?: string) {
    const configStub = {
      get(k: string, d?: string) {
        return k === key ? (value ?? d) : d;
      },
    };
    return new EnvironmentService(configStub as any);
  }

  it.each(['0', '-5', 'abc'])(
    `falls back to the ${def} default for invalid value %s`,
    (value) => {
      const service = build(value);
      expect(getter(service)).toBe(def);
    },
  );

  it('returns the parsed value for a valid positive integer', () => {
    const service = build('4096');
    expect(getter(service)).toBe(4096);
  });

  it('truncates a non-integer like "1.5" to 1 via parseInt (not rejected)', () => {
    const service = build('1.5');
    expect(getter(service)).toBe(1);
  });

  it(`uses the ${def} default when the env is unset`, () => {
    const service = build(undefined);
    expect(getter(service)).toBe(def);
  });
});
// getPositiveIntEnv remembers, in its `invalidPositiveIntWarned` set, which
// keys it has already complained about, so an invalid value is logged a single
// time per key instead of on each getter call (the sandbox calls these getters
// on every put). The tests below lock that behavior in: reverting to per-call
// logging would break them.
describe('invalid-value warn dedup', () => {
  // Creates a service where every key in `invalidKeys` resolves to '-5' (all
  // other keys resolve to the caller's default) and silences + spies on the
  // service logger's warn().
  function setup(invalidKeys: string[]) {
    const service = new EnvironmentService({
      get: (k: string, d?: string) => (invalidKeys.includes(k) ? '-5' : d),
    } as any);
    const warnSpy = jest
      .spyOn((service as any).logger, 'warn')
      .mockImplementation(() => undefined);
    return { service, warnSpy };
  }

  it('warns only once per key across repeated getter calls', () => {
    const { service, warnSpy } = setup(['SANDBOX_MAX_TOTAL_BYTES']);

    service.getSandboxMaxTotalBytes();
    service.getSandboxMaxTotalBytes();

    expect(warnSpy).toHaveBeenCalledTimes(1);
  });

  it('warns independently per key (dedup is per-key, not global)', () => {
    // Both keys are invalid and each is read once, so two warnings are
    // expected — which can only happen if the guard tracks keys individually
    // rather than using one global "already warned" flag.
    const { service, warnSpy } = setup([
      'SANDBOX_MAX_BYTES',
      'SANDBOX_MAX_TOTAL_BYTES',
    ]);

    service.getSandboxMaxBytes();
    service.getSandboxMaxTotalBytes();

    expect(warnSpy).toHaveBeenCalledTimes(2);
  });
});
describe('getSandboxPublicUrl', () => {
  // ConfigService stand-in that answers for the two keys this suite stubs
  // (SANDBOX_PUBLIC_URL and APP_URL); anything else gets the caller's default.
  function build(vals: { sandboxUrl?: string; appUrl?: string }) {
    const configStub = {
      get(key: string, def?: string) {
        switch (key) {
          case 'SANDBOX_PUBLIC_URL':
            return vals.sandboxUrl ?? def;
          case 'APP_URL':
            return vals.appUrl ?? def;
          default:
            return def;
        }
      },
    };
    return new EnvironmentService(configStub as any);
  }

  it('uses SANDBOX_PUBLIC_URL and trims a trailing slash', () => {
    const service = build({ sandboxUrl: 'https://docs.example.com/' });
    expect(service.getSandboxPublicUrl()).toBe('https://docs.example.com');
  });

  it('falls back to APP_URL (origin) when SANDBOX_PUBLIC_URL is unset', () => {
    const service = build({ appUrl: 'https://app.example.com' });
    expect(service.getSandboxPublicUrl()).toBe('https://app.example.com');
  });
});
});
@@ -1,9 +1,15 @@
import { Injectable } from '@nestjs/common';
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import ms, { StringValue } from 'ms';
@Injectable()
export class EnvironmentService {
private readonly logger = new Logger(EnvironmentService.name);
// Env keys already warned about for an invalid value (one-shot per key, so a
// bad SANDBOX_* value is not logged on every blob put). Mirrors the original
// sandboxTtlWarned guard, generalized across the TTL + the three byte caps.
private readonly invalidPositiveIntWarned = new Set<string>();
constructor(private configService: ConfigService) {}
getNodeEnv(): string {
@@ -425,4 +431,63 @@ export class EnvironmentService {
getGitSyncServiceUserId(): string | undefined {
return this.configService.get<string>('GIT_SYNC_SERVICE_USER_ID');
}
// --- Blob sandbox (in-RAM ephemeral blob transfer; see SandboxModule) ---
// Base URL used to build the sandbox `uri`. The external consumer that fetches
// blobs has to be able to reach it over the network, so a loopback address is
// wrong whenever that consumer runs on a different host. When
// SANDBOX_PUBLIC_URL is unset (or empty) the app URL is used instead, which
// keeps single-host deployments working without extra config; set the variable
// explicitly when the consumer is remote.
getSandboxPublicUrl(): string {
  const configured = this.configService.get<string>('SANDBOX_PUBLIC_URL');
  // `||` on purpose: an empty string counts as "not configured".
  const base = configured || this.getAppUrl();
  // Strip trailing slashes so `${base}/api/sb/${id}` never contains `//`.
  return base.replace(/\/+$/, '');
}
// Reads env `key` as a positive integer (a TTL in ms or a byte cap), using
// `def` both as the lookup default and as the fallback result.
//
// The value is parsed with parseInt(…, 10), so a string with a leading integer
// prefix such as "1.5" yields 1 and is accepted. Anything that does not parse
// to an integer > 0 would quietly break the sandbox (blobs expiring at once,
// or every put rejected by a 0-byte cap), so instead the default is returned
// and a warning is logged — only the first time per key. Blob bodies are never
// part of the log output.
private getPositiveIntEnv(key: string, def: number): number {
  const raw = this.configService.get<string>(key, String(def));
  const value = parseInt(raw, 10);

  if (Number.isInteger(value) && value > 0) {
    return value;
  }

  // One-shot warning per key; later calls fall back silently.
  const alreadyWarned = this.invalidPositiveIntWarned.has(key);
  if (!alreadyWarned) {
    this.invalidPositiveIntWarned.add(key);
    this.logger.warn(
      `Invalid ${key} (must be a positive integer); falling back to the ${def} default`,
    );
  }
  return def;
}
// How long a sandbox blob lives, in milliseconds (SANDBOX_TTL_MS, default one
// hour). There are no access tokens: the capability model is the unguessable
// UUID plus this short TTL plus TLS. A value that is not a positive integer
// would expire every blob immediately (silent 404s), so such values are
// replaced by the one-hour default, with a single warning rather than one per
// put.
getSandboxTtlMs(): number {
  const oneHourMs = 60 * 60 * 1000;
  return this.getPositiveIntEnv('SANDBOX_TTL_MS', oneHourMs);
}
// Size limit, in bytes, for a single non-image blob — i.e. the serialized
// document (SANDBOX_MAX_BYTES, default 8 MiB).
getSandboxMaxBytes(): number {
  const eightMiB = 8 * 1024 * 1024;
  return this.getPositiveIntEnv('SANDBOX_MAX_BYTES', eightMiB);
}
// Size limit, in bytes, for a single mirrored image blob
// (SANDBOX_MAX_IMAGE_BYTES, default 20 MiB).
getSandboxMaxImageBytes(): number {
  const twentyMiB = 20 * 1024 * 1024;
  return this.getPositiveIntEnv('SANDBOX_MAX_IMAGE_BYTES', twentyMiB);
}
// Memory guard: upper bound, in bytes, on everything the store holds at once
// (SANDBOX_MAX_TOTAL_BYTES, default 128 MiB). When a put would exceed it, the
// store makes room by evicting its oldest entries.
getSandboxMaxTotalBytes(): number {
  const oneHundredTwentyEightMiB = 128 * 1024 * 1024;
  return this.getPositiveIntEnv(
    'SANDBOX_MAX_TOTAL_BYTES',
    oneHundredTwentyEightMiB,
  );
}
}
@@ -2,6 +2,7 @@ import {
IsIn,
IsNotEmpty,
IsNotIn,
IsNumberString,
IsOptional,
IsString,
IsUrl,
@@ -219,6 +220,35 @@ export class EnvironmentVariables {
@IsNotEmpty()
@IsString()
GIT_SYNC_SERVICE_USER_ID: string;
// --- Blob sandbox (in-RAM ephemeral blob transfer; see SandboxModule) ---
// Optional base URL for sandbox blob links. The ValidateIf condition skips the
// URL check when the value is '' or null/undefined; any other value must be an
// http or https URL. `require_tld: false` presumably lets hosts without a TLD
// (e.g. localhost or an internal hostname) pass — confirm.
@IsOptional()
@ValidateIf((obj) => obj.SANDBOX_PUBLIC_URL != '' && obj.SANDBOX_PUBLIC_URL != null)
@IsUrl(
{ protocols: ['http', 'https'], require_tld: false },
{
message:
'SANDBOX_PUBLIC_URL must be a valid http(s) URL reachable by the external blob consumer',
},
)
SANDBOX_PUBLIC_URL: string;
// Optional numeric-string settings: the blob TTL (ms) and three byte caps.
// NOTE(review): every message below says "must be an integer", but the
// decorator is IsNumberString with default (empty) options — confirm against
// the class-validator docs whether signed or decimal strings such as "-5" or
// "1.5" also pass. If they do, `{ no_symbols: true }` would match the message
// more closely.
@IsOptional()
@IsNumberString({}, { message: 'SANDBOX_TTL_MS must be an integer (milliseconds)' })
SANDBOX_TTL_MS: string;
// Cap for a single non-image blob, in bytes (per the message text).
@IsOptional()
@IsNumberString({}, { message: 'SANDBOX_MAX_BYTES must be an integer (bytes)' })
SANDBOX_MAX_BYTES: string;
// Cap for a single image blob, in bytes (per the message text).
@IsOptional()
@IsNumberString({}, { message: 'SANDBOX_MAX_IMAGE_BYTES must be an integer (bytes)' })
SANDBOX_MAX_IMAGE_BYTES: string;
// Cap on the total bytes held, in bytes (per the message text).
@IsOptional()
@IsNumberString({}, { message: 'SANDBOX_MAX_TOTAL_BYTES must be an integer (bytes)' })
SANDBOX_MAX_TOTAL_BYTES: string;
}
export function validate(config: Record<string, any>) {
@@ -131,10 +131,25 @@ export class FailedLoginLimiter {
}
// The per-session DocmostMcpConfig shape understood by @docmost/mcp: either the
// service-account credentials variant OR the per-user getToken variant.
export type DocmostMcpConfig =
// service-account credentials variant OR the per-user getToken variant. The
// optional `sandbox` sink (blob store for the stash tool) is common to both and
// injected by McpService after the auth decision.
export type DocmostMcpConfig = (
| { apiUrl: string; email: string; password: string }
| { apiUrl: string; getToken: () => Promise<string> };
| { apiUrl: string; getToken: () => Promise<string> }
) & {
sandbox?: {
put: (
buf: Buffer,
mime: string,
) => { uri: string; sha256: string; size: number };
// Optional live/evict probes the package uses to keep stash_page's mirror
// counts honest under the store's FIFO eviction (mirror of the package's
// sink type); older bindings omit them.
has?: (uri: string) => boolean;
evict?: (uri: string) => void;
};
};
export interface ResolvedMcpAuth {
config: DocmostMcpConfig;
@@ -109,13 +109,13 @@ function makeService(opts: {
};
const service = new McpService(
undefined as never, // environmentService
undefined as never, // workspaceRepo
undefined as never, // authService
undefined as never, // tokenService
undefined as never, // userRepo
undefined as never, // userSessionRepo
moduleRef as never, // moduleRef (read by the MFA branch)
undefined as never, // sandboxStore (unused by the login-gate path)
);
// Stop the constructor's unref'd sweep timer leaking across tests.
service.onModuleDestroy();
@@ -2,17 +2,15 @@ import { Module } from '@nestjs/common';
import { McpController } from './mcp.controller';
import { McpService } from './mcp.service';
import { DatabaseModule } from '@docmost/db/database.module';
import { EnvironmentModule } from '../environment/environment.module';
import { AuthModule } from '../../core/auth/auth.module';
import { TokenModule } from '../../core/auth/token.module';
// Community MCP feature: the server itself serves the Model Context Protocol
// over HTTP at /mcp. DatabaseModule (global) provides WorkspaceRepo and
// EnvironmentModule (global) provides EnvironmentService. AuthModule supplies
// AuthService (per-user HTTP-Basic login validation) and TokenModule supplies
// TokenService (Bearer access-JWT verification for the token fallback).
// over HTTP at /mcp. DatabaseModule (global) provides WorkspaceRepo. AuthModule
// supplies AuthService (per-user HTTP-Basic login validation) and TokenModule
// supplies TokenService (Bearer access-JWT verification for the token fallback).
@Module({
imports: [DatabaseModule, EnvironmentModule, AuthModule, TokenModule],
imports: [DatabaseModule, AuthModule, TokenModule],
controllers: [McpController],
providers: [McpService],
})
@@ -9,7 +9,6 @@ import { pathToFileURL } from 'node:url';
import { esmImport } from '../../common/helpers/esm-import';
import { IncomingMessage } from 'node:http';
import { FastifyReply, FastifyRequest } from 'fastify';
import { EnvironmentService } from '../environment/environment.service';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { UserSessionRepo } from '@docmost/db/repos/session/user-session.repo';
@@ -31,6 +30,7 @@ import {
DocmostMcpConfig,
ResolvedMcpAuth,
} from './mcp-auth.helpers';
import { SandboxStore } from '../sandbox/sandbox.store';
// Minimal shape of the embedded MCP HTTP handler exported by @docmost/mcp/http.
interface McpHttpHandler {
@@ -88,13 +88,14 @@ export class McpService implements OnModuleDestroy {
private readonly sweepTimer: NodeJS.Timeout;
constructor(
private readonly environmentService: EnvironmentService,
private readonly workspaceRepo: WorkspaceRepo,
private readonly authService: AuthService,
private readonly tokenService: TokenService,
private readonly userRepo: UserRepo,
private readonly userSessionRepo: UserSessionRepo,
private readonly moduleRef: ModuleRef,
// Shared singleton in-RAM blob store backing the stash tool.
private readonly sandboxStore: SandboxStore,
) {
this.sweepTimer = setInterval(() => {
try {
@@ -322,7 +323,11 @@ export class McpService implements OnModuleDestroy {
// Should never happen: handle() always stashes before delegating.
throw new UnauthorizedException('MCP authentication missing.');
}
return resolved.config;
// Inject the blob-sandbox sink after the auth decision so stash_page
// can store blobs in the shared in-RAM store regardless of which
// credential variant resolved. The sink (put/has/evict + uri↔id
// mapping) is owned by SandboxStore.asSink().
return { ...resolved.config, sandbox: this.sandboxStore.asSink() };
},
{
identify: (req: IncomingMessage) => {
@@ -0,0 +1,6 @@
// Canonical location of the anonymous blob-sandbox route. The controller lives
// under the global `/api` prefix, so its decorator needs only the bare segment,
// whereas the public blob URL and the workspace-gate exclusion need the full
// path. The full path is built from the segment so the two cannot drift apart.
export const SANDBOX_ROUTE_SEGMENT = 'sb';
export const SANDBOX_API_PATH = '/api/' + SANDBOX_ROUTE_SEGMENT;
@@ -0,0 +1,265 @@
import { SandboxController } from './sandbox.controller';
import { SandboxEntry } from './sandbox.store';
// Recording stand-in for the part of FastifyReply the controller touches.
// status(), header(), headers() and send() each return the reply itself so
// calls can be chained. Everything they receive is captured on `_sent`, with
// header names lower-cased so assertions do not depend on casing.
function makeRes() {
  const captured = {
    status: 200,
    headers: {} as Record<string, any>,
    body: undefined as any,
  };
  const reply: any = { _sent: captured };

  reply.status = (code: number) => {
    captured.status = code;
    return reply;
  };
  reply.header = (key: string, value: any) => {
    captured.headers[key.toLowerCase()] = value;
    return reply;
  };
  reply.headers = (obj: Record<string, any>) => {
    Object.keys(obj).forEach((name) => {
      captured.headers[name.toLowerCase()] = obj[name];
    });
    return reply;
  };
  reply.send = (body?: any) => {
    captured.body = body;
    return reply;
  };

  return reply;
}
// Minimal request stand-in: the controller reads only `headers` from it.
function makeReq(headers: Record<string, any> = {}) {
  const request: any = { headers };
  return request;
}
// Blob id used by every happy-path test. It is a syntactically valid v4 UUID:
// the third group starts with the version nibble 4 and the fourth group with
// the variant nibble 8. The controller validates ids with the shared `uuid`
// validator, which is stricter than a bare hex-shape regex, so the id must
// carry a real version/variant.
const VALID_ID = 'aaaaaaaa-bbbb-4ccc-8ddd-eeeeeeeeeeee';
// Builds a stored-blob fixture that expires one minute from now, so it is
// still live for the duration of a test.
function entry(buf: Buffer, mime: string, sha256: string): SandboxEntry {
  const oneMinuteMs = 60_000;
  const expiresAt = Date.now() + oneMinuteMs;
  return { buf, mime, sha256, expiresAt };
}
describe('SandboxController', () => {
  // The CSP the controller is expected to attach to every 200 response.
  const EXPECTED_CSP = "base-uri 'none'; object-src 'self'; default-src 'self';";
  // Shape of the Cache-Control value on both the 200 and the 304 branch.
  const CACHE_CONTROL_SHAPE = /^private, max-age=\d+, immutable$/;

  // Runs one GET against a controller whose stub store returns `stored` from
  // get(), and hands back the stub plus everything written to the fake reply.
  async function fetchBlob({
    stored,
    id = VALID_ID,
    headers,
  }: {
    stored?: SandboxEntry;
    id?: string;
    headers?: Record<string, any>;
  } = {}) {
    const store = { get: jest.fn().mockReturnValue(stored) };
    const controller = new SandboxController(store as any);
    const res = makeRes();
    await controller.get(id, makeReq(headers), res);
    return { store, sent: res._sent };
  }

  // One-byte JSON blob fixture used by the conditional-request tests.
  const jsonBlob = (sha: string) =>
    entry(Buffer.from('x'), 'application/json', sha);

  it('serves 200 with body, Content-Type, Content-Length and sha256 ETag', async () => {
    const buf = Buffer.from('{"ok":true}', 'utf8');
    const sha = 'a'.repeat(64);

    const { store, sent } = await fetchBlob({
      stored: entry(buf, 'application/json', sha),
    });

    expect(store.get).toHaveBeenCalledWith(VALID_ID);
    expect(sent.status).toBe(200);
    expect(sent.headers['content-type']).toBe('application/json');
    expect(sent.headers['content-length']).toBe(buf.length);
    expect(sent.headers['etag']).toBe(`"${sha}"`);
    expect(sent.body).toBe(buf);
  });

  it('returns 404 for a missing/expired blob', async () => {
    const { sent } = await fetchBlob({ stored: undefined });

    expect(sent.status).toBe(404);
    expect(sent.body).toBeUndefined();
  });

  it('returns 404 for a non-UUID id WITHOUT touching the store (anti-traversal)', async () => {
    const { store, sent } = await fetchBlob({ id: '../../etc/passwd' });

    expect(store.get).not.toHaveBeenCalled();
    expect(sent.status).toBe(404);
  });

  it('returns 304 (no body) when If-None-Match matches the ETag', async () => {
    const sha = 'b'.repeat(64);

    const { sent } = await fetchBlob({
      stored: jsonBlob(sha),
      headers: { 'if-none-match': `"${sha}"` },
    });

    expect(sent.status).toBe(304);
    expect(sent.body).toBeUndefined();
    expect(sent.headers['etag']).toBe(`"${sha}"`);
  });

  it('accepts a bare (unquoted) sha256 in If-None-Match too', async () => {
    const sha = 'c'.repeat(64);

    const { sent } = await fetchBlob({
      stored: jsonBlob(sha),
      headers: { 'if-none-match': sha },
    });

    expect(sent.status).toBe(304);
  });

  it('serves 200 when If-None-Match does NOT match', async () => {
    const sha = 'd'.repeat(64);

    const { sent } = await fetchBlob({
      stored: jsonBlob(sha),
      headers: { 'if-none-match': '"stale"' },
    });

    expect(sent.status).toBe(200);
  });

  it('returns 304 for a wildcard "*" If-None-Match', async () => {
    const sha = 'e'.repeat(64);

    const { sent } = await fetchBlob({
      stored: jsonBlob(sha),
      headers: { 'if-none-match': '*' },
    });

    expect(sent.status).toBe(304);
  });

  it('returns 304 for a weak validator W/"<sha>"', async () => {
    const sha = 'f'.repeat(64);

    const { sent } = await fetchBlob({
      stored: jsonBlob(sha),
      headers: { 'if-none-match': `W/"${sha}"` },
    });

    expect(sent.status).toBe(304);
  });

  it('returns 304 when a comma-separated If-None-Match list contains the sha', async () => {
    const sha = '1'.repeat(64);

    const { sent } = await fetchBlob({
      stored: jsonBlob(sha),
      headers: { 'if-none-match': `"other", "${sha}"` },
    });

    expect(sent.status).toBe(304);
  });

  it('sets a private, immutable Cache-Control with a max-age within the TTL on 200', async () => {
    // The blob expires ~30s from now, so the floored max-age must fall inside
    // the [0, 60] window asserted below.
    const stored: SandboxEntry = {
      buf: Buffer.from('x'),
      mime: 'application/json',
      sha256: '2'.repeat(64),
      expiresAt: Date.now() + 30_000,
    };

    const { sent } = await fetchBlob({ stored });

    expect(sent.status).toBe(200);
    const cc = sent.headers['cache-control'] as string;
    expect(cc).toMatch(CACHE_CONTROL_SHAPE);
    const maxAge = Number(cc.match(/max-age=(\d+)/)![1]);
    expect(maxAge).toBeGreaterThanOrEqual(0);
    expect(maxAge).toBeLessThanOrEqual(60);
  });

  it('emits Cache-Control alongside ETag on the 304 branch', async () => {
    const sha = '3'.repeat(64);

    const { sent } = await fetchBlob({
      stored: jsonBlob(sha),
      headers: { 'if-none-match': `"${sha}"` },
    });

    expect(sent.status).toBe(304);
    expect(sent.headers['cache-control']).toMatch(CACHE_CONTROL_SHAPE);
  });

  it('sets nosniff + restrictive CSP and serves an allowlisted image inline', async () => {
    const sha = '4'.repeat(64);

    const { sent } = await fetchBlob({
      stored: entry(Buffer.from('x'), 'image/png', sha),
    });

    expect(sent.status).toBe(200);
    expect(sent.headers['x-content-type-options']).toBe('nosniff');
    expect(sent.headers['content-security-policy']).toBe(EXPECTED_CSP);
    expect(sent.headers['content-disposition']).toBe('inline');
  });

  it('forces an SVG to download (attachment) while keeping nosniff + CSP', async () => {
    const sha = '5'.repeat(64);

    const { sent } = await fetchBlob({
      stored: entry(Buffer.from('<svg/>'), 'image/svg+xml', sha),
    });

    expect(sent.status).toBe(200);
    expect(sent.headers['content-disposition']).toBe('attachment');
    expect(sent.headers['x-content-type-options']).toBe('nosniff');
    expect(sent.headers['content-security-policy']).toBe(EXPECTED_CSP);
  });

  it('forces text/html to download (attachment) while keeping nosniff + CSP', async () => {
    const sha = '6'.repeat(64);

    const { sent } = await fetchBlob({
      stored: entry(Buffer.from('<h1>x</h1>'), 'text/html', sha),
    });

    expect(sent.status).toBe(200);
    expect(sent.headers['content-disposition']).toBe('attachment');
    expect(sent.headers['x-content-type-options']).toBe('nosniff');
    expect(sent.headers['content-security-policy']).toBe(EXPECTED_CSP);
  });
});
@@ -0,0 +1,130 @@
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
import { FastifyReply, FastifyRequest } from 'fastify';
import { validate as isValidUUID } from 'uuid';
import { SandboxStore } from './sandbox.store';
import { SANDBOX_ROUTE_SEGMENT } from './sandbox.constants';
// Allowlist of MIME types a browser may render inline. Only raster image
// formats are listed. SVG is left out on purpose because it can carry script,
// and so are text/html and the JSON document blob. Any type missing from this
// set is served as an attachment, so an attacker-controlled mime cannot
// execute script on this anonymous, same-origin route.
const INLINE_SAFE_MIME = new Set<string>([
  'image/png',
  'image/jpeg',
  'image/gif',
  'image/webp',
  'image/avif',
]);
/**
 * Anonymous read endpoint for the in-RAM blob sandbox, served as
 * `GET /api/sb/:id` (the controller segment sits under the global `/api`
 * prefix).
 *
 * No auth guard is attached, so the route is exempt from Docmost session auth
 * in the same way as the public attachment route `GET /api/files/public/...`.
 * It is also listed in the workspace-resolution preHandler's excludedPaths in
 * main.ts, so a remote consumer that carries no workspace host is not turned
 * away with "Workspace not found".
 *
 * Blobs are only ever looked up in the SandboxStore by a validated UUID; `:id`
 * is never treated as a filesystem path, so there is nothing to traverse. The
 * endpoint never returns tokens and never answers 401.
 *
 * XSS hardening follows the public attachment route: each 200 response carries
 * `X-Content-Type-Options: nosniff` and a restrictive CSP, and any mime outside
 * the inline-safe allowlist (svg/html/the JSON document blob) is sent as an
 * attachment, so an attacker-controlled `entry.mime` cannot run script on this
 * same-origin anonymous route.
 */
@Controller(SANDBOX_ROUTE_SEGMENT)
export class SandboxController {
  constructor(private readonly store: SandboxStore) {}

  /**
   * Serves one blob by id.
   *
   * Replies 404 for an id that is not a UUID or that the store does not hold,
   * 304 when the request's If-None-Match matches the blob's sha256, and
   * otherwise 200 with the blob body.
   */
  @Get(':id')
  async get(
    @Param('id') id: string,
    @Req() req: FastifyRequest,
    @Res() res: FastifyReply,
  ): Promise<void> {
    // The UUID check (shared `uuid` validator, as on the attachment routes) is
    // input hygiene against traversal-style ids, NOT authorization: the
    // capability is the unguessable id plus the short TTL plus TLS. An invalid
    // id never reaches the store, and a missing blob looks exactly like an
    // expired one to the caller, by design.
    const entry = isValidUUID(id) ? this.store.get(id) : undefined;
    if (!entry) {
      res.status(404).send();
      return;
    }

    // Strong validator: the quoted sha256 with no W/ prefix. It is the value
    // computed at put() time, so a consumer can detect a truncated or
    // corrupted body.
    const etag = `"${entry.sha256}"`;

    // Freshness is worked out before the conditional check so that a 304
    // still carries the Cache-Control directives; otherwise a revalidating
    // client would forget how long the blob stays fresh.
    const remainingMs = entry.expiresAt - Date.now();
    const ttlSeconds = Math.max(0, Math.floor(remainingMs / 1000));
    // Capability URL: keep it out of shared caches; immutable for its TTL.
    const cacheControl = `private, max-age=${ttlSeconds}, immutable`;

    // The blob never changes, so its validator is stable for its whole life
    // and a match can be answered with a body-less 304.
    const notModified = this.ifNoneMatchMatches(
      req.headers['if-none-match'],
      entry.sha256,
    );
    if (notModified) {
      res
        .status(304)
        .header('ETag', etag)
        .header('Cache-Control', cacheControl)
        .send();
      return;
    }

    // Anything off the allowlist is forced to download so it can never run
    // script inline on this origin.
    let disposition = 'attachment';
    if (INLINE_SAFE_MIME.has(entry.mime)) {
      disposition = 'inline';
    }

    // @Res() plus res.send(Buffer) with an explicit Content-Type lets the
    // binary body bypass the global JSON response transform/serializer.
    const responseHeaders = {
      'Content-Type': entry.mime,
      'Content-Length': entry.buf.length,
      ETag: etag,
      'Cache-Control': cacheControl,
      'X-Content-Type-Options': 'nosniff',
      'Content-Security-Policy':
        "base-uri 'none'; object-src 'self'; default-src 'self';",
      'Content-Disposition': disposition,
    };
    res.status(200).headers(responseHeaders).send(entry.buf);
  }

  /**
   * Reports whether an If-None-Match header names the blob's sha256.
   *
   * A lone `*` matches any blob. Otherwise the header is split on commas and
   * each item is compared after trimming and after dropping a leading `W/` and
   * a leading/trailing double quote, so the quoted ETag, a bare sha256 and a
   * weak validator are all accepted.
   */
  private ifNoneMatchMatches(
    header: string | string[] | undefined,
    sha256: string,
  ): boolean {
    if (!header) return false;

    const raw = Array.isArray(header) ? header.join(',') : header;
    if (raw.trim() === '*') return true;

    for (const token of raw.split(',')) {
      const candidate = token
        .trim()
        .replace(/^W\//, '')
        .replace(/^"|"$/g, '');
      if (candidate === sha256) return true;
    }
    return false;
  }
}
@@ -0,0 +1,19 @@
import { Global, Module } from '@nestjs/common';
import { SandboxController } from './sandbox.controller';
import { SandboxStore } from './sandbox.store';
/**
 * Wires up the in-RAM blob sandbox.
 *
 * Exactly ONE SandboxStore instance must exist: the stash tool writes to it
 * (through McpService / AiChatToolsService) and the anonymous SandboxController
 * reads from it, so put() and get() have to hit the same Map. The module is
 * @Global so that single instance can be injected anywhere without extra
 * imports. EnvironmentService, the source of the caps, TTL and public URL,
 * comes from the global EnvironmentModule.
 */
@Global()
@Module({
  providers: [SandboxStore],
  controllers: [SandboxController],
  exports: [SandboxStore],
})
export class SandboxModule {}
@@ -0,0 +1,163 @@
import { createHash } from 'node:crypto';
import { validate as isValidUUID } from 'uuid';
import { SandboxStore } from './sandbox.store';
// EnvironmentService stand-in exposing only the sandbox getters. Each cap and
// the TTL start at the production default and can be overridden per test; the
// public URL is fixed.
function makeEnv(
  overrides: Partial<{
    ttlMs: number;
    maxBytes: number;
    maxImageBytes: number;
    maxTotalBytes: number;
  }> = {},
) {
  const settings = Object.assign(
    {
      ttlMs: 3_600_000,
      maxBytes: 8_388_608,
      maxImageBytes: 20_971_520,
      maxTotalBytes: 134_217_728,
    },
    overrides,
  );
  const env: any = {
    getSandboxTtlMs() {
      return settings.ttlMs;
    },
    getSandboxMaxBytes() {
      return settings.maxBytes;
    },
    getSandboxMaxImageBytes() {
      return settings.maxImageBytes;
    },
    getSandboxMaxTotalBytes() {
      return settings.maxTotalBytes;
    },
    getSandboxPublicUrl() {
      return 'https://example.test';
    },
  };
  return env;
}
describe('SandboxStore', () => {
  let store: SandboxStore;

  // Hex sha256 computed independently of the store, for comparison.
  const sha256Hex = (data: Buffer): string =>
    createHash('sha256').update(data).digest('hex');

  // Switches to fake timers and pins the clock. Must run BEFORE the store is
  // constructed so the store's sweep interval is registered on the fake clock.
  const freezeClockAt = (iso: string): void => {
    jest.useFakeTimers();
    jest.setSystemTime(new Date(iso));
  };

  afterEach(() => {
    // Stop the unref'd sweep interval so it cannot leak into the next test.
    store?.onModuleDestroy();
    jest.useRealTimers();
  });

  it('put/get round-trips the exact bytes + mime and returns a UUID id', () => {
    store = new SandboxStore(makeEnv());
    const buf = Buffer.from('{"type":"doc","content":[]}', 'utf8');

    const res = store.put(buf, 'application/json');

    expect(isValidUUID(res.id)).toBe(true);
    expect(res.size).toBe(buf.length);
    const entry = store.get(res.id);
    expect(entry).toBeDefined();
    expect(entry!.buf.equals(buf)).toBe(true);
    expect(entry!.mime).toBe('application/json');
  });

  it('computes sha256 over the body (matches a manual digest)', () => {
    store = new SandboxStore(makeEnv());
    const buf = Buffer.from('hello sandbox', 'utf8');
    const expected = sha256Hex(buf);

    const res = store.put(buf, 'text/plain');

    expect(res.sha256).toBe(expected);
    expect(store.get(res.id)!.sha256).toBe(expected);
  });

  it('returns undefined for a missing id', () => {
    store = new SandboxStore(makeEnv());

    expect(store.get('11111111-1111-1111-1111-111111111111')).toBeUndefined();
  });

  it('lazily expires entries past the TTL (get returns undefined)', () => {
    freezeClockAt('2026-01-01T00:00:00Z');
    store = new SandboxStore(makeEnv({ ttlMs: 1000 }));

    const res = store.put(Buffer.from('x'), 'text/plain');
    expect(store.get(res.id)).toBeDefined();

    // Two seconds later the 1s TTL has passed.
    jest.setSystemTime(new Date('2026-01-01T00:00:02Z'));
    expect(store.get(res.id)).toBeUndefined();
    // Expiring the entry also releases its bytes from the accounting.
    expect(store.bytes).toBe(0);
  });

  it('background sweep drops expired entries without a get()', () => {
    freezeClockAt('2026-01-01T00:00:00Z');
    store = new SandboxStore(makeEnv({ ttlMs: 1000 }));

    store.put(Buffer.from('x'), 'text/plain');
    expect(store.size).toBe(1);

    // Move past the TTL, then let the sweep interval fire.
    jest.setSystemTime(new Date('2026-01-01T00:01:30Z'));
    jest.advanceTimersByTime(60_000);
    expect(store.size).toBe(0);
  });

  it('rejects a non-image blob over SANDBOX_MAX_BYTES', () => {
    store = new SandboxStore(makeEnv({ maxBytes: 16 }));
    const oversized = () => store.put(Buffer.alloc(17), 'application/json');

    expect(oversized).toThrow(/per-blob cap/);
  });

  it('uses the larger image cap for image/* blobs', () => {
    // 100 bytes is over the document cap (16) but under the image cap (1024).
    store = new SandboxStore(makeEnv({ maxBytes: 16, maxImageBytes: 1024 }));

    expect(() => store.put(Buffer.alloc(100), 'image/png')).not.toThrow();
    // SVG is treated as an image for cap purposes as well.
    expect(() => store.put(Buffer.alloc(100), 'image/svg+xml')).not.toThrow();
  });

  it('evicts oldest entries when the total cap would be exceeded', () => {
    // With a 250-byte total cap and 100-byte blobs, only two fit at once.
    store = new SandboxStore(makeEnv({ maxTotalBytes: 250, maxBytes: 1024 }));

    const a = store.put(Buffer.alloc(100), 'application/json');
    const b = store.put(Buffer.alloc(100), 'application/json');
    const c = store.put(Buffer.alloc(100), 'application/json'); // pushes out a

    expect(store.get(a.id)).toBeUndefined();
    expect(store.get(b.id)).toBeDefined();
    expect(store.get(c.id)).toBeDefined();
    expect(store.bytes).toBeLessThanOrEqual(250);
  });

  it('rejects a single blob larger than the whole total cap', () => {
    store = new SandboxStore(makeEnv({ maxTotalBytes: 50, maxBytes: 1024 }));
    const tooBigForStore = () =>
      store.put(Buffer.alloc(100), 'application/json');

    expect(tooBigForStore).toThrow(/total store cap/);
  });

  it('putAndLink composes the anonymous /api/sb/<id> url with matching integrity', () => {
    store = new SandboxStore(makeEnv());
    const buf = Buffer.from('hello link', 'utf8');
    const expected = sha256Hex(buf);

    const res = store.putAndLink(buf, 'image/png');

    expect(res.uri).toMatch(/^https:\/\/example\.test\/api\/sb\/[0-9a-f-]{36}$/);
    expect(res.sha256).toBe(expected);
    expect(res.size).toBe(buf.length);
  });

  it('has()/remove() report and free a blob by id', () => {
    store = new SandboxStore(makeEnv());
    const { id } = store.put(Buffer.from('x'), 'text/plain');

    expect(store.has(id)).toBe(true);
    store.remove(id);
    expect(store.has(id)).toBe(false);
    expect(store.bytes).toBe(0);
  });

  it('asSink() round-trips put/has/evict through the anonymous uri', () => {
    store = new SandboxStore(makeEnv());
    const sink = store.asSink();
    const buf = Buffer.from('sink bytes', 'utf8');

    const r = sink.put(buf, 'image/png');

    expect(sink.has(r.uri)).toBe(true);
    sink.evict(r.uri);
    expect(sink.has(r.uri)).toBe(false);
  });
});
@@ -0,0 +1,178 @@
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
import { createHash, randomUUID } from 'node:crypto';
import { EnvironmentService } from '../environment/environment.service';
import { SANDBOX_API_PATH } from './sandbox.constants';
// The sandbox is an in-RAM, process-local blob store: no disk, no DB, and a
// restart empties it (ephemeral by design). Each blob is addressed by an
// unguessable randomUUID(), and that id IS the read capability — there are NO
// tokens. A blob is immutable (its id never maps to different content), which
// makes its sha256 a perfect strong ETag.

/** One stored blob together with the metadata needed to serve it. */
export interface SandboxEntry {
  /** MIME type given at put() time; served back as the Content-Type. */
  mime: string;
  /** Hex sha256 of `buf`, computed at put() time. */
  sha256: string;
  /** Epoch milliseconds at or after which the blob counts as expired. */
  expiresAt: number;
  /** The blob bytes. */
  buf: Buffer;
}
/** What put() hands back: the blob's read capability plus integrity metadata. */
export interface SandboxPutResult {
  /** Random UUID under which the blob is stored. */
  id: string;
  /** Size of the stored blob in bytes. */
  size: number;
  /** Hex sha256 of the stored bytes. */
  sha256: string;
}
/**
 * In-memory blob store behind the anonymous sandbox route.
 *
 * Blobs live in a Map keyed by a random UUID. Each one expires after the
 * configured TTL, and the store evicts its oldest entries when a new blob
 * would push the total past the configured byte cap.
 */
@Injectable()
export class SandboxStore implements OnModuleDestroy {
  private readonly logger = new Logger(SandboxStore.name);

  // A Map iterates in insertion order, so its first key is always the oldest
  // blob. FIFO eviction under the total-bytes RAM guard relies on this.
  private readonly map = new Map<string, SandboxEntry>();
  private totalBytes = 0;

  // A periodic sweep removes expired blobs that nobody fetched, so they do not
  // linger until the next get(). The timer is unref()'d so it never keeps the
  // event loop alive, and it is cleared on module destroy. Same sweepTimer
  // pattern as integrations/mcp/mcp.service.ts and packages/mcp/src/http.ts.
  private readonly sweepIntervalMs = 60_000;
  private readonly sweepTimer: NodeJS.Timeout;

  constructor(private readonly environmentService: EnvironmentService) {
    // A failing sweep is logged rather than thrown out of the timer callback.
    const guardedSweep = (): void => {
      try {
        this.sweep();
      } catch (err) {
        this.logger.error('Sandbox sweep failed', err as Error);
      }
    };
    this.sweepTimer = setInterval(guardedSweep, this.sweepIntervalMs);
    this.sweepTimer.unref?.();
  }

  /** Stops the background sweep timer. */
  onModuleDestroy(): void {
    clearInterval(this.sweepTimer);
  }

  /**
   * Stores a blob and returns its id (the read capability) with integrity
   * metadata.
   *
   * The per-blob cap depends on the mime: `image/*` gets the image cap, and
   * everything else the regular cap. Expired entries are dropped first, then
   * the oldest entries are evicted until the new blob fits under the total cap.
   * Blob bodies are never logged.
   *
   * @throws Error if the blob exceeds its per-blob cap, or is larger than the
   *   whole total cap and so could not fit even in an empty store.
   */
  put(buf: Buffer, mime: string): SandboxPutResult {
    const env = this.environmentService;
    const size = buf.length;

    const perBlobCap = mime.startsWith('image/')
      ? env.getSandboxMaxImageBytes()
      : env.getSandboxMaxBytes();
    if (size > perBlobCap) {
      throw new Error(
        `Sandbox blob of ${size} bytes exceeds the ${perBlobCap}-byte per-blob cap`,
      );
    }

    const maxTotal = env.getSandboxMaxTotalBytes();
    if (size > maxTotal) {
      throw new Error(
        `Sandbox blob of ${size} bytes exceeds the total store cap of ${maxTotal} bytes`,
      );
    }

    // Reclaim expired space first, then evict oldest-first (Map order) until
    // the new blob fits.
    this.sweep();
    for (const oldestId of this.map.keys()) {
      if (this.totalBytes + size <= maxTotal) break;
      this.evict(oldestId);
    }

    const id = randomUUID();
    const sha256 = createHash('sha256').update(buf).digest('hex');
    const expiresAt = Date.now() + env.getSandboxTtlMs();
    this.map.set(id, { buf, mime, sha256, expiresAt });
    this.totalBytes += size;

    return { id, sha256, size };
  }

  /**
   * Stores a blob and returns its anonymous read URL with integrity metadata.
   *
   * This is the single place the sandbox URL
   * (`${publicBase}${SANDBOX_API_PATH}/<id>`) is assembled, so callers never
   * build the route by hand. The raw put() stays public for tests and
   * low-level callers. The returned sha256 is also the blob's strong ETag.
   */
  putAndLink(
    buf: Buffer,
    mime: string,
  ): { uri: string; sha256: string; size: number } {
    const { id, sha256, size } = this.put(buf, mime);
    const publicBase = this.environmentService.getSandboxPublicUrl();
    const uri = `${publicBase}${SANDBOX_API_PATH}/${id}`;
    return { uri, sha256, size };
  }

  /**
   * Adapts the store to the package's blob-sandbox sink contract
   * `{ put, has, evict }`.
   *
   * The sink works with anonymous `uri`s while the store is keyed by `id`, so
   * this is the ONE place a sandbox uri is mapped back to its id (its last
   * path segment). Both wiring sites (embedded MCP and in-app agent tools) go
   * through here, which keeps the uri↔id mapping next to putAndLink instead of
   * duplicated.
   */
  asSink(): {
    put: (buf: Buffer, mime: string) => { uri: string; sha256: string; size: number };
    has: (uri: string) => boolean;
    evict: (uri: string) => void;
  } {
    // The id is whatever follows the final '/' of the uri.
    const idOf = (uri: string): string => uri.slice(uri.lastIndexOf('/') + 1);

    const put = (buf: Buffer, mime: string) => this.putAndLink(buf, mime);
    const has = (uri: string): boolean => this.has(idOf(uri));
    const evict = (uri: string): void => this.remove(idOf(uri));
    return { put, has, evict };
  }

  /** True while the blob is still live (neither evicted nor expired). */
  has(id: string): boolean {
    const live = this.get(id);
    return live !== undefined;
  }

  /** Drops a blob by id; public face of the private evict(). */
  remove(id: string): void {
    this.evict(id);
  }

  /**
   * Looks up a blob. Returns undefined when the id is unknown OR the blob has
   * expired; an expired blob is evicted on the spot (lazy expiry).
   */
  get(id: string): SandboxEntry | undefined {
    const entry = this.map.get(id);
    if (!entry) return undefined;

    const expired = entry.expiresAt <= Date.now();
    if (!expired) return entry;

    this.evict(id);
    return undefined;
  }

  /** Number of entries currently held (test/diagnostic helper). */
  get size(): number {
    return this.map.size;
  }

  /** Total bytes currently held (test/diagnostic helper). */
  get bytes(): number {
    return this.totalBytes;
  }

  // Removes one entry and subtracts its size from the byte total; does
  // nothing for an unknown id.
  private evict(id: string): void {
    const entry = this.map.get(id);
    if (!entry) return;
    this.map.delete(id);
    this.totalBytes -= entry.buf.length;
  }

  // Evicts every entry whose expiry time has been reached.
  private sweep(): void {
    const now = Date.now();
    this.map.forEach((entry, id) => {
      if (entry.expiresAt <= now) {
        this.evict(id);
      }
    });
  }
}
+5
View File
@@ -13,6 +13,7 @@ import fastifyCookie from '@fastify/cookie';
import fastifyIp from 'fastify-ip';
import { InternalLogFilter } from './common/logger/internal-log-filter';
import { EnvironmentService } from './integrations/environment/environment.service';
import { SANDBOX_API_PATH } from './integrations/sandbox/sandbox.constants';
import { resolveFrameHeader } from './common/helpers';
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
import { GitHttpService } from './integrations/git-sync/http/git-http.service';
@@ -144,6 +145,10 @@ async function bootstrap() {
'/api/workspace/create',
'/api/workspace/joined',
'/api/workspace/find-by-email',
// Anonymous in-RAM blob sandbox: a remote consumer fetches blobs by an
// unguessable UUID without any workspace host context, so the
// workspace-resolution gate must not apply.
SANDBOX_API_PATH,
];
if (