Merge remote-tracking branch 'gitea/develop' into HEAD
# Conflicts: # apps/server/src/app.module.ts # apps/server/src/integrations/environment/environment.service.spec.ts # apps/server/src/integrations/environment/environment.service.ts # apps/server/src/integrations/environment/environment.validation.ts # packages/mcp/build/client.js # packages/mcp/build/index.js # packages/mcp/build/tool-specs.js
This commit is contained in:
@@ -29,6 +29,7 @@ import { NoopAuditModule } from './integrations/audit/audit.module';
|
||||
import { ThrottleModule } from './integrations/throttle/throttle.module';
|
||||
import { McpModule } from './integrations/mcp/mcp.module';
|
||||
import { GitSyncModule } from './integrations/git-sync/git-sync.module';
|
||||
import { SandboxModule } from './integrations/sandbox/sandbox.module';
|
||||
import { AiModule } from './integrations/ai/ai.module';
|
||||
import { AiChatModule } from './core/ai-chat/ai-chat.module';
|
||||
|
||||
@@ -91,6 +92,7 @@ try {
|
||||
ThrottleModule,
|
||||
McpModule,
|
||||
GitSyncModule,
|
||||
SandboxModule,
|
||||
AiModule,
|
||||
AiChatModule,
|
||||
...enterpriseModules,
|
||||
|
||||
@@ -187,7 +187,7 @@ export class AiAgentRolesService {
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Catalog (admin-only). The catalog is curated, untrusted JSON fetched +
|
||||
// Catalog (admin-only). The catalog is curated, untrusted YAML fetched +
|
||||
// validated by AiAgentRolesCatalogProvider; this layer resolves localized
|
||||
// text and reconciles a bundle against the workspace's existing roles.
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
+172
-20
@@ -1,12 +1,23 @@
|
||||
import { BadGatewayException, BadRequestException } from '@nestjs/common';
|
||||
import { AiAgentRolesCatalogProvider } from './ai-agent-roles-catalog.provider';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { parse as parseYaml, stringify as stringifyYaml } from 'yaml';
|
||||
import {
|
||||
AiAgentRolesCatalogProvider,
|
||||
isCatalogBundleFile,
|
||||
isCatalogIndex,
|
||||
isCatalogRole,
|
||||
} from './ai-agent-roles-catalog.provider';
|
||||
|
||||
/**
|
||||
* Provider tests against a mocked remote source (no network). They cover the
|
||||
* happy read path (fetchIndex / fetchBundle), the malformed-shape rejection,
|
||||
* rejection of non-http(s) sources (local sources are gone), and — most
|
||||
* importantly — the `^[a-z0-9-]+$` path-traversal guard that runs BEFORE any
|
||||
* path/URL is built.
|
||||
* happy read path (fetchIndex / fetchBundle) over the YAML catalog format, the
|
||||
* block-scalar `instructions` round-trip, the malformed-shape rejection, the
|
||||
* malformed-YAML rejection, rejection of non-http(s) sources (local sources are
|
||||
* gone), and — most importantly — the `^[a-z0-9-]+$` path-traversal guard that
|
||||
* runs BEFORE any path/URL is built. Fixtures are serialized with the same
|
||||
* `yaml` library the provider parses with (`stringifyYaml`), so the tests
|
||||
* exercise real YAML, not the JSON subset.
|
||||
*/
|
||||
describe('AiAgentRolesCatalogProvider', () => {
|
||||
function makeProvider(source: string) {
|
||||
@@ -71,7 +82,7 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
}
|
||||
|
||||
it('fetchBundle remote happy path => parses + validates', async () => {
|
||||
const json = JSON.stringify({
|
||||
const yaml = stringifyYaml({
|
||||
schemaVersion: 1,
|
||||
language: 'en',
|
||||
roles: [
|
||||
@@ -82,7 +93,7 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
},
|
||||
],
|
||||
});
|
||||
const body = streamOf([new TextEncoder().encode(json)]);
|
||||
const body = streamOf([new TextEncoder().encode(yaml)]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
@@ -92,12 +103,12 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
});
|
||||
|
||||
it('fetchBundle remote malformed (role missing instructions) => BadGateway', async () => {
|
||||
const json = JSON.stringify({
|
||||
const yaml = stringifyYaml({
|
||||
schemaVersion: 1,
|
||||
language: 'fr',
|
||||
roles: [{ slug: 'researcher', name: 'Chercheur' }],
|
||||
});
|
||||
const body = streamOf([new TextEncoder().encode(json)]);
|
||||
const body = streamOf([new TextEncoder().encode(yaml)]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
@@ -153,8 +164,9 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
);
|
||||
global.fetch = fetchMock as never;
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
// Body shape is irrelevant; an empty stream parses to invalid JSON and
|
||||
// throws, but the fetch call (with its init) still happened.
|
||||
// Body shape is irrelevant; an empty stream parses to an empty YAML doc
|
||||
// (null), fails the shape guard and throws, but the fetch call (with its
|
||||
// init) still happened.
|
||||
await expect(provider.fetchIndex()).rejects.toBeDefined();
|
||||
expect(fetchMock).toHaveBeenCalledWith(
|
||||
expect.any(String),
|
||||
@@ -190,7 +202,7 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
});
|
||||
|
||||
it('small streamed body parses normally (cap not hit)', async () => {
|
||||
const json = JSON.stringify({
|
||||
const yaml = stringifyYaml({
|
||||
schemaVersion: 1,
|
||||
bundles: [
|
||||
{
|
||||
@@ -201,7 +213,7 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
},
|
||||
],
|
||||
});
|
||||
const body = streamOf([new TextEncoder().encode(json)]);
|
||||
const body = streamOf([new TextEncoder().encode(yaml)]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
@@ -227,7 +239,7 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
});
|
||||
|
||||
it('null body (no readable stream) => response.text() fallback parses', async () => {
|
||||
const json = JSON.stringify({
|
||||
const yaml = stringifyYaml({
|
||||
schemaVersion: 1,
|
||||
bundles: [
|
||||
{
|
||||
@@ -240,7 +252,7 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
});
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body: null, text: json })) as never;
|
||||
.mockResolvedValue(mockResponse({ body: null, text: yaml })) as never;
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
const index = await provider.fetchIndex();
|
||||
expect(index.bundles[0].id).toBe('general');
|
||||
@@ -259,8 +271,12 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('invalid JSON body => BadGateway (parse failure)', async () => {
|
||||
const body = streamOf([new TextEncoder().encode('{not valid json')]);
|
||||
it('invalid YAML body => BadGateway (parse failure)', async () => {
|
||||
// An unterminated flow mapping is not valid YAML, so YAML.parse throws and
|
||||
// the provider maps it to BadGateway (not a generic 500).
|
||||
const body = streamOf([
|
||||
new TextEncoder().encode('schemaVersion: {not: closed'),
|
||||
]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
@@ -270,11 +286,28 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('malformed index.json (valid JSON, wrong shape) => BadGateway', async () => {
|
||||
// Parses as JSON but fails isCatalogIndex (schemaVersion not a number).
|
||||
it('YAML with a duplicate key (strict) => BadGateway (parse failure)', async () => {
|
||||
// strict:true rejects duplicate mapping keys rather than last-wins coercing
|
||||
// them — a defensive parse on untrusted input.
|
||||
const body = streamOf([
|
||||
new TextEncoder().encode(
|
||||
JSON.stringify({ schemaVersion: 'x', bundles: [] }),
|
||||
'schemaVersion: 1\nbundles: []\nschemaVersion: 2\n',
|
||||
),
|
||||
]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
|
||||
BadGatewayException,
|
||||
);
|
||||
});
|
||||
|
||||
it('malformed index.yaml (valid YAML, wrong shape) => BadGateway', async () => {
|
||||
// Parses as YAML but fails isCatalogIndex (schemaVersion not a number).
|
||||
const body = streamOf([
|
||||
new TextEncoder().encode(
|
||||
stringifyYaml({ schemaVersion: 'x', bundles: [] }),
|
||||
),
|
||||
]);
|
||||
global.fetch = jest
|
||||
@@ -283,6 +316,36 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
await expect(provider.fetchIndex()).rejects.toThrow(/malformed/i);
|
||||
});
|
||||
|
||||
it('block-scalar instructions round-trips to the exact multi-line string', async () => {
|
||||
// The whole point of the YAML migration: a long `instructions` prompt is
|
||||
// stored as a literal block scalar (|-) for line-by-line diffs, and must
|
||||
// resolve byte-for-byte to the original multi-line string.
|
||||
const instructions = [
|
||||
'Line one of the prompt.',
|
||||
'',
|
||||
' Indented bullet that must survive.',
|
||||
'Final line, no trailing newline.',
|
||||
].join('\n');
|
||||
const yaml = stringifyYaml(
|
||||
{
|
||||
schemaVersion: 1,
|
||||
language: 'en',
|
||||
roles: [{ slug: 'researcher', name: 'Researcher', instructions }],
|
||||
},
|
||||
{ lineWidth: 0 },
|
||||
);
|
||||
// Sanity: the fixture really uses a literal block scalar (|, optionally
|
||||
// with an indentation indicator), not a flow/quoted string.
|
||||
expect(yaml).toMatch(/instructions: \|/);
|
||||
const body = streamOf([new TextEncoder().encode(yaml)]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
const bundle = await provider.fetchBundle('research', 'en');
|
||||
expect(bundle.roles[0].instructions).toBe(instructions);
|
||||
});
|
||||
});
|
||||
|
||||
describe('path-traversal / SSRF guard (^[a-z0-9-]+$)', () => {
|
||||
@@ -304,4 +367,93 @@ describe('AiAgentRolesCatalogProvider', () => {
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pin the REAL shipped catalog files (not synthetic fixtures). The JSON->YAML
|
||||
// migration was a hand conversion, so the realistic failure is a hand-edit
|
||||
// error in one of the 5 content YAML files (the index + the four per-bundle/
|
||||
// lang files: index.yaml plus bundles/{editorial,research}/{en,ru}.yaml) — a
|
||||
// quote/colon in a description, a broken
|
||||
// emoji/arrow, a block-scalar indent slip that silently changes or drops
|
||||
// instructions. Nothing else in CI parses these files — `scripts/check.mjs`
|
||||
// is not wired into any turbo/husky/CI step — so this is the only automated
|
||||
// guard over the shipped content. We read them straight off disk, parse with
|
||||
// the SAME options the provider uses (strict + maxAliasCount, see parseYaml in
|
||||
// the provider), and run them through the provider's own type guards. A future
|
||||
// edit that breaks a real file fails here.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('real shipped catalog files (the YAML migration must not break them)', () => {
|
||||
// Spec lives at apps/server/src/core/ai-chat/roles/catalog/; the catalog
|
||||
// ships at the repo root (agent-roles-catalog/) — seven levels up.
|
||||
const CATALOG_DIR = join(
|
||||
__dirname,
|
||||
'../../../../../../../agent-roles-catalog',
|
||||
);
|
||||
// Match the provider's parseYaml exactly (untrusted-input parse options).
|
||||
const PARSE_OPTS = { strict: true, maxAliasCount: 100 } as const;
|
||||
|
||||
function readCatalogYaml(rel: string): unknown {
|
||||
return parseYaml(readFileSync(join(CATALOG_DIR, rel), 'utf8'), PARSE_OPTS);
|
||||
}
|
||||
|
||||
// Load + validate the real index lazily (only when a test runs), so a broken
|
||||
// real file fails ONLY these catalog tests — not collection of the entire
|
||||
// spec, which also holds the unrelated mocked-remote provider tests above.
|
||||
function loadRealIndex() {
|
||||
const parsed = readCatalogYaml('index.yaml');
|
||||
if (!isCatalogIndex(parsed)) {
|
||||
throw new Error('Real index.yaml is not a valid catalog index');
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
it('index.yaml parses + validates with the provider guard', () => {
|
||||
expect(isCatalogIndex(readCatalogYaml('index.yaml'))).toBe(true);
|
||||
});
|
||||
|
||||
it('editorial bundle still ships the fact-checker role', () => {
|
||||
const editorial = loadRealIndex().bundles.find((b) => b.id === 'editorial');
|
||||
expect(editorial).toBeDefined();
|
||||
expect(editorial?.roles.map((r) => r.slug)).toContain('fact-checker');
|
||||
});
|
||||
|
||||
// Driven by the real index (read inside the test, so it's lazy): every
|
||||
// declared bundle + language file must parse, validate, and be in EXACT slug
|
||||
// correspondence with the index — every declared role present AND no
|
||||
// undeclared extras — mirroring scripts/check.mjs, which requires both
|
||||
// directions. A bundle or language added later is covered automatically.
|
||||
it('every declared bundle/language file is valid and in exact slug correspondence', () => {
|
||||
const index = loadRealIndex();
|
||||
// Guard against an empty index silently passing the loops below.
|
||||
expect(index.bundles.length).toBeGreaterThan(0);
|
||||
for (const bundle of index.bundles) {
|
||||
const declaredSlugs = bundle.roles.map((r) => r.slug);
|
||||
expect(bundle.languages.length).toBeGreaterThan(0);
|
||||
for (const lang of bundle.languages) {
|
||||
const rel = `bundles/${bundle.id}/${lang}.yaml`;
|
||||
const file = readCatalogYaml(rel);
|
||||
expect(isCatalogBundleFile(file)).toBe(true);
|
||||
// Narrow for TS and access fields safely.
|
||||
if (!isCatalogBundleFile(file)) continue;
|
||||
expect(file.language).toBe(lang);
|
||||
const fileSlugs = file.roles.map((r) => r.slug);
|
||||
// Existing direction: every declared role is present in the file.
|
||||
for (const slug of declaredSlugs) {
|
||||
expect(fileSlugs).toContain(slug);
|
||||
}
|
||||
// Symmetric direction: the file carries NO undeclared/extra roles, so
|
||||
// file slugs and declared slugs must be the SAME set (exact match).
|
||||
// Catches a hand-edit that copies a stray role into a bundle file.
|
||||
expect([...fileSlugs].sort()).toEqual([...declaredSlugs].sort());
|
||||
expect(file.roles.length).toBeGreaterThan(0);
|
||||
for (const role of file.roles) {
|
||||
expect(isCatalogRole(role)).toBe(true);
|
||||
expect(typeof role.instructions).toBe('string');
|
||||
expect(role.instructions.trim().length).toBeGreaterThan(0);
|
||||
expect(role.name.trim().length).toBeGreaterThan(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
Injectable,
|
||||
Logger,
|
||||
} from '@nestjs/common';
|
||||
import { parse as parseYamlDoc } from 'yaml';
|
||||
import { EnvironmentService } from '../../../../integrations/environment/environment.service';
|
||||
import {
|
||||
CatalogBundleFile,
|
||||
@@ -28,9 +29,11 @@ const MAX_BYTES = 1_000_000;
|
||||
* base URL — REMOTE only; local-filesystem sources are no longer supported. The
|
||||
* value is baked into the Docker image at build time (set per-branch in CI).
|
||||
*
|
||||
* The catalog is UNTRUSTED input: every file is JSON-parsed and run through a
|
||||
* hand-written type guard before any field is exposed, and every dynamic path
|
||||
* segment is validated against SEGMENT_RE up front (path-traversal + SSRF).
|
||||
* The catalog is UNTRUSTED input: every file is YAML-parsed with a SAFE schema
|
||||
* (standard JSON-compatible tags only — no custom `!!` tags / no code execution)
|
||||
* and run through a hand-written type guard before any field is exposed, and
|
||||
* every dynamic path segment is validated against SEGMENT_RE up front
|
||||
* (path-traversal + SSRF).
|
||||
*/
|
||||
@Injectable()
|
||||
export class AiAgentRolesCatalogProvider {
|
||||
@@ -38,19 +41,19 @@ export class AiAgentRolesCatalogProvider {
|
||||
|
||||
constructor(private readonly environmentService: EnvironmentService) {}
|
||||
|
||||
/** Read + validate the top-level index (`index.json`). */
|
||||
/** Read + validate the top-level index (`index.yaml`). */
|
||||
async fetchIndex(): Promise<CatalogIndex> {
|
||||
const raw = await this.readRelative('index.json');
|
||||
const parsed = this.parseJson(raw, 'index.json');
|
||||
const raw = await this.readRelative('index.yaml');
|
||||
const parsed = this.parseYaml(raw, 'index.yaml');
|
||||
if (!isCatalogIndex(parsed)) {
|
||||
throw new BadGatewayException(
|
||||
'Agent roles catalog index is malformed (index.json)',
|
||||
'Agent roles catalog index is malformed (index.yaml)',
|
||||
);
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
/** Read + validate one language file (`bundles/<bundleId>/<language>.json`). */
|
||||
/** Read + validate one language file (`bundles/<bundleId>/<language>.yaml`). */
|
||||
async fetchBundle(
|
||||
bundleId: string,
|
||||
language: string,
|
||||
@@ -58,9 +61,9 @@ export class AiAgentRolesCatalogProvider {
|
||||
// SECURITY: validate BEFORE building any path/URL (path-traversal + SSRF).
|
||||
this.assertSegment(bundleId, 'bundleId');
|
||||
this.assertSegment(language, 'language');
|
||||
const rel = `bundles/${bundleId}/${language}.json`;
|
||||
const rel = `bundles/${bundleId}/${language}.yaml`;
|
||||
const raw = await this.readRelative(rel);
|
||||
const parsed = this.parseJson(raw, rel);
|
||||
const parsed = this.parseYaml(raw, rel);
|
||||
if (!isCatalogBundleFile(parsed)) {
|
||||
throw new BadGatewayException(
|
||||
`Agent roles catalog bundle is malformed (${rel})`,
|
||||
@@ -76,15 +79,29 @@ export class AiAgentRolesCatalogProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/** JSON.parse with a clear BadGateway on malformed content. */
|
||||
private parseJson(raw: string, rel: string): unknown {
|
||||
/**
|
||||
* Safe YAML parse with a clear BadGateway on malformed content. The catalog is
|
||||
* untrusted, so we lean on the `yaml` library's default `core` schema, which
|
||||
* only produces JSON-compatible values (objects/arrays/strings/numbers/
|
||||
* booleans/null) and NEVER constructs arbitrary types or runs code — there is
|
||||
* no `!!js`-style tag handling. `strict: true` rejects duplicate keys instead
|
||||
* of silently coercing them. (Note: in yaml@2.8.x an unknown custom tag does
|
||||
* NOT throw even under `strict` — the parser logs a warning and resolves the
|
||||
* node to a plain scalar; the catalog stays safe because the default schema
|
||||
* never builds arbitrary types from a tag and our hand-written type guards
|
||||
* reject any value of the wrong shape.) The alias-expansion guard
|
||||
* (`maxAliasCount`) bounds billion-laughs blow-ups (the 1 MB streaming
|
||||
* cap already limits the input itself). JSON is a YAML subset, so a leftover
|
||||
* `.json`-style body still parses here too.
|
||||
*/
|
||||
private parseYaml(raw: string, rel: string): unknown {
|
||||
try {
|
||||
return JSON.parse(raw);
|
||||
return parseYamlDoc(raw, { strict: true, maxAliasCount: 100 });
|
||||
} catch (err) {
|
||||
const reason = shortError(err);
|
||||
this.logger.error(`Agent roles catalog JSON parse failed (${rel}): ${reason}`);
|
||||
this.logger.error(`Agent roles catalog YAML parse failed (${rel}): ${reason}`);
|
||||
throw new BadGatewayException(
|
||||
`Agent roles catalog file is not valid JSON (${rel}): ${reason}`,
|
||||
`Agent roles catalog file is not valid YAML (${rel}): ${reason}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
/**
|
||||
* Catalog wire shapes. The catalog is curated, untrusted JSON (a GitHub repo or
|
||||
* Catalog wire shapes. The catalog is curated, untrusted YAML (a GitHub repo or
|
||||
* another remote http(s) source), so every shape is validated by a hand-written type guard in
|
||||
* the provider before any field is used — no zod / new deps on the server.
|
||||
* the provider before any field is used — no zod on the server (YAML is parsed
|
||||
* with the `yaml` library's safe, JSON-compatible schema).
|
||||
*
|
||||
* Localized fields (`name` / `description` at the bundle level) are
|
||||
* `Record<language, string>` so one bundle serves many UI languages; per-role
|
||||
@@ -22,7 +23,7 @@ export interface CatalogRole {
|
||||
modelConfig?: Record<string, unknown> | null;
|
||||
}
|
||||
|
||||
/** A single language file: `bundles/<id>/<language>.json`. */
|
||||
/** A single language file: `bundles/<id>/<language>.yaml`. */
|
||||
export interface CatalogBundleFile {
|
||||
schemaVersion: number;
|
||||
language: string;
|
||||
@@ -40,7 +41,7 @@ export interface CatalogBundleMeta {
|
||||
roles: { slug: string; version: number }[];
|
||||
}
|
||||
|
||||
/** Top-level catalog index: `index.json`. */
|
||||
/** Top-level catalog index: `index.yaml`. */
|
||||
export interface CatalogIndex {
|
||||
schemaVersion: number;
|
||||
bundles: CatalogBundleMeta[];
|
||||
|
||||
@@ -63,6 +63,12 @@ describe('AiChatToolsService deletePage guardrail (H4)', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
|
||||
// even though these tests never execute it — return a no-op sink so the
|
||||
// tool wiring in forUser() succeeds.
|
||||
{
|
||||
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
|
||||
} as never,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -175,6 +181,12 @@ describe('AiChatToolsService expanded toolset guardrails', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
|
||||
// even though these tests never execute it — return a no-op sink so the
|
||||
// tool wiring in forUser() succeeds.
|
||||
{
|
||||
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
|
||||
} as never,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -290,6 +302,12 @@ describe('AiChatToolsService node-arg JSON-string coercion', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
|
||||
// even though these tests never execute it — return a no-op sink so the
|
||||
// tool wiring in forUser() succeeds.
|
||||
{
|
||||
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
|
||||
} as never,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -440,6 +458,12 @@ describe('AiChatToolsService model-friendly input validation (#190)', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// sandboxStore: forUser() eagerly calls asSink() to wire the stash tool,
|
||||
// even though these tests never execute it — return a no-op sink so the
|
||||
// tool wiring in forUser() succeeds.
|
||||
{
|
||||
asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
|
||||
} as never,
|
||||
);
|
||||
});
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ import {
|
||||
import { resolveCurrentPageResult } from './current-page.util';
|
||||
import { parseNodeArg } from './parse-node-arg';
|
||||
import { modelFriendlyInput } from './model-friendly-input';
|
||||
import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
|
||||
|
||||
/**
|
||||
* Per-user, per-request adapter that exposes Docmost READ operations to the
|
||||
@@ -41,6 +42,8 @@ export class AiChatToolsService {
|
||||
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
|
||||
private readonly spaceMemberRepo: SpaceMemberRepo,
|
||||
private readonly pagePermissionRepo: PagePermissionRepo,
|
||||
// Shared singleton in-RAM blob store backing the stash tool.
|
||||
private readonly sandboxStore: SandboxStore,
|
||||
) {}
|
||||
|
||||
async forUser(
|
||||
@@ -86,11 +89,17 @@ export class AiChatToolsService {
|
||||
aiChatId,
|
||||
});
|
||||
|
||||
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
|
||||
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
|
||||
// package needs to keep its mirror counts honest under FIFO eviction (the
|
||||
// package never touches env or the store). asSink() centralizes the uri↔id
|
||||
// mapping next to putAndLink, shared with the embedded-MCP wiring site.
|
||||
const { DocmostClient, sharedToolSpecs } = await loadDocmostMcp();
|
||||
const client: DocmostClientLike = new DocmostClient({
|
||||
apiUrl,
|
||||
getToken,
|
||||
getCollabToken,
|
||||
sandbox: this.sandboxStore.asSink(),
|
||||
});
|
||||
|
||||
// Build an ai-SDK tool from a shared, zod-agnostic spec. The spec owns the
|
||||
@@ -625,6 +634,14 @@ export class AiChatToolsService {
|
||||
async ({ pageId, edits }) => await client.editPageText(pageId, edits),
|
||||
),
|
||||
|
||||
// Returns ONLY the short link object — never the document body — so a
|
||||
// large page can be handed to an external consumer without bloating
|
||||
// context.
|
||||
stashPage: sharedTool(
|
||||
sharedToolSpecs.stashPage,
|
||||
async ({ pageId }) => await client.stashPage(pageId),
|
||||
),
|
||||
|
||||
patchNode: tool({
|
||||
description:
|
||||
'Replace a single content block (by id) with a new ProseMirror ' +
|
||||
|
||||
@@ -155,6 +155,14 @@ export interface DocmostClientLike {
|
||||
commentId: string,
|
||||
resolved: boolean,
|
||||
): Promise<Record<string, unknown>>;
|
||||
// Serialize a page + mirror its internal images into the blob sandbox; returns
|
||||
// ONLY a short anonymous URL (the body never enters the model context).
|
||||
stashPage(pageId: string): Promise<{
|
||||
uri: string;
|
||||
sha256: string;
|
||||
size: number;
|
||||
images: { mirrored: number; failed: number };
|
||||
}>;
|
||||
}
|
||||
|
||||
export type DocmostClientConfig = {
|
||||
@@ -162,6 +170,18 @@ export type DocmostClientConfig = {
|
||||
getToken: () => Promise<string>;
|
||||
// Provenance collab-token provider for content mutations (signed agent claim).
|
||||
getCollabToken?: () => Promise<string>;
|
||||
// Optional blob-sandbox sink for the stash tool. `put` stores a blob in the
|
||||
// host's in-RAM SandboxStore and returns the anonymous read URL + integrity.
|
||||
// The optional `has`/`evict` probes let stashPage keep its mirror counts
|
||||
// honest under the store's FIFO eviction (mirror of the package's sink type).
|
||||
sandbox?: {
|
||||
put: (
|
||||
buf: Buffer,
|
||||
mime: string,
|
||||
) => { uri: string; sha256: string; size: number };
|
||||
has?: (uri: string) => boolean;
|
||||
evict?: (uri: string) => void;
|
||||
};
|
||||
};
|
||||
|
||||
export interface DocmostClientCtor {
|
||||
|
||||
@@ -172,4 +172,148 @@ describe('EnvironmentService', () => {
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getSandboxTtlMs', () => {
|
||||
// ConfigService stub: get(key, def) returns the configured value for the key
|
||||
// (falling back to def), matching the @nestjs/config contract the service
|
||||
// calls with (key, default).
|
||||
const build = (sandboxTtl?: string) =>
|
||||
new EnvironmentService({
|
||||
get: (key: string, def?: string) =>
|
||||
key === 'SANDBOX_TTL_MS' ? (sandboxTtl ?? def) : def,
|
||||
} as any);
|
||||
|
||||
it.each(['0', '-5', 'abc'])(
|
||||
'falls back to the 3600000 default for invalid value %s',
|
||||
(value) => {
|
||||
expect(build(value).getSandboxTtlMs()).toBe(3_600_000);
|
||||
},
|
||||
);
|
||||
|
||||
it('returns the parsed value for a valid positive integer', () => {
|
||||
expect(build('120000').getSandboxTtlMs()).toBe(120_000);
|
||||
});
|
||||
|
||||
it('uses the 3600000 default when SANDBOX_TTL_MS is unset', () => {
|
||||
expect(build(undefined).getSandboxTtlMs()).toBe(3_600_000);
|
||||
});
|
||||
});
|
||||
|
||||
// The three byte caps share the same getPositiveIntEnv() helper as the TTL,
|
||||
// so an unparseable / non-positive value ('0'/'-5'/'abc') falls back to the
|
||||
// documented default and a valid positive integer is returned parsed. Note
|
||||
// parseInt truncates '1.5' -> 1 (a valid positive integer), so that value is
|
||||
// accepted, not rejected — same as the pre-existing TTL getter.
|
||||
describe.each([
|
||||
{
|
||||
name: 'getSandboxMaxBytes',
|
||||
key: 'SANDBOX_MAX_BYTES',
|
||||
def: 8_388_608,
|
||||
getter: (s: EnvironmentService) => s.getSandboxMaxBytes(),
|
||||
},
|
||||
{
|
||||
name: 'getSandboxMaxImageBytes',
|
||||
key: 'SANDBOX_MAX_IMAGE_BYTES',
|
||||
def: 20_971_520,
|
||||
getter: (s: EnvironmentService) => s.getSandboxMaxImageBytes(),
|
||||
},
|
||||
{
|
||||
name: 'getSandboxMaxTotalBytes',
|
||||
key: 'SANDBOX_MAX_TOTAL_BYTES',
|
||||
def: 134_217_728,
|
||||
getter: (s: EnvironmentService) => s.getSandboxMaxTotalBytes(),
|
||||
},
|
||||
])('$name', ({ key, def, getter }) => {
|
||||
// ConfigService stub: get(k, d) returns the configured value for THIS cap's
|
||||
// key (falling back to d), and the default for every other key.
|
||||
const build = (value?: string) =>
|
||||
new EnvironmentService({
|
||||
get: (k: string, d?: string) =>
|
||||
k === key ? (value ?? d) : d,
|
||||
} as any);
|
||||
|
||||
it.each(['0', '-5', 'abc'])(
|
||||
`falls back to the ${def} default for invalid value %s`,
|
||||
(value) => {
|
||||
expect(getter(build(value))).toBe(def);
|
||||
},
|
||||
);
|
||||
|
||||
it('returns the parsed value for a valid positive integer', () => {
|
||||
expect(getter(build('4096'))).toBe(4096);
|
||||
});
|
||||
|
||||
it('truncates a non-integer like "1.5" to 1 via parseInt (not rejected)', () => {
|
||||
expect(getter(build('1.5'))).toBe(1);
|
||||
});
|
||||
|
||||
it(`uses the ${def} default when the env is unset`, () => {
|
||||
expect(getter(build(undefined))).toBe(def);
|
||||
});
|
||||
});
|
||||
|
||||
// getPositiveIntEnv keeps a one-shot `invalidPositiveIntWarned` set so a bad
|
||||
// value is logged ONCE per key (not on every getter call, which the sandbox
|
||||
// hits per-put). These tests pin that dedup so a regression to per-call logging
|
||||
// would fail loudly.
|
||||
describe('invalid-value warn dedup', () => {
|
||||
it('warns only once per key across repeated getter calls', () => {
|
||||
const service = new EnvironmentService({
|
||||
get: (k: string, d?: string) =>
|
||||
k === 'SANDBOX_MAX_TOTAL_BYTES' ? '-5' : d,
|
||||
} as any);
|
||||
const warnSpy = jest
|
||||
.spyOn((service as any).logger, 'warn')
|
||||
.mockImplementation(() => undefined);
|
||||
|
||||
service.getSandboxMaxTotalBytes();
|
||||
service.getSandboxMaxTotalBytes();
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('warns independently per key (dedup is per-key, not global)', () => {
|
||||
// Two DIFFERENT SANDBOX_* keys are both invalid -> each warns once, so two
|
||||
// warns total. This proves the dedup set is keyed, not a single global flag.
|
||||
const service = new EnvironmentService({
|
||||
get: (k: string, d?: string) =>
|
||||
k === 'SANDBOX_MAX_BYTES' || k === 'SANDBOX_MAX_TOTAL_BYTES'
|
||||
? '-5'
|
||||
: d,
|
||||
} as any);
|
||||
const warnSpy = jest
|
||||
.spyOn((service as any).logger, 'warn')
|
||||
.mockImplementation(() => undefined);
|
||||
|
||||
service.getSandboxMaxBytes();
|
||||
service.getSandboxMaxTotalBytes();
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getSandboxPublicUrl', () => {
  // Config stub covering BOTH keys the public-url logic reads. A key that has
  // no configured value (or any other key) yields the caller's default.
  const build = (vals: { sandboxUrl?: string; appUrl?: string }) => {
    const configured = new Map<string, string | undefined>([
      ['SANDBOX_PUBLIC_URL', vals.sandboxUrl],
      ['APP_URL', vals.appUrl],
    ]);
    return new EnvironmentService({
      get: (key: string, def?: string) => configured.get(key) ?? def,
    } as any);
  };

  it('uses SANDBOX_PUBLIC_URL and trims a trailing slash', () => {
    const service = build({ sandboxUrl: 'https://docs.example.com/' });

    expect(service.getSandboxPublicUrl()).toBe('https://docs.example.com');
  });

  it('falls back to APP_URL (origin) when SANDBOX_PUBLIC_URL is unset', () => {
    const service = build({ appUrl: 'https://app.example.com' });

    expect(service.getSandboxPublicUrl()).toBe('https://app.example.com');
  });
});
|
||||
});
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import ms, { StringValue } from 'ms';
|
||||
|
||||
@Injectable()
|
||||
export class EnvironmentService {
|
||||
private readonly logger = new Logger(EnvironmentService.name);
|
||||
// Env keys already warned about for an invalid value (one-shot per key, so a
|
||||
// bad SANDBOX_* value is not logged on every blob put). Mirrors the original
|
||||
// sandboxTtlWarned guard, generalized across the TTL + the three byte caps.
|
||||
private readonly invalidPositiveIntWarned = new Set<string>();
|
||||
|
||||
constructor(private configService: ConfigService) {}
|
||||
|
||||
getNodeEnv(): string {
|
||||
@@ -425,4 +431,63 @@ export class EnvironmentService {
|
||||
getGitSyncServiceUserId(): string | undefined {
|
||||
return this.configService.get<string>('GIT_SYNC_SERVICE_USER_ID');
|
||||
}
|
||||
|
||||
// --- Blob sandbox (in-RAM ephemeral blob transfer; see SandboxModule) ---
|
||||
|
||||
// Base URL that sandbox blob `uri`s are built from. The external consumer that
// downloads the blobs must be able to reach it over the network, so it must
// not be a loopback address when that consumer is remote. When
// SANDBOX_PUBLIC_URL is unset (or empty) the value of getAppUrl() is used, so
// a single-host deployment works without extra configuration; set it
// explicitly when the consumer lives on another host.
getSandboxPublicUrl(): string {
  const configured = this.configService.get<string>('SANDBOX_PUBLIC_URL');
  const base = configured || this.getAppUrl();
  // Strip every trailing slash so `${base}/api/sb/${id}` never doubles up.
  return base.replace(/\/+$/, '');
}
|
||||
|
||||
// Reads env `key` as a positive integer (used for the TTL in ms and the byte
// caps). The raw string goes through parseInt(…, 10), so a value such as "1.5"
// is truncated to 1 rather than rejected. Anything that does not parse to an
// integer > 0 would break the sandbox silently (instant expiry, or every put
// failing against a 0-byte cap), so it is replaced by `def` and a warning is
// logged the first time that key is seen invalid. Blob bodies are never
// logged.
private getPositiveIntEnv(key: string, def: number): number {
  const rawValue = this.configService.get<string>(key, String(def));
  const value = parseInt(rawValue, 10);

  if (Number.isInteger(value) && value > 0) {
    return value;
  }

  // Invalid value: warn once per key, then fall back on every call.
  const alreadyWarned = this.invalidPositiveIntWarned.has(key);
  if (!alreadyWarned) {
    this.invalidPositiveIntWarned.add(key);
    this.logger.warn(
      `Invalid ${key} (must be a positive integer); falling back to the ${def} default`,
    );
  }
  return def;
}
|
||||
|
||||
// How long a sandbox blob lives, in milliseconds (SANDBOX_TTL_MS). Defaults to
// one hour. The unguessable UUID, this short TTL and TLS together form the
// whole capability model (there are no tokens). A value that is not a positive
// integer would make every blob expire instantly (silent 404s), so it is
// replaced by the default, with a single warning to avoid per-put log spam.
getSandboxTtlMs(): number {
  const oneHourMs = 60 * 60 * 1000;
  return this.getPositiveIntEnv('SANDBOX_TTL_MS', oneHourMs);
}
|
||||
|
||||
// Size limit, in bytes, for a single non-image blob (the serialized document),
// read from SANDBOX_MAX_BYTES. Defaults to 8 MiB.
getSandboxMaxBytes(): number {
  const eightMiB = 8 * 1024 * 1024;
  return this.getPositiveIntEnv('SANDBOX_MAX_BYTES', eightMiB);
}
|
||||
|
||||
// Size limit, in bytes, for a single mirrored image blob, read from
// SANDBOX_MAX_IMAGE_BYTES. Defaults to 20 MiB.
getSandboxMaxImageBytes(): number {
  const twentyMiB = 20 * 1024 * 1024;
  return this.getPositiveIntEnv('SANDBOX_MAX_IMAGE_BYTES', twentyMiB);
}
|
||||
|
||||
// RAM guard: upper bound, in bytes, on everything the store holds at once,
// read from SANDBOX_MAX_TOTAL_BYTES. Defaults to 128 MiB. When a put would
// overflow it, the store evicts its oldest entries to make room.
getSandboxMaxTotalBytes(): number {
  const oneHundredTwentyEightMiB = 128 * 1024 * 1024;
  return this.getPositiveIntEnv(
    'SANDBOX_MAX_TOTAL_BYTES',
    oneHundredTwentyEightMiB,
  );
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import {
|
||||
IsIn,
|
||||
IsNotEmpty,
|
||||
IsNotIn,
|
||||
IsNumberString,
|
||||
IsOptional,
|
||||
IsString,
|
||||
IsUrl,
|
||||
@@ -219,6 +220,35 @@ export class EnvironmentVariables {
|
||||
@IsNotEmpty()
|
||||
@IsString()
|
||||
GIT_SYNC_SERVICE_USER_ID: string;
|
||||
|
||||
// --- Blob sandbox (in-RAM ephemeral blob transfer; see SandboxModule) ---
|
||||
|
||||
@IsOptional()
|
||||
@ValidateIf((obj) => obj.SANDBOX_PUBLIC_URL != '' && obj.SANDBOX_PUBLIC_URL != null)
|
||||
@IsUrl(
|
||||
{ protocols: ['http', 'https'], require_tld: false },
|
||||
{
|
||||
message:
|
||||
'SANDBOX_PUBLIC_URL must be a valid http(s) URL reachable by the external blob consumer',
|
||||
},
|
||||
)
|
||||
SANDBOX_PUBLIC_URL: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsNumberString({}, { message: 'SANDBOX_TTL_MS must be an integer (milliseconds)' })
|
||||
SANDBOX_TTL_MS: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsNumberString({}, { message: 'SANDBOX_MAX_BYTES must be an integer (bytes)' })
|
||||
SANDBOX_MAX_BYTES: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsNumberString({}, { message: 'SANDBOX_MAX_IMAGE_BYTES must be an integer (bytes)' })
|
||||
SANDBOX_MAX_IMAGE_BYTES: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsNumberString({}, { message: 'SANDBOX_MAX_TOTAL_BYTES must be an integer (bytes)' })
|
||||
SANDBOX_MAX_TOTAL_BYTES: string;
|
||||
}
|
||||
|
||||
export function validate(config: Record<string, any>) {
|
||||
|
||||
@@ -131,10 +131,25 @@ export class FailedLoginLimiter {
|
||||
}
|
||||
|
||||
// The per-session DocmostMcpConfig shape understood by @docmost/mcp: either the
|
||||
// service-account credentials variant OR the per-user getToken variant.
|
||||
export type DocmostMcpConfig =
|
||||
// service-account credentials variant OR the per-user getToken variant. The
|
||||
// optional `sandbox` sink (blob store for the stash tool) is common to both and
|
||||
// injected by McpService after the auth decision.
|
||||
export type DocmostMcpConfig = (
|
||||
| { apiUrl: string; email: string; password: string }
|
||||
| { apiUrl: string; getToken: () => Promise<string> };
|
||||
| { apiUrl: string; getToken: () => Promise<string> }
|
||||
) & {
|
||||
sandbox?: {
|
||||
put: (
|
||||
buf: Buffer,
|
||||
mime: string,
|
||||
) => { uri: string; sha256: string; size: number };
|
||||
// Optional live/evict probes the package uses to keep stash_page's mirror
|
||||
// counts honest under the store's FIFO eviction (mirror of the package's
|
||||
// sink type); older bindings omit them.
|
||||
has?: (uri: string) => boolean;
|
||||
evict?: (uri: string) => void;
|
||||
};
|
||||
};
|
||||
|
||||
export interface ResolvedMcpAuth {
|
||||
config: DocmostMcpConfig;
|
||||
|
||||
@@ -109,13 +109,13 @@ function makeService(opts: {
|
||||
};
|
||||
|
||||
const service = new McpService(
|
||||
undefined as never, // environmentService
|
||||
undefined as never, // workspaceRepo
|
||||
undefined as never, // authService
|
||||
undefined as never, // tokenService
|
||||
undefined as never, // userRepo
|
||||
undefined as never, // userSessionRepo
|
||||
moduleRef as never, // moduleRef (read by the MFA branch)
|
||||
undefined as never, // sandboxStore (unused by the login-gate path)
|
||||
);
|
||||
// Stop the constructor's unref'd sweep timer leaking across tests.
|
||||
service.onModuleDestroy();
|
||||
|
||||
@@ -2,17 +2,15 @@ import { Module } from '@nestjs/common';
|
||||
import { McpController } from './mcp.controller';
|
||||
import { McpService } from './mcp.service';
|
||||
import { DatabaseModule } from '@docmost/db/database.module';
|
||||
import { EnvironmentModule } from '../environment/environment.module';
|
||||
import { AuthModule } from '../../core/auth/auth.module';
|
||||
import { TokenModule } from '../../core/auth/token.module';
|
||||
|
||||
// Community MCP feature: the server itself serves the Model Context Protocol
|
||||
// over HTTP at /mcp. DatabaseModule (global) provides WorkspaceRepo and
|
||||
// EnvironmentModule (global) provides EnvironmentService. AuthModule supplies
|
||||
// AuthService (per-user HTTP-Basic login validation) and TokenModule supplies
|
||||
// TokenService (Bearer access-JWT verification for the token fallback).
|
||||
// over HTTP at /mcp. DatabaseModule (global) provides WorkspaceRepo. AuthModule
|
||||
// supplies AuthService (per-user HTTP-Basic login validation) and TokenModule
|
||||
// supplies TokenService (Bearer access-JWT verification for the token fallback).
|
||||
@Module({
|
||||
imports: [DatabaseModule, EnvironmentModule, AuthModule, TokenModule],
|
||||
imports: [DatabaseModule, AuthModule, TokenModule],
|
||||
controllers: [McpController],
|
||||
providers: [McpService],
|
||||
})
|
||||
|
||||
@@ -9,7 +9,6 @@ import { pathToFileURL } from 'node:url';
|
||||
import { esmImport } from '../../common/helpers/esm-import';
|
||||
import { IncomingMessage } from 'node:http';
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { EnvironmentService } from '../environment/environment.service';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import { UserSessionRepo } from '@docmost/db/repos/session/user-session.repo';
|
||||
@@ -31,6 +30,7 @@ import {
|
||||
DocmostMcpConfig,
|
||||
ResolvedMcpAuth,
|
||||
} from './mcp-auth.helpers';
|
||||
import { SandboxStore } from '../sandbox/sandbox.store';
|
||||
|
||||
// Minimal shape of the embedded MCP HTTP handler exported by @docmost/mcp/http.
|
||||
interface McpHttpHandler {
|
||||
@@ -88,13 +88,14 @@ export class McpService implements OnModuleDestroy {
|
||||
private readonly sweepTimer: NodeJS.Timeout;
|
||||
|
||||
constructor(
|
||||
private readonly environmentService: EnvironmentService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly authService: AuthService,
|
||||
private readonly tokenService: TokenService,
|
||||
private readonly userRepo: UserRepo,
|
||||
private readonly userSessionRepo: UserSessionRepo,
|
||||
private readonly moduleRef: ModuleRef,
|
||||
// Shared singleton in-RAM blob store backing the stash tool.
|
||||
private readonly sandboxStore: SandboxStore,
|
||||
) {
|
||||
this.sweepTimer = setInterval(() => {
|
||||
try {
|
||||
@@ -322,7 +323,11 @@ export class McpService implements OnModuleDestroy {
|
||||
// Should never happen: handle() always stashes before delegating.
|
||||
throw new UnauthorizedException('MCP authentication missing.');
|
||||
}
|
||||
return resolved.config;
|
||||
// Inject the blob-sandbox sink after the auth decision so stash_page
|
||||
// can store blobs in the shared in-RAM store regardless of which
|
||||
// credential variant resolved. The sink (put/has/evict + uri↔id
|
||||
// mapping) is owned by SandboxStore.asSink().
|
||||
return { ...resolved.config, sandbox: this.sandboxStore.asSink() };
|
||||
},
|
||||
{
|
||||
identify: (req: IncomingMessage) => {
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
// Route constants for the anonymous blob-sandbox endpoint, kept in one place.
//
// The controller is mounted under the global `/api` prefix, so its decorator
// takes the bare segment, whereas the public URL and the workspace-gate
// exclusion need the full path. The full path is derived from the segment so
// the two can never drift apart.

// Bare route segment used by the controller decorator.
export const SANDBOX_ROUTE_SEGMENT = 'sb';

// Full request path, including the global `/api` prefix.
export const SANDBOX_API_PATH = `/api/${SANDBOX_ROUTE_SEGMENT}`;
|
||||
@@ -0,0 +1,265 @@
|
||||
import { SandboxController } from './sandbox.controller';
|
||||
import { SandboxEntry } from './sandbox.store';
|
||||
|
||||
// Recording stand-in for the slice of FastifyReply the controller touches:
// status() / header() / headers() / send(). Every call returns the fake itself
// so calls chain, and what was written is exposed on `_sent` (header names are
// stored lower-cased). Until status() is called the recorded status is 200.
function makeRes() {
  const sent: { status: number; headers: Record<string, any>; body: any } = {
    status: 200,
    headers: {},
    body: undefined,
  };

  // Single place where a header is recorded, shared by header() and headers().
  const remember = (name: string, value: any) => {
    sent.headers[name.toLowerCase()] = value;
  };

  const res: any = {
    _sent: sent,
    status: (code: number) => {
      sent.status = code;
      return res;
    },
    header: (key: string, value: any) => {
      remember(key, value);
      return res;
    },
    headers: (obj: Record<string, any>) => {
      Object.keys(obj).forEach((name) => remember(name, obj[name]));
      return res;
    },
    send: (body?: any) => {
      sent.body = body;
      return res;
    },
  };

  return res;
}
|
||||
|
||||
// Smallest request fake the controller needs: an object carrying only the
// given headers (none by default).
function makeReq(headers: Record<string, any> = {}) {
  const req: any = { headers };
  return req;
}
|
||||
|
||||
// A syntactically valid v4 UUID (version nibble 4, variant nibble 8). The
|
||||
// shared `uuid` validator is stricter than a bare hex-shape regex, so the id
|
||||
// must carry a real version/variant.
|
||||
const VALID_ID = 'aaaaaaaa-bbbb-4ccc-8ddd-eeeeeeeeeeee';
|
||||
|
||||
// Builds a store entry for the given body, mime and digest that expires one
// minute from now.
function entry(buf: Buffer, mime: string, sha256: string): SandboxEntry {
  const oneMinuteMs = 60_000;
  const expiresAt = Date.now() + oneMinuteMs;
  return { buf, mime, sha256, expiresAt };
}
|
||||
|
||||
describe('SandboxController', () => {
  const JSON_MIME = 'application/json';
  const HARDENED_CSP = "base-uri 'none'; object-src 'self'; default-src 'self';";
  const CACHE_CONTROL_SHAPE = /^private, max-age=\d+, immutable$/;

  // A 64-character digest made of one repeated character.
  const shaOf = (ch: string) => ch.repeat(64);

  // One-byte JSON blob carrying the given digest.
  const tinyBlob = (sha: string) => entry(Buffer.from('x'), JSON_MIME, sha);

  // Calls controller.get() against a store stub whose get() returns `stored`,
  // using VALID_ID and no request headers unless overridden. Returns the stub
  // and everything the fake reply recorded.
  async function fetchBlob(
    stored: SandboxEntry | undefined,
    opts: { id?: string; headers?: Record<string, any> } = {},
  ) {
    const store = { get: jest.fn().mockReturnValue(stored) };
    const controller = new SandboxController(store as any);
    const res = makeRes();

    await controller.get(opts.id ?? VALID_ID, makeReq(opts.headers), res);

    return { store, sent: res._sent };
  }

  // Anti-XSS headers that must accompany every served body.
  function expectHardened(sent: any) {
    expect(sent.headers['x-content-type-options']).toBe('nosniff');
    expect(sent.headers['content-security-policy']).toBe(HARDENED_CSP);
  }

  it('serves 200 with body, Content-Type, Content-Length and sha256 ETag', async () => {
    const buf = Buffer.from('{"ok":true}', 'utf8');
    const sha = shaOf('a');

    const { store, sent } = await fetchBlob(entry(buf, JSON_MIME, sha));

    expect(store.get).toHaveBeenCalledWith(VALID_ID);
    expect(sent.status).toBe(200);
    expect(sent.headers['content-type']).toBe('application/json');
    expect(sent.headers['content-length']).toBe(buf.length);
    expect(sent.headers['etag']).toBe(`"${sha}"`);
    expect(sent.body).toBe(buf);
  });

  it('returns 404 for a missing/expired blob', async () => {
    const { sent } = await fetchBlob(undefined);

    expect(sent.status).toBe(404);
    expect(sent.body).toBeUndefined();
  });

  it('returns 404 for a non-UUID id WITHOUT touching the store (anti-traversal)', async () => {
    const { store, sent } = await fetchBlob(undefined, {
      id: '../../etc/passwd',
    });

    expect(store.get).not.toHaveBeenCalled();
    expect(sent.status).toBe(404);
  });

  it('returns 304 (no body) when If-None-Match matches the ETag', async () => {
    const sha = shaOf('b');

    const { sent } = await fetchBlob(tinyBlob(sha), {
      headers: { 'if-none-match': `"${sha}"` },
    });

    expect(sent.status).toBe(304);
    expect(sent.body).toBeUndefined();
    expect(sent.headers['etag']).toBe(`"${sha}"`);
  });

  it('accepts a bare (unquoted) sha256 in If-None-Match too', async () => {
    const sha = shaOf('c');

    const { sent } = await fetchBlob(tinyBlob(sha), {
      headers: { 'if-none-match': sha },
    });

    expect(sent.status).toBe(304);
  });

  it('serves 200 when If-None-Match does NOT match', async () => {
    const { sent } = await fetchBlob(tinyBlob(shaOf('d')), {
      headers: { 'if-none-match': '"stale"' },
    });

    expect(sent.status).toBe(200);
  });

  it('returns 304 for a wildcard "*" If-None-Match', async () => {
    const { sent } = await fetchBlob(tinyBlob(shaOf('e')), {
      headers: { 'if-none-match': '*' },
    });

    expect(sent.status).toBe(304);
  });

  it('returns 304 for a weak validator W/"<sha>"', async () => {
    const sha = shaOf('f');

    const { sent } = await fetchBlob(tinyBlob(sha), {
      headers: { 'if-none-match': `W/"${sha}"` },
    });

    expect(sent.status).toBe(304);
  });

  it('returns 304 when a comma-separated If-None-Match list contains the sha', async () => {
    const sha = shaOf('1');

    const { sent } = await fetchBlob(tinyBlob(sha), {
      headers: { 'if-none-match': `"other", "${sha}"` },
    });

    expect(sent.status).toBe(304);
  });

  it('sets a private, immutable Cache-Control with a max-age within the TTL on 200', async () => {
    // Known TTL: ~30s out, so the floored max-age must land within [0, 60].
    const soonToExpire: SandboxEntry = {
      buf: Buffer.from('x'),
      mime: JSON_MIME,
      sha256: shaOf('2'),
      expiresAt: Date.now() + 30_000,
    };

    const { sent } = await fetchBlob(soonToExpire);

    expect(sent.status).toBe(200);
    const cc = sent.headers['cache-control'] as string;
    expect(cc).toMatch(CACHE_CONTROL_SHAPE);
    const maxAge = Number(cc.match(/max-age=(\d+)/)![1]);
    expect(maxAge).toBeGreaterThanOrEqual(0);
    expect(maxAge).toBeLessThanOrEqual(60);
  });

  it('emits Cache-Control alongside ETag on the 304 branch', async () => {
    const sha = shaOf('3');

    const { sent } = await fetchBlob(tinyBlob(sha), {
      headers: { 'if-none-match': `"${sha}"` },
    });

    expect(sent.status).toBe(304);
    expect(sent.headers['cache-control']).toMatch(CACHE_CONTROL_SHAPE);
  });

  it('sets nosniff + restrictive CSP and serves an allowlisted image inline', async () => {
    const png = entry(Buffer.from('x'), 'image/png', shaOf('4'));

    const { sent } = await fetchBlob(png);

    expect(sent.status).toBe(200);
    expectHardened(sent);
    expect(sent.headers['content-disposition']).toBe('inline');
  });

  it('forces an SVG to download (attachment) while keeping nosniff + CSP', async () => {
    const svg = entry(Buffer.from('<svg/>'), 'image/svg+xml', shaOf('5'));

    const { sent } = await fetchBlob(svg);

    expect(sent.status).toBe(200);
    expect(sent.headers['content-disposition']).toBe('attachment');
    expectHardened(sent);
  });

  it('forces text/html to download (attachment) while keeping nosniff + CSP', async () => {
    const html = entry(Buffer.from('<h1>x</h1>'), 'text/html', shaOf('6'));

    const { sent } = await fetchBlob(html);

    expect(sent.status).toBe(200);
    expect(sent.headers['content-disposition']).toBe('attachment');
    expectHardened(sent);
  });
});
|
||||
@@ -0,0 +1,130 @@
|
||||
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { validate as isValidUUID } from 'uuid';
|
||||
import { SandboxStore } from './sandbox.store';
|
||||
import { SANDBOX_ROUTE_SEGMENT } from './sandbox.constants';
|
||||
|
||||
// Allowlist of MIME types a browser may render inline from the sandbox route.
// Everything else is served as an attachment. That deliberately includes SVG
// (it can carry script), text/html and the JSON document blob, so an
// attacker-controlled mime can never execute script on this origin (the route
// is anonymous and same-origin).
const INLINE_SAFE_MIME = new Set<string>([
  'image/png',
  'image/jpeg',
  'image/gif',
  'image/webp',
  'image/avif',
]);
|
||||
|
||||
/**
 * Anonymous, read-only HTTP endpoint for the in-RAM blob sandbox.
 *
 * Served as `GET /api/sb/:id` (the controller sits under the global `/api`
 * prefix). No `@UseGuards(JwtAuthGuard)` is applied, so — like the public
 * attachment route `GET /api/files/public/...` — it is outside Docmost session
 * auth. The route is also listed in the workspace-resolution preHandler's
 * excludedPaths in main.ts, so a remote consumer that sends no workspace host
 * is not rejected with "Workspace not found".
 *
 * The only thing ever served is a blob fetched from SandboxStore by a
 * validated UUID; `:id` is never used as a filesystem path, so there is no
 * traversal surface. The endpoint never returns tokens and never answers 401.
 *
 * Anti-XSS hardening mirrors the public attachment route: every 200 response
 * carries `X-Content-Type-Options: nosniff` and a restrictive CSP, and any
 * mime outside the inline-safe allowlist (svg, html, the JSON document blob)
 * is sent as an attachment, so an attacker-controlled `entry.mime` cannot run
 * script on this same-origin anonymous route.
 */
@Controller(SANDBOX_ROUTE_SEGMENT)
export class SandboxController {
  constructor(private readonly store: SandboxStore) {}

  /**
   * Serves the blob stored under `id`.
   *
   * Responds 404 (empty body) when `id` is not a UUID or no live entry exists,
   * 304 (empty body) when the request's If-None-Match matches the blob's
   * sha256, and otherwise 200 with the raw bytes.
   */
  @Get(':id')
  async get(
    @Param('id') id: string,
    @Req() req: FastifyRequest,
    @Res() res: FastifyReply,
  ): Promise<void> {
    // `:id` is checked with the shared `uuid` validator (same as the
    // attachment routes) before the store is consulted. This is input hygiene
    // against traversal-style ids such as `../...`, NOT authorization: the
    // capability is the unguessable id plus the short TTL plus TLS. A missing
    // and an expired blob are indistinguishable to the caller, by design, and
    // no stack trace leaks out.
    const entry = isValidUUID(id) ? this.store.get(id) : undefined;
    if (!entry) {
      res.status(404).send();
      return;
    }

    // Strong validator: the quoted sha256 with no W/ prefix. It is the value
    // computed at put() time, so an external consumer can detect a truncated
    // or corrupted body — the original bug this channel exists to fix.
    const etag = `"${entry.sha256}"`;

    // Freshness is computed BEFORE the conditional check so that a 304 still
    // carries Cache-Control; otherwise a revalidating client would forget how
    // long the blob stays fresh.
    const remainingMs = entry.expiresAt - Date.now();
    const ttlSeconds = Math.max(0, Math.floor(remainingMs / 1000));
    // Capability URL: keep it out of shared caches; immutable for its TTL.
    const cacheControl = `private, max-age=${ttlSeconds}, immutable`;

    // Conditional request: a matching validator yields 304 with no body. The
    // blob is immutable, so the validator is stable for its whole lifetime.
    const clientCopyIsCurrent = this.ifNoneMatchMatches(
      req.headers['if-none-match'],
      entry.sha256,
    );
    if (clientCopyIsCurrent) {
      res
        .status(304)
        .header('ETag', etag)
        .header('Cache-Control', cacheControl)
        .send();
      return;
    }

    // Mimes outside the allowlist (svg, html, the JSON blob) are forced to
    // download so an attacker-controlled mime can never run inline here.
    let disposition = 'attachment';
    if (INLINE_SAFE_MIME.has(entry.mime)) {
      disposition = 'inline';
    }

    // @Res() + res.send(Buffer) with an explicit Content-Type lets the binary
    // body bypass the global JSON response transform/serializer.
    res
      .status(200)
      .headers({
        'Content-Type': entry.mime,
        'Content-Length': entry.buf.length,
        ETag: etag,
        'Cache-Control': cacheControl,
        'X-Content-Type-Options': 'nosniff',
        'Content-Security-Policy':
          "base-uri 'none'; object-src 'self'; default-src 'self';",
        'Content-Disposition': disposition,
      })
      .send(entry.buf);
  }

  /**
   * Decides whether an If-None-Match header value matches `sha256`.
   *
   * Accepts the quoted ETag, a bare sha256, a weak `W/`-prefixed validator, a
   * comma-separated list of any of those, and the `*` wildcard. A repeated
   * header (array form) is treated as one comma-joined list.
   *
   * @param header Raw If-None-Match header value(s), if any.
   * @param sha256 Hex digest of the stored blob.
   * @returns true when the client's validator matches.
   */
  private ifNoneMatchMatches(
    header: string | string[] | undefined,
    sha256: string,
  ): boolean {
    if (!header) return false;

    const raw = Array.isArray(header) ? header.join(',') : header;
    if (raw.trim() === '*') return true;

    for (const token of raw.split(',')) {
      // Drop surrounding whitespace, a leading weak marker, then the quotes.
      const candidate = token
        .trim()
        .replace(/^W\//, '')
        .replace(/^"|"$/g, '');
      if (candidate === sha256) return true;
    }
    return false;
  }
}
|
||||
@@ -0,0 +1,19 @@
|
||||
import { Global, Module } from '@nestjs/common';
|
||||
import { SandboxController } from './sandbox.controller';
|
||||
import { SandboxStore } from './sandbox.store';
|
||||
|
||||
/**
 * Nest module for the in-RAM blob sandbox.
 *
 * It provides ONE shared SandboxStore (the @Injectable singleton). The stash
 * tool writes to it (via McpService / AiChatToolsService) and the anonymous
 * SandboxController reads from it, so put() and get() MUST hit the same Map.
 * The module is @Global so that single instance can be injected anywhere
 * without import churn. EnvironmentService (caps, TTL, public URL) comes from
 * the global EnvironmentModule.
 */
@Global()
@Module({
  providers: [SandboxStore],
  exports: [SandboxStore],
  controllers: [SandboxController],
})
export class SandboxModule {}
|
||||
@@ -0,0 +1,163 @@
|
||||
import { createHash } from 'node:crypto';
|
||||
import { validate as isValidUUID } from 'uuid';
|
||||
import { SandboxStore } from './sandbox.store';
|
||||
|
||||
// Minimal EnvironmentService stand-in for the store tests. It exposes only the
// sandbox getters; each returns the matching entry of `overrides` when one is
// given and the built-in default otherwise. The public URL is fixed.
function makeEnv(
  overrides: Partial<{
    ttlMs: number;
    maxBytes: number;
    maxImageBytes: number;
    maxTotalBytes: number;
  }> = {},
) {
  const defaults = {
    ttlMs: 3_600_000,
    maxBytes: 8_388_608,
    maxImageBytes: 20_971_520,
    maxTotalBytes: 134_217_728,
  };
  const settings = { ...defaults, ...overrides };

  const env: any = {
    getSandboxTtlMs() {
      return settings.ttlMs;
    },
    getSandboxMaxBytes() {
      return settings.maxBytes;
    },
    getSandboxMaxImageBytes() {
      return settings.maxImageBytes;
    },
    getSandboxMaxTotalBytes() {
      return settings.maxTotalBytes;
    },
    getSandboxPublicUrl() {
      return 'https://example.test';
    },
  };
  return env;
}
|
||||
|
||||
describe('SandboxStore', () => {
  let store: SandboxStore;

  // Hex sha256 of a buffer, computed independently of the store.
  const digestOf = (data: Buffer): string =>
    createHash('sha256').update(data).digest('hex');

  // Switch to fake timers and pin the clock to the given ISO instant.
  const freezeClockAt = (iso: string): void => {
    jest.useFakeTimers();
    jest.setSystemTime(new Date(iso));
  };

  afterEach(() => {
    // Stop the unref'd sweep interval so no timer survives into the next test.
    store?.onModuleDestroy();
    jest.useRealTimers();
  });

  it('put/get round-trips the exact bytes + mime and returns a UUID id', () => {
    store = new SandboxStore(makeEnv());
    const payload = Buffer.from('{"type":"doc","content":[]}', 'utf8');

    const stored = store.put(payload, 'application/json');
    expect(isValidUUID(stored.id)).toBe(true);
    expect(stored.size).toBe(payload.length);

    const fetched = store.get(stored.id);
    expect(fetched).toBeDefined();
    expect(fetched!.buf.equals(payload)).toBe(true);
    expect(fetched!.mime).toBe('application/json');
  });

  it('computes sha256 over the body (matches a manual digest)', () => {
    store = new SandboxStore(makeEnv());
    const payload = Buffer.from('hello sandbox', 'utf8');
    const wanted = digestOf(payload);

    const stored = store.put(payload, 'text/plain');
    expect(stored.sha256).toBe(wanted);
    expect(store.get(stored.id)!.sha256).toBe(wanted);
  });

  it('returns undefined for a missing id', () => {
    store = new SandboxStore(makeEnv());
    const unknownId = '11111111-1111-1111-1111-111111111111';
    expect(store.get(unknownId)).toBeUndefined();
  });

  it('lazily expires entries past the TTL (get returns undefined)', () => {
    freezeClockAt('2026-01-01T00:00:00Z');
    store = new SandboxStore(makeEnv({ ttlMs: 1000 }));
    const { id } = store.put(Buffer.from('x'), 'text/plain');

    expect(store.get(id)).toBeDefined();

    // Two seconds later the 1s TTL has elapsed.
    jest.setSystemTime(new Date('2026-01-01T00:00:02Z'));
    expect(store.get(id)).toBeUndefined();
    // The lazy eviction must also release the accounted bytes.
    expect(store.bytes).toBe(0);
  });

  it('background sweep drops expired entries without a get()', () => {
    freezeClockAt('2026-01-01T00:00:00Z');
    store = new SandboxStore(makeEnv({ ttlMs: 1000 }));
    store.put(Buffer.from('x'), 'text/plain');
    expect(store.size).toBe(1);

    // Move well past the TTL, then let the 60s sweep interval fire.
    jest.setSystemTime(new Date('2026-01-01T00:01:30Z'));
    jest.advanceTimersByTime(60_000);
    expect(store.size).toBe(0);
  });

  it('rejects a non-image blob over SANDBOX_MAX_BYTES', () => {
    store = new SandboxStore(makeEnv({ maxBytes: 16 }));
    const oversized = Buffer.alloc(17);
    expect(() => store.put(oversized, 'application/json')).toThrow(
      /per-blob cap/,
    );
  });

  it('uses the larger image cap for image/* blobs', () => {
    // A 100-byte blob is over the doc cap (16) yet under the image cap (1024).
    store = new SandboxStore(makeEnv({ maxBytes: 16, maxImageBytes: 1024 }));
    // PNG first, then SVG, which is treated as an image as well.
    for (const imageMime of ['image/png', 'image/svg+xml']) {
      expect(() => store.put(Buffer.alloc(100), imageMime)).not.toThrow();
    }
  });

  it('evicts oldest entries when the total cap would be exceeded', () => {
    // With a 250-byte total cap only two 100-byte blobs can coexist.
    store = new SandboxStore(
      makeEnv({ maxTotalBytes: 250, maxBytes: 1024 }),
    );
    // The third put has to push the first blob out.
    const [first, second, third] = [0, 1, 2].map(() =>
      store.put(Buffer.alloc(100), 'application/json'),
    );

    expect(store.get(first.id)).toBeUndefined();
    expect(store.get(second.id)).toBeDefined();
    expect(store.get(third.id)).toBeDefined();
    expect(store.bytes).toBeLessThanOrEqual(250);
  });

  it('rejects a single blob larger than the whole total cap', () => {
    store = new SandboxStore(
      makeEnv({ maxTotalBytes: 50, maxBytes: 1024 }),
    );
    const tooBigForStore = Buffer.alloc(100);
    expect(() => store.put(tooBigForStore, 'application/json')).toThrow(
      /total store cap/,
    );
  });

  it('putAndLink composes the anonymous /api/sb/<id> url with matching integrity', () => {
    store = new SandboxStore(makeEnv());
    const payload = Buffer.from('hello link', 'utf8');
    const wanted = digestOf(payload);

    const linked = store.putAndLink(payload, 'image/png');
    expect(linked.uri).toMatch(/^https:\/\/example\.test\/api\/sb\/[0-9a-f-]{36}$/);
    expect(linked.sha256).toBe(wanted);
    expect(linked.size).toBe(payload.length);
  });

  it('has()/remove() report and free a blob by id', () => {
    store = new SandboxStore(makeEnv());
    const stored = store.put(Buffer.from('x'), 'text/plain');

    expect(store.has(stored.id)).toBe(true);
    store.remove(stored.id);
    expect(store.has(stored.id)).toBe(false);
    expect(store.bytes).toBe(0);
  });

  it('asSink() round-trips put/has/evict through the anonymous uri', () => {
    store = new SandboxStore(makeEnv());
    const sink = store.asSink();
    const payload = Buffer.from('sink bytes', 'utf8');

    const { uri } = sink.put(payload, 'image/png');
    expect(sink.has(uri)).toBe(true);
    sink.evict(uri);
    expect(sink.has(uri)).toBe(false);
  });
});
|
||||
@@ -0,0 +1,178 @@
|
||||
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
|
||||
import { createHash, randomUUID } from 'node:crypto';
|
||||
import { EnvironmentService } from '../environment/environment.service';
|
||||
import { SANDBOX_API_PATH } from './sandbox.constants';
|
||||
|
||||
// In-RAM, process-local blob store. No disk, no DB. Ephemeral by design: a
|
||||
// restart empties it. A blob is addressed by an unguessable randomUUID() which
|
||||
// IS the read capability — there are NO tokens. Each blob is immutable (its id
|
||||
// never maps to changing content), so its sha256 is a perfect strong ETag.
|
||||
/** One stored blob together with the metadata kept alongside it. */
export interface SandboxEntry {
  /** Media type string exactly as it was handed to the store. */
  mime: string;
  /** The blob's bytes. */
  buf: Buffer;
  /** Hex-encoded SHA-256 digest of `buf`. */
  sha256: string;
  /** Epoch-millisecond instant from which the entry counts as expired. */
  expiresAt: number;
}
|
||||
|
||||
/** What a successful store operation reports back to the caller. */
export interface SandboxPutResult {
  /** Identifier under which the blob was stored. */
  id: string;
  /** Length of the stored blob in bytes. */
  size: number;
  /** Hex-encoded SHA-256 digest of the stored bytes. */
  sha256: string;
}
|
||||
|
||||
@Injectable()
export class SandboxStore implements OnModuleDestroy {
  private readonly logger = new Logger(SandboxStore.name);

  // Blobs keyed by id. A Map iterates in insertion order, so its first key is
  // always the oldest entry; FIFO eviction under the total-bytes guard relies
  // on that.
  private readonly map = new Map<string, SandboxEntry>();
  // Running sum of buf.length over everything currently in `map`.
  private totalBytes = 0;

  // Period of the background sweep that removes expired entries which nobody
  // fetched. The timer is unref()'d so it cannot keep the process alive, and
  // it is cleared when the module is destroyed.
  private readonly sweepIntervalMs = 60_000;
  private readonly sweepTimer: NodeJS.Timeout;

  constructor(private readonly environmentService: EnvironmentService) {
    // A sweep failure is logged and never allowed to escape the timer callback.
    const tick = (): void => {
      try {
        this.sweep();
      } catch (err) {
        this.logger.error('Sandbox sweep failed', err as Error);
      }
    };
    this.sweepTimer = setInterval(tick, this.sweepIntervalMs);
    this.sweepTimer.unref?.();
  }

  /** Stops the background sweep timer. */
  onModuleDestroy(): void {
    clearInterval(this.sweepTimer);
  }

  /**
   * Stores `buf` under a fresh random UUID and reports the id, hex sha256 and
   * byte size.
   *
   * Blobs whose mime starts with `image/` are checked against the image cap,
   * everything else against the regular per-blob cap. Expired entries are
   * swept first; after that the oldest entries are evicted until the new blob
   * fits under the total cap. The blob body is never logged.
   *
   * @throws Error if the blob exceeds its per-blob cap, or is larger than the
   *   whole total store cap (so no amount of eviction could make it fit).
   */
  put(buf: Buffer, mime: string): SandboxPutResult {
    const env = this.environmentService;
    const size = buf.length;

    const isImage = mime.startsWith('image/');
    const perBlobCap = isImage
      ? env.getSandboxMaxImageBytes()
      : env.getSandboxMaxBytes();
    if (size > perBlobCap) {
      throw new Error(
        `Sandbox blob of ${buf.length} bytes exceeds the ${perBlobCap}-byte per-blob cap`,
      );
    }

    const maxTotal = env.getSandboxMaxTotalBytes();
    if (size > maxTotal) {
      throw new Error(
        `Sandbox blob of ${buf.length} bytes exceeds the total store cap of ${maxTotal} bytes`,
      );
    }

    // Reclaim expired entries, then walk the keys oldest-first and evict until
    // the new blob fits. Deleting the current key while iterating a Map is
    // well-defined, so this visits exactly the entries that get evicted.
    this.sweep();
    for (const oldestId of this.map.keys()) {
      const stillOverCap = this.totalBytes + size > maxTotal;
      if (!stillOverCap) break;
      this.evict(oldestId);
    }

    const id = randomUUID();
    const sha256 = createHash('sha256').update(buf).digest('hex');
    const storedAt = Date.now();
    const expiresAt = storedAt + env.getSandboxTtlMs();

    this.map.set(id, { buf, mime, sha256, expiresAt });
    this.totalBytes += size;
    return { id, sha256, size };
  }

  /**
   * Stores a blob via put() and returns its read URL together with the
   * integrity metadata. The URL is the configured sandbox public base,
   * followed by SANDBOX_API_PATH, a slash and the blob id; composing it here
   * spares callers from assembling the route themselves.
   */
  putAndLink(
    buf: Buffer,
    mime: string,
  ): { uri: string; sha256: string; size: number } {
    const { id, sha256, size } = this.put(buf, mime);
    const publicBase = this.environmentService.getSandboxPublicUrl();
    const uri = `${publicBase}${SANDBOX_API_PATH}/${id}`;
    return { uri, sha256, size };
  }

  /**
   * Exposes the store as a `{ put, has, evict }` sink addressed by uri instead
   * of id. The id is recovered from a uri by taking everything after its last
   * `/`, which mirrors how putAndLink() builds the uri.
   */
  asSink(): {
    put: (buf: Buffer, mime: string) => { uri: string; sha256: string; size: number };
    has: (uri: string) => boolean;
    evict: (uri: string) => void;
  } {
    const lastSegment = (uri: string): string =>
      uri.slice(uri.lastIndexOf('/') + 1);

    return {
      put: (buf, mime) => this.putAndLink(buf, mime),
      has: (uri) => this.has(lastSegment(uri)),
      evict: (uri) => this.remove(lastSegment(uri)),
    };
  }

  /** Whether `id` currently resolves to a non-expired blob. */
  has(id: string): boolean {
    const live = this.get(id);
    return live !== undefined;
  }

  /** Removes the blob stored under `id`, if there is one. */
  remove(id: string): void {
    this.evict(id);
  }

  /**
   * Looks up a blob. An entry whose expiry has passed is evicted on the spot
   * and reported as undefined, exactly like an unknown id.
   */
  get(id: string): SandboxEntry | undefined {
    const found = this.map.get(id);
    if (!found) return undefined;

    const expired = found.expiresAt <= Date.now();
    if (!expired) return found;

    this.evict(id);
    return undefined;
  }

  /**
   * Number of entries currently held (test/diagnostic helper). Expired entries
   * that have not been swept or looked up yet are still counted.
   */
  get size(): number {
    return this.map.size;
  }

  /** Total bytes currently accounted for (test/diagnostic helper). */
  get bytes(): number {
    return this.totalBytes;
  }

  // Deletes one entry and gives its bytes back to the accounting; unknown ids
  // are ignored.
  private evict(id: string): void {
    const victim = this.map.get(id);
    if (!victim) return;
    this.totalBytes -= victim.buf.length;
    this.map.delete(id);
  }

  // Evicts every entry whose expiry is at or before the current time.
  private sweep(): void {
    const now = Date.now();
    this.map.forEach((entry, id) => {
      if (entry.expiresAt <= now) {
        this.evict(id);
      }
    });
  }
}
|
||||
@@ -13,6 +13,7 @@ import fastifyCookie from '@fastify/cookie';
|
||||
import fastifyIp from 'fastify-ip';
|
||||
import { InternalLogFilter } from './common/logger/internal-log-filter';
|
||||
import { EnvironmentService } from './integrations/environment/environment.service';
|
||||
import { SANDBOX_API_PATH } from './integrations/sandbox/sandbox.constants';
|
||||
import { resolveFrameHeader } from './common/helpers';
|
||||
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
|
||||
import { GitHttpService } from './integrations/git-sync/http/git-http.service';
|
||||
@@ -144,6 +145,10 @@ async function bootstrap() {
|
||||
'/api/workspace/create',
|
||||
'/api/workspace/joined',
|
||||
'/api/workspace/find-by-email',
|
||||
// Anonymous in-RAM blob sandbox: a remote consumer fetches blobs by an
|
||||
// unguessable UUID without any workspace host context, so the
|
||||
// workspace-resolution gate must not apply.
|
||||
SANDBOX_API_PATH,
|
||||
];
|
||||
|
||||
if (
|
||||
|
||||
Reference in New Issue
Block a user