fix(sandbox): address PR #250 review — SSRF guard, eviction safety, cleanup (#243)

Security:
- stash_page: reject path-traversal / percent-encoded srcs before the authed
  loopback fetch (resolveInternalFilePath), closing an SSRF/exfiltration hole
  where a crafted node.attrs.src could read an arbitrary internal GET endpoint
  into the anonymous sandbox.

Stability:
- stash_page: revert + recount mirrors FIFO-evicted by a later put in the same
  stash (no dangling sandbox refs, honest images.mirrored/failed); free image
  blobs if the final document put throws.
- Reject/clamp non-positive SANDBOX_TTL_MS to the 1h default (warn once).
- Log mirror failures unconditionally (console.warn, no blob bodies).

Cleanup / architecture:
- Remove dead expiresAt from SandboxPutResult.
- Centralize the /api/sb route in SANDBOX_ROUTE_SEGMENT/SANDBOX_API_PATH and
  move URL composition into SandboxStore.putAndLink; drop the duplicated sink
  closures and the now-unused EnvironmentService injection from McpService and
  AiChatToolsService.
- Un-export isInternalFileUrl; document the process-local (instance-bound)
  sandbox limitation in the tool description and .env.example.

Docs/tests:
- README/README.ru: 38 -> 39 tools + stash_page entry.
- Add traversal/normalize/recursion unit tests, stash self-eviction +
  doc-put-throw + empty/octet-stream mock tests, controller If-None-Match
  (wildcard/weak/list) + Cache-Control tests, and SANDBOX_TTL_MS validation
  tests. Regenerate packages/mcp/build.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-28 18:02:46 +03:00
parent 2fe4ca8537
commit 6eb335d5e3
24 changed files with 708 additions and 97 deletions

View File

@@ -63,9 +63,8 @@ describe('AiChatToolsService deletePage guardrail (H4)', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});
@@ -179,9 +178,8 @@ describe('AiChatToolsService expanded toolset guardrails', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});
@@ -298,9 +296,8 @@ describe('AiChatToolsService node-arg JSON-string coercion', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});
@@ -452,9 +449,8 @@ describe('AiChatToolsService model-friendly input validation (#190)', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});

View File

@@ -16,7 +16,6 @@ import {
import { resolveCurrentPageResult } from './current-page.util';
import { parseNodeArg } from './parse-node-arg';
import { modelFriendlyInput } from './model-friendly-input';
import { EnvironmentService } from '../../../integrations/environment/environment.service';
import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
/**
@@ -43,7 +42,6 @@ export class AiChatToolsService {
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
private readonly spaceMemberRepo: SpaceMemberRepo,
private readonly pagePermissionRepo: PagePermissionRepo,
private readonly environmentService: EnvironmentService,
// Shared singleton in-RAM blob store backing the stash tool.
private readonly sandboxStore: SandboxStore,
) {}
@@ -91,22 +89,23 @@ export class AiChatToolsService {
aiChatId,
});
// Bind the stash tool to the shared in-RAM SandboxStore and compose the
// anonymous public URL here (the MCP package never touches env or the
// store). put() returns the read URL + sha256/size; sha256 is also the
// blob's ETag for integrity.
const sandboxPut = (buf: Buffer, mime: string) => {
const stored = this.sandboxStore.put(buf, mime);
const base = this.environmentService.getSandboxPublicUrl();
return { uri: `${base}/api/sb/${stored.id}`, sha256: stored.sha256, size: stored.size };
};
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
// package needs to keep its mirror counts honest under FIFO eviction (the
// package never touches env or the store). The sink speaks `uri`s, so the
// probes map a uri back to its id (the last path segment).
const idOf = (uri: string) => uri.substring(uri.lastIndexOf('/') + 1);
const { DocmostClient, sharedToolSpecs } = await loadDocmostMcp();
const client: DocmostClientLike = new DocmostClient({
apiUrl,
getToken,
getCollabToken,
sandbox: { put: sandboxPut },
sandbox: {
put: (buf, mime) => this.sandboxStore.putAndLink(buf, mime),
has: (uri) => this.sandboxStore.has(idOf(uri)),
evict: (uri) => this.sandboxStore.remove(idOf(uri)),
},
});
// Build an ai-SDK tool from a shared, zod-agnostic spec. The spec owns the

View File

@@ -171,11 +171,15 @@ export type DocmostClientConfig = {
getCollabToken?: () => Promise<string>;
// Optional blob-sandbox sink for the stash tool. `put` stores a blob in the
// host's in-RAM SandboxStore and returns the anonymous read URL + integrity.
// The optional `has`/`evict` probes let stashPage keep its mirror counts
// honest under the store's FIFO eviction (mirror of the package's sink type).
sandbox?: {
put: (
buf: Buffer,
mime: string,
) => { uri: string; sha256: string; size: number };
has?: (uri: string) => boolean;
evict?: (uri: string) => void;
};
};

View File

@@ -14,4 +14,30 @@ describe('EnvironmentService', () => {
it('should be defined', () => {
expect(service).toBeDefined();
});
describe('getSandboxTtlMs', () => {
// ConfigService stub: get(key, def) returns the configured value for the key
// (falling back to def), matching the @nestjs/config contract the service
// calls with (key, default).
const build = (sandboxTtl?: string) =>
new EnvironmentService({
get: (key: string, def?: string) =>
key === 'SANDBOX_TTL_MS' ? (sandboxTtl ?? def) : def,
} as any);
it.each(['0', '-5', 'abc'])(
'falls back to the 3600000 default for invalid value %s',
(value) => {
expect(build(value).getSandboxTtlMs()).toBe(3_600_000);
},
);
it('returns the parsed value for a valid positive integer', () => {
expect(build('120000').getSandboxTtlMs()).toBe(120_000);
});
it('uses the 3600000 default when SANDBOX_TTL_MS is unset', () => {
expect(build(undefined).getSandboxTtlMs()).toBe(3_600_000);
});
});
});

View File

@@ -1,9 +1,14 @@
import { Injectable } from '@nestjs/common';
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import ms, { StringValue } from 'ms';
@Injectable()
export class EnvironmentService {
private readonly logger = new Logger(EnvironmentService.name);
// One-shot guard so an invalid SANDBOX_TTL_MS is warned about once, not on
// every getSandboxTtlMs() call (which runs per blob put).
private sandboxTtlWarned = false;
constructor(private configService: ConfigService) {}
getNodeEnv(): string {
@@ -348,12 +353,25 @@ export class EnvironmentService {
}
// Blob time-to-live. Default 1h. The unguessable UUID + this short TTL + TLS
// are the whole capability model (no tokens).
// are the whole capability model (no tokens). A non-positive or non-integer
// value would make every blob expire instantly (silent 404s), so reject it and
// fall back to the 1h default (warned about once to avoid per-put log spam).
getSandboxTtlMs(): number {
return parseInt(
const parsed = parseInt(
this.configService.get<string>('SANDBOX_TTL_MS', '3600000'),
10,
);
if (!Number.isInteger(parsed) || parsed <= 0) {
if (!this.sandboxTtlWarned) {
this.sandboxTtlWarned = true;
this.logger.warn(
`Invalid SANDBOX_TTL_MS (must be a positive integer); ` +
`falling back to the 3600000 ms default`,
);
}
return 3_600_000;
}
return parsed;
}
// Per-blob cap for non-image blobs (the serialized document). Default 8 MiB.

View File

@@ -143,6 +143,11 @@ export type DocmostMcpConfig = (
buf: Buffer,
mime: string,
) => { uri: string; sha256: string; size: number };
// Optional live/evict probes the package uses to keep stash_page's mirror
// counts honest under the store's FIFO eviction (mirror of the package's
// sink type); older bindings omit them.
has?: (uri: string) => boolean;
evict?: (uri: string) => void;
};
};

View File

@@ -109,7 +109,6 @@ function makeService(opts: {
};
const service = new McpService(
undefined as never, // environmentService
undefined as never, // workspaceRepo
undefined as never, // authService
undefined as never, // tokenService

View File

@@ -8,7 +8,6 @@ import { ModuleRef } from '@nestjs/core';
import { pathToFileURL } from 'node:url';
import { IncomingMessage } from 'node:http';
import { FastifyReply, FastifyRequest } from 'fastify';
import { EnvironmentService } from '../environment/environment.service';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { UserSessionRepo } from '@docmost/db/repos/session/user-session.repo';
@@ -93,7 +92,6 @@ export class McpService implements OnModuleDestroy {
private readonly sweepTimer: NodeJS.Timeout;
constructor(
private readonly environmentService: EnvironmentService,
private readonly workspaceRepo: WorkspaceRepo,
private readonly authService: AuthService,
private readonly tokenService: TokenService,
@@ -118,20 +116,17 @@ export class McpService implements OnModuleDestroy {
clearInterval(this.sweepTimer);
}
// Bind the stash tool to the shared in-RAM SandboxStore and compose the
// anonymous public URL (the MCP package owns neither env nor the store).
// put() returns the read URL + sha256/size; sha256 is also the blob ETag.
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
// package needs to keep its mirror counts honest under FIFO eviction; the
// package owns neither env nor the store. The sink speaks `uri`s, so the
// probes map a uri back to its id (the last path segment).
private buildSandboxConfig(): DocmostMcpConfig['sandbox'] {
const idOf = (uri: string) => uri.substring(uri.lastIndexOf('/') + 1);
return {
put: (buf: Buffer, mime: string) => {
const stored = this.sandboxStore.put(buf, mime);
const base = this.environmentService.getSandboxPublicUrl();
return {
uri: `${base}/api/sb/${stored.id}`,
sha256: stored.sha256,
size: stored.size,
};
},
put: (buf, mime) => this.sandboxStore.putAndLink(buf, mime),
has: (uri) => this.sandboxStore.has(idOf(uri)),
evict: (uri) => this.sandboxStore.remove(idOf(uri)),
};
}

View File

@@ -0,0 +1,6 @@
// Single source of truth for the anonymous blob-sandbox route. The controller
// is mounted under the global `/api` prefix, so its decorator uses the bare
// segment while the public URL and the workspace-gate exclusion need the full
// path — derive the latter from the former so the two never drift.
export const SANDBOX_ROUTE_SEGMENT = 'sb';
export const SANDBOX_API_PATH = `/api/${SANDBOX_ROUTE_SEGMENT}`;

View File

@@ -121,4 +121,70 @@ describe('SandboxController', () => {
expect(res._sent.status).toBe(200);
});
it('returns 304 for a wildcard "*" If-None-Match', async () => {
const sha = 'e'.repeat(64);
const store = {
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
};
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(VALID_ID, makeReq({ 'if-none-match': '*' }), res);
expect(res._sent.status).toBe(304);
});
it('returns 304 for a weak validator W/"<sha>"', async () => {
const sha = 'f'.repeat(64);
const store = {
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
};
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(VALID_ID, makeReq({ 'if-none-match': `W/"${sha}"` }), res);
expect(res._sent.status).toBe(304);
});
it('returns 304 when a comma-separated If-None-Match list contains the sha', async () => {
const sha = '1'.repeat(64);
const store = {
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
};
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(
VALID_ID,
makeReq({ 'if-none-match': `"other", "${sha}"` }),
res,
);
expect(res._sent.status).toBe(304);
});
it('sets a private, immutable Cache-Control with a max-age within the TTL on 200', async () => {
const sha = '2'.repeat(64);
// Known TTL: ~30s out, so the floored max-age must land within [0, 60].
const e: SandboxEntry = {
buf: Buffer.from('x'),
mime: 'application/json',
sha256: sha,
expiresAt: Date.now() + 30_000,
};
const store = { get: jest.fn().mockReturnValue(e) };
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(VALID_ID, makeReq(), res);
expect(res._sent.status).toBe(200);
const cc = res._sent.headers['cache-control'] as string;
expect(cc).toMatch(/^private, max-age=\d+, immutable$/);
const maxAge = Number(cc.match(/max-age=(\d+)/)![1]);
expect(maxAge).toBeGreaterThanOrEqual(0);
expect(maxAge).toBeLessThanOrEqual(60);
});
});

View File

@@ -1,6 +1,7 @@
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
import { FastifyReply, FastifyRequest } from 'fastify';
import { SandboxStore } from './sandbox.store';
import { SANDBOX_ROUTE_SEGMENT } from './sandbox.constants';
// Strict UUID v-agnostic shape. This is anti-traversal / input hygiene (so `:id`
// can never be a path like `../...`), NOT authorization — the capability is the
@@ -22,7 +23,7 @@ const UUID_RE =
* UUID; `:id` is never used as a filesystem path, so there is no traversal
* surface. Never returns tokens, never 401s.
*/
@Controller('sb')
@Controller(SANDBOX_ROUTE_SEGMENT)
export class SandboxController {
constructor(private readonly store: SandboxStore) {}

View File

@@ -1,6 +1,7 @@
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
import { createHash, randomUUID } from 'node:crypto';
import { EnvironmentService } from '../environment/environment.service';
import { SANDBOX_API_PATH } from './sandbox.constants';
// In-RAM, process-local blob store. No disk, no DB. Ephemeral by design: a
// restart empties it. A blob is addressed by an unguessable randomUUID() which
@@ -17,7 +18,6 @@ export interface SandboxPutResult {
id: string;
sha256: string;
size: number;
expiresAt: number;
}
@Injectable()
@@ -86,7 +86,36 @@ export class SandboxStore implements OnModuleDestroy {
const expiresAt = Date.now() + this.environmentService.getSandboxTtlMs();
this.map.set(id, { buf, mime, sha256, expiresAt });
this.totalBytes += buf.length;
return { id, sha256, size: buf.length, expiresAt };
return { id, sha256, size: buf.length };
}
/**
* Store a blob and return its anonymous read URL plus integrity metadata.
* Owns the single sandbox-URL composition (`${publicBase}${SANDBOX_API_PATH}/
* <id>`) so callers never hand-build the route; the raw put() stays public for
* tests/low-level callers. sha256 is also the blob's strong ETag.
*/
putAndLink(
buf: Buffer,
mime: string,
): { uri: string; sha256: string; size: number } {
const stored = this.put(buf, mime);
const base = this.environmentService.getSandboxPublicUrl();
return {
uri: `${base}${SANDBOX_API_PATH}/${stored.id}`,
sha256: stored.sha256,
size: stored.size,
};
}
/** True if the blob is still live (not evicted/expired). */
has(id: string): boolean {
return this.get(id) !== undefined;
}
/** Drop a blob by id (public wrapper over the private FIFO evict). */
remove(id: string): void {
this.evict(id);
}
/** Returns the entry, or undefined if missing OR expired (lazy expiry). */

View File

@@ -13,6 +13,7 @@ import fastifyCookie from '@fastify/cookie';
import fastifyIp from 'fastify-ip';
import { InternalLogFilter } from './common/logger/internal-log-filter';
import { EnvironmentService } from './integrations/environment/environment.service';
import { SANDBOX_API_PATH } from './integrations/sandbox/sandbox.constants';
import { resolveFrameHeader } from './common/helpers';
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
@@ -129,7 +130,7 @@ async function bootstrap() {
// Anonymous in-RAM blob sandbox: a remote consumer fetches blobs by an
// unguessable UUID without any workspace host context, so the
// workspace-resolution gate must not apply.
'/api/sb',
SANDBOX_API_PATH,
];
if (