From aff58646d16efebd3b2596dccce03d733c8aba57 Mon Sep 17 00:00:00 2001 From: claude_code Date: Sun, 28 Jun 2026 20:21:31 +0300 Subject: [PATCH] =?UTF-8?q?refactor(sandbox):=20address=20PR=20#250=20roun?= =?UTF-8?q?d-3=20review=20=E2=80=94=20dead=20import,=20env=20validation,?= =?UTF-8?q?=20uuid=20validator,=20docs=20(#243)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Must-fix: - mcp.module: drop the now-dead EnvironmentModule import (and its stale comment). McpService no longer injects EnvironmentService; EnvironmentModule is @Global and imported at the app root, so DI still resolves. Stability: - environment.service: route getSandboxTtlMs + the three SANDBOX_MAX_*_BYTES caps through a shared getPositiveIntEnv() helper that warns once per key and falls back to the default on a non-integer or <= 0 value (previously the byte caps did a bare parseInt, so SANDBOX_MAX_TOTAL_BYTES=0 made every stash_page fail against a 0-byte cap). TTL behavior is unchanged. Simplification: - sandbox.controller: replace the homemade UUID_RE with the project's shared `uuid` validator (import { validate as isValidUUID } from 'uuid'), matching the attachment routes; update the spec fixtures to valid v4 UUIDs. - mcp.service: inline the single-caller one-liner buildSandboxConfig() to this.sandboxStore.asSink() at the wiring site. Docs: - CHANGELOG: add an [Unreleased] > Added entry for #243 (stash_page tool, anonymous GET /api/sb/:id, five SANDBOX_* env vars). - AGENTS.md: note that GET /api/sb/:id is in the workspace-gate preHandler's excludedPaths and is fully tokenless, unlike /api/files/public/... which still resolves a workspace and needs an attachment JWT. Tests: cap-getter validation (0/-5/abc -> default, valid -> parsed), updated UUID fixtures. apps/server jest sandbox/environment/mcp: 233 pass. Co-Authored-By: Claude Opus 4.8 --- AGENTS.md | 2 +- CHANGELOG.md | 9 +++ .../environment/environment.service.spec.ts | 53 +++++++++++++++++ .../environment/environment.service.ts | 59 +++++++++---------- .../server/src/integrations/mcp/mcp.module.ts | 10 ++-- .../src/integrations/mcp/mcp.service.ts | 14 +---- .../sandbox/sandbox.controller.spec.ts | 5 +- .../sandbox/sandbox.controller.ts | 17 +++--- .../sandbox/sandbox.store.spec.ts | 6 +- 9 files changed, 113 insertions(+), 62 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 3b84f0d0..70a382f7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -241,7 +241,7 @@ Migration files live in `apps/server/src/database/migrations/` and are named `YY - **API server** — `dist/main` (`apps/server/src/main.ts`), the Fastify HTTP app (`AppModule`). - **Collaboration server** — `dist/collaboration/server/collab-main` (`pnpm collab`), a Hocuspocus/Yjs WebSocket server (`apps/server/src/collaboration/`) handling real-time document editing, persistence, and page-history snapshots. It listens on `COLLAB_PORT` (default `3001`), separate from the API server's `PORT` (default `3000`), and shares state with the API server through Redis. -The API server is a Fastify app with a global `/api` prefix (`main.ts` excludes `robots.txt`, public share pages, and `mcp` from the prefix). A `preHandler` hook enforces that a resolved `workspaceId` exists for most `/api` routes (multi-tenant by hostname/subdomain via `DomainMiddleware`). Auth is JWT (cookie + bearer); authorization is **CASL** (`core/casl`) — every data access is scoped to the user's abilities. +The API server is a Fastify app with a global `/api` prefix (`main.ts` excludes `robots.txt`, public share pages, and `mcp` from the prefix). A `preHandler` hook enforces that a resolved `workspaceId` exists for most `/api` routes (multi-tenant by hostname/subdomain via `DomainMiddleware`). `GET /api/sb/:id` (the anonymous blob-sandbox read route) is listed in that preHandler's `excludedPaths`, so it is exempt from workspace resolution and carries no session auth at all (its capability is the unguessable UUID + TTL + TLS) — unlike `/api/files/public/...`, which still resolves a workspace and requires a workspace-bound attachment JWT. Auth is JWT (cookie + bearer); authorization is **CASL** (`core/casl`) — every data access is scoped to the user's abilities. ### Module structure (server) `AppModule` wires integration modules (`integrations/*`: storage [local/S3/Azure], mail, queue [BullMQ on Redis], security, telemetry, throttle, `mcp`, `ai`) plus `CoreModule`, `DatabaseModule`, and `CollaborationModule`. `CoreModule` (`core/*`) holds the domain modules: `page`, `space`, `comment`, `workspace`, `user`, `auth`, `group`, `attachment`, `search`, `share`, `ai-chat`, etc. Each domain module follows NestJS controller → service → repo layering; DB repos live under `database/repos` and are injected app-wide from the global `DatabaseModule`. diff --git a/CHANGELOG.md b/CHANGELOG.md index 832615d6..24aeffcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 append/prepend fragments, nor to COMMENT bodies — a comment may legitimately contain a standalone footnote definition, which canonicalization would drop. (#228) +- **Out-of-band page transfer via an in-RAM blob sandbox (`stash_page`).** A + new MCP tool serializes a whole page (its full ProseMirror JSON, with every + internal image/file mirrored) into an ephemeral in-RAM blob and returns only + a short anonymous URL, so a large page can be handed to an external consumer + without flooding the model context. Blobs are served by unguessable UUID over + a new anonymous `GET /api/sb/:id` route (strong sha256 ETag, short TTL, + `nosniff` + restrictive CSP + attachment disposition for non-image mimes) and + are RAM-only, bound to the instance that created them. Tunable via five + `SANDBOX_*` env vars (see `.env.example`). (#243) ### Changed diff --git a/apps/server/src/integrations/environment/environment.service.spec.ts b/apps/server/src/integrations/environment/environment.service.spec.ts index 3e91e10a..e40508cd 100644 --- a/apps/server/src/integrations/environment/environment.service.spec.ts +++ b/apps/server/src/integrations/environment/environment.service.spec.ts @@ -41,6 +41,59 @@ describe('EnvironmentService', () => { }); }); + // The three byte caps share the same getPositiveIntEnv() helper as the TTL, + // so a non-integer / non-positive value ('0'/'-5'/'abc') falls back to the + // documented default and a valid positive integer is returned parsed. Note + // parseInt truncates '1.5' -> 1 (a valid positive integer), so that value is + // accepted, not rejected — same as the pre-existing TTL getter. + describe.each([ + { + name: 'getSandboxMaxBytes', + key: 'SANDBOX_MAX_BYTES', + def: 8_388_608, + getter: (s: EnvironmentService) => s.getSandboxMaxBytes(), + }, + { + name: 'getSandboxMaxImageBytes', + key: 'SANDBOX_MAX_IMAGE_BYTES', + def: 20_971_520, + getter: (s: EnvironmentService) => s.getSandboxMaxImageBytes(), + }, + { + name: 'getSandboxMaxTotalBytes', + key: 'SANDBOX_MAX_TOTAL_BYTES', + def: 134_217_728, + getter: (s: EnvironmentService) => s.getSandboxMaxTotalBytes(), + }, + ])('$name', ({ key, def, getter }) => { + // ConfigService stub: get(k, d) returns the configured value for THIS cap's + // key (falling back to d), and the default for every other key. + const build = (value?: string) => + new EnvironmentService({ + get: (k: string, d?: string) => + k === key ? (value ?? d) : d, + } as any); + + it.each(['0', '-5', 'abc'])( + `falls back to the ${def} default for invalid value %s`, + (value) => { + expect(getter(build(value))).toBe(def); + }, + ); + + it('returns the parsed value for a valid positive integer', () => { + expect(getter(build('4096'))).toBe(4096); + }); + + it('truncates a non-integer like "1.5" to 1 via parseInt (not rejected)', () => { + expect(getter(build('1.5'))).toBe(1); + }); + + it(`uses the ${def} default when the env is unset`, () => { + expect(getter(build(undefined))).toBe(def); + }); + }); + describe('getSandboxPublicUrl', () => { // Stub that resolves BOTH keys the public-url logic consults. const build = (vals: { sandboxUrl?: string; appUrl?: string }) => diff --git a/apps/server/src/integrations/environment/environment.service.ts b/apps/server/src/integrations/environment/environment.service.ts index 82798727..2f525b8f 100644 --- a/apps/server/src/integrations/environment/environment.service.ts +++ b/apps/server/src/integrations/environment/environment.service.ts @@ -5,9 +5,10 @@ import ms, { StringValue } from 'ms'; @Injectable() export class EnvironmentService { private readonly logger = new Logger(EnvironmentService.name); - // One-shot guard so an invalid SANDBOX_TTL_MS is warned about once, not on - // every getSandboxTtlMs() call (which runs per blob put). - private sandboxTtlWarned = false; + // Env keys already warned about for an invalid value (one-shot per key, so a + // bad SANDBOX_* value is not logged on every blob put). Mirrors the original + // sandboxTtlWarned guard, generalized across the TTL + the three byte caps. + private readonly invalidPositiveIntWarned = new Set(); constructor(private configService: ConfigService) {} @@ -352,50 +353,48 @@ export class EnvironmentService { return raw.replace(/\/+$/, ''); } + // Parse a REQUIRED positive-integer env (TTL in ms or a byte cap). A + // non-integer or <= 0 value would break the sandbox silently (instant expiry, + // or every put failing against a 0-byte cap), so warn once and fall back to + // the default instead. Blob bodies are never logged. + private getPositiveIntEnv(key: string, def: number): number { + const parsed = parseInt( + this.configService.get(key, String(def)), + 10, + ); + if (!Number.isInteger(parsed) || parsed <= 0) { + if (!this.invalidPositiveIntWarned.has(key)) { + this.invalidPositiveIntWarned.add(key); + this.logger.warn( + `Invalid ${key} (must be a positive integer); falling back to the ${def} default`, + ); + } + return def; + } + return parsed; + } + // Blob time-to-live. Default 1h. The unguessable UUID + this short TTL + TLS // are the whole capability model (no tokens). A non-positive or non-integer // value would make every blob expire instantly (silent 404s), so reject it and // fall back to the 1h default (warned about once to avoid per-put log spam). getSandboxTtlMs(): number { - const parsed = parseInt( - this.configService.get('SANDBOX_TTL_MS', '3600000'), - 10, - ); - if (!Number.isInteger(parsed) || parsed <= 0) { - if (!this.sandboxTtlWarned) { - this.sandboxTtlWarned = true; - this.logger.warn( - `Invalid SANDBOX_TTL_MS (must be a positive integer); ` + - `falling back to the 3600000 ms default`, - ); - } - return 3_600_000; - } - return parsed; + return this.getPositiveIntEnv('SANDBOX_TTL_MS', 3_600_000); } // Per-blob cap for non-image blobs (the serialized document). Default 8 MiB. getSandboxMaxBytes(): number { - return parseInt( - this.configService.get('SANDBOX_MAX_BYTES', '8388608'), - 10, - ); + return this.getPositiveIntEnv('SANDBOX_MAX_BYTES', 8_388_608); } // Per-blob cap for mirrored image blobs. Default 20 MiB. getSandboxMaxImageBytes(): number { - return parseInt( - this.configService.get('SANDBOX_MAX_IMAGE_BYTES', '20971520'), - 10, - ); + return this.getPositiveIntEnv('SANDBOX_MAX_IMAGE_BYTES', 20_971_520); } // RAM guard: total bytes the whole store may hold. Default 128 MiB. On // overflow the store evicts oldest entries to make room. getSandboxMaxTotalBytes(): number { - return parseInt( - this.configService.get('SANDBOX_MAX_TOTAL_BYTES', '134217728'), - 10, - ); + return this.getPositiveIntEnv('SANDBOX_MAX_TOTAL_BYTES', 134_217_728); } } diff --git a/apps/server/src/integrations/mcp/mcp.module.ts b/apps/server/src/integrations/mcp/mcp.module.ts index 8ed9cb39..e2a01381 100644 --- a/apps/server/src/integrations/mcp/mcp.module.ts +++ b/apps/server/src/integrations/mcp/mcp.module.ts @@ -2,17 +2,15 @@ import { Module } from '@nestjs/common'; import { McpController } from './mcp.controller'; import { McpService } from './mcp.service'; import { DatabaseModule } from '@docmost/db/database.module'; -import { EnvironmentModule } from '../environment/environment.module'; import { AuthModule } from '../../core/auth/auth.module'; import { TokenModule } from '../../core/auth/token.module'; // Community MCP feature: the server itself serves the Model Context Protocol -// over HTTP at /mcp. DatabaseModule (global) provides WorkspaceRepo and -// EnvironmentModule (global) provides EnvironmentService. AuthModule supplies -// AuthService (per-user HTTP-Basic login validation) and TokenModule supplies -// TokenService (Bearer access-JWT verification for the token fallback). +// over HTTP at /mcp. DatabaseModule (global) provides WorkspaceRepo. AuthModule +// supplies AuthService (per-user HTTP-Basic login validation) and TokenModule +// supplies TokenService (Bearer access-JWT verification for the token fallback). @Module({ - imports: [DatabaseModule, EnvironmentModule, AuthModule, TokenModule], + imports: [DatabaseModule, AuthModule, TokenModule], controllers: [McpController], providers: [McpService], }) diff --git a/apps/server/src/integrations/mcp/mcp.service.ts b/apps/server/src/integrations/mcp/mcp.service.ts index 92ccf4c4..d6942963 100644 --- a/apps/server/src/integrations/mcp/mcp.service.ts +++ b/apps/server/src/integrations/mcp/mcp.service.ts @@ -116,15 +116,6 @@ export class McpService implements OnModuleDestroy { clearInterval(this.sweepTimer); } - // Bind the stash tool to the shared in-RAM SandboxStore. The store owns the - // anonymous-URL composition (putAndLink) and the live/evict probes the MCP - // package needs to keep its mirror counts honest under FIFO eviction; the - // package owns neither env nor the store. The uri↔id mapping now lives on the - // store (asSink), shared with the in-app agent-tools wiring site. - private buildSandboxConfig(): DocmostMcpConfig['sandbox'] { - return this.sandboxStore.asSink(); - } - // Service account the embedded MCP uses to talk back to this Docmost // instance over loopback REST + the collaboration WebSocket. Now OPTIONAL: // it is only a fallback when no per-user Basic/Bearer credentials are sent. @@ -338,8 +329,9 @@ export class McpService implements OnModuleDestroy { } // Inject the blob-sandbox sink after the auth decision so stash_page // can store blobs in the shared in-RAM store regardless of which - // credential variant resolved. - return { ...resolved.config, sandbox: this.buildSandboxConfig() }; + // credential variant resolved. The sink (put/has/evict + uri↔id + // mapping) is owned by SandboxStore.asSink(). + return { ...resolved.config, sandbox: this.sandboxStore.asSink() }; }, { identify: (req: IncomingMessage) => { diff --git a/apps/server/src/integrations/sandbox/sandbox.controller.spec.ts b/apps/server/src/integrations/sandbox/sandbox.controller.spec.ts index 9021e070..db328e88 100644 --- a/apps/server/src/integrations/sandbox/sandbox.controller.spec.ts +++ b/apps/server/src/integrations/sandbox/sandbox.controller.spec.ts @@ -35,7 +35,10 @@ function makeReq(headers: Record = {}) { return { headers } as any; } -const VALID_ID = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'; +// A syntactically valid v4 UUID (version nibble 4, variant nibble 8). The +// shared `uuid` validator is stricter than a bare hex-shape regex, so the id +// must carry a real version/variant. +const VALID_ID = 'aaaaaaaa-bbbb-4ccc-8ddd-eeeeeeeeeeee'; function entry(buf: Buffer, mime: string, sha256: string): SandboxEntry { return { buf, mime, sha256, expiresAt: Date.now() + 60_000 }; diff --git a/apps/server/src/integrations/sandbox/sandbox.controller.ts b/apps/server/src/integrations/sandbox/sandbox.controller.ts index 4cfd320a..253a8deb 100644 --- a/apps/server/src/integrations/sandbox/sandbox.controller.ts +++ b/apps/server/src/integrations/sandbox/sandbox.controller.ts @@ -1,14 +1,9 @@ import { Controller, Get, Param, Req, Res } from '@nestjs/common'; import { FastifyReply, FastifyRequest } from 'fastify'; +import { validate as isValidUUID } from 'uuid'; import { SandboxStore } from './sandbox.store'; import { SANDBOX_ROUTE_SEGMENT } from './sandbox.constants'; -// Strict UUID v-agnostic shape. This is anti-traversal / input hygiene (so `:id` -// can never be a path like `../...`), NOT authorization — the capability is the -// unguessable id itself plus the short TTL plus TLS. -const UUID_RE = - /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/; - // MIME types safe to render inline in a browser. SVG is deliberately EXCLUDED // (it can carry script), as are text/html and the JSON document blob — anything // not on this list is served as an attachment so an attacker-controlled mime can @@ -51,9 +46,13 @@ export class SandboxController { @Req() req: FastifyRequest, @Res() res: FastifyReply, ): Promise { - // Non-UUID id (including any traversal attempt) → 404 before touching the - // store. No stack trace leaks out. - if (!UUID_RE.test(id)) { + // Validate `:id` as a real UUID via the shared `uuid` validator (same as the + // attachment routes). This is anti-traversal / input hygiene (so `:id` can + // never be a path like `../...`), NOT authorization — the capability is the + // unguessable id itself plus the short TTL plus TLS. A non-UUID id (including + // any traversal attempt) → 404 before touching the store; no stack trace + // leaks out. + if (!isValidUUID(id)) { res.status(404).send(); return; } diff --git a/apps/server/src/integrations/sandbox/sandbox.store.spec.ts b/apps/server/src/integrations/sandbox/sandbox.store.spec.ts index 8ef90774..e49a98ed 100644 --- a/apps/server/src/integrations/sandbox/sandbox.store.spec.ts +++ b/apps/server/src/integrations/sandbox/sandbox.store.spec.ts @@ -1,4 +1,5 @@ import { createHash } from 'node:crypto'; +import { validate as isValidUUID } from 'uuid'; import { SandboxStore } from './sandbox.store'; // Build a minimal EnvironmentService stub with overridable caps/TTL. @@ -26,9 +27,6 @@ function makeEnv( } as any; } -const UUID_RE = - /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/; - describe('SandboxStore', () => { let store: SandboxStore; @@ -43,7 +41,7 @@ describe('SandboxStore', () => { const buf = Buffer.from('{"type":"doc","content":[]}', 'utf8'); const res = store.put(buf, 'application/json'); - expect(res.id).toMatch(UUID_RE); + expect(isValidUUID(res.id)).toBe(true); expect(res.size).toBe(buf.length); const entry = store.get(res.id);