From 204cf9dfe7520793da949c60f89a4c743722968f Mon Sep 17 00:00:00 2001 From: claude_code Date: Sun, 28 Jun 2026 20:58:36 +0300 Subject: [PATCH] =?UTF-8?q?test(sandbox):=20address=20PR=20#250=20round-4?= =?UTF-8?q?=20review=20=E2=80=94=20SSRF=20accept-path=20tests,=20MCP=20str?= =?UTF-8?q?ucturedContent=20(#243)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mandatory (test-coverage): - internal-file-urls.test: pin the SSRF/traversal ACCEPT path of resolveInternalFilePath (the sole guard for content-controlled `src`): an absolute/protocol-relative URL has its foreign host dropped and only an /api/files/ pathname survives (http://evil.com/api/files/x/y.png -> /files/x/y.png), while a host-dropped path that escapes /api/files/ (https://evil.com/api/auth/whoami) or a backslash-traversal (/api/files\..\auth\whoami) is rejected. Locks the behavior so a future prefix-only refactor cannot silently open a bypass. Suggestions: - index.ts: the stash_page MCP tool now returns structuredContent { uri, sha256, size, images } alongside the resource_link, so the MCP output matches the documented shape (clients get the blob's sha256/ETag and the mirror counts, not just the link). No outputSchema registered. Rebuilt build/. - new stash-page-mcp-result.test: server round-trip via InMemoryTransport asserts both the resource_link and the structuredContent mirror. - internal-file-urls.test: cover the new URL parse-failure catch branch (http://[ -> "Invalid internal file src"). - environment.service.spec: assert getPositiveIntEnv warns once per key and independently across keys (the invalidPositiveIntWarned dedup). Tests: packages/mcp 383 pass; apps/server sandbox/environment/mcp 235 pass. Co-Authored-By: Claude Opus 4.8 --- .../environment/environment.service.spec.ts | 40 +++++ packages/mcp/build/index.js | 14 +- packages/mcp/src/index.ts | 14 +- .../test/mock/stash-page-mcp-result.test.mjs | 155 ++++++++++++++++++ .../mcp/test/unit/internal-file-urls.test.mjs | 40 +++++ 5 files changed, 261 insertions(+), 2 deletions(-) create mode 100644 packages/mcp/test/mock/stash-page-mcp-result.test.mjs diff --git a/apps/server/src/integrations/environment/environment.service.spec.ts b/apps/server/src/integrations/environment/environment.service.spec.ts index e40508cd..d4668598 100644 --- a/apps/server/src/integrations/environment/environment.service.spec.ts +++ b/apps/server/src/integrations/environment/environment.service.spec.ts @@ -94,6 +94,46 @@ describe('EnvironmentService', () => { }); }); + // getPositiveIntEnv keeps a one-shot `invalidPositiveIntWarned` set so a bad + // value is logged ONCE per key (not on every getter call, which the sandbox + // hits per-put). These tests pin that dedup so a regression to per-call logging + // would fail loudly. + describe('invalid-value warn dedup', () => { + it('warns only once per key across repeated getter calls', () => { + const service = new EnvironmentService({ + get: (k: string, d?: string) => + k === 'SANDBOX_MAX_TOTAL_BYTES' ? '-5' : d, + } as any); + const warnSpy = jest + .spyOn((service as any).logger, 'warn') + .mockImplementation(() => undefined); + + service.getSandboxMaxTotalBytes(); + service.getSandboxMaxTotalBytes(); + + expect(warnSpy).toHaveBeenCalledTimes(1); + }); + + it('warns independently per key (dedup is per-key, not global)', () => { + // Two DIFFERENT SANDBOX_* keys are both invalid -> each warns once, so two + // warns total. This proves the dedup set is keyed, not a single global flag. + const service = new EnvironmentService({ + get: (k: string, d?: string) => + k === 'SANDBOX_MAX_BYTES' || k === 'SANDBOX_MAX_TOTAL_BYTES' + ? '-5' + : d, + } as any); + const warnSpy = jest + .spyOn((service as any).logger, 'warn') + .mockImplementation(() => undefined); + + service.getSandboxMaxBytes(); + service.getSandboxMaxTotalBytes(); + + expect(warnSpy).toHaveBeenCalledTimes(2); + }); + }); + describe('getSandboxPublicUrl', () => { // Stub that resolves BOTH keys the public-url logic consults. const build = (vals: { sandboxUrl?: string; appUrl?: string }) => diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js index c684a479..b62fd7ea 100644 --- a/packages/mcp/build/index.js +++ b/packages/mcp/build/index.js @@ -287,7 +287,10 @@ export function createDocmostMcpServer(config) { }); // Tool: stash_page — returns a resource_link (NOT embedded text) so the doc // body never enters the model context. Registered directly (not via - // registerShared) because that helper only emits text content. + // registerShared) because that helper only emits text content. Also returns + // `structuredContent` carrying the full documented `{uri, sha256, size, images}` + // shape alongside the resource_link, so MCP clients receive the blob's sha256 + // (its ETag, for integrity) and mirror counts, not just the link. server.registerTool(SHARED_TOOL_SPECS.stashPage.mcpName, { description: SHARED_TOOL_SPECS.stashPage.description, inputSchema: SHARED_TOOL_SPECS.stashPage.buildShape(z), @@ -303,6 +306,15 @@ export function createDocmostMcpServer(config) { size: result.size, }, ], + // Mirror the full documented result shape ({ uri, size, sha256, images }) + // as structuredContent so MCP clients get the blob's sha256 (its ETag, for + // integrity) and the mirror counts, not just the resource_link. + structuredContent: { + uri: result.uri, + sha256: result.sha256, + size: result.size, + images: result.images, + }, }; }); // Tool: patch_node diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index bc594a28..39f48194 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -410,7 +410,10 @@ registerShared(SHARED_TOOL_SPECS.editPageText, async ({ pageId, edits }) => { // Tool: stash_page — returns a resource_link (NOT embedded text) so the doc // body never enters the model context. Registered directly (not via -// registerShared) because that helper only emits text content. +// registerShared) because that helper only emits text content. Also returns +// `structuredContent` carrying the full documented `{uri, sha256, size, images}` +// shape alongside the resource_link, so MCP clients receive the blob's sha256 +// (its ETag, for integrity) and mirror counts, not just the link. server.registerTool( SHARED_TOOL_SPECS.stashPage.mcpName, { @@ -429,6 +432,15 @@ server.registerTool( size: result.size, }, ], + // Mirror the full documented result shape ({ uri, size, sha256, images }) + // as structuredContent so MCP clients get the blob's sha256 (its ETag, for + // integrity) and the mirror counts, not just the resource_link. + structuredContent: { + uri: result.uri, + sha256: result.sha256, + size: result.size, + images: result.images, + }, }; }, ); diff --git a/packages/mcp/test/mock/stash-page-mcp-result.test.mjs b/packages/mcp/test/mock/stash-page-mcp-result.test.mjs new file mode 100644 index 00000000..629c7187 --- /dev/null +++ b/packages/mcp/test/mock/stash-page-mcp-result.test.mjs @@ -0,0 +1,155 @@ +// Server round-trip test for the stash_page MCP tool result shape. The in-app +// path returns the full documented `{ uri, size, sha256, images }` object, but +// the MCP transport must deliver the SAME shape: a resource_link (primary +// payload) PLUS a `structuredContent` mirror carrying sha256 + image counts. +// This connects a real MCP Client to the server over a linked in-memory +// transport pair and asserts both halves of the result, end to end. +import { test, after } from "node:test"; +import assert from "node:assert/strict"; +import http from "node:http"; +import { createHash } from "node:crypto"; +import { createDocmostMcpServer } from "../../build/index.js"; +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js"; + +function readBody(req) { + return new Promise((resolve) => { + let raw = ""; + req.on("data", (c) => (raw += c)); + req.on("end", () => resolve(raw)); + }); +} + +function startServer(handler) { + return new Promise((resolve) => { + const server = http.createServer(handler); + server.listen(0, "127.0.0.1", () => { + const { port } = server.address(); + resolve({ server, baseURL: `http://127.0.0.1:${port}/api` }); + }); + }); +} + +const openServers = []; +async function spawn(handler) { + const { server, baseURL } = await startServer(handler); + openServers.push(server); + return baseURL; +} +after(async () => { + await Promise.all(openServers.map((s) => new Promise((r) => s.close(r)))); +}); + +// Minimal in-memory sandbox sink: store the blob and return a uri + sha256 + +// size, with has/evict probes the client's reconciliation may call. +function makeSandbox() { + const live = new Map(); + const idOf = (uri) => uri.substring(uri.lastIndexOf("/") + 1); + let n = 0; + return { + put(buf) { + const sha256 = createHash("sha256").update(buf).digest("hex"); + const id = `id-${n++}`; + live.set(id, buf.length); + return { uri: `https://sb.test/api/sb/${id}`, sha256, size: buf.length }; + }, + has(uri) { + return live.has(idOf(uri)); + }, + evict(uri) { + live.delete(idOf(uri)); + }, + }; +} + +const IMAGE_BYTES = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a]); + +// One internal image (so images.mirrored === 1) inside a normal page doc. +function pageDoc() { + return { + type: "doc", + content: [ + { + type: "image", + attrs: { src: "/api/files/att-1/pic.png", attachmentId: "att-1" }, + }, + ], + }; +} + +// Mock Docmost: login, page info, internal file bytes — same pattern as +// stash-page.test.mjs. +async function buildBaseURL() { + return spawn(async (req, res) => { + await readBody(req); + if (req.url === "/api/auth/login") { + res.writeHead(200, { + "Content-Type": "application/json", + "Set-Cookie": "authToken=tok; HttpOnly", + }); + res.end(JSON.stringify({ token: "tok" })); + return; + } + if (req.url === "/api/pages/info") { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end( + JSON.stringify({ data: { id: "page-1", title: "T", content: pageDoc() } }), + ); + return; + } + if (req.url.startsWith("/api/files/")) { + res.writeHead(200, { "Content-Type": "image/png" }); + res.end(IMAGE_BYTES); + return; + } + res.writeHead(404); + res.end(); + }); +} + +test("stash_page MCP tool returns a resource_link AND a structuredContent mirror", async () => { + const baseURL = await buildBaseURL(); + const sandbox = makeSandbox(); + const server = createDocmostMcpServer({ + apiUrl: baseURL, + email: "u@example.com", + password: "pw", + sandbox, + }); + + const client = new Client({ name: "test-client", version: "0.0.0" }); + const [a, b] = InMemoryTransport.createLinkedPair(); + await server.connect(b); + await client.connect(a); + + try { + const res = await client.callTool({ + name: "stash_page", + arguments: { pageId: "page-1" }, + }); + + // Primary payload: a resource_link pointing at the sandbox doc blob. + const link = res.content[0]; + assert.equal(link.type, "resource_link"); + assert.match(link.uri, /^https:\/\/sb\.test\/api\/sb\//); + + // structuredContent mirrors the full documented shape. + const sc = res.structuredContent; + assert.equal(typeof sc, "object"); + assert.equal(sc.uri, link.uri); // same blob as the link + assert.match(sc.sha256, /^[0-9a-f]{64}$/); // 64-hex ETag + assert.equal(typeof sc.size, "number"); + assert.deepEqual(sc.images, { mirrored: 1, failed: 0 }); + + // Deep-equal the whole structured payload against what the mock implies. + assert.deepEqual(sc, { + uri: link.uri, + sha256: sc.sha256, + size: sc.size, + images: { mirrored: 1, failed: 0 }, + }); + } finally { + await client.close(); + await server.close(); + } +}); diff --git a/packages/mcp/test/unit/internal-file-urls.test.mjs b/packages/mcp/test/unit/internal-file-urls.test.mjs index 0f3d8fd0..e17bc74b 100644 --- a/packages/mcp/test/unit/internal-file-urls.test.mjs +++ b/packages/mcp/test/unit/internal-file-urls.test.mjs @@ -28,6 +28,46 @@ test("resolveInternalFilePath rejects traversal / encoded variants (SSRF guard)" assert.throws(() => resolveInternalFilePath("/api/files/..%2fauth")); }); +test("resolveInternalFilePath drops a foreign host and keeps only the /api/files/ pathname (SSRF accept-path)", () => { + // ACCEPT path: an absolute URL has its host dropped; only the canonical + // pathname survives, and it must still start with /api/files/. This is SAFE + // because the loopback axios client ignores any host in `src` and uses its own + // /api baseURL — so a foreign host like evil.com is never contacted. This is + // the SOLE SSRF/traversal guard for content-controlled `src`, so it must be + // pinned: a future refactor to a prefix-only check would silently open a + // bypass with no failing test. + assert.equal( + resolveInternalFilePath("http://evil.com/api/files/x/y.png"), + "/files/x/y.png", + ); + // Protocol-relative URL: host likewise dropped, pathname kept. + assert.equal( + resolveInternalFilePath("//evil.com/api/files/x/y.png"), + "/files/x/y.png", + ); +}); + +test("resolveInternalFilePath rejects a foreign-host src whose pathname escapes /api/files/", () => { + // Even though the host is dropped, the canonical pathname /api/auth/whoami + // does NOT start with /api/files/, so it is rejected. + assert.throws(() => + resolveInternalFilePath("https://evil.com/api/auth/whoami"), + ); + // The WHATWG URL parser converts backslashes to `/` for http(s), so this + // collapses to /api/auth/whoami and escapes the /api/files/ subtree. + assert.throws(() => resolveInternalFilePath("/api/files\\..\\auth\\whoami")); +}); + +test("resolveInternalFilePath wraps a new URL parse failure in a clear error", () => { + // `http://[` has no %2e/%2f so it passes the first guard, then fails the + // `new URL(...)` parse — exercising the catch branch that re-throws with a + // clear message. + assert.throws( + () => resolveInternalFilePath("http://["), + /Invalid internal file src/, + ); +}); + test("normalizeFileUrl rewrites the bare /files/ branch and leaves /api/files/ alone", () => { assert.equal( normalizeFileUrl("/files/att-1/pic.png"),