Files
gitmost/packages/mcp/build/lib/internal-file-urls.js
claude_code 6eb335d5e3 fix(sandbox): address PR #250 review — SSRF guard, eviction safety, cleanup (#243)
Security:
- stash_page: reject path-traversal / percent-encoded srcs before the authed
  loopback fetch (resolveInternalFilePath), closing an SSRF/exfiltration hole
  where a crafted node.attrs.src could read an arbitrary internal GET endpoint
  into the anonymous sandbox.

Stability:
- stash_page: revert + recount mirrors FIFO-evicted by a later put in the same
  stash (no dangling sandbox refs, honest images.mirrored/failed); free image
  blobs if the final document put throws.
- Reject/clamp non-positive SANDBOX_TTL_MS to the 1h default (warn once).
- Log mirror failures unconditionally (console.warn, no blob bodies).

Cleanup / architecture:
- Remove dead expiresAt from SandboxPutResult.
- Centralize the /api/sb route in SANDBOX_ROUTE_SEGMENT/SANDBOX_API_PATH and
  move URL composition into SandboxStore.putAndLink; drop the duplicated sink
  closures and the now-unused EnvironmentService injection from McpService and
  AiChatToolsService.
- Un-export isInternalFileUrl; document the process-local (instance-bound)
  sandbox limitation in the tool description and .env.example.

Docs/tests:
- README/README.ru: 38 -> 39 tools + stash_page entry.
- Add traversal/normalize/recursion unit tests, stash self-eviction +
  doc-put-throw + empty/octet-stream mock tests, controller If-None-Match
  (wildcard/weak/list) + Cache-Control tests, and SANDBOX_TTL_MS validation
  tests. Regenerate packages/mcp/build.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-28 18:02:46 +03:00

111 lines
5.0 KiB
JavaScript

// Detection + collection of INTERNAL Docmost file URLs inside a ProseMirror doc.
//
// An internal file URL is a relative path served by Docmost's authenticated
// attachment route (`GET /api/files/:fileId/:fileName`). It is useless to an
// external consumer (relative + needs a Docmost session), so the stash tool
// mirrors every such resource into the blob sandbox and rewrites its `src`.
//
// The criterion is "internal file URL", NOT the node TYPE: image, drawio,
// excalidraw, video and file nodes all carry such a `src`, so a type-agnostic
// walker covers them all. External http(s) srcs (CDNs) are left untouched.
//
// Mirrors editor-ext's isInternalFileUrl / normalizeFileUrl (kept as a local
// dup so the ESM mcp package does not depend on the editor-ext build).
/**
 * Tell whether `url` looks like an internal Docmost file URL.
 *
 * @param {unknown} url - Candidate `src` value; anything that is not a string
 *   is treated as "not internal".
 * @returns {boolean} `true` when the trimmed string begins with `/api/files/`
 *   or with the bare `/files/` form, `false` otherwise.
 */
function isInternalFileUrl(url) {
  if (typeof url !== "string") {
    return false;
  }
  const candidate = url.trim();
  // Both the canonical and the bare prefix count as internal.
  return ["/api/files/", "/files/"].some((prefix) => candidate.startsWith(prefix));
}
/**
 * Bring a file `src` into the canonical `/api/files/...` form.
 *
 * @param {string} src - Raw `src` value; surrounding whitespace is trimmed.
 * @returns {string} The trimmed value, prefixed with `/api` when it started
 *   with the bare `/files/` form; any other value is returned trimmed but
 *   otherwise unchanged.
 */
export function normalizeFileUrl(src) {
  const cleaned = src.trim();
  return cleaned.startsWith("/files/") ? "/api" + cleaned : cleaned;
}
/**
 * Resolve a page-content `src` into the safe, `/api`-relative path the stash
 * tool may fetch over the authenticated loopback client — or THROW.
 *
 * SECURITY (SSRF / path-traversal): `src` comes from page content and is fully
 * attacker-controllable. The mirroring fetch runs through the AUTHENTICATED
 * loopback axios client whose baseURL ends in `/api`, so a naive
 * `src.replace(/^\/api/, "")` lets a crafted value like
 * `/api/files/../auth/whoami` collapse (via axios/WHATWG URL `..` resolution)
 * into an ARBITRARY internal GET endpoint, whose authed response would then be
 * stored in the anonymous sandbox (SSRF + data exfiltration). A prefix-only
 * `startsWith("/api/files/")` check does NOT defend against this because the
 * `..` segments are still present in the raw string and resolved later.
 *
 * This function defeats that by resolving the canonical pathname FIRST and only
 * then asserting it still lives under `/api/files/`:
 *  - it rejects any percent-encoded dot/slash (`%2e` / `%2f`) in the raw src:
 *    the WHATWG URL parser does NOT decode `%2f` into a separator, so a
 *    content-controlled src must never be allowed to smuggle those past the
 *    canonicalization;
 *  - it resolves `new URL(trimmed, "http://internal.invalid").pathname`, which
 *    normalizes `..`/`.` segments (e.g. `/api/files/../auth/whoami` →
 *    `/api/auth/whoami`);
 *  - it repeats the `%2e` / `%2f` rejection on the CANONICAL pathname: the
 *    WHATWG parser removes ASCII tab / LF / CR from anywhere in its input, so
 *    a raw `%2<TAB>f` passes the first check yet becomes `%2f` after parsing;
 *  - it then requires the canonical pathname to start with `/api/files/`, so a
 *    traversal that escaped that subtree is rejected.
 *
 * Only the pathname is returned; any query string, fragment or host component
 * of `src` is discarded by construction.
 *
 * The throw happens BEFORE any network call, so a rejected src is counted as a
 * failed mirror and its original src is kept (the per-image try/catch in
 * stashPage never aborts the whole document).
 *
 * @param {string} src - Raw `src` attribute taken from page content.
 * @returns {string} The path RELATIVE to the `/api` base (e.g.
 *   `/files/<id>/<name>`), ready to hand to the loopback client.
 * @throws {TypeError} If `src` is not a string.
 * @throws {Error} If `src` carries a percent-encoded dot/slash, cannot be
 *   parsed as a URL, or resolves outside `/api/files/`.
 */
export function resolveInternalFilePath(src) {
  if (typeof src !== "string") {
    throw new TypeError(`Internal file src must be a string, got ${typeof src}`);
  }
  const trimmed = src.trim();
  // Non-global regex: `.test()` is stateless, so reusing it below is safe.
  const encodedDotOrSlash = /%2e|%2f/i;
  // Percent-encoded dot/slash must never reach the URL canonicalizer: the
  // WHATWG parser does NOT decode `%2f` into a path separator, so an encoded
  // `..%2fauth` would survive canonicalization and still escape /api/files/.
  if (encodedDotOrSlash.test(trimmed)) {
    throw new Error(`Refusing internal file src with percent-encoded path segment: "${src}"`);
  }
  let pathname;
  try {
    // The base host is irrelevant (never contacted); it only lets the parser
    // resolve a relative `src` and normalize `..`/`.` segments.
    pathname = new URL(trimmed, "http://internal.invalid").pathname;
  } catch {
    throw new Error(`Invalid internal file src: "${src}"`);
  }
  // The parser strips interior tab/LF/CR characters, so an escape sequence
  // that was split in the raw string (e.g. `%2\tf`) is reassembled here.
  // Re-check the canonical form so it cannot slip through.
  if (encodedDotOrSlash.test(pathname)) {
    throw new Error(`Refusing internal file src with percent-encoded path segment: "${src}"`);
  }
  if (!pathname.startsWith("/api/files/")) {
    throw new Error(`Refusing internal file src that escapes /api/files/: "${src}"`);
  }
  // Strip the `/api` base prefix; the loopback client's baseURL already ends
  // in `/api`, so it expects the path relative to that (e.g. /files/<id>/<f>).
  return pathname.replace(/^\/api/, "");
}
/**
 * Gather every node in a document tree whose `attrs.src` is an internal file
 * URL.
 *
 * The walk is depth-first in document order: a node is tested before its
 * `content` children, and arrays (including `doc` itself being an array) are
 * traversed element by element. Falsy values and non-objects are skipped.
 *
 * @param {unknown} doc - Root node, or an array of nodes, to search.
 * @returns {object[]} References to the LIVE matching nodes (not copies), so
 *   the caller can rewrite `attrs.src` in place on its clone.
 */
export function collectInternalFileNodes(doc) {
  const matches = [];
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped — and therefore visited — in their original order.
  const pending = [doc];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== "object") {
      continue;
    }
    if (Array.isArray(current)) {
      for (let i = current.length - 1; i >= 0; i -= 1) {
        pending.push(current[i]);
      }
      continue;
    }
    if (current.attrs && isInternalFileUrl(current.attrs.src)) {
      matches.push(current);
    }
    const children = current.content;
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i -= 1) {
        pending.push(children[i]);
      }
    }
  }
  return matches;
}