fix(sandbox): address PR #250 review — SSRF guard, eviction safety, cleanup (#243)

Security:
- stash_page: reject path-traversal / percent-encoded srcs before the authed
  loopback fetch (resolveInternalFilePath), closing an SSRF/exfiltration hole
  where a crafted node.attrs.src could read an arbitrary internal GET endpoint
  into the anonymous sandbox.

Stability:
- stash_page: revert + recount mirrors FIFO-evicted by a later put in the same
  stash (no dangling sandbox refs, honest images.mirrored/failed); free image
  blobs if the final document put throws.
- Reject/clamp non-positive SANDBOX_TTL_MS to the 1h default (warn once).
- Log mirror failures unconditionally (console.warn, no blob bodies).

Cleanup / architecture:
- Remove dead expiresAt from SandboxPutResult.
- Centralize the /api/sb route in SANDBOX_ROUTE_SEGMENT/SANDBOX_API_PATH and
  move URL composition into SandboxStore.putAndLink; drop the duplicated sink
  closures and the now-unused EnvironmentService injection from McpService and
  AiChatToolsService.
- Un-export isInternalFileUrl; document the process-local (instance-bound)
  sandbox limitation in the tool description and .env.example.

Docs/tests:
- README/README.ru: 38 -> 39 tools + stash_page entry.
- Add traversal/normalize/recursion unit tests, stash self-eviction +
  doc-put-throw + empty/octet-stream mock tests, controller If-None-Match
  (wildcard/weak/list) + Cache-Control tests, and SANDBOX_TTL_MS validation
  tests. Regenerate packages/mcp/build.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-28 18:02:46 +03:00
parent 2fe4ca8537
commit 6eb335d5e3
24 changed files with 708 additions and 97 deletions

View File

@@ -134,6 +134,10 @@ MCP_DOCMOST_PASSWORD=
# SANDBOX_PUBLIC_URL is the base used to build those URLs; it MUST be reachable
# by the consumer (do NOT use a loopback address if the consumer is remote).
# Defaults to APP_URL when unset.
# NOTE: the store is process-local — blobs live only on the instance that
# created them. Behind a multi-replica load balancer WITHOUT sticky sessions a
# consumer may hit a different instance and get a 404 (indistinguishable from an
# expired blob). Single-host deployments are unaffected.
# SANDBOX_PUBLIC_URL=https://docs.example.com
# SANDBOX_TTL_MS=3600000
# SANDBOX_MAX_BYTES=8388608

View File

@@ -63,9 +63,8 @@ describe('AiChatToolsService deletePage guardrail (H4)', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});
@@ -179,9 +178,8 @@ describe('AiChatToolsService expanded toolset guardrails', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});
@@ -298,9 +296,8 @@ describe('AiChatToolsService node-arg JSON-string coercion', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});
@@ -452,9 +449,8 @@ describe('AiChatToolsService model-friendly input validation (#190)', () => {
{} as never,
{} as never,
{} as never,
// environmentService + sandboxStore (only used by the stash tool closure,
// which these tests do not execute).
{} as never,
// sandboxStore (only used by the stash tool closure, which these tests do
// not execute).
{} as never,
);
});

View File

@@ -16,7 +16,6 @@ import {
import { resolveCurrentPageResult } from './current-page.util';
import { parseNodeArg } from './parse-node-arg';
import { modelFriendlyInput } from './model-friendly-input';
import { EnvironmentService } from '../../../integrations/environment/environment.service';
import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
/**
@@ -43,7 +42,6 @@ export class AiChatToolsService {
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
private readonly spaceMemberRepo: SpaceMemberRepo,
private readonly pagePermissionRepo: PagePermissionRepo,
private readonly environmentService: EnvironmentService,
// Shared singleton in-RAM blob store backing the stash tool.
private readonly sandboxStore: SandboxStore,
) {}
@@ -91,22 +89,23 @@ export class AiChatToolsService {
aiChatId,
});
// Bind the stash tool to the shared in-RAM SandboxStore and compose the
// anonymous public URL here (the MCP package never touches env or the
// store). put() returns the read URL + sha256/size; sha256 is also the
// blob's ETag for integrity.
const sandboxPut = (buf: Buffer, mime: string) => {
const stored = this.sandboxStore.put(buf, mime);
const base = this.environmentService.getSandboxPublicUrl();
return { uri: `${base}/api/sb/${stored.id}`, sha256: stored.sha256, size: stored.size };
};
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
// package needs to keep its mirror counts honest under FIFO eviction (the
// package never touches env or the store). The sink speaks `uri`s, so the
// probes map a uri back to its id (the last path segment).
const idOf = (uri: string) => uri.substring(uri.lastIndexOf('/') + 1);
const { DocmostClient, sharedToolSpecs } = await loadDocmostMcp();
const client: DocmostClientLike = new DocmostClient({
apiUrl,
getToken,
getCollabToken,
sandbox: { put: sandboxPut },
sandbox: {
put: (buf, mime) => this.sandboxStore.putAndLink(buf, mime),
has: (uri) => this.sandboxStore.has(idOf(uri)),
evict: (uri) => this.sandboxStore.remove(idOf(uri)),
},
});
// Build an ai-SDK tool from a shared, zod-agnostic spec. The spec owns the

View File

@@ -171,11 +171,15 @@ export type DocmostClientConfig = {
getCollabToken?: () => Promise<string>;
// Optional blob-sandbox sink for the stash tool. `put` stores a blob in the
// host's in-RAM SandboxStore and returns the anonymous read URL + integrity.
// The optional `has`/`evict` probes let stashPage keep its mirror counts
// honest under the store's FIFO eviction (mirror of the package's sink type).
sandbox?: {
put: (
buf: Buffer,
mime: string,
) => { uri: string; sha256: string; size: number };
has?: (uri: string) => boolean;
evict?: (uri: string) => void;
};
};

View File

@@ -14,4 +14,30 @@ describe('EnvironmentService', () => {
it('should be defined', () => {
expect(service).toBeDefined();
});
describe('getSandboxTtlMs', () => {
// ConfigService stub: get(key, def) returns the configured value for the key
// (falling back to def), matching the @nestjs/config contract the service
// calls with (key, default).
const build = (sandboxTtl?: string) =>
new EnvironmentService({
get: (key: string, def?: string) =>
key === 'SANDBOX_TTL_MS' ? (sandboxTtl ?? def) : def,
} as any);
it.each(['0', '-5', 'abc'])(
'falls back to the 3600000 default for invalid value %s',
(value) => {
expect(build(value).getSandboxTtlMs()).toBe(3_600_000);
},
);
it('returns the parsed value for a valid positive integer', () => {
expect(build('120000').getSandboxTtlMs()).toBe(120_000);
});
it('uses the 3600000 default when SANDBOX_TTL_MS is unset', () => {
expect(build(undefined).getSandboxTtlMs()).toBe(3_600_000);
});
});
});

View File

@@ -1,9 +1,14 @@
import { Injectable } from '@nestjs/common';
import { Injectable, Logger } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import ms, { StringValue } from 'ms';
@Injectable()
export class EnvironmentService {
private readonly logger = new Logger(EnvironmentService.name);
// One-shot guard so an invalid SANDBOX_TTL_MS is warned about once, not on
// every getSandboxTtlMs() call (which runs per blob put).
private sandboxTtlWarned = false;
constructor(private configService: ConfigService) {}
getNodeEnv(): string {
@@ -348,12 +353,25 @@ export class EnvironmentService {
}
// Blob time-to-live. Default 1h. The unguessable UUID + this short TTL + TLS
// are the whole capability model (no tokens).
// are the whole capability model (no tokens). A non-positive or non-integer
// value would make every blob expire instantly (silent 404s), so reject it and
// fall back to the 1h default (warned about once to avoid per-put log spam).
getSandboxTtlMs(): number {
return parseInt(
const parsed = parseInt(
this.configService.get<string>('SANDBOX_TTL_MS', '3600000'),
10,
);
if (!Number.isInteger(parsed) || parsed <= 0) {
if (!this.sandboxTtlWarned) {
this.sandboxTtlWarned = true;
this.logger.warn(
`Invalid SANDBOX_TTL_MS (must be a positive integer); ` +
`falling back to the 3600000 ms default`,
);
}
return 3_600_000;
}
return parsed;
}
// Per-blob cap for non-image blobs (the serialized document). Default 8 MiB.

View File

@@ -143,6 +143,11 @@ export type DocmostMcpConfig = (
buf: Buffer,
mime: string,
) => { uri: string; sha256: string; size: number };
// Optional live/evict probes the package uses to keep stash_page's mirror
// counts honest under the store's FIFO eviction (mirror of the package's
// sink type); older bindings omit them.
has?: (uri: string) => boolean;
evict?: (uri: string) => void;
};
};

View File

@@ -109,7 +109,6 @@ function makeService(opts: {
};
const service = new McpService(
undefined as never, // environmentService
undefined as never, // workspaceRepo
undefined as never, // authService
undefined as never, // tokenService

View File

@@ -8,7 +8,6 @@ import { ModuleRef } from '@nestjs/core';
import { pathToFileURL } from 'node:url';
import { IncomingMessage } from 'node:http';
import { FastifyReply, FastifyRequest } from 'fastify';
import { EnvironmentService } from '../environment/environment.service';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { UserSessionRepo } from '@docmost/db/repos/session/user-session.repo';
@@ -93,7 +92,6 @@ export class McpService implements OnModuleDestroy {
private readonly sweepTimer: NodeJS.Timeout;
constructor(
private readonly environmentService: EnvironmentService,
private readonly workspaceRepo: WorkspaceRepo,
private readonly authService: AuthService,
private readonly tokenService: TokenService,
@@ -118,20 +116,17 @@ export class McpService implements OnModuleDestroy {
clearInterval(this.sweepTimer);
}
// Bind the stash tool to the shared in-RAM SandboxStore and compose the
// anonymous public URL (the MCP package owns neither env nor the store).
// put() returns the read URL + sha256/size; sha256 is also the blob ETag.
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
// package needs to keep its mirror counts honest under FIFO eviction; the
// package owns neither env nor the store. The sink speaks `uri`s, so the
// probes map a uri back to its id (the last path segment).
private buildSandboxConfig(): DocmostMcpConfig['sandbox'] {
const idOf = (uri: string) => uri.substring(uri.lastIndexOf('/') + 1);
return {
put: (buf: Buffer, mime: string) => {
const stored = this.sandboxStore.put(buf, mime);
const base = this.environmentService.getSandboxPublicUrl();
return {
uri: `${base}/api/sb/${stored.id}`,
sha256: stored.sha256,
size: stored.size,
};
},
put: (buf, mime) => this.sandboxStore.putAndLink(buf, mime),
has: (uri) => this.sandboxStore.has(idOf(uri)),
evict: (uri) => this.sandboxStore.remove(idOf(uri)),
};
}

View File

@@ -0,0 +1,6 @@
// Single source of truth for the anonymous blob-sandbox route. The controller
// is mounted under the global `/api` prefix, so its decorator uses the bare
// segment while the public URL and the workspace-gate exclusion need the full
// path — derive the latter from the former so the two never drift.
export const SANDBOX_ROUTE_SEGMENT = 'sb';
export const SANDBOX_API_PATH = `/api/${SANDBOX_ROUTE_SEGMENT}`;

View File

@@ -121,4 +121,70 @@ describe('SandboxController', () => {
expect(res._sent.status).toBe(200);
});
it('returns 304 for a wildcard "*" If-None-Match', async () => {
const sha = 'e'.repeat(64);
const store = {
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
};
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(VALID_ID, makeReq({ 'if-none-match': '*' }), res);
expect(res._sent.status).toBe(304);
});
it('returns 304 for a weak validator W/"<sha>"', async () => {
const sha = 'f'.repeat(64);
const store = {
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
};
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(VALID_ID, makeReq({ 'if-none-match': `W/"${sha}"` }), res);
expect(res._sent.status).toBe(304);
});
it('returns 304 when a comma-separated If-None-Match list contains the sha', async () => {
const sha = '1'.repeat(64);
const store = {
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
};
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(
VALID_ID,
makeReq({ 'if-none-match': `"other", "${sha}"` }),
res,
);
expect(res._sent.status).toBe(304);
});
it('sets a private, immutable Cache-Control with a max-age within the TTL on 200', async () => {
const sha = '2'.repeat(64);
// Known TTL: ~30s out, so the floored max-age must land within [0, 60].
const e: SandboxEntry = {
buf: Buffer.from('x'),
mime: 'application/json',
sha256: sha,
expiresAt: Date.now() + 30_000,
};
const store = { get: jest.fn().mockReturnValue(e) };
const controller = new SandboxController(store as any);
const res = makeRes();
await controller.get(VALID_ID, makeReq(), res);
expect(res._sent.status).toBe(200);
const cc = res._sent.headers['cache-control'] as string;
expect(cc).toMatch(/^private, max-age=\d+, immutable$/);
const maxAge = Number(cc.match(/max-age=(\d+)/)![1]);
expect(maxAge).toBeGreaterThanOrEqual(0);
expect(maxAge).toBeLessThanOrEqual(60);
});
});

View File

@@ -1,6 +1,7 @@
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
import { FastifyReply, FastifyRequest } from 'fastify';
import { SandboxStore } from './sandbox.store';
import { SANDBOX_ROUTE_SEGMENT } from './sandbox.constants';
// Strict UUID v-agnostic shape. This is anti-traversal / input hygiene (so `:id`
// can never be a path like `../...`), NOT authorization — the capability is the
@@ -22,7 +23,7 @@ const UUID_RE =
* UUID; `:id` is never used as a filesystem path, so there is no traversal
* surface. Never returns tokens, never 401s.
*/
@Controller('sb')
@Controller(SANDBOX_ROUTE_SEGMENT)
export class SandboxController {
constructor(private readonly store: SandboxStore) {}

View File

@@ -1,6 +1,7 @@
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
import { createHash, randomUUID } from 'node:crypto';
import { EnvironmentService } from '../environment/environment.service';
import { SANDBOX_API_PATH } from './sandbox.constants';
// In-RAM, process-local blob store. No disk, no DB. Ephemeral by design: a
// restart empties it. A blob is addressed by an unguessable randomUUID() which
@@ -17,7 +18,6 @@ export interface SandboxPutResult {
id: string;
sha256: string;
size: number;
expiresAt: number;
}
@Injectable()
@@ -86,7 +86,36 @@ export class SandboxStore implements OnModuleDestroy {
const expiresAt = Date.now() + this.environmentService.getSandboxTtlMs();
this.map.set(id, { buf, mime, sha256, expiresAt });
this.totalBytes += buf.length;
return { id, sha256, size: buf.length, expiresAt };
return { id, sha256, size: buf.length };
}
/**
* Store a blob and return its anonymous read URL plus integrity metadata.
* Owns the single sandbox-URL composition (`${publicBase}${SANDBOX_API_PATH}/
* <id>`) so callers never hand-build the route; the raw put() stays public for
* tests/low-level callers. sha256 is also the blob's strong ETag.
*/
putAndLink(
buf: Buffer,
mime: string,
): { uri: string; sha256: string; size: number } {
const stored = this.put(buf, mime);
const base = this.environmentService.getSandboxPublicUrl();
return {
uri: `${base}${SANDBOX_API_PATH}/${stored.id}`,
sha256: stored.sha256,
size: stored.size,
};
}
/** True if the blob is still live (not evicted/expired). */
has(id: string): boolean {
return this.get(id) !== undefined;
}
/** Drop a blob by id (public wrapper over the private FIFO evict). */
remove(id: string): void {
this.evict(id);
}
/** Returns the entry, or undefined if missing OR expired (lazy expiry). */

View File

@@ -13,6 +13,7 @@ import fastifyCookie from '@fastify/cookie';
import fastifyIp from 'fastify-ip';
import { InternalLogFilter } from './common/logger/internal-log-filter';
import { EnvironmentService } from './integrations/environment/environment.service';
import { SANDBOX_API_PATH } from './integrations/sandbox/sandbox.constants';
import { resolveFrameHeader } from './common/helpers';
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
@@ -129,7 +130,7 @@ async function bootstrap() {
// Anonymous in-RAM blob sandbox: a remote consumer fetches blobs by an
// unguessable UUID without any workspace host context, so the
// workspace-resolution gate must not apply.
'/api/sb',
SANDBOX_API_PATH,
];
if (

View File

@@ -16,7 +16,7 @@ license.
> that interface. Other Docmost MCPs are human-shaped — they expose "open the page" and
> "replace the page"; this one exposes the editing primitives a model is good at.
It exposes **38 tools** built around three ideas that the other Docmost MCPs do not
It exposes **39 tools** built around three ideas that the other Docmost MCPs do not
combine:
1. **Surgical, token-cheap edits.** Address a single block by id and patch it, or run
@@ -106,7 +106,7 @@ There are several Docmost MCPs. Here is a capability-by-capability comparison.
## Tools
All 38 tools, grouped by what you'd reach for them.
All 39 tools, grouped by what you'd reach for them.
### Exploration & retrieval
@@ -203,6 +203,14 @@ All 38 tools, grouped by what you'd reach for them.
node referencing the old attachment (recursively, including callouts/tables) via the
live document, preserving comments, alignment and alt text. (In-place overwrite is
deliberately avoided — some Docmost versions corrupt the attachment on overwrite.)
- **`stash_page`** — Serialize a whole page (its full ProseMirror JSON) into an ephemeral
in-RAM blob and return ONLY a short anonymous URL — the body never enters the model
context, so it is the way to hand a large page (and its images) to an external consumer
without truncation. Every internal file/image attachment is mirrored into the same
sandbox and its `src` rewritten to a sandbox URL; external http(s) images are left
untouched. Returns `{ uri, size, sha256, images:{ mirrored, failed } }` (`sha256` is also
the blob's ETag). Blobs are RAM-only, expire after a short TTL (~1h) and are bound to the
server instance that created them.
### Comments

View File

@@ -17,7 +17,7 @@
> «открыть страницу» и «заменить страницу»; этот даёт примитивы редактирования, в которых
> модель сильна.
Сервер предоставляет **38 инструментов**, построенных вокруг трёх идей, которые другие
Сервер предоставляет **39 инструментов**, построенных вокруг трёх идей, которые другие
Docmost-MCP не сочетают:
1. **Точечные, экономичные по токенам правки.** Адресуйте отдельный блок по id и патчите
@@ -109,7 +109,7 @@ Docmost-MCP не сочетают:
## Инструменты
Все 38 инструментов, сгруппированы по задачам, для которых вы их возьмёте.
Все 39 инструментов, сгруппированы по задачам, для которых вы их возьмёте.
### Чтение и поиск
@@ -209,6 +209,15 @@ Docmost-MCP не сочетают:
коллауты/таблицы), через живой документ, сохраняя комментарии, выравнивание и alt-текст.
(Перезапись «по месту» намеренно не используется — некоторые версии Docmost портят
вложение при перезаписи.)
- **`stash_page`** — Сериализовать страницу целиком (её полный ProseMirror JSON) в
эфемерный blob в оперативной памяти и вернуть ТОЛЬКО короткий анонимный URL — тело
никогда не попадает в контекст модели, поэтому это способ передать большую страницу
(вместе с её изображениями) внешнему потребителю без усечения. Каждое внутреннее
файловое/графическое вложение зеркалируется в тот же sandbox, а его `src` переписывается
на URL sandbox; внешние http(s)-изображения остаются нетронутыми. Возвращает
`{ uri, size, sha256, images:{ mirrored, failed } }` (`sha256` — это также ETag blob'а).
Blob'ы хранятся только в оперативной памяти, истекают через короткий TTL (~1 ч) и
привязаны к тому экземпляру сервера, который их создал.
### Комментарии

View File

@@ -7,7 +7,7 @@ import { TiptapTransformer } from "@hocuspocus/transformer";
import * as Y from "yjs";
import WebSocket from "ws";
import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
import { collectInternalFileNodes, normalizeFileUrl, } from "./lib/internal-file-urls.js";
import { collectInternalFileNodes, normalizeFileUrl, resolveInternalFilePath, } from "./lib/internal-file-urls.js";
import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, markdownToProseMirrorCanonical, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, applyDocToFragment, } from "./lib/collaboration.js";
import { footnoteWarningsField } from "./lib/footnote-analyze.js";
import { buildPageTree } from "./lib/tree.js";
@@ -54,6 +54,11 @@ export class DocmostClient {
getCollabTokenFn = null;
// Optional blob-sandbox sink for the stash tool. Null when not configured.
sandboxPut = null;
// Optional probes paired with the sink. `has` lets stashPage detect a blob
// FIFO-evicted by a LATER put in the same stash; `evict` lets it free this
// op's image blobs if the final doc put throws. Null when the sink omits them.
sandboxHas = null;
sandboxEvict = null;
// In-flight login dedup: when the token expires, the 401 interceptor,
// ensureAuthenticated, getCollabTokenWithReauth and the two multipart retries
// can all call login() at once. Memoizing a single promise collapses that
@@ -82,6 +87,8 @@ export class DocmostClient {
}
if (config.sandbox) {
this.sandboxPut = config.sandbox.put;
this.sandboxHas = config.sandbox.has ?? null;
this.sandboxEvict = config.sandbox.evict ?? null;
}
this.client = axios.create({
baseURL: this.apiUrl,
@@ -619,12 +626,16 @@ export class DocmostClient {
* and the response Content-Type (mime), defaulting to octet-stream.
*
* The fetch is size-bounded (hard 64 MiB ceiling) purely to protect memory;
* the authoritative per-blob cap is enforced by the sandbox `put`. The leading
* `/api` strip means this never escapes the Docmost API base.
* the authoritative per-blob cap is enforced by the sandbox `put`. The path is
* resolved via resolveInternalFilePath, which REJECTS (throws) any traversal
* or percent-encoded src that would let an attacker-controlled `attrs.src`
* escape `/api/files/` and reach another internal endpoint (SSRF). That throw
* happens before this.client.get, so a malicious src is counted as a failed
* mirror — it never reaches the network.
*/
async fetchInternalFile(src) {
const HARD_CEILING = 64 * 1024 * 1024; // 64 MiB memory guard
const relPath = src.replace(/^\/api/, "");
const relPath = resolveInternalFilePath(src);
const response = await this.client.get(relPath, {
responseType: "arraybuffer",
timeout: 30000,
@@ -668,42 +679,82 @@ export class DocmostClient {
const cloned = structuredClone(pageJson);
// Group internal-file nodes by normalized src so each unique resource is
// fetched + stored ONCE (dedup), and every node sharing that src points at
// the one sandbox blob.
// the one sandbox blob. Capture each node's ORIGINAL raw src per-node:
// dedup groups nodes whose normalized src is equal even when their raw srcs
// differ (e.g. `/api/files/...` vs the bare `/files/...`), so on a revert we
// must restore each node's own original value, not the group key.
const bySrc = new Map();
for (const node of collectInternalFileNodes(cloned.content)) {
const src = normalizeFileUrl(String(node.attrs.src));
const origSrc = String(node.attrs.src);
const src = normalizeFileUrl(origSrc);
const entry = { node, origSrc };
const group = bySrc.get(src);
if (group)
group.push(node);
group.push(entry);
else
bySrc.set(src, [node]);
bySrc.set(src, [entry]);
}
let mirrored = 0;
let failed = 0;
// Record every successful mirror so it can be (a) reverted if its blob gets
// FIFO-evicted by a LATER put in this same stash, and (b) freed if the final
// doc put throws.
const mirrors = [];
const MAX_CONCURRENCY = 5;
const groups = [...bySrc.entries()];
for (let i = 0; i < groups.length; i += MAX_CONCURRENCY) {
const batch = groups.slice(i, i + MAX_CONCURRENCY);
await Promise.all(batch.map(async ([src, nodes]) => {
await Promise.all(batch.map(async ([src, entries]) => {
try {
const { buffer, mime } = await this.fetchInternalFile(src);
// put may throw if the blob exceeds the per-blob/total caps.
const stored = this.sandboxPut(buffer, mime);
for (const node of nodes)
node.attrs.src = stored.uri;
for (const entry of entries)
entry.node.attrs.src = stored.uri;
mirrors.push({ uri: stored.uri, entries });
mirrored++;
}
catch (err) {
// One bad/oversized image must not abort the document.
// One bad/oversized image (or a rejected traversal src) must not
// abort the document. Logged unconditionally (never the blob body),
// matching the package's ungated console.warn convention.
failed++;
if (process.env.DEBUG) {
console.error(`stash_page: failed to mirror "${src}":`, err);
}
console.warn(`stash_page: failed to mirror "${src}": ${err instanceof Error ? err.message : String(err)}`);
}
}));
}
// Reconcile against FIFO eviction: a heavy page can have a later image-put
// evict an EARLIER image stored in this SAME stash. The stored doc must not
// reference an evicted blob (consumer 404) and the counts must not lie, so
// for any mirror whose blob is gone, revert its nodes to their original
// internal srcs and re-count it as failed.
if (this.sandboxHas) {
for (const mirror of mirrors) {
if (!this.sandboxHas(mirror.uri)) {
for (const entry of mirror.entries) {
entry.node.attrs.src = entry.origSrc;
}
mirrored--;
failed++;
console.warn(`stash_page: mirrored blob ${mirror.uri} was evicted before ` +
`the doc was stored; reverted its src and counted it as failed`);
}
}
}
const docBuf = Buffer.from(JSON.stringify(cloned), "utf8");
const stored = this.sandboxPut(docBuf, "application/json");
let stored;
try {
stored = this.sandboxPut(docBuf, "application/json");
}
catch (err) {
// The doc put failed (e.g. doc exceeds the cap). Free this op's image
// blobs instead of leaking them in RAM for the whole TTL, then re-throw.
if (this.sandboxEvict) {
for (const mirror of mirrors)
this.sandboxEvict(mirror.uri);
}
throw err;
}
return {
uri: stored.uri,
sha256: stored.sha256,

View File

@@ -11,7 +11,7 @@
//
// Mirrors editor-ext's isInternalFileUrl / normalizeFileUrl (kept as a local
// dup so the ESM mcp package does not depend on the editor-ext build).
export function isInternalFileUrl(url) {
function isInternalFileUrl(url) {
if (typeof url !== "string")
return false;
const normalized = url.trim();
@@ -24,6 +24,61 @@ export function normalizeFileUrl(src) {
return "/api" + trimmed;
return trimmed;
}
/**
* Resolve a page-content `src` into the safe, `/api`-relative path the stash
* tool may fetch over the authenticated loopback client — or THROW.
*
* SECURITY (SSRF / path-traversal): `src` comes from page content and is fully
* attacker-controllable. The mirroring fetch runs through the AUTHENTICATED
* loopback axios client whose baseURL ends in `/api`, so a naive
* `src.replace(/^\/api/, "")` lets a crafted value like
* `/api/files/../auth/whoami` collapse (via axios/WHATWG URL `..` resolution)
* into an ARBITRARY internal GET endpoint, whose authed response would then be
* stored in the anonymous sandbox (SSRF + data exfiltration). A prefix-only
* `startsWith("/api/files/")` check does NOT defend against this because the
* `..` segments are still present in the raw string and resolved later.
*
* This function defeats that by resolving the canonical pathname FIRST and only
* then asserting it still lives under `/api/files/`:
* - it rejects any percent-encoded dot/slash (`%2e` / `%2f`): the WHATWG URL
* parser collapses LITERAL `../` but does NOT decode `%2f` separators, so a
* content-controlled src must never be allowed to smuggle those past the
* canonicalization;
* - it resolves `new URL(trimmed, "http://internal.invalid").pathname`, which
* normalizes `..`/`.` segments (e.g. `/api/files/../auth/whoami` →
* `/api/auth/whoami`);
* - it then requires the canonical pathname to start with `/api/files/`, so a
* traversal that escaped that subtree is rejected.
*
* Returns the path RELATIVE to the `/api` base (e.g. `/files/<id>/<name>`),
* ready to hand to the loopback client. The throw happens BEFORE any network
* call, so a rejected src is counted as a failed mirror and its original src is
* kept (the per-image try/catch in stashPage never aborts the whole document).
*/
export function resolveInternalFilePath(src) {
const trimmed = src.trim();
// Percent-encoded dot/slash must never reach the URL canonicalizer: the
// WHATWG parser does NOT decode `%2f` into a path separator, so an encoded
// `..%2fauth` would survive canonicalization and still escape /api/files/.
if (/%2e|%2f/i.test(trimmed)) {
throw new Error(`Refusing internal file src with percent-encoded path segment: "${src}"`);
}
let pathname;
try {
// The base host is irrelevant (never contacted); it only lets the parser
// resolve a relative `src` and normalize `..`/`.` segments.
pathname = new URL(trimmed, "http://internal.invalid").pathname;
}
catch {
throw new Error(`Invalid internal file src: "${src}"`);
}
if (!pathname.startsWith("/api/files/")) {
throw new Error(`Refusing internal file src that escapes /api/files/: "${src}"`);
}
// Strip the `/api` base prefix; the loopback client's baseURL already ends
// in `/api`, so it expects the path relative to that (e.g. /files/<id>/<f>).
return pathname.replace(/^\/api/, "");
}
/**
* Recursively collect every node whose `attrs.src` is an internal file URL.
* Returns references to the live nodes (so the caller can rewrite `attrs.src`

View File

@@ -224,7 +224,10 @@ export const SHARED_TOOL_SPECS = {
'failed} }. Integrity: the blob is served with ETag = its sha256, so a ' +
'truncated/corrupted fetch is detectable. Blobs are RAM-only: they expire ' +
'after a short TTL (~1h) and are cleared on restart — consume the URL ' +
'within the TTL and one uptime, or re-stash.',
'within the TTL and one uptime, or re-stash. A blob is bound to the ' +
'server instance that created it: in a multi-replica deployment without ' +
'sticky sessions a blob stored on one instance is not retrievable via the ' +
'sandbox URL on another (it 404s like an expired one).',
buildShape: (z) => ({
pageId: z.string().min(1),
}),

View File

@@ -16,6 +16,7 @@ import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
import {
collectInternalFileNodes,
normalizeFileUrl,
resolveInternalFilePath,
} from "./lib/internal-file-urls.js";
import {
updatePageContentRealtime,
@@ -122,8 +123,14 @@ export type DocmostMcpConfig = { apiUrl: string } & (
// both branches; see the type doc above.
getCollabToken?: () => Promise<string>;
// Optional blob sandbox sink. Present only where the stash tool is wired;
// when absent, stash_page throws a clear "not configured" error.
sandbox?: { put: SandboxPut };
// when absent, stash_page throws a clear "not configured" error. The
// optional `has`/`evict` probes let stashPage keep its mirror counts honest
// under the store's FIFO eviction (see stashPage); older sinks omit them.
sandbox?: {
put: SandboxPut;
has?: (uri: string) => boolean;
evict?: (uri: string) => void;
};
};
export class DocmostClient {
@@ -143,6 +150,11 @@ export class DocmostClient {
private getCollabTokenFn: (() => Promise<string>) | null = null;
// Optional blob-sandbox sink for the stash tool. Null when not configured.
private sandboxPut: SandboxPut | null = null;
// Optional probes paired with the sink. `has` lets stashPage detect a blob
// FIFO-evicted by a LATER put in the same stash; `evict` lets it free this
// op's image blobs if the final doc put throws. Null when the sink omits them.
private sandboxHas: ((uri: string) => boolean) | null = null;
private sandboxEvict: ((uri: string) => void) | null = null;
// In-flight login dedup: when the token expires, the 401 interceptor,
// ensureAuthenticated, getCollabTokenWithReauth and the two multipart retries
// can all call login() at once. Memoizing a single promise collapses that
@@ -184,6 +196,8 @@ export class DocmostClient {
}
if (config.sandbox) {
this.sandboxPut = config.sandbox.put;
this.sandboxHas = config.sandbox.has ?? null;
this.sandboxEvict = config.sandbox.evict ?? null;
}
this.client = axios.create({
baseURL: this.apiUrl,
@@ -795,14 +809,18 @@ export class DocmostClient {
* and the response Content-Type (mime), defaulting to octet-stream.
*
* The fetch is size-bounded (hard 64 MiB ceiling) purely to protect memory;
* the authoritative per-blob cap is enforced by the sandbox `put`. The leading
* `/api` strip means this never escapes the Docmost API base.
* the authoritative per-blob cap is enforced by the sandbox `put`. The path is
* resolved via resolveInternalFilePath, which REJECTS (throws) any traversal
* or percent-encoded src that would let an attacker-controlled `attrs.src`
* escape `/api/files/` and reach another internal endpoint (SSRF). That throw
* happens before this.client.get, so a malicious src is counted as a failed
* mirror — it never reaches the network.
*/
private async fetchInternalFile(
src: string,
): Promise<{ buffer: Buffer; mime: string }> {
const HARD_CEILING = 64 * 1024 * 1024; // 64 MiB memory guard
const relPath = src.replace(/^\/api/, "");
const relPath = resolveInternalFilePath(src);
const response = await this.client.get(relPath, {
responseType: "arraybuffer",
timeout: 30000,
@@ -857,42 +875,90 @@ export class DocmostClient {
// Group internal-file nodes by normalized src so each unique resource is
// fetched + stored ONCE (dedup), and every node sharing that src points at
// the one sandbox blob.
const bySrc = new Map<string, any[]>();
// the one sandbox blob. Capture each node's ORIGINAL raw src per-node:
// dedup groups nodes whose normalized src is equal even when their raw srcs
// differ (e.g. `/api/files/...` vs the bare `/files/...`), so on a revert we
// must restore each node's own original value, not the group key.
const bySrc = new Map<string, Array<{ node: any; origSrc: string }>>();
for (const node of collectInternalFileNodes(cloned.content)) {
const src = normalizeFileUrl(String(node.attrs.src));
const origSrc = String(node.attrs.src);
const src = normalizeFileUrl(origSrc);
const entry = { node, origSrc };
const group = bySrc.get(src);
if (group) group.push(node);
else bySrc.set(src, [node]);
if (group) group.push(entry);
else bySrc.set(src, [entry]);
}
let mirrored = 0;
let failed = 0;
// Record every successful mirror so it can be (a) reverted if its blob gets
// FIFO-evicted by a LATER put in this same stash, and (b) freed if the final
// doc put throws.
const mirrors: Array<{
uri: string;
entries: Array<{ node: any; origSrc: string }>;
}> = [];
const MAX_CONCURRENCY = 5;
const groups = [...bySrc.entries()];
for (let i = 0; i < groups.length; i += MAX_CONCURRENCY) {
const batch = groups.slice(i, i + MAX_CONCURRENCY);
await Promise.all(
batch.map(async ([src, nodes]) => {
batch.map(async ([src, entries]) => {
try {
const { buffer, mime } = await this.fetchInternalFile(src);
// put may throw if the blob exceeds the per-blob/total caps.
const stored = this.sandboxPut!(buffer, mime);
for (const node of nodes) node.attrs.src = stored.uri;
for (const entry of entries) entry.node.attrs.src = stored.uri;
mirrors.push({ uri: stored.uri, entries });
mirrored++;
} catch (err) {
// One bad/oversized image must not abort the document.
// One bad/oversized image (or a rejected traversal src) must not
// abort the document. Logged unconditionally (never the blob body),
// matching the package's ungated console.warn convention.
failed++;
if (process.env.DEBUG) {
console.error(`stash_page: failed to mirror "${src}":`, err);
}
console.warn(
`stash_page: failed to mirror "${src}": ${
err instanceof Error ? err.message : String(err)
}`,
);
}
}),
);
}
// Reconcile against FIFO eviction: a heavy page can have a later image-put
// evict an EARLIER image stored in this SAME stash. The stored doc must not
// reference an evicted blob (consumer 404) and the counts must not lie, so
// for any mirror whose blob is gone, revert its nodes to their original
// internal srcs and re-count it as failed.
if (this.sandboxHas) {
for (const mirror of mirrors) {
if (!this.sandboxHas(mirror.uri)) {
for (const entry of mirror.entries) {
entry.node.attrs.src = entry.origSrc;
}
mirrored--;
failed++;
console.warn(
`stash_page: mirrored blob ${mirror.uri} was evicted before ` +
`the doc was stored; reverted its src and counted it as failed`,
);
}
}
}
const docBuf = Buffer.from(JSON.stringify(cloned), "utf8");
const stored = this.sandboxPut(docBuf, "application/json");
let stored: { uri: string; sha256: string; size: number };
try {
stored = this.sandboxPut(docBuf, "application/json");
} catch (err) {
// The doc put failed (e.g. doc exceeds the cap). Free this op's image
// blobs instead of leaking them in RAM for the whole TTL, then re-throw.
if (this.sandboxEvict) {
for (const mirror of mirrors) this.sandboxEvict(mirror.uri);
}
throw err;
}
return {
uri: stored.uri,
sha256: stored.sha256,

View File

@@ -12,7 +12,7 @@
// Mirrors editor-ext's isInternalFileUrl / normalizeFileUrl (kept as a local
// dup so the ESM mcp package does not depend on the editor-ext build).
export function isInternalFileUrl(url: unknown): boolean {
function isInternalFileUrl(url: unknown): boolean {
if (typeof url !== "string") return false;
const normalized = url.trim();
return (
@@ -27,6 +27,65 @@ export function normalizeFileUrl(src: string): string {
return trimmed;
}
/**
* Resolve a page-content `src` into the safe, `/api`-relative path the stash
* tool may fetch over the authenticated loopback client — or THROW.
*
* SECURITY (SSRF / path-traversal): `src` comes from page content and is fully
* attacker-controllable. The mirroring fetch runs through the AUTHENTICATED
* loopback axios client whose baseURL ends in `/api`, so a naive
* `src.replace(/^\/api/, "")` lets a crafted value like
* `/api/files/../auth/whoami` collapse (via axios/WHATWG URL `..` resolution)
* into an ARBITRARY internal GET endpoint, whose authed response would then be
* stored in the anonymous sandbox (SSRF + data exfiltration). A prefix-only
* `startsWith("/api/files/")` check does NOT defend against this because the
* `..` segments are still present in the raw string and resolved later.
*
* This function defeats that by resolving the canonical pathname FIRST and only
* then asserting it still lives under `/api/files/`:
* - it rejects any percent-encoded dot/slash (`%2e` / `%2f`): the WHATWG URL
* parser collapses LITERAL `../` but does NOT decode `%2f` separators, so a
* content-controlled src must never be allowed to smuggle those past the
* canonicalization;
* - it resolves `new URL(trimmed, "http://internal.invalid").pathname`, which
* normalizes `..`/`.` segments (e.g. `/api/files/../auth/whoami` →
* `/api/auth/whoami`);
* - it then requires the canonical pathname to start with `/api/files/`, so a
* traversal that escaped that subtree is rejected.
*
* Returns the path RELATIVE to the `/api` base (e.g. `/files/<id>/<name>`),
* ready to hand to the loopback client. The throw happens BEFORE any network
* call, so a rejected src is counted as a failed mirror and its original src is
* kept (the per-image try/catch in stashPage never aborts the whole document).
*/
export function resolveInternalFilePath(src: string): string {
const trimmed = src.trim();
// Percent-encoded dot/slash must never reach the URL canonicalizer: the
// WHATWG parser does NOT decode `%2f` into a path separator, so an encoded
// `..%2fauth` would survive canonicalization and still escape /api/files/.
if (/%2e|%2f/i.test(trimmed)) {
throw new Error(
`Refusing internal file src with percent-encoded path segment: "${src}"`,
);
}
let pathname: string;
try {
// The base host is irrelevant (never contacted); it only lets the parser
// resolve a relative `src` and normalize `..`/`.` segments.
pathname = new URL(trimmed, "http://internal.invalid").pathname;
} catch {
throw new Error(`Invalid internal file src: "${src}"`);
}
if (!pathname.startsWith("/api/files/")) {
throw new Error(
`Refusing internal file src that escapes /api/files/: "${src}"`,
);
}
// Strip the `/api` base prefix; the loopback client's baseURL already ends
// in `/api`, so it expects the path relative to that (e.g. /files/<id>/<f>).
return pathname.replace(/^\/api/, "");
}
/**
* Recursively collect every node whose `attrs.src` is an internal file URL.
* Returns references to the live nodes (so the caller can rewrite `attrs.src`

View File

@@ -283,7 +283,10 @@ export const SHARED_TOOL_SPECS = {
'failed} }. Integrity: the blob is served with ETag = its sha256, so a ' +
'truncated/corrupted fetch is detectable. Blobs are RAM-only: they expire ' +
'after a short TTL (~1h) and are cleared on restart — consume the URL ' +
'within the TTL and one uptime, or re-stash.',
'within the TTL and one uptime, or re-stash. A blob is bound to the ' +
'server instance that created it: in a multi-replica deployment without ' +
'sticky sessions a blob stored on one instance is not retrievable via the ' +
'sandbox URL on another (it 404s like an expired one).',
buildShape: (z) => ({
pageId: z.string().min(1),
}),

View File

@@ -41,17 +41,53 @@ after(async () => {
// In-memory sandbox sink mirroring the host binding: store the blob, return a
// uri + sha256 + size. Records every put so the test can inspect what was
// stashed (and verify the doc body never leaves via the return value).
function makeSandbox() {
// stashed (and verify the doc body never leaves via the return value). Models
// the real store's FIFO eviction + cap + the has/evict probes so B1 (self-
// eviction reconciliation and doc-put-throw cleanup) is testable. Default
// maxTotal is effectively unlimited so the happy-path tests behave as before.
//
// `throwOnJson` forces the final document put to throw, standing in for "doc
// exceeds the cap".
function makeSandbox({ maxTotal = Infinity, throwOnJson = false } = {}) {
const puts = [];
const evicted = [];
// id -> size, in insertion order (Map preserves it) so the oldest is first.
const live = new Map();
let total = 0;
const idOf = (uri) => uri.substring(uri.lastIndexOf("/") + 1);
return {
puts,
evicted,
put(buf, mime) {
if (throwOnJson && mime === "application/json") {
throw new Error("doc blob exceeds the sandbox cap");
}
const sha256 = createHash("sha256").update(buf).digest("hex");
const id = `id-${puts.length}`;
puts.push({ buf, mime, sha256 });
puts.push({ buf, mime, sha256, id });
live.set(id, buf.length);
total += buf.length;
// FIFO-evict the oldest live blobs until this put fits under the cap.
while (total > maxTotal && live.size > 0) {
const oldest = live.keys().next().value;
if (oldest === id) break; // never evict the blob we just stored
total -= live.get(oldest);
live.delete(oldest);
evicted.push(oldest);
}
return { uri: `https://sb.test/api/sb/${id}`, sha256, size: buf.length };
},
has(uri) {
return live.has(idOf(uri));
},
evict(uri) {
const id = idOf(uri);
if (live.has(id)) {
total -= live.get(id);
live.delete(id);
}
evicted.push(id);
},
};
}
@@ -80,8 +116,18 @@ function pageDoc() {
}
// Build a client wired to a server that logs in, serves the page, and serves the
// internal file bytes. `fileStatus` lets a test force the file fetch to fail.
async function buildClient(sandbox, { fileStatus = 200 } = {}) {
// internal file bytes. `fileStatus` lets a test force the file fetch to fail;
// `doc` overrides the served page; `fileBytes`/`fileHeaders` shape the file
// response (used by the empty-body / missing-Content-Type branch tests).
async function buildClient(
sandbox,
{
fileStatus = 200,
doc = pageDoc(),
fileBytes = IMAGE_BYTES,
fileHeaders = { "Content-Type": "image/png" },
} = {},
) {
const baseURL = await spawn(async (req, res) => {
await readBody(req);
if (req.url === "/api/auth/login") {
@@ -94,17 +140,17 @@ async function buildClient(sandbox, { fileStatus = 200 } = {}) {
}
if (req.url === "/api/pages/info") {
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ data: { id: "page-1", title: "T", content: pageDoc() } }));
res.end(JSON.stringify({ data: { id: "page-1", title: "T", content: doc } }));
return;
}
if (req.url.startsWith("/api/files/att-1/")) {
if (req.url.startsWith("/api/files/")) {
if (fileStatus !== 200) {
res.writeHead(fileStatus);
res.end();
return;
}
res.writeHead(200, { "Content-Type": "image/png" });
res.end(IMAGE_BYTES);
res.writeHead(200, fileHeaders);
res.end(fileBytes);
return;
}
res.writeHead(404);
@@ -114,10 +160,26 @@ async function buildClient(sandbox, { fileStatus = 200 } = {}) {
apiUrl: baseURL,
email: "u@example.com",
password: "pw",
sandbox: { put: (buf, mime) => sandbox.put(buf, mime) },
sandbox: {
put: (buf, mime) => sandbox.put(buf, mime),
has: (uri) => sandbox.has(uri),
evict: (uri) => sandbox.evict(uri),
},
});
}
// A page with several DISTINCT internal images (each a unique attachment id) so
// each is its own sandbox blob — needed to exercise FIFO self-eviction.
function multiImageDoc(n) {
return {
type: "doc",
content: Array.from({ length: n }, (_, i) => ({
type: "image",
attrs: { src: `/api/files/att-${i}/pic.png`, attachmentId: `att-${i}` },
})),
};
}
test("stashPage stores the doc + mirrors/rewrites internal images, returns only a link", async () => {
const sandbox = makeSandbox();
const client = await buildClient(sandbox);
@@ -181,3 +243,88 @@ test("stashPage throws a clear error when no sandbox is configured", async () =>
});
await assert.rejects(() => client.stashPage("page-1"), /not configured/);
});
test("stashPage reverts a FIFO-evicted image and counts it as failed (B1)", async () => {
// 3 distinct images of S=4000 bytes each; doc JSON is far smaller than one
// image. With a cap of 4500: storing img1 evicts img0, storing img2 evicts
// img1 — so only img2 survives the loop (img0 + img1 reverted). The doc
// (4000 + a few hundred bytes <= 4500) then fits alongside the survivor, so it
// does NOT trigger further eviction. The stored doc must therefore reference
// exactly one live blob and revert the other two to their internal srcs.
const BIG = Buffer.alloc(4000, 0x41);
const sandbox = makeSandbox({ maxTotal: 4500 });
const client = await buildClient(sandbox, {
doc: multiImageDoc(3),
fileBytes: BIG,
});
const result = await client.stashPage("page-1");
// Two images were evicted before the doc was stored -> counted as failed.
assert.deepEqual(result.images, { mirrored: 1, failed: 2 });
// Inspect the stashed doc: no node may point at an evicted (now-dead) blob,
// and every reverted node carries its ORIGINAL internal src again.
const docPut = sandbox.puts.find((p) => p.mime === "application/json");
const stashed = JSON.parse(docPut.buf.toString("utf8"));
const imgs = stashed.content.content.filter((n) => n.type === "image");
let live = 0;
let reverted = 0;
for (const img of imgs) {
const src = img.attrs.src;
if (src.startsWith("https://sb.test/api/sb/")) {
assert.ok(sandbox.has(src), `doc references evicted blob ${src}`);
live++;
} else {
// Reverted to the original internal src.
assert.match(src, /^\/api\/files\/att-\d+\/pic\.png$/);
reverted++;
}
}
assert.equal(live, 1);
assert.equal(reverted, 2);
});
test("stashPage frees image blobs when the doc put throws (B1)", async () => {
// Two distinct images mirror fine; the final JSON doc put throws (doc exceeds
// cap). stashPage must reject AND evict every image blob it stored this op.
const sandbox = makeSandbox({ throwOnJson: true });
const client = await buildClient(sandbox, { doc: multiImageDoc(2) });
await assert.rejects(() => client.stashPage("page-1"));
// Both image blobs were stored, then evicted on the doc-put failure.
const imagePuts = sandbox.puts.filter((p) => p.mime === "image/png");
assert.equal(imagePuts.length, 2);
for (const p of imagePuts) {
assert.ok(sandbox.evicted.includes(p.id), `image ${p.id} was not freed`);
}
});
test("stashPage counts an empty file response as failed (B1/fetchInternalFile)", async () => {
const sandbox = makeSandbox();
const client = await buildClient(sandbox, {
fileBytes: Buffer.alloc(0),
fileHeaders: { "Content-Type": "image/png", "Content-Length": "0" },
});
const result = await client.stashPage("page-1");
// The single internal image (deduped) yielded an empty body -> failed.
assert.deepEqual(result.images, { mirrored: 0, failed: 1 });
// Only the doc blob was stored.
assert.equal(sandbox.puts.filter((p) => p.mime === "image/png").length, 0);
});
test("stashPage mirrors a file with no Content-Type as octet-stream (fetchInternalFile)", async () => {
const sandbox = makeSandbox();
// No Content-Type header at all -> fetchInternalFile defaults to octet-stream.
const client = await buildClient(sandbox, { fileHeaders: {} });
const result = await client.stashPage("page-1");
assert.equal(result.images.mirrored, 1);
const imagePut = sandbox.puts.find((p) => p.mime !== "application/json");
assert.ok(imagePut, "expected an image put");
assert.equal(imagePut.mime, "application/octet-stream");
});

View File

@@ -0,0 +1,61 @@
// Unit tests for the internal-file URL helpers the stash tool relies on. The
// critical case is resolveInternalFilePath, whose whole job is to REJECT a
// content-controlled `src` that tries to escape /api/files/ (SSRF / traversal)
// before it ever reaches the authenticated loopback client.
import { test } from "node:test";
import assert from "node:assert/strict";
import {
resolveInternalFilePath,
normalizeFileUrl,
collectInternalFileNodes,
} from "../../build/lib/internal-file-urls.js";
test("resolveInternalFilePath accepts a normal internal src", () => {
assert.equal(
resolveInternalFilePath("/api/files/att-1/pic.png"),
"/files/att-1/pic.png",
);
});
test("resolveInternalFilePath rejects traversal / encoded variants (SSRF guard)", () => {
// `..` collapses to /api/auth/whoami -> outside /api/files/ -> rejected.
assert.throws(() => resolveInternalFilePath("/api/files/../auth/whoami"));
// Escapes the /api base entirely.
assert.throws(() => resolveInternalFilePath("/api/files/../../internal"));
// Percent-encoded dot -> rejected before canonicalization.
assert.throws(() => resolveInternalFilePath("/api/files/%2e%2e/x"));
// Percent-encoded slash separator -> rejected before canonicalization.
assert.throws(() => resolveInternalFilePath("/api/files/..%2fauth"));
});
test("normalizeFileUrl rewrites the bare /files/ branch and leaves /api/files/ alone", () => {
assert.equal(
normalizeFileUrl("/files/att-1/pic.png"),
"/api/files/att-1/pic.png",
);
assert.equal(
normalizeFileUrl("/api/files/att-1/pic.png"),
"/api/files/att-1/pic.png",
);
});
test("collectInternalFileNodes recurses into nested content containers", () => {
// The internal image is buried inside a callout's content array, so a
// regression on the recursion (e.g. a shallow .filter()) would miss it.
const nested = {
type: "image",
attrs: { src: "/api/files/att-9/deep.png", attachmentId: "att-9" },
};
const doc = {
type: "doc",
content: [
{
type: "callout",
content: [{ type: "paragraph", content: [nested] }],
},
],
};
const found = collectInternalFileNodes(doc);
assert.equal(found.length, 1);
assert.equal(found[0], nested);
});