Security: - stash_page: reject path-traversal / percent-encoded srcs before the authed loopback fetch (resolveInternalFilePath), closing an SSRF/exfiltration hole where a crafted node.attrs.src could read an arbitrary internal GET endpoint into the anonymous sandbox. Stability: - stash_page: revert + recount mirrors FIFO-evicted by a later put in the same stash (no dangling sandbox refs, honest images.mirrored/failed); free image blobs if the final document put throws. - Reject/clamp non-positive SANDBOX_TTL_MS to the 1h default (warn once). - Log mirror failures unconditionally (console.warn, no blob bodies). Cleanup / architecture: - Remove dead expiresAt from SandboxPutResult. - Centralize the /api/sb route in SANDBOX_ROUTE_SEGMENT/SANDBOX_API_PATH and move URL composition into SandboxStore.putAndLink; drop the duplicated sink closures and the now-unused EnvironmentService injection from McpService and AiChatToolsService. - Un-export isInternalFileUrl; document the process-local (instance-bound) sandbox limitation in the tool description and .env.example. Docs/tests: - README/README.ru: 38 -> 39 tools + stash_page entry. - Add traversal/normalize/recursion unit tests, stash self-eviction + doc-put-throw + empty/octet-stream mock tests, controller If-None-Match (wildcard/weak/list) + Cache-Control tests, and SANDBOX_TTL_MS validation tests. Regenerate packages/mcp/build. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -134,6 +134,10 @@ MCP_DOCMOST_PASSWORD=
|
||||
# SANDBOX_PUBLIC_URL is the base used to build those URLs; it MUST be reachable
|
||||
# by the consumer (do NOT use a loopback address if the consumer is remote).
|
||||
# Defaults to APP_URL when unset.
|
||||
# NOTE: the store is process-local — blobs live only on the instance that
|
||||
# created them. Behind a multi-replica load balancer WITHOUT sticky sessions a
|
||||
# consumer may hit a different instance and get a 404 (indistinguishable from an
|
||||
# expired blob). Single-host deployments are unaffected.
|
||||
# SANDBOX_PUBLIC_URL=https://docs.example.com
|
||||
# SANDBOX_TTL_MS=3600000
|
||||
# SANDBOX_MAX_BYTES=8388608
|
||||
|
||||
@@ -63,9 +63,8 @@ describe('AiChatToolsService deletePage guardrail (H4)', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// environmentService + sandboxStore (only used by the stash tool closure,
|
||||
// which these tests do not execute).
|
||||
{} as never,
|
||||
// sandboxStore (only used by the stash tool closure, which these tests do
|
||||
// not execute).
|
||||
{} as never,
|
||||
);
|
||||
});
|
||||
@@ -179,9 +178,8 @@ describe('AiChatToolsService expanded toolset guardrails', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// environmentService + sandboxStore (only used by the stash tool closure,
|
||||
// which these tests do not execute).
|
||||
{} as never,
|
||||
// sandboxStore (only used by the stash tool closure, which these tests do
|
||||
// not execute).
|
||||
{} as never,
|
||||
);
|
||||
});
|
||||
@@ -298,9 +296,8 @@ describe('AiChatToolsService node-arg JSON-string coercion', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// environmentService + sandboxStore (only used by the stash tool closure,
|
||||
// which these tests do not execute).
|
||||
{} as never,
|
||||
// sandboxStore (only used by the stash tool closure, which these tests do
|
||||
// not execute).
|
||||
{} as never,
|
||||
);
|
||||
});
|
||||
@@ -452,9 +449,8 @@ describe('AiChatToolsService model-friendly input validation (#190)', () => {
|
||||
{} as never,
|
||||
{} as never,
|
||||
{} as never,
|
||||
// environmentService + sandboxStore (only used by the stash tool closure,
|
||||
// which these tests do not execute).
|
||||
{} as never,
|
||||
// sandboxStore (only used by the stash tool closure, which these tests do
|
||||
// not execute).
|
||||
{} as never,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -16,7 +16,6 @@ import {
|
||||
import { resolveCurrentPageResult } from './current-page.util';
|
||||
import { parseNodeArg } from './parse-node-arg';
|
||||
import { modelFriendlyInput } from './model-friendly-input';
|
||||
import { EnvironmentService } from '../../../integrations/environment/environment.service';
|
||||
import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
|
||||
|
||||
/**
|
||||
@@ -43,7 +42,6 @@ export class AiChatToolsService {
|
||||
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
|
||||
private readonly spaceMemberRepo: SpaceMemberRepo,
|
||||
private readonly pagePermissionRepo: PagePermissionRepo,
|
||||
private readonly environmentService: EnvironmentService,
|
||||
// Shared singleton in-RAM blob store backing the stash tool.
|
||||
private readonly sandboxStore: SandboxStore,
|
||||
) {}
|
||||
@@ -91,22 +89,23 @@ export class AiChatToolsService {
|
||||
aiChatId,
|
||||
});
|
||||
|
||||
// Bind the stash tool to the shared in-RAM SandboxStore and compose the
|
||||
// anonymous public URL here (the MCP package never touches env or the
|
||||
// store). put() returns the read URL + sha256/size; sha256 is also the
|
||||
// blob's ETag for integrity.
|
||||
const sandboxPut = (buf: Buffer, mime: string) => {
|
||||
const stored = this.sandboxStore.put(buf, mime);
|
||||
const base = this.environmentService.getSandboxPublicUrl();
|
||||
return { uri: `${base}/api/sb/${stored.id}`, sha256: stored.sha256, size: stored.size };
|
||||
};
|
||||
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
|
||||
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
|
||||
// package needs to keep its mirror counts honest under FIFO eviction (the
|
||||
// package never touches env or the store). The sink speaks `uri`s, so the
|
||||
// probes map a uri back to its id (the last path segment).
|
||||
const idOf = (uri: string) => uri.substring(uri.lastIndexOf('/') + 1);
|
||||
|
||||
const { DocmostClient, sharedToolSpecs } = await loadDocmostMcp();
|
||||
const client: DocmostClientLike = new DocmostClient({
|
||||
apiUrl,
|
||||
getToken,
|
||||
getCollabToken,
|
||||
sandbox: { put: sandboxPut },
|
||||
sandbox: {
|
||||
put: (buf, mime) => this.sandboxStore.putAndLink(buf, mime),
|
||||
has: (uri) => this.sandboxStore.has(idOf(uri)),
|
||||
evict: (uri) => this.sandboxStore.remove(idOf(uri)),
|
||||
},
|
||||
});
|
||||
|
||||
// Build an ai-SDK tool from a shared, zod-agnostic spec. The spec owns the
|
||||
|
||||
@@ -171,11 +171,15 @@ export type DocmostClientConfig = {
|
||||
getCollabToken?: () => Promise<string>;
|
||||
// Optional blob-sandbox sink for the stash tool. `put` stores a blob in the
|
||||
// host's in-RAM SandboxStore and returns the anonymous read URL + integrity.
|
||||
// The optional `has`/`evict` probes let stashPage keep its mirror counts
|
||||
// honest under the store's FIFO eviction (mirror of the package's sink type).
|
||||
sandbox?: {
|
||||
put: (
|
||||
buf: Buffer,
|
||||
mime: string,
|
||||
) => { uri: string; sha256: string; size: number };
|
||||
has?: (uri: string) => boolean;
|
||||
evict?: (uri: string) => void;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -14,4 +14,30 @@ describe('EnvironmentService', () => {
|
||||
it('should be defined', () => {
|
||||
expect(service).toBeDefined();
|
||||
});
|
||||
|
||||
describe('getSandboxTtlMs', () => {
|
||||
// ConfigService stub: get(key, def) returns the configured value for the key
|
||||
// (falling back to def), matching the @nestjs/config contract the service
|
||||
// calls with (key, default).
|
||||
const build = (sandboxTtl?: string) =>
|
||||
new EnvironmentService({
|
||||
get: (key: string, def?: string) =>
|
||||
key === 'SANDBOX_TTL_MS' ? (sandboxTtl ?? def) : def,
|
||||
} as any);
|
||||
|
||||
it.each(['0', '-5', 'abc'])(
|
||||
'falls back to the 3600000 default for invalid value %s',
|
||||
(value) => {
|
||||
expect(build(value).getSandboxTtlMs()).toBe(3_600_000);
|
||||
},
|
||||
);
|
||||
|
||||
it('returns the parsed value for a valid positive integer', () => {
|
||||
expect(build('120000').getSandboxTtlMs()).toBe(120_000);
|
||||
});
|
||||
|
||||
it('uses the 3600000 default when SANDBOX_TTL_MS is unset', () => {
|
||||
expect(build(undefined).getSandboxTtlMs()).toBe(3_600_000);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { ConfigService } from '@nestjs/config';
|
||||
import ms, { StringValue } from 'ms';
|
||||
|
||||
@Injectable()
|
||||
export class EnvironmentService {
|
||||
private readonly logger = new Logger(EnvironmentService.name);
|
||||
// One-shot guard so an invalid SANDBOX_TTL_MS is warned about once, not on
|
||||
// every getSandboxTtlMs() call (which runs per blob put).
|
||||
private sandboxTtlWarned = false;
|
||||
|
||||
constructor(private configService: ConfigService) {}
|
||||
|
||||
getNodeEnv(): string {
|
||||
@@ -348,12 +353,25 @@ export class EnvironmentService {
|
||||
}
|
||||
|
||||
// Blob time-to-live. Default 1h. The unguessable UUID + this short TTL + TLS
|
||||
// are the whole capability model (no tokens).
|
||||
// are the whole capability model (no tokens). A non-positive or non-integer
|
||||
// value would make every blob expire instantly (silent 404s), so reject it and
|
||||
// fall back to the 1h default (warned about once to avoid per-put log spam).
|
||||
getSandboxTtlMs(): number {
|
||||
return parseInt(
|
||||
const parsed = parseInt(
|
||||
this.configService.get<string>('SANDBOX_TTL_MS', '3600000'),
|
||||
10,
|
||||
);
|
||||
if (!Number.isInteger(parsed) || parsed <= 0) {
|
||||
if (!this.sandboxTtlWarned) {
|
||||
this.sandboxTtlWarned = true;
|
||||
this.logger.warn(
|
||||
`Invalid SANDBOX_TTL_MS (must be a positive integer); ` +
|
||||
`falling back to the 3600000 ms default`,
|
||||
);
|
||||
}
|
||||
return 3_600_000;
|
||||
}
|
||||
return parsed;
|
||||
}
|
||||
|
||||
// Per-blob cap for non-image blobs (the serialized document). Default 8 MiB.
|
||||
|
||||
@@ -143,6 +143,11 @@ export type DocmostMcpConfig = (
|
||||
buf: Buffer,
|
||||
mime: string,
|
||||
) => { uri: string; sha256: string; size: number };
|
||||
// Optional live/evict probes the package uses to keep stash_page's mirror
|
||||
// counts honest under the store's FIFO eviction (mirror of the package's
|
||||
// sink type); older bindings omit them.
|
||||
has?: (uri: string) => boolean;
|
||||
evict?: (uri: string) => void;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@@ -109,7 +109,6 @@ function makeService(opts: {
|
||||
};
|
||||
|
||||
const service = new McpService(
|
||||
undefined as never, // environmentService
|
||||
undefined as never, // workspaceRepo
|
||||
undefined as never, // authService
|
||||
undefined as never, // tokenService
|
||||
|
||||
@@ -8,7 +8,6 @@ import { ModuleRef } from '@nestjs/core';
|
||||
import { pathToFileURL } from 'node:url';
|
||||
import { IncomingMessage } from 'node:http';
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { EnvironmentService } from '../environment/environment.service';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import { UserSessionRepo } from '@docmost/db/repos/session/user-session.repo';
|
||||
@@ -93,7 +92,6 @@ export class McpService implements OnModuleDestroy {
|
||||
private readonly sweepTimer: NodeJS.Timeout;
|
||||
|
||||
constructor(
|
||||
private readonly environmentService: EnvironmentService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly authService: AuthService,
|
||||
private readonly tokenService: TokenService,
|
||||
@@ -118,20 +116,17 @@ export class McpService implements OnModuleDestroy {
|
||||
clearInterval(this.sweepTimer);
|
||||
}
|
||||
|
||||
// Bind the stash tool to the shared in-RAM SandboxStore and compose the
|
||||
// anonymous public URL (the MCP package owns neither env nor the store).
|
||||
// put() returns the read URL + sha256/size; sha256 is also the blob ETag.
|
||||
// Bind the stash tool to the shared in-RAM SandboxStore. The store owns the
|
||||
// anonymous-URL composition (putAndLink) and the live/evict probes the MCP
|
||||
// package needs to keep its mirror counts honest under FIFO eviction; the
|
||||
// package owns neither env nor the store. The sink speaks `uri`s, so the
|
||||
// probes map a uri back to its id (the last path segment).
|
||||
private buildSandboxConfig(): DocmostMcpConfig['sandbox'] {
|
||||
const idOf = (uri: string) => uri.substring(uri.lastIndexOf('/') + 1);
|
||||
return {
|
||||
put: (buf: Buffer, mime: string) => {
|
||||
const stored = this.sandboxStore.put(buf, mime);
|
||||
const base = this.environmentService.getSandboxPublicUrl();
|
||||
return {
|
||||
uri: `${base}/api/sb/${stored.id}`,
|
||||
sha256: stored.sha256,
|
||||
size: stored.size,
|
||||
};
|
||||
},
|
||||
put: (buf, mime) => this.sandboxStore.putAndLink(buf, mime),
|
||||
has: (uri) => this.sandboxStore.has(idOf(uri)),
|
||||
evict: (uri) => this.sandboxStore.remove(idOf(uri)),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
// Single source of truth for the anonymous blob-sandbox route. The controller
|
||||
// is mounted under the global `/api` prefix, so its decorator uses the bare
|
||||
// segment while the public URL and the workspace-gate exclusion need the full
|
||||
// path — derive the latter from the former so the two never drift.
|
||||
export const SANDBOX_ROUTE_SEGMENT = 'sb';
|
||||
export const SANDBOX_API_PATH = `/api/${SANDBOX_ROUTE_SEGMENT}`;
|
||||
@@ -121,4 +121,70 @@ describe('SandboxController', () => {
|
||||
|
||||
expect(res._sent.status).toBe(200);
|
||||
});
|
||||
|
||||
it('returns 304 for a wildcard "*" If-None-Match', async () => {
|
||||
const sha = 'e'.repeat(64);
|
||||
const store = {
|
||||
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
|
||||
};
|
||||
const controller = new SandboxController(store as any);
|
||||
const res = makeRes();
|
||||
|
||||
await controller.get(VALID_ID, makeReq({ 'if-none-match': '*' }), res);
|
||||
|
||||
expect(res._sent.status).toBe(304);
|
||||
});
|
||||
|
||||
it('returns 304 for a weak validator W/"<sha>"', async () => {
|
||||
const sha = 'f'.repeat(64);
|
||||
const store = {
|
||||
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
|
||||
};
|
||||
const controller = new SandboxController(store as any);
|
||||
const res = makeRes();
|
||||
|
||||
await controller.get(VALID_ID, makeReq({ 'if-none-match': `W/"${sha}"` }), res);
|
||||
|
||||
expect(res._sent.status).toBe(304);
|
||||
});
|
||||
|
||||
it('returns 304 when a comma-separated If-None-Match list contains the sha', async () => {
|
||||
const sha = '1'.repeat(64);
|
||||
const store = {
|
||||
get: jest.fn().mockReturnValue(entry(Buffer.from('x'), 'application/json', sha)),
|
||||
};
|
||||
const controller = new SandboxController(store as any);
|
||||
const res = makeRes();
|
||||
|
||||
await controller.get(
|
||||
VALID_ID,
|
||||
makeReq({ 'if-none-match': `"other", "${sha}"` }),
|
||||
res,
|
||||
);
|
||||
|
||||
expect(res._sent.status).toBe(304);
|
||||
});
|
||||
|
||||
it('sets a private, immutable Cache-Control with a max-age within the TTL on 200', async () => {
|
||||
const sha = '2'.repeat(64);
|
||||
// Known TTL: ~30s out, so the floored max-age must land within [0, 60].
|
||||
const e: SandboxEntry = {
|
||||
buf: Buffer.from('x'),
|
||||
mime: 'application/json',
|
||||
sha256: sha,
|
||||
expiresAt: Date.now() + 30_000,
|
||||
};
|
||||
const store = { get: jest.fn().mockReturnValue(e) };
|
||||
const controller = new SandboxController(store as any);
|
||||
const res = makeRes();
|
||||
|
||||
await controller.get(VALID_ID, makeReq(), res);
|
||||
|
||||
expect(res._sent.status).toBe(200);
|
||||
const cc = res._sent.headers['cache-control'] as string;
|
||||
expect(cc).toMatch(/^private, max-age=\d+, immutable$/);
|
||||
const maxAge = Number(cc.match(/max-age=(\d+)/)![1]);
|
||||
expect(maxAge).toBeGreaterThanOrEqual(0);
|
||||
expect(maxAge).toBeLessThanOrEqual(60);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { SandboxStore } from './sandbox.store';
|
||||
import { SANDBOX_ROUTE_SEGMENT } from './sandbox.constants';
|
||||
|
||||
// Strict UUID v-agnostic shape. This is anti-traversal / input hygiene (so `:id`
|
||||
// can never be a path like `../...`), NOT authorization — the capability is the
|
||||
@@ -22,7 +23,7 @@ const UUID_RE =
|
||||
* UUID; `:id` is never used as a filesystem path, so there is no traversal
|
||||
* surface. Never returns tokens, never 401s.
|
||||
*/
|
||||
@Controller('sb')
|
||||
@Controller(SANDBOX_ROUTE_SEGMENT)
|
||||
export class SandboxController {
|
||||
constructor(private readonly store: SandboxStore) {}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
|
||||
import { createHash, randomUUID } from 'node:crypto';
|
||||
import { EnvironmentService } from '../environment/environment.service';
|
||||
import { SANDBOX_API_PATH } from './sandbox.constants';
|
||||
|
||||
// In-RAM, process-local blob store. No disk, no DB. Ephemeral by design: a
|
||||
// restart empties it. A blob is addressed by an unguessable randomUUID() which
|
||||
@@ -17,7 +18,6 @@ export interface SandboxPutResult {
|
||||
id: string;
|
||||
sha256: string;
|
||||
size: number;
|
||||
expiresAt: number;
|
||||
}
|
||||
|
||||
@Injectable()
|
||||
@@ -86,7 +86,36 @@ export class SandboxStore implements OnModuleDestroy {
|
||||
const expiresAt = Date.now() + this.environmentService.getSandboxTtlMs();
|
||||
this.map.set(id, { buf, mime, sha256, expiresAt });
|
||||
this.totalBytes += buf.length;
|
||||
return { id, sha256, size: buf.length, expiresAt };
|
||||
return { id, sha256, size: buf.length };
|
||||
}
|
||||
|
||||
/**
|
||||
* Store a blob and return its anonymous read URL plus integrity metadata.
|
||||
* Owns the single sandbox-URL composition (`${publicBase}${SANDBOX_API_PATH}/
|
||||
* <id>`) so callers never hand-build the route; the raw put() stays public for
|
||||
* tests/low-level callers. sha256 is also the blob's strong ETag.
|
||||
*/
|
||||
putAndLink(
|
||||
buf: Buffer,
|
||||
mime: string,
|
||||
): { uri: string; sha256: string; size: number } {
|
||||
const stored = this.put(buf, mime);
|
||||
const base = this.environmentService.getSandboxPublicUrl();
|
||||
return {
|
||||
uri: `${base}${SANDBOX_API_PATH}/${stored.id}`,
|
||||
sha256: stored.sha256,
|
||||
size: stored.size,
|
||||
};
|
||||
}
|
||||
|
||||
/** True if the blob is still live (not evicted/expired). */
|
||||
has(id: string): boolean {
|
||||
return this.get(id) !== undefined;
|
||||
}
|
||||
|
||||
/** Drop a blob by id (public wrapper over the private FIFO evict). */
|
||||
remove(id: string): void {
|
||||
this.evict(id);
|
||||
}
|
||||
|
||||
/** Returns the entry, or undefined if missing OR expired (lazy expiry). */
|
||||
|
||||
@@ -13,6 +13,7 @@ import fastifyCookie from '@fastify/cookie';
|
||||
import fastifyIp from 'fastify-ip';
|
||||
import { InternalLogFilter } from './common/logger/internal-log-filter';
|
||||
import { EnvironmentService } from './integrations/environment/environment.service';
|
||||
import { SANDBOX_API_PATH } from './integrations/sandbox/sandbox.constants';
|
||||
import { resolveFrameHeader } from './common/helpers';
|
||||
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
|
||||
|
||||
@@ -129,7 +130,7 @@ async function bootstrap() {
|
||||
// Anonymous in-RAM blob sandbox: a remote consumer fetches blobs by an
|
||||
// unguessable UUID without any workspace host context, so the
|
||||
// workspace-resolution gate must not apply.
|
||||
'/api/sb',
|
||||
SANDBOX_API_PATH,
|
||||
];
|
||||
|
||||
if (
|
||||
|
||||
@@ -16,7 +16,7 @@ license.
|
||||
> that interface. Other Docmost MCPs are human-shaped — they expose "open the page" and
|
||||
> "replace the page"; this one exposes the editing primitives a model is good at.
|
||||
|
||||
It exposes **38 tools** built around three ideas that the other Docmost MCPs do not
|
||||
It exposes **39 tools** built around three ideas that the other Docmost MCPs do not
|
||||
combine:
|
||||
|
||||
1. **Surgical, token-cheap edits.** Address a single block by id and patch it, or run
|
||||
@@ -106,7 +106,7 @@ There are several Docmost MCPs. Here is a capability-by-capability comparison.
|
||||
|
||||
## Tools
|
||||
|
||||
All 38 tools, grouped by what you'd reach for them.
|
||||
All 39 tools, grouped by what you'd reach for them.
|
||||
|
||||
### Exploration & retrieval
|
||||
|
||||
@@ -203,6 +203,14 @@ All 38 tools, grouped by what you'd reach for them.
|
||||
node referencing the old attachment (recursively, including callouts/tables) via the
|
||||
live document, preserving comments, alignment and alt text. (In-place overwrite is
|
||||
deliberately avoided — some Docmost versions corrupt the attachment on overwrite.)
|
||||
- **`stash_page`** — Serialize a whole page (its full ProseMirror JSON) into an ephemeral
|
||||
in-RAM blob and return ONLY a short anonymous URL — the body never enters the model
|
||||
context, so it is the way to hand a large page (and its images) to an external consumer
|
||||
without truncation. Every internal file/image attachment is mirrored into the same
|
||||
sandbox and its `src` rewritten to a sandbox URL; external http(s) images are left
|
||||
untouched. Returns `{ uri, size, sha256, images:{ mirrored, failed } }` (`sha256` is also
|
||||
the blob's ETag). Blobs are RAM-only, expire after a short TTL (~1h) and are bound to the
|
||||
server instance that created them.
|
||||
|
||||
### Comments
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
> «открыть страницу» и «заменить страницу»; этот даёт примитивы редактирования, в которых
|
||||
> модель сильна.
|
||||
|
||||
Сервер предоставляет **38 инструментов**, построенных вокруг трёх идей, которые другие
|
||||
Сервер предоставляет **39 инструментов**, построенных вокруг трёх идей, которые другие
|
||||
Docmost-MCP не сочетают:
|
||||
|
||||
1. **Точечные, экономичные по токенам правки.** Адресуйте отдельный блок по id и патчите
|
||||
@@ -109,7 +109,7 @@ Docmost-MCP не сочетают:
|
||||
|
||||
## Инструменты
|
||||
|
||||
Все 38 инструментов, сгруппированы по задачам, для которых вы их возьмёте.
|
||||
Все 39 инструментов, сгруппированы по задачам, для которых вы их возьмёте.
|
||||
|
||||
### Чтение и поиск
|
||||
|
||||
@@ -209,6 +209,15 @@ Docmost-MCP не сочетают:
|
||||
коллауты/таблицы), через живой документ, сохраняя комментарии, выравнивание и alt-текст.
|
||||
(Перезапись «по месту» намеренно не используется — некоторые версии Docmost портят
|
||||
вложение при перезаписи.)
|
||||
- **`stash_page`** — Сериализовать страницу целиком (её полный ProseMirror JSON) в
|
||||
эфемерный blob в оперативной памяти и вернуть ТОЛЬКО короткий анонимный URL — тело
|
||||
никогда не попадает в контекст модели, поэтому это способ передать большую страницу
|
||||
(вместе с её изображениями) внешнему потребителю без усечения. Каждое внутреннее
|
||||
файловое/графическое вложение зеркалируется в тот же sandbox, а его `src` переписывается
|
||||
на URL sandbox; внешние http(s)-изображения остаются нетронутыми. Возвращает
|
||||
`{ uri, size, sha256, images:{ mirrored, failed } }` (`sha256` — это также ETag blob'а).
|
||||
Blob'ы хранятся только в оперативной памяти, истекают через короткий TTL (~1 ч) и
|
||||
привязаны к тому экземпляру сервера, который их создал.
|
||||
|
||||
### Комментарии
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import { TiptapTransformer } from "@hocuspocus/transformer";
|
||||
import * as Y from "yjs";
|
||||
import WebSocket from "ws";
|
||||
import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
|
||||
import { collectInternalFileNodes, normalizeFileUrl, } from "./lib/internal-file-urls.js";
|
||||
import { collectInternalFileNodes, normalizeFileUrl, resolveInternalFilePath, } from "./lib/internal-file-urls.js";
|
||||
import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, markdownToProseMirrorCanonical, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, applyDocToFragment, } from "./lib/collaboration.js";
|
||||
import { footnoteWarningsField } from "./lib/footnote-analyze.js";
|
||||
import { buildPageTree } from "./lib/tree.js";
|
||||
@@ -54,6 +54,11 @@ export class DocmostClient {
|
||||
getCollabTokenFn = null;
|
||||
// Optional blob-sandbox sink for the stash tool. Null when not configured.
|
||||
sandboxPut = null;
|
||||
// Optional probes paired with the sink. `has` lets stashPage detect a blob
|
||||
// FIFO-evicted by a LATER put in the same stash; `evict` lets it free this
|
||||
// op's image blobs if the final doc put throws. Null when the sink omits them.
|
||||
sandboxHas = null;
|
||||
sandboxEvict = null;
|
||||
// In-flight login dedup: when the token expires, the 401 interceptor,
|
||||
// ensureAuthenticated, getCollabTokenWithReauth and the two multipart retries
|
||||
// can all call login() at once. Memoizing a single promise collapses that
|
||||
@@ -82,6 +87,8 @@ export class DocmostClient {
|
||||
}
|
||||
if (config.sandbox) {
|
||||
this.sandboxPut = config.sandbox.put;
|
||||
this.sandboxHas = config.sandbox.has ?? null;
|
||||
this.sandboxEvict = config.sandbox.evict ?? null;
|
||||
}
|
||||
this.client = axios.create({
|
||||
baseURL: this.apiUrl,
|
||||
@@ -619,12 +626,16 @@ export class DocmostClient {
|
||||
* and the response Content-Type (mime), defaulting to octet-stream.
|
||||
*
|
||||
* The fetch is size-bounded (hard 64 MiB ceiling) purely to protect memory;
|
||||
* the authoritative per-blob cap is enforced by the sandbox `put`. The leading
|
||||
* `/api` strip means this never escapes the Docmost API base.
|
||||
* the authoritative per-blob cap is enforced by the sandbox `put`. The path is
|
||||
* resolved via resolveInternalFilePath, which REJECTS (throws) any traversal
|
||||
* or percent-encoded src that would let an attacker-controlled `attrs.src`
|
||||
* escape `/api/files/` and reach another internal endpoint (SSRF). That throw
|
||||
* happens before this.client.get, so a malicious src is counted as a failed
|
||||
* mirror — it never reaches the network.
|
||||
*/
|
||||
async fetchInternalFile(src) {
|
||||
const HARD_CEILING = 64 * 1024 * 1024; // 64 MiB memory guard
|
||||
const relPath = src.replace(/^\/api/, "");
|
||||
const relPath = resolveInternalFilePath(src);
|
||||
const response = await this.client.get(relPath, {
|
||||
responseType: "arraybuffer",
|
||||
timeout: 30000,
|
||||
@@ -668,42 +679,82 @@ export class DocmostClient {
|
||||
const cloned = structuredClone(pageJson);
|
||||
// Group internal-file nodes by normalized src so each unique resource is
|
||||
// fetched + stored ONCE (dedup), and every node sharing that src points at
|
||||
// the one sandbox blob.
|
||||
// the one sandbox blob. Capture each node's ORIGINAL raw src per-node:
|
||||
// dedup groups nodes whose normalized src is equal even when their raw srcs
|
||||
// differ (e.g. `/api/files/...` vs the bare `/files/...`), so on a revert we
|
||||
// must restore each node's own original value, not the group key.
|
||||
const bySrc = new Map();
|
||||
for (const node of collectInternalFileNodes(cloned.content)) {
|
||||
const src = normalizeFileUrl(String(node.attrs.src));
|
||||
const origSrc = String(node.attrs.src);
|
||||
const src = normalizeFileUrl(origSrc);
|
||||
const entry = { node, origSrc };
|
||||
const group = bySrc.get(src);
|
||||
if (group)
|
||||
group.push(node);
|
||||
group.push(entry);
|
||||
else
|
||||
bySrc.set(src, [node]);
|
||||
bySrc.set(src, [entry]);
|
||||
}
|
||||
let mirrored = 0;
|
||||
let failed = 0;
|
||||
// Record every successful mirror so it can be (a) reverted if its blob gets
|
||||
// FIFO-evicted by a LATER put in this same stash, and (b) freed if the final
|
||||
// doc put throws.
|
||||
const mirrors = [];
|
||||
const MAX_CONCURRENCY = 5;
|
||||
const groups = [...bySrc.entries()];
|
||||
for (let i = 0; i < groups.length; i += MAX_CONCURRENCY) {
|
||||
const batch = groups.slice(i, i + MAX_CONCURRENCY);
|
||||
await Promise.all(batch.map(async ([src, nodes]) => {
|
||||
await Promise.all(batch.map(async ([src, entries]) => {
|
||||
try {
|
||||
const { buffer, mime } = await this.fetchInternalFile(src);
|
||||
// put may throw if the blob exceeds the per-blob/total caps.
|
||||
const stored = this.sandboxPut(buffer, mime);
|
||||
for (const node of nodes)
|
||||
node.attrs.src = stored.uri;
|
||||
for (const entry of entries)
|
||||
entry.node.attrs.src = stored.uri;
|
||||
mirrors.push({ uri: stored.uri, entries });
|
||||
mirrored++;
|
||||
}
|
||||
catch (err) {
|
||||
// One bad/oversized image must not abort the document.
|
||||
// One bad/oversized image (or a rejected traversal src) must not
|
||||
// abort the document. Logged unconditionally (never the blob body),
|
||||
// matching the package's ungated console.warn convention.
|
||||
failed++;
|
||||
if (process.env.DEBUG) {
|
||||
console.error(`stash_page: failed to mirror "${src}":`, err);
|
||||
}
|
||||
console.warn(`stash_page: failed to mirror "${src}": ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
}));
|
||||
}
|
||||
// Reconcile against FIFO eviction: a heavy page can have a later image-put
|
||||
// evict an EARLIER image stored in this SAME stash. The stored doc must not
|
||||
// reference an evicted blob (consumer 404) and the counts must not lie, so
|
||||
// for any mirror whose blob is gone, revert its nodes to their original
|
||||
// internal srcs and re-count it as failed.
|
||||
if (this.sandboxHas) {
|
||||
for (const mirror of mirrors) {
|
||||
if (!this.sandboxHas(mirror.uri)) {
|
||||
for (const entry of mirror.entries) {
|
||||
entry.node.attrs.src = entry.origSrc;
|
||||
}
|
||||
mirrored--;
|
||||
failed++;
|
||||
console.warn(`stash_page: mirrored blob ${mirror.uri} was evicted before ` +
|
||||
`the doc was stored; reverted its src and counted it as failed`);
|
||||
}
|
||||
}
|
||||
}
|
||||
const docBuf = Buffer.from(JSON.stringify(cloned), "utf8");
|
||||
const stored = this.sandboxPut(docBuf, "application/json");
|
||||
let stored;
|
||||
try {
|
||||
stored = this.sandboxPut(docBuf, "application/json");
|
||||
}
|
||||
catch (err) {
|
||||
// The doc put failed (e.g. doc exceeds the cap). Free this op's image
|
||||
// blobs instead of leaking them in RAM for the whole TTL, then re-throw.
|
||||
if (this.sandboxEvict) {
|
||||
for (const mirror of mirrors)
|
||||
this.sandboxEvict(mirror.uri);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
return {
|
||||
uri: stored.uri,
|
||||
sha256: stored.sha256,
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Mirrors editor-ext's isInternalFileUrl / normalizeFileUrl (kept as a local
|
||||
// dup so the ESM mcp package does not depend on the editor-ext build).
|
||||
export function isInternalFileUrl(url) {
|
||||
function isInternalFileUrl(url) {
|
||||
if (typeof url !== "string")
|
||||
return false;
|
||||
const normalized = url.trim();
|
||||
@@ -24,6 +24,61 @@ export function normalizeFileUrl(src) {
|
||||
return "/api" + trimmed;
|
||||
return trimmed;
|
||||
}
|
||||
/**
|
||||
* Resolve a page-content `src` into the safe, `/api`-relative path the stash
|
||||
* tool may fetch over the authenticated loopback client — or THROW.
|
||||
*
|
||||
* SECURITY (SSRF / path-traversal): `src` comes from page content and is fully
|
||||
* attacker-controllable. The mirroring fetch runs through the AUTHENTICATED
|
||||
* loopback axios client whose baseURL ends in `/api`, so a naive
|
||||
* `src.replace(/^\/api/, "")` lets a crafted value like
|
||||
* `/api/files/../auth/whoami` collapse (via axios/WHATWG URL `..` resolution)
|
||||
* into an ARBITRARY internal GET endpoint, whose authed response would then be
|
||||
* stored in the anonymous sandbox (SSRF + data exfiltration). A prefix-only
|
||||
* `startsWith("/api/files/")` check does NOT defend against this because the
|
||||
* `..` segments are still present in the raw string and resolved later.
|
||||
*
|
||||
* This function defeats that by resolving the canonical pathname FIRST and only
|
||||
* then asserting it still lives under `/api/files/`:
|
||||
* - it rejects any percent-encoded dot/slash (`%2e` / `%2f`): the WHATWG URL
|
||||
* parser collapses LITERAL `../` but does NOT decode `%2f` separators, so a
|
||||
* content-controlled src must never be allowed to smuggle those past the
|
||||
* canonicalization;
|
||||
* - it resolves `new URL(trimmed, "http://internal.invalid").pathname`, which
|
||||
* normalizes `..`/`.` segments (e.g. `/api/files/../auth/whoami` →
|
||||
* `/api/auth/whoami`);
|
||||
* - it then requires the canonical pathname to start with `/api/files/`, so a
|
||||
* traversal that escaped that subtree is rejected.
|
||||
*
|
||||
* Returns the path RELATIVE to the `/api` base (e.g. `/files/<id>/<name>`),
|
||||
* ready to hand to the loopback client. The throw happens BEFORE any network
|
||||
* call, so a rejected src is counted as a failed mirror and its original src is
|
||||
* kept (the per-image try/catch in stashPage never aborts the whole document).
|
||||
*/
|
||||
export function resolveInternalFilePath(src) {
|
||||
const trimmed = src.trim();
|
||||
// Percent-encoded dot/slash must never reach the URL canonicalizer: the
|
||||
// WHATWG parser does NOT decode `%2f` into a path separator, so an encoded
|
||||
// `..%2fauth` would survive canonicalization and still escape /api/files/.
|
||||
if (/%2e|%2f/i.test(trimmed)) {
|
||||
throw new Error(`Refusing internal file src with percent-encoded path segment: "${src}"`);
|
||||
}
|
||||
let pathname;
|
||||
try {
|
||||
// The base host is irrelevant (never contacted); it only lets the parser
|
||||
// resolve a relative `src` and normalize `..`/`.` segments.
|
||||
pathname = new URL(trimmed, "http://internal.invalid").pathname;
|
||||
}
|
||||
catch {
|
||||
throw new Error(`Invalid internal file src: "${src}"`);
|
||||
}
|
||||
if (!pathname.startsWith("/api/files/")) {
|
||||
throw new Error(`Refusing internal file src that escapes /api/files/: "${src}"`);
|
||||
}
|
||||
// Strip the `/api` base prefix; the loopback client's baseURL already ends
|
||||
// in `/api`, so it expects the path relative to that (e.g. /files/<id>/<f>).
|
||||
return pathname.replace(/^\/api/, "");
|
||||
}
|
||||
/**
|
||||
* Recursively collect every node whose `attrs.src` is an internal file URL.
|
||||
* Returns references to the live nodes (so the caller can rewrite `attrs.src`
|
||||
|
||||
@@ -224,7 +224,10 @@ export const SHARED_TOOL_SPECS = {
|
||||
'failed} }. Integrity: the blob is served with ETag = its sha256, so a ' +
|
||||
'truncated/corrupted fetch is detectable. Blobs are RAM-only: they expire ' +
|
||||
'after a short TTL (~1h) and are cleared on restart — consume the URL ' +
|
||||
'within the TTL and one uptime, or re-stash.',
|
||||
'within the TTL and one uptime, or re-stash. A blob is bound to the ' +
|
||||
'server instance that created it: in a multi-replica deployment without ' +
|
||||
'sticky sessions a blob stored on one instance is not retrievable via the ' +
|
||||
'sandbox URL on another (it 404s like an expired one).',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
}),
|
||||
|
||||
@@ -16,6 +16,7 @@ import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
|
||||
import {
|
||||
collectInternalFileNodes,
|
||||
normalizeFileUrl,
|
||||
resolveInternalFilePath,
|
||||
} from "./lib/internal-file-urls.js";
|
||||
import {
|
||||
updatePageContentRealtime,
|
||||
@@ -122,8 +123,14 @@ export type DocmostMcpConfig = { apiUrl: string } & (
|
||||
// both branches; see the type doc above.
|
||||
getCollabToken?: () => Promise<string>;
|
||||
// Optional blob sandbox sink. Present only where the stash tool is wired;
|
||||
// when absent, stash_page throws a clear "not configured" error.
|
||||
sandbox?: { put: SandboxPut };
|
||||
// when absent, stash_page throws a clear "not configured" error. The
|
||||
// optional `has`/`evict` probes let stashPage keep its mirror counts honest
|
||||
// under the store's FIFO eviction (see stashPage); older sinks omit them.
|
||||
sandbox?: {
|
||||
put: SandboxPut;
|
||||
has?: (uri: string) => boolean;
|
||||
evict?: (uri: string) => void;
|
||||
};
|
||||
};
|
||||
|
||||
export class DocmostClient {
|
||||
@@ -143,6 +150,11 @@ export class DocmostClient {
|
||||
private getCollabTokenFn: (() => Promise<string>) | null = null;
|
||||
// Optional blob-sandbox sink for the stash tool. Null when not configured.
|
||||
private sandboxPut: SandboxPut | null = null;
|
||||
// Optional probes paired with the sink. `has` lets stashPage detect a blob
|
||||
// FIFO-evicted by a LATER put in the same stash; `evict` lets it free this
|
||||
// op's image blobs if the final doc put throws. Null when the sink omits them.
|
||||
private sandboxHas: ((uri: string) => boolean) | null = null;
|
||||
private sandboxEvict: ((uri: string) => void) | null = null;
|
||||
// In-flight login dedup: when the token expires, the 401 interceptor,
|
||||
// ensureAuthenticated, getCollabTokenWithReauth and the two multipart retries
|
||||
// can all call login() at once. Memoizing a single promise collapses that
|
||||
@@ -184,6 +196,8 @@ export class DocmostClient {
|
||||
}
|
||||
if (config.sandbox) {
|
||||
this.sandboxPut = config.sandbox.put;
|
||||
this.sandboxHas = config.sandbox.has ?? null;
|
||||
this.sandboxEvict = config.sandbox.evict ?? null;
|
||||
}
|
||||
this.client = axios.create({
|
||||
baseURL: this.apiUrl,
|
||||
@@ -795,14 +809,18 @@ export class DocmostClient {
|
||||
* and the response Content-Type (mime), defaulting to octet-stream.
|
||||
*
|
||||
* The fetch is size-bounded (hard 64 MiB ceiling) purely to protect memory;
|
||||
* the authoritative per-blob cap is enforced by the sandbox `put`. The leading
|
||||
* `/api` strip means this never escapes the Docmost API base.
|
||||
* the authoritative per-blob cap is enforced by the sandbox `put`. The path is
|
||||
* resolved via resolveInternalFilePath, which REJECTS (throws) any traversal
|
||||
* or percent-encoded src that would let an attacker-controlled `attrs.src`
|
||||
* escape `/api/files/` and reach another internal endpoint (SSRF). That throw
|
||||
* happens before this.client.get, so a malicious src is counted as a failed
|
||||
* mirror — it never reaches the network.
|
||||
*/
|
||||
private async fetchInternalFile(
|
||||
src: string,
|
||||
): Promise<{ buffer: Buffer; mime: string }> {
|
||||
const HARD_CEILING = 64 * 1024 * 1024; // 64 MiB memory guard
|
||||
const relPath = src.replace(/^\/api/, "");
|
||||
const relPath = resolveInternalFilePath(src);
|
||||
const response = await this.client.get(relPath, {
|
||||
responseType: "arraybuffer",
|
||||
timeout: 30000,
|
||||
@@ -857,42 +875,90 @@ export class DocmostClient {
|
||||
|
||||
// Group internal-file nodes by normalized src so each unique resource is
|
||||
// fetched + stored ONCE (dedup), and every node sharing that src points at
|
||||
// the one sandbox blob.
|
||||
const bySrc = new Map<string, any[]>();
|
||||
// the one sandbox blob. Capture each node's ORIGINAL raw src per-node:
|
||||
// dedup groups nodes whose normalized src is equal even when their raw srcs
|
||||
// differ (e.g. `/api/files/...` vs the bare `/files/...`), so on a revert we
|
||||
// must restore each node's own original value, not the group key.
|
||||
const bySrc = new Map<string, Array<{ node: any; origSrc: string }>>();
|
||||
for (const node of collectInternalFileNodes(cloned.content)) {
|
||||
const src = normalizeFileUrl(String(node.attrs.src));
|
||||
const origSrc = String(node.attrs.src);
|
||||
const src = normalizeFileUrl(origSrc);
|
||||
const entry = { node, origSrc };
|
||||
const group = bySrc.get(src);
|
||||
if (group) group.push(node);
|
||||
else bySrc.set(src, [node]);
|
||||
if (group) group.push(entry);
|
||||
else bySrc.set(src, [entry]);
|
||||
}
|
||||
|
||||
let mirrored = 0;
|
||||
let failed = 0;
|
||||
// Record every successful mirror so it can be (a) reverted if its blob gets
|
||||
// FIFO-evicted by a LATER put in this same stash, and (b) freed if the final
|
||||
// doc put throws.
|
||||
const mirrors: Array<{
|
||||
uri: string;
|
||||
entries: Array<{ node: any; origSrc: string }>;
|
||||
}> = [];
|
||||
const MAX_CONCURRENCY = 5;
|
||||
const groups = [...bySrc.entries()];
|
||||
for (let i = 0; i < groups.length; i += MAX_CONCURRENCY) {
|
||||
const batch = groups.slice(i, i + MAX_CONCURRENCY);
|
||||
await Promise.all(
|
||||
batch.map(async ([src, nodes]) => {
|
||||
batch.map(async ([src, entries]) => {
|
||||
try {
|
||||
const { buffer, mime } = await this.fetchInternalFile(src);
|
||||
// put may throw if the blob exceeds the per-blob/total caps.
|
||||
const stored = this.sandboxPut!(buffer, mime);
|
||||
for (const node of nodes) node.attrs.src = stored.uri;
|
||||
for (const entry of entries) entry.node.attrs.src = stored.uri;
|
||||
mirrors.push({ uri: stored.uri, entries });
|
||||
mirrored++;
|
||||
} catch (err) {
|
||||
// One bad/oversized image must not abort the document.
|
||||
// One bad/oversized image (or a rejected traversal src) must not
|
||||
// abort the document. Logged unconditionally (never the blob body),
|
||||
// matching the package's ungated console.warn convention.
|
||||
failed++;
|
||||
if (process.env.DEBUG) {
|
||||
console.error(`stash_page: failed to mirror "${src}":`, err);
|
||||
}
|
||||
console.warn(
|
||||
`stash_page: failed to mirror "${src}": ${
|
||||
err instanceof Error ? err.message : String(err)
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// Reconcile against FIFO eviction: a heavy page can have a later image-put
|
||||
// evict an EARLIER image stored in this SAME stash. The stored doc must not
|
||||
// reference an evicted blob (consumer 404) and the counts must not lie, so
|
||||
// for any mirror whose blob is gone, revert its nodes to their original
|
||||
// internal srcs and re-count it as failed.
|
||||
if (this.sandboxHas) {
|
||||
for (const mirror of mirrors) {
|
||||
if (!this.sandboxHas(mirror.uri)) {
|
||||
for (const entry of mirror.entries) {
|
||||
entry.node.attrs.src = entry.origSrc;
|
||||
}
|
||||
mirrored--;
|
||||
failed++;
|
||||
console.warn(
|
||||
`stash_page: mirrored blob ${mirror.uri} was evicted before ` +
|
||||
`the doc was stored; reverted its src and counted it as failed`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const docBuf = Buffer.from(JSON.stringify(cloned), "utf8");
|
||||
const stored = this.sandboxPut(docBuf, "application/json");
|
||||
let stored: { uri: string; sha256: string; size: number };
|
||||
try {
|
||||
stored = this.sandboxPut(docBuf, "application/json");
|
||||
} catch (err) {
|
||||
// The doc put failed (e.g. doc exceeds the cap). Free this op's image
|
||||
// blobs instead of leaking them in RAM for the whole TTL, then re-throw.
|
||||
if (this.sandboxEvict) {
|
||||
for (const mirror of mirrors) this.sandboxEvict(mirror.uri);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
return {
|
||||
uri: stored.uri,
|
||||
sha256: stored.sha256,
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
// Mirrors editor-ext's isInternalFileUrl / normalizeFileUrl (kept as a local
|
||||
// dup so the ESM mcp package does not depend on the editor-ext build).
|
||||
|
||||
export function isInternalFileUrl(url: unknown): boolean {
|
||||
function isInternalFileUrl(url: unknown): boolean {
|
||||
if (typeof url !== "string") return false;
|
||||
const normalized = url.trim();
|
||||
return (
|
||||
@@ -27,6 +27,65 @@ export function normalizeFileUrl(src: string): string {
|
||||
return trimmed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a page-content `src` into the safe, `/api`-relative path the stash
|
||||
* tool may fetch over the authenticated loopback client — or THROW.
|
||||
*
|
||||
* SECURITY (SSRF / path-traversal): `src` comes from page content and is fully
|
||||
* attacker-controllable. The mirroring fetch runs through the AUTHENTICATED
|
||||
* loopback axios client whose baseURL ends in `/api`, so a naive
|
||||
* `src.replace(/^\/api/, "")` lets a crafted value like
|
||||
* `/api/files/../auth/whoami` collapse (via axios/WHATWG URL `..` resolution)
|
||||
* into an ARBITRARY internal GET endpoint, whose authed response would then be
|
||||
* stored in the anonymous sandbox (SSRF + data exfiltration). A prefix-only
|
||||
* `startsWith("/api/files/")` check does NOT defend against this because the
|
||||
* `..` segments are still present in the raw string and resolved later.
|
||||
*
|
||||
* This function defeats that by resolving the canonical pathname FIRST and only
|
||||
* then asserting it still lives under `/api/files/`:
|
||||
* - it rejects any percent-encoded dot/slash (`%2e` / `%2f`): the WHATWG URL
|
||||
* parser collapses LITERAL `../` but does NOT decode `%2f` separators, so a
|
||||
* content-controlled src must never be allowed to smuggle those past the
|
||||
* canonicalization;
|
||||
* - it resolves `new URL(trimmed, "http://internal.invalid").pathname`, which
|
||||
* normalizes `..`/`.` segments (e.g. `/api/files/../auth/whoami` →
|
||||
* `/api/auth/whoami`);
|
||||
* - it then requires the canonical pathname to start with `/api/files/`, so a
|
||||
* traversal that escaped that subtree is rejected.
|
||||
*
|
||||
* Returns the path RELATIVE to the `/api` base (e.g. `/files/<id>/<name>`),
|
||||
* ready to hand to the loopback client. The throw happens BEFORE any network
|
||||
* call, so a rejected src is counted as a failed mirror and its original src is
|
||||
* kept (the per-image try/catch in stashPage never aborts the whole document).
|
||||
*/
|
||||
export function resolveInternalFilePath(src: string): string {
|
||||
const trimmed = src.trim();
|
||||
// Percent-encoded dot/slash must never reach the URL canonicalizer: the
|
||||
// WHATWG parser does NOT decode `%2f` into a path separator, so an encoded
|
||||
// `..%2fauth` would survive canonicalization and still escape /api/files/.
|
||||
if (/%2e|%2f/i.test(trimmed)) {
|
||||
throw new Error(
|
||||
`Refusing internal file src with percent-encoded path segment: "${src}"`,
|
||||
);
|
||||
}
|
||||
let pathname: string;
|
||||
try {
|
||||
// The base host is irrelevant (never contacted); it only lets the parser
|
||||
// resolve a relative `src` and normalize `..`/`.` segments.
|
||||
pathname = new URL(trimmed, "http://internal.invalid").pathname;
|
||||
} catch {
|
||||
throw new Error(`Invalid internal file src: "${src}"`);
|
||||
}
|
||||
if (!pathname.startsWith("/api/files/")) {
|
||||
throw new Error(
|
||||
`Refusing internal file src that escapes /api/files/: "${src}"`,
|
||||
);
|
||||
}
|
||||
// Strip the `/api` base prefix; the loopback client's baseURL already ends
|
||||
// in `/api`, so it expects the path relative to that (e.g. /files/<id>/<f>).
|
||||
return pathname.replace(/^\/api/, "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively collect every node whose `attrs.src` is an internal file URL.
|
||||
* Returns references to the live nodes (so the caller can rewrite `attrs.src`
|
||||
|
||||
@@ -283,7 +283,10 @@ export const SHARED_TOOL_SPECS = {
|
||||
'failed} }. Integrity: the blob is served with ETag = its sha256, so a ' +
|
||||
'truncated/corrupted fetch is detectable. Blobs are RAM-only: they expire ' +
|
||||
'after a short TTL (~1h) and are cleared on restart — consume the URL ' +
|
||||
'within the TTL and one uptime, or re-stash.',
|
||||
'within the TTL and one uptime, or re-stash. A blob is bound to the ' +
|
||||
'server instance that created it: in a multi-replica deployment without ' +
|
||||
'sticky sessions a blob stored on one instance is not retrievable via the ' +
|
||||
'sandbox URL on another (it 404s like an expired one).',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
}),
|
||||
|
||||
@@ -41,17 +41,53 @@ after(async () => {
|
||||
|
||||
// In-memory sandbox sink mirroring the host binding: store the blob, return a
|
||||
// uri + sha256 + size. Records every put so the test can inspect what was
|
||||
// stashed (and verify the doc body never leaves via the return value).
|
||||
function makeSandbox() {
|
||||
// stashed (and verify the doc body never leaves via the return value). Models
|
||||
// the real store's FIFO eviction + cap + the has/evict probes so B1 (self-
|
||||
// eviction reconciliation and doc-put-throw cleanup) is testable. Default
|
||||
// maxTotal is effectively unlimited so the happy-path tests behave as before.
|
||||
//
|
||||
// `throwOnJson` forces the final document put to throw, standing in for "doc
|
||||
// exceeds the cap".
|
||||
function makeSandbox({ maxTotal = Infinity, throwOnJson = false } = {}) {
|
||||
const puts = [];
|
||||
const evicted = [];
|
||||
// id -> size, in insertion order (Map preserves it) so the oldest is first.
|
||||
const live = new Map();
|
||||
let total = 0;
|
||||
const idOf = (uri) => uri.substring(uri.lastIndexOf("/") + 1);
|
||||
return {
|
||||
puts,
|
||||
evicted,
|
||||
put(buf, mime) {
|
||||
if (throwOnJson && mime === "application/json") {
|
||||
throw new Error("doc blob exceeds the sandbox cap");
|
||||
}
|
||||
const sha256 = createHash("sha256").update(buf).digest("hex");
|
||||
const id = `id-${puts.length}`;
|
||||
puts.push({ buf, mime, sha256 });
|
||||
puts.push({ buf, mime, sha256, id });
|
||||
live.set(id, buf.length);
|
||||
total += buf.length;
|
||||
// FIFO-evict the oldest live blobs until this put fits under the cap.
|
||||
while (total > maxTotal && live.size > 0) {
|
||||
const oldest = live.keys().next().value;
|
||||
if (oldest === id) break; // never evict the blob we just stored
|
||||
total -= live.get(oldest);
|
||||
live.delete(oldest);
|
||||
evicted.push(oldest);
|
||||
}
|
||||
return { uri: `https://sb.test/api/sb/${id}`, sha256, size: buf.length };
|
||||
},
|
||||
has(uri) {
|
||||
return live.has(idOf(uri));
|
||||
},
|
||||
evict(uri) {
|
||||
const id = idOf(uri);
|
||||
if (live.has(id)) {
|
||||
total -= live.get(id);
|
||||
live.delete(id);
|
||||
}
|
||||
evicted.push(id);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -80,8 +116,18 @@ function pageDoc() {
|
||||
}
|
||||
|
||||
// Build a client wired to a server that logs in, serves the page, and serves the
|
||||
// internal file bytes. `fileStatus` lets a test force the file fetch to fail.
|
||||
async function buildClient(sandbox, { fileStatus = 200 } = {}) {
|
||||
// internal file bytes. `fileStatus` lets a test force the file fetch to fail;
|
||||
// `doc` overrides the served page; `fileBytes`/`fileHeaders` shape the file
|
||||
// response (used by the empty-body / missing-Content-Type branch tests).
|
||||
async function buildClient(
|
||||
sandbox,
|
||||
{
|
||||
fileStatus = 200,
|
||||
doc = pageDoc(),
|
||||
fileBytes = IMAGE_BYTES,
|
||||
fileHeaders = { "Content-Type": "image/png" },
|
||||
} = {},
|
||||
) {
|
||||
const baseURL = await spawn(async (req, res) => {
|
||||
await readBody(req);
|
||||
if (req.url === "/api/auth/login") {
|
||||
@@ -94,17 +140,17 @@ async function buildClient(sandbox, { fileStatus = 200 } = {}) {
|
||||
}
|
||||
if (req.url === "/api/pages/info") {
|
||||
res.writeHead(200, { "Content-Type": "application/json" });
|
||||
res.end(JSON.stringify({ data: { id: "page-1", title: "T", content: pageDoc() } }));
|
||||
res.end(JSON.stringify({ data: { id: "page-1", title: "T", content: doc } }));
|
||||
return;
|
||||
}
|
||||
if (req.url.startsWith("/api/files/att-1/")) {
|
||||
if (req.url.startsWith("/api/files/")) {
|
||||
if (fileStatus !== 200) {
|
||||
res.writeHead(fileStatus);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
res.writeHead(200, { "Content-Type": "image/png" });
|
||||
res.end(IMAGE_BYTES);
|
||||
res.writeHead(200, fileHeaders);
|
||||
res.end(fileBytes);
|
||||
return;
|
||||
}
|
||||
res.writeHead(404);
|
||||
@@ -114,10 +160,26 @@ async function buildClient(sandbox, { fileStatus = 200 } = {}) {
|
||||
apiUrl: baseURL,
|
||||
email: "u@example.com",
|
||||
password: "pw",
|
||||
sandbox: { put: (buf, mime) => sandbox.put(buf, mime) },
|
||||
sandbox: {
|
||||
put: (buf, mime) => sandbox.put(buf, mime),
|
||||
has: (uri) => sandbox.has(uri),
|
||||
evict: (uri) => sandbox.evict(uri),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// A page with several DISTINCT internal images (each a unique attachment id) so
|
||||
// each is its own sandbox blob — needed to exercise FIFO self-eviction.
|
||||
function multiImageDoc(n) {
|
||||
return {
|
||||
type: "doc",
|
||||
content: Array.from({ length: n }, (_, i) => ({
|
||||
type: "image",
|
||||
attrs: { src: `/api/files/att-${i}/pic.png`, attachmentId: `att-${i}` },
|
||||
})),
|
||||
};
|
||||
}
|
||||
|
||||
test("stashPage stores the doc + mirrors/rewrites internal images, returns only a link", async () => {
|
||||
const sandbox = makeSandbox();
|
||||
const client = await buildClient(sandbox);
|
||||
@@ -181,3 +243,88 @@ test("stashPage throws a clear error when no sandbox is configured", async () =>
|
||||
});
|
||||
await assert.rejects(() => client.stashPage("page-1"), /not configured/);
|
||||
});
|
||||
|
||||
test("stashPage reverts a FIFO-evicted image and counts it as failed (B1)", async () => {
|
||||
// 3 distinct images of S=4000 bytes each; doc JSON is far smaller than one
|
||||
// image. With a cap of 4500: storing img1 evicts img0, storing img2 evicts
|
||||
// img1 — so only img2 survives the loop (img0 + img1 reverted). The doc
|
||||
// (4000 + a few hundred bytes <= 4500) then fits alongside the survivor, so it
|
||||
// does NOT trigger further eviction. The stored doc must therefore reference
|
||||
// exactly one live blob and revert the other two to their internal srcs.
|
||||
const BIG = Buffer.alloc(4000, 0x41);
|
||||
const sandbox = makeSandbox({ maxTotal: 4500 });
|
||||
const client = await buildClient(sandbox, {
|
||||
doc: multiImageDoc(3),
|
||||
fileBytes: BIG,
|
||||
});
|
||||
|
||||
const result = await client.stashPage("page-1");
|
||||
|
||||
// Two images were evicted before the doc was stored -> counted as failed.
|
||||
assert.deepEqual(result.images, { mirrored: 1, failed: 2 });
|
||||
|
||||
// Inspect the stashed doc: no node may point at an evicted (now-dead) blob,
|
||||
// and every reverted node carries its ORIGINAL internal src again.
|
||||
const docPut = sandbox.puts.find((p) => p.mime === "application/json");
|
||||
const stashed = JSON.parse(docPut.buf.toString("utf8"));
|
||||
const imgs = stashed.content.content.filter((n) => n.type === "image");
|
||||
let live = 0;
|
||||
let reverted = 0;
|
||||
for (const img of imgs) {
|
||||
const src = img.attrs.src;
|
||||
if (src.startsWith("https://sb.test/api/sb/")) {
|
||||
assert.ok(sandbox.has(src), `doc references evicted blob ${src}`);
|
||||
live++;
|
||||
} else {
|
||||
// Reverted to the original internal src.
|
||||
assert.match(src, /^\/api\/files\/att-\d+\/pic\.png$/);
|
||||
reverted++;
|
||||
}
|
||||
}
|
||||
assert.equal(live, 1);
|
||||
assert.equal(reverted, 2);
|
||||
});
|
||||
|
||||
test("stashPage frees image blobs when the doc put throws (B1)", async () => {
|
||||
// Two distinct images mirror fine; the final JSON doc put throws (doc exceeds
|
||||
// cap). stashPage must reject AND evict every image blob it stored this op.
|
||||
const sandbox = makeSandbox({ throwOnJson: true });
|
||||
const client = await buildClient(sandbox, { doc: multiImageDoc(2) });
|
||||
|
||||
await assert.rejects(() => client.stashPage("page-1"));
|
||||
|
||||
// Both image blobs were stored, then evicted on the doc-put failure.
|
||||
const imagePuts = sandbox.puts.filter((p) => p.mime === "image/png");
|
||||
assert.equal(imagePuts.length, 2);
|
||||
for (const p of imagePuts) {
|
||||
assert.ok(sandbox.evicted.includes(p.id), `image ${p.id} was not freed`);
|
||||
}
|
||||
});
|
||||
|
||||
test("stashPage counts an empty file response as failed (B1/fetchInternalFile)", async () => {
|
||||
const sandbox = makeSandbox();
|
||||
const client = await buildClient(sandbox, {
|
||||
fileBytes: Buffer.alloc(0),
|
||||
fileHeaders: { "Content-Type": "image/png", "Content-Length": "0" },
|
||||
});
|
||||
|
||||
const result = await client.stashPage("page-1");
|
||||
|
||||
// The single internal image (deduped) yielded an empty body -> failed.
|
||||
assert.deepEqual(result.images, { mirrored: 0, failed: 1 });
|
||||
// Only the doc blob was stored.
|
||||
assert.equal(sandbox.puts.filter((p) => p.mime === "image/png").length, 0);
|
||||
});
|
||||
|
||||
test("stashPage mirrors a file with no Content-Type as octet-stream (fetchInternalFile)", async () => {
|
||||
const sandbox = makeSandbox();
|
||||
// No Content-Type header at all -> fetchInternalFile defaults to octet-stream.
|
||||
const client = await buildClient(sandbox, { fileHeaders: {} });
|
||||
|
||||
const result = await client.stashPage("page-1");
|
||||
|
||||
assert.equal(result.images.mirrored, 1);
|
||||
const imagePut = sandbox.puts.find((p) => p.mime !== "application/json");
|
||||
assert.ok(imagePut, "expected an image put");
|
||||
assert.equal(imagePut.mime, "application/octet-stream");
|
||||
});
|
||||
|
||||
61
packages/mcp/test/unit/internal-file-urls.test.mjs
Normal file
61
packages/mcp/test/unit/internal-file-urls.test.mjs
Normal file
@@ -0,0 +1,61 @@
|
||||
// Unit tests for the internal-file URL helpers the stash tool relies on. The
|
||||
// critical case is resolveInternalFilePath, whose whole job is to REJECT a
|
||||
// content-controlled `src` that tries to escape /api/files/ (SSRF / traversal)
|
||||
// before it ever reaches the authenticated loopback client.
|
||||
import { test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
resolveInternalFilePath,
|
||||
normalizeFileUrl,
|
||||
collectInternalFileNodes,
|
||||
} from "../../build/lib/internal-file-urls.js";
|
||||
|
||||
test("resolveInternalFilePath accepts a normal internal src", () => {
|
||||
assert.equal(
|
||||
resolveInternalFilePath("/api/files/att-1/pic.png"),
|
||||
"/files/att-1/pic.png",
|
||||
);
|
||||
});
|
||||
|
||||
test("resolveInternalFilePath rejects traversal / encoded variants (SSRF guard)", () => {
|
||||
// `..` collapses to /api/auth/whoami -> outside /api/files/ -> rejected.
|
||||
assert.throws(() => resolveInternalFilePath("/api/files/../auth/whoami"));
|
||||
// Escapes the /api base entirely.
|
||||
assert.throws(() => resolveInternalFilePath("/api/files/../../internal"));
|
||||
// Percent-encoded dot -> rejected before canonicalization.
|
||||
assert.throws(() => resolveInternalFilePath("/api/files/%2e%2e/x"));
|
||||
// Percent-encoded slash separator -> rejected before canonicalization.
|
||||
assert.throws(() => resolveInternalFilePath("/api/files/..%2fauth"));
|
||||
});
|
||||
|
||||
test("normalizeFileUrl rewrites the bare /files/ branch and leaves /api/files/ alone", () => {
|
||||
assert.equal(
|
||||
normalizeFileUrl("/files/att-1/pic.png"),
|
||||
"/api/files/att-1/pic.png",
|
||||
);
|
||||
assert.equal(
|
||||
normalizeFileUrl("/api/files/att-1/pic.png"),
|
||||
"/api/files/att-1/pic.png",
|
||||
);
|
||||
});
|
||||
|
||||
test("collectInternalFileNodes recurses into nested content containers", () => {
|
||||
// The internal image is buried inside a callout's content array, so a
|
||||
// regression on the recursion (e.g. a shallow .filter()) would miss it.
|
||||
const nested = {
|
||||
type: "image",
|
||||
attrs: { src: "/api/files/att-9/deep.png", attachmentId: "att-9" },
|
||||
};
|
||||
const doc = {
|
||||
type: "doc",
|
||||
content: [
|
||||
{
|
||||
type: "callout",
|
||||
content: [{ type: "paragraph", content: [nested] }],
|
||||
},
|
||||
],
|
||||
};
|
||||
const found = collectInternalFileNodes(doc);
|
||||
assert.equal(found.length, 1);
|
||||
assert.equal(found[0], nested);
|
||||
});
|
||||
Reference in New Issue
Block a user