diff --git a/packages/mcp/README.md b/packages/mcp/README.md index fc61a2b2..2adb9024 100644 --- a/packages/mcp/README.md +++ b/packages/mcp/README.md @@ -194,10 +194,11 @@ All 38 tools, grouped by what you'd reach for them. ### Images -- **`insert_image`** — Upload a local image and insert it in one step: append it, drop it - in place of a text placeholder (`replaceText`), or put it after a given block - (`afterText`). Preserves all other block ids. -- **`replace_image`** — Swap an existing image. Uploads the new file as a **fresh +- **`insert_image`** — Download an image from a web (http/https) URL and insert it in one + step: append it, drop it in place of a text placeholder (`replaceText`), or put it after + a given block (`afterText`). Preserves all other block ids. +- **`replace_image`** — Swap an existing image for one fetched from a web (http/https) URL. + Uploads the new file as a **fresh attachment** (clean URL that renders and busts browser caches), then re-points every node referencing the old attachment (recursively, including callouts/tables) via the live document, preserving comments, alignment and alt text. 
(In-place overwrite is diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index 92ea7aef..b8111cc9 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -1,6 +1,5 @@ import FormData from "form-data"; import axios from "axios"; -import { readFileSync, statSync } from "fs"; import { basename, extname } from "path"; import { filterWorkspace, filterSpace, filterPage, filterComment, filterSearchResult, } from "./lib/filters.js"; import { HocuspocusProvider } from "@hocuspocus/provider"; @@ -18,6 +17,23 @@ import { getCollabToken, performLogin } from "./lib/auth-utils.js"; import { diffDocs } from "./lib/diff.js"; import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, } from "./lib/transforms.js"; import vm from "node:vm"; +// Supported image types, kept as two lookup tables so both a URL path +// extension and a remote Content-Type can be mapped to the same canonical set. +const EXT_TO_MIME = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".svg": "image/svg+xml", +}; +const MIME_TO_EXT = { + "image/png": ".png", + "image/jpeg": ".jpg", + "image/gif": ".gif", + "image/webp": ".webp", + "image/svg+xml": ".svg", +}; export class DocmostClient { client; token = null; @@ -1658,22 +1674,103 @@ export class DocmostClient { }; } // --- Image upload / embedding --- - /** Map a file extension to a supported image MIME type (throws otherwise). 
*/ - imageMimeFromPath(filePath) { - const ext = extname(filePath).toLowerCase(); - const map = { - ".png": "image/png", - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".gif": "image/gif", - ".webp": "image/webp", - ".svg": "image/svg+xml", - }; - const mime = map[ext]; - if (!mime) { - throw new Error(`unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`); + /** Map a Content-Type string to a supported MIME type, or null if unsupported. */ + supportedImageMime(ct) { + return MIME_TO_EXT[ct] ? ct : null; + } + /** + * Download a remote image from a caller-supplied URL and resolve its bytes, + * MIME and a filename. + * + * SSRF / RESOURCE TRUST BOUNDARY: the URL comes from the MCP caller and is + * fetched BY THE SERVER, so it must be guarded before and after the request. + * The guards applied here are: + * - scheme allowlist (http/https only) — rejects file:, data:, ftp:, etc., + * so the caller cannot use this path to read local files or other schemes; + * - a size cap enforced both via axios maxContentLength/maxBodyLength AND a + * post-download buffer.length re-check (defends against a missing/lying + * Content-Length), so a huge response cannot exhaust memory; + * - a 30s timeout. The timeout matters because replaceImage holds the + * per-page lock across this upload, so a hung download would wedge the + * lock for that page. + * NOTE(review): private/loopback/link-local IP ranges are NOT blocked, so a + * caller can make this server fetch internal URLs (SSRF). The earlier rationale + * (callers could already read host files via filePath) no longer applies now + * that local paths are unsupported; restrict egress or add an IP check. + */ + async fetchRemoteImage(url, maxBytes) { + // Scheme allowlist first — cheapest guard, and rejects non-http(s) schemes + // (file:, data:, ftp:, ...) before any network request is made. 
+ let parsed; + try { + parsed = new URL(url); } - return mime; + catch (e) { + throw new Error(`Invalid image URL "${url}": ${e.message}`); + } + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { + throw new Error(`unsupported image URL scheme "${parsed.protocol}"; only http and https are allowed`); + } + let response; + try { + response = await axios.get(url, { + responseType: "arraybuffer", + timeout: 30000, + maxContentLength: maxBytes, + maxBodyLength: maxBytes, + headers: { Accept: "image/*" }, + }); + } + catch (error) { + // Keep the thrown message free of the raw response body (it may echo + // server internals); surface only status/statusText. The full body is + // logged under DEBUG for diagnostics. + if (axios.isAxiosError(error)) { + if (process.env.DEBUG) { + console.error("Image download failed; response body:", JSON.stringify(error.response?.data)); + } + throw new Error(`Image download failed for "${url}": ${error.response?.status ?? ""} ${error.response?.statusText ?? error.message}`.trim()); + } + throw error; + } + // axios returns an ArrayBuffer for responseType: "arraybuffer". + const buffer = Buffer.from(response.data); + // Re-check the size: maxContentLength relies on Content-Length, which may be + // absent or lie, so guard against the actual byte count too. + if (buffer.length === 0) { + throw new Error(`Empty image response from "${url}"`); + } + if (buffer.length > maxBytes) { + throw new Error(`Image too large: ${buffer.length} bytes exceeds the ${maxBytes}-byte cap`); + } + // Resolve MIME: prefer the response Content-Type (strip any "; charset=..." + // parameter, lowercase, trim) mapped through the supported set; if the + // header is generic/missing/unsupported, fall back to the URL path + // extension via the existing extension->MIME logic. 
+ const rawCt = response.headers?.["content-type"]; + let mime = null; + if (typeof rawCt === "string" && rawCt.length > 0) { + const ct = rawCt.split(";")[0].trim().toLowerCase(); + mime = this.supportedImageMime(ct); + } + if (!mime) { + // Fall back to the URL path extension. Use the pathname so the query + // string never contaminates the extension lookup. + const ext = extname(parsed.pathname).toLowerCase(); + mime = EXT_TO_MIME[ext] ?? null; + } + if (!mime) { + throw new Error(`cannot determine supported image type for "${url}"; supported: png, jpg, jpeg, gif, webp, svg`); + } + // Build a filename from the URL path basename (ignore the query string), + // defaulting to "image" when empty, and ensure it ends with the canonical + // extension for the resolved MIME (append it when missing/mismatched). + const canonicalExt = MIME_TO_EXT[mime]; + let fileName = basename(parsed.pathname) || "image"; + if (extname(fileName).toLowerCase() !== canonicalExt) { + fileName += canonicalExt; + } + return { buffer, mime, fileName }; } /** Build a Docmost ProseMirror image node from an uploaded attachment. */ buildImageNode(att, align, alt) { @@ -1699,45 +1796,20 @@ export class DocmostClient { return node; } /** - * Upload a local image file as an attachment of a page and return the - * attachment metadata plus a ready-to-insert ProseMirror image node. + * Download a remote image from an http(s) URL and upload it as an attachment + * of a page, returning the attachment metadata plus a ready-to-insert + * ProseMirror image node. Local file paths are intentionally not supported: + * the MCP caller is a remote AI with no access to this server's filesystem. */ - async uploadImage(pageId, filePath) { + async uploadImage(pageId, url) { await this.ensureAuthenticated(); - // HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at - // the server host's local filesystem, so it must be validated BEFORE any - // bytes are read. 
Without these guards a caller could (a) read an arbitrary - // file via path traversal, (b) follow a symlink to a sensitive target, or - // (c) exhaust memory by reading a huge file. Order matters: validate the - // extension, then stat (regular-file + size cap), and only then read. - // (a) Extension allowlist first — cheap, and rejects non-images up front. - const mime = this.imageMimeFromPath(filePath); - // (b) Stat the path: it must be a regular file (rejects directories, FIFOs, - // devices, sockets) and stay under the size cap. statSync follows symlinks, - // so a symlink is only accepted when its TARGET is a regular file within - // the cap — the intended behaviour for a local image path. const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB - let stat; - try { - stat = statSync(filePath); - } - catch (e) { - throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`); - } - if (!stat.isFile()) { - throw new Error(`Not a regular file: "${filePath}"`); - } - if (stat.size > MAX_IMAGE_BYTES) { - throw new Error(`Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`); - } - // (c) Only now read the bytes. - let fileBuffer; - try { - fileBuffer = readFileSync(filePath); - } - catch (e) { - throw new Error(`Cannot read image file at "${filePath}": ${e.message}`); - } + // Fetch + validate the remote image (scheme allowlist, size cap, timeout). + // See fetchRemoteImage for the SSRF / resource trust boundary. + const fetched = await this.fetchRemoteImage(url, MAX_IMAGE_BYTES); + const fileBuffer = fetched.buffer; + const mime = fetched.mime; + const fileName = fetched.fileName; // Build a FRESH FormData for every send attempt. 
A FormData body is a // single-use stream that is CONSUMED on the first send, so it cannot be // replayed by this.client's response interceptor (replaying a consumed @@ -1749,12 +1821,14 @@ export class DocmostClient { const form = new FormData(); form.append("pageId", pageId); form.append("file", fileBuffer, { - filename: basename(filePath), + filename: fileName, contentType: mime, }); return form; }; - const url = `${this.apiUrl}/files/upload`; + // Local name distinct from the `url` parameter (the source image URL): this + // is the /files/upload endpoint we POST the multipart body to. + const uploadUrl = `${this.apiUrl}/files/upload`; let response; try { // Call buildForm() ONCE per attempt and reuse the instance for both @@ -1767,7 +1841,7 @@ export class DocmostClient { // ensureAuthenticated() above guarantees login() ran, so the default // header exists here. A 60s timeout keeps a hung upload from wedging the // per-page lock (replaceImage holds withPageLock across this call). - response = await axios.post(url, form, { + response = await axios.post(uploadUrl, form, { headers: { ...form.getHeaders(), Authorization: this.client.defaults.headers.common["Authorization"], @@ -1782,7 +1856,7 @@ export class DocmostClient { (error.response?.status === 401 || error.response?.status === 403)) { await this.login(); const form2 = buildForm(); - response = await axios.post(url, form2, { + response = await axios.post(uploadUrl, form2, { headers: { ...form2.getHeaders(), Authorization: this.client.defaults.headers.common["Authorization"], @@ -1809,10 +1883,9 @@ export class DocmostClient { throw new Error("Unexpected /files/upload response: " + JSON.stringify(response.data)); } // Some Docmost versions omit fileSize from the upload response. Fall back - // to the local stat size (the bytes we just uploaded) so callers never get - // an undefined size. - const localSize = stat.size; - const resolvedSize = att.fileSize ?? 
localSize; + // to the fetched byte length (the bytes we just uploaded) so callers never + // get an undefined size. + const resolvedSize = att.fileSize ?? fileBuffer.length; return { attachmentId: att.id, fileName: att.fileName, @@ -1822,14 +1895,15 @@ export class DocmostClient { }; } /** - * Upload a local image and insert it into a page in one step. + * Upload an image from a web (http/https) URL and insert it into a page in + * one step. * By default the image is appended at the end. With replaceText, the first * top-level block whose text contains the string is replaced; with afterText, * the image is inserted right after the first matching block. All other * block ids are preserved (only one top-level block is added or swapped). */ - async insertImage(pageId, filePath, opts = {}) { - const up = await this.uploadImage(pageId, filePath); + async insertImage(pageId, url, opts = {}) { + const up = await this.uploadImage(pageId, url); // Reuse the node from uploadImage (clean /api/files// src), then // apply align/alt onto a shallow attrs copy. const node = { ...up.imageNode, attrs: { ...up.imageNode.attrs } }; @@ -1918,9 +1992,10 @@ export class DocmostClient { }; } /** - * Replace an existing image in a page with a new file. Uploads the new file as - * a brand-new attachment, which yields a fresh clean URL that both renders - * correctly and busts browser caches (the URL changed). Finds every image node + * Replace an existing image in a page with a new image fetched from a web + * (http/https) URL. Uploads the new file as a brand-new attachment, which + * yields a fresh clean URL that both renders correctly and busts browser + * caches (the URL changed). Finds every image node * whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested * in callouts/tables) and repoints its src/attachmentId/size, preserving * comments, alignment and alt. 
Operates on the live collab document so comments @@ -1934,7 +2009,7 @@ export class DocmostClient { * In-place byte overwrite is deliberately NOT used because some Docmost * versions corrupt the attachment (HTTP 500) when its bytes are overwritten. */ - async replaceImage(pageId, oldAttachmentId, filePath, opts = {}) { + async replaceImage(pageId, oldAttachmentId, url, opts = {}) { const collabToken = await this.getCollabTokenWithReauth(); // Hold ONE per-page lock for the WHOLE operation (scan -> upload -> write). // Previously the scan and the write were two separate mutatePageContent @@ -1981,7 +2056,7 @@ export class DocmostClient { // id, new clean URL) and repoint every matching node in a second pass. // Still inside the SAME lock, so no other op can have changed the page // since the scan. - const up = await this.uploadImage(pageId, filePath); + const up = await this.uploadImage(pageId, url); let replaced = 0; // Swap the source of one image node, preserving align/alt/title/geometry. const repoint = (node) => { diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js index b169f495..d759a53a 100644 --- a/packages/mcp/build/index.js +++ b/packages/mcp/build/index.js @@ -21,7 +21,7 @@ const VERSION = packageJson.version; // --- Modern McpServer Implementation --- // Editing guide surfaced to MCP clients in the initialize result so they can // pick the right tool by intent and avoid resending whole documents. -const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (place a local image file) / replace_image (swap an existing image file). 
New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " + +const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. 
Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " + "Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " + "Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " + "Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown."; @@ -449,7 +449,8 @@ export function createDocmostMcpServer(config) { }); // Tool: insert_image server.registerTool("insert_image", { - description: "Upload a local image and insert it into a page in one step. By default " + + description: "Download an image from a web (http/https) URL and insert it into " + + "a page in one step. By default " + "appends the image at the end of the page. With replaceText, replaces the " + "first top-level block whose text contains that string (handy for " + 'swapping a text placeholder like "[image: foo.png]" for the real image). ' + @@ -457,10 +458,10 @@ export function createDocmostMcpServer(config) { "that string. 
Preserves all other block ids.", inputSchema: { pageId: z.string().min(1), - filePath: z + imageUrl: z .string() .min(1) - .describe("Absolute local path to the image file"), + .describe("http(s) URL of the image to download and upload"), align: z.enum(["left", "center", "right"]).optional(), alt: z.string().optional(), replaceText: z @@ -472,8 +473,8 @@ export function createDocmostMcpServer(config) { .optional() .describe("Insert the image right after the first top-level block whose text contains this string"), }, - }, async ({ pageId, filePath, align, alt, replaceText, afterText }) => { - const result = await docmostClient.insertImage(pageId, filePath, { + }, async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => { + const result = await docmostClient.insertImage(pageId, imageUrl, { align, alt, replaceText, @@ -483,7 +484,8 @@ export function createDocmostMcpServer(config) { }); // Tool: replace_image server.registerTool("replace_image", { - description: "Replace an existing image on a page: uploads the new file as a NEW " + + description: "Replace an existing image on a page with a new image fetched from a web " + + "(http/https) URL: uploads the new file as a NEW " + "attachment (fresh clean URL that renders and busts browser caches), then " + "repoints every image node referencing the old attachmentId (recursively, " + "incl. 
callouts/tables) via the live document, preserving comments, " + @@ -497,15 +499,15 @@ export function createDocmostMcpServer(config) { .string() .min(1) .describe("attachmentId of the image currently in the page to replace"), - filePath: z + imageUrl: z .string() .min(1) - .describe("Absolute local path to the new image file"), + .describe("http(s) URL of the new image to download"), align: z.enum(["left", "center", "right"]).optional(), alt: z.string().optional(), }, - }, async ({ pageId, attachmentId, filePath, align, alt }) => { - const result = await docmostClient.replaceImage(pageId, attachmentId, filePath, { + }, async ({ pageId, attachmentId, imageUrl, align, alt }) => { + const result = await docmostClient.replaceImage(pageId, attachmentId, imageUrl, { align, alt, }); diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 093c0ab8..cbf3a31e 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1,6 +1,5 @@ import FormData from "form-data"; import axios, { AxiosInstance } from "axios"; -import { readFileSync, statSync } from "fs"; import { basename, extname } from "path"; import { filterWorkspace, @@ -59,6 +58,24 @@ import { } from "./lib/transforms.js"; import vm from "node:vm"; +// Supported image types, kept as two lookup tables so both a URL path +// extension and a remote Content-Type can be mapped to the same canonical set. +const EXT_TO_MIME: Record<string, string> = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".svg": "image/svg+xml", +}; +const MIME_TO_EXT: Record<string, string> = { + "image/png": ".png", + "image/jpeg": ".jpg", + "image/gif": ".gif", + "image/webp": ".webp", + "image/svg+xml": ".svg", +}; + /** * Configuration for a DocmostClient / MCP server instance. 
A discriminated * union: either service-account credentials (email/password — the client calls @@ -2024,24 +2041,121 @@ export class DocmostClient { // --- Image upload / embedding --- - /** Map a file extension to a supported image MIME type (throws otherwise). */ - private imageMimeFromPath(filePath: string): string { - const ext = extname(filePath).toLowerCase(); - const map: Record<string, string> = { - ".png": "image/png", - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".gif": "image/gif", - ".webp": "image/webp", - ".svg": "image/svg+xml", - }; - const mime = map[ext]; - if (!mime) { + /** Map a Content-Type string to a supported MIME type, or null if unsupported. */ + private supportedImageMime(ct: string): string | null { + return MIME_TO_EXT[ct] ? ct : null; + } + + /** + * Download a remote image from a caller-supplied URL and resolve its bytes, + * MIME and a filename. + * + * SSRF / RESOURCE TRUST BOUNDARY: the URL comes from the MCP caller and is + * fetched BY THE SERVER, so it must be guarded before and after the request. + * The guards applied here are: + * - scheme allowlist (http/https only) — rejects file:, data:, ftp:, etc., + * so the caller cannot use this path to read local files or other schemes; + * - a size cap enforced both via axios maxContentLength/maxBodyLength AND a + * post-download buffer.length re-check (defends against a missing/lying + * Content-Length), so a huge response cannot exhaust memory; + * - a 30s timeout. The timeout matters because replaceImage holds the + * per-page lock across this upload, so a hung download would wedge the + * lock for that page. + * NOTE(review): private/loopback/link-local IP ranges are NOT blocked, so a + * caller can make this server fetch internal URLs (SSRF). The earlier rationale + * (callers could already read host files via filePath) no longer applies now + * that local paths are unsupported; restrict egress or add an IP check. 
+ */ + private async fetchRemoteImage( + url: string, + maxBytes: number, + ): Promise<{ buffer: Buffer; mime: string; fileName: string }> { + // Scheme allowlist first — cheapest guard, and rejects non-http(s) schemes + // (file:, data:, ftp:, ...) before any network request is made. + let parsed: URL; + try { + parsed = new URL(url); + } catch (e: any) { + throw new Error(`Invalid image URL "${url}": ${e.message}`); + } + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") { throw new Error( - `unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`, + `unsupported image URL scheme "${parsed.protocol}"; only http and https are allowed`, ); } - return mime; + + let response; + try { + response = await axios.get(url, { + responseType: "arraybuffer", + timeout: 30000, + maxContentLength: maxBytes, + maxBodyLength: maxBytes, + headers: { Accept: "image/*" }, + }); + } catch (error) { + // Keep the thrown message free of the raw response body (it may echo + // server internals); surface only status/statusText. The full body is + // logged under DEBUG for diagnostics. + if (axios.isAxiosError(error)) { + if (process.env.DEBUG) { + console.error( + "Image download failed; response body:", + JSON.stringify(error.response?.data), + ); + } + throw new Error( + `Image download failed for "${url}": ${error.response?.status ?? ""} ${error.response?.statusText ?? error.message}`.trim(), + ); + } + throw error; + } + + // axios returns an ArrayBuffer for responseType: "arraybuffer". + const buffer = Buffer.from(response.data); + // Re-check the size: maxContentLength relies on Content-Length, which may be + // absent or lie, so guard against the actual byte count too. 
+ if (buffer.length === 0) { + throw new Error(`Empty image response from "${url}"`); + } + if (buffer.length > maxBytes) { + throw new Error( + `Image too large: ${buffer.length} bytes exceeds the ${maxBytes}-byte cap`, + ); + } + + // Resolve MIME: prefer the response Content-Type (strip any "; charset=..." + // parameter, lowercase, trim) mapped through the supported set; if the + // header is generic/missing/unsupported, fall back to the URL path + // extension via the existing extension->MIME logic. + const rawCt = response.headers?.["content-type"]; + let mime: string | null = null; + if (typeof rawCt === "string" && rawCt.length > 0) { + const ct = rawCt.split(";")[0].trim().toLowerCase(); + mime = this.supportedImageMime(ct); + } + if (!mime) { + // Fall back to the URL path extension. Use the pathname so the query + // string never contaminates the extension lookup. + const ext = extname(parsed.pathname).toLowerCase(); + mime = EXT_TO_MIME[ext] ?? null; + } + if (!mime) { + throw new Error( + `cannot determine supported image type for "${url}"; supported: png, jpg, jpeg, gif, webp, svg`, + ); + } + + // Build a filename from the URL path basename (ignore the query string), + // defaulting to "image" when empty, and ensure it ends with the canonical + // extension for the resolved MIME (append it when missing/mismatched). + const canonicalExt = MIME_TO_EXT[mime]; + let fileName = basename(parsed.pathname) || "image"; + if (extname(fileName).toLowerCase() !== canonicalExt) { + fileName += canonicalExt; + } + + return { buffer, mime, fileName }; } /** Build a Docmost ProseMirror image node from an uploaded attachment. */ @@ -2072,49 +2186,22 @@ export class DocmostClient { } /** - * Upload a local image file as an attachment of a page and return the - * attachment metadata plus a ready-to-insert ProseMirror image node. 
+ * Download a remote image from an http(s) URL and upload it as an attachment + * of a page, returning the attachment metadata plus a ready-to-insert + * ProseMirror image node. Local file paths are intentionally not supported: + * the MCP caller is a remote AI with no access to this server's filesystem. */ - async uploadImage(pageId: string, filePath: string) { + async uploadImage(pageId: string, url: string) { await this.ensureAuthenticated(); - // HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at - // the server host's local filesystem, so it must be validated BEFORE any - // bytes are read. Without these guards a caller could (a) read an arbitrary - // file via path traversal, (b) follow a symlink to a sensitive target, or - // (c) exhaust memory by reading a huge file. Order matters: validate the - // extension, then stat (regular-file + size cap), and only then read. - - // (a) Extension allowlist first — cheap, and rejects non-images up front. - const mime = this.imageMimeFromPath(filePath); - - // (b) Stat the path: it must be a regular file (rejects directories, FIFOs, - // devices, sockets) and stay under the size cap. statSync follows symlinks, - // so a symlink is only accepted when its TARGET is a regular file within - // the cap — the intended behaviour for a local image path. const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB - let stat; - try { - stat = statSync(filePath); - } catch (e: any) { - throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`); - } - if (!stat.isFile()) { - throw new Error(`Not a regular file: "${filePath}"`); - } - if (stat.size > MAX_IMAGE_BYTES) { - throw new Error( - `Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`, - ); - } - // (c) Only now read the bytes. 
- let fileBuffer: Buffer; - try { - fileBuffer = readFileSync(filePath); - } catch (e: any) { - throw new Error(`Cannot read image file at "${filePath}": ${e.message}`); - } + // Fetch + validate the remote image (scheme allowlist, size cap, timeout). + // See fetchRemoteImage for the SSRF / resource trust boundary. + const fetched = await this.fetchRemoteImage(url, MAX_IMAGE_BYTES); + const fileBuffer = fetched.buffer; + const mime = fetched.mime; + const fileName = fetched.fileName; // Build a FRESH FormData for every send attempt. A FormData body is a // single-use stream that is CONSUMED on the first send, so it cannot be @@ -2127,13 +2214,15 @@ export class DocmostClient { const form = new FormData(); form.append("pageId", pageId); form.append("file", fileBuffer, { - filename: basename(filePath), + filename: fileName, contentType: mime, }); return form; }; - const url = `${this.apiUrl}/files/upload`; + // Local name distinct from the `url` parameter (the source image URL): this + // is the /files/upload endpoint we POST the multipart body to. + const uploadUrl = `${this.apiUrl}/files/upload`; let response; try { // Call buildForm() ONCE per attempt and reuse the instance for both @@ -2146,7 +2235,7 @@ export class DocmostClient { // ensureAuthenticated() above guarantees login() ran, so the default // header exists here. A 60s timeout keeps a hung upload from wedging the // per-page lock (replaceImage holds withPageLock across this call). 
- response = await axios.post(url, form, { + response = await axios.post(uploadUrl, form, { headers: { ...form.getHeaders(), Authorization: this.client.defaults.headers.common["Authorization"], @@ -2162,7 +2251,7 @@ export class DocmostClient { ) { await this.login(); const form2 = buildForm(); - response = await axios.post(url, form2, { + response = await axios.post(uploadUrl, form2, { headers: { ...form2.getHeaders(), Authorization: @@ -2196,10 +2285,9 @@ export class DocmostClient { } // Some Docmost versions omit fileSize from the upload response. Fall back - // to the local stat size (the bytes we just uploaded) so callers never get - // an undefined size. - const localSize = stat.size; - const resolvedSize = att.fileSize ?? localSize; + // to the fetched byte length (the bytes we just uploaded) so callers never + // get an undefined size. + const resolvedSize = att.fileSize ?? fileBuffer.length; return { attachmentId: att.id, @@ -2211,7 +2299,8 @@ export class DocmostClient { } /** - * Upload a local image and insert it into a page in one step. + * Upload an image from a web (http/https) URL and insert it into a page in + * one step. * By default the image is appended at the end. With replaceText, the first * top-level block whose text contains the string is replaced; with afterText, * the image is inserted right after the first matching block. All other @@ -2219,7 +2308,7 @@ export class DocmostClient { */ async insertImage( pageId: string, - filePath: string, + url: string, opts: { align?: "left" | "center" | "right"; alt?: string; @@ -2227,7 +2316,7 @@ export class DocmostClient { afterText?: string; } = {}, ) { - const up = await this.uploadImage(pageId, filePath); + const up = await this.uploadImage(pageId, url); // Reuse the node from uploadImage (clean /api/files// src), then // apply align/alt onto a shallow attrs copy. 
const node: any = { ...up.imageNode, attrs: { ...up.imageNode.attrs } }; @@ -2331,9 +2420,10 @@ export class DocmostClient { } /** - * Replace an existing image in a page with a new file. Uploads the new file as - * a brand-new attachment, which yields a fresh clean URL that both renders - * correctly and busts browser caches (the URL changed). Finds every image node + * Replace an existing image in a page with a new image fetched from a web + * (http/https) URL. Uploads the new file as a brand-new attachment, which + * yields a fresh clean URL that both renders correctly and busts browser + * caches (the URL changed). Finds every image node * whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested * in callouts/tables) and repoints its src/attachmentId/size, preserving * comments, alignment and alt. Operates on the live collab document so comments @@ -2350,7 +2440,7 @@ export class DocmostClient { async replaceImage( pageId: string, oldAttachmentId: string, - filePath: string, + url: string, opts: { align?: "left" | "center" | "right"; alt?: string } = {}, ) { const collabToken = await this.getCollabTokenWithReauth(); @@ -2405,7 +2495,7 @@ export class DocmostClient { // id, new clean URL) and repoint every matching node in a second pass. // Still inside the SAME lock, so no other op can have changed the page // since the scan. - const up = await this.uploadImage(pageId, filePath); + const up = await this.uploadImage(pageId, url); let replaced = 0; diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 95a649e6..f573d530 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -30,7 +30,7 @@ const VERSION = packageJson.version; // Editing guide surfaced to MCP clients in the initialize result so they can // pick the right tool by intent and avoid resending whole documents. 
const SERVER_INSTRUCTIONS = - "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (place a local image file) / replace_image (swap an existing image file). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " + + "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). 
Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " + "Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " + "Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " + "Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown."; @@ -612,7 +612,8 @@ server.registerTool( "insert_image", { description: - "Upload a local image and insert it into a page in one step. By default " + + "Download an image from a web (http/https) URL and insert it into " + + "a page in one step. By default " + "appends the image at the end of the page. With replaceText, replaces the " + "first top-level block whose text contains that string (handy for " + 'swapping a text placeholder like "[image: foo.png]" for the real image). ' + @@ -620,10 +621,10 @@ server.registerTool( "that string. 
Preserves all other block ids.", inputSchema: { pageId: z.string().min(1), - filePath: z + imageUrl: z .string() .min(1) - .describe("Absolute local path to the image file"), + .describe("http(s) URL of the image to download and upload"), align: z.enum(["left", "center", "right"]).optional(), alt: z.string().optional(), replaceText: z @@ -640,8 +641,8 @@ server.registerTool( ), }, }, - async ({ pageId, filePath, align, alt, replaceText, afterText }) => { - const result = await docmostClient.insertImage(pageId, filePath, { + async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => { + const result = await docmostClient.insertImage(pageId, imageUrl, { align, alt, replaceText, @@ -656,7 +657,8 @@ server.registerTool( "replace_image", { description: - "Replace an existing image on a page: uploads the new file as a NEW " + + "Replace an existing image on a page with a new image fetched from a web " + + "(http/https) URL: uploads the new file as a NEW " + "attachment (fresh clean URL that renders and busts browser caches), then " + "repoints every image node referencing the old attachmentId (recursively, " + "incl. callouts/tables) via the live document, preserving comments, " + @@ -670,19 +672,24 @@ server.registerTool( .string() .min(1) .describe("attachmentId of the image currently in the page to replace"), - filePath: z + imageUrl: z .string() .min(1) - .describe("Absolute local path to the new image file"), + .describe("http(s) URL of the new image to download"), align: z.enum(["left", "center", "right"]).optional(), alt: z.string().optional(), }, }, - async ({ pageId, attachmentId, filePath, align, alt }) => { - const result = await docmostClient.replaceImage(pageId, attachmentId, filePath, { - align, - alt, - }); + async ({ pageId, attachmentId, imageUrl, align, alt }) => { + const result = await docmostClient.replaceImage( + pageId, + attachmentId, + imageUrl, + { + align, + alt, + }, + ); return jsonContent(result); }, );