feat(mcp): fetch insert_image/replace_image sources from web URLs

The insert_image and replace_image MCP tools previously uploaded only local files (filePath), which an AI MCP client cannot provide — it has no access to the server filesystem. Replace filePath with a required imageUrl and download the image over http(s).

- client.ts: add fetchRemoteImage(url, maxBytes) — http/https-only scheme allowlist, 20 MiB cap (maxContentLength + post-download length recheck), 30s timeout, Content-Type→MIME resolution with URL-extension fallback, filename derivation with canonical extension
- client.ts: rewrite uploadImage(pageId, url) as URL-only; drop the local-file branch, imageMimeFromPath and the fs import; insertImage/replaceImage now take a url
- index.ts: drop filePath, add required imageUrl to both tools; update tool descriptions and SERVER_INSTRUCTIONS
- README: document the web-URL behaviour
2026-06-18 01:28:23 +03:00
parent 060c14cc27
commit 334a50f003
5 changed files with 342 additions and 167 deletions
--- a/packages/mcp/README.md
+++ b/packages/mcp/README.md
@@ -194,10 +194,11 @@ All 38 tools, grouped by what you'd reach for them.

 ### Images

- **`insert_image`** — Upload a local image and insert it in one step: append it, drop it
-  in place of a text placeholder (`replaceText`), or put it after a given block
-  (`afterText`). Preserves all other block ids.
- **`replace_image`** — Swap an existing image. Uploads the new file as a **fresh
+- **`insert_image`** — Download an image from a web (http/https) URL and insert it in one
+  step: append it, drop it in place of a text placeholder (`replaceText`), or put it after
+  a given block (`afterText`). Preserves all other block ids.
+- **`replace_image`** — Swap an existing image for one fetched from a web (http/https) URL.
+  Uploads the new file as a **fresh
  attachment** (clean URL that renders and busts browser caches), then re-points every
  node referencing the old attachment (recursively, including callouts/tables) via the
  live document, preserving comments, alignment and alt text. (In-place overwrite is
--- a/packages/mcp/build/client.js
+++ b/packages/mcp/build/client.js
@@ -1,6 +1,5 @@
 import FormData from "form-data";
 import axios from "axios";
-import { readFileSync, statSync } from "fs";
 import { basename, extname } from "path";
 import { filterWorkspace, filterSpace, filterPage, filterComment, filterSearchResult, } from "./lib/filters.js";
 import { HocuspocusProvider } from "@hocuspocus/provider";
@@ -18,6 +17,23 @@ import { getCollabToken, performLogin } from "./lib/auth-utils.js";
 import { diffDocs } from "./lib/diff.js";
 import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, } from "./lib/transforms.js";
 import vm from "node:vm";
+// Supported image types, kept as two lookup tables so both a URL path
+// extension and a remote Content-Type can be mapped to the same canonical set.
+const EXT_TO_MIME = {
+    ".png": "image/png",
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".gif": "image/gif",
+    ".webp": "image/webp",
+    ".svg": "image/svg+xml",
+};
+const MIME_TO_EXT = {
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/gif": ".gif",
+    "image/webp": ".webp",
+    "image/svg+xml": ".svg",
+};
 export class DocmostClient {
    client;
    token = null;
@@ -1658,22 +1674,103 @@ export class DocmostClient {
        };
    }
    // --- Image upload / embedding ---
-    /** Map a file extension to a supported image MIME type (throws otherwise). */
-    imageMimeFromPath(filePath) {
-        const ext = extname(filePath).toLowerCase();
-        const map = {
-            ".png": "image/png",
-            ".jpg": "image/jpeg",
-            ".jpeg": "image/jpeg",
-            ".gif": "image/gif",
-            ".webp": "image/webp",
-            ".svg": "image/svg+xml",
-        };
-        const mime = map[ext];
-        if (!mime) {
-            throw new Error(`unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`);
+    /** Map a Content-Type string to a supported MIME type, or null if unsupported. */
+    supportedImageMime(ct) {
+        return MIME_TO_EXT[ct] ? ct : null;
    }
-        return mime;
+    /**
+     * Download a remote image from a caller-supplied URL and resolve its bytes,
+     * MIME and a filename.
+     *
+     * SSRF / RESOURCE TRUST BOUNDARY: the URL comes from the MCP caller and is
+     * fetched BY THE SERVER, so it must be guarded before and after the request.
+     * The guards replace the former local-file checks in uploadImage:
+     *   - scheme allowlist (http/https only) — rejects file:, data:, ftp:, etc.,
+     *     so the caller cannot use this path to read local files or other schemes;
+     *   - a size cap enforced both via axios maxContentLength/maxBodyLength AND a
+     *     post-download buffer.length re-check (defends against a missing/lying
+     *     Content-Length), so a huge response cannot exhaust memory;
+     *   - a 30s timeout. The timeout matters because replaceImage holds the
+     *     per-page lock across this upload, so a hung download would wedge the
+     *     lock for that page.
+     * We deliberately do NOT block private IP ranges, because blocking would
+     * break legitimate internal-image use. NOTE(review): the filePath path is
+     * removed, so the caller no longer has host-file access; re-evaluate
+     * whether fetching internal URLs (SSRF) is acceptable for this deployment.
+     */
+    async fetchRemoteImage(url, maxBytes) {
+        // Scheme allowlist first — cheapest guard, and rejects non-http(s) schemes
+        // (file:, data:, ftp:, ...) before any network request is made.
+        let parsed;
+        try {
+            parsed = new URL(url);
+        }
+        catch (e) {
+            throw new Error(`Invalid image URL "${url}": ${e.message}`);
+        }
+        if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
+            throw new Error(`unsupported image URL scheme "${parsed.protocol}"; only http and https are allowed`);
+        }
+        let response;
+        try {
+            response = await axios.get(url, {
+                responseType: "arraybuffer",
+                timeout: 30000,
+                maxContentLength: maxBytes,
+                maxBodyLength: maxBytes,
+                headers: { Accept: "image/*" },
+            });
+        }
+        catch (error) {
+            // Keep the thrown message free of the raw response body (it may echo
+            // server internals); surface only status/statusText. The full body is
+            // logged under DEBUG for diagnostics.
+            if (axios.isAxiosError(error)) {
+                if (process.env.DEBUG) {
+                    console.error("Image download failed; response body:", JSON.stringify(error.response?.data));
+                }
+                throw new Error(`Image download failed for "${url}": ${error.response?.status ?? ""} ${error.response?.statusText ?? error.message}`.trim());
+            }
+            throw error;
+        }
+        // axios returns an ArrayBuffer for responseType: "arraybuffer".
+        const buffer = Buffer.from(response.data);
+        // Re-check the size: maxContentLength relies on Content-Length, which may be
+        // absent or lie, so guard against the actual byte count too.
+        if (buffer.length === 0) {
+            throw new Error(`Empty image response from "${url}"`);
+        }
+        if (buffer.length > maxBytes) {
+            throw new Error(`Image too large: ${buffer.length} bytes exceeds the ${maxBytes}-byte cap`);
+        }
+        // Resolve MIME: prefer the response Content-Type (strip any "; charset=..."
+        // parameter, lowercase, trim) mapped through the supported set; if the
+        // header is generic/missing/unsupported, fall back to the URL path
+        // extension via the existing extension->MIME logic.
+        const rawCt = response.headers?.["content-type"];
+        let mime = null;
+        if (typeof rawCt === "string" && rawCt.length > 0) {
+            const ct = rawCt.split(";")[0].trim().toLowerCase();
+            mime = this.supportedImageMime(ct);
+        }
+        if (!mime) {
+            // Fall back to the URL path extension. Use the pathname so the query
+            // string never contaminates the extension lookup.
+            const ext = extname(parsed.pathname).toLowerCase();
+            mime = EXT_TO_MIME[ext] ?? null;
+        }
+        if (!mime) {
+            throw new Error(`cannot determine supported image type for "${url}"; supported: png, jpg, jpeg, gif, webp, svg`);
+        }
+        // Build a filename from the URL path basename (ignore the query string),
+        // defaulting to "image" when empty, and ensure it ends with the canonical
+        // extension for the resolved MIME (append it when missing/mismatched).
+        const canonicalExt = MIME_TO_EXT[mime];
+        let fileName = basename(parsed.pathname) || "image";
+        if (extname(fileName).toLowerCase() !== canonicalExt) {
+            fileName += canonicalExt;
+        }
+        return { buffer, mime, fileName };
    }
    /** Build a Docmost ProseMirror image node from an uploaded attachment. */
    buildImageNode(att, align, alt) {
@@ -1699,45 +1796,20 @@ export class DocmostClient {
        return node;
    }
    /**
-     * Upload a local image file as an attachment of a page and return the
-     * attachment metadata plus a ready-to-insert ProseMirror image node.
+     * Download a remote image from an http(s) URL and upload it as an attachment
+     * of a page, returning the attachment metadata plus a ready-to-insert
+     * ProseMirror image node. Local file paths are intentionally not supported:
+     * the MCP caller is a remote AI with no access to this server's filesystem.
     */
-    async uploadImage(pageId, filePath) {
+    async uploadImage(pageId, url) {
        await this.ensureAuthenticated();
-        // HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at
-        // the server host's local filesystem, so it must be validated BEFORE any
-        // bytes are read. Without these guards a caller could (a) read an arbitrary
-        // file via path traversal, (b) follow a symlink to a sensitive target, or
-        // (c) exhaust memory by reading a huge file. Order matters: validate the
-        // extension, then stat (regular-file + size cap), and only then read.
-        // (a) Extension allowlist first — cheap, and rejects non-images up front.
-        const mime = this.imageMimeFromPath(filePath);
-        // (b) Stat the path: it must be a regular file (rejects directories, FIFOs,
-        // devices, sockets) and stay under the size cap. statSync follows symlinks,
-        // so a symlink is only accepted when its TARGET is a regular file within
-        // the cap — the intended behaviour for a local image path.
        const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB
-        let stat;
-        try {
-            stat = statSync(filePath);
-        }
-        catch (e) {
-            throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`);
-        }
-        if (!stat.isFile()) {
-            throw new Error(`Not a regular file: "${filePath}"`);
-        }
-        if (stat.size > MAX_IMAGE_BYTES) {
-            throw new Error(`Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`);
-        }
-        // (c) Only now read the bytes.
-        let fileBuffer;
-        try {
-            fileBuffer = readFileSync(filePath);
-        }
-        catch (e) {
-            throw new Error(`Cannot read image file at "${filePath}": ${e.message}`);
-        }
+        // Fetch + validate the remote image (scheme allowlist, size cap, timeout).
+        // See fetchRemoteImage for the SSRF / resource trust boundary.
+        const fetched = await this.fetchRemoteImage(url, MAX_IMAGE_BYTES);
+        const fileBuffer = fetched.buffer;
+        const mime = fetched.mime;
+        const fileName = fetched.fileName;
        // Build a FRESH FormData for every send attempt. A FormData body is a
        // single-use stream that is CONSUMED on the first send, so it cannot be
        // replayed by this.client's response interceptor (replaying a consumed
@@ -1749,12 +1821,14 @@ export class DocmostClient {
            const form = new FormData();
            form.append("pageId", pageId);
            form.append("file", fileBuffer, {
-                filename: basename(filePath),
+                filename: fileName,
                contentType: mime,
            });
            return form;
        };
-        const url = `${this.apiUrl}/files/upload`;
+        // Local name distinct from the `url` parameter (the source image URL): this
+        // is the /files/upload endpoint we POST the multipart body to.
+        const uploadUrl = `${this.apiUrl}/files/upload`;
        let response;
        try {
            // Call buildForm() ONCE per attempt and reuse the instance for both
@@ -1767,7 +1841,7 @@ export class DocmostClient {
            // ensureAuthenticated() above guarantees login() ran, so the default
            // header exists here. A 60s timeout keeps a hung upload from wedging the
            // per-page lock (replaceImage holds withPageLock across this call).
-            response = await axios.post(url, form, {
+            response = await axios.post(uploadUrl, form, {
                headers: {
                    ...form.getHeaders(),
                    Authorization: this.client.defaults.headers.common["Authorization"],
@@ -1782,7 +1856,7 @@ export class DocmostClient {
                (error.response?.status === 401 || error.response?.status === 403)) {
                await this.login();
                const form2 = buildForm();
-                response = await axios.post(url, form2, {
+                response = await axios.post(uploadUrl, form2, {
                    headers: {
                        ...form2.getHeaders(),
                        Authorization: this.client.defaults.headers.common["Authorization"],
@@ -1809,10 +1883,9 @@ export class DocmostClient {
            throw new Error("Unexpected /files/upload response: " + JSON.stringify(response.data));
        }
        // Some Docmost versions omit fileSize from the upload response. Fall back
-        // to the local stat size (the bytes we just uploaded) so callers never get
-        // an undefined size.
-        const localSize = stat.size;
-        const resolvedSize = att.fileSize ?? localSize;
+        // to the fetched byte length (the bytes we just uploaded) so callers never
+        // get an undefined size.
+        const resolvedSize = att.fileSize ?? fileBuffer.length;
        return {
            attachmentId: att.id,
            fileName: att.fileName,
@@ -1822,14 +1895,15 @@ export class DocmostClient {
        };
    }
    /**
-     * Upload a local image and insert it into a page in one step.
+     * Upload an image from a web (http/https) URL and insert it into a page in
+     * one step.
     * By default the image is appended at the end. With replaceText, the first
     * top-level block whose text contains the string is replaced; with afterText,
     * the image is inserted right after the first matching block. All other
     * block ids are preserved (only one top-level block is added or swapped).
     */
-    async insertImage(pageId, filePath, opts = {}) {
-        const up = await this.uploadImage(pageId, filePath);
+    async insertImage(pageId, url, opts = {}) {
+        const up = await this.uploadImage(pageId, url);
        // Reuse the node from uploadImage (clean /api/files/<id>/<file> src), then
        // apply align/alt onto a shallow attrs copy.
        const node = { ...up.imageNode, attrs: { ...up.imageNode.attrs } };
@@ -1918,9 +1992,10 @@ export class DocmostClient {
        };
    }
    /**
-     * Replace an existing image in a page with a new file. Uploads the new file as
-     * a brand-new attachment, which yields a fresh clean URL that both renders
-     * correctly and busts browser caches (the URL changed). Finds every image node
+     * Replace an existing image in a page with a new image fetched from a web
+     * (http/https) URL. Uploads the new file as a brand-new attachment, which
+     * yields a fresh clean URL that both renders correctly and busts browser
+     * caches (the URL changed). Finds every image node
     * whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested
     * in callouts/tables) and repoints its src/attachmentId/size, preserving
     * comments, alignment and alt. Operates on the live collab document so comments
@@ -1934,7 +2009,7 @@ export class DocmostClient {
     * In-place byte overwrite is deliberately NOT used because some Docmost
     * versions corrupt the attachment (HTTP 500) when its bytes are overwritten.
     */
-    async replaceImage(pageId, oldAttachmentId, filePath, opts = {}) {
+    async replaceImage(pageId, oldAttachmentId, url, opts = {}) {
        const collabToken = await this.getCollabTokenWithReauth();
        // Hold ONE per-page lock for the WHOLE operation (scan -> upload -> write).
        // Previously the scan and the write were two separate mutatePageContent
@@ -1981,7 +2056,7 @@ export class DocmostClient {
            // id, new clean URL) and repoint every matching node in a second pass.
            // Still inside the SAME lock, so no other op can have changed the page
            // since the scan.
-            const up = await this.uploadImage(pageId, filePath);
+            const up = await this.uploadImage(pageId, url);
            let replaced = 0;
            // Swap the source of one image node, preserving align/alt/title/geometry.
            const repoint = (node) => {
--- a/packages/mcp/build/index.js
+++ b/packages/mcp/build/index.js
@@ -21,7 +21,7 @@ const VERSION = packageJson.version;
 // --- Modern McpServer Implementation ---
 // Editing guide surfaced to MCP clients in the initialize result so they can
 // pick the right tool by intent and avoid resending whole documents.
-const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (place a local image file) / replace_image (swap an existing image file). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
+const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
    "Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " +
    "Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " +
    "Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown.";
@@ -449,7 +449,8 @@ export function createDocmostMcpServer(config) {
    });
    // Tool: insert_image
    server.registerTool("insert_image", {
-        description: "Upload a local image and insert it into a page in one step. By default " +
+        description: "Download an image from a web (http/https) URL and insert it into " +
+            "a page in one step. By default " +
            "appends the image at the end of the page. With replaceText, replaces the " +
            "first top-level block whose text contains that string (handy for " +
            'swapping a text placeholder like "[image: foo.png]" for the real image). ' +
@@ -457,10 +458,10 @@ export function createDocmostMcpServer(config) {
            "that string. Preserves all other block ids.",
        inputSchema: {
            pageId: z.string().min(1),
-            filePath: z
+            imageUrl: z
                .string()
                .min(1)
-                .describe("Absolute local path to the image file"),
+                .describe("http(s) URL of the image to download and upload"),
            align: z.enum(["left", "center", "right"]).optional(),
            alt: z.string().optional(),
            replaceText: z
@@ -472,8 +473,8 @@ export function createDocmostMcpServer(config) {
                .optional()
                .describe("Insert the image right after the first top-level block whose text contains this string"),
        },
-    }, async ({ pageId, filePath, align, alt, replaceText, afterText }) => {
-        const result = await docmostClient.insertImage(pageId, filePath, {
+    }, async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => {
+        const result = await docmostClient.insertImage(pageId, imageUrl, {
            align,
            alt,
            replaceText,
@@ -483,7 +484,8 @@ export function createDocmostMcpServer(config) {
    });
    // Tool: replace_image
    server.registerTool("replace_image", {
-        description: "Replace an existing image on a page: uploads the new file as a NEW " +
+        description: "Replace an existing image on a page with a new image fetched from a web " +
+            "(http/https) URL: uploads the new file as a NEW " +
            "attachment (fresh clean URL that renders and busts browser caches), then " +
            "repoints every image node referencing the old attachmentId (recursively, " +
            "incl. callouts/tables) via the live document, preserving comments, " +
@@ -497,15 +499,15 @@ export function createDocmostMcpServer(config) {
                .string()
                .min(1)
                .describe("attachmentId of the image currently in the page to replace"),
-            filePath: z
+            imageUrl: z
                .string()
                .min(1)
-                .describe("Absolute local path to the new image file"),
+                .describe("http(s) URL of the new image to download"),
            align: z.enum(["left", "center", "right"]).optional(),
            alt: z.string().optional(),
        },
-    }, async ({ pageId, attachmentId, filePath, align, alt }) => {
-        const result = await docmostClient.replaceImage(pageId, attachmentId, filePath, {
+    }, async ({ pageId, attachmentId, imageUrl, align, alt }) => {
+        const result = await docmostClient.replaceImage(pageId, attachmentId, imageUrl, {
            align,
            alt,
        });
--- a/packages/mcp/src/client.ts
+++ b/packages/mcp/src/client.ts
@@ -1,6 +1,5 @@
 import FormData from "form-data";
 import axios, { AxiosInstance } from "axios";
-import { readFileSync, statSync } from "fs";
 import { basename, extname } from "path";
 import {
  filterWorkspace,
@@ -59,6 +58,24 @@ import {
 } from "./lib/transforms.js";
 import vm from "node:vm";

+// Supported image types, kept as two lookup tables so both a URL path
+// extension and a remote Content-Type can be mapped to the same canonical set.
+const EXT_TO_MIME: Record<string, string> = {
+  ".png": "image/png",
+  ".jpg": "image/jpeg",
+  ".jpeg": "image/jpeg",
+  ".gif": "image/gif",
+  ".webp": "image/webp",
+  ".svg": "image/svg+xml",
+};
+const MIME_TO_EXT: Record<string, string> = {
+  "image/png": ".png",
+  "image/jpeg": ".jpg",
+  "image/gif": ".gif",
+  "image/webp": ".webp",
+  "image/svg+xml": ".svg",
+};
+
 /**
 * Configuration for a DocmostClient / MCP server instance. A discriminated
 * union: either service-account credentials (email/password — the client calls
@@ -2024,24 +2041,121 @@ export class DocmostClient {

  // --- Image upload / embedding ---

-  /** Map a file extension to a supported image MIME type (throws otherwise). */
-  private imageMimeFromPath(filePath: string): string {
-    const ext = extname(filePath).toLowerCase();
-    const map: Record<string, string> = {
-      ".png": "image/png",
-      ".jpg": "image/jpeg",
-      ".jpeg": "image/jpeg",
-      ".gif": "image/gif",
-      ".webp": "image/webp",
-      ".svg": "image/svg+xml",
-    };
-    const mime = map[ext];
-    if (!mime) {
+  /** Map a Content-Type string to a supported MIME type, or null if unsupported. */
+  private supportedImageMime(ct: string): string | null {
+    return MIME_TO_EXT[ct] ? ct : null;
+  }
+
+  /**
+   * Download a remote image from a caller-supplied URL and resolve its bytes,
+   * MIME and a filename.
+   *
+   * SSRF / RESOURCE TRUST BOUNDARY: the URL comes from the MCP caller and is
+   * fetched BY THE SERVER, so it must be guarded before and after the request.
+   * The guards replace the former local-file checks in uploadImage:
+   *   - scheme allowlist (http/https only) — rejects file:, data:, ftp:, etc.,
+   *     so the caller cannot use this path to read local files or other schemes;
+   *   - a size cap enforced both via axios maxContentLength/maxBodyLength AND a
+   *     post-download buffer.length re-check (defends against a missing/lying
+   *     Content-Length), so a huge response cannot exhaust memory;
+   *   - a 30s timeout. The timeout matters because replaceImage holds the
+   *     per-page lock across this upload, so a hung download would wedge the
+   *     lock for that page.
+   * We deliberately do NOT block private IP ranges, because blocking would
+   * break legitimate internal-image use. NOTE(review): the filePath path is
+   * removed, so the caller no longer has host-file access; re-evaluate
+   * whether fetching internal URLs (SSRF) is acceptable for this deployment.
+   */
+  private async fetchRemoteImage(
+    url: string,
+    maxBytes: number,
+  ): Promise<{ buffer: Buffer; mime: string; fileName: string }> {
+    // Scheme allowlist first — cheapest guard, and rejects non-http(s) schemes
+    // (file:, data:, ftp:, ...) before any network request is made.
+    let parsed: URL;
+    try {
+      parsed = new URL(url);
+    } catch (e: any) {
+      throw new Error(`Invalid image URL "${url}": ${e.message}`);
+    }
+    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
      throw new Error(
-        `unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`,
+        `unsupported image URL scheme "${parsed.protocol}"; only http and https are allowed`,
      );
    }
-    return mime;
+
+    let response;
+    try {
+      response = await axios.get(url, {
+        responseType: "arraybuffer",
+        timeout: 30000,
+        maxContentLength: maxBytes,
+        maxBodyLength: maxBytes,
+        headers: { Accept: "image/*" },
+      });
+    } catch (error) {
+      // Keep the thrown message free of the raw response body (it may echo
+      // server internals); surface only status/statusText. The full body is
+      // logged under DEBUG for diagnostics.
+      if (axios.isAxiosError(error)) {
+        if (process.env.DEBUG) {
+          console.error(
+            "Image download failed; response body:",
+            JSON.stringify(error.response?.data),
+          );
+        }
+        throw new Error(
+          `Image download failed for "${url}": ${error.response?.status ?? ""} ${error.response?.statusText ?? error.message}`.trim(),
+        );
+      }
+      throw error;
+    }
+
+    // axios returns an ArrayBuffer for responseType: "arraybuffer".
+    const buffer = Buffer.from(response.data);
+    // Re-check the size: maxContentLength relies on Content-Length, which may be
+    // absent or lie, so guard against the actual byte count too.
+    if (buffer.length === 0) {
+      throw new Error(`Empty image response from "${url}"`);
+    }
+    if (buffer.length > maxBytes) {
+      throw new Error(
+        `Image too large: ${buffer.length} bytes exceeds the ${maxBytes}-byte cap`,
+      );
+    }
+
+    // Resolve MIME: prefer the response Content-Type (strip any "; charset=..."
+    // parameter, lowercase, trim) mapped through the supported set; if the
+    // header is generic/missing/unsupported, fall back to the URL path
+    // extension via the existing extension->MIME logic.
+    const rawCt = response.headers?.["content-type"];
+    let mime: string | null = null;
+    if (typeof rawCt === "string" && rawCt.length > 0) {
+      const ct = rawCt.split(";")[0].trim().toLowerCase();
+      mime = this.supportedImageMime(ct);
+    }
+    if (!mime) {
+      // Fall back to the URL path extension. Use the pathname so the query
+      // string never contaminates the extension lookup.
+      const ext = extname(parsed.pathname).toLowerCase();
+      mime = EXT_TO_MIME[ext] ?? null;
+    }
+    if (!mime) {
+      throw new Error(
+        `cannot determine supported image type for "${url}"; supported: png, jpg, jpeg, gif, webp, svg`,
+      );
+    }
+
+    // Build a filename from the URL path basename (ignore the query string),
+    // defaulting to "image" when empty, and ensure it ends with the canonical
+    // extension for the resolved MIME (append it when missing/mismatched).
+    const canonicalExt = MIME_TO_EXT[mime];
+    let fileName = basename(parsed.pathname) || "image";
+    if (extname(fileName).toLowerCase() !== canonicalExt) {
+      fileName += canonicalExt;
+    }
+
+    return { buffer, mime, fileName };
  }

  /** Build a Docmost ProseMirror image node from an uploaded attachment. */
@@ -2072,49 +2186,22 @@ export class DocmostClient {
  }

  /**
-   * Upload a local image file as an attachment of a page and return the
-   * attachment metadata plus a ready-to-insert ProseMirror image node.
+   * Download a remote image from an http(s) URL and upload it as an attachment
+   * of a page, returning the attachment metadata plus a ready-to-insert
+   * ProseMirror image node. Local file paths are intentionally not supported:
+   * the MCP caller is a remote AI with no access to this server's filesystem.
   */
-  async uploadImage(pageId: string, filePath: string) {
+  async uploadImage(pageId: string, url: string) {
    await this.ensureAuthenticated();

-    // HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at
-    // the server host's local filesystem, so it must be validated BEFORE any
-    // bytes are read. Without these guards a caller could (a) read an arbitrary
-    // file via path traversal, (b) follow a symlink to a sensitive target, or
-    // (c) exhaust memory by reading a huge file. Order matters: validate the
-    // extension, then stat (regular-file + size cap), and only then read.
-
-    // (a) Extension allowlist first — cheap, and rejects non-images up front.
-    const mime = this.imageMimeFromPath(filePath);
-
-    // (b) Stat the path: it must be a regular file (rejects directories, FIFOs,
-    // devices, sockets) and stay under the size cap. statSync follows symlinks,
-    // so a symlink is only accepted when its TARGET is a regular file within
-    // the cap — the intended behaviour for a local image path.
    const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB
-    let stat;
-    try {
-      stat = statSync(filePath);
-    } catch (e: any) {
-      throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`);
-    }
-    if (!stat.isFile()) {
-      throw new Error(`Not a regular file: "${filePath}"`);
-    }
-    if (stat.size > MAX_IMAGE_BYTES) {
-      throw new Error(
-        `Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`,
-      );
-    }

-    // (c) Only now read the bytes.
-    let fileBuffer: Buffer;
-    try {
-      fileBuffer = readFileSync(filePath);
-    } catch (e: any) {
-      throw new Error(`Cannot read image file at "${filePath}": ${e.message}`);
-    }
+    // Fetch + validate the remote image (scheme allowlist, size cap, timeout).
+    // See fetchRemoteImage for the SSRF / resource trust boundary.
+    const fetched = await this.fetchRemoteImage(url, MAX_IMAGE_BYTES);
+    const fileBuffer = fetched.buffer;
+    const mime = fetched.mime;
+    const fileName = fetched.fileName;

    // Build a FRESH FormData for every send attempt. A FormData body is a
    // single-use stream that is CONSUMED on the first send, so it cannot be
@@ -2127,13 +2214,15 @@ export class DocmostClient {
      const form = new FormData();
      form.append("pageId", pageId);
      form.append("file", fileBuffer, {
-        filename: basename(filePath),
+        filename: fileName,
        contentType: mime,
      });
      return form;
    };

-    const url = `${this.apiUrl}/files/upload`;
+    // Local name distinct from the `url` parameter (the source image URL): this
+    // is the /files/upload endpoint we POST the multipart body to.
+    const uploadUrl = `${this.apiUrl}/files/upload`;
    let response;
    try {
      // Call buildForm() ONCE per attempt and reuse the instance for both
@@ -2146,7 +2235,7 @@ export class DocmostClient {
      // ensureAuthenticated() above guarantees login() ran, so the default
      // header exists here. A 60s timeout keeps a hung upload from wedging the
      // per-page lock (replaceImage holds withPageLock across this call).
-      response = await axios.post(url, form, {
+      response = await axios.post(uploadUrl, form, {
        headers: {
          ...form.getHeaders(),
          Authorization: this.client.defaults.headers.common["Authorization"],
@@ -2162,7 +2251,7 @@ export class DocmostClient {
      ) {
        await this.login();
        const form2 = buildForm();
-        response = await axios.post(url, form2, {
+        response = await axios.post(uploadUrl, form2, {
          headers: {
            ...form2.getHeaders(),
            Authorization:
@@ -2196,10 +2285,9 @@ export class DocmostClient {
    }

    // Some Docmost versions omit fileSize from the upload response. Fall back
-    // to the local stat size (the bytes we just uploaded) so callers never get
-    // an undefined size.
-    const localSize = stat.size;
-    const resolvedSize = att.fileSize ?? localSize;
+    // to the fetched byte length (the bytes we just uploaded) so callers never
+    // get an undefined size.
+    const resolvedSize = att.fileSize ?? fileBuffer.length;

    return {
      attachmentId: att.id,
@@ -2211,7 +2299,8 @@ export class DocmostClient {
  }

  /**
-   * Upload a local image and insert it into a page in one step.
+   * Download an image from a web (http/https) URL, upload it as an attachment,
+   * and insert it into a page in one step.
   * By default the image is appended at the end. With replaceText, the first
   * top-level block whose text contains the string is replaced; with afterText,
   * the image is inserted right after the first matching block. All other
@@ -2219,7 +2308,7 @@ export class DocmostClient {
   */
  async insertImage(
    pageId: string,
-    filePath: string,
+    url: string,
    opts: {
      align?: "left" | "center" | "right";
      alt?: string;
@@ -2227,7 +2316,7 @@ export class DocmostClient {
      afterText?: string;
    } = {},
  ) {
-    const up = await this.uploadImage(pageId, filePath);
+    const up = await this.uploadImage(pageId, url);
    // Reuse the node from uploadImage (clean /api/files/<id>/<file> src), then
    // apply align/alt onto a shallow attrs copy.
    const node: any = { ...up.imageNode, attrs: { ...up.imageNode.attrs } };
@@ -2331,9 +2420,10 @@ export class DocmostClient {
  }

  /**
-   * Replace an existing image in a page with a new file. Uploads the new file as
-   * a brand-new attachment, which yields a fresh clean URL that both renders
-   * correctly and busts browser caches (the URL changed). Finds every image node
+   * Replace an existing image in a page with a new image fetched from a web
+   * (http/https) URL. Uploads the new file as a brand-new attachment, which
+   * yields a fresh clean URL that both renders correctly and busts browser
+   * caches (the URL changed). Finds every image node
   * whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested
   * in callouts/tables) and repoints its src/attachmentId/size, preserving
   * comments, alignment and alt. Operates on the live collab document so comments
@@ -2350,7 +2440,7 @@ export class DocmostClient {
  async replaceImage(
    pageId: string,
    oldAttachmentId: string,
-    filePath: string,
+    url: string,
    opts: { align?: "left" | "center" | "right"; alt?: string } = {},
  ) {
    const collabToken = await this.getCollabTokenWithReauth();
@@ -2405,7 +2495,7 @@ export class DocmostClient {
      // id, new clean URL) and repoint every matching node in a second pass.
      // Still inside the SAME lock, so no other op can have changed the page
      // since the scan.
-      const up = await this.uploadImage(pageId, filePath);
+      const up = await this.uploadImage(pageId, url);

      let replaced = 0;

--- a/packages/mcp/src/index.ts
+++ b/packages/mcp/src/index.ts
@@ -30,7 +30,7 @@ const VERSION = packageJson.version;
 // Editing guide surfaced to MCP clients in the initialize result so they can
 // pick the right tool by intent and avoid resending whole documents.
 const SERVER_INSTRUCTIONS =
-  "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (place a local image file) / replace_image (swap an existing image file). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
+  "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
  "Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " +
  "Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " +
  "Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown.";
@@ -612,7 +612,8 @@ server.registerTool(
  "insert_image",
  {
    description:
-      "Upload a local image and insert it into a page in one step. By default " +
+      "Download an image from a web (http/https) URL and insert it into " +
+      "a page in one step. By default " +
      "appends the image at the end of the page. With replaceText, replaces the " +
      "first top-level block whose text contains that string (handy for " +
      'swapping a text placeholder like "[image: foo.png]" for the real image). ' +
@@ -620,10 +621,10 @@ server.registerTool(
      "that string. Preserves all other block ids.",
    inputSchema: {
      pageId: z.string().min(1),
-      filePath: z
+      imageUrl: z
        .string()
        .min(1)
-        .describe("Absolute local path to the image file"),
+        .describe("http(s) URL of the image to download and upload"),
      align: z.enum(["left", "center", "right"]).optional(),
      alt: z.string().optional(),
      replaceText: z
@@ -640,8 +641,8 @@ server.registerTool(
        ),
    },
  },
-  async ({ pageId, filePath, align, alt, replaceText, afterText }) => {
-    const result = await docmostClient.insertImage(pageId, filePath, {
+  async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => {
+    const result = await docmostClient.insertImage(pageId, imageUrl, {
      align,
      alt,
      replaceText,
@@ -656,7 +657,8 @@ server.registerTool(
  "replace_image",
  {
    description:
-      "Replace an existing image on a page: uploads the new file as a NEW " +
+      "Replace an existing image on a page with a new image fetched from a web " +
+      "(http/https) URL: uploads the new file as a NEW " +
      "attachment (fresh clean URL that renders and busts browser caches), then " +
      "repoints every image node referencing the old attachmentId (recursively, " +
      "incl. callouts/tables) via the live document, preserving comments, " +
@@ -670,19 +672,24 @@ server.registerTool(
        .string()
        .min(1)
        .describe("attachmentId of the image currently in the page to replace"),
-      filePath: z
+      imageUrl: z
        .string()
        .min(1)
-        .describe("Absolute local path to the new image file"),
+        .describe("http(s) URL of the new image to download"),
      align: z.enum(["left", "center", "right"]).optional(),
      alt: z.string().optional(),
    },
  },
-  async ({ pageId, attachmentId, filePath, align, alt }) => {
-    const result = await docmostClient.replaceImage(pageId, attachmentId, filePath, {
+  async ({ pageId, attachmentId, imageUrl, align, alt }) => {
+    const result = await docmostClient.replaceImage(
+      pageId,
+      attachmentId,
+      imageUrl,
+      {
        align,
        alt,
-    });
+      },
+    );
    return jsonContent(result);
  },
 );