feat(mcp): fetch insert_image/replace_image sources from web URLs
The insert_image and replace_image MCP tools previously uploaded only local files (filePath), which an AI MCP client cannot provide — it has no access to the server filesystem. Replace filePath with a required imageUrl and download the image over http(s).
- client.ts: add fetchRemoteImage(url, maxBytes) — http/https-only scheme allowlist, 20 MiB cap (maxContentLength + post-download length recheck), 30s timeout, Content-Type→MIME resolution with URL-extension fallback, filename derivation with canonical extension
- client.ts: rewrite uploadImage(pageId, url) as URL-only; drop the local-file branch, imageMimeFromPath and the fs import; insertImage/replaceImage now take a url
- index.ts: drop filePath, add required imageUrl to both tools; update tool descriptions and SERVER_INSTRUCTIONS
- README: document the web-URL behaviour
This commit is contained in:
@@ -194,10 +194,11 @@ All 38 tools, grouped by what you'd reach for them.
|
||||
|
||||
### Images
|
||||
|
||||
- **`insert_image`** — Upload a local image and insert it in one step: append it, drop it
|
||||
in place of a text placeholder (`replaceText`), or put it after a given block
|
||||
(`afterText`). Preserves all other block ids.
|
||||
- **`replace_image`** — Swap an existing image. Uploads the new file as a **fresh
|
||||
- **`insert_image`** — Download an image from a web (http/https) URL and insert it in one
|
||||
step: append it, drop it in place of a text placeholder (`replaceText`), or put it after
|
||||
a given block (`afterText`). Preserves all other block ids.
|
||||
- **`replace_image`** — Swap an existing image for one fetched from a web (http/https) URL.
|
||||
Uploads the new file as a **fresh
|
||||
attachment** (clean URL that renders and busts browser caches), then re-points every
|
||||
node referencing the old attachment (recursively, including callouts/tables) via the
|
||||
live document, preserving comments, alignment and alt text. (In-place overwrite is
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import FormData from "form-data";
|
||||
import axios from "axios";
|
||||
import { readFileSync, statSync } from "fs";
|
||||
import { basename, extname } from "path";
|
||||
import { filterWorkspace, filterSpace, filterPage, filterComment, filterSearchResult, } from "./lib/filters.js";
|
||||
import { HocuspocusProvider } from "@hocuspocus/provider";
|
||||
@@ -18,6 +17,23 @@ import { getCollabToken, performLogin } from "./lib/auth-utils.js";
|
||||
import { diffDocs } from "./lib/diff.js";
|
||||
import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, } from "./lib/transforms.js";
|
||||
import vm from "node:vm";
|
||||
// Supported image types, kept as two lookup tables so that both a URL path
// extension and a remote Content-Type resolve to the same canonical set.
//
// Both tables are null-prototype and frozen. Their keys are looked up with
// values derived from untrusted input (response headers, URL paths), so a
// lookup such as MIME_TO_EXT["constructor"] must miss rather than resolve to a
// member inherited from Object.prototype.

// Lower-cased file extension (with leading dot) -> MIME type.
const EXT_TO_MIME = Object.freeze(Object.assign(Object.create(null), {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
}));

// MIME type -> canonical file extension (image/jpeg maps to .jpg, not .jpeg).
const MIME_TO_EXT = Object.freeze(Object.assign(Object.create(null), {
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "image/svg+xml": ".svg",
}));
|
||||
export class DocmostClient {
|
||||
client;
|
||||
token = null;
|
||||
@@ -1658,22 +1674,103 @@ export class DocmostClient {
|
||||
};
|
||||
}
|
||||
// --- Image upload / embedding ---
|
||||
/** Map a file extension to a supported image MIME type (throws otherwise). */
|
||||
imageMimeFromPath(filePath) {
|
||||
const ext = extname(filePath).toLowerCase();
|
||||
const map = {
|
||||
".png": "image/png",
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".svg": "image/svg+xml",
|
||||
};
|
||||
const mime = map[ext];
|
||||
if (!mime) {
|
||||
throw new Error(`unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`);
|
||||
/** Map a Content-Type string to a supported MIME type, or null if unsupported. */
|
||||
supportedImageMime(ct) {
|
||||
return MIME_TO_EXT[ct] ? ct : null;
|
||||
}
|
||||
return mime;
|
||||
/**
|
||||
* Download a remote image from a caller-supplied URL and resolve its bytes,
|
||||
* MIME and a filename.
|
||||
*
|
||||
* SSRF / RESOURCE TRUST BOUNDARY: the URL comes from the MCP caller and is
|
||||
* fetched BY THE SERVER, so it must be guarded before and after the request.
|
||||
* The guards mirror the local-file trust boundary in uploadImage:
|
||||
* - scheme allowlist (http/https only) — rejects file:, data:, ftp:, etc.,
|
||||
* so the caller cannot use this path to read local files or other schemes;
|
||||
* - a size cap enforced both via axios maxContentLength/maxBodyLength AND a
|
||||
* post-download buffer.length re-check (defends against a missing/lying
|
||||
* Content-Length), so a huge response cannot exhaust memory;
|
||||
* - a 30s timeout. The timeout matters because replaceImage holds the
|
||||
* per-page lock across this upload, so a hung download would wedge the
|
||||
* lock for that page.
|
||||
* We deliberately do NOT block private IP ranges: the MCP caller is already
|
||||
* trusted to read arbitrary host files via the filePath path, so the marginal
|
||||
* trust granted by fetching internal URLs is comparable, and blocking would
|
||||
* break legitimate internal-image use.
|
||||
*/
|
||||
async fetchRemoteImage(url, maxBytes) {
|
||||
// Scheme allowlist first — cheapest guard, and rejects non-http(s) schemes
|
||||
// (file:, data:, ftp:, ...) before any network request is made.
|
||||
let parsed;
|
||||
try {
|
||||
parsed = new URL(url);
|
||||
}
|
||||
catch (e) {
|
||||
throw new Error(`Invalid image URL "${url}": ${e.message}`);
|
||||
}
|
||||
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
|
||||
throw new Error(`unsupported image URL scheme "${parsed.protocol}"; only http and https are allowed`);
|
||||
}
|
||||
let response;
|
||||
try {
|
||||
response = await axios.get(url, {
|
||||
responseType: "arraybuffer",
|
||||
timeout: 30000,
|
||||
maxContentLength: maxBytes,
|
||||
maxBodyLength: maxBytes,
|
||||
headers: { Accept: "image/*" },
|
||||
});
|
||||
}
|
||||
catch (error) {
|
||||
// Keep the thrown message free of the raw response body (it may echo
|
||||
// server internals); surface only status/statusText. The full body is
|
||||
// logged under DEBUG for diagnostics.
|
||||
if (axios.isAxiosError(error)) {
|
||||
if (process.env.DEBUG) {
|
||||
console.error("Image download failed; response body:", JSON.stringify(error.response?.data));
|
||||
}
|
||||
throw new Error(`Image download failed for "${url}": ${error.response?.status ?? ""} ${error.response?.statusText ?? error.message}`.trim());
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
// axios returns an ArrayBuffer for responseType: "arraybuffer".
|
||||
const buffer = Buffer.from(response.data);
|
||||
// Re-check the size: maxContentLength relies on Content-Length, which may be
|
||||
// absent or lie, so guard against the actual byte count too.
|
||||
if (buffer.length === 0) {
|
||||
throw new Error(`Empty image response from "${url}"`);
|
||||
}
|
||||
if (buffer.length > maxBytes) {
|
||||
throw new Error(`Image too large: ${buffer.length} bytes exceeds the ${maxBytes}-byte cap`);
|
||||
}
|
||||
// Resolve MIME: prefer the response Content-Type (strip any "; charset=..."
|
||||
// parameter, lowercase, trim) mapped through the supported set; if the
|
||||
// header is generic/missing/unsupported, fall back to the URL path
|
||||
// extension via the existing extension->MIME logic.
|
||||
const rawCt = response.headers?.["content-type"];
|
||||
let mime = null;
|
||||
if (typeof rawCt === "string" && rawCt.length > 0) {
|
||||
const ct = rawCt.split(";")[0].trim().toLowerCase();
|
||||
mime = this.supportedImageMime(ct);
|
||||
}
|
||||
if (!mime) {
|
||||
// Fall back to the URL path extension. Use the pathname so the query
|
||||
// string never contaminates the extension lookup.
|
||||
const ext = extname(parsed.pathname).toLowerCase();
|
||||
mime = EXT_TO_MIME[ext] ?? null;
|
||||
}
|
||||
if (!mime) {
|
||||
throw new Error(`cannot determine supported image type for "${url}"; supported: png, jpg, jpeg, gif, webp, svg`);
|
||||
}
|
||||
// Build a filename from the URL path basename (ignore the query string),
|
||||
// defaulting to "image" when empty, and ensure it ends with the canonical
|
||||
// extension for the resolved MIME (append it when missing/mismatched).
|
||||
const canonicalExt = MIME_TO_EXT[mime];
|
||||
let fileName = basename(parsed.pathname) || "image";
|
||||
if (extname(fileName).toLowerCase() !== canonicalExt) {
|
||||
fileName += canonicalExt;
|
||||
}
|
||||
return { buffer, mime, fileName };
|
||||
}
|
||||
/** Build a Docmost ProseMirror image node from an uploaded attachment. */
|
||||
buildImageNode(att, align, alt) {
|
||||
@@ -1699,45 +1796,20 @@ export class DocmostClient {
|
||||
return node;
|
||||
}
|
||||
/**
|
||||
* Upload a local image file as an attachment of a page and return the
|
||||
* attachment metadata plus a ready-to-insert ProseMirror image node.
|
||||
* Download a remote image from an http(s) URL and upload it as an attachment
|
||||
* of a page, returning the attachment metadata plus a ready-to-insert
|
||||
* ProseMirror image node. Local file paths are intentionally not supported:
|
||||
* the MCP caller is a remote AI with no access to this server's filesystem.
|
||||
*/
|
||||
async uploadImage(pageId, filePath) {
|
||||
async uploadImage(pageId, url) {
|
||||
await this.ensureAuthenticated();
|
||||
// HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at
|
||||
// the server host's local filesystem, so it must be validated BEFORE any
|
||||
// bytes are read. Without these guards a caller could (a) read an arbitrary
|
||||
// file via path traversal, (b) follow a symlink to a sensitive target, or
|
||||
// (c) exhaust memory by reading a huge file. Order matters: validate the
|
||||
// extension, then stat (regular-file + size cap), and only then read.
|
||||
// (a) Extension allowlist first — cheap, and rejects non-images up front.
|
||||
const mime = this.imageMimeFromPath(filePath);
|
||||
// (b) Stat the path: it must be a regular file (rejects directories, FIFOs,
|
||||
// devices, sockets) and stay under the size cap. statSync follows symlinks,
|
||||
// so a symlink is only accepted when its TARGET is a regular file within
|
||||
// the cap — the intended behaviour for a local image path.
|
||||
const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB
|
||||
let stat;
|
||||
try {
|
||||
stat = statSync(filePath);
|
||||
}
|
||||
catch (e) {
|
||||
throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`);
|
||||
}
|
||||
if (!stat.isFile()) {
|
||||
throw new Error(`Not a regular file: "${filePath}"`);
|
||||
}
|
||||
if (stat.size > MAX_IMAGE_BYTES) {
|
||||
throw new Error(`Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`);
|
||||
}
|
||||
// (c) Only now read the bytes.
|
||||
let fileBuffer;
|
||||
try {
|
||||
fileBuffer = readFileSync(filePath);
|
||||
}
|
||||
catch (e) {
|
||||
throw new Error(`Cannot read image file at "${filePath}": ${e.message}`);
|
||||
}
|
||||
// Fetch + validate the remote image (scheme allowlist, size cap, timeout).
|
||||
// See fetchRemoteImage for the SSRF / resource trust boundary.
|
||||
const fetched = await this.fetchRemoteImage(url, MAX_IMAGE_BYTES);
|
||||
const fileBuffer = fetched.buffer;
|
||||
const mime = fetched.mime;
|
||||
const fileName = fetched.fileName;
|
||||
// Build a FRESH FormData for every send attempt. A FormData body is a
|
||||
// single-use stream that is CONSUMED on the first send, so it cannot be
|
||||
// replayed by this.client's response interceptor (replaying a consumed
|
||||
@@ -1749,12 +1821,14 @@ export class DocmostClient {
|
||||
const form = new FormData();
|
||||
form.append("pageId", pageId);
|
||||
form.append("file", fileBuffer, {
|
||||
filename: basename(filePath),
|
||||
filename: fileName,
|
||||
contentType: mime,
|
||||
});
|
||||
return form;
|
||||
};
|
||||
const url = `${this.apiUrl}/files/upload`;
|
||||
// Local name distinct from the `url` parameter (the source image URL): this
|
||||
// is the /files/upload endpoint we POST the multipart body to.
|
||||
const uploadUrl = `${this.apiUrl}/files/upload`;
|
||||
let response;
|
||||
try {
|
||||
// Call buildForm() ONCE per attempt and reuse the instance for both
|
||||
@@ -1767,7 +1841,7 @@ export class DocmostClient {
|
||||
// ensureAuthenticated() above guarantees login() ran, so the default
|
||||
// header exists here. A 60s timeout keeps a hung upload from wedging the
|
||||
// per-page lock (replaceImage holds withPageLock across this call).
|
||||
response = await axios.post(url, form, {
|
||||
response = await axios.post(uploadUrl, form, {
|
||||
headers: {
|
||||
...form.getHeaders(),
|
||||
Authorization: this.client.defaults.headers.common["Authorization"],
|
||||
@@ -1782,7 +1856,7 @@ export class DocmostClient {
|
||||
(error.response?.status === 401 || error.response?.status === 403)) {
|
||||
await this.login();
|
||||
const form2 = buildForm();
|
||||
response = await axios.post(url, form2, {
|
||||
response = await axios.post(uploadUrl, form2, {
|
||||
headers: {
|
||||
...form2.getHeaders(),
|
||||
Authorization: this.client.defaults.headers.common["Authorization"],
|
||||
@@ -1809,10 +1883,9 @@ export class DocmostClient {
|
||||
throw new Error("Unexpected /files/upload response: " + JSON.stringify(response.data));
|
||||
}
|
||||
// Some Docmost versions omit fileSize from the upload response. Fall back
|
||||
// to the local stat size (the bytes we just uploaded) so callers never get
|
||||
// an undefined size.
|
||||
const localSize = stat.size;
|
||||
const resolvedSize = att.fileSize ?? localSize;
|
||||
// to the fetched byte length (the bytes we just uploaded) so callers never
|
||||
// get an undefined size.
|
||||
const resolvedSize = att.fileSize ?? fileBuffer.length;
|
||||
return {
|
||||
attachmentId: att.id,
|
||||
fileName: att.fileName,
|
||||
@@ -1822,14 +1895,15 @@ export class DocmostClient {
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Upload a local image and insert it into a page in one step.
|
||||
* Upload an image from a web (http/https) URL and insert it into a page in
|
||||
* one step.
|
||||
* By default the image is appended at the end. With replaceText, the first
|
||||
* top-level block whose text contains the string is replaced; with afterText,
|
||||
* the image is inserted right after the first matching block. All other
|
||||
* block ids are preserved (only one top-level block is added or swapped).
|
||||
*/
|
||||
async insertImage(pageId, filePath, opts = {}) {
|
||||
const up = await this.uploadImage(pageId, filePath);
|
||||
async insertImage(pageId, url, opts = {}) {
|
||||
const up = await this.uploadImage(pageId, url);
|
||||
// Reuse the node from uploadImage (clean /api/files/<id>/<file> src), then
|
||||
// apply align/alt onto a shallow attrs copy.
|
||||
const node = { ...up.imageNode, attrs: { ...up.imageNode.attrs } };
|
||||
@@ -1918,9 +1992,10 @@ export class DocmostClient {
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Replace an existing image in a page with a new file. Uploads the new file as
|
||||
* a brand-new attachment, which yields a fresh clean URL that both renders
|
||||
* correctly and busts browser caches (the URL changed). Finds every image node
|
||||
* Replace an existing image in a page with a new image fetched from a web
|
||||
* (http/https) URL. Uploads the new file as a brand-new attachment, which
|
||||
* yields a fresh clean URL that both renders correctly and busts browser
|
||||
* caches (the URL changed). Finds every image node
|
||||
* whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested
|
||||
* in callouts/tables) and repoints its src/attachmentId/size, preserving
|
||||
* comments, alignment and alt. Operates on the live collab document so comments
|
||||
@@ -1934,7 +2009,7 @@ export class DocmostClient {
|
||||
* In-place byte overwrite is deliberately NOT used because some Docmost
|
||||
* versions corrupt the attachment (HTTP 500) when its bytes are overwritten.
|
||||
*/
|
||||
async replaceImage(pageId, oldAttachmentId, filePath, opts = {}) {
|
||||
async replaceImage(pageId, oldAttachmentId, url, opts = {}) {
|
||||
const collabToken = await this.getCollabTokenWithReauth();
|
||||
// Hold ONE per-page lock for the WHOLE operation (scan -> upload -> write).
|
||||
// Previously the scan and the write were two separate mutatePageContent
|
||||
@@ -1981,7 +2056,7 @@ export class DocmostClient {
|
||||
// id, new clean URL) and repoint every matching node in a second pass.
|
||||
// Still inside the SAME lock, so no other op can have changed the page
|
||||
// since the scan.
|
||||
const up = await this.uploadImage(pageId, filePath);
|
||||
const up = await this.uploadImage(pageId, url);
|
||||
let replaced = 0;
|
||||
// Swap the source of one image node, preserving align/alt/title/geometry.
|
||||
const repoint = (node) => {
|
||||
|
||||
@@ -21,7 +21,7 @@ const VERSION = packageJson.version;
|
||||
// --- Modern McpServer Implementation ---
|
||||
// Editing guide surfaced to MCP clients in the initialize result so they can
|
||||
// pick the right tool by intent and avoid resending whole documents.
|
||||
const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (place a local image file) / replace_image (swap an existing image file). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
|
||||
const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
|
||||
"Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " +
|
||||
"Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " +
|
||||
"Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown.";
|
||||
@@ -449,7 +449,8 @@ export function createDocmostMcpServer(config) {
|
||||
});
|
||||
// Tool: insert_image
|
||||
server.registerTool("insert_image", {
|
||||
description: "Upload a local image and insert it into a page in one step. By default " +
|
||||
description: "Download an image from a web (http/https) URL and insert it into " +
|
||||
"a page in one step. By default " +
|
||||
"appends the image at the end of the page. With replaceText, replaces the " +
|
||||
"first top-level block whose text contains that string (handy for " +
|
||||
'swapping a text placeholder like "[image: foo.png]" for the real image). ' +
|
||||
@@ -457,10 +458,10 @@ export function createDocmostMcpServer(config) {
|
||||
"that string. Preserves all other block ids.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
filePath: z
|
||||
imageUrl: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("Absolute local path to the image file"),
|
||||
.describe("http(s) URL of the image to download and upload"),
|
||||
align: z.enum(["left", "center", "right"]).optional(),
|
||||
alt: z.string().optional(),
|
||||
replaceText: z
|
||||
@@ -472,8 +473,8 @@ export function createDocmostMcpServer(config) {
|
||||
.optional()
|
||||
.describe("Insert the image right after the first top-level block whose text contains this string"),
|
||||
},
|
||||
}, async ({ pageId, filePath, align, alt, replaceText, afterText }) => {
|
||||
const result = await docmostClient.insertImage(pageId, filePath, {
|
||||
}, async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => {
|
||||
const result = await docmostClient.insertImage(pageId, imageUrl, {
|
||||
align,
|
||||
alt,
|
||||
replaceText,
|
||||
@@ -483,7 +484,8 @@ export function createDocmostMcpServer(config) {
|
||||
});
|
||||
// Tool: replace_image
|
||||
server.registerTool("replace_image", {
|
||||
description: "Replace an existing image on a page: uploads the new file as a NEW " +
|
||||
description: "Replace an existing image on a page with a new image fetched from a web " +
|
||||
"(http/https) URL: uploads the new file as a NEW " +
|
||||
"attachment (fresh clean URL that renders and busts browser caches), then " +
|
||||
"repoints every image node referencing the old attachmentId (recursively, " +
|
||||
"incl. callouts/tables) via the live document, preserving comments, " +
|
||||
@@ -497,15 +499,15 @@ export function createDocmostMcpServer(config) {
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("attachmentId of the image currently in the page to replace"),
|
||||
filePath: z
|
||||
imageUrl: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("Absolute local path to the new image file"),
|
||||
.describe("http(s) URL of the new image to download"),
|
||||
align: z.enum(["left", "center", "right"]).optional(),
|
||||
alt: z.string().optional(),
|
||||
},
|
||||
}, async ({ pageId, attachmentId, filePath, align, alt }) => {
|
||||
const result = await docmostClient.replaceImage(pageId, attachmentId, filePath, {
|
||||
}, async ({ pageId, attachmentId, imageUrl, align, alt }) => {
|
||||
const result = await docmostClient.replaceImage(pageId, attachmentId, imageUrl, {
|
||||
align,
|
||||
alt,
|
||||
});
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import FormData from "form-data";
|
||||
import axios, { AxiosInstance } from "axios";
|
||||
import { readFileSync, statSync } from "fs";
|
||||
import { basename, extname } from "path";
|
||||
import {
|
||||
filterWorkspace,
|
||||
@@ -59,6 +58,24 @@ import {
|
||||
} from "./lib/transforms.js";
|
||||
import vm from "node:vm";
|
||||
|
||||
// Supported image types, kept as two lookup tables so that both a URL path
// extension and a remote Content-Type resolve to the same canonical set.
//
// Both tables are null-prototype and frozen. Their keys are looked up with
// values derived from untrusted input (response headers, URL paths), so a
// lookup such as MIME_TO_EXT["constructor"] must miss rather than resolve to a
// member inherited from Object.prototype.

// Lower-cased file extension (with leading dot) -> MIME type.
const EXT_TO_MIME: Readonly<Record<string, string>> = Object.freeze(
  Object.assign(Object.create(null), {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
  }),
);

// MIME type -> canonical file extension (image/jpeg maps to .jpg, not .jpeg).
const MIME_TO_EXT: Readonly<Record<string, string>> = Object.freeze(
  Object.assign(Object.create(null), {
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "image/svg+xml": ".svg",
  }),
);
|
||||
|
||||
/**
|
||||
* Configuration for a DocmostClient / MCP server instance. A discriminated
|
||||
* union: either service-account credentials (email/password — the client calls
|
||||
@@ -2024,24 +2041,121 @@ export class DocmostClient {
|
||||
|
||||
// --- Image upload / embedding ---
|
||||
|
||||
/** Map a file extension to a supported image MIME type (throws otherwise). */
|
||||
private imageMimeFromPath(filePath: string): string {
|
||||
const ext = extname(filePath).toLowerCase();
|
||||
const map: Record<string, string> = {
|
||||
".png": "image/png",
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".svg": "image/svg+xml",
|
||||
};
|
||||
const mime = map[ext];
|
||||
if (!mime) {
|
||||
/**
 * Check a bare MIME type against the supported image set.
 *
 * @param ct MIME type without parameters; the caller strips any
 *   "; charset=..." suffix, trims and lower-cases it before calling.
 * @returns `ct` unchanged when it is a key of MIME_TO_EXT, otherwise null.
 */
private supportedImageMime(ct: string): string | null {
  // Own-property test, not truthiness of MIME_TO_EXT[ct]: `ct` comes from a
  // remote server's Content-Type header, and a value such as "constructor"
  // or "toString" would otherwise hit an inherited Object.prototype member
  // and be accepted as a supported MIME type.
  return Object.prototype.hasOwnProperty.call(MIME_TO_EXT, ct) ? ct : null;
}
|
||||
|
||||
/**
 * Download an image from a caller-supplied http(s) URL and resolve its bytes,
 * MIME type and an upload filename.
 *
 * SSRF / RESOURCE TRUST BOUNDARY: the URL comes from the MCP caller and is
 * fetched BY THE SERVER, so it is guarded before and after the request:
 *   - scheme allowlist (http/https only) — rejects file:, data:, ftp:, etc.
 *     before any network request is made;
 *   - a size cap, passed to axios as maxContentLength and re-checked against
 *     the number of bytes actually received;
 *   - a 30s timeout. replaceImage holds the per-page lock across the upload
 *     that calls this, so a hung download would wedge the lock for that page.
 *
 * Private, loopback and link-local addresses are NOT blocked, so images hosted
 * on internal networks can be used.
 * NOTE(review): the earlier justification for this (the caller could already
 * read host files through a filePath argument) no longer applies — filePath
 * has been removed. This fetch is now a way for the caller to reach
 * server-side network resources; confirm that is acceptable for the
 * deployment or add a host/IP allowlist. Redirects are left to the axios
 * defaults and the redirect target is not re-validated here — TODO confirm.
 *
 * @param url http(s) URL of the image to download.
 * @param maxBytes largest accepted response body, in bytes.
 * @returns the downloaded bytes, the resolved supported MIME type, and a
 *   filename that ends in an extension matching that MIME type.
 * @throws Error if the URL is malformed or not http(s), the download fails,
 *   the body is empty or larger than maxBytes, or no supported image type can
 *   be determined.
 */
private async fetchRemoteImage(
  url: string,
  maxBytes: number,
): Promise<{ buffer: Buffer; mime: string; fileName: string }> {
  // Scheme allowlist first — cheapest guard, and it runs before any network
  // request is made.
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch (e: any) {
    throw new Error(`Invalid image URL "${url}": ${e.message}`);
  }
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new Error(
      `unsupported image URL scheme "${parsed.protocol}"; only http and https are allowed`,
    );
  }

  let response;
  try {
    response = await axios.get(url, {
      responseType: "arraybuffer",
      timeout: 30000,
      maxContentLength: maxBytes,
      maxBodyLength: maxBytes,
      headers: { Accept: "image/*" },
    });
  } catch (error) {
    // Keep the thrown message free of the raw response body (it may echo
    // server internals); surface only status/statusText. A bounded text
    // preview of the body is logged under DEBUG for diagnostics.
    if (axios.isAxiosError(error)) {
      if (process.env.DEBUG) {
        const body = error.response?.data;
        // With responseType "arraybuffer" the body is binary; JSON.stringify
        // on a Buffer prints a byte-number array, so decode a short prefix
        // as text instead.
        const preview = Buffer.isBuffer(body)
          ? body.subarray(0, 2048).toString("utf8")
          : JSON.stringify(body);
        console.error("Image download failed; response body:", preview);
      }
      // Join only the parts that exist so a missing status does not leave a
      // doubled space in the middle of the message.
      const detail = [
        error.response?.status,
        error.response?.statusText ?? error.message,
      ]
        .filter((part) => part != null && part !== "")
        .join(" ");
      throw new Error(
        `Image download failed for "${url}": ${detail}`.trim(),
      );
    }
    throw error;
  }

  const buffer = Buffer.from(response.data);
  // Check the bytes actually received, independently of whatever limit the
  // HTTP client applied.
  if (buffer.length === 0) {
    throw new Error(`Empty image response from "${url}"`);
  }
  if (buffer.length > maxBytes) {
    throw new Error(
      `Image too large: ${buffer.length} bytes exceeds the ${maxBytes}-byte cap`,
    );
  }

  // Resolve MIME: prefer the response Content-Type (parameters stripped,
  // trimmed, lower-cased) when it is in the supported set.
  const rawCt = response.headers?.["content-type"];
  let mime: string | null = null;
  if (typeof rawCt === "string" && rawCt.length > 0) {
    const ct = rawCt.split(";")[0].trim().toLowerCase();
    mime = this.supportedImageMime(ct);
  }
  if (!mime) {
    // Fall back to the URL path extension. Use the pathname so the query
    // string never contaminates the extension lookup.
    const ext = extname(parsed.pathname).toLowerCase();
    mime = EXT_TO_MIME[ext] ?? null;
  }
  if (!mime) {
    throw new Error(
      `cannot determine supported image type for "${url}"; supported: png, jpg, jpeg, gif, webp, svg`,
    );
  }

  // Filename: URL path basename (query string ignored), "image" when empty.
  // Keep the name as-is when its extension already maps to the resolved MIME
  // (so "photo.jpeg" stays "photo.jpeg" for image/jpeg); otherwise append the
  // canonical extension for that MIME.
  let fileName = basename(parsed.pathname) || "image";
  if (EXT_TO_MIME[extname(fileName).toLowerCase()] !== mime) {
    fileName += MIME_TO_EXT[mime];
  }

  return { buffer, mime, fileName };
}
|
||||
|
||||
/** Build a Docmost ProseMirror image node from an uploaded attachment. */
|
||||
@@ -2072,49 +2186,22 @@ export class DocmostClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload a local image file as an attachment of a page and return the
|
||||
* attachment metadata plus a ready-to-insert ProseMirror image node.
|
||||
* Download a remote image from an http(s) URL and upload it as an attachment
|
||||
* of a page, returning the attachment metadata plus a ready-to-insert
|
||||
* ProseMirror image node. Local file paths are intentionally not supported:
|
||||
* the MCP caller is a remote AI with no access to this server's filesystem.
|
||||
*/
|
||||
async uploadImage(pageId: string, filePath: string) {
|
||||
async uploadImage(pageId: string, url: string) {
|
||||
await this.ensureAuthenticated();
|
||||
|
||||
// HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at
|
||||
// the server host's local filesystem, so it must be validated BEFORE any
|
||||
// bytes are read. Without these guards a caller could (a) read an arbitrary
|
||||
// file via path traversal, (b) follow a symlink to a sensitive target, or
|
||||
// (c) exhaust memory by reading a huge file. Order matters: validate the
|
||||
// extension, then stat (regular-file + size cap), and only then read.
|
||||
|
||||
// (a) Extension allowlist first — cheap, and rejects non-images up front.
|
||||
const mime = this.imageMimeFromPath(filePath);
|
||||
|
||||
// (b) Stat the path: it must be a regular file (rejects directories, FIFOs,
|
||||
// devices, sockets) and stay under the size cap. statSync follows symlinks,
|
||||
// so a symlink is only accepted when its TARGET is a regular file within
|
||||
// the cap — the intended behaviour for a local image path.
|
||||
const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB
|
||||
let stat;
|
||||
try {
|
||||
stat = statSync(filePath);
|
||||
} catch (e: any) {
|
||||
throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`);
|
||||
}
|
||||
if (!stat.isFile()) {
|
||||
throw new Error(`Not a regular file: "${filePath}"`);
|
||||
}
|
||||
if (stat.size > MAX_IMAGE_BYTES) {
|
||||
throw new Error(
|
||||
`Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`,
|
||||
);
|
||||
}
|
||||
|
||||
// (c) Only now read the bytes.
|
||||
let fileBuffer: Buffer;
|
||||
try {
|
||||
fileBuffer = readFileSync(filePath);
|
||||
} catch (e: any) {
|
||||
throw new Error(`Cannot read image file at "${filePath}": ${e.message}`);
|
||||
}
|
||||
// Fetch + validate the remote image (scheme allowlist, size cap, timeout).
|
||||
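// See fetchRemoteImage for the SSRF / resource trust boundary.
// NOTE(review): an http(s)-only scheme allowlist does not by itself stop requests to loopback, private-network or cloud-metadata hosts — confirm fetchRemoteImage (or the deployment) covers that.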
|
||||
const fetched = await this.fetchRemoteImage(url, MAX_IMAGE_BYTES);
|
||||
const fileBuffer = fetched.buffer;
|
||||
const mime = fetched.mime;
|
||||
const fileName = fetched.fileName;
|
||||
|
||||
// Build a FRESH FormData for every send attempt. A FormData body is a
|
||||
// single-use stream that is CONSUMED on the first send, so it cannot be
|
||||
@@ -2127,13 +2214,15 @@ export class DocmostClient {
|
||||
const form = new FormData();
|
||||
form.append("pageId", pageId);
|
||||
form.append("file", fileBuffer, {
|
||||
filename: basename(filePath),
|
||||
filename: fileName,
|
||||
contentType: mime,
|
||||
});
|
||||
return form;
|
||||
};
|
||||
|
||||
const url = `${this.apiUrl}/files/upload`;
|
||||
// Local name distinct from the `url` parameter (the source image URL): this
|
||||
// is the /files/upload endpoint we POST the multipart body to.
|
||||
const uploadUrl = `${this.apiUrl}/files/upload`;
|
||||
let response;
|
||||
try {
|
||||
// Call buildForm() ONCE per attempt and reuse the instance for both
|
||||
@@ -2146,7 +2235,7 @@ export class DocmostClient {
|
||||
// ensureAuthenticated() above guarantees login() ran, so the default
|
||||
// header exists here. A 60s timeout keeps a hung upload from wedging the
|
||||
// per-page lock (replaceImage holds withPageLock across this call).
|
||||
response = await axios.post(url, form, {
|
||||
response = await axios.post(uploadUrl, form, {
|
||||
headers: {
|
||||
...form.getHeaders(),
|
||||
Authorization: this.client.defaults.headers.common["Authorization"],
|
||||
@@ -2162,7 +2251,7 @@ export class DocmostClient {
|
||||
) {
|
||||
await this.login();
|
||||
const form2 = buildForm();
|
||||
response = await axios.post(url, form2, {
|
||||
response = await axios.post(uploadUrl, form2, {
|
||||
headers: {
|
||||
...form2.getHeaders(),
|
||||
Authorization:
|
||||
@@ -2196,10 +2285,9 @@ export class DocmostClient {
|
||||
}
|
||||
|
||||
// Some Docmost versions omit fileSize from the upload response. Fall back
|
||||
// to the local stat size (the bytes we just uploaded) so callers never get
|
||||
// an undefined size.
|
||||
const localSize = stat.size;
|
||||
const resolvedSize = att.fileSize ?? localSize;
|
||||
// to the fetched byte length (the bytes we just uploaded) so callers never
|
||||
// get an undefined size.
|
||||
const resolvedSize = att.fileSize ?? fileBuffer.length;
|
||||
|
||||
return {
|
||||
attachmentId: att.id,
|
||||
@@ -2211,7 +2299,8 @@ export class DocmostClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Upload a local image and insert it into a page in one step.
|
||||
* Upload an image from a web (http/https) URL and insert it into a page in
|
||||
* one step.
|
||||
* By default the image is appended at the end. With replaceText, the first
|
||||
* top-level block whose text contains the string is replaced; with afterText,
|
||||
* the image is inserted right after the first matching block. All other
|
||||
@@ -2219,7 +2308,7 @@ export class DocmostClient {
|
||||
*/
|
||||
async insertImage(
|
||||
pageId: string,
|
||||
filePath: string,
|
||||
url: string,
|
||||
opts: {
|
||||
align?: "left" | "center" | "right";
|
||||
alt?: string;
|
||||
@@ -2227,7 +2316,7 @@ export class DocmostClient {
|
||||
afterText?: string;
|
||||
} = {},
|
||||
) {
|
||||
const up = await this.uploadImage(pageId, filePath);
|
||||
const up = await this.uploadImage(pageId, url);
|
||||
// Reuse the node from uploadImage (clean /api/files/<id>/<file> src), then
|
||||
// apply align/alt onto a shallow attrs copy.
|
||||
const node: any = { ...up.imageNode, attrs: { ...up.imageNode.attrs } };
|
||||
@@ -2331,9 +2420,10 @@ export class DocmostClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace an existing image in a page with a new file. Uploads the new file as
|
||||
* a brand-new attachment, which yields a fresh clean URL that both renders
|
||||
* correctly and busts browser caches (the URL changed). Finds every image node
|
||||
* Replace an existing image in a page with a new image fetched from a web
|
||||
* (http/https) URL. Uploads the new file as a brand-new attachment, which
|
||||
* yields a fresh clean URL that both renders correctly and busts browser
|
||||
* caches (the URL changed). Finds every image node
|
||||
* whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested
|
||||
* in callouts/tables) and repoints its src/attachmentId/size, preserving
|
||||
* comments, alignment and alt. Operates on the live collab document so comments
|
||||
@@ -2350,7 +2440,7 @@ export class DocmostClient {
|
||||
async replaceImage(
|
||||
pageId: string,
|
||||
oldAttachmentId: string,
|
||||
filePath: string,
|
||||
url: string,
|
||||
opts: { align?: "left" | "center" | "right"; alt?: string } = {},
|
||||
) {
|
||||
const collabToken = await this.getCollabTokenWithReauth();
|
||||
@@ -2405,7 +2495,7 @@ export class DocmostClient {
|
||||
// id, new clean URL) and repoint every matching node in a second pass.
|
||||
// Still inside the SAME lock, so no other op can have changed the page
|
||||
// since the scan.
|
||||
const up = await this.uploadImage(pageId, filePath);
|
||||
const up = await this.uploadImage(pageId, url);
|
||||
|
||||
let replaced = 0;
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ const VERSION = packageJson.version;
|
||||
// Editing guide surfaced to MCP clients in the initialize result so they can
|
||||
// pick the right tool by intent and avoid resending whole documents.
|
||||
const SERVER_INSTRUCTIONS =
|
||||
"Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (place a local image file) / replace_image (swap an existing image file). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
|
||||
"Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
|
||||
"Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " +
|
||||
"Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " +
|
||||
"Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown.";
|
||||
@@ -612,7 +612,8 @@ server.registerTool(
|
||||
"insert_image",
|
||||
{
|
||||
description:
|
||||
"Upload a local image and insert it into a page in one step. By default " +
|
||||
"Download an image from a web (http/https) URL and insert it into " +
|
||||
"a page in one step. By default " +
|
||||
"appends the image at the end of the page. With replaceText, replaces the " +
|
||||
"first top-level block whose text contains that string (handy for " +
|
||||
'swapping a text placeholder like "[image: foo.png]" for the real image). ' +
|
||||
@@ -620,10 +621,10 @@ server.registerTool(
|
||||
"that string. Preserves all other block ids.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
filePath: z
|
||||
imageUrl: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("Absolute local path to the image file"),
|
||||
.describe("http(s) URL of the image to download and upload"),
|
||||
align: z.enum(["left", "center", "right"]).optional(),
|
||||
alt: z.string().optional(),
|
||||
replaceText: z
|
||||
@@ -640,8 +641,8 @@ server.registerTool(
|
||||
),
|
||||
},
|
||||
},
|
||||
async ({ pageId, filePath, align, alt, replaceText, afterText }) => {
|
||||
const result = await docmostClient.insertImage(pageId, filePath, {
|
||||
async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => {
|
||||
const result = await docmostClient.insertImage(pageId, imageUrl, {
|
||||
align,
|
||||
alt,
|
||||
replaceText,
|
||||
@@ -656,7 +657,8 @@ server.registerTool(
|
||||
"replace_image",
|
||||
{
|
||||
description:
|
||||
"Replace an existing image on a page: uploads the new file as a NEW " +
|
||||
"Replace an existing image on a page with a new image fetched from a web " +
|
||||
"(http/https) URL: uploads the new file as a NEW " +
|
||||
"attachment (fresh clean URL that renders and busts browser caches), then " +
|
||||
"repoints every image node referencing the old attachmentId (recursively, " +
|
||||
"incl. callouts/tables) via the live document, preserving comments, " +
|
||||
@@ -670,19 +672,24 @@ server.registerTool(
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("attachmentId of the image currently in the page to replace"),
|
||||
filePath: z
|
||||
imageUrl: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("Absolute local path to the new image file"),
|
||||
.describe("http(s) URL of the new image to download"),
|
||||
align: z.enum(["left", "center", "right"]).optional(),
|
||||
alt: z.string().optional(),
|
||||
},
|
||||
},
|
||||
async ({ pageId, attachmentId, filePath, align, alt }) => {
|
||||
const result = await docmostClient.replaceImage(pageId, attachmentId, filePath, {
|
||||
async ({ pageId, attachmentId, imageUrl, align, alt }) => {
|
||||
const result = await docmostClient.replaceImage(
|
||||
pageId,
|
||||
attachmentId,
|
||||
imageUrl,
|
||||
{
|
||||
align,
|
||||
alt,
|
||||
});
|
||||
},
|
||||
);
|
||||
return jsonContent(result);
|
||||
},
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user