Security: - stash_page: reject path-traversal / percent-encoded srcs before the authed loopback fetch (resolveInternalFilePath), closing an SSRF/exfiltration hole where a crafted node.attrs.src could read an arbitrary internal GET endpoint into the anonymous sandbox. Stability: - stash_page: revert + recount mirrors FIFO-evicted by a later put in the same stash (no dangling sandbox refs, honest images.mirrored/failed); free image blobs if the final document put throws. - Reject/clamp non-positive SANDBOX_TTL_MS to the 1h default (warn once). - Log mirror failures unconditionally (console.warn, no blob bodies). Cleanup / architecture: - Remove dead expiresAt from SandboxPutResult. - Centralize the /api/sb route in SANDBOX_ROUTE_SEGMENT/SANDBOX_API_PATH and move URL composition into SandboxStore.putAndLink; drop the duplicated sink closures and the now-unused EnvironmentService injection from McpService and AiChatToolsService. - Un-export isInternalFileUrl; document the process-local (instance-bound) sandbox limitation in the tool description and .env.example. Docs/tests: - README/README.ru: 38 -> 39 tools + stash_page entry. - Add traversal/normalize/recursion unit tests, stash self-eviction + doc-put-throw + empty/octet-stream mock tests, controller If-None-Match (wildcard/weak/list) + Cache-Control tests, and SANDBOX_TTL_MS validation tests. Regenerate packages/mcp/build. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2777 lines
132 KiB
JavaScript
2777 lines
132 KiB
JavaScript
import FormData from "form-data";
|
|
import axios from "axios";
|
|
import { basename, extname } from "path";
|
|
import { filterWorkspace, filterSpace, filterPage, filterComment, filterSearchResult, } from "./lib/filters.js";
|
|
import { HocuspocusProvider } from "@hocuspocus/provider";
|
|
import { TiptapTransformer } from "@hocuspocus/transformer";
|
|
import * as Y from "yjs";
|
|
import WebSocket from "ws";
|
|
import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
|
|
import { collectInternalFileNodes, normalizeFileUrl, resolveInternalFilePath, } from "./lib/internal-file-urls.js";
|
|
import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, markdownToProseMirrorCanonical, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, applyDocToFragment, } from "./lib/collaboration.js";
|
|
import { footnoteWarningsField } from "./lib/footnote-analyze.js";
|
|
import { buildPageTree } from "./lib/tree.js";
|
|
import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js";
|
|
import { replaceNodeById, deleteNodeById, assertUnambiguousMatch, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js";
|
|
import { withPageLock } from "./lib/page-lock.js";
|
|
import { applyTextEdits, } from "./lib/json-edit.js";
|
|
import { getCollabToken, performLogin } from "./lib/auth-utils.js";
|
|
import { diffDocs, summarizeChange } from "./lib/diff.js";
|
|
import { applyAnchorInDoc, canAnchorInDoc } from "./lib/comment-anchor.js";
|
|
import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, canonicalizeFootnotes, insertInlineFootnote, } from "./lib/transforms.js";
|
|
import vm from "node:vm";
|
|
// Image formats this client recognises, expressed as a pair of lookup tables:
// one keyed by file extension (for local files) and one keyed by MIME type
// (for a remote Content-Type), both resolving to the same canonical set.
const EXT_TO_MIME = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
};

// Reverse direction. JPEG maps back to ".jpg" only, so ".jpeg" round-trips to
// ".jpg".
const MIME_TO_EXT = {
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "image/svg+xml": ".svg",
};
|
|
export class DocmostClient {
|
|
// Axios instance created in the constructor; all REST calls go through it.
client;
// Current access token, or null until login() has stored one (the 401/403
// interceptor resets it to null before re-authenticating).
token = null;
// API base URL exactly as supplied by the caller.
apiUrl;
// Credentials for the service-account variant. Both stay null on the getToken
// variant, which has nothing to log in with.
email = null;
password = null;
// Per-user token provider. When present, login() calls it to obtain a bare
// access JWT instead of performLogin, and the 401/403 re-auth path calls it
// again.
getTokenFn = null;
// Optional collab-token provider. When present, getCollabTokenWithReauth()
// returns its token rather than calling POST /auth/collab-token, and invokes
// it one more time after a 401/403. The internal agent uses this to carry
// signed provenance.
getCollabTokenFn = null;
// Blob-sandbox sink used by the stash tool; null when no sandbox is configured.
sandboxPut = null;
// Optional companions of the sink. `has` lets stashPage notice a blob that was
// FIFO-evicted by a later put in the same stash; `evict` lets it release the
// image blobs of an operation whose final document put threw. Each is null
// when the sink does not provide it.
sandboxHas = null;
sandboxEvict = null;
// De-duplicates concurrent logins. On token expiry the 401 interceptor,
// ensureAuthenticated, getCollabTokenWithReauth and the two multipart retries
// may all call login() together; sharing one memoized promise turns that into
// a single token request that every caller awaits.
loginPromise = null;
|
|
constructor(configOrBaseURL, email, password) {
|
|
// Normalize the legacy positional form into the object union.
|
|
const config = typeof configOrBaseURL === "string"
|
|
? { apiUrl: configOrBaseURL, email: email, password: password }
|
|
: configOrBaseURL;
|
|
this.apiUrl = config.apiUrl;
|
|
if ("getToken" in config) {
|
|
// Token variant: carry the user's JWT via getToken; no credentials, so
|
|
// login() must never call performLogin (there is nothing to log in with).
|
|
this.getTokenFn = config.getToken;
|
|
}
|
|
else {
|
|
// Service-account variant: behaves exactly as before (performLogin).
|
|
this.email = config.email;
|
|
this.password = config.password;
|
|
}
|
|
// Optional, available to both variants. When present, content mutations get
|
|
// their collab token from here instead of POST /auth/collab-token.
|
|
if (config.getCollabToken) {
|
|
this.getCollabTokenFn = config.getCollabToken;
|
|
}
|
|
if (config.sandbox) {
|
|
this.sandboxPut = config.sandbox.put;
|
|
this.sandboxHas = config.sandbox.has ?? null;
|
|
this.sandboxEvict = config.sandbox.evict ?? null;
|
|
}
|
|
this.client = axios.create({
|
|
baseURL: this.apiUrl,
|
|
// Default request timeout so a hung connection cannot wedge a per-page
|
|
// lock or block the server indefinitely. Multipart uploads override this
|
|
// with a longer per-request timeout.
|
|
timeout: 30000,
|
|
headers: {
|
|
"Content-Type": "application/json",
|
|
},
|
|
});
|
|
// Re-authenticate transparently on a 401/403 once: the JWT authToken can
|
|
// expire while the server is long-running, after which every cached-token
|
|
// request would otherwise fail until a manual restart. On such a response,
|
|
// clear the stale token, perform a fresh login, and replay the original
|
|
// request exactly once (guarded by config._retry to avoid infinite loops;
|
|
// the login request itself is never retried).
|
|
this.client.interceptors.response.use((response) => response, async (error) => {
|
|
const config = error.config;
|
|
const status = error.response?.status;
|
|
const isAuthError = status === 401 || status === 403;
|
|
const isLoginRequest = typeof config?.url === "string" && config.url.includes("/auth/login");
|
|
if (config && isAuthError && !config._retry && !isLoginRequest) {
|
|
config._retry = true;
|
|
// Drop the stale token + Authorization header before re-login.
|
|
this.token = null;
|
|
delete this.client.defaults.headers.common["Authorization"];
|
|
try {
|
|
await this.login();
|
|
}
|
|
catch (loginError) {
|
|
// Re-login failed: surface the original error to the caller.
|
|
return Promise.reject(error);
|
|
}
|
|
// Re-issue the original request with the freshly minted Bearer token.
|
|
// Read it from the default header that login() just set, not from
|
|
// this.token, to avoid a theoretical "Bearer null" if this.token was
|
|
// cleared between login() resolving and this point.
|
|
config.headers = config.headers || {};
|
|
config.headers["Authorization"] =
|
|
this.client.defaults.headers.common["Authorization"];
|
|
return this.client.request(config);
|
|
}
|
|
return Promise.reject(error);
|
|
});
|
|
}
|
|
/** Application base URL (API URL without the /api suffix). */
|
|
get appUrl() {
|
|
return this.apiUrl.replace(/\/api\/?$/, "");
|
|
}
|
|
async login() {
|
|
// Reuse an in-flight login if one is already running so concurrent callers
|
|
// share a single token fetch instead of each issuing their own.
|
|
if (!this.loginPromise) {
|
|
// Token variant: re-fetch a BARE JWT via getToken() (there are no
|
|
// credentials to log in with — on a 401/403 the interceptor below calls
|
|
// login() again, which re-invokes getToken()). Credentials variant:
|
|
// performLogin against /auth/login exactly as before.
|
|
const fetchToken = this.getTokenFn
|
|
? this.getTokenFn()
|
|
: performLogin(this.apiUrl, this.email, this.password);
|
|
this.loginPromise = fetchToken
|
|
.then((token) => {
|
|
// Guard against an empty/invalid token (e.g. a getToken provider that
|
|
// resolves to "" or null): without this an empty token would set a
|
|
// literal "Authorization: Bearer null"/"Bearer " header and every
|
|
// request would 401 with a confusing error. Fail loudly instead.
|
|
if (typeof token !== "string" || token.length === 0) {
|
|
throw new Error("getToken returned an empty token");
|
|
}
|
|
this.token = token;
|
|
this.client.defaults.headers.common["Authorization"] =
|
|
`Bearer ${token}`;
|
|
})
|
|
.finally(() => {
|
|
this.loginPromise = null;
|
|
});
|
|
}
|
|
return this.loginPromise;
|
|
}
|
|
async ensureAuthenticated() {
|
|
if (!this.token) {
|
|
await this.login();
|
|
}
|
|
}
|
|
/**
|
|
* Fetch a collaboration token, transparently re-authenticating once on a
|
|
* 401/403. getCollabToken() uses bare axios internally, so it is NOT covered
|
|
* by this.client's response interceptor; this helper replicates that
|
|
* behaviour for collab-token requests: ensure a token, try once, and on an
|
|
* expired-token auth error perform a fresh login and retry exactly once.
|
|
*/
|
|
async getCollabTokenWithReauth() {
|
|
// Collab-token PROVIDER path: when a getCollabToken provider was supplied
|
|
// (the internal agent's provenance collab token), use it instead of the
|
|
// REST /auth/collab-token endpoint. Re-invoke it once on a 401/403 (e.g. the
|
|
// signed token expired between content mutations in a long agent turn).
|
|
if (this.getCollabTokenFn) {
|
|
try {
|
|
const token = await this.getCollabTokenFn();
|
|
if (typeof token !== "string" || token.length === 0) {
|
|
throw new Error("getCollabToken returned an empty token");
|
|
}
|
|
return token;
|
|
}
|
|
catch (e) {
|
|
const axiosStatus = axios.isAxiosError(e)
|
|
? e.response?.status
|
|
: undefined;
|
|
const attachedStatus = e?.status;
|
|
const isAuthError = axiosStatus === 401 ||
|
|
axiosStatus === 403 ||
|
|
attachedStatus === 401 ||
|
|
attachedStatus === 403;
|
|
if (isAuthError) {
|
|
const token = await this.getCollabTokenFn();
|
|
if (typeof token !== "string" || token.length === 0) {
|
|
throw new Error("getCollabToken returned an empty token");
|
|
}
|
|
return token;
|
|
}
|
|
throw e;
|
|
}
|
|
}
|
|
await this.ensureAuthenticated();
|
|
try {
|
|
return await getCollabToken(this.apiUrl, this.token);
|
|
}
|
|
catch (e) {
|
|
// getCollabToken wraps the AxiosError in a plain Error but attaches the
|
|
// HTTP status as `.status`, so detect an auth failure via either the raw
|
|
// AxiosError shape OR the attached status.
|
|
const axiosStatus = axios.isAxiosError(e)
|
|
? e.response?.status
|
|
: undefined;
|
|
const attachedStatus = e?.status;
|
|
const isAuthError = axiosStatus === 401 ||
|
|
axiosStatus === 403 ||
|
|
attachedStatus === 401 ||
|
|
attachedStatus === 403;
|
|
if (isAuthError) {
|
|
await this.login();
|
|
return await getCollabToken(this.apiUrl, this.token);
|
|
}
|
|
throw e;
|
|
}
|
|
}
|
|
/**
|
|
* Connect to the collaboration websocket, read the live doc, apply
|
|
* `transform`, write the result, and wait for the server to persist it —
|
|
* WITHOUT acquiring the per-page lock.
|
|
*
|
|
* This mirrors collaboration.mutatePageContent EXCEPT that it does not call
|
|
* withPageLock. It exists solely so replaceImage can hold ONE withPageLock
|
|
* across its scan -> upload -> write sequence: the per-page mutex is NOT
|
|
* reentrant, so calling the normal (self-locking) mutatePageContent inside an
|
|
* outer withPageLock for the same pageId would deadlock. The caller MUST hold
|
|
* the page lock for the whole operation; this helper assumes that invariant.
|
|
*
|
|
* `transform` receives the live ProseMirror doc and returns the NEW full doc
|
|
* to write, or `null` to abort with no write. Errors thrown by `transform`
|
|
* propagate to the caller.
|
|
*
|
|
* Resolves a `MutationResult { doc, verify }` mirroring mutatePageContent, so
|
|
* every content mutator (including replaceImage) can return a verifiable
|
|
* change report. The report is computed AFTER the atomic read->write and
|
|
* never throws.
|
|
*/
|
|
mutateLiveContentUnlocked(pageId, collabToken, transform) {
|
|
const CONNECT_TIMEOUT_MS = 25000;
|
|
const PERSIST_TIMEOUT_MS = 20000;
|
|
const ydoc = new Y.Doc();
|
|
const wsUrl = buildCollabWsUrl(this.apiUrl);
|
|
return new Promise((resolve, reject) => {
|
|
let provider;
|
|
let applied = false; // onSynced may fire again on reconnect — apply once.
|
|
let settled = false;
|
|
let connectionLost = false;
|
|
let connectTimer;
|
|
let persistTimer;
|
|
let unsyncedHandler;
|
|
// The verifiable result resolved on every success/abort path. Set on abort
|
|
// (no-op report) and after a real write (computed change report).
|
|
let mutationResult;
|
|
const cleanup = () => {
|
|
if (connectTimer)
|
|
clearTimeout(connectTimer);
|
|
if (persistTimer)
|
|
clearTimeout(persistTimer);
|
|
if (provider) {
|
|
if (unsyncedHandler) {
|
|
try {
|
|
provider.off("unsyncedChanges", unsyncedHandler);
|
|
}
|
|
catch (err) { }
|
|
}
|
|
try {
|
|
provider.destroy();
|
|
}
|
|
catch (err) { }
|
|
}
|
|
};
|
|
const finish = (err, value) => {
|
|
if (settled)
|
|
return;
|
|
settled = true;
|
|
cleanup();
|
|
if (err)
|
|
reject(err);
|
|
else
|
|
resolve(value);
|
|
};
|
|
connectTimer = setTimeout(() => {
|
|
finish(new Error("Connection timeout to collaboration server"));
|
|
}, CONNECT_TIMEOUT_MS);
|
|
const waitForPersistence = () => {
|
|
if (settled)
|
|
return;
|
|
if (!provider) {
|
|
finish(new Error("collab provider gone before persistence"));
|
|
return;
|
|
}
|
|
if (provider.unsyncedChanges === 0) {
|
|
finish(null, mutationResult);
|
|
return;
|
|
}
|
|
persistTimer = setTimeout(() => {
|
|
finish(new Error("Timeout waiting for collaboration server to persist the update"));
|
|
}, PERSIST_TIMEOUT_MS);
|
|
unsyncedHandler = (data) => {
|
|
if (data.number === 0 && !connectionLost) {
|
|
finish(null, mutationResult);
|
|
}
|
|
};
|
|
provider.on("unsyncedChanges", unsyncedHandler);
|
|
};
|
|
provider = new HocuspocusProvider({
|
|
url: wsUrl,
|
|
name: `page.${pageId}`,
|
|
document: ydoc,
|
|
token: collabToken,
|
|
// @ts-ignore - Required for Node.js environment
|
|
WebSocketPolyfill: WebSocket,
|
|
onDisconnect: () => {
|
|
connectionLost = true;
|
|
finish(new Error("Collaboration connection closed before the update was persisted/synced"));
|
|
},
|
|
onClose: () => {
|
|
connectionLost = true;
|
|
finish(new Error("Collaboration connection closed before the update was persisted/synced"));
|
|
},
|
|
onSynced: () => {
|
|
if (applied || settled)
|
|
return;
|
|
applied = true;
|
|
// CRITICAL: keep everything between reading and writing the live doc
|
|
// synchronous (no await) so no remote update can interleave.
|
|
let newDoc;
|
|
let beforeDoc;
|
|
try {
|
|
let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
|
|
if (!liveDoc ||
|
|
typeof liveDoc !== "object" ||
|
|
!Array.isArray(liveDoc.content)) {
|
|
liveDoc = { type: "doc", content: [] };
|
|
}
|
|
// Snapshot the before-doc for the change report (safe deep clone).
|
|
beforeDoc = JSON.parse(JSON.stringify(liveDoc));
|
|
newDoc = transform(liveDoc);
|
|
if (newDoc == null) {
|
|
// Transform aborted — write nothing, return the live doc with a
|
|
// no-op change report.
|
|
mutationResult = {
|
|
doc: liveDoc,
|
|
verify: {
|
|
changed: false,
|
|
textInserted: 0,
|
|
textDeleted: 0,
|
|
blocksChanged: 0,
|
|
marks: {},
|
|
summary: "no changes (transform aborted)",
|
|
},
|
|
};
|
|
finish(null, mutationResult);
|
|
return;
|
|
}
|
|
// Structural diff into the live fragment (issue #152), mirroring
|
|
// the main write path: preserves the Yjs ids of unchanged nodes so
|
|
// an open editor's cursor is not yanked to the end of the document.
|
|
// The previous destructive rewrite (delete-all + applyUpdate of a
|
|
// fresh Y.Doc) discarded every node id, so replaceImage — the only
|
|
// caller of this method — still reproduced the #152 cursor jump
|
|
// (#164). applyDocToFragment runs its own atomic `transact`.
|
|
applyDocToFragment(ydoc, newDoc);
|
|
}
|
|
catch (e) {
|
|
finish(e instanceof Error ? e : new Error(String(e)));
|
|
return;
|
|
}
|
|
// Compute the verifiable change report AFTER the transact write: it
|
|
// only needs the JSON before/after, so it cannot affect the atomic
|
|
// read->write window, and summarizeChange never throws.
|
|
mutationResult = {
|
|
doc: newDoc,
|
|
verify: summarizeChange(beforeDoc, newDoc),
|
|
};
|
|
waitForPersistence();
|
|
},
|
|
onAuthenticationFailed: () => {
|
|
finish(new Error("Authentication failed for collaboration connection"));
|
|
},
|
|
});
|
|
});
|
|
}
|
|
/**
|
|
* Generic pagination handler for Docmost API endpoints
|
|
*/
|
|
async paginateAll(endpoint, basePayload = {}, limit = 100) {
|
|
await this.ensureAuthenticated();
|
|
const clampedLimit = Math.max(1, Math.min(100, limit));
|
|
// Hard ceiling on the number of pages to fetch: guards against a server
|
|
// that returns a perpetually-true hasNextPage (which would otherwise loop
|
|
// forever and accumulate duplicates).
|
|
const MAX_PAGES = 50;
|
|
let page = 1;
|
|
let allItems = [];
|
|
let hasNextPage = true;
|
|
while (hasNextPage && page <= MAX_PAGES) {
|
|
const response = await this.client.post(endpoint, {
|
|
...basePayload,
|
|
limit: clampedLimit,
|
|
page,
|
|
});
|
|
const data = response.data;
|
|
const items = data.data?.items || data.items || [];
|
|
const meta = data.data?.meta || data.meta;
|
|
allItems = allItems.concat(items);
|
|
// Stop if the page is empty or shorter than the requested size: a full
|
|
// page worth of items is the only situation where another page can exist,
|
|
// so this defends against a stuck hasNextPage flag in addition to it.
|
|
if (items.length === 0 || items.length < clampedLimit) {
|
|
break;
|
|
}
|
|
hasNextPage = meta?.hasNextPage || false;
|
|
page++;
|
|
}
|
|
// If the loop stopped because it hit the MAX_PAGES ceiling while the server
|
|
// still reported more results (hasNextPage true and the last page was
|
|
// full), the result set is truncated — warn so the caller is not silently
|
|
// handed an incomplete list.
|
|
if (hasNextPage && page > MAX_PAGES) {
|
|
console.warn(`paginateAll: results from "${endpoint}" truncated at the ${MAX_PAGES}-page cap; more pages exist on the server`);
|
|
}
|
|
return allItems;
|
|
}
|
|
async getWorkspace() {
|
|
await this.ensureAuthenticated();
|
|
const response = await this.client.post("/workspace/info", {});
|
|
return {
|
|
data: filterWorkspace(response.data?.data ?? response.data),
|
|
success: response.data.success,
|
|
};
|
|
}
|
|
async getSpaces() {
|
|
const spaces = await this.paginateAll("/spaces", {});
|
|
return spaces.map((space) => filterSpace(space));
|
|
}
|
|
/**
|
|
* List pages in one of two modes.
|
|
*
|
|
* Default (`tree` false): most recent pages by updatedAt (descending),
|
|
* bounded. Fetching the whole space can exceed MCP response/time limits on
|
|
* large instances, so a single bounded page of results is returned (default
|
|
* 50, max 100) via the `/pages/recent` feed.
|
|
*
|
|
* Tree (`tree` true): the space's FULL page hierarchy as a nested tree (each
|
|
* node has a `children` array). This mode REQUIRES `spaceId` (a page tree is
|
|
* scoped to one space) and IGNORES `limit` — the whole hierarchy is returned.
|
|
* It walks the sidebar tree via `enumerateSpacePages`, which performs N
|
|
* sidebar requests and is bounded by that method's 10000-node cap (and skips
|
|
* soft-deleted pages server-side).
|
|
*/
|
|
async listPages(spaceId, limit = 50, tree = false) {
|
|
await this.ensureAuthenticated();
|
|
if (tree) {
|
|
if (!spaceId) {
|
|
throw new Error("list_pages: tree mode requires a spaceId (a page tree is scoped to one space). Pass spaceId, or omit tree to get the recent-pages list.");
|
|
}
|
|
const nodes = await this.enumerateSpacePages(spaceId);
|
|
return buildPageTree(nodes);
|
|
}
|
|
const clampedLimit = Math.max(1, Math.min(100, limit));
|
|
const payload = { limit: clampedLimit, page: 1 };
|
|
if (spaceId)
|
|
payload.spaceId = spaceId;
|
|
const response = await this.client.post("/pages/recent", payload);
|
|
const data = response.data;
|
|
const items = data.data?.items || data.items || [];
|
|
return items.map((page) => filterPage(page));
|
|
}
|
|
/**
|
|
* List sidebar pages for a space. With no pageId the request returns the
|
|
* space ROOT pages; with a pageId it returns the direct CHILDREN of that
|
|
* page. pageId is therefore optional and is only included in the POST body
|
|
* when provided (an empty/undefined pageId would otherwise change the
|
|
* semantics on the server).
|
|
*/
|
|
async listSidebarPages(spaceId, pageId) {
|
|
await this.ensureAuthenticated();
|
|
// Paginate: the endpoint returns server-paged children, so posting only
|
|
// { page: 1 } silently dropped every child beyond the first page. Loop on
|
|
// meta.hasNextPage (with a MAX_PAGES ceiling like paginateAll, guarding
|
|
// against a stuck hasNextPage flag) and accumulate all children.
|
|
const MAX_PAGES = 50;
|
|
let page = 1;
|
|
let allItems = [];
|
|
let hasNextPage = true;
|
|
while (hasNextPage && page <= MAX_PAGES) {
|
|
// Only send pageId when scoping to a page's children; omit it for roots.
|
|
const payload = { spaceId, page };
|
|
if (pageId)
|
|
payload.pageId = pageId;
|
|
const response = await this.client.post("/pages/sidebar-pages", payload);
|
|
const data = response.data?.data ?? response.data;
|
|
const items = data?.items || [];
|
|
allItems = allItems.concat(items);
|
|
hasNextPage = data?.meta?.hasNextPage || false;
|
|
page++;
|
|
}
|
|
return allItems;
|
|
}
|
|
/**
|
|
* Enumerate EVERY page in a space (or in a subtree, when rootPageId is given)
|
|
* by walking the sidebar-pages tree.
|
|
*
|
|
* Starting set: the children of rootPageId when provided, otherwise the
|
|
* space root pages. From there it does an iterative breadth-first walk: each
|
|
* node is collected, and when node.hasChildren is true its direct children
|
|
* are fetched via listSidebarPages(spaceId, node.id) and enqueued.
|
|
*
|
|
* This replaces the old "/pages/recent" enumeration, which is a bounded
|
|
* recent-activity feed (~5000 cap) and therefore misses comments on older
|
|
* pages that were never recently touched.
|
|
*
|
|
* Safeguards: a `visited` Set of page ids prevents re-processing a node
|
|
* (cycles / duplicate references), and a hard node cap bounds pathological
|
|
* trees so the walk always terminates.
|
|
*/
|
|
async enumerateSpacePages(spaceId, rootPageId) {
|
|
const MAX_NODES = 10000;
|
|
const result = [];
|
|
const visited = new Set();
|
|
// Seed the queue with the starting level (subtree children or roots).
|
|
const queue = await this.listSidebarPages(spaceId, rootPageId);
|
|
while (queue.length > 0 && result.length < MAX_NODES) {
|
|
const node = queue.shift();
|
|
if (!node || typeof node !== "object" || !node.id)
|
|
continue;
|
|
// Skip already-seen ids to guard against cycles / duplicate references.
|
|
if (visited.has(node.id))
|
|
continue;
|
|
visited.add(node.id);
|
|
result.push(node);
|
|
if (node.hasChildren) {
|
|
try {
|
|
const children = await this.listSidebarPages(spaceId, node.id);
|
|
for (const child of children)
|
|
queue.push(child);
|
|
}
|
|
catch (e) {
|
|
// A failure fetching one node's children must not abort the whole
|
|
// walk: skip this branch and keep enumerating the rest.
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
/** Raw page info including the ProseMirror JSON content and slugId. */
|
|
async getPageRaw(pageId) {
|
|
await this.ensureAuthenticated();
|
|
const response = await this.client.post("/pages/info", { pageId });
|
|
return response.data?.data ?? response.data;
|
|
}
|
|
async getPage(pageId) {
|
|
await this.ensureAuthenticated();
|
|
const resultData = await this.getPageRaw(pageId);
|
|
let content = resultData.content
|
|
? convertProseMirrorToMarkdown(resultData.content)
|
|
: "";
|
|
// Always fetch subpages to provide context to the agent
|
|
let subpages = [];
|
|
try {
|
|
// `pageId` may be a slugId, but the sidebar-pages endpoint requires the
|
|
// UUID; `resultData.id` holds the resolved UUID returned by getPageRaw.
|
|
subpages = await this.listSidebarPages(resultData.spaceId, resultData.id);
|
|
}
|
|
catch (e) {
|
|
console.warn("Failed to fetch subpages:", e);
|
|
}
|
|
// Resolve subpages if the placeholder exists
|
|
if (content && content.includes("{{SUBPAGES}}")) {
|
|
if (subpages && subpages.length > 0) {
|
|
const list = subpages
|
|
.map((p) => `- [${p.title}](page:${p.id})`)
|
|
.join("\n");
|
|
content = content.replace("{{SUBPAGES}}", `### Subpages\n${list}`);
|
|
}
|
|
else {
|
|
content = content.replace("{{SUBPAGES}}", "");
|
|
}
|
|
}
|
|
return {
|
|
data: filterPage(resultData, content, subpages),
|
|
success: true,
|
|
};
|
|
}
|
|
/** Page info + raw ProseMirror JSON content (lossless representation). */
|
|
async getPageJson(pageId) {
|
|
const data = await this.getPageRaw(pageId);
|
|
return {
|
|
id: data.id,
|
|
slugId: data.slugId,
|
|
title: data.title,
|
|
parentPageId: data.parentPageId,
|
|
spaceId: data.spaceId,
|
|
updatedAt: data.updatedAt,
|
|
content: data.content || { type: "doc", content: [] },
|
|
};
|
|
}
|
|
/**
|
|
* Fetch an INTERNAL Docmost file (authed loopback) for sandbox mirroring.
|
|
* `src` is normalized to `/api/files/<id>/<file>`; `this.client.baseURL`
|
|
* already ends in `/api`, so we strip the leading `/api` and request the
|
|
* relative path with the client's Authorization header. Returns the raw bytes
|
|
* and the response Content-Type (mime), defaulting to octet-stream.
|
|
*
|
|
* The fetch is size-bounded (hard 64 MiB ceiling) purely to protect memory;
|
|
* the authoritative per-blob cap is enforced by the sandbox `put`. The path is
|
|
* resolved via resolveInternalFilePath, which REJECTS (throws) any traversal
|
|
* or percent-encoded src that would let an attacker-controlled `attrs.src`
|
|
* escape `/api/files/` and reach another internal endpoint (SSRF). That throw
|
|
* happens before this.client.get, so a malicious src is counted as a failed
|
|
* mirror — it never reaches the network.
|
|
*/
|
|
async fetchInternalFile(src) {
|
|
const HARD_CEILING = 64 * 1024 * 1024; // 64 MiB memory guard
|
|
const relPath = resolveInternalFilePath(src);
|
|
const response = await this.client.get(relPath, {
|
|
responseType: "arraybuffer",
|
|
timeout: 30000,
|
|
maxContentLength: HARD_CEILING,
|
|
maxBodyLength: HARD_CEILING,
|
|
});
|
|
const buffer = Buffer.from(response.data);
|
|
if (buffer.length === 0) {
|
|
throw new Error(`Empty file response from "${src}"`);
|
|
}
|
|
const rawCt = response.headers?.["content-type"];
|
|
const mime = typeof rawCt === "string" && rawCt.length > 0
|
|
? rawCt.split(";")[0].trim().toLowerCase()
|
|
: "application/octet-stream";
|
|
return { buffer, mime };
|
|
}
|
|
/**
|
|
* Stash a page's full content into the in-RAM blob sandbox and return ONLY a
|
|
* short anonymous URL — the body never enters the model context (this is the
|
|
* whole point: ~30KB+ ProseMirror docs blow the model context if passed as a
|
|
* tool argument). Every INTERNAL file/image src (the type-agnostic criterion,
|
|
* so drawio/excalidraw/video/file nodes are covered too) is mirrored into the
|
|
* sandbox and its `src` rewritten to the sandbox URL, so an external consumer
|
|
* can fetch the images anonymously. External http(s) srcs are left untouched.
|
|
*
|
|
* Blobs live in RAM with a short TTL and are cleared on restart — consume the
|
|
* URLs within the TTL and one uptime. A failed image fetch never aborts the
|
|
* doc: the original src is kept and the failure counted.
|
|
*
|
|
* Returns { uri, sha256, size, images:{mirrored, failed} }. `uri` and `sha256`
|
|
* are for the document blob; `sha256` is also the blob's ETag (integrity).
|
|
*/
|
|
async stashPage(pageId) {
|
|
if (!this.sandboxPut) {
|
|
throw new Error("stash_page is unavailable: the blob sandbox is not configured on this server");
|
|
}
|
|
await this.ensureAuthenticated();
|
|
// Stash the SAME shape get_page_json returns (id/title/.../content), with a
|
|
// deep clone so the rewrite never mutates anything shared.
|
|
const pageJson = await this.getPageJson(pageId);
|
|
const cloned = structuredClone(pageJson);
|
|
// Group internal-file nodes by normalized src so each unique resource is
|
|
// fetched + stored ONCE (dedup), and every node sharing that src points at
|
|
// the one sandbox blob. Capture each node's ORIGINAL raw src per-node:
|
|
// dedup groups nodes whose normalized src is equal even when their raw srcs
|
|
// differ (e.g. `/api/files/...` vs the bare `/files/...`), so on a revert we
|
|
// must restore each node's own original value, not the group key.
|
|
const bySrc = new Map();
|
|
for (const node of collectInternalFileNodes(cloned.content)) {
|
|
const origSrc = String(node.attrs.src);
|
|
const src = normalizeFileUrl(origSrc);
|
|
const entry = { node, origSrc };
|
|
const group = bySrc.get(src);
|
|
if (group)
|
|
group.push(entry);
|
|
else
|
|
bySrc.set(src, [entry]);
|
|
}
|
|
let mirrored = 0;
|
|
let failed = 0;
|
|
// Record every successful mirror so it can be (a) reverted if its blob gets
|
|
// FIFO-evicted by a LATER put in this same stash, and (b) freed if the final
|
|
// doc put throws.
|
|
const mirrors = [];
|
|
const MAX_CONCURRENCY = 5;
|
|
const groups = [...bySrc.entries()];
|
|
for (let i = 0; i < groups.length; i += MAX_CONCURRENCY) {
|
|
const batch = groups.slice(i, i + MAX_CONCURRENCY);
|
|
await Promise.all(batch.map(async ([src, entries]) => {
|
|
try {
|
|
const { buffer, mime } = await this.fetchInternalFile(src);
|
|
// put may throw if the blob exceeds the per-blob/total caps.
|
|
const stored = this.sandboxPut(buffer, mime);
|
|
for (const entry of entries)
|
|
entry.node.attrs.src = stored.uri;
|
|
mirrors.push({ uri: stored.uri, entries });
|
|
mirrored++;
|
|
}
|
|
catch (err) {
|
|
// One bad/oversized image (or a rejected traversal src) must not
|
|
// abort the document. Logged unconditionally (never the blob body),
|
|
// matching the package's ungated console.warn convention.
|
|
failed++;
|
|
console.warn(`stash_page: failed to mirror "${src}": ${err instanceof Error ? err.message : String(err)}`);
|
|
}
|
|
}));
|
|
}
|
|
// Reconcile against FIFO eviction: a heavy page can have a later image-put
|
|
// evict an EARLIER image stored in this SAME stash. The stored doc must not
|
|
// reference an evicted blob (consumer 404) and the counts must not lie, so
|
|
// for any mirror whose blob is gone, revert its nodes to their original
|
|
// internal srcs and re-count it as failed.
|
|
if (this.sandboxHas) {
|
|
for (const mirror of mirrors) {
|
|
if (!this.sandboxHas(mirror.uri)) {
|
|
for (const entry of mirror.entries) {
|
|
entry.node.attrs.src = entry.origSrc;
|
|
}
|
|
mirrored--;
|
|
failed++;
|
|
console.warn(`stash_page: mirrored blob ${mirror.uri} was evicted before ` +
|
|
`the doc was stored; reverted its src and counted it as failed`);
|
|
}
|
|
}
|
|
}
|
|
const docBuf = Buffer.from(JSON.stringify(cloned), "utf8");
|
|
let stored;
|
|
try {
|
|
stored = this.sandboxPut(docBuf, "application/json");
|
|
}
|
|
catch (err) {
|
|
// The doc put failed (e.g. doc exceeds the cap). Free this op's image
|
|
// blobs instead of leaking them in RAM for the whole TTL, then re-throw.
|
|
if (this.sandboxEvict) {
|
|
for (const mirror of mirrors)
|
|
this.sandboxEvict(mirror.uri);
|
|
}
|
|
throw err;
|
|
}
|
|
return {
|
|
uri: stored.uri,
|
|
sha256: stored.sha256,
|
|
size: stored.size,
|
|
images: { mirrored, failed },
|
|
};
|
|
}
|
|
/**
|
|
* Compact outline of a page's top-level blocks (no full document body).
|
|
* Cheap way to locate sections/tables and grab block ids before drilling in
|
|
* with get_node / patch_node / insert_node.
|
|
*/
|
|
async getOutline(pageId) {
|
|
await this.ensureAuthenticated();
|
|
const data = await this.getPageRaw(pageId);
|
|
return {
|
|
pageId,
|
|
slugId: data.slugId,
|
|
title: data.title,
|
|
outline: buildOutline(data.content ?? { type: "doc", content: [] }),
|
|
};
|
|
}
|
|
/**
|
|
* Fetch a single node's full ProseMirror subtree (lossless) by reference:
|
|
* a block id (headings/paragraphs/callouts/images), or `#<index>` to select
|
|
* a top-level block by its outline index (the only way to reach tables/rows/
|
|
* cells, which carry no id).
|
|
*/
|
|
async getNode(pageId, nodeId) {
|
|
await this.ensureAuthenticated();
|
|
const data = await this.getPageRaw(pageId);
|
|
const hit = getNodeByRef(data.content ?? { type: "doc", content: [] }, nodeId);
|
|
if (!hit) {
|
|
throw new Error(`get_node: no node found for "${nodeId}" on page ${pageId} (use a block id from get_outline, or "#<index>" for a top-level block such as a table)`);
|
|
}
|
|
return {
|
|
pageId,
|
|
ref: nodeId,
|
|
path: hit.path,
|
|
type: hit.type,
|
|
node: hit.node,
|
|
};
|
|
}
|
|
/**
|
|
* Read a table as a matrix. `tableRef` is `#<index>` (from get_outline) or a
|
|
* block id of any node inside the table. Returns the cell texts plus a
|
|
* parallel cellIds matrix (each cell's first paragraph id, or null) so a
|
|
* caller can patch_node a cell for rich-formatted edits. Throws when no table
|
|
* resolves for the reference.
|
|
*/
|
|
async getTable(pageId, tableRef) {
|
|
await this.ensureAuthenticated();
|
|
const data = await this.getPageRaw(pageId);
|
|
const t = readTable(data.content ?? { type: "doc", content: [] }, tableRef);
|
|
if (!t) {
|
|
throw new Error(`table_get: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`);
|
|
}
|
|
return {
|
|
pageId,
|
|
table: tableRef,
|
|
rows: t.rows,
|
|
cols: t.cols,
|
|
path: t.path,
|
|
cells: t.cells,
|
|
cellIds: t.cellIds,
|
|
};
|
|
}
|
|
/**
|
|
* Insert a row of plain-text cells into a table on the LIVE collab document.
|
|
* `tableRef` is `#<index>` or a block id inside the target table. `cells` is
|
|
* padded to the table's column count (more cells than columns throws); `index`
|
|
* is a 0-based insert position (omit/out-of-range to append). Throws when no
|
|
* table resolves for the reference.
|
|
*/
|
|
async tableInsertRow(pageId, tableRef, cells, index) {
|
|
await this.ensureAuthenticated();
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
// Track insertion in an outer var, reset per-transform, so a collab retry
|
|
// recomputes it cleanly (mirrors insertNode's pattern).
|
|
let inserted = false;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
inserted = false;
|
|
const { doc: nd, inserted: ins } = insertTableRow(liveDoc, tableRef, cells, index);
|
|
inserted = ins;
|
|
if (!inserted)
|
|
return null; // table not found -> skip the write entirely
|
|
return nd;
|
|
});
|
|
if (!inserted) {
|
|
throw new Error(`table_insert_row: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`);
|
|
}
|
|
return {
|
|
success: true,
|
|
table: tableRef,
|
|
inserted: true,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Delete the row at 0-based `index` from a table on the LIVE collab document.
|
|
* `tableRef` is `#<index>` or a block id inside the target table. The helper's
|
|
* out-of-range and last-row errors propagate; a missing table throws here.
|
|
*/
|
|
async tableDeleteRow(pageId, tableRef, index) {
|
|
await this.ensureAuthenticated();
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
let deleted = false;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
deleted = false;
|
|
const { doc: nd, deleted: del } = deleteTableRow(liveDoc, tableRef, index);
|
|
deleted = del;
|
|
if (!deleted)
|
|
return null; // table not found -> skip the write entirely
|
|
return nd;
|
|
});
|
|
if (!deleted) {
|
|
throw new Error(`table_delete_row: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`);
|
|
}
|
|
return {
|
|
success: true,
|
|
table: tableRef,
|
|
deleted: true,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Set the plain-text content of cell `[row, col]` (0-based) in a table on the
|
|
* LIVE collab document, replacing the cell's content with a single text
|
|
* paragraph (the cell's first-paragraph id is preserved). `tableRef` is
|
|
* `#<index>` or a block id inside the target table. The helper's out-of-range
|
|
* error propagates; a missing table throws here.
|
|
*/
|
|
async tableUpdateCell(pageId, tableRef, row, col, text) {
|
|
await this.ensureAuthenticated();
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
let updated = false;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
updated = false;
|
|
const { doc: nd, updated: upd } = updateTableCell(liveDoc, tableRef, row, col, text);
|
|
updated = upd;
|
|
if (!updated)
|
|
return null; // table not found -> skip the write entirely
|
|
return nd;
|
|
});
|
|
if (!updated) {
|
|
throw new Error(`table_update_cell: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`);
|
|
}
|
|
return {
|
|
success: true,
|
|
table: tableRef,
|
|
row,
|
|
col,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Create a new page with title and content.
|
|
* Uses the /pages/import workaround (the only endpoint accepting content),
|
|
* then moves the page and restores the exact title: the import endpoint
|
|
* derives the title from the FILENAME and replaces spaces with
|
|
* underscores, so we explicitly re-set it via /pages/update afterwards.
|
|
*/
|
|
async createPage(title, content, spaceId, parentPageId) {
|
|
await this.ensureAuthenticated();
|
|
if (parentPageId) {
|
|
try {
|
|
await this.getPage(parentPageId);
|
|
}
|
|
catch (e) {
|
|
throw new Error(`Parent page with ID ${parentPageId} not found.`);
|
|
}
|
|
}
|
|
// 1. Create content via Import (using multipart/form-data).
|
|
// Build a FRESH FormData per send attempt: a FormData body is a single-use
|
|
// stream consumed on the first send, so it cannot be replayed by
|
|
// this.client's response interceptor (replay fails with 'socket hang up').
|
|
// Multipart re-auth is therefore done here with bare axios and an explicit
|
|
// one-shot 401/403 retry that rebuilds the body.
|
|
const fileContent = Buffer.from(content, "utf-8");
|
|
const buildForm = () => {
|
|
const form = new FormData();
|
|
form.append("spaceId", spaceId);
|
|
form.append("file", fileContent, {
|
|
filename: `${title || "import"}.md`,
|
|
contentType: "text/markdown",
|
|
});
|
|
return form;
|
|
};
|
|
const importUrl = `${this.apiUrl}/pages/import`;
|
|
let response;
|
|
try {
|
|
// Call buildForm() ONCE per attempt and reuse the instance for both
|
|
// getHeaders() and the body so the Content-Type boundary matches the body.
|
|
const form = buildForm();
|
|
// Read the Authorization header from this.client's defaults (set by
|
|
// login(), only ever deleted — never set to null) instead of building
|
|
// `Bearer ${this.token}`: a concurrent JSON 401 can null this.token
|
|
// mid-flight, which would otherwise produce a literal "Bearer null".
|
|
// ensureAuthenticated() above guarantees login() ran, so the default
|
|
// header exists here.
|
|
response = await axios.post(importUrl, form, {
|
|
headers: {
|
|
...form.getHeaders(),
|
|
Authorization: this.client.defaults.headers.common["Authorization"],
|
|
},
|
|
timeout: 60000,
|
|
});
|
|
}
|
|
catch (error) {
|
|
// On an expired-token auth error, re-login and retry exactly once with a
|
|
// freshly-rebuilt FormData (the previous one was already consumed).
|
|
if (axios.isAxiosError(error) &&
|
|
(error.response?.status === 401 || error.response?.status === 403)) {
|
|
await this.login();
|
|
const form2 = buildForm();
|
|
response = await axios.post(importUrl, form2, {
|
|
headers: {
|
|
...form2.getHeaders(),
|
|
Authorization: this.client.defaults.headers.common["Authorization"],
|
|
},
|
|
timeout: 60000,
|
|
});
|
|
}
|
|
else {
|
|
throw error;
|
|
}
|
|
}
|
|
const newPageId = (response.data?.data ?? response.data).id;
|
|
// 2. Move to parent if needed
|
|
if (parentPageId) {
|
|
await this.movePage(newPageId, parentPageId);
|
|
}
|
|
// 3. Restore the exact title (import mangles spaces into underscores)
|
|
if (title) {
|
|
await this.client.post("/pages/update", { pageId: newPageId, title });
|
|
}
|
|
const page = await this.getPage(newPageId);
|
|
// Surface non-fatal footnote problems (dangling refs, empty/duplicate
|
|
// definitions, markers in tables) so the agent can fix its markup (#166).
|
|
return { ...page, ...footnoteWarningsField(content) };
|
|
}
|
|
/**
|
|
* Update a page's content from markdown and optionally its title.
|
|
* NOTE: full re-import — block ids regenerate. For surgical changes
|
|
* use editPageText / updatePageJson instead.
|
|
*/
|
|
async updatePage(pageId, content, title) {
|
|
await this.ensureAuthenticated();
|
|
// Write the BODY first, then the title (#159 split-brain). If the collab
|
|
// body write fails (e.g. a persist timeout), the title must be left
|
|
// UNTOUCHED so the page never ends up with a new title over its old body.
|
|
// A title write failing AFTER a successful body is rarer (REST is fast) and
|
|
// leaves correct content under a stale title — the lesser inconsistency.
|
|
let collabToken = "";
|
|
let mutation;
|
|
try {
|
|
collabToken = await this.getCollabTokenWithReauth();
|
|
mutation = await updatePageContentRealtime(pageId, content, collabToken, this.apiUrl);
|
|
}
|
|
catch (error) {
|
|
// Verbose diagnostics (incl. anything that could expose a token prefix)
|
|
// are gated behind DEBUG; the thrown Error below carries no token data.
|
|
if (process.env.DEBUG) {
|
|
console.error("Failed to update page content via realtime collaboration:", error);
|
|
const tokenPreview = collabToken
|
|
? collabToken.substring(0, 15) + "..."
|
|
: "null";
|
|
console.error(`Collab token preview: ${tokenPreview}`);
|
|
}
|
|
throw new Error(`Failed to update page content: ${error.message}`);
|
|
}
|
|
// Body persisted successfully — now it is safe to set the title.
|
|
if (title) {
|
|
await this.client.post("/pages/update", { pageId, title });
|
|
}
|
|
return {
|
|
success: true,
|
|
modified: true,
|
|
message: "Page updated successfully.",
|
|
pageId: pageId,
|
|
verify: mutation.verify,
|
|
// Non-fatal footnote diagnostics (#166); omitted when there are none.
|
|
...footnoteWarningsField(content),
|
|
};
|
|
}
|
|
/**
|
|
* Validate a URL string against a scheme allowlist for a given context.
|
|
*
|
|
* The markdown link path enforces safe schemes via TipTap, but the raw
|
|
* JSON path (updatePageJson) bypasses that — so this is the sanitization
|
|
* choke point for ProseMirror JSON written directly by the caller.
|
|
*
|
|
* - "link": reject javascript:, vbscript:, data: (any scheme that can
|
|
* execute or smuggle script when the href is clicked).
|
|
* - "src": allow only http(s):, mailto:, /api/files paths, or a
|
|
* scheme-less relative/absolute path; reject
|
|
* javascript:/vbscript:/data:/file:.
|
|
*/
|
|
isSafeUrl(url, context) {
|
|
if (typeof url !== "string")
|
|
return false;
|
|
const trimmed = url.trim();
|
|
if (trimmed === "")
|
|
return true; // empty href/src is harmless
|
|
// Extract a leading "scheme:" if present. A scheme must start with a
|
|
// letter and contain only letters/digits/+/-/. before the colon. Strip
|
|
// whitespace and ASCII control chars first so a tab/newline embedded in
|
|
// the scheme cannot smuggle a dangerous scheme past the check.
|
|
const cleaned = trimmed.replace(/[\s\x00-\x1f]+/g, "");
|
|
const schemeMatch = /^([a-zA-Z][a-zA-Z0-9+.-]*):/.exec(cleaned);
|
|
const scheme = schemeMatch ? schemeMatch[1].toLowerCase() : null;
|
|
const dangerous = new Set(["javascript", "vbscript", "data", "file"]);
|
|
if (context === "link") {
|
|
if (scheme === null)
|
|
return true; // relative/anchor link is fine
|
|
// For links, data: is also blocked (can carry script payloads).
|
|
return !new Set(["javascript", "vbscript", "data"]).has(scheme);
|
|
}
|
|
// context === "src"
|
|
if (scheme === null)
|
|
return true; // relative/absolute path (incl. /api/files)
|
|
if (dangerous.has(scheme))
|
|
return false;
|
|
return scheme === "http" || scheme === "https" || scheme === "mailto";
|
|
}
|
|
/**
|
|
* Recursively walk a ProseMirror doc and reject any unsafe URL on a link
|
|
* mark href or on a media node's src/url. Media nodes covered: image,
|
|
* attachment, video, plus embed (rendered as an iframe), youtube, drawio
|
|
* and excalidraw — all of which carry a user-controlled URL that Docmost
|
|
* renders. Throws a clear error on the first violation. A max-depth guard
|
|
* turns an over-deep document into a clean error instead of a RangeError
|
|
* stack overflow.
|
|
*/
|
|
validateDocUrls(node, depth = 0) {
|
|
const MAX_DEPTH = 200;
|
|
if (depth > MAX_DEPTH) {
|
|
throw new Error(`document nesting exceeds the maximum depth of ${MAX_DEPTH}`);
|
|
}
|
|
if (!node || typeof node !== "object")
|
|
return;
|
|
// Link marks on text nodes: validate the href.
|
|
if (Array.isArray(node.marks)) {
|
|
for (const mark of node.marks) {
|
|
if (mark && mark.type === "link" && mark.attrs) {
|
|
if (!this.isSafeUrl(mark.attrs.href, "link")) {
|
|
throw new Error(`unsafe link href rejected: "${mark.attrs.href}"`);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Media nodes: validate src/url against the stricter src allowlist.
|
|
// embed renders as an iframe (highest risk); youtube/drawio/excalidraw
|
|
// likewise carry a user-controlled URL Docmost renders, so they get the
|
|
// same scheme check as image/attachment/video.
|
|
if (node.type === "image" ||
|
|
node.type === "attachment" ||
|
|
node.type === "video" ||
|
|
node.type === "embed" ||
|
|
node.type === "youtube" ||
|
|
node.type === "drawio" ||
|
|
node.type === "excalidraw" ||
|
|
node.type === "audio" ||
|
|
node.type === "pdf") {
|
|
const attrs = node.attrs || {};
|
|
for (const key of ["src", "url"]) {
|
|
if (attrs[key] != null && !this.isSafeUrl(attrs[key], "src")) {
|
|
throw new Error(`unsafe ${node.type} ${key} rejected: "${attrs[key]}"`);
|
|
}
|
|
}
|
|
}
|
|
if (Array.isArray(node.content)) {
|
|
for (const child of node.content) {
|
|
this.validateDocUrls(child, depth + 1);
|
|
}
|
|
}
|
|
}
|
|
/**
|
|
* Recursively validate the STRUCTURE of a ProseMirror node (reuses the
|
|
* recursion shape of validateDocUrls). Every node must be an object with a
|
|
* string `type`; when present, `content` must be an array, `marks` must be
|
|
* an array of objects each with a string `type`, and a text node's `text`
|
|
* must be a string. Throws a clear "invalid ProseMirror document" error on
|
|
* the first violation. A max-depth guard turns an over-deep document into a
|
|
* clean error instead of a RangeError stack overflow.
|
|
*/
|
|
validateDocStructure(node, depth = 0) {
|
|
const MAX_DEPTH = 200;
|
|
if (depth > MAX_DEPTH) {
|
|
throw new Error(`invalid ProseMirror document: nesting exceeds the maximum depth of ${MAX_DEPTH}`);
|
|
}
|
|
if (!node || typeof node !== "object" || typeof node.type !== "string") {
|
|
throw new Error("invalid ProseMirror document: every node must be an object with a string `type`");
|
|
}
|
|
if ("text" in node &&
|
|
node.type === "text" &&
|
|
typeof node.text !== "string") {
|
|
throw new Error("invalid ProseMirror document: a text node must have a string `text`");
|
|
}
|
|
if (node.marks !== undefined) {
|
|
if (!Array.isArray(node.marks)) {
|
|
throw new Error("invalid ProseMirror document: `marks` must be an array");
|
|
}
|
|
for (const mark of node.marks) {
|
|
if (!mark ||
|
|
typeof mark !== "object" ||
|
|
typeof mark.type !== "string") {
|
|
throw new Error("invalid ProseMirror document: every mark must be an object with a string `type`");
|
|
}
|
|
}
|
|
}
|
|
if (node.content !== undefined) {
|
|
if (!Array.isArray(node.content)) {
|
|
throw new Error("invalid ProseMirror document: `content` must be an array when present");
|
|
}
|
|
for (const child of node.content) {
|
|
this.validateDocStructure(child, depth + 1);
|
|
}
|
|
}
|
|
}
|
|
/**
|
|
* Replace page content with a raw ProseMirror JSON document (lossless) and/or
|
|
* update its title. Both `doc` and `title` are optional, but at least one must
|
|
* be supplied:
|
|
* - `doc` provided -> validate + full-overwrite the body (and update the
|
|
* title too when `title` is also given).
|
|
* - `doc` omitted, `title` given -> title-only update; the body is NOT
|
|
* touched/resent (no collab write happens).
|
|
* - neither given -> throws (nothing to update).
|
|
*/
|
|
async updatePageJson(pageId, doc, title) {
|
|
await this.ensureAuthenticated();
|
|
// Title-only / no-op handling: when no document is supplied, do NOT write
|
|
// the body. Update the title if one was given; otherwise there is nothing
|
|
// to do, so fail loudly rather than silently no-op.
|
|
if (doc == null) {
|
|
if (!title) {
|
|
throw new Error("update_page_json: nothing to update (provide content and/or title)");
|
|
}
|
|
await this.client.post("/pages/update", { pageId, title });
|
|
return {
|
|
success: true,
|
|
modified: true,
|
|
message: "Page title updated (content left unchanged).",
|
|
pageId,
|
|
};
|
|
}
|
|
// Validate the document shape before a full overwrite: a malformed doc
|
|
// would otherwise silently corrupt the page (full-overwrite is the
|
|
// documented behaviour; no optimistic-concurrency is applied here).
|
|
if (typeof doc !== "object" ||
|
|
doc.type !== "doc" ||
|
|
!Array.isArray(doc.content)) {
|
|
throw new Error('content must be a ProseMirror document ({"type":"doc","content":[...]}) ' +
|
|
"where content is an array of nodes each having a string `type`");
|
|
}
|
|
// Recurse the WHOLE document so a malformed nested node (e.g. a node with a
|
|
// non-string type, a non-array content/marks, or a text node missing its
|
|
// string text) is rejected up front rather than silently corrupting the
|
|
// page on overwrite.
|
|
this.validateDocStructure(doc);
|
|
// Sanitize URLs before writing. This closes the JSON-path bypass: unlike
|
|
// the markdown link path (which TipTap sanitizes), raw JSON could otherwise
|
|
// inject javascript:/data: link hrefs or media srcs straight into the doc.
|
|
this.validateDocUrls(doc);
|
|
// Canonicalize footnotes (idempotent): an agent-authored JSON doc cannot
|
|
// leave footnotes out of order, orphaned, or in multiple lists — the bottom
|
|
// list + numbering are always derived from reference order. No-op when the
|
|
// footnotes are already canonical.
|
|
doc = canonicalizeFootnotes(doc);
|
|
// Write the BODY first, then the title (#159 split-brain): a failed body
|
|
// write (e.g. persist timeout) must not leave a new title over the old body.
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
const mutation = await this.replacePage(pageId, doc, collabToken, this.apiUrl);
|
|
// Body persisted successfully — now it is safe to set the title.
|
|
if (title) {
|
|
await this.client.post("/pages/update", { pageId, title });
|
|
}
|
|
return {
|
|
success: true,
|
|
modified: true,
|
|
message: "Page content replaced from ProseMirror JSON.",
|
|
pageId,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* AUTHOR-INLINE footnote insertion. The agent supplies only WHERE
|
|
* (`anchorText`, a snippet of body text to attach the marker after) and WHAT
|
|
* (`text`, the footnote content as markdown). Numbering and the bottom
|
|
* `footnotesList` are derived deterministically server-side
|
|
* (`insertInlineFootnote` -> `canonicalizeFootnotes`): the agent never sees,
|
|
* assigns, or edits a footnote number or the list, so it CANNOT desync.
|
|
*
|
|
* Content DEDUP: when an existing definition has the same content, its id is
|
|
* reused (one number, one definition, several references). The write is atomic
|
|
* via `mutatePageContent` (single-writer, page-locked); if the anchor text is
|
|
* not found the transform aborts with a clear error and no write happens.
|
|
*/
|
|
async insertFootnote(pageId, anchorText, text) {
|
|
await this.ensureAuthenticated();
|
|
if (!anchorText || !anchorText.trim()) {
|
|
throw new Error("insert_footnote: anchorText is required");
|
|
}
|
|
if (text == null || `${text}`.trim() === "") {
|
|
throw new Error("insert_footnote: text is required");
|
|
}
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
let result = null;
|
|
const mutation = await this.mutatePage(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
const r = insertInlineFootnote(liveDoc, { anchorText, text });
|
|
if (!r.inserted) {
|
|
// Abort the page-locked write by throwing: mutatePageContent does not
|
|
// persist when the transform throws, so a missing anchor leaves the
|
|
// page untouched (no partial write).
|
|
throw new Error(`insert_footnote: anchor text not found: ${JSON.stringify(anchorText.slice(0, 80))}`);
|
|
}
|
|
result = { footnoteId: r.footnoteId, reused: r.reused };
|
|
return r.doc;
|
|
});
|
|
// The not-found path throws inside the transform (aborting mutatePage), so by
|
|
// here `result` is always set.
|
|
const r = result;
|
|
return {
|
|
success: true,
|
|
modified: true,
|
|
pageId,
|
|
footnoteId: r.footnoteId,
|
|
reused: r.reused,
|
|
message: r.reused
|
|
? "Footnote inserted (reused an existing same-content definition)."
|
|
: "Footnote inserted.",
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Page-locked write seam over collaboration.mutatePageContent. Production just
|
|
* delegates; it exists as an overridable method so the insert_footnote wrapper
|
|
* (transform abort-on-not-found + response shaping) can be unit-tested without
|
|
* standing up a live Hocuspocus collab socket.
|
|
*/
|
|
mutatePage(pageId, collabToken, apiUrl, transform) {
|
|
return mutatePageContent(pageId, collabToken, apiUrl, transform);
|
|
}
|
|
/**
|
|
* Full-document write seam over collaboration.replacePageContent. Production
|
|
* just delegates; it exists as an overridable method so the full-doc write
|
|
* tools (update_page_json, copy_page_content) can have their footnote-
|
|
* canonicalization binding unit-tested without a live Hocuspocus collab socket.
|
|
*/
|
|
replacePage(pageId, doc, collabToken, apiUrl) {
|
|
return replacePageContent(pageId, doc, collabToken, apiUrl);
|
|
}
|
|
/**
|
|
* Export a page to a single self-contained Docmost-flavoured markdown file:
|
|
* meta block + body (with inline comment anchors + diagrams) + comment
|
|
* threads. Lossless round-trip target; see importPageMarkdown for the inverse.
|
|
*/
|
|
async exportPageMarkdown(pageId) {
|
|
await this.ensureAuthenticated();
|
|
const page = await this.getPageRaw(pageId);
|
|
const body = page.content ? convertProseMirrorToMarkdown(page.content) : "";
|
|
let comments = [];
|
|
try {
|
|
comments = await this.listComments(pageId);
|
|
}
|
|
catch (e) {
|
|
// A comments fetch failure must not lose the body; export with [] and let
|
|
// the caller see the (empty) comments block. Log under DEBUG only.
|
|
if (process.env.DEBUG)
|
|
console.error("export: listComments failed", e);
|
|
}
|
|
const meta = {
|
|
version: 1,
|
|
pageId: page.id,
|
|
slugId: page.slugId,
|
|
title: page.title,
|
|
spaceId: page.spaceId,
|
|
parentPageId: page.parentPageId ?? null,
|
|
};
|
|
return serializeDocmostMarkdown(meta, body, comments);
|
|
}
|
|
/**
|
|
* Import a self-contained Docmost markdown file back into a page. Parses out
|
|
* the meta + comments metadata blocks, converts the body to ProseMirror
|
|
* (restoring comment marks + diagrams from their inline HTML), and replaces
|
|
* the page content. Comment THREAD records are NOT written to the server in
|
|
* this version — they are preserved in the file and the inline marks are
|
|
* re-applied so the highlights survive; managing comment records stays with
|
|
* the comment tools/UI.
|
|
*/
|
|
async importPageMarkdown(pageId, fullMarkdown) {
|
|
await this.ensureAuthenticated();
|
|
const { meta, body, comments } = parseDocmostMarkdown(fullMarkdown);
|
|
// PAGE import: canonicalize footnotes (see markdownToProseMirrorCanonical).
|
|
const doc = await markdownToProseMirrorCanonical(body);
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
const mutation = await replacePageContent(pageId, doc, collabToken, this.apiUrl);
|
|
// Collect distinct comment ids that actually became comment marks in the doc.
|
|
const collectCommentIds = (node, acc) => {
|
|
if (!node || typeof node !== "object")
|
|
return acc;
|
|
if (Array.isArray(node.marks)) {
|
|
for (const mk of node.marks) {
|
|
if (mk && mk.type === "comment" && mk.attrs?.commentId) {
|
|
acc.add(mk.attrs.commentId);
|
|
}
|
|
}
|
|
}
|
|
if (Array.isArray(node.content)) {
|
|
for (const child of node.content)
|
|
collectCommentIds(child, acc);
|
|
}
|
|
return acc;
|
|
};
|
|
// Count reflects the comment marks present in the written document, so an id
|
|
// that only appears as inert text (e.g. inside a fenced code block) is not
|
|
// counted because it never becomes a comment mark.
|
|
const anchoredIds = collectCommentIds(doc, new Set());
|
|
const result = {
|
|
success: true,
|
|
pageId,
|
|
anchoredCommentCount: anchoredIds.size,
|
|
commentsInFile: Array.isArray(comments) ? comments.length : 0,
|
|
verify: mutation.verify,
|
|
};
|
|
// Warn (non-fatal) if the file was exported from a DIFFERENT page.
|
|
if (meta?.pageId && meta.pageId !== pageId) {
|
|
result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
|
|
}
|
|
// Non-fatal footnote diagnostics (#166), analyzed on the BODY (the part after
|
|
// the docmost:meta / docmost:comments blocks) — so a `[^x]`-like token inside
|
|
// those JSON blocks never produces a false warning, while real markers in the
|
|
// body do. `body` comes from parseDocmostMarkdown(fullMarkdown) above.
|
|
Object.assign(result, footnoteWarningsField(body));
|
|
return result;
|
|
}
|
|
/**
|
|
* Rename a page (change its title only) without touching or resending its
|
|
* content. The slug is derived from the page record, not the body, so it is
|
|
* left intact too.
|
|
*/
|
|
async renamePage(pageId, title) {
|
|
await this.ensureAuthenticated();
|
|
await this.client.post("/pages/update", { pageId, title });
|
|
return { success: true, pageId, title };
|
|
}
|
|
/**
|
|
* Copy the WHOLE content of one page onto another, entirely server-side: the
|
|
* source's ProseMirror document is read and written verbatim onto the target
|
|
* via the live collab path, so the document never passes through the model.
|
|
*
|
|
* Only the target's BODY is replaced — its title and slug live on the page
|
|
* record (not in the content), so they are untouched. The source page is not
|
|
* modified at all.
|
|
*/
|
|
async copyPageContent(sourcePageId, targetPageId) {
|
|
await this.ensureAuthenticated();
|
|
// A self-copy would be a no-op overwrite; reject it explicitly so a caller
|
|
// mistake surfaces as a clear error rather than a silent round-trip.
|
|
if (sourcePageId === targetPageId) {
|
|
throw new Error("copy_page_content: sourcePageId and targetPageId are the same page (no-op copy)");
|
|
}
|
|
const source = await this.getPageRaw(sourcePageId);
|
|
const content = source?.content;
|
|
if (!content ||
|
|
typeof content !== "object" ||
|
|
content.type !== "doc" ||
|
|
!Array.isArray(content.content)) {
|
|
throw new Error(`copy_page_content: source page ${sourcePageId} has no usable ProseMirror content to copy`);
|
|
}
|
|
// Defense-in-depth: run the same URL-scheme sanitizer the JSON write path
|
|
// uses, so copying never lands a javascript:/data: href/src on the target
|
|
// (parity with updatePageJson; harmless for already-stored source content).
|
|
this.validateDocUrls(content);
|
|
// Defense-in-depth (#228): this is a FULL-document write, so canonicalize
|
|
// footnotes before copying — a no-op on already-canonical source content, but
|
|
// it guarantees a copy can never propagate a non-canonical footnote topology
|
|
// to the target (parity with the other full-doc write paths).
|
|
const canonical = canonicalizeFootnotes(content);
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
const mutation = await this.replacePage(targetPageId, canonical, collabToken, this.apiUrl);
|
|
return {
|
|
success: true,
|
|
sourcePageId,
|
|
targetPageId,
|
|
copiedNodes: canonical.content.length,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Surgical text edits: find/replace inside text nodes of the live
|
|
* document. Preserves all block ids, marks, callouts and tables.
|
|
*/
|
|
async editPageText(pageId, edits) {
|
|
await this.ensureAuthenticated();
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
// Apply the edits against the LIVE synced document, not the debounced REST
|
|
// snapshot, so concurrent human edits/comments are preserved. applyTextEdits
|
|
// records per-edit match problems in `failed` instead of throwing, and
|
|
// applies whatever it can; we abort the write only when nothing applied.
|
|
let results;
|
|
let failed;
|
|
// Whether we actually wrote new content. Set inside the transform: a
|
|
// degenerate edit (e.g. find === replace, or a batch that nets to no change)
|
|
// can "apply" yet leave the document byte-for-byte identical, in which case
|
|
// we must NOT write (no spurious history version) and must not claim a write
|
|
// happened.
|
|
let wrote = false;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
wrote = false;
|
|
const r = applyTextEdits(liveDoc, edits);
|
|
results = r.results;
|
|
failed = r.failed;
|
|
// Nothing applied -> abort the write (mutatePageContent treats a null
|
|
// return from the transform as "write nothing").
|
|
if (r.results.length === 0)
|
|
return null;
|
|
// Edits "applied" but produced an identical document: skip the write so
|
|
// no new history version is created. Stable structural comparison via
|
|
// JSON.stringify (both docs come from the same deep-copied source, so
|
|
// key order is stable).
|
|
if (JSON.stringify(r.doc) === JSON.stringify(liveDoc))
|
|
return null;
|
|
wrote = true;
|
|
return r.doc;
|
|
});
|
|
if ((results?.length ?? 0) === 0 && (failed?.length ?? 0) > 0) {
|
|
// No edit applied: surface an aggregated, actionable error so the caller
|
|
// does not mistake a no-op for a partial success.
|
|
throw new Error("edit_page_text: no edits were applied (nothing written). " +
|
|
failed.map((f) => `"${f.find}": ${f.reason}`).join("; "));
|
|
}
|
|
// Edits matched but produced no content change (identical document): report
|
|
// a successful no-op — NOT a failure — and do not falsely claim a write.
|
|
if (!wrote) {
|
|
return {
|
|
success: true,
|
|
pageId,
|
|
applied: results,
|
|
failed,
|
|
message: "No changes written (edits produced identical content).",
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
const result = {
|
|
success: true,
|
|
pageId,
|
|
applied: results,
|
|
failed,
|
|
message: (failed?.length ?? 0)
|
|
? `Applied ${results?.length ?? 0} edit(s); ${failed.length} failed (see failed[]). Node ids and formatting preserved.`
|
|
: "Text edits applied (node ids and formatting preserved).",
|
|
verify: mutation.verify,
|
|
};
|
|
// If any applied edit matched only after stripping markdown (the
|
|
// normalized fallback), warn that edit_page_text preserved existing marks
|
|
// and did NOT change formatting — so a caller who intended a formatting
|
|
// change is pointed at patch_node.
|
|
if (results?.some((r) => r.normalized === true)) {
|
|
result.warning =
|
|
"Some edits matched only after stripping markdown from your find string; " +
|
|
"edit_page_text preserved existing marks (it did not change bold/strike/etc.). " +
|
|
"If you intended a formatting change, use patch_node.";
|
|
}
|
|
return result;
|
|
}
|
|
/**
|
|
* Replace EVERY node whose attrs.id === nodeId (recursively, including nodes
|
|
* nested in callouts/tables) with the supplied node. Operates on the LIVE
|
|
* collab document so comments and concurrent edits are preserved.
|
|
*
|
|
* The replacement node's block id is preserved: if node.attrs is missing it
|
|
* is created, and if node.attrs.id is missing it is set to nodeId so the
|
|
* replacement keeps the same id it replaced. Throws if no node matches.
|
|
*/
|
|
async patchNode(pageId, nodeId, node) {
|
|
await this.ensureAuthenticated();
|
|
if (!node || typeof node !== "object" || typeof node.type !== "string") {
|
|
throw new Error("patch_node: `node` must be an object with a string `type`");
|
|
}
|
|
// Preserve the block id WITHOUT mutating the caller's object: build a local
|
|
// copy whose attrs.id === nodeId (so the swapped-in node keeps the id of the
|
|
// node it replaces).
|
|
const target = {
|
|
...node,
|
|
attrs: {
|
|
...(node.attrs && typeof node.attrs === "object" ? node.attrs : {}),
|
|
},
|
|
};
|
|
if (target.attrs.id == null) {
|
|
target.attrs.id = nodeId;
|
|
}
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
// Track the replacement count in an outer var, reset per-transform, so a
|
|
// collab retry recomputes it cleanly (mirrors replaceImage's pattern).
|
|
let replaced = 0;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
replaced = 0;
|
|
const { doc: nd, replaced: r } = replaceNodeById(liveDoc, nodeId, target);
|
|
replaced = r;
|
|
// 0 matches -> skip the write. >1 matches -> the id is AMBIGUOUS: Docmost
|
|
// duplicates block ids on copy/paste (and copyPageContent writes them
|
|
// verbatim), so replacing "the node with id X" would silently clobber
|
|
// EVERY duplicate (#159). Refuse: skip the write and throw below so the
|
|
// model re-targets with a more specific anchor instead of corrupting the
|
|
// page. Only an unambiguous single match is written.
|
|
if (replaced !== 1)
|
|
return null;
|
|
return nd;
|
|
});
|
|
// 0 -> "no node"; >1 -> "ambiguous, refused" (the transform already skipped
|
|
// the write for any count !== 1). Single shared guard (#159, #185 review).
|
|
assertUnambiguousMatch("patch_node", "replace", replaced, nodeId, pageId);
|
|
return { success: true, replaced, nodeId, verify: mutation.verify };
|
|
}
|
|
/**
|
|
* Insert a node relative to an anchor (or append it at the top level).
|
|
* Operates on the LIVE collab document so comments and concurrent edits are
|
|
* preserved.
|
|
*
|
|
* opts.position:
|
|
* - "append": push the node at the end of the top-level content.
|
|
* - "before"/"after": insert the node as a sibling of the anchor, just
|
|
* before/after it. Exactly one of anchorNodeId / anchorText must be given;
|
|
* anchorNodeId locates a node anywhere by attrs.id, anchorText matches the
|
|
* first top-level block whose plain text includes it.
|
|
*
|
|
* Throws if the anchor cannot be found.
|
|
*/
|
|
async insertNode(pageId, node, opts) {
|
|
await this.ensureAuthenticated();
|
|
if (!node || typeof node !== "object" || typeof node.type !== "string") {
|
|
throw new Error("insert_node: `node` must be an object with a string `type`");
|
|
}
|
|
if (!opts ||
|
|
(opts.position !== "before" &&
|
|
opts.position !== "after" &&
|
|
opts.position !== "append")) {
|
|
throw new Error('insert_node: `position` must be one of "before", "after", "append"');
|
|
}
|
|
if (opts.position === "before" || opts.position === "after") {
|
|
// before/after require EXACTLY ONE anchor (an id or a text fragment).
|
|
const hasId = typeof opts.anchorNodeId === "string" && opts.anchorNodeId.length > 0;
|
|
const hasText = typeof opts.anchorText === "string" && opts.anchorText.length > 0;
|
|
if (hasId === hasText) {
|
|
throw new Error(`insert_node: position "${opts.position}" requires exactly one of anchorNodeId or anchorText`);
|
|
}
|
|
}
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
// Track insertion in an outer var, reset per-transform, so a collab retry
|
|
// recomputes it cleanly (mirrors replaceImage's pattern).
|
|
let inserted = false;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
inserted = false;
|
|
const { doc: nd, inserted: ins } = insertNodeRelative(liveDoc, node, opts);
|
|
inserted = ins;
|
|
if (!inserted)
|
|
return null; // anchor not found -> skip the write entirely
|
|
return nd;
|
|
});
|
|
if (!inserted) {
|
|
const anchorDesc = opts.anchorNodeId
|
|
? `anchorNodeId "${opts.anchorNodeId}"`
|
|
: `anchorText "${opts.anchorText}"`;
|
|
// anchorText is matched against the block's literal RENDERED plain text;
|
|
// markdown/emoji are tolerated only as a strip-and-retry fallback, so a
|
|
// miss usually means the text differs from what's on the page.
|
|
const hint = opts.anchorText
|
|
? " anchorText must be the block's literal rendered plain text (no markdown wrappers or emoji); anchorNodeId from get_page_json is more reliable."
|
|
: "";
|
|
throw new Error(`insert_node: anchor not found (${anchorDesc}) on page ${pageId}.${hint}`);
|
|
}
|
|
return {
|
|
success: true,
|
|
inserted: true,
|
|
position: opts.position,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Remove EVERY node whose attrs.id === nodeId (recursively, including nodes
|
|
* nested in callouts/tables) from its parent content array. Operates on the
|
|
* LIVE collab document so comments and concurrent edits are preserved.
|
|
* Throws if no node matches.
|
|
*/
|
|
async deleteNode(pageId, nodeId) {
|
|
await this.ensureAuthenticated();
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
// Track the deletion count in an outer var, reset per-transform, so a
|
|
// collab retry recomputes it cleanly (mirrors replaceImage's pattern).
|
|
let deleted = 0;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
deleted = 0;
|
|
const { doc: nd, deleted: d } = deleteNodeById(liveDoc, nodeId);
|
|
deleted = d;
|
|
// 0 matches -> skip the write. >1 matches -> the id is AMBIGUOUS (block
|
|
// ids are duplicated on copy/paste, #159): deleting "the node with id X"
|
|
// would silently remove EVERY duplicate. Refuse: skip the write and throw
|
|
// below so the model re-targets. Only an unambiguous single match is
|
|
// deleted.
|
|
if (deleted !== 1)
|
|
return null;
|
|
return nd;
|
|
});
|
|
// 0 -> "no node"; >1 -> "ambiguous, refused" (the transform already skipped
|
|
// the write for any count !== 1). Single shared guard (#159, #185 review).
|
|
assertUnambiguousMatch("delete_node", "delete", deleted, nodeId, pageId);
|
|
return { success: true, deleted, nodeId, verify: mutation.verify };
|
|
}
|
|
/** Build the public share URL for a page. */
|
|
shareUrl(shareKey, slugId) {
|
|
return `${this.appUrl}/share/${shareKey}/p/${slugId}`;
|
|
}
|
|
/** Share a page publicly (idempotent) and return the public URL. */
|
|
async sharePage(pageId, searchIndexing = true) {
|
|
await this.ensureAuthenticated();
|
|
const response = await this.client.post("/shares/create", {
|
|
pageId,
|
|
includeSubPages: false,
|
|
searchIndexing,
|
|
});
|
|
const share = response.data?.data ?? response.data;
|
|
const slugId = share.page?.slugId || (await this.getPageRaw(pageId)).slugId;
|
|
return {
|
|
shareId: share.id,
|
|
key: share.key,
|
|
pageId: share.pageId,
|
|
publicUrl: this.shareUrl(share.key, slugId),
|
|
searchIndexing: share.searchIndexing,
|
|
};
|
|
}
|
|
/** List all public shares in the workspace with their URLs. */
|
|
async listShares() {
|
|
const shares = await this.paginateAll("/shares", {});
|
|
return shares.map((s) => ({
|
|
shareId: s.id,
|
|
key: s.key,
|
|
pageId: s.pageId,
|
|
pageTitle: s.page?.title,
|
|
publicUrl: s.page?.slugId ? this.shareUrl(s.key, s.page.slugId) : null,
|
|
searchIndexing: s.searchIndexing,
|
|
createdAt: s.createdAt,
|
|
}));
|
|
}
|
|
/** Remove the public share of a page. */
|
|
async unsharePage(pageId) {
|
|
await this.ensureAuthenticated();
|
|
const shares = await this.listShares();
|
|
const share = shares.find((s) => s.pageId === pageId);
|
|
if (!share) {
|
|
throw new Error(`Page ${pageId} is not shared.`);
|
|
}
|
|
await this.client.post("/shares/delete", { shareId: share.shareId });
|
|
return { success: true, removedShareId: share.shareId, pageId };
|
|
}
|
|
async search(query, spaceId, limit) {
|
|
await this.ensureAuthenticated();
|
|
const payload = { query, spaceId };
|
|
// Clamp an optional caller-supplied limit into a sane 1..100 range before
|
|
// forwarding it to the server; omit it entirely when not provided so the
|
|
// server applies its own default.
|
|
if (limit !== undefined) {
|
|
payload.limit = Math.max(1, Math.min(100, limit));
|
|
}
|
|
const response = await this.client.post("/search", payload);
|
|
// Normalize both response shapes: bare array and paginated { items: [...] }
|
|
const data = response.data?.data;
|
|
const items = Array.isArray(data) ? data : data?.items || [];
|
|
const filteredItems = items.map((item) => filterSearchResult(item));
|
|
return {
|
|
items: filteredItems,
|
|
success: response.data?.success || false,
|
|
};
|
|
}
|
|
async movePage(pageId, parentPageId, position) {
|
|
await this.ensureAuthenticated();
|
|
// Docmost requires position >= 5 chars.
|
|
const validPosition = position || "a00000";
|
|
return this.client
|
|
.post("/pages/move", {
|
|
pageId,
|
|
parentPageId,
|
|
position: validPosition,
|
|
})
|
|
.then((res) => res.data);
|
|
}
|
|
async deletePage(pageId) {
|
|
await this.ensureAuthenticated();
|
|
return this.client
|
|
.post("/pages/delete", { pageId })
|
|
.then((res) => res.data);
|
|
}
|
|
// --- Comment methods (ported from upstream PR #3 by Max Nikitin) ---
|
|
/**
|
|
* Normalize a comment's `content` into a ProseMirror doc object before
|
|
* markdown conversion. createComment/updateComment send content as a
|
|
* JSON.stringify(...) STRING, and the server stores it as-is, so on read it
|
|
* comes back as a string. convertProseMirrorToMarkdown returns "" for a
|
|
* string, so parse it first (guarded — fall back to the raw value on any
|
|
* parse failure so a non-JSON legacy value is still handled gracefully).
|
|
*/
|
|
parseCommentContent(content) {
|
|
if (typeof content !== "string")
|
|
return content;
|
|
try {
|
|
return JSON.parse(content);
|
|
}
|
|
catch {
|
|
return content;
|
|
}
|
|
}
|
|
/** List all comments on a page (cursor-paginated), content as markdown. */
|
|
async listComments(pageId) {
|
|
await this.ensureAuthenticated();
|
|
let allComments = [];
|
|
let cursor = null;
|
|
do {
|
|
const payload = { pageId, limit: 100 };
|
|
if (cursor)
|
|
payload.cursor = cursor;
|
|
const response = await this.client.post("/comments", payload);
|
|
const data = response.data.data || response.data;
|
|
const items = data.items || [];
|
|
allComments = allComments.concat(items);
|
|
cursor = data.meta?.nextCursor || null;
|
|
} while (cursor);
|
|
return allComments.map((comment) => {
|
|
const markdown = comment.content
|
|
? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content))
|
|
: "";
|
|
return filterComment(comment, markdown);
|
|
});
|
|
}
|
|
async getComment(commentId) {
|
|
await this.ensureAuthenticated();
|
|
const response = await this.client.post("/comments/info", { commentId });
|
|
const comment = response.data.data || response.data;
|
|
const markdown = comment.content
|
|
? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content))
|
|
: "";
|
|
return {
|
|
data: filterComment(comment, markdown),
|
|
success: true,
|
|
};
|
|
}
|
|
/**
|
|
* Create an inline comment anchored to its `selection` text, or a reply.
|
|
*
|
|
* Top-level comments (no `parentCommentId`) are ALWAYS inline and MUST carry a
|
|
* `selection`: the `type` argument is kept for interface compatibility but the
|
|
* effective type is coerced to "inline". The selection has to anchor in the
|
|
* document; if it cannot, the comment is rolled back and an error is thrown so
|
|
* the caller is forced to supply a proper inline selection rather than leaving
|
|
* an orphan, unanchored comment behind. Replies (parentCommentId set) inherit
|
|
* their parent's anchor: they take NO selection and are not anchored.
|
|
*/
|
|
async createComment(pageId, content, type = "page", selection, parentCommentId) {
|
|
await this.ensureAuthenticated();
|
|
const isReply = !!parentCommentId;
|
|
// Only top-level comments are inline-anchored, so they are stored as
|
|
// "inline". Replies carry no inline selection, so they keep the historical
|
|
// general ("page") type — both backward-compatible and semantically correct.
|
|
// The `type` argument is kept for interface compatibility; createComment
|
|
// normalizes the effective type internally, so callers may pass "inline".
|
|
const effectiveType = isReply ? "page" : "inline";
|
|
if (!isReply && (!selection || !selection.trim())) {
|
|
throw new Error("create_comment: an inline 'selection' (exact text to anchor on) is required for a top-level comment");
|
|
}
|
|
// For a top-level comment, fail BEFORE creating anything when the selection
|
|
// is not present in the persisted document — this avoids leaving an orphan
|
|
// comment + notification behind. A read failure (network) is non-fatal: the
|
|
// live anchor step below still enforces the anchoring invariant.
|
|
if (!isReply && selection) {
|
|
try {
|
|
const page = await this.getPageJson(pageId);
|
|
if (!canAnchorInDoc(page.content, selection)) {
|
|
throw new Error("create_comment: could not find the selection text in the page to anchor the comment. " +
|
|
"Provide the EXACT contiguous text from a single paragraph/block (<=250 chars).");
|
|
}
|
|
}
|
|
catch (e) {
|
|
// Rethrow our own "not found" error; swallow read/network errors so the
|
|
// live anchor step can still try (and enforce) the anchoring.
|
|
if (e instanceof Error &&
|
|
e.message.startsWith("create_comment: could not find the selection")) {
|
|
throw e;
|
|
}
|
|
if (process.env.DEBUG) {
|
|
console.error("Pre-check getPageJson failed; deferring to live anchor step:", e);
|
|
}
|
|
}
|
|
}
|
|
// Convert through the full Docmost schema. Deliberately the NON-canonicalizing
|
|
// variant: a comment body may carry a footnote definition with no matching
|
|
// reference, and canonicalization would drop it (data loss). See
|
|
// markdownToProseMirror vs markdownToProseMirrorCanonical.
|
|
const jsonContent = await markdownToProseMirror(content);
|
|
const payload = {
|
|
pageId,
|
|
content: JSON.stringify(jsonContent),
|
|
type: effectiveType,
|
|
};
|
|
if (!isReply && selection)
|
|
payload.selection = selection;
|
|
if (parentCommentId)
|
|
payload.parentCommentId = parentCommentId;
|
|
const response = await this.client.post("/comments/create", payload);
|
|
const comment = response.data.data || response.data;
|
|
const markdown = comment.content
|
|
? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content))
|
|
: content;
|
|
const result = {
|
|
data: filterComment(comment, markdown),
|
|
success: true,
|
|
};
|
|
// Replies inherit the parent's anchor: no selection, no anchoring.
|
|
if (isReply) {
|
|
return result;
|
|
}
|
|
// Anchor the comment in the document. The /comments/create API records the
|
|
// comment + its `selection` text, but it does NOT insert the comment MARK
|
|
// into the page content, so without this the inline comment has no
|
|
// highlight/anchor and is not clickable. If anchoring fails the comment is
|
|
// rolled back (deleted) and an error is thrown — never an orphan comment.
|
|
const newCommentId = comment.id;
|
|
// Guard: a create response without an id would mean writing a comment mark
|
|
// with commentId: undefined and a later delete of a falsy id. We have no id
|
|
// to roll back here (nothing was created with an id), so just fail loudly.
|
|
if (!newCommentId) {
|
|
throw new Error("create_comment: the server returned no comment id, so the comment could not be anchored");
|
|
}
|
|
let anchored = false;
|
|
try {
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
const doc = liveDoc && liveDoc.type === "doc"
|
|
? liveDoc
|
|
: { type: "doc", content: [] };
|
|
if (applyAnchorInDoc(doc, selection, newCommentId)) {
|
|
anchored = true;
|
|
return doc;
|
|
}
|
|
// Selection text not found in the LIVE document: abort the write. The
|
|
// rollback + throw below turns this into a hard error.
|
|
return null;
|
|
});
|
|
result.verify = mutation.verify;
|
|
}
|
|
catch (e) {
|
|
// The comment record already exists; roll it back so we never leave an
|
|
// orphan, then rethrow the original anchoring error.
|
|
await this.safeDeleteComment(newCommentId);
|
|
throw e;
|
|
}
|
|
if (!anchored) {
|
|
// Mutation aborted because the selection was not found in the live
|
|
// document. Roll back the comment and surface a hard error.
|
|
await this.safeDeleteComment(newCommentId);
|
|
throw new Error("create_comment: failed to anchor the comment (selection not found in the live document); the comment was rolled back");
|
|
}
|
|
result.anchored = true;
|
|
return result;
|
|
}
|
|
/**
|
|
* Best-effort rollback of a just-created comment. Swallows any delete failure
|
|
* (logging under DEBUG) so a failed cleanup never masks the original error.
|
|
*/
|
|
async safeDeleteComment(commentId) {
|
|
// Defense in depth: never call the delete API with a falsy id — there is
|
|
// nothing to roll back, and deleteComment(undefined) would hit a bad route.
|
|
if (!commentId)
|
|
return;
|
|
try {
|
|
await this.deleteComment(commentId);
|
|
}
|
|
catch (delErr) {
|
|
if (process.env.DEBUG) {
|
|
console.error("Failed to roll back comment after anchoring error:", delErr);
|
|
}
|
|
}
|
|
}
|
|
async updateComment(commentId, content) {
|
|
await this.ensureAuthenticated();
|
|
// NON-canonicalizing on purpose (comment body — see createComment).
|
|
const jsonContent = await markdownToProseMirror(content);
|
|
await this.client.post("/comments/update", {
|
|
commentId,
|
|
content: JSON.stringify(jsonContent),
|
|
});
|
|
return {
|
|
success: true,
|
|
commentId,
|
|
message: "Comment updated successfully.",
|
|
};
|
|
}
|
|
async deleteComment(commentId) {
|
|
await this.ensureAuthenticated();
|
|
return this.client
|
|
.post("/comments/delete", { commentId })
|
|
.then((res) => res.data);
|
|
}
|
|
/**
|
|
* Resolve or reopen a top-level comment thread (reversible — `resolved`
|
|
* toggles the state). Only top-level comments can be resolved; the server
|
|
* rejects resolving a reply. Hits POST /comments/resolve.
|
|
*/
|
|
async resolveComment(commentId, resolved) {
|
|
await this.ensureAuthenticated();
|
|
const response = await this.client.post("/comments/resolve", {
|
|
commentId,
|
|
resolved,
|
|
});
|
|
const comment = response.data?.data ?? response.data;
|
|
return {
|
|
success: true,
|
|
commentId,
|
|
resolved,
|
|
comment,
|
|
};
|
|
}
|
|
/**
|
|
* Check for new comments across pages in a space (optionally scoped to a
|
|
* subtree): pages updated after `since` are scanned and their comments
|
|
* filtered by createdAt > since.
|
|
*/
|
|
async checkNewComments(spaceId, since, parentPageId) {
|
|
await this.ensureAuthenticated();
|
|
const sinceDate = new Date(since);
|
|
// Reject an unparseable `since`: comparing against an Invalid Date silently
|
|
// yields zero new comments (every `>` against NaN is false), which would
|
|
// mask a malformed input as "nothing new" instead of erroring.
|
|
if (Number.isNaN(sinceDate.getTime())) {
|
|
throw new Error(`checkNewComments: invalid "since" date "${since}"; expected an ISO-8601 timestamp`);
|
|
}
|
|
// 1. Enumerate the FULL set of pages in scope by walking the sidebar-pages
|
|
// tree (a complete page index), NOT the bounded "/pages/recent" feed which
|
|
// caps at ~5000 recent items and silently misses comments on older pages.
|
|
//
|
|
// Subtree scope: when parentPageId is given, the scope is that page ITSELF
|
|
// plus every descendant (enumerateSpacePages walks its children). Otherwise
|
|
// the scope is the whole space (all roots and their descendants).
|
|
//
|
|
// NOTE: do NOT pre-filter by page.updatedAt — creating a comment does not
|
|
// bump it (verified on a live server), so such a filter silently misses
|
|
// comments on pages that were not otherwise edited. The complete tree walk
|
|
// already restricts the scope correctly, so no recent-feed allow-list is
|
|
// needed any more.
|
|
let pagesInScope;
|
|
if (parentPageId) {
|
|
const subtree = await this.enumerateSpacePages(spaceId, parentPageId);
|
|
// Include the parent page node itself alongside its descendants. Fetch it
|
|
// so its title/id are available even though it is not returned by its own
|
|
// children listing.
|
|
let parentNode = { id: parentPageId };
|
|
try {
|
|
parentNode = await this.getPageRaw(parentPageId);
|
|
}
|
|
catch (e) {
|
|
// Fall back to a minimal node if the parent can't be fetched; its
|
|
// comments are still attempted below (the fetch there is non-fatal).
|
|
}
|
|
pagesInScope = [parentNode, ...subtree];
|
|
}
|
|
else {
|
|
pagesInScope = await this.enumerateSpacePages(spaceId);
|
|
}
|
|
// 2. Fetch comments for each page, keep ones created after since
|
|
const results = [];
|
|
for (const page of pagesInScope) {
|
|
try {
|
|
const comments = await this.listComments(page.id);
|
|
const newComments = comments.filter((c) => new Date(c.createdAt) > sinceDate);
|
|
if (newComments.length > 0) {
|
|
results.push({
|
|
pageId: page.id,
|
|
pageTitle: page.title,
|
|
comments: newComments,
|
|
});
|
|
}
|
|
}
|
|
catch (e) {
|
|
// Skip pages with errors (e.g. deleted between calls)
|
|
}
|
|
}
|
|
const totalNewComments = results.reduce((sum, r) => sum + r.comments.length, 0);
|
|
// enumerateSpacePages caps traversal at 10000 nodes; flag when that cap was
|
|
// hit so the caller knows the scan may be incomplete (some pages skipped).
|
|
const truncated = pagesInScope.length >= 10000;
|
|
return {
|
|
since,
|
|
scope: parentPageId ? `subtree of ${parentPageId}` : `space ${spaceId}`,
|
|
checkedPages: pagesInScope.length,
|
|
pagesWithNewComments: results.length,
|
|
totalNewComments,
|
|
truncated,
|
|
comments: results,
|
|
};
|
|
}
|
|
// --- Image upload / embedding ---
|
|
/** Map a Content-Type string to a supported MIME type, or null if unsupported. */
|
|
supportedImageMime(ct) {
|
|
return MIME_TO_EXT[ct] ? ct : null;
|
|
}
|
|
/**
|
|
* Download a remote image from a caller-supplied URL and resolve its bytes,
|
|
* MIME and a filename.
|
|
*
|
|
* SSRF / RESOURCE TRUST BOUNDARY: the URL comes from the MCP caller and is
|
|
* fetched BY THE SERVER, so it must be guarded before and after the request.
|
|
* The guards mirror the local-file trust boundary in uploadImage:
|
|
* - scheme allowlist (http/https only) — rejects file:, data:, ftp:, etc.,
|
|
* so the caller cannot use this path to read local files or other schemes;
|
|
* - a size cap enforced both via axios maxContentLength/maxBodyLength AND a
|
|
* post-download buffer.length re-check (defends against a missing/lying
|
|
* Content-Length), so a huge response cannot exhaust memory;
|
|
* - a 30s timeout. The timeout matters because replaceImage holds the
|
|
* per-page lock across this upload, so a hung download would wedge the
|
|
* lock for that page.
|
|
* We deliberately do NOT block private IP ranges: the MCP caller is already
|
|
* trusted to read arbitrary host files via the filePath path, so the marginal
|
|
* trust granted by fetching internal URLs is comparable, and blocking would
|
|
* break legitimate internal-image use.
|
|
*/
|
|
async fetchRemoteImage(url, maxBytes) {
|
|
// Scheme allowlist first — cheapest guard, and rejects non-http(s) schemes
|
|
// (file:, data:, ftp:, ...) before any network request is made.
|
|
let parsed;
|
|
try {
|
|
parsed = new URL(url);
|
|
}
|
|
catch (e) {
|
|
throw new Error(`Invalid image URL "${url}": ${e.message}`);
|
|
}
|
|
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
|
|
throw new Error(`unsupported image URL scheme "${parsed.protocol}"; only http and https are allowed`);
|
|
}
|
|
let response;
|
|
try {
|
|
response = await axios.get(url, {
|
|
responseType: "arraybuffer",
|
|
timeout: 30000,
|
|
maxContentLength: maxBytes,
|
|
maxBodyLength: maxBytes,
|
|
headers: { Accept: "image/*" },
|
|
});
|
|
}
|
|
catch (error) {
|
|
// Keep the thrown message free of the raw response body (it may echo
|
|
// server internals); surface only status/statusText. The full body is
|
|
// logged under DEBUG for diagnostics.
|
|
if (axios.isAxiosError(error)) {
|
|
if (process.env.DEBUG) {
|
|
console.error("Image download failed; response body:", JSON.stringify(error.response?.data));
|
|
}
|
|
throw new Error(`Image download failed for "${url}": ${error.response?.status ?? ""} ${error.response?.statusText ?? error.message}`.trim());
|
|
}
|
|
throw error;
|
|
}
|
|
// axios returns an ArrayBuffer for responseType: "arraybuffer".
|
|
const buffer = Buffer.from(response.data);
|
|
// Re-check the size: maxContentLength relies on Content-Length, which may be
|
|
// absent or lie, so guard against the actual byte count too.
|
|
if (buffer.length === 0) {
|
|
throw new Error(`Empty image response from "${url}"`);
|
|
}
|
|
if (buffer.length > maxBytes) {
|
|
throw new Error(`Image too large: ${buffer.length} bytes exceeds the ${maxBytes}-byte cap`);
|
|
}
|
|
// Resolve MIME: prefer the response Content-Type (strip any "; charset=..."
|
|
// parameter, lowercase, trim) mapped through the supported set; if the
|
|
// header is generic/missing/unsupported, fall back to the URL path
|
|
// extension via the existing extension->MIME logic.
|
|
const rawCt = response.headers?.["content-type"];
|
|
let mime = null;
|
|
if (typeof rawCt === "string" && rawCt.length > 0) {
|
|
const ct = rawCt.split(";")[0].trim().toLowerCase();
|
|
mime = this.supportedImageMime(ct);
|
|
}
|
|
if (!mime) {
|
|
// Fall back to the URL path extension. Use the pathname so the query
|
|
// string never contaminates the extension lookup.
|
|
const ext = extname(parsed.pathname).toLowerCase();
|
|
mime = EXT_TO_MIME[ext] ?? null;
|
|
}
|
|
if (!mime) {
|
|
throw new Error(`cannot determine supported image type for "${url}"; supported: png, jpg, jpeg, gif, webp, svg`);
|
|
}
|
|
// Build a filename from the URL path basename (ignore the query string),
|
|
// defaulting to "image" when empty, and ensure it ends with the canonical
|
|
// extension for the resolved MIME (append it when missing/mismatched).
|
|
const canonicalExt = MIME_TO_EXT[mime];
|
|
let fileName = basename(parsed.pathname) || "image";
|
|
if (extname(fileName).toLowerCase() !== canonicalExt) {
|
|
fileName += canonicalExt;
|
|
}
|
|
return { buffer, mime, fileName };
|
|
}
|
|
/** Build a Docmost ProseMirror image node from an uploaded attachment. */
|
|
buildImageNode(att, align, alt) {
|
|
// Clean file URL, matching Docmost's native behaviour. No cache-busting
|
|
// query: the server serves the bare URL correctly, and replacement creates
|
|
// a new attachment id (a new URL) which busts caches naturally.
|
|
const src = `/api/files/${att.id}/${att.fileName}`;
|
|
const node = {
|
|
type: "image",
|
|
attrs: {
|
|
src,
|
|
attachmentId: att.id,
|
|
// Default to null when the server omits fileSize so the attr is never
|
|
// undefined (undefined would be dropped on serialization / break the
|
|
// ProseMirror image schema which expects size present).
|
|
size: att.fileSize ?? null,
|
|
align: align || "center",
|
|
width: null,
|
|
},
|
|
};
|
|
if (alt)
|
|
node.attrs.alt = alt;
|
|
return node;
|
|
}
|
|
/**
|
|
* Download a remote image from an http(s) URL and upload it as an attachment
|
|
* of a page, returning the attachment metadata plus a ready-to-insert
|
|
* ProseMirror image node. Local file paths are intentionally not supported:
|
|
* the MCP caller is a remote AI with no access to this server's filesystem.
|
|
*/
|
|
async uploadImage(pageId, url) {
|
|
await this.ensureAuthenticated();
|
|
const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB
|
|
// Fetch + validate the remote image (scheme allowlist, size cap, timeout).
|
|
// See fetchRemoteImage for the SSRF / resource trust boundary.
|
|
const fetched = await this.fetchRemoteImage(url, MAX_IMAGE_BYTES);
|
|
const fileBuffer = fetched.buffer;
|
|
const mime = fetched.mime;
|
|
const fileName = fetched.fileName;
|
|
// Build a FRESH FormData for every send attempt. A FormData body is a
|
|
// single-use stream that is CONSUMED on the first send, so it cannot be
|
|
// replayed by this.client's response interceptor (replaying a consumed
|
|
// stream fails with 'socket hang up'). Multipart re-auth is therefore done
|
|
// here with bare axios and an explicit one-shot 401/403 retry that rebuilds
|
|
// the body. Field order matters: text fields must precede the file part so
|
|
// the server reads them; the server always generates a fresh attachment id.
|
|
const buildForm = () => {
|
|
const form = new FormData();
|
|
form.append("pageId", pageId);
|
|
form.append("file", fileBuffer, {
|
|
filename: fileName,
|
|
contentType: mime,
|
|
});
|
|
return form;
|
|
};
|
|
// Local name distinct from the `url` parameter (the source image URL): this
|
|
// is the /files/upload endpoint we POST the multipart body to.
|
|
const uploadUrl = `${this.apiUrl}/files/upload`;
|
|
let response;
|
|
try {
|
|
// Call buildForm() ONCE per attempt and reuse the instance for both
|
|
// getHeaders() and the body so the Content-Type boundary matches the body.
|
|
const form = buildForm();
|
|
// Read the Authorization header from this.client's defaults (set by
|
|
// login(), only ever deleted — never set to null) instead of building
|
|
// `Bearer ${this.token}`: a concurrent JSON 401 can null this.token
|
|
// mid-flight, which would otherwise produce a literal "Bearer null".
|
|
// ensureAuthenticated() above guarantees login() ran, so the default
|
|
// header exists here. A 60s timeout keeps a hung upload from wedging the
|
|
// per-page lock (replaceImage holds withPageLock across this call).
|
|
response = await axios.post(uploadUrl, form, {
|
|
headers: {
|
|
...form.getHeaders(),
|
|
Authorization: this.client.defaults.headers.common["Authorization"],
|
|
},
|
|
timeout: 60000,
|
|
});
|
|
}
|
|
catch (error) {
|
|
// On an expired-token auth error, re-login and retry exactly once with a
|
|
// freshly-rebuilt FormData (the previous one was already consumed).
|
|
if (axios.isAxiosError(error) &&
|
|
(error.response?.status === 401 || error.response?.status === 403)) {
|
|
await this.login();
|
|
const form2 = buildForm();
|
|
response = await axios.post(uploadUrl, form2, {
|
|
headers: {
|
|
...form2.getHeaders(),
|
|
Authorization: this.client.defaults.headers.common["Authorization"],
|
|
},
|
|
timeout: 60000,
|
|
});
|
|
}
|
|
else if (axios.isAxiosError(error)) {
|
|
// Keep the thrown message free of the raw response body (it may echo
|
|
// request data or server internals); surface only status/statusText.
|
|
// The full body is logged under DEBUG for diagnostics.
|
|
if (process.env.DEBUG) {
|
|
console.error("Image upload failed; response body:", JSON.stringify(error.response?.data));
|
|
}
|
|
throw new Error(`Image upload failed: ${error.response?.status} ${error.response?.statusText}`);
|
|
}
|
|
else {
|
|
throw error;
|
|
}
|
|
}
|
|
// The attachment may arrive bare or wrapped in a { data } envelope.
|
|
const att = response.data?.data ?? response.data;
|
|
if (!att?.id || !att?.fileName) {
|
|
throw new Error("Unexpected /files/upload response: " + JSON.stringify(response.data));
|
|
}
|
|
// Some Docmost versions omit fileSize from the upload response. Fall back
|
|
// to the fetched byte length (the bytes we just uploaded) so callers never
|
|
// get an undefined size.
|
|
const resolvedSize = att.fileSize ?? fileBuffer.length;
|
|
return {
|
|
attachmentId: att.id,
|
|
fileName: att.fileName,
|
|
fileSize: resolvedSize,
|
|
src: `/api/files/${att.id}/${att.fileName}`,
|
|
imageNode: this.buildImageNode({ ...att, fileSize: resolvedSize }),
|
|
};
|
|
}
|
|
/**
|
|
* Upload an image from a web (http/https) URL and insert it into a page in
|
|
* one step.
|
|
* By default the image is appended at the end. With replaceText, the first
|
|
* top-level block whose text contains the string is replaced; with afterText,
|
|
* the image is inserted right after the first matching block. All other
|
|
* block ids are preserved (only one top-level block is added or swapped).
|
|
*/
|
|
async insertImage(pageId, url, opts = {}) {
|
|
const up = await this.uploadImage(pageId, url);
|
|
// Reuse the node from uploadImage (clean /api/files/<id>/<file> src), then
|
|
// apply align/alt onto a shallow attrs copy.
|
|
const node = { ...up.imageNode, attrs: { ...up.imageNode.attrs } };
|
|
if (opts.align)
|
|
node.attrs.align = opts.align;
|
|
if (opts.alt)
|
|
node.attrs.alt = opts.alt;
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
// Recursively collect the plain text of a top-level block.
|
|
const blockText = (n) => {
|
|
let out = "";
|
|
if (n.type === "text")
|
|
out += n.text || "";
|
|
for (const child of n.content || [])
|
|
out += blockText(child);
|
|
return out;
|
|
};
|
|
// Insert into the LIVE synced document, not the debounced REST snapshot, so
|
|
// concurrent edits/comments/images are preserved and parallel insert_image
|
|
// calls (serialized by the per-page lock) each see the previous insertion.
|
|
let placement;
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
|
const doc = liveDoc && liveDoc.type === "doc"
|
|
? liveDoc
|
|
: { type: "doc", content: [] };
|
|
if (!Array.isArray(doc.content))
|
|
doc.content = [];
|
|
if (opts.replaceText) {
|
|
// Ambiguity guard (mirrors editPageText): count matching top-level
|
|
// blocks first, so a non-unique fragment cannot silently replace the
|
|
// wrong block (e.g. text that also appears inside a callout/table).
|
|
const matches = doc.content.filter((b) => blockText(b).includes(opts.replaceText));
|
|
if (matches.length === 0) {
|
|
throw new Error(`replaceText not found: "${opts.replaceText}"`);
|
|
}
|
|
if (matches.length > 1) {
|
|
throw new Error(`replaceText "${opts.replaceText}" matches ${matches.length} blocks; use a longer unique fragment`);
|
|
}
|
|
const idx = doc.content.findIndex((b) => blockText(b).includes(opts.replaceText));
|
|
// Data-loss guard: replaceText swaps the WHOLE top-level block, so if
|
|
// the fragment only appears nested inside a container (table, callout,
|
|
// list, blockquote) the entire structure would be destroyed. Refuse
|
|
// when the matched block is a container rather than a leaf
|
|
// paragraph/heading and point the caller at a safer tool.
|
|
const CONTAINER_TYPES = new Set([
|
|
"table",
|
|
"callout",
|
|
"bulletList",
|
|
"orderedList",
|
|
"taskList",
|
|
"blockquote",
|
|
]);
|
|
const matchedBlock = doc.content[idx];
|
|
if (matchedBlock && CONTAINER_TYPES.has(matchedBlock.type)) {
|
|
throw new Error(`replaceText matched a ${matchedBlock.type} container block; replacing it would destroy the whole structure. ` +
|
|
`Use afterText to insert near it, or update_page_json for surgical edits.`);
|
|
}
|
|
doc.content.splice(idx, 1, node);
|
|
placement = "replaced";
|
|
}
|
|
else if (opts.afterText) {
|
|
// Ambiguity guard (mirrors editPageText): refuse a non-unique fragment.
|
|
const matches = doc.content.filter((b) => blockText(b).includes(opts.afterText));
|
|
if (matches.length === 0) {
|
|
throw new Error(`afterText not found: "${opts.afterText}"`);
|
|
}
|
|
if (matches.length > 1) {
|
|
throw new Error(`afterText "${opts.afterText}" matches ${matches.length} blocks; use a longer unique fragment`);
|
|
}
|
|
const idx = doc.content.findIndex((b) => blockText(b).includes(opts.afterText));
|
|
doc.content.splice(idx + 1, 0, node);
|
|
placement = "after";
|
|
}
|
|
else {
|
|
doc.content.push(node);
|
|
placement = "appended";
|
|
}
|
|
return doc;
|
|
});
|
|
return {
|
|
success: true,
|
|
pageId,
|
|
attachmentId: up.attachmentId,
|
|
src: up.src,
|
|
placement,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Replace an existing image in a page with a new image fetched from a web
|
|
* (http/https) URL. Uploads the new file as a brand-new attachment, which
|
|
* yields a fresh clean URL that both renders correctly and busts browser
|
|
* caches (the URL changed). Finds every image node
|
|
* whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested
|
|
* in callouts/tables) and repoints its src/attachmentId/size, preserving
|
|
* comments, alignment and alt. Operates on the live collab document so comments
|
|
* and concurrent edits are preserved. Throws if no matching image is found.
|
|
*
|
|
* The OLD attachment is left in place as an unreferenced orphan: Docmost
|
|
* exposes NO HTTP API to delete a single content attachment (verified against
|
|
* the attachment controller/service and by probing the live API — deletion
|
|
* happens only by cascade when the page, space or user is removed). This is the
|
|
* same outcome as Docmost's own editor when an image is removed/replaced.
|
|
* In-place byte overwrite is deliberately NOT used because some Docmost
|
|
* versions corrupt the attachment (HTTP 500) when its bytes are overwritten.
|
|
*/
|
|
async replaceImage(pageId, oldAttachmentId, url, opts = {}) {
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
// Hold ONE per-page lock for the WHOLE operation (scan -> upload -> write).
|
|
// Previously the scan and the write were two separate mutatePageContent
|
|
// calls, each acquiring + releasing the lock, with the upload happening in
|
|
// the UNLOCKED gap between them. A concurrent op could interleave there: it
|
|
// could remove the target image so the write pass matches nothing, leaving
|
|
// the freshly-uploaded attachment as an un-deletable orphan (Docmost has no
|
|
// API to delete a single content attachment). Acquiring the lock once and
|
|
// using the non-locking collab helper inside (the per-page mutex is NOT
|
|
// reentrant, so the self-locking mutatePageContent would deadlock here)
|
|
// closes that TOCTOU window. uploadImage hits /files/upload over plain HTTP
|
|
// and does not touch the page lock, so it is safe to call while held.
|
|
return withPageLock(pageId, async () => {
|
|
// STEP 1: read-only live check. Scan the live document for any image node
|
|
// matching oldAttachmentId BEFORE uploading anything, so a wrong/stale id
|
|
// throws without ever creating an orphan attachment.
|
|
let matchFound = false;
|
|
const scan = (nodes) => {
|
|
for (const node of nodes) {
|
|
if (!node)
|
|
continue;
|
|
if (node.type === "image" &&
|
|
node.attrs &&
|
|
node.attrs.attachmentId === oldAttachmentId) {
|
|
matchFound = true;
|
|
}
|
|
if (Array.isArray(node.content))
|
|
scan(node.content);
|
|
}
|
|
};
|
|
await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => {
|
|
matchFound = false; // reset per-transform (collab may retry the read).
|
|
const doc = liveDoc && liveDoc.type === "doc"
|
|
? liveDoc
|
|
: { type: "doc", content: [] };
|
|
if (Array.isArray(doc.content))
|
|
scan(doc.content);
|
|
return null; // read-only: never write on the check pass.
|
|
});
|
|
if (!matchFound) {
|
|
throw new Error(`replace_image: no image with attachmentId "${oldAttachmentId}" found on page ${pageId}`);
|
|
}
|
|
// STEP 2: a match exists — upload the new file as a FRESH attachment (new
|
|
// id, new clean URL) and repoint every matching node in a second pass.
|
|
// Still inside the SAME lock, so no other op can have changed the page
|
|
// since the scan.
|
|
const up = await this.uploadImage(pageId, url);
|
|
let replaced = 0;
|
|
// Swap the source of one image node, preserving align/alt/title/geometry.
|
|
const repoint = (node) => {
|
|
node.attrs = {
|
|
...node.attrs,
|
|
src: up.src,
|
|
attachmentId: up.attachmentId,
|
|
// Default to null when fileSize is unknown so the attr is never
|
|
// undefined.
|
|
size: up.fileSize ?? null,
|
|
};
|
|
if (opts.align)
|
|
node.attrs.align = opts.align;
|
|
if (opts.alt !== undefined)
|
|
node.attrs.alt = opts.alt;
|
|
replaced++;
|
|
};
|
|
// Recursively repoint every image node (incl. ones nested in callouts/tables).
|
|
const walk = (nodes) => {
|
|
for (const node of nodes) {
|
|
if (!node)
|
|
continue;
|
|
if (node.type === "image" &&
|
|
node.attrs &&
|
|
node.attrs.attachmentId === oldAttachmentId) {
|
|
repoint(node);
|
|
}
|
|
if (Array.isArray(node.content))
|
|
walk(node.content);
|
|
}
|
|
};
|
|
const mutation = await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => {
|
|
// Reset per-transform so collab retries recompute cleanly (no double-count).
|
|
replaced = 0;
|
|
const doc = liveDoc && liveDoc.type === "doc"
|
|
? liveDoc
|
|
: { type: "doc", content: [] };
|
|
if (!Array.isArray(doc.content))
|
|
doc.content = [];
|
|
walk(doc.content);
|
|
if (replaced === 0)
|
|
return null; // no match -> skip the write entirely
|
|
return doc;
|
|
});
|
|
// KNOWN LIMITATION: a same-count image SRC swap (image count unchanged, no
|
|
// text/mark change) may still report verify.changed === false, because the
|
|
// text+marks+integrity-count model in summarizeChange does not inspect
|
|
// image `src`/attachmentId attributes. That is acceptable here — the
|
|
// replace is confirmed by `replaced` below, and verify is supplementary.
|
|
if (replaced === 0) {
|
|
// The pass-1 SCAN found the target (matchFound was true) and we already
|
|
// uploaded the new attachment, but pass-2 matched nothing — a concurrent
|
|
// editor must have removed the node between the two passes. Do NOT throw
|
|
// here (that would leak the just-uploaded attachment AND report failure);
|
|
// instead report success with the upload flagged as an unreferenced
|
|
// orphan so the caller knows. (The early throw above still covers the
|
|
// case where pass-1 finds nothing, before any upload happens.)
|
|
return {
|
|
success: true,
|
|
replaced: 0,
|
|
pageId,
|
|
oldAttachmentId,
|
|
newAttachmentId: up.attachmentId,
|
|
src: up.src,
|
|
orphanedAttachmentId: up.attachmentId,
|
|
warning: "target image was removed concurrently; uploaded attachment is unreferenced",
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
return {
|
|
success: true,
|
|
pageId,
|
|
replaced,
|
|
oldAttachmentId,
|
|
newAttachmentId: up.attachmentId,
|
|
src: up.src,
|
|
verify: mutation.verify,
|
|
};
|
|
});
|
|
}
|
|
// --- Page history / diff / transform ---
|
|
/**
|
|
* List the saved versions (history snapshots) of a page, newest first.
|
|
* Docmost auto-snapshots on every save. Returns one cursor-paginated page of
|
|
* results: `{ items, nextCursor }`. The history record's id field is `id`.
|
|
*/
|
|
async listPageHistory(pageId, cursor) {
|
|
await this.ensureAuthenticated();
|
|
const payload = { pageId };
|
|
if (cursor)
|
|
payload.cursor = cursor;
|
|
const response = await this.client.post("/pages/history", payload);
|
|
const data = response.data?.data ?? response.data;
|
|
return {
|
|
items: data?.items ?? [],
|
|
nextCursor: data?.meta?.nextCursor ?? null,
|
|
};
|
|
}
|
|
/**
|
|
* Fetch a single page-history version including its lossless ProseMirror
|
|
* `content`. The version also carries pageId/title/createdAt.
|
|
*/
|
|
async getPageHistory(historyId) {
|
|
await this.ensureAuthenticated();
|
|
const response = await this.client.post("/pages/history/info", {
|
|
historyId,
|
|
});
|
|
return response.data?.data ?? response.data;
|
|
}
|
|
/**
|
|
* "Restore" a version: Docmost has NO restore endpoint, so we take the
|
|
* version's `content` and write it as the page's current content via the live
|
|
* collab path (which itself creates a new history snapshot). Returns the
|
|
* affected pageId and the source historyId.
|
|
*/
|
|
async restorePageVersion(historyId) {
|
|
await this.ensureAuthenticated();
|
|
const version = await this.getPageHistory(historyId);
|
|
if (!version ||
|
|
!version.pageId ||
|
|
!version.content ||
|
|
typeof version.content !== "object") {
|
|
throw new Error(`restore_page_version: history ${historyId} has no usable content`);
|
|
}
|
|
// Defense-in-depth: sanitize URLs in the restored content (parity with the
|
|
// JSON write path) before writing it back.
|
|
this.validateDocUrls(version.content);
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
const mutation = await mutatePageContent(version.pageId, collabToken, this.apiUrl, () => version.content);
|
|
return {
|
|
pageId: version.pageId,
|
|
restoredFrom: historyId,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
/**
|
|
* Diff two versions of a page and return a Docmost-equivalent change set.
|
|
* `from`/`to` each resolve to a ProseMirror doc:
|
|
* - null / undefined / "current" -> the page's CURRENT content;
|
|
* - any other string -> that historyId's content.
|
|
* Returns the diff plus the resolved version metadata for each side.
|
|
*/
|
|
async diffPageVersions(pageId, from, to) {
|
|
await this.ensureAuthenticated();
|
|
const isCurrent = (v) => v == null || v === "" || v === "current";
|
|
const resolveSide = async (v) => {
|
|
if (isCurrent(v)) {
|
|
const raw = await this.getPageRaw(pageId);
|
|
return {
|
|
doc: raw.content || { type: "doc", content: [] },
|
|
meta: {
|
|
kind: "current",
|
|
pageId,
|
|
title: raw.title,
|
|
updatedAt: raw.updatedAt,
|
|
},
|
|
};
|
|
}
|
|
const version = await this.getPageHistory(v);
|
|
return {
|
|
doc: version.content || { type: "doc", content: [] },
|
|
meta: {
|
|
kind: "history",
|
|
historyId: version.id,
|
|
pageId: version.pageId,
|
|
title: version.title,
|
|
createdAt: version.createdAt,
|
|
},
|
|
};
|
|
};
|
|
const fromSide = await resolveSide(from);
|
|
const toSide = await resolveSide(to);
|
|
const diff = diffDocs(fromSide.doc, toSide.doc);
|
|
return { from: fromSide.meta, to: toSide.meta, diff };
|
|
}
|
|
/**
|
|
* Edit a page by running an arbitrary user-supplied JS transform against the
|
|
* live document, with a diff preview + page-history safety net.
|
|
*
|
|
* The transform string is evaluated as `(doc, ctx) => doc` inside a node:vm
|
|
* sandbox: it gets ONLY `{ doc, ctx, structuredClone, console }` as globals,
|
|
* a 5s timeout, and NO access to require/process/fs/network. It must return a
|
|
* `{ type: "doc" }` node, which is validated structurally before any write.
|
|
*
|
|
* `ctx` exposes:
|
|
* - comments: the page's comments (fetched before the live read);
|
|
* - log: an array the transform can push diagnostics to (via console.log);
|
|
* - consume(id): mark a comment id as consumed (for deleteComments);
|
|
* - helpers: the transforms.ts primitives + commentsToFootnotes.
|
|
*
|
|
* Footnote convention used by the helpers: footnote markers are plain "[N]"
|
|
* text in the body, and the notes are an orderedList under a heading whose
|
|
* text is "Примечания переводчика".
|
|
*
|
|
* dryRun (default true): read the page's current content, run the transform,
|
|
* and return `{ pushed:false, diff, log }` WITHOUT opening the collab socket.
|
|
* Otherwise the transform runs atomically inside mutatePageContent, optionally
|
|
* deletes consumed comments, and returns the new historyId + diff + log.
|
|
*/
|
|
async transformPage(pageId, transformJs, opts = {}) {
|
|
const dryRun = opts.dryRun ?? true;
|
|
const deleteComments = opts.deleteComments ?? false;
|
|
await this.ensureAuthenticated();
|
|
const comments = await this.listComments(pageId);
|
|
// ctx handed to the sandbox. consume() records ids; helpers are the pure
|
|
// transform primitives. log is captured from console.log inside the sandbox.
|
|
const ctx = {
|
|
comments,
|
|
log: [],
|
|
consumed: new Set(),
|
|
consume(id) {
|
|
this.consumed.add(id);
|
|
},
|
|
helpers: {
|
|
blockText,
|
|
walk,
|
|
getList,
|
|
insertMarkerAfter,
|
|
setCalloutRange,
|
|
noteItem,
|
|
mdToInlineNodes,
|
|
commentsToFootnotes,
|
|
canonicalizeFootnotes,
|
|
insertInlineFootnote,
|
|
},
|
|
};
|
|
// Captured oldDoc / newDoc for the diff (set inside runTransform).
|
|
let oldDoc;
|
|
let newDoc;
|
|
// SYNCHRONOUS transform runner — safe to call inside mutatePageContent's
|
|
// onSynced (no await between the live read and the write).
|
|
const runTransform = (liveDoc) => {
|
|
oldDoc = structuredClone(liveDoc);
|
|
const sandbox = {
|
|
doc: structuredClone(liveDoc),
|
|
ctx,
|
|
structuredClone,
|
|
console: {
|
|
log: (...a) => ctx.log.push(a.map((x) => String(x)).join(" ")),
|
|
},
|
|
};
|
|
// Wrap the provided string in parentheses so both an expression-arrow
|
|
// (`(doc, ctx) => {...}`) and a parenthesized function work. Run it in a
|
|
// fresh context with no require/process/module so the transform cannot
|
|
// touch fs/network/process. 5s wall-clock timeout.
|
|
let fn;
|
|
try {
|
|
fn = vm.runInNewContext("(" + transformJs + ")", sandbox, {
|
|
timeout: 5000,
|
|
});
|
|
}
|
|
catch (e) {
|
|
throw new Error(`transform did not compile: ${e?.message ?? e}`);
|
|
}
|
|
if (typeof fn !== "function") {
|
|
throw new Error("transform must evaluate to a function (doc, ctx) => doc");
|
|
}
|
|
const raw = vm.runInNewContext("f(d, c)", { f: fn, d: sandbox.doc, c: ctx }, { timeout: 5000 });
|
|
if (!raw ||
|
|
typeof raw !== "object" ||
|
|
raw.type !== "doc" ||
|
|
!Array.isArray(raw.content)) {
|
|
throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })');
|
|
}
|
|
// Validate the RAW transform output FIRST (structure — including the
|
|
// MAX_DEPTH guard — and URLs), mirroring updatePageJson. The canonicalizer
|
|
// recurses without a depth limiter, so validating after it would turn a
|
|
// too-deep doc into an opaque "Maximum call stack size exceeded" instead of
|
|
// the intended "nesting exceeds the maximum depth" error.
|
|
this.validateDocStructure(raw);
|
|
this.validateDocUrls(raw);
|
|
// Auto-canonicalize footnotes after the transform (idempotent): no write
|
|
// path can leave footnotes out of order / orphaned / in a raw `[^id]`
|
|
// block. In a dryRun preview this may surface footnote edits the script
|
|
// author did not write (the canonicalizer tidied them) — that is expected.
|
|
const result = canonicalizeFootnotes(raw);
|
|
newDoc = result;
|
|
return result;
|
|
};
|
|
if (dryRun) {
|
|
// Preview only: run against the current REST snapshot, never open the
|
|
// socket. oldDoc/newDoc are captured by runTransform.
|
|
const raw = await this.getPageRaw(pageId);
|
|
const current = raw.content || { type: "doc", content: [] };
|
|
runTransform(current);
|
|
// Run an independent Yjs-encodability check (same sanitize + schema as the
|
|
// apply path), so the preview fails with the same descriptive error when
|
|
// the doc is not encodable instead of returning a misleadingly-green diff.
|
|
assertYjsEncodable(newDoc);
|
|
return {
|
|
pushed: false,
|
|
diff: diffDocs(oldDoc, newDoc),
|
|
log: ctx.log,
|
|
};
|
|
}
|
|
// Apply atomically against the live doc.
|
|
const collabToken = await this.getCollabTokenWithReauth();
|
|
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, runTransform);
|
|
// Optionally delete consumed comments (best-effort; a delete failure must
|
|
// not undo the successful write).
|
|
const deletedComments = [];
|
|
if (deleteComments) {
|
|
for (const id of ctx.consumed) {
|
|
try {
|
|
await this.deleteComment(id);
|
|
deletedComments.push(id);
|
|
}
|
|
catch (e) {
|
|
if (process.env.DEBUG) {
|
|
console.error(`transform: failed to delete comment ${id}:`, e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Fetch the newest historyId (Docmost snapshots on the write above).
|
|
let historyId = null;
|
|
try {
|
|
const hist = await this.listPageHistory(pageId);
|
|
historyId = hist.items?.[0]?.id ?? null;
|
|
}
|
|
catch (e) {
|
|
if (process.env.DEBUG) {
|
|
console.error("transform: failed to fetch history id:", e);
|
|
}
|
|
}
|
|
return {
|
|
pushed: true,
|
|
historyId,
|
|
diff: diffDocs(oldDoc, newDoc),
|
|
deletedComments,
|
|
log: ctx.log,
|
|
verify: mutation.verify,
|
|
};
|
|
}
|
|
}
|