Files
docmost-sync/packages/docmost-client/src/client.ts
vvzvlad c6edd73324 refactor(pull): extract tested vault-layout module; harden pull; close review findings
Address the Increment-1 code review (3 warnings + suggestions).

- layout: new pure src/layout.ts (buildVaultLayout) — page-tree -> vault paths,
  sibling + full-path collision disambiguation (sanitized ~slugId suffix), parent
  cycle guard; pull.ts is now a thin I/O loop
- layout: resolve orphan/root collisions at the NAME stage so an orphan ancestor
  can't desync its children's folder segments (fixes review Major); covered by test
- pull: per-page try/catch (one bad page no longer aborts the mirror), bounded
  concurrency (6), progress logging, process.exitCode=1 on partial mirror
- security: filename disambiguation suffix now passes through sanitizeTitle
- docs: AGENTS.md -> Increment 1 status/structure/run targets; pull.ts meta-block
  comment; collectRecentSince JSDoc (lexicographic UTC-ISO precondition)
- tests: layout (9), markdown-document round-trip (no comments block, SPEC §3),
  firstDivergence; export firstDivergence. 49 tests green.
2026-06-16 21:09:40 +03:00

2776 lines
100 KiB
TypeScript

import FormData from "form-data";
import axios, { AxiosInstance } from "axios";
import { readFileSync, statSync } from "fs";
import { basename, extname } from "path";
import {
filterWorkspace,
filterSpace,
filterPage,
filterComment,
filterSearchResult,
} from "./lib/filters.js";
import { HocuspocusProvider } from "@hocuspocus/provider";
import { TiptapTransformer } from "@hocuspocus/transformer";
import * as Y from "yjs";
import WebSocket from "ws";
import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
import {
updatePageContentRealtime,
replacePageContent,
markdownToProseMirror,
mutatePageContent,
buildCollabWsUrl,
assertYjsEncodable,
} from "./lib/collaboration.js";
import { docmostExtensions } from "./lib/docmost-schema.js";
import {
serializeDocmostMarkdown,
parseDocmostMarkdown,
serializeDocmostMarkdownBody,
} from "./lib/markdown-document.js";
import {
replaceNodeById,
deleteNodeById,
insertNodeRelative,
buildOutline,
getNodeByRef,
readTable,
insertTableRow,
deleteTableRow,
updateTableCell,
} from "./lib/node-ops.js";
import { withPageLock } from "./lib/page-lock.js";
import { applyTextEdits, TextEdit, TextEditResult } from "./lib/json-edit.js";
import { getCollabToken, performLogin } from "./lib/auth-utils.js";
import { diffDocs } from "./lib/diff.js";
import {
blockText,
walk,
getList,
insertMarkerAfter,
setCalloutRange,
noteItem,
mdToInlineNodes,
commentsToFootnotes,
} from "./lib/transforms.js";
import vm from "node:vm";
export class DocmostClient {
private client: AxiosInstance;
private token: string | null = null;
private apiUrl: string;
private email: string;
private password: string;
// In-flight login dedup: when the token expires, the 401 interceptor,
// ensureAuthenticated, getCollabTokenWithReauth and the two multipart retries
// can all call login() at once. Memoizing a single promise collapses that
// thundering herd into ONE /auth/login request that everyone awaits.
private loginPromise: Promise<void> | null = null;
constructor(baseURL: string, email: string, password: string) {
this.apiUrl = baseURL;
this.email = email;
this.password = password;
this.client = axios.create({
baseURL,
// Default request timeout so a hung connection cannot wedge a per-page
// lock or block the server indefinitely. Multipart uploads override this
// with a longer per-request timeout.
timeout: 30000,
headers: {
"Content-Type": "application/json",
},
});
// Re-authenticate transparently on a 401/403 once: the JWT authToken can
// expire while the server is long-running, after which every cached-token
// request would otherwise fail until a manual restart. On such a response,
// clear the stale token, perform a fresh login, and replay the original
// request exactly once (guarded by config._retry to avoid infinite loops;
// the login request itself is never retried).
this.client.interceptors.response.use(
(response) => response,
async (error) => {
const config = error.config;
const status = error.response?.status;
const isAuthError = status === 401 || status === 403;
const isLoginRequest =
typeof config?.url === "string" && config.url.includes("/auth/login");
if (config && isAuthError && !config._retry && !isLoginRequest) {
config._retry = true;
// Drop the stale token + Authorization header before re-login.
this.token = null;
delete this.client.defaults.headers.common["Authorization"];
try {
await this.login();
} catch (loginError) {
// Re-login failed: surface the original error to the caller.
return Promise.reject(error);
}
// Re-issue the original request with the freshly minted Bearer token.
// Read it from the default header that login() just set, not from
// this.token, to avoid a theoretical "Bearer null" if this.token was
// cleared between login() resolving and this point.
config.headers = config.headers || {};
config.headers["Authorization"] =
this.client.defaults.headers.common["Authorization"];
return this.client.request(config);
}
return Promise.reject(error);
},
);
}
/** Application base URL (API URL without the /api suffix). */
get appUrl(): string {
return this.apiUrl.replace(/\/api\/?$/, "");
}
async login() {
// Reuse an in-flight login if one is already running so concurrent callers
// share a single /auth/login request instead of each issuing their own.
if (!this.loginPromise) {
this.loginPromise = performLogin(this.apiUrl, this.email, this.password)
.then((token) => {
this.token = token;
this.client.defaults.headers.common["Authorization"] =
`Bearer ${token}`;
})
.finally(() => {
this.loginPromise = null;
});
}
return this.loginPromise;
}
async ensureAuthenticated() {
if (!this.token) {
await this.login();
}
}
/**
* Fetch a collaboration token, transparently re-authenticating once on a
* 401/403. getCollabToken() uses bare axios internally, so it is NOT covered
* by this.client's response interceptor; this helper replicates that
* behaviour for collab-token requests: ensure a token, try once, and on an
* expired-token auth error perform a fresh login and retry exactly once.
*/
private async getCollabTokenWithReauth(): Promise<string> {
await this.ensureAuthenticated();
try {
return await getCollabToken(this.apiUrl, this.token!);
} catch (e) {
// getCollabToken wraps the AxiosError in a plain Error but attaches the
// HTTP status as `.status`, so detect an auth failure via either the raw
// AxiosError shape OR the attached status.
const axiosStatus = axios.isAxiosError(e) ? e.response?.status : undefined;
const attachedStatus = (e as any)?.status;
const isAuthError =
axiosStatus === 401 ||
axiosStatus === 403 ||
attachedStatus === 401 ||
attachedStatus === 403;
if (isAuthError) {
await this.login();
return await getCollabToken(this.apiUrl, this.token!);
}
throw e;
}
}
/**
* Connect to the collaboration websocket, read the live doc, apply
* `transform`, write the result, and wait for the server to persist it —
* WITHOUT acquiring the per-page lock.
*
* This mirrors collaboration.mutatePageContent EXCEPT that it does not call
* withPageLock. It exists solely so replaceImage can hold ONE withPageLock
* across its scan -> upload -> write sequence: the per-page mutex is NOT
* reentrant, so calling the normal (self-locking) mutatePageContent inside an
* outer withPageLock for the same pageId would deadlock. The caller MUST hold
* the page lock for the whole operation; this helper assumes that invariant.
*
* `transform` receives the live ProseMirror doc and returns the NEW full doc
* to write, or `null` to abort with no write. Errors thrown by `transform`
* propagate to the caller.
*/
private mutateLiveContentUnlocked(
pageId: string,
collabToken: string,
transform: (liveDoc: any) => any | null,
): Promise<any> {
const CONNECT_TIMEOUT_MS = 25000;
const PERSIST_TIMEOUT_MS = 20000;
const ydoc = new Y.Doc();
const wsUrl = buildCollabWsUrl(this.apiUrl);
return new Promise<any>((resolve, reject) => {
let provider: HocuspocusProvider | undefined;
let applied = false; // onSynced may fire again on reconnect — apply once.
let settled = false;
let connectionLost = false;
let connectTimer: ReturnType<typeof setTimeout> | undefined;
let persistTimer: ReturnType<typeof setTimeout> | undefined;
let unsyncedHandler: ((data: { number: number }) => void) | undefined;
let lastWrittenDoc: any;
const cleanup = () => {
if (connectTimer) clearTimeout(connectTimer);
if (persistTimer) clearTimeout(persistTimer);
if (provider) {
if (unsyncedHandler) {
try {
provider.off("unsyncedChanges", unsyncedHandler);
} catch (err) {}
}
try {
provider.destroy();
} catch (err) {}
}
};
const finish = (err: Error | null, value?: any) => {
if (settled) return;
settled = true;
cleanup();
if (err) reject(err);
else resolve(value);
};
connectTimer = setTimeout(() => {
finish(new Error("Connection timeout to collaboration server"));
}, CONNECT_TIMEOUT_MS);
const waitForPersistence = () => {
if (settled) return;
if (!provider) {
finish(new Error("collab provider gone before persistence"));
return;
}
if (provider.unsyncedChanges === 0) {
finish(null, lastWrittenDoc);
return;
}
persistTimer = setTimeout(() => {
finish(
new Error(
"Timeout waiting for collaboration server to persist the update",
),
);
}, PERSIST_TIMEOUT_MS);
unsyncedHandler = (data: { number: number }) => {
if (data.number === 0 && !connectionLost) {
finish(null, lastWrittenDoc);
}
};
provider.on("unsyncedChanges", unsyncedHandler);
};
provider = new HocuspocusProvider({
url: wsUrl,
name: `page.${pageId}`,
document: ydoc,
token: collabToken,
// @ts-ignore - Required for Node.js environment
WebSocketPolyfill: WebSocket,
onDisconnect: () => {
connectionLost = true;
finish(
new Error(
"Collaboration connection closed before the update was persisted/synced",
),
);
},
onClose: () => {
connectionLost = true;
finish(
new Error(
"Collaboration connection closed before the update was persisted/synced",
),
);
},
onSynced: () => {
if (applied || settled) return;
applied = true;
// CRITICAL: keep everything between reading and writing the live doc
// synchronous (no await) so no remote update can interleave.
let newDoc: any;
try {
let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
if (
!liveDoc ||
typeof liveDoc !== "object" ||
!Array.isArray(liveDoc.content)
) {
liveDoc = { type: "doc", content: [] };
}
newDoc = transform(liveDoc);
if (newDoc == null) {
// Transform aborted — write nothing, return the live doc.
lastWrittenDoc = liveDoc;
finish(null, liveDoc);
return;
}
const tempDoc = TiptapTransformer.toYdoc(
newDoc,
"default",
docmostExtensions,
);
const fragment = ydoc.getXmlFragment("default");
ydoc.transact(() => {
if (fragment.length > 0) {
fragment.delete(0, fragment.length);
}
Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc));
});
} catch (e) {
finish(e instanceof Error ? e : new Error(String(e)));
return;
}
lastWrittenDoc = newDoc;
waitForPersistence();
},
onAuthenticationFailed: () => {
finish(
new Error("Authentication failed for collaboration connection"),
);
},
});
});
}
/**
* Generic pagination handler for Docmost API endpoints
*/
async paginateAll<T = any>(
endpoint: string,
basePayload: Record<string, any> = {},
limit: number = 100,
): Promise<T[]> {
await this.ensureAuthenticated();
const clampedLimit = Math.max(1, Math.min(100, limit));
// Hard ceiling on the number of pages to fetch: guards against a server
// that returns a perpetually-true hasNextPage (which would otherwise loop
// forever and accumulate duplicates).
const MAX_PAGES = 50;
let page = 1;
let allItems: T[] = [];
let hasNextPage = true;
while (hasNextPage && page <= MAX_PAGES) {
const response = await this.client.post(endpoint, {
...basePayload,
limit: clampedLimit,
page,
});
const data = response.data;
const items = data.data?.items || data.items || [];
const meta = data.data?.meta || data.meta;
allItems = allItems.concat(items);
// Stop if the page is empty or shorter than the requested size: a full
// page worth of items is the only situation where another page can exist,
// so this defends against a stuck hasNextPage flag in addition to it.
if (items.length === 0 || items.length < clampedLimit) {
break;
}
hasNextPage = meta?.hasNextPage || false;
page++;
}
// If the loop stopped because it hit the MAX_PAGES ceiling while the server
// still reported more results (hasNextPage true and the last page was
// full), the result set is truncated — warn so the caller is not silently
// handed an incomplete list.
if (hasNextPage && page > MAX_PAGES) {
console.warn(
`paginateAll: results from "${endpoint}" truncated at the ${MAX_PAGES}-page cap; more pages exist on the server`,
);
}
return allItems;
}
async getWorkspace() {
await this.ensureAuthenticated();
const response = await this.client.post("/workspace/info", {});
return {
data: filterWorkspace(response.data?.data ?? response.data),
success: response.data.success,
};
}
async getSpaces() {
const spaces = await this.paginateAll("/spaces", {});
return spaces.map((space) => filterSpace(space));
}
/**
* List most recent pages (bounded). Fetching the whole space can exceed
* MCP response/time limits on large instances, so a single bounded page
* of results is returned (default 50, max 100).
*/
async listPages(spaceId?: string, limit: number = 50) {
await this.ensureAuthenticated();
const clampedLimit = Math.max(1, Math.min(100, limit));
const payload: Record<string, any> = { limit: clampedLimit, page: 1 };
if (spaceId) payload.spaceId = spaceId;
const response = await this.client.post("/pages/recent", payload);
const data = response.data;
const items = data.data?.items || data.items || [];
return items.map((page: any) => filterPage(page));
}
/**
* List sidebar pages for a space. With no pageId the request returns the
* space ROOT pages; with a pageId it returns the direct CHILDREN of that
* page. pageId is therefore optional and is only included in the POST body
* when provided (an empty/undefined pageId would otherwise change the
* semantics on the server).
*/
async listSidebarPages(spaceId: string, pageId?: string) {
await this.ensureAuthenticated();
// Paginate: the endpoint returns server-paged children, so posting only
// { page: 1 } silently dropped every child beyond the first page. Loop on
// meta.hasNextPage (with a MAX_PAGES ceiling like paginateAll, guarding
// against a stuck hasNextPage flag) and accumulate all children.
const MAX_PAGES = 50;
let page = 1;
let allItems: any[] = [];
let hasNextPage = true;
while (hasNextPage && page <= MAX_PAGES) {
// Only send pageId when scoping to a page's children; omit it for roots.
const payload: Record<string, any> = { spaceId, page };
if (pageId) payload.pageId = pageId;
const response = await this.client.post("/pages/sidebar-pages", payload);
const data = response.data?.data ?? response.data;
const items = data?.items || [];
allItems = allItems.concat(items);
hasNextPage = data?.meta?.hasNextPage || false;
page++;
}
return allItems;
}
/**
* Enumerate EVERY page in a space (or in a subtree, when rootPageId is given)
* by walking the sidebar-pages tree.
*
* Starting set: the children of rootPageId when provided, otherwise the
* space root pages. From there it does an iterative breadth-first walk: each
* node is collected, and when node.hasChildren is true its direct children
* are fetched via listSidebarPages(spaceId, node.id) and enqueued.
*
* This replaces the old "/pages/recent" enumeration, which is a bounded
* recent-activity feed (~5000 cap) and therefore misses comments on older
* pages that were never recently touched.
*
* Safeguards: a `visited` Set of page ids prevents re-processing a node
* (cycles / duplicate references), and a hard node cap bounds pathological
* trees so the walk always terminates.
*/
private async enumerateSpacePages(
spaceId: string,
rootPageId?: string,
): Promise<any[]> {
const MAX_NODES = 10000;
const result: any[] = [];
const visited = new Set<string>();
// Seed the queue with the starting level (subtree children or roots).
const queue: any[] = await this.listSidebarPages(spaceId, rootPageId);
while (queue.length > 0 && result.length < MAX_NODES) {
const node = queue.shift();
if (!node || typeof node !== "object" || !node.id) continue;
// Skip already-seen ids to guard against cycles / duplicate references.
if (visited.has(node.id)) continue;
visited.add(node.id);
result.push(node);
if (node.hasChildren) {
try {
const children = await this.listSidebarPages(spaceId, node.id);
for (const child of children) queue.push(child);
} catch (e: any) {
// A failure fetching one node's children must not abort the whole
// walk: skip this branch and keep enumerating the rest.
}
}
}
return result;
}
/** Raw page info including the ProseMirror JSON content and slugId. */
async getPageRaw(pageId: string) {
await this.ensureAuthenticated();
const response = await this.client.post("/pages/info", { pageId });
return response.data?.data ?? response.data;
}
async getPage(pageId: string) {
await this.ensureAuthenticated();
const resultData = await this.getPageRaw(pageId);
let content = resultData.content
? convertProseMirrorToMarkdown(resultData.content)
: "";
// Always fetch subpages to provide context to the agent
let subpages: any[] = [];
try {
subpages = await this.listSidebarPages(resultData.spaceId, pageId);
} catch (e: any) {
console.warn("Failed to fetch subpages:", e);
}
// Resolve subpages if the placeholder exists
if (content && content.includes("{{SUBPAGES}}")) {
if (subpages && subpages.length > 0) {
const list = subpages
.map((p: any) => `- [${p.title}](page:${p.id})`)
.join("\n");
content = content.replace("{{SUBPAGES}}", `### Subpages\n${list}`);
} else {
content = content.replace("{{SUBPAGES}}", "");
}
}
return {
data: filterPage(resultData, content, subpages),
success: true,
};
}
/** Page info + raw ProseMirror JSON content (lossless representation). */
async getPageJson(pageId: string) {
const data = await this.getPageRaw(pageId);
return {
id: data.id,
slugId: data.slugId,
title: data.title,
parentPageId: data.parentPageId,
spaceId: data.spaceId,
updatedAt: data.updatedAt,
content: data.content || { type: "doc", content: [] },
};
}
/**
* Compact outline of a page's top-level blocks (no full document body).
* Cheap way to locate sections/tables and grab block ids before drilling in
* with get_node / patch_node / insert_node.
*/
async getOutline(pageId: string) {
await this.ensureAuthenticated();
const data = await this.getPageRaw(pageId);
return {
pageId,
slugId: data.slugId,
title: data.title,
outline: buildOutline(data.content ?? { type: "doc", content: [] }),
};
}
/**
* Fetch a single node's full ProseMirror subtree (lossless) by reference:
* a block id (headings/paragraphs/callouts/images), or `#<index>` to select
* a top-level block by its outline index (the only way to reach tables/rows/
* cells, which carry no id).
*/
async getNode(pageId: string, nodeId: string) {
await this.ensureAuthenticated();
const data = await this.getPageRaw(pageId);
const hit = getNodeByRef(
data.content ?? { type: "doc", content: [] },
nodeId,
);
if (!hit) {
throw new Error(
`get_node: no node found for "${nodeId}" on page ${pageId} (use a block id from get_outline, or "#<index>" for a top-level block such as a table)`,
);
}
return {
pageId,
ref: nodeId,
path: hit.path,
type: hit.type,
node: hit.node,
};
}
/**
* Read a table as a matrix. `tableRef` is `#<index>` (from get_outline) or a
* block id of any node inside the table. Returns the cell texts plus a
* parallel cellIds matrix (each cell's first paragraph id, or null) so a
* caller can patch_node a cell for rich-formatted edits. Throws when no table
* resolves for the reference.
*/
async getTable(pageId: string, tableRef: string) {
await this.ensureAuthenticated();
const data = await this.getPageRaw(pageId);
const t = readTable(data.content ?? { type: "doc", content: [] }, tableRef);
if (!t) {
throw new Error(
`table_get: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`,
);
}
return {
pageId,
table: tableRef,
rows: t.rows,
cols: t.cols,
path: t.path,
cells: t.cells,
cellIds: t.cellIds,
};
}
/**
* Insert a row of plain-text cells into a table on the LIVE collab document.
* `tableRef` is `#<index>` or a block id inside the target table. `cells` is
* padded to the table's column count (more cells than columns throws); `index`
* is a 0-based insert position (omit/out-of-range to append). Throws when no
* table resolves for the reference.
*/
async tableInsertRow(
pageId: string,
tableRef: string,
cells: string[],
index?: number,
) {
await this.ensureAuthenticated();
const collabToken = await this.getCollabTokenWithReauth();
// Track insertion in an outer var, reset per-transform, so a collab retry
// recomputes it cleanly (mirrors insertNode's pattern).
let inserted = false;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
inserted = false;
const { doc: nd, inserted: ins } = insertTableRow(
liveDoc,
tableRef,
cells,
index,
);
inserted = ins;
if (!inserted) return null; // table not found -> skip the write entirely
return nd;
});
if (!inserted) {
throw new Error(
`table_insert_row: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`,
);
}
return { success: true, table: tableRef, inserted: true };
}
/**
* Delete the row at 0-based `index` from a table on the LIVE collab document.
* `tableRef` is `#<index>` or a block id inside the target table. The helper's
* out-of-range and last-row errors propagate; a missing table throws here.
*/
async tableDeleteRow(pageId: string, tableRef: string, index: number) {
await this.ensureAuthenticated();
const collabToken = await this.getCollabTokenWithReauth();
let deleted = false;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
deleted = false;
const { doc: nd, deleted: del } = deleteTableRow(liveDoc, tableRef, index);
deleted = del;
if (!deleted) return null; // table not found -> skip the write entirely
return nd;
});
if (!deleted) {
throw new Error(
`table_delete_row: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`,
);
}
return { success: true, table: tableRef, deleted: true };
}
/**
* Set the plain-text content of cell `[row, col]` (0-based) in a table on the
* LIVE collab document, replacing the cell's content with a single text
* paragraph (the cell's first-paragraph id is preserved). `tableRef` is
* `#<index>` or a block id inside the target table. The helper's out-of-range
* error propagates; a missing table throws here.
*/
async tableUpdateCell(
pageId: string,
tableRef: string,
row: number,
col: number,
text: string,
) {
await this.ensureAuthenticated();
const collabToken = await this.getCollabTokenWithReauth();
let updated = false;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
updated = false;
const { doc: nd, updated: upd } = updateTableCell(
liveDoc,
tableRef,
row,
col,
text,
);
updated = upd;
if (!updated) return null; // table not found -> skip the write entirely
return nd;
});
if (!updated) {
throw new Error(
`table_update_cell: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`,
);
}
return { success: true, table: tableRef, row, col };
}
/**
* Create a new page with title and content.
* Uses the /pages/import workaround (the only endpoint accepting content),
* then moves the page and restores the exact title: the import endpoint
* derives the title from the FILENAME and replaces spaces with
* underscores, so we explicitly re-set it via /pages/update afterwards.
*/
async createPage(
title: string,
content: string,
spaceId: string,
parentPageId?: string,
) {
await this.ensureAuthenticated();
if (parentPageId) {
try {
await this.getPage(parentPageId);
} catch (e) {
throw new Error(`Parent page with ID ${parentPageId} not found.`);
}
}
// 1. Create content via Import (using multipart/form-data).
// Build a FRESH FormData per send attempt: a FormData body is a single-use
// stream consumed on the first send, so it cannot be replayed by
// this.client's response interceptor (replay fails with 'socket hang up').
// Multipart re-auth is therefore done here with bare axios and an explicit
// one-shot 401/403 retry that rebuilds the body.
const fileContent = Buffer.from(content, "utf-8");
const buildForm = () => {
const form = new FormData();
form.append("spaceId", spaceId);
form.append("file", fileContent, {
filename: `${title || "import"}.md`,
contentType: "text/markdown",
});
return form;
};
const importUrl = `${this.apiUrl}/pages/import`;
let response;
try {
// Call buildForm() ONCE per attempt and reuse the instance for both
// getHeaders() and the body so the Content-Type boundary matches the body.
const form = buildForm();
// Read the Authorization header from this.client's defaults (set by
// login(), only ever deleted — never set to null) instead of building
// `Bearer ${this.token}`: a concurrent JSON 401 can null this.token
// mid-flight, which would otherwise produce a literal "Bearer null".
// ensureAuthenticated() above guarantees login() ran, so the default
// header exists here.
response = await axios.post(importUrl, form, {
headers: {
...form.getHeaders(),
Authorization: this.client.defaults.headers.common["Authorization"],
},
timeout: 60000,
});
} catch (error) {
// On an expired-token auth error, re-login and retry exactly once with a
// freshly-rebuilt FormData (the previous one was already consumed).
if (
axios.isAxiosError(error) &&
(error.response?.status === 401 || error.response?.status === 403)
) {
await this.login();
const form2 = buildForm();
response = await axios.post(importUrl, form2, {
headers: {
...form2.getHeaders(),
Authorization:
this.client.defaults.headers.common["Authorization"],
},
timeout: 60000,
});
} else {
throw error;
}
}
const newPageId = (response.data?.data ?? response.data).id;
// 2. Move to parent if needed
if (parentPageId) {
await this.movePage(newPageId, parentPageId);
}
// 3. Restore the exact title (import mangles spaces into underscores)
if (title) {
await this.client.post("/pages/update", { pageId: newPageId, title });
}
return this.getPage(newPageId);
}
/**
* Update a page's content from markdown and optionally its title.
* NOTE: full re-import — block ids regenerate. For surgical changes
* use editPageText / updatePageJson instead.
*/
async updatePage(pageId: string, content: string, title?: string) {
await this.ensureAuthenticated();
if (title) {
await this.client.post("/pages/update", { pageId, title });
}
let collabToken = "";
try {
collabToken = await this.getCollabTokenWithReauth();
await updatePageContentRealtime(pageId, content, collabToken, this.apiUrl);
} catch (error: any) {
// Verbose diagnostics (incl. anything that could expose a token prefix)
// are gated behind DEBUG; the thrown Error below carries no token data.
if (process.env.DEBUG) {
console.error(
"Failed to update page content via realtime collaboration:",
error,
);
const tokenPreview = collabToken
? collabToken.substring(0, 15) + "..."
: "null";
console.error(`Collab token preview: ${tokenPreview}`);
}
throw new Error(`Failed to update page content: ${error.message}`);
}
return {
success: true,
modified: true,
message: "Page updated successfully.",
pageId: pageId,
};
}
/**
* Validate a URL string against a scheme allowlist for a given context.
*
* The markdown link path enforces safe schemes via TipTap, but the raw
* JSON path (updatePageJson) bypasses that — so this is the sanitization
* choke point for ProseMirror JSON written directly by the caller.
*
* - "link": reject javascript:, vbscript:, data: (any scheme that can
* execute or smuggle script when the href is clicked).
* - "src": allow only http(s):, mailto:, /api/files paths, or a
* scheme-less relative/absolute path; reject
* javascript:/vbscript:/data:/file:.
*/
private isSafeUrl(url: unknown, context: "link" | "src"): boolean {
if (typeof url !== "string") return false;
const trimmed = url.trim();
if (trimmed === "") return true; // empty href/src is harmless
// Extract a leading "scheme:" if present. A scheme must start with a
// letter and contain only letters/digits/+/-/. before the colon. Strip
// whitespace and ASCII control chars first so a tab/newline embedded in
// the scheme cannot smuggle a dangerous scheme past the check.
const cleaned = trimmed.replace(/[\s\x00-\x1f]+/g, "");
const schemeMatch = /^([a-zA-Z][a-zA-Z0-9+.-]*):/.exec(cleaned);
const scheme = schemeMatch ? schemeMatch[1].toLowerCase() : null;
const dangerous = new Set(["javascript", "vbscript", "data", "file"]);
if (context === "link") {
if (scheme === null) return true; // relative/anchor link is fine
// For links, data: is also blocked (can carry script payloads).
return !new Set(["javascript", "vbscript", "data"]).has(scheme);
}
// context === "src"
if (scheme === null) return true; // relative/absolute path (incl. /api/files)
if (dangerous.has(scheme)) return false;
return scheme === "http" || scheme === "https" || scheme === "mailto";
}
/**
* Recursively walk a ProseMirror doc and reject any unsafe URL on a link
* mark href or on a media node's src/url. Media nodes covered: image,
* attachment, video, plus embed (rendered as an iframe), youtube, drawio
* and excalidraw — all of which carry a user-controlled URL that Docmost
* renders. Throws a clear error on the first violation. A max-depth guard
* turns an over-deep document into a clean error instead of a RangeError
* stack overflow.
*/
private validateDocUrls(node: any, depth: number = 0): void {
const MAX_DEPTH = 200;
if (depth > MAX_DEPTH) {
throw new Error(
`document nesting exceeds the maximum depth of ${MAX_DEPTH}`,
);
}
if (!node || typeof node !== "object") return;
// Link marks on text nodes: validate the href.
if (Array.isArray(node.marks)) {
for (const mark of node.marks) {
if (mark && mark.type === "link" && mark.attrs) {
if (!this.isSafeUrl(mark.attrs.href, "link")) {
throw new Error(
`unsafe link href rejected: "${mark.attrs.href}"`,
);
}
}
}
}
// Media nodes: validate src/url against the stricter src allowlist.
// embed renders as an iframe (highest risk); youtube/drawio/excalidraw
// likewise carry a user-controlled URL Docmost renders, so they get the
// same scheme check as image/attachment/video.
if (
node.type === "image" ||
node.type === "attachment" ||
node.type === "video" ||
node.type === "embed" ||
node.type === "youtube" ||
node.type === "drawio" ||
node.type === "excalidraw" ||
node.type === "audio" ||
node.type === "pdf"
) {
const attrs = node.attrs || {};
for (const key of ["src", "url"]) {
if (attrs[key] != null && !this.isSafeUrl(attrs[key], "src")) {
throw new Error(
`unsafe ${node.type} ${key} rejected: "${attrs[key]}"`,
);
}
}
}
if (Array.isArray(node.content)) {
for (const child of node.content) {
this.validateDocUrls(child, depth + 1);
}
}
}
/**
* Recursively validate the STRUCTURE of a ProseMirror node (reuses the
* recursion shape of validateDocUrls). Every node must be an object with a
* string `type`; when present, `content` must be an array, `marks` must be
* an array of objects each with a string `type`, and a text node's `text`
* must be a string. Throws a clear "invalid ProseMirror document" error on
* the first violation. A max-depth guard turns an over-deep document into a
* clean error instead of a RangeError stack overflow.
*/
private validateDocStructure(node: any, depth: number = 0): void {
const MAX_DEPTH = 200;
if (depth > MAX_DEPTH) {
throw new Error(
`invalid ProseMirror document: nesting exceeds the maximum depth of ${MAX_DEPTH}`,
);
}
if (!node || typeof node !== "object" || typeof node.type !== "string") {
throw new Error(
"invalid ProseMirror document: every node must be an object with a string `type`",
);
}
if ("text" in node && node.type === "text" && typeof node.text !== "string") {
throw new Error(
"invalid ProseMirror document: a text node must have a string `text`",
);
}
if (node.marks !== undefined) {
if (!Array.isArray(node.marks)) {
throw new Error(
"invalid ProseMirror document: `marks` must be an array",
);
}
for (const mark of node.marks) {
if (!mark || typeof mark !== "object" || typeof mark.type !== "string") {
throw new Error(
"invalid ProseMirror document: every mark must be an object with a string `type`",
);
}
}
}
if (node.content !== undefined) {
if (!Array.isArray(node.content)) {
throw new Error(
"invalid ProseMirror document: `content` must be an array when present",
);
}
for (const child of node.content) {
this.validateDocStructure(child, depth + 1);
}
}
}
/**
* Replace page content with a raw ProseMirror JSON document (lossless) and/or
* update its title. Both `doc` and `title` are optional, but at least one must
* be supplied:
* - `doc` provided -> validate + full-overwrite the body (and update the
* title too when `title` is also given).
* - `doc` omitted, `title` given -> title-only update; the body is NOT
* touched/resent (no collab write happens).
* - neither given -> throws (nothing to update).
*/
async updatePageJson(pageId: string, doc?: any, title?: string) {
await this.ensureAuthenticated();
// Title-only / no-op handling: when no document is supplied, do NOT write
// the body. Update the title if one was given; otherwise there is nothing
// to do, so fail loudly rather than silently no-op.
if (doc == null) {
if (!title) {
throw new Error(
"update_page_json: nothing to update (provide content and/or title)",
);
}
await this.client.post("/pages/update", { pageId, title });
return {
success: true,
modified: true,
message: "Page title updated (content left unchanged).",
pageId,
};
}
// Validate the document shape before a full overwrite: a malformed doc
// would otherwise silently corrupt the page (full-overwrite is the
// documented behaviour; no optimistic-concurrency is applied here).
if (
typeof doc !== "object" ||
doc.type !== "doc" ||
!Array.isArray(doc.content)
) {
throw new Error(
'content must be a ProseMirror document ({"type":"doc","content":[...]}) ' +
"where content is an array of nodes each having a string `type`",
);
}
// Recurse the WHOLE document so a malformed nested node (e.g. a node with a
// non-string type, a non-array content/marks, or a text node missing its
// string text) is rejected up front rather than silently corrupting the
// page on overwrite.
this.validateDocStructure(doc);
// Sanitize URLs before writing. This closes the JSON-path bypass: unlike
// the markdown link path (which TipTap sanitizes), raw JSON could otherwise
// inject javascript:/data: link hrefs or media srcs straight into the doc.
this.validateDocUrls(doc);
if (title) {
await this.client.post("/pages/update", { pageId, title });
}
const collabToken = await this.getCollabTokenWithReauth();
await replacePageContent(pageId, doc, collabToken, this.apiUrl);
return {
success: true,
modified: true,
message: "Page content replaced from ProseMirror JSON.",
pageId,
};
}
/**
* Export a page to a single self-contained Docmost-flavoured markdown file:
* meta block + body (with inline comment anchors + diagrams) + comment
* threads. Lossless round-trip target; see importPageMarkdown for the inverse.
*/
async exportPageMarkdown(pageId: string): Promise<string> {
await this.ensureAuthenticated();
const page = await this.getPageRaw(pageId);
const body = page.content
? convertProseMirrorToMarkdown(page.content)
: "";
let comments: any[] = [];
try {
comments = await this.listComments(pageId);
} catch (e) {
// A comments fetch failure must not lose the body; export with [] and let
// the caller see the (empty) comments block. Log under DEBUG only.
if (process.env.DEBUG) console.error("export: listComments failed", e);
}
const meta = {
version: 1,
pageId: page.id,
slugId: page.slugId,
title: page.title,
spaceId: page.spaceId,
parentPageId: page.parentPageId ?? null,
};
return serializeDocmostMarkdown(meta, body, comments);
}
/**
* Import a self-contained Docmost markdown file back into a page. Parses out
* the meta + comments metadata blocks, converts the body to ProseMirror
* (restoring comment marks + diagrams from their inline HTML), and replaces
* the page content. Comment THREAD records are NOT written to the server in
* this version — they are preserved in the file and the inline marks are
* re-applied so the highlights survive; managing comment records stays with
* the comment tools/UI.
*/
async importPageMarkdown(pageId: string, fullMarkdown: string): Promise<any> {
await this.ensureAuthenticated();
const { meta, body, comments } = parseDocmostMarkdown(fullMarkdown);
const doc = await markdownToProseMirror(body);
const collabToken = await this.getCollabTokenWithReauth();
await replacePageContent(pageId, doc, collabToken, this.apiUrl);
// Collect distinct comment ids that actually became comment marks in the doc.
const collectCommentIds = (node: any, acc: Set<string>): Set<string> => {
if (!node || typeof node !== "object") return acc;
if (Array.isArray(node.marks)) {
for (const mk of node.marks) {
if (mk && mk.type === "comment" && mk.attrs?.commentId) {
acc.add(mk.attrs.commentId);
}
}
}
if (Array.isArray(node.content)) {
for (const child of node.content) collectCommentIds(child, acc);
}
return acc;
};
// Count reflects the comment marks present in the written document, so an id
// that only appears as inert text (e.g. inside a fenced code block) is not
// counted because it never becomes a comment mark.
const anchoredIds = collectCommentIds(doc, new Set<string>());
const result: any = {
success: true,
pageId,
anchoredCommentCount: anchoredIds.size,
commentsInFile: Array.isArray(comments) ? comments.length : 0,
};
// Warn (non-fatal) if the file was exported from a DIFFERENT page.
if (meta?.pageId && meta.pageId !== pageId) {
result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
}
return result;
}
/**
* Rename a page (change its title only) without touching or resending its
* content. The slug is derived from the page record, not the body, so it is
* left intact too.
*/
async renamePage(pageId: string, title: string) {
await this.ensureAuthenticated();
await this.client.post("/pages/update", { pageId, title });
return { success: true, pageId, title };
}
/**
* Copy the WHOLE content of one page onto another, entirely server-side: the
* source's ProseMirror document is read and written verbatim onto the target
* via the live collab path, so the document never passes through the model.
*
* Only the target's BODY is replaced — its title and slug live on the page
* record (not in the content), so they are untouched. The source page is not
* modified at all.
*/
async copyPageContent(sourcePageId: string, targetPageId: string) {
await this.ensureAuthenticated();
// A self-copy would be a no-op overwrite; reject it explicitly so a caller
// mistake surfaces as a clear error rather than a silent round-trip.
if (sourcePageId === targetPageId) {
throw new Error(
"copy_page_content: sourcePageId and targetPageId are the same page (no-op copy)",
);
}
const source = await this.getPageRaw(sourcePageId);
const content = source?.content;
if (
!content ||
typeof content !== "object" ||
content.type !== "doc" ||
!Array.isArray(content.content)
) {
throw new Error(
`copy_page_content: source page ${sourcePageId} has no usable ProseMirror content to copy`,
);
}
// Defense-in-depth: run the same URL-scheme sanitizer the JSON write path
// uses, so copying never lands a javascript:/data: href/src on the target
// (parity with updatePageJson; harmless for already-stored source content).
this.validateDocUrls(content);
const collabToken = await this.getCollabTokenWithReauth();
await replacePageContent(targetPageId, content, collabToken, this.apiUrl);
return {
success: true,
sourcePageId,
targetPageId,
copiedNodes: content.content.length,
};
}
/**
* Surgical text edits: find/replace inside text nodes of the live
* document. Preserves all block ids, marks, callouts and tables.
*/
async editPageText(pageId: string, edits: TextEdit[]) {
await this.ensureAuthenticated();
const collabToken = await this.getCollabTokenWithReauth();
// Apply the edits against the LIVE synced document, not the debounced REST
// snapshot, so concurrent human edits/comments are preserved. applyTextEdits
// throws descriptive errors on zero/multiple matches — let them propagate.
let results: TextEditResult[] | undefined;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
const r = applyTextEdits(liveDoc, edits);
results = r.results;
return r.doc;
});
return {
success: true,
pageId,
edits: results,
message: "Text edits applied (node ids and formatting preserved).",
};
}
/**
* Replace EVERY node whose attrs.id === nodeId (recursively, including nodes
* nested in callouts/tables) with the supplied node. Operates on the LIVE
* collab document so comments and concurrent edits are preserved.
*
* The replacement node's block id is preserved: if node.attrs is missing it
* is created, and if node.attrs.id is missing it is set to nodeId so the
* replacement keeps the same id it replaced. Throws if no node matches.
*/
async patchNode(pageId: string, nodeId: string, node: any) {
await this.ensureAuthenticated();
if (!node || typeof node !== "object" || typeof node.type !== "string") {
throw new Error(
"patch_node: `node` must be an object with a string `type`",
);
}
// Preserve the block id WITHOUT mutating the caller's object: build a local
// copy whose attrs.id === nodeId (so the swapped-in node keeps the id of the
// node it replaces).
const target = {
...node,
attrs: {
...(node.attrs && typeof node.attrs === "object" ? node.attrs : {}),
},
};
if (target.attrs.id == null) {
target.attrs.id = nodeId;
}
const collabToken = await this.getCollabTokenWithReauth();
// Track the replacement count in an outer var, reset per-transform, so a
// collab retry recomputes it cleanly (mirrors replaceImage's pattern).
let replaced = 0;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
replaced = 0;
const { doc: nd, replaced: r } = replaceNodeById(liveDoc, nodeId, target);
replaced = r;
if (replaced === 0) return null; // no match -> skip the write entirely
return nd;
});
if (replaced === 0) {
throw new Error(
`patch_node: no node with id "${nodeId}" found on page ${pageId}`,
);
}
return { success: true, replaced, nodeId };
}
/**
* Insert a node relative to an anchor (or append it at the top level).
* Operates on the LIVE collab document so comments and concurrent edits are
* preserved.
*
* opts.position:
* - "append": push the node at the end of the top-level content.
* - "before"/"after": insert the node as a sibling of the anchor, just
* before/after it. Exactly one of anchorNodeId / anchorText must be given;
* anchorNodeId locates a node anywhere by attrs.id, anchorText matches the
* first top-level block whose plain text includes it.
*
* Throws if the anchor cannot be found.
*/
async insertNode(
pageId: string,
node: any,
opts: {
position: "before" | "after" | "append";
anchorNodeId?: string;
anchorText?: string;
},
) {
await this.ensureAuthenticated();
if (!node || typeof node !== "object" || typeof node.type !== "string") {
throw new Error(
"insert_node: `node` must be an object with a string `type`",
);
}
if (
!opts ||
(opts.position !== "before" &&
opts.position !== "after" &&
opts.position !== "append")
) {
throw new Error(
'insert_node: `position` must be one of "before", "after", "append"',
);
}
if (opts.position === "before" || opts.position === "after") {
// before/after require EXACTLY ONE anchor (an id or a text fragment).
const hasId =
typeof opts.anchorNodeId === "string" && opts.anchorNodeId.length > 0;
const hasText =
typeof opts.anchorText === "string" && opts.anchorText.length > 0;
if (hasId === hasText) {
throw new Error(
`insert_node: position "${opts.position}" requires exactly one of anchorNodeId or anchorText`,
);
}
}
const collabToken = await this.getCollabTokenWithReauth();
// Track insertion in an outer var, reset per-transform, so a collab retry
// recomputes it cleanly (mirrors replaceImage's pattern).
let inserted = false;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
inserted = false;
const { doc: nd, inserted: ins } = insertNodeRelative(liveDoc, node, opts);
inserted = ins;
if (!inserted) return null; // anchor not found -> skip the write entirely
return nd;
});
if (!inserted) {
const anchorDesc = opts.anchorNodeId
? `anchorNodeId "${opts.anchorNodeId}"`
: `anchorText "${opts.anchorText}"`;
throw new Error(
`insert_node: anchor not found (${anchorDesc}) on page ${pageId}`,
);
}
return { success: true, inserted: true, position: opts.position };
}
/**
* Remove EVERY node whose attrs.id === nodeId (recursively, including nodes
* nested in callouts/tables) from its parent content array. Operates on the
* LIVE collab document so comments and concurrent edits are preserved.
* Throws if no node matches.
*/
async deleteNode(pageId: string, nodeId: string) {
await this.ensureAuthenticated();
const collabToken = await this.getCollabTokenWithReauth();
// Track the deletion count in an outer var, reset per-transform, so a
// collab retry recomputes it cleanly (mirrors replaceImage's pattern).
let deleted = 0;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
deleted = 0;
const { doc: nd, deleted: d } = deleteNodeById(liveDoc, nodeId);
deleted = d;
if (deleted === 0) return null; // no match -> skip the write entirely
return nd;
});
if (deleted === 0) {
throw new Error(
`delete_node: no node with id "${nodeId}" found on page ${pageId}`,
);
}
return { success: true, deleted, nodeId };
}
/** Build the public share URL for a page. */
private shareUrl(shareKey: string, slugId: string): string {
return `${this.appUrl}/share/${shareKey}/p/${slugId}`;
}
/** Share a page publicly (idempotent) and return the public URL. */
async sharePage(pageId: string, searchIndexing: boolean = true) {
await this.ensureAuthenticated();
const response = await this.client.post("/shares/create", {
pageId,
includeSubPages: false,
searchIndexing,
});
const share = response.data?.data ?? response.data;
const slugId = share.page?.slugId || (await this.getPageRaw(pageId)).slugId;
return {
shareId: share.id,
key: share.key,
pageId: share.pageId,
publicUrl: this.shareUrl(share.key, slugId),
searchIndexing: share.searchIndexing,
};
}
/** List all public shares in the workspace with their URLs. */
async listShares() {
const shares = await this.paginateAll("/shares", {});
return shares.map((s: any) => ({
shareId: s.id,
key: s.key,
pageId: s.pageId,
pageTitle: s.page?.title,
publicUrl: s.page?.slugId ? this.shareUrl(s.key, s.page.slugId) : null,
searchIndexing: s.searchIndexing,
createdAt: s.createdAt,
}));
}
/** Remove the public share of a page. */
async unsharePage(pageId: string) {
await this.ensureAuthenticated();
const shares = await this.listShares();
const share = shares.find((s: any) => s.pageId === pageId);
if (!share) {
throw new Error(`Page ${pageId} is not shared.`);
}
await this.client.post("/shares/delete", { shareId: share.shareId });
return { success: true, removedShareId: share.shareId, pageId };
}
async search(query: string, spaceId?: string, limit?: number) {
await this.ensureAuthenticated();
const payload: Record<string, any> = { query, spaceId };
// Clamp an optional caller-supplied limit into a sane 1..100 range before
// forwarding it to the server; omit it entirely when not provided so the
// server applies its own default.
if (limit !== undefined) {
payload.limit = Math.max(1, Math.min(100, limit));
}
const response = await this.client.post("/search", payload);
// Normalize both response shapes: bare array and paginated { items: [...] }
const data = response.data?.data;
const items = Array.isArray(data) ? data : data?.items || [];
const filteredItems = items.map((item: any) => filterSearchResult(item));
return {
items: filteredItems,
success: response.data?.success || false,
};
}
async movePage(
pageId: string,
parentPageId: string | null,
position?: string,
) {
await this.ensureAuthenticated();
// Docmost requires position >= 5 chars.
const validPosition = position || "a00000";
return this.client
.post("/pages/move", {
pageId,
parentPageId,
position: validPosition,
})
.then((res) => res.data);
}
async deletePage(pageId: string) {
await this.ensureAuthenticated();
return this.client
.post("/pages/delete", { pageId })
.then((res) => res.data);
}
// --- Comment methods (ported from upstream PR #3 by Max Nikitin) ---
/**
* Normalize a comment's `content` into a ProseMirror doc object before
* markdown conversion. createComment/updateComment send content as a
* JSON.stringify(...) STRING, and the server stores it as-is, so on read it
* comes back as a string. convertProseMirrorToMarkdown returns "" for a
* string, so parse it first (guarded — fall back to the raw value on any
* parse failure so a non-JSON legacy value is still handled gracefully).
*/
private parseCommentContent(content: any): any {
if (typeof content !== "string") return content;
try {
return JSON.parse(content);
} catch {
return content;
}
}
/** List all comments on a page (cursor-paginated), content as markdown. */
async listComments(pageId: string) {
await this.ensureAuthenticated();
let allComments: any[] = [];
let cursor: string | null = null;
do {
const payload: Record<string, any> = { pageId, limit: 100 };
if (cursor) payload.cursor = cursor;
const response = await this.client.post("/comments", payload);
const data = response.data.data || response.data;
const items = data.items || [];
allComments = allComments.concat(items);
cursor = data.meta?.nextCursor || null;
} while (cursor);
return allComments.map((comment: any) => {
const markdown = comment.content
? convertProseMirrorToMarkdown(
this.parseCommentContent(comment.content),
)
: "";
return filterComment(comment, markdown);
});
}
async getComment(commentId: string) {
await this.ensureAuthenticated();
const response = await this.client.post("/comments/info", { commentId });
const comment = response.data.data || response.data;
const markdown = comment.content
? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content))
: "";
return {
data: filterComment(comment, markdown),
success: true,
};
}
/** Create a page-level or inline comment; content is markdown. */
async createComment(
pageId: string,
content: string,
type: "page" | "inline" = "page",
selection?: string,
parentCommentId?: string,
) {
await this.ensureAuthenticated();
// Convert through the full Docmost schema (consistent with page paths)
const jsonContent = await markdownToProseMirror(content);
const payload: Record<string, any> = {
pageId,
content: JSON.stringify(jsonContent),
type,
};
if (selection) payload.selection = selection;
if (parentCommentId) payload.parentCommentId = parentCommentId;
const response = await this.client.post("/comments/create", payload);
const comment = response.data.data || response.data;
const markdown = comment.content
? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content))
: content;
const result: any = {
data: filterComment(comment, markdown),
success: true,
};
// Anchor the comment in the document. The /comments/create API records the
// comment + its `selection` text, but it does NOT insert the comment MARK
// into the page content, so without this the inline comment has no
// highlight/anchor and is not clickable. Only top-level inline comments are
// anchored: replies (parentCommentId set) inherit their parent's anchor,
// and page-type comments have no text range.
if (type === "inline" && selection && !parentCommentId && comment?.id) {
const newCommentId: string = comment.id;
let anchored = false;
try {
const collabToken = await this.getCollabTokenWithReauth();
await mutatePageContent(
pageId,
collabToken,
this.apiUrl,
(liveDoc) => {
const doc =
liveDoc && liveDoc.type === "doc"
? liveDoc
: { type: "doc", content: [] };
// Find the FIRST text node containing the selection text, then
// split it into before / marked / after, copying the node's
// existing marks onto all three parts and adding the comment mark
// only to the middle part. Returns true once a match is wrapped.
const wrapInFirstMatch = (
nodes: any[],
depth: number,
): boolean => {
const MAX_DEPTH = 200;
if (depth > MAX_DEPTH || !Array.isArray(nodes)) return false;
for (let i = 0; i < nodes.length; i++) {
const n = nodes[i];
if (!n || typeof n !== "object") continue;
if (
n.type === "text" &&
typeof n.text === "string" &&
n.text.includes(selection)
) {
const idx = n.text.indexOf(selection);
const before = n.text.slice(0, idx);
const middleText = selection;
const after = n.text.slice(idx + selection.length);
const baseMarks = Array.isArray(n.marks) ? n.marks : [];
// Drop any pre-existing comment mark from the marks applied to
// the middle fragment so it ends up with exactly one comment
// mark (the new one) rather than two. Other fragments and the
// base marks list are left untouched.
const middleBaseMarks = baseMarks.filter(
(m: any) => !(m && m.type === "comment"),
);
const commentMark = {
type: "comment",
// The comment mark schema declares both commentId and
// resolved; include resolved:false for completeness.
attrs: { commentId: newCommentId, resolved: false },
};
const parts: any[] = [];
if (before.length > 0) {
parts.push({ ...n, text: before, marks: [...baseMarks] });
}
parts.push({
...n,
text: middleText,
marks: [...middleBaseMarks, commentMark],
});
if (after.length > 0) {
parts.push({ ...n, text: after, marks: [...baseMarks] });
}
nodes.splice(i, 1, ...parts);
return true;
}
if (Array.isArray(n.content)) {
if (wrapInFirstMatch(n.content, depth + 1)) return true;
}
}
return false;
};
if (Array.isArray(doc.content) && wrapInFirstMatch(doc.content, 0)) {
anchored = true;
return doc;
}
// Selection text not found: do NOT fail (the comment already
// exists). Abort the write so nothing changes.
return null;
},
);
} catch (e) {
// The comment record already exists; an anchoring failure must not turn
// a successful create into an error. Report anchored:false instead.
if (process.env.DEBUG) {
console.error("Failed to anchor inline comment mark:", e);
}
anchored = false;
}
result.anchored = anchored;
}
return result;
}
async updateComment(commentId: string, content: string) {
await this.ensureAuthenticated();
const jsonContent = await markdownToProseMirror(content);
await this.client.post("/comments/update", {
commentId,
content: JSON.stringify(jsonContent),
});
return {
success: true,
commentId,
message: "Comment updated successfully.",
};
}
async deleteComment(commentId: string) {
await this.ensureAuthenticated();
return this.client
.post("/comments/delete", { commentId })
.then((res) => res.data);
}
/**
* Check for new comments across pages in a space (optionally scoped to a
* subtree): pages updated after `since` are scanned and their comments
* filtered by createdAt > since.
*/
async checkNewComments(spaceId: string, since: string, parentPageId?: string) {
await this.ensureAuthenticated();
const sinceDate = new Date(since);
// Reject an unparseable `since`: comparing against an Invalid Date silently
// yields zero new comments (every `>` against NaN is false), which would
// mask a malformed input as "nothing new" instead of erroring.
if (Number.isNaN(sinceDate.getTime())) {
throw new Error(
`checkNewComments: invalid "since" date "${since}"; expected an ISO-8601 timestamp`,
);
}
// 1. Enumerate the FULL set of pages in scope by walking the sidebar-pages
// tree (a complete page index), NOT the bounded "/pages/recent" feed which
// caps at ~5000 recent items and silently misses comments on older pages.
//
// Subtree scope: when parentPageId is given, the scope is that page ITSELF
// plus every descendant (enumerateSpacePages walks its children). Otherwise
// the scope is the whole space (all roots and their descendants).
//
// NOTE: do NOT pre-filter by page.updatedAt — creating a comment does not
// bump it (verified on a live server), so such a filter silently misses
// comments on pages that were not otherwise edited. The complete tree walk
// already restricts the scope correctly, so no recent-feed allow-list is
// needed any more.
let pagesInScope: any[];
if (parentPageId) {
const subtree = await this.enumerateSpacePages(spaceId, parentPageId);
// Include the parent page node itself alongside its descendants. Fetch it
// so its title/id are available even though it is not returned by its own
// children listing.
let parentNode: any = { id: parentPageId };
try {
parentNode = await this.getPageRaw(parentPageId);
} catch (e: any) {
// Fall back to a minimal node if the parent can't be fetched; its
// comments are still attempted below (the fetch there is non-fatal).
}
pagesInScope = [parentNode, ...subtree];
} else {
pagesInScope = await this.enumerateSpacePages(spaceId);
}
// 2. Fetch comments for each page, keep ones created after since
const results: any[] = [];
for (const page of pagesInScope) {
try {
const comments = await this.listComments(page.id);
const newComments = comments.filter(
(c: any) => new Date(c.createdAt) > sinceDate,
);
if (newComments.length > 0) {
results.push({
pageId: page.id,
pageTitle: page.title,
comments: newComments,
});
}
} catch (e: any) {
// Skip pages with errors (e.g. deleted between calls)
}
}
const totalNewComments = results.reduce(
(sum, r) => sum + r.comments.length,
0,
);
// enumerateSpacePages caps traversal at 10000 nodes; flag when that cap was
// hit so the caller knows the scan may be incomplete (some pages skipped).
const truncated = pagesInScope.length >= 10000;
return {
since,
scope: parentPageId ? `subtree of ${parentPageId}` : `space ${spaceId}`,
checkedPages: pagesInScope.length,
pagesWithNewComments: results.length,
totalNewComments,
truncated,
comments: results,
};
}
// --- Image upload / embedding ---
/** Map a file extension to a supported image MIME type (throws otherwise). */
private imageMimeFromPath(filePath: string): string {
const ext = extname(filePath).toLowerCase();
const map: Record<string, string> = {
".png": "image/png",
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".gif": "image/gif",
".webp": "image/webp",
".svg": "image/svg+xml",
};
const mime = map[ext];
if (!mime) {
throw new Error(
`unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`,
);
}
return mime;
}
/** Build a Docmost ProseMirror image node from an uploaded attachment. */
private buildImageNode(
att: { id: string; fileName: string; fileSize?: number },
align?: "left" | "center" | "right",
alt?: string,
): any {
// Clean file URL, matching Docmost's native behaviour. No cache-busting
// query: the server serves the bare URL correctly, and replacement creates
// a new attachment id (a new URL) which busts caches naturally.
const src = `/api/files/${att.id}/${att.fileName}`;
const node: any = {
type: "image",
attrs: {
src,
attachmentId: att.id,
// Default to null when the server omits fileSize so the attr is never
// undefined (undefined would be dropped on serialization / break the
// ProseMirror image schema which expects size present).
size: att.fileSize ?? null,
align: align || "center",
width: null,
},
};
if (alt) node.attrs.alt = alt;
return node;
}
/**
* Upload a local image file as an attachment of a page and return the
* attachment metadata plus a ready-to-insert ProseMirror image node.
*/
async uploadImage(pageId: string, filePath: string) {
await this.ensureAuthenticated();
// HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at
// the server host's local filesystem, so it must be validated BEFORE any
// bytes are read. Without these guards a caller could (a) read an arbitrary
// file via path traversal, (b) follow a symlink to a sensitive target, or
// (c) exhaust memory by reading a huge file. Order matters: validate the
// extension, then stat (regular-file + size cap), and only then read.
// (a) Extension allowlist first — cheap, and rejects non-images up front.
const mime = this.imageMimeFromPath(filePath);
// (b) Stat the path: it must be a regular file (rejects directories, FIFOs,
// devices, sockets) and stay under the size cap. statSync follows symlinks,
// so a symlink is only accepted when its TARGET is a regular file within
// the cap — the intended behaviour for a local image path.
const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB
let stat;
try {
stat = statSync(filePath);
} catch (e: any) {
throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`);
}
if (!stat.isFile()) {
throw new Error(`Not a regular file: "${filePath}"`);
}
if (stat.size > MAX_IMAGE_BYTES) {
throw new Error(
`Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`,
);
}
// (c) Only now read the bytes.
let fileBuffer: Buffer;
try {
fileBuffer = readFileSync(filePath);
} catch (e: any) {
throw new Error(`Cannot read image file at "${filePath}": ${e.message}`);
}
// Build a FRESH FormData for every send attempt. A FormData body is a
// single-use stream that is CONSUMED on the first send, so it cannot be
// replayed by this.client's response interceptor (replaying a consumed
// stream fails with 'socket hang up'). Multipart re-auth is therefore done
// here with bare axios and an explicit one-shot 401/403 retry that rebuilds
// the body. Field order matters: text fields must precede the file part so
// the server reads them; the server always generates a fresh attachment id.
const buildForm = () => {
const form = new FormData();
form.append("pageId", pageId);
form.append("file", fileBuffer, {
filename: basename(filePath),
contentType: mime,
});
return form;
};
const url = `${this.apiUrl}/files/upload`;
let response;
try {
// Call buildForm() ONCE per attempt and reuse the instance for both
// getHeaders() and the body so the Content-Type boundary matches the body.
const form = buildForm();
// Read the Authorization header from this.client's defaults (set by
// login(), only ever deleted — never set to null) instead of building
// `Bearer ${this.token}`: a concurrent JSON 401 can null this.token
// mid-flight, which would otherwise produce a literal "Bearer null".
// ensureAuthenticated() above guarantees login() ran, so the default
// header exists here. A 60s timeout keeps a hung upload from wedging the
// per-page lock (replaceImage holds withPageLock across this call).
response = await axios.post(url, form, {
headers: {
...form.getHeaders(),
Authorization: this.client.defaults.headers.common["Authorization"],
},
timeout: 60000,
});
} catch (error) {
// On an expired-token auth error, re-login and retry exactly once with a
// freshly-rebuilt FormData (the previous one was already consumed).
if (
axios.isAxiosError(error) &&
(error.response?.status === 401 || error.response?.status === 403)
) {
await this.login();
const form2 = buildForm();
response = await axios.post(url, form2, {
headers: {
...form2.getHeaders(),
Authorization:
this.client.defaults.headers.common["Authorization"],
},
timeout: 60000,
});
} else if (axios.isAxiosError(error)) {
// Keep the thrown message free of the raw response body (it may echo
// request data or server internals); surface only status/statusText.
// The full body is logged under DEBUG for diagnostics.
if (process.env.DEBUG) {
console.error(
"Image upload failed; response body:",
JSON.stringify(error.response?.data),
);
}
throw new Error(
`Image upload failed: ${error.response?.status} ${error.response?.statusText}`,
);
} else {
throw error;
}
}
// The attachment may arrive bare or wrapped in a { data } envelope.
const att = response.data?.data ?? response.data;
if (!att?.id || !att?.fileName) {
throw new Error(
"Unexpected /files/upload response: " + JSON.stringify(response.data),
);
}
// Some Docmost versions omit fileSize from the upload response. Fall back
// to the local stat size (the bytes we just uploaded) so callers never get
// an undefined size.
const localSize = stat.size;
const resolvedSize = att.fileSize ?? localSize;
return {
attachmentId: att.id,
fileName: att.fileName,
fileSize: resolvedSize,
src: `/api/files/${att.id}/${att.fileName}`,
imageNode: this.buildImageNode({ ...att, fileSize: resolvedSize }),
};
}
/**
* Upload a local image and insert it into a page in one step.
* By default the image is appended at the end. With replaceText, the first
* top-level block whose text contains the string is replaced; with afterText,
* the image is inserted right after the first matching block. All other
* block ids are preserved (only one top-level block is added or swapped).
*/
async insertImage(
pageId: string,
filePath: string,
opts: {
align?: "left" | "center" | "right";
alt?: string;
replaceText?: string;
afterText?: string;
} = {},
) {
const up = await this.uploadImage(pageId, filePath);
// Reuse the node from uploadImage (clean /api/files/<id>/<file> src), then
// apply align/alt onto a shallow attrs copy.
const node: any = { ...up.imageNode, attrs: { ...up.imageNode.attrs } };
if (opts.align) node.attrs.align = opts.align;
if (opts.alt) node.attrs.alt = opts.alt;
const collabToken = await this.getCollabTokenWithReauth();
// Recursively collect the plain text of a top-level block.
const blockText = (n: any): string => {
let out = "";
if (n.type === "text") out += n.text || "";
for (const child of n.content || []) out += blockText(child);
return out;
};
// Insert into the LIVE synced document, not the debounced REST snapshot, so
// concurrent edits/comments/images are preserved and parallel insert_image
// calls (serialized by the per-page lock) each see the previous insertion.
let placement: "replaced" | "after" | "appended" | undefined;
await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
const doc =
liveDoc && liveDoc.type === "doc"
? liveDoc
: { type: "doc", content: [] };
if (!Array.isArray(doc.content)) doc.content = [];
if (opts.replaceText) {
// Ambiguity guard (mirrors editPageText): count matching top-level
// blocks first, so a non-unique fragment cannot silently replace the
// wrong block (e.g. text that also appears inside a callout/table).
const matches = doc.content.filter((b: any) =>
blockText(b).includes(opts.replaceText!),
);
if (matches.length === 0) {
throw new Error(`replaceText not found: "${opts.replaceText}"`);
}
if (matches.length > 1) {
throw new Error(
`replaceText "${opts.replaceText}" matches ${matches.length} blocks; use a longer unique fragment`,
);
}
const idx = doc.content.findIndex((b: any) =>
blockText(b).includes(opts.replaceText!),
);
// Data-loss guard: replaceText swaps the WHOLE top-level block, so if
// the fragment only appears nested inside a container (table, callout,
// list, blockquote) the entire structure would be destroyed. Refuse
// when the matched block is a container rather than a leaf
// paragraph/heading and point the caller at a safer tool.
const CONTAINER_TYPES = new Set([
"table",
"callout",
"bulletList",
"orderedList",
"taskList",
"blockquote",
]);
const matchedBlock = doc.content[idx];
if (matchedBlock && CONTAINER_TYPES.has(matchedBlock.type)) {
throw new Error(
`replaceText matched a ${matchedBlock.type} container block; replacing it would destroy the whole structure. ` +
`Use afterText to insert near it, or update_page_json for surgical edits.`,
);
}
doc.content.splice(idx, 1, node);
placement = "replaced";
} else if (opts.afterText) {
// Ambiguity guard (mirrors editPageText): refuse a non-unique fragment.
const matches = doc.content.filter((b: any) =>
blockText(b).includes(opts.afterText!),
);
if (matches.length === 0) {
throw new Error(`afterText not found: "${opts.afterText}"`);
}
if (matches.length > 1) {
throw new Error(
`afterText "${opts.afterText}" matches ${matches.length} blocks; use a longer unique fragment`,
);
}
const idx = doc.content.findIndex((b: any) =>
blockText(b).includes(opts.afterText!),
);
doc.content.splice(idx + 1, 0, node);
placement = "after";
} else {
doc.content.push(node);
placement = "appended";
}
return doc;
});
return {
success: true,
pageId,
attachmentId: up.attachmentId,
src: up.src,
placement,
};
}
/**
* Replace an existing image in a page with a new file. Uploads the new file as
* a brand-new attachment, which yields a fresh clean URL that both renders
* correctly and busts browser caches (the URL changed). Finds every image node
* whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested
* in callouts/tables) and repoints its src/attachmentId/size, preserving
* comments, alignment and alt. Operates on the live collab document so comments
* and concurrent edits are preserved. Throws if no matching image is found.
*
* The OLD attachment is left in place as an unreferenced orphan: Docmost
* exposes NO HTTP API to delete a single content attachment (verified against
* the attachment controller/service and by probing the live API — deletion
* happens only by cascade when the page, space or user is removed). This is the
* same outcome as Docmost's own editor when an image is removed/replaced.
* In-place byte overwrite is deliberately NOT used because some Docmost
* versions corrupt the attachment (HTTP 500) when its bytes are overwritten.
*/
async replaceImage(
pageId: string,
oldAttachmentId: string,
filePath: string,
opts: { align?: "left" | "center" | "right"; alt?: string } = {},
) {
const collabToken = await this.getCollabTokenWithReauth();
// Hold ONE per-page lock for the WHOLE operation (scan -> upload -> write).
// Previously the scan and the write were two separate mutatePageContent
// calls, each acquiring + releasing the lock, with the upload happening in
// the UNLOCKED gap between them. A concurrent op could interleave there: it
// could remove the target image so the write pass matches nothing, leaving
// the freshly-uploaded attachment as an un-deletable orphan (Docmost has no
// API to delete a single content attachment). Acquiring the lock once and
// using the non-locking collab helper inside (the per-page mutex is NOT
// reentrant, so the self-locking mutatePageContent would deadlock here)
// closes that TOCTOU window. uploadImage hits /files/upload over plain HTTP
// and does not touch the page lock, so it is safe to call while held.
return withPageLock(pageId, async () => {
// STEP 1: read-only live check. Scan the live document for any image node
// matching oldAttachmentId BEFORE uploading anything, so a wrong/stale id
// throws without ever creating an orphan attachment.
let matchFound = false;
const scan = (nodes: any[]) => {
for (const node of nodes) {
if (!node) continue;
if (
node.type === "image" &&
node.attrs &&
node.attrs.attachmentId === oldAttachmentId
) {
matchFound = true;
}
if (Array.isArray(node.content)) scan(node.content);
}
};
await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => {
matchFound = false; // reset per-transform (collab may retry the read).
const doc =
liveDoc && liveDoc.type === "doc"
? liveDoc
: { type: "doc", content: [] };
if (Array.isArray(doc.content)) scan(doc.content);
return null; // read-only: never write on the check pass.
});
if (!matchFound) {
throw new Error(
`replace_image: no image with attachmentId "${oldAttachmentId}" found on page ${pageId}`,
);
}
// STEP 2: a match exists — upload the new file as a FRESH attachment (new
// id, new clean URL) and repoint every matching node in a second pass.
// Still inside the SAME lock, so no other op can have changed the page
// since the scan.
const up = await this.uploadImage(pageId, filePath);
let replaced = 0;
// Swap the source of one image node, preserving align/alt/title/geometry.
const repoint = (node: any) => {
node.attrs = {
...node.attrs,
src: up.src,
attachmentId: up.attachmentId,
// Default to null when fileSize is unknown so the attr is never
// undefined.
size: up.fileSize ?? null,
};
if (opts.align) node.attrs.align = opts.align;
if (opts.alt !== undefined) node.attrs.alt = opts.alt;
replaced++;
};
// Recursively repoint every image node (incl. ones nested in callouts/tables).
const walk = (nodes: any[]) => {
for (const node of nodes) {
if (!node) continue;
if (
node.type === "image" &&
node.attrs &&
node.attrs.attachmentId === oldAttachmentId
) {
repoint(node);
}
if (Array.isArray(node.content)) walk(node.content);
}
};
await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => {
// Reset per-transform so collab retries recompute cleanly (no double-count).
replaced = 0;
const doc =
liveDoc && liveDoc.type === "doc"
? liveDoc
: { type: "doc", content: [] };
if (!Array.isArray(doc.content)) doc.content = [];
walk(doc.content);
if (replaced === 0) return null; // no match -> skip the write entirely
return doc;
});
if (replaced === 0) {
// The pass-1 SCAN found the target (matchFound was true) and we already
// uploaded the new attachment, but pass-2 matched nothing — a concurrent
// editor must have removed the node between the two passes. Do NOT throw
// here (that would leak the just-uploaded attachment AND report failure);
// instead report success with the upload flagged as an unreferenced
// orphan so the caller knows. (The early throw above still covers the
// case where pass-1 finds nothing, before any upload happens.)
return {
success: true,
replaced: 0,
pageId,
oldAttachmentId,
newAttachmentId: up.attachmentId,
src: up.src,
orphanedAttachmentId: up.attachmentId,
warning:
"target image was removed concurrently; uploaded attachment is unreferenced",
};
}
return {
success: true,
pageId,
replaced,
oldAttachmentId,
newAttachmentId: up.attachmentId,
src: up.src,
};
});
}
// --- Page history / diff / transform ---
/**
* List the saved versions (history snapshots) of a page, newest first.
* Docmost auto-snapshots on every save. Returns one cursor-paginated page of
* results: `{ items, nextCursor }`. The history record's id field is `id`.
*/
async listPageHistory(pageId: string, cursor?: string) {
await this.ensureAuthenticated();
const payload: Record<string, any> = { pageId };
if (cursor) payload.cursor = cursor;
const response = await this.client.post("/pages/history", payload);
const data = response.data?.data ?? response.data;
return {
items: data?.items ?? [],
nextCursor: data?.meta?.nextCursor ?? null,
};
}
/**
* Fetch a single page-history version including its lossless ProseMirror
* `content`. The version also carries pageId/title/createdAt.
*/
async getPageHistory(historyId: string) {
await this.ensureAuthenticated();
const response = await this.client.post("/pages/history/info", {
historyId,
});
return response.data?.data ?? response.data;
}
/**
* "Restore" a version: Docmost has NO restore endpoint, so we take the
* version's `content` and write it as the page's current content via the live
* collab path (which itself creates a new history snapshot). Returns the
* affected pageId and the source historyId.
*/
async restorePageVersion(historyId: string) {
await this.ensureAuthenticated();
const version = await this.getPageHistory(historyId);
if (
!version ||
!version.pageId ||
!version.content ||
typeof version.content !== "object"
) {
throw new Error(
`restore_page_version: history ${historyId} has no usable content`,
);
}
// Defense-in-depth: sanitize URLs in the restored content (parity with the
// JSON write path) before writing it back.
this.validateDocUrls(version.content);
const collabToken = await this.getCollabTokenWithReauth();
await mutatePageContent(
version.pageId,
collabToken,
this.apiUrl,
() => version.content,
);
return { pageId: version.pageId, restoredFrom: historyId };
}
/**
* Diff two versions of a page and return a Docmost-equivalent change set.
* `from`/`to` each resolve to a ProseMirror doc:
* - null / undefined / "current" -> the page's CURRENT content;
* - any other string -> that historyId's content.
* Returns the diff plus the resolved version metadata for each side.
*/
async diffPageVersions(pageId: string, from?: string, to?: string) {
await this.ensureAuthenticated();
const isCurrent = (v?: string) =>
v == null || v === "" || v === "current";
const resolveSide = async (
v?: string,
): Promise<{ doc: any; meta: any }> => {
if (isCurrent(v)) {
const raw = await this.getPageRaw(pageId);
return {
doc: raw.content || { type: "doc", content: [] },
meta: {
kind: "current",
pageId,
title: raw.title,
updatedAt: raw.updatedAt,
},
};
}
const version = await this.getPageHistory(v as string);
return {
doc: version.content || { type: "doc", content: [] },
meta: {
kind: "history",
historyId: version.id,
pageId: version.pageId,
title: version.title,
createdAt: version.createdAt,
},
};
};
const fromSide = await resolveSide(from);
const toSide = await resolveSide(to);
const diff = diffDocs(fromSide.doc, toSide.doc);
return { from: fromSide.meta, to: toSide.meta, diff };
}
/**
* Edit a page by running an arbitrary user-supplied JS transform against the
* live document, with a diff preview + page-history safety net.
*
* The transform string is evaluated as `(doc, ctx) => doc` inside a node:vm
* sandbox: it gets ONLY `{ doc, ctx, structuredClone, console }` as globals,
* a 5s timeout, and NO access to require/process/fs/network. It must return a
* `{ type: "doc" }` node, which is validated structurally before any write.
*
* `ctx` exposes:
* - comments: the page's comments (fetched before the live read);
* - log: an array the transform can push diagnostics to (via console.log);
* - consume(id): mark a comment id as consumed (for deleteComments);
* - helpers: the transforms.ts primitives + commentsToFootnotes.
*
* Footnote convention used by the helpers: footnote markers are plain "[N]"
* text in the body, and the notes are an orderedList under a heading whose
* text is "Примечания переводчика".
*
* dryRun (default true): read the page's current content, run the transform,
* and return `{ pushed:false, diff, log }` WITHOUT opening the collab socket.
* Otherwise the transform runs atomically inside mutatePageContent, optionally
* deletes consumed comments, and returns the new historyId + diff + log.
*/
async transformPage(
pageId: string,
transformJs: string,
opts: { dryRun?: boolean; deleteComments?: boolean } = {},
) {
const dryRun = opts.dryRun ?? true;
const deleteComments = opts.deleteComments ?? false;
await this.ensureAuthenticated();
const comments = await this.listComments(pageId);
// ctx handed to the sandbox. consume() records ids; helpers are the pure
// transform primitives. log is captured from console.log inside the sandbox.
const ctx = {
comments,
log: [] as string[],
consumed: new Set<string>(),
consume(id: string) {
this.consumed.add(id);
},
helpers: {
blockText,
walk,
getList,
insertMarkerAfter,
setCalloutRange,
noteItem,
mdToInlineNodes,
commentsToFootnotes,
},
};
// Captured oldDoc / newDoc for the diff (set inside runTransform).
let oldDoc: any;
let newDoc: any;
// SYNCHRONOUS transform runner — safe to call inside mutatePageContent's
// onSynced (no await between the live read and the write).
const runTransform = (liveDoc: any): any => {
oldDoc = structuredClone(liveDoc);
const sandbox: Record<string, any> = {
doc: structuredClone(liveDoc),
ctx,
structuredClone,
console: {
log: (...a: any[]) => ctx.log.push(a.map((x) => String(x)).join(" ")),
},
};
// Wrap the provided string in parentheses so both an expression-arrow
// (`(doc, ctx) => {...}`) and a parenthesized function work. Run it in a
// fresh context with no require/process/module so the transform cannot
// touch fs/network/process. 5s wall-clock timeout.
let fn: any;
try {
fn = vm.runInNewContext("(" + transformJs + ")", sandbox, {
timeout: 5000,
});
} catch (e: any) {
throw new Error(`transform did not compile: ${e?.message ?? e}`);
}
if (typeof fn !== "function") {
throw new Error("transform must evaluate to a function (doc, ctx) => doc");
}
const result = vm.runInNewContext(
"f(d, c)",
{ f: fn, d: sandbox.doc, c: ctx },
{ timeout: 5000 },
);
if (
!result ||
typeof result !== "object" ||
result.type !== "doc" ||
!Array.isArray(result.content)
) {
throw new Error(
'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })',
);
}
// Validate the returned doc before it can be written.
this.validateDocStructure(result);
this.validateDocUrls(result);
newDoc = result;
return result;
};
if (dryRun) {
// Preview only: run against the current REST snapshot, never open the
// socket. oldDoc/newDoc are captured by runTransform.
const raw = await this.getPageRaw(pageId);
const current = raw.content || { type: "doc", content: [] };
runTransform(current);
// Exercise the same Yjs encoder the apply path uses, so the preview
// fails with the SAME descriptive error when the doc is not encodable
// instead of returning a misleadingly-green diff.
assertYjsEncodable(newDoc);
return {
pushed: false,
diff: diffDocs(oldDoc, newDoc),
log: ctx.log,
};
}
// Apply atomically against the live doc.
const collabToken = await this.getCollabTokenWithReauth();
await mutatePageContent(pageId, collabToken, this.apiUrl, runTransform);
// Optionally delete consumed comments (best-effort; a delete failure must
// not undo the successful write).
const deletedComments: string[] = [];
if (deleteComments) {
for (const id of ctx.consumed) {
try {
await this.deleteComment(id);
deletedComments.push(id);
} catch (e) {
if (process.env.DEBUG) {
console.error(`transform: failed to delete comment ${id}:`, e);
}
}
}
}
// Fetch the newest historyId (Docmost snapshots on the write above).
let historyId: string | null = null;
try {
const hist = await this.listPageHistory(pageId);
historyId = hist.items?.[0]?.id ?? null;
} catch (e) {
if (process.env.DEBUG) {
console.error("transform: failed to fetch history id:", e);
}
}
return {
pushed: true,
historyId,
diff: diffDocs(oldDoc, newDoc),
deletedComments,
log: ctx.log,
};
}
// --- docmost-sync additions (backport target: docmost-mcp/src/client.ts) ---
//
// REST-only helpers added for the docmost-sync engine. They reuse the
// existing patterns above (this.client.post, this.ensureAuthenticated(),
// this.paginateAll, the private enumerateSpacePages) so the diff can be
// copied back into docmost-mcp verbatim.
/**
* List the contents of a space's trash (soft-deleted pages).
*
* Per SPEC §8 the trash endpoint is PER-SPACE — there is no workspace-wide
* variant — so callers must enumerate spaces and poll each one. The response
* items carry `deletedAt`, `parentPageId`, `spaceId` (and even `content`),
* which is enough to detect deletions precisely rather than inferring them
* from a pageId disappearing from the active tree.
*/
async listTrash(spaceId: string): Promise<any[]> {
return this.paginateAll("/pages/trash", { spaceId });
}
/**
* Restore a soft-deleted page from trash (resets its `deletedAt`).
*/
async restorePage(pageId: string) {
await this.ensureAuthenticated();
return this.client.post("/pages/restore", { pageId }).then((r) => r.data);
}
/**
* Public wrapper for a full space tree walk via sidebar-pages.
*
* Returns every page node in the space (or in the subtree rooted at
* rootPageId), each carrying `id`, `slugId`, `title`, `position`,
* `parentPageId`, `icon`, `hasChildren` — but NOT `content`. Use getPageRaw /
* exportPageBody per node to fetch the body.
*/
async listAllSpacePages(
spaceId: string,
rootPageId?: string,
): Promise<any[]> {
return this.enumerateSpacePages(spaceId, rootPageId);
}
/**
* "Changes since T" scan (SPEC §16). There is NO server-side `updatedAt`
* filter in Docmost and `/pages/recent` is CURSOR-paginated, so this is a
* descending CURSOR scan with a client-side cutoff: each page of items is
* sorted `updatedAt DESC`, we accumulate them and STOP as soon as we hit an
* item whose `updatedAt` is `<= sinceIso` (that item and everything after it
* is already known, so it is NOT included). Only items strictly newer than
* `sinceIso` are returned, in server (descending) order.
*
* - `spaceId` is optional: omit it to scan the whole workspace, pass it to
* scope to one space.
* - `sinceIso === null` means "no previous cutoff" — return just the first
* page (the most recent activity), which seeds the initial `T_last`.
* - `hardPageCap` is a safety ceiling on the number of pages fetched; if it
* is hit before the cutoff is reached a warning is logged because the
* result may be incomplete.
*
* The pagination loop itself lives in the pure, testable `collectRecentSince`
* helper below; this method only supplies a real `fetchPage` bound to the
* REST client. It mirrors the cursor pattern used by `listComments` /
* `listPageHistory` (payload `cursor` + `data.meta?.nextCursor`).
*/
async listRecentSince(
spaceId: string | undefined,
sinceIso: string | null,
hardPageCap = 50,
): Promise<any[]> {
return collectRecentSince(
async (cursor) => {
await this.ensureAuthenticated();
const response = await this.client.post("/pages/recent", {
limit: 100,
...(spaceId ? { spaceId } : {}),
...(cursor ? { cursor } : {}),
});
const data = response.data?.data ?? response.data;
return {
items: data?.items || [],
nextCursor: data?.meta?.nextCursor || null,
};
},
sinceIso,
hardPageCap,
);
}
/**
* Export a page as a self-contained markdown file with meta + body ONLY —
* NO `docmost:comments` block and WITHOUT calling `/comments` at all.
*
* This is the docmost-sync export mode (SPEC §3): the sync never touches
* comment threads, so the file carries page identity (meta) and the body,
* with comment threads surviving only as inline `<span data-comment-id>`
* anchor marks inside the body. Contrast with `exportPageMarkdown`, which
* additionally fetches and appends the comment threads block.
*/
async exportPageBody(pageId: string): Promise<string> {
const page = await this.getPageRaw(pageId);
const body = page.content
? convertProseMirrorToMarkdown(page.content)
: "";
const meta = {
version: 1,
pageId: page.id,
slugId: page.slugId,
title: page.title,
spaceId: page.spaceId,
parentPageId: page.parentPageId ?? null,
};
return serializeDocmostMarkdownBody(meta, body);
}
}
// --- docmost-sync additions (module scope) ---------------------------------
//
// Pure pagination helper extracted from DocmostClient.listRecentSince so the
// cursor-walk logic is unit-testable without a live server (the method only
// binds a real `fetchPage`). Lives at module scope because it is `export`ed;
// the class method above delegates to it.
/**
* Walk a cursor-paginated "recent" feed (sorted updatedAt DESC) newest-first,
* collecting items strictly newer than sinceIso and stopping at the first item
* with updatedAt <= sinceIso. `fetchPage(cursor)` returns one page; dedup by id
* guards a server that ignores the cursor; hardPageCap bounds the walk.
*
* Precondition: `sinceIso` and each `item.updatedAt` MUST be the SAME UTC
* ISO-8601 format that Docmost emits, because the cutoff comparison is purely
* lexicographic (string `<=`); mixed formats or non-UTC offsets would compare
* incorrectly.
*/
export async function collectRecentSince(
fetchPage: (cursor: string | null) => Promise<{ items: any[]; nextCursor: string | null }>,
sinceIso: string | null,
hardPageCap = 50,
): Promise<any[]> {
const collected: any[] = [];
// Track every page id we have already accepted so we can dedup, AND stop when
// a fetched page yields zero NEW ids. This guards against a server that
// ignores the cursor and re-returns the same page forever: without it the
// loop would re-collect the same items up to hardPageCap.
const seen = new Set<string>();
let cursor: string | null = null;
let pages = 0;
let reachedCutoff = false;
while (pages < hardPageCap) {
const data = await fetchPage(cursor);
pages++;
const items: any[] = data.items || [];
let newThisPage = 0;
for (const item of items) {
// Descending scan: the first item at or below the cutoff means every
// remaining item is older too, so stop scanning entirely.
if (
sinceIso !== null &&
item.updatedAt != null &&
item.updatedAt <= sinceIso
) {
reachedCutoff = true;
break;
}
// Skip ids we have already accepted (a server that ignores the cursor
// will re-serve the same items); only genuinely new ids count toward
// progress and get collected.
if (item.id != null && seen.has(item.id)) {
continue;
}
if (item.id != null) seen.add(item.id);
collected.push(item);
newThisPage++;
}
// With a null cutoff we only want the first page.
if (sinceIso === null) break;
if (reachedCutoff) break;
// No next cursor means there are no further pages to fetch.
if (!data.nextCursor) break;
// A page that added no unseen items means the server is not advancing (it
// ignored the cursor), so further fetches cannot make progress — stop.
if (newThisPage === 0) break;
cursor = data.nextCursor;
}
if (sinceIso !== null && !reachedCutoff && pages >= hardPageCap) {
console.warn(
`collectRecentSince: hit hardPageCap=${hardPageCap} before reaching the updatedAt cutoff; the result may be truncated`,
);
}
return collected;
}