chore(mcp): stop committing build/ and node_modules; build in CI/Docker

Same hygiene fix as git-sync (review #2), applied to packages/mcp which had the
identical pre-existing problem: committed build/ (20 files) + node_modules (28,
pnpm symlinks with a baked /home/claude store path).

- git rm --cached packages/mcp/{build,node_modules}.
- .gitignore: add packages/mcp/build/ (packages/*/node_modules/ already covers it).
- Build where consumed: apps/server `pretest` and the CI Test workflow now build
  @docmost/mcp too. The Dockerfile builder already runs `pnpm build` (nx builds
  mcp) and already COPYs packages/mcp/build into the runtime image.

Verified: wiped build/, rebuilt via `pnpm --filter @docmost/mcp build`; the mcp
server suites (96 tests) pass against the freshly-built, non-committed output.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-23 15:50:03 +03:00
parent 75d3ac4305
commit 14882cf833
51 changed files with 8 additions and 9206 deletions

View File

@@ -68,12 +68,12 @@ jobs:
- name: Build editor-ext - name: Build editor-ext
run: pnpm --filter @docmost/editor-ext build run: pnpm --filter @docmost/editor-ext build
# git-sync is no longer committed in built form (build/ is gitignored), so # git-sync and mcp are no longer committed in built form (build/ is
# CI must compile it: the server suite imports the package via its built # gitignored), so CI must compile them: the server resolves both via their
# build/index.js. The server pretest also builds it, but building here keeps # built build/index.js. The server pretest also builds them, but building
# it explicit and independent of pnpm lifecycle ordering. # here keeps it explicit and independent of pnpm lifecycle ordering.
- name: Build git-sync - name: Build git-sync and mcp
run: pnpm --filter @docmost/git-sync build run: pnpm --filter @docmost/git-sync build && pnpm --filter @docmost/mcp build
- name: Run unit tests - name: Run unit tests
run: pnpm -r test run: pnpm -r test

1
.gitignore vendored
View File

@@ -10,6 +10,7 @@ data
# via `pnpm build`, never committed, so src/ and prod can never silently diverge). # via `pnpm build`, never committed, so src/ and prod can never silently diverge).
packages/*/node_modules/ packages/*/node_modules/
packages/git-sync/build/ packages/git-sync/build/
packages/mcp/build/
# Logs # Logs
logs logs

View File

@@ -23,7 +23,7 @@
"migration:reset": "tsx src/database/migrate.ts down-to NO_MIGRATIONS", "migration:reset": "tsx src/database/migrate.ts down-to NO_MIGRATIONS",
"migration:codegen": "kysely-codegen --dialect=postgres --camel-case --env-file=../../.env --out-file=./src/database/types/db.d.ts", "migration:codegen": "kysely-codegen --dialect=postgres --camel-case --env-file=../../.env --out-file=./src/database/types/db.d.ts",
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix", "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix",
"pretest": "pnpm --filter @docmost/editor-ext build && pnpm --filter @docmost/git-sync build", "pretest": "pnpm --filter @docmost/editor-ext build && pnpm --filter @docmost/git-sync build && pnpm --filter @docmost/mcp build",
"test": "jest", "test": "jest",
"test:int": "jest --config test/jest-integration.json", "test:int": "jest --config test/jest-integration.json",
"test:watch": "jest --watch", "test:watch": "jest --watch",

File diff suppressed because it is too large Load Diff

View File

@@ -1,133 +0,0 @@
import { randomUUID } from "node:crypto";
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
import { createDocmostMcpServer } from "./index.js";
/**
* Build a stateful Streamable-HTTP handler for the Docmost MCP server. The
* embedding host (the gitmost NestJS server) bridges its raw Node req/res into
* `handleRequest`. One McpServer + transport is created per MCP session and
* kept alive between requests, keyed by the `mcp-session-id` header.
*
* `config` is EITHER a static `DocmostMcpConfig` (back-compat: stdio + the env
* service account, unchanged) OR a `McpConfigResolver` run once per session at
* `initialize` to bind that session to the request's identity.
*/
export function createMcpHttpHandler(config, options = {}) {
// One transport (and one McpServer) per MCP session, keyed by session id.
const transports = {};
// Last activity timestamp per session id, used for idle eviction.
const lastSeen = {};
// Anti-session-fixation: the opaque identity key bound to each session at
// initialize. A later request for that session whose key differs is rejected.
const sessionIdentity = {};
// Write a JSON-RPC error and end the response. Used for the 400/401 paths so
// every early rejection is a well-formed JSON-RPC error, not a torn response.
const sendJsonRpcError = (res, statusCode, code, message) => {
res.statusCode = statusCode;
res.setHeader("Content-Type", "application/json");
res.end(JSON.stringify({
jsonrpc: "2.0",
error: { code, message },
id: null,
}));
};
// Idle session TTL (ms): a session with no activity for this long is evicted.
// Defaults to 30 min; overridable via MCP_SESSION_IDLE_MS.
const idleTtlMs = (() => {
const parsed = parseInt(process.env.MCP_SESSION_IDLE_MS ?? "", 10);
return Number.isFinite(parsed) && parsed > 0 ? parsed : 30 * 60 * 1000;
})();
// Periodically close transports idle longer than the TTL. transport.close()
// triggers its onclose, which removes it from `transports`; we also drop the
// lastSeen entry. unref() so this timer never keeps the process alive.
const sweepIntervalMs = 5 * 60 * 1000;
const sweepTimer = setInterval(() => {
const now = Date.now();
for (const sid of Object.keys(transports)) {
if (now - (lastSeen[sid] ?? 0) > idleTtlMs) {
void transports[sid].close();
delete lastSeen[sid];
delete sessionIdentity[sid];
}
}
}, sweepIntervalMs);
sweepTimer.unref();
async function handleRequest(req, res, parsedBody) {
const sessionId = req.headers["mcp-session-id"];
const method = (req.method || "GET").toUpperCase();
let transport = sessionId ? transports[sessionId] : undefined;
if (method === "POST" && !transport) {
// A new session may only be created by an initialize request without a
// session id.
if (sessionId || !isInitializeRequest(parsedBody)) {
sendJsonRpcError(res, 400, -32000, "Bad Request: no valid session ID provided");
return;
}
// Resolve the per-session config from the request (per-user identity) when
// a resolver was supplied; otherwise use the static config unchanged. The
// resolver may throw (e.g. bad credentials) — surface a clean 401, never
// a created session.
let sessionConfig;
let identity;
try {
sessionConfig =
typeof config === "function" ? await config(req) : config;
if (options.identify)
identity = await options.identify(req);
}
catch (err) {
sendJsonRpcError(res, 401, -32001, err instanceof Error ? err.message : "Unauthorized");
return;
}
transport = new StreamableHTTPServerTransport({
sessionIdGenerator: () => randomUUID(),
onsessioninitialized: (sid) => {
transports[sid] = transport;
lastSeen[sid] = Date.now();
// Bind the resolved identity to the new session id for anti-fixation.
if (identity !== undefined)
sessionIdentity[sid] = identity;
},
});
transport.onclose = () => {
const sid = transport.sessionId;
if (sid && transports[sid])
delete transports[sid];
if (sid)
delete sessionIdentity[sid];
};
const server = createDocmostMcpServer(sessionConfig);
await server.connect(transport);
await transport.handleRequest(req, res, parsedBody);
return;
}
if (!transport) {
sendJsonRpcError(res, 400, -32000, "Bad Request: no valid session ID provided");
return;
}
// Anti-session-fixation: a request reusing an existing session id must
// present credentials/token that resolve to the SAME identity bound at
// initialize, otherwise reject with 401. This prevents hijacking another
// user's established session by replaying its session id with different
// credentials.
if (options.identify && sessionId && sessionId in sessionIdentity) {
let presented;
try {
presented = await options.identify(req);
}
catch (err) {
sendJsonRpcError(res, 401, -32001, err instanceof Error ? err.message : "Unauthorized");
return;
}
if (presented !== sessionIdentity[sessionId]) {
sendJsonRpcError(res, 401, -32001, "Credentials do not match the user that owns this MCP session.");
return;
}
}
// Routing to an existing transport: refresh its idle timestamp.
if (sessionId)
lastSeen[sessionId] = Date.now();
await transport.handleRequest(req, res, parsedBody);
}
return { handleRequest };
}

View File

@@ -1,691 +0,0 @@
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import { readFileSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, join } from "path";
import { DocmostClient } from "./client.js";
import { parseNodeArg } from "./lib/parse-node-arg.js";
import { SHARED_TOOL_SPECS } from "./tool-specs.js";
// Re-export the client and its config type so embedding hosts (e.g. the gitmost
// NestJS server) can `import('@docmost/mcp')` and construct a DocmostClient
// directly — for the credentials variant OR the per-user getToken variant.
export { DocmostClient } from "./client.js";
// Re-export the zod-agnostic shared tool-spec registry so the in-app AI-SDK
// service can read it off the loaded module (it cannot import the ESM package's
// internals directly; it goes through loadDocmostMcp()).
export { SHARED_TOOL_SPECS } from "./tool-specs.js";
// Read version from package.json
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const packageJson = JSON.parse(readFileSync(join(__dirname, "../package.json"), "utf-8"));
const VERSION = packageJson.version;
// Configuration for an MCP server instance is the DocmostMcpConfig union
// (credentials OR getToken) defined and re-exported above. The factory below is
// fully side-effect-free on import: it reads no environment variables and opens
// no transport. The standalone stdio entrypoint (stdio.ts) and the HTTP handler
// (http.ts) supply this config and own the process/transport lifecycle.
// --- Modern McpServer Implementation ---
// Editing guide surfaced to MCP clients in the initialize result so they can
// pick the right tool by intent and avoid resending whole documents.
const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (always inline; requires an EXACT selection — the contiguous text to anchor/highlight on; fails rather than leaving an unanchored comment), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
"Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " +
"Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " +
"Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown.";
// Helper to format JSON responses
const jsonContent = (data) => ({
content: [{ type: "text", text: JSON.stringify(data, null, 2) }],
});
/**
* Create a fully configured Docmost MCP server. Side-effect-free: it does not
* read environment variables and does not connect any transport — the caller
* decides how to expose it (stdio or HTTP). The client talks to Docmost over
* REST + the collaboration WebSocket using the provided service-account
* credentials and auto-re-authenticates.
*/
export function createDocmostMcpServer(config) {
// Pass the whole config union through: the client branches internally on
// credentials vs. getToken, so both the external /mcp (creds) and the
// internal per-user (getToken) paths are wired here unchanged.
const docmostClient = new DocmostClient(config);
const server = new McpServer({
name: "docmost-mcp",
version: VERSION,
}, { instructions: SERVER_INSTRUCTIONS });
// Register a tool from the shared, zod-agnostic spec registry. The spec owns
// the canonical name + model-facing description + (optional) schema builder;
// only the execute body is supplied per call. buildShape is invoked with THIS
// package's zod (v3); the in-app layer passes its own zod (v4).
//
// The spec's schema builder returns a plain ZodRawShape (Record<string,
// unknown> in the shared module since it must stay zod-agnostic), so the
// McpServer.registerTool overloads cannot infer the execute arg's shape from
// it. We type `execute` loosely and cast the call through `any`; runtime
// behaviour is unchanged — each execute body destructures the same fields the
// builder declares.
const registerShared = (spec, execute) => server.registerTool(spec.mcpName, spec.buildShape
? { description: spec.description, inputSchema: spec.buildShape(z) }
: { description: spec.description }, execute);
// Tool: get_workspace
registerShared(SHARED_TOOL_SPECS.getWorkspace, async () => {
const workspace = await docmostClient.getWorkspace();
return jsonContent(workspace);
});
// Tool: list_spaces
registerShared(SHARED_TOOL_SPECS.listSpaces, async () => {
const spaces = await docmostClient.getSpaces();
return jsonContent(spaces);
});
// Tool: list_pages
server.registerTool("list_pages", {
description: "List most recent pages in a space ordered by updatedAt (descending). " +
"Returns a bounded list (default 50, max 100) — use search for lookups " +
"in large spaces. Pass tree:true (with spaceId) to instead get the " +
"space's full page hierarchy as a nested tree.",
inputSchema: {
spaceId: z.string().optional(),
limit: z
.number()
.int()
.min(1)
.max(100)
.optional()
.describe("Max pages to return (default 50, max 100)"),
tree: z
.boolean()
.optional()
.describe("When true, return the space's full page hierarchy as a nested tree (each node has a children array) instead of the recent-by-updatedAt flat list. Requires spaceId; ignores limit."),
},
}, async ({ spaceId, limit, tree }) => {
const result = await docmostClient.listPages(spaceId, limit ?? 50, tree ?? false);
return jsonContent(result);
});
// Tool: get_page
server.registerTool("get_page", {
description: "Get page details with content converted to Markdown. The conversion is " +
"LOSSY (block ids, exact table/callout structure are approximated); for a " +
"lossless representation use get_page_json.",
inputSchema: {
pageId: z.string().min(1),
},
}, async ({ pageId }) => {
const page = await docmostClient.getPage(pageId);
return jsonContent(page);
});
// Tool: get_page_json
registerShared(SHARED_TOOL_SPECS.getPageJson, async ({ pageId }) => {
const page = await docmostClient.getPageJson(pageId);
return jsonContent(page);
});
// Tool: get_outline
registerShared(SHARED_TOOL_SPECS.getOutline, async ({ pageId }) => {
const result = await docmostClient.getOutline(pageId);
return jsonContent(result);
});
// Tool: get_node
registerShared(SHARED_TOOL_SPECS.getNode, async ({ pageId, nodeId }) => {
const result = await docmostClient.getNode(pageId, nodeId);
return jsonContent(result);
});
// Tool: table_get
server.registerTool("table_get", {
description: "Read a table as a matrix. Returns {rows, cols, cells (text[][]), " +
"cellIds (paragraph id per cell, or null)}. `table` = `#<index>` from " +
"get_outline, or any block id inside the table. Use cellIds with " +
"patch_node for rich-formatted cell edits. `cols` is the FIRST row's " +
"width; ragged tables may vary per row, so use the per-row length of " +
"`cells` for each row.",
inputSchema: {
pageId: z.string().min(1),
table: z.string().min(1),
},
}, async ({ pageId, table }) => {
const result = await docmostClient.getTable(pageId, table);
return jsonContent(result);
});
// Tool: table_insert_row
server.registerTool("table_insert_row", {
description: "Insert a row of plain-text cells into a table. `table` = `#<index>` or " +
"a block id inside it. `cells` = text per column (padded to the table's " +
"column count; error if more cells than columns). `index` = 0-based " +
"insert position (0 inserts before the header); omit to append at the end.",
inputSchema: {
pageId: z.string().min(1),
table: z.string().min(1),
cells: z.array(z.string()),
index: z.number().int().optional(),
},
}, async ({ pageId, table, cells, index }) => {
const result = await docmostClient.tableInsertRow(pageId, table, cells, index);
return jsonContent(result);
});
// Tool: table_delete_row
server.registerTool("table_delete_row", {
description: "Delete the row at 0-based `index` from a table (`table` = `#<index>` or " +
"a block id inside it). Refuses to delete the table's only row. An " +
"out-of-range `index` throws. Deleting `index` 0 removes the header row, " +
"and the next row becomes the new header.",
inputSchema: {
pageId: z.string().min(1),
table: z.string().min(1),
index: z.number().int(),
},
}, async ({ pageId, table, index }) => {
const result = await docmostClient.tableDeleteRow(pageId, table, index);
return jsonContent(result);
});
// Tool: table_update_cell
server.registerTool("table_update_cell", {
description: "Set the plain-text content of cell [row,col] (0-based) in a table " +
"(`table` = `#<index>` or a block id inside it). Replaces the cell's " +
"content with a single text paragraph; for rich formatting use patch_node " +
"on the cell's paragraph id from table_get.",
inputSchema: {
pageId: z.string().min(1),
table: z.string().min(1),
row: z.number().int(),
col: z.number().int(),
text: z.string(),
},
}, async ({ pageId, table, row, col, text }) => {
const result = await docmostClient.tableUpdateCell(pageId, table, row, col, text);
return jsonContent(result);
});
// Tool: create_page
server.registerTool("create_page", {
description: "Create a new page with content (automatically moves it to the correct hierarchy).",
inputSchema: {
title: z.string().min(1).describe("Title of the page"),
content: z.string().min(1).describe("Markdown content"),
spaceId: z.string().min(1),
parentPageId: z
.string()
.optional()
.describe("Optional parent page ID to nest under"),
},
}, async ({ title, content, spaceId, parentPageId }) => {
const result = await docmostClient.createPage(title, content, spaceId, parentPageId);
return jsonContent(result);
});
// Tool: update_page_json
server.registerTool("update_page_json", {
description: "Replace a page's content with a raw ProseMirror JSON document " +
"(lossless write: preserves the block ids, callouts, tables and " +
"attributes you pass in). Typical flow: get_page_json -> modify the " +
"JSON -> update_page_json. Keep existing node ids intact so heading " +
"anchors and history stay stable. Minimal full-doc example: " +
'{"type":"doc","content":[{"type":"paragraph","content":' +
'[{"type":"text","text":"Hi"}]}]}. `content` may be a JSON object or a ' +
"JSON string (both accepted), and is OPTIONAL: omit it to update only " +
"the title (though prefer rename_page for a title-only change). " +
"Supplying neither content nor title is an error.",
inputSchema: {
pageId: z.string().min(1).describe("ID of the page to update"),
content: z
.any()
.optional()
.describe('ProseMirror document {"type":"doc","content":[...]} (JSON object or ' +
"JSON string). Omit to rename only."),
title: z.string().optional().describe("Optional new title"),
},
}, async ({ pageId, content, title }) => {
// Only parse/validate the document when it was actually supplied; when it
// is omitted, pass it straight through so the client performs a title-only
// (or no-op) update.
let doc;
if (content === undefined || content === null) {
doc = undefined;
}
else {
// String -> JSON.parse (throwing on invalid); object passes through.
doc = parseNodeArg(content, "content was a string but not valid JSON");
}
const result = await docmostClient.updatePageJson(pageId, doc, title);
return jsonContent(result);
});
// Tool: export_page_markdown
server.registerTool("export_page_markdown", {
description: "Export a page to a single self-contained, lossless Docmost-flavoured " +
"Markdown file (custom extensions): YAML-free meta header, body with " +
"inline comment anchors and diagrams, and a trailing comments-thread " +
"block. Designed for a download -> edit body -> import_page_markdown " +
"round-trip that preserves everything, including comment highlights. " +
"Comment THREADS are preserved in the file but are not re-pushed to the " +
"server on import.",
inputSchema: {
pageId: z.string().min(1),
},
}, async ({ pageId }) => {
const md = await docmostClient.exportPageMarkdown(pageId);
return { content: [{ type: "text", text: md }] };
});
// Tool: import_page_markdown
registerShared(SHARED_TOOL_SPECS.importPageMarkdown, async ({ pageId, markdown }) => {
const res = await docmostClient.importPageMarkdown(pageId, markdown);
return jsonContent(res);
});
// Tool: copy_page_content
registerShared(SHARED_TOOL_SPECS.copyPageContent, async ({ sourcePageId, targetPageId }) => {
const result = await docmostClient.copyPageContent(sourcePageId, targetPageId);
return jsonContent(result);
});
// Tool: rename_page
server.registerTool("rename_page", {
description: "Rename a page (change its title only) without touching or resending " +
"its content.",
inputSchema: {
pageId: z.string().min(1).describe("ID of the page to rename"),
title: z.string().min(1).describe("New title"),
},
}, async ({ pageId, title }) => {
const result = await docmostClient.renamePage(pageId, title);
return jsonContent(result);
});
// Tool: edit_page_text
registerShared(SHARED_TOOL_SPECS.editPageText, async ({ pageId, edits }) => {
const result = await docmostClient.editPageText(pageId, edits);
return jsonContent(result);
});
// Tool: patch_node
server.registerTool("patch_node", {
description: "Replaces a single block identified by its attrs.id WITHOUT resending the " +
"whole document. Get the block id from get_page_json, then pass a " +
"ProseMirror node to put in its place. Example node: a paragraph " +
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
'heading {"type":"heading","attrs":{"level":2},"content":' +
'[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
'{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
"JSON object or a JSON string (both accepted). Cheaper and safer than " +
"update_page_json for one-block structural edits.",
inputSchema: {
pageId: z.string().min(1),
nodeId: z.string().min(1),
node: z
.any()
.describe("ProseMirror node to put in place of the node with this id, e.g. " +
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' +
"JSON object or JSON string both accepted."),
},
}, async ({ pageId, nodeId, node }) => {
const parsedNode = parseNodeArg(node);
const result = await docmostClient.patchNode(pageId, nodeId, parsedNode);
return jsonContent(result);
});
// Tool: insert_node
server.registerTool("insert_node", {
description: "Insert a block before/after another block (by attrs.id or anchor text) " +
"or append at the end. Get anchor block ids from get_page_json. Avoids " +
"resending the whole document. Can also insert table structure: to add a " +
"tableRow, pass a tableRow node with position before/after and anchor " +
"INSIDE the target table — anchorNodeId of any block/cell in it, or " +
"anchorText matching the table; to add a tableCell/tableHeader, use " +
"anchorNodeId of a block inside the target row (anchorText only resolves " +
"top-level blocks, so it cannot target a row). `anchorText` is matched " +
"against the block's literal rendered plain text (no markdown); " +
"markdown/emoji are tolerated as a fallback; prefer plain text or " +
"anchorNodeId. Note: append is top-level " +
"only and rejects structural table nodes. Example node: a paragraph " +
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
'heading {"type":"heading","attrs":{"level":2},"content":' +
'[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
'{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
"JSON object or a JSON string (both accepted).",
inputSchema: {
pageId: z.string().min(1),
node: z
.any()
.describe("ProseMirror node to insert, e.g. " +
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' +
"JSON object or JSON string both accepted."),
position: z.enum(["before", "after", "append"]),
anchorNodeId: z.string().optional(),
anchorText: z.string().optional(),
},
}, async ({ pageId, node, position, anchorNodeId, anchorText }) => {
const parsedNode = parseNodeArg(node);
const result = await docmostClient.insertNode(pageId, parsedNode, {
position,
anchorNodeId,
anchorText,
});
return jsonContent(result);
});
// Tool: delete_node
registerShared(SHARED_TOOL_SPECS.deleteNode, async ({ pageId, nodeId }) => {
const result = await docmostClient.deleteNode(pageId, nodeId);
return jsonContent(result);
});
// Tool: insert_image
server.registerTool("insert_image", {
description: "Download an image from a web (http/https) URL and insert it into " +
"a page in one step. By default " +
"appends the image at the end of the page. With replaceText, replaces the " +
"first top-level block whose text contains that string (handy for " +
'swapping a text placeholder like "[image: foo.png]" for the real image). ' +
"With afterText, inserts the image right after the first block containing " +
"that string. Preserves all other block ids.",
inputSchema: {
pageId: z.string().min(1),
imageUrl: z
.string()
.min(1)
.describe("http(s) URL of the image to download and upload"),
align: z.enum(["left", "center", "right"]).optional(),
alt: z.string().optional(),
replaceText: z
.string()
.optional()
.describe("Replace the first top-level block whose text contains this string with the image"),
afterText: z
.string()
.optional()
.describe("Insert the image right after the first top-level block whose text contains this string"),
},
}, async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => {
const result = await docmostClient.insertImage(pageId, imageUrl, {
align,
alt,
replaceText,
afterText,
});
return jsonContent(result);
});
// Tool: replace_image
server.registerTool("replace_image", {
description: "Replace an existing image on a page with a new image fetched from a web " +
"(http/https) URL: uploads the new file as a NEW " +
"attachment (fresh clean URL that renders and busts browser caches), then " +
"repoints every image node referencing the old attachmentId (recursively, " +
"incl. callouts/tables) via the live document, preserving comments, " +
"alignment and alt. The old attachment is left as an unreferenced orphan " +
"(Docmost has no API to delete a single attachment; it is removed only when " +
"the page/space is deleted). In-place byte overwrite is avoided because some " +
"Docmost versions corrupt the attachment (HTTP 500) on overwrite.",
inputSchema: {
pageId: z.string().min(1),
attachmentId: z
.string()
.min(1)
.describe("attachmentId of the image currently in the page to replace"),
imageUrl: z
.string()
.min(1)
.describe("http(s) URL of the new image to download"),
align: z.enum(["left", "center", "right"]).optional(),
alt: z.string().optional(),
},
}, async ({ pageId, attachmentId, imageUrl, align, alt }) => {
const result = await docmostClient.replaceImage(pageId, attachmentId, imageUrl, {
align,
alt,
});
return jsonContent(result);
});
// Tool: share_page
server.registerTool("share_page", {
description: "Make a page publicly accessible (idempotent) and return its public " +
"URL. The URL format is <app>/share/<key>/p/<slugId>.",
inputSchema: {
pageId: z.string().min(1).describe("ID of the page to share"),
searchIndexing: z
.boolean()
.optional()
.describe("Allow search engines to index the page (default true)"),
},
}, async ({ pageId, searchIndexing }) => {
const result = await docmostClient.sharePage(pageId, searchIndexing ?? true);
return jsonContent(result);
});
// Tool: unshare_page
registerShared(SHARED_TOOL_SPECS.unsharePage, async ({ pageId }) => {
const result = await docmostClient.unsharePage(pageId);
return jsonContent(result);
});
// Tool: list_shares
registerShared(SHARED_TOOL_SPECS.listShares, async () => {
const result = await docmostClient.listShares();
return jsonContent(result);
});
// Tool: move_page
server.registerTool("move_page", {
description: "Move a page to a new parent (nesting) or root. Essential for organizing pages created via 'create_page'.",
inputSchema: {
pageId: z.string().min(1),
parentPageId: z
.string()
.nullable()
.optional()
.describe("Target parent page ID. Pass 'null' or empty string to move to root."),
position: z
.string()
.min(5)
.optional()
.describe("fractional-index position key; min 5 chars; omit to append at the end."),
},
}, async ({ pageId, parentPageId, position }) => {
const finalParentId = parentPageId === "" || parentPageId === "null" ? null : parentPageId;
// Cheap cycle guard: a page cannot be moved directly under itself.
// (Deeper descendant-cycle detection is intentionally out of scope.)
if (finalParentId !== null && finalParentId === pageId) {
throw new Error("cannot move a page under itself");
}
const result = await docmostClient.movePage(pageId, finalParentId || null, position);
// Require POSITIVE confirmation: the live /pages/move success shape is
// exactly { success: true, status: 200 }. An empty body, a 204, or any odd
// shape lacking success === true must NOT be reported as a successful move,
// so we surface the raw API result instead of declaring success.
if (!(result && typeof result === "object" && result.success === true)) {
throw new Error(`Failed to move page ${pageId}: ${JSON.stringify(result)}`);
}
return jsonContent({
message: `Successfully moved page ${pageId} to parent ${finalParentId || "root"}`,
result,
});
});
// Tool: delete_page
server.registerTool("delete_page", {
description: "Delete a single page by ID.",
inputSchema: {
pageId: z.string().min(1),
},
}, async ({ pageId }) => {
await docmostClient.deletePage(pageId);
return {
content: [
{ type: "text", text: `Successfully deleted page ${pageId}` },
],
};
});
// --- Comment tools (ported from upstream PR #3 by Max Nikitin) ---
// Tool: list_comments
server.registerTool("list_comments", {
description: "List all comments on a page (paginated). Content is returned as Markdown.",
inputSchema: {
pageId: z.string().describe("ID of the page"),
},
}, async ({ pageId }) => {
const comments = await docmostClient.listComments(pageId);
return jsonContent(comments);
});
// Tool: create_comment
server.registerTool("create_comment", {
description: "Create a new comment on a page. The comment is ALWAYS inline and is " +
"anchored to (highlights) its `selection` text — there are no page-level " +
"comments. Content is provided as Markdown and automatically converted. " +
"A top-level comment REQUIRES an exact `selection`; if the selection " +
"cannot be found in the page the call fails (no orphan comment is left). " +
"Replies (parentCommentId set) inherit the parent's anchor and take no " +
"selection.",
inputSchema: {
pageId: z.string().describe("ID of the page to comment on"),
content: z.string().min(1).describe("Comment content in Markdown format"),
selection: z
.string()
.min(1)
// Enforce the documented 250-char cap to match the description above.
.max(250)
.optional()
.describe("EXACT contiguous text from a single paragraph/block to anchor the " +
"comment on (<=250 chars). Required for a top-level comment; omit " +
"only when replying via parentCommentId."),
parentCommentId: z
.string()
.optional()
.describe("Parent comment ID to create a reply (max 2 nesting levels)"),
},
}, async ({ pageId, content, selection, parentCommentId }) => {
if (!parentCommentId && (!selection || !selection.trim())) {
throw new Error("create_comment: a 'selection' (exact text to anchor on) is required for a top-level comment; omit it only when replying via parentCommentId.");
}
const result = await docmostClient.createComment(pageId, content, "inline", selection, parentCommentId);
return jsonContent(result);
});
// Tool: update_comment
server.registerTool("update_comment", {
description: "Update an existing comment's content. Only the comment creator can " +
"update it. Content is provided as Markdown.",
inputSchema: {
commentId: z.string().min(1).describe("ID of the comment to update"),
content: z
.string()
.min(1)
.describe("New comment content in Markdown format"),
},
}, async ({ commentId, content }) => {
const result = await docmostClient.updateComment(commentId, content);
return jsonContent(result);
});
// Tool: delete_comment
server.registerTool("delete_comment", {
description: "Delete a comment. Only the comment creator or space admin can delete it.",
inputSchema: {
commentId: z.string().min(1).describe("ID of the comment to delete"),
},
}, async ({ commentId }) => {
await docmostClient.deleteComment(commentId);
return {
content: [
{
type: "text",
text: `Successfully deleted comment ${commentId}`,
},
],
};
});
// Tool: check_new_comments
server.registerTool("check_new_comments", {
description: "Check for new comments across pages in a space since a given timestamp. " +
"Optionally scope to a page subtree (folder). Returns only comments " +
"created after the specified time.",
inputSchema: {
spaceId: z.string().describe("Space ID to check for new comments"),
since: z
.string()
.min(1)
.describe("ISO 8601 timestamp — only return comments created after this time (e.g. '2026-03-10T00:00:00Z')"),
parentPageId: z
.string()
.optional()
.describe("Optional root page ID to scope the check to a subtree (folder). " +
"Only pages under this parent will be checked."),
},
}, async ({ spaceId, since, parentPageId }) => {
// Reject an unparseable timestamp up front: otherwise the comparison
// against NaN silently treats every comment as "not new" and the tool
// returns zero results without signalling the bad input.
if (Number.isNaN(Date.parse(since))) {
throw new Error(`Invalid 'since' timestamp: ${JSON.stringify(since)} — expected an ISO 8601 date (e.g. '2026-03-10T00:00:00Z')`);
}
const result = await docmostClient.checkNewComments(spaceId, since, parentPageId);
return jsonContent(result);
});
// Tool: search
server.registerTool("search", {
description: "Search for pages and content. Results are bounded by `limit` " +
"(default applied by the client, max 100).",
inputSchema: {
query: z.string().min(1).describe("Search query"),
limit: z
.number()
.int()
.min(1)
.max(100)
.optional()
.describe("Max results to return (max 100)"),
},
}, async ({ query, limit }) => {
// The tool exposes no spaceId filter, so pass undefined for the client's
// optional spaceId parameter and forward limit into its correct slot.
const result = await docmostClient.search(query, undefined, limit);
return jsonContent(result);
});
// Tool: docmost_transform
server.registerTool("docmost_transform", {
description: "Edit a page by running an arbitrary JS transform `(doc, ctx) => doc` " +
"against its LIVE ProseMirror document, with a diff preview and page " +
"history as the safety net. By default dryRun=true: returns a diff " +
"preview WITHOUT writing. Set dryRun=false to apply (atomic, won't " +
"clobber concurrent edits). `doc` is the lossless ProseMirror document " +
"({type:'doc',content:[...]}); return a new doc of the same shape. " +
"`ctx` gives you: comments (the page's comments, each {id, content " +
"(markdown), selection, type}); log (array; console.log pushes to it); " +
"consume(id) (mark a comment id as consumed — those are deleted when " +
"deleteComments=true after a successful apply); and helpers: " +
"blockText(node) (plain text), walk(node, fn) (depth-first over all " +
"nodes incl. callouts/tables/lists), getList(doc, predicate) (find a " +
"node even without attrs.id), insertMarkerAfter(doc, anchor, marker, " +
"{beforeBlock}) (insert a plain unmarked text run after anchor, " +
"mark-safe), setCalloutRange(doc, n) (sync a [1]…[K] callout range to " +
"[1]…[n]), noteItem(inlineNodes) (wrap inline nodes in a listItem with a " +
"fresh id), mdToInlineNodes(markdown) (comment markdown -> inline nodes), " +
"and commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
"comments into numbered footnotes). Footnote convention: markers are " +
"plain '[N]' text in the body; the notes are an orderedList under a " +
"heading whose text is 'Примечания переводчика'. The transform runs " +
"sandboxed (no require/process/fs/network, 5s timeout) and must return a " +
"{type:'doc'} node.",
inputSchema: {
pageId: z.string().min(1),
transformJs: z
.string()
.min(1)
.describe("A JS function `(doc, ctx) => doc` (expression-arrow or " +
"parenthesized function). It receives a clone of the live doc and " +
"ctx (comments, log, consume(id), helpers: blockText/walk/getList/" +
"insertMarkerAfter/setCalloutRange/noteItem/mdToInlineNodes/" +
"commentsToFootnotes) and must return a {type:'doc'} node."),
dryRun: z
.boolean()
.optional()
.default(true)
.describe("Preview only (no write) when true (default)."),
deleteComments: z
.boolean()
.optional()
.default(false)
.describe("After a successful apply, delete every comment id passed to " +
"ctx.consume(id)."),
},
}, async ({ pageId, transformJs, dryRun, deleteComments }) => {
const result = await docmostClient.transformPage(pageId, transformJs, {
dryRun,
deleteComments,
});
return jsonContent(result);
});
// Tool: diff_page_versions
registerShared(SHARED_TOOL_SPECS.diffPageVersions, async ({ pageId, from, to }) => {
const result = await docmostClient.diffPageVersions(pageId, from, to);
return jsonContent(result);
});
// Tool: list_page_history
registerShared(SHARED_TOOL_SPECS.listPageHistory, async ({ pageId, cursor }) => {
const result = await docmostClient.listPageHistory(pageId, cursor);
return jsonContent(result);
});
// Tool: restore_page_version
registerShared(SHARED_TOOL_SPECS.restorePageVersion, async ({ historyId }) => {
const result = await docmostClient.restorePageVersion(historyId);
return jsonContent(result);
});
return server;
}

View File

@@ -1,74 +0,0 @@
import axios from "axios";
export async function getCollabToken(baseUrl, apiToken) {
try {
const response = await axios.post(`${baseUrl}/auth/collab-token`, {}, {
headers: {
Authorization: `Bearer ${apiToken}`,
"Content-Type": "application/json",
},
});
// console.error('Collab Token Response:', response.data);
// Response is wrapped in { data: { token: ... } }
return response.data.data?.token || response.data.token;
}
catch (error) {
if (axios.isAxiosError(error)) {
// Attach the HTTP status to the plain Error so callers (e.g.
// getCollabTokenWithReauth) can still detect a 401/403 after the
// original AxiosError has been wrapped away.
// Avoid leaking the full server response body by default; include only
// status + statusText. Append the body only when DEBUG is set.
let message = `Failed to get collab token: ${error.response?.status} ${error.response?.statusText}`;
if (process.env.DEBUG) {
message += ` - ${JSON.stringify(error.response?.data)}`;
}
const err = new Error(message);
err.status = error.response?.status;
throw err;
}
throw error;
}
}
export async function performLogin(baseUrl, email, password) {
try {
const response = await axios.post(`${baseUrl}/auth/login`, {
email,
password,
});
// Extract token from Set-Cookie header
const cookies = response.headers["set-cookie"];
if (!cookies) {
throw new Error("No Set-Cookie header found in login response");
}
// Match the cookie name exactly to avoid matching a future
// authTokenRefresh cookie (startsWith would catch it).
const authCookie = cookies.find((c) => {
const kv = c.split(";")[0];
return kv.slice(0, kv.indexOf("=")) === "authToken";
});
if (!authCookie) {
throw new Error("No authToken cookie found in login response");
}
// Take everything after the FIRST "=" up to the first ";".
// Splitting on "=" would truncate base64 values containing "=" padding.
const kv = authCookie.split(";")[0];
const token = kv.slice(kv.indexOf("=") + 1);
return token;
}
catch (error) {
// Avoid leaking the full server response body by default; log only the
// HTTP status. Log the verbose body only when DEBUG is set.
if (axios.isAxiosError(error)) {
if (process.env.DEBUG) {
console.error("Login failed:", error.response?.data);
}
else {
console.error("Login failed:", error.response?.status);
}
}
else {
console.error("Login failed:", error.message);
}
throw error;
}
}

View File

@@ -1,713 +0,0 @@
import { HocuspocusProvider } from "@hocuspocus/provider";
import { TiptapTransformer } from "@hocuspocus/transformer";
import * as Y from "yjs";
import WebSocket from "ws";
import { marked } from "marked";
import { generateJSON } from "@tiptap/html";
import { Node as PMNode } from "@tiptap/pm/model";
import { updateYFragment } from "y-prosemirror";
import { JSDOM } from "jsdom";
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
import { withPageLock } from "./page-lock.js";
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
import { lexFootnoteLines } from "./footnote-lex.js";
import { summarizeChange } from "./diff.js";
/**
* Build the descriptive error for an opaque Yjs encode failure ("Unexpected
* content type"), shared by both encode paths (`buildYDoc` -> `toYdoc` and
* `applyDocToFragment` -> `updateYFragment`) so the message wording stays in one
* place. `label` names the stage that failed (diagnostic). `sanitizeForYjs`
* already stripped `undefined` attrs, so a remaining failure is pinpointed via
* `findUnstorableAttr`.
*/
function unstorableYjsError(safe, label, e) {
const bad = findUnstorableAttr(safe);
return new Error(`Failed to encode document to Yjs (${label}): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`);
}
// Setup DOM environment for Tiptap HTML parsing in Node.js
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
global.window = dom.window;
global.document = dom.window.document;
// @ts-ignore
global.Element = dom.window.Element;
// @ts-ignore
global.WebSocket = WebSocket;
// Navigator is read-only in newer Node versions and already exists
// global.navigator = dom.window.navigator;
/**
* Hard ceiling above which we skip callout preprocessing entirely. The linear
* scanner below has no quadratic blow-up, but we still cap input defensively so
* a pathological multi-megabyte payload cannot tie up the event loop; in that
* case the markdown is passed through verbatim (callouts are simply not
* detected) rather than risking a slow scan.
*/
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
/** Matches an opening callout fence: `:::type` (type captured, lower-cased). */
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
/** Matches a bare closing callout fence: `:::`. */
const CALLOUT_CLOSE_RE = /^:::\s*$/;
/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/**
* Pre-process Docmost-flavoured markdown: convert `:::type ... :::`
* callout blocks (the syntax our markdown export produces) into HTML
* divs that the callout extension parses. The inner content is rendered
* through marked as regular markdown.
*
* Implemented as a single linear pass over the lines (no quadratic regex
* rescan). It:
* - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
* `:::` line that lives inside a code fence as a callout delimiter, so a
* callout body that itself contains a fenced code block with a `:::` line is
* no longer corrupted;
* - matches an opening `:::type` line with the next CLOSING `:::` at the SAME
* nesting level, supporting NESTED callouts via a depth counter (an inner
* `:::type` opens a deeper level and consumes a matching `:::`);
* - emits the same `<div data-type="callout" data-callout-type="TYPE">` output
* (inner rendered through marked) as the previous regex implementation.
*/
async function preprocessCallouts(markdown) {
// Defensive cap: skip preprocessing for pathologically large inputs.
if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
return markdown;
}
// Recursively transform a slice of lines, converting top-level callouts in
// that slice into <div> blocks and rendering their inner content (which may
// itself contain nested callouts) through this same function.
const transform = async (lines) => {
const out = [];
let inCodeFence = false;
let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it
let i = 0;
while (i < lines.length) {
const line = lines[i];
// Inside a code fence, only its matching closing fence is significant;
// everything else (including `:::` lines) is copied through verbatim.
if (inCodeFence) {
out.push(line);
const fence = line.match(CODE_FENCE_RE);
if (fence && fence[2].startsWith(codeFenceMarker[0]) &&
fence[2].length >= codeFenceMarker.length) {
inCodeFence = false;
codeFenceMarker = "";
}
i++;
continue;
}
// A code fence opening outside any callout body: enter code-fence mode.
const fenceOpen = line.match(CODE_FENCE_RE);
if (fenceOpen) {
inCodeFence = true;
codeFenceMarker = fenceOpen[2];
out.push(line);
i++;
continue;
}
// An opening callout fence: scan forward (with code-fence and nested
// callout awareness) for its matching closing `:::` at the same level.
const open = line.match(CALLOUT_OPEN_RE);
if (open) {
const type = open[1].toLowerCase();
const bodyLines = [];
let depth = 1;
let innerInCodeFence = false;
let innerCodeFenceMarker = "";
let j = i + 1;
for (; j < lines.length; j++) {
const bl = lines[j];
if (innerInCodeFence) {
const f = bl.match(CODE_FENCE_RE);
if (f && f[2].startsWith(innerCodeFenceMarker[0]) &&
f[2].length >= innerCodeFenceMarker.length) {
innerInCodeFence = false;
innerCodeFenceMarker = "";
}
bodyLines.push(bl);
continue;
}
const innerFence = bl.match(CODE_FENCE_RE);
if (innerFence) {
innerInCodeFence = true;
innerCodeFenceMarker = innerFence[2];
bodyLines.push(bl);
continue;
}
if (CALLOUT_OPEN_RE.test(bl)) {
depth++;
bodyLines.push(bl);
continue;
}
if (CALLOUT_CLOSE_RE.test(bl)) {
depth--;
if (depth === 0)
break; // matching close for THIS callout
bodyLines.push(bl);
continue;
}
bodyLines.push(bl);
}
if (j < lines.length) {
// Found the matching closing fence: render the body (recursively, so
// nested callouts are handled) and emit the callout div.
const inner = await transform(bodyLines);
const renderedInner = await marked.parse(inner);
out.push(`\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`);
i = j + 1; // skip past the closing `:::`
continue;
}
// No matching close (unterminated callout): treat the opener as a
// literal line and continue, preserving the original text.
out.push(line);
i++;
continue;
}
out.push(line);
i++;
}
return out.join("\n");
};
return transform(markdown.split("\n"));
}
/**
* Bridge marked's checkbox lists to TipTap task lists.
*
* marked renders GitHub task list items (`- [x] done`) as a plain
* `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the
* markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists
* into the shape those extensions expect:
* TaskList parseHTML matches `ul[data-type="taskList"]`,
* TaskItem matches `li[data-type="taskItem"]`,
* the checked state is read from `data-checked === "true"`.
*
* A list is only converted when it has at least one `<li>` and EVERY direct
* `<li>` contains a checkbox input. Both `<ul>` and `<ol>` are considered: a
* numbered checklist (`1. [x] a`, which marked renders as an `<ol>` of checkbox
* `<li>`s) would otherwise lose its task state. TipTap task lists are unordered,
* so a matching `<ol>` is emitted as `data-type="taskList"` exactly like a
* `<ul>`. Mixed or ordinary lists (including ordinary `<ol>` lists) are left
* untouched so they keep rendering as bullet/numbered lists. The marked `<p>`
* wrapper is kept inside the `<li>` because TaskItem content allows paragraphs.
*/
function bridgeTaskLists(html) {
// Cheap early-out: if the markup contains no checkbox input at all there is
// nothing to bridge, so skip the expensive JSDOM parse entirely. This is the
// common case (most pages have no task lists).
if (!/type=["']?checkbox/i.test(html)) {
return html;
}
// Defensive cap (consistent with preprocessCallouts): skip the bridge for
// pathologically large inputs rather than running a second expensive JSDOM
// parse on a multi-megabyte payload. The markup is passed through verbatim.
if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
return html;
}
const dom = new JSDOM(html);
const document = dom.window.document;
// Collect the checkbox(es) that belong to THIS <li> directly: either direct
// child <input type="checkbox"> elements or ones inside the <li>'s direct <p>
// child (the shape marked emits: `<li><p><input type="checkbox"> text</p></li>`).
// Checkboxes nested deeper (e.g. inside a child <ul>/<ol>) are excluded so a
// bullet <li> that merely contains a nested task sublist is not misdetected.
// Raw inline HTML can put more than one checkbox in a single <li>; we gather
// ALL of them so none survive into the converted item.
const directCheckboxes = (li) => {
const found = [];
for (const child of Array.from(li.children)) {
if (child.tagName === "INPUT" &&
child.getAttribute("type") === "checkbox") {
found.push(child);
continue;
}
if (child.tagName === "P") {
for (const inp of Array.from(child.querySelectorAll(":scope > input[type='checkbox']"))) {
found.push(inp);
}
}
}
return found;
};
// Both <ul> and <ol> are candidates: an <ol> whose every direct <li> carries
// its own checkbox is a numbered checklist that must also become a taskList.
const lists = Array.from(document.querySelectorAll("ul, ol"));
for (const list of lists) {
// Only consider DIRECT child <li> elements; nested lists are handled by
// their own iteration of the outer loop.
const items = Array.from(list.children).filter((child) => child.tagName === "LI");
if (items.length === 0)
continue;
const itemCheckboxes = items.map((li) => directCheckboxes(li));
// Convert only when every direct <li> carries at least one OWN checkbox.
if (!itemCheckboxes.every((boxes) => boxes.length > 0))
continue;
// A numbered checklist arrives as an <ol>. We must NOT leave the tag as
// <ol> while tagging it data-type="taskList": generateJSON would then match
// BOTH the orderedList rule (tag ol) and the taskList rule (data-type),
// emitting a phantom empty orderedList beside the real taskList. So rename a
// qualifying <ol> to a <ul> — move its <li> children over and replace it —
// leaving only the taskList rule to match. Already-<ul> lists are unchanged.
let target = list;
if (list.tagName === "OL") {
const ul = document.createElement("ul");
// Carry over existing attributes (e.g. class) so nothing is silently lost.
for (const attr of Array.from(list.attributes)) {
ul.setAttribute(attr.name, attr.value);
}
// Move every child node (including the <li>s we collected) into the <ul>.
while (list.firstChild) {
ul.appendChild(list.firstChild);
}
list.replaceWith(ul);
target = ul;
}
target.setAttribute("data-type", "taskList");
items.forEach((li, index) => {
const boxes = itemCheckboxes[index];
// The first checkbox determines the checked state (matches the previous
// single-checkbox behaviour); any extras only need removing.
const input = boxes[0] ?? null;
li.setAttribute("data-type", "taskItem");
const checked = input != null &&
(input.hasAttribute("checked") || input.checked);
li.setAttribute("data-checked", checked ? "true" : "false");
// Remove ALL direct checkbox inputs so none survive into the content
// (a raw-inline-HTML <li> may carry more than one).
for (const box of boxes) {
box.remove();
}
});
}
return document.body.innerHTML;
}
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
// definition lines are collected into a single <section data-footnotes>.
// Definition detection + fence handling are shared with analyzeFootnotes via
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
function escapeFootnoteAttr(value) {
return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
}
const footnoteRefMarkedExtension = {
name: "footnoteRef",
level: "inline",
start(src) {
return src.match(/\[\^/)?.index ?? -1;
},
tokenizer(src) {
const match = FOOTNOTE_REF_RE.exec(src);
if (match && match.index === 0) {
return { type: "footnoteRef", raw: match[0], id: match[1] };
}
return undefined;
},
renderer(token) {
return `<sup data-footnote-ref data-id="${escapeFootnoteAttr(token.id)}"></sup>`;
},
};
marked.use({ extensions: [footnoteRefMarkedExtension] });
/**
* Pull `[^id]: text` definition lines out of the body and render a single
* <section data-footnotes> for them (or "" when there are none).
*/
function extractFootnotes(markdown) {
const bodyLines = [];
const defs = [];
// Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
// block is inert and stays in the body verbatim; only real definition lines
// are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics
// match exactly what import keeps/strips (#166).
for (const tok of lexFootnoteLines(markdown)) {
if (!tok.inFence && tok.definition)
defs.push(tok.definition);
else
bodyLines.push(tok.line);
}
if (defs.length === 0)
return { body: markdown, section: "" };
// Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of
// editor-ext extractFootnoteDefinitions). Reference markers are left untouched
// so repeated `[^a]` references reuse the single footnote (Pandoc semantics,
// #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes
// (`duplicateDefinitions`), not silently lost. MUST stay in sync with the
// editor-ext mirror.
const firstById = new Map(); // id -> first definition text
for (const def of defs) {
if (!firstById.has(def.id))
firstById.set(def.id, def.text);
}
const inner = [...firstById.entries()]
.map(([id, text]) => `<div data-footnote-def data-id="${escapeFootnoteAttr(id)}"><p>${marked.parseInline(text || "")}</p></div>`)
.join("");
return {
body: bodyLines.join("\n"),
section: `<section data-footnotes>${inner}</section>`,
};
}
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export async function markdownToProseMirror(markdownContent) {
const withCallouts = await preprocessCallouts(markdownContent);
const { body, section } = extractFootnotes(withCallouts);
const html = (await marked.parse(body)) + section;
const bridged = bridgeTaskLists(html);
return generateJSON(bridged, docmostExtensions);
}
/**
* Build the collaboration WebSocket URL from an API base URL:
* switch http(s)->ws(s), strip a trailing /api, mount on /collab.
* Shared by the live read and the mutate path so both target the same socket.
*/
export function buildCollabWsUrl(baseUrl) {
let wsUrl = baseUrl.replace(/^http/, "ws");
try {
const urlObj = new URL(wsUrl);
if (urlObj.pathname.endsWith("/api") || urlObj.pathname.endsWith("/api/")) {
urlObj.pathname = urlObj.pathname.replace(/\/api\/?$/, "");
}
urlObj.pathname = urlObj.pathname.replace(/\/$/, "") + "/collab";
// Drop any query/hash from the base URL so it is not carried into the
// collaboration ws URL.
urlObj.search = "";
urlObj.hash = "";
wsUrl = urlObj.toString();
}
catch (e) {
// Fallback if URL parsing fails
if (!wsUrl.endsWith("/collab")) {
wsUrl = wsUrl.replace(/\/$/, "") + "/collab";
}
}
return wsUrl;
}
/**
* Encode a ProseMirror doc to a Yjs document, sanitizing it first and turning
* the opaque yjs "Unexpected content type" failure into a descriptive error.
*
* `sanitizeForYjs` strips `undefined` node/mark attributes (the common cause of
* the failure); if `toYdoc` still throws, `findUnstorableAttr` is used to point
* at the offending attribute path.
*/
export function buildYDoc(doc) {
const safe = sanitizeForYjs(doc);
try {
return TiptapTransformer.toYdoc(safe, "default", docmostExtensions);
}
catch (e) {
throw unstorableYjsError(safe, "toYdoc", e);
}
}
/**
* Write a new ProseMirror doc into the live Yjs fragment by STRUCTURAL DIFF,
* preserving the Yjs identity of unchanged nodes (issue #152).
*
* The previous approach deleted the whole fragment and re-applied a fresh Y.Doc,
* which discarded every Yjs node id. y-prosemirror anchors the editor selection
* to those ids, so an open editor's cursor lost its anchor and snapped to the
* end of the document on every agent write (most visibly on comment anchoring,
* which changes no text at all). `updateYFragment` is exactly the routine the
* editor itself uses to sync ProseMirror edits into Yjs: it diffs the new node
* against the current fragment and touches only the changed children, so
* unchanged nodes keep their ids and the live cursor stays put.
*
* Must run inside a single `transact` so the diff applies atomically (no remote
* update interleaves). Keeps `buildYDoc`'s `findUnstorableAttr` diagnostic for
* the opaque "Unexpected content type" encode failure.
*/
export function applyDocToFragment(ydoc, newDoc) {
const safe = sanitizeForYjs(newDoc);
const fragment = ydoc.getXmlFragment("default");
// Hydrate the ProseMirror node in its OWN try so a failure here (e.g. an
// unknown node type) is labelled "fromJSON" — the stage that actually threw —
// instead of being misattributed to the Yjs write stage (#154 review).
let pmNode;
try {
pmNode = PMNode.fromJSON(docmostSchema, safe);
}
catch (e) {
throw unstorableYjsError(safe, "fromJSON", e);
}
try {
ydoc.transact(() => {
updateYFragment(ydoc, fragment, pmNode, {
mapping: new Map(),
isOMark: new Map(),
});
});
}
catch (e) {
throw unstorableYjsError(safe, "updateYFragment", e);
}
}
/**
* Run an independent Yjs-encodability check (the same `sanitizeForYjs` + schema
* the apply path uses) and throw the same descriptive error when the doc cannot
* be stored. Used by the dry-run preview.
*
* Note: it does NOT run `updateYFragment` against the live fragment, so it is an
* encodability GATE, not a byte-for-byte rehearsal of apply — `buildYDoc`
* (`toYdoc`) and `applyDocToFragment` (`updateYFragment`) are two different
* encoders that nonetheless reject the same unstorable attributes. To narrow the
* preview/apply gap it ALSO rehearses the apply path's `PMNode.fromJSON`
* hydration, so a doc that would only fail there (e.g. an unknown node type) is
* rejected at preview time too (#154 review). Still cheap: no live fragment, no
* `updateYFragment`.
*/
export function assertYjsEncodable(doc) {
buildYDoc(doc);
const safe = sanitizeForYjs(doc);
try {
PMNode.fromJSON(docmostSchema, safe);
}
catch (e) {
throw unstorableYjsError(safe, "fromJSON", e);
}
}
/** Time we wait for the initial handshake/sync before giving up. */
const CONNECT_TIMEOUT_MS = 25000;
/** Time we wait for the server to acknowledge our write before giving up. */
const PERSIST_TIMEOUT_MS = 20000;
/**
* Safely mutate the live content of a page over the collaboration websocket.
*
* This is the single safe write path for every MCP content mutation. It:
* 1. serializes per-page writes through withPageLock (no two MCP writes on
* the same page overlap);
* 2. connects to Hocuspocus and waits for the initial sync so the local ydoc
* mirrors the authoritative server doc — INCLUDING edits/comments/images
* that are not yet in the debounced REST snapshot;
* 3. inside onSynced, SYNCHRONOUSLY reads the live doc, runs `transform`, and
* writes the result back — with no `await` between read and write so no
* remote update can interleave and clobber concurrent human edits;
* 4. waits for the server to acknowledge the write (unsyncedChanges -> 0)
* before resolving, so the next operation observes our change.
*
* `transform` receives the live ProseMirror doc and returns the NEW full
* ProseMirror doc to write, or `null` to abort with no write (a no-op). If
* `transform` throws, the error is propagated to the caller (not swallowed).
*
* Resolves a `MutationResult { doc, verify }`: `doc` is the doc that was
* written (or the live doc when the transform aborted), and `verify` is a
* verifiable change report (text/block/mark deltas) of what actually changed.
* The report is computed AFTER the atomic read->write, so it never widens the
* read->write window, and it never throws (it can NEVER break a write).
*/
export async function mutatePageContent(pageId, collabToken, baseUrl, transform) {
return withPageLock(pageId, () => {
if (process.env.DEBUG) {
console.error(`Starting realtime content mutate for page ${pageId}`);
// Token prefix is sensitive; only log it under DEBUG.
console.error(`Token prefix: ${collabToken ? collabToken.substring(0, 5) : "NONE"}...`);
}
const ydoc = new Y.Doc();
const wsUrl = buildCollabWsUrl(baseUrl);
if (process.env.DEBUG)
console.error(`Connecting to WebSocket: ${wsUrl}`);
return new Promise((resolve, reject) => {
let provider;
let applied = false; // onSynced may fire again on reconnect — apply once.
let settled = false;
// Set true on disconnect/close so a reconnect-driven unsyncedChanges->0
// cannot be mistaken for a successful persist of our write.
let connectionLost = false;
let connectTimer;
let persistTimer;
let unsyncedHandler;
const cleanup = () => {
if (connectTimer)
clearTimeout(connectTimer);
if (persistTimer)
clearTimeout(persistTimer);
if (provider) {
if (unsyncedHandler) {
try {
provider.off("unsyncedChanges", unsyncedHandler);
}
catch (err) { }
}
try {
provider.destroy();
}
catch (err) { }
}
};
const finish = (err, value) => {
if (settled)
return;
settled = true;
cleanup();
if (err)
reject(err);
else
resolve(value);
};
connectTimer = setTimeout(() => {
finish(new Error("Connection timeout to collaboration server"));
}, CONNECT_TIMEOUT_MS);
// Resolve once the server has acknowledged our update. The provider
// increments unsyncedChanges when our local update is sent and
// decrements it when the server replies with a SyncStatus(applied=true);
// reaching 0 means the authoritative in-memory ydoc on the server now
// contains our write.
const waitForPersistence = () => {
if (settled)
return;
// A missing provider is a failure, not a success: without it the write
// can never have been acknowledged. Only an actual unsyncedChanges===0
// on a live provider counts as persisted.
if (!provider) {
finish(new Error("collab provider gone before persistence"));
return;
}
if (provider.unsyncedChanges === 0) {
finish(null, mutationResult);
return;
}
persistTimer = setTimeout(() => {
finish(new Error("Timeout waiting for collaboration server to persist the update"));
}, PERSIST_TIMEOUT_MS);
unsyncedHandler = (data) => {
// Only treat unsyncedChanges->0 as success when the connection is
// still up. A transient disconnect + reconnect handshake can drive
// the counter back to 0 without our write being re-transmitted; in
// that case let the disconnect/close error win instead.
if (data.number === 0 && !connectionLost) {
finish(null, mutationResult);
}
};
provider.on("unsyncedChanges", unsyncedHandler);
};
// The verifiable result resolved on every success/abort path. Set on
// abort (no-op report) and after a real write (computed change report).
let mutationResult;
provider = new HocuspocusProvider({
url: wsUrl,
name: `page.${pageId}`,
document: ydoc,
token: collabToken,
// @ts-ignore - Required for Node.js environment
WebSocketPolyfill: WebSocket,
onConnect: () => {
if (process.env.DEBUG)
console.error("WS Connect");
},
// An unexpected disconnect/close while we are still waiting (during the
// connect-wait before onSynced, or during the persistence wait after the
// write) means the update will never be acknowledged — surface it now
// instead of hanging until the connect/persist timeout fires. `finish`
// is idempotent via the `settled` flag, so the onClose that our own
// cleanup()->provider.destroy() triggers (after settled=true is set) is
// a harmless no-op and cannot cause a double-resolve.
onDisconnect: () => {
if (process.env.DEBUG)
console.error("WS Disconnect");
// Mark BEFORE finish so the unsyncedChanges handler (if it races)
// sees the connection as lost and won't report a false success.
connectionLost = true;
finish(new Error("Collaboration connection closed before the update was persisted/synced"));
},
onClose: () => {
if (process.env.DEBUG)
console.error("WS Close");
// Mark BEFORE finish so the unsyncedChanges handler (if it races)
// sees the connection as lost and won't report a false success.
connectionLost = true;
finish(new Error("Collaboration connection closed before the update was persisted/synced"));
},
onSynced: () => {
if (applied || settled)
return;
applied = true;
if (process.env.DEBUG)
console.error("Connected and synced!");
// CRITICAL: everything between reading the live doc and writing it
// back must stay synchronous (no await). While the JS event loop is
// not yielded, no incoming remote update can interleave, so any
// already-synced concurrent edits are preserved in liveDoc.
let newDoc;
let beforeDoc;
try {
let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
if (!liveDoc ||
typeof liveDoc !== "object" ||
!Array.isArray(liveDoc.content)) {
liveDoc = { type: "doc", content: [] };
}
// Snapshot the before-doc for the change report. Docs are
// JSON-serializable, so this is a safe deep clone.
beforeDoc = JSON.parse(JSON.stringify(liveDoc));
newDoc = transform(liveDoc);
if (newDoc == null) {
// Transform aborted — write nothing, return the live doc with a
// no-op change report.
mutationResult = {
doc: liveDoc,
verify: {
changed: false,
textInserted: 0,
textDeleted: 0,
blocksChanged: 0,
marks: {},
summary: "no changes (transform aborted)",
},
};
finish(null, mutationResult);
return;
}
// Structural diff into the live fragment (issue #152): preserves
// the Yjs ids of unchanged nodes, so an open editor's cursor is not
// yanked to the end of the document on every agent write.
applyDocToFragment(ydoc, newDoc);
}
catch (e) {
// Includes errors thrown by transform (e.g. "afterText not found",
// "text not found"): propagate them verbatim to the caller.
finish(e instanceof Error ? e : new Error(String(e)));
return;
}
// Compute the verifiable change report AFTER the transact write: it
// only needs the JSON before/after, so it cannot affect the atomic
// read->write window, and summarizeChange never throws.
mutationResult = {
doc: newDoc,
verify: summarizeChange(beforeDoc, newDoc),
};
if (process.env.DEBUG)
console.error("Content written, waiting for server to persist...");
waitForPersistence();
},
onAuthenticationFailed: () => {
finish(new Error("Authentication failed for collaboration connection"));
},
});
});
});
}
/**
* Replace the live content of a page over the collaboration websocket.
* Accepts a ready ProseMirror JSON document; the caller controls whether
* it was produced from markdown (ids regenerate) or edited in place
* (existing block ids preserved).
*
* This is an intentional full replace (used by update_page / update_page_json),
* but now runs under the per-page lock and waits for server persistence via
* mutatePageContent.
*/
export async function replacePageContent(pageId, prosemirrorDoc, collabToken, baseUrl) {
// Fail fast on a bad document instead of deferring the failure into the
// collaboration write (where TiptapTransformer.toYdoc(undefined) used to
// throw). The transform must return a valid ProseMirror doc.
if (prosemirrorDoc == null ||
typeof prosemirrorDoc !== "object" ||
prosemirrorDoc.type !== "doc") {
throw new Error("replacePageContent: invalid ProseMirror document");
}
return await mutatePageContent(pageId, collabToken, baseUrl, () => prosemirrorDoc);
}
/**
* Markdown update path (kept for backwards compatibility).
* NOTE: this re-imports the whole document — block ids are regenerated.
* Tables and :::callout::: blocks survive thanks to the full schema.
*/
export async function updatePageContentRealtime(pageId, markdownContent, collabToken, baseUrl) {
const tiptapJson = await markdownToProseMirror(markdownContent);
return await mutatePageContent(pageId, collabToken, baseUrl, () => tiptapJson);
}

View File

@@ -1,239 +0,0 @@
/**
* Inline-comment anchoring against a ProseMirror document.
*
* Docmost stores an inline comment's highlight as a `comment` MARK on the
* document text (`{ type: "comment", attrs: { commentId, resolved } }`); the
* `/comments/create` API only records the comment row + its `selection` text and
* does NOT insert that mark, so the anchor has to be written into the page
* content separately. This module finds where a selection lives in the document
* and splices the comment mark across the matched range.
*
* Matching has to be robust because the agent supplies the selection as plain
* text while the document stores rich inline content: a selection can span
* several adjacent text nodes (inline code / bold / links each become their own
* text node), and the document may use smart/typographic quotes, dash variants,
* non-breaking spaces, or collapsed runs of whitespace that the agent typed as
* ASCII quotes/hyphens/single spaces. We therefore normalize both sides before
* comparing and match across maximal runs of consecutive text nodes within a
* single block, while mapping every normalized character back to its raw index
* so the mark lands on the exact original characters.
*/
/** Typographic double-quote variants mapped to ASCII `"`. */
const DOUBLE_QUOTES = "«»„“”‟〝〞"";
/** Typographic single-quote/apostrophe variants mapped to ASCII `'`. */
const SINGLE_QUOTES = "‘’‚‛";
/** Dash variants mapped to ASCII `-`. */
const DASHES = "–—―−‐‑‒";
/** Guard against pathological/cyclic documents in the depth-first walk. */
const MAX_DEPTH = 200;
/** The comment mark Docmost stores on anchored text. */
function makeCommentMark(commentId) {
// The comment mark schema declares both commentId and resolved; include
// resolved:false for completeness so the stored mark matches the editor's.
return { type: "comment", attrs: { commentId, resolved: false } };
}
/** True for any character we collapse/replace with a single normal space. */
function isWhitespaceChar(ch) {
// Regular ASCII whitespace plus the special spaces called out in the spec:
// nbsp, narrow nbsp, en/em/thin/hair/figure spaces, etc. \s covers tab and
// newline; the explicit code points cover the non-breaking variants \s misses
// in some engines, so list them for determinism.
return (/\s/.test(ch) ||
ch === " " || // no-break space
ch === " " || // figure space
ch === " " || // narrow no-break space
ch === " " || // thin space
ch === " " || // hair space
ch === " " || // en space
ch === " " // em space
);
}
/**
* Normalize a string for matching and return both the normalized text and a
* `map` where `map[i]` is the index into the ORIGINAL `s` of the i-th
* normalized character.
*
* Rules: map smart quotes / dashes / special spaces to their ASCII forms,
* collapse any run of whitespace to a SINGLE space (whose map entry points at
* the FIRST raw whitespace char of the run), and DO NOT lowercase (anchoring is
* case-sensitive to match the exact document text).
*/
export function normalizeForMatch(s) {
let norm = "";
const map = [];
let i = 0;
while (i < s.length) {
const ch = s[i];
if (isWhitespaceChar(ch)) {
// Collapse the whole whitespace run to one space mapped to the run start.
const runStart = i;
while (i < s.length && isWhitespaceChar(s[i]))
i++;
norm += " ";
map.push(runStart);
continue;
}
let mapped = ch;
if (DOUBLE_QUOTES.indexOf(ch) !== -1)
mapped = '"';
else if (SINGLE_QUOTES.indexOf(ch) !== -1)
mapped = "'";
else if (DASHES.indexOf(ch) !== -1)
mapped = "-";
norm += mapped;
map.push(i);
i++;
}
return { norm, map };
}
/**
* Find a selection inside a SINGLE block's direct `content` array.
*
* Builds maximal runs of consecutive `text` nodes (any non-text inline node,
* e.g. a mention, breaks the run), normalizes each run and the selection the
* same way, then searches each run for the normalized selection. Returns the
* child/offset range of the FIRST matching run, or `null` if none match.
*/
export function findAnchorInBlock(blockContent, selection) {
if (!Array.isArray(blockContent))
return null;
const normSelObj = normalizeForMatch(selection);
// Trim leading/trailing spaces on the NORMALIZED selection only.
const normSel = normSelObj.norm.trim();
if (normSel.length === 0)
return null;
let i = 0;
while (i < blockContent.length) {
const node = blockContent[i];
if (!node || typeof node !== "object" || node.type !== "text") {
i++;
continue;
}
// Accumulate a maximal run of consecutive text nodes.
let rawRun = "";
const rawToChild = [];
let j = i;
while (j < blockContent.length) {
const n = blockContent[j];
if (!n || typeof n !== "object" || n.type !== "text")
break;
const text = typeof n.text === "string" ? n.text : "";
for (let k = 0; k < text.length; k++) {
rawToChild.push({ childIdx: j, offset: k });
}
rawRun += text;
j++;
}
// Try to match within this run.
const { norm, map } = normalizeForMatch(rawRun);
const idx = norm.indexOf(normSel);
if (idx !== -1) {
const rawStart = map[idx];
const rawEndExclusive = idx + normSel.length < map.length
? map[idx + normSel.length]
: rawRun.length;
const startLoc = rawToChild[rawStart];
// rawEndExclusive points at the raw char AFTER the match; the last matched
// raw char is at rawEndExclusive-1, so endOffset is its offset + 1.
const lastLoc = rawToChild[rawEndExclusive - 1];
return {
startChild: startLoc.childIdx,
startOffset: startLoc.offset,
endChild: lastLoc.childIdx,
endOffset: lastLoc.offset + 1,
};
}
// No match in this run: continue scanning AFTER it.
i = j > i ? j : i + 1;
}
return null;
}
/**
* Depth-first, document-order check for whether `selection` can be anchored
* anywhere in `doc`. At each node with an array `content`, first try to match
* within that node's own content, then recurse into children that themselves
* have a `content` array.
*/
export function canAnchorInDoc(doc, selection) {
const visit = (node, depth) => {
if (depth > MAX_DEPTH || !node || typeof node !== "object")
return false;
if (!Array.isArray(node.content))
return false;
if (findAnchorInBlock(node.content, selection))
return true;
for (const child of node.content) {
if (child && typeof child === "object" && Array.isArray(child.content)) {
if (visit(child, depth + 1))
return true;
}
}
return false;
};
return visit(doc, 0);
}
/**
* Split the matched text nodes and splice the comment mark across the range.
* `blockContent` is mutated IN PLACE. `match.startChild..endChild` are all text
* nodes (guaranteed by findAnchorInBlock building runs of text nodes).
*/
function spliceCommentMark(blockContent, match, commentId) {
const { startChild, startOffset, endChild, endOffset } = match;
const commentMark = makeCommentMark(commentId);
const fragments = [];
for (let k = startChild; k <= endChild; k++) {
const n = blockContent[k];
const text = typeof n.text === "string" ? n.text : "";
const sliceStart = k === startChild ? startOffset : 0;
const sliceEnd = k === endChild ? endOffset : text.length;
const before = k === startChild ? text.slice(0, startOffset) : "";
const marked = text.slice(sliceStart, sliceEnd);
const after = k === endChild ? text.slice(endOffset) : "";
// Process per-node so each node's OWN marks/attrs are preserved.
const ownMarks = Array.isArray(n.marks) ? n.marks : [];
// Drop any pre-existing comment mark from the marked fragment so it ends up
// with exactly one comment mark (the new one) rather than two.
const markedBaseMarks = ownMarks.filter((m) => !(m && m.type === "comment"));
if (before.length > 0) {
fragments.push({ ...n, text: before, marks: [...ownMarks] });
}
if (marked.length > 0) {
fragments.push({
...n,
text: marked,
marks: [...markedBaseMarks, commentMark],
});
}
if (after.length > 0) {
fragments.push({ ...n, text: after, marks: [...ownMarks] });
}
}
blockContent.splice(startChild, endChild - startChild + 1, ...fragments);
}
/**
* Depth-first (same order as canAnchorInDoc) over `doc`; on the FIRST block
* whose content matches `selection`, splice the comment mark across the matched
* range in place and return true. Returns false (and does NOT mutate) when no
* block matches.
*/
export function applyAnchorInDoc(doc, selection, commentId) {
const visit = (node, depth) => {
if (depth > MAX_DEPTH || !node || typeof node !== "object")
return false;
if (!Array.isArray(node.content))
return false;
const match = findAnchorInBlock(node.content, selection);
if (match) {
spliceCommentMark(node.content, match, commentId);
return true;
}
for (const child of node.content) {
if (child && typeof child === "object" && Array.isArray(child.content)) {
if (visit(child, depth + 1))
return true;
}
}
return false;
};
return visit(doc, 0);
}

View File

@@ -1,423 +0,0 @@
/**
* Headless, Docmost-equivalent document diff.
*
* Docmost's history editor computes a change set with the exact pipeline below
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
* editor decorations. This module runs the SAME computation but serializes the
* result to text + integrity counts instead of decorations, so a diff can be
* previewed without a browser.
*
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
* maintained published fork of the MIT prosemirror-recreate-steps source that
* Docmost vendors in @docmost/editor-ext; it exposes the identical
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
* signature.
*
* If recreateTransform / the changeset throws on a pathological document pair,
* we fall back to a coarse block-level text diff so the tool never hard-fails.
*/
import { Node } from "@tiptap/pm/model";
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
import { docmostSchema } from "./docmost-schema.js";
/** Recursively concatenate the plain text of a JSON node. */
function plainText(node) {
if (!node || typeof node !== "object")
return "";
let out = "";
if (typeof node.text === "string")
out += node.text;
if (Array.isArray(node.content)) {
for (const child of node.content)
out += plainText(child);
}
return out;
}
/** Count nodes in a JSON doc that satisfy `pred` (recursive). */
function countNodes(doc, pred) {
let n = 0;
const visit = (node) => {
if (!node || typeof node !== "object")
return;
if (pred(node))
n++;
if (Array.isArray(node.content))
for (const c of node.content)
visit(c);
};
visit(doc);
return n;
}
/**
* Count UNIQUE links in a JSON doc by their `href`. A single link can be split
* across several adjacent text runs (e.g. a "link+bold" run followed by a "link"
* run); counting link-bearing runs would over-count it. Walking the tree and
* collecting hrefs into a Set keys each distinct link once. Link marks with a
* missing/empty href are bucketed under a single "" key so a malformed link is
* still counted as one.
*/
function countUniqueLinks(doc) {
const hrefs = new Set();
const visit = (node) => {
if (!node || typeof node !== "object")
return;
if (node.type === "text" && Array.isArray(node.marks)) {
for (const m of node.marks) {
if (m && m.type === "link") {
const href = m.attrs && typeof m.attrs.href === "string" ? m.attrs.href : "";
hrefs.add(href);
}
}
}
if (Array.isArray(node.content))
for (const c of node.content)
visit(c);
};
visit(doc);
return hrefs.size;
}
/** Count footnoteReference nodes anywhere under a node (reading order). */
function countFootnoteRefs(node) {
if (!node || typeof node !== "object")
return 0;
let n = node.type === "footnoteReference" ? 1 : 0;
if (Array.isArray(node.content)) {
for (const child of node.content)
n += countFootnoteRefs(child);
}
return n;
}
/**
* Ordered list of footnote marker numbers found in the BODY only (every
* top-level block before the first "Примечания..." notes heading; if no such
* heading, the whole doc), in reading order.
*
* Supports BOTH representations:
* - real `footnoteReference` nodes (the current footnote feature) — numbered
* 1..n by reading position, since their visible number is derived;
* - legacy `[N]` text markers (older translated docs) — the literal N.
*/
function footnoteMarkers(doc, notesHeading) {
const top = Array.isArray(doc?.content) ? doc.content : [];
const notesIdx = top.findIndex((n) => n &&
n.type === "heading" &&
plainText(n).trim() === notesHeading);
const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top;
// Real footnoteReference nodes take precedence: when present, number them by
// reading position (their displayed number is not stored).
let refCount = 0;
for (const block of bodyBlocks)
refCount += countFootnoteRefs(block);
if (refCount > 0) {
return Array.from({ length: refCount }, (_, i) => i + 1);
}
// Fallback: legacy `[N]` text markers.
const markers = [];
const re = /\[(\d+)\]/g;
for (const block of bodyBlocks) {
const text = plainText(block);
let m;
re.lastIndex = 0;
while ((m = re.exec(text)) !== null) {
markers.push(Number(m[1]));
}
}
return markers;
}
/** Compute the [old,new] integrity tuples for two JSON docs. */
function computeIntegrity(oldDoc, newDoc, notesHeading) {
const images = [
countNodes(oldDoc, (n) => n.type === "image"),
countNodes(newDoc, (n) => n.type === "image"),
];
const links = [
countUniqueLinks(oldDoc),
countUniqueLinks(newDoc),
];
const tables = [
countNodes(oldDoc, (n) => n.type === "table"),
countNodes(newDoc, (n) => n.type === "table"),
];
const callouts = [
countNodes(oldDoc, (n) => n.type === "callout"),
countNodes(newDoc, (n) => n.type === "callout"),
];
const fns = [
footnoteMarkers(oldDoc, notesHeading),
footnoteMarkers(newDoc, notesHeading),
];
return { images, links, tables, callouts, footnoteMarkers: fns };
}
/**
* Resolve the lead text of the top-level block in a ProseMirror Node that
* contains the given document position. Returns "" when out of range.
*/
function blockContextAt(node, pos) {
try {
const clamped = Math.max(0, Math.min(pos, node.content.size));
const $pos = node.resolve(clamped);
// depth 1 is the top-level block in a doc node.
const block = $pos.depth >= 1 ? $pos.node(1) : $pos.node(0);
const text = block.textContent || "";
return text.length > 80 ? text.slice(0, 77) + "..." : text;
}
catch {
return "";
}
}
/** Truncate a string for the markdown summary. */
function truncate(s, n = 120) {
return s.length > n ? s.slice(0, n - 3) + "..." : s;
}
/**
* Coarse fallback: a block-by-block plain-text diff. Used only when the precise
* changeset pipeline throws, so the tool degrades gracefully instead of failing.
*/
function coarseDiff(oldDoc, newDoc) {
const oldBlocks = Array.isArray(oldDoc?.content) ? oldDoc.content : [];
const newBlocks = Array.isArray(newDoc?.content) ? newDoc.content : [];
const oldTexts = oldBlocks.map(plainText);
const newTexts = newBlocks.map(plainText);
const oldSet = new Set(oldTexts);
const newSet = new Set(newTexts);
const changes = [];
for (const t of oldTexts) {
if (!newSet.has(t) && t.trim() !== "") {
changes.push({ op: "delete", block: truncate(t, 80), text: t });
}
}
for (const t of newTexts) {
if (!oldSet.has(t) && t.trim() !== "") {
changes.push({ op: "insert", block: truncate(t, 80), text: t });
}
}
return changes;
}
/** Build the human-readable unified-ish markdown summary. */
function renderMarkdown(result, fellBack) {
const lines = [];
const { summary, integrity, changes } = result;
lines.push(`# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`);
if (fellBack) {
lines.push("");
lines.push("> note: precise diff failed; coarse block-level diff shown.");
}
lines.push("");
lines.push("## Integrity (old -> new)");
lines.push(`- images: ${integrity.images[0]} -> ${integrity.images[1]}`);
lines.push(`- links: ${integrity.links[0]} -> ${integrity.links[1]}`);
lines.push(`- tables: ${integrity.tables[0]} -> ${integrity.tables[1]}`);
lines.push(`- callouts: ${integrity.callouts[0]} -> ${integrity.callouts[1]}`);
lines.push(`- footnoteMarkers: [${integrity.footnoteMarkers[0].join(", ")}] -> [${integrity.footnoteMarkers[1].join(", ")}]`);
lines.push("");
lines.push("## Changes");
if (changes.length === 0) {
lines.push("(no textual changes)");
}
else {
for (const c of changes) {
const sign = c.op === "insert" ? "+" : "-";
const ctx = c.block ? ` @ ${truncate(c.block, 60)}` : "";
lines.push(`${sign} ${truncate(c.text)}${ctx}`);
}
}
return lines.join("\n");
}
/**
* Diff two ProseMirror JSON documents the way Docmost's history editor does and
* serialize the result to text + integrity counts.
*
* @param oldDocJson the earlier document
* @param newDocJson the later document
* @param notesHeading heading delimiting body from notes for footnote counting
*/
export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);
let changes = [];
let inserted = 0;
let deleted = 0;
let fellBack = false;
const changedBlocks = new Set();
try {
const oldNode = Node.fromJSON(docmostSchema, oldDocJson);
const newNode = Node.fromJSON(docmostSchema, newDocJson);
const tr = recreateTransform(oldNode, newNode, {
complexSteps: false,
wordDiffs: true,
simplifyDiff: true,
});
const changeSet = ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
const simplified = simplifyChanges(changeSet.changes, newNode);
for (const change of simplified) {
// Deleted text lives in the OLD doc coordinate range [fromA, toA).
if (change.toA > change.fromA) {
const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
if (text.length > 0) {
deleted += text.length;
const block = blockContextAt(oldNode, change.fromA);
changes.push({ op: "delete", block, text });
if (block)
changedBlocks.add("d:" + block);
}
}
// Inserted text lives in the NEW doc coordinate range [fromB, toB).
if (change.toB > change.fromB) {
const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
if (text.length > 0) {
inserted += text.length;
const block = blockContextAt(newNode, change.fromB);
changes.push({ op: "insert", block, text });
if (block)
changedBlocks.add("i:" + block);
}
}
}
}
catch {
// Pathological pair: degrade to a coarse block-level diff so we never throw.
fellBack = true;
changes = coarseDiff(oldDocJson, newDocJson);
for (const c of changes) {
if (c.op === "insert")
inserted += c.text.length;
else
deleted += c.text.length;
if (c.block)
changedBlocks.add(c.op[0] + ":" + c.block);
}
}
const partial = {
summary: { inserted, deleted, blocksChanged: changedBlocks.size },
integrity,
changes,
};
return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
/**
* Recursively walk every `text` node and tally the count of each mark by
* `mark.type` (e.g. `{ bold: 5, strike: 3, link: 2 }`). Pure and never throws.
*/
function markCounts(doc) {
const counts = {};
const visit = (node) => {
if (!node || typeof node !== "object")
return;
if (node.type === "text" && Array.isArray(node.marks)) {
for (const m of node.marks) {
if (m && typeof m.type === "string") {
counts[m.type] = (counts[m.type] || 0) + 1;
}
}
}
if (Array.isArray(node.content))
for (const c of node.content)
visit(c);
};
visit(doc);
return counts;
}
/**
* Build a VerifyReport for a content mutation. Pure and never throws — on any
* internal error it returns a minimal "changed (diff unavailable)" report so it
* can NEVER break a write.
*
* `changed` is VALUE-based, not JSON-string-based: it is derived from the actual
* deltas (text chars, blocks, mark counts, structural integrity counts), so two
* value-equal docs that differ only in JSON key order report cleanly as
* `changed:false` / "no content change" rather than a misleading +0/-0 change.
*
* The structural integrity delta (from diffDocs's `integrity` tuples) is what
* makes `changed` true for an image/table/callout/link count change that diffs
* to zero text — closing a verify blind spot for insert_image, delete_node on a
* table, etc.
*/
export function summarizeChange(before, after) {
try {
const diff = diffDocs(before, after);
// Per-mark-type delta: include a type only when its count actually changed.
const beforeMarks = markCounts(before);
const afterMarks = markCounts(after);
const marks = {};
for (const type of new Set([
...Object.keys(beforeMarks),
...Object.keys(afterMarks),
])) {
const b = beforeMarks[type] || 0;
const a = afterMarks[type] || 0;
if (b !== a)
marks[type] = [b, a];
}
// Structural integrity delta from diffDocs: count-based [old,new] tuples for
// images/links/tables/callouts. Include a type only when old != new.
const integrity = diff.integrity;
const structure = {};
const countTypes = [
"images",
"links",
"tables",
"callouts",
];
for (const type of countTypes) {
const [b, a] = integrity[type];
if (b !== a)
structure[type] = [b, a];
}
const textInserted = diff.summary.inserted;
const textDeleted = diff.summary.deleted;
const blocksChanged = diff.summary.blocksChanged;
const hasMarkDelta = Object.keys(marks).length > 0;
const hasStructureDelta = Object.keys(structure).length > 0;
// VALUE-based change decision: ignore JSON key-order no-ops entirely.
const changed = textInserted > 0 ||
textDeleted > 0 ||
blocksChanged > 0 ||
hasMarkDelta ||
hasStructureDelta;
if (!changed) {
return {
changed: false,
textInserted: 0,
textDeleted: 0,
blocksChanged: 0,
marks: {},
summary: "no content change",
};
}
const parts = [];
// Only mention text/blocks when they actually changed (avoid a misleading
// "+0/-0 chars, 0 block(s)" prefix on a pure mark/structure change).
if (textInserted > 0 || textDeleted > 0 || blocksChanged > 0) {
parts.push(`+${textInserted}/-${textDeleted} chars, ${blocksChanged} block(s)`);
}
const markParts = Object.entries(marks).map(([type, [b, a]]) => `${type} ${b}${a}`);
if (markParts.length > 0)
parts.push(`marks: ${markParts.join(", ")}`);
const structureParts = Object.entries(structure).map(([type, [b, a]]) => `${type} ${b}${a}`);
if (structureParts.length > 0)
parts.push(structureParts.join(", "));
// `changed` is true here, so at least one group is present and parts is non-empty.
const summary = `changed: ${parts.join("; ")}`;
const report = {
changed: true,
textInserted,
textDeleted,
blocksChanged,
marks,
summary,
};
if (hasStructureDelta)
report.structure = structure;
return report;
}
catch {
// A pathological pair must never break a write: degrade to a minimal report.
return {
changed: true,
textInserted: 0,
textDeleted: 0,
blocksChanged: 0,
marks: {},
summary: "changed (diff unavailable)",
};
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,87 +0,0 @@
/**
* Filter functions to extract only relevant information from API responses
* for better agent consumption
*/
export function filterWorkspace(data) {
return {
id: data.id,
name: data.name,
description: data.description,
defaultSpaceId: data.defaultSpaceId,
createdAt: data.createdAt,
updatedAt: data.updatedAt,
deletedAt: data.deletedAt,
};
}
export function filterSpace(space) {
return {
id: space.id,
name: space.name,
description: space.description,
slug: space.slug,
visibility: space.visibility,
createdAt: space.createdAt,
updatedAt: space.updatedAt,
deletedAt: space.deletedAt,
};
}
export function filterGroup(group) {
return {
id: group.id,
name: group.name,
description: group.description,
workspaceId: group.workspaceId,
createdAt: group.createdAt,
updatedAt: group.updatedAt,
deletedAt: group.deletedAt,
};
}
export function filterPage(page, content, subpages) {
return {
id: page.id,
slugId: page.slugId,
title: page.title,
parentPageId: page.parentPageId,
spaceId: page.spaceId,
isLocked: page.isLocked,
createdAt: page.createdAt,
updatedAt: page.updatedAt,
deletedAt: page.deletedAt,
// Include converted markdown content if valid string (even empty)
...(typeof content === "string" && { content }),
// Include subpages if provided
...(subpages &&
subpages.length > 0 && {
subpages: subpages.map((p) => ({ id: p.id, title: p.title })),
}),
};
}
export function filterComment(comment, markdownContent) {
return {
id: comment.id,
pageId: comment.pageId,
content: markdownContent ?? comment.content,
selection: comment.selection || null,
type: comment.type || "page",
parentCommentId: comment.parentCommentId || null,
creatorId: comment.creatorId,
creatorName: comment.creator?.name || null,
createdAt: comment.createdAt,
editedAt: comment.editedAt || null,
resolvedAt: comment.resolvedAt || null,
resolvedById: comment.resolvedById || null,
};
}
export function filterSearchResult(result) {
return {
id: result.id,
title: result.title,
parentPageId: result.parentPageId,
createdAt: result.createdAt,
updatedAt: result.updatedAt,
rank: result.rank,
highlight: result.highlight,
spaceId: result.space?.id,
spaceName: result.space?.name,
};
}

View File

@@ -1,393 +0,0 @@
/**
* Surgical text edits on a ProseMirror document without re-importing it.
*
* Each edit replaces an exact substring of a block's inline text, preserving
* every node id, mark and attribute around it. Matching works at the
* INLINE-CONTAINER (block) level: a block's text nodes are flattened into a
* per-character array, so a `find` may freely cross bold/italic/link
* boundaries (separate text nodes). The replacement inherits marks from the
* unchanged common prefix/suffix of the match, so editing plain text next to a
* bold word keeps the bold word bold, and editing the inside of a bold word
* keeps the inserted text bold. This is the safe alternative to a full markdown
* re-import for small wording fixes.
*/
import { stripInlineMarkdown, stripBalancedWrappers } from "./text-normalize.js";
/** Placeholder code unit standing in for one opaque (non-text) inline node. */
const ATOM_PLACEHOLDER = ""; // OBJECT REPLACEMENT CHARACTER
/**
* Find every VALID occurrence of `needle` in a block's flattened slots.
*
* A candidate occurrence at slot range [start, start+needle.length) is valid
* ONLY IF none of the slots in that range are atoms (non-text inline nodes).
* This makes atom matching collision-safe against the U+FFFC placeholder: an
* atom slot can never be part of a match, while a real text node containing a
* literal U+FFFC code unit still matches normally (its slot has no `.atom`).
*
* Overlapping candidates that touch an atom are skipped (not counted, not
* spliced); the scan resumes one code unit past the rejected start so a valid
* match that begins just after an atom is not missed.
*/
function findValidMatches(chars, plain, needle) {
if (!needle)
return [];
const positions = [];
let idx = plain.indexOf(needle);
while (idx !== -1) {
const end = idx + needle.length;
let hasAtom = false;
for (let i = idx; i < end; i++) {
if (chars[i] && chars[i].atom) {
hasAtom = true;
break;
}
}
if (!hasAtom) {
positions.push(idx);
// Non-overlapping: skip past this match.
idx = plain.indexOf(needle, end);
}
else {
// This candidate crosses an atom: reject it and resume one unit later so
// an overlapping valid match starting after the atom is still found.
idx = plain.indexOf(needle, idx + 1);
}
}
return positions;
}
/** Order-sensitive deep-equality of two marks arrays. */
function marksEqual(a, b) {
if (a === b)
return true;
if (a.length !== b.length)
return false;
for (let i = 0; i < a.length; i++) {
if (JSON.stringify(a[i]) !== JSON.stringify(b[i]))
return false;
}
return true;
}
/** A block is any node that DIRECTLY contains at least one inline text child. */
function isInlineBlock(node) {
return (Array.isArray(node?.content) &&
node.content.some((child) => child && child.type === "text"));
}
/** Flatten a block's inline content into a per-code-unit slot array. */
function flattenBlock(node) {
const chars = [];
for (const child of node.content || []) {
if (child && child.type === "text" && typeof child.text === "string") {
const marks = child.marks || [];
// Iterate by UTF-16 code unit so indices align with String.indexOf.
for (let i = 0; i < child.text.length; i++) {
chars.push({ ch: child.text[i], marks });
}
}
else {
// Any non-text inline node becomes one opaque slot.
chars.push({
ch: ATOM_PLACEHOLDER,
marks: (child && child.marks) || [],
atom: child,
});
}
}
return chars;
}
/** Re-tokenize a slot array back into ProseMirror inline nodes. */
function tokenizeChars(chars) {
const out = [];
let buffer = "";
let bufferMarks = null;
const flush = () => {
if (buffer.length === 0)
return;
const textNode = { type: "text", text: buffer };
if (bufferMarks && bufferMarks.length > 0)
textNode.marks = bufferMarks;
out.push(textNode);
buffer = "";
bufferMarks = null;
};
for (const slot of chars) {
if (slot.atom) {
flush();
out.push(slot.atom);
continue;
}
if (bufferMarks !== null && !marksEqual(bufferMarks, slot.marks)) {
flush();
}
if (bufferMarks === null)
bufferMarks = slot.marks;
buffer += slot.ch;
}
flush();
return out;
}
/** Longest common prefix length of two strings. */
function commonPrefixLen(a, b) {
const max = Math.min(a.length, b.length);
let i = 0;
while (i < max && a[i] === b[i])
i++;
return i;
}
/** Longest common suffix length of two strings, capped so it can't overlap. */
function commonSuffixLen(a, b, cap) {
const max = Math.min(a.length, b.length, cap);
let i = 0;
while (i < max && a[a.length - 1 - i] === b[b.length - 1 - i])
i++;
return i;
}
/**
* Apply one edit to one block's flattened slot array.
*
* The caller passes only VALID (atom-free) match positions (see
* findValidMatches), so no match range can overlap an atom slot here.
*/
function applyEditToChars(chars, edit, matchPositions) {
// Pre-compute the diff slices once (find/replace are constant per edit).
const p = commonPrefixLen(edit.find, edit.replace);
const s = commonSuffixLen(edit.find, edit.replace, Math.min(edit.find.length, edit.replace.length) - p);
const insertText = edit.replace.slice(p, edit.replace.length - s);
// Rebuild the slot array in a single left-to-right pass, splicing at each
// match start. Offsets into `chars` stay valid because we copy through.
const newChars = [];
let cursor = 0;
let spliced = 0;
for (const mStart of matchPositions) {
const mEnd = mStart + edit.find.length;
const changedStart = mStart + p;
const changedEnd = mEnd - s;
// Copy through everything up to the changed region (incl. the prefix).
for (; cursor < changedStart; cursor++)
newChars.push(chars[cursor]);
const removed = chars.slice(changedStart, changedEnd);
// Choose the marks for the inserted characters.
let chosenMarks = [];
if (removed.length > 0 &&
removed.every((r) => marksEqual(r.marks, removed[0].marks))) {
// Uniform removed region: inherit its marks directly.
chosenMarks = removed[0].marks;
}
else {
// Empty or non-uniform removed region: inherit from the nearest TEXT
// neighbour, skipping atom slots (an atom carries marks that do not
// belong on inserted text). Scan left first, then right; fall back to [].
let inherited = null;
for (let i = changedStart - 1; i >= 0; i--) {
if (!chars[i].atom) {
inherited = chars[i].marks;
break;
}
}
if (inherited === null) {
for (let i = changedEnd; i < chars.length; i++) {
if (!chars[i].atom) {
inherited = chars[i].marks;
break;
}
}
}
chosenMarks = inherited === null ? [] : inherited;
}
// Emit the inserted text (one slot per code unit).
for (let i = 0; i < insertText.length; i++) {
newChars.push({ ch: insertText[i], marks: chosenMarks });
}
// Skip the removed region.
cursor = changedEnd;
spliced++;
}
// Copy through the tail.
for (; cursor < chars.length; cursor++)
newChars.push(chars[cursor]);
return { newChars, spliced };
}
/**
* Apply text edits to a ProseMirror doc (operates on a deep copy, returns it).
*
* Returns { doc, results, failed }:
* - results: edits that applied (replacements >= 1).
* - failed: edits that matched zero times, were ambiguous (multi-match
* without replaceAll), or whose changed region crosses a non-text inline
* node. These do NOT throw — they are recorded so the caller can surface an
* actionable message and still keep the edits that did apply.
*
* Edits apply IN ORDER to the same working copy, so a later edit can target
* text produced by an earlier one. The input doc is never mutated. The only
* thrown error is for invalid input (an empty `edit.find`).
*/
export function applyTextEdits(doc, edits) {
const copy = JSON.parse(JSON.stringify(doc));
const results = [];
const failed = [];
for (const edit of edits) {
if (!edit.find)
throw new Error("edit.find must be a non-empty string");
// HARD-REFUSE formatting changes. edit_page_text edits PLAIN TEXT only and
// writes the replacement verbatim, so it cannot add/remove marks. We refuse
// only a pure formatting TOGGLE: find and replace differ ONLY by balanced
// markdown markers (e.g. find:"~~$69~~" / replace:"$69", or find:"M5Stack" /
// replace:"**M5Stack**" which would write literal `**`).
//
// The detector is the STRICT stripBalancedWrappers, NOT the lenient locator
// stripInlineMarkdown: the lenient one also trims whitespace/emoji and
// collapses lone `*`/`_` runs, which gives false positives on ordinary
// plain-text edits (trailing-space trim, snake_case, `2 * 3 * 4`, URLs with
// underscores) and wrongly refuses them. Comparing the strict strip of both
// sides symmetrically catches every real formatting toggle while leaving
// plain text alone; a typo fix wrapped in markdown still applies because its
// stripped find != stripped replace.
const formattingOnly = edit.find !== edit.replace &&
stripBalancedWrappers(edit.find) === stripBalancedWrappers(edit.replace);
if (formattingOnly) {
failed.push({
find: edit.find,
reason: "edit_page_text edits plain text only and cannot add or remove formatting marks (bold/italic/strike/code/link); it writes the replacement as LITERAL text. This edit looks like a formatting change (markdown markers in find/replace). To change marks, read the block with get_page_json and use patch_node (or update_page_json) to set the node's marks array.",
});
continue;
}
// Gather every inline block in document order (recurse the whole tree so
// nested containers — callouts, list items, table cells, blockquotes — are
// all covered).
const blocks = [];
(function collect(node) {
if (isInlineBlock(node))
blocks.push(node);
for (const child of node.content || [])
collect(child);
})(copy);
// Find every VALID (atom-free) occurrence per block. A candidate whose slot
// range overlaps a non-text inline atom is never a match (collision-safe vs
// the U+FFFC placeholder), so it is excluded from both the uniqueness count
// and the splicing.
const blockChars = blocks.map((b) => flattenBlock(b));
const blockPlain = blockChars.map((chars) => chars.map((c) => c.ch).join(""));
// EXACT MATCH WINS: try the verbatim locator first.
let effectiveFind = edit.find;
let normalized = false;
let validPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find));
let total = 0;
for (const positions of validPerBlock)
total += positions.length;
// FALLBACK: only if the verbatim locator matched nothing, retry with the
// markdown-stripped form. `edit.replace` is never touched — this only
// changes what we LOCATE, not what we insert.
const stripped = stripInlineMarkdown(edit.find);
if (total === 0 && stripped !== edit.find && stripped.length > 0) {
const strippedPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], stripped));
let strippedTotal = 0;
for (const positions of strippedPerBlock)
strippedTotal += positions.length;
if (strippedTotal >= 1) {
validPerBlock = strippedPerBlock;
total = strippedTotal;
effectiveFind = stripped;
normalized = true;
}
}
if (total === 0) {
// Distinguish "the text exists but only across an atom" from a plain
// not-found: if a raw substring scan (atoms included) WOULD have hit —
// for EITHER the verbatim or the stripped locator — the only thing
// blocking the edit is the atom, so report that.
const existsAcrossAtom = blockPlain.some((plain) => plain.indexOf(edit.find) !== -1 ||
(stripped !== edit.find && plain.indexOf(stripped) !== -1));
let reason;
if (existsAcrossAtom) {
reason =
"match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes.";
}
else {
// Append a bounded "closest text" hint: find the FIRST block that
// contains the longest whitespace-delimited token (>= 3 chars) of the
// (stripped, then raw) locator, and quote that block's plain text.
reason = "text not found in the document.";
const tokenSource = stripped.length > 0 ? stripped : edit.find;
const longestToken = tokenSource
.split(/\s+/)
.filter((t) => t.length >= 3)
.sort((a, b) => b.length - a.length)[0];
if (longestToken) {
const hitBlock = blockPlain.find((plain) => plain.includes(longestToken));
if (hitBlock) {
// Truncate by code point (spread iterates by code point) so a
// surrogate pair is never split; append the ellipsis only when the
// text was actually longer than the limit.
const points = [...hitBlock];
const snippet = points.length > 120
? points.slice(0, 120).join("") + "…"
: hitBlock;
reason += ` Closest block text: "${snippet}".`;
}
}
}
failed.push({ find: edit.find, reason });
continue;
}
if (total > 1 && !edit.replaceAll) {
failed.push({
find: edit.find,
reason: `matches ${total} times. Provide a longer, unique fragment or set replaceAll: true.`,
});
continue;
}
// Plan the splices from the valid positions. For a non-replaceAll edit we
// splice only the first valid match (left-to-right across blocks); for
// replaceAll we splice every valid match.
const plannedPerBlock = blockChars.map(() => []);
let takenFirst = false;
for (let b = 0; b < validPerBlock.length; b++) {
for (const idx of validPerBlock[b]) {
if (edit.replaceAll) {
plannedPerBlock[b].push(idx);
}
else if (!takenFirst) {
plannedPerBlock[b].push(idx);
takenFirst = true;
break;
}
else {
break;
}
}
if (!edit.replaceAll && takenFirst)
break;
}
// Apply the splices block-by-block and re-tokenize changed blocks. The
// local edit uses `effectiveFind` (verbatim or normalized) so the
// prefix/suffix diff is computed against the ACTUALLY matched text, while
// `edit.replace` stays literal — never stripped.
const effectiveEdit = {
find: effectiveFind,
replace: edit.replace,
replaceAll: edit.replaceAll,
};
let spliced = 0;
for (let b = 0; b < blocks.length; b++) {
if (plannedPerBlock[b].length === 0)
continue;
const { newChars, spliced: n } = applyEditToChars(blockChars[b], effectiveEdit, plannedPerBlock[b]);
spliced += n;
blocks[b].content = tokenizeChars(newChars);
}
// Keep `find: edit.find` (the original) so the caller can correlate.
const result = { find: edit.find, replacements: spliced };
if (normalized)
result.normalized = true;
results.push(result);
}
// Safety net: drop any empty text nodes (ProseMirror forbids them). The
// re-tokenizer never emits empty text nodes, but untouched blocks could in
// principle carry one in from upstream.
(function prune(node) {
if (Array.isArray(node.content)) {
node.content = node.content.filter((child) => !(child.type === "text" && child.text === ""));
for (const child of node.content)
prune(child);
}
})(copy);
return { doc: copy, results, failed };
}

View File

@@ -1,816 +0,0 @@
/**
* Convert ProseMirror/TipTap JSON content to Markdown
* Supports all Docmost-specific node types and extensions
*/
export function convertProseMirrorToMarkdown(content) {
if (!content || !content.content)
return "";
// Escape a value interpolated into an HTML double-quoted attribute value
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the
// ATTRIBUTE context only the quote that delimits the value and the ampersand
// that starts an entity are special, so we escape ONLY & " (and ' for safety
// when single-quoted delimiters are used). We deliberately do NOT escape < or
// >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode
// &lt;/&gt; back inside attribute values, so escaping them would corrupt the
// stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on
// every round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`). Escaping & "
// keeps the value inert against attribute-injection while staying idempotent.
// NOTE: escape ONLY & and " here. The value is always wrapped in double
// quotes, so " is the only delimiter; ' is NOT special in a double-quoted
// value, and parse5 does not decode &#39; back inside attribute values, so
// escaping ' would (like < >) corrupt the value and accumulate &amp; on every
// round-trip. Escaping & and " is idempotent (parse5 decodes them back).
const escapeAttr = (value) => String(value)
.replace(/&/g, "&amp;")
.replace(/"/g, "&quot;");
// Escape a value placed as HTML element TEXT content (between tags), where
// <, >, and & are all significant. Used for text rendered inside raw-HTML
// blocks (table cells / columns) so stored characters cannot inject markup.
const escapeHtmlText = (value) => String(value)
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;");
// Percent-encode characters that would break out of a markdown URL target
// (...) — whitespace/newlines and parentheses — so a stored src stays a
// single inert token (used for image/video/youtube srcs).
const encodeMdUrl = (value) => String(value || "")
.replace(/\s/g, (c) => (c === " " ? "%20" : encodeURIComponent(c)))
.replace(/\(/g, "%28")
.replace(/\)/g, "%29");
const processNode = (node) => {
const type = node.type;
const nodeContent = node.content || [];
switch (type) {
case "doc":
return nodeContent.map(processNode).join("\n\n");
case "paragraph":
const text = nodeContent.map(processNode).join("");
const align = node.attrs?.textAlign;
if (align && align !== "left") {
return `<div align="${escapeAttr(align)}">${text}</div>`;
}
return text || "";
case "heading":
const level = node.attrs?.level || 1;
const headingText = nodeContent.map(processNode).join("");
return "#".repeat(level) + " " + headingText;
case "text":
let textContent = node.text || "";
// Apply marks (bold, italic, code, etc.)
if (node.marks) {
// Markdown code spans (`...`) cannot carry inner formatting, so when a
// run has the `code` mark alongside ANY other mark, backtick syntax
// would leak literal ** / []() into the code text. In that case emit
// nested HTML (<code> innermost, the other marks wrapping it as HTML)
// so the output is at least well-formed and re-parseable.
//
// NOTE: this does NOT round-trip both marks. The schema's `code` mark
// has `excludes: "_"` (it excludes every other mark), so on import the
// co-occurring mark is always dropped — the run comes back as `code`
// only. We keep the emission simple and accept that the other mark is
// lost; preserving both is impossible while `code` excludes them.
// Only use the backtick form when `code` is the sole mark.
const markTypes = node.marks.map((m) => m.type);
const hasCode = markTypes.includes("code");
const codeCombined = hasCode && markTypes.length > 1;
for (const mark of node.marks) {
switch (mark.type) {
case "bold":
textContent = codeCombined
? `<strong>${textContent}</strong>`
: `**${textContent}**`;
break;
case "italic":
textContent = codeCombined
? `<em>${textContent}</em>`
: `*${textContent}*`;
break;
case "code":
// When combined with another mark, wrap as <code> so the
// surrounding HTML marks can nest around it; otherwise use the
// plain backtick span.
textContent = codeCombined
? `<code>${textContent}</code>`
: `\`${textContent}\``;
break;
case "link": {
const href = mark.attrs?.href || "";
const title = mark.attrs?.title;
if (codeCombined) {
// Emit an HTML anchor so it can wrap the nested <code>.
const safeHref = escapeAttr(href);
if (title) {
textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
}
else {
textContent = `<a href="${safeHref}">${textContent}</a>`;
}
}
else if (title) {
// Emit the optional markdown link title; escape an embedded
// double-quote so it cannot terminate the title string early.
const safeTitle = String(title).replace(/"/g, '\\"');
textContent = `[${textContent}](${href} "${safeTitle}")`;
}
else {
textContent = `[${textContent}](${href})`;
}
break;
}
case "strike":
textContent = codeCombined
? `<s>${textContent}</s>`
: `~~${textContent}~~`;
break;
case "underline":
textContent = `<u>${textContent}</u>`;
break;
case "subscript":
textContent = `<sub>${textContent}</sub>`;
break;
case "superscript":
textContent = `<sup>${textContent}</sup>`;
break;
case "highlight": {
// Preserve a null/empty color as a plain highlight (a bare
// <mark> with no background-color); only emit the style when a
// color is actually set, so a plain highlight is not forced to
// yellow on export.
const color = mark.attrs?.color;
textContent = color
? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
: `<mark>${textContent}</mark>`;
break;
}
case "textStyle":
if (mark.attrs?.color) {
textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
}
break;
case "comment": {
// Emit the inline comment anchor so highlights round-trip. The
// schema's Comment mark parses span[data-comment-id] (attrs
// commentId/resolved).
const cid = mark.attrs?.commentId;
if (cid) {
const resolvedAttr = mark.attrs?.resolved
? ` data-resolved="true"`
: "";
textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
}
break;
}
}
}
}
return textContent;
case "codeBlock":
const language = node.attrs?.language || "";
// Strip ALL trailing newlines so the export is idempotent: marked
// re-adds exactly one trailing "\n" on import, so trimming only one
// here would let the text grow by "\n" on each round-trip. Removing
// every trailing newline makes repeated cycles stable.
const code = nodeContent
.map(processNode)
.join("")
.replace(/\n+$/, "");
return "```" + language + "\n" + code + "\n```";
case "bulletList":
return nodeContent
.map((item) => processListItem(item, "-"))
.join("\n");
case "orderedList":
return nodeContent
.map((item, index) => processListItem(item, `${index + 1}.`))
.join("\n");
case "taskList":
return nodeContent.map((item) => processTaskItem(item)).join("\n");
case "taskItem":
// Delegate to the same helper used by taskList so multi-block and
// nested task items render and indent consistently.
return processTaskItem(node);
case "listItem":
return nodeContent.map(processNode).join("\n");
case "blockquote":
// Prefix EVERY line of EVERY child with "> " and separate block-level
// children with a blank ">" line so code blocks / multi-paragraph
// quotes round-trip correctly.
return nodeContent
.map((n) => processNode(n)
.split("\n")
.map((line) => (line.length ? `> ${line}` : ">"))
.join("\n"))
.join("\n>\n");
case "horizontalRule":
return "---";
case "hardBreak":
// Two trailing spaces before the newline encode a markdown hard break;
// a bare "\n" would be reimported as a soft break and lost.
return " \n";
case "image":
const imgAlt = node.attrs?.alt || "";
// Neutralize characters that could break out of the markdown image
// URL: spaces/newlines and parentheses would terminate the (...) target
// and let a stored src inject following markdown/HTML. Percent-encode
// them so the URL stays a single inert token.
const imgSrc = encodeMdUrl(node.attrs?.src);
// No "caption" attribute exists in the Docmost image schema, so we do
// not emit one (the previous caption branch was dead).
return `![${imgAlt}](${imgSrc})`;
case "video": {
// Emit the schema-matching <video> element so generateJSON rebuilds the
// node with its attrs intact. The schema's parseHTML reads src/aria-label
// from the standard attributes and the remaining attrs from data-*.
const attrs = node.attrs || {};
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
if (attrs.alt)
parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.width != null)
parts.push(`width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`height="${escapeAttr(attrs.height)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
if (attrs.aspectRatio != null)
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
// Wrap in a block <div> so marked treats it as a block (a bare <video>
// is inline-level HTML and marked wraps it in <p>, leaving a spurious
// empty paragraph beside the hoisted block atom). The wrapper has no
// data-type, so the schema parser ignores it and just hoists the video.
return `<div><video ${parts.join(" ")}></video></div>`;
}
case "youtube": {
// Emit the schema-matching div[data-type="youtube"]; the schema reads
// src from data-src and width/height/align from data-* attributes.
const attrs = node.attrs || {};
const parts = [
`data-type="youtube"`,
`data-src="${escapeAttr(attrs.src ?? "")}"`,
];
if (attrs.width != null)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "table": {
// A GFM pipe table cannot represent merged cells. If ANY cell carries
// colspan>1 or rowspan>1, a pipe table would corrupt the grid on
// re-import, so emit the WHOLE table as raw HTML <table> instead: the
// schema's table family parseHTML (tag table/tr/td/th, with colspan/
// rowspan read from the same-named HTML attrs and align via parseHTML)
// round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
const tableRows = nodeContent;
if (tableRows.length === 0)
return "";
const hasSpan = tableRows.some((row) => (row.content || []).some((cell) => (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1));
if (hasSpan) {
// Render each cell's block children to HTML (marked does NOT parse
// markdown inside a raw HTML block, so emitting markdown here would
// leak literal ** / `` into the cell). blockToHtml mirrors the schema
// HTML so inner formatting re-parses into the right marks/nodes.
const renderHtmlCell = (cell) => {
const tag = cell.type === "tableHeader" ? "th" : "td";
const a = cell.attrs || {};
const cellParts = [];
if ((a.colspan ?? 1) > 1)
cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
if ((a.rowspan ?? 1) > 1)
cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
if (a.align)
cellParts.push(`align="${escapeAttr(a.align)}"`);
const open = cellParts.length
? `<${tag} ${cellParts.join(" ")}>`
: `<${tag}>`;
const inner = (cell.content || [])
.map((block) => blockToHtml(block))
.join("");
return `${open}${inner}</${tag}>`;
};
const htmlRows = tableRows
.map((row) => `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`)
.join("");
return `<table><tbody>${htmlRows}</tbody></table>`;
}
// No merged cells: emit a GFM table (header row + separator) so the
// markdown can be parsed back into a table on re-import.
const rows = tableRows.map(processNode);
const headerCells = tableRows[0]?.content || [];
const columns = headerCells.length || 1;
// Derive alignment markers (:--, :-:, --:) from each header cell.
const markers = Array.from({ length: columns }, (_, i) => {
const align = headerCells[i]?.attrs?.align;
switch (align) {
case "left":
return ":--";
case "center":
return ":-:";
case "right":
return "--:";
default:
return "---";
}
});
const separator = "| " + markers.join(" | ") + " |";
return [rows[0], separator, ...rows.slice(1)].join("\n");
}
case "tableRow":
return "| " + nodeContent.map(processNode).join(" | ") + " |";
case "tableCell":
case "tableHeader": {
// Join multiple block children with a space (not "") so adjacent blocks
// like a paragraph followed by a list don't collide into "line1- a".
// Then collapse newlines and escape pipes so a cell containing "|" or a
// line break cannot corrupt the surrounding GFM row.
return nodeContent
.map(processNode)
.join(" ")
.replace(/\r?\n/g, " ")
.replace(/\|/g, "\\|");
}
case "callout":
const calloutType = node.attrs?.type || "info";
const calloutContent = nodeContent.map(processNode).join("\n");
return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
case "details":
return nodeContent.map(processNode).join("\n");
case "detailsSummary":
const summaryText = nodeContent.map(processNode).join("");
return `<details>\n<summary>${summaryText}</summary>\n`;
case "detailsContent":
const detailsText = nodeContent.map(processNode).join("\n");
return `${detailsText}\n</details>`;
case "mathInline": {
// The schema's `text` attribute has no parseHTML, so TipTap's default
// parser reads it from the `text` HTML attribute (NOT the element's text
// content). Emit span[data-type="mathInline"] carrying the LaTeX in a
// `text="..."` attribute so it round-trips. marked cannot parse $...$
// back, so the previous form was lossy.
const inlineMath = node.attrs?.text || "";
return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
}
case "mathBlock": {
// Same as mathInline: the LaTeX must ride in the `text` HTML attribute
// for the schema's default parser to recover it.
const blockMath = node.attrs?.text || "";
return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
}
case "mention": {
// Emit span[data-type="mention"] with the schema's data-* attributes so
// generateJSON rebuilds the mention node instead of leaving "@label"
// plain text that cannot re-parse.
const attrs = node.attrs || {};
const parts = [`data-type="mention"`];
if (attrs.id)
parts.push(`data-id="${escapeAttr(attrs.id)}"`);
if (attrs.label)
parts.push(`data-label="${escapeAttr(attrs.label)}"`);
if (attrs.entityType)
parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
if (attrs.entityId)
parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
if (attrs.slugId)
parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
if (attrs.creatorId)
parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
if (attrs.anchorId)
parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
// Keep the label as visible text content too; the schema reads attrs
// from data-*, so the inner text is purely cosmetic and harmless.
const mentionLabel = attrs.label || attrs.id || "";
// The label is visible element TEXT content here (the data-* attrs above
// carry the real values), so escape it for the text context, not attrs.
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
}
case "footnoteReference": {
// Pandoc/GFM inline marker. The number is derived (not stored), so the
// id is the stable anchor.
const fnId = node.attrs?.id || "";
return fnId ? `[^${fnId}]` : "";
}
case "footnotesList":
// The container renders its definitions, each on its own `[^id]: ...`
// line. A blank line separates the body from the notes block.
return nodeContent.map(processNode).join("\n");
case "footnoteDefinition": {
const defId = node.attrs?.id || "";
// Collapse the definition's paragraphs into a single line; multi-line
// footnotes are a v2 refinement.
const defText = nodeContent
.map(processNode)
.join(" ")
.replace(/\s*\n+\s*/g, " ")
.trim();
return defId ? `[^${defId}]: ${defText}` : "";
}
case "attachment": {
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
// the schema stores name/url (plus mime/size/attachmentId). Emit the
// schema-matching div[data-type="attachment"] with data-attachment-*
// attrs so the node round-trips instead of degrading to a markdown link.
const attrs = node.attrs || {};
const parts = [
`data-type="attachment"`,
`data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
];
if (attrs.name)
parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
if (attrs.mime)
parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
if (attrs.size != null)
parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "drawio":
case "excalidraw": {
// Emit the schema-matching div[data-type=...] carrying the diagram's
// attrs as data-* (the schema's diagramAttributes reads src/title/alt/
// width/height/size/aspectRatio/align/attachmentId from data-*), so the
// diagram round-trips instead of degrading to a lossy placeholder.
const attrs = node.attrs || {};
const parts = [
`data-type="${type}"`,
`data-src="${escapeAttr(attrs.src ?? "")}"`,
];
if (attrs.title != null)
parts.push(`data-title="${escapeAttr(attrs.title)}"`);
if (attrs.alt != null)
parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
if (attrs.width != null)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.aspectRatio != null)
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "embed": {
// Emit the schema-matching div[data-type="embed"]; the schema reads
// src/provider/align/width/height from data-* attributes so the node
// (and its provider iframe info) survives the round-trip.
const attrs = node.attrs || {};
const parts = [
`data-type="embed"`,
`data-src="${escapeAttr(attrs.src ?? "")}"`,
`data-provider="${escapeAttr(attrs.provider ?? "")}"`,
];
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
if (attrs.width != null)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "audio": {
// Emit the schema-matching <audio> element (was emitting nothing). The
// schema reads src from src and attachmentId/size from data-*.
const attrs = node.attrs || {};
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
// Wrap in a block <div> for the same reason as video: a bare <audio> is
// inline-level HTML that marked would wrap in <p>.
return `<div><audio ${parts.join(" ")}></audio></div>`;
}
case "pdf": {
// Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
// The schema reads src/width/height from standard attrs and name/
// attachmentId/size from data-*.
const attrs = node.attrs || {};
const parts = [
`data-type="pdf"`,
`src="${escapeAttr(attrs.src ?? "")}"`,
];
if (attrs.name)
parts.push(`data-name="${escapeAttr(attrs.name)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.width != null)
parts.push(`width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`height="${escapeAttr(attrs.height)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "columns": {
// Emit the schema-matching div[data-type="columns"] wrapper so the
// multi-column layout survives. Without a case the children were
// concatenated with no separator and the text merged. The schema reads
// layout from data-layout and widthMode from data-width-mode. The whole
// block is raw HTML, so render children via blockToHtml (NOT markdown,
// which marked would not re-parse inside a raw HTML block).
const attrs = node.attrs || {};
const parts = [`data-type="columns"`];
if (attrs.layout)
parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
if (attrs.widthMode && attrs.widthMode !== "normal")
parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
return `<div ${parts.join(" ")}>${inner}</div>`;
}
case "column": {
// Emit the schema-matching div[data-type="column"]; the schema reads the
// column width from data-width. Children are rendered as HTML so their
// formatting survives inside this raw HTML block.
const attrs = node.attrs || {};
const parts = [`data-type="column"`];
if (attrs.width)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
return `<div ${parts.join(" ")}>${inner}</div>`;
}
case "subpages":
return "{{SUBPAGES}}";
default:
// Fallback: process children
return nodeContent.map(processNode).join("");
}
};
// Render inline content (text runs + their marks) to HTML. Used by the raw
// HTML fallbacks (spanned tables, columns) where marked will NOT re-parse
// markdown, so backtick/asterisk/bracket syntax would otherwise leak as
// literal characters. Each mark is mirrored to the HTML the schema's parseHTML
// accepts so it re-imports as the matching ProseMirror mark.
const inlineToHtml = (inlineNodes) => (inlineNodes || [])
.map((n) => {
if (n.type === "hardBreak")
return "<br>";
if (n.type !== "text") {
// Inline atoms (mention, mathInline) already emit schema HTML.
return processNode(n);
}
let t = escapeHtmlText(n.text || "");
for (const mark of n.marks || []) {
switch (mark.type) {
case "bold":
t = `<strong>${t}</strong>`;
break;
case "italic":
t = `<em>${t}</em>`;
break;
case "code":
t = `<code>${t}</code>`;
break;
case "strike":
t = `<s>${t}</s>`;
break;
case "underline":
t = `<u>${t}</u>`;
break;
case "subscript":
t = `<sub>${t}</sub>`;
break;
case "superscript":
t = `<sup>${t}</sup>`;
break;
case "link":
t = `<a href="${escapeAttr(mark.attrs?.href || "")}">${t}</a>`;
break;
case "highlight":
t = mark.attrs?.color
? `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${t}</mark>`
: `<mark>${t}</mark>`;
break;
case "textStyle":
if (mark.attrs?.color)
t = `<span style="color: ${escapeAttr(mark.attrs.color)}">${t}</span>`;
break;
case "comment":
// Inline comment anchor inside a raw-HTML container (columns /
// spanned table cells), so commented text there also round-trips.
if (mark.attrs?.commentId) {
const r = mark.attrs?.resolved ? ` data-resolved="true"` : "";
t = `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${r}>${t}</span>`;
}
break;
}
}
return t;
})
.join("");
// Emit the schema-matching <img> for an image node. Shared so the image is
// emitted as real HTML wherever a raw-HTML container needs it (inside a column
// or a spanned table cell), where markdown `![](...)` would NOT be re-parsed
// and would survive as literal text. The Image extension reads src/alt from
// the standard attributes; the Docmost extra attrs (width/height/align/size/
// attachmentId/aspectRatio) are global attributes read from same-named DOM
// attributes, so emit them by name.
const imageToHtml = (node) => {
const attrs = node.attrs || {};
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
if (attrs.alt)
parts.push(`alt="${escapeAttr(attrs.alt)}"`);
if (attrs.title)
parts.push(`title="${escapeAttr(attrs.title)}"`);
if (attrs.width != null)
parts.push(`width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`height="${escapeAttr(attrs.height)}"`);
if (attrs.align)
parts.push(`align="${escapeAttr(attrs.align)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.aspectRatio != null)
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
return `<img ${parts.join(" ")}>`;
};
// Emit the schema-matching div[data-type="callout"] for a callout node. The
// schema reads the banner type from data-callout-type. Children are rendered
// as HTML so they survive inside a raw-HTML container.
const calloutToHtml = (node) => {
const type = (node.attrs?.type || "info").toLowerCase();
const inner = (node.content || []).map(blockToHtml).join("");
return `<div data-type="callout" data-callout-type="${escapeAttr(type)}">${inner}</div>`;
};
// Emit a schema-matching <details> tree. The schema parses <details>,
// summary[data-type="detailsSummary"], and div[data-type="detailsContent"].
const detailsToHtml = (node) => {
const inner = (node.content || []).map(blockToHtml).join("");
return `<details>${inner}</details>`;
};
const detailsSummaryToHtml = (node) => `<summary data-type="detailsSummary">${inlineToHtml(node.content || [])}</summary>`;
const detailsContentToHtml = (node) => {
const inner = (node.content || []).map(blockToHtml).join("");
return `<div data-type="detailsContent">${inner}</div>`;
};
// Emit the schema-matching taskList/taskItem HTML. bridgeTaskLists (in
// collaboration.ts) recognizes ul[data-type="taskList"] with
// li[data-type="taskItem"][data-checked]; emitting that directly here keeps
// task lists inside columns/cells from degrading to literal "- [ ]" text.
const taskListToHtml = (node) => {
const items = (node.content || [])
.map((it) => {
const checked = it.attrs?.checked ? "true" : "false";
return `<li data-type="taskItem" data-checked="${checked}">${blockChildrenToHtml(it)}</li>`;
})
.join("");
return `<ul data-type="taskList">${items}</ul>`;
};
// Render a block node to HTML for the raw-HTML containers (spanned tables,
// columns). marked does NOT re-parse markdown inside a raw-HTML block, so
// EVERY block type that can appear inside a column or a spanned cell must be
// emitted as schema-matching HTML here — never as markdown, or it would land
// as literal text on re-import. Nodes whose processNode case already produces
// schema-matching HTML (math/media/embed/attachment/nested columns/spanned
// table) are delegated to processNode; the markdown-emitting cases
// (image/blockquote/callout/details/hr/taskList) get explicit HTML here.
const blockToHtml = (block) => {
const children = block.content || [];
switch (block.type) {
case "paragraph":
return `<p>${inlineToHtml(children)}</p>`;
case "heading": {
const level = block.attrs?.level || 1;
return `<h${level}>${inlineToHtml(children)}</h${level}>`;
}
case "bulletList":
return `<ul>${children
.map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
.join("")}</ul>`;
case "orderedList":
return `<ol>${children
.map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
.join("")}</ol>`;
case "codeBlock": {
const lang = block.attrs?.language || "";
// The code itself is element TEXT content (between <code> tags), so it
// must escape < > & — NOT the attribute escaper. The language rides in
// a class ATTRIBUTE, so it uses escapeAttr.
const code = escapeHtmlText(children
.map(processNode)
.join("")
.replace(/\n+$/, ""));
const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
return `<pre><code${cls}>${code}</code></pre>`;
}
case "image":
return imageToHtml(block);
case "blockquote":
return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
case "horizontalRule":
return "<hr>";
case "callout":
return calloutToHtml(block);
case "details":
return detailsToHtml(block);
case "detailsSummary":
return detailsSummaryToHtml(block);
case "detailsContent":
return detailsContentToHtml(block);
case "taskList":
return taskListToHtml(block);
case "taskItem":
// A bare taskItem (outside a taskList) still needs a wrapping list so
// the schema parses it; wrap it in a single-item taskList.
return taskListToHtml({ content: [block] });
// table (incl. spanned), columns/column, math, media, embed, attachment,
// mention, etc. already emit schema-matching HTML from processNode.
case "table":
case "columns":
case "column":
case "mathBlock":
case "video":
case "audio":
case "pdf":
case "youtube":
case "embed":
case "attachment":
case "drawio":
case "excalidraw":
return processNode(block);
default:
// Any still-unhandled block type: NEVER fall back to markdown inside a
// raw-HTML block (it would become literal text). Wrap its rendered
// children in a <div> so their content is preserved; if it has no block
// children, render its inline content instead.
if (children.length && children.some((c) => c.type !== "text")) {
return `<div>${children.map(blockToHtml).join("")}</div>`;
}
return `<div>${inlineToHtml(children)}</div>`;
}
};
// Render the block children of a list item to HTML (a listItem holds block+
// content). Mirrors processListItem but for the HTML fallback path.
const blockChildrenToHtml = (item) => (item.content || []).map((b) => blockToHtml(b)).join("");
// Indent the rendered children of a list item under a marker prefix.
// Each child block is a (possibly multi-line) string. The very first physical
// line of the first child carries the marker (e.g. "- " or "1. "); EVERY
// other line — the remaining lines of the first child AND all lines of every
// subsequent child (nested lists, code blocks, extra paragraphs) — is indented
// to align under the marker. Without indenting these continuation lines, the
// 2nd/3rd line of a nested child collapses to column 0 and escapes the list.
//
// The continuation indent MUST equal the LIST marker width, which is not the
// same as the visible prefix width:
// - bullet "- " -> 2 columns
// - task "- [ ] " -> marker is still "- " (the "[ ] " is content), 2
// - ordered "1. "/"10. " -> 3/4 columns, scaling with the number's digits
// CommonMark anchors nested content to the marker column, so an ordered item
// indented to only 2 columns would be re-parsed as a sibling/loose content on
// re-import. Callers therefore pass the exact indent width to use.
const indentItemChildren = (childStrings, prefix, indentWidth) => {
const indent = " ".repeat(indentWidth);
const lines = [];
childStrings.forEach((child, childIndex) => {
child.split("\n").forEach((line, lineIndex) => {
if (childIndex === 0 && lineIndex === 0) {
// First physical line of the first block gets the marker.
lines.push(`${prefix} ${line}`);
}
else {
// Indent every continuation line by the marker width; keep blank
// lines blank rather than emitting trailing whitespace.
lines.push(line.length ? `${indent}${line}` : "");
}
});
});
return lines.join("\n");
};
const processListItem = (item, prefix) => {
const itemContent = item.content || [];
const childStrings = itemContent.map(processNode);
if (childStrings.length === 0)
return prefix;
// The rendered marker is `${prefix} ` (prefix + one space), so its width —
// and thus the continuation indent — is prefix.length + 1. This is correct
// for both bullet ("-" -> 2) and ordered ("1." -> 3, "10." -> 4) markers,
// since for those the visible prefix IS the list marker.
return indentItemChildren(childStrings, prefix, prefix.length + 1);
};
const processTaskItem = (item) => {
const checked = item.attrs?.checked || false;
const checkbox = checked ? "[x]" : "[ ]";
const prefix = `- ${checkbox}`;
const itemContent = item.content || [];
const childStrings = itemContent.map(processNode);
// An empty task item still needs its checkbox marker; without this guard
// the indent below produces "" and the "- [ ]"/"- [x]" row disappears.
if (childStrings.length === 0)
return prefix;
// The list marker for a task item is just "- " (2 columns); the "[ ] "/"[x] "
// checkbox is item content, NOT part of the marker. So the continuation
// indent is a fixed 2 — do NOT derive it from the wider prefix.length.
return indentItemChildren(childStrings, prefix, 2);
};
return processNode(content).trim();
}

View File

@@ -1,104 +0,0 @@
/**
* Self-contained Docmost-flavoured Markdown document (custom extensions).
*
* A single `.md` file that packages everything needed to losslessly round-trip
* a page through "download -> edit body -> re-upload":
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
* - a trailing `docmost:comments` block: a one-line JSON array of comment
* threads.
*
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
* importer without first stripping the blocks, the metadata cannot leak into the
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
* codeBlock node, so a fenced block is deliberately NOT used.)
*
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
* re-pastes an exported `.md` into a page, or a page documents this very
* format). To stay robust, parsing treats only the FINAL, document-ending
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
* opener whose closing `-->` sits at the very end of the file. Any earlier
* literal occurrence is left in the body untouched.
*
* NOTE on comments: in this version the comment THREAD records are preserved in
* the file but are NOT pushed back to the server on import — only the inline
* comment marks (anchors) embedded in the body are restored. Managing comment
* records stays with the comment tools/UI.
*/
// Match the leading meta block (allow leading whitespace). Capture group 1 is
// the JSON text between the markers.
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
// Match a `docmost:comments` opener. Used globally to scan for the LAST opener
// rather than end-anchoring a single regex (which would mis-capture across a
// literal opener that appears earlier in the body).
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
/**
* Assemble the full self-contained markdown file: meta block, body, and the
* comments block. The meta block is always emitted; the comments block is always
* emitted too (with `[]` when there are no comments) so the format stays uniform
* and parsing stays simple.
*/
export function serializeDocmostMarkdown(meta, body, comments) {
const metaJson = JSON.stringify(meta);
const commentsJson = JSON.stringify(Array.isArray(comments) ? comments : []);
const trimmedBody = (body ?? "").trim();
return (`<!-- docmost:meta\n${metaJson}\n-->\n\n` +
`${trimmedBody}\n\n` +
`<!-- docmost:comments\n${commentsJson}\n-->\n`);
}
/**
* Split a self-contained file back into its parts. Tolerant: if the meta or
* comments block is missing (e.g. a hand-written plain-markdown file), the
* corresponding value is returned as `null` and the whole input is treated as
* the body. This never throws on a MISSING block; only a `JSON.parse` failure
* inside a block that IS present is surfaced as a thrown Error with a clear
* message. Robust to `\r\n` line endings.
*/
export function parseDocmostMarkdown(full) {
// Normalize line endings so the anchored regexes work regardless of CRLF.
const normalized = (full ?? "").replace(/\r\n/g, "\n");
// Extract the leading meta block (start-anchored — already unambiguous).
let meta = null;
let metaEnd = 0;
const metaMatch = normalized.match(META_RE);
if (metaMatch) {
try {
meta = JSON.parse(metaMatch[1]);
}
catch (e) {
throw new Error(`Invalid docmost:meta JSON block: ${e instanceof Error ? e.message : String(e)}`);
}
// Body starts right after the matched meta block.
metaEnd = (metaMatch.index ?? 0) + metaMatch[0].length;
}
// Find the LAST `<!-- docmost:comments` opener; the real file-level block is
// the final one whose closing `-->` ends the document. Any earlier literal
// occurrence inside the body (e.g. a re-pasted export) is left in the body.
let lastOpenStart = -1;
let lastOpenEnd = -1;
let m;
COMMENTS_OPEN_RE.lastIndex = 0;
while ((m = COMMENTS_OPEN_RE.exec(normalized)) !== null) {
lastOpenStart = m.index;
lastOpenEnd = m.index + m[0].length;
}
let comments = null;
let bodyEnd = normalized.length;
if (lastOpenStart !== -1) {
const rest = normalized.slice(lastOpenEnd);
const close = rest.match(/\r?\n-->[ \t]*\r?\n?\s*$/); // closer must end the doc
if (close) {
const jsonText = rest.slice(0, close.index);
try {
comments = JSON.parse(jsonText);
}
catch (e) {
throw new Error(`Invalid docmost:comments JSON block: ${e instanceof Error ? e.message : String(e)}`);
}
bodyEnd = lastOpenStart; // strip from the opener to end of document
}
}
const body = normalized.slice(metaEnd, bodyEnd).trim();
return { meta, body, comments };
}

View File

@@ -1,821 +0,0 @@
/**
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
* tree by node id.
*
* A ProseMirror node here is a plain JSON object of the shape produced by
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
* table cells hold their children in `content` just like any other block, so a
* single recursive walk reaches them all.
*
* Every exported function operates on a DEEP CLONE of the input document and
* returns the new document. The input doc and any `newNode`/`node` argument are
* never mutated. All functions are defensively null-safe: missing/!Array
* `content`, non-object nodes, and absent `attrs` are tolerated.
*/
import { stripInlineMarkdown } from "./text-normalize.js";
/** Deep-clone a JSON-serializable value without mutating the original. */
function clone(value) {
if (typeof structuredClone === "function") {
return structuredClone(value);
}
// Fallback for environments without structuredClone.
return JSON.parse(JSON.stringify(value));
}
/** True if `value` is a non-null object (and not an array). */
function isObject(value) {
return value != null && typeof value === "object" && !Array.isArray(value);
}
/** True if `node` carries the given id in `node.attrs.id`. */
function matchesId(node, nodeId) {
return isObject(node) && isObject(node.attrs) && node.attrs.id === nodeId;
}
/**
* Recursively concatenate all text contained in a node.
*
* Text nodes contribute their `text` string; container nodes contribute the
* joined `blockPlainText` of their `content` children. Returns "" for nullish
* or non-object inputs.
*/
export function blockPlainText(node) {
if (!isObject(node))
return "";
let out = "";
if (typeof node.text === "string") {
out += node.text;
}
if (Array.isArray(node.content)) {
for (const child of node.content) {
out += blockPlainText(child);
}
}
return out;
}
/** Truncate `text` to at most `n` chars, appending an ellipsis when cut. */
function truncate(text, n) {
return text.length > n ? text.slice(0, n) + "…" : text;
}
/**
* Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
* `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
* table cells — compactness is the point; use `getNodeByRef` to drill into a
* specific block.
*
* Each entry carries `{ index, type, id, firstText }`, plus type-specific
* extras: headings add `level`; tables add `rows`/`cols` and the first row's
* cell texts as `header`; list blocks (types ending in "List") add `items`.
* `firstText` is the block's plain text truncated to 100 chars. Null-safe:
* a missing or non-object doc/content yields `[]`.
*/
export function buildOutline(doc) {
if (!isObject(doc) || !Array.isArray(doc.content))
return [];
const out = [];
for (let i = 0; i < doc.content.length; i++) {
const block = doc.content[i];
const type = isObject(block) ? block.type : undefined;
const entry = {
index: i,
type,
id: isObject(block) && isObject(block.attrs)
? (block.attrs.id ?? null)
: null,
firstText: truncate(blockPlainText(block), 100),
};
if (type === "heading") {
entry.level = isObject(block.attrs) ? (block.attrs.level ?? null) : null;
}
else if (type === "table") {
const headerRow = block.content?.[0]?.content ?? [];
entry.rows = block.content?.length ?? 0;
entry.cols = block.content?.[0]?.content?.length ?? 0;
entry.header = headerRow.map((cell) => truncate(blockPlainText(cell), 40));
}
else if (typeof type === "string" && type.endsWith("List")) {
entry.items = block.content?.length ?? 0;
}
out.push(entry);
}
return out;
}
/**
* Resolve a single node by reference and return `{ node, path, type }`, or
* `null` when nothing matches.
*
* - `ref` of the form `#<n>` (e.g. `#2`) selects the TOP-LEVEL block at index
* `n` in `doc.content`. This is the only way to address table/tableRow/
* tableCell nodes, which carry no `attrs.id`.
* - Otherwise `ref` is treated as a block id: the FIRST node anywhere in the
* tree with `attrs.id === ref` is returned.
*
* `path` is the array of child indices from the doc root down to the node
* (so a top-level block is `[index]`). The returned `node` is a DEEP CLONE,
* so callers can mutate it without touching the input doc. Null-safe.
*/
export function getNodeByRef(doc, ref) {
if (!isObject(doc))
return null;
// "#<n>": index into the top-level content array.
const indexMatch = typeof ref === "string" ? ref.match(/^#(\d+)$/) : null;
if (indexMatch) {
const index = Number(indexMatch[1]);
const block = Array.isArray(doc.content) ? doc.content[index] : undefined;
if (!isObject(block))
return null;
return { node: clone(block), path: [index], type: block.type };
}
// Otherwise: depth-first search for the first node with attrs.id === ref.
const search = (node, trail) => {
if (!isObject(node))
return null;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const path = [...trail, i];
if (matchesId(child, ref)) {
return { node: clone(child), path, type: child.type };
}
const hit = search(child, path);
if (hit != null)
return hit;
}
}
return null;
};
return search(doc, []);
}
/**
* Replace EVERY node whose `attrs.id === nodeId` with a deep clone of
* `newNode`, anywhere in the tree (including inside callouts and table cells).
*
* Operates on a clone of `doc`; returns `{ doc, replaced }` where `replaced`
* is the number of nodes substituted. A fresh clone of `newNode` is used for
* each match so they do not share references.
*/
export function replaceNodeById(doc, nodeId, newNode) {
const out = clone(doc);
let replaced = 0;
// Walk a content array, replacing direct matches and recursing into the
// (possibly new) children of non-matching nodes.
const walkContent = (content) => {
for (let i = 0; i < content.length; i++) {
const child = content[i];
if (matchesId(child, nodeId)) {
content[i] = clone(newNode);
replaced++;
// Do not recurse into a freshly substituted node.
continue;
}
if (isObject(child) && Array.isArray(child.content)) {
walkContent(child.content);
}
}
};
if (isObject(out) && Array.isArray(out.content)) {
walkContent(out.content);
}
return { doc: out, replaced };
}
/**
* Remove EVERY node whose `attrs.id === nodeId` from its parent `content`
* array, anywhere in the tree (recursive, including callouts and tables).
*
* Operates on a clone of `doc`; returns `{ doc, deleted }` where `deleted` is
* the number of nodes removed.
*/
export function deleteNodeById(doc, nodeId) {
const out = clone(doc);
let deleted = 0;
// Filter a content array in place, dropping matches and recursing into the
// surviving children.
const walkContent = (content) => {
const kept = [];
for (const child of content) {
if (matchesId(child, nodeId)) {
deleted++;
continue;
}
if (isObject(child) && Array.isArray(child.content)) {
child.content = walkContent(child.content);
}
kept.push(child);
}
return kept;
};
if (isObject(out) && Array.isArray(out.content)) {
out.content = walkContent(out.content);
}
return { doc: out, deleted };
}
/**
* Throw a clear, model-actionable error when a node-id write op did NOT match
* exactly one node (#159). `count === 0` -> "no node found"; `count > 1` ->
* "ambiguous, refused" — Docmost duplicates block ids on copy/paste, so a write
* by id could clobber/remove EVERY duplicate. The caller skips the write for any
* `count !== 1` (the transform returns null), so this only REPORTS; nothing was
* changed. No-op for the unambiguous single-match case.
*/
export function assertUnambiguousMatch(op, verb, count, nodeId, pageId) {
if (count === 0) {
throw new Error(`${op}: no node with id "${nodeId}" found on page ${pageId}`);
}
if (count > 1) {
throw new Error(`${op}: id "${nodeId}" is ambiguous — ${count} nodes on page ${pageId} share it (block ids are duplicated on copy/paste). Refusing to ${verb} all of them; nothing was changed. Re-target with a more specific anchor.`);
}
}
/**
* Deep-clone `doc` and strip every node/mark attribute whose value is strictly
* `undefined`, so the result is safe to hand to Yjs (which throws an opaque
* "Unexpected content type" when asked to store an `undefined` attribute value).
*
* Only `undefined` keys are removed; `null`, `false`, `0`, and `""` are all
* legitimate JSON-storable values and are preserved. Operates on a clone and
* returns it; the input is never mutated. Defensively null-safe like the rest
* of the file.
*/
export function sanitizeForYjs(doc) {
const out = clone(doc);
// Drop every key whose value is strictly `undefined` from an attrs object.
const stripUndefined = (attrs) => {
if (!isObject(attrs))
return;
for (const key of Object.keys(attrs)) {
if (attrs[key] === undefined) {
delete attrs[key];
}
}
};
const walk = (node) => {
if (!isObject(node))
return;
stripUndefined(node.attrs);
if (Array.isArray(node.marks)) {
for (const mark of node.marks) {
if (isObject(mark))
stripUndefined(mark.attrs);
}
}
if (Array.isArray(node.content)) {
for (const child of node.content) {
walk(child);
}
}
};
walk(out);
return out;
}
/**
* Diagnostics helper: walk the tree and return a human-readable path string for
* the FIRST attribute value (in any `node.attrs` or `mark.attrs`) that Yjs
* cannot store — i.e. `undefined`, a `function`, a `symbol`, or a `bigint`
* (e.g. `content[3].content[0].attrs.indent (undefined)`). Returns `null` when
* every attribute is storable. Null-safe.
*/
export function findUnstorableAttr(doc) {
const isUnstorable = (value) => {
if (value === undefined)
return "undefined";
const t = typeof value;
if (t === "function")
return "function";
if (t === "symbol")
return "symbol";
if (t === "bigint")
return "bigint";
return null;
};
// Check an attrs object; return the offending sub-path or null.
const checkAttrs = (attrs, basePath) => {
if (!isObject(attrs))
return null;
for (const key of Object.keys(attrs)) {
const kind = isUnstorable(attrs[key]);
if (kind != null)
return `${basePath}.${key} (${kind})`;
}
return null;
};
const walk = (node, path) => {
if (!isObject(node))
return null;
const attrHit = checkAttrs(node.attrs, `${path}.attrs`);
if (attrHit != null)
return attrHit;
if (Array.isArray(node.marks)) {
for (let i = 0; i < node.marks.length; i++) {
const markHit = checkAttrs(node.marks[i]?.attrs, `${path}.marks[${i}].attrs`);
if (markHit != null)
return markHit;
}
}
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const childHit = walk(node.content[i], `${path}.content[${i}]`);
if (childHit != null)
return childHit;
}
}
return null;
};
// The root doc node carries no useful index, so start the path at "doc".
if (!isObject(doc))
return null;
const attrHit = checkAttrs(doc.attrs, "attrs");
if (attrHit != null)
return attrHit;
if (Array.isArray(doc.content)) {
for (let i = 0; i < doc.content.length; i++) {
const childHit = walk(doc.content[i], `content[${i}]`);
if (childHit != null)
return childHit;
}
}
return null;
}
/**
* Table structural node types and the container each must live directly inside.
* Used by `insertNodeRelative` to splice rows/cells into the correct ancestor
* rather than blindly into the anchor's direct parent (which would corrupt the
* table's nesting).
*/
const STRUCTURAL_TYPES = new Set(["tableRow", "tableCell", "tableHeader"]);
const REQUIRED_CONTAINER = {
tableRow: "table",
tableCell: "tableRow",
tableHeader: "tableRow",
};
/**
* Find the index of the first TOP-LEVEL block whose plain text includes the
* anchor, with a markdown-stripping FALLBACK. Returns -1 when none matches.
*
* Two passes preserve "exact wins globally":
* - Pass 1: first block containing the verbatim `anchorText`.
* - Pass 2 (only if pass 1 found nothing): first block containing the
* markdown-stripped anchor, when stripping actually changed it.
*/
function findAnchorTextIndex(content, anchorText) {
if (!Array.isArray(content))
return -1;
// Pass 1: exact.
for (let i = 0; i < content.length; i++) {
if (blockPlainText(content[i]).includes(anchorText))
return i;
}
// Pass 2: markdown-stripped fallback.
const a = stripInlineMarkdown(anchorText);
if (a !== anchorText && a.length > 0) {
for (let i = 0; i < content.length; i++) {
if (blockPlainText(content[i]).includes(a))
return i;
}
}
return -1;
}
/**
* Locate an anchor and return its ancestor chain (from `doc` down to and
* including the matched node). Each chain entry is `{ node, index }` where
* `index` is the node's position inside its parent's `content` array (the root
* doc has index -1). Returns `null` when the anchor cannot be resolved.
*/
function findAnchorChain(doc, opts) {
if (!isObject(doc))
return null;
// DFS by id anywhere in the tree, accumulating the path.
if (opts.anchorNodeId != null) {
const targetId = opts.anchorNodeId;
const search = (node, index, trail) => {
if (!isObject(node))
return null;
const here = [...trail, { node, index }];
if (matchesId(node, targetId))
return here;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const hit = search(node.content[i], i, here);
if (hit != null)
return hit;
}
}
return null;
};
return search(doc, -1, []);
}
// By text: only top-level blocks are scanned (same rule as the JSON path).
// Exact match wins; a markdown-stripped fallback is tried only on a miss.
if (opts.anchorText != null && Array.isArray(doc.content)) {
const i = findAnchorTextIndex(doc.content, opts.anchorText);
if (i !== -1) {
return [
{ node: doc, index: -1 },
{ node: doc.content[i], index: i },
];
}
}
return null;
}
/**
* Insert a deep clone of `node` relative to an anchor.
*
* - position "append": push the node onto the top-level `doc.content`.
* - position "before"/"after": locate the anchor and splice the node into the
* anchor's parent `content` array immediately before / after it.
*
* Anchor resolution for before/after:
* - if `anchorNodeId` is given, find the node with `attrs.id === anchorNodeId`
* anywhere in the tree (recursive);
* - otherwise, if `anchorText` is given, scan only TOP-LEVEL `doc.content`
* blocks and pick the first whose `blockPlainText` includes `anchorText`.
*
* Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is
* false when the anchor could not be resolved (the doc is returned unchanged
* apart from being cloned).
*/
export function insertNodeRelative(doc, node, opts) {
const out = clone(doc);
const fresh = clone(node);
// Defensive: stay null-safe like the other exports — a missing opts means
// there is nothing actionable to do.
if (!isObject(opts))
return { doc: out, inserted: false };
const isStructural = isObject(node) && STRUCTURAL_TYPES.has(node.type);
// "append": top-level push.
if (opts.position === "append") {
// Structural table nodes (tableRow/tableCell/tableHeader) cannot live at the
// top level — appending one would produce invalid nesting.
if (isStructural) {
throw new Error(`insert_node: cannot append a ${node.type} at the top level; use ` +
`position before/after with an anchor inside the target table`);
}
if (isObject(out)) {
if (!Array.isArray(out.content))
out.content = [];
out.content.push(fresh);
return { doc: out, inserted: true };
}
return { doc: out, inserted: false };
}
const offset = opts.position === "after" ? 1 : 0;
// Structural insert (before/after a tableRow/tableCell/tableHeader): splice
// into the nearest enclosing table/tableRow rather than the anchor's direct
// parent, so the row/cell lands at the correct level of the table.
if (isStructural) {
const containerType = REQUIRED_CONTAINER[node.type];
const chain = findAnchorChain(out, opts);
// Anchor not resolved at all — keep the existing "anchor not found" path.
if (chain == null)
return { doc: out, inserted: false };
// Find the DEEPEST ancestor (including the anchor itself) of the required
// container type.
let containerIdx = -1;
for (let i = chain.length - 1; i >= 0; i--) {
if (isObject(chain[i].node) && chain[i].node.type === containerType) {
containerIdx = i;
break;
}
}
if (containerIdx === -1) {
throw new Error(`insert_node: cannot insert a ${node.type} here — the anchor is not ` +
`inside a ${containerType}. Anchor on a cell's text or a block id ` +
`that lives inside the target table.`);
}
const container = chain[containerIdx].node;
if (!Array.isArray(container.content))
container.content = [];
if (containerIdx === chain.length - 1) {
// The matched container IS the anchor node itself (e.g. anchorText
// resolved to the table block): append/prepend within it.
const at = opts.position === "after" ? container.content.length : 0;
container.content.splice(at, 0, fresh);
}
else {
// The immediate child on the path leading to the anchor is the row/cell
// to splice next to.
const enclosingChildIndex = chain[containerIdx + 1].index;
container.content.splice(enclosingChildIndex + offset, 0, fresh);
}
return { doc: out, inserted: true };
}
// Resolve by id anywhere in the tree: splice into the parent content array.
if (opts.anchorNodeId != null) {
let inserted = false;
const walkContent = (content) => {
for (let i = 0; i < content.length; i++) {
const child = content[i];
if (matchesId(child, opts.anchorNodeId)) {
content.splice(i + offset, 0, fresh);
inserted = true;
return;
}
if (isObject(child) && Array.isArray(child.content)) {
walkContent(child.content);
if (inserted)
return;
}
}
};
if (isObject(out) && Array.isArray(out.content)) {
walkContent(out.content);
}
return { doc: out, inserted };
}
// Resolve by text: only top-level doc.content blocks are scanned. Exact
// match wins; a markdown-stripped fallback is tried only on a miss.
if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
const i = findAnchorTextIndex(out.content, opts.anchorText);
if (i !== -1) {
out.content.splice(i + offset, 0, fresh);
return { doc: out, inserted: true };
}
}
return { doc: out, inserted: false };
}
// ===========================================================================
// Table editing helpers
//
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
// table -> { type:"table", content:[tableRow...] }
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
// content:[paragraph...] }
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
// of the input doc (via `clone`) and never mutate their inputs.
// ===========================================================================
/**
* Collect EVERY `attrs.id` present anywhere in `node` into `used`. Used to seed
* `makeFreshId` so generated paragraph ids never collide with existing ones.
*/
function collectIds(node, used) {
if (!isObject(node))
return;
if (isObject(node.attrs) && typeof node.attrs.id === "string") {
used.add(node.attrs.id);
}
if (Array.isArray(node.content)) {
for (const child of node.content)
collectIds(child, used);
}
}
/**
* Fresh-id generator: returns a random Docmost-style id (12 chars from
* lowercase `a-z0-9`) that is not already in `used`, and records it. On the
* rare collision the id is regenerated. Callers rely on uniqueness, not on the
* exact string, so randomness is fine — and unlike a module-local counter it
* needs no reset and cannot become predictable across calls.
*/
function makeFreshId(used) {
const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
let id;
do {
id = "";
for (let i = 0; i < 12; i++) {
id += alphabet[Math.floor(Math.random() * alphabet.length)];
}
} while (used.has(id) || id === "");
used.add(id);
return id;
}
/**
* Resolve a table reference against an ALREADY-CLONED doc and return the LIVE
* table node (a reference inside `rootClone`, so the caller may mutate it) plus
* its index path. Returns null when no table matches.
*
* - `#<n>`: the top-level block at index `n`, only if its `type === "table"`.
* - otherwise: DFS for the node with `attrs.id === tableRef`, then walk UP its
* ancestor chain to the nearest `type === "table"` ancestor.
*/
function locateTable(rootClone, tableRef) {
if (!isObject(rootClone))
return null;
// "#<n>": index into the top-level content array; must be a table.
const indexMatch = typeof tableRef === "string" ? tableRef.match(/^#(\d+)$/) : null;
if (indexMatch) {
const index = Number(indexMatch[1]);
const block = Array.isArray(rootClone.content)
? rootClone.content[index]
: undefined;
if (isObject(block) && block.type === "table") {
return { table: block, path: [index] };
}
return null;
}
// Otherwise: DFS for attrs.id === tableRef, tracking the ancestor chain, then
// climb to the nearest enclosing table.
const search = (node, trail) => {
if (!isObject(node))
return null;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const here = [...trail, { node: child, index: i }];
if (matchesId(child, tableRef)) {
// Walk UP to the nearest table ancestor (including the match itself).
for (let j = here.length - 1; j >= 0; j--) {
if (isObject(here[j].node) && here[j].node.type === "table") {
return {
table: here[j].node,
path: here.slice(0, j + 1).map((e) => e.index),
};
}
}
return null; // id found but no enclosing table
}
const hit = search(child, here);
if (hit != null)
return hit;
}
}
return null;
};
return search(rootClone, []);
}
/** Build the plain-text → single-paragraph cell content used by all writers. */
function makeCellParagraph(id, text) {
return {
type: "paragraph",
attrs: { id, indent: 0 },
// Empty string → a paragraph with an empty content array.
content: text ? [{ type: "text", text }] : [],
};
}
/**
* Read a table as a matrix. Returns null when `tableRef` resolves to no table.
*
* - `rows`/`cols`: the table's row count and the column count of its FIRST row.
* Tables may be ragged (rows of differing length), so `cols` reflects only
* row 0; use the per-row length of `cells`/`cellIds` for each row's actual
* width.
* - `cells`: `string[][]` of each cell's `blockPlainText`.
* - `cellIds`: `(string|null)[][]` of each cell's FIRST paragraph id (or null),
* so callers can `patch_node` a cell for rich-formatted edits.
* - `path`: index path of the table within the doc.
*/
export function readTable(doc, tableRef) {
const root = clone(doc);
const located = locateTable(root, tableRef);
if (located == null)
return null;
const { table, path } = located;
const rowNodes = Array.isArray(table.content) ? table.content : [];
const rows = rowNodes.length;
const cols = rowNodes[0]?.content?.length ?? 0;
const cells = [];
const cellIds = [];
for (const rowNode of rowNodes) {
const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : [];
const rowText = [];
const rowIds = [];
for (const cellNode of cellNodes) {
rowText.push(blockPlainText(cellNode));
// The cell's first paragraph carries the id used for patch_node.
const firstPara = Array.isArray(cellNode?.content)
? cellNode.content[0]
: undefined;
const id = isObject(firstPara) && isObject(firstPara.attrs)
? (firstPara.attrs.id ?? null)
: null;
rowIds.push(id);
}
cells.push(rowText);
cellIds.push(rowIds);
}
return { rows, cols, cells, cellIds, path };
}
/**
* Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`.
*
* The row is padded to the table's column count (`cells[i] ?? ""`); supplying
* MORE cells than columns throws. Each new cell copies `colwidth` for its
* column from the header row when present, gets a fresh-id paragraph, and a
* `colspan:1, rowspan:1` attrs. `index` (when an integer in `[0, rows]`) splices
* the row there; otherwise the row is appended at the end.
*/
export function insertTableRow(doc, tableRef, cells, index) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
return { doc: out, inserted: false };
const { table } = located;
if (!Array.isArray(table.content))
table.content = [];
const rows = table.content.length;
const headerRow = table.content[0];
const headerCells = Array.isArray(headerRow?.content)
? headerRow.content
: [];
// Column count is the WIDEST existing row, so the guard below stays
// meaningful for ragged tables and the new row matches the table's width.
// Fall back to the supplied cell count only when the table has no rows.
let colCount = 0;
for (const r of table.content) {
if (isObject(r) && Array.isArray(r.content))
colCount = Math.max(colCount, r.content.length);
}
if (colCount === 0)
colCount = Array.isArray(cells) ? cells.length : 0;
if (Array.isArray(cells) && cells.length > colCount) {
throw new Error(`table_insert_row: got ${cells.length} cell(s) but the table has ${colCount} column(s)`);
}
// Resolve the landing index up front so the cell-type decision and the splice
// below agree: a valid integer in [0, rows] splices there, else we append.
const landingIndex = typeof index === "number" &&
Number.isInteger(index) &&
index >= 0 &&
index <= rows
? index
: rows;
// Seed the id generator with every id already in the doc so the new cell
// paragraph ids are unique within the whole document.
const used = new Set();
collectIds(out, used);
const newCells = [];
for (let i = 0; i < colCount; i++) {
const text = (Array.isArray(cells) ? cells[i] : undefined) ?? "";
const attrs = { colspan: 1, rowspan: 1 };
// Copy this column's colwidth from the header row's cell when present.
const colwidth = headerCells[i]?.attrs?.colwidth;
if (colwidth !== undefined)
attrs.colwidth = colwidth;
// A row landing at index 0 becomes the new header row, so inherit the
// current header cell's type per column (Docmost uses "tableHeader" there);
// every other position is a plain data cell.
const cellType = landingIndex === 0 ? (headerCells[i]?.type ?? "tableCell") : "tableCell";
newCells.push({
type: cellType,
attrs,
content: [makeCellParagraph(makeFreshId(used), text)],
});
}
const newRow = { type: "tableRow", content: newCells };
// Splice at the resolved landing index (append when index was omitted/invalid).
table.content.splice(landingIndex, 0, newRow);
return { doc: out, inserted: true };
}
/**
* Delete the row at 0-based `index` from a table. Returns `{ doc, deleted }`.
* `deleted` is false only when the table cannot be located. Throws on an
* out-of-range index, and refuses to delete the table's only row.
*/
export function deleteTableRow(doc, tableRef, index) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
return { doc: out, deleted: false };
const { table } = located;
if (!Array.isArray(table.content))
table.content = [];
const rows = table.content.length;
if (!Number.isInteger(index) || index < 0 || index >= rows) {
throw new Error(`table_delete_row: row index ${index} out of range (table has ${rows} row(s))`);
}
if (rows <= 1) {
throw new Error("table_delete_row: refusing to delete the only row of the table");
}
table.content.splice(index, 1);
return { doc: out, deleted: true };
}
/**
* Set the plain-text content of cell `[row, col]` (0-based) to `text`. Returns
* `{ doc, updated }`; `updated` is false only when the table cannot be located.
* Throws when `row`/`col` is out of range. The cell's own attrs (colspan/
* rowspan/colwidth) are preserved; its content becomes a single text paragraph
* that reuses the cell's existing first-paragraph id when present, else a fresh
* one.
*/
export function updateTableCell(doc, tableRef, row, col, text) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
return { doc: out, updated: false };
const { table } = located;
const rowNodes = Array.isArray(table.content) ? table.content : [];
const rows = rowNodes.length;
const rowNode = rowNodes[row];
const cols = isObject(rowNode) && Array.isArray(rowNode.content)
? rowNode.content.length
: 0;
if (!Number.isInteger(row) ||
row < 0 ||
row >= rows ||
!Number.isInteger(col) ||
col < 0 ||
col >= cols) {
throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
}
const cellNode = rowNode.content[col];
// Reuse the cell's existing first-paragraph id, or mint a fresh unique one.
const existingPara = Array.isArray(cellNode?.content)
? cellNode.content[0]
: undefined;
let id = isObject(existingPara) && isObject(existingPara.attrs)
? existingPara.attrs.id
: undefined;
if (typeof id !== "string" || id.length === 0) {
const used = new Set();
collectIds(out, used);
id = makeFreshId(used);
}
cellNode.content = [makeCellParagraph(id, text)];
return { doc: out, updated: true };
}

View File

@@ -1,31 +0,0 @@
/**
* Per-page async mutex.
*
* Content writes over the collaboration websocket must never overlap for the
* same page: two concurrent full-document replaces would race on the live Yjs
* fragment. We serialize them with a per-pageId promise chain — each new
* operation waits for the previous one on that page to settle (success or
* failure) before it runs. Different pages never block each other.
*/
const chains = new Map();
// The returned promise carries the real result/rejection of `fn` and MUST be
// awaited/handled by the caller; only the internal chaining tail swallows
// errors (purely to gate ordering).
export function withPageLock(pageId, fn) {
// Wait for the previous op on this page; swallow its error so a failure does
// not poison the queue for the next caller.
const prev = (chains.get(pageId) ?? Promise.resolve()).catch(() => { });
const run = prev.then(fn);
// The tail used for chaining must also swallow errors (it only gates order).
const tail = run.catch(() => { });
chains.set(pageId, tail);
// Drop the map entry once this op is the tail and has settled, to avoid an
// unbounded map of resolved promises.
tail.then(() => {
if (chains.get(pageId) === tail) {
chains.delete(pageId);
}
});
// Callers get the real result/rejection of fn.
return run;
}

View File

@@ -1,15 +0,0 @@
// The model sometimes serializes a ProseMirror node arg as a JSON string
// instead of an object. Normalize: parse a string to an object (throwing on
// invalid JSON), pass an object through unchanged. Shared by patch_node /
// insert_node (and the analogous update_page_json content parsing).
export function parseNodeArg(node, errMsg = "node was a string but not valid JSON") {
if (typeof node === "string") {
try {
return JSON.parse(node);
}
catch {
throw new Error(errMsg);
}
}
return node;
}

View File

@@ -1,108 +0,0 @@
/**
* Locator normalization: strip inline markdown wrappers and trailing
* decoration from a LOCATOR string so a find/anchor that the model wrote with
* markdown (or a stray emoji) can still match the document's plain text.
*
* This is used ONLY as a fallback for LOCATING (after an exact match fails);
* it is never applied to replacement text or inserted node content, so no
* formatting is ever lost.
*/
/** Maximum unwrap passes, so pathological/nested input cannot loop forever. */
const MAX_PASSES = 8;
/**
* Inline emphasis/code/strikethrough wrappers, strong BEFORE emphasis so
* `**x**` collapses to `x` rather than leaving a stray `*x*`. Each pattern is
* non-greedy and capture group 1 is the inner text. Applied repeatedly until
* the string stops changing (nested wrappers like `**_x_**`).
*/
const WRAPPER_PATTERNS = [
/\*\*([^*]+?)\*\*/g, // **x**
/__([^_]+?)__/g, // __x__
/~~([^~]+?)~~/g, // ~~x~~
/\*([^*]+?)\*/g, // *x*
/_([^_]+?)_/g, // _x_
/``([^`]+?)``/g, // ``x``
/`([^`]+?)`/g, // `x`
];
/** Links/images -> their visible text. `!?` covers both `[t](u)` and `![a](s)`. */
const LINK_IMAGE_RE = /!?\[([^\]]*)\]\([^)]*\)/g;
/**
* Apply ONLY the two balanced/link passes shared by both normalizers: first
* collapse links/images to their visible text, then collapse balanced inline
* wrappers repeatedly until stable. Does NOT trim decoration, does NOT guard
* against an empty result — it returns exactly the transformed string.
*/
function stripWrappersAndLinks(s) {
// 1. Links/images -> their visible text.
let out = s.replace(LINK_IMAGE_RE, "$1");
// 2. Strip balanced wrappers, repeating until the string is stable so nested
// wrappers (`**_x_**`) and adjacent runs both collapse.
for (let pass = 0; pass < MAX_PASSES; pass++) {
const before = out;
for (const re of WRAPPER_PATTERNS) {
out = out.replace(re, "$1");
}
if (out === before)
break;
}
return out;
}
/**
* STRICT formatting detector — distinct from the lenient locator
* normalization below. It strips ONLY what unambiguously is markdown markup:
* 1. links/images `[text](url)` -> `text`, `![alt](src)` -> `alt`, and
* 2. balanced inline `**`/`__`/`~~`/`*`/`_`/`` ` `` wrappers (repeat-until-stable),
* and DELIBERATELY does NOT trim leading/trailing whitespace, emoji, or lone
* marker chars (the lenient extras `stripInlineMarkdown` does in its step 3).
*
* It exists ONLY to recognize formatting-vs-plain INTENT in `applyTextEdits`
* (deciding whether find/replace differ purely by markdown markers). Because it
* skips the lenient trimming, ordinary plain-text edits are NOT misread as
* formatting: a trailing-space trim, snake_case (`my_var_name`), math (`2 * 3`),
* and identifiers/URLs with underscores all stay untouched here (their `_x_` /
* `*x*` runs are only collapsed when actually balanced, and even then they are
* compared symmetrically, so plain text never collapses to a different string).
*
* Do NOT use this for LOCATING — the locator fallback must keep using the
* lenient `stripInlineMarkdown` (it trims stray decoration so a find still
* matches the document's plain text).
*/
export function stripBalancedWrappers(s) {
if (typeof s !== "string" || s.length === 0)
return s;
return stripWrappersAndLinks(s);
}
/**
* Conservatively strip inline markdown from a locator string.
*
* Deterministic, order-fixed steps:
* 1. Links/images: `[text](url)` -> `text`, `![alt](src)` -> `alt`.
* 2. Balanced inline wrappers (strong before emphasis, code, strikethrough),
* applied repeatedly until stable for nested cases.
* 3. Trim leading/trailing decoration only: whitespace, leftover marker chars
* (`* _ ~ \``) and emoji. Letters/digits and sentence punctuation (`.`/`,`
* etc.) are NEVER trimmed.
*
* If the result is empty (e.g. the input was only markers like `***`), the
* ORIGINAL string is returned so a locator can never normalize down to "" and
* match everything.
*/
export function stripInlineMarkdown(s) {
if (typeof s !== "string" || s.length === 0)
return s;
// 1 + 2. Shared link/image and balanced-wrapper passes.
let out = stripWrappersAndLinks(s);
// 3. Trim leading/trailing decoration: whitespace, leftover markdown markers,
// and emoji (Extended_Pictographic plus the VS16 / ZWJ joiners, plus the
// regional-indicator range U+1F1E6–U+1F1FF for flag emoji, which are NOT
// Extended_Pictographic). The `u` flag enables the Unicode property escape.
// Anchored runs only — interior text and sentence punctuation are untouched.
const DECORATION = "[\\s*_~\\x60\\p{Extended_Pictographic}\\u{1F1E6}-\\u{1F1FF}\\u{FE0F}\\u{200D}]+";
out = out
.replace(new RegExp("^" + DECORATION, "u"), "")
.replace(new RegExp(DECORATION + "$", "u"), "");
// 4. Never normalize a locator down to nothing.
if (out.length === 0)
return s;
return out;
}

View File

@@ -1,473 +0,0 @@
/**
* Pure, network-free transform primitives for a ProseMirror/TipTap document
* tree, plus one higher-level orchestration (commentsToFootnotes).
*
* A ProseMirror node here is a plain JSON object of the shape produced by
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
* `content` array; callouts, tables, lists all hold their children in
* `content`, so a single recursive walk reaches them all.
*
* Conventions (matching node-ops.ts):
* - functions that produce a new document deep-clone their input and return a
* `{ doc, ... }` object; the caller's objects are never mutated.
* - functions are defensively null-safe.
* - `marks` arrays are preserved verbatim when fragments are split/reordered.
*/
import { blockPlainText } from "./node-ops.js";
/** Deep-clone a JSON-serializable value without mutating the original. */
function clone(value) {
if (typeof structuredClone === "function") {
return structuredClone(value);
}
// Fallback for environments without structuredClone.
return JSON.parse(JSON.stringify(value));
}
/** True if `value` is a non-null object (and not an array). */
function isObject(value) {
return value != null && typeof value === "object" && !Array.isArray(value);
}
/**
* Plain text of a node (re-export of node-ops' blockPlainText so transform
* authors have a single import surface). Recurses through nested content.
*/
export function blockText(node) {
return blockPlainText(node);
}
/**
* Depth-first visit of every node in the tree, including the root and the
* nested content of callouts, tables, lists, etc. `fn` is called once per node.
* Null-safe: a nullish or non-object node is ignored.
*/
export function walk(node, fn) {
if (!isObject(node))
return;
fn(node);
if (Array.isArray(node.content)) {
for (const child of node.content) {
walk(child, fn);
}
}
}
/**
* Find the FIRST node (depth-first) matching `predicate`, anywhere in the tree.
* Works even when the node carries no `attrs.id` (it searches the raw tree, not
* an id index). Returns the live node reference inside `doc` (NOT a clone), or
* null when nothing matches. Typical use: `getList(doc, n => n.type ===
* "orderedList")`.
*/
export function getList(doc, predicate) {
let found = null;
walk(doc, (node) => {
if (found == null && predicate(node)) {
found = node;
}
});
return found;
}
/**
* Insert `marker` as a PLAIN (unmarked) text run right after the first
* occurrence of `anchor`.
*
* The text run that contains the END of the anchor is SPLIT at the anchor end,
* so all existing marks (links, bold, ...) on the surrounding text are
* preserved, while the inserted marker run carries NO marks. The marker is
* inserted as a leading-space-padded run (`" " + marker`) so it visually
* separates from the preceding word.
*
* The anchor is matched against the concatenated plain text of each top-level
* block (so an anchor that spans several text/mark runs still matches). The
* insertion happens inside the inline content array that holds the anchor's
* final character.
*
* Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is
* false when the anchor text was not found in any in-scope block.
*/
export function insertMarkerAfter(doc, anchor, marker, opts = {}) {
const out = clone(doc);
if (!isObject(out) || !Array.isArray(out.content) || !anchor) {
return { doc: out, inserted: false };
}
const limit = typeof opts.beforeBlock === "number"
? Math.min(opts.beforeBlock, out.content.length)
: out.content.length;
for (let b = 0; b < limit; b++) {
const block = out.content[b];
if (!isObject(block))
continue;
// Quick reject: skip blocks whose plain text cannot contain the anchor.
if (!blockPlainText(block).includes(anchor))
continue;
// Walk the inline content arrays inside this block, tracking a running
// character offset so we can locate the inline array + text run that holds
// the END of the anchor's first occurrence.
let inserted = false;
let offset = 0; // characters of plain text seen so far in this block
const anchorEnd = (() => blockPlainText(block).indexOf(anchor) + anchor.length)();
// Recurse into inline-bearing containers (paragraph, heading, table cell,
// callout child paragraphs, ...). We only split inside an array of inline
// nodes (text/inline atoms); the FIRST array whose cumulative range covers
// anchorEnd receives the split + marker.
const visit = (container) => {
if (inserted || !isObject(container) || !Array.isArray(container.content)) {
return;
}
const inline = container.content;
// Detect whether this array is an inline array (contains text nodes).
const hasText = inline.some((n) => isObject(n) && n.type === "text");
if (hasText) {
for (let i = 0; i < inline.length; i++) {
const n = inline[i];
const len = isObject(n) ? blockPlainText(n).length : 0;
const runStart = offset;
const runEnd = offset + len;
// The run that contains the anchor end (anchorEnd lands inside this
// run, i.e. runStart < anchorEnd <= runEnd) is the split point.
if (!inserted &&
isObject(n) &&
n.type === "text" &&
typeof n.text === "string" &&
anchorEnd > runStart &&
anchorEnd <= runEnd) {
const cut = anchorEnd - runStart; // split index within this text run
const before = n.text.slice(0, cut);
const after = n.text.slice(cut);
const marks = Array.isArray(n.marks) ? n.marks : [];
const parts = [];
if (before.length > 0) {
parts.push({ ...n, text: before, marks: [...marks] });
}
// Marker is a PLAIN run: no marks copied. Leading space separates it.
parts.push({ type: "text", text: " " + marker });
if (after.length > 0) {
parts.push({ ...n, text: after, marks: [...marks] });
}
inline.splice(i, 1, ...parts);
inserted = true;
return;
}
offset = runEnd;
}
}
else {
// Not an inline array: recurse into children (e.g. callout -> paragraph).
for (const child of inline) {
visit(child);
if (inserted)
return;
}
}
};
visit(block);
if (inserted) {
return { doc: out, inserted: true };
}
// If the block matched in plain text but we could not split (e.g. anchor
// lands inside an atom), fall through to the next block rather than failing.
}
return { doc: out, inserted: false };
}
/**
* In the disclaimer callout, replace a `[1]…[K]` range marker with `[1]…[n]`.
*
* Docmost translations use a callout that states the footnote range, e.g.
* "[1]…[5]". When the number of notes changes, this rewrites the trailing
* number of any `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis) occurrence found in a
* callout's text nodes to `[1]…[n]`. Operates on a clone; returns
* `{ doc, changed }` where `changed` is the number of text nodes rewritten.
*/
export function setCalloutRange(doc, n) {
const out = clone(doc);
let changed = 0;
// Match "[1]" + (… or ...) + "[<digits>]"; rewrite the last number to n.
const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g;
walk(out, (node) => {
if (node.type === "callout") {
walk(node, (inner) => {
if (inner.type === "text" &&
typeof inner.text === "string" &&
rangeRe.test(inner.text)) {
rangeRe.lastIndex = 0;
inner.text = inner.text.replace(rangeRe, `$1${n}$2`);
changed++;
}
rangeRe.lastIndex = 0;
});
}
});
return { doc: out, changed };
}
/**
* Generate a short random id for a new block's `attrs.id`. Docmost uses nanoid;
* a base36 random string is sufficient here (uniqueness within one document).
*/
function freshId() {
return (Math.random().toString(36).slice(2, 12) +
Math.random().toString(36).slice(2, 6));
}
/**
* Wrap inline ProseMirror nodes in a list item:
* { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content: inlineNodes }] }
* with a fresh random block id on the paragraph. The inline nodes are cloned so
* the result shares no references with the caller's input.
*/
export function noteItem(inlineNodes) {
const content = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
return {
type: "listItem",
content: [
{
type: "paragraph",
attrs: { id: freshId() },
content,
},
],
};
}
/**
* Wrap inline ProseMirror nodes in a real footnoteDefinition node keyed by id:
* { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", content }] }
* (mirrors the editor-ext / docmost-schema FootnoteDefinition node).
*/
export function footnoteDefinition(id, inlineNodes) {
const content = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
return {
type: "footnoteDefinition",
attrs: { id },
content: [{ type: "paragraph", attrs: { id: freshId() }, content }],
};
}
/**
* Replace every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder in
* an inline content array with a real `footnoteReference` node, in reading
* order. `onMarker` is called for each replaced marker (with the original `[N]`
* number or the placeholder index) and returns the fresh footnote id to attach
* to the inserted node. Mutates `inline` in place.
*/
function replaceMarkersWithReferences(inline, onMarker) {
const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
for (let i = 0; i < inline.length; i++) {
const n = inline[i];
if (!isObject(n) || n.type !== "text" || typeof n.text !== "string") {
continue;
}
if (!re.test(n.text))
continue;
re.lastIndex = 0;
const marks = Array.isArray(n.marks) ? n.marks : [];
const parts = [];
let last = 0;
let m;
while ((m = re.exec(n.text)) !== null) {
if (m.index > last) {
parts.push({ ...n, text: n.text.slice(last, m.index), marks: [...marks] });
}
const oldNum = m[1] != null ? Number(m[1]) : undefined;
const phIdx = m[2] != null ? Number(m[2]) : undefined;
const fnId = onMarker({ oldNum, phIdx });
parts.push({ type: "footnoteReference", attrs: { id: fnId } });
last = m.index + m[0].length;
}
if (last < n.text.length) {
parts.push({ ...n, text: n.text.slice(last), marks: [...marks] });
}
// Drop any zero-length text runs the slicing may have produced.
const cleaned = parts.filter((p) => p.type !== "text" || (typeof p.text === "string" && p.text.length > 0));
inline.splice(i, 1, ...cleaned);
i += cleaned.length - 1;
}
}
/**
* Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
* ProseMirror nodes.
*
* A leading `комментарий: ` (case-insensitive) or `N. ` numeric prefix is
* stripped first. Then a minimal bold-split is applied: a leading
* `**bold lead**` run becomes a text node with a bold mark, and the remainder
* becomes a plain text node. This keeps the conversion synchronous (the
* transform sandbox runs synchronously) and dependency-free; the existing
* async markdownToProseMirror is intentionally NOT used here.
*/
export function mdToInlineNodes(markdown) {
let md = typeof markdown === "string" ? markdown : "";
// Strip a leading "комментарий: " prefix (case-insensitive) or a "N. " prefix.
md = md.replace(/^\s*комментарий\s*:\s*/i, "");
md = md.replace(/^\s*\d+\.\s+/, "");
md = md.trim();
if (md === "")
return [];
const nodes = [];
// Leading bold lead: **...** at the very start.
const leadMatch = /^\*\*([^*]+)\*\*\s*/.exec(md);
if (leadMatch) {
const leadText = leadMatch[1];
nodes.push({
type: "text",
text: leadText,
marks: [{ type: "bold" }],
});
const rest = md.slice(leadMatch[0].length);
if (rest.length > 0) {
// Preserve the separating space that followed the bold lead.
const sep = /^\*\*[^*]+\*\*(\s*)/.exec(md);
const spacing = sep ? sep[1] : "";
nodes.push({ type: "text", text: spacing + rest });
}
return nodes;
}
// No bold lead: emit the whole thing as a single plain text node, with any
// remaining **bold** spans split out inline.
return splitInlineBold(md);
}
/**
* Split a string with inline `**bold**` spans into text nodes, bolding the
* spans. Used as the no-lead fallback in mdToInlineNodes.
*/
function splitInlineBold(text) {
const nodes = [];
const re = /\*\*([^*]+)\*\*/g;
let last = 0;
let m;
while ((m = re.exec(text)) !== null) {
if (m.index > last) {
nodes.push({ type: "text", text: text.slice(last, m.index) });
}
nodes.push({ type: "text", text: m[1], marks: [{ type: "bold" }] });
last = m.index + m[0].length;
}
if (last < text.length) {
nodes.push({ type: "text", text: text.slice(last) });
}
return nodes.length > 0 ? nodes : [{ type: "text", text }];
}
/**
* Turn inline comments into numbered footnotes.
*
* For each inline comment that carries a `selection`:
* 1. insert a placeholder marker (a NUL-delimited "\u0000FN<i>\u0000"
* sentinel) right after the selection text in the BODY (before the
* notes heading);
* 2. build a note list item from the comment's markdown content.
*
* Then RENUMBER every footnote marker in the body by reading order: existing
* `[N]` markers and the new "\u0000FN<i>\u0000" placeholders are both replaced by a
* sequential `[seq]`, and the notes orderedList is reordered so each note lines
* up with its marker's reading-order position. Finally the disclaimer callout
* range is synced to the new note count.
*
* Returns `{ doc, consumed }` where `consumed` lists the ids of comments that
* were successfully anchored (their selection was found and a placeholder
* inserted). Operates on a clone of `doc`.
*/
export function commentsToFootnotes(doc, comments, opts = {}) {
let working = clone(doc);
const notesHeading = opts.notesHeading ?? "Примечания переводчика";
const top = Array.isArray(working.content) ? working.content : [];
const notesIdx = top.findIndex((n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading);
if (notesIdx < 0) {
throw new Error(`heading "${notesHeading}" not found`);
}
// The notes orderedList lives at or after the heading.
const notesList = top
.slice(notesIdx)
.find((n) => isObject(n) && n.type === "orderedList");
if (!notesList) {
throw new Error("notes orderedList not found");
}
const consumed = [];
const noteInlineByPh = new Map();
(Array.isArray(comments) ? comments : []).forEach((c, i) => {
if (!c || !c.selection)
return;
// Collision-proof sentinel delimited by NUL control chars, which never occur
// in real Docmost prose - so the marker regex cannot mistake any body text
// (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
// transient: the placeholder is inserted here and replaced by a
// footnoteReference node below; it never persists in a returned document.
const ph = `\u0000FN${i}\u0000`;
// insertMarkerAfter returns a NEW cloned doc; reassign `working`.
const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
beforeBlock: notesIdx,
});
if (!r.inserted)
return;
working = r.doc;
noteInlineByPh.set(ph, mdToInlineNodes(c.content));
consumed.push(c.id);
});
// Re-resolve references into the (possibly re-cloned) working doc.
const top2 = Array.isArray(working.content) ? working.content : [];
const notesIdx2 = top2.findIndex((n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading);
const oldListIndex = top2.findIndex((n) => isObject(n) && n.type === "orderedList");
const notesList2 = oldListIndex >= 0 ? top2[oldListIndex] : null;
if (!notesList2) {
throw new Error("notes orderedList not found");
}
// Inline content of each existing note (listItem -> paragraph -> inline).
const oldNoteInline = (Array.isArray(notesList2.content)
? notesList2.content
: []).map((item) => {
const para = isObject(item) && Array.isArray(item.content)
? item.content.find((c) => isObject(c) && c.type === "paragraph")
: null;
return para && Array.isArray(para.content) ? para.content : [];
});
// Walk the body in reading order, turning each "[N]" / placeholder marker into
// a real footnoteReference node and collecting its definition inline content.
const definitions = [];
const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
// Recursively visit inline arrays inside a block (paragraph, heading, callout
// child paragraphs, table cells, ...), preserving document reading order.
const visitInlineArrays = (container) => {
if (!isObject(container) || !Array.isArray(container.content))
return;
const hasText = container.content.some((n) => isObject(n) && n.type === "text");
if (hasText) {
replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
const fnId = freshId();
if (oldNum != null) {
const inline = oldNoteInline[oldNum - 1];
// Every existing body marker MUST map to a real note. An out-of-range
// marker means the document is internally inconsistent; fail loudly.
if (inline === undefined) {
throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`);
}
definitions.push(footnoteDefinition(fnId, inline));
}
else {
const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
definitions.push(footnoteDefinition(fnId, inline));
}
return fnId;
});
}
else {
for (const child of container.content)
visitInlineArrays(child);
}
};
const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
for (let i = 0; i < notesBoundary; i++) {
// Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a footnote
// marker and is synced separately by setCalloutRange.
if (isObject(top2[i]) &&
top2[i].type === "callout" &&
disclaimerRangeRe.test(blockText(top2[i]))) {
continue;
}
visitInlineArrays(top2[i]);
}
// Replace the old orderedList with a real footnotesList of the collected
// definitions (reading order). If there are no definitions, drop the list.
if (definitions.length > 0) {
top2[oldListIndex] = {
type: "footnotesList",
content: definitions,
};
}
else {
top2.splice(oldListIndex, 1);
}
// Sync the disclaimer callout range to the new note count.
const synced = setCalloutRange(working, definitions.length);
return { doc: synced.doc, consumed };
}

View File

@@ -1,89 +0,0 @@
/**
* Pure tree-builder: turn a flat array of sidebar-style page nodes (as produced
* by `enumerateSpacePages`) into a nested tree.
*
* Input: a flat array of nodes. Each node is expected to carry at least
* { id, slugId, title, position, parentPageId } (extra fields are ignored).
*
* Output: an array of ROOT nodes, each shaped as
* { id, slugId, title, children? }
* where `children` is the array of child nodes (same shape, recursively). The
* `children` key is OMITTED entirely when a node has no children — consistent
* with how `filterPage` omits an empty `subpages` array — to keep the payload
* lean (nesting alone conveys the structure; parentPageId/position/hasChildren
* are intentionally dropped from the output).
*
* Linking rule: a node is attached as a child of `parentPageId` only when that
* parent id is actually present in the input. Otherwise — including a null /
* undefined `parentPageId`, or a parent that was capped out of the bounded walk
* — the node is promoted to a ROOT. So "orphan whose parent is missing" is the
* defined behavior: it surfaces at the top level rather than disappearing.
*
* Ordering rule: the roots array and every `children` array are sorted ascending
* by the node's `position` string. The comparator is a plain code-unit (byte)
* comparison — NOT localeCompare — because the server orders sidebar pages by
* `collate "C"` (byte order), which a raw `<`/`>` compare approximates for the
* fractional-index ASCII keys (e.g. "a0", "a1"). Nodes with a missing/undefined
* `position` sort last.
*
* Pure: no I/O, no network, deterministic.
*/
export function buildPageTree(nodes) {
// Map id -> output node. Build the lean output shape up front.
const byId = new Map();
// Preserve the original position string for sorting (kept off the output).
const positionById = new Map();
for (const node of nodes) {
if (!node || typeof node !== "object" || !node.id)
continue;
// Defensive against duplicate ids: last one wins (overwrites the earlier
// entry). `enumerateSpacePages` already dedups, so this is belt-and-braces.
byId.set(node.id, {
id: node.id,
slugId: node.slugId,
title: node.title,
});
positionById.set(node.id, node.position);
}
// Stable comparator on the position string: code-unit order, missing last.
const byPosition = (aId, bId) => {
const a = positionById.get(aId);
const b = positionById.get(bId);
if (a === undefined || a === null)
return b === undefined || b === null ? 0 : 1;
if (b === undefined || b === null)
return -1;
if (a < b)
return -1;
if (a > b)
return 1;
return 0;
};
const roots = [];
const childrenIdsByParent = new Map();
for (const node of nodes) {
if (!node || typeof node !== "object" || !node.id)
continue;
const parentId = node.parentPageId;
// Child only when the parent is actually present in the input; otherwise
// (null/undefined parent, or parent capped out of the walk) -> root.
if (parentId && byId.has(parentId)) {
const list = childrenIdsByParent.get(parentId) ?? [];
list.push(node.id);
childrenIdsByParent.set(parentId, list);
}
else {
roots.push(node.id);
}
}
// Attach sorted children arrays to each parent, omitting empty ones.
for (const [parentId, childIds] of childrenIdsByParent) {
const parent = byId.get(parentId);
if (!parent)
continue;
childIds.sort(byPosition);
parent.children = childIds.map((id) => byId.get(id));
}
roots.sort(byPosition);
return roots.map((id) => byId.get(id));
}

View File

@@ -1,40 +0,0 @@
#!/usr/bin/env node
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { createDocmostMcpServer } from "./index.js";
// Standalone stdio entrypoint. This restores the original behavior of the
// package when run as a CLI (`docmost-mcp`): it reads credentials from the
// environment and serves the MCP protocol over stdin/stdout. The factory in
// index.ts stays side-effect-free; all the process/transport lifecycle lives
// here.
const API_URL = process.env.DOCMOST_API_URL;
const EMAIL = process.env.DOCMOST_EMAIL;
const PASSWORD = process.env.DOCMOST_PASSWORD;
if (!API_URL || !EMAIL || !PASSWORD) {
console.error("Error: DOCMOST_API_URL, DOCMOST_EMAIL, and DOCMOST_PASSWORD environment variables are required.");
process.exit(1);
}
async function run() {
// Global safety nets so a stray rejection/exception cannot silently kill
// the stdio server. Per-tool errors still flow through the SDK and are not
// affected by these handlers; these only catch errors raised OUTSIDE a tool
// call (e.g. a transient ws/collab socket "error" event). Such errors must
// NOT tear down the whole stdio server, so we log only and keep running.
// Genuine startup failures are still fatal via run().catch(...) below.
process.on("unhandledRejection", (reason) => {
console.error("Unhandled promise rejection:", reason);
});
process.on("uncaughtException", (error) => {
console.error("Uncaught exception:", error);
});
const server = createDocmostMcpServer({
apiUrl: API_URL,
email: EMAIL,
password: PASSWORD,
});
const transport = new StdioServerTransport();
await server.connect(transport);
}
run().catch((error) => {
console.error("Fatal error running server:", error);
process.exit(1);
});

View File

@@ -1,212 +0,0 @@
// Zod-agnostic shared tool-spec registry consumed by BOTH the zod-v3 MCP server
// (packages/mcp/src/index.ts) and the zod-v4 in-app AI-SDK service
// (apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts). Intentionally
// imports NO zod: each consumer passes its OWN zod namespace into buildShape,
// because the two packages are on different zod majors (v3 here, v4 in the
// server) and a zod schema object built with one major cannot be reused by the
// other. The builders below only touch z.string()/.min()/.optional()/.describe(),
// z.array() and z.object() — API identical across v3 and v4 — so a single
// builder works with either namespace.
//
// Only tools whose snake_case/camelCase name, input schema AND model-facing
// description are genuinely identical across both layers live here. Tools that
// diverge on purpose (security guardrails, tuned UX, "Reversible" framing on
// some write tools, different limits, hybrid-RRF search, etc.) stay defined
// per-layer and are NOT represented here.
export const SHARED_TOOL_SPECS = {
// --- no-argument read tools ---
getWorkspace: {
mcpName: 'get_workspace',
inAppKey: 'getWorkspace',
description: 'Fetch metadata about the current workspace (name, settings).',
},
listSpaces: {
mcpName: 'list_spaces',
inAppKey: 'listSpaces',
description: 'List the spaces the current user can access. Returns the array of ' +
'spaces (id, name, slug, ...).',
},
listShares: {
mcpName: 'list_shares',
inAppKey: 'listShares',
description: 'List all public shares in the workspace with page titles and public URLs.',
},
// --- single-pageId read tools ---
getPageJson: {
mcpName: 'get_page_json',
inAppKey: 'getPageJson',
description: 'Get page details with the raw ProseMirror JSON content (lossless: ' +
'includes block ids, callouts, tables, link/image attributes) plus the ' +
'slugId used in URLs. Use the block ids it returns to make precise ' +
'structural edits or surgical text edits without resending the page.',
buildShape: (z) => ({
pageId: z.string().min(1),
}),
},
getOutline: {
mcpName: 'get_outline',
inAppKey: 'getOutline',
description: "Return a COMPACT outline of a page's top-level blocks ({index, type, " +
'id, level, firstText}; tables add rows/cols/header; lists add item ' +
'count) WITHOUT the full document body. Use it to locate sections/tables ' +
'and grab block ids cheaply before fetching, patching or inserting ' +
'individual blocks.',
buildShape: (z) => ({
pageId: z.string().min(1),
}),
},
// --- two-id read tool ---
getNode: {
mcpName: 'get_node',
inAppKey: 'getNode',
description: "Fetch a single node's full ProseMirror subtree (lossless) without " +
'pulling the whole document. `nodeId` is a block id from the page ' +
'outline or page-JSON view (works for headings/paragraphs/callouts/images), OR ' +
'`#<index>` to fetch a top-level block by its outline index — use the ' +
'`#<index>` form for tables/rows/cells, which carry no id.',
buildShape: (z) => ({
pageId: z.string().min(1),
nodeId: z.string().min(1),
}),
},
// --- node delete ---
deleteNode: {
mcpName: 'delete_node',
inAppKey: 'deleteNode',
description: 'Remove a single block by its attrs.id (from the page-JSON view) WITHOUT ' +
'resending the whole document.',
buildShape: (z) => ({
pageId: z.string().min(1),
nodeId: z.string().min(1),
}),
},
// --- share management ---
unsharePage: {
mcpName: 'unshare_page',
inAppKey: 'unsharePage',
description: 'Remove the public share of a page (revokes the public URL).',
buildShape: (z) => ({
pageId: z.string().min(1).describe('ID of the page to unshare'),
}),
},
// --- version history ---
diffPageVersions: {
mcpName: 'diff_page_versions',
inAppKey: 'diffPageVersions',
description: 'Diff two versions of a page and return a Docmost-equivalent change set ' +
'(inserted/deleted text, integrity counts for images/links/tables/' +
'callouts/footnote markers, and a human-readable markdown summary). ' +
"`from`/`to` each accept a historyId, or null/'current' for the page's " +
'current content (defaults: from=current, to=current — pass a historyId ' +
'from the page-history list to compare against the live page).',
buildShape: (z) => ({
pageId: z.string().min(1),
from: z
.string()
.optional()
.describe("historyId, or 'current'/omit for current content"),
to: z
.string()
.optional()
.describe("historyId, or 'current'/omit for current content"),
}),
},
listPageHistory: {
mcpName: 'list_page_history',
inAppKey: 'listPageHistory',
description: "List a page's saved versions (Docmost auto-snapshots on every save), " +
'newest first, cursor-paginated. Returns { items, nextCursor }; each ' +
"item's id is the historyId to pass to the page diff or restore tools.",
buildShape: (z) => ({
pageId: z.string().min(1),
cursor: z
.string()
.optional()
.describe('Pagination cursor from a previous nextCursor'),
}),
},
restorePageVersion: {
mcpName: 'restore_page_version',
inAppKey: 'restorePageVersion',
description: 'Restore a page to a saved version: writes that version\'s content back ' +
'as the page\'s current content (Docmost has no restore endpoint, so ' +
'this creates a NEW history snapshot — the restore is itself revertible). ' +
'Get the historyId from the page-history list.',
buildShape: (z) => ({
historyId: z.string().min(1),
}),
},
// --- markdown round-trip ---
importPageMarkdown: {
mcpName: 'import_page_markdown',
inAppKey: 'importPageMarkdown',
description: "Replace a page's content from a self-contained Docmost-flavoured " +
'Markdown file produced by the page-Markdown export tool. Restores comment ' +
'highlight anchors and diagrams from their inline HTML. NOTE: comment ' +
'thread records are NOT created/updated/deleted on the server by this ' +
'tool — only the page body + inline comment marks are written; manage ' +
'comment threads via the comment tools/UI.',
buildShape: (z) => ({
pageId: z.string().min(1),
markdown: z.string().min(1),
}),
},
// --- server-side content copy ---
copyPageContent: {
mcpName: 'copy_page_content',
inAppKey: 'copyPageContent',
description: "Replace targetPageId's content with a copy of sourcePageId's content, " +
'entirely server-side — the document is NOT sent through the model. The ' +
'target keeps its own title and slug; only its body is replaced. Ideal ' +
"for 'make page A's content equal to B' or 'replace A with B but keep A's URL'.",
buildShape: (z) => ({
sourcePageId: z.string().min(1).describe('Page to copy content FROM'),
targetPageId: z
.string()
.min(1)
.describe('Page whose content is REPLACED (title/slug kept)'),
}),
},
// --- surgical text edit (folds in the documented drift-bug fix) ---
//
// CANONICAL description is the CORRECTED in-app wording: a formatting-only
// change is REFUSED into failed[] (not silently stripped-and-retried). The
// stale MCP claim that "Markdown wrappers are tolerated via a strip-and-retry
// fallback" is intentionally absent here.
editPageText: {
mcpName: 'edit_page_text',
inAppKey: 'editPageText',
description: "Surgical find/replace inside a page's text, preserving all block " +
'ids and marks. A find MAY cross bold/italic/link boundaries; the ' +
'replacement inherits marks from the unchanged common prefix/suffix ' +
'(so editing plain text next to a bold word keeps it bold, and ' +
'editing inside a bold word keeps the new text bold). Each find must ' +
'match exactly once unless replaceAll is set. The batch applies what ' +
'it can and returns applied[] + failed[] plus a verify change-report ' +
'(the text/marks/structure that ACTUALLY changed — read it to confirm ' +
'your edit landed; do not assume success); a fully-unmatched batch ' +
'writes nothing and errors. find and replace are LITERAL text, not ' +
'markdown. This tool edits plain text ONLY and CANNOT add or remove ' +
'formatting marks: a formatting change — find/replace that differ only ' +
'in markdown markers (e.g. find:"~~x~~", replace:"x"), or a replace ' +
'containing **bold**/~~strike~~/`code` wrappers — is REFUSED into ' +
'failed[]. To change bold/italic/strike/code/link, read the block as ' +
'page JSON and use a structural node patch/update to set its marks. ' +
'Examples: edits:[{find:"teh",replace:"the"}]; edits:[{find:"Hello ' +
'world",replace:"Hello there"}] (crosses a bold boundary).',
buildShape: (z) => ({
pageId: z.string().describe('ID of the page to edit'),
edits: z
.array(z.object({
find: z.string().describe('Exact text to find'),
replace: z.string().describe('Replacement text (may be empty)'),
replaceAll: z
.boolean()
.optional()
.describe('Replace every occurrence (default: must match once)'),
}))
.min(1)
.describe('List of find/replace operations, applied in order'),
}),
},
};

View File

@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../marked/bin/marked.js" "$@"
else
exec node "$basedir/../marked/bin/marked.js" "$@"
fi

17
packages/mcp/node_modules/.bin/tsc generated vendored
View File

@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../typescript/bin/tsc" "$@"
else
exec node "$basedir/../typescript/bin/tsc" "$@"
fi

View File

@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../typescript/bin/tsserver" "$@"
else
exec node "$basedir/../typescript/bin/tsserver" "$@"
fi

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@fellow+prosemirror-recreate-transform@1.2.3/node_modules/@fellow/prosemirror-recreate-transform

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@hocuspocus+provider@3.4.4_y-protocols@1.0.6_yjs@13.6.30_patch_hash=1ceeb66dba1f86545c9_bc01a253a9579de2451e72d099c2c9d7/node_modules/@hocuspocus/provider

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@hocuspocus+transformer@3.4.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4__3efc11776a1877aaec07b26dc33505b1/node_modules/@hocuspocus/transformer

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@modelcontextprotocol+sdk@1.29.0_@cfworker+json-schema@4.1.1_zod@3.25.76/node_modules/@modelcontextprotocol/sdk

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+core@3.20.4_@tiptap+pm@3.20.4/node_modules/@tiptap/core

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-highlight@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-highlight

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-image@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-image

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-link@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-link

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-subscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-subscript

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-superscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-superscript

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-task-item@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_f120fce1a3d9fc85461b67496f03c362/node_modules/@tiptap/extension-task-item

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-task-list@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_c94f69f56aee3556ec680ab7491aa1d4/node_modules/@tiptap/extension-task-list

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+html@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4_happy-dom@20.8.9/node_modules/@tiptap/html

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+starter-kit@3.20.4/node_modules/@tiptap/starter-kit

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@types+form-data@2.5.2/node_modules/@types/form-data

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@types+jsdom@27.0.0/node_modules/@types/jsdom

View File

@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@types+node@20.19.43/node_modules/@types/node

1
packages/mcp/node_modules/axios generated vendored
View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/axios@1.16.0/node_modules/axios

View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/form-data@4.0.5/node_modules/form-data

1
packages/mcp/node_modules/jsdom generated vendored
View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/jsdom@27.4.0_@noble+hashes@2.0.1/node_modules/jsdom

1
packages/mcp/node_modules/marked generated vendored
View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/marked@17.0.5/node_modules/marked

View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/typescript@5.9.3/node_modules/typescript

1
packages/mcp/node_modules/ws generated vendored
View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/ws@8.20.1/node_modules/ws

1
packages/mcp/node_modules/yjs generated vendored
View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/yjs@13.6.30_patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810/node_modules/yjs

1
packages/mcp/node_modules/zod generated vendored
View File

@@ -1 +0,0 @@
../../../node_modules/.pnpm/zod@3.25.76/node_modules/zod