chore(mcp): stop committing build/ and node_modules; build in CI/Docker
Same hygiene fix as git-sync (review #2), applied to packages/mcp which had the identical pre-existing problem: committed build/ (20 files) + node_modules (28, pnpm symlinks with a baked /home/claude store path). - git rm --cached packages/mcp/{build,node_modules}. - .gitignore: add packages/mcp/build/ (packages/*/node_modules/ already covers it). - Build where consumed: apps/server `pretest` and the CI Test workflow now build @docmost/mcp too. The Dockerfile builder already runs `pnpm build` (nx builds mcp) and already COPYs packages/mcp/build into the runtime image. Verified: wiped build/, rebuilt via `pnpm --filter @docmost/mcp build`; the mcp server suites (96 tests) pass against the freshly-built, non-committed output. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
12
.github/workflows/test.yml
vendored
12
.github/workflows/test.yml
vendored
@@ -36,12 +36,12 @@ jobs:
|
||||
- name: Build editor-ext
|
||||
run: pnpm --filter @docmost/editor-ext build
|
||||
|
||||
# git-sync is no longer committed in built form (build/ is gitignored), so
|
||||
# CI must compile it: the server suite imports the package via its built
|
||||
# build/index.js. The server pretest also builds it, but building here keeps
|
||||
# it explicit and independent of pnpm lifecycle ordering.
|
||||
- name: Build git-sync
|
||||
run: pnpm --filter @docmost/git-sync build
|
||||
# git-sync and mcp are no longer committed in built form (build/ is
|
||||
# gitignored), so CI must compile them: the server resolves both via their
|
||||
# built build/index.js. The server pretest also builds them, but building
|
||||
# here keeps it explicit and independent of pnpm lifecycle ordering.
|
||||
- name: Build git-sync and mcp
|
||||
run: pnpm --filter @docmost/git-sync build && pnpm --filter @docmost/mcp build
|
||||
|
||||
- name: Run tests
|
||||
run: pnpm -r test
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,6 +10,7 @@ data
|
||||
# via `pnpm build`, never committed, so src/ and prod can never silently diverge).
|
||||
packages/*/node_modules/
|
||||
packages/git-sync/build/
|
||||
packages/mcp/build/
|
||||
|
||||
# Logs
|
||||
logs
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
"migration:reset": "tsx src/database/migrate.ts down-to NO_MIGRATIONS",
|
||||
"migration:codegen": "kysely-codegen --dialect=postgres --camel-case --env-file=../../.env --out-file=./src/database/types/db.d.ts",
|
||||
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix",
|
||||
"pretest": "pnpm --filter @docmost/editor-ext build && pnpm --filter @docmost/git-sync build",
|
||||
"pretest": "pnpm --filter @docmost/editor-ext build && pnpm --filter @docmost/git-sync build && pnpm --filter @docmost/mcp build",
|
||||
"test": "jest",
|
||||
"test:int": "jest --config test/jest-integration.json",
|
||||
"test:watch": "jest --watch",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,133 +0,0 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
||||
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
|
||||
import { createDocmostMcpServer } from "./index.js";
|
||||
/**
 * Build a stateful Streamable-HTTP handler for the Docmost MCP server. The
 * embedding host bridges its raw Node req/res into `handleRequest`. One
 * McpServer + transport is created per MCP session and kept alive between
 * requests, keyed by the `mcp-session-id` header.
 *
 * @param {object|Function} config EITHER a static config object, passed
 *   unchanged to `createDocmostMcpServer`, OR a resolver `(req) => config`
 *   that is awaited once per session, on the `initialize` request.
 * @param {object} [options]
 * @param {Function} [options.identify] Optional `(req) => identity` (may be
 *   async). The value resolved at `initialize` is bound to the session; every
 *   later request for that session must resolve to a strictly equal (`===`)
 *   value or it is rejected with 401.
 * @returns {{ handleRequest: Function }} `handleRequest(req, res, parsedBody)`
 *   routes one HTTP request to the owning session transport, or answers with a
 *   JSON-RPC error (400 for a missing/unknown session, 401 for a failed config
 *   or identity resolution).
 */
export function createMcpHttpHandler(config, options = {}) {
    // Session state lives in Maps rather than plain objects: the session id is
    // a client-supplied header, and with `{}` an id such as "__proto__" or
    // "constructor" would resolve to an inherited Object.prototype member and
    // be mistaken for a live transport / bound identity.
    // One transport (and one McpServer) per MCP session, keyed by session id.
    const transports = new Map();
    // Last activity timestamp (ms since epoch) per session id, for idle eviction.
    const lastSeen = new Map();
    // Anti-session-fixation: the opaque identity key bound to each session at
    // initialize. A later request for that session whose key differs is rejected.
    const sessionIdentity = new Map();

    // Drop every trace of a session in one place so the three stores cannot
    // drift apart (previously `lastSeen` was never cleared when a transport
    // closed outside the idle sweep, so its entries were never removed).
    const forgetSession = (sid) => {
        transports.delete(sid);
        lastSeen.delete(sid);
        sessionIdentity.delete(sid);
    };

    // Write a JSON-RPC error and end the response. Used for the 400/401 paths so
    // every early rejection is a well-formed JSON-RPC error, not a torn response.
    const sendJsonRpcError = (res, statusCode, code, message) => {
        res.statusCode = statusCode;
        res.setHeader("Content-Type", "application/json");
        res.end(JSON.stringify({
            jsonrpc: "2.0",
            error: { code, message },
            id: null,
        }));
    };

    // Idle session TTL (ms): a session with no activity for this long is evicted.
    // Defaults to 30 min; overridable via MCP_SESSION_IDLE_MS (positive integer).
    const idleTtlMs = (() => {
        const parsed = Number.parseInt(process.env.MCP_SESSION_IDLE_MS ?? "", 10);
        return Number.isFinite(parsed) && parsed > 0 ? parsed : 30 * 60 * 1000;
    })();

    // Periodically evict sessions idle longer than the TTL. The session is
    // unregistered BEFORE its transport is closed, so it stops being routable
    // immediately and can never be reached with its identity binding already
    // gone, even if close() is slow or fails.
    const sweepIntervalMs = 5 * 60 * 1000;
    const sweepTimer = setInterval(() => {
        const now = Date.now();
        // Snapshot the entries: forgetSession mutates the Map while we iterate.
        for (const [sid, idleTransport] of [...transports]) {
            if (now - (lastSeen.get(sid) ?? 0) <= idleTtlMs) {
                continue;
            }
            forgetSession(sid);
            // Best-effort close. The rejection is caught so a failing transport
            // cannot surface as an unhandled promise rejection.
            Promise.resolve()
                .then(() => idleTransport.close())
                .catch((err) => {
                    console.error("MCP idle session close failed:", err);
                });
        }
    }, sweepIntervalMs);
    // unref() so this timer never keeps the process alive.
    sweepTimer.unref();

    async function handleRequest(req, res, parsedBody) {
        const sessionId = req.headers["mcp-session-id"];
        const method = (req.method || "GET").toUpperCase();
        let transport = sessionId ? transports.get(sessionId) : undefined;

        if (method === "POST" && !transport) {
            // A new session may only be created by an initialize request without a
            // session id.
            if (sessionId || !isInitializeRequest(parsedBody)) {
                sendJsonRpcError(res, 400, -32000, "Bad Request: no valid session ID provided");
                return;
            }
            // Resolve the per-session config from the request when a resolver was
            // supplied; otherwise use the static config unchanged. The resolver (or
            // identify) may throw, e.g. on bad credentials: answer with a clean 401
            // and never create a session.
            let sessionConfig;
            let identity;
            try {
                sessionConfig =
                    typeof config === "function" ? await config(req) : config;
                if (options.identify) {
                    identity = await options.identify(req);
                }
            }
            catch (err) {
                sendJsonRpcError(res, 401, -32001, err instanceof Error ? err.message : "Unauthorized");
                return;
            }
            transport = new StreamableHTTPServerTransport({
                sessionIdGenerator: () => randomUUID(),
                onsessioninitialized: (sid) => {
                    transports.set(sid, transport);
                    lastSeen.set(sid, Date.now());
                    // Bind the resolved identity to the new session id for anti-fixation.
                    if (identity !== undefined) {
                        sessionIdentity.set(sid, identity);
                    }
                },
            });
            transport.onclose = () => {
                const sid = transport.sessionId;
                if (sid) {
                    forgetSession(sid);
                }
            };
            const server = createDocmostMcpServer(sessionConfig);
            await server.connect(transport);
            await transport.handleRequest(req, res, parsedBody);
            return;
        }

        if (!transport) {
            sendJsonRpcError(res, 400, -32000, "Bad Request: no valid session ID provided");
            return;
        }

        // Anti-session-fixation: a request reusing an existing session id must
        // present credentials/token that resolve to the SAME identity bound at
        // initialize, otherwise reject with 401. This prevents hijacking another
        // user's established session by replaying its session id with different
        // credentials.
        if (options.identify && sessionIdentity.has(sessionId)) {
            let presented;
            try {
                presented = await options.identify(req);
            }
            catch (err) {
                sendJsonRpcError(res, 401, -32001, err instanceof Error ? err.message : "Unauthorized");
                return;
            }
            if (presented !== sessionIdentity.get(sessionId)) {
                sendJsonRpcError(res, 401, -32001, "Credentials do not match the user that owns this MCP session.");
                return;
            }
        }

        // Routing to an existing transport: refresh its idle timestamp. A
        // transport is only ever looked up with a truthy session id, so
        // `sessionId` is set here.
        lastSeen.set(sessionId, Date.now());
        await transport.handleRequest(req, res, parsedBody);
    }

    return { handleRequest };
}
|
||||
@@ -1,691 +0,0 @@
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { z } from "zod";
|
||||
import { readFileSync } from "fs";
|
||||
import { fileURLToPath } from "url";
|
||||
import { dirname, join } from "path";
|
||||
import { DocmostClient } from "./client.js";
|
||||
import { parseNodeArg } from "./lib/parse-node-arg.js";
|
||||
import { SHARED_TOOL_SPECS } from "./tool-specs.js";
|
||||
// Re-export the client and its config type so embedding hosts (e.g. the gitmost
|
||||
// NestJS server) can `import('@docmost/mcp')` and construct a DocmostClient
|
||||
// directly — for the credentials variant OR the per-user getToken variant.
|
||||
export { DocmostClient } from "./client.js";
|
||||
// Re-export the zod-agnostic shared tool-spec registry so the in-app AI-SDK
|
||||
// service can read it off the loaded module (it cannot import the ESM package's
|
||||
// internals directly; it goes through loadDocmostMcp()).
|
||||
export { SHARED_TOOL_SPECS } from "./tool-specs.js";
|
||||
// Resolve the package version once at module load: locate this module on
// disk, then read the package.json that sits one directory above it.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const packageJsonPath = join(__dirname, "../package.json");
const packageJsonText = readFileSync(packageJsonPath, "utf-8");
const packageJson = JSON.parse(packageJsonText);
const VERSION = packageJson.version;
|
||||
// Configuration for an MCP server instance is the DocmostMcpConfig union
|
||||
// (credentials OR getToken) defined and re-exported above. The factory below is
|
||||
// fully side-effect-free on import: it reads no environment variables and opens
|
||||
// no transport. The standalone stdio entrypoint (stdio.ts) and the HTTP handler
|
||||
// (http.ts) supply this config and own the process/transport lifecycle.
|
||||
// --- Modern McpServer Implementation ---
|
||||
// Editing guide surfaced to MCP clients in the initialize result so they can
|
||||
// pick the right tool by intent and avoid resending whole documents.
|
||||
const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (add an image from a web URL) / replace_image (swap an existing image for one from a web URL). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (always inline; requires an EXACT selection — the contiguous text to anchor/highlight on; fails rather than leaving an unanchored comment), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " +
|
||||
"Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " +
|
||||
"Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " +
|
||||
"Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown.";
|
||||
// Helper to format JSON responses: wraps a value as a tool result holding a
// single text item with the value pretty-printed as 2-space-indented JSON.
// JSON.stringify returns `undefined` (not a string) for `undefined`, functions
// and symbols, so fall back to the JSON literal "null" to keep `text` a string.
const jsonContent = (data) => ({
    content: [{ type: "text", text: JSON.stringify(data, null, 2) ?? "null" }],
});
|
||||
/**
|
||||
* Create a fully configured Docmost MCP server. Side-effect-free: it does not
|
||||
* read environment variables and does not connect any transport — the caller
|
||||
* decides how to expose it (stdio or HTTP). The client talks to Docmost over
|
||||
* REST + the collaboration WebSocket using the provided service-account
|
||||
* credentials and auto-re-authenticates.
|
||||
*/
|
||||
export function createDocmostMcpServer(config) {
|
||||
// Pass the whole config union through: the client branches internally on
|
||||
// credentials vs. getToken, so both the external /mcp (creds) and the
|
||||
// internal per-user (getToken) paths are wired here unchanged.
|
||||
const docmostClient = new DocmostClient(config);
|
||||
const server = new McpServer({
|
||||
name: "docmost-mcp",
|
||||
version: VERSION,
|
||||
}, { instructions: SERVER_INSTRUCTIONS });
|
||||
// Register a tool from the shared, zod-agnostic spec registry. The spec owns
|
||||
// the canonical name + model-facing description + (optional) schema builder;
|
||||
// only the execute body is supplied per call. buildShape is invoked with THIS
|
||||
// package's zod (v3); the in-app layer passes its own zod (v4).
|
||||
//
|
||||
// The spec's schema builder returns a plain ZodRawShape (Record<string,
|
||||
// unknown> in the shared module since it must stay zod-agnostic), so the
|
||||
// McpServer.registerTool overloads cannot infer the execute arg's shape from
|
||||
// it. We type `execute` loosely and cast the call through `any`; runtime
|
||||
// behaviour is unchanged — each execute body destructures the same fields the
|
||||
// builder declares.
|
||||
const registerShared = (spec, execute) => server.registerTool(spec.mcpName, spec.buildShape
|
||||
? { description: spec.description, inputSchema: spec.buildShape(z) }
|
||||
: { description: spec.description }, execute);
|
||||
// Tool: get_workspace
|
||||
registerShared(SHARED_TOOL_SPECS.getWorkspace, async () => {
|
||||
const workspace = await docmostClient.getWorkspace();
|
||||
return jsonContent(workspace);
|
||||
});
|
||||
// Tool: list_spaces
|
||||
registerShared(SHARED_TOOL_SPECS.listSpaces, async () => {
|
||||
const spaces = await docmostClient.getSpaces();
|
||||
return jsonContent(spaces);
|
||||
});
|
||||
// Tool: list_pages
|
||||
server.registerTool("list_pages", {
|
||||
description: "List most recent pages in a space ordered by updatedAt (descending). " +
|
||||
"Returns a bounded list (default 50, max 100) — use search for lookups " +
|
||||
"in large spaces. Pass tree:true (with spaceId) to instead get the " +
|
||||
"space's full page hierarchy as a nested tree.",
|
||||
inputSchema: {
|
||||
spaceId: z.string().optional(),
|
||||
limit: z
|
||||
.number()
|
||||
.int()
|
||||
.min(1)
|
||||
.max(100)
|
||||
.optional()
|
||||
.describe("Max pages to return (default 50, max 100)"),
|
||||
tree: z
|
||||
.boolean()
|
||||
.optional()
|
||||
.describe("When true, return the space's full page hierarchy as a nested tree (each node has a children array) instead of the recent-by-updatedAt flat list. Requires spaceId; ignores limit."),
|
||||
},
|
||||
}, async ({ spaceId, limit, tree }) => {
|
||||
const result = await docmostClient.listPages(spaceId, limit ?? 50, tree ?? false);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: get_page
|
||||
server.registerTool("get_page", {
|
||||
description: "Get page details with content converted to Markdown. The conversion is " +
|
||||
"LOSSY (block ids, exact table/callout structure are approximated); for a " +
|
||||
"lossless representation use get_page_json.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
},
|
||||
}, async ({ pageId }) => {
|
||||
const page = await docmostClient.getPage(pageId);
|
||||
return jsonContent(page);
|
||||
});
|
||||
// Tool: get_page_json
|
||||
registerShared(SHARED_TOOL_SPECS.getPageJson, async ({ pageId }) => {
|
||||
const page = await docmostClient.getPageJson(pageId);
|
||||
return jsonContent(page);
|
||||
});
|
||||
// Tool: get_outline
|
||||
registerShared(SHARED_TOOL_SPECS.getOutline, async ({ pageId }) => {
|
||||
const result = await docmostClient.getOutline(pageId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: get_node
|
||||
registerShared(SHARED_TOOL_SPECS.getNode, async ({ pageId, nodeId }) => {
|
||||
const result = await docmostClient.getNode(pageId, nodeId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_get
|
||||
server.registerTool("table_get", {
|
||||
description: "Read a table as a matrix. Returns {rows, cols, cells (text[][]), " +
|
||||
"cellIds (paragraph id per cell, or null)}. `table` = `#<index>` from " +
|
||||
"get_outline, or any block id inside the table. Use cellIds with " +
|
||||
"patch_node for rich-formatted cell edits. `cols` is the FIRST row's " +
|
||||
"width; ragged tables may vary per row, so use the per-row length of " +
|
||||
"`cells` for each row.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
},
|
||||
}, async ({ pageId, table }) => {
|
||||
const result = await docmostClient.getTable(pageId, table);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_insert_row
|
||||
server.registerTool("table_insert_row", {
|
||||
description: "Insert a row of plain-text cells into a table. `table` = `#<index>` or " +
|
||||
"a block id inside it. `cells` = text per column (padded to the table's " +
|
||||
"column count; error if more cells than columns). `index` = 0-based " +
|
||||
"insert position (0 inserts before the header); omit to append at the end.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
cells: z.array(z.string()),
|
||||
index: z.number().int().optional(),
|
||||
},
|
||||
}, async ({ pageId, table, cells, index }) => {
|
||||
const result = await docmostClient.tableInsertRow(pageId, table, cells, index);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_delete_row
|
||||
server.registerTool("table_delete_row", {
|
||||
description: "Delete the row at 0-based `index` from a table (`table` = `#<index>` or " +
|
||||
"a block id inside it). Refuses to delete the table's only row. An " +
|
||||
"out-of-range `index` throws. Deleting `index` 0 removes the header row, " +
|
||||
"and the next row becomes the new header.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
index: z.number().int(),
|
||||
},
|
||||
}, async ({ pageId, table, index }) => {
|
||||
const result = await docmostClient.tableDeleteRow(pageId, table, index);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_update_cell
|
||||
server.registerTool("table_update_cell", {
|
||||
description: "Set the plain-text content of cell [row,col] (0-based) in a table " +
|
||||
"(`table` = `#<index>` or a block id inside it). Replaces the cell's " +
|
||||
"content with a single text paragraph; for rich formatting use patch_node " +
|
||||
"on the cell's paragraph id from table_get.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
row: z.number().int(),
|
||||
col: z.number().int(),
|
||||
text: z.string(),
|
||||
},
|
||||
}, async ({ pageId, table, row, col, text }) => {
|
||||
const result = await docmostClient.tableUpdateCell(pageId, table, row, col, text);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: create_page
|
||||
server.registerTool("create_page", {
|
||||
description: "Create a new page with content (automatically moves it to the correct hierarchy).",
|
||||
inputSchema: {
|
||||
title: z.string().min(1).describe("Title of the page"),
|
||||
content: z.string().min(1).describe("Markdown content"),
|
||||
spaceId: z.string().min(1),
|
||||
parentPageId: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe("Optional parent page ID to nest under"),
|
||||
},
|
||||
}, async ({ title, content, spaceId, parentPageId }) => {
|
||||
const result = await docmostClient.createPage(title, content, spaceId, parentPageId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: update_page_json
|
||||
server.registerTool("update_page_json", {
|
||||
description: "Replace a page's content with a raw ProseMirror JSON document " +
|
||||
"(lossless write: preserves the block ids, callouts, tables and " +
|
||||
"attributes you pass in). Typical flow: get_page_json -> modify the " +
|
||||
"JSON -> update_page_json. Keep existing node ids intact so heading " +
|
||||
"anchors and history stay stable. Minimal full-doc example: " +
|
||||
'{"type":"doc","content":[{"type":"paragraph","content":' +
|
||||
'[{"type":"text","text":"Hi"}]}]}. `content` may be a JSON object or a ' +
|
||||
"JSON string (both accepted), and is OPTIONAL: omit it to update only " +
|
||||
"the title (though prefer rename_page for a title-only change). " +
|
||||
"Supplying neither content nor title is an error.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1).describe("ID of the page to update"),
|
||||
content: z
|
||||
.any()
|
||||
.optional()
|
||||
.describe('ProseMirror document {"type":"doc","content":[...]} (JSON object or ' +
|
||||
"JSON string). Omit to rename only."),
|
||||
title: z.string().optional().describe("Optional new title"),
|
||||
},
|
||||
}, async ({ pageId, content, title }) => {
|
||||
// Only parse/validate the document when it was actually supplied; when it
|
||||
// is omitted, pass it straight through so the client performs a title-only
|
||||
// (or no-op) update.
|
||||
let doc;
|
||||
if (content === undefined || content === null) {
|
||||
doc = undefined;
|
||||
}
|
||||
else {
|
||||
// String -> JSON.parse (throwing on invalid); object passes through.
|
||||
doc = parseNodeArg(content, "content was a string but not valid JSON");
|
||||
}
|
||||
const result = await docmostClient.updatePageJson(pageId, doc, title);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: export_page_markdown
|
||||
server.registerTool("export_page_markdown", {
|
||||
description: "Export a page to a single self-contained, lossless Docmost-flavoured " +
|
||||
"Markdown file (custom extensions): YAML-free meta header, body with " +
|
||||
"inline comment anchors and diagrams, and a trailing comments-thread " +
|
||||
"block. Designed for a download -> edit body -> import_page_markdown " +
|
||||
"round-trip that preserves everything, including comment highlights. " +
|
||||
"Comment THREADS are preserved in the file but are not re-pushed to the " +
|
||||
"server on import.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
},
|
||||
}, async ({ pageId }) => {
|
||||
const md = await docmostClient.exportPageMarkdown(pageId);
|
||||
return { content: [{ type: "text", text: md }] };
|
||||
});
|
||||
// Tool: import_page_markdown
|
||||
registerShared(SHARED_TOOL_SPECS.importPageMarkdown, async ({ pageId, markdown }) => {
|
||||
const res = await docmostClient.importPageMarkdown(pageId, markdown);
|
||||
return jsonContent(res);
|
||||
});
|
||||
// Tool: copy_page_content
|
||||
registerShared(SHARED_TOOL_SPECS.copyPageContent, async ({ sourcePageId, targetPageId }) => {
|
||||
const result = await docmostClient.copyPageContent(sourcePageId, targetPageId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: rename_page
|
||||
server.registerTool("rename_page", {
|
||||
description: "Rename a page (change its title only) without touching or resending " +
|
||||
"its content.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1).describe("ID of the page to rename"),
|
||||
title: z.string().min(1).describe("New title"),
|
||||
},
|
||||
}, async ({ pageId, title }) => {
|
||||
const result = await docmostClient.renamePage(pageId, title);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: edit_page_text
|
||||
registerShared(SHARED_TOOL_SPECS.editPageText, async ({ pageId, edits }) => {
|
||||
const result = await docmostClient.editPageText(pageId, edits);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: patch_node
|
||||
server.registerTool("patch_node", {
|
||||
description: "Replaces a single block identified by its attrs.id WITHOUT resending the " +
|
||||
"whole document. Get the block id from get_page_json, then pass a " +
|
||||
"ProseMirror node to put in its place. Example node: a paragraph " +
|
||||
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
|
||||
'heading {"type":"heading","attrs":{"level":2},"content":' +
|
||||
'[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
|
||||
'{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
|
||||
"JSON object or a JSON string (both accepted). Cheaper and safer than " +
|
||||
"update_page_json for one-block structural edits.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
nodeId: z.string().min(1),
|
||||
node: z
|
||||
.any()
|
||||
.describe("ProseMirror node to put in place of the node with this id, e.g. " +
|
||||
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' +
|
||||
"JSON object or JSON string both accepted."),
|
||||
},
|
||||
}, async ({ pageId, nodeId, node }) => {
|
||||
const parsedNode = parseNodeArg(node);
|
||||
const result = await docmostClient.patchNode(pageId, nodeId, parsedNode);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: insert_node
|
||||
server.registerTool("insert_node", {
|
||||
description: "Insert a block before/after another block (by attrs.id or anchor text) " +
|
||||
"or append at the end. Get anchor block ids from get_page_json. Avoids " +
|
||||
"resending the whole document. Can also insert table structure: to add a " +
|
||||
"tableRow, pass a tableRow node with position before/after and anchor " +
|
||||
"INSIDE the target table — anchorNodeId of any block/cell in it, or " +
|
||||
"anchorText matching the table; to add a tableCell/tableHeader, use " +
|
||||
"anchorNodeId of a block inside the target row (anchorText only resolves " +
|
||||
"top-level blocks, so it cannot target a row). `anchorText` is matched " +
|
||||
"against the block's literal rendered plain text (no markdown); " +
|
||||
"markdown/emoji are tolerated as a fallback; prefer plain text or " +
|
||||
"anchorNodeId. Note: append is top-level " +
|
||||
"only and rejects structural table nodes. Example node: a paragraph " +
|
||||
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
|
||||
'heading {"type":"heading","attrs":{"level":2},"content":' +
|
||||
'[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
|
||||
'{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
|
||||
"JSON object or a JSON string (both accepted).",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
node: z
|
||||
.any()
|
||||
.describe("ProseMirror node to insert, e.g. " +
|
||||
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' +
|
||||
"JSON object or JSON string both accepted."),
|
||||
position: z.enum(["before", "after", "append"]),
|
||||
anchorNodeId: z.string().optional(),
|
||||
anchorText: z.string().optional(),
|
||||
},
|
||||
}, async ({ pageId, node, position, anchorNodeId, anchorText }) => {
|
||||
const parsedNode = parseNodeArg(node);
|
||||
const result = await docmostClient.insertNode(pageId, parsedNode, {
|
||||
position,
|
||||
anchorNodeId,
|
||||
anchorText,
|
||||
});
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: delete_node
|
||||
registerShared(SHARED_TOOL_SPECS.deleteNode, async ({ pageId, nodeId }) => {
|
||||
const result = await docmostClient.deleteNode(pageId, nodeId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: insert_image
|
||||
server.registerTool("insert_image", {
|
||||
description: "Download an image from a web (http/https) URL and insert it into " +
|
||||
"a page in one step. By default " +
|
||||
"appends the image at the end of the page. With replaceText, replaces the " +
|
||||
"first top-level block whose text contains that string (handy for " +
|
||||
'swapping a text placeholder like "[image: foo.png]" for the real image). ' +
|
||||
"With afterText, inserts the image right after the first block containing " +
|
||||
"that string. Preserves all other block ids.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
imageUrl: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("http(s) URL of the image to download and upload"),
|
||||
align: z.enum(["left", "center", "right"]).optional(),
|
||||
alt: z.string().optional(),
|
||||
replaceText: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe("Replace the first top-level block whose text contains this string with the image"),
|
||||
afterText: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe("Insert the image right after the first top-level block whose text contains this string"),
|
||||
},
|
||||
}, async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => {
|
||||
const result = await docmostClient.insertImage(pageId, imageUrl, {
|
||||
align,
|
||||
alt,
|
||||
replaceText,
|
||||
afterText,
|
||||
});
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: replace_image
// Registers the tool that replaces an image on a page. The handler forwards the
// validated arguments to docmostClient.replaceImage (align/alt are optional and
// passed through as given) and returns the client's result as JSON content.
server.registerTool("replace_image", {
    description: "Replace an existing image on a page with a new image fetched from a web " +
        "(http/https) URL: uploads the new file as a NEW " +
        "attachment (fresh clean URL that renders and busts browser caches), then " +
        "repoints every image node referencing the old attachmentId (recursively, " +
        "incl. callouts/tables) via the live document, preserving comments, " +
        "alignment and alt. The old attachment is left as an unreferenced orphan " +
        "(Docmost has no API to delete a single attachment; it is removed only when " +
        "the page/space is deleted). In-place byte overwrite is avoided because some " +
        "Docmost versions corrupt the attachment (HTTP 500) on overwrite.",
    inputSchema: {
        pageId: z.string().min(1),
        attachmentId: z
            .string()
            .min(1)
            .describe("attachmentId of the image currently in the page to replace"),
        imageUrl: z
            .string()
            .min(1)
            .describe("http(s) URL of the new image to download"),
        align: z.enum(["left", "center", "right"]).optional(),
        alt: z.string().optional(),
    },
}, async ({ pageId, attachmentId, imageUrl, align, alt }) => {
    const options = { align, alt };
    return jsonContent(await docmostClient.replaceImage(pageId, attachmentId, imageUrl, options));
});
|
||||
// Tool: share_page
// Registers the tool that makes a page public. `searchIndexing` defaults to
// true when the caller omits it; the client's result is returned as JSON content.
server.registerTool("share_page", {
    description: "Make a page publicly accessible (idempotent) and return its public " +
        "URL. The URL format is <app>/share/<key>/p/<slugId>.",
    inputSchema: {
        pageId: z.string().min(1).describe("ID of the page to share"),
        searchIndexing: z
            .boolean()
            .optional()
            .describe("Allow search engines to index the page (default true)"),
    },
}, async ({ pageId, searchIndexing }) => {
    // `??` (not `||`) so an explicit `false` is honoured.
    const allowIndexing = searchIndexing ?? true;
    return jsonContent(await docmostClient.sharePage(pageId, allowIndexing));
});
|
||||
// Tool: unshare_page
// Name/description/schema come from SHARED_TOOL_SPECS.unsharePage (defined
// outside this section); the handler only forwards pageId to the client.
registerShared(SHARED_TOOL_SPECS.unsharePage, async ({ pageId }) => {
    return jsonContent(await docmostClient.unsharePage(pageId));
});
|
||||
// Tool: list_shares
// Spec comes from SHARED_TOOL_SPECS.listShares (defined outside this section);
// the handler takes no arguments and returns the client's listing as JSON content.
registerShared(SHARED_TOOL_SPECS.listShares, async () => {
    return jsonContent(await docmostClient.listShares());
});
|
||||
// Tool: move_page
// Registers the tool that re-parents a page. An omitted, null, empty or literal
// "null" parentPageId means "move to root". The handler refuses to move a page
// directly under itself and only reports success when the API result carries
// `success === true`.
server.registerTool("move_page", {
    description: "Move a page to a new parent (nesting) or root. Essential for organizing pages created via 'create_page'.",
    inputSchema: {
        pageId: z.string().min(1),
        parentPageId: z
            .string()
            .nullable()
            .optional()
            .describe("Target parent page ID. Pass 'null' or empty string to move to root."),
        position: z
            .string()
            .min(5)
            .optional()
            .describe("fractional-index position key; min 5 chars; omit to append at the end."),
    },
}, async ({ pageId, parentPageId, position }) => {
    // Normalise every "root" spelling (undefined, null, "", "null") to null once,
    // so the guard, the client call and the message all see the same value.
    const targetParentId = !parentPageId || parentPageId === "null" ? null : parentPageId;
    // Cheap cycle guard: a page cannot be moved directly under itself.
    // (Deeper descendant-cycle detection is intentionally out of scope.)
    if (targetParentId === pageId) {
        throw new Error("cannot move a page under itself");
    }
    const result = await docmostClient.movePage(pageId, targetParentId, position);
    // Require POSITIVE confirmation: the live /pages/move success shape is
    // exactly { success: true, status: 200 }. An empty body, a 204, or any odd
    // shape lacking success === true must NOT be reported as a successful move,
    // so we surface the raw API result instead of declaring success.
    const confirmed = Boolean(result) && typeof result === "object" && result.success === true;
    if (!confirmed) {
        throw new Error(`Failed to move page ${pageId}: ${JSON.stringify(result)}`);
    }
    return jsonContent({
        message: `Successfully moved page ${pageId} to parent ${targetParentId ?? "root"}`,
        result,
    });
});
|
||||
// Tool: delete_page
// Registers the tool that deletes one page. The client call's return value is
// not used; a fixed confirmation text is returned once it resolves (a rejected
// call propagates as the tool error).
server.registerTool("delete_page", {
    description: "Delete a single page by ID.",
    inputSchema: {
        pageId: z.string().min(1),
    },
}, async ({ pageId }) => {
    await docmostClient.deletePage(pageId);
    const confirmation = { type: "text", text: `Successfully deleted page ${pageId}` };
    return { content: [confirmation] };
});
|
||||
// --- Comment tools (ported from upstream PR #3 by Max Nikitin) ---
|
||||
// Tool: list_comments
// Registers the tool that lists a page's comments and returns the client's
// result as JSON content. pageId is required to be non-empty, matching the
// other page tools in this file.
server.registerTool("list_comments", {
    description: "List all comments on a page (paginated). Content is returned as Markdown.",
    inputSchema: {
        pageId: z.string().min(1).describe("ID of the page"),
    },
}, async ({ pageId }) => {
    return jsonContent(await docmostClient.listComments(pageId));
});
|
||||
// Tool: create_comment
// Registers the tool that creates an inline comment (or a reply). The handler
// rejects a top-level comment without a non-blank `selection` before calling
// the client, then always passes the comment type "inline".
server.registerTool("create_comment", {
    description: "Create a new comment on a page. The comment is ALWAYS inline and is " +
        "anchored to (highlights) its `selection` text — there are no page-level " +
        "comments. Content is provided as Markdown and automatically converted. " +
        "A top-level comment REQUIRES an exact `selection`; if the selection " +
        "cannot be found in the page the call fails (no orphan comment is left). " +
        "Replies (parentCommentId set) inherit the parent's anchor and take no " +
        "selection.",
    inputSchema: {
        // Non-empty like the other page tools, so an empty ID fails validation
        // instead of reaching the API.
        pageId: z.string().min(1).describe("ID of the page to comment on"),
        content: z.string().min(1).describe("Comment content in Markdown format"),
        selection: z
            .string()
            .min(1)
            // Enforce the 250-char cap that the field description below documents.
            .max(250)
            .optional()
            .describe("EXACT contiguous text from a single paragraph/block to anchor the " +
            "comment on (<=250 chars). Required for a top-level comment; omit " +
            "only when replying via parentCommentId."),
        parentCommentId: z
            .string()
            .optional()
            .describe("Parent comment ID to create a reply (max 2 nesting levels)"),
    },
}, async ({ pageId, content, selection, parentCommentId }) => {
    // The schema's min(1) still admits whitespace-only text, hence the trim().
    const hasSelection = Boolean(selection && selection.trim());
    if (!parentCommentId && !hasSelection) {
        throw new Error("create_comment: a 'selection' (exact text to anchor on) is required for a top-level comment; omit it only when replying via parentCommentId.");
    }
    return jsonContent(await docmostClient.createComment(pageId, content, "inline", selection, parentCommentId));
});
|
||||
// Tool: update_comment
// Registers the tool that replaces a comment's content; both arguments must be
// non-empty. The client's result is returned as JSON content.
server.registerTool("update_comment", {
    description: "Update an existing comment's content. Only the comment creator can " +
        "update it. Content is provided as Markdown.",
    inputSchema: {
        commentId: z.string().min(1).describe("ID of the comment to update"),
        content: z
            .string()
            .min(1)
            .describe("New comment content in Markdown format"),
    },
}, async ({ commentId, content }) => {
    return jsonContent(await docmostClient.updateComment(commentId, content));
});
|
||||
// Tool: delete_comment
// Registers the tool that deletes one comment. The client call's return value
// is not used; a fixed confirmation text is returned once it resolves.
server.registerTool("delete_comment", {
    description: "Delete a comment. Only the comment creator or space admin can delete it.",
    inputSchema: {
        commentId: z.string().min(1).describe("ID of the comment to delete"),
    },
}, async ({ commentId }) => {
    await docmostClient.deleteComment(commentId);
    const confirmation = {
        type: "text",
        text: `Successfully deleted comment ${commentId}`,
    };
    return { content: [confirmation] };
});
|
||||
// Tool: check_new_comments
// Registers the tool that reports comments created after `since`, optionally
// limited to the subtree under parentPageId. The timestamp is validated here
// before the client is called.
server.registerTool("check_new_comments", {
    description: "Check for new comments across pages in a space since a given timestamp. " +
        "Optionally scope to a page subtree (folder). Returns only comments " +
        "created after the specified time.",
    inputSchema: {
        // Non-empty, consistent with the ID fields of the other tools.
        spaceId: z.string().min(1).describe("Space ID to check for new comments"),
        since: z
            .string()
            .min(1)
            .describe("ISO 8601 timestamp — only return comments created after this time (e.g. '2026-03-10T00:00:00Z')"),
        parentPageId: z
            .string()
            .optional()
            .describe("Optional root page ID to scope the check to a subtree (folder). " +
            "Only pages under this parent will be checked."),
    },
}, async ({ spaceId, since, parentPageId }) => {
    // Reject an unparseable timestamp up front: otherwise the comparison
    // against NaN silently treats every comment as "not new" and the tool
    // returns zero results without signalling the bad input.
    const sinceMs = Date.parse(since);
    if (Number.isNaN(sinceMs)) {
        throw new Error(`Invalid 'since' timestamp: ${JSON.stringify(since)} — expected an ISO 8601 date (e.g. '2026-03-10T00:00:00Z')`);
    }
    return jsonContent(await docmostClient.checkNewComments(spaceId, since, parentPageId));
});
|
||||
// Tool: search
// Registers the search tool. `limit` is an optional integer in [1, 100]; the
// client's result is returned as JSON content.
server.registerTool("search", {
    description: "Search for pages and content. Results are bounded by `limit` " +
        "(default applied by the client, max 100).",
    inputSchema: {
        query: z.string().min(1).describe("Search query"),
        limit: z
            .number()
            .int()
            .min(1)
            .max(100)
            .optional()
            .describe("Max results to return (max 100)"),
    },
}, async ({ query, limit }) => {
    // The tool exposes no spaceId filter, so pass undefined for the client's
    // optional spaceId parameter and forward limit into its correct slot.
    const spaceId = undefined;
    return jsonContent(await docmostClient.search(query, spaceId, limit));
});
|
||||
// Tool: docmost_transform
// Registers the tool that runs a caller-supplied JS transform against a page.
// dryRun defaults to true and deleteComments to false at the schema level, so
// the handler always receives concrete booleans; it forwards them to
// docmostClient.transformPage and returns the result as JSON content.
server.registerTool("docmost_transform", {
    description: "Edit a page by running an arbitrary JS transform `(doc, ctx) => doc` " +
        "against its LIVE ProseMirror document, with a diff preview and page " +
        "history as the safety net. By default dryRun=true: returns a diff " +
        "preview WITHOUT writing. Set dryRun=false to apply (atomic, won't " +
        "clobber concurrent edits). `doc` is the lossless ProseMirror document " +
        "({type:'doc',content:[...]}); return a new doc of the same shape. " +
        "`ctx` gives you: comments (the page's comments, each {id, content " +
        "(markdown), selection, type}); log (array; console.log pushes to it); " +
        "consume(id) (mark a comment id as consumed — those are deleted when " +
        "deleteComments=true after a successful apply); and helpers: " +
        "blockText(node) (plain text), walk(node, fn) (depth-first over all " +
        "nodes incl. callouts/tables/lists), getList(doc, predicate) (find a " +
        "node even without attrs.id), insertMarkerAfter(doc, anchor, marker, " +
        "{beforeBlock}) (insert a plain unmarked text run after anchor, " +
        "mark-safe), setCalloutRange(doc, n) (sync a [1]…[K] callout range to " +
        "[1]…[n]), noteItem(inlineNodes) (wrap inline nodes in a listItem with a " +
        "fresh id), mdToInlineNodes(markdown) (comment markdown -> inline nodes), " +
        "and commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
        "comments into numbered footnotes). Footnote convention: markers are " +
        "plain '[N]' text in the body; the notes are an orderedList under a " +
        "heading whose text is 'Примечания переводчика'. The transform runs " +
        "sandboxed (no require/process/fs/network, 5s timeout) and must return a " +
        "{type:'doc'} node.",
    inputSchema: {
        pageId: z.string().min(1),
        transformJs: z
            .string()
            .min(1)
            .describe("A JS function `(doc, ctx) => doc` (expression-arrow or " +
            "parenthesized function). It receives a clone of the live doc and " +
            "ctx (comments, log, consume(id), helpers: blockText/walk/getList/" +
            "insertMarkerAfter/setCalloutRange/noteItem/mdToInlineNodes/" +
            "commentsToFootnotes) and must return a {type:'doc'} node."),
        dryRun: z
            .boolean()
            .optional()
            .default(true)
            .describe("Preview only (no write) when true (default)."),
        deleteComments: z
            .boolean()
            .optional()
            .default(false)
            .describe("After a successful apply, delete every comment id passed to " +
            "ctx.consume(id)."),
    },
}, async ({ pageId, transformJs, dryRun, deleteComments }) => {
    const options = { dryRun, deleteComments };
    return jsonContent(await docmostClient.transformPage(pageId, transformJs, options));
});
|
||||
// Tool: diff_page_versions
// Spec comes from SHARED_TOOL_SPECS.diffPageVersions (defined outside this
// section); the handler forwards pageId/from/to unchanged to the client.
registerShared(SHARED_TOOL_SPECS.diffPageVersions, async ({ pageId, from, to }) => {
    return jsonContent(await docmostClient.diffPageVersions(pageId, from, to));
});
|
||||
// Tool: list_page_history
// Spec comes from SHARED_TOOL_SPECS.listPageHistory (defined outside this
// section); the handler forwards pageId and the pagination cursor to the client.
registerShared(SHARED_TOOL_SPECS.listPageHistory, async ({ pageId, cursor }) => {
    return jsonContent(await docmostClient.listPageHistory(pageId, cursor));
});
|
||||
// Tool: restore_page_version
// Spec comes from SHARED_TOOL_SPECS.restorePageVersion (defined outside this
// section); the handler forwards historyId to the client.
registerShared(SHARED_TOOL_SPECS.restorePageVersion, async ({ historyId }) => {
    return jsonContent(await docmostClient.restorePageVersion(historyId));
});
|
||||
return server;
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
import axios from "axios";
|
||||
/**
 * Request a collaboration token by POSTing an empty JSON body to
 * `${baseUrl}/auth/collab-token` with `apiToken` as a Bearer token.
 *
 * @param {string} baseUrl - API base URL; the endpoint path is appended to it.
 * @param {string} apiToken - Token sent in the Authorization header.
 * @returns {Promise<string|undefined>} `data.data.token` when present,
 *   otherwise `data.token` (undefined if the payload carries neither).
 * @throws {Error} For Axios failures, a plain Error whose `status` property is
 *   the HTTP status (undefined when no response was received). Non-Axios
 *   errors are rethrown unchanged.
 */
export async function getCollabToken(baseUrl, apiToken) {
    try {
        const response = await axios.post(`${baseUrl}/auth/collab-token`, {}, {
            headers: {
                Authorization: `Bearer ${apiToken}`,
                "Content-Type": "application/json",
            },
        });
        // Response is wrapped in { data: { token: ... } }; fall back to a
        // top-level token for the unwrapped shape.
        return response.data.data?.token || response.data.token;
    }
    catch (error) {
        if (!axios.isAxiosError(error)) {
            throw error;
        }
        const { response } = error;
        // A network-level failure (DNS, refused connection, timeout) has no
        // response; report the Axios code/message instead of
        // "undefined undefined".
        const reason = response
            ? `${response.status} ${response.statusText}`
            : error.code || error.message;
        // Avoid leaking the full server response body by default; append it
        // only when DEBUG is set.
        let message = `Failed to get collab token: ${reason}`;
        if (process.env.DEBUG) {
            message += ` - ${JSON.stringify(response?.data)}`;
        }
        // Deliberately no `cause`: the AxiosError carries the request config
        // (including the Authorization header) and the response body.
        const err = new Error(message);
        // Attach the HTTP status so callers (e.g. getCollabTokenWithReauth) can
        // still detect a 401/403 after the AxiosError has been wrapped away.
        err.status = response?.status;
        throw err;
    }
}
|
||||
/**
 * Log in with email/password against `${baseUrl}/auth/login` and return the
 * value of the `authToken` cookie from the response's Set-Cookie header.
 *
 * @param {string} baseUrl - API base URL; the endpoint path is appended to it.
 * @param {string} email - Account email sent in the request body.
 * @param {string} password - Account password sent in the request body.
 * @returns {Promise<string>} The raw `authToken` cookie value.
 * @throws {Error} When the response has no Set-Cookie header or no `authToken`
 *   cookie; request errors are logged to stderr and rethrown unchanged.
 */
export async function performLogin(baseUrl, email, password) {
    try {
        const response = await axios.post(`${baseUrl}/auth/login`, { email, password });
        // Extract token from Set-Cookie header
        const setCookie = response.headers["set-cookie"];
        if (!setCookie) {
            throw new Error("No Set-Cookie header found in login response");
        }
        // Tolerate a single string as well as the usual array of cookie lines.
        const cookieLines = Array.isArray(setCookie) ? setCookie : [setCookie];
        for (const cookieLine of cookieLines) {
            const pair = cookieLine.split(";")[0];
            const eq = pair.indexOf("=");
            // Require an "=" so a valueless cookie such as "authTokenX" cannot
            // match via slice(0, -1). Match the name exactly to avoid catching
            // a future authTokenRefresh cookie (startsWith would catch it).
            if (eq === -1 || pair.slice(0, eq).trim() !== "authToken") {
                continue;
            }
            // Take everything after the FIRST "=" up to the first ";".
            // Splitting on "=" would truncate base64 values containing "=" padding.
            return pair.slice(eq + 1);
        }
        throw new Error("No authToken cookie found in login response");
    }
    catch (error) {
        // Avoid leaking the full server response body by default; log only the
        // HTTP status. Log the verbose body only when DEBUG is set.
        if (!axios.isAxiosError(error)) {
            console.error("Login failed:", error.message);
        }
        else if (process.env.DEBUG) {
            console.error("Login failed:", error.response?.data);
        }
        else {
            console.error("Login failed:", error.response?.status);
        }
        throw error;
    }
}
|
||||
@@ -1,713 +0,0 @@
|
||||
import { HocuspocusProvider } from "@hocuspocus/provider";
|
||||
import { TiptapTransformer } from "@hocuspocus/transformer";
|
||||
import * as Y from "yjs";
|
||||
import WebSocket from "ws";
|
||||
import { marked } from "marked";
|
||||
import { generateJSON } from "@tiptap/html";
|
||||
import { Node as PMNode } from "@tiptap/pm/model";
|
||||
import { updateYFragment } from "y-prosemirror";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||
import { withPageLock } from "./page-lock.js";
|
||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||
import { summarizeChange } from "./diff.js";
|
||||
/**
 * Build the descriptive error for an opaque Yjs encode failure ("Unexpected
 * content type"), shared by both encode paths (`buildYDoc` -> `toYdoc` and
 * `applyDocToFragment` -> `updateYFragment`) so the message wording stays in one
 * place.
 *
 * @param {object} safe - The document that failed to encode; it is passed to
 *   `findUnstorableAttr` to locate the offending attribute.
 * @param {string} label - Name of the stage that failed (diagnostic only).
 * @param {unknown} e - The caught failure; its `message` is used when it is an
 *   Error, otherwise its string form.
 * @returns {Error} A new Error (the caller decides whether to throw it).
 */
function unstorableYjsError(safe, label, e) {
    const offending = findUnstorableAttr(safe);
    const reason = e instanceof Error ? e.message : String(e);
    // Name the attribute when one was found; otherwise give the generic hint.
    const hint = offending
        ? ` Offending attribute: ${offending}.`
        : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined).";
    return new Error(`Failed to encode document to Yjs (${label}): ${reason}.${hint}`);
}
|
||||
// Setup DOM environment for Tiptap HTML parsing in Node.js: expose an empty
// JSDOM window/document (and the `ws` WebSocket implementation) as globals.
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
global.window = dom.window;
global.document = dom.window.document;
// @ts-ignore
global.Element = dom.window.Element;
// @ts-ignore
global.WebSocket = WebSocket;
// `global.navigator` is intentionally NOT assigned: it is read-only in newer
// Node versions and already exists.
|
||||
/**
 * Hard ceiling above which callout preprocessing is skipped entirely, so a
 * pathological multi-megabyte payload cannot tie up the event loop; the markdown
 * is then passed through verbatim (callouts are simply not detected).
 * NOTE: it is compared against `String.prototype.length` (UTF-16 code units),
 * not an encoded byte count.
 */
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
/** Matches an opening callout fence: `:::type` (type captured; callers lower-case it). */
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
/** Matches a bare closing callout fence: `:::`. */
const CALLOUT_CLOSE_RE = /^:::\s*$/;
/** Matches the start of a code fence (``` or ~~~), capturing the marker run. */
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/**
 * Pre-process Docmost-flavoured markdown: convert `:::type ... :::` callout
 * blocks (the syntax our markdown export produces) into
 * `<div data-type="callout" data-callout-type="TYPE">` blocks that the callout
 * extension parses. The inner content is rendered through marked as regular
 * markdown.
 *
 * The scan works line by line (no regex rescan of the whole text) and:
 *  - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
 *    `:::` line inside a code fence as a callout delimiter. A fence is closed
 *    only by a line holding nothing but a run of the same character at least as
 *    long as the opener (CommonMark rule), so a line such as "```js" inside an
 *    open fence does not end it;
 *  - matches an opening `:::type` line with the CLOSING `:::` at the SAME
 *    nesting level, supporting nested callouts via a depth counter;
 *  - leaves an unterminated opener in place as a literal line. Each such opener
 *    scans to the end of its slice, and nested bodies are re-scanned once per
 *    nesting level, so adversarial input is not strictly linear; the size cap
 *    above bounds the cost.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Promise<string>} Markdown with callouts replaced by HTML divs, or
 *   the input unchanged when it exceeds the size cap.
 */
async function preprocessCallouts(markdown) {
    // Defensive cap: skip preprocessing for pathologically large inputs.
    if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
        return markdown;
    }
    // A closing code fence: only the marker run and optional whitespace.
    const fenceCloseRe = /^\s*(`{3,}|~{3,})\s*$/;
    // True when `line` closes the code fence opened with `marker`.
    const closesCodeFence = (line, marker) => {
        const m = line.match(fenceCloseRe);
        return m !== null && m[1][0] === marker[0] && m[1].length >= marker.length;
    };
    // Index of the `:::` that closes the callout opened at `openIndex`, or -1
    // when the slice ends first. Code fences and nested callouts are skipped.
    const findCalloutClose = (lines, openIndex) => {
        let depth = 1;
        let fenceMarker = ""; // non-empty while inside a code fence
        for (let j = openIndex + 1; j < lines.length; j++) {
            const candidate = lines[j];
            if (fenceMarker) {
                if (closesCodeFence(candidate, fenceMarker)) {
                    fenceMarker = "";
                }
                continue;
            }
            const fence = candidate.match(CODE_FENCE_RE);
            if (fence) {
                fenceMarker = fence[2];
                continue;
            }
            if (CALLOUT_OPEN_RE.test(candidate)) {
                depth++;
                continue;
            }
            if (CALLOUT_CLOSE_RE.test(candidate)) {
                depth--;
                if (depth === 0) {
                    return j; // matching close for THIS callout
                }
            }
        }
        return -1;
    };
    // Transform a slice of lines: top-level callouts become <div> blocks whose
    // bodies are transformed recursively (nested callouts) and then rendered.
    const transform = async (lines) => {
        const out = [];
        let fenceMarker = ""; // the exact run of backticks/tildes that opened the fence
        let i = 0;
        while (i < lines.length) {
            const line = lines[i];
            // Inside a code fence only its closing fence is significant;
            // everything else (including `:::` lines) is copied verbatim.
            if (fenceMarker) {
                out.push(line);
                if (closesCodeFence(line, fenceMarker)) {
                    fenceMarker = "";
                }
                i++;
                continue;
            }
            const fenceOpen = line.match(CODE_FENCE_RE);
            if (fenceOpen) {
                fenceMarker = fenceOpen[2];
                out.push(line);
                i++;
                continue;
            }
            const open = line.match(CALLOUT_OPEN_RE);
            const closeIndex = open ? findCalloutClose(lines, i) : -1;
            if (closeIndex === -1) {
                // Ordinary line, or an unterminated opener kept as literal text.
                out.push(line);
                i++;
                continue;
            }
            const type = open[1].toLowerCase();
            const inner = await transform(lines.slice(i + 1, closeIndex));
            const renderedInner = await marked.parse(inner);
            out.push(`\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`);
            i = closeIndex + 1; // skip past the closing `:::`
        }
        return out.join("\n");
    };
    return transform(markdown.split("\n"));
}
|
||||
/**
 * Bridge marked's checkbox lists to TipTap task lists.
 *
 * marked renders GitHub task list items (`- [x] done`) as a plain
 * `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the
 * markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists
 * into the shape those extensions expect:
 *   TaskList parseHTML matches `ul[data-type="taskList"]`,
 *   TaskItem matches `li[data-type="taskItem"]`,
 *   the checked state is read from `data-checked === "true"`.
 *
 * A list is converted only when it has at least one `<li>` and EVERY direct
 * `<li>` carries its own checkbox. Both `<ul>` and `<ol>` qualify; a qualifying
 * `<ol>` is re-created as a `<ul>` because task lists are unordered. Mixed or
 * ordinary lists are left untouched. The `<p>` wrapper marked emits is kept
 * inside the `<li>`.
 *
 * @param {string} html - HTML produced from markdown.
 * @returns {string} The input unchanged when it contains no checkbox or exceeds
 *   the size cap; otherwise the body HTML re-serialised after the rewrite.
 */
function bridgeTaskLists(html) {
    // Cheap early-out: with no checkbox input in the markup there is nothing to
    // bridge, so the JSDOM parse is skipped entirely.
    if (!/type=["']?checkbox/i.test(html)) {
        return html;
    }
    // Defensive cap (consistent with preprocessCallouts): pass pathologically
    // large markup through verbatim rather than parsing it.
    if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
        return html;
    }
    // Local names avoid shadowing the module-level `dom` / global `document`.
    const fragmentDom = new JSDOM(html);
    const doc = fragmentDom.window.document;
    // Checkboxes that belong to THIS <li> directly: direct <input type="checkbox">
    // children, or ones directly inside a direct <p> child (marked's shape).
    // Deeper ones (e.g. in a nested list) are excluded so a bullet item that
    // merely contains a task sublist is not misdetected. All of them are
    // collected because raw inline HTML can put several in one <li>.
    const ownCheckboxes = (li) => {
        const found = [];
        for (const child of Array.from(li.children)) {
            if (child.tagName === "INPUT") {
                if (child.getAttribute("type") === "checkbox") {
                    found.push(child);
                }
            }
            else if (child.tagName === "P") {
                found.push(...child.querySelectorAll(":scope > input[type='checkbox']"));
            }
        }
        return found;
    };
    // Re-create an <ol> as a <ul>, carrying over attributes and all child nodes.
    // Leaving the <ol> tag while tagging it data-type="taskList" would let
    // generateJSON match BOTH the orderedList rule (tag) and the taskList rule
    // (data-type), emitting a phantom empty orderedList beside the task list.
    const toUnorderedList = (ol) => {
        const ul = doc.createElement("ul");
        for (const attr of Array.from(ol.attributes)) {
            ul.setAttribute(attr.name, attr.value);
        }
        while (ol.firstChild) {
            ul.appendChild(ol.firstChild);
        }
        ol.replaceWith(ul);
        return ul;
    };
    // Snapshot the lists first: converting an outer <ol> moves (not clones) its
    // children, so nested list elements collected here remain valid.
    for (const list of Array.from(doc.querySelectorAll("ul, ol"))) {
        // Only DIRECT <li> children; nested lists get their own iteration.
        const items = Array.from(list.children).filter((child) => child.tagName === "LI");
        if (items.length === 0) {
            continue;
        }
        const boxesPerItem = items.map(ownCheckboxes);
        if (boxesPerItem.some((boxes) => boxes.length === 0)) {
            continue;
        }
        const target = list.tagName === "OL" ? toUnorderedList(list) : list;
        target.setAttribute("data-type", "taskList");
        items.forEach((li, index) => {
            const boxes = boxesPerItem[index];
            // The first checkbox decides the checked state; extras are only removed.
            const first = boxes[0];
            const checked = first.hasAttribute("checked") || first.checked;
            li.setAttribute("data-type", "taskItem");
            li.setAttribute("data-checked", checked ? "true" : "false");
            for (const box of boxes) {
                box.remove();
            }
        });
    }
    return doc.body.innerHTML;
}
|
||||
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
// definition lines are collected into a single <section data-footnotes>.
// Definition detection + fence handling are shared with analyzeFootnotes via
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
/**
 * Escape a value for use inside a double-quoted HTML attribute.
 * `&` is escaped first so the `&quot;` entities produced for quotes are not
 * double-escaped.
 *
 * @param {unknown} value - Value to escape; coerced with String().
 * @returns {string} The escaped text.
 */
function escapeFootnoteAttr(value) {
    return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
}
/**
 * marked inline extension turning a `[^id]` marker into an empty
 * `<sup data-footnote-ref data-id="id"></sup>` element.
 */
const footnoteRefMarkedExtension = {
    name: "footnoteRef",
    level: "inline",
    /** Index of the next possible marker in `src`, or -1 when there is none. */
    start(src) {
        return src.match(/\[\^/)?.index ?? -1;
    },
    /** Produce a token only when a marker sits at the very start of `src`. */
    tokenizer(src) {
        const match = FOOTNOTE_REF_RE.exec(src);
        if (match === null || match.index !== 0) {
            return undefined;
        }
        const [raw, id] = match;
        return { type: "footnoteRef", raw, id };
    },
    /** Render the token; the id is attribute-escaped. */
    renderer(token) {
        return `<sup data-footnote-ref data-id="${escapeFootnoteAttr(token.id)}"></sup>`;
    },
};
|
||||
marked.use({ extensions: [footnoteRefMarkedExtension] });
|
||||
/**
 * Pull `[^id]: text` definition lines out of the body and render a single
 * <section data-footnotes> for them.
 *
 * @param {string} markdown - Markdown source.
 * @returns {{ body: string, section: string }} With no definitions: the
 *   original markdown and an empty section. Otherwise: the remaining lines
 *   joined with "\n" and the rendered footnotes section.
 */
function extractFootnotes(markdown) {
    const bodyLines = [];
    // id -> first definition text. Duplicate ids: FIRST WINS, the rest are
    // DROPPED (mirror of editor-ext extractFootnoteDefinitions). Reference
    // markers are left untouched so repeated `[^a]` references reuse the single
    // footnote (Pandoc semantics, #166). The dropped duplicate is surfaced to
    // the caller via analyzeFootnotes (`duplicateDefinitions`), not silently
    // lost. MUST stay in sync with the editor-ext mirror.
    const firstById = new Map();
    // Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
    // block is inert and stays in the body verbatim; only real definition lines
    // are pulled out. analyzeFootnotes() consumes the SAME lexer so its
    // diagnostics match exactly what import keeps/strips (#166).
    for (const tok of lexFootnoteLines(markdown)) {
        if (tok.inFence || !tok.definition) {
            bodyLines.push(tok.line);
            continue;
        }
        const { id, text } = tok.definition;
        if (!firstById.has(id)) {
            firstById.set(id, text);
        }
    }
    // Every definition adds (or finds) a map entry, so an empty map means the
    // input held no definitions: return the markdown untouched.
    if (firstById.size === 0) {
        return { body: markdown, section: "" };
    }
    const renderedDefs = [];
    for (const [id, text] of firstById) {
        renderedDefs.push(`<div data-footnote-def data-id="${escapeFootnoteAttr(id)}"><p>${marked.parseInline(text || "")}</p></div>`);
    }
    return {
        body: bodyLines.join("\n"),
        section: `<section data-footnotes>${renderedDefs.join("")}</section>`,
    };
}
|
||||
/**
 * Convert markdown to a ProseMirror doc using the full Docmost schema.
 *
 * Pipeline: callout fences -> HTML divs, footnote definitions split off into a
 * trailing section, the remaining markdown rendered by marked, checkbox lists
 * bridged to task lists, and the final HTML parsed with the Docmost extensions.
 *
 * @param {string} markdownContent - Markdown source.
 * @returns {Promise<object>} The JSON document produced by generateJSON.
 */
export async function markdownToProseMirror(markdownContent) {
    const calloutsAsHtml = await preprocessCallouts(markdownContent);
    const footnotes = extractFootnotes(calloutsAsHtml);
    // The footnotes section is appended after the rendered body.
    const renderedBody = await marked.parse(footnotes.body);
    const html = bridgeTaskLists(renderedBody + footnotes.section);
    return generateJSON(html, docmostExtensions);
}
|
||||
/**
 * Build the collaboration WebSocket URL from an API base URL:
 * switch http(s)->ws(s), strip a trailing /api, mount on /collab.
 * Shared by the live read and the mutate path so both target the same socket.
 *
 * @param {string} baseUrl - API base URL, e.g. "https://host/api".
 * @returns {string} The ws(s) URL ending in "/collab", without query or hash.
 */
export function buildCollabWsUrl(baseUrl) {
    // Case-insensitive: URL schemes are case-insensitive, and `new URL` would
    // otherwise normalize "HTTP://..." to "http://..." and leave it un-swapped.
    const wsUrl = baseUrl.replace(/^http/i, "ws");
    try {
        const url = new URL(wsUrl);
        const basePath = url.pathname
            .replace(/\/api\/?$/, "") // drop a trailing /api or /api/
            .replace(/\/$/, ""); // avoid a double slash before /collab
        url.pathname = `${basePath}/collab`;
        // Drop any query/hash from the base URL so it is not carried into the
        // collaboration ws URL.
        url.search = "";
        url.hash = "";
        return url.toString();
    }
    catch {
        // Fallback if URL parsing fails: best-effort string append.
        return wsUrl.endsWith("/collab")
            ? wsUrl
            : `${wsUrl.replace(/\/$/, "")}/collab`;
    }
}
|
||||
/**
 * Encode a ProseMirror doc into a Yjs document.
 *
 * The doc is passed through `sanitizeForYjs` first; if `toYdoc` still throws,
 * the failure is rethrown as the error built by `unstorableYjsError` for the
 * "toYdoc" stage instead of the raw encoder error.
 */
export function buildYDoc(doc) {
    const sanitized = sanitizeForYjs(doc);
    let encoded;
    try {
        encoded = TiptapTransformer.toYdoc(sanitized, "default", docmostExtensions);
    }
    catch (cause) {
        throw unstorableYjsError(sanitized, "toYdoc", cause);
    }
    return encoded;
}
|
||||
/**
 * Write a new ProseMirror doc into the live "default" Yjs fragment by
 * STRUCTURAL DIFF, preserving the Yjs identity of unchanged nodes (issue #152).
 *
 * Background: the previous approach deleted the whole fragment and re-applied a
 * fresh Y.Doc, discarding every Yjs node id. y-prosemirror anchors the editor
 * selection to those ids, so an open editor's cursor snapped to the end of the
 * document on every agent write. `updateYFragment` diffs the new node against
 * the current fragment and touches only changed children, so unchanged nodes
 * keep their ids and the live cursor stays put.
 *
 * The diff runs inside a single `transact` so it applies atomically. Failures
 * are rethrown via `unstorableYjsError`, labelled with the stage that threw
 * ("fromJSON" or "updateYFragment").
 */
export function applyDocToFragment(ydoc, newDoc) {
    const sanitized = sanitizeForYjs(newDoc);
    const target = ydoc.getXmlFragment("default");
    // Hydration gets its OWN try so a failure here (e.g. an unknown node type)
    // is attributed to "fromJSON" rather than to the Yjs write (#154 review).
    const hydrate = () => {
        try {
            return PMNode.fromJSON(docmostSchema, sanitized);
        }
        catch (cause) {
            throw unstorableYjsError(sanitized, "fromJSON", cause);
        }
    };
    const node = hydrate();
    const writeDiff = () => {
        const meta = { mapping: new Map(), isOMark: new Map() };
        updateYFragment(ydoc, target, node, meta);
    };
    try {
        ydoc.transact(writeDiff);
    }
    catch (cause) {
        throw unstorableYjsError(sanitized, "updateYFragment", cause);
    }
}
|
||||
/**
 * Encodability gate used by the dry-run preview: throws the same descriptive
 * error the write path would when the doc cannot be stored, returns nothing
 * otherwise.
 *
 * Two checks run, in order:
 *   1. `buildYDoc` (sanitize + `toYdoc`);
 *   2. the apply path's `PMNode.fromJSON` hydration, so a doc that would only
 *      fail there (e.g. an unknown node type) is rejected at preview time too
 *      (#154 review).
 *
 * It does NOT run `updateYFragment` against a live fragment, so this is a gate
 * and not a byte-for-byte rehearsal of `applyDocToFragment`.
 */
export function assertYjsEncodable(doc) {
    buildYDoc(doc);
    const sanitized = sanitizeForYjs(doc);
    try {
        PMNode.fromJSON(docmostSchema, sanitized);
    }
    catch (cause) {
        throw unstorableYjsError(sanitized, "fromJSON", cause);
    }
}
|
||||
/** How long (ms) to wait for the initial handshake/sync before giving up. */
const CONNECT_TIMEOUT_MS = 25 * 1000;

/** How long (ms) to wait for the server to acknowledge our write. */
const PERSIST_TIMEOUT_MS = 20 * 1000;
|
||||
/**
 * Safely mutate the live content of a page over the collaboration websocket.
 *
 * This is the single safe write path for every MCP content mutation. It:
 *   1. serializes per-page writes through withPageLock (no two MCP writes on
 *      the same page overlap);
 *   2. connects to Hocuspocus and waits for the initial sync so the local ydoc
 *      mirrors the authoritative server doc — INCLUDING edits/comments/images
 *      that are not yet in the debounced REST snapshot;
 *   3. inside onSynced, SYNCHRONOUSLY reads the live doc, runs `transform`, and
 *      writes the result back — with no `await` between read and write so no
 *      remote update can interleave and clobber concurrent human edits;
 *   4. waits for the server to acknowledge the write (unsyncedChanges -> 0)
 *      before resolving, so the next operation observes our change.
 *
 * Timeouts: the connect timer covers ONLY the handshake/initial sync and is
 * cancelled as soon as onSynced runs; from then on the persist timer is the
 * only deadline, so a slow acknowledgement is reported as a persist timeout
 * rather than a (misleading) connection timeout.
 *
 * @param pageId      Page whose `page.<id>` collaboration document is edited.
 * @param collabToken Token passed to the collaboration provider.
 * @param baseUrl     API base URL; converted with buildCollabWsUrl.
 * @param transform   Receives the live ProseMirror doc and returns the NEW full
 *                    doc to write, or `null`/`undefined` to abort with no write.
 *                    If it throws, the error is propagated to the caller.
 * @returns Promise of `{ doc, verify }`: `doc` is the doc that was written (or
 *          the live doc when the transform aborted) and `verify` is the change
 *          report from summarizeChange (a fixed no-op report on abort). The
 *          report is computed AFTER the atomic read->write, so it never widens
 *          that window.
 */
export async function mutatePageContent(pageId, collabToken, baseUrl, transform) {
    return withPageLock(pageId, () => {
        if (process.env.DEBUG) {
            console.error(`Starting realtime content mutate for page ${pageId}`);
            // Token prefix is sensitive; only log it under DEBUG.
            console.error(`Token prefix: ${collabToken ? collabToken.substring(0, 5) : "NONE"}...`);
        }
        const ydoc = new Y.Doc();
        const wsUrl = buildCollabWsUrl(baseUrl);
        if (process.env.DEBUG)
            console.error(`Connecting to WebSocket: ${wsUrl}`);
        return new Promise((resolve, reject) => {
            let provider;
            let applied = false; // onSynced may fire again on reconnect — apply once.
            let settled = false;
            // Set true on disconnect/close so a reconnect-driven unsyncedChanges->0
            // cannot be mistaken for a successful persist of our write.
            let connectionLost = false;
            let connectTimer;
            let persistTimer;
            let unsyncedHandler;
            // The verifiable result resolved on every success/abort path. Set on
            // abort (no-op report) and after a real write (computed change report).
            let mutationResult;
            const cleanup = () => {
                if (connectTimer)
                    clearTimeout(connectTimer);
                if (persistTimer)
                    clearTimeout(persistTimer);
                if (provider) {
                    if (unsyncedHandler) {
                        try {
                            provider.off("unsyncedChanges", unsyncedHandler);
                        }
                        catch (err) {
                            // Best-effort teardown: the outcome is already decided.
                        }
                    }
                    try {
                        provider.destroy();
                    }
                    catch (err) {
                        // Best-effort teardown: the outcome is already decided.
                    }
                }
            };
            // Settle the promise exactly once; later calls are no-ops.
            const finish = (err, value) => {
                if (settled)
                    return;
                settled = true;
                cleanup();
                if (err)
                    reject(err);
                else
                    resolve(value);
            };
            connectTimer = setTimeout(() => {
                finish(new Error("Connection timeout to collaboration server"));
            }, CONNECT_TIMEOUT_MS);
            // Resolve once the server has acknowledged our update. The provider
            // increments unsyncedChanges when our local update is sent and
            // decrements it when the server replies with a SyncStatus(applied=true);
            // reaching 0 means the authoritative in-memory ydoc on the server now
            // contains our write.
            const waitForPersistence = () => {
                if (settled)
                    return;
                // A missing provider is a failure, not a success: without it the write
                // can never have been acknowledged. Only an actual unsyncedChanges===0
                // on a live provider counts as persisted.
                if (!provider) {
                    finish(new Error("collab provider gone before persistence"));
                    return;
                }
                if (provider.unsyncedChanges === 0) {
                    finish(null, mutationResult);
                    return;
                }
                persistTimer = setTimeout(() => {
                    finish(new Error("Timeout waiting for collaboration server to persist the update"));
                }, PERSIST_TIMEOUT_MS);
                unsyncedHandler = (data) => {
                    // Only treat unsyncedChanges->0 as success when the connection is
                    // still up. A transient disconnect + reconnect handshake can drive
                    // the counter back to 0 without our write being re-transmitted; in
                    // that case let the disconnect/close error win instead.
                    if (data.number === 0 && !connectionLost) {
                        finish(null, mutationResult);
                    }
                };
                provider.on("unsyncedChanges", unsyncedHandler);
            };
            try {
                provider = new HocuspocusProvider({
                    url: wsUrl,
                    name: `page.${pageId}`,
                    document: ydoc,
                    token: collabToken,
                    // @ts-ignore - Required for Node.js environment
                    WebSocketPolyfill: WebSocket,
                    onConnect: () => {
                        if (process.env.DEBUG)
                            console.error("WS Connect");
                    },
                    // An unexpected disconnect/close while we are still waiting (during the
                    // connect-wait before onSynced, or during the persistence wait after the
                    // write) means the update will never be acknowledged — surface it now
                    // instead of hanging until the connect/persist timeout fires. `finish`
                    // is idempotent via the `settled` flag, so the onClose that our own
                    // cleanup()->provider.destroy() triggers (after settled=true is set) is
                    // a harmless no-op and cannot cause a double-resolve.
                    onDisconnect: () => {
                        if (process.env.DEBUG)
                            console.error("WS Disconnect");
                        // Mark BEFORE finish so the unsyncedChanges handler (if it races)
                        // sees the connection as lost and won't report a false success.
                        connectionLost = true;
                        finish(new Error("Collaboration connection closed before the update was persisted/synced"));
                    },
                    onClose: () => {
                        if (process.env.DEBUG)
                            console.error("WS Close");
                        // Mark BEFORE finish so the unsyncedChanges handler (if it races)
                        // sees the connection as lost and won't report a false success.
                        connectionLost = true;
                        finish(new Error("Collaboration connection closed before the update was persisted/synced"));
                    },
                    onSynced: () => {
                        if (applied || settled)
                            return;
                        applied = true;
                        // The handshake is done: retire the connect deadline so it cannot
                        // fire during the persistence wait and report a bogus
                        // "Connection timeout". Every path below either settles
                        // immediately or arms the persist timer.
                        clearTimeout(connectTimer);
                        connectTimer = undefined;
                        if (process.env.DEBUG)
                            console.error("Connected and synced!");
                        // CRITICAL: everything between reading the live doc and writing it
                        // back must stay synchronous (no await). While the JS event loop is
                        // not yielded, no incoming remote update can interleave, so any
                        // already-synced concurrent edits are preserved in liveDoc.
                        let newDoc;
                        let beforeDoc;
                        try {
                            let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
                            if (!liveDoc ||
                                typeof liveDoc !== "object" ||
                                !Array.isArray(liveDoc.content)) {
                                liveDoc = { type: "doc", content: [] };
                            }
                            // Snapshot the before-doc for the change report. Docs are
                            // JSON-serializable, so this is a safe deep clone.
                            beforeDoc = JSON.parse(JSON.stringify(liveDoc));
                            newDoc = transform(liveDoc);
                            if (newDoc == null) {
                                // Transform aborted — write nothing, return the live doc with a
                                // no-op change report.
                                mutationResult = {
                                    doc: liveDoc,
                                    verify: {
                                        changed: false,
                                        textInserted: 0,
                                        textDeleted: 0,
                                        blocksChanged: 0,
                                        marks: {},
                                        summary: "no changes (transform aborted)",
                                    },
                                };
                                finish(null, mutationResult);
                                return;
                            }
                            // Structural diff into the live fragment (issue #152): preserves
                            // the Yjs ids of unchanged nodes, so an open editor's cursor is not
                            // yanked to the end of the document on every agent write.
                            applyDocToFragment(ydoc, newDoc);
                        }
                        catch (e) {
                            // Includes errors thrown by transform (e.g. "afterText not found",
                            // "text not found"): propagate them verbatim to the caller.
                            finish(e instanceof Error ? e : new Error(String(e)));
                            return;
                        }
                        // Compute the change report AFTER the transact write: it only needs
                        // the JSON before/after, so it cannot affect the atomic read->write
                        // window.
                        mutationResult = {
                            doc: newDoc,
                            verify: summarizeChange(beforeDoc, newDoc),
                        };
                        if (process.env.DEBUG)
                            console.error("Content written, waiting for server to persist...");
                        waitForPersistence();
                    },
                    onAuthenticationFailed: () => {
                        finish(new Error("Authentication failed for collaboration connection"));
                    },
                });
            }
            catch (e) {
                // A synchronous constructor failure must still go through finish() so
                // the pending connect timer is cleared instead of lingering.
                finish(e instanceof Error ? e : new Error(String(e)));
            }
        });
    });
}
|
||||
/**
 * Replace the live content of a page over the collaboration websocket.
 *
 * Accepts a ready ProseMirror JSON document; the caller decides whether it came
 * from markdown (ids regenerate) or was edited in place (existing block ids
 * preserved). This is an intentional full replace (used by update_page /
 * update_page_json) that goes through mutatePageContent.
 *
 * Rejects with "replacePageContent: invalid ProseMirror document" when the
 * argument is not an object whose `type` is "doc" — failing fast instead of
 * deferring the failure into the collaboration write.
 */
export async function replacePageContent(pageId, prosemirrorDoc, collabToken, baseUrl) {
    const looksLikeDoc = prosemirrorDoc != null &&
        typeof prosemirrorDoc === "object" &&
        prosemirrorDoc.type === "doc";
    if (!looksLikeDoc) {
        throw new Error("replacePageContent: invalid ProseMirror document");
    }
    const result = await mutatePageContent(pageId, collabToken, baseUrl, () => prosemirrorDoc);
    return result;
}
|
||||
/**
 * Markdown update path (kept for backwards compatibility).
 *
 * Converts the markdown to a ProseMirror doc and writes it as a full replace
 * through mutatePageContent. NOTE: this re-imports the whole document, so block
 * ids are regenerated. Tables and :::callout::: blocks survive thanks to the
 * full schema.
 */
export async function updatePageContentRealtime(pageId, markdownContent, collabToken, baseUrl) {
    const replacementDoc = await markdownToProseMirror(markdownContent);
    const useReplacement = () => replacementDoc;
    const result = await mutatePageContent(pageId, collabToken, baseUrl, useReplacement);
    return result;
}
|
||||
@@ -1,239 +0,0 @@
|
||||
/**
|
||||
* Inline-comment anchoring against a ProseMirror document.
|
||||
*
|
||||
* Docmost stores an inline comment's highlight as a `comment` MARK on the
|
||||
* document text (`{ type: "comment", attrs: { commentId, resolved } }`); the
|
||||
* `/comments/create` API only records the comment row + its `selection` text and
|
||||
* does NOT insert that mark, so the anchor has to be written into the page
|
||||
* content separately. This module finds where a selection lives in the document
|
||||
* and splices the comment mark across the matched range.
|
||||
*
|
||||
* Matching has to be robust because the agent supplies the selection as plain
|
||||
* text while the document stores rich inline content: a selection can span
|
||||
* several adjacent text nodes (inline code / bold / links each become their own
|
||||
* text node), and the document may use smart/typographic quotes, dash variants,
|
||||
* non-breaking spaces, or collapsed runs of whitespace that the agent typed as
|
||||
* ASCII quotes/hyphens/single spaces. We therefore normalize both sides before
|
||||
* comparing and match across maximal runs of consecutive text nodes within a
|
||||
* single block, while mapping every normalized character back to its raw index
|
||||
* so the mark lands on the exact original characters.
|
||||
*/
|
||||
/**
 * Typographic double-quote variants mapped to ASCII `"`.
 *
 * NOTE(review): the last entry is written as an escape because a bare `"`
 * inside the literal terminates the string early (a syntax error). U+FF02
 * FULLWIDTH QUOTATION MARK is presumed to be the intended character — confirm
 * against the TypeScript source.
 */
const DOUBLE_QUOTES = "«»„“”‟〝〞\uFF02";

/** Typographic single-quote/apostrophe variants mapped to ASCII `'`. */
const SINGLE_QUOTES = "‘’‚‛";

/** Dash variants mapped to ASCII `-`. */
const DASHES = "–—―−‐‑‒";

/** Guard against pathological/cyclic documents in the depth-first walk. */
const MAX_DEPTH = 200;
|
||||
/**
 * Build the `comment` mark Docmost stores on anchored text.
 *
 * The mark schema declares both `commentId` and `resolved`; `resolved: false`
 * is included so the stored mark matches the one the editor writes.
 */
function makeCommentMark(commentId) {
    const attrs = { commentId, resolved: false };
    return { type: "comment", attrs };
}
|
||||
/**
 * True for any character that matching collapses/replaces with a single normal
 * space.
 *
 * `\s` covers regular ASCII whitespace (tab, newline, ...). The special spaces
 * are listed explicitly as `\u` escapes, so the check is deterministic and the
 * code points are visible in the source rather than being indistinguishable
 * blank literals.
 *
 * @param {string} ch - A single character.
 * @returns {boolean} Whether `ch` counts as whitespace for matching.
 */
function isWhitespaceChar(ch) {
    switch (ch) {
        case "\u00A0": // no-break space
        case "\u2007": // figure space
        case "\u202F": // narrow no-break space
        case "\u2009": // thin space
        case "\u200A": // hair space
        case "\u2002": // en space
        case "\u2003": // em space
            return true;
        default:
            return /\s/.test(ch);
    }
}
|
||||
/**
 * Normalize a string for anchor matching.
 *
 * Returns `{ norm, map }` where `norm` is the normalized text and `map[i]` is
 * the index into the ORIGINAL `s` of the i-th normalized character.
 *
 * Rules:
 *   - smart quotes and dash variants become their ASCII forms;
 *   - any run of whitespace collapses to ONE space whose map entry points at
 *     the FIRST raw whitespace character of the run;
 *   - case is preserved (anchoring is case-sensitive).
 */
export function normalizeForMatch(s) {
    const pieces = [];
    const map = [];
    let pos = 0;
    while (pos < s.length) {
        const ch = s[pos];
        if (isWhitespaceChar(ch)) {
            // One space for the whole run, mapped to where the run starts.
            pieces.push(" ");
            map.push(pos);
            do {
                pos += 1;
            } while (pos < s.length && isWhitespaceChar(s[pos]));
            continue;
        }
        let ascii = ch;
        if (DOUBLE_QUOTES.indexOf(ch) >= 0) {
            ascii = '"';
        }
        else if (SINGLE_QUOTES.indexOf(ch) >= 0) {
            ascii = "'";
        }
        else if (DASHES.indexOf(ch) >= 0) {
            ascii = "-";
        }
        pieces.push(ascii);
        map.push(pos);
        pos += 1;
    }
    return { norm: pieces.join(""), map };
}
|
||||
/**
 * Locate a selection inside a SINGLE block's direct `content` array.
 *
 * Consecutive `text` nodes are grouped into maximal runs (any non-text inline
 * node breaks a run). Each run and the selection are normalized the same way
 * and the run is searched for the normalized, trimmed selection.
 *
 * Returns `{ startChild, startOffset, endChild, endOffset }` for the FIRST run
 * that matches — child indexes into `blockContent`, offsets into each child's
 * raw text, `endOffset` exclusive — or `null` when nothing matches, the
 * selection normalizes to empty, or `blockContent` is not an array.
 */
export function findAnchorInBlock(blockContent, selection) {
    if (!Array.isArray(blockContent)) {
        return null;
    }
    // Only the NORMALIZED selection is trimmed.
    const needle = normalizeForMatch(selection).norm.trim();
    if (needle.length === 0) {
        return null;
    }
    const isTextNode = (n) => Boolean(n) && typeof n === "object" && n.type === "text";
    let cursor = 0;
    while (cursor < blockContent.length) {
        if (!isTextNode(blockContent[cursor])) {
            cursor += 1;
            continue;
        }
        // Gather the maximal run of text nodes starting at `cursor`, remembering
        // which child/offset every raw character came from.
        const owners = [];
        let runText = "";
        let runEnd = cursor;
        for (; runEnd < blockContent.length && isTextNode(blockContent[runEnd]); runEnd += 1) {
            const raw = blockContent[runEnd].text;
            const piece = typeof raw === "string" ? raw : "";
            for (let offset = 0; offset < piece.length; offset += 1) {
                owners.push({ childIdx: runEnd, offset });
            }
            runText += piece;
        }
        const { norm, map } = normalizeForMatch(runText);
        const hit = norm.indexOf(needle);
        if (hit >= 0) {
            const pastHit = hit + needle.length;
            // Raw index of the first character AFTER the match.
            const rawEndExclusive = pastHit < map.length ? map[pastHit] : runText.length;
            const first = owners[map[hit]];
            // The last matched raw character sits just before rawEndExclusive.
            const last = owners[rawEndExclusive - 1];
            return {
                startChild: first.childIdx,
                startOffset: first.offset,
                endChild: last.childIdx,
                endOffset: last.offset + 1,
            };
        }
        // No match in this run: resume scanning after it.
        cursor = Math.max(runEnd, cursor + 1);
    }
    return null;
}
|
||||
/**
 * Report whether `selection` can be anchored anywhere in `doc`.
 *
 * Depth-first, document order: for each node with an array `content`, try that
 * node's own content first, then descend into children that themselves have an
 * array `content`. Nodes deeper than MAX_DEPTH are not searched.
 */
export function canAnchorInDoc(doc, selection) {
    function search(node, depth) {
        if (depth > MAX_DEPTH) {
            return false;
        }
        if (!node || typeof node !== "object" || !Array.isArray(node.content)) {
            return false;
        }
        if (findAnchorInBlock(node.content, selection)) {
            return true;
        }
        return node.content.some((child) => Boolean(child) &&
            typeof child === "object" &&
            Array.isArray(child.content) &&
            search(child, depth + 1));
    }
    return search(doc, 0);
}
|
||||
/**
 * Split the matched text nodes and put the comment mark on the matched range.
 *
 * `blockContent` is mutated IN PLACE: children `startChild..endChild` are
 * replaced by up to three fragments each (text before the match, the marked
 * text, text after the match). The caller guarantees those children are text
 * nodes (findAnchorInBlock only matches within runs of text nodes).
 */
function spliceCommentMark(blockContent, match, commentId) {
    const { startChild, startOffset, endChild, endOffset } = match;
    const commentMark = makeCommentMark(commentId);
    const isNotComment = (m) => !(m && m.type === "comment");
    const replacement = [];
    for (let idx = startChild; idx <= endChild; idx += 1) {
        const node = blockContent[idx];
        const text = typeof node.text === "string" ? node.text : "";
        const isFirst = idx === startChild;
        const isLast = idx === endChild;
        const from = isFirst ? startOffset : 0;
        const to = isLast ? endOffset : text.length;
        // Work per node so each node keeps its OWN marks/attrs.
        const ownMarks = Array.isArray(node.marks) ? node.marks : [];
        // Empty pieces are skipped; every emitted piece is a copy of the node.
        const emit = (piece, marks) => {
            if (piece.length > 0) {
                replacement.push({ ...node, text: piece, marks });
            }
        };
        emit(isFirst ? text.slice(0, from) : "", [...ownMarks]);
        // Any pre-existing comment mark is dropped from the marked piece so it
        // carries exactly one comment mark (the new one).
        emit(text.slice(from, to), [...ownMarks.filter(isNotComment), commentMark]);
        emit(isLast ? text.slice(to) : "", [...ownMarks]);
    }
    blockContent.splice(startChild, endChild - startChild + 1, ...replacement);
}
|
||||
/**
 * Anchor a comment on the first place `selection` occurs in `doc`.
 *
 * Walks depth-first in the same order as canAnchorInDoc. On the FIRST node
 * whose content matches, the comment mark is spliced across the matched range
 * in place and `true` is returned. Returns `false`, without mutating `doc`,
 * when no block matches.
 */
export function applyAnchorInDoc(doc, selection, commentId) {
    function anchor(node, depth) {
        if (depth > MAX_DEPTH) {
            return false;
        }
        if (!node || typeof node !== "object" || !Array.isArray(node.content)) {
            return false;
        }
        const hit = findAnchorInBlock(node.content, selection);
        if (hit) {
            spliceCommentMark(node.content, hit, commentId);
            return true;
        }
        return node.content.some((child) => Boolean(child) &&
            typeof child === "object" &&
            Array.isArray(child.content) &&
            anchor(child, depth + 1));
    }
    return anchor(doc, 0);
}
|
||||
@@ -1,423 +0,0 @@
|
||||
/**
|
||||
* Headless, Docmost-equivalent document diff.
|
||||
*
|
||||
* Docmost's history editor computes a change set with the exact pipeline below
|
||||
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
|
||||
* editor decorations. This module runs the SAME computation but serializes the
|
||||
* result to text + integrity counts instead of decorations, so a diff can be
|
||||
* previewed without a browser.
|
||||
*
|
||||
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
|
||||
* maintained published fork of the MIT prosemirror-recreate-steps source that
|
||||
* Docmost vendors in @docmost/editor-ext; it exposes the identical
|
||||
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
|
||||
* signature.
|
||||
*
|
||||
* If recreateTransform / the changeset throws on a pathological document pair,
|
||||
* we fall back to a coarse block-level text diff so the tool never hard-fails.
|
||||
*/
|
||||
import { Node } from "@tiptap/pm/model";
|
||||
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
|
||||
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
|
||||
import { docmostSchema } from "./docmost-schema.js";
|
||||
/**
 * Recursively concatenate the plain text of a JSON node: the node's own string
 * `text` (if any) followed by the plain text of each child in `content`.
 * Non-object input yields "".
 */
function plainText(node) {
    if (!node || typeof node !== "object") {
        return "";
    }
    const ownText = typeof node.text === "string" ? node.text : "";
    const children = Array.isArray(node.content) ? node.content : [];
    return children.reduce((acc, child) => acc + plainText(child), ownText);
}
|
||||
/**
 * Count the nodes in a JSON doc (the root included) for which `pred(node)` is
 * truthy. Walks `content` arrays recursively; non-object entries are ignored.
 */
function countNodes(doc, pred) {
    const tally = (node) => {
        if (!node || typeof node !== "object") {
            return 0;
        }
        let total = pred(node) ? 1 : 0;
        if (Array.isArray(node.content)) {
            for (const child of node.content) {
                total += tally(child);
            }
        }
        return total;
    };
    return tally(doc);
}
|
||||
/**
 * Count UNIQUE links in a JSON doc, keyed by `href`.
 *
 * One link can be split across adjacent text runs (e.g. a "link+bold" run
 * followed by a plain "link" run), so counting link-bearing runs would
 * over-count. Collecting hrefs in a Set counts each distinct link once. Link
 * marks with a missing or non-string href all share the "" key, so malformed
 * links together count as one.
 */
function countUniqueLinks(doc) {
    const seen = new Set();
    const walk = (node) => {
        if (!node || typeof node !== "object") {
            return;
        }
        const marks = node.type === "text" && Array.isArray(node.marks) ? node.marks : [];
        for (const mark of marks) {
            if (!mark || mark.type !== "link") {
                continue;
            }
            const href = mark.attrs?.href;
            seen.add(typeof href === "string" ? href : "");
        }
        if (Array.isArray(node.content)) {
            node.content.forEach((child) => walk(child));
        }
    };
    walk(doc);
    return seen.size;
}
|
||||
/**
 * Count `footnoteReference` nodes at or below `node` (the node itself counts
 * when it is one). Non-object input counts as 0.
 */
function countFootnoteRefs(node) {
    if (!node || typeof node !== "object") {
        return 0;
    }
    const self = node.type === "footnoteReference" ? 1 : 0;
    if (!Array.isArray(node.content)) {
        return self;
    }
    return node.content.reduce((sum, child) => sum + countFootnoteRefs(child), self);
}
|
||||
/**
 * Ordered list of footnote marker numbers found in the BODY only, in reading
 * order. The body is every top-level block before the first heading whose
 * trimmed text equals `notesHeading` (the notes heading, e.g. "Примечания..."
 * in Russian docs); without such a heading the whole doc is the body.
 *
 * Two representations are supported:
 *   - real `footnoteReference` nodes (the current footnote feature): numbered
 *     1..n by reading position, because their visible number is derived and
 *     not stored. These take precedence whenever at least one is present;
 *   - legacy `[N]` text markers (older translated docs): the literal N.
 */
function footnoteMarkers(doc, notesHeading) {
    const topLevel = Array.isArray(doc?.content) ? doc.content : [];
    const headingAt = topLevel.findIndex((n) => n &&
        n.type === "heading" &&
        plainText(n).trim() === notesHeading);
    const body = headingAt < 0 ? topLevel : topLevel.slice(0, headingAt);
    const refTotal = body.reduce((sum, block) => sum + countFootnoteRefs(block), 0);
    if (refTotal > 0) {
        const numbered = [];
        for (let k = 1; k <= refTotal; k += 1) {
            numbered.push(k);
        }
        return numbered;
    }
    // Fallback: legacy `[N]` text markers.
    return body.flatMap((block) => Array.from(plainText(block).matchAll(/\[(\d+)\]/g), (found) => Number(found[1])));
}
|
||||
/**
 * Compute the integrity tuples for two JSON docs.
 *
 * Every field of the result is an `[old, new]` pair: node counts for `images`,
 * `tables` and `callouts`, the unique-href count for `links`, and the ordered
 * marker lists for `footnoteMarkers`.
 */
function computeIntegrity(oldDoc, newDoc, notesHeading) {
    // Apply one measurement to both docs, old first.
    const both = (measure) => [measure(oldDoc), measure(newDoc)];
    const nodesOfType = (type) => (doc) => countNodes(doc, (n) => n.type === type);
    return {
        images: both(nodesOfType("image")),
        links: both(countUniqueLinks),
        tables: both(nodesOfType("table")),
        callouts: both(nodesOfType("callout")),
        footnoteMarkers: both((doc) => footnoteMarkers(doc, notesHeading)),
    };
}
|
||||
/**
 * Return the leading text (at most 80 characters, ellipsized) of the block
 * that contains document position `pos`. The position is clamped into
 * `[0, node.content.size]` before resolving. When the resolved position has
 * depth >= 1 the depth-1 ancestor is used, otherwise the depth-0 node.
 *
 * Any error raised while resolving yields "" instead of propagating.
 *
 * @param node document node exposing `content.size` and `resolve(pos)`
 * @param pos document position
 * @returns the block's lead text, or "" on failure
 */
function blockContextAt(node, pos) {
    const LIMIT = 80;
    try {
        const upperBound = node.content.size;
        const safePos = Math.max(0, Math.min(pos, upperBound));
        const resolved = node.resolve(safePos);
        const owner = resolved.depth >= 1 ? resolved.node(1) : resolved.node(0);
        const lead = owner.textContent || "";
        if (lead.length <= LIMIT) {
            return lead;
        }
        return lead.slice(0, LIMIT - 3) + "...";
    }
    catch {
        return "";
    }
}
|
||||
/**
 * Shorten `s` to at most `n` UTF-16 code units for the markdown summary,
 * ending with "..." when anything was cut.
 *
 * - Strings that already fit are returned unchanged.
 * - The cut never lands between the two halves of a surrogate pair: if the
 *   last kept code unit is a high surrogate it is dropped too, so the output
 *   never contains a lone surrogate.
 * - When `n` is too small to hold any text plus the ellipsis (n <= 3) the
 *   result is the ellipsis clipped to `n`, so the limit is always honoured.
 *
 * @param s the string to shorten
 * @param n maximum length of the result (default 120)
 * @returns `s`, or its truncated form
 */
function truncate(s, n = 120) {
    if (s.length <= n) {
        return s;
    }
    const ellipsis = "...";
    if (n <= ellipsis.length) {
        return ellipsis.slice(0, Math.max(0, n));
    }
    let end = n - ellipsis.length;
    const lastKept = s.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
        // High surrogate at the edge: its low half would be cut off.
        end -= 1;
    }
    return s.slice(0, end) + ellipsis;
}
|
||||
/**
|
||||
* Coarse fallback: a block-by-block plain-text diff. Used only when the precise
|
||||
* changeset pipeline throws, so the tool degrades gracefully instead of failing.
|
||||
*/
|
||||
function coarseDiff(oldDoc, newDoc) {
|
||||
const oldBlocks = Array.isArray(oldDoc?.content) ? oldDoc.content : [];
|
||||
const newBlocks = Array.isArray(newDoc?.content) ? newDoc.content : [];
|
||||
const oldTexts = oldBlocks.map(plainText);
|
||||
const newTexts = newBlocks.map(plainText);
|
||||
const oldSet = new Set(oldTexts);
|
||||
const newSet = new Set(newTexts);
|
||||
const changes = [];
|
||||
for (const t of oldTexts) {
|
||||
if (!newSet.has(t) && t.trim() !== "") {
|
||||
changes.push({ op: "delete", block: truncate(t, 80), text: t });
|
||||
}
|
||||
}
|
||||
for (const t of newTexts) {
|
||||
if (!oldSet.has(t) && t.trim() !== "") {
|
||||
changes.push({ op: "insert", block: truncate(t, 80), text: t });
|
||||
}
|
||||
}
|
||||
return changes;
|
||||
}
|
||||
/**
 * Render a diff result as a markdown report: a title line with the summary
 * counters, an optional note when the coarse fallback was used, the integrity
 * tuples as "old -> new" bullets, and one `+`/`-` line per change (with the
 * block context appended after " @ " when the change carries one).
 *
 * @param result `{ summary, integrity, changes }`
 * @param fellBack true when the coarse block-level diff produced `changes`
 * @returns the report as a single newline-joined string
 */
function renderMarkdown(result, fellBack) {
    const { summary, integrity, changes } = result;
    const out = [
        `# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`,
    ];
    if (fellBack) {
        out.push("", "> note: precise diff failed; coarse block-level diff shown.");
    }

    out.push("", "## Integrity (old -> new)");
    for (const key of ["images", "links", "tables", "callouts"]) {
        out.push(`- ${key}: ${integrity[key][0]} -> ${integrity[key][1]}`);
    }
    const oldMarkers = integrity.footnoteMarkers[0].join(", ");
    const newMarkers = integrity.footnoteMarkers[1].join(", ");
    out.push(`- footnoteMarkers: [${oldMarkers}] -> [${newMarkers}]`);

    out.push("", "## Changes");
    if (changes.length === 0) {
        out.push("(no textual changes)");
        return out.join("\n");
    }
    for (const change of changes) {
        const sign = change.op === "insert" ? "+" : "-";
        const where = change.block ? ` @ ${truncate(change.block, 60)}` : "";
        out.push(`${sign} ${truncate(change.text)}${where}`);
    }
    return out.join("\n");
}
|
||||
/**
 * Diff two ProseMirror JSON documents and serialize the result to text plus
 * integrity counts.
 *
 * The precise path parses both documents with `docmostSchema`, recreates a
 * transform between them, builds a ChangeSet and simplifies it; every change
 * contributes a "delete" entry (text read from the OLD doc range
 * [fromA, toA)) and/or an "insert" entry (text read from the NEW doc range
 * [fromB, toB)). If anything in that path throws, the result degrades to the
 * coarse block-level diff instead of propagating the error.
 *
 * The counters and the changed-block set are reset before the fallback runs,
 * so a failure part-way through the precise loop cannot leave partial totals
 * that the coarse diff would then be added on top of.
 *
 * @param oldDocJson the earlier document
 * @param newDocJson the later document
 * @param notesHeading heading delimiting body from notes for footnote counting
 * @returns `{ summary: { inserted, deleted, blocksChanged }, integrity, changes, markdown }`
 */
export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
    const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);
    let changes = [];
    let inserted = 0;
    let deleted = 0;
    let fellBack = false;
    const changedBlocks = new Set();
    try {
        const oldNode = Node.fromJSON(docmostSchema, oldDocJson);
        const newNode = Node.fromJSON(docmostSchema, newDocJson);
        const tr = recreateTransform(oldNode, newNode, {
            complexSteps: false,
            wordDiffs: true,
            simplifyDiff: true,
        });
        const changeSet = ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
        const simplified = simplifyChanges(changeSet.changes, newNode);
        for (const change of simplified) {
            // Deleted text lives in the OLD doc coordinate range [fromA, toA).
            if (change.toA > change.fromA) {
                const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
                if (text.length > 0) {
                    deleted += text.length;
                    const block = blockContextAt(oldNode, change.fromA);
                    changes.push({ op: "delete", block, text });
                    if (block) {
                        changedBlocks.add(`d:${block}`);
                    }
                }
            }
            // Inserted text lives in the NEW doc coordinate range [fromB, toB).
            if (change.toB > change.fromB) {
                const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
                if (text.length > 0) {
                    inserted += text.length;
                    const block = blockContextAt(newNode, change.fromB);
                    changes.push({ op: "insert", block, text });
                    if (block) {
                        changedBlocks.add(`i:${block}`);
                    }
                }
            }
        }
    }
    catch {
        // Pathological pair: degrade to a coarse block-level diff so we never
        // throw. Discard whatever the precise path tallied before it failed,
        // otherwise the summary would not match the coarse change list.
        fellBack = true;
        inserted = 0;
        deleted = 0;
        changedBlocks.clear();
        changes = coarseDiff(oldDocJson, newDocJson);
        for (const c of changes) {
            if (c.op === "insert") {
                inserted += c.text.length;
            }
            else {
                deleted += c.text.length;
            }
            if (c.block) {
                changedBlocks.add(`${c.op[0]}:${c.block}`);
            }
        }
    }
    const partial = {
        summary: { inserted, deleted, blocksChanged: changedBlocks.size },
        integrity,
        changes,
    };
    return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
|
||||
/**
 * Walk every `text` node of a JSON document and count how many times each
 * mark type occurs (e.g. `{ bold: 5, strike: 3, link: 2 }`).
 *
 * Non-object nodes, marks that are not objects, and marks whose `type` is not
 * a string are ignored. Counting is done in a Map so a mark type that happens
 * to share a name with an `Object.prototype` member (`constructor`,
 * `toString`, ...) is tallied as a number like any other, rather than being
 * seeded from the inherited member.
 *
 * @param doc ProseMirror JSON document (or any value)
 * @returns plain object mapping mark type to occurrence count
 */
function markCounts(doc) {
    const tally = new Map();
    const visit = (node) => {
        if (!node || typeof node !== "object") {
            return;
        }
        if (node.type === "text" && Array.isArray(node.marks)) {
            for (const mark of node.marks) {
                if (mark && typeof mark.type === "string") {
                    tally.set(mark.type, (tally.get(mark.type) ?? 0) + 1);
                }
            }
        }
        if (Array.isArray(node.content)) {
            for (const child of node.content) {
                visit(child);
            }
        }
    };
    visit(doc);
    return Object.fromEntries(tally);
}
|
||||
/**
 * Produce a verification report describing how `after` differs from `before`.
 *
 * The whole computation is wrapped in a try/catch: on any error the function
 * returns a minimal `changed: true` / "changed (diff unavailable)" report
 * rather than throwing, so it cannot break the write it is verifying.
 *
 * `changed` is decided from actual deltas, not from comparing JSON strings:
 * inserted/deleted text length, changed block count, per-mark-type count
 * changes, and image/link/table/callout count changes taken from the diff's
 * integrity tuples. When none of those moved the report is `changed: false`
 * with summary "no content change". The `structure` key is present only when
 * at least one structural count changed.
 *
 * @param before document JSON prior to the mutation
 * @param after document JSON after the mutation
 * @returns `{ changed, textInserted, textDeleted, blocksChanged, marks, summary, structure? }`
 */
export function summarizeChange(before, after) {
    try {
        const diff = diffDocs(before, after);

        // Mark delta: keep only the types whose count differs.
        const tallyBefore = markCounts(before);
        const tallyAfter = markCounts(after);
        const marks = {};
        const markTypes = new Set([
            ...Object.keys(tallyBefore),
            ...Object.keys(tallyAfter),
        ]);
        for (const type of markTypes) {
            const was = tallyBefore[type] || 0;
            const now = tallyAfter[type] || 0;
            if (was !== now) {
                marks[type] = [was, now];
            }
        }

        // Structural delta: keep only the counts where old != new.
        const structure = {};
        for (const key of ["images", "links", "tables", "callouts"]) {
            const [was, now] = diff.integrity[key];
            if (was !== now) {
                structure[key] = [was, now];
            }
        }

        const { inserted: textInserted, deleted: textDeleted, blocksChanged } = diff.summary;
        const textOrBlocksMoved = textInserted > 0 || textDeleted > 0 || blocksChanged > 0;
        const hasMarkDelta = Object.keys(marks).length > 0;
        const hasStructureDelta = Object.keys(structure).length > 0;

        if (!textOrBlocksMoved && !hasMarkDelta && !hasStructureDelta) {
            return {
                changed: false,
                textInserted: 0,
                textDeleted: 0,
                blocksChanged: 0,
                marks: {},
                summary: "no content change",
            };
        }

        const describe = (table) => Object.entries(table).map(([type, [was, now]]) => `${type} ${was}→${now}`);
        const parts = [];
        // Text/block figures are listed only when one of them is non-zero.
        if (textOrBlocksMoved) {
            parts.push(`+${textInserted}/-${textDeleted} chars, ${blocksChanged} block(s)`);
        }
        const markParts = describe(marks);
        if (markParts.length > 0) {
            parts.push(`marks: ${markParts.join(", ")}`);
        }
        const structureParts = describe(structure);
        if (structureParts.length > 0) {
            parts.push(structureParts.join(", "));
        }

        const report = {
            changed: true,
            textInserted,
            textDeleted,
            blocksChanged,
            marks,
            summary: `changed: ${parts.join("; ")}`,
        };
        if (hasStructureDelta) {
            report.structure = structure;
        }
        return report;
    }
    catch {
        // Never let a failing diff break the caller: report a bare change.
        return {
            changed: true,
            textInserted: 0,
            textDeleted: 0,
            blocksChanged: 0,
            marks: {},
            summary: "changed (diff unavailable)",
        };
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,87 +0,0 @@
|
||||
/**
|
||||
* Filter functions to extract only relevant information from API responses
|
||||
* for better agent consumption
|
||||
*/
|
||||
/**
 * Reduce a workspace API record to the fields exposed to agents.
 *
 * @param data raw workspace object
 * @returns `{ id, name, description, defaultSpaceId, createdAt, updatedAt, deletedAt }`
 */
export function filterWorkspace(data) {
    const { id, name, description, defaultSpaceId, createdAt, updatedAt, deletedAt } = data;
    return { id, name, description, defaultSpaceId, createdAt, updatedAt, deletedAt };
}
|
||||
/**
 * Reduce a space API record to the fields exposed to agents.
 *
 * @param space raw space object
 * @returns `{ id, name, description, slug, visibility, createdAt, updatedAt, deletedAt }`
 */
export function filterSpace(space) {
    const { id, name, description, slug, visibility, createdAt, updatedAt, deletedAt } = space;
    return { id, name, description, slug, visibility, createdAt, updatedAt, deletedAt };
}
|
||||
/**
 * Reduce a group API record to the fields exposed to agents.
 *
 * @param group raw group object
 * @returns `{ id, name, description, workspaceId, createdAt, updatedAt, deletedAt }`
 */
export function filterGroup(group) {
    const { id, name, description, workspaceId, createdAt, updatedAt, deletedAt } = group;
    return { id, name, description, workspaceId, createdAt, updatedAt, deletedAt };
}
|
||||
/**
 * Reduce a page API record to the fields exposed to agents.
 *
 * `content` is added whenever it is a string, including the empty string.
 * `subpages` is added only when a non-empty list is supplied, with each entry
 * narrowed to `{ id, title }`.
 *
 * @param page raw page object
 * @param content converted markdown for the page (optional)
 * @param subpages child pages (optional)
 * @returns the filtered page object
 */
export function filterPage(page, content, subpages) {
    const { id, slugId, title, parentPageId, spaceId, isLocked, createdAt, updatedAt, deletedAt } = page;
    const filtered = { id, slugId, title, parentPageId, spaceId, isLocked, createdAt, updatedAt, deletedAt };
    if (typeof content === "string") {
        filtered.content = content;
    }
    if (subpages && subpages.length > 0) {
        filtered.subpages = subpages.map((child) => ({ id: child.id, title: child.title }));
    }
    return filtered;
}
|
||||
/**
 * Reduce a comment API record to the fields exposed to agents.
 *
 * `content` is `markdownContent` unless that is null/undefined, in which case
 * the comment's own `content` is used. Falsy optional fields are normalized to
 * `null`, and a falsy `type` becomes "page".
 *
 * @param comment raw comment object
 * @param markdownContent converted markdown body (optional)
 * @returns the filtered comment object
 */
export function filterComment(comment, markdownContent) {
    const orNull = (value) => value || null;
    const body = markdownContent ?? comment.content;
    return {
        id: comment.id,
        pageId: comment.pageId,
        content: body,
        selection: orNull(comment.selection),
        type: comment.type || "page",
        parentCommentId: orNull(comment.parentCommentId),
        creatorId: comment.creatorId,
        creatorName: orNull(comment.creator?.name),
        createdAt: comment.createdAt,
        editedAt: orNull(comment.editedAt),
        resolvedAt: orNull(comment.resolvedAt),
        resolvedById: orNull(comment.resolvedById),
    };
}
|
||||
/**
 * Reduce a search hit to the fields exposed to agents, flattening the nested
 * `space` object into `spaceId` / `spaceName` (both undefined when the hit has
 * no `space`).
 *
 * @param result raw search result object
 * @returns the filtered search result
 */
export function filterSearchResult(result) {
    const { id, title, parentPageId, createdAt, updatedAt, rank, highlight, space } = result;
    return {
        id,
        title,
        parentPageId,
        createdAt,
        updatedAt,
        rank,
        highlight,
        spaceId: space?.id,
        spaceName: space?.name,
    };
}
|
||||
@@ -1,393 +0,0 @@
|
||||
/**
|
||||
* Surgical text edits on a ProseMirror document without re-importing it.
|
||||
*
|
||||
* Each edit replaces an exact substring of a block's inline text, preserving
|
||||
* every node id, mark and attribute around it. Matching works at the
|
||||
* INLINE-CONTAINER (block) level: a block's text nodes are flattened into a
|
||||
* per-character array, so a `find` may freely cross bold/italic/link
|
||||
* boundaries (separate text nodes). The replacement inherits marks from the
|
||||
* unchanged common prefix/suffix of the match, so editing plain text next to a
|
||||
* bold word keeps the bold word bold, and editing the inside of a bold word
|
||||
* keeps the inserted text bold. This is the safe alternative to a full markdown
|
||||
* re-import for small wording fixes.
|
||||
*/
|
||||
import { stripInlineMarkdown, stripBalancedWrappers } from "./text-normalize.js";
|
||||
/**
 * Placeholder code unit standing in for one opaque (non-text) inline node:
 * U+FFFC OBJECT REPLACEMENT CHARACTER.
 *
 * Written as an escape sequence rather than a raw character so the value is
 * visible in source and cannot be silently dropped by tooling; it must be
 * exactly one UTF-16 code unit so slot indices line up with string indices.
 */
const ATOM_PLACEHOLDER = "\uFFFC";
|
||||
/**
 * Locate every usable occurrence of `needle` inside one block.
 *
 * `plain` is the block's text and `chars` its slot array, index-aligned with
 * `plain`. An occurrence is usable only when none of the slots it covers has a
 * truthy `atom`; this keeps a non-text inline node from ever being part of a
 * match, while ordinary text slots always qualify.
 *
 * Accepted matches do not overlap: the scan resumes right after each one.
 * After a rejected candidate the scan resumes one code unit later, so a match
 * that starts just behind the atom is still found.
 *
 * @param chars slot array for the block
 * @param plain the block's flattened text
 * @param needle text to look for; an empty needle yields no matches
 * @returns start indices of the usable matches, ascending
 */
function findValidMatches(chars, plain, needle) {
    const hits = [];
    if (!needle) {
        return hits;
    }
    const span = needle.length;
    let at = plain.indexOf(needle);
    while (at !== -1) {
        const touchesAtom = chars
            .slice(at, at + span)
            .some((slot) => slot && slot.atom);
        if (touchesAtom) {
            at = plain.indexOf(needle, at + 1);
            continue;
        }
        hits.push(at);
        at = plain.indexOf(needle, at + span);
    }
    return hits;
}
|
||||
/**
 * Compare two marks arrays position by position. Identical references are
 * equal; otherwise the arrays must have the same length and every pair of
 * marks must serialize to the same JSON string, so both the order of the
 * marks and the key order inside each mark matter.
 *
 * @param a first marks array
 * @param b second marks array
 * @returns true when the arrays are considered equal
 */
function marksEqual(a, b) {
    if (a === b) {
        return true;
    }
    if (a.length !== b.length) {
        return false;
    }
    for (const [position, mark] of a.entries()) {
        if (JSON.stringify(mark) !== JSON.stringify(b[position])) {
            return false;
        }
    }
    return true;
}
|
||||
/**
 * Tell whether `node` is an inline container: its `content` is an array with
 * at least one direct child of type "text".
 *
 * @param node any JSON node (null/undefined allowed)
 * @returns true when the node directly holds a text child
 */
function isInlineBlock(node) {
    const kids = node?.content;
    if (!Array.isArray(kids)) {
        return false;
    }
    return kids.some((kid) => kid && kid.type === "text");
}
|
||||
/**
 * Expand a block's inline children into one slot per UTF-16 code unit.
 *
 * A text child (type "text" with a string `text`) contributes one
 * `{ ch, marks }` slot per code unit, all sharing that child's marks array
 * (or a fresh empty array when it has none). Every other child contributes a
 * single `{ ch: ATOM_PLACEHOLDER, marks, atom }` slot that carries the child
 * itself in `atom`.
 *
 * @param node block node; a missing `content` is treated as empty
 * @returns the slot array
 */
function flattenBlock(node) {
    const slots = [];
    for (const inline of node.content || []) {
        const isText = inline && inline.type === "text" && typeof inline.text === "string";
        if (!isText) {
            slots.push({
                ch: ATOM_PLACEHOLDER,
                marks: (inline && inline.marks) || [],
                atom: inline,
            });
            continue;
        }
        const marks = inline.marks || [];
        // split("") yields UTF-16 code units, matching String.indexOf offsets.
        for (const unit of inline.text.split("")) {
            slots.push({ ch: unit, marks });
        }
    }
    return slots;
}
|
||||
/**
 * Turn a slot array back into inline nodes.
 *
 * Consecutive text slots are merged into one text node for as long as their
 * marks compare equal (per marksEqual); a change of marks or an atom slot ends
 * the current run. Atom slots are emitted as their original node. A text node
 * gets a `marks` key only when its marks array is non-empty, and empty runs
 * are never emitted.
 *
 * @param chars slot array
 * @returns array of inline nodes
 */
function tokenizeChars(chars) {
    const nodes = [];
    let pending = "";
    let pendingMarks = null;

    function emitPending() {
        if (pending.length === 0) {
            return;
        }
        const node = pendingMarks && pendingMarks.length > 0
            ? { type: "text", text: pending, marks: pendingMarks }
            : { type: "text", text: pending };
        nodes.push(node);
        pending = "";
        pendingMarks = null;
    }

    for (const { atom, ch, marks } of chars) {
        if (atom) {
            emitPending();
            nodes.push(atom);
            continue;
        }
        if (pendingMarks !== null && !marksEqual(pendingMarks, marks)) {
            emitPending();
        }
        if (pendingMarks === null) {
            pendingMarks = marks;
        }
        pending += ch;
    }
    emitPending();
    return nodes;
}
|
||||
/**
 * Length (in UTF-16 code units) of the longest prefix shared by `a` and `b`.
 *
 * @param a first string
 * @param b second string
 * @returns number of leading code units that are identical
 */
function commonPrefixLen(a, b) {
    const limit = Math.min(a.length, b.length);
    let shared = 0;
    while (shared < limit) {
        if (a[shared] !== b[shared]) {
            break;
        }
        shared += 1;
    }
    return shared;
}
|
||||
/**
 * Length (in UTF-16 code units) of the longest suffix shared by `a` and `b`,
 * never exceeding `cap`. The cap lets the caller stop the suffix from reaching
 * back into a region already claimed as common prefix.
 *
 * @param a first string
 * @param b second string
 * @param cap upper bound on the result
 * @returns number of trailing code units that are identical, at most `cap`
 */
function commonSuffixLen(a, b, cap) {
    const limit = Math.min(a.length, b.length, cap);
    const aLast = a.length - 1;
    const bLast = b.length - 1;
    let shared = 0;
    while (shared < limit) {
        if (a[aLast - shared] !== b[bLast - shared]) {
            break;
        }
        shared += 1;
    }
    return shared;
}
|
||||
/**
 * Splice one edit into a block's slot array at the given match positions.
 *
 * Only the part of the match that actually differs is replaced: the common
 * prefix and (non-overlapping) common suffix of `edit.find` / `edit.replace`
 * are left in place with their existing slots. The inserted code units take
 * their marks from the removed region when that region is non-empty and
 * uniformly marked; otherwise from the nearest non-atom slot to the left,
 * then to the right, and finally fall back to no marks.
 *
 * `matchPositions` must be ascending start indices of matches that do not
 * cover an atom slot.
 *
 * @param chars the block's slot array (not modified)
 * @param edit `{ find, replace }`
 * @param matchPositions start indices of the matches to splice
 * @returns `{ newChars, spliced }` — the rebuilt slots and the splice count
 */
function applyEditToChars(chars, edit, matchPositions) {
    const { find, replace } = edit;
    // The differing middle is the same for every match of this edit.
    const prefixLen = commonPrefixLen(find, replace);
    const suffixLen = commonSuffixLen(find, replace, Math.min(find.length, replace.length) - prefixLen);
    const insertion = replace.slice(prefixLen, replace.length - suffixLen);

    // Decide which marks the inserted units get for the region [from, to).
    const marksForInsertion = (from, to) => {
        const removed = chars.slice(from, to);
        const uniform = removed.length > 0 &&
            removed.every((slot) => marksEqual(slot.marks, removed[0].marks));
        if (uniform) {
            return removed[0].marks;
        }
        // Borrow from the closest text slot; atoms are skipped because their
        // marks do not belong on inserted text.
        let borrowed = null;
        for (let i = from - 1; i >= 0; i--) {
            if (chars[i].atom) {
                continue;
            }
            borrowed = chars[i].marks;
            break;
        }
        if (borrowed === null) {
            for (let i = to; i < chars.length; i++) {
                if (chars[i].atom) {
                    continue;
                }
                borrowed = chars[i].marks;
                break;
            }
        }
        return borrowed === null ? [] : borrowed;
    };

    const rebuilt = [];
    let readPos = 0;
    let count = 0;
    for (const start of matchPositions) {
        const from = start + prefixLen;
        const to = start + find.length - suffixLen;
        // Carry over everything before the changed region, prefix included.
        while (readPos < from) {
            rebuilt.push(chars[readPos]);
            readPos += 1;
        }
        const marks = marksForInsertion(from, to);
        for (const unit of insertion.split("")) {
            rebuilt.push({ ch: unit, marks });
        }
        // Jump over the replaced slots.
        readPos = to;
        count += 1;
    }
    while (readPos < chars.length) {
        rebuilt.push(chars[readPos]);
        readPos += 1;
    }
    return { newChars: rebuilt, spliced: count };
}
|
||||
/**
 * Apply text edits to a ProseMirror JSON doc. Works on a deep copy (made via a
 * JSON round-trip) and returns it; the input doc is never mutated.
 *
 * Returns `{ doc, results, failed }`:
 *   - results: edits that applied, as `{ find, replacements, normalized? }`.
 *     `normalized` is set when the match was found only after stripping inline
 *     markdown from `find`.
 *   - failed: edits that were refused as pure formatting toggles, matched zero
 *     times, exist only across a non-text inline node, or were ambiguous
 *     (several matches without `replaceAll`). Each carries a `reason`. These
 *     do NOT throw, so the edits that did apply are kept.
 *
 * Edits apply IN ORDER to the same working copy, so a later edit can target
 * text produced by an earlier one. The only thrown error is for invalid input
 * (an empty `edit.find`). Null or non-object entries inside a `content` array
 * are skipped by the tree walks rather than dereferenced.
 *
 * @param doc ProseMirror JSON document
 * @param edits array of `{ find, replace, replaceAll? }`
 * @returns `{ doc, results, failed }`
 * @throws Error when an edit has an empty `find`
 */
export function applyTextEdits(doc, edits) {
    const copy = JSON.parse(JSON.stringify(doc));
    const results = [];
    const failed = [];
    for (const edit of edits) {
        if (!edit.find) {
            throw new Error("edit.find must be a non-empty string");
        }
        // HARD-REFUSE pure formatting toggles: find and replace differ ONLY by
        // balanced markdown markers. The replacement is written verbatim, so
        // such an edit would either do nothing useful or insert literal
        // markers. The STRICT stripBalancedWrappers is used on both sides (not
        // the lenient stripInlineMarkdown) so ordinary plain-text edits are
        // not refused by mistake.
        const formattingOnly = edit.find !== edit.replace &&
            stripBalancedWrappers(edit.find) === stripBalancedWrappers(edit.replace);
        if (formattingOnly) {
            failed.push({
                find: edit.find,
                reason: "edit_page_text edits plain text only and cannot add or remove formatting marks (bold/italic/strike/code/link); it writes the replacement as LITERAL text. This edit looks like a formatting change (markdown markers in find/replace). To change marks, read the block with get_page_json and use patch_node (or update_page_json) to set the node's marks array.",
            });
            continue;
        }
        // Gather every inline block in document order, recursing through the
        // whole tree so nested containers are covered. Non-object entries are
        // skipped, matching the tolerance of isInlineBlock/flattenBlock.
        const blocks = [];
        (function collect(node) {
            if (!node || typeof node !== "object") {
                return;
            }
            if (isInlineBlock(node)) {
                blocks.push(node);
            }
            const children = Array.isArray(node.content) ? node.content : [];
            for (const child of children) {
                collect(child);
            }
        })(copy);
        // Flatten each block once; matches are searched in the flattened text
        // and validated against the slots so atoms never take part in a match.
        const blockChars = blocks.map((b) => flattenBlock(b));
        const blockPlain = blockChars.map((chars) => chars.map((c) => c.ch).join(""));
        const countAll = (perBlock) => perBlock.reduce((sum, positions) => sum + positions.length, 0);
        // EXACT MATCH WINS: try the verbatim locator first.
        let effectiveFind = edit.find;
        let normalized = false;
        let validPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find));
        let total = countAll(validPerBlock);
        // FALLBACK: only if the verbatim locator matched nothing, retry with
        // the markdown-stripped form. `edit.replace` is never touched — this
        // only changes what we LOCATE, not what we insert.
        const stripped = stripInlineMarkdown(edit.find);
        if (total === 0 && stripped !== edit.find && stripped.length > 0) {
            const strippedPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], stripped));
            const strippedTotal = countAll(strippedPerBlock);
            if (strippedTotal >= 1) {
                validPerBlock = strippedPerBlock;
                total = strippedTotal;
                effectiveFind = stripped;
                normalized = true;
            }
        }
        if (total === 0) {
            // Distinguish "the text exists but only across an atom" from a
            // plain not-found: if a raw substring scan (atoms included) WOULD
            // have hit for either locator, the atom is what blocks the edit.
            const existsAcrossAtom = blockPlain.some((plain) => plain.indexOf(edit.find) !== -1 ||
                (stripped !== edit.find && plain.indexOf(stripped) !== -1));
            let reason;
            if (existsAcrossAtom) {
                reason =
                    "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes.";
            }
            else {
                // Add a bounded "closest text" hint: the first block that
                // contains the longest whitespace-delimited token (>= 3 chars)
                // of the stripped (else raw) locator.
                reason = "text not found in the document.";
                const tokenSource = stripped.length > 0 ? stripped : edit.find;
                const longestToken = tokenSource
                    .split(/\s+/)
                    .filter((t) => t.length >= 3)
                    .sort((a, b) => b.length - a.length)[0];
                if (longestToken) {
                    const hitBlock = blockPlain.find((plain) => plain.includes(longestToken));
                    if (hitBlock) {
                        // Truncate by code point (spread iterates by code
                        // point) so a surrogate pair is never split; add the
                        // ellipsis only when something was actually cut.
                        const points = [...hitBlock];
                        const snippet = points.length > 120
                            ? points.slice(0, 120).join("") + "…"
                            : hitBlock;
                        reason += ` Closest block text: "${snippet}".`;
                    }
                }
            }
            failed.push({ find: edit.find, reason });
            continue;
        }
        if (total > 1 && !edit.replaceAll) {
            failed.push({
                find: edit.find,
                reason: `matches ${total} times. Provide a longer, unique fragment or set replaceAll: true.`,
            });
            continue;
        }
        // At this point either replaceAll is set (splice every valid match) or
        // there is exactly one valid match in the whole document, so the
        // positions to splice are precisely the valid positions.
        const plannedPerBlock = validPerBlock;
        // The local edit uses `effectiveFind` (verbatim or normalized) so the
        // prefix/suffix diff is computed against the ACTUALLY matched text,
        // while `edit.replace` stays literal — never stripped.
        const effectiveEdit = {
            find: effectiveFind,
            replace: edit.replace,
            replaceAll: edit.replaceAll,
        };
        let spliced = 0;
        for (let b = 0; b < blocks.length; b++) {
            if (plannedPerBlock[b].length === 0) {
                continue;
            }
            const { newChars, spliced: n } = applyEditToChars(blockChars[b], effectiveEdit, plannedPerBlock[b]);
            spliced += n;
            blocks[b].content = tokenizeChars(newChars);
        }
        // Keep `find: edit.find` (the original) so the caller can correlate.
        const result = { find: edit.find, replacements: spliced };
        if (normalized) {
            result.normalized = true;
        }
        results.push(result);
    }
    // Safety net: drop any empty text nodes (ProseMirror forbids them). The
    // re-tokenizer never emits them, but untouched blocks could carry one in
    // from upstream. Non-object entries are left in place and not descended.
    (function prune(node) {
        if (!node || !Array.isArray(node.content)) {
            return;
        }
        node.content = node.content.filter((child) => !(child && child.type === "text" && child.text === ""));
        for (const child of node.content) {
            prune(child);
        }
    })(copy);
    return { doc: copy, results, failed };
}
|
||||
@@ -1,816 +0,0 @@
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
*/
|
||||
export function convertProseMirrorToMarkdown(content) {
|
||||
if (!content || !content.content)
|
||||
return "";
|
||||
// Escape a value for interpolation into a DOUBLE-QUOTED HTML attribute value
// (textAlign, colors, media src, math `text`, data-* attrs, ...).
//
// Exactly two characters are rewritten:
//   &  ->  &amp;   so stored text cannot start a character reference
//   "  ->  &quot;  so stored text cannot close the attribute
//
// <, > and ' are deliberately left untouched: none of them can terminate a
// double-quoted value, and the original author's note reports that escaping
// them made values such as a math node's LaTeX `a < b` pick up extra escapes
// on every export/import round-trip (not re-verified here).
//
// The value is coerced with String(), so numbers (width, size, ...) are fine.
// Both characters are handled in one pass, so the `&` that `&quot;` introduces
// is never escaped a second time.
const escapeAttr = (value) => String(value).replace(/[&"]/g, (ch) => (ch === "&" ? "&amp;" : "&quot;"));
|
||||
// Escape a value placed as HTML element TEXT content (between tags), where
// &, < and > are all significant. Used for text rendered inside raw-HTML
// blocks (table cells / columns) so stored characters cannot inject markup.
//
// The value is coerced with String(). All three characters are replaced in a
// single pass, so the `&` introduced by `&lt;` / `&gt;` is never re-escaped.
const escapeHtmlText = (value) => String(value).replace(/[&<>]/g, (ch) => {
    if (ch === "&")
        return "&amp;";
    return ch === "<" ? "&lt;" : "&gt;";
});
|
||||
// Percent-encode the characters that would break out of a markdown URL target
// `(...)`: whitespace (including newlines) and both parentheses. Everything
// else is passed through untouched, so a stored src stays a single inert token
// (used for image/video/youtube srcs).
//
// A falsy value (undefined, null, "") yields "".
const encodeMdUrl = (value) => String(value || "").replace(/[\s()]/g, (ch) => {
    if (ch === "(")
        return "%28";
    if (ch === ")")
        return "%29";
    // encodeURIComponent leaves parentheses alone, hence the explicit cases
    // above; for whitespace it produces the UTF-8 percent form.
    return ch === " " ? "%20" : encodeURIComponent(ch);
});
|
||||
// Render one ProseMirror node (and, recursively, its children) to a string.
//
// Plain structure (paragraphs, headings, lists, quotes, code, pipe tables,
// images, footnotes) is emitted as Markdown. Node types Markdown cannot
// express are emitted as raw HTML whose tag / data-* attributes are meant to
// match what the editor schema parses, so they can be re-imported.
//
// @param node  ProseMirror JSON node: { type, attrs?, content?, marks?, text? }
// @returns     the rendered string ("" for nodes that render nothing)
const processNode = (node) => {
    const type = node.type;
    const nodeContent = node.content || [];
    // Format one `name="value"` HTML attribute with the value escaped.
    const attr = (name, value) => `${name}="${escapeAttr(value)}"`;
    switch (type) {
        case "doc":
            return nodeContent.map(processNode).join("\n\n");
        case "paragraph": {
            const text = nodeContent.map(processNode).join("");
            const align = node.attrs?.textAlign;
            // Markdown has no paragraph alignment; carry it on an HTML wrapper.
            if (align && align !== "left") {
                return `<div align="${escapeAttr(align)}">${text}</div>`;
            }
            return text || "";
        }
        case "heading": {
            // Clamp to the 1..6 range Markdown supports; a missing or
            // unparseable level falls back to 1, and a negative level can no
            // longer make String.prototype.repeat throw.
            const parsedLevel = Number.parseInt(node.attrs?.level, 10);
            const level = Number.isNaN(parsedLevel)
                ? 1
                : Math.min(6, Math.max(1, parsedLevel));
            const headingText = nodeContent.map(processNode).join("");
            return "#".repeat(level) + " " + headingText;
        }
        case "text": {
            let textContent = node.text || "";
            // Apply marks (bold, italic, code, etc.)
            if (node.marks) {
                // Markdown code spans cannot carry inner formatting, so when a
                // run has `code` alongside ANY other mark, backtick syntax
                // would leak literal ** / []() into the code text. In that case
                // every mark is emitted as nested HTML instead.
                //
                // NOTE (from the original author): this does not round-trip
                // both marks. The schema's `code` mark excludes every other
                // mark, so on import the co-occurring mark is dropped.
                const markTypes = node.marks.map((m) => m.type);
                const codeCombined = markTypes.includes("code") && markTypes.length > 1;
                // In the combined form <code> must be the INNERMOST wrapper,
                // so apply it first regardless of its position in the array.
                const orderedMarks = codeCombined
                    ? [
                        ...node.marks.filter((m) => m.type === "code"),
                        ...node.marks.filter((m) => m.type !== "code"),
                    ]
                    : node.marks;
                for (const mark of orderedMarks) {
                    switch (mark.type) {
                        case "bold":
                            textContent = codeCombined
                                ? `<strong>${textContent}</strong>`
                                : `**${textContent}**`;
                            break;
                        case "italic":
                            textContent = codeCombined
                                ? `<em>${textContent}</em>`
                                : `*${textContent}*`;
                            break;
                        case "code":
                            textContent = codeCombined
                                ? `<code>${textContent}</code>`
                                : `\`${textContent}\``;
                            break;
                        case "link": {
                            const href = mark.attrs?.href || "";
                            const title = mark.attrs?.title;
                            if (codeCombined) {
                                // HTML anchor so it can wrap the nested <code>.
                                const safeHref = escapeAttr(href);
                                textContent = title
                                    ? `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`
                                    : `<a href="${safeHref}">${textContent}</a>`;
                            }
                            else if (title) {
                                // Escape an embedded double-quote so it cannot
                                // terminate the markdown title string early.
                                const safeTitle = String(title).replace(/"/g, '\\"');
                                textContent = `[${textContent}](${href} "${safeTitle}")`;
                            }
                            else {
                                textContent = `[${textContent}](${href})`;
                            }
                            break;
                        }
                        case "strike":
                            textContent = codeCombined
                                ? `<s>${textContent}</s>`
                                : `~~${textContent}~~`;
                            break;
                        case "underline":
                            textContent = `<u>${textContent}</u>`;
                            break;
                        case "subscript":
                            textContent = `<sub>${textContent}</sub>`;
                            break;
                        case "superscript":
                            textContent = `<sup>${textContent}</sup>`;
                            break;
                        case "highlight": {
                            // Only emit the style when a color is actually set,
                            // so a plain highlight stays a bare <mark>.
                            const color = mark.attrs?.color;
                            textContent = color
                                ? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
                                : `<mark>${textContent}</mark>`;
                            break;
                        }
                        case "textStyle":
                            if (mark.attrs?.color) {
                                textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
                            }
                            break;
                        case "comment": {
                            // Inline comment anchor: span[data-comment-id],
                            // plus data-resolved when the comment is resolved.
                            const cid = mark.attrs?.commentId;
                            if (cid) {
                                const resolvedAttr = mark.attrs?.resolved
                                    ? ` data-resolved="true"`
                                    : "";
                                textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
                            }
                            break;
                        }
                    }
                }
            }
            return textContent;
        }
        case "codeBlock": {
            const language = node.attrs?.language || "";
            // Strip ALL trailing newlines so repeated export/import cycles are
            // stable (per the original note, the importer re-adds one "\n").
            const code = nodeContent
                .map(processNode)
                .join("")
                .replace(/\n+$/, "");
            return "```" + language + "\n" + code + "\n```";
        }
        case "bulletList":
            return nodeContent
                .map((item) => processListItem(item, "-"))
                .join("\n");
        case "orderedList":
            return nodeContent
                .map((item, index) => processListItem(item, `${index + 1}.`))
                .join("\n");
        case "taskList":
            return nodeContent.map((item) => processTaskItem(item)).join("\n");
        case "taskItem":
            // Same helper as taskList so a bare task item renders identically.
            return processTaskItem(node);
        case "listItem":
            return nodeContent.map(processNode).join("\n");
        case "blockquote":
            // Prefix EVERY line of EVERY child with "> " and separate
            // block-level children with a bare ">" line.
            return nodeContent
                .map((child) => processNode(child)
                .split("\n")
                .map((line) => (line.length ? `> ${line}` : ">"))
                .join("\n"))
                .join("\n>\n");
        case "horizontalRule":
            return "---";
        case "hardBreak":
            // Two trailing spaces before the newline encode a markdown hard
            // break; a bare "\n" would be a soft break.
            return "  \n";
        case "image": {
            const imgAlt = node.attrs?.alt || "";
            // Percent-encode whitespace and parentheses so a stored src cannot
            // terminate the (...) target and inject following markdown/HTML.
            const imgSrc = encodeMdUrl(node.attrs?.src);
            return `![${imgAlt}](${imgSrc})`;
        }
        case "video": {
            // <video> with src/aria-label/width/height as standard attributes
            // and the remaining attrs as data-*.
            const attrs = node.attrs || {};
            const parts = [attr("src", attrs.src ?? "")];
            if (attrs.alt)
                parts.push(attr("aria-label", attrs.alt));
            if (attrs.attachmentId)
                parts.push(attr("data-attachment-id", attrs.attachmentId));
            if (attrs.width != null)
                parts.push(attr("width", attrs.width));
            if (attrs.height != null)
                parts.push(attr("height", attrs.height));
            if (attrs.size != null)
                parts.push(attr("data-size", attrs.size));
            if (attrs.align)
                parts.push(attr("data-align", attrs.align));
            if (attrs.aspectRatio != null)
                parts.push(attr("data-aspect-ratio", attrs.aspectRatio));
            // Wrapped in a plain <div> so the markdown parser treats it as a
            // block rather than inline HTML inside a paragraph.
            return `<div><video ${parts.join(" ")}></video></div>`;
        }
        case "youtube": {
            const attrs = node.attrs || {};
            const parts = [
                `data-type="youtube"`,
                attr("data-src", attrs.src ?? ""),
            ];
            if (attrs.width != null)
                parts.push(attr("data-width", attrs.width));
            if (attrs.height != null)
                parts.push(attr("data-height", attrs.height));
            if (attrs.align)
                parts.push(attr("data-align", attrs.align));
            return `<div ${parts.join(" ")}></div>`;
        }
        case "table": {
            // A GFM pipe table cannot represent merged cells. If ANY cell has
            // colspan>1 or rowspan>1 the WHOLE table is emitted as raw HTML;
            // otherwise the lighter pipe-table form is used.
            const tableRows = nodeContent;
            if (tableRows.length === 0)
                return "";
            const hasSpan = tableRows.some((row) => (row.content || []).some((cell) => (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1));
            if (hasSpan) {
                // Cell bodies go through blockToHtml: markdown is not parsed
                // inside a raw HTML block, so it would leak as literal text.
                const renderHtmlCell = (cell) => {
                    const tag = cell.type === "tableHeader" ? "th" : "td";
                    const a = cell.attrs || {};
                    const cellParts = [];
                    if ((a.colspan ?? 1) > 1)
                        cellParts.push(attr("colspan", a.colspan));
                    if ((a.rowspan ?? 1) > 1)
                        cellParts.push(attr("rowspan", a.rowspan));
                    if (a.align)
                        cellParts.push(attr("align", a.align));
                    const open = cellParts.length
                        ? `<${tag} ${cellParts.join(" ")}>`
                        : `<${tag}>`;
                    const inner = (cell.content || [])
                        .map((block) => blockToHtml(block))
                        .join("");
                    return `${open}${inner}</${tag}>`;
                };
                const htmlRows = tableRows
                    .map((row) => `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`)
                    .join("");
                return `<table><tbody>${htmlRows}</tbody></table>`;
            }
            // No merged cells: header row + separator + remaining rows.
            const rows = tableRows.map(processNode);
            const headerCells = tableRows[0]?.content || [];
            const columns = headerCells.length || 1;
            // Alignment markers (:--, :-:, --:) come from each header cell.
            const markers = Array.from({ length: columns }, (_, i) => {
                switch (headerCells[i]?.attrs?.align) {
                    case "left":
                        return ":--";
                    case "center":
                        return ":-:";
                    case "right":
                        return "--:";
                    default:
                        return "---";
                }
            });
            const separator = "| " + markers.join(" | ") + " |";
            return [rows[0], separator, ...rows.slice(1)].join("\n");
        }
        case "tableRow":
            return "| " + nodeContent.map(processNode).join(" | ") + " |";
        case "tableCell":
        case "tableHeader":
            // Join block children with a space so adjacent blocks do not
            // collide, then collapse newlines and escape pipes so the cell
            // cannot corrupt the surrounding GFM row.
            return nodeContent
                .map(processNode)
                .join(" ")
                .replace(/\r?\n/g, " ")
                .replace(/\|/g, "\\|");
        case "callout": {
            // String() guards against a non-string stored type.
            const calloutType = String(node.attrs?.type || "info");
            const calloutContent = nodeContent.map(processNode).join("\n");
            return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
        }
        case "details":
            return nodeContent.map(processNode).join("\n");
        case "detailsSummary": {
            // Opens the <details> element; detailsContent closes it.
            const summaryText = nodeContent.map(processNode).join("");
            return `<details>\n<summary>${summaryText}</summary>\n`;
        }
        case "detailsContent": {
            const detailsText = nodeContent.map(processNode).join("\n");
            return `${detailsText}\n</details>`;
        }
        case "mathInline": {
            // The LaTeX rides in a `text` HTML attribute (not element text).
            const inlineMath = node.attrs?.text || "";
            return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
        }
        case "mathBlock": {
            // Same as mathInline, as a block-level <div>.
            const blockMath = node.attrs?.text || "";
            return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
        }
        case "mention": {
            // span[data-type="mention"] carrying the attrs as data-*.
            const attrs = node.attrs || {};
            const parts = [`data-type="mention"`];
            if (attrs.id)
                parts.push(attr("data-id", attrs.id));
            if (attrs.label)
                parts.push(attr("data-label", attrs.label));
            if (attrs.entityType)
                parts.push(attr("data-entity-type", attrs.entityType));
            if (attrs.entityId)
                parts.push(attr("data-entity-id", attrs.entityId));
            if (attrs.slugId)
                parts.push(attr("data-slug-id", attrs.slugId));
            if (attrs.creatorId)
                parts.push(attr("data-creator-id", attrs.creatorId));
            if (attrs.anchorId)
                parts.push(attr("data-anchor-id", attrs.anchorId));
            // The visible label is element TEXT content, so it is escaped for
            // the text context rather than the attribute context.
            const mentionLabel = attrs.label || attrs.id || "";
            return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
        }
        case "footnoteReference": {
            // Inline `[^id]` marker; nothing is emitted without an id.
            const fnId = node.attrs?.id || "";
            return fnId ? `[^${fnId}]` : "";
        }
        case "footnotesList":
            // One definition per line.
            return nodeContent.map(processNode).join("\n");
        case "footnoteDefinition": {
            const defId = node.attrs?.id || "";
            // Collapse the definition's paragraphs into a single line.
            const defText = nodeContent
                .map(processNode)
                .join(" ")
                .replace(/\s*\n+\s*/g, " ")
                .trim();
            return defId ? `[^${defId}]: ${defText}` : "";
        }
        case "attachment": {
            // div[data-type="attachment"] with data-attachment-* attributes
            // built from url/name/mime/size/attachmentId.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="attachment"`,
                attr("data-attachment-url", attrs.url ?? ""),
            ];
            if (attrs.name)
                parts.push(attr("data-attachment-name", attrs.name));
            if (attrs.mime)
                parts.push(attr("data-attachment-mime", attrs.mime));
            if (attrs.size != null)
                parts.push(attr("data-attachment-size", attrs.size));
            if (attrs.attachmentId)
                parts.push(attr("data-attachment-id", attrs.attachmentId));
            return `<div ${parts.join(" ")}></div>`;
        }
        case "drawio":
        case "excalidraw": {
            // div[data-type=<node type>] carrying the diagram attrs as data-*.
            // `type` is one of the two case literals, so it is safe unescaped.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="${type}"`,
                attr("data-src", attrs.src ?? ""),
            ];
            if (attrs.title != null)
                parts.push(attr("data-title", attrs.title));
            if (attrs.alt != null)
                parts.push(attr("data-alt", attrs.alt));
            if (attrs.width != null)
                parts.push(attr("data-width", attrs.width));
            if (attrs.height != null)
                parts.push(attr("data-height", attrs.height));
            if (attrs.size != null)
                parts.push(attr("data-size", attrs.size));
            if (attrs.aspectRatio != null)
                parts.push(attr("data-aspect-ratio", attrs.aspectRatio));
            if (attrs.align)
                parts.push(attr("data-align", attrs.align));
            if (attrs.attachmentId)
                parts.push(attr("data-attachment-id", attrs.attachmentId));
            return `<div ${parts.join(" ")}></div>`;
        }
        case "embed": {
            const attrs = node.attrs || {};
            const parts = [
                `data-type="embed"`,
                attr("data-src", attrs.src ?? ""),
                attr("data-provider", attrs.provider ?? ""),
            ];
            if (attrs.align)
                parts.push(attr("data-align", attrs.align));
            if (attrs.width != null)
                parts.push(attr("data-width", attrs.width));
            if (attrs.height != null)
                parts.push(attr("data-height", attrs.height));
            return `<div ${parts.join(" ")}></div>`;
        }
        case "audio": {
            const attrs = node.attrs || {};
            const parts = [attr("src", attrs.src ?? "")];
            if (attrs.attachmentId)
                parts.push(attr("data-attachment-id", attrs.attachmentId));
            if (attrs.size != null)
                parts.push(attr("data-size", attrs.size));
            // Wrapped in a <div> for the same reason as video.
            return `<div><audio ${parts.join(" ")}></audio></div>`;
        }
        case "pdf": {
            // src/width/height as standard attributes, the rest as data-*.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="pdf"`,
                attr("src", attrs.src ?? ""),
            ];
            if (attrs.name)
                parts.push(attr("data-name", attrs.name));
            if (attrs.attachmentId)
                parts.push(attr("data-attachment-id", attrs.attachmentId));
            if (attrs.size != null)
                parts.push(attr("data-size", attrs.size));
            if (attrs.width != null)
                parts.push(attr("width", attrs.width));
            if (attrs.height != null)
                parts.push(attr("height", attrs.height));
            return `<div ${parts.join(" ")}></div>`;
        }
        case "columns": {
            // The whole block is raw HTML, so children are rendered through
            // blockToHtml rather than as markdown.
            const attrs = node.attrs || {};
            const parts = [`data-type="columns"`];
            if (attrs.layout)
                parts.push(attr("data-layout", attrs.layout));
            if (attrs.widthMode && attrs.widthMode !== "normal")
                parts.push(attr("data-width-mode", attrs.widthMode));
            const inner = nodeContent.map((child) => blockToHtml(child)).join("");
            return `<div ${parts.join(" ")}>${inner}</div>`;
        }
        case "column": {
            const attrs = node.attrs || {};
            const parts = [`data-type="column"`];
            if (attrs.width)
                parts.push(attr("data-width", attrs.width));
            const inner = nodeContent.map((child) => blockToHtml(child)).join("");
            return `<div ${parts.join(" ")}>${inner}</div>`;
        }
        case "subpages":
            return "{{SUBPAGES}}";
        default:
            // Unknown node type: render its children with no wrapper.
            return nodeContent.map(processNode).join("");
    }
};
|
||||
// Render inline content (text runs plus their marks) to HTML. Used by the raw
// HTML fallbacks (spanned tables, columns), where markdown is NOT re-parsed,
// so backtick/asterisk/bracket syntax would otherwise leak as literal text.
//
// - hardBreak becomes <br>.
// - Any other non-text inline node is delegated to processNode.
// - Text is escaped for the element-text context, then wrapped once per mark
//   in array order (the first mark ends up innermost).
//
// @param inlineNodes  array of inline ProseMirror nodes (null/undefined ok)
// @returns            the concatenated HTML string
const inlineToHtml = (inlineNodes) => {
    // Marks that map to a bare HTML element with no attributes.
    const simpleTags = new Map([
        ["bold", "strong"],
        ["italic", "em"],
        ["code", "code"],
        ["strike", "s"],
        ["underline", "u"],
        ["subscript", "sub"],
        ["superscript", "sup"],
    ]);
    const renderInline = (inline) => {
        if (inline.type === "hardBreak")
            return "<br>";
        if (inline.type !== "text")
            return processNode(inline);
        let html = escapeHtmlText(inline.text || "");
        for (const mark of inline.marks || []) {
            const tag = simpleTags.get(mark.type);
            if (tag) {
                html = `<${tag}>${html}</${tag}>`;
                continue;
            }
            switch (mark.type) {
                case "link": {
                    // Carry the optional title too, matching the HTML-anchor
                    // form processNode emits for code+link runs.
                    const title = mark.attrs?.title;
                    const titleAttr = title
                        ? ` title="${escapeAttr(String(title))}"`
                        : "";
                    html = `<a href="${escapeAttr(mark.attrs?.href || "")}"${titleAttr}>${html}</a>`;
                    break;
                }
                case "highlight":
                    // Bare <mark> unless a color is actually set.
                    html = mark.attrs?.color
                        ? `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${html}</mark>`
                        : `<mark>${html}</mark>`;
                    break;
                case "textStyle":
                    if (mark.attrs?.color)
                        html = `<span style="color: ${escapeAttr(mark.attrs.color)}">${html}</span>`;
                    break;
                case "comment":
                    // Inline comment anchor; skipped when there is no id.
                    if (mark.attrs?.commentId) {
                        const resolved = mark.attrs?.resolved ? ` data-resolved="true"` : "";
                        html = `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${resolved}>${html}</span>`;
                    }
                    break;
            }
        }
        return html;
    };
    return (inlineNodes || []).map(renderInline).join("");
};
|
||||
// Emit an <img> element for an image node. Used wherever a raw-HTML container
// (a column or a spanned table cell) holds an image: markdown image syntax
// `![alt](src)` would not be re-parsed there and would survive as literal
// text.
//
// src is always emitted (empty when missing). alt/title/align/attachmentId are
// emitted only when truthy; width/height/size/aspectRatio whenever they are
// not null/undefined (so 0 is kept). size, attachmentId and aspectRatio use
// data-* names; the rest use same-named attributes.
const imageToHtml = (node) => {
    const attrs = node.attrs || {};
    const whenTruthy = (v) => Boolean(v);
    const whenPresent = (v) => v != null;
    // [html attribute name, value, inclusion rule], in output order.
    const optional = [
        ["alt", attrs.alt, whenTruthy],
        ["title", attrs.title, whenTruthy],
        ["width", attrs.width, whenPresent],
        ["height", attrs.height, whenPresent],
        ["align", attrs.align, whenTruthy],
        ["data-size", attrs.size, whenPresent],
        ["data-attachment-id", attrs.attachmentId, whenTruthy],
        ["data-aspect-ratio", attrs.aspectRatio, whenPresent],
    ];
    const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
    for (const [name, value, include] of optional) {
        if (include(value))
            parts.push(`${name}="${escapeAttr(value)}"`);
    }
    return `<img ${parts.join(" ")}>`;
};
|
||||
// Emit div[data-type="callout"] for a callout node, with the lower-cased
// banner type in data-callout-type ("info" when the node has none). Children
// are rendered through blockToHtml so they survive inside a raw-HTML container.
const calloutToHtml = (node) => {
    // String() guards against a non-string stored type, which would otherwise
    // make .toLowerCase() throw.
    const bannerType = String(node.attrs?.type || "info").toLowerCase();
    const body = (node.content || []).map((child) => blockToHtml(child)).join("");
    return `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">${body}</div>`;
};
|
||||
// Emit the outer <details> element of a details tree, rendering each child
// through blockToHtml. The children are expected to be the summary and content
// nodes, which the sibling helpers emit as summary[data-type="detailsSummary"]
// and div[data-type="detailsContent"].
const detailsToHtml = (node) => {
    const body = (node.content || []).map((child) => blockToHtml(child)).join("");
    return `<details>${body}</details>`;
};
|
||||
const detailsSummaryToHtml = (node) => {
    // The summary holds inline content only, so it is rendered with
    // inlineToHtml and tagged with the data-type the details tree uses.
    const label = inlineToHtml(node.content || []);
    return `<summary data-type="detailsSummary">${label}</summary>`;
};
|
||||
// Emit the body of a details tree as div[data-type="detailsContent"], with
// each child block rendered through blockToHtml.
const detailsContentToHtml = (node) => {
    const body = (node.content || []).map((child) => blockToHtml(child)).join("");
    return `<div data-type="detailsContent">${body}</div>`;
};
|
||||
// Emit a task list as ul[data-type="taskList"] containing one
// li[data-type="taskItem"][data-checked="true|false"] per item. Emitting HTML
// directly keeps task lists inside columns / spanned cells from degrading to
// literal "- [ ]" text. (The original note says bridgeTaskLists in
// collaboration.ts recognizes this shape; that file is not visible here.)
//
// Each item's block children are rendered with blockChildrenToHtml; any
// falsy `checked` attribute is written as "false".
const taskListToHtml = (node) => {
    const renderItem = (item) => {
        const checked = item.attrs?.checked ? "true" : "false";
        return `<li data-type="taskItem" data-checked="${checked}">${blockChildrenToHtml(item)}</li>`;
    };
    const items = (node.content || []).map(renderItem).join("");
    return `<ul data-type="taskList">${items}</ul>`;
};
|
||||
// Render a block node to HTML for the raw-HTML containers (spanned tables,
// columns). Markdown is NOT re-parsed inside a raw-HTML block, so every block
// type that can appear there must be emitted as HTML, never as markdown.
//
// - Types whose processNode case already produces HTML (table, columns,
//   column, mathBlock, video, audio, pdf, youtube, embed, attachment, drawio,
//   excalidraw) are delegated to processNode.
// - Types whose processNode case produces markdown get explicit HTML here.
// - Anything else is wrapped in a plain <div> with its children preserved.
//
// @param block  ProseMirror JSON block node
// @returns      HTML string
const blockToHtml = (block) => {
    const children = block.content || [];
    switch (block.type) {
        case "paragraph":
            return `<p>${inlineToHtml(children)}</p>`;
        case "heading": {
            // The level becomes part of the TAG NAME, so it must never be
            // interpolated raw: coerce to an integer and clamp to h1..h6.
            const parsedLevel = Number.parseInt(block.attrs?.level, 10);
            const level = Number.isNaN(parsedLevel)
                ? 1
                : Math.min(6, Math.max(1, parsedLevel));
            return `<h${level}>${inlineToHtml(children)}</h${level}>`;
        }
        case "bulletList":
        case "orderedList": {
            const listTag = block.type === "bulletList" ? "ul" : "ol";
            const items = children
                .map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
                .join("");
            return `<${listTag}>${items}</${listTag}>`;
        }
        case "codeBlock": {
            const lang = block.attrs?.language || "";
            // The code is element TEXT content, so it is escaped with
            // escapeHtmlText; the language rides in a class ATTRIBUTE, so it
            // uses escapeAttr.
            const code = escapeHtmlText(children
                .map(processNode)
                .join("")
                .replace(/\n+$/, ""));
            const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
            return `<pre><code${cls}>${code}</code></pre>`;
        }
        case "image":
            return imageToHtml(block);
        case "blockquote":
            return `<blockquote>${children.map((child) => blockToHtml(child)).join("")}</blockquote>`;
        case "horizontalRule":
            return "<hr>";
        case "callout":
            return calloutToHtml(block);
        case "details":
            return detailsToHtml(block);
        case "detailsSummary":
            return detailsSummaryToHtml(block);
        case "detailsContent":
            return detailsContentToHtml(block);
        case "taskList":
            return taskListToHtml(block);
        case "taskItem":
            // A bare taskItem still needs a wrapping list; wrap it in a
            // single-item taskList.
            return taskListToHtml({ content: [block] });
        case "table":
        case "columns":
        case "column":
        case "mathBlock":
        case "video":
        case "audio":
        case "pdf":
        case "youtube":
        case "embed":
        case "attachment":
        case "drawio":
        case "excalidraw":
            return processNode(block);
        default: {
            // Unknown block type: keep its content inside a <div>. Children
            // are dispatched one by one, because sending an inline node
            // through blockToHtml would render an empty <div> and drop its
            // text whenever inline and block children are mixed.
            const inlineTypes = ["text", "hardBreak", "mention", "mathInline", "footnoteReference"];
            const body = children
                .map((child) => (inlineTypes.includes(child.type)
                ? inlineToHtml([child])
                : blockToHtml(child)))
                .join("");
            return `<div>${body}</div>`;
        }
    }
};
|
||||
// HTML-fallback counterpart of processListItem: a list item holds block
// content, so render each child block with blockToHtml and concatenate the
// results. A missing `content` yields an empty string.
const blockChildrenToHtml = (item) => {
    const blocks = item.content || [];
    const rendered = blocks.map((child) => blockToHtml(child));
    return rendered.join("");
};
|
||||
// Lay out the rendered children of one list item beneath its marker.
//
// `childStrings` holds one (possibly multi-line) string per child block. Only
// the very first physical line of the first child is prefixed with
// `${prefix} `; every other line — the rest of the first child and all lines
// of later children (nested lists, code blocks, extra paragraphs) — is
// indented by `indentWidth` spaces so it stays inside the item instead of
// collapsing to column 0. Blank lines stay blank (no trailing whitespace).
//
// `indentWidth` must be the width of the LIST MARKER, which is not always the
// width of the visible prefix:
//   - bullet  "- "          -> 2
//   - task    "- [ ] "      -> 2 (the checkbox is content, the marker is "- ")
//   - ordered "1. " / "10. " -> 3 / 4, growing with the number of digits
// CommonMark anchors nested content to the marker column, so a too-narrow
// indent is re-parsed as sibling/loose content on re-import. Callers pass the
// exact width.
const indentItemChildren = (childStrings, prefix, indentWidth) => {
    const pad = " ".repeat(indentWidth);
    // Flatten to physical lines; index 0 is then the marker line.
    const physicalLines = childStrings.flatMap((child) => child.split("\n"));
    return physicalLines
        .map((line, position) => {
            if (position === 0) {
                return `${prefix} ${line}`;
            }
            return line.length ? `${pad}${line}` : "";
        })
        .join("\n");
};
|
||||
// Render a bullet/ordered list item as markdown under `prefix` ("-", "1.",
// "10.", ...). An item with no children renders as the bare prefix.
const processListItem = (item, prefix) => {
    const rendered = (item.content || []).map(processNode);
    // The emitted marker is `${prefix} `, so continuation lines are indented
    // by prefix.length + 1 ("-" -> 2, "1." -> 3, "10." -> 4). For these list
    // kinds the visible prefix is the list marker itself.
    return rendered.length === 0
        ? prefix
        : indentItemChildren(rendered, prefix, prefix.length + 1);
};
|
||||
// Render a task-list item as markdown: "- [x]" when attrs.checked is truthy,
// "- [ ]" otherwise, followed by the item's rendered children.
const processTaskItem = (item) => {
    const prefix = item.attrs?.checked ? "- [x]" : "- [ ]";
    const rendered = (item.content || []).map(processNode);
    // An empty task item must still emit its checkbox row; indenting an empty
    // child list would return "" and the row would vanish.
    if (rendered.length === 0) {
        return prefix;
    }
    // The list marker is only "- " (2 columns); the checkbox is item content.
    // Continuation lines are therefore indented by a fixed 2, never by the
    // wider prefix length.
    return indentItemChildren(rendered, prefix, 2);
};
|
||||
return processNode(content).trim();
|
||||
}
|
||||
@@ -1,104 +0,0 @@
|
||||
/**
|
||||
* Self-contained Docmost-flavoured Markdown document (custom extensions).
|
||||
*
|
||||
* A single `.md` file that packages everything needed to losslessly round-trip
|
||||
* a page through "download -> edit body -> re-upload":
|
||||
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
|
||||
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
|
||||
* - a trailing `docmost:comments` block: a one-line JSON array of comment
|
||||
* threads.
|
||||
*
|
||||
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
|
||||
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
|
||||
* importer without first stripping the blocks, the metadata cannot leak into the
|
||||
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
|
||||
* codeBlock node, so a fenced block is deliberately NOT used.)
|
||||
*
|
||||
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
|
||||
* re-pastes an exported `.md` into a page, or a page documents this very
|
||||
* format). To stay robust, parsing treats only the FINAL, document-ending
|
||||
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
|
||||
* opener whose closing `-->` sits at the very end of the file. Any earlier
|
||||
* literal occurrence is left in the body untouched.
|
||||
*
|
||||
* NOTE on comments: in this version the comment THREAD records are preserved in
|
||||
* the file but are NOT pushed back to the server on import — only the inline
|
||||
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||
* records stays with the comment tools/UI.
|
||||
*/
|
||||
// Match the leading meta block (leading whitespace allowed). Capture group 1
|
||||
// is the JSON text between the `<!-- docmost:meta` line and the `-->` line.
|
||||
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
|
||||
// Match a `docmost:comments` opener line. Used globally to scan for the LAST
|
||||
// opener rather than end-anchoring a single regex (which would mis-capture
|
||||
// across a literal opener that appears earlier in the body). Because of the
// `g` flag this regex keeps `lastIndex` state between `exec` calls, which is
|
||||
// why parseDocmostMarkdown resets `lastIndex` to 0 before each scan.
|
||||
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
|
||||
/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. Both blocks are always emitted (the comments block holds
 * `[]` when there are no comments) so the format stays uniform and parsing
 * stays simple.
 *
 * The JSON payloads live inside HTML comments, so any `-->` (or `--!>`) that
 * occurs inside a JSON string is written as `--\u003e`. JSON.parse decodes it
 * back to the same text, but an HTML/Markdown parser no longer sees a
 * premature end of comment that would spill metadata into the document.
 *
 * @param {*} meta Page identity object; `undefined` is serialized as `null`.
 * @param {string|null|undefined} body Markdown body; surrounding whitespace is trimmed.
 * @param {Array|*} comments Comment threads; any non-array value is written as `[]`.
 * @returns {string} The complete file text, ending with a newline.
 */
export function serializeDocmostMarkdown(meta, body, comments) {
    // JSON.stringify returns undefined for undefined/function input; fall
    // back to "null" so the emitted block is always parseable JSON.
    const toCommentSafeJson = (value) => (JSON.stringify(value) ?? "null")
        .replace(/--(!?)>/g, "--$1\\u003e");
    const metaJson = toCommentSafeJson(meta);
    const commentsJson = toCommentSafeJson(Array.isArray(comments) ? comments : []);
    const trimmedBody = (body ?? "").trim();
    return (`<!-- docmost:meta\n${metaJson}\n-->\n\n` +
        `${trimmedBody}\n\n` +
        `<!-- docmost:comments\n${commentsJson}\n-->\n`);
}
|
||||
/**
 * Split a self-contained file back into `{ meta, body, comments }`.
 *
 * Tolerant of missing blocks: when the meta or comments block is absent (for
 * example a hand-written plain-markdown file) the corresponding value is
 * `null` and that part of the input stays in the body. `\r\n` line endings
 * are normalized to `\n` first.
 *
 * @param {string|null|undefined} full Raw file text.
 * @returns {{meta: *, body: string, comments: *}} Parsed parts; `body` is trimmed.
 * @throws {Error} Only when a block that IS present contains invalid JSON.
 */
export function parseDocmostMarkdown(full) {
    const text = (full ?? "").replace(/\r\n/g, "\n");
    const describe = (e) => (e instanceof Error ? e.message : String(e));
    // Leading meta block: start-anchored, so already unambiguous.
    let meta = null;
    let bodyStart = 0;
    const metaHit = text.match(META_RE);
    if (metaHit) {
        try {
            meta = JSON.parse(metaHit[1]);
        }
        catch (e) {
            throw new Error(`Invalid docmost:meta JSON block: ${describe(e)}`);
        }
        bodyStart = (metaHit.index ?? 0) + metaHit[0].length;
    }
    // Keep only the LAST `docmost:comments` opener: earlier literal openers
    // (e.g. a re-pasted export) belong to the body. The regex is global, so
    // reset its scan position before walking the matches.
    COMMENTS_OPEN_RE.lastIndex = 0;
    let opener = null;
    for (let hit = COMMENTS_OPEN_RE.exec(text); hit !== null; hit = COMMENTS_OPEN_RE.exec(text)) {
        opener = hit;
    }
    let comments = null;
    let bodyEnd = text.length;
    if (opener !== null) {
        const tail = text.slice(opener.index + opener[0].length);
        // The closing `-->` must end the document (trailing whitespace aside).
        const closer = /\r?\n-->[ \t]*\r?\n?\s*$/.exec(tail);
        if (closer) {
            try {
                comments = JSON.parse(tail.slice(0, closer.index));
            }
            catch (e) {
                throw new Error(`Invalid docmost:comments JSON block: ${describe(e)}`);
            }
            // Everything from the opener to the end of the file is metadata.
            bodyEnd = opener.index;
        }
    }
    return { meta, body: text.slice(bodyStart, bodyEnd).trim(), comments };
}
|
||||
@@ -1,798 +0,0 @@
|
||||
/**
|
||||
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
|
||||
* tree by node id.
|
||||
*
|
||||
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
|
||||
* table cells hold their children in `content` just like any other block, so a
|
||||
* single recursive walk reaches them all.
|
||||
*
|
||||
* Every exported function operates on a DEEP CLONE of the input document and
|
||||
* returns the new document. The input doc and any `newNode`/`node` argument are
|
||||
* never mutated. All functions are defensively null-safe: missing/!Array
|
||||
* `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||
*/
|
||||
import { stripInlineMarkdown } from "./text-normalize.js";
|
||||
/**
 * Return a deep copy of `value`, leaving the original untouched. Uses the
 * platform `structuredClone` when it exists and otherwise falls back to a
 * JSON round-trip.
 */
function clone(value) {
    return typeof structuredClone === "function"
        ? structuredClone(value)
        : JSON.parse(JSON.stringify(value));
}
|
||||
/** Report whether `value` is a plain-ish object: not null/undefined and not an array. */
function isObject(value) {
    if (value == null) {
        return false;
    }
    if (Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
||||
/**
 * True if `node` carries the given id in `node.attrs.id`.
 *
 * A nullish `nodeId` never matches. Without that guard an `undefined` id
 * would equal the missing `attrs.id` of every id-less node that has an
 * `attrs` object, so a by-id search, replace, or delete called with a
 * missing id would hit unrelated nodes.
 */
function matchesId(node, nodeId) {
    if (nodeId == null) {
        return false;
    }
    return isObject(node) && isObject(node.attrs) && node.attrs.id === nodeId;
}
|
||||
/**
 * Concatenate, depth first, all text held by `node`.
 *
 * A node's own `text` string (if any) comes first, followed by the plain
 * text of each entry in its `content` array, in order. Nullish or non-object
 * input yields "".
 */
export function blockPlainText(node) {
    if (!isObject(node)) {
        return "";
    }
    const ownText = typeof node.text === "string" ? node.text : "";
    if (!Array.isArray(node.content)) {
        return ownText;
    }
    return node.content.reduce((acc, child) => acc + blockPlainText(child), ownText);
}
|
||||
/**
 * Truncate `text` to at most `n` UTF-16 code units, appending "…" when cut.
 *
 * If the cut would land between the two halves of a surrogate pair (e.g. in
 * the middle of an emoji), the cut moves back one unit so the result never
 * ends in a lone surrogate, which would be invalid when encoded as UTF-8.
 */
function truncate(text, n) {
    if (text.length <= n) {
        return text;
    }
    const lastKept = text.charCodeAt(n - 1);
    const firstDropped = text.charCodeAt(n);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff &&
        firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    return `${text.slice(0, splitsPair ? n - 1 : n)}…`;
}
|
||||
/**
 * Produce a COMPACT outline of the TOP-LEVEL blocks in `doc.content`.
 *
 * Nothing below the top level is listed (no paragraphs inside lists, no
 * table cells): the outline is meant to stay small, and `getNodeByRef` can
 * be used to drill into one block.
 *
 * Every entry has `{ index, type, id, firstText }`, where `firstText` is the
 * block's plain text cut to 100 chars and `id` is `attrs.id` or `null`.
 * Type-specific extras:
 *   - heading: `level` (or `null`)
 *   - table: `rows`, `cols` (cells in the first row) and `header` (first-row
 *     cell texts, each cut to 40 chars)
 *   - any type ending in "List": `items`
 *
 * @returns {Array<object>} One entry per top-level block; `[]` when `doc` is
 *   not an object or has no `content` array.
 */
export function buildOutline(doc) {
    if (!isObject(doc) || !Array.isArray(doc.content)) {
        return [];
    }
    return Array.from(doc.content, (block, index) => {
        const blockIsObject = isObject(block);
        const type = blockIsObject ? block.type : undefined;
        const attrs = blockIsObject && isObject(block.attrs) ? block.attrs : null;
        const entry = {
            index,
            type,
            id: attrs ? attrs.id ?? null : null,
            firstText: truncate(blockPlainText(block), 100),
        };
        if (type === "heading") {
            entry.level = attrs ? attrs.level ?? null : null;
            return entry;
        }
        if (type === "table") {
            const rows = block.content;
            const firstRowCells = rows?.[0]?.content;
            entry.rows = rows?.length ?? 0;
            entry.cols = firstRowCells?.length ?? 0;
            entry.header = (firstRowCells ?? []).map((cell) => truncate(blockPlainText(cell), 40));
            return entry;
        }
        if (typeof type === "string" && type.endsWith("List")) {
            entry.items = block.content?.length ?? 0;
        }
        return entry;
    });
}
|
||||
/**
 * Look up one node by reference.
 *
 * - `#<n>` (e.g. `#2`) addresses the TOP-LEVEL block at index `n` of
 *   `doc.content`. This is the only way to reach table/tableRow/tableCell
 *   nodes, which carry no `attrs.id`.
 * - Any other `ref` is treated as a block id: the FIRST node, in depth-first
 *   document order, whose `attrs.id === ref` wins.
 *
 * @returns {{node: object, path: number[], type: *}|null} `node` is a DEEP
 *   CLONE (safe to mutate); `path` lists the child indices from the doc root
 *   down to the node (`[index]` for a top-level block). `null` when nothing
 *   matches or `doc` is not an object.
 */
export function getNodeByRef(doc, ref) {
    if (!isObject(doc)) {
        return null;
    }
    const describe = (node, path) => ({ node: clone(node), path, type: node.type });
    // "#<n>": direct index into the top-level content array.
    const topLevel = typeof ref === "string" ? /^#(\d+)$/.exec(ref) : null;
    if (topLevel !== null) {
        const index = Number(topLevel[1]);
        const block = Array.isArray(doc.content) ? doc.content[index] : undefined;
        return isObject(block) ? describe(block, [index]) : null;
    }
    // Id lookup: each child is tested before its own subtree is searched.
    const findIn = (parent, trail) => {
        if (!isObject(parent) || !Array.isArray(parent.content)) {
            return null;
        }
        for (const [i, child] of parent.content.entries()) {
            const path = trail.concat(i);
            if (matchesId(child, ref)) {
                return describe(child, path);
            }
            const nested = findIn(child, path);
            if (nested !== null) {
                return nested;
            }
        }
        return null;
    };
    return findIn(doc, []);
}
|
||||
/**
 * Swap EVERY node whose `attrs.id === nodeId` for a deep clone of `newNode`,
 * at any depth (callouts and table cells included).
 *
 * Works on a clone of `doc`. Each match receives its own fresh clone of
 * `newNode`, so substituted nodes never share references.
 *
 * @returns {{doc: *, replaced: number}} The new document and the number of
 *   nodes substituted.
 */
export function replaceNodeById(doc, nodeId, newNode) {
    const result = clone(doc);
    let replaced = 0;
    const visit = (siblings) => {
        siblings.forEach((child, i) => {
            if (matchesId(child, nodeId)) {
                // Substitute and stop here: the replacement is not searched.
                siblings[i] = clone(newNode);
                replaced += 1;
            }
            else if (isObject(child) && Array.isArray(child.content)) {
                visit(child.content);
            }
        });
    };
    if (isObject(result) && Array.isArray(result.content)) {
        visit(result.content);
    }
    return { doc: result, replaced };
}
|
||||
/**
 * Drop EVERY node whose `attrs.id === nodeId` from its parent's `content`
 * array, at any depth (callouts and tables included).
 *
 * Works on a clone of `doc`.
 *
 * @returns {{doc: *, deleted: number}} The new document and the number of
 *   nodes removed.
 */
export function deleteNodeById(doc, nodeId) {
    const result = clone(doc);
    let deleted = 0;
    // Build the surviving sibling list; survivors with children are pruned
    // recursively before being kept.
    const prune = (siblings) => {
        const survivors = [];
        for (const child of siblings) {
            if (!matchesId(child, nodeId)) {
                if (isObject(child) && Array.isArray(child.content)) {
                    child.content = prune(child.content);
                }
                survivors.push(child);
            }
            else {
                deleted += 1;
            }
        }
        return survivors;
    };
    if (isObject(result) && Array.isArray(result.content)) {
        result.content = prune(result.content);
    }
    return { doc: result, deleted };
}
|
||||
/**
 * Return a deep clone of `doc` in which no node or mark `attrs` object has a
 * key whose value is strictly `undefined`, making the result safe to hand to
 * Yjs (which throws an opaque "Unexpected content type" when asked to store
 * an `undefined` attribute value).
 *
 * Only `undefined` is stripped: `null`, `false`, `0` and `""` are legitimate
 * JSON-storable values and are kept. The input is never mutated, and
 * non-object nodes or missing `attrs`/`marks`/`content` are skipped.
 */
export function sanitizeForYjs(doc) {
    const result = clone(doc);
    const dropUndefined = (attrs) => {
        if (!isObject(attrs)) {
            return;
        }
        for (const [key, value] of Object.entries(attrs)) {
            if (value === undefined) {
                delete attrs[key];
            }
        }
    };
    // Explicit work list instead of recursion; the visiting order does not
    // affect the result because each node is cleaned independently.
    const pending = [result];
    while (pending.length > 0) {
        const node = pending.pop();
        if (!isObject(node)) {
            continue;
        }
        dropUndefined(node.attrs);
        if (Array.isArray(node.marks)) {
            for (const mark of node.marks) {
                if (isObject(mark)) {
                    dropUndefined(mark.attrs);
                }
            }
        }
        if (Array.isArray(node.content)) {
            for (const child of node.content) {
                pending.push(child);
            }
        }
    }
    return result;
}
|
||||
/**
 * Diagnostics helper: report the FIRST attribute value that Yjs cannot store
 * — `undefined`, a `function`, a `symbol`, or a `bigint` — as a readable
 * path such as `content[3].content[0].attrs.indent (undefined)`.
 *
 * Nodes are visited depth first; on each node its own `attrs` are checked
 * first, then the `attrs` of each of its marks, then its children. Paths are
 * relative to the root doc, so they start at `attrs` or `content[i]`. On the
 * root doc itself only `attrs` and `content` are inspected.
 *
 * @returns {string|null} The offending path, or `null` when every attribute
 *   is storable or `doc` is not an object.
 */
export function findUnstorableAttr(doc) {
    const UNSTORABLE_KINDS = ["undefined", "function", "symbol", "bigint"];
    // First offending key of one attrs object, as "<basePath>.<key> (<kind>)".
    const firstBadAttr = (attrs, basePath) => {
        if (!isObject(attrs)) {
            return null;
        }
        for (const [key, value] of Object.entries(attrs)) {
            const kind = typeof value;
            if (UNSTORABLE_KINDS.includes(kind)) {
                return `${basePath}.${key} (${kind})`;
            }
        }
        return null;
    };
    // `prefix` is "" at the root and "<parent path>." below it.
    const scanChildren = (content, prefix) => {
        if (!Array.isArray(content)) {
            return null;
        }
        for (const [i, child] of content.entries()) {
            const hit = scanNode(child, `${prefix}content[${i}]`);
            if (hit !== null) {
                return hit;
            }
        }
        return null;
    };
    const scanNode = (node, path) => {
        if (!isObject(node)) {
            return null;
        }
        const ownHit = firstBadAttr(node.attrs, `${path}.attrs`);
        if (ownHit !== null) {
            return ownHit;
        }
        if (Array.isArray(node.marks)) {
            for (const [i, mark] of node.marks.entries()) {
                const markHit = firstBadAttr(mark?.attrs, `${path}.marks[${i}].attrs`);
                if (markHit !== null) {
                    return markHit;
                }
            }
        }
        return scanChildren(node.content, `${path}.`);
    };
    if (!isObject(doc)) {
        return null;
    }
    // The root has no index of its own, so its paths carry no leading segment.
    return firstBadAttr(doc.attrs, "attrs") ?? scanChildren(doc.content, "");
}
|
||||
/**
 * Table structural node types, each mapped to the container type it must
 * live directly inside. `insertNodeRelative` uses this to splice rows/cells
 * into the correct ancestor instead of the anchor's direct parent, which
 * would corrupt the table's nesting.
 */
const REQUIRED_CONTAINER = {
    tableRow: "table",
    tableCell: "tableRow",
    tableHeader: "tableRow",
};
/** The structural node types themselves: exactly the keys of REQUIRED_CONTAINER. */
const STRUCTURAL_TYPES = new Set(Object.keys(REQUIRED_CONTAINER));
|
||||
/**
 * Index of the first TOP-LEVEL block whose plain text contains the anchor,
 * or -1 when there is none.
 *
 * "Exact wins globally": the verbatim `anchorText` is tried against every
 * block first. Only if that finds nothing is the markdown-stripped anchor
 * tried, and only when stripping actually changed the text and left it
 * non-empty.
 */
function findAnchorTextIndex(content, anchorText) {
    if (!Array.isArray(content)) {
        return -1;
    }
    const firstBlockContaining = (needle) => content.findIndex((block) => blockPlainText(block).includes(needle));
    const exact = firstBlockContaining(anchorText);
    if (exact !== -1) {
        return exact;
    }
    const stripped = stripInlineMarkdown(anchorText);
    if (stripped === anchorText || !(stripped.length > 0)) {
        return -1;
    }
    return firstBlockContaining(stripped);
}
|
||||
/**
 * Resolve an anchor and return its ancestor chain, from `doc` down to and
 * including the matched node.
 *
 * Each chain entry is `{ node, index }`, where `index` is the node's position
 * in its parent's `content` array (-1 for the root doc).
 *
 * - `opts.anchorNodeId` (when not nullish): depth-first search of the whole
 *   tree, the root included, for `attrs.id === anchorNodeId`.
 * - otherwise `opts.anchorText`: only top-level blocks are scanned, using
 *   `findAnchorTextIndex`.
 *
 * @returns {Array<{node: object, index: number}>|null} `null` when the anchor
 *   cannot be resolved or `doc` is not an object.
 */
function findAnchorChain(doc, opts) {
    if (!isObject(doc)) {
        return null;
    }
    const { anchorNodeId, anchorText } = opts;
    if (anchorNodeId != null) {
        const descend = (node, index, ancestors) => {
            if (!isObject(node)) {
                return null;
            }
            const chain = ancestors.concat({ node, index });
            if (matchesId(node, anchorNodeId)) {
                return chain;
            }
            const children = Array.isArray(node.content) ? node.content : [];
            for (const [i, child] of children.entries()) {
                const found = descend(child, i, chain);
                if (found !== null) {
                    return found;
                }
            }
            return null;
        };
        return descend(doc, -1, []);
    }
    if (anchorText == null || !Array.isArray(doc.content)) {
        return null;
    }
    const at = findAnchorTextIndex(doc.content, anchorText);
    if (at === -1) {
        return null;
    }
    return [
        { node: doc, index: -1 },
        { node: doc.content[at], index: at },
    ];
}
|
||||
/**
 * Insert a deep clone of `node` into a clone of `doc`.
 *
 * Positions (`opts.position`):
 *   - "append": push onto the top-level `doc.content`.
 *   - "after": splice immediately after the anchor.
 *   - anything else (normally "before"): splice immediately before the anchor.
 *
 * Anchor resolution for before/after:
 *   - `opts.anchorNodeId` (when not nullish): the first node anywhere in the
 *     tree with `attrs.id === anchorNodeId`; the new node goes into that
 *     node's parent `content` array.
 *   - otherwise `opts.anchorText`: only TOP-LEVEL blocks are scanned (exact
 *     text first, markdown-stripped fallback on a miss).
 *
 * Structural table nodes (tableRow/tableCell/tableHeader) are special-cased:
 * they are spliced into the nearest enclosing container of the required type
 * (table for rows, tableRow for cells) rather than the anchor's direct
 * parent, so the row/cell lands at the correct level of the table.
 *
 * @returns {{doc: *, inserted: boolean}} `inserted` is false when `opts` is
 *   not an object or the anchor could not be resolved; the returned doc is
 *   then an unmodified clone.
 * @throws {Error} When a structural node is appended at the top level, or
 *   when its anchor is not inside a container of the required type.
 */
export function insertNodeRelative(doc, node, opts) {
    const out = clone(doc);
    const fresh = clone(node);
    const notInserted = () => ({ doc: out, inserted: false });
    const didInsert = () => ({ doc: out, inserted: true });
    // Without an options object there is nothing actionable to do.
    if (!isObject(opts)) {
        return notInserted();
    }
    const isStructural = isObject(node) && STRUCTURAL_TYPES.has(node.type);
    const { position } = opts;
    if (position === "append") {
        // Rows/cells cannot live at the top level of a document.
        if (isStructural) {
            throw new Error(`insert_node: cannot append a ${node.type} at the top level; use ` +
                `position before/after with an anchor inside the target table`);
        }
        if (!isObject(out)) {
            return notInserted();
        }
        if (!Array.isArray(out.content)) {
            out.content = [];
        }
        out.content.push(fresh);
        return didInsert();
    }
    const offset = position === "after" ? 1 : 0;
    if (isStructural) {
        const containerType = REQUIRED_CONTAINER[node.type];
        const chain = findAnchorChain(out, opts);
        if (chain == null) {
            return notInserted();
        }
        // DEEPEST chain entry (the anchor itself included) of the required
        // container type.
        const containerIdx = chain
            .map((entry) => (isObject(entry.node) ? entry.node.type : undefined))
            .lastIndexOf(containerType);
        if (containerIdx === -1) {
            throw new Error(`insert_node: cannot insert a ${node.type} here — the anchor is not ` +
                `inside a ${containerType}. Anchor on a cell's text or a block id ` +
                `that lives inside the target table.`);
        }
        const container = chain[containerIdx].node;
        if (!Array.isArray(container.content)) {
            container.content = [];
        }
        const anchorIsContainer = containerIdx === chain.length - 1;
        // When the anchor IS the container (e.g. anchorText resolved to the
        // table block), "after" appends inside it and "before" prepends.
        // Otherwise splice next to the container's child that leads to the
        // anchor.
        const at = anchorIsContainer
            ? (position === "after" ? container.content.length : 0)
            : chain[containerIdx + 1].index + offset;
        container.content.splice(at, 0, fresh);
        return didInsert();
    }
    if (opts.anchorNodeId != null) {
        // Depth-first; stops at the first match (`some` short-circuits as
        // soon as the splice has happened).
        const insertBeside = (siblings) => siblings.some((child, i) => {
            if (matchesId(child, opts.anchorNodeId)) {
                siblings.splice(i + offset, 0, fresh);
                return true;
            }
            return isObject(child) && Array.isArray(child.content) && insertBeside(child.content);
        });
        const inserted = isObject(out) && Array.isArray(out.content)
            ? insertBeside(out.content)
            : false;
        return { doc: out, inserted };
    }
    if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
        const at = findAnchorTextIndex(out.content, opts.anchorText);
        if (at !== -1) {
            out.content.splice(at + offset, 0, fresh);
            return didInsert();
        }
    }
    return notInserted();
}
|
||||
// ===========================================================================
|
||||
// Table editing helpers
|
||||
//
|
||||
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
|
||||
// table -> { type:"table", content:[tableRow...] }
|
||||
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
|
||||
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
|
||||
// content:[paragraph...] }
|
||||
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
|
||||
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
|
||||
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
|
||||
// of the input doc (via `clone`) and never mutate their inputs.
|
||||
// ===========================================================================
|
||||
/**
 * Add EVERY string `attrs.id` found in `node` or any of its descendants to
 * the `used` set. This seeds `makeFreshId`, so generated paragraph ids never
 * collide with ids already in the document.
 */
function collectIds(node, used) {
    if (!isObject(node)) {
        return;
    }
    const { attrs, content } = node;
    if (isObject(attrs) && typeof attrs.id === "string") {
        used.add(attrs.id);
    }
    if (Array.isArray(content)) {
        content.forEach((child) => collectIds(child, used));
    }
}
|
||||
/**
 * Mint a random 12-character id over lowercase `a-z0-9` that is not yet in
 * `used`, record it in `used`, and return it. A candidate that collides with
 * an existing id is discarded and a new one is drawn.
 *
 * Callers depend only on uniqueness, never on the concrete string, which is
 * why a random id (rather than a module-level counter that would need
 * resetting) is used.
 *
 * @param {Set<string>} used - Ids already taken; the new id is added to it.
 * @returns {string} The freshly generated id.
 */
function makeFreshId(used) {
    const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
    const pickChar = () => alphabet[Math.floor(Math.random() * alphabet.length)];
    const draw = () => Array.from({ length: 12 }, pickChar).join("");
    let candidate = draw();
    while (used.has(candidate)) {
        candidate = draw();
    }
    used.add(candidate);
    return candidate;
}
|
||||
/**
 * Find the table a reference points at inside an ALREADY-CLONED document and
 * return the live node (a reference into `rootClone`, safe for the caller to
 * mutate) together with its index path. Returns null when nothing matches.
 *
 * Two reference forms are understood:
 *   - `#<n>`: the top-level block at index `n`, accepted only when its
 *     `type === "table"`.
 *   - anything else: a depth-first search for the node that `matchesId`
 *     accepts for `tableRef`, followed by a climb up its ancestor chain
 *     (the match itself included) to the nearest `type === "table"` node.
 *
 * When the id is found but has no enclosing table, the scan of that sibling
 * list ends with null; the parent levels keep searching their remaining
 * children.
 *
 * @param {*} rootClone - Cloned document root.
 * @param {*} tableRef - `#<n>` index reference or a node id.
 * @returns {{ table: object, path: number[] } | null}
 */
function locateTable(rootClone, tableRef) {
    if (!isObject(rootClone)) {
        return null;
    }
    // "#<n>" form: direct lookup in the top-level content array.
    if (typeof tableRef === "string") {
        const indexMatch = /^#(\d+)$/.exec(tableRef);
        if (indexMatch !== null) {
            const index = Number(indexMatch[1]);
            const topLevel = Array.isArray(rootClone.content) ? rootClone.content : [];
            const candidate = topLevel[index];
            return isObject(candidate) && candidate.type === "table"
                ? { table: candidate, path: [index] }
                : null;
        }
    }
    // Climb an ancestor chain (deepest entry last) to the nearest table.
    const nearestTable = (chain) => {
        for (let depth = chain.length - 1; depth >= 0; depth--) {
            const entry = chain[depth].node;
            if (isObject(entry) && entry.type === "table") {
                return {
                    table: entry,
                    path: chain.slice(0, depth + 1).map((step) => step.index),
                };
            }
        }
        return null; // id found but no enclosing table
    };
    // Id form: depth-first search carrying the ancestor chain along.
    const descend = (parent, chain) => {
        if (!isObject(parent) || !Array.isArray(parent.content)) {
            return null;
        }
        for (const [index, child] of parent.content.entries()) {
            const extended = chain.concat({ node: child, index });
            if (matchesId(child, tableRef)) {
                return nearestTable(extended);
            }
            const found = descend(child, extended);
            if (found != null) {
                return found;
            }
        }
        return null;
    };
    return descend(rootClone, []);
}
|
||||
/**
 * Build the single paragraph that every table writer puts inside a cell.
 *
 * @param {string} id - Value for the paragraph's `attrs.id`.
 * @param {string} text - Plain text; a falsy value (e.g. "") yields a
 *   paragraph with an empty `content` array.
 * @returns {{ type: "paragraph", attrs: { id: string, indent: number }, content: object[] }}
 */
function makeCellParagraph(id, text) {
    const content = [];
    if (text) {
        content.push({ type: "text", text });
    }
    return {
        type: "paragraph",
        attrs: { id, indent: 0 },
        content,
    };
}
|
||||
/**
 * Read a table as a matrix. Returns null when `tableRef` resolves to no table.
 *
 * Result fields:
 *   - `rows`: number of rows in the table.
 *   - `cols`: number of cells in the FIRST row only. Tables may be ragged, so
 *     use the length of each `cells[r]` / `cellIds[r]` for a row's real width.
 *   - `cells`: `string[][]` holding each cell's `blockPlainText`.
 *   - `cellIds`: `(string|null)[][]` holding the `attrs.id` of each cell's
 *     FIRST child (or null), so a caller can address the cell by that id.
 *   - `path`: index path of the table within the document.
 *
 * The lookup runs against a clone of `doc`; the input is never touched.
 *
 * @param {*} doc - Document root.
 * @param {*} tableRef - `#<n>` index reference or a node id inside the table.
 * @returns {{ rows: number, cols: number, cells: string[][], cellIds: Array<Array<string|null>>, path: number[] } | null}
 */
export function readTable(doc, tableRef) {
    const located = locateTable(clone(doc), tableRef);
    if (located == null) {
        return null;
    }
    const { table, path } = located;
    const rowNodes = Array.isArray(table.content) ? table.content : [];
    // Text plus first-child id for one cell.
    const describeCell = (cellNode) => {
        const text = blockPlainText(cellNode);
        const lead = Array.isArray(cellNode?.content) ? cellNode.content[0] : undefined;
        const id = isObject(lead) && isObject(lead.attrs) ? (lead.attrs.id ?? null) : null;
        return { text, id };
    };
    const cells = [];
    const cellIds = [];
    for (const rowNode of rowNodes) {
        const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : [];
        const described = Array.from(cellNodes, (cellNode) => describeCell(cellNode));
        cells.push(described.map((entry) => entry.text));
        cellIds.push(described.map((entry) => entry.id));
    }
    return {
        rows: rowNodes.length,
        cols: rowNodes[0]?.content?.length ?? 0,
        cells,
        cellIds,
        path,
    };
}
|
||||
/**
 * Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`;
 * `inserted` is false only when the table cannot be located. Works on a clone
 * of `doc`.
 *
 * - The row is as wide as the WIDEST existing row (or as wide as `cells` when
 *   the table has no cells yet); missing entries are padded with "".
 * - Supplying MORE cells than columns throws.
 * - Each new cell gets `colspan: 1, rowspan: 1`, a fresh-id paragraph, and a
 *   COPY of its column's `colwidth` from the first row when one is present.
 *   The copy matters: assigning the array by reference would make the new
 *   cell and the first-row cell share one array inside the returned doc.
 * - `index`, when an integer in `[0, rows]`, is where the row is spliced in;
 *   any other value appends. A row landing at index 0 takes each column's
 *   cell type from the current first row (falling back to "tableCell");
 *   every other position uses "tableCell".
 *
 * @param {*} doc - Document root.
 * @param {*} tableRef - `#<n>` index reference or a node id inside the table.
 * @param {string[]} [cells] - Cell texts, left to right.
 * @param {number} [index] - 0-based landing index; omitted/invalid appends.
 * @returns {{ doc: *, inserted: boolean }}
 * @throws {Error} When `cells` has more entries than the table has columns.
 */
export function insertTableRow(doc, tableRef, cells, index) {
    const out = clone(doc);
    const located = locateTable(out, tableRef);
    if (located == null) {
        return { doc: out, inserted: false };
    }
    const { table } = located;
    if (!Array.isArray(table.content)) {
        table.content = [];
    }
    const rows = table.content.length;
    const headerRow = table.content[0];
    const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];
    const supplied = Array.isArray(cells) ? cells : [];
    // Column count is the widest existing row so the guard below stays
    // meaningful for ragged tables; only an empty table falls back to the
    // supplied cell count.
    let colCount = 0;
    for (const row of table.content) {
        if (isObject(row) && Array.isArray(row.content)) {
            colCount = Math.max(colCount, row.content.length);
        }
    }
    if (colCount === 0) {
        colCount = supplied.length;
    }
    if (supplied.length > colCount) {
        throw new Error(`table_insert_row: got ${supplied.length} cell(s) but the table has ${colCount} column(s)`);
    }
    // Resolve the landing index first so the cell-type choice and the splice
    // agree with each other.
    const landingIndex = Number.isInteger(index) && index >= 0 && index <= rows
        ? index
        : rows;
    // Seed the generator with every id in the doc so new paragraph ids are
    // unique document-wide.
    const used = new Set();
    collectIds(out, used);
    const newCells = [];
    for (let i = 0; i < colCount; i++) {
        const headerCell = headerCells[i];
        const attrs = { colspan: 1, rowspan: 1 };
        const colwidth = headerCell?.attrs?.colwidth;
        if (colwidth !== undefined) {
            // Copy arrays so the two cells never alias the same colwidth.
            attrs.colwidth = Array.isArray(colwidth) ? [...colwidth] : colwidth;
        }
        const cellType = landingIndex === 0
            ? (headerCell?.type ?? "tableCell")
            : "tableCell";
        newCells.push({
            type: cellType,
            attrs,
            content: [makeCellParagraph(makeFreshId(used), supplied[i] ?? "")],
        });
    }
    table.content.splice(landingIndex, 0, { type: "tableRow", content: newCells });
    return { doc: out, inserted: true };
}
|
||||
/**
 * Remove the row at 0-based `index` from a table. Works on a clone of `doc`
 * and returns `{ doc, deleted }`; `deleted` is false only when the table
 * cannot be located.
 *
 * @param {*} doc - Document root.
 * @param {*} tableRef - `#<n>` index reference or a node id inside the table.
 * @param {number} index - 0-based row index.
 * @returns {{ doc: *, deleted: boolean }}
 * @throws {Error} When `index` is not an integer within `[0, rows)`, or when
 *   the row is the table's only row.
 */
export function deleteTableRow(doc, tableRef, index) {
    const out = clone(doc);
    const located = locateTable(out, tableRef);
    if (located == null) {
        return { doc: out, deleted: false };
    }
    const { table } = located;
    if (!Array.isArray(table.content)) {
        table.content = [];
    }
    const rowList = table.content;
    const rowCount = rowList.length;
    const inRange = Number.isInteger(index) && index >= 0 && index < rowCount;
    if (!inRange) {
        throw new Error(`table_delete_row: row index ${index} out of range (table has ${rowCount} row(s))`);
    }
    if (rowCount <= 1) {
        throw new Error("table_delete_row: refusing to delete the only row of the table");
    }
    rowList.splice(index, 1);
    return { doc: out, deleted: true };
}
|
||||
/**
 * Replace the content of cell `[row, col]` (both 0-based) with a single
 * plain-text paragraph. Works on a clone of `doc` and returns
 * `{ doc, updated }`; `updated` is false only when the table cannot be located.
 *
 * The cell node's own attrs (colspan/rowspan/colwidth) are left alone. The new
 * paragraph keeps the id of the cell's previous first child when that id is a
 * non-empty string; otherwise a fresh document-unique id is minted.
 *
 * @param {*} doc - Document root.
 * @param {*} tableRef - `#<n>` index reference or a node id inside the table.
 * @param {number} row - 0-based row index.
 * @param {number} col - 0-based column index within that row.
 * @param {string} text - New plain text for the cell.
 * @returns {{ doc: *, updated: boolean }}
 * @throws {Error} When `row` or `col` is out of range for the table.
 */
export function updateTableCell(doc, tableRef, row, col, text) {
    const out = clone(doc);
    const located = locateTable(out, tableRef);
    if (located == null) {
        return { doc: out, updated: false };
    }
    const { table } = located;
    const rowNodes = Array.isArray(table.content) ? table.content : [];
    const rowOk = Number.isInteger(row) && row >= 0 && row < rowNodes.length;
    const rowNode = rowOk ? rowNodes[row] : undefined;
    // Width of the addressed row only; rows may differ in length.
    const width = isObject(rowNode) && Array.isArray(rowNode.content)
        ? rowNode.content.length
        : 0;
    const colOk = Number.isInteger(col) && col >= 0 && col < width;
    if (!rowOk || !colOk) {
        throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
    }
    const cellNode = rowNode.content[col];
    const previousFirst = Array.isArray(cellNode?.content)
        ? cellNode.content[0]
        : undefined;
    const previousId = isObject(previousFirst) && isObject(previousFirst.attrs)
        ? previousFirst.attrs.id
        : undefined;
    let id = previousId;
    const reusable = typeof previousId === "string" && previousId.length > 0;
    if (!reusable) {
        // No usable id on the old paragraph: mint one unique within the doc.
        const used = new Set();
        collectIds(out, used);
        id = makeFreshId(used);
    }
    cellNode.content = [makeCellParagraph(id, text)];
    return { doc: out, updated: true };
}
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* Per-page async mutex.
|
||||
*
|
||||
* Content writes over the collaboration websocket must never overlap for the
|
||||
* same page: two concurrent full-document replaces would race on the live Yjs
|
||||
* fragment. We serialize them with a per-pageId promise chain — each new
|
||||
* operation waits for the previous one on that page to settle (success or
|
||||
* failure) before it runs. Different pages never block each other.
|
||||
*/
|
||||
// pageId -> error-swallowing tail promise of the most recent operation queued
// for that page.
const chains = new Map();
/**
 * Run `fn` after every previously queued operation for `pageId` has settled
 * (fulfilled or rejected). Operations for different page ids do not wait on
 * each other.
 *
 * The returned promise carries the real result/rejection of `fn` and MUST be
 * awaited or otherwise handled by the caller. Only the internal tail stored
 * in `chains` swallows errors, and it exists purely to gate ordering.
 *
 * @param {*} pageId - Key identifying the page being written.
 * @param {Function} fn - Operation to run; may return a promise.
 * @returns {Promise<*>} Settles with the outcome of `fn`.
 */
export function withPageLock(pageId, fn) {
    const ignore = () => { };
    // A failed predecessor must not poison the queue for this caller.
    const predecessor = chains.get(pageId) ?? Promise.resolve();
    const run = predecessor.catch(ignore).then(fn);
    // The stored tail never rejects; it only marks "this op has settled".
    const tail = run.catch(ignore);
    chains.set(pageId, tail);
    // Once settled, drop the entry if no later op replaced it, so the map does
    // not accumulate resolved promises.
    tail.then(() => {
        if (chains.get(pageId) === tail) {
            chains.delete(pageId);
        }
    });
    return run;
}
|
||||
@@ -1,15 +0,0 @@
|
||||
/**
 * Normalize a ProseMirror node argument that the model may have serialized as
 * a JSON string instead of passing an object. Shared by patch_node /
 * insert_node (and the analogous update_page_json content parsing).
 *
 * @param {*} node - A node object, or a JSON string encoding one.
 * @param {string} [errMsg] - Message of the error thrown for invalid JSON.
 * @returns {*} The parsed value for a string input; any other input unchanged.
 * @throws {Error} With message `errMsg` when `node` is a string that is not
 *   valid JSON. The original parse error is attached as `cause` so the
 *   position/details of the syntax problem are not lost.
 */
export function parseNodeArg(node, errMsg = "node was a string but not valid JSON") {
    if (typeof node !== "string") {
        return node;
    }
    try {
        return JSON.parse(node);
    }
    catch (err) {
        throw new Error(errMsg, { cause: err });
    }
}
|
||||
@@ -1,108 +0,0 @@
|
||||
/**
|
||||
* Locator normalization: strip inline markdown wrappers and trailing
|
||||
* decoration from a LOCATOR string so a find/anchor that the model wrote with
|
||||
* markdown (or a stray emoji) can still match the document's plain text.
|
||||
*
|
||||
* This is used ONLY as a fallback for LOCATING (after an exact match fails);
|
||||
* it is never applied to replacement text or inserted node content, so no
|
||||
* formatting is ever lost.
|
||||
*/
|
||||
/** Upper bound on unwrap passes, so pathological/nested input cannot loop forever. */
const MAX_PASSES = 8;
/**
 * Inline emphasis/code/strikethrough wrappers. Strong comes BEFORE emphasis
 * so `**x**` collapses to `x` instead of leaving a stray `*x*`. Every pattern
 * is non-greedy with the inner text in capture group 1, and the list is
 * re-applied until the string stops changing (nested wrappers such as
 * `**_x_**`).
 */
const WRAPPER_PATTERNS = [
    /\*\*([^*]+?)\*\*/g, // **x**
    /__([^_]+?)__/g, // __x__
    /~~([^~]+?)~~/g, // ~~x~~
    /\*([^*]+?)\*/g, // *x*
    /_([^_]+?)_/g, // _x_
    /``([^`]+?)``/g, // ``x``
    /`([^`]+?)`/g, // `x`
];
/** Links/images -> their visible text. `!?` covers both `[t](u)` and `![alt](u)`. */
const LINK_IMAGE_RE = /!?\[([^\]]*)\]\([^)]*\)/g;
/**
 * The two passes shared by both normalizers: collapse links/images to their
 * visible text, then collapse balanced inline wrappers until the string is
 * stable (at most `MAX_PASSES` rounds). No decoration trimming and no
 * empty-result guard happen here; the transformed string is returned as is.
 *
 * @param {string} s - Input text.
 * @returns {string} Text with link/image syntax and balanced wrappers removed.
 */
function stripWrappersAndLinks(s) {
    // 1. Links/images -> their visible text.
    let current = s.replace(LINK_IMAGE_RE, "$1");
    // 2. Balanced wrappers, repeated so nested and adjacent runs both collapse.
    let passesLeft = MAX_PASSES;
    while (passesLeft-- > 0) {
        const collapsed = WRAPPER_PATTERNS.reduce((text, pattern) => text.replace(pattern, "$1"), current);
        if (collapsed === current) {
            break;
        }
        current = collapsed;
    }
    return current;
}
|
||||
/**
 * STRICT formatting detector, deliberately distinct from the lenient locator
 * normalization done by `stripInlineMarkdown`. It removes ONLY what is
 * unambiguously markdown markup:
 *   1. links/images: `[text](url)` -> `text`, `![alt](url)` -> `alt`, and
 *   2. balanced inline `**`/`__`/`~~`/`*`/`_`/`` ` `` wrappers (repeated until
 *      stable),
 * and does NOT trim leading/trailing whitespace, emoji, or lone marker chars
 * (the extra step 3 that `stripInlineMarkdown` performs).
 *
 * Its only purpose is recognizing formatting-vs-plain INTENT in
 * `applyTextEdits`, i.e. deciding whether find/replace differ purely by
 * markdown markers. Because the lenient trimming is skipped, ordinary
 * plain-text edits such as a trailing-space trim or math like `2 * 3` are
 * not misread as formatting; wrapper runs are collapsed only when balanced,
 * and both sides of the comparison go through the same transformation.
 *
 * Do NOT use this for LOCATING: the locator fallback must keep using the
 * lenient `stripInlineMarkdown`, which trims stray decoration so a find can
 * still match the document's plain text.
 *
 * @param {*} s - Candidate text.
 * @returns {*} The stripped string; non-string or empty input is returned unchanged.
 */
export function stripBalancedWrappers(s) {
    const isNonEmptyString = typeof s === "string" && s.length > 0;
    return isNonEmptyString ? stripWrappersAndLinks(s) : s;
}
|
||||
/**
 * Conservatively strip inline markdown from a LOCATOR string.
 *
 * Fixed, deterministic order:
 *   1. Links/images: `[text](url)` -> `text`, `![alt](url)` -> `alt`.
 *   2. Balanced inline wrappers (strong before emphasis, code, strikethrough),
 *      re-applied until stable so nested cases collapse.
 *   3. Trim leading/trailing decoration only: whitespace, leftover marker
 *      chars (`* _ ~ \``) and emoji. Letters, digits and sentence punctuation
 *      (`.`/`,` etc.) are NEVER trimmed, and interior text is untouched.
 *
 * If nothing is left (e.g. the input was only markers like `***`), the
 * ORIGINAL string is returned, so a locator can never normalize down to ""
 * and match everything.
 *
 * @param {*} s - Locator text.
 * @returns {*} Normalized locator; non-string or empty input is returned unchanged.
 */
export function stripInlineMarkdown(s) {
    if (typeof s !== "string" || s.length === 0) {
        return s;
    }
    // Decoration class: whitespace, markdown markers, emoji
    // (Extended_Pictographic plus the VS16 / ZWJ joiners) and the
    // regional-indicator range U+1F1E6–U+1F1FF used by flag emoji, which is
    // NOT Extended_Pictographic. The `u` flag enables the property escape.
    const DECORATION = "[\\s*_~\\x60\\p{Extended_Pictographic}\\u{1F1E6}-\\u{1F1FF}\\u{FE0F}\\u{200D}]+";
    const leadingRun = new RegExp("^" + DECORATION, "u");
    const trailingRun = new RegExp(DECORATION + "$", "u");
    // Steps 1 + 2 are the shared link/image and balanced-wrapper passes;
    // step 3 trims anchored decoration runs at both ends.
    const normalized = stripWrappersAndLinks(s)
        .replace(leadingRun, "")
        .replace(trailingRun, "");
    // Never normalize a locator down to nothing.
    return normalized.length === 0 ? s : normalized;
}
|
||||
@@ -1,473 +0,0 @@
|
||||
/**
|
||||
* Pure, network-free transform primitives for a ProseMirror/TipTap document
|
||||
* tree, plus one higher-level orchestration (commentsToFootnotes).
|
||||
*
|
||||
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||
* `content` array; callouts, tables, lists all hold their children in
|
||||
* `content`, so a single recursive walk reaches them all.
|
||||
*
|
||||
* Conventions (matching node-ops.ts):
|
||||
* - functions that produce a new document deep-clone their input and return a
|
||||
* `{ doc, ... }` object; the caller's objects are never mutated.
|
||||
* - functions are defensively null-safe.
|
||||
* - `marks` arrays are preserved verbatim when fragments are split/reordered.
|
||||
*/
|
||||
import { blockPlainText } from "./node-ops.js";
|
||||
/**
 * Deep-copy a JSON-serializable value, leaving the original untouched. Uses
 * `structuredClone` when the runtime provides it and a JSON round-trip
 * otherwise.
 *
 * @param {*} value - Value to copy.
 * @returns {*} An independent deep copy.
 */
function clone(value) {
    return typeof structuredClone === "function"
        ? structuredClone(value)
        : JSON.parse(JSON.stringify(value));
}
|
||||
/**
 * Report whether `value` is a non-null object that is not an array.
 *
 * @param {*} value - Anything.
 * @returns {boolean}
 */
function isObject(value) {
    if (value === null || value === undefined) {
        return false;
    }
    if (Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
||||
/**
 * Plain text of a node. Thin pass-through to node-ops' `blockPlainText`,
 * exposed here so transform authors have a single import surface.
 *
 * @param {*} node - Node to read.
 * @returns {*} Whatever `blockPlainText` returns for `node`.
 */
export function blockText(node) {
    const text = blockPlainText(node);
    return text;
}
|
||||
/**
 * Depth-first, pre-order visit of `node` and everything reachable through
 * `content` arrays (callouts, tables, lists, ...). `fn` is invoked once per
 * object node, the root included. Nullish and non-object values are skipped.
 *
 * @param {*} node - Root of the subtree to visit.
 * @param {(node: object) => void} fn - Visitor.
 * @returns {void}
 */
export function walk(node, fn) {
    if (!isObject(node)) {
        return;
    }
    fn(node);
    // Read `content` only after the visitor ran, in case it changed it.
    const children = node.content;
    if (!Array.isArray(children)) {
        return;
    }
    for (let i = 0; i < children.length; i++) {
        walk(children[i], fn);
    }
}
|
||||
/**
 * Return the FIRST node, in depth-first order, for which `predicate` is
 * truthy. The raw tree is searched, so nodes without an `attrs.id` are found
 * too. The result is the live node inside `doc` (NOT a copy), or null when
 * nothing matches. Typical use:
 * `getList(doc, n => n.type === "orderedList")`.
 *
 * @param {*} doc - Tree to search.
 * @param {(node: object) => *} predicate - Match test.
 * @returns {object | null}
 */
export function getList(doc, predicate) {
    let match = null;
    walk(doc, (candidate) => {
        if (match != null) {
            return; // already have the first hit
        }
        if (predicate(candidate)) {
            match = candidate;
        }
    });
    return match;
}
|
||||
/**
 * Insert `marker` as a PLAIN (unmarked) text run right after the first
 * occurrence of `anchor`.
 *
 * The anchor is matched against the concatenated plain text of each top-level
 * block, so an anchor spanning several text/mark runs still matches. The text
 * run containing the END of the anchor is split there: the pieces on both
 * sides keep the run's marks (links, bold, ...), while the inserted run
 * carries none. The inserted text is `" " + marker`, so the marker is
 * separated from the preceding word.
 *
 * Works on a clone of `doc` and returns `{ doc, inserted }`. `inserted` is
 * false when no in-scope block yielded a split point.
 *
 * @param {*} doc - Document root.
 * @param {string} anchor - Text to search for; a falsy value inserts nothing.
 * @param {string} marker - Text to insert after the anchor.
 * @param {{ beforeBlock?: number }} [opts] - When `beforeBlock` is a number,
 *   only top-level blocks with an index below it are searched.
 * @returns {{ doc: *, inserted: boolean }}
 */
export function insertMarkerAfter(doc, anchor, marker, opts = {}) {
    const out = clone(doc);
    if (!isObject(out) || !Array.isArray(out.content) || !anchor) {
        return { doc: out, inserted: false };
    }
    const blocks = out.content;
    const limit = typeof opts.beforeBlock === "number"
        ? Math.min(opts.beforeBlock, blocks.length)
        : blocks.length;
    for (let b = 0; b < limit; b++) {
        const block = blocks[b];
        if (!isObject(block)) {
            continue;
        }
        // Quick reject: the block's plain text must contain the anchor.
        const plain = blockPlainText(block);
        if (!plain.includes(anchor)) {
            continue;
        }
        // Plain-text offset just past the anchor's first occurrence.
        const anchorEnd = plain.indexOf(anchor) + anchor.length;
        let consumed = 0; // plain-text characters passed so far in this block
        let done = false;
        // Descend through containers until an array holding text nodes is
        // found; the first text run whose range covers anchorEnd is split.
        const visit = (container) => {
            if (done || !isObject(container) || !Array.isArray(container.content)) {
                return;
            }
            const inline = container.content;
            const holdsText = inline.some((n) => isObject(n) && n.type === "text");
            if (!holdsText) {
                // Not an inline array: recurse (e.g. callout -> paragraph).
                for (const child of inline) {
                    visit(child);
                    if (done) {
                        return;
                    }
                }
                return;
            }
            for (let i = 0; i < inline.length; i++) {
                const run = inline[i];
                const runStart = consumed;
                const runEnd = runStart + (isObject(run) ? blockPlainText(run).length : 0);
                const splittable = isObject(run) &&
                    run.type === "text" &&
                    typeof run.text === "string";
                // Split point: runStart < anchorEnd <= runEnd.
                if (splittable && anchorEnd > runStart && anchorEnd <= runEnd) {
                    const cut = anchorEnd - runStart;
                    const head = run.text.slice(0, cut);
                    const tail = run.text.slice(cut);
                    const marks = Array.isArray(run.marks) ? run.marks : [];
                    const pieces = [];
                    if (head.length > 0) {
                        pieces.push({ ...run, text: head, marks: [...marks] });
                    }
                    // The marker run is plain: no marks are copied onto it.
                    pieces.push({ type: "text", text: " " + marker });
                    if (tail.length > 0) {
                        pieces.push({ ...run, text: tail, marks: [...marks] });
                    }
                    inline.splice(i, 1, ...pieces);
                    done = true;
                    return;
                }
                consumed = runEnd;
            }
        };
        visit(block);
        if (done) {
            return { doc: out, inserted: true };
        }
        // Matched in plain text but no text run could be split (e.g. the
        // anchor ends inside an atom): try the next block instead of failing.
    }
    return { doc: out, inserted: false };
}
|
||||
/**
 * Inside callouts, rewrite a `[1]…[K]` range marker to `[1]…[n]`.
 *
 * Docmost translations use a callout that states the footnote range, e.g.
 * "[1]…[5]". When the number of notes changes, this replaces the trailing
 * number of every `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis) occurrence found
 * in text nodes located anywhere beneath a `callout` node.
 *
 * Works on a clone of `doc`; returns `{ doc, changed }`, where `changed` is
 * the number of text nodes that contained a range marker. Each text node is
 * visited exactly once, so a callout nested in another callout does not
 * inflate the count.
 *
 * @param {*} doc - Document root.
 * @param {number|string} n - New upper bound of the range.
 * @returns {{ doc: *, changed: number }}
 */
export function setCalloutRange(doc, n) {
    const out = clone(doc);
    let changed = 0;
    // Match "[1]" + (… or ...) + "[<digits>]"; only the digits are replaced.
    const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g;
    // Single traversal carrying an "inside a callout" flag downwards.
    const visit = (node, insideCallout) => {
        if (!isObject(node)) {
            return;
        }
        const inCallout = insideCallout || node.type === "callout";
        if (inCallout &&
            node.type === "text" &&
            typeof node.text === "string" &&
            node.text.search(rangeRe) !== -1) {
            // A replacer function inserts `n` literally, so digits or `$` in
            // `n` can never be parsed as part of a `$n` replacement pattern.
            node.text = node.text.replace(rangeRe, (_match, head, tail) => `${head}${n}${tail}`);
            changed++;
        }
        if (Array.isArray(node.content)) {
            for (const child of node.content) {
                visit(child, inCallout);
            }
        }
    };
    visit(out, false);
    return { doc: out, changed };
}
|
||||
/**
 * Generate a short random id for a new block's `attrs.id`. Docmost uses
 * nanoid; a random base36 string is sufficient here (uniqueness within one
 * document).
 *
 * The id is always exactly 14 characters from `0-9a-z`. Characters are drawn
 * one at a time because `Math.random().toString(36)` can yield very short
 * strings (and nothing at all after the "0." prefix when the draw is 0),
 * which would produce a truncated or even empty id.
 *
 * @returns {string} A 14-character lowercase base36 id.
 */
function freshId() {
    const alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
    let id = "";
    for (let i = 0; i < 14; i++) {
        id += alphabet[Math.floor(Math.random() * alphabet.length)];
    }
    return id;
}
|
||||
/**
 * Wrap inline ProseMirror nodes in a list item of the shape
 *   { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content }] }
 * where the paragraph gets a fresh random block id. The inline nodes are
 * cloned, so the result shares no references with the caller's input. A
 * non-array argument produces an empty paragraph.
 *
 * @param {object[]} inlineNodes - Inline nodes for the paragraph.
 * @returns {{ type: "listItem", content: object[] }}
 */
export function noteItem(inlineNodes) {
    const inlineCopy = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
    const paragraph = {
        type: "paragraph",
        attrs: { id: freshId() },
        content: inlineCopy,
    };
    return { type: "listItem", content: [paragraph] };
}
|
||||
/**
 * Wrap inline ProseMirror nodes in a footnoteDefinition node keyed by `id`:
 *   { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", ... }] }
 * (mirrors the editor-ext / docmost-schema FootnoteDefinition node). The
 * inner paragraph receives its own fresh block id, and the inline nodes are
 * cloned; a non-array argument produces an empty paragraph.
 *
 * @param {string} id - Footnote id stored on the definition node.
 * @param {object[]} inlineNodes - Inline nodes for the definition's paragraph.
 * @returns {{ type: "footnoteDefinition", attrs: { id: string }, content: object[] }}
 */
export function footnoteDefinition(id, inlineNodes) {
    const inlineCopy = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
    const paragraph = {
        type: "paragraph",
        attrs: { id: freshId() },
        content: inlineCopy,
    };
    return {
        type: "footnoteDefinition",
        attrs: { id },
        content: [paragraph],
    };
}
|
||||
/**
 * Replace every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder
 * found in the text runs of an inline content array with a
 * `footnoteReference` node, in reading order. Mutates `inline` in place.
 *
 * For each marker, `onMarker({ oldNum, phIdx })` is called: `oldNum` is the
 * number of a `[N]` marker, `phIdx` the index of a placeholder (the other
 * field is undefined). Its return value becomes the `attrs.id` of the inserted
 * reference node. Text around a marker stays in text runs that keep the
 * original run's properties and a copy of its marks.
 *
 * @param {object[]} inline - Inline content array to rewrite.
 * @param {(marker: { oldNum?: number, phIdx?: number }) => string} onMarker
 * @returns {void}
 */
function replaceMarkersWithReferences(inline, onMarker) {
    const markerRe = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
    let i = 0;
    while (i < inline.length) {
        const run = inline[i];
        const isTextRun = isObject(run) && run.type === "text" && typeof run.text === "string";
        const hits = isTextRun ? [...run.text.matchAll(markerRe)] : [];
        if (hits.length === 0) {
            i++;
            continue;
        }
        const marks = Array.isArray(run.marks) ? run.marks : [];
        const pieces = [];
        let cursor = 0;
        for (const hit of hits) {
            if (hit.index > cursor) {
                pieces.push({ ...run, text: run.text.slice(cursor, hit.index), marks: [...marks] });
            }
            const oldNum = hit[1] != null ? Number(hit[1]) : undefined;
            const phIdx = hit[2] != null ? Number(hit[2]) : undefined;
            const fnId = onMarker({ oldNum, phIdx });
            pieces.push({ type: "footnoteReference", attrs: { id: fnId } });
            cursor = hit.index + hit[0].length;
        }
        if (cursor < run.text.length) {
            pieces.push({ ...run, text: run.text.slice(cursor), marks: [...marks] });
        }
        // Drop any zero-length text runs the slicing may have produced.
        const kept = pieces.filter((p) => p.type !== "text" || (typeof p.text === "string" && p.text.length > 0));
        inline.splice(i, 1, ...kept);
        // Continue after the pieces just inserted.
        i += kept.length;
    }
}
|
||||
/**
 * Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
 * ProseMirror nodes.
 *
 * A leading `комментарий: ` prefix (case-insensitive) and then a leading
 * `N. ` numeric prefix are removed, and the result is trimmed. After that a
 * minimal bold split is applied:
 *   - if the text starts with a `**bold lead**`, the lead becomes a text node
 *     with a bold mark and everything after the closing `**` (separating
 *     whitespace included) becomes ONE plain text node;
 *   - otherwise the text goes through `splitInlineBold`, which bolds each
 *     `**...**` span it finds.
 *
 * The conversion is synchronous and dependency-free on purpose (the transform
 * sandbox runs synchronously); the async markdownToProseMirror is
 * intentionally NOT used here.
 *
 * @param {*} markdown - Comment markdown; non-strings are treated as "".
 * @returns {object[]} Inline text nodes; empty when nothing remains.
 */
export function mdToInlineNodes(markdown) {
    const source = typeof markdown === "string" ? markdown : "";
    const md = source
        .replace(/^\s*комментарий\s*:\s*/i, "")
        .replace(/^\s*\d+\.\s+/, "")
        .trim();
    if (md === "") {
        return [];
    }
    // Bold lead: **...** at the very start, plus any whitespace after it.
    const lead = /^\*\*([^*]+)\*\*\s*/.exec(md);
    if (lead === null) {
        // No bold lead: split out whatever **bold** spans the text contains.
        return splitInlineBold(md);
    }
    const [whole, leadText] = lead;
    const nodes = [
        { type: "text", text: leadText, marks: [{ type: "bold" }] },
    ];
    if (md.length > whole.length) {
        // Resume right after the closing "**" so the whitespace that
        // separated the lead from the body stays in the plain run.
        const afterBold = leadText.length + 4;
        nodes.push({ type: "text", text: md.slice(afterBold) });
    }
    return nodes;
}
|
||||
/**
 * Break a string into text nodes around its inline `**bold**` spans.
 *
 * Text between spans becomes plain text nodes; the inside of each span becomes
 * a text node with a bold mark. A string without any span (including the
 * empty string) comes back as a single plain text node holding the input.
 * Used as the no-lead fallback in mdToInlineNodes.
 */
function splitInlineBold(text) {
  const boldSpan = /\*\*([^*]+)\*\*/g;
  const pieces = [];
  let cursor = 0;

  for (let hit = boldSpan.exec(text); hit !== null; hit = boldSpan.exec(text)) {
    const start = hit.index;
    // Plain text sitting between the previous span (or the start) and this one.
    if (start > cursor) {
      pieces.push({ type: "text", text: text.slice(cursor, start) });
    }
    pieces.push({ type: "text", text: hit[1], marks: [{ type: "bold" }] });
    cursor = start + hit[0].length;
  }

  // Trailing plain text after the last span.
  if (cursor < text.length) {
    pieces.push({ type: "text", text: text.slice(cursor) });
  }

  if (pieces.length === 0) {
    return [{ type: "text", text }];
  }
  return pieces;
}
|
||||
/**
 * Turn inline comments into footnotes.
 *
 * For each inline comment that carries a `selection`:
 *   1. insert a placeholder marker (a NUL-delimited "\u0000FN<i>\u0000"
 *      sentinel, `i` being the comment's index in `comments`) right after the
 *      selection text in the BODY (before the notes heading);
 *   2. build the note's inline content from the comment's markdown via
 *      mdToInlineNodes.
 *
 * Then the body (every top-level block before the notes heading, except the
 * disclaimer callout) is walked in reading order. Each existing `[N]` marker
 * and each new placeholder is replaced by a `footnoteReference` node with a
 * fresh id, and a matching definition is collected: an existing marker takes
 * the inline content of the N-th item of the old notes list, a placeholder
 * takes the content built from its comment. The old notes `orderedList` is
 * then replaced by a `footnotesList` holding the definitions in reading order
 * (or removed when there are none), and the disclaimer callout range is synced
 * to the new note count through setCalloutRange.
 *
 * The notes list is the first `orderedList` located AT OR AFTER the notes
 * heading; ordered lists that appear earlier in the body are never treated as
 * the notes list.
 *
 * @param {object} doc - Source document; it is cloned first, not mutated.
 * @param {Array<object>} comments - Comments; entries without a `selection`
 *   are ignored. A non-array value is treated as an empty list.
 * @param {{ notesHeading?: string }} [opts] - `notesHeading` overrides the
 *   heading text that marks the start of the notes section.
 * @returns {{ doc: object, consumed: Array }} The transformed document and the
 *   ids of the comments whose selection was found and anchored.
 * @throws {Error} When the notes heading or the notes orderedList is missing,
 *   or when a body marker `[N]` has no N-th note in the old list.
 */
export function commentsToFootnotes(doc, comments, opts = {}) {
  let working = clone(doc);
  const notesHeading = opts.notesHeading ?? "Примечания переводчика";
  const isNotesHeading = (n) =>
    isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading;
  const isOrderedList = (n) => isObject(n) && n.type === "orderedList";

  const top = Array.isArray(working.content) ? working.content : [];
  const notesIdx = top.findIndex(isNotesHeading);
  if (notesIdx < 0) {
    throw new Error(`heading "${notesHeading}" not found`);
  }
  // The notes orderedList lives at or after the heading.
  if (!top.slice(notesIdx).some(isOrderedList)) {
    throw new Error("notes orderedList not found");
  }

  const consumed = [];
  const noteInlineByPh = new Map();
  (Array.isArray(comments) ? comments : []).forEach((c, i) => {
    if (!c || !c.selection) {
      return;
    }
    // Collision-proof sentinel delimited by NUL control chars, which never occur
    // in real Docmost prose - so the marker regex cannot mistake any body text
    // (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
    // transient: the placeholder is inserted here and replaced by a
    // footnoteReference node below; it never persists in a returned document.
    const ph = `\u0000FN${i}\u0000`;
    // insertMarkerAfter returns a NEW cloned doc; reassign `working`.
    const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
      beforeBlock: notesIdx,
    });
    if (!r.inserted) {
      return;
    }
    working = r.doc;
    noteInlineByPh.set(ph, mdToInlineNodes(c.content));
    consumed.push(c.id);
  });

  // Re-resolve references into the (possibly re-cloned) working doc.
  const top2 = Array.isArray(working.content) ? working.content : [];
  const notesIdx2 = top2.findIndex(isNotesHeading);
  // Search for the notes list only from the heading onwards, mirroring the
  // up-front check. Scanning from index 0 would pick up an ordinary ordered
  // list in the body and overwrite it with the footnotes list. If the heading
  // cannot be re-found, fall back to scanning the whole document.
  const listSearchStart = notesIdx2 >= 0 ? notesIdx2 : 0;
  const oldListIndex = top2.findIndex(
    (n, idx) => idx >= listSearchStart && isOrderedList(n),
  );
  const notesList2 = oldListIndex >= 0 ? top2[oldListIndex] : null;
  if (!notesList2) {
    throw new Error("notes orderedList not found");
  }

  // Inline content of each existing note (listItem -> paragraph -> inline).
  const oldNoteInline = (
    Array.isArray(notesList2.content) ? notesList2.content : []
  ).map((item) => {
    const para =
      isObject(item) && Array.isArray(item.content)
        ? item.content.find((c) => isObject(c) && c.type === "paragraph")
        : null;
    return para && Array.isArray(para.content) ? para.content : [];
  });

  // Walk the body in reading order, turning each "[N]" / placeholder marker into
  // a real footnoteReference node and collecting its definition inline content.
  const definitions = [];
  const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;

  // Recursively visit inline arrays inside a block (paragraph, heading, callout
  // child paragraphs, table cells, ...), preserving document reading order.
  const visitInlineArrays = (container) => {
    if (!isObject(container) || !Array.isArray(container.content)) {
      return;
    }
    const hasText = container.content.some((n) => isObject(n) && n.type === "text");
    if (!hasText) {
      // Not an inline array yet: descend into the child blocks.
      for (const child of container.content) {
        visitInlineArrays(child);
      }
      return;
    }
    replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
      const fnId = freshId();
      if (oldNum != null) {
        const inline = oldNoteInline[oldNum - 1];
        // Every existing body marker MUST map to a real note. An out-of-range
        // marker means the document is internally inconsistent; fail loudly.
        if (inline === undefined) {
          throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`);
        }
        definitions.push(footnoteDefinition(fnId, inline));
      } else {
        const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
        definitions.push(footnoteDefinition(fnId, inline));
      }
      return fnId;
    });
  };

  const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
  for (let i = 0; i < notesBoundary; i++) {
    // Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a footnote
    // marker and is synced separately by setCalloutRange.
    if (
      isObject(top2[i]) &&
      top2[i].type === "callout" &&
      disclaimerRangeRe.test(blockText(top2[i]))
    ) {
      continue;
    }
    visitInlineArrays(top2[i]);
  }

  // Replace the old orderedList with a real footnotesList of the collected
  // definitions (reading order). If there are no definitions, drop the list.
  if (definitions.length > 0) {
    top2[oldListIndex] = {
      type: "footnotesList",
      content: definitions,
    };
  } else {
    top2.splice(oldListIndex, 1);
  }

  // Sync the disclaimer callout range to the new note count.
  const synced = setCalloutRange(working, definitions.length);
  return { doc: synced.doc, consumed };
}
|
||||
@@ -1,89 +0,0 @@
|
||||
/**
 * Pure tree-builder: turn a flat array of sidebar-style page nodes (as produced
 * by `enumerateSpacePages`) into a nested tree.
 *
 * Input: a flat array (or other iterable) of nodes. Each node is expected to
 * carry at least { id, slugId, title, position, parentPageId }; extra fields
 * are ignored, and entries that are not objects or have no `id` are skipped.
 * A null/undefined input yields an empty tree.
 *
 * Output: an array of ROOT nodes, each shaped as
 *   { id, slugId, title, children? }
 * where `children` is the array of child nodes (same shape, recursively). The
 * `children` key is OMITTED entirely when a node has no children, to keep the
 * payload lean (nesting alone conveys the structure; parentPageId/position are
 * intentionally dropped from the output).
 *
 * Linking rule: a node is attached as a child of `parentPageId` only when that
 * parent id is actually present in the input. Otherwise — including a null /
 * undefined `parentPageId`, a parent missing from the input, or a node that
 * names ITSELF as its parent — the node is promoted to a ROOT, so it surfaces
 * at the top level rather than disappearing.
 *
 * Duplicate ids: the LAST occurrence wins for every field (slugId, title,
 * position, parentPageId) and the node is emitted exactly once.
 *
 * Ordering rule: the roots array and every `children` array are sorted ascending
 * by the node's `position` string. The comparator is a plain code-unit
 * comparison — NOT localeCompare — to approximate the server's byte-order
 * (`collate "C"`) ordering of the fractional-index ASCII keys (e.g. "a0",
 * "a1"). Nodes with a missing/undefined `position` sort last.
 *
 * Pure: no I/O, no network, deterministic.
 *
 * @param {Iterable<object>|null|undefined} nodes - Flat page nodes.
 * @returns {Array<object>} Sorted root nodes with nested `children`.
 */
export function buildPageTree(nodes) {
  const input = nodes == null ? [] : Array.from(nodes);

  // id -> lean output node, plus the fields needed only while building. All
  // three maps are filled in the same pass so that a duplicated id is
  // overwritten consistently (last one wins) instead of being linked twice.
  const byId = new Map();
  const positionById = new Map();
  const parentById = new Map();
  for (const node of input) {
    if (!node || typeof node !== "object" || !node.id) {
      continue;
    }
    byId.set(node.id, {
      id: node.id,
      slugId: node.slugId,
      title: node.title,
    });
    positionById.set(node.id, node.position);
    parentById.set(node.id, node.parentPageId);
  }

  // Comparator on the position string: code-unit order, missing last.
  const byPosition = (aId, bId) => {
    const a = positionById.get(aId);
    const b = positionById.get(bId);
    if (a == null) {
      return b == null ? 0 : 1;
    }
    if (b == null) {
      return -1;
    }
    if (a < b) {
      return -1;
    }
    if (a > b) {
      return 1;
    }
    return 0;
  };

  const roots = [];
  const childrenIdsByParent = new Map();
  // Iterate the de-duplicated ids (Map keys keep first-insertion order), not
  // the raw input, so every node is linked exactly once.
  for (const id of byId.keys()) {
    const parentId = parentById.get(id);
    // Child only when the parent is a DIFFERENT node actually present in the
    // input; otherwise (null/undefined parent, absent parent, self-parent) -> root.
    if (parentId && parentId !== id && byId.has(parentId)) {
      const siblings = childrenIdsByParent.get(parentId) ?? [];
      siblings.push(id);
      childrenIdsByParent.set(parentId, siblings);
    } else {
      roots.push(id);
    }
  }

  // Attach sorted children arrays; parents without children never get the key.
  for (const [parentId, childIds] of childrenIdsByParent) {
    childIds.sort(byPosition);
    byId.get(parentId).children = childIds.map((id) => byId.get(id));
  }

  roots.sort(byPosition);
  return roots.map((id) => byId.get(id));
}
|
||||
@@ -1,40 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
||||
import { createDocmostMcpServer } from "./index.js";
|
||||
// Standalone stdio entrypoint for the `docmost-mcp` CLI: credentials come from
// the environment and the MCP protocol is served over stdin/stdout. The
// factory in index.ts stays side-effect-free; all process/transport lifecycle
// handling lives in this file.
const {
  DOCMOST_API_URL: API_URL,
  DOCMOST_EMAIL: EMAIL,
  DOCMOST_PASSWORD: PASSWORD,
} = process.env;

// All three settings are mandatory; a missing or empty one aborts startup.
const credentialsIncomplete = [API_URL, EMAIL, PASSWORD].some((value) => !value);
if (credentialsIncomplete) {
  console.error("Error: DOCMOST_API_URL, DOCMOST_EMAIL, and DOCMOST_PASSWORD environment variables are required.");
  process.exit(1);
}
|
||||
/**
 * Start the stdio MCP server: install the process-level safety nets, build the
 * server from the environment credentials and connect it to a stdio transport.
 */
async function run() {
  // Global safety nets so a stray rejection/exception cannot silently kill the
  // stdio server. Per-tool errors still flow through the SDK and are not
  // affected by these handlers; they only catch errors raised OUTSIDE a tool
  // call (e.g. a transient ws/collab socket "error" event). Such errors must
  // NOT tear down the whole stdio server, so they are logged and the process
  // keeps running. Genuine startup failures remain fatal through the
  // run().catch(...) handler below.
  const logAndContinue = (label) => (detail) => {
    console.error(label, detail);
  };
  process.on("unhandledRejection", logAndContinue("Unhandled promise rejection:"));
  process.on("uncaughtException", logAndContinue("Uncaught exception:"));

  const credentials = {
    apiUrl: API_URL,
    email: EMAIL,
    password: PASSWORD,
  };
  const server = createDocmostMcpServer(credentials);
  await server.connect(new StdioServerTransport());
}

// Anything that rejects during startup is fatal: report it and exit non-zero.
run().catch((error) => {
  console.error("Fatal error running server:", error);
  process.exit(1);
});
|
||||
@@ -1,212 +0,0 @@
|
||||
// Zod-agnostic shared tool-spec registry consumed by BOTH the zod-v3 MCP server
// (packages/mcp/src/index.ts) and the zod-v4 in-app AI-SDK service
// (apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts). Intentionally
// imports NO zod: each consumer passes its OWN zod namespace into buildShape,
// because the two packages are on different zod majors (v3 here, v4 in the
// server) and a zod schema object built with one major cannot be reused by the
// other. The builders below only touch z.string(), z.boolean(), z.array() and
// z.object() together with .min()/.optional()/.describe(), so a single builder
// is meant to work with either namespace.
//
// Only tools whose snake_case/camelCase name, input schema AND model-facing
// description are genuinely identical across both layers live here. Tools that
// diverge on purpose (security guardrails, tuned UX, "Reversible" framing on
// some write tools, different limits, hybrid-RRF search, etc.) stay defined
// per-layer and are NOT represented here.
//
// Entry shape:
//   mcpName     - snake_case tool name
//   inAppKey    - camelCase tool key (equal to the entry's own key)
//   description - model-facing description text
//   buildShape  - optional; (z) => plain object of zod fields. Omitted for
//                 tools that take no arguments.
export const SHARED_TOOL_SPECS = {
  // --- no-argument read tools ---
  getWorkspace: {
    mcpName: 'get_workspace',
    inAppKey: 'getWorkspace',
    description: 'Fetch metadata about the current workspace (name, settings).',
  },
  listSpaces: {
    mcpName: 'list_spaces',
    inAppKey: 'listSpaces',
    description: 'List the spaces the current user can access. Returns the array of ' +
      'spaces (id, name, slug, ...).',
  },
  listShares: {
    mcpName: 'list_shares',
    inAppKey: 'listShares',
    description: 'List all public shares in the workspace with page titles and public URLs.',
  },
  // --- single-pageId read tools ---
  getPageJson: {
    mcpName: 'get_page_json',
    inAppKey: 'getPageJson',
    description: 'Get page details with the raw ProseMirror JSON content (lossless: ' +
      'includes block ids, callouts, tables, link/image attributes) plus the ' +
      'slugId used in URLs. Use the block ids it returns to make precise ' +
      'structural edits or surgical text edits without resending the page.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),
  },
  getOutline: {
    mcpName: 'get_outline',
    inAppKey: 'getOutline',
    description: "Return a COMPACT outline of a page's top-level blocks ({index, type, " +
      'id, level, firstText}; tables add rows/cols/header; lists add item ' +
      'count) WITHOUT the full document body. Use it to locate sections/tables ' +
      'and grab block ids cheaply before fetching, patching or inserting ' +
      'individual blocks.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),
  },
  // --- two-id read tool ---
  getNode: {
    mcpName: 'get_node',
    inAppKey: 'getNode',
    description: "Fetch a single node's full ProseMirror subtree (lossless) without " +
      'pulling the whole document. `nodeId` is a block id from the page ' +
      'outline or page-JSON view (works for headings/paragraphs/callouts/images), OR ' +
      '`#<index>` to fetch a top-level block by its outline index — use the ' +
      '`#<index>` form for tables/rows/cells, which carry no id.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      nodeId: z.string().min(1),
    }),
  },
  // --- node delete ---
  deleteNode: {
    mcpName: 'delete_node',
    inAppKey: 'deleteNode',
    description: 'Remove a single block by its attrs.id (from the page-JSON view) WITHOUT ' +
      'resending the whole document.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      nodeId: z.string().min(1),
    }),
  },
  // --- share management ---
  unsharePage: {
    mcpName: 'unshare_page',
    inAppKey: 'unsharePage',
    description: 'Remove the public share of a page (revokes the public URL).',
    buildShape: (z) => ({
      pageId: z.string().min(1).describe('ID of the page to unshare'),
    }),
  },
  // --- version history ---
  // NOTE(review): the description says `from`/`to` accept null, but the schema
  // below is string().optional(), which does not admit null — confirm which
  // one is intended before changing either.
  diffPageVersions: {
    mcpName: 'diff_page_versions',
    inAppKey: 'diffPageVersions',
    description: 'Diff two versions of a page and return a Docmost-equivalent change set ' +
      '(inserted/deleted text, integrity counts for images/links/tables/' +
      'callouts/footnote markers, and a human-readable markdown summary). ' +
      "`from`/`to` each accept a historyId, or null/'current' for the page's " +
      'current content (defaults: from=current, to=current — pass a historyId ' +
      'from the page-history list to compare against the live page).',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      from: z
        .string()
        .optional()
        .describe("historyId, or 'current'/omit for current content"),
      to: z
        .string()
        .optional()
        .describe("historyId, or 'current'/omit for current content"),
    }),
  },
  listPageHistory: {
    mcpName: 'list_page_history',
    inAppKey: 'listPageHistory',
    description: "List a page's saved versions (Docmost auto-snapshots on every save), " +
      'newest first, cursor-paginated. Returns { items, nextCursor }; each ' +
      "item's id is the historyId to pass to the page diff or restore tools.",
    buildShape: (z) => ({
      pageId: z.string().min(1),
      cursor: z
        .string()
        .optional()
        .describe('Pagination cursor from a previous nextCursor'),
    }),
  },
  restorePageVersion: {
    mcpName: 'restore_page_version',
    inAppKey: 'restorePageVersion',
    description: 'Restore a page to a saved version: writes that version\'s content back ' +
      'as the page\'s current content (Docmost has no restore endpoint, so ' +
      'this creates a NEW history snapshot — the restore is itself revertible). ' +
      'Get the historyId from the page-history list.',
    buildShape: (z) => ({
      historyId: z.string().min(1),
    }),
  },
  // --- markdown round-trip ---
  importPageMarkdown: {
    mcpName: 'import_page_markdown',
    inAppKey: 'importPageMarkdown',
    description: "Replace a page's content from a self-contained Docmost-flavoured " +
      'Markdown file produced by the page-Markdown export tool. Restores comment ' +
      'highlight anchors and diagrams from their inline HTML. NOTE: comment ' +
      'thread records are NOT created/updated/deleted on the server by this ' +
      'tool — only the page body + inline comment marks are written; manage ' +
      'comment threads via the comment tools/UI.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      markdown: z.string().min(1),
    }),
  },
  // --- server-side content copy ---
  copyPageContent: {
    mcpName: 'copy_page_content',
    inAppKey: 'copyPageContent',
    description: "Replace targetPageId's content with a copy of sourcePageId's content, " +
      'entirely server-side — the document is NOT sent through the model. The ' +
      'target keeps its own title and slug; only its body is replaced. Ideal ' +
      "for 'make page A's content equal to B' or 'replace A with B but keep A's URL'.",
    buildShape: (z) => ({
      sourcePageId: z.string().min(1).describe('Page to copy content FROM'),
      targetPageId: z
        .string()
        .min(1)
        .describe('Page whose content is REPLACED (title/slug kept)'),
    }),
  },
  // --- surgical text edit (folds in the documented drift-bug fix) ---
  //
  // CANONICAL description is the CORRECTED in-app wording: a formatting-only
  // change is REFUSED into failed[] (not silently stripped-and-retried). The
  // stale MCP claim that "Markdown wrappers are tolerated via a strip-and-retry
  // fallback" is intentionally absent here.
  editPageText: {
    mcpName: 'edit_page_text',
    inAppKey: 'editPageText',
    description: "Surgical find/replace inside a page's text, preserving all block " +
      'ids and marks. A find MAY cross bold/italic/link boundaries; the ' +
      'replacement inherits marks from the unchanged common prefix/suffix ' +
      '(so editing plain text next to a bold word keeps it bold, and ' +
      'editing inside a bold word keeps the new text bold). Each find must ' +
      'match exactly once unless replaceAll is set. The batch applies what ' +
      'it can and returns applied[] + failed[] plus a verify change-report ' +
      '(the text/marks/structure that ACTUALLY changed — read it to confirm ' +
      'your edit landed; do not assume success); a fully-unmatched batch ' +
      'writes nothing and errors. find and replace are LITERAL text, not ' +
      'markdown. This tool edits plain text ONLY and CANNOT add or remove ' +
      'formatting marks: a formatting change — find/replace that differ only ' +
      'in markdown markers (e.g. find:"~~x~~", replace:"x"), or a replace ' +
      'containing **bold**/~~strike~~/`code` wrappers — is REFUSED into ' +
      'failed[]. To change bold/italic/strike/code/link, read the block as ' +
      'page JSON and use a structural node patch/update to set its marks. ' +
      'Examples: edits:[{find:"teh",replace:"the"}]; edits:[{find:"Hello ' +
      'world",replace:"Hello there"}] (crosses a bold boundary).',
    buildShape: (z) => ({
      // Non-empty like every other pageId in this registry: an empty id cannot
      // name a page, so reject it at validation time.
      pageId: z.string().min(1).describe('ID of the page to edit'),
      edits: z
        .array(z.object({
          find: z.string().describe('Exact text to find'),
          replace: z.string().describe('Replacement text (may be empty)'),
          replaceAll: z
            .boolean()
            .optional()
            .describe('Replace every occurrence (default: must match once)'),
        }))
        .min(1)
        .describe('List of find/replace operations, applied in order'),
    }),
  },
};
|
||||
17
packages/mcp/node_modules/.bin/marked
generated
vendored
17
packages/mcp/node_modules/.bin/marked
generated
vendored
@@ -1,17 +0,0 @@
|
||||
#!/bin/sh
# Launcher shim for the `marked` CLI: resolves its own directory, extends
# NODE_PATH with the package's module directories and hands over to node.
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")

# On Cygwin the directory is converted with `cygpath -w`.
case "$(uname)" in
  *CYGWIN*) basedir=$(cygpath -w "$basedir") ;;
esac

# Prepend the module directories; an existing non-empty NODE_PATH is kept at the end.
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules${NODE_PATH:+:$NODE_PATH}"

# Prefer a node binary sitting next to this shim; otherwise use the one on PATH.
node_bin=node
if [ -x "$basedir/node" ]; then
  node_bin="$basedir/node"
fi
exec "$node_bin" "$basedir/../marked/bin/marked.js" "$@"
|
||||
17
packages/mcp/node_modules/.bin/tsc
generated
vendored
17
packages/mcp/node_modules/.bin/tsc
generated
vendored
@@ -1,17 +0,0 @@
|
||||
#!/bin/sh
# Launcher shim for the TypeScript compiler (`tsc`): resolves its own
# directory, extends NODE_PATH with the package's module directories and hands
# over to node.
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")

# On Cygwin the directory is converted with `cygpath -w`.
case "$(uname)" in
  *CYGWIN*) basedir=$(cygpath -w "$basedir") ;;
esac

# Prepend the module directories; an existing non-empty NODE_PATH is kept at the end.
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules${NODE_PATH:+:$NODE_PATH}"

# Prefer a node binary sitting next to this shim; otherwise use the one on PATH.
node_bin=node
if [ -x "$basedir/node" ]; then
  node_bin="$basedir/node"
fi
exec "$node_bin" "$basedir/../typescript/bin/tsc" "$@"
|
||||
17
packages/mcp/node_modules/.bin/tsserver
generated
vendored
17
packages/mcp/node_modules/.bin/tsserver
generated
vendored
@@ -1,17 +0,0 @@
|
||||
#!/bin/sh
# Launcher shim for the TypeScript language server (`tsserver`): resolves its
# own directory, extends NODE_PATH with the package's module directories and
# hands over to node.
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")

# On Cygwin the directory is converted with `cygpath -w`.
case "$(uname)" in
  *CYGWIN*) basedir=$(cygpath -w "$basedir") ;;
esac

# Prepend the module directories; an existing non-empty NODE_PATH is kept at the end.
export NODE_PATH="/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/Users/vvzvlad/Data/Projects/gitmost/node_modules/.pnpm/node_modules${NODE_PATH:+:$NODE_PATH}"

# Prefer a node binary sitting next to this shim; otherwise use the one on PATH.
node_bin=node
if [ -x "$basedir/node" ]; then
  node_bin="$basedir/node"
fi
exec "$node_bin" "$basedir/../typescript/bin/tsserver" "$@"
|
||||
1
packages/mcp/node_modules/@fellow/prosemirror-recreate-transform
generated
vendored
1
packages/mcp/node_modules/@fellow/prosemirror-recreate-transform
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@fellow+prosemirror-recreate-transform@1.2.3/node_modules/@fellow/prosemirror-recreate-transform
|
||||
1
packages/mcp/node_modules/@hocuspocus/provider
generated
vendored
1
packages/mcp/node_modules/@hocuspocus/provider
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@hocuspocus+provider@3.4.4_y-protocols@1.0.6_yjs@13.6.30_patch_hash=1ceeb66dba1f86545c9_bc01a253a9579de2451e72d099c2c9d7/node_modules/@hocuspocus/provider
|
||||
1
packages/mcp/node_modules/@hocuspocus/transformer
generated
vendored
1
packages/mcp/node_modules/@hocuspocus/transformer
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@hocuspocus+transformer@3.4.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4__3efc11776a1877aaec07b26dc33505b1/node_modules/@hocuspocus/transformer
|
||||
1
packages/mcp/node_modules/@modelcontextprotocol/sdk
generated
vendored
1
packages/mcp/node_modules/@modelcontextprotocol/sdk
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@modelcontextprotocol+sdk@1.29.0_@cfworker+json-schema@4.1.1_zod@3.25.76/node_modules/@modelcontextprotocol/sdk
|
||||
1
packages/mcp/node_modules/@tiptap/core
generated
vendored
1
packages/mcp/node_modules/@tiptap/core
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+core@3.20.4_@tiptap+pm@3.20.4/node_modules/@tiptap/core
|
||||
1
packages/mcp/node_modules/@tiptap/extension-highlight
generated
vendored
1
packages/mcp/node_modules/@tiptap/extension-highlight
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-highlight@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-highlight
|
||||
1
packages/mcp/node_modules/@tiptap/extension-image
generated
vendored
1
packages/mcp/node_modules/@tiptap/extension-image
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-image@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-image
|
||||
1
packages/mcp/node_modules/@tiptap/extension-link
generated
vendored
1
packages/mcp/node_modules/@tiptap/extension-link
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-link@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-link
|
||||
1
packages/mcp/node_modules/@tiptap/extension-subscript
generated
vendored
1
packages/mcp/node_modules/@tiptap/extension-subscript
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-subscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-subscript
|
||||
1
packages/mcp/node_modules/@tiptap/extension-superscript
generated
vendored
1
packages/mcp/node_modules/@tiptap/extension-superscript
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-superscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-superscript
|
||||
1
packages/mcp/node_modules/@tiptap/extension-task-item
generated
vendored
1
packages/mcp/node_modules/@tiptap/extension-task-item
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-task-item@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_f120fce1a3d9fc85461b67496f03c362/node_modules/@tiptap/extension-task-item
|
||||
1
packages/mcp/node_modules/@tiptap/extension-task-list
generated
vendored
1
packages/mcp/node_modules/@tiptap/extension-task-list
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-task-list@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_c94f69f56aee3556ec680ab7491aa1d4/node_modules/@tiptap/extension-task-list
|
||||
1
packages/mcp/node_modules/@tiptap/html
generated
vendored
1
packages/mcp/node_modules/@tiptap/html
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+html@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4_happy-dom@20.8.9/node_modules/@tiptap/html
|
||||
1
packages/mcp/node_modules/@tiptap/starter-kit
generated
vendored
1
packages/mcp/node_modules/@tiptap/starter-kit
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+starter-kit@3.20.4/node_modules/@tiptap/starter-kit
|
||||
1
packages/mcp/node_modules/@types/form-data
generated
vendored
1
packages/mcp/node_modules/@types/form-data
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@types+form-data@2.5.2/node_modules/@types/form-data
|
||||
1
packages/mcp/node_modules/@types/jsdom
generated
vendored
1
packages/mcp/node_modules/@types/jsdom
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@types+jsdom@27.0.0/node_modules/@types/jsdom
|
||||
1
packages/mcp/node_modules/@types/node
generated
vendored
1
packages/mcp/node_modules/@types/node
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../../node_modules/.pnpm/@types+node@20.19.43/node_modules/@types/node
|
||||
1
packages/mcp/node_modules/axios
generated
vendored
1
packages/mcp/node_modules/axios
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/axios@1.16.0/node_modules/axios
|
||||
1
packages/mcp/node_modules/form-data
generated
vendored
1
packages/mcp/node_modules/form-data
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/form-data@4.0.5/node_modules/form-data
|
||||
1
packages/mcp/node_modules/jsdom
generated
vendored
1
packages/mcp/node_modules/jsdom
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/jsdom@27.4.0_@noble+hashes@2.0.1/node_modules/jsdom
|
||||
1
packages/mcp/node_modules/marked
generated
vendored
1
packages/mcp/node_modules/marked
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/marked@17.0.5/node_modules/marked
|
||||
1
packages/mcp/node_modules/typescript
generated
vendored
1
packages/mcp/node_modules/typescript
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/typescript@5.9.3/node_modules/typescript
|
||||
1
packages/mcp/node_modules/ws
generated
vendored
1
packages/mcp/node_modules/ws
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/ws@8.20.1/node_modules/ws
|
||||
1
packages/mcp/node_modules/yjs
generated
vendored
1
packages/mcp/node_modules/yjs
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/yjs@13.6.30_patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810/node_modules/yjs
|
||||
1
packages/mcp/node_modules/zod
generated
vendored
1
packages/mcp/node_modules/zod
generated
vendored
@@ -1 +0,0 @@
|
||||
../../../node_modules/.pnpm/zod@3.25.76/node_modules/zod
|
||||
Reference in New Issue
Block a user