feat(sync): scaffold monorepo, extract docmost-client, add Phase-0 harness + read-only pull

Lock the access-layer decision (REST only) and start implementation per SPEC. - monorepo (npm workspaces): packages/docmost-client = DocmostClient + lib/* copied 1:1 from docmost-mcp/src (backport target), plus bannered sync methods (listTrash, restorePage, listAllSpacePages, exportPageBody, listRecentSince / collectRecentSince cursor scan) - engine stays the root app per AGENTS.md (src/, test/, build/, data/, settings.ts); add roundtrip.ts (SPEC §11 idempotency harness), pull.ts (SPEC §6 read-only Docmost->FS mirror), sanitize.ts (SPEC §12 filenames, path-traversal-safe) - Dockerfile builds the workspace lib before the app; vitest gates CI - exportPageBody never touches /comments (SPEC §3); serializeDocmostMarkdownBody emits meta + body only - SPEC: resolve access-layer (REST), reflect root-engine layout + REST pagination - tests: sanitize (incl. dot-traversal), collectRecentSince (cutoff/dedup/cap), stripBlockIds, markdown round-trip byte-stability Note: raw ProseMirror round-trip is byte-stable in Markdown but not yet attribute-idempotent (SPEC §11 Задача №0, before Phase 2).
2026-06-16 20:20:20 +03:00
parent 2f92dc4c1f
commit 447d2508ae
33 changed files with 10502 additions and 174 deletions
--- a/packages/docmost-client/package.json
+++ b/packages/docmost-client/package.json
@@ -0,0 +1,44 @@
+{
+  "name": "docmost-client",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "main": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "build": "tsc -p tsconfig.json"
+  },
+  "dependencies": {
+    "@fellow/prosemirror-recreate-transform": "^1.2.3",
+    "@hocuspocus/provider": "^3.4.4",
+    "@hocuspocus/transformer": "^3.4.4",
+    "@tiptap/core": "^3.18.0",
+    "@tiptap/extension-highlight": "^3.26.1",
+    "@tiptap/extension-image": "^3.18.0",
+    "@tiptap/extension-subscript": "^3.26.1",
+    "@tiptap/extension-superscript": "^3.26.1",
+    "@tiptap/extension-task-item": "^3.26.1",
+    "@tiptap/extension-task-list": "^3.26.1",
+    "@tiptap/html": "^3.18.0",
+    "@tiptap/pm": "^3.18.0",
+    "@tiptap/starter-kit": "^3.18.0",
+    "axios": "^1.6.0",
+    "form-data": "^4.0.0",
+    "jsdom": "^27.4.0",
+    "marked": "^17.0.1",
+    "ws": "^8.19.0",
+    "yjs": "^13.6.29"
+  },
+  "devDependencies": {
+    "@types/jsdom": "^27.0.0",
+    "@types/node": "^20.0.0",
+    "@types/ws": "^8.5.10",
+    "typescript": "^5.0.0"
+  }
+}
--- a/packages/docmost-client/src/client.ts
+++ b/packages/docmost-client/src/client.ts
--- a/packages/docmost-client/src/index.ts
+++ b/packages/docmost-client/src/index.ts
@@ -0,0 +1,23 @@
+/**
+ * Public surface of the `docmost-client` package.
+ *
+ * This is a NEW barrel authored for docmost-sync (it is NOT copied from
+ * docmost-mcp, whose `src/index.ts` is the MCP-server entry point and is
+ * deliberately not part of this package). It re-exports the pieces the sync
+ * engine and other consumers need: the REST client, the self-contained
+ * markdown (de)serializers, and the lossless ProseMirror <-> Markdown
+ * converter.
+ */
+
+export { DocmostClient, collectRecentSince } from "./client.js";
+
+export {
+  serializeDocmostMarkdown,
+  parseDocmostMarkdown,
+  serializeDocmostMarkdownBody,
+} from "./lib/markdown-document.js";
+export type { DocmostMdMeta } from "./lib/markdown-document.js";
+
+export { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
+
+export { markdownToProseMirror } from "./lib/collaboration.js";
--- a/packages/docmost-client/src/lib/auth-utils.ts
+++ b/packages/docmost-client/src/lib/auth-utils.ts
@@ -0,0 +1,86 @@
+import axios from "axios";
+
+/**
+ * Exchange an API token for a collaboration token.
+ *
+ * POSTs an empty JSON body to `${baseUrl}/auth/collab-token` with the API token
+ * as a Bearer credential and reads the token from the response body, accepting
+ * both the wrapped `{ data: { token } }` shape and a bare `{ token }`.
+ *
+ * @param baseUrl  API base URL; `/auth/collab-token` is appended verbatim.
+ * @param apiToken Token sent in the `Authorization: Bearer` header.
+ * @returns The collaboration token (always a non-empty string).
+ * @throws Error carrying a `status` property (the HTTP status, or `undefined`
+ *   when no response was received) if the request fails; a plain Error if the
+ *   response contains no token.
+ */
+export async function getCollabToken(
+  baseUrl: string,
+  apiToken: string,
+): Promise<string> {
+  try {
+    const response = await axios.post(
+      `${baseUrl}/auth/collab-token`,
+      {},
+      {
+        headers: {
+          Authorization: `Bearer ${apiToken}`,
+          "Content-Type": "application/json",
+        },
+      },
+    );
+
+    // Response is wrapped in { data: { token: ... } }; fall back to { token }.
+    const token = response.data?.data?.token || response.data?.token;
+    // Never resolve with undefined/empty: the declared return type is string
+    // and a missing token would only fail later, far from the cause.
+    if (typeof token !== "string" || token.length === 0) {
+      throw new Error(
+        "Failed to get collab token: response did not contain a token",
+      );
+    }
+    return token;
+  } catch (error) {
+    if (axios.isAxiosError(error)) {
+      // Attach the HTTP status to the plain Error so callers (e.g.
+      // getCollabTokenWithReauth) can still detect a 401/403 after the
+      // original AxiosError has been wrapped away.
+      // Avoid leaking the full server response body by default; include only
+      // status + statusText. Append the body only when DEBUG is set.
+      const status = error.response?.status;
+      let message: string;
+      if (error.response) {
+        message = `Failed to get collab token: ${status} ${error.response.statusText}`;
+        if (process.env.DEBUG) {
+          message += ` - ${JSON.stringify(error.response.data)}`;
+        }
+      } else {
+        // No response at all (refused connection, DNS failure, timeout):
+        // report the transport error instead of "undefined undefined".
+        message = `Failed to get collab token: no response from server (${error.code ?? error.message})`;
+      }
+      throw Object.assign(new Error(message), { status });
+    }
+    throw error;
+  }
+}
+
+/**
+ * Log in with email/password and return the session token.
+ *
+ * POSTs the credentials to `${baseUrl}/auth/login` and extracts the value of
+ * the `authToken` cookie from the response's `Set-Cookie` headers.
+ *
+ * @param baseUrl  API base URL; `/auth/login` is appended verbatim.
+ * @param email    Account email sent in the request body.
+ * @param password Account password sent in the request body.
+ * @returns The non-empty value of the `authToken` cookie.
+ * @throws Error when the response has no `Set-Cookie` header, no `authToken`
+ *   cookie, or an empty `authToken` value; request errors are logged and
+ *   rethrown unchanged.
+ */
+export async function performLogin(
+  baseUrl: string,
+  email: string,
+  password: string,
+): Promise<string> {
+  try {
+    const response = await axios.post(`${baseUrl}/auth/login`, {
+      email,
+      password,
+    });
+
+    // Extract token from Set-Cookie header
+    const cookies = response.headers["set-cookie"];
+    if (!cookies) {
+      throw new Error("No Set-Cookie header found in login response");
+    }
+
+    let token: string | undefined;
+    for (const cookie of cookies) {
+      // Only the leading "name=value" pair matters; attributes follow ";".
+      const pair = cookie.split(";")[0];
+      const eq = pair.indexOf("=");
+      // A pair without "=" has no name/value split. Skipping it matters:
+      // slicing with indexOf's -1 would drop the last character and let a
+      // pair such as "authTokenX" masquerade as "authToken".
+      if (eq === -1) continue;
+      // Match the cookie name exactly to avoid matching a future
+      // authTokenRefresh cookie (startsWith would catch it).
+      if (pair.slice(0, eq).trim() !== "authToken") continue;
+      // Take everything after the FIRST "=" up to the first ";".
+      // Splitting on "=" would truncate base64 values containing "=" padding.
+      token = pair.slice(eq + 1).trim();
+      break;
+    }
+    if (token === undefined) {
+      throw new Error("No authToken cookie found in login response");
+    }
+    if (token === "") {
+      // An empty value is what a cookie-clearing response looks like; it is
+      // not a usable credential.
+      throw new Error("authToken cookie in login response is empty");
+    }
+    return token;
+  } catch (error: unknown) {
+    // Avoid leaking the full server response body by default; log only the
+    // HTTP status. Log the verbose body only when DEBUG is set.
+    if (axios.isAxiosError(error)) {
+      if (process.env.DEBUG) {
+        console.error("Login failed:", error.response?.data);
+      } else {
+        console.error("Login failed:", error.response?.status);
+      }
+    } else {
+      console.error(
+        "Login failed:",
+        error instanceof Error ? error.message : error,
+      );
+    }
+    throw error;
+  }
+}
--- a/packages/docmost-client/src/lib/collaboration.ts
+++ b/packages/docmost-client/src/lib/collaboration.ts
@@ -0,0 +1,618 @@
+import { HocuspocusProvider } from "@hocuspocus/provider";
+import { TiptapTransformer } from "@hocuspocus/transformer";
+import * as Y from "yjs";
+import WebSocket from "ws";
+import { marked } from "marked";
+import { generateJSON } from "@tiptap/html";
+import { JSDOM } from "jsdom";
+import { docmostExtensions } from "./docmost-schema.js";
+import { withPageLock } from "./page-lock.js";
+import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
+
+// Import-time side effect: installs a JSDOM window/document/Element and the "ws" WebSocket on Node's `global` so Tiptap HTML parsing works in Node.js.
+const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
+global.window = dom.window as any;
+global.document = dom.window.document;
+// @ts-ignore
+global.Element = dom.window.Element;
+// @ts-ignore
+global.WebSocket = WebSocket;
+// Navigator is read-only in newer Node versions and already exists, so it is deliberately left untouched:
+// global.navigator = dom.window.navigator;
+
+/**
+ * Hard ceiling above which we skip callout preprocessing entirely. The linear
+ * scanner below has no quadratic blow-up, but we still cap input defensively so
+ * a pathological multi-megabyte payload cannot tie up the event loop; in that
+ * case the markdown is passed through verbatim (callouts are simply not
+ * detected) rather than risking a slow scan.
+ *
+ * Despite the name, the value is compared against `string.length` (UTF-16 code
+ * units) by preprocessCallouts and bridgeTaskLists, so it bounds the character
+ * count of the input rather than its encoded byte size.
+ */
+const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
+
+/**
+ * Matches an opening callout fence: `:::type`, optionally with whitespace
+ * between the colons and the type and at the end of the line. The type is
+ * captured in group 1 as written; lower-casing is done by the caller.
+ */
+const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
+/** Matches a bare closing callout fence: `:::` (trailing whitespace allowed). */
+const CALLOUT_CLOSE_RE = /^:::\s*$/;
+/**
+ * Matches the start/end of a code fence (``` or ~~~). Group 1 is the leading
+ * whitespace, group 2 the run of three or more backticks or tildes. The pattern
+ * is anchored only at the line start, so any text after the run is ignored.
+ */
+const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
+
+/**
+ * Pre-process Docmost-flavoured markdown: convert `:::type ... :::`
+ * callout blocks (the syntax our markdown export produces) into HTML
+ * divs that the callout extension parses. The inner content is rendered
+ * through marked as regular markdown.
+ *
+ * Implemented as a single linear pass over the lines (no quadratic regex
+ * rescan). It:
+ *   - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
+ *     `:::` line that lives inside a code fence as a callout delimiter, so a
+ *     callout body that itself contains a fenced code block with a `:::` line is
+ *     no longer corrupted;
+ *   - matches an opening `:::type` line with the next CLOSING `:::` at the SAME
+ *     nesting level, supporting NESTED callouts via a depth counter (an inner
+ *     `:::type` opens a deeper level and consumes a matching `:::`);
+ *   - emits the same `<div data-type="callout" data-callout-type="TYPE">` output
+ *     (inner rendered through marked) as the previous regex implementation.
+ *
+ * Inputs whose `length` exceeds MAX_CALLOUT_PREPROCESS_BYTES are returned
+ * unchanged. An opening fence with no matching close is emitted as a literal
+ * line and scanning resumes on the line after it. A code fence is considered
+ * closed by a line that starts (after optional whitespace) with a run of the
+ * same character at least as long as the opening run.
+ *
+ * NOTE(review): every opener scans forward for its close, and after an
+ * unterminated opener the outer index advances by only one line, so a later
+ * opener scans the same remainder again; callout bodies are additionally
+ * re-scanned by the recursive call. The "single linear pass" wording above
+ * therefore looks optimistic for inputs with many unterminated or deeply
+ * nested openers — confirm the worst-case cost under the size cap.
+ *
+ * @param markdown Docmost-flavoured markdown source.
+ * @returns The markdown with every terminated callout replaced by a callout
+ *   `<div>` whose content is the marked-rendered body; all other lines are
+ *   copied through and re-joined with "\n".
+ */
+async function preprocessCallouts(markdown: string): Promise<string> {
+  // Defensive cap: skip preprocessing for pathologically large inputs.
+  if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
+    return markdown;
+  }
+
+  // Recursively transform a slice of lines, converting top-level callouts in
+  // that slice into <div> blocks and rendering their inner content (which may
+  // itself contain nested callouts) through this same function.
+  const transform = async (lines: string[]): Promise<string> => {
+    const out: string[] = [];
+    let inCodeFence = false;
+    let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it
+    let i = 0;
+
+    while (i < lines.length) {
+      const line = lines[i];
+
+      // Inside a code fence, only its matching closing fence is significant;
+      // everything else (including `:::` lines) is copied through verbatim.
+      if (inCodeFence) {
+        out.push(line);
+        const fence = line.match(CODE_FENCE_RE);
+        if (fence && fence[2].startsWith(codeFenceMarker[0]) &&
+            fence[2].length >= codeFenceMarker.length) {
+          inCodeFence = false;
+          codeFenceMarker = "";
+        }
+        i++;
+        continue;
+      }
+
+      // A code fence opening outside any callout body: enter code-fence mode.
+      const fenceOpen = line.match(CODE_FENCE_RE);
+      if (fenceOpen) {
+        inCodeFence = true;
+        codeFenceMarker = fenceOpen[2];
+        out.push(line);
+        i++;
+        continue;
+      }
+
+      // An opening callout fence: scan forward (with code-fence and nested
+      // callout awareness) for its matching closing `:::` at the same level.
+      const open = line.match(CALLOUT_OPEN_RE);
+      if (open) {
+        const type = open[1].toLowerCase();
+        const bodyLines: string[] = [];
+        let depth = 1;
+        let innerInCodeFence = false;
+        let innerCodeFenceMarker = "";
+        let j = i + 1;
+        for (; j < lines.length; j++) {
+          const bl = lines[j];
+          if (innerInCodeFence) {
+            const f = bl.match(CODE_FENCE_RE);
+            if (f && f[2].startsWith(innerCodeFenceMarker[0]) &&
+                f[2].length >= innerCodeFenceMarker.length) {
+              innerInCodeFence = false;
+              innerCodeFenceMarker = "";
+            }
+            bodyLines.push(bl);
+            continue;
+          }
+          const innerFence = bl.match(CODE_FENCE_RE);
+          if (innerFence) {
+            innerInCodeFence = true;
+            innerCodeFenceMarker = innerFence[2];
+            bodyLines.push(bl);
+            continue;
+          }
+          if (CALLOUT_OPEN_RE.test(bl)) {
+            depth++;
+            bodyLines.push(bl);
+            continue;
+          }
+          if (CALLOUT_CLOSE_RE.test(bl)) {
+            depth--;
+            if (depth === 0) break; // matching close for THIS callout
+            bodyLines.push(bl);
+            continue;
+          }
+          bodyLines.push(bl);
+        }
+
+        if (j < lines.length) {
+          // Found the matching closing fence: render the body (recursively, so
+          // nested callouts are handled) and emit the callout div.
+          const inner = await transform(bodyLines);
+          const renderedInner = await marked.parse(inner);
+          out.push(
+            `\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`,
+          );
+          i = j + 1; // skip past the closing `:::`
+          continue;
+        }
+        // No matching close (unterminated callout): treat the opener as a
+        // literal line and continue, preserving the original text.
+        out.push(line);
+        i++;
+        continue;
+      }
+
+      out.push(line);
+      i++;
+    }
+
+    return out.join("\n");
+  };
+
+  return transform(markdown.split("\n"));
+}
+
+/**
+ * Rewrite marked's checkbox lists into the markup TipTap task lists parse.
+ *
+ * marked turns GitHub task items (`- [x] done`) into ordinary
+ * `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` markup, which
+ * lacks the attributes the TaskList/TaskItem extensions look for:
+ *   - the list must be `ul[data-type="taskList"]`,
+ *   - each item must be `li[data-type="taskItem"]`,
+ *   - the state is read from `data-checked` being `"true"`.
+ *
+ * Conversion rule: a `<ul>` or `<ol>` is converted only if it has at least one
+ * direct `<li>` and EVERY direct `<li>` owns a checkbox. "Owns" means the
+ * input is a direct child of the `<li>` or a direct child of one of the
+ * `<li>`'s direct `<p>` children; checkboxes deeper down (for example in a
+ * nested sublist) do not count. Lists that fail the rule — mixed lists and
+ * plain bullet/numbered lists — are left exactly as they were.
+ *
+ * A qualifying `<ol>` (numbered checklist) is replaced by a `<ul>` carrying the
+ * same attributes and children, because TipTap task lists are unordered and an
+ * `<ol data-type="taskList">` would match the orderedList rule as well. The
+ * `<p>` wrapper marked emits stays inside the item.
+ *
+ * Markup with no checkbox input, or longer than MAX_CALLOUT_PREPROCESS_BYTES,
+ * is returned untouched without being parsed.
+ */
+function bridgeTaskLists(html: string): string {
+  // Fast exits, in this order: nothing to bridge (the common case, avoids a
+  // JSDOM parse), then the defensive size cap shared with preprocessCallouts.
+  const mentionsCheckbox = /type=["']?checkbox/i.test(html);
+  if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
+    return html;
+  }
+
+  const parsed = new JSDOM(html);
+  const doc = parsed.window.document;
+
+  // Checkboxes that belong to the <li> itself: direct <input type="checkbox">
+  // children plus those sitting directly inside a direct <p> child. All of
+  // them are returned, since raw inline HTML may put several in one item.
+  const ownCheckboxes = (li: Element): Element[] =>
+    Array.from(li.children).flatMap((node): Element[] => {
+      if (node.tagName === "INPUT") {
+        return node.getAttribute("type") === "checkbox" ? [node] : [];
+      }
+      if (node.tagName === "P") {
+        return Array.from(
+          node.querySelectorAll(":scope > input[type='checkbox']"),
+        );
+      }
+      return [];
+    });
+
+  // Snapshot every list up front; nested lists appear in the snapshot too and
+  // are judged on their own direct items when their turn comes.
+  for (const list of Array.from(doc.querySelectorAll("ul, ol"))) {
+    const entries = Array.from(list.children).filter(
+      (node) => node.tagName === "LI",
+    );
+    if (entries.length === 0) continue;
+
+    const boxesPerEntry = entries.map((li) => ownCheckboxes(li));
+    // One checkbox-less item disqualifies the whole list.
+    if (boxesPerEntry.some((boxes) => boxes.length === 0)) continue;
+
+    // Swap a numbered checklist for a <ul> so that only the taskList parse
+    // rule can match it; attributes and child nodes are carried across.
+    let taskList: Element = list;
+    if (list.tagName === "OL") {
+      const replacement = doc.createElement("ul");
+      for (const { name, value } of Array.from(list.attributes)) {
+        replacement.setAttribute(name, value);
+      }
+      for (const node of Array.from(list.childNodes)) {
+        replacement.appendChild(node);
+      }
+      list.replaceWith(replacement);
+      taskList = replacement;
+    }
+    taskList.setAttribute("data-type", "taskList");
+
+    for (let k = 0; k < entries.length; k++) {
+      const entry = entries[k];
+      const boxes = boxesPerEntry[k];
+      // The first checkbox decides the state; the rest are just discarded.
+      const first = boxes[0] ?? null;
+      entry.setAttribute("data-type", "taskItem");
+      const isChecked =
+        first != null &&
+        (first.hasAttribute("checked") || (first as any).checked);
+      entry.setAttribute("data-checked", isChecked ? "true" : "false");
+      // Strip every owned checkbox so none leaks into the item content.
+      boxes.forEach((box) => box.remove());
+    }
+  }
+
+  return doc.body.innerHTML;
+}
+
+/**
+ * Turn markdown into a ProseMirror document built with the full Docmost
+ * schema. Pipeline: callout fences are pre-processed, the result is rendered
+ * to HTML by marked, checkbox lists are bridged to task lists, and the HTML is
+ * parsed with `generateJSON` against `docmostExtensions`.
+ */
+export async function markdownToProseMirror(
+  markdownContent: string,
+): Promise<any> {
+  const calloutReady = await preprocessCallouts(markdownContent);
+  const rendered = await marked.parse(calloutReady);
+  return generateJSON(bridgeTaskLists(rendered), docmostExtensions);
+}
+
+/**
+ * Build the collaboration WebSocket URL from an API base URL:
+ * switch http(s)->ws(s), strip a trailing /api, mount on /collab.
+ * Shared by the live read and the mutate path so both target the same socket.
+ *
+ * The scheme swap ignores case and surrounding whitespace, so inputs such as
+ * `HTTPS://host/api` or ` https://host/api ` (which the URL parser accepts)
+ * still end up on ws(s) instead of silently keeping the http(s) scheme.
+ *
+ * @param baseUrl API base URL, e.g. `https://docs.example.com/api`.
+ * @returns The ws(s) URL ending in `/collab`, without query string or hash.
+ *   When `baseUrl` cannot be parsed as a URL, `/collab` is appended to the
+ *   trimmed input (unless it already ends with it) as a best-effort fallback.
+ */
+export function buildCollabWsUrl(baseUrl: string): string {
+  let wsUrl = baseUrl.trim().replace(/^http/i, "ws");
+  try {
+    const urlObj = new URL(wsUrl);
+    // Drop one trailing "/api" (with or without a slash), then any trailing
+    // slash, and mount the collaboration endpoint on what is left.
+    const basePath = urlObj.pathname
+      .replace(/\/api\/?$/, "")
+      .replace(/\/$/, "");
+    urlObj.pathname = `${basePath}/collab`;
+    // Drop any query/hash from the base URL so it is not carried into the
+    // collaboration ws URL.
+    urlObj.search = "";
+    urlObj.hash = "";
+    wsUrl = urlObj.toString();
+  } catch {
+    // Fallback if URL parsing fails
+    if (!wsUrl.endsWith("/collab")) {
+      wsUrl = wsUrl.replace(/\/$/, "") + "/collab";
+    }
+  }
+  return wsUrl;
+}
+
+/**
+ * Encode a ProseMirror doc as a Yjs document.
+ *
+ * The doc is first passed through `sanitizeForYjs` (which strips `undefined`
+ * node/mark attributes, the usual trigger of yjs's opaque "Unexpected content
+ * type" failure) and then handed to `TiptapTransformer.toYdoc` under the
+ * "default" field with the Docmost extensions.
+ *
+ * @throws Error with a descriptive message if `toYdoc` still throws. The
+ *   message includes the original error text and, when `findUnstorableAttr`
+ *   can locate one, the path of the offending attribute.
+ */
+export function buildYDoc(doc: any): Y.Doc {
+  const sanitized = sanitizeForYjs(doc);
+  try {
+    return TiptapTransformer.toYdoc(sanitized, "default", docmostExtensions);
+  } catch (cause) {
+    // Try to pinpoint the attribute; otherwise fall back to a generic hint.
+    const offender = findUnstorableAttr(sanitized);
+    const reason = cause instanceof Error ? cause.message : String(cause);
+    const hint = offender
+      ? ` Offending attribute: ${offender}.`
+      : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined).";
+    throw new Error(
+      `Failed to encode document to Yjs (toYdoc): ${reason}.${hint}`,
+    );
+  }
+}
+
+/**
+ * Check that a doc can be encoded to Yjs by running it through `buildYDoc` and
+ * throwing away the result. A doc that cannot be encoded makes this throw the
+ * very same descriptive error the apply path raises, which is what lets the
+ * dry-run preview fail identically to apply.
+ */
+export function assertYjsEncodable(doc: any): void {
+  // Only the potential throw matters; the built Y.Doc is deliberately dropped.
+  void buildYDoc(doc);
+}
+
+/** Time we wait for the initial handshake/sync before giving up. */
+const CONNECT_TIMEOUT_MS = 25000;
+/** Time we wait for the server to acknowledge our write before giving up. */
+const PERSIST_TIMEOUT_MS = 20000;
+
+/**
+ * Safely mutate the live content of a page over the collaboration websocket.
+ *
+ * This is the single safe write path for every MCP content mutation. It:
+ *   1. serializes per-page writes through withPageLock (no two MCP writes on
+ *      the same page overlap);
+ *   2. connects to Hocuspocus and waits for the initial sync so the local ydoc
+ *      mirrors the authoritative server doc — INCLUDING edits/comments/images
+ *      that are not yet in the debounced REST snapshot;
+ *   3. inside onSynced, SYNCHRONOUSLY reads the live doc, runs `transform`, and
+ *      writes the result back — with no `await` between read and write so no
+ *      remote update can interleave and clobber concurrent human edits;
+ *   4. waits for the server to acknowledge the write (unsyncedChanges -> 0)
+ *      before resolving, so the next operation observes our change.
+ *
+ * Timeouts: CONNECT_TIMEOUT_MS bounds only the connect + initial-sync phase and
+ * is cancelled as soon as onSynced starts applying the write; the
+ * acknowledgement wait is bounded separately by PERSIST_TIMEOUT_MS. A
+ * disconnect/close or an authentication failure rejects immediately.
+ *
+ * `transform` receives the live ProseMirror doc and returns the NEW full
+ * ProseMirror doc to write, or `null` to abort with no write (a no-op). If
+ * `transform` throws, the error is propagated to the caller (not swallowed).
+ *
+ * @param pageId      Page id; the collaboration document name is `page.<id>`.
+ * @param collabToken Token passed to the Hocuspocus provider.
+ * @param baseUrl     API base URL, converted with buildCollabWsUrl.
+ * @param transform   Synchronous function mapping the live doc to the new doc,
+ *   or to `null`/`undefined` to abort.
+ * @returns The doc that was written, or the live doc when the transform aborted.
+ * @throws Error on connect/persist timeout, lost connection, authentication
+ *   failure, a Yjs encoding failure, or whatever `transform` throws.
+ */
+export async function mutatePageContent(
+  pageId: string,
+  collabToken: string,
+  baseUrl: string,
+  transform: (liveDoc: any) => any | null,
+): Promise<any> {
+  return withPageLock(pageId, () => {
+    if (process.env.DEBUG) {
+      console.error(`Starting realtime content mutate for page ${pageId}`);
+      // Token prefix is sensitive; only log it under DEBUG.
+      console.error(
+        `Token prefix: ${collabToken ? collabToken.substring(0, 5) : "NONE"}...`,
+      );
+    }
+
+    const ydoc = new Y.Doc();
+    const wsUrl = buildCollabWsUrl(baseUrl);
+    if (process.env.DEBUG) console.error(`Connecting to WebSocket: ${wsUrl}`);
+
+    return new Promise<any>((resolve, reject) => {
+      let provider: HocuspocusProvider | undefined;
+      let applied = false; // onSynced may fire again on reconnect — apply once.
+      let settled = false;
+      // Set true on disconnect/close so a reconnect-driven unsyncedChanges->0
+      // cannot be mistaken for a successful persist of our write.
+      let connectionLost = false;
+      let connectTimer: ReturnType<typeof setTimeout> | undefined;
+      let persistTimer: ReturnType<typeof setTimeout> | undefined;
+      let unsyncedHandler: ((data: { number: number }) => void) | undefined;
+      // The value the promise resolves with once the write is acknowledged.
+      let lastWrittenDoc: any;
+
+      const cleanup = () => {
+        if (connectTimer) clearTimeout(connectTimer);
+        if (persistTimer) clearTimeout(persistTimer);
+        if (provider) {
+          // Teardown is best-effort: a failure here must not mask the real
+          // outcome that finish() is about to report.
+          if (unsyncedHandler) {
+            try {
+              provider.off("unsyncedChanges", unsyncedHandler);
+            } catch (err) {}
+          }
+          try {
+            provider.destroy();
+          } catch (err) {}
+        }
+      };
+
+      // Settle the promise exactly once; later calls are ignored.
+      const finish = (err: Error | null, value?: any) => {
+        if (settled) return;
+        settled = true;
+        cleanup();
+        if (err) reject(err);
+        else resolve(value);
+      };
+
+      connectTimer = setTimeout(() => {
+        finish(new Error("Connection timeout to collaboration server"));
+      }, CONNECT_TIMEOUT_MS);
+
+      // Resolve once the server has acknowledged our update. The provider
+      // increments unsyncedChanges when our local update is sent and
+      // decrements it when the server replies with a SyncStatus(applied=true);
+      // reaching 0 means the authoritative in-memory ydoc on the server now
+      // contains our write.
+      const waitForPersistence = () => {
+        if (settled) return;
+        // A missing provider is a failure, not a success: without it the write
+        // can never have been acknowledged. Only an actual unsyncedChanges===0
+        // on a live provider counts as persisted.
+        if (!provider) {
+          finish(new Error("collab provider gone before persistence"));
+          return;
+        }
+        if (provider.unsyncedChanges === 0) {
+          finish(null, lastWrittenDoc);
+          return;
+        }
+        persistTimer = setTimeout(() => {
+          finish(
+            new Error(
+              "Timeout waiting for collaboration server to persist the update",
+            ),
+          );
+        }, PERSIST_TIMEOUT_MS);
+        unsyncedHandler = (data: { number: number }) => {
+          // Only treat unsyncedChanges->0 as success when the connection is
+          // still up. A transient disconnect + reconnect handshake can drive
+          // the counter back to 0 without our write being re-transmitted; in
+          // that case let the disconnect/close error win instead.
+          if (data.number === 0 && !connectionLost) {
+            finish(null, lastWrittenDoc);
+          }
+        };
+        provider.on("unsyncedChanges", unsyncedHandler);
+      };
+
+      provider = new HocuspocusProvider({
+        url: wsUrl,
+        name: `page.${pageId}`,
+        document: ydoc,
+        token: collabToken,
+        // @ts-ignore - Required for Node.js environment
+        WebSocketPolyfill: WebSocket,
+        onConnect: () => {
+          if (process.env.DEBUG) console.error("WS Connect");
+        },
+        // An unexpected disconnect/close while we are still waiting (during the
+        // connect-wait before onSynced, or during the persistence wait after the
+        // write) means the update will never be acknowledged — surface it now
+        // instead of hanging until the connect/persist timeout fires. `finish`
+        // is idempotent via the `settled` flag, so the onClose that our own
+        // cleanup()->provider.destroy() triggers (after settled=true is set) is
+        // a harmless no-op and cannot cause a double-resolve.
+        onDisconnect: () => {
+          if (process.env.DEBUG) console.error("WS Disconnect");
+          // Mark BEFORE finish so the unsyncedChanges handler (if it races)
+          // sees the connection as lost and won't report a false success.
+          connectionLost = true;
+          finish(
+            new Error(
+              "Collaboration connection closed before the update was persisted/synced",
+            ),
+          );
+        },
+        onClose: () => {
+          if (process.env.DEBUG) console.error("WS Close");
+          // Mark BEFORE finish so the unsyncedChanges handler (if it races)
+          // sees the connection as lost and won't report a false success.
+          connectionLost = true;
+          finish(
+            new Error(
+              "Collaboration connection closed before the update was persisted/synced",
+            ),
+          );
+        },
+        onSynced: () => {
+          if (applied || settled) return;
+          applied = true;
+          // The connect/sync phase is over. Stop its timer here so it cannot
+          // fire during the persistence wait, where it would cut
+          // PERSIST_TIMEOUT_MS short and report a misleading "Connection
+          // timeout" for a connection that did come up. From this point every
+          // path either settles immediately or arms the persist timer.
+          if (connectTimer) {
+            clearTimeout(connectTimer);
+            connectTimer = undefined;
+          }
+          if (process.env.DEBUG) console.error("Connected and synced!");
+
+          // CRITICAL: everything between reading the live doc and writing it
+          // back must stay synchronous (no await). While the JS event loop is
+          // not yielded, no incoming remote update can interleave, so any
+          // already-synced concurrent edits are preserved in liveDoc.
+          let newDoc: any;
+          try {
+            let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
+            // Normalize anything that is not a doc with a content array to an
+            // empty doc so `transform` always receives the same shape.
+            if (
+              !liveDoc ||
+              typeof liveDoc !== "object" ||
+              !Array.isArray(liveDoc.content)
+            ) {
+              liveDoc = { type: "doc", content: [] };
+            }
+
+            newDoc = transform(liveDoc);
+
+            if (newDoc == null) {
+              // Transform aborted — write nothing, return the live doc.
+              lastWrittenDoc = liveDoc;
+              finish(null, liveDoc);
+              return;
+            }
+
+            const tempDoc = buildYDoc(newDoc);
+            // Fetch the fragment immediately before the transact that mutates
+            // it, rather than reusing a handle grabbed across the transform.
+            const fragment = ydoc.getXmlFragment("default");
+            // Clear and refill inside one transaction so the replacement is
+            // applied as a single change.
+            ydoc.transact(() => {
+              if (fragment.length > 0) {
+                fragment.delete(0, fragment.length);
+              }
+              Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc));
+            });
+          } catch (e) {
+            // Includes errors thrown by transform (e.g. "afterText not found",
+            // "text not found"): propagate them verbatim to the caller.
+            finish(e instanceof Error ? e : new Error(String(e)));
+            return;
+          }
+
+          lastWrittenDoc = newDoc;
+          if (process.env.DEBUG)
+            console.error("Content written, waiting for server to persist...");
+          waitForPersistence();
+        },
+        onAuthenticationFailed: () => {
+          finish(
+            new Error("Authentication failed for collaboration connection"),
+          );
+        },
+      });
+    });
+  });
+}
+
+/**
+ * Overwrite a page's live content with a ready-made ProseMirror JSON document
+ * via the collaboration websocket. Whether block ids regenerate (doc produced
+ * from markdown) or survive (doc edited in place) is up to the caller.
+ *
+ * The replacement is a deliberate full replace (update_page /
+ * update_page_json) and is delegated to mutatePageContent, so it runs under
+ * the per-page lock and resolves only after the server has persisted it.
+ *
+ * @throws Error("replacePageContent: invalid ProseMirror document") when the
+ *   argument is null/undefined, not an object, or its `type` is not "doc".
+ */
+export async function replacePageContent(
+  pageId: string,
+  prosemirrorDoc: any,
+  collabToken: string,
+  baseUrl: string,
+): Promise<void> {
+  // Reject a bad document up front rather than letting the failure surface
+  // later inside the collaboration write.
+  const looksLikeDoc =
+    typeof prosemirrorDoc === "object" &&
+    prosemirrorDoc !== null &&
+    prosemirrorDoc.type === "doc";
+  if (!looksLikeDoc) {
+    throw new Error("replacePageContent: invalid ProseMirror document");
+  }
+
+  // The live doc is ignored on purpose: this path always writes the given doc.
+  const replaceWithGivenDoc = () => prosemirrorDoc;
+  await mutatePageContent(pageId, collabToken, baseUrl, replaceWithGivenDoc);
+}
+
+/**
+ * Markdown-based update, retained for backwards compatibility.
+ *
+ * The markdown is converted with markdownToProseMirror and the resulting
+ * document replaces the page content through mutatePageContent.
+ * NOTE: the whole document is re-imported, so block ids are regenerated.
+ * Tables and :::callout::: blocks survive thanks to the full schema.
+ */
+export async function updatePageContentRealtime(
+  pageId: string,
+  markdownContent: string,
+  collabToken: string,
+  baseUrl: string,
+): Promise<void> {
+  const freshDoc = await markdownToProseMirror(markdownContent);
+  // The live doc is not consulted; the converted markdown replaces it.
+  const replaceWithFreshDoc = () => freshDoc;
+  await mutatePageContent(pageId, collabToken, baseUrl, replaceWithFreshDoc);
+}
--- a/packages/docmost-client/src/lib/diff.ts
+++ b/packages/docmost-client/src/lib/diff.ts
@@ -0,0 +1,319 @@
+/**
+ * Headless, Docmost-equivalent document diff.
+ *
+ * Docmost's history editor computes a change set with the exact pipeline below
+ * (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
+ * editor decorations. This module runs the SAME computation but serializes the
+ * result to text + integrity counts instead of decorations, so a diff can be
+ * previewed without a browser.
+ *
+ * recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
+ * maintained published fork of the MIT prosemirror-recreate-steps source that
+ * Docmost vendors in @docmost/editor-ext; it exposes the identical
+ * recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
+ * signature.
+ *
+ * If recreateTransform / the changeset throws on a pathological document pair,
+ * we fall back to a coarse block-level text diff so the tool never hard-fails.
+ */
+
+import { getSchema } from "@tiptap/core";
+import { Node } from "@tiptap/pm/model";
+import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
+import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
+import { docmostExtensions } from "./docmost-schema.js";
+
+/**
+ * A single inserted/deleted change with its containing-block context.
+ * diffDocs emits "delete" entries for text read from the old document and
+ * "insert" entries for text read from the new document.
+ */
+export interface DiffChange {
+  op: "insert" | "delete"; // which side of the diff the text was taken from
+  /**
+   * Lead (plain) text of the block that contains the change, for context.
+   * The producers in this file shorten it to at most 80 characters; it is ""
+   * when the context could not be resolved.
+   */
+  block: string;
+  /** The inserted or deleted text. */
+  text: string;
+}
+
+/**
+ * Integrity counts as [old, new] tuples; footnoteMarkers as [oldList, newList].
+ * Index 0 always describes the earlier document and index 1 the later one, so
+ * comparing the two sides shows whether an edit gained or lost such content.
+ */
+export interface DiffIntegrity {
+  images: [number, number]; // nodes whose type is "image"
+  links: [number, number]; // distinct link hrefs (not link-bearing text runs)
+  tables: [number, number]; // nodes whose type is "table"
+  callouts: [number, number]; // nodes whose type is "callout"
+  footnoteMarkers: [number[], number[]]; // numbers of the `[N]` markers found in the body, in reading order
+}
+
+export interface DiffResult { // value returned by diffDocs
+  summary: { inserted: number; deleted: number; blocksChanged: number }; // inserted/deleted: summed lengths of the changed text; blocksChanged: distinct (op, non-empty block context) pairs
+  integrity: DiffIntegrity;
+  changes: DiffChange[];
+  /**
+   * Human-readable unified-ish summary: a totals header, the integrity
+   * section, then one "+" / "-" line per change.
+   */
+  markdown: string;
+}
+
+/**
+ * Build the schema once; it is pure and reused across calls.
+ * diffDocs passes it to Node.fromJSON to hydrate both JSON documents before
+ * the precise diff is computed.
+ */
+const schema = getSchema(docmostExtensions);
+
+/**
+ * Flatten a JSON node to plain text: the node's own `text` (only when it is a
+ * string) followed by the text of every entry of `content`, depth-first.
+ * Anything that is not an object yields "".
+ */
+function plainText(node: any): string {
+  if (!node || typeof node !== "object") return "";
+  const ownText: string = typeof node.text === "string" ? node.text : "";
+  if (!Array.isArray(node.content)) return ownText;
+  return node.content.reduce(
+    (acc: string, child: any) => acc + plainText(child),
+    ownText,
+  );
+}
+
+/**
+ * Count the nodes of a JSON doc for which `pred` returns a truthy value.
+ * The walk is depth-first through `content` arrays, parent before children;
+ * values that are not objects are skipped without calling `pred`.
+ */
+function countNodes(doc: any, pred: (node: any) => boolean): number {
+  const tally = (node: any): number => {
+    if (!node || typeof node !== "object") return 0;
+    let total = pred(node) ? 1 : 0;
+    if (Array.isArray(node.content)) {
+      for (const child of node.content) total += tally(child);
+    }
+    return total;
+  };
+  return tally(doc);
+}
+
+/**
+ * Count the DISTINCT link targets of a JSON doc.
+ *
+ * One link is often split over several adjacent text runs (a "link+bold" run
+ * next to a plain "link" run, for instance), so counting link-bearing runs
+ * would inflate the number. Instead every `href` found on a link mark of a
+ * text node goes into a Set and the Set's size is returned. A link mark whose
+ * href is missing or not a string is filed under "", so malformed links
+ * together count as a single entry.
+ */
+function countUniqueLinks(doc: any): number {
+  const seen = new Set<string>();
+  // Explicit work list; visiting order is irrelevant because only the set of
+  // hrefs matters.
+  const pending: any[] = [doc];
+  while (pending.length > 0) {
+    const node = pending.pop();
+    if (!node || typeof node !== "object") continue;
+
+    if (node.type === "text" && Array.isArray(node.marks)) {
+      for (const mark of node.marks) {
+        if (!mark || mark.type !== "link") continue;
+        const raw = mark.attrs ? mark.attrs.href : undefined;
+        seen.add(typeof raw === "string" ? raw : "");
+      }
+    }
+
+    if (Array.isArray(node.content)) {
+      for (const child of node.content) pending.push(child);
+    }
+  }
+  return seen.size;
+}
+
+/**
+ * Collect the numbers of all `[N]` footnote markers that occur in the BODY of
+ * a doc, in reading order.
+ *
+ * The body is every top-level block that precedes the first heading whose
+ * trimmed plain text equals `notesHeading`; when no such heading exists the
+ * whole document is scanned.
+ */
+function footnoteMarkers(doc: any, notesHeading: string): number[] {
+  const blocks: any[] = Array.isArray(doc?.content) ? doc.content : [];
+
+  // Everything from the notes heading onward is excluded from the scan.
+  let bodyEnd = blocks.length;
+  for (let i = 0; i < blocks.length; i++) {
+    const candidate = blocks[i];
+    if (
+      candidate &&
+      candidate.type === "heading" &&
+      plainText(candidate).trim() === notesHeading
+    ) {
+      bodyEnd = i;
+      break;
+    }
+  }
+
+  const found: number[] = [];
+  for (const block of blocks.slice(0, bodyEnd)) {
+    // With the g flag, match() returns every whole marker ("[12]") or null.
+    const tokens = plainText(block).match(/\[(\d+)\]/g) || [];
+    for (const token of tokens) {
+      // Drop the surrounding brackets to get the digits.
+      found.push(Number(token.slice(1, -1)));
+    }
+  }
+  return found;
+}
+
+/**
+ * Build the [old, new] integrity tuples for a pair of JSON docs: node counts
+ * for images, tables and callouts, the number of distinct links, and the
+ * footnote marker lists of both bodies.
+ */
+function computeIntegrity(
+  oldDoc: any,
+  newDoc: any,
+  notesHeading: string,
+): DiffIntegrity {
+  // Count nodes of one type on both sides.
+  const countByType = (type: string): [number, number] => {
+    const isType = (n: any): boolean => n.type === type;
+    return [countNodes(oldDoc, isType), countNodes(newDoc, isType)];
+  };
+
+  return {
+    images: countByType("image"),
+    links: [countUniqueLinks(oldDoc), countUniqueLinks(newDoc)],
+    tables: countByType("table"),
+    callouts: countByType("callout"),
+    footnoteMarkers: [
+      footnoteMarkers(oldDoc, notesHeading),
+      footnoteMarkers(newDoc, notesHeading),
+    ],
+  };
+}
+
+/**
+ * Return the lead text of the top-level block of `node` that contains `pos`.
+ *
+ * The position is clamped into [0, node.content.size] before it is resolved.
+ * When the resolved position sits directly in the root (depth 0), the root
+ * node's own text is used. Text longer than 80 characters is cut to 77 plus
+ * "...". Any error raised while resolving yields "".
+ */
+function blockContextAt(node: Node, pos: number): string {
+  try {
+    const capped = Math.min(pos, node.content.size);
+    const resolved = node.resolve(Math.max(0, capped));
+    // Depth 1 is the top-level block of a doc node; depth 0 is the doc itself.
+    const contextDepth = resolved.depth >= 1 ? 1 : 0;
+    const lead = resolved.node(contextDepth).textContent || "";
+    if (lead.length <= 80) return lead;
+    return lead.slice(0, 77) + "...";
+  } catch {
+    return "";
+  }
+}
+
+/**
+ * Shorten `s` for the markdown summary: strings longer than `n` are cut to
+ * `n - 3` characters followed by "..."; shorter ones are returned untouched.
+ */
+function truncate(s: string, n = 120): string {
+  if (s.length <= n) return s;
+  const head = s.slice(0, n - 3);
+  return head + "...";
+}
+
+/**
+ * Coarse fallback used only when the precise changeset pipeline throws, so the
+ * tool degrades instead of failing.
+ *
+ * Each top-level block is reduced to its plain text. A non-blank text present
+ * on the old side but absent from the new side becomes a "delete"; the
+ * reverse becomes an "insert". All deletes come first, then all inserts, each
+ * group in document order.
+ */
+function coarseDiff(oldDoc: any, newDoc: any): DiffChange[] {
+  const blockTexts = (doc: any): string[] => {
+    const blocks: any[] = Array.isArray(doc?.content) ? doc.content : [];
+    return blocks.map(plainText);
+  };
+  const before = blockTexts(oldDoc);
+  const after = blockTexts(newDoc);
+
+  // Texts of `source` that are non-blank and unknown to the other side.
+  const missingFrom = (
+    op: DiffChange["op"],
+    source: string[],
+    other: Set<string>,
+  ): DiffChange[] =>
+    source
+      .filter((t) => !other.has(t) && t.trim() !== "")
+      .map((t) => ({ op, block: truncate(t, 80), text: t }));
+
+  return [
+    ...missingFrom("delete", before, new Set(after)),
+    ...missingFrom("insert", after, new Set(before)),
+  ];
+}
+
+/**
+ * Render the human-readable, unified-ish markdown summary of a diff.
+ *
+ * Layout: a totals header, an optional note when the coarse fallback was
+ * used, the integrity section (old -> new), then one line per change prefixed
+ * with "+" (insert) or "-" (delete) and suffixed with its block context.
+ *
+ * @param result the diff result without its `markdown` field
+ * @param fellBack true when the precise diff failed and coarseDiff was used
+ */
+function renderMarkdown(
+  result: Omit<DiffResult, "markdown">,
+  fellBack: boolean,
+): string {
+  const { summary, integrity, changes } = result;
+  const arrow = (pair: [number, number]): string => `${pair[0]} -> ${pair[1]}`;
+
+  const header = `# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`;
+  const fallbackNote: string[] = fellBack
+    ? ["", "> note: precise diff failed; coarse block-level diff shown."]
+    : [];
+
+  const [oldMarkers, newMarkers] = integrity.footnoteMarkers;
+  const integritySection: string[] = [
+    "## Integrity (old -> new)",
+    `- images: ${arrow(integrity.images)}`,
+    `- links: ${arrow(integrity.links)}`,
+    `- tables: ${arrow(integrity.tables)}`,
+    `- callouts: ${arrow(integrity.callouts)}`,
+    `- footnoteMarkers: [${oldMarkers.join(", ")}] -> [${newMarkers.join(", ")}]`,
+  ];
+
+  const changeLines: string[] =
+    changes.length === 0
+      ? ["(no textual changes)"]
+      : changes.map((c) => {
+          const sign = c.op === "insert" ? "+" : "-";
+          const ctx = c.block ? ` @ ${truncate(c.block, 60)}` : "";
+          return `${sign} ${truncate(c.text)}${ctx}`;
+        });
+
+  return [
+    header,
+    ...fallbackNote,
+    "",
+    ...integritySection,
+    "",
+    "## Changes",
+    ...changeLines,
+  ].join("\n");
+}
+
+/**
+ * Diff two ProseMirror JSON documents the way Docmost's history editor does and
+ * serialize the result to text + integrity counts.
+ *
+ * The precise path hydrates both documents against the module schema, rebuilds
+ * the steps between them with recreateTransform, folds them into a ChangeSet
+ * and simplifies it. If any part of that throws, the function degrades to
+ * coarseDiff instead of propagating the error, and the rendered markdown
+ * carries a note saying so.
+ *
+ * @param oldDocJson the earlier document
+ * @param newDocJson the later document
+ * @param notesHeading heading delimiting body from notes for footnote counting
+ * @returns summary totals (summed text lengths and distinct changed-block
+ *   contexts), integrity tuples, the individual changes and a markdown
+ *   rendering
+ */
+export function diffDocs(
+  oldDocJson: any,
+  newDocJson: any,
+  notesHeading: string = "Примечания переводчика",
+): DiffResult {
+  const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);
+
+  let changes: DiffChange[] = [];
+  let inserted = 0;
+  let deleted = 0;
+  let fellBack = false;
+  const changedBlocks = new Set<string>();
+
+  try {
+    const oldNode = Node.fromJSON(schema, oldDocJson);
+    const newNode = Node.fromJSON(schema, newDocJson);
+    const tr = recreateTransform(oldNode, newNode, {
+      complexSteps: false,
+      wordDiffs: true,
+      simplifyDiff: true,
+    });
+    const changeSet = ChangeSet.create(oldNode).addSteps(
+      tr.doc,
+      tr.mapping.maps,
+      [],
+    );
+    const simplified = simplifyChanges(changeSet.changes, newNode);
+
+    for (const change of simplified) {
+      // Deleted text lives in the OLD doc coordinate range [fromA, toA).
+      if (change.toA > change.fromA) {
+        const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
+        if (text.length > 0) {
+          deleted += text.length;
+          const block = blockContextAt(oldNode, change.fromA);
+          changes.push({ op: "delete", block, text });
+          if (block) changedBlocks.add("d:" + block);
+        }
+      }
+      // Inserted text lives in the NEW doc coordinate range [fromB, toB).
+      if (change.toB > change.fromB) {
+        const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
+        if (text.length > 0) {
+          inserted += text.length;
+          const block = blockContextAt(newNode, change.fromB);
+          changes.push({ op: "insert", block, text });
+          if (block) changedBlocks.add("i:" + block);
+        }
+      }
+    }
+  } catch {
+    // Pathological pair: degrade to a coarse block-level diff so we never throw.
+    fellBack = true;
+    // The precise pass may have failed partway through its loop. Discard
+    // whatever it accumulated so the totals describe the coarse diff only and
+    // stay consistent with `changes`, which is replaced below.
+    inserted = 0;
+    deleted = 0;
+    changedBlocks.clear();
+    changes = coarseDiff(oldDocJson, newDocJson);
+    for (const c of changes) {
+      if (c.op === "insert") inserted += c.text.length;
+      else deleted += c.text.length;
+      // Same "i:" / "d:" key scheme as the precise pass.
+      if (c.block) changedBlocks.add(c.op[0] + ":" + c.block);
+    }
+  }
+
+  const partial: Omit<DiffResult, "markdown"> = {
+    summary: { inserted, deleted, blocksChanged: changedBlocks.size },
+    integrity,
+    changes,
+  };
+  return { ...partial, markdown: renderMarkdown(partial, fellBack) };
+}
--- a/packages/docmost-client/src/lib/docmost-schema.ts
+++ b/packages/docmost-client/src/lib/docmost-schema.ts
--- a/packages/docmost-client/src/lib/filters.ts
+++ b/packages/docmost-client/src/lib/filters.ts
@@ -0,0 +1,93 @@
+/**
+ * Filter functions to extract only relevant information from API responses
+ * for better agent consumption
+ */
+
+/** Reduce a workspace API record to its identity, description and timestamps. */
+export function filterWorkspace(data: any) {
+  const {
+    id,
+    name,
+    description,
+    defaultSpaceId,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  } = data;
+  return {
+    id,
+    name,
+    description,
+    defaultSpaceId,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  };
+}
+
+/** Reduce a space API record to its identity, slug, visibility and timestamps. */
+export function filterSpace(space: any) {
+  const {
+    id,
+    name,
+    description,
+    slug,
+    visibility,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  } = space;
+  return {
+    id,
+    name,
+    description,
+    slug,
+    visibility,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  };
+}
+
+/** Reduce a group API record to its identity, owning workspace and timestamps. */
+export function filterGroup(group: any) {
+  const {
+    id,
+    name,
+    description,
+    workspaceId,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  } = group;
+  return {
+    id,
+    name,
+    description,
+    workspaceId,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  };
+}
+
+/**
+ * Reduce a page API record to the fields worth returning.
+ *
+ * @param page raw page record
+ * @param content converted markdown; included whenever it is a string, the
+ *   empty string included
+ * @param subpages child pages; included (as `{ id, title }` pairs) only when
+ *   the list is non-empty
+ */
+export function filterPage(page: any, content?: string, subpages?: any[]) {
+  const {
+    id,
+    slugId,
+    title,
+    parentPageId,
+    spaceId,
+    isLocked,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  } = page;
+
+  // Each optional part is either an object to spread or a falsy value, which
+  // object spread ignores.
+  const contentPart = typeof content === "string" && { content };
+  const subpagesPart = subpages &&
+    subpages.length > 0 && {
+      subpages: subpages.map((child) => ({ id: child.id, title: child.title })),
+    };
+
+  return {
+    id,
+    slugId,
+    title,
+    parentPageId,
+    spaceId,
+    isLocked,
+    createdAt,
+    updatedAt,
+    deletedAt,
+    ...contentPart,
+    ...subpagesPart,
+  };
+}
+
+/**
+ * Reduce a comment API record to the fields worth returning.
+ *
+ * @param comment raw comment record
+ * @param markdownContent converted content; takes precedence over
+ *   `comment.content` unless it is null/undefined
+ */
+export function filterComment(comment: any, markdownContent?: string) {
+  // Optional fields are normalised so a falsy value is reported as null.
+  const orNull = (value: any) => value || null;
+  return {
+    id: comment.id,
+    pageId: comment.pageId,
+    content: markdownContent ?? comment.content,
+    selection: orNull(comment.selection),
+    type: comment.type || "page",
+    parentCommentId: orNull(comment.parentCommentId),
+    creatorId: comment.creatorId,
+    creatorName: orNull(comment.creator?.name),
+    createdAt: comment.createdAt,
+    editedAt: orNull(comment.editedAt),
+    resolvedAt: orNull(comment.resolvedAt),
+    resolvedById: orNull(comment.resolvedById),
+  };
+}
+
+/**
+ * Reduce a search hit to its page fields, ranking data and the id/name of the
+ * space it belongs to (both undefined when the hit carries no `space`).
+ */
+export function filterSearchResult(result: any) {
+  const {
+    id,
+    title,
+    parentPageId,
+    createdAt,
+    updatedAt,
+    rank,
+    highlight,
+    space,
+  } = result;
+  return {
+    id,
+    title,
+    parentPageId,
+    createdAt,
+    updatedAt,
+    rank,
+    highlight,
+    spaceId: space?.id,
+    spaceName: space?.name,
+  };
+}
--- a/packages/docmost-client/src/lib/json-edit.ts
+++ b/packages/docmost-client/src/lib/json-edit.ts
@@ -0,0 +1,127 @@
+/**
+ * Surgical text edits on a ProseMirror document without re-importing it.
+ *
+ * Each edit replaces an exact substring inside individual text nodes,
+ * preserving every node id, mark and attribute around it. This is the
+ * safe alternative to a full markdown re-import for small wording fixes.
+ */
+
+export interface TextEdit { // one literal find/replace instruction for applyTextEdits
+  find: string; // exact substring to look for inside individual text nodes; must be non-empty
+  replace: string; // literal text substituted for each replaced occurrence
+  /**
+   * Replace every occurrence; otherwise the edit must match exactly once.
+   * Without it, applyTextEdits throws when the text matches more than once.
+   */
+  replaceAll?: boolean;
+}
+
+export interface TextEditResult { // per-edit outcome reported by applyTextEdits
+  find: string; // the `find` string of the edit this entry belongs to
+  replacements: number; // how many occurrences were replaced for that edit
+}
+
+/**
+ * Concatenate the text of every text node under `node`, depth-first, with no
+ * separators. Used to tell "text absent" from "text split across runs".
+ */
+function collectText(node: any): string {
+  let gathered = node.type === "text" ? "" + (node.text || "") : "";
+  const children = node.content || [];
+  for (const child of children) gathered += collectText(child);
+  return gathered;
+}
+
+/**
+ * Count the non-overlapping occurrences of `needle` in `haystack`, scanning
+ * left to right. An empty needle counts as zero occurrences.
+ */
+function countOccurrences(haystack: string, needle: string): number {
+  if (!needle) return 0;
+  // Splitting on the needle yields one more piece than there are matches.
+  return haystack.split(needle).length - 1;
+}
+
+/**
+ * Apply text edits to a ProseMirror doc (mutates a deep copy, returns it).
+ *
+ * Edits are applied in order, each one against the result of the previous.
+ * A match must lie inside a single text node; text that only exists across
+ * several formatting runs is reported with a dedicated error. Text nodes left
+ * empty by the edits are removed at the end.
+ *
+ * @param doc ProseMirror JSON document; it is never modified
+ * @param edits literal find/replace instructions
+ * @returns the edited copy and, per edit, the number of replacements made
+ * @throws Error when `find` is not a non-empty string, when `replace` is not a
+ *   string, when an edit matches zero times, or when it matches multiple
+ *   times without replaceAll — so the caller can refine `find`.
+ */
+export function applyTextEdits(
+  doc: any,
+  edits: TextEdit[],
+): { doc: any; results: TextEditResult[] } {
+  const copy = JSON.parse(JSON.stringify(doc));
+  const results: TextEditResult[] = [];
+
+  for (const edit of edits) {
+    // Edits may originate from untyped input, so check the runtime types and
+    // not only truthiness. A non-string `find` has no usable `.length`, which
+    // would stall the occurrence scan; a missing `replace` would be spliced in
+    // as the text "undefined" (or, with replaceAll, joined with ",").
+    if (typeof edit.find !== "string" || edit.find === "") {
+      throw new Error("edit.find must be a non-empty string");
+    }
+    if (typeof edit.replace !== "string") {
+      throw new Error("edit.replace must be a string");
+    }
+
+    // Count matches inside individual text nodes first.
+    let nodeMatches = 0;
+    (function count(node: any) {
+      if (node.type === "text" && node.text) {
+        nodeMatches += countOccurrences(node.text, edit.find);
+      }
+      for (const child of node.content || []) count(child);
+    })(copy);
+
+    if (nodeMatches === 0) {
+      // Distinguish "text not present" from "text spans formatting runs".
+      const fullText = collectText(copy);
+      if (fullText.includes(edit.find)) {
+        throw new Error(
+          `Edit "${truncate(edit.find)}": the text exists in the document but spans ` +
+            `multiple formatting runs (bold/link/italic boundaries). Use a shorter ` +
+            `fragment that stays inside one run, or use update_page_json for ` +
+            `structural changes.`,
+        );
+      }
+      throw new Error(
+        `Edit "${truncate(edit.find)}": text not found in the document.`,
+      );
+    }
+
+    if (nodeMatches > 1 && !edit.replaceAll) {
+      throw new Error(
+        `Edit "${truncate(edit.find)}": matches ${nodeMatches} times. ` +
+          `Provide a longer, unique fragment or set replaceAll: true.`,
+      );
+    }
+
+    // Perform the replacement(s).
+    let done = 0;
+    (function replace(node: any) {
+      if (node.type === "text" && node.text && node.text.includes(edit.find)) {
+        if (edit.replaceAll) {
+          done += countOccurrences(node.text, edit.find);
+          // split/join inserts the replacement literally for every occurrence.
+          node.text = node.text.split(edit.find).join(edit.replace);
+        } else if (done === 0) {
+          // Avoid String.replace: its second arg treats $&, $1, $`, $', $$ as
+          // special patterns, expanding them instead of inserting literally.
+          // Splice the first occurrence by index to keep the replacement literal.
+          const idx = node.text.indexOf(edit.find);
+          node.text =
+            node.text.slice(0, idx) +
+            edit.replace +
+            node.text.slice(idx + edit.find.length);
+          done = 1;
+        }
+      }
+      for (const child of node.content || []) replace(child);
+    })(copy);
+
+    results.push({ find: edit.find, replacements: done });
+  }
+
+  // Drop text nodes that became empty (ProseMirror forbids empty text nodes).
+  (function prune(node: any) {
+    if (Array.isArray(node.content)) {
+      node.content = node.content.filter(
+        (child: any) => !(child.type === "text" && child.text === ""),
+      );
+      for (const child of node.content) prune(child);
+    }
+  })(copy);
+
+  return { doc: copy, results };
+}
+
+/** Shorten `s` for error messages: beyond 60 characters, keep 57 and add "...". */
+function truncate(s: string): string {
+  if (s.length <= 60) return s;
+  return `${s.slice(0, 57)}...`;
+}
--- a/packages/docmost-client/src/lib/markdown-converter.ts
+++ b/packages/docmost-client/src/lib/markdown-converter.ts
@@ -0,0 +1,861 @@
+/**
+ * Convert ProseMirror/TipTap JSON content to Markdown
+ * Supports all Docmost-specific node types and extensions
+ */
+export function convertProseMirrorToMarkdown(content: any): string {
+  if (!content || !content.content) return "";
+
+  // Escape a value interpolated into an HTML double-quoted attribute value
+  // (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the
+  // ATTRIBUTE context only the quote that delimits the value and the ampersand
+  // that starts an entity are special, so we escape ONLY & " (and ' for safety
+  // when single-quoted delimiters are used). We deliberately do NOT escape < or
+  // >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode
+  // &lt;/&gt; back inside attribute values, so escaping them would corrupt the
+  // stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on
+  // every round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`). Escaping & "
+  // keeps the value inert against attribute-injection while staying idempotent.
+  // NOTE: escape ONLY & and " here. The value is always wrapped in double
+  // quotes, so " is the only delimiter; ' is NOT special in a double-quoted
+  // value, and parse5 does not decode &#39; back inside attribute values, so
+  // escaping ' would (like < >) corrupt the value and accumulate &amp; on every
+  // round-trip. Escaping & and " is idempotent (parse5 decodes them back).
+  const escapeAttr = (value: unknown): string =>
+    String(value)
+      .replace(/&/g, "&amp;")
+      .replace(/"/g, "&quot;");
+
+  // Escape a value placed as HTML element TEXT content (between tags), where
+  // <, >, and & are all significant. Used for text rendered inside raw-HTML
+  // blocks (table cells / columns) so stored characters cannot inject markup.
+  const escapeHtmlText = (value: unknown): string =>
+    String(value)
+      .replace(/&/g, "&amp;")
+      .replace(/</g, "&lt;")
+      .replace(/>/g, "&gt;");
+
+  // Percent-encode characters that would break out of a markdown URL target
+  // (...) — whitespace/newlines and parentheses — so a stored src stays a
+  // single inert token (used for image/video/youtube srcs).
+  const encodeMdUrl = (value: unknown): string =>
+    String(value || "")
+      .replace(/\s/g, (c: string) => (c === " " ? "%20" : encodeURIComponent(c)))
+      .replace(/\(/g, "%28")
+      .replace(/\)/g, "%29");
+
+  const processNode = (node: any): string => {
+    const type = node.type;
+    const nodeContent = node.content || [];
+
+    switch (type) {
+      case "doc":
+        return nodeContent.map(processNode).join("\n\n");
+
+      case "paragraph":
+        const text = nodeContent.map(processNode).join("");
+        const align = node.attrs?.textAlign;
+        if (align && align !== "left") {
+          return `<div align="${escapeAttr(align)}">${text}</div>`;
+        }
+        return text || "";
+
+      case "heading":
+        const level = node.attrs?.level || 1;
+        const headingText = nodeContent.map(processNode).join("");
+        return "#".repeat(level) + " " + headingText;
+
+      case "text":
+        let textContent = node.text || "";
+        // Apply marks (bold, italic, code, etc.)
+        if (node.marks) {
+          // Markdown code spans (`...`) cannot carry inner formatting, so when a
+          // run has the `code` mark alongside ANY other mark, backtick syntax
+          // would leak literal ** / []() into the code text. In that case emit
+          // nested HTML (<code> innermost, the other marks wrapping it as HTML)
+          // so the output is at least well-formed and re-parseable.
+          //
+          // NOTE: this does NOT round-trip both marks. The schema's `code` mark
+          // has `excludes: "_"` (it excludes every other mark), so on import the
+          // co-occurring mark is always dropped — the run comes back as `code`
+          // only. We keep the emission simple and accept that the other mark is
+          // lost; preserving both is impossible while `code` excludes them.
+          // Only use the backtick form when `code` is the sole mark.
+          const markTypes = node.marks.map((m: any) => m.type);
+          const hasCode = markTypes.includes("code");
+          const codeCombined = hasCode && markTypes.length > 1;
+          for (const mark of node.marks) {
+            switch (mark.type) {
+              case "bold":
+                textContent = codeCombined
+                  ? `<strong>${textContent}</strong>`
+                  : `**${textContent}**`;
+                break;
+              case "italic":
+                textContent = codeCombined
+                  ? `<em>${textContent}</em>`
+                  : `*${textContent}*`;
+                break;
+              case "code":
+                // When combined with another mark, wrap as <code> so the
+                // surrounding HTML marks can nest around it; otherwise use the
+                // plain backtick span.
+                textContent = codeCombined
+                  ? `<code>${textContent}</code>`
+                  : `\`${textContent}\``;
+                break;
+              case "link": {
+                const href = mark.attrs?.href || "";
+                const title = mark.attrs?.title;
+                if (codeCombined) {
+                  // Emit an HTML anchor so it can wrap the nested <code>.
+                  const safeHref = escapeAttr(href);
+                  if (title) {
+                    textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
+                  } else {
+                    textContent = `<a href="${safeHref}">${textContent}</a>`;
+                  }
+                } else if (title) {
+                  // Emit the optional markdown link title; escape an embedded
+                  // double-quote so it cannot terminate the title string early.
+                  const safeTitle = String(title).replace(/"/g, '\\"');
+                  textContent = `[${textContent}](${href} "${safeTitle}")`;
+                } else {
+                  textContent = `[${textContent}](${href})`;
+                }
+                break;
+              }
+              case "strike":
+                textContent = codeCombined
+                  ? `<s>${textContent}</s>`
+                  : `~~${textContent}~~`;
+                break;
+              case "underline":
+                textContent = `<u>${textContent}</u>`;
+                break;
+              case "subscript":
+                textContent = `<sub>${textContent}</sub>`;
+                break;
+              case "superscript":
+                textContent = `<sup>${textContent}</sup>`;
+                break;
+              case "highlight": {
+                // Preserve a null/empty color as a plain highlight (a bare
+                // <mark> with no background-color); only emit the style when a
+                // color is actually set, so a plain highlight is not forced to
+                // yellow on export.
+                const color = mark.attrs?.color;
+                textContent = color
+                  ? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
+                  : `<mark>${textContent}</mark>`;
+                break;
+              }
+              case "textStyle":
+                if (mark.attrs?.color) {
+                  textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
+                }
+                break;
+              case "comment": {
+                // Emit the inline comment anchor so highlights round-trip. The
+                // schema's Comment mark parses span[data-comment-id] (attrs
+                // commentId/resolved).
+                const cid = mark.attrs?.commentId;
+                if (cid) {
+                  const resolvedAttr = mark.attrs?.resolved
+                    ? ` data-resolved="true"`
+                    : "";
+                  textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
+                }
+                break;
+              }
+            }
+          }
+        }
+        return textContent;
+
+      case "codeBlock":
+        const language = node.attrs?.language || "";
+        // Strip ALL trailing newlines so the export is idempotent: marked
+        // re-adds exactly one trailing "\n" on import, so trimming only one
+        // here would let the text grow by "\n" on each round-trip. Removing
+        // every trailing newline makes repeated cycles stable.
+        const code = nodeContent
+          .map(processNode)
+          .join("")
+          .replace(/\n+$/, "");
+        return "```" + language + "\n" + code + "\n```";
+
+      case "bulletList":
+        return nodeContent
+          .map((item: any) => processListItem(item, "-"))
+          .join("\n");
+
+      case "orderedList":
+        return nodeContent
+          .map((item: any, index: number) =>
+            processListItem(item, `${index + 1}.`),
+          )
+          .join("\n");
+
+      case "taskList":
+        return nodeContent.map((item: any) => processTaskItem(item)).join("\n");
+
+      case "taskItem":
+        // Delegate to the same helper used by taskList so multi-block and
+        // nested task items render and indent consistently.
+        return processTaskItem(node);
+
+      case "listItem":
+        return nodeContent.map(processNode).join("\n");
+
+      case "blockquote":
+        // Prefix EVERY line of EVERY child with "> " and separate block-level
+        // children with a blank ">" line so code blocks / multi-paragraph
+        // quotes round-trip correctly.
+        return nodeContent
+          .map((n: any) =>
+            processNode(n)
+              .split("\n")
+              .map((line: string) => (line.length ? `> ${line}` : ">"))
+              .join("\n"),
+          )
+          .join("\n>\n");
+
+      case "horizontalRule":
+        return "---";
+
+      case "hardBreak":
+        // Two trailing spaces before the newline encode a markdown hard break;
+        // a bare "\n" would be reimported as a soft break and lost.
+        return "  \n";
+
+      case "image":
+        const imgAlt = node.attrs?.alt || "";
+        // Neutralize characters that could break out of the markdown image
+        // URL: spaces/newlines and parentheses would terminate the (...) target
+        // and let a stored src inject following markdown/HTML. Percent-encode
+        // them so the URL stays a single inert token.
+        const imgSrc = encodeMdUrl(node.attrs?.src);
+        // No "caption" attribute exists in the Docmost image schema, so we do
+        // not emit one (the previous caption branch was dead).
+        return `![${imgAlt}](${imgSrc})`;
+
+      case "video": {
+        // Emit the schema-matching <video> element so generateJSON rebuilds the
+        // node with its attrs intact. The schema's parseHTML reads src/aria-label
+        // from the standard attributes and the remaining attrs from data-*.
+        const attrs = node.attrs || {};
+        const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
+        if (attrs.alt) parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
+        if (attrs.attachmentId)
+          parts.push(
+            `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
+          );
+        if (attrs.width != null)
+          parts.push(`width="${escapeAttr(attrs.width)}"`);
+        if (attrs.height != null)
+          parts.push(`height="${escapeAttr(attrs.height)}"`);
+        if (attrs.size != null)
+          parts.push(`data-size="${escapeAttr(attrs.size)}"`);
+        if (attrs.align)
+          parts.push(`data-align="${escapeAttr(attrs.align)}"`);
+        if (attrs.aspectRatio != null)
+          parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
+        // Wrap in a block <div> so marked treats it as a block (a bare <video>
+        // is inline-level HTML and marked wraps it in <p>, leaving a spurious
+        // empty paragraph beside the hoisted block atom). The wrapper has no
+        // data-type, so the schema parser ignores it and just hoists the video.
+        return `<div><video ${parts.join(" ")}></video></div>`;
+      }
+
+      case "youtube": {
+        // Emit the schema-matching div[data-type="youtube"]; the schema reads
+        // src from data-src and width/height/align from data-* attributes.
+        const attrs = node.attrs || {};
+        const parts: string[] = [
+          `data-type="youtube"`,
+          `data-src="${escapeAttr(attrs.src ?? "")}"`,
+        ];
+        if (attrs.width != null)
+          parts.push(`data-width="${escapeAttr(attrs.width)}"`);
+        if (attrs.height != null)
+          parts.push(`data-height="${escapeAttr(attrs.height)}"`);
+        if (attrs.align)
+          parts.push(`data-align="${escapeAttr(attrs.align)}"`);
+        return `<div ${parts.join(" ")}></div>`;
+      }
+
+      case "table": {
+        // A GFM pipe table cannot represent merged cells. If ANY cell carries
+        // colspan>1 or rowspan>1, a pipe table would corrupt the grid on
+        // re-import, so emit the WHOLE table as raw HTML <table> instead: the
+        // schema's table family parseHTML (tag table/tr/td/th, with colspan/
+        // rowspan read from the same-named HTML attrs and align via parseHTML)
+        // round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
+        const tableRows: any[] = nodeContent;
+        if (tableRows.length === 0) return "";
+        const hasSpan = tableRows.some((row: any) =>
+          (row.content || []).some(
+            (cell: any) =>
+              (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1,
+          ),
+        );
+
+        if (hasSpan) {
+          // Render each cell's block children to HTML (marked does NOT parse
+          // markdown inside a raw HTML block, so emitting markdown here would
+          // leak literal ** / `` into the cell). blockToHtml mirrors the schema
+          // HTML so inner formatting re-parses into the right marks/nodes.
+          const renderHtmlCell = (cell: any): string => {
+            const tag = cell.type === "tableHeader" ? "th" : "td";
+            const a = cell.attrs || {};
+            const cellParts: string[] = [];
+            if ((a.colspan ?? 1) > 1)
+              cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
+            if ((a.rowspan ?? 1) > 1)
+              cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
+            if (a.align) cellParts.push(`align="${escapeAttr(a.align)}"`);
+            const open = cellParts.length
+              ? `<${tag} ${cellParts.join(" ")}>`
+              : `<${tag}>`;
+            const inner = (cell.content || [])
+              .map((block: any) => blockToHtml(block))
+              .join("");
+            return `${open}${inner}</${tag}>`;
+          };
+          const htmlRows = tableRows
+            .map(
+              (row: any) =>
+                `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`,
+            )
+            .join("");
+          return `<table><tbody>${htmlRows}</tbody></table>`;
+        }
+
+        // No merged cells: emit a GFM table (header row + separator) so the
+        // markdown can be parsed back into a table on re-import.
+        const rows = tableRows.map(processNode);
+        const headerCells = tableRows[0]?.content || [];
+        const columns = headerCells.length || 1;
+        // Derive alignment markers (:--, :-:, --:) from each header cell.
+        const markers = Array.from({ length: columns }, (_, i) => {
+          const align = headerCells[i]?.attrs?.align;
+          switch (align) {
+            case "left":
+              return ":--";
+            case "center":
+              return ":-:";
+            case "right":
+              return "--:";
+            default:
+              return "---";
+          }
+        });
+        const separator = "| " + markers.join(" | ") + " |";
+        return [rows[0], separator, ...rows.slice(1)].join("\n");
+      }
+
+      case "tableRow":
+        return "| " + nodeContent.map(processNode).join(" | ") + " |";
+
+      case "tableCell":
+      case "tableHeader": {
+        // Join multiple block children with a space (not "") so adjacent blocks
+        // like a paragraph followed by a list don't collide into "line1- a".
+        // Then collapse newlines and escape pipes so a cell containing "|" or a
+        // line break cannot corrupt the surrounding GFM row.
+        return nodeContent
+          .map(processNode)
+          .join(" ")
+          .replace(/\r?\n/g, " ")
+          .replace(/\|/g, "\\|");
+      }
+
+      case "callout":
+        const calloutType = node.attrs?.type || "info";
+        const calloutContent = nodeContent.map(processNode).join("\n");
+        return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
+
+      case "details":
+        return nodeContent.map(processNode).join("\n");
+
+      case "detailsSummary":
+        const summaryText = nodeContent.map(processNode).join("");
+        return `<details>\n<summary>${summaryText}</summary>\n`;
+
+      case "detailsContent":
+        const detailsText = nodeContent.map(processNode).join("\n");
+        return `${detailsText}\n</details>`;
+
+      case "mathInline": {
+        // The schema's `text` attribute has no parseHTML, so TipTap's default
+        // parser reads it from the `text` HTML attribute (NOT the element's text
+        // content). Emit span[data-type="mathInline"] carrying the LaTeX in a
+        // `text="..."` attribute so it round-trips. marked cannot parse $...$
+        // back, so the previous form was lossy.
+        const inlineMath = node.attrs?.text || "";
+        return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
+      }
+
+      case "mathBlock": {
+        // Same as mathInline: the LaTeX must ride in the `text` HTML attribute
+        // for the schema's default parser to recover it.
+        const blockMath = node.attrs?.text || "";
+        return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
+      }
+
+      case "mention": {
+        // Emit span[data-type="mention"] with the schema's data-* attributes so
+        // generateJSON rebuilds the mention node instead of leaving "@label"
+        // plain text that cannot re-parse.
+        const attrs = node.attrs || {};
+        const parts: string[] = [`data-type="mention"`];
+        if (attrs.id) parts.push(`data-id="${escapeAttr(attrs.id)}"`);
+        if (attrs.label)
+          parts.push(`data-label="${escapeAttr(attrs.label)}"`);
+        if (attrs.entityType)
+          parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
+        if (attrs.entityId)
+          parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
+        if (attrs.slugId)
+          parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
+        if (attrs.creatorId)
+          parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
+        if (attrs.anchorId)
+          parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
+        // Keep the label as visible text content too; the schema reads attrs
+        // from data-*, so the inner text is purely cosmetic and harmless.
+        const mentionLabel = attrs.label || attrs.id || "";
+        // The label is visible element TEXT content here (the data-* attrs above
+        // carry the real values), so escape it for the text context, not attrs.
+        return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
+      }
+
+      case "attachment": {
+        // BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
+        // the schema stores name/url (plus mime/size/attachmentId). Emit the
+        // schema-matching div[data-type="attachment"] with data-attachment-*
+        // attrs so the node round-trips instead of degrading to a markdown link.
+        const attrs = node.attrs || {};
+        const parts: string[] = [
+          `data-type="attachment"`,
+          `data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
+        ];
+        if (attrs.name)
+          parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
+        if (attrs.mime)
+          parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
+        if (attrs.size != null)
+          parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
+        if (attrs.attachmentId)
+          parts.push(
+            `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
+          );
+        return `<div ${parts.join(" ")}></div>`;
+      }
+
+      case "drawio":
+      case "excalidraw": {
+        // Emit the schema-matching div[data-type=...] carrying the diagram's
+        // attrs as data-* (the schema's diagramAttributes reads src/title/alt/
+        // width/height/size/aspectRatio/align/attachmentId from data-*), so the
+        // diagram round-trips instead of degrading to a lossy placeholder.
+        const attrs = node.attrs || {};
+        const parts: string[] = [
+          `data-type="${type}"`,
+          `data-src="${escapeAttr(attrs.src ?? "")}"`,
+        ];
+        if (attrs.title != null)
+          parts.push(`data-title="${escapeAttr(attrs.title)}"`);
+        if (attrs.alt != null) parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
+        if (attrs.width != null)
+          parts.push(`data-width="${escapeAttr(attrs.width)}"`);
+        if (attrs.height != null)
+          parts.push(`data-height="${escapeAttr(attrs.height)}"`);
+        if (attrs.size != null)
+          parts.push(`data-size="${escapeAttr(attrs.size)}"`);
+        if (attrs.aspectRatio != null)
+          parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
+        if (attrs.align)
+          parts.push(`data-align="${escapeAttr(attrs.align)}"`);
+        if (attrs.attachmentId)
+          parts.push(
+            `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
+          );
+        return `<div ${parts.join(" ")}></div>`;
+      }
+
+      case "embed": {
+        // Emit the schema-matching div[data-type="embed"]; the schema reads
+        // src/provider/align/width/height from data-* attributes so the node
+        // (and its provider iframe info) survives the round-trip.
+        const attrs = node.attrs || {};
+        const parts: string[] = [
+          `data-type="embed"`,
+          `data-src="${escapeAttr(attrs.src ?? "")}"`,
+          `data-provider="${escapeAttr(attrs.provider ?? "")}"`,
+        ];
+        if (attrs.align)
+          parts.push(`data-align="${escapeAttr(attrs.align)}"`);
+        if (attrs.width != null)
+          parts.push(`data-width="${escapeAttr(attrs.width)}"`);
+        if (attrs.height != null)
+          parts.push(`data-height="${escapeAttr(attrs.height)}"`);
+        return `<div ${parts.join(" ")}></div>`;
+      }
+
+      case "audio": {
+        // Emit the schema-matching <audio> element (was emitting nothing). The
+        // schema reads src from src and attachmentId/size from data-*.
+        const attrs = node.attrs || {};
+        const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
+        if (attrs.attachmentId)
+          parts.push(
+            `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
+          );
+        if (attrs.size != null)
+          parts.push(`data-size="${escapeAttr(attrs.size)}"`);
+        // Wrap in a block <div> for the same reason as video: a bare <audio> is
+        // inline-level HTML that marked would wrap in <p>.
+        return `<div><audio ${parts.join(" ")}></audio></div>`;
+      }
+
+      case "pdf": {
+        // Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
+        // The schema reads src/width/height from standard attrs and name/
+        // attachmentId/size from data-*.
+        const attrs = node.attrs || {};
+        const parts: string[] = [
+          `data-type="pdf"`,
+          `src="${escapeAttr(attrs.src ?? "")}"`,
+        ];
+        if (attrs.name) parts.push(`data-name="${escapeAttr(attrs.name)}"`);
+        if (attrs.attachmentId)
+          parts.push(
+            `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
+          );
+        if (attrs.size != null)
+          parts.push(`data-size="${escapeAttr(attrs.size)}"`);
+        if (attrs.width != null)
+          parts.push(`width="${escapeAttr(attrs.width)}"`);
+        if (attrs.height != null)
+          parts.push(`height="${escapeAttr(attrs.height)}"`);
+        return `<div ${parts.join(" ")}></div>`;
+      }
+
+      case "columns": {
+        // Emit the schema-matching div[data-type="columns"] wrapper so the
+        // multi-column layout survives. Without a case the children were
+        // concatenated with no separator and the text merged. The schema reads
+        // layout from data-layout and widthMode from data-width-mode. The whole
+        // block is raw HTML, so render children via blockToHtml (NOT markdown,
+        // which marked would not re-parse inside a raw HTML block).
+        const attrs = node.attrs || {};
+        const parts: string[] = [`data-type="columns"`];
+        if (attrs.layout)
+          parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
+        if (attrs.widthMode && attrs.widthMode !== "normal")
+          parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
+        const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
+        return `<div ${parts.join(" ")}>${inner}</div>`;
+      }
+
+      case "column": {
+        // Emit the schema-matching div[data-type="column"]; the schema reads the
+        // column width from data-width. Children are rendered as HTML so their
+        // formatting survives inside this raw HTML block.
+        const attrs = node.attrs || {};
+        const parts: string[] = [`data-type="column"`];
+        if (attrs.width)
+          parts.push(`data-width="${escapeAttr(attrs.width)}"`);
+        const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
+        return `<div ${parts.join(" ")}>${inner}</div>`;
+      }
+
+      case "subpages":
+        return "{{SUBPAGES}}";
+
+      default:
+        // Fallback: process children
+        return nodeContent.map(processNode).join("");
+    }
+  };
+
+  // Convert a run of inline nodes (text with marks, hard breaks, inline atoms)
+  // to HTML. This is the inline renderer for the raw-HTML fallbacks (spanned
+  // tables, columns): marked will NOT re-parse markdown there, so markdown
+  // syntax (backticks, asterisks, brackets) would leak as literal characters.
+  // Every supported mark is mirrored to an HTML element so it can re-import as
+  // the matching ProseMirror mark; unsupported marks are skipped and the text
+  // is kept. A nullish `inlineNodes` renders as "".
+  const inlineToHtml = (inlineNodes: any[]): string => {
+    // Wrap already-rendered HTML in the element for a single mark. Marks that
+    // are unknown, or that lack the attribute they need (textStyle without a
+    // color, comment without a commentId), return the HTML unchanged.
+    const wrapWithMark = (html: string, mark: any): string => {
+      switch (mark.type) {
+        case "bold":
+          return `<strong>${html}</strong>`;
+        case "italic":
+          return `<em>${html}</em>`;
+        case "code":
+          return `<code>${html}</code>`;
+        case "strike":
+          return `<s>${html}</s>`;
+        case "underline":
+          return `<u>${html}</u>`;
+        case "subscript":
+          return `<sub>${html}</sub>`;
+        case "superscript":
+          return `<sup>${html}</sup>`;
+        case "link":
+          return `<a href="${escapeAttr(mark.attrs?.href || "")}">${html}</a>`;
+        case "highlight":
+          if (mark.attrs?.color) {
+            return `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${html}</mark>`;
+          }
+          return `<mark>${html}</mark>`;
+        case "textStyle":
+          if (mark.attrs?.color) {
+            return `<span style="color: ${escapeAttr(mark.attrs.color)}">${html}</span>`;
+          }
+          return html;
+        case "comment": {
+          // Inline comment anchor, so commented text inside a raw-HTML
+          // container (columns / spanned table cells) also round-trips.
+          if (!mark.attrs?.commentId) return html;
+          const resolvedAttr = mark.attrs?.resolved
+            ? ` data-resolved="true"`
+            : "";
+          return `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${resolvedAttr}>${html}</span>`;
+        }
+        default:
+          return html;
+      }
+    };
+
+    // Render one inline node: <br> for a hard break, processNode output for
+    // any other non-text node, escaped text wrapped in its marks otherwise.
+    const renderInlineNode = (n: any): string => {
+      if (n.type === "hardBreak") return "<br>";
+      // Inline atoms (mention, mathInline) already emit schema HTML.
+      if (n.type !== "text") return processNode(n);
+      let html = escapeHtmlText(n.text || "");
+      // Marks are applied in array order, so later marks wrap earlier ones.
+      for (const mark of n.marks || []) {
+        html = wrapWithMark(html, mark);
+      }
+      return html;
+    };
+
+    return (inlineNodes || []).map((n: any) => renderInlineNode(n)).join("");
+  };
+
+  // Build the <img> tag for an image node. Shared so an image can be emitted as
+  // real HTML wherever a raw-HTML container needs it (inside a column or a
+  // spanned table cell), where markdown `![](...)` would NOT be re-parsed and
+  // would survive as literal text.
+  //
+  // Attribute order and names: src (always, "" when absent), then alt, title,
+  // width, height, align, data-size, data-attachment-id, data-aspect-ratio.
+  // alt/title/align/attachmentId are emitted only when truthy; width/height/
+  // size/aspectRatio are emitted whenever they are not null/undefined (so 0 is
+  // kept).
+  // NOTE(review): which DOM attribute names the image schema's parseHTML reads
+  // is not visible here — confirm the data-* names against the extension.
+  const imageToHtml = (node: any): string => {
+    const imageAttrs = node.attrs || {};
+    const htmlAttrs: string[] = [`src="${escapeAttr(imageAttrs.src ?? "")}"`];
+    // Append name="value" when the value is truthy.
+    const addWhenTruthy = (name: string, value: any): void => {
+      if (value) htmlAttrs.push(`${name}="${escapeAttr(value)}"`);
+    };
+    // Append name="value" when the value is neither null nor undefined.
+    const addWhenPresent = (name: string, value: any): void => {
+      if (value != null) htmlAttrs.push(`${name}="${escapeAttr(value)}"`);
+    };
+    addWhenTruthy("alt", imageAttrs.alt);
+    addWhenTruthy("title", imageAttrs.title);
+    addWhenPresent("width", imageAttrs.width);
+    addWhenPresent("height", imageAttrs.height);
+    addWhenTruthy("align", imageAttrs.align);
+    addWhenPresent("data-size", imageAttrs.size);
+    addWhenTruthy("data-attachment-id", imageAttrs.attachmentId);
+    addWhenPresent("data-aspect-ratio", imageAttrs.aspectRatio);
+    return `<img ${htmlAttrs.join(" ")}>`;
+  };
+
+  // Render a callout node as div[data-type="callout"]. The banner type goes
+  // into data-callout-type, lower-cased, defaulting to "info" when the node has
+  // no type. Children are rendered through blockToHtml so they survive inside a
+  // raw-HTML container.
+  const calloutToHtml = (node: any): string => {
+    const bannerType = (node.attrs?.type || "info").toLowerCase();
+    const childBlocks = node.content || [];
+    const bodyHtml = childBlocks.map(blockToHtml).join("");
+    return `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">${bodyHtml}</div>`;
+  };
+
+  // Render a details node as a <details> element whose children (summary and
+  // content nodes) are rendered through blockToHtml. The schema parses
+  // <details>, summary[data-type="detailsSummary"], and
+  // div[data-type="detailsContent"].
+  const detailsToHtml = (node: any): string =>
+    `<details>${(node.content || []).map(blockToHtml).join("")}</details>`;
+  // Render a detailsSummary node: its inline content inside a tagged <summary>.
+  const detailsSummaryToHtml = (node: any): string => {
+    const summaryHtml = inlineToHtml(node.content || []);
+    return `<summary data-type="detailsSummary">${summaryHtml}</summary>`;
+  };
+  // Render a detailsContent node: its block children inside a tagged <div>.
+  const detailsContentToHtml = (node: any): string =>
+    `<div data-type="detailsContent">${(node.content || [])
+      .map(blockToHtml)
+      .join("")}</div>`;
+
+  // Render a taskList node as ul[data-type="taskList"] containing one
+  // li[data-type="taskItem"][data-checked="true"|"false"] per child. Emitting
+  // this HTML directly keeps task lists inside columns/cells from degrading to
+  // literal "- [ ]" text. bridgeTaskLists (in collaboration.ts) recognizes this
+  // shape.
+  const taskListToHtml = (node: any): string => {
+    // One <li> per task item; any truthy `checked` attr counts as checked.
+    const renderTaskItem = (taskItem: any): string => {
+      const checkedFlag = taskItem.attrs?.checked ? "true" : "false";
+      const bodyHtml = blockChildrenToHtml(taskItem);
+      return `<li data-type="taskItem" data-checked="${checkedFlag}">${bodyHtml}</li>`;
+    };
+    const listHtml = (node.content || [])
+      .map((taskItem: any) => renderTaskItem(taskItem))
+      .join("");
+    return `<ul data-type="taskList">${listHtml}</ul>`;
+  };
+
+  // Render a block node to HTML for the raw-HTML containers (spanned tables,
+  // columns). marked does NOT re-parse markdown inside a raw-HTML block, so
+  // EVERY node that can appear inside a column or a spanned cell must be
+  // emitted as HTML here — never as markdown, or it would land as literal text
+  // on re-import.
+  //
+  // - Nodes whose processNode case always produces HTML (columns/column,
+  //   mathBlock, video/audio/pdf/youtube, embed, attachment, drawio/excalidraw)
+  //   are delegated to processNode.
+  // - Tables are rendered as an HTML <table> HERE, not delegated: processNode
+  //   emits a GFM pipe table when no cell is spanned, and that markdown would
+  //   not be re-parsed inside a raw-HTML container. The markup mirrors
+  //   processNode's spanned-table output (all rows in <tbody>, th/td per cell
+  //   type, colspan/rowspan only when > 1, align when set).
+  // - The markdown-emitting cases (image/blockquote/callout/details/hr/
+  //   taskList) get explicit HTML.
+  // - Inline nodes (text/hardBreak/mention/mathInline) that reach this function
+  //   directly are rendered through inlineToHtml without a wrapper, so their
+  //   content is never dropped.
+  // - Any other block type is wrapped in a <div>; see the default branch.
+  const blockToHtml = (block: any): string => {
+    const children = block.content || [];
+    // Node types that are inline content (rendered by inlineToHtml), not blocks.
+    const isInlineNode = (n: any): boolean =>
+      n?.type === "text" ||
+      n?.type === "hardBreak" ||
+      n?.type === "mention" ||
+      n?.type === "mathInline";
+    switch (block.type) {
+      case "text":
+      case "hardBreak":
+      case "mention":
+      case "mathInline":
+        return inlineToHtml([block]);
+      case "paragraph":
+        return `<p>${inlineToHtml(children)}</p>`;
+      case "heading": {
+        const level = block.attrs?.level || 1;
+        return `<h${level}>${inlineToHtml(children)}</h${level}>`;
+      }
+      case "bulletList":
+        return `<ul>${children
+          .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
+          .join("")}</ul>`;
+      case "orderedList":
+        return `<ol>${children
+          .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
+          .join("")}</ol>`;
+      case "codeBlock": {
+        const lang = block.attrs?.language || "";
+        // The code itself is element TEXT content (between <code> tags), so it
+        // must escape < > & — NOT the attribute escaper. The language rides in
+        // a class ATTRIBUTE, so it uses escapeAttr.
+        const code = escapeHtmlText(
+          children
+            .map(processNode)
+            .join("")
+            .replace(/\n+$/, ""),
+        );
+        const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
+        return `<pre><code${cls}>${code}</code></pre>`;
+      }
+      case "image":
+        return imageToHtml(block);
+      case "blockquote":
+        return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
+      case "horizontalRule":
+        return "<hr>";
+      case "callout":
+        return calloutToHtml(block);
+      case "details":
+        return detailsToHtml(block);
+      case "detailsSummary":
+        return detailsSummaryToHtml(block);
+      case "detailsContent":
+        return detailsContentToHtml(block);
+      case "taskList":
+        return taskListToHtml(block);
+      case "taskItem":
+        // A bare taskItem (outside a taskList) still needs a wrapping list so
+        // the schema parses it; wrap it in a single-item taskList.
+        return taskListToHtml({ content: [block] });
+      case "table": {
+        // Always HTML inside a raw-HTML container, spanned or not. An empty
+        // table renders as "" (same as processNode).
+        if (children.length === 0) return "";
+        const renderCell = (cell: any): string => {
+          const tag = cell.type === "tableHeader" ? "th" : "td";
+          const a = cell.attrs || {};
+          const cellParts: string[] = [];
+          if ((a.colspan ?? 1) > 1)
+            cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
+          if ((a.rowspan ?? 1) > 1)
+            cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
+          if (a.align) cellParts.push(`align="${escapeAttr(a.align)}"`);
+          const open = cellParts.length
+            ? `<${tag} ${cellParts.join(" ")}>`
+            : `<${tag}>`;
+          const inner = (cell.content || [])
+            .map((b: any) => blockToHtml(b))
+            .join("");
+          return `${open}${inner}</${tag}>`;
+        };
+        const htmlRows = children
+          .map(
+            (row: any) =>
+              `<tr>${(row.content || []).map(renderCell).join("")}</tr>`,
+          )
+          .join("");
+        return `<table><tbody>${htmlRows}</tbody></table>`;
+      }
+      // These processNode cases emit HTML unconditionally, so delegate.
+      case "columns":
+      case "column":
+      case "mathBlock":
+      case "video":
+      case "audio":
+      case "pdf":
+      case "youtube":
+      case "embed":
+      case "attachment":
+      case "drawio":
+      case "excalidraw":
+        return processNode(block);
+      default: {
+        // Any still-unhandled block type: NEVER fall back to markdown inside a
+        // raw-HTML block (it would become literal text). Wrap the rendered
+        // children in a <div>. Consecutive inline children are rendered as one
+        // run through inlineToHtml and block children recursively, so a mixed
+        // child list (e.g. text + hardBreak + text) keeps its text instead of
+        // each text node collapsing into an empty <div>.
+        let html = "";
+        let inlineRun: any[] = [];
+        const flushInlineRun = (): void => {
+          if (inlineRun.length) {
+            html += inlineToHtml(inlineRun);
+            inlineRun = [];
+          }
+        };
+        for (const child of children) {
+          if (isInlineNode(child)) {
+            inlineRun.push(child);
+          } else {
+            flushInlineRun();
+            html += blockToHtml(child);
+          }
+        }
+        flushInlineRun();
+        return `<div>${html}</div>`;
+      }
+    }
+  };
+
+  // HTML for the block children of a list item (a listItem holds block+
+  // content): each child goes through blockToHtml and the results are
+  // concatenated with no separator. HTML-path counterpart of processListItem.
+  const blockChildrenToHtml = (item: any): string => {
+    const childBlocks: any[] = item.content || [];
+    return childBlocks.map((child: any) => blockToHtml(child)).join("");
+  };
+
+  // Lay out the rendered children of one list item under its marker.
+  //
+  // `childStrings` holds one (possibly multi-line) string per child block. The
+  // very first physical line of the first child is prefixed with
+  // `${prefix} ` (the marker, e.g. "- " or "1. "). EVERY other non-empty line —
+  // the remaining lines of the first child and all lines of later children
+  // (nested lists, code blocks, extra paragraphs) — is indented by
+  // `indentWidth` spaces so it stays inside the item; otherwise a continuation
+  // line would collapse to column 0 and escape the list. Empty lines stay
+  // empty (no trailing whitespace). An empty `childStrings` yields "".
+  //
+  // `indentWidth` MUST be the width of the LIST marker, which is not always the
+  // visible prefix width:
+  //   - bullet "- "          -> 2 columns
+  //   - task   "- [ ] "      -> marker is still "- " (the "[ ] " is content), 2
+  //   - ordered "1. "/"10. " -> 3/4 columns, scaling with the number's digits
+  // CommonMark anchors nested content to the marker column, so an ordered item
+  // indented to only 2 columns would re-parse as sibling/loose content. Callers
+  // therefore pass the exact width.
+  const indentItemChildren = (
+    childStrings: string[],
+    prefix: string,
+    indentWidth: number,
+  ): string => {
+    const pad = " ".repeat(indentWidth);
+    const out: string[] = [];
+    // False until the marker line (first line of the first child) is written.
+    let markerWritten = false;
+    for (const child of childStrings) {
+      for (const physicalLine of child.split("\n")) {
+        if (!markerWritten) {
+          out.push(`${prefix} ${physicalLine}`);
+          markerWritten = true;
+          continue;
+        }
+        // Continuation line: indent it, but keep blank lines truly blank.
+        out.push(physicalLine === "" ? "" : pad + physicalLine);
+      }
+    }
+    return out.join("\n");
+  };
+
+  // Render a bullet/ordered list item as markdown under `prefix` (e.g. "-" or
+  // "1."). An item with no children renders as the bare prefix.
+  const processListItem = (item: any, prefix: string): string => {
+    const rendered: string[] = (item.content || []).map(processNode);
+    // The marker written out is `${prefix} `, so continuation lines are
+    // indented by prefix.length + 1 ("-" -> 2, "1." -> 3, "10." -> 4). For
+    // bullet and ordered items the visible prefix IS the list marker.
+    const markerWidth = prefix.length + 1;
+    return rendered.length === 0
+      ? prefix
+      : indentItemChildren(rendered, prefix, markerWidth);
+  };
+
+  // Render a task item as markdown: "- [x]" when attrs.checked is truthy,
+  // "- [ ]" otherwise, followed by the item's rendered children.
+  const processTaskItem = (item: any): string => {
+    const marker = item.attrs?.checked ? "- [x]" : "- [ ]";
+    const rendered: string[] = (item.content || []).map(processNode);
+    // An empty task item must still emit its checkbox; indenting an empty
+    // child list would produce "" and the row would disappear.
+    if (rendered.length === 0) return marker;
+    // The LIST marker of a task item is just "- " (2 columns); the checkbox is
+    // item content. Continuation lines are therefore indented by a fixed 2,
+    // not by the wider visible prefix.
+    const taskMarkerWidth = 2;
+    return indentItemChildren(rendered, marker, taskMarkerWidth);
+  };
+
+  return processNode(content).trim();
+}
--- a/packages/docmost-client/src/lib/markdown-document.ts
+++ b/packages/docmost-client/src/lib/markdown-document.ts
@@ -0,0 +1,156 @@
+/**
+ * Self-contained Docmost-flavoured Markdown document (custom extensions).
+ *
+ * A single `.md` file that packages everything needed to losslessly round-trip
+ * a page through "download -> edit body -> re-upload":
+ *   - a leading `docmost:meta` block: a one-line JSON object with page identity;
+ *   - the Markdown body (carrying inline comment anchors and diagrams as HTML);
+ *   - a trailing `docmost:comments` block: a one-line JSON array of comment
+ *     threads.
+ *
+ * Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
+ * drop HTML comments, so even if the WHOLE file were ever fed straight to the
+ * importer without first stripping the blocks, the metadata cannot leak into the
+ * document. (A fenced ```docmost-comments``` block would WRONGLY become a
+ * codeBlock node, so a fenced block is deliberately NOT used.)
+ *
+ * The delimiter literals may legitimately appear in the BODY too (e.g. a user
+ * re-pastes an exported `.md` into a page, or a page documents this very
+ * format). To stay robust, parsing treats only the FINAL, document-ending
+ * `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
+ * opener whose closing `-->` sits at the very end of the file. Any earlier
+ * literal occurrence is left in the body untouched.
+ *
+ * NOTE on comments: in this version the comment THREAD records are preserved in
+ * the file but are NOT pushed back to the server on import — only the inline
+ * comment marks (anchors) embedded in the body are restored. Managing comment
+ * records stays with the comment tools/UI.
+ */
+
+export interface DocmostMdMeta {
+  version: number; // NOTE(review): presumably the file-format version — confirm with the writers
+  pageId?: string; // identity fields are all optional; serialized as-is into the docmost:meta JSON
+  slugId?: string;
+  title?: string;
+  spaceId?: string;
+  parentPageId?: string | null; // explicitly nullable; presumably null means "no parent" — confirm
+}
+
+// Match the leading meta block (leading whitespace allowed). Capture group 1 is
+// the JSON text between the opener line and the closing "\n-->" (lazy match).
+const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
+// Match a `docmost:comments` opener line. Global so the parser can scan for the
+// LAST opener (end-anchoring one regex would mis-capture across an earlier
+// literal opener in the body). Stateful: the parser resets lastIndex before use.
+const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
+
+/**
+ * Assemble the full self-contained markdown file: meta block, body, and the
+ * comments block. The meta block is always emitted; the comments block is always
+ * emitted too (with `[]` when `comments` is not an array) so the format stays
+ * uniform and parsing stays simple.
+ *
+ * Both JSON payloads live inside HTML comments, so every `>` in the JSON is
+ * written as the JSON escape `\u003e`. Without this, a title or comment text
+ * containing `-->` would terminate the HTML comment early and the rest of the
+ * metadata would leak into the document when the file is fed to an HTML/
+ * markdown parser. `JSON.parse` decodes the escape, so the round-trip through
+ * `parseDocmostMarkdown` is unchanged.
+ *
+ * @param meta Page identity written into the leading `docmost:meta` block.
+ * @param body Markdown body; nullish is treated as "" and it is trimmed.
+ * @param comments Comment threads for the trailing `docmost:comments` block.
+ * @returns The complete file text, ending with a newline.
+ */
+export function serializeDocmostMarkdown(
+  meta: DocmostMdMeta,
+  body: string,
+  comments: any[],
+): string {
+  // `>` can only occur inside JSON strings, so escaping it everywhere is safe.
+  const toCommentSafeJson = (value: unknown): string =>
+    `${JSON.stringify(value)}`.replace(/>/g, "\\u003e");
+  const metaJson = toCommentSafeJson(meta);
+  const commentsJson = toCommentSafeJson(
+    Array.isArray(comments) ? comments : [],
+  );
+  const trimmedBody = (body ?? "").trim();
+  return (
+    `<!-- docmost:meta\n${metaJson}\n-->\n\n` +
+    `${trimmedBody}\n\n` +
+    `<!-- docmost:comments\n${commentsJson}\n-->\n`
+  );
+}
+
+/**
+ * Split a self-contained file back into its parts. Tolerant: if the meta or
+ * comments block is missing (e.g. a hand-written plain-markdown file), the
+ * corresponding value is returned as `null` and the whole input is treated as
+ * the body. Robust to `\r\n` line endings (they are normalized to `\n`, also in
+ * the returned body).
+ *
+ * Only the FINAL `docmost:comments` opener whose closing `-->` ends the
+ * document is treated as the comments block; earlier literal occurrences stay
+ * in the body.
+ *
+ * @param full The complete file text; nullish is treated as "".
+ * @returns The parsed meta (or null), the trimmed body, and the parsed
+ *   comments array (or null).
+ * @throws Error when a block that IS present contains invalid JSON, or JSON of
+ *   the wrong shape (meta must be a JSON object, comments a JSON array). A
+ *   MISSING block never throws.
+ */
+export function parseDocmostMarkdown(full: string): {
+  meta: DocmostMdMeta | null;
+  body: string;
+  comments: any[] | null;
+} {
+  // Normalize line endings so the anchored regexes work regardless of CRLF.
+  const normalized = (full ?? "").replace(/\r\n/g, "\n");
+
+  // Parse one metadata block's JSON; failures name the offending block.
+  const parseBlockJson = (blockName: string, jsonText: string): unknown => {
+    try {
+      return JSON.parse(jsonText);
+    } catch (e) {
+      throw new Error(
+        `Invalid docmost:${blockName} JSON block: ${
+          e instanceof Error ? e.message : String(e)
+        }`,
+      );
+    }
+  };
+
+  // Extract the leading meta block (start-anchored — already unambiguous).
+  let meta: DocmostMdMeta | null = null;
+  let metaEnd = 0;
+  const metaMatch = normalized.match(META_RE);
+  if (metaMatch) {
+    const parsedMeta = parseBlockJson("meta", metaMatch[1]);
+    // JSON.parse can yield any JSON value; the declared return type promises
+    // an object, so reject scalars, null and arrays explicitly.
+    if (
+      parsedMeta === null ||
+      typeof parsedMeta !== "object" ||
+      Array.isArray(parsedMeta)
+    ) {
+      throw new Error(
+        "Invalid docmost:meta JSON block: expected a JSON object",
+      );
+    }
+    meta = parsedMeta as DocmostMdMeta;
+    // Body starts right after the matched meta block.
+    metaEnd = (metaMatch.index ?? 0) + metaMatch[0].length;
+  }
+
+  // Find the LAST `<!-- docmost:comments` opener; the real file-level block is
+  // the final one whose closing `-->` ends the document. Any earlier literal
+  // occurrence inside the body (e.g. a re-pasted export) is left in the body.
+  let lastOpenStart = -1;
+  let lastOpenEnd = -1;
+  let m: RegExpExecArray | null;
+  // The regex is global (stateful), so always restart the scan from 0.
+  COMMENTS_OPEN_RE.lastIndex = 0;
+  while ((m = COMMENTS_OPEN_RE.exec(normalized)) !== null) {
+    lastOpenStart = m.index;
+    lastOpenEnd = m.index + m[0].length;
+  }
+
+  let comments: any[] | null = null;
+  let bodyEnd = normalized.length;
+  if (lastOpenStart !== -1) {
+    const rest = normalized.slice(lastOpenEnd);
+    const close = rest.match(/\r?\n-->[ \t]*\r?\n?\s*$/); // closer must end the doc
+    if (close) {
+      const parsedComments = parseBlockJson(
+        "comments",
+        rest.slice(0, close.index),
+      );
+      // Same reasoning as for meta: the return type promises an array.
+      if (!Array.isArray(parsedComments)) {
+        throw new Error(
+          "Invalid docmost:comments JSON block: expected a JSON array",
+        );
+      }
+      comments = parsedComments;
+      bodyEnd = lastOpenStart; // strip from the opener to end of document
+    }
+  }
+
+  const body = normalized.slice(metaEnd, bodyEnd).trim();
+  return { meta, body, comments };
+}
+
+// --- docmost-sync addition (backport target: docmost-mcp/src/lib/markdown-document.ts) ---
+
+/**
+ * Serialize a self-contained markdown file with the meta block + body ONLY —
+ * NO trailing `docmost:comments` block. The docmost-sync engine never touches
+ * `/comments` (SPEC §3): the synced file carries just page identity (meta) and
+ * the body, where comment threads survive only as inline `<span
+ * data-comment-id>` anchor marks inside the body.
+ *
+ * `parseDocmostMarkdown` already tolerates a missing comments block (it returns
+ * `comments: null` and treats the rest as body), so a file produced here
+ * round-trips cleanly through the parser.
+ *
+ * The meta JSON sits inside an HTML comment, so every `>` in it is written as
+ * the JSON escape `\u003e`; otherwise a title containing `-->` would end the
+ * comment early and leak metadata into the document. `JSON.parse` decodes the
+ * escape, so parsing is unaffected.
+ *
+ * @param meta Page identity written into the leading `docmost:meta` block.
+ * @param body Markdown body; nullish is treated as "" and it is trimmed.
+ * @returns The file text: meta block, blank line, body, trailing newline.
+ */
+export function serializeDocmostMarkdownBody(
+  meta: DocmostMdMeta,
+  body: string,
+): string {
+  // `>` can only occur inside JSON strings, so escaping it everywhere is safe.
+  const metaJson = `${JSON.stringify(meta)}`.replace(/>/g, "\\u003e");
+  const trimmedBody = (body ?? "").trim();
+  return `<!-- docmost:meta\n${metaJson}\n-->\n\n${trimmedBody}\n`;
+}
--- a/packages/docmost-client/src/lib/node-ops.ts
+++ b/packages/docmost-client/src/lib/node-ops.ts
@@ -0,0 +1,897 @@
+/**
+ * Pure, network-free helpers for manipulating a ProseMirror/TipTap document
+ * tree by node id.
+ *
+ * A ProseMirror node here is a plain JSON object of the shape produced by
+ * Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
+ * `content` array; a node carries a stable id in `attrs.id`. Callouts and
+ * table cells hold their children in `content` just like any other block, so a
+ * single recursive walk reaches them all.
+ *
+ * Every exported function that edits the document operates on a DEEP CLONE of
+ * the input and returns the new document; the read-only helpers return derived
+ * data instead. The input doc and any `newNode`/`node` argument are never
+ * mutated. All functions are defensively null-safe: a missing or non-array
+ * `content`, non-object nodes, and absent `attrs` are tolerated.
+ */
+
+/**
+ * Deep-clone a JSON-serializable value without mutating the original.
+ *
+ * Uses `structuredClone` when the runtime provides it; otherwise falls back to
+ * a JSON round-trip.
+ *
+ * @param value the value to copy (may be `undefined`)
+ * @returns a copy of `value`; `undefined` is returned unchanged
+ */
+function clone<T>(value: T): T {
+  if (typeof structuredClone === "function") {
+    return structuredClone(value);
+  }
+  // Fallback for environments without structuredClone.
+  // `JSON.stringify(undefined)` yields `undefined`, and `JSON.parse(undefined)`
+  // throws a SyntaxError, so an absent value must short-circuit here to keep
+  // the fallback as tolerant as the structuredClone path.
+  if (value === undefined) return value;
+  return JSON.parse(JSON.stringify(value)) as T;
+}
+
+/**
+ * Type guard for "record-like" values: anything whose `typeof` is "object"
+ * except `null` and arrays. `undefined` and primitives are rejected.
+ */
+function isObject(value: any): value is Record<string, any> {
+  if (value === null || value === undefined) return false;
+  if (Array.isArray(value)) return false;
+  return typeof value === "object";
+}
+
+/**
+ * Report whether `node` is an object whose `attrs` object has an `id` strictly
+ * equal to `nodeId`. Non-object nodes and nodes without `attrs` never match.
+ */
+function matchesId(node: any, nodeId: string): boolean {
+  if (!isObject(node)) return false;
+  const attrs = node.attrs;
+  if (!isObject(attrs)) return false;
+  return attrs.id === nodeId;
+}
+
+/**
+ * Flatten a node to the plain text it contains.
+ *
+ * The node's own `text` (when it is a string) comes first, followed by the
+ * plain text of every entry of its `content` array, in order and with no
+ * separator. Nullish or non-object input yields "".
+ */
+export function blockPlainText(node: any): string {
+  if (!isObject(node)) return "";
+  const ownText: string = typeof node.text === "string" ? node.text : "";
+  if (!Array.isArray(node.content)) return ownText;
+  return node.content.reduce(
+    (acc: string, child: any) => acc + blockPlainText(child),
+    ownText,
+  );
+}
+
+/**
+ * Truncate `text` to at most `n` UTF-16 code units, appending an ellipsis when
+ * anything was cut. Text that already fits is returned unchanged.
+ *
+ * The cut never lands between the two halves of a surrogate pair: when the
+ * last kept code unit would be a high surrogate, it is dropped too, so the
+ * result never ends in a lone surrogate before the ellipsis.
+ *
+ * @param text the string to shorten
+ * @param n maximum number of code units kept from `text`
+ * @returns `text` itself, or its shortened prefix followed by "…"
+ */
+function truncate(text: string, n: number): string {
+  if (text.length <= n) return text;
+  let end = n;
+  // charCodeAt yields NaN for out-of-range positions (n <= 0), which fails
+  // both comparisons and leaves `end` untouched.
+  const lastKept = text.charCodeAt(end - 1);
+  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
+  return text.slice(0, end) + "…";
+}
+
+/**
+ * One compact outline entry for a single top-level block, as produced by
+ * `buildOutline`.
+ *
+ * `index` is the block's position in `doc.content`, `type` its node type
+ * (`undefined` for a non-object entry), `id` its `attrs.id` (or `null`), and
+ * `firstText` the block's plain text cut to 100 characters. The remaining
+ * fields are optional extras that depend on the block type.
+ */
+export interface OutlineEntry {
+  index: number;
+  type: string | undefined;
+  id: string | null;
+  firstText: string;
+  /** Present for headings only: `attrs.level`, or `null` when it is absent. */
+  level?: number | null;
+  /** Present for tables only, together with `cols` (first row's cell count) and `header` (first row's cell texts). */
+  rows?: number;
+  cols?: number;
+  header?: string[];
+  /** Present for list blocks only (bulletList/orderedList/taskList): number of direct children. */
+  items?: number;
+}
+
+/**
+ * Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
+ * `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
+ * table cells — compactness is the point; use `getNodeByRef` to drill into a
+ * specific block.
+ *
+ * Each entry carries `{ index, type, id, firstText }`, plus type-specific
+ * extras: headings add `level`; tables add `rows`/`cols` and the first row's
+ * cell texts (cut to 40 chars) as `header`; list blocks (types ending in
+ * "List") add `items`. `firstText` is the block's plain text cut to 100 chars.
+ *
+ * Null-safe: a missing or non-object doc/content yields `[]`, and a block, row
+ * or cell whose `content` is missing or not an array is counted as having no
+ * children instead of producing a bogus count or throwing.
+ *
+ * @param doc the document root
+ * @returns one entry per element of `doc.content`, in order
+ */
+export function buildOutline(doc: any): OutlineEntry[] {
+  if (!isObject(doc) || !Array.isArray(doc.content)) return [];
+
+  // Children of `node` as an array; anything that is not an array counts as
+  // "no children", matching the tolerance promised by the rest of the file.
+  const childrenOf = (node: any): any[] =>
+    isObject(node) && Array.isArray(node.content) ? node.content : [];
+
+  const out: OutlineEntry[] = [];
+  for (let i = 0; i < doc.content.length; i++) {
+    const block = doc.content[i];
+    const type = isObject(block) ? block.type : undefined;
+    const attrs = isObject(block) && isObject(block.attrs) ? block.attrs : null;
+    const entry: OutlineEntry = {
+      index: i,
+      type,
+      id: attrs !== null ? attrs.id ?? null : null,
+      firstText: truncate(blockPlainText(block), 100),
+    };
+
+    if (type === "heading") {
+      entry.level = attrs !== null ? attrs.level ?? null : null;
+    } else if (type === "table") {
+      const tableRows = childrenOf(block);
+      const headerCells = childrenOf(tableRows[0]);
+      entry.rows = tableRows.length;
+      entry.cols = headerCells.length;
+      entry.header = headerCells.map((cell: any) =>
+        truncate(blockPlainText(cell), 40),
+      );
+    } else if (typeof type === "string" && type.endsWith("List")) {
+      entry.items = childrenOf(block).length;
+    }
+
+    out.push(entry);
+  }
+  return out;
+}
+
+/**
+ * Look up one node by reference. Returns `{ node, path, type }` for the match
+ * or `null` when there is none.
+ *
+ * Two reference forms are understood:
+ * - `#<n>` (e.g. `#2`): the TOP-LEVEL block at index `n` of `doc.content`.
+ *   This is the only way to address table/tableRow/tableCell nodes, which
+ *   carry no `attrs.id`.
+ * - anything else: a block id. The tree below the root is walked in document
+ *   (pre-order) order and the FIRST node with `attrs.id === ref` wins; the
+ *   root node itself is never a candidate.
+ *
+ * `path` lists the child indices from the doc root down to the node, so a
+ * top-level block has path `[index]`. `node` is a DEEP CLONE, so callers may
+ * mutate it without touching the input doc. Null-safe.
+ */
+export function getNodeByRef(
+  doc: any,
+  ref: string,
+): { node: any; path: number[]; type: string | undefined } | null {
+  if (!isObject(doc)) return null;
+  const topLevel: any[] | null = Array.isArray(doc.content) ? doc.content : null;
+
+  // Positional form "#<n>": pick straight out of the top-level array.
+  const positional = typeof ref === "string" ? /^#(\d+)$/.exec(ref) : null;
+  if (positional !== null) {
+    const at = Number(positional[1]);
+    const candidate = topLevel !== null ? topLevel[at] : undefined;
+    return isObject(candidate)
+      ? { node: clone(candidate), path: [at], type: candidate.type }
+      : null;
+  }
+
+  // Id form: test a node first, then descend into its children in order.
+  const visit = (
+    candidate: any,
+    path: number[],
+  ): { node: any; path: number[]; type: string } | null => {
+    if (!isObject(candidate)) return null;
+    if (matchesId(candidate, ref)) {
+      return { node: clone(candidate), path, type: candidate.type };
+    }
+    if (!Array.isArray(candidate.content)) return null;
+    for (let i = 0; i < candidate.content.length; i += 1) {
+      const found = visit(candidate.content[i], [...path, i]);
+      if (found !== null) return found;
+    }
+    return null;
+  };
+
+  if (topLevel === null) return null;
+  for (let i = 0; i < topLevel.length; i += 1) {
+    const found = visit(topLevel[i], [i]);
+    if (found !== null) return found;
+  }
+  return null;
+}
+
+/**
+ * Swap EVERY node whose `attrs.id === nodeId` for a deep clone of `newNode`,
+ * wherever it sits below the root (callouts and table cells included).
+ *
+ * Works on a clone of `doc` and returns `{ doc, replaced }`, where `replaced`
+ * counts the substitutions made. Each match receives its own clone of
+ * `newNode`, so the substituted nodes share no references, and a substituted
+ * node is never searched for further matches.
+ */
+export function replaceNodeById(
+  doc: any,
+  nodeId: string,
+  newNode: any,
+): { doc: any; replaced: number } {
+  const result = clone(doc);
+  let replaced = 0;
+
+  // Substitute matches among `siblings` in place; descend only into the
+  // children of entries that were NOT replaced.
+  const substitute = (siblings: any[]): void => {
+    siblings.forEach((candidate, position) => {
+      if (matchesId(candidate, nodeId)) {
+        siblings[position] = clone(newNode);
+        replaced += 1;
+      } else if (isObject(candidate) && Array.isArray(candidate.content)) {
+        substitute(candidate.content);
+      }
+    });
+  };
+
+  if (isObject(result) && Array.isArray(result.content)) {
+    substitute(result.content);
+  }
+  return { doc: result, replaced };
+}
+
+/**
+ * Drop EVERY node whose `attrs.id === nodeId` from its parent's `content`
+ * array, at any depth below the root (callouts and tables included).
+ *
+ * Works on a clone of `doc` and returns `{ doc, deleted }`, where `deleted`
+ * counts the removed nodes. A removed node's subtree goes with it and is not
+ * searched.
+ */
+export function deleteNodeById(
+  doc: any,
+  nodeId: string,
+): { doc: any; deleted: number } {
+  const result = clone(doc);
+  let deleted = 0;
+
+  // Build the list of surviving siblings, pruning each survivor's own
+  // children on the way.
+  const prune = (siblings: any[]): any[] => {
+    const survivors: any[] = [];
+    for (let i = 0; i < siblings.length; i += 1) {
+      const candidate = siblings[i];
+      if (matchesId(candidate, nodeId)) {
+        deleted += 1;
+      } else {
+        if (isObject(candidate) && Array.isArray(candidate.content)) {
+          candidate.content = prune(candidate.content);
+        }
+        survivors.push(candidate);
+      }
+    }
+    return survivors;
+  };
+
+  if (isObject(result) && Array.isArray(result.content)) {
+    result.content = prune(result.content);
+  }
+  return { doc: result, deleted };
+}
+
+/**
+ * Return a deep clone of `doc` in which no node or mark `attrs` object holds a
+ * key whose value is strictly `undefined`, making the result safe to hand to
+ * Yjs (which throws an opaque "Unexpected content type" when asked to store an
+ * `undefined` attribute value).
+ *
+ * Only `undefined` values are dropped; `null`, `false`, `0` and `""` are
+ * legitimate JSON-storable values and stay. The input is never mutated.
+ * Defensively null-safe like the rest of the file.
+ */
+export function sanitizeForYjs(doc: any): any {
+  const cleaned = clone(doc);
+
+  // Remove the `undefined`-valued keys of one attrs object, in place.
+  const dropUndefined = (attrs: any): void => {
+    if (!isObject(attrs)) return;
+    Object.keys(attrs)
+      .filter((name) => attrs[name] === undefined)
+      .forEach((name) => {
+        delete attrs[name];
+      });
+  };
+
+  // Explicit work list instead of recursion: every node is cleaned
+  // independently, so visiting order does not affect the result.
+  const pending: any[] = [cleaned];
+  while (pending.length > 0) {
+    const node = pending.pop();
+    if (!isObject(node)) continue;
+    dropUndefined(node.attrs);
+    if (Array.isArray(node.marks)) {
+      for (const mark of node.marks) {
+        if (isObject(mark)) dropUndefined(mark.attrs);
+      }
+    }
+    if (Array.isArray(node.content)) {
+      for (const child of node.content) pending.push(child);
+    }
+  }
+
+  return cleaned;
+}
+
+/**
+ * Diagnostics helper: find the FIRST attribute value (in any `node.attrs` or
+ * `mark.attrs`) that Yjs cannot store — `undefined`, a `function`, a `symbol`
+ * or a `bigint` — and describe where it lives, e.g.
+ * `content[3].content[0].attrs.indent (undefined)`. Returns `null` when every
+ * attribute is storable. Null-safe.
+ *
+ * Search order: the root's own `attrs`, then each child subtree in order;
+ * within a node its `attrs` come first, then its marks, then its children.
+ * The root's `marks` are not inspected.
+ */
+export function findUnstorableAttr(doc: any): string | null {
+  const UNSTORABLE_KINDS = new Set(["undefined", "function", "symbol", "bigint"]);
+
+  // Describe the first offending key of one attrs object, or null.
+  const describeBadAttr = (attrs: any, where: string): string | null => {
+    if (!isObject(attrs)) return null;
+    for (const [name, value] of Object.entries(attrs)) {
+      const kind = typeof value;
+      if (UNSTORABLE_KINDS.has(kind)) return `${where}.${name} (${kind})`;
+    }
+    return null;
+  };
+
+  // Scan a content array; `prefix` is "" at the root and "<path>." below it,
+  // so reported paths start directly at "content[i]" with no root segment.
+  const scanChildren = (children: any, prefix: string): string | null => {
+    if (!Array.isArray(children)) return null;
+    for (let i = 0; i < children.length; i += 1) {
+      const found = scanNode(children[i], `${prefix}content[${i}]`);
+      if (found !== null) return found;
+    }
+    return null;
+  };
+
+  const scanNode = (node: any, path: string): string | null => {
+    if (!isObject(node)) return null;
+    const own = describeBadAttr(node.attrs, `${path}.attrs`);
+    if (own !== null) return own;
+    if (Array.isArray(node.marks)) {
+      for (let i = 0; i < node.marks.length; i += 1) {
+        const fromMark = describeBadAttr(
+          node.marks[i]?.attrs,
+          `${path}.marks[${i}].attrs`,
+        );
+        if (fromMark !== null) return fromMark;
+      }
+    }
+    return scanChildren(node.content, `${path}.`);
+  };
+
+  if (!isObject(doc)) return null;
+  return describeBadAttr(doc.attrs, "attrs") ?? scanChildren(doc.content, "");
+}
+
+/**
+ * Table structural node types and the container each must live directly inside.
+ * Used by `insertNodeRelative` to splice rows/cells into the correct ancestor
+ * rather than blindly into the anchor's direct parent (which would corrupt the
+ * table's nesting).
+ *
+ * `STRUCTURAL_TYPES` is the membership test for "is this a row/cell node";
+ * `REQUIRED_CONTAINER` maps each of those types to the node type whose
+ * `content` array it has to be spliced into (`table` for rows, `tableRow` for
+ * both cell kinds).
+ */
+const STRUCTURAL_TYPES = new Set(["tableRow", "tableCell", "tableHeader"]);
+const REQUIRED_CONTAINER: Record<string, string> = {
+  tableRow: "table",
+  tableCell: "tableRow",
+  tableHeader: "tableRow",
+};
+
+/**
+ * Resolve the anchor described by `opts` and return the chain of nodes leading
+ * to it, from `doc` down to and including the anchor. Every link is
+ * `{ node, index }`, `index` being the node's position in its parent's
+ * `content` array (-1 for the root doc). Returns `null` when no anchor is
+ * found.
+ *
+ * `anchorNodeId` takes precedence and is searched depth-first through the
+ * whole tree, root included. Otherwise `anchorText` is matched against the
+ * plain text of TOP-LEVEL blocks only, first match wins.
+ */
+function findAnchorChain(
+  doc: any,
+  opts: InsertOptions,
+): { node: any; index: number }[] | null {
+  if (!isObject(doc)) return null;
+
+  const { anchorNodeId, anchorText } = opts;
+
+  // By id: depth-first, extending the lineage one link per level.
+  if (anchorNodeId != null) {
+    const descend = (
+      candidate: any,
+      position: number,
+      ancestors: { node: any; index: number }[],
+    ): { node: any; index: number }[] | null => {
+      if (!isObject(candidate)) return null;
+      const lineage = ancestors.concat({ node: candidate, index: position });
+      if (matchesId(candidate, anchorNodeId)) return lineage;
+      const children: any[] = Array.isArray(candidate.content)
+        ? candidate.content
+        : [];
+      for (let i = 0; i < children.length; i += 1) {
+        const found = descend(children[i], i, lineage);
+        if (found !== null) return found;
+      }
+      return null;
+    };
+    return descend(doc, -1, []);
+  }
+
+  // By text: only top-level blocks are candidates.
+  if (anchorText == null || !Array.isArray(doc.content)) return null;
+  const at = doc.content.findIndex((block: any) =>
+    blockPlainText(block).includes(anchorText),
+  );
+  if (at === -1) return null;
+  return [
+    { node: doc, index: -1 },
+    { node: doc.content[at], index: at },
+  ];
+}
+
+/**
+ * Options controlling where `insertNodeRelative` places the new node.
+ *
+ * `position` "append" pushes onto the top-level content and needs no anchor;
+ * "before"/"after" place the node next to the anchor resolved from
+ * `anchorNodeId` (checked first) or `anchorText`.
+ */
+export interface InsertOptions {
+  position: "before" | "after" | "append";
+  /** Resolve the anchor by node id anywhere in the tree (preferred). */
+  anchorNodeId?: string;
+  /** Fallback: first TOP-LEVEL block whose plain text includes this string. */
+  anchorText?: string;
+}
+
+/**
+ * Insert a deep clone of `node` relative to an anchor.
+ *
+ * - position "append": push the node onto the top-level `doc.content`
+ *   (creating the array when it is missing); no anchor is consulted.
+ * - position "before"/"after": locate the anchor and splice the node into the
+ *   anchor's parent `content` array immediately before / after it. Any
+ *   `position` other than "after" or "append" behaves like "before", because
+ *   the splice offset is 1 only for "after".
+ *
+ * Anchor resolution for before/after:
+ *   - if `anchorNodeId` is given, find the node with `attrs.id === anchorNodeId`
+ *     anywhere in the tree (recursive);
+ *   - otherwise, if `anchorText` is given, scan only TOP-LEVEL `doc.content`
+ *     blocks and pick the first whose `blockPlainText` includes `anchorText`.
+ *
+ * Table structural nodes (`tableRow`, `tableCell`, `tableHeader`) take a
+ * separate path: the anchor's ancestor chain is resolved and the node is
+ * spliced into the DEEPEST node on that chain (the anchor included) whose type
+ * is the required container — `table` for rows, `tableRow` for cells/headers.
+ * When the anchor itself is that container, the node becomes its first
+ * ("before") or last ("after") child; otherwise it is placed before / after
+ * the container's child that leads to the anchor.
+ *
+ * Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is
+ * false when `opts` is not an object or the anchor could not be resolved (the
+ * doc is returned unchanged apart from being cloned).
+ *
+ * @throws Error when a structural table node is appended at the top level, or
+ *   when the resolved anchor has no enclosing container of the required type.
+ */
+export function insertNodeRelative(
+  doc: any,
+  node: any,
+  opts: InsertOptions,
+): { doc: any; inserted: boolean } {
+  const out = clone(doc);
+  const fresh = clone(node);
+
+  // Defensive: stay null-safe like the other exports — a missing opts means
+  // there is nothing actionable to do.
+  if (!isObject(opts)) return { doc: out, inserted: false };
+
+  const isStructural = isObject(node) && STRUCTURAL_TYPES.has(node.type);
+
+  // "append": top-level push.
+  if (opts.position === "append") {
+    // Structural table nodes (tableRow/tableCell/tableHeader) cannot live at the
+    // top level — appending one would produce invalid nesting.
+    if (isStructural) {
+      throw new Error(
+        `insert_node: cannot append a ${node.type} at the top level; use ` +
+          `position before/after with an anchor inside the target table`,
+      );
+    }
+    if (isObject(out)) {
+      if (!Array.isArray(out.content)) out.content = [];
+      out.content.push(fresh);
+      return { doc: out, inserted: true };
+    }
+    return { doc: out, inserted: false };
+  }
+
+  const offset = opts.position === "after" ? 1 : 0;
+
+  // Structural insert (before/after a tableRow/tableCell/tableHeader): splice
+  // into the nearest enclosing table/tableRow rather than the anchor's direct
+  // parent, so the row/cell lands at the correct level of the table.
+  if (isStructural) {
+    const containerType = REQUIRED_CONTAINER[node.type];
+    const chain = findAnchorChain(out, opts);
+    // Anchor not resolved at all — keep the existing "anchor not found" path.
+    if (chain == null) return { doc: out, inserted: false };
+
+    // Find the DEEPEST ancestor (including the anchor itself) of the required
+    // container type.
+    let containerIdx = -1;
+    for (let i = chain.length - 1; i >= 0; i--) {
+      if (isObject(chain[i].node) && chain[i].node.type === containerType) {
+        containerIdx = i;
+        break;
+      }
+    }
+
+    if (containerIdx === -1) {
+      throw new Error(
+        `insert_node: cannot insert a ${node.type} here — the anchor is not ` +
+          `inside a ${containerType}. Anchor on a cell's text or a block id ` +
+          `that lives inside the target table.`,
+      );
+    }
+
+    const container = chain[containerIdx].node;
+    if (!Array.isArray(container.content)) container.content = [];
+
+    if (containerIdx === chain.length - 1) {
+      // The matched container IS the anchor node itself (e.g. anchorText
+      // resolved to the table block): append/prepend within it.
+      const at = opts.position === "after" ? container.content.length : 0;
+      container.content.splice(at, 0, fresh);
+    } else {
+      // The immediate child on the path leading to the anchor is the row/cell
+      // to splice next to.
+      const enclosingChildIndex = chain[containerIdx + 1].index;
+      container.content.splice(enclosingChildIndex + offset, 0, fresh);
+    }
+    return { doc: out, inserted: true };
+  }
+
+  // Resolve by id anywhere in the tree: splice into the parent content array.
+  if (opts.anchorNodeId != null) {
+    let inserted = false;
+    const walkContent = (content: any[]): void => {
+      for (let i = 0; i < content.length; i++) {
+        const child = content[i];
+        if (matchesId(child, opts.anchorNodeId as string)) {
+          content.splice(i + offset, 0, fresh);
+          inserted = true;
+          return;
+        }
+        if (isObject(child) && Array.isArray(child.content)) {
+          walkContent(child.content);
+          if (inserted) return;
+        }
+      }
+    };
+    if (isObject(out) && Array.isArray(out.content)) {
+      walkContent(out.content);
+    }
+    return { doc: out, inserted };
+  }
+
+  // Resolve by text: only top-level doc.content blocks are scanned.
+  if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
+    for (let i = 0; i < out.content.length; i++) {
+      if (blockPlainText(out.content[i]).includes(opts.anchorText)) {
+        out.content.splice(i + offset, 0, fresh);
+        return { doc: out, inserted: true };
+      }
+    }
+  }
+
+  return { doc: out, inserted: false };
+}
+
+// ===========================================================================
+// Table editing helpers
+//
+// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
+//   table   -> { type:"table",     content:[tableRow...] }
+//   row     -> { type:"tableRow",  content:[tableCell|tableHeader...] }
+//   cell    -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
+//                content:[paragraph...] }
+//   para    -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
+// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
+// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
+// of the input doc (via `clone`) and never mutate their inputs.
+// ===========================================================================
+
+/**
+ * Add every string `attrs.id` found in `node` or any of its descendants to
+ * `used`. Seeds `makeFreshId` so freshly generated paragraph ids cannot clash
+ * with ids already present. Non-string ids and non-object nodes are ignored.
+ */
+function collectIds(node: any, used: Set<string>): void {
+  if (!isObject(node)) return;
+  const id = isObject(node.attrs) ? node.attrs.id : undefined;
+  if (typeof id === "string") used.add(id);
+  if (!Array.isArray(node.content)) return;
+  node.content.forEach((child: any) => collectIds(child, used));
+}
+
+/**
+ * Produce a random Docmost-style id — 12 characters drawn from lowercase
+ * `a-z0-9` — that is not yet in `used`, and record it there before returning.
+ * A colliding draw is simply thrown away and redrawn. Callers depend only on
+ * uniqueness within `used`, never on the exact string; the ids come from
+ * `Math.random`, so they are unsuitable as secrets.
+ */
+function makeFreshId(used: Set<string>): string {
+  const ALPHABET = "abcdefghijklmnopqrstuvwxyz0123456789";
+  const ID_LENGTH = 12;
+  const draw = (): string =>
+    Array.from(
+      { length: ID_LENGTH },
+      () => ALPHABET[Math.floor(Math.random() * ALPHABET.length)],
+    ).join("");
+
+  let candidate = draw();
+  while (used.has(candidate)) {
+    candidate = draw();
+  }
+  used.add(candidate);
+  return candidate;
+}
+
+/**
+ * Find a table inside an ALREADY-CLONED doc and return the LIVE table node (a
+ * reference into `rootClone`, so the caller may mutate it) together with its
+ * index path from the root. Returns null when no table matches.
+ *
+ * - `#<n>`: the top-level block at index `n`, accepted only when its
+ *   `type === "table"`.
+ * - otherwise: depth-first search below the root for a node with
+ *   `attrs.id === tableRef`, then climb its ancestor chain (the match itself
+ *   included, the root excluded) to the nearest `type === "table"` node.
+ *   A match with no enclosing table ends the scan of its sibling list with
+ *   "not found"; the search then continues in the parent's remaining
+ *   siblings.
+ */
+function locateTable(
+  rootClone: any,
+  tableRef: string,
+): { table: any; path: number[] } | null {
+  if (!isObject(rootClone)) return null;
+
+  // Positional form "#<n>": the top-level block must itself be a table.
+  const positional =
+    typeof tableRef === "string" ? /^#(\d+)$/.exec(tableRef) : null;
+  if (positional !== null) {
+    const at = Number(positional[1]);
+    const candidate = Array.isArray(rootClone.content)
+      ? rootClone.content[at]
+      : undefined;
+    return isObject(candidate) && candidate.type === "table"
+      ? { table: candidate, path: [at] }
+      : null;
+  }
+
+  // Innermost table on a lineage (searched from the deepest link upwards).
+  const nearestTable = (
+    lineage: { node: any; index: number }[],
+  ): { table: any; path: number[] } | null => {
+    for (let depth = lineage.length - 1; depth >= 0; depth -= 1) {
+      const link = lineage[depth];
+      if (isObject(link.node) && link.node.type === "table") {
+        return {
+          table: link.node,
+          path: lineage.slice(0, depth + 1).map((entry) => entry.index),
+        };
+      }
+    }
+    return null;
+  };
+
+  // Id form: test each child before descending into it.
+  const descend = (
+    parent: any,
+    lineage: { node: any; index: number }[],
+  ): { table: any; path: number[] } | null => {
+    if (!isObject(parent) || !Array.isArray(parent.content)) return null;
+    for (let i = 0; i < parent.content.length; i += 1) {
+      const child = parent.content[i];
+      const extended = lineage.concat({ node: child, index: i });
+      if (matchesId(child, tableRef)) {
+        // May be null: the id exists but sits outside any table.
+        return nearestTable(extended);
+      }
+      const found = descend(child, extended);
+      if (found !== null) return found;
+    }
+    return null;
+  };
+
+  return descend(rootClone, []);
+}
+
+/**
+ * Build the single-paragraph cell content shared by all table writers: a
+ * `paragraph` node with the given `id`, `indent: 0`, and one text node holding
+ * `text`. A falsy `text` (e.g. the empty string) gives an empty `content`
+ * array instead of a text node.
+ */
+function makeCellParagraph(id: string, text: string): any {
+  const content: any[] = [];
+  if (text) {
+    content.push({ type: "text", text });
+  }
+  return { type: "paragraph", attrs: { id, indent: 0 }, content };
+}
+
+/**
+ * Read a table as a matrix. Returns null when `tableRef` resolves to no table.
+ *
+ * - `rows`/`cols`: the table's row count and the column count of its FIRST row.
+ *   Tables may be ragged (rows of differing length), so `cols` reflects only
+ *   row 0; use the per-row length of `cells`/`cellIds` for each row's actual
+ *   width.
+ * - `cells`: `string[][]` of each cell's `blockPlainText`.
+ * - `cellIds`: `(string|null)[][]` of each cell's FIRST paragraph id, so
+ *   callers can `patch_node` a cell for rich-formatted edits. A missing or
+ *   non-string id is reported as `null`, matching how `collectIds` and
+ *   `updateTableCell` treat ids.
+ * - `path`: index path of the table within the doc.
+ *
+ * Purely read-only: the lookup does not mutate the tree and every returned
+ * array is built fresh, so `doc` is inspected in place rather than deep-cloned.
+ * Rows or cells whose `content` is missing or not an array count as empty.
+ *
+ * @param doc the document root
+ * @param tableRef `#<n>` or the id of a node inside the table
+ */
+export function readTable(
+  doc: any,
+  tableRef: string,
+): {
+  rows: number;
+  cols: number;
+  cells: string[][];
+  cellIds: (string | null)[][];
+  path: number[];
+} | null {
+  const located = locateTable(doc, tableRef);
+  if (located == null) return null;
+  const { table, path } = located;
+
+  // Children of a row/cell as an array; non-array content counts as none.
+  const childrenOf = (node: any): any[] =>
+    isObject(node) && Array.isArray(node.content) ? node.content : [];
+
+  // The cell's first paragraph carries the id used for patch_node.
+  const firstParagraphId = (cellNode: any): string | null => {
+    const firstPara = childrenOf(cellNode)[0];
+    const id =
+      isObject(firstPara) && isObject(firstPara.attrs)
+        ? firstPara.attrs.id
+        : undefined;
+    return typeof id === "string" ? id : null;
+  };
+
+  const rowNodes = childrenOf(table);
+  const cells: string[][] = [];
+  const cellIds: (string | null)[][] = [];
+  for (const rowNode of rowNodes) {
+    const cellNodes = childrenOf(rowNode);
+    cells.push(cellNodes.map((cellNode) => blockPlainText(cellNode)));
+    cellIds.push(cellNodes.map((cellNode) => firstParagraphId(cellNode)));
+  }
+
+  return {
+    rows: rowNodes.length,
+    cols: childrenOf(rowNodes[0]).length,
+    cells,
+    cellIds,
+    path,
+  };
+}
+
+/**
+ * Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`;
+ * `inserted` is false only when the table cannot be located.
+ *
+ * The row is padded to the table's column count (`cells[i] ?? ""`), which is
+ * the width of the WIDEST existing row, or the number of supplied cells when
+ * the table has no cells yet. Each new cell gets `colspan: 1, rowspan: 1`, a
+ * fresh-id paragraph, and — when the first row's cell in that column has one —
+ * its own COPY of that cell's `colwidth`, so the new cell and the header cell
+ * never share the same array. `index` (when an integer in `[0, rows]`) splices
+ * the row there; otherwise the row is appended at the end. A row landing at
+ * index 0 takes over the current first row's cell types per column.
+ *
+ * @throws Error when more cells are supplied than the table has columns.
+ */
+export function insertTableRow(
+  doc: any,
+  tableRef: string,
+  cells: string[],
+  index?: number,
+): { doc: any; inserted: boolean } {
+  const out = clone(doc);
+  const located = locateTable(out, tableRef);
+  if (located == null) return { doc: out, inserted: false };
+  const { table } = located;
+
+  if (!Array.isArray(table.content)) table.content = [];
+  const rows = table.content.length;
+  const headerRow = table.content[0];
+  const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];
+  const supplied: string[] = Array.isArray(cells) ? cells : [];
+
+  // Column count is the WIDEST existing row, so the guard below stays
+  // meaningful for ragged tables and the new row matches the table's width.
+  // Fall back to the supplied cell count only when the table has no cells.
+  let colCount = 0;
+  for (const r of table.content) {
+    if (isObject(r) && Array.isArray(r.content)) {
+      colCount = Math.max(colCount, r.content.length);
+    }
+  }
+  if (colCount === 0) colCount = supplied.length;
+
+  if (supplied.length > colCount) {
+    throw new Error(
+      `table_insert_row: got ${supplied.length} cell(s) but the table has ${colCount} column(s)`,
+    );
+  }
+
+  // Resolve the landing index up front so the cell-type decision and the splice
+  // below agree: a valid integer in [0, rows] splices there, else we append.
+  const landingIndex =
+    typeof index === "number" && Number.isInteger(index) && index >= 0 && index <= rows
+      ? index
+      : rows;
+
+  // Seed the id generator with every id already in the doc so the new cell
+  // paragraph ids are unique within the whole document.
+  const used = new Set<string>();
+  collectIds(out, used);
+
+  const newCells: any[] = [];
+  for (let i = 0; i < colCount; i++) {
+    const text = supplied[i] ?? "";
+    const attrs: Record<string, any> = { colspan: 1, rowspan: 1 };
+    // Copy this column's colwidth from the header row's cell when present.
+    // It is cloned: assigning the value itself would leave the header cell
+    // and the new cell pointing at one shared array.
+    const colwidth = headerCells[i]?.attrs?.colwidth;
+    if (colwidth !== undefined) attrs.colwidth = clone(colwidth);
+    // A row landing at index 0 becomes the new header row, so inherit the
+    // current header cell's type per column (Docmost uses "tableHeader" there);
+    // every other position is a plain data cell.
+    const cellType = landingIndex === 0 ? headerCells[i]?.type ?? "tableCell" : "tableCell";
+    newCells.push({
+      type: cellType,
+      attrs,
+      content: [makeCellParagraph(makeFreshId(used), text)],
+    });
+  }
+
+  const newRow = { type: "tableRow", content: newCells };
+
+  // Splice at the resolved landing index (append when index was omitted/invalid).
+  table.content.splice(landingIndex, 0, newRow);
+
+  return { doc: out, inserted: true };
+}
+
+/**
+ * Remove the row at 0-based `index` from a table, working on a clone of `doc`.
+ * Returns `{ doc, deleted }`; `deleted` is false only when the table cannot be
+ * located.
+ *
+ * @throws Error when `index` is not an integer within the table's rows, or
+ *   when the table has a single row (its only row is never deleted).
+ */
+export function deleteTableRow(
+  doc: any,
+  tableRef: string,
+  index: number,
+): { doc: any; deleted: boolean } {
+  const result = clone(doc);
+  const located = locateTable(result, tableRef);
+  if (located == null) return { doc: result, deleted: false };
+
+  const table = located.table;
+  if (!Array.isArray(table.content)) table.content = [];
+  const rowCount = table.content.length;
+
+  const inRange = Number.isInteger(index) && index >= 0 && index < rowCount;
+  if (!inRange) {
+    throw new Error(
+      `table_delete_row: row index ${index} out of range (table has ${rowCount} row(s))`,
+    );
+  }
+  if (rowCount <= 1) {
+    throw new Error(
+      "table_delete_row: refusing to delete the only row of the table",
+    );
+  }
+
+  table.content.splice(index, 1);
+  return { doc: result, deleted: true };
+}
+
+/**
+ * Replace the content of cell `[row, col]` (both 0-based) with a single
+ * plain-text paragraph holding `text`, working on a clone of `doc`. Returns
+ * `{ doc, updated }`; `updated` is false only when the table cannot be
+ * located.
+ *
+ * The cell's own attrs (colspan/rowspan/colwidth) are left alone. The new
+ * paragraph keeps the id of the cell's existing first paragraph when that id
+ * is a non-empty string; otherwise a fresh id, unique within the document, is
+ * generated.
+ *
+ * @throws Error when `row` or `col` is not an integer inside the table / row.
+ */
+export function updateTableCell(
+  doc: any,
+  tableRef: string,
+  row: number,
+  col: number,
+  text: string,
+): { doc: any; updated: boolean } {
+  const result = clone(doc);
+  const located = locateTable(result, tableRef);
+  if (located == null) return { doc: result, updated: false };
+
+  const tableRows: any[] = Array.isArray(located.table.content)
+    ? located.table.content
+    : [];
+  const targetRow = tableRows[row];
+  const rowCells: any[] =
+    isObject(targetRow) && Array.isArray(targetRow.content)
+      ? targetRow.content
+      : [];
+
+  const rowOk = Number.isInteger(row) && row >= 0 && row < tableRows.length;
+  const colOk = Number.isInteger(col) && col >= 0 && col < rowCells.length;
+  if (!(rowOk && colOk)) {
+    throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
+  }
+
+  const cell = rowCells[col];
+  const firstPara = Array.isArray(cell?.content) ? cell.content[0] : undefined;
+  const existingId =
+    isObject(firstPara) && isObject(firstPara.attrs)
+      ? firstPara.attrs.id
+      : undefined;
+
+  let paragraphId: string;
+  if (typeof existingId === "string" && existingId.length > 0) {
+    paragraphId = existingId;
+  } else {
+    // No reusable id: mint one that collides with nothing in the document.
+    const taken = new Set<string>();
+    collectIds(result, taken);
+    paragraphId = makeFreshId(taken);
+  }
+
+  cell.content = [makeCellParagraph(paragraphId, text)];
+  return { doc: result, updated: true };
+}
--- a/packages/docmost-client/src/lib/page-lock.ts
+++ b/packages/docmost-client/src/lib/page-lock.ts
@@ -0,0 +1,39 @@
+/**
+ * Per-page async mutex.
+ *
+ * Content writes over the collaboration websocket must never overlap for the
+ * same page: two concurrent full-document replaces would race on the live Yjs
+ * fragment. We serialize them with a per-pageId promise chain — each new
+ * operation waits for the previous one on that page to settle (success or
+ * failure) before it runs. Different pages never block each other.
+ */
+
+const chains = new Map<string, Promise<unknown>>();
+
+/**
+ * Run `fn` once every earlier operation queued for `pageId` has settled.
+ * Resolves or rejects exactly as `fn` does, so the caller MUST await/handle it.
+ */
+export function withPageLock<T>(
+  pageId: string,
+  fn: () => Promise<T>,
+): Promise<T> {
+  const ignore = (): void => {};
+  // Queue behind the current tail for this page (or an already-resolved
+  // promise). Its failure is discarded so it cannot poison later callers.
+  const predecessor = chains.get(pageId) ?? Promise.resolve();
+  const result = predecessor.catch(ignore).then(fn);
+
+  // What the next caller waits on: settles with `result` but never rejects.
+  const gate = result.catch(ignore);
+  chains.set(pageId, gate);
+
+  // Forget the page once its queue has drained — i.e. this gate is still the
+  // newest entry when it settles — so the map does not grow without bound.
+  const release = (): void => {
+    if (chains.get(pageId) === gate) chains.delete(pageId);
+  };
+  gate.then(release);
+
+  return result;
+}
--- a/packages/docmost-client/src/lib/transforms.ts
+++ b/packages/docmost-client/src/lib/transforms.ts
@@ -0,0 +1,477 @@
+/**
+ * Pure, network-free transform primitives for a ProseMirror/TipTap document
+ * tree, plus one higher-level orchestration (commentsToFootnotes).
+ *
+ * A ProseMirror node here is a plain JSON object of the shape produced by
+ * Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
+ * `content` array; callouts, tables, lists all hold their children in
+ * `content`, so a single recursive walk reaches them all.
+ *
+ * Conventions (matching node-ops.ts):
+ *  - functions that produce a new document deep-clone their input and return a
+ *    `{ doc, ... }` object; the caller's objects are never mutated.
+ *  - functions are defensively null-safe.
+ *  - `marks` arrays are preserved verbatim when fragments are split/reordered.
+ */
+
+import { blockPlainText } from "./node-ops.js";
+
+/** Deep-copy a JSON-serializable value; the original is left untouched. */
+function clone<T>(value: T): T {
+  // Prefer the platform's structuredClone; runtimes that lack it fall back to
+  // a JSON round-trip.
+  const viaJson = (): T => JSON.parse(JSON.stringify(value)) as T;
+  const native = typeof structuredClone === "function";
+  return native ? structuredClone(value) : viaJson();
+}
+
+/** Type guard: `value` is an object that is neither null nor an array. */
+function isObject(value: any): value is Record<string, any> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+/**
+ * Plain text of a node: a thin wrapper (not a literal re-export) around
+ * node-ops' blockPlainText, so transform authors have one import surface.
+ */
+export function blockText(node: any): string {
+  return blockPlainText(node);
+}
+
+/**
+ * Pre-order, depth-first traversal: `fn` runs on `node` itself and then on
+ * every descendant reachable through `content` arrays (callouts, tables,
+ * lists, ...), once per node and parents before children.
+ * Anything that is not a plain object — null, undefined, primitives, arrays —
+ * is skipped silently, as is a `content` that is not an array.
+ */
+export function walk(node: any, fn: (node: any) => void): void {
+  if (!isObject(node)) return;
+  fn(node);
+  const children = node.content;
+  if (!Array.isArray(children)) return;
+  for (const child of children) walk(child, fn);
+}
+
+/**
+ * Find the FIRST node (pre-order, depth-first) matching `predicate` anywhere
+ * under `doc`; the raw tree is searched, so nodes without `attrs.id` count.
+ * Stops at the first hit instead of visiting the rest of the tree. Returns the
+ * live node inside `doc` (NOT a clone), or null when nothing matches.
+ * E.g. `getList(doc, (n) => n.type === "orderedList")`.
+ */
+export function getList(
+  doc: any,
+  predicate: (node: any) => boolean,
+): any | null {
+  if (!isObject(doc)) return null;
+  if (predicate(doc)) return doc;
+  for (const child of Array.isArray(doc.content) ? doc.content : []) {
+    const hit = getList(child, predicate);
+    if (hit != null) return hit;
+  }
+  return null;
+}
+
+/** Options for insertMarkerAfter. */
+export interface InsertMarkerOptions {
+  /**
+   * Search only TOP-LEVEL blocks whose index is < beforeBlock (values above
+   * the block count are clamped). Keeps markers out of the notes section.
+   */
+  beforeBlock?: number;
+}
+
+/**
+ * Insert `marker` as a PLAIN (unmarked) text run right after the first
+ * occurrence of `anchor`.
+ *
+ * The text run that contains the END of the anchor is SPLIT at the anchor end;
+ * both halves keep the run's other properties and a copy of its marks (a run
+ * without a `marks` array ends up with `marks: []`), while the inserted run
+ * carries NO marks. The marker is inserted as a leading-space-padded run
+ * (`" " + marker`) so it visually separates from the preceding word.
+ *
+ * The anchor is matched against the plain text of each top-level block (so an
+ * anchor that spans several text/mark runs still matches); with a numeric
+ * `opts.beforeBlock` only blocks with a smaller index are searched. A block
+ * that matches but offers no text run to split is skipped, not an error.
+ *
+ * Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is
+ * false when `anchor` is empty or was not found in any in-scope block.
+ */
+export function insertMarkerAfter(
+  doc: any,
+  anchor: string,
+  marker: string,
+  opts: InsertMarkerOptions = {},
+): { doc: any; inserted: boolean } {
+  const out = clone(doc);
+  if (!isObject(out) || !Array.isArray(out.content) || !anchor) {
+    return { doc: out, inserted: false };
+  }
+
+  const limit =
+    typeof opts.beforeBlock === "number"
+      ? Math.min(opts.beforeBlock, out.content.length)
+      : out.content.length;
+
+  for (let b = 0; b < limit; b++) {
+    const block = out.content[b];
+    if (!isObject(block)) continue;
+    // Quick reject: skip blocks whose plain text cannot contain the anchor.
+    if (!blockPlainText(block).includes(anchor)) continue;
+
+    // Walk the inline content arrays inside this block with a running character
+    // offset to find the text run holding the END of the anchor's first match.
+    // NOTE(review): assumes blockPlainText adds no separators — confirm.
+    let inserted = false;
+    let offset = 0; // characters of plain text seen so far in this block
+    const anchorEnd = (() => blockPlainText(block).indexOf(anchor) + anchor.length)();
+
+    // Recurse into inline-bearing containers (paragraph, heading, table cell,
+    // callout child paragraphs, ...). We only split inside an array of inline
+    // nodes (text/inline atoms); the FIRST array whose cumulative range covers
+    // anchorEnd receives the split + marker.
+    const visit = (container: any): void => {
+      if (inserted || !isObject(container) || !Array.isArray(container.content)) {
+        return;
+      }
+      const inline = container.content;
+      // Detect whether this array is an inline array (contains text nodes).
+      const hasText = inline.some(
+        (n: any) => isObject(n) && n.type === "text",
+      );
+      if (hasText) {
+        for (let i = 0; i < inline.length; i++) {
+          const n = inline[i];
+          const len = isObject(n) ? blockPlainText(n).length : 0;
+          const runStart = offset;
+          const runEnd = offset + len;
+          // The run that contains the anchor end (anchorEnd lands inside this
+          // run, i.e. runStart < anchorEnd <= runEnd) is the split point.
+          if (
+            !inserted &&
+            isObject(n) &&
+            n.type === "text" &&
+            typeof n.text === "string" &&
+            anchorEnd > runStart &&
+            anchorEnd <= runEnd
+          ) {
+            const cut = anchorEnd - runStart; // split index within this text run
+            const before = n.text.slice(0, cut);
+            const after = n.text.slice(cut);
+            const marks = Array.isArray(n.marks) ? n.marks : [];
+            const parts: any[] = [];
+            if (before.length > 0) {
+              parts.push({ ...n, text: before, marks: [...marks] });
+            }
+            // Marker is a PLAIN run: no marks copied. Leading space separates it.
+            parts.push({ type: "text", text: " " + marker });
+            if (after.length > 0) {
+              parts.push({ ...n, text: after, marks: [...marks] });
+            }
+            inline.splice(i, 1, ...parts);
+            inserted = true;
+            return;
+          }
+          offset = runEnd;
+        }
+      } else {
+        // Not an inline array: recurse into children (e.g. callout -> paragraph).
+        for (const child of inline) {
+          visit(child);
+          if (inserted) return;
+        }
+      }
+    };
+
+    visit(block);
+    if (inserted) {
+      return { doc: out, inserted: true };
+    }
+    // If the block matched in plain text but we could not split (e.g. anchor
+    // lands inside an atom), fall through to the next block rather than failing.
+  }
+
+  return { doc: out, inserted: false };
+}
+
+/**
+ * In the disclaimer callout, replace a `[1]…[K]` range marker with `[1]…[n]`.
+ *
+ * Docmost translations use a callout that states the footnote range, e.g.
+ * "[1]…[5]". When the number of notes changes, this rewrites the trailing
+ * number of every `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis) occurrence found
+ * in text nodes at any depth inside a callout. Operates on a clone; returns
+ * `{ doc, changed }`: `changed` counts text nodes that held a range, each once
+ * even when callouts are nested inside one another.
+ */
+export function setCalloutRange(
+  doc: any,
+  n: number,
+): { doc: any; changed: number } {
+  const out = clone(doc);
+  // Match "[1]" + (… or ...) + "[<digits>]"; only the digits are swapped for n.
+  const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g;
+  // Replacer function rather than a "$1<n>$2" template, so the digits of `n`
+  // can never be read as part of a "$nn" group reference.
+  const swap = (_m: string, a: string, b: string) => `${a}${n}${b}`;
+  // Rewritten nodes; a callout nested in a callout is walked more than once.
+  const handled = new Set<any>();
+  let changed = 0;
+  walk(out, (node) => {
+    if (node.type !== "callout") return;
+    walk(node, (inner) => {
+      if (inner.type !== "text" || typeof inner.text !== "string") return;
+      if (handled.has(inner) || inner.text.search(rangeRe) < 0) return;
+      handled.add(inner);
+      inner.text = inner.text.replace(rangeRe, swap);
+      changed++;
+    });
+  });
+  return { doc: out, changed };
+}
+
+/**
+ * Short random id for a new block's `attrs.id`: two Math.random() draws in
+ * base36, up to 10 + 4 characters. Docmost uses nanoid; this only needs to be
+ * unique within one document.
+ */
+function freshId(): string {
+  const draw = (end: number): string =>
+    Math.random().toString(36).slice(2, end);
+  return draw(12) + draw(6);
+}
+
+/**
+ * Build a list item that wraps `inlineNodes` in a single paragraph:
+ *   { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content: inlineNodes }] }
+ * The paragraph carries a fresh random block id. The inline nodes are
+ * deep-cloned, so the result shares no references with the caller's input; a
+ * non-array argument yields an empty paragraph.
+ */
+export function noteItem(inlineNodes: any[]): any {
+  // Copy the caller's nodes up front; everything below is newly allocated.
+  const inline = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
+
+  const paragraph = {
+    type: "paragraph",
+    attrs: { id: freshId() },
+    content: inline,
+  };
+
+  return { type: "listItem", content: [paragraph] };
+}
+
+/**
+ * Turn a comment's markdown (e.g. `**Lead.** body...`) into inline ProseMirror
+ * text nodes.
+ *
+ * Steps, in order:
+ *   1. a leading `комментарий: ` label (case-insensitive) is removed, then a
+ *      leading `N. ` list number, then surrounding whitespace;
+ *   2. if the text now starts with a `**bold lead**`, the result is a bold
+ *      text node for the lead plus — when anything follows — ONE plain text
+ *      node holding the original separating whitespace and the remainder
+ *      verbatim (further `**` spans in it are NOT parsed);
+ *   3. otherwise every `**bold**` span is split out via splitInlineBold.
+ *
+ * Deliberately synchronous and dependency-free (the transform sandbox runs
+ * synchronously); the async markdownToProseMirror is intentionally NOT used.
+ * Non-string or blank input yields `[]`.
+ */
+export function mdToInlineNodes(markdown: string): any[] {
+  const source = typeof markdown === "string" ? markdown : "";
+  // Strip the label first, then the list number, then outer whitespace.
+  const md = source
+    .replace(/^\s*комментарий\s*:\s*/i, "")
+    .replace(/^\s*\d+\.\s+/, "")
+    .trim();
+  if (md === "") return [];
+
+  // One pass captures both the lead text (group 1) and the whitespace that
+  // follows the closing `**` (group 2), so no second regex run is needed.
+  const lead = /^\*\*([^*]+)\*\*(\s*)/.exec(md);
+  if (lead === null) {
+    // No bold lead: split out every inline **bold** span instead.
+    return splitInlineBold(md);
+  }
+
+  const nodes: any[] = [
+    { type: "text", text: lead[1], marks: [{ type: "bold" }] },
+  ];
+  const remainder = md.slice(lead[0].length);
+  if (remainder.length > 0) {
+    // Re-attach the whitespace the regex consumed after the lead.
+    nodes.push({ type: "text", text: lead[2] + remainder });
+  }
+  return nodes;
+}
+
+/**
+ * Break `text` into text nodes at each inline `**bold**` span: the span's inner
+ * text becomes a bold-marked node, the stretches between spans plain nodes. A
+ * string with no span comes back as one plain node. Used by mdToInlineNodes.
+ */
+function splitInlineBold(text: string): any[] {
+  const boldSpan = /\*\*([^*]+)\*\*/g;
+  const pieces: any[] = [];
+  let cursor = 0;
+  for (let hit = boldSpan.exec(text); hit !== null; hit = boldSpan.exec(text)) {
+    if (cursor < hit.index) {
+      pieces.push({ type: "text", text: text.slice(cursor, hit.index) });
+    }
+    pieces.push({ type: "text", text: hit[1], marks: [{ type: "bold" }] });
+    cursor = hit.index + hit[0].length;
+  }
+  if (cursor < text.length) {
+    pieces.push({ type: "text", text: text.slice(cursor) });
+  }
+  return pieces.length === 0 ? [{ type: "text", text }] : pieces;
+}
+
+/** Options for commentsToFootnotes. */
+export interface CommentsToFootnotesOptions {
+  /** Trimmed text of the notes heading; default "Примечания переводчика". */
+  notesHeading?: string;
+}
+
+/** A comment shape as returned by DocmostClient.listComments. */
+export interface FootnoteComment {
+  id: string; // echoed in `consumed` once the comment is anchored
+  content: string; // markdown; becomes the note's inline nodes
+  selection?: string | null; // anchor text; comments without it are skipped
+  [k: string]: any; // other fields are not read by commentsToFootnotes
+}
+
+/**
+ * Turn inline comments into numbered footnotes.
+ *
+ * For each comment that carries a `selection`:
+ *   1. insert a placeholder marker (a NUL-delimited "\u0000FN<i>\u0000"
+ *      sentinel) right after the selection text in the BODY (the top-level
+ *      blocks before the notes heading);
+ *   2. build a note list item from the comment's markdown content.
+ *
+ * Then RENUMBER every footnote marker in the body by reading order: existing
+ * `[N]` markers and the new placeholders are both replaced by a sequential
+ * `[seq]`, and the notes orderedList is rebuilt so each note lines up with its
+ * marker. Finally the disclaimer callout range is synced to the note count.
+ *
+ * Returns `{ doc, consumed }`; `consumed` lists the ids of comments whose
+ * selection was found. Operates on a clone of `doc`. Throws when the notes
+ * heading or the orderedList after it is missing, or when a body `[N]` has no
+ * N-th note in the list.
+ */
+export function commentsToFootnotes(
+  doc: any,
+  comments: FootnoteComment[],
+  opts: CommentsToFootnotesOptions = {},
+): { doc: any; consumed: string[] } {
+  let working = clone(doc);
+  const notesHeading = opts.notesHeading ?? "Примечания переводчика";
+
+  const top: any[] = Array.isArray(working.content) ? working.content : [];
+  const notesIdx = top.findIndex(
+    (n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading,
+  );
+  if (notesIdx < 0) {
+    throw new Error(`heading "${notesHeading}" not found`);
+  }
+  // Early existence check: the notes orderedList lives at/after the heading.
+  const notesList = top
+    .slice(notesIdx)
+    .find((n) => isObject(n) && n.type === "orderedList");
+  if (!notesList) {
+    throw new Error("notes orderedList not found");
+  }
+
+  const consumed: string[] = [];
+  const noteByPh = new Map<string, any>();
+
+  (Array.isArray(comments) ? comments : []).forEach((c, i) => {
+    if (!c || !c.selection) return;
+    // Collision-proof sentinel delimited by NUL control chars, which never occur
+    // in real Docmost prose — so the renumber regex below cannot mistake any body
+    // text (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
+    // transient: the placeholder round-trips within this function (insertMarkerAfter
+    // inserts it, the renumber pass replaces it with "[N]"), so it never persists
+    // in a returned/pushed document.
+    const ph = `\u0000FN${i}\u0000`;
+    // insertMarkerAfter returns a NEW cloned doc; keep it as `working`. The
+    // `top`/`notesList` references are re-resolved after this loop.
+    const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
+      beforeBlock: notesIdx,
+    });
+    if (!r.inserted) return;
+    working = r.doc;
+    noteByPh.set(ph, noteItem(mdToInlineNodes(c.content)));
+    consumed.push(c.id);
+  });
+
+  // Re-resolve references into the (possibly re-cloned) working doc.
+  const top2: any[] = Array.isArray(working.content) ? working.content : [];
+  const notesList2 = top2
+    .slice(notesIdx)
+    .find((n) => isObject(n) && n.type === "orderedList");
+  if (!notesList2) {
+    throw new Error("notes orderedList not found");
+  }
+
+  const oldNotes: any[] = Array.isArray(notesList2.content)
+    ? notesList2.content
+    : [];
+  const newNotes: any[] = [];
+  let seq = 0;
+  // Match either an existing "[N]" marker or a NUL-delimited "\u0000FN<i>\u0000"
+  // placeholder, in reading order across the body (blocks before the notes heading).
+  const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
+  // Same range regex setCalloutRange uses to detect the disclaimer callout's
+  // "[1]…[K]" range; used here to decide whether a top-level callout is the
+  // disclaimer (skip) or an ordinary callout (renumber normally).
+  const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
+  for (let i = 0; i < notesIdx; i++) {
+    // Skip ONLY the disclaimer callout: its "[1]…[K]" range is NOT a footnote
+    // marker and is synced separately by setCalloutRange. Renumbering it here
+    // would consume note slots and corrupt the sequence. Other top-level
+    // callouts may carry legitimate "[N]" body markers and are renumbered.
+    if (
+      isObject(top2[i]) &&
+      top2[i].type === "callout" &&
+      disclaimerRangeRe.test(blockText(top2[i]))
+    ) {
+      continue;
+    }
+    walk(top2[i], (node) => {
+      if (node.type !== "text" || typeof node.text !== "string") return;
+      node.text = node.text.replace(re, (_m: string, oldNum: string, phIdx: string) => {
+        if (oldNum != null) {
+          const note = oldNotes[Number(oldNum) - 1];
+          // Every existing body marker MUST map to a real note. An out-of-range
+          // marker means the document is internally inconsistent; fail loudly
+          // rather than silently dropping the note and desyncing the callout.
+          if (note === undefined) {
+            throw new Error(
+              `footnote [${oldNum}] has no matching note (notes list has ${oldNotes.length} items); document is inconsistent`,
+            );
+          }
+          newNotes.push(note);
+        } else {
+          newNotes.push(noteByPh.get(`\u0000FN${phIdx}\u0000`));
+        }
+        return `[${++seq}]`;
+      });
+    });
+  }
+
+  // Reorder the notes list IN PLACE on `working` first, THEN sync the callout
+  // range. setCalloutRange clones `working`, so the reordered notes (mutated
+  // before the clone) are carried into its result automatically. No null-filter
+  // here: marker count and note count must stay exactly equal (the out-of-range
+  // guard above guarantees no undefined entry is ever pushed).
+  notesList2.content = newNotes;
+  const synced = setCalloutRange(working, notesList2.content.length);
+
+  return { doc: synced.doc, consumed };
+}
--- a/packages/docmost-client/tsconfig.json
+++ b/packages/docmost-client/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../../tsconfig.base.json",
+  "compilerOptions": {
+    "outDir": "dist",
+    "rootDir": "src"
+  },
+  "include": ["src/**/*"]
+}