diff --git a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts index e2cca688..5e1d6cba 100644 --- a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts +++ b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts @@ -632,8 +632,15 @@ export class AiChatToolsService { editPageText: tool({ description: 'Surgical find/replace inside a page\'s text, preserving all block ' + - 'ids and marks. Each find must match exactly once unless replaceAll ' + - 'is set. Reversible: the previous version is kept in page history.', + 'ids and marks. A find MAY cross bold/italic/link boundaries; the ' + + 'replacement inherits marks from the unchanged common prefix/suffix ' + + '(so editing plain text next to a bold word keeps it bold, and ' + + 'editing inside a bold word keeps the new text bold). Each find must ' + + 'match exactly once unless replaceAll is set. The batch applies what ' + + 'it can and returns applied[] + failed[]; a fully-unmatched batch ' + + 'writes nothing and errors. Examples: edits:[{find:"teh",replace:"the"}]; ' + + 'edits:[{find:"Hello world",replace:"Hello there"}] (crosses a bold ' + + 'boundary). Reversible: the previous version is kept in page history.', inputSchema: z.object({ pageId: z.string().describe('The id of the page to edit.'), edits: z @@ -657,8 +664,13 @@ export class AiChatToolsService { patchNode: tool({ description: 'Replace a single content block (by id) with a new ProseMirror ' + - 'node; the replacement keeps the same nodeId. Reversible: the ' + - 'previous version is kept in page history.', + 'node; the replacement keeps the same nodeId. Example node: a ' + + 'paragraph {"type":"paragraph","content":[{"type":"text","text":"Hello"}]} ' + + 'or a heading {"type":"heading","attrs":{"level":2},"content":' + + '[{"type":"text","text":"Title"}]}. Bold is a mark: ' + + '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node arg ' + + 'may be a JSON object or a JSON string (both accepted). Reversible: ' + + 'the previous version is kept in page history.', inputSchema: z.object({ pageId: z.string().describe('The id of the page.'), nodeId: z @@ -666,20 +678,48 @@ export class AiChatToolsService { .describe('The block id to replace (from getOutline/getPageJson).'), node: z .any() - .describe('The replacement ProseMirror node object.'), + .describe( + 'The replacement ProseMirror node, e.g. ' + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' + + 'JSON object or JSON string both accepted.', + ), }), - execute: async ({ pageId, nodeId, node }) => - await client.patchNode(pageId, nodeId, node), + execute: async ({ pageId, nodeId, node }) => { + // Parity with the standalone MCP server (index.ts patch_node): the + // model sometimes serializes the node as a JSON string. Parse it + // before the client's typeof-object guard rejects it. + let parsedNode = node; + if (typeof node === 'string') { + try { + parsedNode = JSON.parse(node); + } catch { + throw new Error('node was a string but not valid JSON'); + } + } + return await client.patchNode(pageId, nodeId, parsedNode); + }, }), insertNode: tool({ description: 'Insert a ProseMirror node relative to an anchor, or append it at ' + 'the top level. For before/after you MUST provide EXACTLY ONE of ' + - 'anchorNodeId or anchorText. Reversible via page history.', + 'anchorNodeId or anchorText. Example node: a paragraph ' + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' + + 'heading {"type":"heading","attrs":{"level":2},"content":' + + '[{"type":"text","text":"Title"}]}. Bold is a mark: ' + + '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node arg ' + + 'may be a JSON object or a JSON string (both accepted). Reversible ' + + 'via page history.', inputSchema: z.object({ pageId: z.string().describe('The id of the page.'), - node: z.any().describe('The ProseMirror node object to insert.'), + node: z + .any() + .describe( + 'The ProseMirror node to insert, e.g. ' + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' + + 'JSON object or JSON string both accepted.', + ), position: z .enum(['before', 'after', 'append']) .describe('Where to insert relative to the anchor.'), @@ -692,12 +732,30 @@ export class AiChatToolsService { .optional() .describe('Anchor text fragment (for before/after).'), }), - execute: async ({ pageId, node, position, anchorNodeId, anchorText }) => - await client.insertNode(pageId, node, { + execute: async ({ + pageId, + node, + position, + anchorNodeId, + anchorText, + }) => { + // Parity with the standalone MCP server (index.ts insert_node): the + // model sometimes serializes the node as a JSON string. Parse it + // before the client's typeof-object guard rejects it. + let parsedNode = node; + if (typeof node === 'string') { + try { + parsedNode = JSON.parse(node); + } catch { + throw new Error('node was a string but not valid JSON'); + } + } + return await client.insertNode(pageId, parsedNode, { position, anchorNodeId, anchorText, - }), + }); + }, }), deleteNode: tool({ @@ -714,23 +772,43 @@ export class AiChatToolsService { updatePageJson: tool({ description: - "Replace a page's body with a full ProseMirror document " + - "({type:'doc',content:[...]}) — a full overwrite — and/or update " + - 'its title. Omit content for a title-only update. Reversible: the ' + - 'previous version is kept in page history.', + "Replace a page's body with a full ProseMirror document — a full " + + 'overwrite — and/or update its title. Minimal example content: ' + + '{"type":"doc","content":[{"type":"paragraph","content":' + + '[{"type":"text","text":"Hi"}]}]}. The content arg may be a JSON ' + + 'object or a JSON string (both accepted). Omit content for a ' + + 'title-only update. Reversible: the previous version is kept in page ' + + 'history.', inputSchema: z.object({ pageId: z.string().describe('The id of the page to update.'), content: z .any() .optional() .describe( - "Full ProseMirror doc {type:'doc',content:[...]}; omit for a " + - 'title-only update.', + 'Full ProseMirror doc {"type":"doc","content":[...]} (JSON ' + + 'object or JSON string); omit for a title-only update.', ), title: z.string().optional().describe('Optional new title.'), }), - execute: async ({ pageId, content, title }) => - await client.updatePageJson(pageId, content, title), + execute: async ({ pageId, content, title }) => { + // Parity with the standalone MCP server (index.ts update_page_json): + // undefined/null pass through as undefined (title-only / no-op); any + // string is JSON.parsed (so an empty string "" throws, matching the + // MCP server); an object is passed through unchanged. + let doc; + if (content === undefined || content === null) { + doc = undefined; + } else if (typeof content === 'string') { + try { + doc = JSON.parse(content); + } catch { + throw new Error('content was a string but not valid JSON'); + } + } else { + doc = content; + } + return await client.updatePageJson(pageId, doc, title); + }, }), tableInsertRow: tool({ diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index fca067eb..ed4ab202 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -13,7 +13,7 @@ import { docmostExtensions } from "./lib/docmost-schema.js"; import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js"; import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js"; import { withPageLock } from "./lib/page-lock.js"; -import { applyTextEdits } from "./lib/json-edit.js"; +import { applyTextEdits, } from "./lib/json-edit.js"; import { getCollabToken, performLogin } from "./lib/auth-utils.js"; import { diffDocs } from "./lib/diff.js"; import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, } from "./lib/transforms.js"; @@ -1111,18 +1111,59 @@ export class DocmostClient { const collabToken = await this.getCollabTokenWithReauth(); // Apply the edits against the LIVE synced document, not the debounced REST // snapshot, so concurrent human edits/comments are preserved. applyTextEdits - // throws descriptive errors on zero/multiple matches — let them propagate. + // records per-edit match problems in `failed` instead of throwing, and + // applies whatever it can; we abort the write only when nothing applied. let results; + let failed; + // Whether we actually wrote new content. Set inside the transform: a + // degenerate edit (e.g. find === replace, or a batch that nets to no change) + // can "apply" yet leave the document byte-for-byte identical, in which case + // we must NOT write (no spurious history version) and must not claim a write + // happened. + let wrote = false; await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + wrote = false; const r = applyTextEdits(liveDoc, edits); results = r.results; + failed = r.failed; + // Nothing applied -> abort the write (mutatePageContent treats a null + // return from the transform as "write nothing"). + if (r.results.length === 0) + return null; + // Edits "applied" but produced an identical document: skip the write so no + // new history version is created. Stable structural comparison via + // JSON.stringify (both docs come from the same deep-copied source, so key + // order is stable). + if (JSON.stringify(r.doc) === JSON.stringify(liveDoc)) + return null; + wrote = true; return r.doc; }); + if ((results?.length ?? 0) === 0 && (failed?.length ?? 0) > 0) { + // No edit applied: surface an aggregated, actionable error so the caller + // does not mistake a no-op for a partial success. + throw new Error("edit_page_text: no edits were applied (nothing written). " + + failed.map((f) => `"${f.find}": ${f.reason}`).join("; ")); + } + // Edits matched but produced no content change (identical document): report + // a successful no-op — NOT a failure — and do not falsely claim a write. + if (!wrote) { + return { + success: true, + pageId, + applied: results, + failed, + message: "No changes written (edits produced identical content).", + }; + } return { success: true, pageId, - edits: results, - message: "Text edits applied (node ids and formatting preserved).", + applied: results, + failed, + message: (failed?.length ?? 0) + ? `Applied ${results?.length ?? 0} edit(s); ${failed.length} failed (see failed[]). Node ids and formatting preserved.` + : "Text edits applied (node ids and formatting preserved).", }; } /** diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js index 3b9c09d4..e8cdd1ee 100644 --- a/packages/mcp/build/index.js +++ b/packages/mcp/build/index.js @@ -217,15 +217,19 @@ export function createDocmostMcpServer(config) { "(lossless write: preserves the block ids, callouts, tables and " + "attributes you pass in). Typical flow: get_page_json -> modify the " + "JSON -> update_page_json. Keep existing node ids intact so heading " + - "anchors and history stay stable. `content` is OPTIONAL: omit it to " + - "update only the title (though prefer rename_page for a title-only " + - "change). Supplying neither content nor title is an error.", + "anchors and history stay stable. Minimal full-doc example: " + + '{"type":"doc","content":[{"type":"paragraph","content":' + + '[{"type":"text","text":"Hi"}]}]}. `content` may be a JSON object or a ' + + "JSON string (both accepted), and is OPTIONAL: omit it to update only " + + "the title (though prefer rename_page for a title-only change). " + + "Supplying neither content nor title is an error.", inputSchema: { pageId: z.string().min(1).describe("ID of the page to update"), content: z .any() .optional() - .describe('ProseMirror document: {"type":"doc","content":[...]}. Omit to rename only.'), + .describe('ProseMirror document {"type":"doc","content":[...]} (JSON object or ' + + "JSON string). Omit to rename only."), title: z.string().optional().describe("Optional new title"), }, }, async ({ pageId, content, title }) => { @@ -314,11 +318,16 @@ export function createDocmostMcpServer(config) { // Tool: edit_page_text server.registerTool("edit_page_text", { description: "Surgical find/replace inside a page's text. Preserves ALL structure: " + - "block ids, marks, links, callouts, tables. Each `find` must match " + - "exactly once (or set replaceAll). A match must lie inside one " + - "formatting run; if the target text crosses bold/link boundaries the " + - "tool reports it — use a shorter fragment or update_page_json then. " + - "This is the preferred tool for fixing wording, typos, numbers, names.", + "block ids, marks, links, callouts, tables. A `find` MAY cross " + + "bold/italic/link boundaries; the replacement inherits marks from the " + + "unchanged common prefix/suffix (editing plain text next to a bold word " + + "keeps it bold; editing inside a bold word keeps the new text bold). " + + "Each `find` must match exactly once (or set replaceAll). The batch " + + "applies what it can and returns applied[] + failed[]; a fully-unmatched " + + "batch writes nothing and errors. Examples: edits:[{find:\"teh\"," + + "replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " + + "(crosses a bold boundary). This is the preferred tool for fixing " + + "wording, typos, numbers, names.", inputSchema: { pageId: z.string().describe("ID of the page to edit"), edits: z @@ -341,14 +350,21 @@ export function createDocmostMcpServer(config) { server.registerTool("patch_node", { description: "Replaces a single block identified by its attrs.id WITHOUT resending the " + "whole document. Get the block id from get_page_json, then pass a " + - "ProseMirror node to put in its place. Cheaper and safer than " + + "ProseMirror node to put in its place. Example node: a paragraph " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' + + 'heading {"type":"heading","attrs":{"level":2},"content":' + + '[{"type":"text","text":"Title"}]}. Bold is a mark: ' + + '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' + + "JSON object or a JSON string (both accepted). Cheaper and safer than " + "update_page_json for one-block structural edits.", inputSchema: { pageId: z.string().min(1), nodeId: z.string().min(1), node: z .any() - .describe("ProseMirror node JSON to put in place of the node with this id"), + .describe("ProseMirror node to put in place of the node with this id, e.g. " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' + + "JSON object or JSON string both accepted."), }, }, async ({ pageId, nodeId, node }) => { let parsedNode; @@ -376,10 +392,19 @@ export function createDocmostMcpServer(config) { "anchorText matching the table; to add a tableCell/tableHeader, use " + "anchorNodeId of a block inside the target row (anchorText only resolves " + "top-level blocks, so it cannot target a row). Note: append is top-level " + - "only and rejects structural table nodes.", + "only and rejects structural table nodes. Example node: a paragraph " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' + + 'heading {"type":"heading","attrs":{"level":2},"content":' + + '[{"type":"text","text":"Title"}]}. Bold is a mark: ' + + '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' + + "JSON object or a JSON string (both accepted).", inputSchema: { pageId: z.string().min(1), - node: z.any(), + node: z + .any() + .describe("ProseMirror node to insert, e.g. " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' + + "JSON object or JSON string both accepted."), position: z.enum(["before", "after", "append"]), anchorNodeId: z.string().optional(), anchorText: z.string().optional(), diff --git a/packages/mcp/build/lib/json-edit.js b/packages/mcp/build/lib/json-edit.js index a00f41ef..b0696697 100644 --- a/packages/mcp/build/lib/json-edit.js +++ b/packages/mcp/build/lib/json-edit.js @@ -1,91 +1,306 @@ /** * Surgical text edits on a ProseMirror document without re-importing it. * - * Each edit replaces an exact substring inside individual text nodes, - * preserving every node id, mark and attribute around it. This is the - * safe alternative to a full markdown re-import for small wording fixes. + * Each edit replaces an exact substring of a block's inline text, preserving + * every node id, mark and attribute around it. Matching works at the + * INLINE-CONTAINER (block) level: a block's text nodes are flattened into a + * per-character array, so a `find` may freely cross bold/italic/link + * boundaries (separate text nodes). The replacement inherits marks from the + * unchanged common prefix/suffix of the match, so editing plain text next to a + * bold word keeps the bold word bold, and editing the inside of a bold word + * keeps the inserted text bold. This is the safe alternative to a full markdown + * re-import for small wording fixes. */ -/** Collect plain text of the whole document (for span-detection hints). */ -function collectText(node) { - let out = ""; - if (node.type === "text") - out += node.text || ""; - for (const child of node.content || []) - out += collectText(child); +/** Placeholder code unit standing in for one opaque (non-text) inline node. */ +const ATOM_PLACEHOLDER = ""; // OBJECT REPLACEMENT CHARACTER +/** + * Find every VALID occurrence of `needle` in a block's flattened slots. + * + * A candidate occurrence at slot range [start, start+needle.length) is valid + * ONLY IF none of the slots in that range are atoms (non-text inline nodes). + * This makes atom matching collision-safe against the U+FFFC placeholder: an + * atom slot can never be part of a match, while a real text node containing a + * literal U+FFFC code unit still matches normally (its slot has no `.atom`). + * + * Overlapping candidates that touch an atom are skipped (not counted, not + * spliced); the scan resumes one code unit past the rejected start so a valid + * match that begins just after an atom is not missed. + */ +function findValidMatches(chars, plain, needle) { + if (!needle) + return []; + const positions = []; + let idx = plain.indexOf(needle); + while (idx !== -1) { + const end = idx + needle.length; + let hasAtom = false; + for (let i = idx; i < end; i++) { + if (chars[i] && chars[i].atom) { + hasAtom = true; + break; + } + } + if (!hasAtom) { + positions.push(idx); + // Non-overlapping: skip past this match. + idx = plain.indexOf(needle, end); + } + else { + // This candidate crosses an atom: reject it and resume one unit later so + // an overlapping valid match starting after the atom is still found. + idx = plain.indexOf(needle, idx + 1); + } + } + return positions; +} +/** Order-sensitive deep-equality of two marks arrays. */ +function marksEqual(a, b) { + if (a === b) + return true; + if (a.length !== b.length) + return false; + for (let i = 0; i < a.length; i++) { + if (JSON.stringify(a[i]) !== JSON.stringify(b[i])) + return false; + } + return true; +} +/** A block is any node that DIRECTLY contains at least one inline text child. */ +function isInlineBlock(node) { + return (Array.isArray(node?.content) && + node.content.some((child) => child && child.type === "text")); +} +/** Flatten a block's inline content into a per-code-unit slot array. */ +function flattenBlock(node) { + const chars = []; + for (const child of node.content || []) { + if (child && child.type === "text" && typeof child.text === "string") { + const marks = child.marks || []; + // Iterate by UTF-16 code unit so indices align with String.indexOf. + for (let i = 0; i < child.text.length; i++) { + chars.push({ ch: child.text[i], marks }); + } + } + else { + // Any non-text inline node becomes one opaque slot. + chars.push({ + ch: ATOM_PLACEHOLDER, + marks: (child && child.marks) || [], + atom: child, + }); + } + } + return chars; +} +/** Re-tokenize a slot array back into ProseMirror inline nodes. */ +function tokenizeChars(chars) { + const out = []; + let buffer = ""; + let bufferMarks = null; + const flush = () => { + if (buffer.length === 0) + return; + const textNode = { type: "text", text: buffer }; + if (bufferMarks && bufferMarks.length > 0) + textNode.marks = bufferMarks; + out.push(textNode); + buffer = ""; + bufferMarks = null; + }; + for (const slot of chars) { + if (slot.atom) { + flush(); + out.push(slot.atom); + continue; + } + if (bufferMarks !== null && !marksEqual(bufferMarks, slot.marks)) { + flush(); + } + if (bufferMarks === null) + bufferMarks = slot.marks; + buffer += slot.ch; + } + flush(); return out; } -function countOccurrences(haystack, needle) { - if (!needle) - return 0; - let count = 0; - let idx = haystack.indexOf(needle); - while (idx !== -1) { - count++; - idx = haystack.indexOf(needle, idx + needle.length); - } - return count; +/** Longest common prefix length of two strings. */ +function commonPrefixLen(a, b) { + const max = Math.min(a.length, b.length); + let i = 0; + while (i < max && a[i] === b[i]) + i++; + return i; +} +/** Longest common suffix length of two strings, capped so it can't overlap. */ +function commonSuffixLen(a, b, cap) { + const max = Math.min(a.length, b.length, cap); + let i = 0; + while (i < max && a[a.length - 1 - i] === b[b.length - 1 - i]) + i++; + return i; } /** - * Apply text edits to a ProseMirror doc (mutates a deep copy, returns it). - * Throws a descriptive error when an edit matches zero times or matches - * multiple times without replaceAll — so the caller can refine `find`. + * Apply one edit to one block's flattened slot array. + * + * The caller passes only VALID (atom-free) match positions (see + * findValidMatches), so no match range can overlap an atom slot here. + */ +function applyEditToChars(chars, edit, matchPositions) { + // Pre-compute the diff slices once (find/replace are constant per edit). + const p = commonPrefixLen(edit.find, edit.replace); + const s = commonSuffixLen(edit.find, edit.replace, Math.min(edit.find.length, edit.replace.length) - p); + const insertText = edit.replace.slice(p, edit.replace.length - s); + // Rebuild the slot array in a single left-to-right pass, splicing at each + // match start. Offsets into `chars` stay valid because we copy through. + const newChars = []; + let cursor = 0; + let spliced = 0; + for (const mStart of matchPositions) { + const mEnd = mStart + edit.find.length; + const changedStart = mStart + p; + const changedEnd = mEnd - s; + // Copy through everything up to the changed region (incl. the prefix). + for (; cursor < changedStart; cursor++) + newChars.push(chars[cursor]); + const removed = chars.slice(changedStart, changedEnd); + // Choose the marks for the inserted characters. + let chosenMarks = []; + if (removed.length > 0 && + removed.every((r) => marksEqual(r.marks, removed[0].marks))) { + // Uniform removed region: inherit its marks directly. + chosenMarks = removed[0].marks; + } + else { + // Empty or non-uniform removed region: inherit from the nearest TEXT + // neighbour, skipping atom slots (an atom carries marks that do not + // belong on inserted text). Scan left first, then right; fall back to []. + let inherited = null; + for (let i = changedStart - 1; i >= 0; i--) { + if (!chars[i].atom) { + inherited = chars[i].marks; + break; + } + } + if (inherited === null) { + for (let i = changedEnd; i < chars.length; i++) { + if (!chars[i].atom) { + inherited = chars[i].marks; + break; + } + } + } + chosenMarks = inherited === null ? [] : inherited; + } + // Emit the inserted text (one slot per code unit). + for (let i = 0; i < insertText.length; i++) { + newChars.push({ ch: insertText[i], marks: chosenMarks }); + } + // Skip the removed region. + cursor = changedEnd; + spliced++; + } + // Copy through the tail. + for (; cursor < chars.length; cursor++) + newChars.push(chars[cursor]); + return { newChars, spliced }; +} +/** + * Apply text edits to a ProseMirror doc (operates on a deep copy, returns it). + * + * Returns { doc, results, failed }: + * - results: edits that applied (replacements >= 1). + * - failed: edits that matched zero times, were ambiguous (multi-match + * without replaceAll), or whose changed region crosses a non-text inline + * node. These do NOT throw — they are recorded so the caller can surface an + * actionable message and still keep the edits that did apply. + * + * Edits apply IN ORDER to the same working copy, so a later edit can target + * text produced by an earlier one. The input doc is never mutated. The only + * thrown error is for invalid input (an empty `edit.find`). */ export function applyTextEdits(doc, edits) { const copy = JSON.parse(JSON.stringify(doc)); const results = []; + const failed = []; for (const edit of edits) { if (!edit.find) throw new Error("edit.find must be a non-empty string"); - // Count matches inside individual text nodes first. - let nodeMatches = 0; - (function count(node) { - if (node.type === "text" && node.text) { - nodeMatches += countOccurrences(node.text, edit.find); - } + // Gather every inline block in document order (recurse the whole tree so + // nested containers — callouts, list items, table cells, blockquotes — are + // all covered). + const blocks = []; + (function collect(node) { + if (isInlineBlock(node)) + blocks.push(node); for (const child of node.content || []) - count(child); + collect(child); })(copy); - if (nodeMatches === 0) { - // Distinguish "text not present" from "text spans formatting runs". - const fullText = collectText(copy); - if (fullText.includes(edit.find)) { - throw new Error(`Edit "${truncate(edit.find)}": the text exists in the document but spans ` + - `multiple formatting runs (bold/link/italic boundaries). Use a shorter ` + - `fragment that stays inside one run, or use update_page_json for ` + - `structural changes.`); - } - throw new Error(`Edit "${truncate(edit.find)}": text not found in the document.`); + // Find every VALID (atom-free) occurrence per block. A candidate whose slot + // range overlaps a non-text inline atom is never a match (collision-safe vs + // the U+FFFC placeholder), so it is excluded from both the uniqueness count + // and the splicing. + const blockChars = blocks.map((b) => flattenBlock(b)); + const blockPlain = blockChars.map((chars) => chars.map((c) => c.ch).join("")); + const validPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find)); + let total = 0; + for (const positions of validPerBlock) + total += positions.length; + if (total === 0) { + // Distinguish "the text exists but only across an atom" from a plain + // not-found: if a raw substring scan (atoms included) WOULD have hit, + // the only thing blocking the edit is the atom, so report that. + const existsAcrossAtom = blockPlain.some((plain) => plain.indexOf(edit.find) !== -1); + failed.push({ + find: edit.find, + reason: existsAcrossAtom + ? "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes." + : "text not found in the document.", + }); + continue; } - if (nodeMatches > 1 && !edit.replaceAll) { - throw new Error(`Edit "${truncate(edit.find)}": matches ${nodeMatches} times. ` + - `Provide a longer, unique fragment or set replaceAll: true.`); + if (total > 1 && !edit.replaceAll) { + failed.push({ + find: edit.find, + reason: `matches ${total} times. Provide a longer, unique fragment or set replaceAll: true.`, + }); + continue; } - // Perform the replacement(s). - let done = 0; - (function replace(node) { - if (node.type === "text" && node.text && node.text.includes(edit.find)) { + // Plan the splices from the valid positions. For a non-replaceAll edit we + // splice only the first valid match (left-to-right across blocks); for + // replaceAll we splice every valid match. + const plannedPerBlock = blockChars.map(() => []); + let takenFirst = false; + for (let b = 0; b < validPerBlock.length; b++) { + for (const idx of validPerBlock[b]) { if (edit.replaceAll) { - done += countOccurrences(node.text, edit.find); - node.text = node.text.split(edit.find).join(edit.replace); + plannedPerBlock[b].push(idx); } - else if (done === 0) { - // Avoid String.replace: its second arg treats $&, $1, $`, $', $$ as - // special patterns, expanding them instead of inserting literally. - // Splice the first occurrence by index to keep the replacement literal. - const idx = node.text.indexOf(edit.find); - node.text = - node.text.slice(0, idx) + - edit.replace + - node.text.slice(idx + edit.find.length); - done = 1; + else if (!takenFirst) { + plannedPerBlock[b].push(idx); + takenFirst = true; + break; + } + else { + break; } } - for (const child of node.content || []) - replace(child); - })(copy); - results.push({ find: edit.find, replacements: done }); + if (!edit.replaceAll && takenFirst) + break; + } + // Apply the splices block-by-block and re-tokenize changed blocks. + let spliced = 0; + for (let b = 0; b < blocks.length; b++) { + if (plannedPerBlock[b].length === 0) + continue; + const { newChars, spliced: n } = applyEditToChars(blockChars[b], edit, plannedPerBlock[b]); + spliced += n; + blocks[b].content = tokenizeChars(newChars); + } + results.push({ find: edit.find, replacements: spliced }); } - // Drop text nodes that became empty (ProseMirror forbids empty text nodes). + // Safety net: drop any empty text nodes (ProseMirror forbids them). The + // re-tokenizer never emits empty text nodes, but untouched blocks could in + // principle carry one in from upstream. (function prune(node) { if (Array.isArray(node.content)) { node.content = node.content.filter((child) => !(child.type === "text" && child.text === "")); @@ -93,8 +308,5 @@ export function applyTextEdits(doc, edits) { prune(child); } })(copy); - return { doc: copy, results }; -} -function truncate(s) { - return s.length > 60 ? s.slice(0, 57) + "..." : s; + return { doc: copy, results, failed }; } diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 388a22ff..1b1b9f66 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -39,7 +39,12 @@ import { updateTableCell, } from "./lib/node-ops.js"; import { withPageLock } from "./lib/page-lock.js"; -import { applyTextEdits, TextEdit, TextEditResult } from "./lib/json-edit.js"; +import { + applyTextEdits, + TextEdit, + TextEditResult, + TextEditFailure, +} from "./lib/json-edit.js"; import { getCollabToken, performLogin } from "./lib/auth-utils.js"; import { diffDocs } from "./lib/diff.js"; import { @@ -1373,19 +1378,62 @@ export class DocmostClient { // Apply the edits against the LIVE synced document, not the debounced REST // snapshot, so concurrent human edits/comments are preserved. applyTextEdits - // throws descriptive errors on zero/multiple matches — let them propagate. + // records per-edit match problems in `failed` instead of throwing, and + // applies whatever it can; we abort the write only when nothing applied. let results: TextEditResult[] | undefined; + let failed: TextEditFailure[] | undefined; + // Whether we actually wrote new content. Set inside the transform: a + // degenerate edit (e.g. find === replace, or a batch that nets to no change) + // can "apply" yet leave the document byte-for-byte identical, in which case + // we must NOT write (no spurious history version) and must not claim a write + // happened. + let wrote = false; await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + wrote = false; const r = applyTextEdits(liveDoc, edits); results = r.results; + failed = r.failed; + // Nothing applied -> abort the write (mutatePageContent treats a null + // return from the transform as "write nothing"). + if (r.results.length === 0) return null; + // Edits "applied" but produced an identical document: skip the write so no + // new history version is created. Stable structural comparison via + // JSON.stringify (both docs come from the same deep-copied source, so key + // order is stable). + if (JSON.stringify(r.doc) === JSON.stringify(liveDoc)) return null; + wrote = true; return r.doc; }); + if ((results?.length ?? 0) === 0 && (failed?.length ?? 0) > 0) { + // No edit applied: surface an aggregated, actionable error so the caller + // does not mistake a no-op for a partial success. + throw new Error( + "edit_page_text: no edits were applied (nothing written). " + + failed!.map((f) => `"${f.find}": ${f.reason}`).join("; "), + ); + } + + // Edits matched but produced no content change (identical document): report + // a successful no-op — NOT a failure — and do not falsely claim a write. + if (!wrote) { + return { + success: true, + pageId, + applied: results, + failed, + message: "No changes written (edits produced identical content).", + }; + } + return { success: true, pageId, - edits: results, - message: "Text edits applied (node ids and formatting preserved).", + applied: results, + failed, + message: (failed?.length ?? 0) + ? `Applied ${results?.length ?? 0} edit(s); ${failed!.length} failed (see failed[]). Node ids and formatting preserved.` + : "Text edits applied (node ids and formatting preserved).", }; } diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 5d91b914..6f38d15d 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -322,16 +322,20 @@ server.registerTool( "(lossless write: preserves the block ids, callouts, tables and " + "attributes you pass in). Typical flow: get_page_json -> modify the " + "JSON -> update_page_json. Keep existing node ids intact so heading " + - "anchors and history stay stable. `content` is OPTIONAL: omit it to " + - "update only the title (though prefer rename_page for a title-only " + - "change). Supplying neither content nor title is an error.", + "anchors and history stay stable. Minimal full-doc example: " + + '{"type":"doc","content":[{"type":"paragraph","content":' + + '[{"type":"text","text":"Hi"}]}]}. `content` may be a JSON object or a ' + + "JSON string (both accepted), and is OPTIONAL: omit it to update only " + + "the title (though prefer rename_page for a title-only change). " + + "Supplying neither content nor title is an error.", inputSchema: { pageId: z.string().min(1).describe("ID of the page to update"), content: z .any() .optional() .describe( - 'ProseMirror document: {"type":"doc","content":[...]}. Omit to rename only.', + 'ProseMirror document {"type":"doc","content":[...]} (JSON object or ' + + "JSON string). Omit to rename only.", ), title: z.string().optional().describe("Optional new title"), }, @@ -451,11 +455,16 @@ server.registerTool( { description: "Surgical find/replace inside a page's text. Preserves ALL structure: " + - "block ids, marks, links, callouts, tables. Each `find` must match " + - "exactly once (or set replaceAll). A match must lie inside one " + - "formatting run; if the target text crosses bold/link boundaries the " + - "tool reports it — use a shorter fragment or update_page_json then. " + - "This is the preferred tool for fixing wording, typos, numbers, names.", + "block ids, marks, links, callouts, tables. A `find` MAY cross " + + "bold/italic/link boundaries; the replacement inherits marks from the " + + "unchanged common prefix/suffix (editing plain text next to a bold word " + + "keeps it bold; editing inside a bold word keeps the new text bold). " + + "Each `find` must match exactly once (or set replaceAll). The batch " + + "applies what it can and returns applied[] + failed[]; a fully-unmatched " + + "batch writes nothing and errors. Examples: edits:[{find:\"teh\"," + + "replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " + + "(crosses a bold boundary). This is the preferred tool for fixing " + + "wording, typos, numbers, names.", inputSchema: { pageId: z.string().describe("ID of the page to edit"), edits: z @@ -486,14 +495,23 @@ server.registerTool( description: "Replaces a single block identified by its attrs.id WITHOUT resending the " + "whole document. Get the block id from get_page_json, then pass a " + - "ProseMirror node to put in its place. Cheaper and safer than " + + "ProseMirror node to put in its place. Example node: a paragraph " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' + + 'heading {"type":"heading","attrs":{"level":2},"content":' + + '[{"type":"text","text":"Title"}]}. Bold is a mark: ' + + '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' + + "JSON object or a JSON string (both accepted). Cheaper and safer than " + "update_page_json for one-block structural edits.", inputSchema: { pageId: z.string().min(1), nodeId: z.string().min(1), node: z .any() - .describe("ProseMirror node JSON to put in place of the node with this id"), + .describe( + "ProseMirror node to put in place of the node with this id, e.g. " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' + + "JSON object or JSON string both accepted.", + ), }, }, async ({ pageId, nodeId, node }) => { @@ -525,10 +543,21 @@ server.registerTool( "anchorText matching the table; to add a tableCell/tableHeader, use " + "anchorNodeId of a block inside the target row (anchorText only resolves " + "top-level blocks, so it cannot target a row). Note: append is top-level " + - "only and rejects structural table nodes.", + "only and rejects structural table nodes. Example node: a paragraph " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' + + 'heading {"type":"heading","attrs":{"level":2},"content":' + + '[{"type":"text","text":"Title"}]}. Bold is a mark: ' + + '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' + + "JSON object or a JSON string (both accepted).", inputSchema: { pageId: z.string().min(1), - node: z.any(), + node: z + .any() + .describe( + "ProseMirror node to insert, e.g. " + + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' + + "JSON object or JSON string both accepted.", + ), position: z.enum(["before", "after", "append"]), anchorNodeId: z.string().optional(), anchorText: z.string().optional(), diff --git a/packages/mcp/src/lib/json-edit.ts b/packages/mcp/src/lib/json-edit.ts index d452cd97..92a98c0d 100644 --- a/packages/mcp/src/lib/json-edit.ts +++ b/packages/mcp/src/lib/json-edit.ts @@ -1,9 +1,15 @@ /** * Surgical text edits on a ProseMirror document without re-importing it. * - * Each edit replaces an exact substring inside individual text nodes, - * preserving every node id, mark and attribute around it. This is the - * safe alternative to a full markdown re-import for small wording fixes. + * Each edit replaces an exact substring of a block's inline text, preserving + * every node id, mark and attribute around it. Matching works at the + * INLINE-CONTAINER (block) level: a block's text nodes are flattened into a + * per-character array, so a `find` may freely cross bold/italic/link + * boundaries (separate text nodes). The replacement inherits marks from the + * unchanged common prefix/suffix of the match, so editing plain text next to a + * bold word keeps the bold word bold, and editing the inside of a bold word + * keeps the inserted text bold. This is the safe alternative to a full markdown + * re-import for small wording fixes. */ export interface TextEdit { @@ -18,98 +24,342 @@ export interface TextEditResult { replacements: number; } -/** Collect plain text of the whole document (for span-detection hints). */ -function collectText(node: any): string { - let out = ""; - if (node.type === "text") out += node.text || ""; - for (const child of node.content || []) out += collectText(child); +export interface TextEditFailure { + find: string; + reason: string; +} + +/** One flattened inline slot: a single UTF-16 code unit, or an opaque atom. */ +interface CharSlot { + ch: string; + marks: any[]; + /** Set for non-text inline nodes (hardBreak/mention/image/emoji/...). */ + atom?: any; +} + +/** Placeholder code unit standing in for one opaque (non-text) inline node. */ +const ATOM_PLACEHOLDER = ""; // OBJECT REPLACEMENT CHARACTER + +/** + * Find every VALID occurrence of `needle` in a block's flattened slots. + * + * A candidate occurrence at slot range [start, start+needle.length) is valid + * ONLY IF none of the slots in that range are atoms (non-text inline nodes). + * This makes atom matching collision-safe against the U+FFFC placeholder: an + * atom slot can never be part of a match, while a real text node containing a + * literal U+FFFC code unit still matches normally (its slot has no `.atom`). + * + * Overlapping candidates that touch an atom are skipped (not counted, not + * spliced); the scan resumes one code unit past the rejected start so a valid + * match that begins just after an atom is not missed. + */ +function findValidMatches( + chars: CharSlot[], + plain: string, + needle: string, +): number[] { + if (!needle) return []; + const positions: number[] = []; + let idx = plain.indexOf(needle); + while (idx !== -1) { + const end = idx + needle.length; + let hasAtom = false; + for (let i = idx; i < end; i++) { + if (chars[i] && chars[i].atom) { + hasAtom = true; + break; + } + } + if (!hasAtom) { + positions.push(idx); + // Non-overlapping: skip past this match. + idx = plain.indexOf(needle, end); + } else { + // This candidate crosses an atom: reject it and resume one unit later so + // an overlapping valid match starting after the atom is still found. + idx = plain.indexOf(needle, idx + 1); + } + } + return positions; +} + +/** Order-sensitive deep-equality of two marks arrays. */ +function marksEqual(a: any[], b: any[]): boolean { + if (a === b) return true; + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (JSON.stringify(a[i]) !== JSON.stringify(b[i])) return false; + } + return true; +} + +/** A block is any node that DIRECTLY contains at least one inline text child. */ +function isInlineBlock(node: any): boolean { + return ( + Array.isArray(node?.content) && + node.content.some((child: any) => child && child.type === "text") + ); +} + +/** Flatten a block's inline content into a per-code-unit slot array. */ +function flattenBlock(node: any): CharSlot[] { + const chars: CharSlot[] = []; + for (const child of node.content || []) { + if (child && child.type === "text" && typeof child.text === "string") { + const marks = child.marks || []; + // Iterate by UTF-16 code unit so indices align with String.indexOf. + for (let i = 0; i < child.text.length; i++) { + chars.push({ ch: child.text[i], marks }); + } + } else { + // Any non-text inline node becomes one opaque slot. + chars.push({ + ch: ATOM_PLACEHOLDER, + marks: (child && child.marks) || [], + atom: child, + }); + } + } + return chars; +} + +/** Re-tokenize a slot array back into ProseMirror inline nodes. */ +function tokenizeChars(chars: CharSlot[]): any[] { + const out: any[] = []; + let buffer = ""; + let bufferMarks: any[] | null = null; + + const flush = () => { + if (buffer.length === 0) return; + const textNode: any = { type: "text", text: buffer }; + if (bufferMarks && bufferMarks.length > 0) textNode.marks = bufferMarks; + out.push(textNode); + buffer = ""; + bufferMarks = null; + }; + + for (const slot of chars) { + if (slot.atom) { + flush(); + out.push(slot.atom); + continue; + } + if (bufferMarks !== null && !marksEqual(bufferMarks, slot.marks)) { + flush(); + } + if (bufferMarks === null) bufferMarks = slot.marks; + buffer += slot.ch; + } + flush(); return out; } -function countOccurrences(haystack: string, needle: string): number { - if (!needle) return 0; - let count = 0; - let idx = haystack.indexOf(needle); - while (idx !== -1) { - count++; - idx = haystack.indexOf(needle, idx + needle.length); - } - return count; +/** Longest common prefix length of two strings. */ +function commonPrefixLen(a: string, b: string): number { + const max = Math.min(a.length, b.length); + let i = 0; + while (i < max && a[i] === b[i]) i++; + return i; +} + +/** Longest common suffix length of two strings, capped so it can't overlap. */ +function commonSuffixLen(a: string, b: string, cap: number): number { + const max = Math.min(a.length, b.length, cap); + let i = 0; + while (i < max && a[a.length - 1 - i] === b[b.length - 1 - i]) i++; + return i; } /** - * Apply text edits to a ProseMirror doc (mutates a deep copy, returns it). - * Throws a descriptive error when an edit matches zero times or matches - * multiple times without replaceAll — so the caller can refine `find`. + * Apply one edit to one block's flattened slot array. + * + * The caller passes only VALID (atom-free) match positions (see + * findValidMatches), so no match range can overlap an atom slot here. + */ +function applyEditToChars( + chars: CharSlot[], + edit: TextEdit, + matchPositions: number[], +): { newChars: CharSlot[]; spliced: number } { + // Pre-compute the diff slices once (find/replace are constant per edit). + const p = commonPrefixLen(edit.find, edit.replace); + const s = commonSuffixLen( + edit.find, + edit.replace, + Math.min(edit.find.length, edit.replace.length) - p, + ); + const insertText = edit.replace.slice(p, edit.replace.length - s); + + // Rebuild the slot array in a single left-to-right pass, splicing at each + // match start. Offsets into `chars` stay valid because we copy through. + const newChars: CharSlot[] = []; + let cursor = 0; + let spliced = 0; + for (const mStart of matchPositions) { + const mEnd = mStart + edit.find.length; + const changedStart = mStart + p; + const changedEnd = mEnd - s; + + // Copy through everything up to the changed region (incl. the prefix). + for (; cursor < changedStart; cursor++) newChars.push(chars[cursor]); + + const removed = chars.slice(changedStart, changedEnd); + + // Choose the marks for the inserted characters. + let chosenMarks: any[] = []; + if ( + removed.length > 0 && + removed.every((r) => marksEqual(r.marks, removed[0].marks)) + ) { + // Uniform removed region: inherit its marks directly. + chosenMarks = removed[0].marks; + } else { + // Empty or non-uniform removed region: inherit from the nearest TEXT + // neighbour, skipping atom slots (an atom carries marks that do not + // belong on inserted text). Scan left first, then right; fall back to []. + let inherited: any[] | null = null; + for (let i = changedStart - 1; i >= 0; i--) { + if (!chars[i].atom) { + inherited = chars[i].marks; + break; + } + } + if (inherited === null) { + for (let i = changedEnd; i < chars.length; i++) { + if (!chars[i].atom) { + inherited = chars[i].marks; + break; + } + } + } + chosenMarks = inherited === null ? [] : inherited; + } + + // Emit the inserted text (one slot per code unit). + for (let i = 0; i < insertText.length; i++) { + newChars.push({ ch: insertText[i], marks: chosenMarks }); + } + + // Skip the removed region. + cursor = changedEnd; + spliced++; + } + // Copy through the tail. + for (; cursor < chars.length; cursor++) newChars.push(chars[cursor]); + + return { newChars, spliced }; +} + +/** + * Apply text edits to a ProseMirror doc (operates on a deep copy, returns it). + * + * Returns { doc, results, failed }: + * - results: edits that applied (replacements >= 1). + * - failed: edits that matched zero times, were ambiguous (multi-match + * without replaceAll), or whose changed region crosses a non-text inline + * node. These do NOT throw — they are recorded so the caller can surface an + * actionable message and still keep the edits that did apply. + * + * Edits apply IN ORDER to the same working copy, so a later edit can target + * text produced by an earlier one. The input doc is never mutated. The only + * thrown error is for invalid input (an empty `edit.find`). */ export function applyTextEdits( doc: any, edits: TextEdit[], -): { doc: any; results: TextEditResult[] } { +): { doc: any; results: TextEditResult[]; failed: TextEditFailure[] } { const copy = JSON.parse(JSON.stringify(doc)); const results: TextEditResult[] = []; + const failed: TextEditFailure[] = []; for (const edit of edits) { if (!edit.find) throw new Error("edit.find must be a non-empty string"); - // Count matches inside individual text nodes first. - let nodeMatches = 0; - (function count(node: any) { - if (node.type === "text" && node.text) { - nodeMatches += countOccurrences(node.text, edit.find); - } - for (const child of node.content || []) count(child); + // Gather every inline block in document order (recurse the whole tree so + // nested containers — callouts, list items, table cells, blockquotes — are + // all covered). + const blocks: any[] = []; + (function collect(node: any) { + if (isInlineBlock(node)) blocks.push(node); + for (const child of node.content || []) collect(child); })(copy); - if (nodeMatches === 0) { - // Distinguish "text not present" from "text spans formatting runs". - const fullText = collectText(copy); - if (fullText.includes(edit.find)) { - throw new Error( - `Edit "${truncate(edit.find)}": the text exists in the document but spans ` + - `multiple formatting runs (bold/link/italic boundaries). Use a shorter ` + - `fragment that stays inside one run, or use update_page_json for ` + - `structural changes.`, - ); - } - throw new Error( - `Edit "${truncate(edit.find)}": text not found in the document.`, + // Find every VALID (atom-free) occurrence per block. A candidate whose slot + // range overlaps a non-text inline atom is never a match (collision-safe vs + // the U+FFFC placeholder), so it is excluded from both the uniqueness count + // and the splicing. + const blockChars = blocks.map((b) => flattenBlock(b)); + const blockPlain = blockChars.map((chars) => + chars.map((c) => c.ch).join(""), + ); + const validPerBlock: number[][] = blockChars.map((chars, b) => + findValidMatches(chars, blockPlain[b], edit.find), + ); + let total = 0; + for (const positions of validPerBlock) total += positions.length; + + if (total === 0) { + // Distinguish "the text exists but only across an atom" from a plain + // not-found: if a raw substring scan (atoms included) WOULD have hit, + // the only thing blocking the edit is the atom, so report that. + const existsAcrossAtom = blockPlain.some( + (plain) => plain.indexOf(edit.find) !== -1, ); + failed.push({ + find: edit.find, + reason: existsAcrossAtom + ? "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes." + : "text not found in the document.", + }); + continue; + } + if (total > 1 && !edit.replaceAll) { + failed.push({ + find: edit.find, + reason: `matches ${total} times. Provide a longer, unique fragment or set replaceAll: true.`, + }); + continue; } - if (nodeMatches > 1 && !edit.replaceAll) { - throw new Error( - `Edit "${truncate(edit.find)}": matches ${nodeMatches} times. ` + - `Provide a longer, unique fragment or set replaceAll: true.`, - ); - } - - // Perform the replacement(s). - let done = 0; - (function replace(node: any) { - if (node.type === "text" && node.text && node.text.includes(edit.find)) { + // Plan the splices from the valid positions. For a non-replaceAll edit we + // splice only the first valid match (left-to-right across blocks); for + // replaceAll we splice every valid match. + const plannedPerBlock: number[][] = blockChars.map(() => []); + let takenFirst = false; + for (let b = 0; b < validPerBlock.length; b++) { + for (const idx of validPerBlock[b]) { if (edit.replaceAll) { - done += countOccurrences(node.text, edit.find); - node.text = node.text.split(edit.find).join(edit.replace); - } else if (done === 0) { - // Avoid String.replace: its second arg treats $&, $1, $`, $', $$ as - // special patterns, expanding them instead of inserting literally. - // Splice the first occurrence by index to keep the replacement literal. - const idx = node.text.indexOf(edit.find); - node.text = - node.text.slice(0, idx) + - edit.replace + - node.text.slice(idx + edit.find.length); - done = 1; + plannedPerBlock[b].push(idx); + } else if (!takenFirst) { + plannedPerBlock[b].push(idx); + takenFirst = true; + break; + } else { + break; } } - for (const child of node.content || []) replace(child); - })(copy); + if (!edit.replaceAll && takenFirst) break; + } - results.push({ find: edit.find, replacements: done }); + // Apply the splices block-by-block and re-tokenize changed blocks. + let spliced = 0; + for (let b = 0; b < blocks.length; b++) { + if (plannedPerBlock[b].length === 0) continue; + const { newChars, spliced: n } = applyEditToChars( + blockChars[b], + edit, + plannedPerBlock[b], + ); + spliced += n; + blocks[b].content = tokenizeChars(newChars); + } + + results.push({ find: edit.find, replacements: spliced }); } - // Drop text nodes that became empty (ProseMirror forbids empty text nodes). + // Safety net: drop any empty text nodes (ProseMirror forbids them). The + // re-tokenizer never emits empty text nodes, but untouched blocks could in + // principle carry one in from upstream. (function prune(node: any) { if (Array.isArray(node.content)) { node.content = node.content.filter( @@ -119,9 +369,5 @@ export function applyTextEdits( } })(copy); - return { doc: copy, results }; -} - -function truncate(s: string): string { - return s.length > 60 ? s.slice(0, 57) + "..." : s; + return { doc: copy, results, failed }; } diff --git a/packages/mcp/test/unit/json-edit.test.mjs b/packages/mcp/test/unit/json-edit.test.mjs index a270dc9c..d1958b38 100644 --- a/packages/mcp/test/unit/json-edit.test.mjs +++ b/packages/mcp/test/unit/json-edit.test.mjs @@ -32,18 +32,24 @@ test("single-match replace preserves ids/marks and reports replacements===1", () assert.equal(tnode.text, "Hello there"); }); -test("zero match throws not found", () => { +test("zero match is reported via failed[], doc unchanged", () => { const input = doc(paragraph(textNode("Hello world"))); + const snapshot = JSON.parse(JSON.stringify(input)); - assert.throws( - () => applyTextEdits(input, [{ find: "absent", replace: "x" }]), - /not found/, - ); + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "absent", replace: "x" }, + ]); + + assert.deepEqual(results, []); + assert.equal(failed.length, 1); + assert.match(failed[0].reason, /not found/); + // Doc is structurally unchanged (modulo deep-copy identity). + assert.deepEqual(out, snapshot); }); -test("text split across two text nodes (one bold) throws spans-multiple-runs", () => { +test("text split across two text nodes (one bold) now applies, marks preserved", () => { // "Hello world" is split: "Hello " (plain) + "world" (bold). No single text - // node contains "Hello world", but the collected document text does. + // node contains "Hello world", but the block-level matcher spans them. const input = doc( paragraph( textNode("Hello "), @@ -51,20 +57,161 @@ test("text split across two text nodes (one bold) throws spans-multiple-runs", ( ), ); - assert.throws( - () => applyTextEdits(input, [{ find: "Hello world", replace: "x" }]), - /spans/, - ); + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "Hello world", replace: "Hello there" }, + ]); + + assert.deepEqual(results, [{ find: "Hello world", replacements: 1 }]); + assert.deepEqual(failed, []); + + // The unchanged prefix "Hello " stays plain; the changed region "world" was + // uniformly bold, so the replacement "there" stays bold. + const para = out.content[0]; + assert.equal(para.content.length, 2); + assert.equal(para.content[0].text, "Hello "); + assert.equal(para.content[0].marks, undefined); + assert.equal(para.content[1].text, "there"); + assert.deepEqual(para.content[1].marks, [{ type: "bold" }]); }); -test("multi-match without replaceAll throws matches", () => { +test("multi-match without replaceAll is reported via failed[], doc unchanged", () => { // "ab" appears twice inside a single text node. const input = doc(paragraph(textNode("ab cd ab"))); + const snapshot = JSON.parse(JSON.stringify(input)); - assert.throws( - () => applyTextEdits(input, [{ find: "ab", replace: "x" }]), - /matches/, + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "ab", replace: "x" }, + ]); + + assert.deepEqual(results, []); + assert.equal(failed.length, 1); + assert.match(failed[0].reason, /matches/); + assert.deepEqual(out, snapshot); +}); + +test("cross-run replace with mixed marks inherits left-neighbor marks", () => { + // The matched region "BC" is split: "B" bold, "C" italic — non-uniform marks, + // and the replacement "X" shares no common prefix/suffix with "BC", so the + // inserted text inherits the left neighbor's marks. Here the left neighbor of + // the changed region is "A" (plain), so "X" must be plain. + const input = doc( + paragraph( + textNode("A"), + textNode("B", { marks: [{ type: "bold" }] }), + textNode("C", { marks: [{ type: "italic" }] }), + textNode("D"), + ), ); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "BC", replace: "X" }, + ]); + + assert.deepEqual(results, [{ find: "BC", replacements: 1 }]); + assert.deepEqual(failed, []); + + // "A" + "X"(plain) + "D" coalesce into a single plain text node "AXD". + const para = out.content[0]; + assert.equal(para.content.length, 1); + assert.equal(para.content[0].text, "AXD"); + assert.equal(para.content[0].marks, undefined); +}); + +test("cross-run replace at block start inherits [] marks", () => { + // The whole block content is the mixed-mark match "BC" with no left neighbor, + // so inserted text falls through to the right neighbor / [] (block start). + const input = doc( + paragraph( + textNode("B", { marks: [{ type: "bold" }] }), + textNode("C", { marks: [{ type: "italic" }] }), + ), + ); + + const { doc: out, results } = applyTextEdits(input, [ + { find: "BC", replace: "X" }, + ]); + + assert.deepEqual(results, [{ find: "BC", replacements: 1 }]); + const para = out.content[0]; + assert.equal(para.content.length, 1); + assert.equal(para.content[0].text, "X"); + assert.equal(para.content[0].marks, undefined); +}); + +test("partial batch: good edits apply, the bad one goes to failed[]", () => { + const input = doc(paragraph(textNode("alpha beta gamma"))); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "alpha", replace: "ALPHA" }, + { find: "absent", replace: "X" }, + { find: "gamma", replace: "GAMMA" }, + ]); + + // The 2 matching edits applied; the missing one is reported. + assert.deepEqual(results, [ + { find: "alpha", replacements: 1 }, + { find: "gamma", replacements: 1 }, + ]); + assert.equal(failed.length, 1); + assert.equal(failed[0].find, "absent"); + assert.match(failed[0].reason, /not found/); + assert.equal(out.content[0].content[0].text, "ALPHA beta GAMMA"); +}); + +test("a match that crosses an atom is refused, doc unchanged", () => { + // paragraph: "a" "b". A find of "ab" spans the hardBreak atom, + // so it is not a valid match: a match range may not contain an atom slot. + // The edit lands in failed[] (reason: atom-specific OR not-found) and the + // document is left unchanged. + const input = doc( + paragraph( + textNode("a"), + { type: "hardBreak" }, + textNode("b"), + ), + ); + const snapshot = JSON.parse(JSON.stringify(input)); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "ab", replace: "z" }, + ]); + + assert.deepEqual(results, []); + assert.equal(failed.length, 1); + assert.match(failed[0].reason, /non-text inline node|not found/); + assert.deepEqual(out, snapshot); +}); + +test("a TEXT node containing a literal U+FFFC matches/replaces normally", () => { + // The U+FFFC OBJECT REPLACEMENT CHARACTER is the placeholder for atom slots, + // but a real text node may legitimately contain that code unit. Such a slot + // has no `.atom`, so it must match and replace like any other character — + // proving atoms and literal-U+FFFC text are distinguished. + const input = doc(paragraph(textNode("xy"))); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "xy", replace: "done" }, + ]); + + assert.deepEqual(results, [{ find: "xy", replacements: 1 }]); + assert.deepEqual(failed, []); + assert.equal(out.content[0].content[0].text, "done"); +}); + +test("a no-op edit (find === replace) produces a doc deep-equal to the input", () => { + // find === replace "applies" but changes nothing: the produced document must + // be structurally identical to the input (this is what lets the client skip + // the collaboration write and avoid a spurious history version). + const input = doc(paragraph(textNode("unchanged text"))); + const snapshot = JSON.parse(JSON.stringify(input)); + + const { doc: out, results } = applyTextEdits(input, [ + { find: "unchanged", replace: "unchanged" }, + ]); + + assert.deepEqual(results, [{ find: "unchanged", replacements: 1 }]); + // Deep-equal to the input despite the edit being reported as applied. + assert.deepEqual(out, snapshot); }); test("replaceAll replaces all occurrences", () => {