diff --git a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts index 5e1d6cba..4bfff0fb 100644 --- a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts +++ b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts @@ -638,7 +638,10 @@ export class AiChatToolsService { 'editing inside a bold word keeps the new text bold). Each find must ' + 'match exactly once unless replaceAll is set. The batch applies what ' + 'it can and returns applied[] + failed[]; a fully-unmatched batch ' + - 'writes nothing and errors. Examples: edits:[{find:"teh",replace:"the"}]; ' + + 'writes nothing and errors. find should be the literal rendered text ' + + '(no markdown). Markdown wrappers (**bold**, *italic*, `code`) and ' + + 'trailing emoji are tolerated via a strip-and-retry fallback, but ' + + 'plain text is preferred. Examples: edits:[{find:"teh",replace:"the"}]; ' + 'edits:[{find:"Hello world",replace:"Hello there"}] (crosses a bold ' + 'boundary). Reversible: the previous version is kept in page history.', inputSchema: z.object({ @@ -730,7 +733,12 @@ export class AiChatToolsService { anchorText: z .string() .optional() - .describe('Anchor text fragment (for before/after).'), + .describe( + 'Anchor text fragment (for before/after), matched against the ' + + "block's literal rendered plain text (no markdown). " + + 'Markdown/emoji are tolerated as a fallback; prefer plain text ' + + 'or anchorNodeId.', + ), }), execute: async ({ pageId, diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index ed4ab202..92ea7aef 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -1258,7 +1258,13 @@ export class DocmostClient { const anchorDesc = opts.anchorNodeId ? 
`anchorNodeId "${opts.anchorNodeId}"` : `anchorText "${opts.anchorText}"`; - throw new Error(`insert_node: anchor not found (${anchorDesc}) on page ${pageId}`); + // anchorText is matched against the block's literal RENDERED plain text; + // markdown/emoji are tolerated only as a strip-and-retry fallback, so a + // miss usually means the text differs from what's on the page. + const hint = opts.anchorText + ? ' anchorText must be the block\'s literal rendered plain text (no markdown wrappers or emoji); anchorNodeId from get_page_json is more reliable.' + : ""; + throw new Error(`insert_node: anchor not found (${anchorDesc}) on page ${pageId}.${hint}`); } return { success: true, inserted: true, position: opts.position }; } diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js index e8cdd1ee..b169f495 100644 --- a/packages/mcp/build/index.js +++ b/packages/mcp/build/index.js @@ -324,7 +324,10 @@ export function createDocmostMcpServer(config) { "keeps it bold; editing inside a bold word keeps the new text bold). " + "Each `find` must match exactly once (or set replaceAll). The batch " + "applies what it can and returns applied[] + failed[]; a fully-unmatched " + - "batch writes nothing and errors. Examples: edits:[{find:\"teh\"," + + "batch writes nothing and errors. `find` should be the literal rendered " + + "text (no markdown). Markdown wrappers (**bold**, *italic*, `code`) and " + + "trailing emoji are tolerated via a strip-and-retry fallback, but plain " + + "text is preferred. Examples: edits:[{find:\"teh\"," + "replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " + "(crosses a bold boundary). 
This is the preferred tool for fixing " + "wording, typos, numbers, names.", @@ -391,7 +394,10 @@ export function createDocmostMcpServer(config) { "INSIDE the target table — anchorNodeId of any block/cell in it, or " + "anchorText matching the table; to add a tableCell/tableHeader, use " + "anchorNodeId of a block inside the target row (anchorText only resolves " + - "top-level blocks, so it cannot target a row). Note: append is top-level " + + "top-level blocks, so it cannot target a row). `anchorText` is matched " + + "against the block's literal rendered plain text (no markdown); " + + "markdown/emoji are tolerated as a fallback; prefer plain text or " + + "anchorNodeId. Note: append is top-level " + "only and rejects structural table nodes. Example node: a paragraph " + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' + 'heading {"type":"heading","attrs":{"level":2},"content":' + diff --git a/packages/mcp/build/lib/json-edit.js b/packages/mcp/build/lib/json-edit.js index b0696697..c4d8a79e 100644 --- a/packages/mcp/build/lib/json-edit.js +++ b/packages/mcp/build/lib/json-edit.js @@ -11,6 +11,7 @@ * keeps the inserted text bold. This is the safe alternative to a full markdown * re-import for small wording fixes. */ +import { stripInlineMarkdown } from "./text-normalize.js"; /** Placeholder code unit standing in for one opaque (non-text) inline node. */ const ATOM_PLACEHOLDER = ""; // OBJECT REPLACEMENT CHARACTER /** @@ -241,21 +242,66 @@ export function applyTextEdits(doc, edits) { // and the splicing. const blockChars = blocks.map((b) => flattenBlock(b)); const blockPlain = blockChars.map((chars) => chars.map((c) => c.ch).join("")); - const validPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find)); + // EXACT MATCH WINS: try the verbatim locator first. 
+ let effectiveFind = edit.find; + let normalized = false; + let validPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find)); let total = 0; for (const positions of validPerBlock) total += positions.length; + // FALLBACK: only if the verbatim locator matched nothing, retry with the + // markdown-stripped form. `edit.replace` is never touched — this only + // changes what we LOCATE, not what we insert. + const stripped = stripInlineMarkdown(edit.find); + if (total === 0 && stripped !== edit.find && stripped.length > 0) { + const strippedPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], stripped)); + let strippedTotal = 0; + for (const positions of strippedPerBlock) + strippedTotal += positions.length; + if (strippedTotal >= 1) { + validPerBlock = strippedPerBlock; + total = strippedTotal; + effectiveFind = stripped; + normalized = true; + } + } if (total === 0) { // Distinguish "the text exists but only across an atom" from a plain - // not-found: if a raw substring scan (atoms included) WOULD have hit, - // the only thing blocking the edit is the atom, so report that. - const existsAcrossAtom = blockPlain.some((plain) => plain.indexOf(edit.find) !== -1); - failed.push({ - find: edit.find, - reason: existsAcrossAtom - ? "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes." - : "text not found in the document.", - }); + // not-found: if a raw substring scan (atoms included) WOULD have hit — + // for EITHER the verbatim or the stripped locator — the only thing + // blocking the edit is the atom, so report that. 
+ const existsAcrossAtom = blockPlain.some((plain) => plain.indexOf(edit.find) !== -1 || + (stripped !== edit.find && plain.indexOf(stripped) !== -1)); + let reason; + if (existsAcrossAtom) { + reason = + "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes."; + } + else { + // Append a bounded "closest text" hint: find the FIRST block that + // contains the longest whitespace-delimited token (>= 3 chars) of the + // (stripped, then raw) locator, and quote that block's plain text. + reason = "text not found in the document."; + const tokenSource = stripped.length > 0 ? stripped : edit.find; + const longestToken = tokenSource + .split(/\s+/) + .filter((t) => t.length >= 3) + .sort((a, b) => b.length - a.length)[0]; + if (longestToken) { + const hitBlock = blockPlain.find((plain) => plain.includes(longestToken)); + if (hitBlock) { + // Truncate by code point (spread iterates by code point) so a + // surrogate pair is never split; append the ellipsis only when the + // text was actually longer than the limit. + const points = [...hitBlock]; + const snippet = points.length > 120 + ? points.slice(0, 120).join("") + "…" + : hitBlock; + reason += ` Closest block text: "${snippet}".`; + } + } + } + failed.push({ find: edit.find, reason }); continue; } if (total > 1 && !edit.replaceAll) { @@ -287,16 +333,28 @@ export function applyTextEdits(doc, edits) { if (!edit.replaceAll && takenFirst) break; } - // Apply the splices block-by-block and re-tokenize changed blocks. + // Apply the splices block-by-block and re-tokenize changed blocks. The + // local edit uses `effectiveFind` (verbatim or normalized) so the + // prefix/suffix diff is computed against the ACTUALLY matched text, while + // `edit.replace` stays literal — never stripped. 
+ const effectiveEdit = { + find: effectiveFind, + replace: edit.replace, + replaceAll: edit.replaceAll, + }; let spliced = 0; for (let b = 0; b < blocks.length; b++) { if (plannedPerBlock[b].length === 0) continue; - const { newChars, spliced: n } = applyEditToChars(blockChars[b], edit, plannedPerBlock[b]); + const { newChars, spliced: n } = applyEditToChars(blockChars[b], effectiveEdit, plannedPerBlock[b]); spliced += n; blocks[b].content = tokenizeChars(newChars); } - results.push({ find: edit.find, replacements: spliced }); + // Keep `find: edit.find` (the original) so the caller can correlate. + const result = { find: edit.find, replacements: spliced }; + if (normalized) + result.normalized = true; + results.push(result); } // Safety net: drop any empty text nodes (ProseMirror forbids them). The // re-tokenizer never emits empty text nodes, but untouched blocks could in diff --git a/packages/mcp/build/lib/node-ops.js b/packages/mcp/build/lib/node-ops.js index 6356df5e..3f8ca1a8 100644 --- a/packages/mcp/build/lib/node-ops.js +++ b/packages/mcp/build/lib/node-ops.js @@ -13,6 +13,7 @@ * never mutated. All functions are defensively null-safe: missing/!Array * `content`, non-object nodes, and absent `attrs` are tolerated. */ +import { stripInlineMarkdown } from "./text-normalize.js"; /** Deep-clone a JSON-serializable value without mutating the original. */ function clone(value) { if (typeof structuredClone === "function") { @@ -325,6 +326,33 @@ const REQUIRED_CONTAINER = { tableCell: "tableRow", tableHeader: "tableRow", }; +/** + * Find the index of the first TOP-LEVEL block whose plain text includes the + * anchor, with a markdown-stripping FALLBACK. Returns -1 when none matches. + * + * Two passes preserve "exact wins globally": + * - Pass 1: first block containing the verbatim `anchorText`. + * - Pass 2 (only if pass 1 found nothing): first block containing the + * markdown-stripped anchor, when stripping actually changed it. 
+ */ +function findAnchorTextIndex(content, anchorText) { + if (!Array.isArray(content)) + return -1; + // Pass 1: exact. + for (let i = 0; i < content.length; i++) { + if (blockPlainText(content[i]).includes(anchorText)) + return i; + } + // Pass 2: markdown-stripped fallback. + const a = stripInlineMarkdown(anchorText); + if (a !== anchorText && a.length > 0) { + for (let i = 0; i < content.length; i++) { + if (blockPlainText(content[i]).includes(a)) + return i; + } + } + return -1; +} /** * Locate an anchor and return its ancestor chain (from `doc` down to and * including the matched node). Each chain entry is `{ node, index }` where @@ -355,14 +383,14 @@ function findAnchorChain(doc, opts) { return search(doc, -1, []); } // By text: only top-level blocks are scanned (same rule as the JSON path). + // Exact match wins; a markdown-stripped fallback is tried only on a miss. if (opts.anchorText != null && Array.isArray(doc.content)) { - for (let i = 0; i < doc.content.length; i++) { - if (blockPlainText(doc.content[i]).includes(opts.anchorText)) { - return [ - { node: doc, index: -1 }, - { node: doc.content[i], index: i }, - ]; - } + const i = findAnchorTextIndex(doc.content, opts.anchorText); + if (i !== -1) { + return [ + { node: doc, index: -1 }, + { node: doc.content[i], index: i }, + ]; } } return null; @@ -472,13 +500,13 @@ export function insertNodeRelative(doc, node, opts) { } return { doc: out, inserted }; } - // Resolve by text: only top-level doc.content blocks are scanned. + // Resolve by text: only top-level doc.content blocks are scanned. Exact + // match wins; a markdown-stripped fallback is tried only on a miss. 
if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) { - for (let i = 0; i < out.content.length; i++) { - if (blockPlainText(out.content[i]).includes(opts.anchorText)) { - out.content.splice(i + offset, 0, fresh); - return { doc: out, inserted: true }; - } + const i = findAnchorTextIndex(out.content, opts.anchorText); + if (i !== -1) { + out.content.splice(i + offset, 0, fresh); + return { doc: out, inserted: true }; } } return { doc: out, inserted: false }; diff --git a/packages/mcp/build/lib/text-normalize.js b/packages/mcp/build/lib/text-normalize.js new file mode 100644 index 00000000..f1aebf4f --- /dev/null +++ b/packages/mcp/build/lib/text-normalize.js @@ -0,0 +1,71 @@ +/** + * Locator normalization: strip inline markdown wrappers and trailing + * decoration from a LOCATOR string so a find/anchor that the model wrote with + * markdown (or a stray emoji) can still match the document's plain text. + * + * This is used ONLY as a fallback for LOCATING (after an exact match fails); + * it is never applied to replacement text or inserted node content, so no + * formatting is ever lost. + */ +/** Maximum unwrap passes, so pathological/nested input cannot loop forever. */ +const MAX_PASSES = 8; +/** + * Inline emphasis/code/strikethrough wrappers, strong BEFORE emphasis so + * `**x**` collapses to `x` rather than leaving a stray `*x*`. Each pattern is + * non-greedy and capture group 1 is the inner text. Applied repeatedly until + * the string stops changing (nested wrappers like `**_x_**`). + */ +const WRAPPER_PATTERNS = [ + /\*\*([^*]+?)\*\*/g, // **x** + /__([^_]+?)__/g, // __x__ + /~~([^~]+?)~~/g, // ~~x~~ + /\*([^*]+?)\*/g, // *x* + /_([^_]+?)_/g, // _x_ + /``([^`]+?)``/g, // ``x`` + /`([^`]+?)`/g, // `x` +]; +/** + * Conservatively strip inline markdown from a locator string. + * + * Deterministic, order-fixed steps: + * 1. Links/images: `[text](url)` -> `text`, `![alt](src)` -> `alt`. + * 2. 
Balanced inline wrappers (strong before emphasis, code, strikethrough), + * applied repeatedly until stable for nested cases. + * 3. Trim leading/trailing decoration only: whitespace, leftover marker chars + * (`* _ ~ \``) and emoji. Letters/digits and sentence punctuation (`.`/`,` + * etc.) are NEVER trimmed. + * + * If the result is empty (e.g. the input was only markers like `***`), the + * ORIGINAL string is returned so a locator can never normalize down to "" and + * match everything. + */ +export function stripInlineMarkdown(s) { + if (typeof s !== "string" || s.length === 0) + return s; + let out = s; + // 1. Links/images -> their visible text. `!?` covers both forms. + out = out.replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1"); + // 2. Strip balanced wrappers, repeating until the string is stable so nested + // wrappers (`**_x_**`) and adjacent runs both collapse. + for (let pass = 0; pass < MAX_PASSES; pass++) { + const before = out; + for (const re of WRAPPER_PATTERNS) { + out = out.replace(re, "$1"); + } + if (out === before) + break; + } + // 3. Trim leading/trailing decoration: whitespace, leftover markdown markers, + // and emoji (Extended_Pictographic plus the VS16 / ZWJ joiners, plus the + // regional-indicator range U+1F1E6–U+1F1FF for flag emoji, which are NOT + // Extended_Pictographic). The `u` flag enables the Unicode property escape. + // Anchored runs only — interior text and sentence punctuation are untouched. + const DECORATION = "[\\s*_~\\x60\\p{Extended_Pictographic}\\u{1F1E6}-\\u{1F1FF}\\u{FE0F}\\u{200D}]+"; + out = out + .replace(new RegExp("^" + DECORATION, "u"), "") + .replace(new RegExp(DECORATION + "$", "u"), ""); + // 4. Never normalize a locator down to nothing. 
+ if (out.length === 0) + return s; + return out; +} diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 1b1b9f66..093c0ab8 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1559,8 +1559,14 @@ export class DocmostClient { const anchorDesc = opts.anchorNodeId ? `anchorNodeId "${opts.anchorNodeId}"` : `anchorText "${opts.anchorText}"`; + // anchorText is matched against the block's literal RENDERED plain text; + // markdown/emoji are tolerated only as a strip-and-retry fallback, so a + // miss usually means the text differs from what's on the page. + const hint = opts.anchorText + ? ' anchorText must be the block\'s literal rendered plain text (no markdown wrappers or emoji); anchorNodeId from get_page_json is more reliable.' + : ""; throw new Error( - `insert_node: anchor not found (${anchorDesc}) on page ${pageId}`, + `insert_node: anchor not found (${anchorDesc}) on page ${pageId}.${hint}`, ); } diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 6f38d15d..95a649e6 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -461,7 +461,10 @@ server.registerTool( "keeps it bold; editing inside a bold word keeps the new text bold). " + "Each `find` must match exactly once (or set replaceAll). The batch " + "applies what it can and returns applied[] + failed[]; a fully-unmatched " + - "batch writes nothing and errors. Examples: edits:[{find:\"teh\"," + + "batch writes nothing and errors. `find` should be the literal rendered " + + "text (no markdown). Markdown wrappers (**bold**, *italic*, `code`) and " + + "trailing emoji are tolerated via a strip-and-retry fallback, but plain " + + "text is preferred. Examples: edits:[{find:\"teh\"," + "replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " + "(crosses a bold boundary). 
This is the preferred tool for fixing " + "wording, typos, numbers, names.", @@ -542,7 +545,10 @@ server.registerTool( "INSIDE the target table — anchorNodeId of any block/cell in it, or " + "anchorText matching the table; to add a tableCell/tableHeader, use " + "anchorNodeId of a block inside the target row (anchorText only resolves " + - "top-level blocks, so it cannot target a row). Note: append is top-level " + + "top-level blocks, so it cannot target a row). `anchorText` is matched " + + "against the block's literal rendered plain text (no markdown); " + + "markdown/emoji are tolerated as a fallback; prefer plain text or " + + "anchorNodeId. Note: append is top-level " + "only and rejects structural table nodes. Example node: a paragraph " + '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' + 'heading {"type":"heading","attrs":{"level":2},"content":' + diff --git a/packages/mcp/src/lib/json-edit.ts b/packages/mcp/src/lib/json-edit.ts index 92a98c0d..bfba6793 100644 --- a/packages/mcp/src/lib/json-edit.ts +++ b/packages/mcp/src/lib/json-edit.ts @@ -12,6 +12,8 @@ * re-import for small wording fixes. */ +import { stripInlineMarkdown } from "./text-normalize.js"; + export interface TextEdit { find: string; replace: string; @@ -22,6 +24,8 @@ export interface TextEdit { export interface TextEditResult { find: string; replacements: number; + /** True when the match required the markdown-stripped fallback locator. */ + normalized?: boolean; } export interface TextEditFailure { @@ -292,25 +296,75 @@ export function applyTextEdits( const blockPlain = blockChars.map((chars) => chars.map((c) => c.ch).join(""), ); - const validPerBlock: number[][] = blockChars.map((chars, b) => + // EXACT MATCH WINS: try the verbatim locator first. 
+ let effectiveFind = edit.find; + let normalized = false; + let validPerBlock: number[][] = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find), ); let total = 0; for (const positions of validPerBlock) total += positions.length; + // FALLBACK: only if the verbatim locator matched nothing, retry with the + // markdown-stripped form. `edit.replace` is never touched — this only + // changes what we LOCATE, not what we insert. + const stripped = stripInlineMarkdown(edit.find); + if (total === 0 && stripped !== edit.find && stripped.length > 0) { + const strippedPerBlock: number[][] = blockChars.map((chars, b) => + findValidMatches(chars, blockPlain[b], stripped), + ); + let strippedTotal = 0; + for (const positions of strippedPerBlock) strippedTotal += positions.length; + if (strippedTotal >= 1) { + validPerBlock = strippedPerBlock; + total = strippedTotal; + effectiveFind = stripped; + normalized = true; + } + } + if (total === 0) { // Distinguish "the text exists but only across an atom" from a plain - // not-found: if a raw substring scan (atoms included) WOULD have hit, - // the only thing blocking the edit is the atom, so report that. + // not-found: if a raw substring scan (atoms included) WOULD have hit — + // for EITHER the verbatim or the stripped locator — the only thing + // blocking the edit is the atom, so report that. const existsAcrossAtom = blockPlain.some( - (plain) => plain.indexOf(edit.find) !== -1, + (plain) => + plain.indexOf(edit.find) !== -1 || + (stripped !== edit.find && plain.indexOf(stripped) !== -1), ); - failed.push({ - find: edit.find, - reason: existsAcrossAtom - ? "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes." 
- : "text not found in the document.", - }); + let reason: string; + if (existsAcrossAtom) { + reason = + "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes."; + } else { + // Append a bounded "closest text" hint: find the FIRST block that + // contains the longest whitespace-delimited token (>= 3 chars) of the + // (stripped, then raw) locator, and quote that block's plain text. + reason = "text not found in the document."; + const tokenSource = stripped.length > 0 ? stripped : edit.find; + const longestToken = tokenSource + .split(/\s+/) + .filter((t) => t.length >= 3) + .sort((a, b) => b.length - a.length)[0]; + if (longestToken) { + const hitBlock = blockPlain.find((plain) => + plain.includes(longestToken), + ); + if (hitBlock) { + // Truncate by code point (spread iterates by code point) so a + // surrogate pair is never split; append the ellipsis only when the + // text was actually longer than the limit. + const points = [...hitBlock]; + const snippet = + points.length > 120 + ? points.slice(0, 120).join("") + "…" + : hitBlock; + reason += ` Closest block text: "${snippet}".`; + } + } + } + failed.push({ find: edit.find, reason }); continue; } if (total > 1 && !edit.replaceAll) { @@ -341,20 +395,31 @@ export function applyTextEdits( if (!edit.replaceAll && takenFirst) break; } - // Apply the splices block-by-block and re-tokenize changed blocks. + // Apply the splices block-by-block and re-tokenize changed blocks. The + // local edit uses `effectiveFind` (verbatim or normalized) so the + // prefix/suffix diff is computed against the ACTUALLY matched text, while + // `edit.replace` stays literal — never stripped. 
+ const effectiveEdit: TextEdit = { + find: effectiveFind, + replace: edit.replace, + replaceAll: edit.replaceAll, + }; let spliced = 0; for (let b = 0; b < blocks.length; b++) { if (plannedPerBlock[b].length === 0) continue; const { newChars, spliced: n } = applyEditToChars( blockChars[b], - edit, + effectiveEdit, plannedPerBlock[b], ); spliced += n; blocks[b].content = tokenizeChars(newChars); } - results.push({ find: edit.find, replacements: spliced }); + // Keep `find: edit.find` (the original) so the caller can correlate. + const result: TextEditResult = { find: edit.find, replacements: spliced }; + if (normalized) result.normalized = true; + results.push(result); } // Safety net: drop any empty text nodes (ProseMirror forbids them). The diff --git a/packages/mcp/src/lib/node-ops.ts b/packages/mcp/src/lib/node-ops.ts index 4934b216..8a619266 100644 --- a/packages/mcp/src/lib/node-ops.ts +++ b/packages/mcp/src/lib/node-ops.ts @@ -14,6 +14,8 @@ * `content`, non-object nodes, and absent `attrs` are tolerated. */ +import { stripInlineMarkdown } from "./text-normalize.js"; + /** Deep-clone a JSON-serializable value without mutating the original. */ function clone(value: T): T { if (typeof structuredClone === "function") { @@ -364,6 +366,31 @@ const REQUIRED_CONTAINER: Record = { tableHeader: "tableRow", }; +/** + * Find the index of the first TOP-LEVEL block whose plain text includes the + * anchor, with a markdown-stripping FALLBACK. Returns -1 when none matches. + * + * Two passes preserve "exact wins globally": + * - Pass 1: first block containing the verbatim `anchorText`. + * - Pass 2 (only if pass 1 found nothing): first block containing the + * markdown-stripped anchor, when stripping actually changed it. + */ +function findAnchorTextIndex(content: any[], anchorText: string): number { + if (!Array.isArray(content)) return -1; + // Pass 1: exact. 
+ for (let i = 0; i < content.length; i++) { + if (blockPlainText(content[i]).includes(anchorText)) return i; + } + // Pass 2: markdown-stripped fallback. + const a = stripInlineMarkdown(anchorText); + if (a !== anchorText && a.length > 0) { + for (let i = 0; i < content.length; i++) { + if (blockPlainText(content[i]).includes(a)) return i; + } + } + return -1; +} + /** * Locate an anchor and return its ancestor chain (from `doc` down to and * including the matched node). Each chain entry is `{ node, index }` where @@ -399,14 +426,14 @@ function findAnchorChain( } // By text: only top-level blocks are scanned (same rule as the JSON path). + // Exact match wins; a markdown-stripped fallback is tried only on a miss. if (opts.anchorText != null && Array.isArray(doc.content)) { - for (let i = 0; i < doc.content.length; i++) { - if (blockPlainText(doc.content[i]).includes(opts.anchorText)) { - return [ - { node: doc, index: -1 }, - { node: doc.content[i], index: i }, - ]; - } + const i = findAnchorTextIndex(doc.content, opts.anchorText); + if (i !== -1) { + return [ + { node: doc, index: -1 }, + { node: doc.content[i], index: i }, + ]; } } @@ -540,13 +567,13 @@ export function insertNodeRelative( return { doc: out, inserted }; } - // Resolve by text: only top-level doc.content blocks are scanned. + // Resolve by text: only top-level doc.content blocks are scanned. Exact + // match wins; a markdown-stripped fallback is tried only on a miss. 
if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) { - for (let i = 0; i < out.content.length; i++) { - if (blockPlainText(out.content[i]).includes(opts.anchorText)) { - out.content.splice(i + offset, 0, fresh); - return { doc: out, inserted: true }; - } + const i = findAnchorTextIndex(out.content, opts.anchorText); + if (i !== -1) { + out.content.splice(i + offset, 0, fresh); + return { doc: out, inserted: true }; } } diff --git a/packages/mcp/src/lib/text-normalize.ts b/packages/mcp/src/lib/text-normalize.ts new file mode 100644 index 00000000..73a764e4 --- /dev/null +++ b/packages/mcp/src/lib/text-normalize.ts @@ -0,0 +1,78 @@ +/** + * Locator normalization: strip inline markdown wrappers and trailing + * decoration from a LOCATOR string so a find/anchor that the model wrote with + * markdown (or a stray emoji) can still match the document's plain text. + * + * This is used ONLY as a fallback for LOCATING (after an exact match fails); + * it is never applied to replacement text or inserted node content, so no + * formatting is ever lost. + */ + +/** Maximum unwrap passes, so pathological/nested input cannot loop forever. */ +const MAX_PASSES = 8; + +/** + * Inline emphasis/code/strikethrough wrappers, strong BEFORE emphasis so + * `**x**` collapses to `x` rather than leaving a stray `*x*`. Each pattern is + * non-greedy and capture group 1 is the inner text. Applied repeatedly until + * the string stops changing (nested wrappers like `**_x_**`). + */ +const WRAPPER_PATTERNS: RegExp[] = [ + /\*\*([^*]+?)\*\*/g, // **x** + /__([^_]+?)__/g, // __x__ + /~~([^~]+?)~~/g, // ~~x~~ + /\*([^*]+?)\*/g, // *x* + /_([^_]+?)_/g, // _x_ + /``([^`]+?)``/g, // ``x`` + /`([^`]+?)`/g, // `x` +]; + +/** + * Conservatively strip inline markdown from a locator string. + * + * Deterministic, order-fixed steps: + * 1. Links/images: `[text](url)` -> `text`, `![alt](src)` -> `alt`. + * 2. 
Balanced inline wrappers (strong before emphasis, code, strikethrough), + * applied repeatedly until stable for nested cases. + * 3. Trim leading/trailing decoration only: whitespace, leftover marker chars + * (`* _ ~ \``) and emoji. Letters/digits and sentence punctuation (`.`/`,` + * etc.) are NEVER trimmed. + * + * If the result is empty (e.g. the input was only markers like `***`), the + * ORIGINAL string is returned so a locator can never normalize down to "" and + * match everything. + */ +export function stripInlineMarkdown(s: string): string { + if (typeof s !== "string" || s.length === 0) return s; + + let out = s; + + // 1. Links/images -> their visible text. `!?` covers both forms. + out = out.replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1"); + + // 2. Strip balanced wrappers, repeating until the string is stable so nested + // wrappers (`**_x_**`) and adjacent runs both collapse. + for (let pass = 0; pass < MAX_PASSES; pass++) { + const before = out; + for (const re of WRAPPER_PATTERNS) { + out = out.replace(re, "$1"); + } + if (out === before) break; + } + + // 3. Trim leading/trailing decoration: whitespace, leftover markdown markers, + // and emoji (Extended_Pictographic plus the VS16 / ZWJ joiners, plus the + // regional-indicator range U+1F1E6–U+1F1FF for flag emoji, which are NOT + // Extended_Pictographic). The `u` flag enables the Unicode property escape. + // Anchored runs only — interior text and sentence punctuation are untouched. + const DECORATION = + "[\\s*_~\\x60\\p{Extended_Pictographic}\\u{1F1E6}-\\u{1F1FF}\\u{FE0F}\\u{200D}]+"; + out = out + .replace(new RegExp("^" + DECORATION, "u"), "") + .replace(new RegExp(DECORATION + "$", "u"), ""); + + // 4. Never normalize a locator down to nothing. 
+ if (out.length === 0) return s; + + return out; +} diff --git a/packages/mcp/test/unit/json-edit.test.mjs b/packages/mcp/test/unit/json-edit.test.mjs index d1958b38..e55a0b3f 100644 --- a/packages/mcp/test/unit/json-edit.test.mjs +++ b/packages/mcp/test/unit/json-edit.test.mjs @@ -318,3 +318,107 @@ test("input doc is not mutated", () => { assert.notEqual(out, input); assert.equal(out.content[0].content[0].text, "changed source"); }); + +// --------------------------------------------------------------------------- +// Markdown-normalization fallback (locating only; replace is always literal) +// --------------------------------------------------------------------------- + +test("markdown-wrapped find matches via normalization, preserving the mark", () => { + // The document renders "Hello world" with "world" bold. The model's locator + // "**world**" has no verbatim match, so the stripped form "world" is used. + const input = doc( + paragraph( + textNode("Hello "), + textNode("world", { marks: [{ type: "bold" }] }), + ), + ); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "**world**", replace: "earth" }, + ]); + + assert.equal(failed.length, 0); + assert.equal(results.length, 1); + assert.equal(results[0].find, "**world**"); // original is reported back + assert.equal(results[0].replacements, 1); + assert.equal(results[0].normalized, true); + + // The bold mark is preserved on the replacement (inherited from the match). 
+ const para = out.content[0]; + const bold = para.content.find((n) => n.text === "earth"); + assert.deepEqual(bold.marks, [{ type: "bold" }]); +}); + +test("exact match wins: literal '2 * 3' matches without normalization", () => { + const input = doc(paragraph(textNode("compute 2 * 3 now"))); + + const { results, failed } = applyTextEdits(input, [ + { find: "2 * 3", replace: "6" }, + ]); + + assert.equal(failed.length, 0); + assert.equal(results.length, 1); + assert.equal(results[0].replacements, 1); + // No normalization was needed/used. + assert.ok(!results[0].normalized); +}); + +test("normalization yielding >1 matches without replaceAll is an ambiguity failure", () => { + // Two bold "world" blocks. The verbatim "**world**" matches nothing; the + // stripped "world" matches twice -> ambiguous, must not guess. + const input = doc( + paragraph(textNode("world", { marks: [{ type: "bold" }] })), + paragraph(textNode("world", { marks: [{ type: "bold" }] })), + ); + + const { results, failed } = applyTextEdits(input, [ + { find: "**world**", replace: "earth" }, + ]); + + assert.equal(results.length, 0); + assert.equal(failed.length, 1); + assert.match(failed[0].reason, /matches/); +}); + +test("stripped locator that only matches across an atom is refused (atom-aware reason)", () => { + // paragraph: "a" <hardBreak> "b", so blockPlain is "a\uFFFCb" (U+FFFC is the + // atom placeholder). The locator is markdown-wrapped, so the verbatim form + // "**a\uFFFCb**" never matches; its stripped form "a\uFFFCb" has no atom-free + // valid match either, BUT a raw substring scan of the block (atoms included) + // DOES hit the stripped needle. That exercises the existsAcrossAtom branch on + // the STRIPPED needle: the edit is refused with the atom-aware reason and the + // document is left unchanged. 
+ const input = doc( + paragraph( + textNode("a"), + { type: "hardBreak" }, + textNode("b"), + ), + ); + const snapshot = JSON.parse(JSON.stringify(input)); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "**a\uFFFCb**", replace: "z" }, + ]); + + assert.deepEqual(results, []); + assert.equal(failed.length, 1); + assert.match(failed[0].reason, /non-text inline node/); + assert.deepEqual(out, snapshot); +}); + +test("genuine miss appends a 'Closest block text' hint", () => { + const input = doc( + paragraph(textNode("The quick brown fox jumps over the lazy dog")), + ); + + // No verbatim/stripped match, but the longest token "jumps" exists in the + // block, so a bounded "closest text" hint is appended. + const { failed } = applyTextEdits(input, [ + { find: "fox jumps now", replace: "x" }, + ]); + + assert.equal(failed.length, 1); + assert.match(failed[0].reason, /Closest block text/); + assert.match(failed[0].reason, /quick brown fox/); +}); diff --git a/packages/mcp/test/unit/node-ops.test.mjs b/packages/mcp/test/unit/node-ops.test.mjs index cf4341ee..155b99a0 100644 --- a/packages/mcp/test/unit/node-ops.test.mjs +++ b/packages/mcp/test/unit/node-ops.test.mjs @@ -400,3 +400,90 @@ test("insertNodeRelative does NOT mutate input (deep-equal snapshot)", () => { assert.deepEqual(input, snap); assert.notEqual(out, input); }); + +// --------------------------------------------------------------------------- +// anchorText markdown-normalization fallback (locating only) +// --------------------------------------------------------------------------- + +test("insertNodeRelative before by markdown-wrapped anchorText matches the plain block", () => { + const input = doc( + para("p-1", textNode("alpha")), + para("p-2", textNode("beta")), + ); + const node = para("new", textNode("NEW")); + // "**beta**" has no verbatim match; the stripped "beta" matches "p-2". 
+ const { doc: out, inserted } = insertNodeRelative(input, node, { + position: "before", + anchorText: "**beta**", + }); + assert.equal(inserted, true); + assert.deepEqual( + out.content.map((n) => n.attrs.id), + ["p-1", "new", "p-2"], + ); +}); + +test("insertNodeRelative after by markdown-wrapped anchorText matches the plain block", () => { + const input = doc( + para("p-1", textNode("alpha")), + para("p-2", textNode("beta")), + ); + const node = para("new", textNode("NEW")); + const { doc: out, inserted } = insertNodeRelative(input, node, { + position: "after", + anchorText: "**alpha**", + }); + assert.equal(inserted, true); + assert.deepEqual( + out.content.map((n) => n.attrs.id), + ["p-1", "new", "p-2"], + ); +}); + +test("insertNodeRelative anchorText with markdown AND a trailing emoji matches the plain block", () => { + const input = doc( + para("p-1", textNode("alpha")), + para("p-2", textNode("beta")), + ); + const node = para("new", textNode("NEW")); + const { doc: out, inserted } = insertNodeRelative(input, node, { + position: "before", + anchorText: "**beta** ✨", + }); + assert.equal(inserted, true); + assert.deepEqual( + out.content.map((n) => n.attrs.id), + ["p-1", "new", "p-2"], + ); +}); + +test("insertNodeRelative exact anchorText still wins (no normalization)", () => { + // A block literally contains "a*b"; the exact anchor must match it directly. 
+ const input = doc( + para("p-1", textNode("a*b")), + para("p-2", textNode("beta")), + ); + const node = para("new", textNode("NEW")); + const { doc: out, inserted } = insertNodeRelative(input, node, { + position: "after", + anchorText: "a*b", + }); + assert.equal(inserted, true); + assert.deepEqual( + out.content.map((n) => n.attrs.id), + ["p-1", "new", "p-2"], + ); +}); + +test("insertNodeRelative truly-missing anchor still returns inserted:false", () => { + const input = doc( + para("p-1", textNode("alpha")), + para("p-2", textNode("beta")), + ); + const node = para("new", textNode("NEW")); + const { inserted } = insertNodeRelative(input, node, { + position: "before", + anchorText: "**gamma**", + }); + assert.equal(inserted, false); +}); diff --git a/packages/mcp/test/unit/text-normalize.test.mjs b/packages/mcp/test/unit/text-normalize.test.mjs new file mode 100644 index 00000000..9a20ed5b --- /dev/null +++ b/packages/mcp/test/unit/text-normalize.test.mjs @@ -0,0 +1,42 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { stripInlineMarkdown } from "../../build/lib/text-normalize.js"; + +test("strips strong wrappers", () => { + assert.equal(stripInlineMarkdown("**в полном порядке**"), "в полном порядке"); +}); + +test("strips emphasis and trims a trailing emoji, keeps sentence punctuation", () => { + assert.equal(stripInlineMarkdown("*Конец.* ✨"), "Конец."); +}); + +test("strips inline code", () => { + assert.equal(stripInlineMarkdown("`code`"), "code"); +}); + +test("links collapse to their visible text", () => { + assert.equal(stripInlineMarkdown("[t](http://x)"), "t"); +}); + +test("a plain string is unchanged", () => { + assert.equal(stripInlineMarkdown("just plain text"), "just plain text"); +}); + +test("a string of only markers returns the original", () => { + assert.equal(stripInlineMarkdown("***"), "***"); +}); + +test("nested wrappers collapse to the inner text", () => { + 
assert.equal(stripInlineMarkdown("**_x_**"), "x"); +}); + +test("image syntax collapses to its alt text", () => { + assert.equal(stripInlineMarkdown("![alt](src)"), "alt"); +}); + +test("a trailing flag emoji is trimmed", () => { + // Regional-indicator flags are not Extended_Pictographic, so this guards the + // explicit U+1F1E6–U+1F1FF range in the decoration-trim class. + assert.equal(stripInlineMarkdown("hello 🇺🇸").trim(), "hello"); +});