feat(ai-chat): tolerate markdown in edit_page_text/insert_node locators
Locators (edit_page_text `find`, insert_node `anchorText`) are matched against the document's plain text, so a model-supplied locator carrying markdown wrappers (**bold**, *italic*, `code`, [t](url)) or trailing emoji never matched and the edit/insert failed. Add stripInlineMarkdown() and a fallback: try the locator verbatim first (exact match wins, so literal asterisks/underscores still work), and only on zero matches retry with a markdown-stripped form. The ambiguity guard runs on the post-fallback count, and `replace` / inserted node content are never stripped, so no formatting is lost. Failed edits gain an atom-aware reason plus a bounded "closest block text" hint; the insert_node "anchor not found" error now points at plain-text anchors / anchorNodeId. New packages/mcp/src/lib/text-normalize.ts (+ unit tests); wired into json-edit.ts and node-ops.ts; tool descriptions updated. Tests: 212 pass.
This commit is contained in:
@@ -638,7 +638,10 @@ export class AiChatToolsService {
|
||||
'editing inside a bold word keeps the new text bold). Each find must ' +
|
||||
'match exactly once unless replaceAll is set. The batch applies what ' +
|
||||
'it can and returns applied[] + failed[]; a fully-unmatched batch ' +
|
||||
'writes nothing and errors. Examples: edits:[{find:"teh",replace:"the"}]; ' +
|
||||
'writes nothing and errors. find should be the literal rendered text ' +
|
||||
'(no markdown). Markdown wrappers (**bold**, *italic*, `code`) and ' +
|
||||
'trailing emoji are tolerated via a strip-and-retry fallback, but ' +
|
||||
'plain text is preferred. Examples: edits:[{find:"teh",replace:"the"}]; ' +
|
||||
'edits:[{find:"Hello world",replace:"Hello there"}] (crosses a bold ' +
|
||||
'boundary). Reversible: the previous version is kept in page history.',
|
||||
inputSchema: z.object({
|
||||
@@ -730,7 +733,12 @@ export class AiChatToolsService {
|
||||
anchorText: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe('Anchor text fragment (for before/after).'),
|
||||
.describe(
|
||||
'Anchor text fragment (for before/after), matched against the ' +
|
||||
"block's literal rendered plain text (no markdown). " +
|
||||
'Markdown/emoji are tolerated as a fallback; prefer plain text ' +
|
||||
'or anchorNodeId.',
|
||||
),
|
||||
}),
|
||||
execute: async ({
|
||||
pageId,
|
||||
|
||||
@@ -1258,7 +1258,13 @@ export class DocmostClient {
|
||||
const anchorDesc = opts.anchorNodeId
|
||||
? `anchorNodeId "${opts.anchorNodeId}"`
|
||||
: `anchorText "${opts.anchorText}"`;
|
||||
throw new Error(`insert_node: anchor not found (${anchorDesc}) on page ${pageId}`);
|
||||
// anchorText is matched against the block's literal RENDERED plain text;
|
||||
// markdown/emoji are tolerated only as a strip-and-retry fallback, so a
|
||||
// miss usually means the text differs from what's on the page.
|
||||
const hint = opts.anchorText
|
||||
? ' anchorText must be the block\'s literal rendered plain text (no markdown wrappers or emoji); anchorNodeId from get_page_json is more reliable.'
|
||||
: "";
|
||||
throw new Error(`insert_node: anchor not found (${anchorDesc}) on page ${pageId}.${hint}`);
|
||||
}
|
||||
return { success: true, inserted: true, position: opts.position };
|
||||
}
|
||||
|
||||
@@ -324,7 +324,10 @@ export function createDocmostMcpServer(config) {
|
||||
"keeps it bold; editing inside a bold word keeps the new text bold). " +
|
||||
"Each `find` must match exactly once (or set replaceAll). The batch " +
|
||||
"applies what it can and returns applied[] + failed[]; a fully-unmatched " +
|
||||
"batch writes nothing and errors. Examples: edits:[{find:\"teh\"," +
|
||||
"batch writes nothing and errors. `find` should be the literal rendered " +
|
||||
"text (no markdown). Markdown wrappers (**bold**, *italic*, `code`) and " +
|
||||
"trailing emoji are tolerated via a strip-and-retry fallback, but plain " +
|
||||
"text is preferred. Examples: edits:[{find:\"teh\"," +
|
||||
"replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " +
|
||||
"(crosses a bold boundary). This is the preferred tool for fixing " +
|
||||
"wording, typos, numbers, names.",
|
||||
@@ -391,7 +394,10 @@ export function createDocmostMcpServer(config) {
|
||||
"INSIDE the target table — anchorNodeId of any block/cell in it, or " +
|
||||
"anchorText matching the table; to add a tableCell/tableHeader, use " +
|
||||
"anchorNodeId of a block inside the target row (anchorText only resolves " +
|
||||
"top-level blocks, so it cannot target a row). Note: append is top-level " +
|
||||
"top-level blocks, so it cannot target a row). `anchorText` is matched " +
|
||||
"against the block's literal rendered plain text (no markdown); " +
|
||||
"markdown/emoji are tolerated as a fallback; prefer plain text or " +
|
||||
"anchorNodeId. Note: append is top-level " +
|
||||
"only and rejects structural table nodes. Example node: a paragraph " +
|
||||
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
|
||||
'heading {"type":"heading","attrs":{"level":2},"content":' +
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
* keeps the inserted text bold. This is the safe alternative to a full markdown
|
||||
* re-import for small wording fixes.
|
||||
*/
|
||||
import { stripInlineMarkdown } from "./text-normalize.js";
|
||||
/**
 * Placeholder code unit standing in for one opaque (non-text) inline node.
 *
 * Written as the escape `\uFFFC` rather than the literal character: U+FFFC is
 * invisible and is easily dropped by editors, diff viewers and copy/paste,
 * which would silently turn the placeholder into the empty string.
 */
const ATOM_PLACEHOLDER = "\uFFFC"; // OBJECT REPLACEMENT CHARACTER
|
||||
/**
|
||||
@@ -241,21 +242,66 @@ export function applyTextEdits(doc, edits) {
|
||||
// and the splicing.
|
||||
const blockChars = blocks.map((b) => flattenBlock(b));
|
||||
const blockPlain = blockChars.map((chars) => chars.map((c) => c.ch).join(""));
|
||||
const validPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find));
|
||||
// EXACT MATCH WINS: try the verbatim locator first.
|
||||
let effectiveFind = edit.find;
|
||||
let normalized = false;
|
||||
let validPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], edit.find));
|
||||
let total = 0;
|
||||
for (const positions of validPerBlock)
|
||||
total += positions.length;
|
||||
// FALLBACK: only if the verbatim locator matched nothing, retry with the
|
||||
// markdown-stripped form. `edit.replace` is never touched — this only
|
||||
// changes what we LOCATE, not what we insert.
|
||||
const stripped = stripInlineMarkdown(edit.find);
|
||||
if (total === 0 && stripped !== edit.find && stripped.length > 0) {
|
||||
const strippedPerBlock = blockChars.map((chars, b) => findValidMatches(chars, blockPlain[b], stripped));
|
||||
let strippedTotal = 0;
|
||||
for (const positions of strippedPerBlock)
|
||||
strippedTotal += positions.length;
|
||||
if (strippedTotal >= 1) {
|
||||
validPerBlock = strippedPerBlock;
|
||||
total = strippedTotal;
|
||||
effectiveFind = stripped;
|
||||
normalized = true;
|
||||
}
|
||||
}
|
||||
if (total === 0) {
|
||||
// Distinguish "the text exists but only across an atom" from a plain
|
||||
// not-found: if a raw substring scan (atoms included) WOULD have hit,
|
||||
// the only thing blocking the edit is the atom, so report that.
|
||||
const existsAcrossAtom = blockPlain.some((plain) => plain.indexOf(edit.find) !== -1);
|
||||
failed.push({
|
||||
find: edit.find,
|
||||
reason: existsAcrossAtom
|
||||
? "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes."
|
||||
: "text not found in the document.",
|
||||
});
|
||||
// not-found: if a raw substring scan (atoms included) WOULD have hit —
|
||||
// for EITHER the verbatim or the stripped locator — the only thing
|
||||
// blocking the edit is the atom, so report that.
|
||||
const existsAcrossAtom = blockPlain.some((plain) => plain.indexOf(edit.find) !== -1 ||
|
||||
(stripped !== edit.find && plain.indexOf(stripped) !== -1));
|
||||
let reason;
|
||||
if (existsAcrossAtom) {
|
||||
reason =
|
||||
"match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes.";
|
||||
}
|
||||
else {
|
||||
// Append a bounded "closest text" hint: find the FIRST block that
|
||||
// contains the longest whitespace-delimited token (>= 3 chars) of the
|
||||
// (stripped, then raw) locator, and quote that block's plain text.
|
||||
reason = "text not found in the document.";
|
||||
const tokenSource = stripped.length > 0 ? stripped : edit.find;
|
||||
const longestToken = tokenSource
|
||||
.split(/\s+/)
|
||||
.filter((t) => t.length >= 3)
|
||||
.sort((a, b) => b.length - a.length)[0];
|
||||
if (longestToken) {
|
||||
const hitBlock = blockPlain.find((plain) => plain.includes(longestToken));
|
||||
if (hitBlock) {
|
||||
// Truncate by code point (spread iterates by code point) so a
|
||||
// surrogate pair is never split; append the ellipsis only when the
|
||||
// text was actually longer than the limit.
|
||||
const points = [...hitBlock];
|
||||
const snippet = points.length > 120
|
||||
? points.slice(0, 120).join("") + "…"
|
||||
: hitBlock;
|
||||
reason += ` Closest block text: "${snippet}".`;
|
||||
}
|
||||
}
|
||||
}
|
||||
failed.push({ find: edit.find, reason });
|
||||
continue;
|
||||
}
|
||||
if (total > 1 && !edit.replaceAll) {
|
||||
@@ -287,16 +333,28 @@ export function applyTextEdits(doc, edits) {
|
||||
if (!edit.replaceAll && takenFirst)
|
||||
break;
|
||||
}
|
||||
// Apply the splices block-by-block and re-tokenize changed blocks.
|
||||
// Apply the splices block-by-block and re-tokenize changed blocks. The
|
||||
// local edit uses `effectiveFind` (verbatim or normalized) so the
|
||||
// prefix/suffix diff is computed against the ACTUALLY matched text, while
|
||||
// `edit.replace` stays literal — never stripped.
|
||||
const effectiveEdit = {
|
||||
find: effectiveFind,
|
||||
replace: edit.replace,
|
||||
replaceAll: edit.replaceAll,
|
||||
};
|
||||
let spliced = 0;
|
||||
for (let b = 0; b < blocks.length; b++) {
|
||||
if (plannedPerBlock[b].length === 0)
|
||||
continue;
|
||||
const { newChars, spliced: n } = applyEditToChars(blockChars[b], edit, plannedPerBlock[b]);
|
||||
const { newChars, spliced: n } = applyEditToChars(blockChars[b], effectiveEdit, plannedPerBlock[b]);
|
||||
spliced += n;
|
||||
blocks[b].content = tokenizeChars(newChars);
|
||||
}
|
||||
results.push({ find: edit.find, replacements: spliced });
|
||||
// Keep `find: edit.find` (the original) so the caller can correlate.
|
||||
const result = { find: edit.find, replacements: spliced };
|
||||
if (normalized)
|
||||
result.normalized = true;
|
||||
results.push(result);
|
||||
}
|
||||
// Safety net: drop any empty text nodes (ProseMirror forbids them). The
|
||||
// re-tokenizer never emits empty text nodes, but untouched blocks could in
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
* never mutated. All functions are defensively null-safe: missing/!Array
|
||||
* `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||
*/
|
||||
import { stripInlineMarkdown } from "./text-normalize.js";
|
||||
/** Deep-clone a JSON-serializable value without mutating the original. */
|
||||
function clone(value) {
|
||||
if (typeof structuredClone === "function") {
|
||||
@@ -325,6 +326,33 @@ const REQUIRED_CONTAINER = {
|
||||
tableCell: "tableRow",
|
||||
tableHeader: "tableRow",
|
||||
};
|
||||
/**
 * Resolve an anchor text to the index of a TOP-LEVEL block, or -1 if no block
 * qualifies.
 *
 * The lookup is two-staged so that an exact hit anywhere in the document
 * always beats a normalized hit:
 *   1. the first block whose plain text contains `anchorText` verbatim;
 *   2. only when stage 1 found nothing, and only when stripping inline
 *      markdown actually changed the anchor to a non-empty string, the first
 *      block whose plain text contains the stripped anchor.
 *
 * @param {Array} content     Top-level block nodes; a non-array yields -1.
 * @param {string} anchorText Locator text supplied by the caller.
 * @returns {number} Index into `content`, or -1 when nothing matched.
 */
function findAnchorTextIndex(content, anchorText) {
    if (!Array.isArray(content)) {
        return -1;
    }
    // Index of the first block whose plain text contains `needle`, else -1.
    const firstBlockContaining = (needle) => {
        for (let idx = 0; idx < content.length; idx += 1) {
            const plain = blockPlainText(content[idx]);
            if (plain.includes(needle)) {
                return idx;
            }
        }
        return -1;
    };
    // Stage 1: the verbatim anchor.
    const exactIndex = firstBlockContaining(anchorText);
    if (exactIndex !== -1) {
        return exactIndex;
    }
    // Stage 2: the markdown-stripped anchor, skipped when stripping was a
    // no-op (the scan would just repeat stage 1) or produced nothing.
    const strippedAnchor = stripInlineMarkdown(anchorText);
    const fallbackIsUseful = strippedAnchor !== anchorText && strippedAnchor.length > 0;
    return fallbackIsUseful ? firstBlockContaining(strippedAnchor) : -1;
}
|
||||
/**
|
||||
* Locate an anchor and return its ancestor chain (from `doc` down to and
|
||||
* including the matched node). Each chain entry is `{ node, index }` where
|
||||
@@ -355,16 +383,16 @@ function findAnchorChain(doc, opts) {
|
||||
return search(doc, -1, []);
|
||||
}
|
||||
// By text: only top-level blocks are scanned (same rule as the JSON path).
|
||||
// Exact match wins; a markdown-stripped fallback is tried only on a miss.
|
||||
if (opts.anchorText != null && Array.isArray(doc.content)) {
|
||||
for (let i = 0; i < doc.content.length; i++) {
|
||||
if (blockPlainText(doc.content[i]).includes(opts.anchorText)) {
|
||||
const i = findAnchorTextIndex(doc.content, opts.anchorText);
|
||||
if (i !== -1) {
|
||||
return [
|
||||
{ node: doc, index: -1 },
|
||||
{ node: doc.content[i], index: i },
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
/**
|
||||
@@ -472,15 +500,15 @@ export function insertNodeRelative(doc, node, opts) {
|
||||
}
|
||||
return { doc: out, inserted };
|
||||
}
|
||||
// Resolve by text: only top-level doc.content blocks are scanned.
|
||||
// Resolve by text: only top-level doc.content blocks are scanned. Exact
|
||||
// match wins; a markdown-stripped fallback is tried only on a miss.
|
||||
if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
|
||||
for (let i = 0; i < out.content.length; i++) {
|
||||
if (blockPlainText(out.content[i]).includes(opts.anchorText)) {
|
||||
const i = findAnchorTextIndex(out.content, opts.anchorText);
|
||||
if (i !== -1) {
|
||||
out.content.splice(i + offset, 0, fresh);
|
||||
return { doc: out, inserted: true };
|
||||
}
|
||||
}
|
||||
}
|
||||
return { doc: out, inserted: false };
|
||||
}
|
||||
// ===========================================================================
|
||||
|
||||
71
packages/mcp/build/lib/text-normalize.js
Normal file
71
packages/mcp/build/lib/text-normalize.js
Normal file
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Locator normalization: strip inline markdown wrappers and trailing
|
||||
* decoration from a LOCATOR string so a find/anchor that the model wrote with
|
||||
* markdown (or a stray emoji) can still match the document's plain text.
|
||||
*
|
||||
* This is used ONLY as a fallback for LOCATING (after an exact match fails);
|
||||
* it is never applied to replacement text or inserted node content, so no
|
||||
* formatting is ever lost.
|
||||
*/
|
||||
/** Maximum unwrap passes, so pathological/nested input cannot loop forever. */
const MAX_PASSES = 8;
/**
 * Inline emphasis/code/strikethrough wrappers, strong BEFORE emphasis so
 * `**x**` collapses to `x` rather than leaving a stray `*x*`. Each pattern is
 * non-greedy and capture group 1 is the inner text. Applied repeatedly until
 * the string stops changing (nested wrappers like `**_x_**`).
 *
 * The underscore patterns only fire when the delimiters are NOT glued to a
 * letter/digit (CommonMark does not treat intraword `_` as emphasis), so
 * identifiers such as `my_var_name` survive intact. They are built from
 * strings, like the decoration patterns below, so the lookbehind and the
 * Unicode property escapes are handled entirely by the runtime.
 */
const WRAPPER_PATTERNS = [
    /\*\*([^*]+?)\*\*/g, // **x**
    new RegExp("(?<![\\p{L}\\p{N}])__([^_]+?)__(?![\\p{L}\\p{N}])", "gu"), // __x__
    /~~([^~]+?)~~/g, // ~~x~~
    /\*([^*]+?)\*/g, // *x*
    new RegExp("(?<![\\p{L}\\p{N}_])_([^_]+?)_(?![\\p{L}\\p{N}_])", "gu"), // _x_
    /``([^`]+?)``/g, // ``x``
    /`([^`]+?)`/g, // `x`
];
/**
 * Character class for leading/trailing decoration: whitespace, leftover
 * markdown markers, and emoji. Emoji are covered by Extended_Pictographic,
 * the skin-tone modifiers (Emoji_Modifier, U+1F3FB–U+1F3FF), the VS16 / ZWJ
 * joiners, and the regional-indicator range U+1F1E6–U+1F1FF for flag emoji
 * (which are NOT Extended_Pictographic).
 */
const DECORATION = "[\\s*_~\\x60\\p{Extended_Pictographic}\\p{Emoji_Modifier}\\u{1F1E6}-\\u{1F1FF}\\u{FE0F}\\u{200D}]+";
/** Anchored decoration runs, compiled once (the `u` flag enables `\p{…}`). */
const LEADING_DECORATION = new RegExp("^" + DECORATION, "u");
const TRAILING_DECORATION = new RegExp(DECORATION + "$", "u");
/**
 * Conservatively strip inline markdown from a locator string.
 *
 * Deterministic, order-fixed steps:
 *   1. Links/images: `[text](url)` -> `text`, `![alt](url)` -> `alt`.
 *   2. Balanced inline wrappers (strong before emphasis, code, strikethrough),
 *      applied repeatedly until stable for nested cases. Intraword underscores
 *      (`snake_case`) are never treated as emphasis.
 *   3. Trim leading/trailing decoration only: whitespace, leftover marker chars
 *      (`* _ ~ \``) and emoji. Letters/digits and sentence punctuation (`.`/`,`
 *      etc.) are NEVER trimmed.
 *
 * If the result is empty (e.g. the input was only markers like `***`), the
 * ORIGINAL string is returned so a locator can never normalize down to "" and
 * match everything.
 *
 * @param {string} s Locator text as supplied by the caller.
 * @returns {string} The normalized locator; `s` itself when it is not a
 *   string, is empty, or would normalize to the empty string.
 */
export function stripInlineMarkdown(s) {
    if (typeof s !== "string" || s.length === 0)
        return s;
    let out = s;
    // 1. Links/images -> their visible text. `!?` covers both forms.
    out = out.replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1");
    // 2. Strip balanced wrappers, repeating until the string is stable so nested
    //    wrappers (`**_x_**`) and adjacent runs both collapse.
    for (let pass = 0; pass < MAX_PASSES; pass++) {
        const before = out;
        for (const re of WRAPPER_PATTERNS) {
            out = out.replace(re, "$1");
        }
        if (out === before)
            break;
    }
    // 3. Trim leading/trailing decoration. Anchored runs only — interior text
    //    and sentence punctuation are untouched.
    out = out.replace(LEADING_DECORATION, "").replace(TRAILING_DECORATION, "");
    // 4. Never normalize a locator down to nothing.
    if (out.length === 0)
        return s;
    return out;
}
|
||||
@@ -1559,8 +1559,14 @@ export class DocmostClient {
|
||||
const anchorDesc = opts.anchorNodeId
|
||||
? `anchorNodeId "${opts.anchorNodeId}"`
|
||||
: `anchorText "${opts.anchorText}"`;
|
||||
// anchorText is matched against the block's literal RENDERED plain text;
|
||||
// markdown/emoji are tolerated only as a strip-and-retry fallback, so a
|
||||
// miss usually means the text differs from what's on the page.
|
||||
const hint = opts.anchorText
|
||||
? ' anchorText must be the block\'s literal rendered plain text (no markdown wrappers or emoji); anchorNodeId from get_page_json is more reliable.'
|
||||
: "";
|
||||
throw new Error(
|
||||
`insert_node: anchor not found (${anchorDesc}) on page ${pageId}`,
|
||||
`insert_node: anchor not found (${anchorDesc}) on page ${pageId}.${hint}`,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -461,7 +461,10 @@ server.registerTool(
|
||||
"keeps it bold; editing inside a bold word keeps the new text bold). " +
|
||||
"Each `find` must match exactly once (or set replaceAll). The batch " +
|
||||
"applies what it can and returns applied[] + failed[]; a fully-unmatched " +
|
||||
"batch writes nothing and errors. Examples: edits:[{find:\"teh\"," +
|
||||
"batch writes nothing and errors. `find` should be the literal rendered " +
|
||||
"text (no markdown). Markdown wrappers (**bold**, *italic*, `code`) and " +
|
||||
"trailing emoji are tolerated via a strip-and-retry fallback, but plain " +
|
||||
"text is preferred. Examples: edits:[{find:\"teh\"," +
|
||||
"replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " +
|
||||
"(crosses a bold boundary). This is the preferred tool for fixing " +
|
||||
"wording, typos, numbers, names.",
|
||||
@@ -542,7 +545,10 @@ server.registerTool(
|
||||
"INSIDE the target table — anchorNodeId of any block/cell in it, or " +
|
||||
"anchorText matching the table; to add a tableCell/tableHeader, use " +
|
||||
"anchorNodeId of a block inside the target row (anchorText only resolves " +
|
||||
"top-level blocks, so it cannot target a row). Note: append is top-level " +
|
||||
"top-level blocks, so it cannot target a row). `anchorText` is matched " +
|
||||
"against the block's literal rendered plain text (no markdown); " +
|
||||
"markdown/emoji are tolerated as a fallback; prefer plain text or " +
|
||||
"anchorNodeId. Note: append is top-level " +
|
||||
"only and rejects structural table nodes. Example node: a paragraph " +
|
||||
'{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
|
||||
'heading {"type":"heading","attrs":{"level":2},"content":' +
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
* re-import for small wording fixes.
|
||||
*/
|
||||
|
||||
import { stripInlineMarkdown } from "./text-normalize.js";
|
||||
|
||||
export interface TextEdit {
|
||||
find: string;
|
||||
replace: string;
|
||||
@@ -22,6 +24,8 @@ export interface TextEdit {
|
||||
export interface TextEditResult {
|
||||
find: string;
|
||||
replacements: number;
|
||||
/** True when the match required the markdown-stripped fallback locator. */
|
||||
normalized?: boolean;
|
||||
}
|
||||
|
||||
export interface TextEditFailure {
|
||||
@@ -292,25 +296,75 @@ export function applyTextEdits(
|
||||
const blockPlain = blockChars.map((chars) =>
|
||||
chars.map((c) => c.ch).join(""),
|
||||
);
|
||||
const validPerBlock: number[][] = blockChars.map((chars, b) =>
|
||||
// EXACT MATCH WINS: try the verbatim locator first.
|
||||
let effectiveFind = edit.find;
|
||||
let normalized = false;
|
||||
let validPerBlock: number[][] = blockChars.map((chars, b) =>
|
||||
findValidMatches(chars, blockPlain[b], edit.find),
|
||||
);
|
||||
let total = 0;
|
||||
for (const positions of validPerBlock) total += positions.length;
|
||||
|
||||
// FALLBACK: only if the verbatim locator matched nothing, retry with the
|
||||
// markdown-stripped form. `edit.replace` is never touched — this only
|
||||
// changes what we LOCATE, not what we insert.
|
||||
const stripped = stripInlineMarkdown(edit.find);
|
||||
if (total === 0 && stripped !== edit.find && stripped.length > 0) {
|
||||
const strippedPerBlock: number[][] = blockChars.map((chars, b) =>
|
||||
findValidMatches(chars, blockPlain[b], stripped),
|
||||
);
|
||||
let strippedTotal = 0;
|
||||
for (const positions of strippedPerBlock) strippedTotal += positions.length;
|
||||
if (strippedTotal >= 1) {
|
||||
validPerBlock = strippedPerBlock;
|
||||
total = strippedTotal;
|
||||
effectiveFind = stripped;
|
||||
normalized = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (total === 0) {
|
||||
// Distinguish "the text exists but only across an atom" from a plain
|
||||
// not-found: if a raw substring scan (atoms included) WOULD have hit,
|
||||
// the only thing blocking the edit is the atom, so report that.
|
||||
// not-found: if a raw substring scan (atoms included) WOULD have hit —
|
||||
// for EITHER the verbatim or the stripped locator — the only thing
|
||||
// blocking the edit is the atom, so report that.
|
||||
const existsAcrossAtom = blockPlain.some(
|
||||
(plain) => plain.indexOf(edit.find) !== -1,
|
||||
(plain) =>
|
||||
plain.indexOf(edit.find) !== -1 ||
|
||||
(stripped !== edit.find && plain.indexOf(stripped) !== -1),
|
||||
);
|
||||
failed.push({
|
||||
find: edit.find,
|
||||
reason: existsAcrossAtom
|
||||
? "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes."
|
||||
: "text not found in the document.",
|
||||
});
|
||||
let reason: string;
|
||||
if (existsAcrossAtom) {
|
||||
reason =
|
||||
"match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes.";
|
||||
} else {
|
||||
// Append a bounded "closest text" hint: find the FIRST block that
|
||||
// contains the longest whitespace-delimited token (>= 3 chars) of the
|
||||
// (stripped, then raw) locator, and quote that block's plain text.
|
||||
reason = "text not found in the document.";
|
||||
const tokenSource = stripped.length > 0 ? stripped : edit.find;
|
||||
const longestToken = tokenSource
|
||||
.split(/\s+/)
|
||||
.filter((t) => t.length >= 3)
|
||||
.sort((a, b) => b.length - a.length)[0];
|
||||
if (longestToken) {
|
||||
const hitBlock = blockPlain.find((plain) =>
|
||||
plain.includes(longestToken),
|
||||
);
|
||||
if (hitBlock) {
|
||||
// Truncate by code point (spread iterates by code point) so a
|
||||
// surrogate pair is never split; append the ellipsis only when the
|
||||
// text was actually longer than the limit.
|
||||
const points = [...hitBlock];
|
||||
const snippet =
|
||||
points.length > 120
|
||||
? points.slice(0, 120).join("") + "…"
|
||||
: hitBlock;
|
||||
reason += ` Closest block text: "${snippet}".`;
|
||||
}
|
||||
}
|
||||
}
|
||||
failed.push({ find: edit.find, reason });
|
||||
continue;
|
||||
}
|
||||
if (total > 1 && !edit.replaceAll) {
|
||||
@@ -341,20 +395,31 @@ export function applyTextEdits(
|
||||
if (!edit.replaceAll && takenFirst) break;
|
||||
}
|
||||
|
||||
// Apply the splices block-by-block and re-tokenize changed blocks.
|
||||
// Apply the splices block-by-block and re-tokenize changed blocks. The
|
||||
// local edit uses `effectiveFind` (verbatim or normalized) so the
|
||||
// prefix/suffix diff is computed against the ACTUALLY matched text, while
|
||||
// `edit.replace` stays literal — never stripped.
|
||||
const effectiveEdit: TextEdit = {
|
||||
find: effectiveFind,
|
||||
replace: edit.replace,
|
||||
replaceAll: edit.replaceAll,
|
||||
};
|
||||
let spliced = 0;
|
||||
for (let b = 0; b < blocks.length; b++) {
|
||||
if (plannedPerBlock[b].length === 0) continue;
|
||||
const { newChars, spliced: n } = applyEditToChars(
|
||||
blockChars[b],
|
||||
edit,
|
||||
effectiveEdit,
|
||||
plannedPerBlock[b],
|
||||
);
|
||||
spliced += n;
|
||||
blocks[b].content = tokenizeChars(newChars);
|
||||
}
|
||||
|
||||
results.push({ find: edit.find, replacements: spliced });
|
||||
// Keep `find: edit.find` (the original) so the caller can correlate.
|
||||
const result: TextEditResult = { find: edit.find, replacements: spliced };
|
||||
if (normalized) result.normalized = true;
|
||||
results.push(result);
|
||||
}
|
||||
|
||||
// Safety net: drop any empty text nodes (ProseMirror forbids them). The
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
* `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||
*/
|
||||
|
||||
import { stripInlineMarkdown } from "./text-normalize.js";
|
||||
|
||||
/** Deep-clone a JSON-serializable value without mutating the original. */
|
||||
function clone<T>(value: T): T {
|
||||
if (typeof structuredClone === "function") {
|
||||
@@ -364,6 +366,31 @@ const REQUIRED_CONTAINER: Record<string, string> = {
|
||||
tableHeader: "tableRow",
|
||||
};
|
||||
|
||||
/**
 * Resolve an anchor text to the index of a TOP-LEVEL block, or -1 if no block
 * qualifies.
 *
 * The lookup is two-staged so that an exact hit anywhere in the document
 * always beats a normalized hit:
 *   1. the first block whose plain text contains `anchorText` verbatim;
 *   2. only when stage 1 found nothing, and only when stripping inline
 *      markdown actually changed the anchor to a non-empty string, the first
 *      block whose plain text contains the stripped anchor.
 *
 * @param content     Top-level block nodes; a non-array yields -1.
 * @param anchorText  Locator text supplied by the caller.
 * @returns Index into `content`, or -1 when nothing matched.
 */
function findAnchorTextIndex(content: any[], anchorText: string): number {
  if (!Array.isArray(content)) {
    return -1;
  }

  // Index of the first block whose plain text contains `needle`, else -1.
  const firstBlockContaining = (needle: string): number => {
    for (let idx = 0; idx < content.length; idx += 1) {
      const plain = blockPlainText(content[idx]);
      if (plain.includes(needle)) {
        return idx;
      }
    }
    return -1;
  };

  // Stage 1: the verbatim anchor.
  const exactIndex = firstBlockContaining(anchorText);
  if (exactIndex !== -1) {
    return exactIndex;
  }

  // Stage 2: the markdown-stripped anchor, skipped when stripping was a no-op
  // (the scan would just repeat stage 1) or produced nothing.
  const strippedAnchor = stripInlineMarkdown(anchorText);
  const fallbackIsUseful =
    strippedAnchor !== anchorText && strippedAnchor.length > 0;
  return fallbackIsUseful ? firstBlockContaining(strippedAnchor) : -1;
}
|
||||
|
||||
/**
|
||||
* Locate an anchor and return its ancestor chain (from `doc` down to and
|
||||
* including the matched node). Each chain entry is `{ node, index }` where
|
||||
@@ -399,16 +426,16 @@ function findAnchorChain(
|
||||
}
|
||||
|
||||
// By text: only top-level blocks are scanned (same rule as the JSON path).
|
||||
// Exact match wins; a markdown-stripped fallback is tried only on a miss.
|
||||
if (opts.anchorText != null && Array.isArray(doc.content)) {
|
||||
for (let i = 0; i < doc.content.length; i++) {
|
||||
if (blockPlainText(doc.content[i]).includes(opts.anchorText)) {
|
||||
const i = findAnchorTextIndex(doc.content, opts.anchorText);
|
||||
if (i !== -1) {
|
||||
return [
|
||||
{ node: doc, index: -1 },
|
||||
{ node: doc.content[i], index: i },
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
@@ -540,15 +567,15 @@ export function insertNodeRelative(
|
||||
return { doc: out, inserted };
|
||||
}
|
||||
|
||||
// Resolve by text: only top-level doc.content blocks are scanned.
|
||||
// Resolve by text: only top-level doc.content blocks are scanned. Exact
|
||||
// match wins; a markdown-stripped fallback is tried only on a miss.
|
||||
if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
|
||||
for (let i = 0; i < out.content.length; i++) {
|
||||
if (blockPlainText(out.content[i]).includes(opts.anchorText)) {
|
||||
const i = findAnchorTextIndex(out.content, opts.anchorText);
|
||||
if (i !== -1) {
|
||||
out.content.splice(i + offset, 0, fresh);
|
||||
return { doc: out, inserted: true };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { doc: out, inserted: false };
|
||||
}
|
||||
|
||||
78
packages/mcp/src/lib/text-normalize.ts
Normal file
78
packages/mcp/src/lib/text-normalize.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
/**
|
||||
* Locator normalization: strip inline markdown wrappers and trailing
|
||||
* decoration from a LOCATOR string so a find/anchor that the model wrote with
|
||||
* markdown (or a stray emoji) can still match the document's plain text.
|
||||
*
|
||||
* This is used ONLY as a fallback for LOCATING (after an exact match fails);
|
||||
* it is never applied to replacement text or inserted node content, so no
|
||||
* formatting is ever lost.
|
||||
*/
|
||||
|
||||
/** Maximum unwrap passes, so pathological/nested input cannot loop forever. */
const MAX_PASSES = 8;

/**
 * Inline emphasis/code/strikethrough wrappers, strong BEFORE emphasis so
 * `**x**` collapses to `x` rather than leaving a stray `*x*`. Each pattern is
 * non-greedy and capture group 1 is the inner text. Applied repeatedly until
 * the string stops changing (nested wrappers like `**_x_**`).
 *
 * The underscore patterns only fire when the delimiters are NOT glued to a
 * letter/digit (CommonMark does not treat intraword `_` as emphasis), so
 * identifiers such as `my_var_name` survive intact. They are built from
 * strings, like the decoration patterns below, so the lookbehind and the
 * Unicode property escapes are handled entirely by the runtime.
 */
const WRAPPER_PATTERNS: RegExp[] = [
  /\*\*([^*]+?)\*\*/g, // **x**
  new RegExp("(?<![\\p{L}\\p{N}])__([^_]+?)__(?![\\p{L}\\p{N}])", "gu"), // __x__
  /~~([^~]+?)~~/g, // ~~x~~
  /\*([^*]+?)\*/g, // *x*
  new RegExp("(?<![\\p{L}\\p{N}_])_([^_]+?)_(?![\\p{L}\\p{N}_])", "gu"), // _x_
  /``([^`]+?)``/g, // ``x``
  /`([^`]+?)`/g, // `x`
];

/**
 * Character class for leading/trailing decoration: whitespace, leftover
 * markdown markers, and emoji. Emoji are covered by Extended_Pictographic,
 * the skin-tone modifiers (Emoji_Modifier, U+1F3FB–U+1F3FF), the VS16 / ZWJ
 * joiners, and the regional-indicator range U+1F1E6–U+1F1FF for flag emoji
 * (which are NOT Extended_Pictographic).
 */
const DECORATION =
  "[\\s*_~\\x60\\p{Extended_Pictographic}\\p{Emoji_Modifier}\\u{1F1E6}-\\u{1F1FF}\\u{FE0F}\\u{200D}]+";

/** Anchored decoration runs, compiled once (the `u` flag enables `\p{…}`). */
const LEADING_DECORATION = new RegExp("^" + DECORATION, "u");
const TRAILING_DECORATION = new RegExp(DECORATION + "$", "u");

/**
 * Conservatively strip inline markdown from a locator string.
 *
 * Deterministic, order-fixed steps:
 *   1. Links/images: `[text](url)` -> `text`, `![alt](url)` -> `alt`.
 *   2. Balanced inline wrappers (strong before emphasis, code, strikethrough),
 *      applied repeatedly until stable for nested cases. Intraword underscores
 *      (`snake_case`) are never treated as emphasis.
 *   3. Trim leading/trailing decoration only: whitespace, leftover marker chars
 *      (`* _ ~ \``) and emoji. Letters/digits and sentence punctuation (`.`/`,`
 *      etc.) are NEVER trimmed.
 *
 * If the result is empty (e.g. the input was only markers like `***`), the
 * ORIGINAL string is returned so a locator can never normalize down to "" and
 * match everything.
 *
 * @param s Locator text as supplied by the caller.
 * @returns The normalized locator; `s` itself when it is not a string, is
 *   empty, or would normalize to the empty string.
 */
export function stripInlineMarkdown(s: string): string {
  if (typeof s !== "string" || s.length === 0) return s;

  let out = s;

  // 1. Links/images -> their visible text. `!?` covers both forms.
  out = out.replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1");

  // 2. Strip balanced wrappers, repeating until the string is stable so nested
  //    wrappers (`**_x_**`) and adjacent runs both collapse.
  for (let pass = 0; pass < MAX_PASSES; pass++) {
    const before = out;
    for (const re of WRAPPER_PATTERNS) {
      out = out.replace(re, "$1");
    }
    if (out === before) break;
  }

  // 3. Trim leading/trailing decoration. Anchored runs only — interior text
  //    and sentence punctuation are untouched.
  out = out.replace(LEADING_DECORATION, "").replace(TRAILING_DECORATION, "");

  // 4. Never normalize a locator down to nothing.
  if (out.length === 0) return s;

  return out;
}
|
||||
@@ -318,3 +318,107 @@ test("input doc is not mutated", () => {
|
||||
assert.notEqual(out, input);
|
||||
assert.equal(out.content[0].content[0].text, "changed source");
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Markdown-normalization fallback (locating only; replace is always literal)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("markdown-wrapped find matches via normalization, preserving the mark", () => {
  // Rendered text is "Hello world" where only "world" carries the bold mark.
  // The locator "**world**" cannot match verbatim, so the lookup is expected
  // to succeed through its stripped form "world".
  const helloNode = textNode("Hello ");
  const boldWorldNode = textNode("world", { marks: [{ type: "bold" }] });
  const source = doc(paragraph(helloNode, boldWorldNode));

  const outcome = applyTextEdits(source, [
    { find: "**world**", replace: "earth" },
  ]);

  assert.equal(outcome.failed.length, 0);
  assert.equal(outcome.results.length, 1);

  const [applied] = outcome.results;
  assert.equal(applied.find, "**world**"); // the caller's original locator is echoed
  assert.equal(applied.replacements, 1);
  assert.equal(applied.normalized, true);

  // The replacement text inherits the bold mark of the text it replaced.
  const [firstBlock] = outcome.doc.content;
  const replaced = firstBlock.content.find((node) => node.text === "earth");
  assert.deepEqual(replaced.marks, [{ type: "bold" }]);
});
|
||||
|
||||
test("exact match wins: literal '2 * 3' matches without normalization", () => {
  const source = doc(paragraph(textNode("compute 2 * 3 now")));
  const edits = [{ find: "2 * 3", replace: "6" }];

  const outcome = applyTextEdits(source, edits);

  assert.equal(outcome.failed.length, 0);
  assert.equal(outcome.results.length, 1);

  const [applied] = outcome.results;
  assert.equal(applied.replacements, 1);
  // The verbatim locator matched, so the normalization flag must be falsy.
  assert.ok(!applied.normalized);
});
|
||||
|
||||
test("normalization yielding >1 matches without replaceAll is an ambiguity failure", () => {
  // Two separate paragraphs, each a bold "world". Verbatim "**world**" hits
  // nothing; the stripped "world" hits both, so the edit must be refused
  // rather than guessing which one was meant.
  const boldWorldParagraph = () =>
    paragraph(textNode("world", { marks: [{ type: "bold" }] }));
  const source = doc(boldWorldParagraph(), boldWorldParagraph());

  const outcome = applyTextEdits(source, [
    { find: "**world**", replace: "earth" },
  ]);

  assert.equal(outcome.results.length, 0);
  assert.equal(outcome.failed.length, 1);

  const [failure] = outcome.failed;
  assert.match(failure.reason, /matches/);
});
|
||||
|
||||
test("stripped locator that only matches across an atom is refused (atom-aware reason)", () => {
  // paragraph: "a" <hardBreak> "b", so blockPlain is "a\uFFFCb" (U+FFFC is the
  // atom placeholder). The placeholder is written as an escape below so the
  // invisible character cannot be lost in copy/paste or rendering.
  // The locator is markdown-wrapped, so the verbatim form "**a\uFFFCb**" never
  // matches; its stripped form "a\uFFFCb" has no atom-free valid match either,
  // BUT a raw substring scan of the block (atoms included) DOES hit the
  // stripped needle. That exercises the existsAcrossAtom branch on the
  // STRIPPED needle: the edit is refused with the atom-aware reason and the
  // document is left unchanged.
  const input = doc(
    paragraph(
      textNode("a"),
      { type: "hardBreak" },
      textNode("b"),
    ),
  );
  const snapshot = JSON.parse(JSON.stringify(input));

  const { doc: out, results, failed } = applyTextEdits(input, [
    { find: "**a\uFFFCb**", replace: "z" },
  ]);

  assert.deepEqual(results, []);
  assert.equal(failed.length, 1);
  assert.match(failed[0].reason, /non-text inline node/);
  assert.deepEqual(out, snapshot);
});
|
||||
|
||||
test("genuine miss appends a 'Closest block text' hint", () => {
  const sentence = "The quick brown fox jumps over the lazy dog";
  const source = doc(paragraph(textNode(sentence)));

  // Neither the verbatim nor the stripped locator matches, but its longest
  // token ("jumps") does occur in the block, so the failure reason is expected
  // to carry a bounded "closest text" hint.
  const outcome = applyTextEdits(source, [
    { find: "fox jumps now", replace: "x" },
  ]);

  assert.equal(outcome.failed.length, 1);

  const { reason } = outcome.failed[0];
  assert.match(reason, /Closest block text/);
  assert.match(reason, /quick brown fox/);
});
|
||||
|
||||
@@ -400,3 +400,90 @@ test("insertNodeRelative does NOT mutate input (deep-equal snapshot)", () => {
|
||||
assert.deepEqual(input, snap);
|
||||
assert.notEqual(out, input);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// anchorText markdown-normalization fallback (locating only)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("insertNodeRelative before by markdown-wrapped anchorText matches the plain block", () => {
  const source = doc(
    para("p-1", textNode("alpha")),
    para("p-2", textNode("beta")),
  );
  const newBlock = para("new", textNode("NEW"));

  // No block contains "**beta**" verbatim; the stripped "beta" selects "p-2".
  const outcome = insertNodeRelative(source, newBlock, {
    position: "before",
    anchorText: "**beta**",
  });

  assert.equal(outcome.inserted, true);

  const blockIds = outcome.doc.content.map((block) => block.attrs.id);
  assert.deepEqual(blockIds, ["p-1", "new", "p-2"]);
});
|
||||
|
||||
test("insertNodeRelative after by markdown-wrapped anchorText matches the plain block", () => {
  const source = doc(
    para("p-1", textNode("alpha")),
    para("p-2", textNode("beta")),
  );
  const newBlock = para("new", textNode("NEW"));

  // The wrapped anchor resolves to the "alpha" block; the new node lands
  // directly after it.
  const outcome = insertNodeRelative(source, newBlock, {
    position: "after",
    anchorText: "**alpha**",
  });

  assert.equal(outcome.inserted, true);

  const blockIds = outcome.doc.content.map((block) => block.attrs.id);
  assert.deepEqual(blockIds, ["p-1", "new", "p-2"]);
});
|
||||
|
||||
test("insertNodeRelative anchorText with markdown AND a trailing emoji matches the plain block", () => {
  const source = doc(
    para("p-1", textNode("alpha")),
    para("p-2", textNode("beta")),
  );
  const newBlock = para("new", textNode("NEW"));

  // The anchor carries both a bold wrapper and trailing decoration (space +
  // emoji); it is still expected to resolve to the plain "beta" block.
  const outcome = insertNodeRelative(source, newBlock, {
    position: "before",
    anchorText: "**beta** ✨",
  });

  assert.equal(outcome.inserted, true);

  const blockIds = outcome.doc.content.map((block) => block.attrs.id);
  assert.deepEqual(blockIds, ["p-1", "new", "p-2"]);
});
|
||||
|
||||
test("insertNodeRelative exact anchorText still wins (no normalization)", () => {
  // The first block's text is literally "a*b", so the anchor has to hit it
  // as-is.
  const source = doc(
    para("p-1", textNode("a*b")),
    para("p-2", textNode("beta")),
  );
  const newBlock = para("new", textNode("NEW"));

  const outcome = insertNodeRelative(source, newBlock, {
    position: "after",
    anchorText: "a*b",
  });

  assert.equal(outcome.inserted, true);

  const blockIds = outcome.doc.content.map((block) => block.attrs.id);
  assert.deepEqual(blockIds, ["p-1", "new", "p-2"]);
});
|
||||
|
||||
test("insertNodeRelative truly-missing anchor still returns inserted:false", () => {
  const source = doc(
    para("p-1", textNode("alpha")),
    para("p-2", textNode("beta")),
  );
  const newBlock = para("new", textNode("NEW"));

  // Neither "**gamma**" nor its stripped form occurs in any block.
  const outcome = insertNodeRelative(source, newBlock, {
    position: "before",
    anchorText: "**gamma**",
  });

  assert.equal(outcome.inserted, false);
});
|
||||
|
||||
42
packages/mcp/test/unit/text-normalize.test.mjs
Normal file
42
packages/mcp/test/unit/text-normalize.test.mjs
Normal file
@@ -0,0 +1,42 @@
|
||||
import { test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import { stripInlineMarkdown } from "../../build/lib/text-normalize.js";
|
||||
|
||||
test("strips strong wrappers", () => {
  // Non-ASCII (Cyrillic) inner text with interior spaces must survive intact.
  const wrapped = "**в полном порядке**";
  const stripped = stripInlineMarkdown(wrapped);
  assert.equal(stripped, "в полном порядке");
});
|
||||
|
||||
test("strips emphasis and trims a trailing emoji, keeps sentence punctuation", () => {
  // The full stop sits inside the emphasis and must be kept; the trailing
  // space + emoji is decoration and must go.
  const decorated = "*Конец.* ✨";
  const stripped = stripInlineMarkdown(decorated);
  assert.equal(stripped, "Конец.");
});
|
||||
|
||||
test("strips inline code", () => {
  const codeSpan = "`code`";
  const stripped = stripInlineMarkdown(codeSpan);
  assert.equal(stripped, "code");
});
|
||||
|
||||
test("links collapse to their visible text", () => {
  // Only the bracketed label is kept; the URL part is dropped.
  const link = "[t](http://x)";
  const stripped = stripInlineMarkdown(link);
  assert.equal(stripped, "t");
});
|
||||
|
||||
test("a plain string is unchanged", () => {
  const plain = "just plain text";
  const stripped = stripInlineMarkdown(plain);
  assert.equal(stripped, "just plain text");
});
|
||||
|
||||
test("a string of only markers returns the original", () => {
  // Stripping would leave nothing, so the input is handed back untouched.
  const markersOnly = "***";
  const stripped = stripInlineMarkdown(markersOnly);
  assert.equal(stripped, "***");
});
|
||||
|
||||
test("nested wrappers collapse to the inner text", () => {
  // Strong wrapping emphasis: both layers must be removed.
  const nested = "**_x_**";
  const stripped = stripInlineMarkdown(nested);
  assert.equal(stripped, "x");
});
|
||||
|
||||
test("image syntax collapses to its alt text", () => {
  // The leading `!` marks an image; only the alt text inside the brackets is
  // kept and the URL is dropped.
  assert.equal(stripInlineMarkdown("![alt](http://x)"), "alt");
});
|
||||
|
||||
test("a trailing flag emoji is trimmed", () => {
  // Regional-indicator flags are not Extended_Pictographic, so this guards the
  // explicit U+1F1E6–U+1F1FF range in the decoration-trim class. The result is
  // compared without a `.trim()`: the space separating the text from the flag
  // is part of the trailing decoration and must be removed by the function
  // itself, not papered over by the test.
  assert.equal(stripInlineMarkdown("hello 🇺🇸"), "hello");
});
|
||||
Reference in New Issue
Block a user