Files
gitmost/packages/mcp/test/unit/json-edit.test.mjs
vvzvlad afd2248a75 feat(ai-chat): tolerate markdown in edit_page_text/insert_node locators
Locators (edit_page_text `find`, insert_node `anchorText`) are matched
against the document's plain text, so a model-supplied locator carrying
markdown wrappers (**bold**, *italic*, `code`, [t](url)) or trailing emoji
never matched and the edit/insert failed. Add stripInlineMarkdown() and a
fallback: try the locator verbatim first (exact match wins, so literal
asterisks/underscores still work), and only on zero matches retry with a
markdown-stripped form. The ambiguity guard runs on the post-fallback count,
and `replace` / inserted node content are never stripped, so no formatting is
lost. Failed edits gain an atom-aware reason plus a bounded "closest block
text" hint; the insert_node "anchor not found" error now points at plain-text
anchors / anchorNodeId.

New packages/mcp/src/lib/text-normalize.ts (+ unit tests); wired into
json-edit.ts and node-ops.ts; tool descriptions updated. Tests: 212 pass.
2026-06-17 15:44:19 +03:00

425 lines
15 KiB
JavaScript

import { test } from "node:test";
import assert from "node:assert/strict";
import { applyTextEdits } from "../../build/lib/json-edit.js";
// Minimal factories for assembling ProseMirror-shaped JSON fixtures.

/** Text node; fields in `extra` (e.g. `marks`) are spread over the base object. */
function textNode(text, extra = {}) {
  return { type: "text", text, ...extra };
}

/** Paragraph node whose content is the given children, in order. */
function paragraph(...children) {
  return { type: "paragraph", content: children };
}

/** Root doc node whose content is the given children, in order. */
function doc(...children) {
  return { type: "doc", content: children };
}
test("single-match replace preserves ids/marks and reports replacements===1", () => {
  // One bold text node inside a paragraph that carries an id attribute.
  const source = doc({
    type: "paragraph",
    attrs: { id: "para-1" },
    content: [textNode("Hello world", { marks: [{ type: "bold" }] })],
  });

  const outcome = applyTextEdits(source, [{ find: "world", replace: "there" }]);

  assert.deepEqual(outcome.results, [{ find: "world", replacements: 1 }]);

  const [editedPara] = outcome.doc.content;
  // The paragraph keeps its id attribute.
  assert.equal(editedPara.attrs.id, "para-1");

  const [editedText] = editedPara.content;
  // The text node keeps its marks and now carries the replaced text.
  assert.deepEqual(editedText.marks, [{ type: "bold" }]);
  assert.equal(editedText.text, "Hello there");
});
test("zero match is reported via failed[], doc unchanged", () => {
  const source = doc(paragraph(textNode("Hello world")));
  // Independent deep copy taken before the call, used as the "unchanged" baseline.
  const before = JSON.parse(JSON.stringify(source));

  const outcome = applyTextEdits(source, [{ find: "absent", replace: "x" }]);

  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /not found/);
  // The returned doc has the same structure as the input had before the call.
  assert.deepEqual(outcome.doc, before);
});
test("text split across two text nodes (one bold) now applies, marks preserved", () => {
  // No single text node holds "Hello world": it is stored as a plain "Hello "
  // run followed by a bold "world" run, so the match has to span both runs.
  const plainRun = textNode("Hello ");
  const boldRun = textNode("world", { marks: [{ type: "bold" }] });
  const source = doc(paragraph(plainRun, boldRun));

  const edits = [{ find: "Hello world", replace: "Hello there" }];
  const { doc: edited, results, failed } = applyTextEdits(source, edits);

  assert.deepEqual(results, [{ find: "Hello world", replacements: 1 }]);
  assert.deepEqual(failed, []);

  // Expected shape: the untouched "Hello " prefix is still plain, and the
  // replacement "there" is bold because the region it replaced was all bold.
  const runs = edited.content[0].content;
  assert.equal(runs.length, 2);
  const [head, tail] = runs;
  assert.equal(head.text, "Hello ");
  assert.equal(head.marks, undefined);
  assert.equal(tail.text, "there");
  assert.deepEqual(tail.marks, [{ type: "bold" }]);
});
test("multi-match without replaceAll is reported via failed[], doc unchanged", () => {
  // A single text node in which "ab" occurs twice.
  const source = doc(paragraph(textNode("ab cd ab")));
  const before = JSON.parse(JSON.stringify(source));

  const outcome = applyTextEdits(source, [{ find: "ab", replace: "x" }]);

  // The ambiguous edit is not applied; it is reported as a failure instead.
  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /matches/);
  assert.deepEqual(outcome.doc, before);
});
test("cross-run replace with mixed marks inherits left-neighbor marks", () => {
  // Runs: "A" plain, "B" bold, "C" italic, "D" plain. The match "BC" therefore
  // has non-uniform marks, and "X" shares no prefix/suffix with "BC", so the
  // inserted text takes the marks of the left neighbor "A" — i.e. none.
  const runs = [
    textNode("A"),
    textNode("B", { marks: [{ type: "bold" }] }),
    textNode("C", { marks: [{ type: "italic" }] }),
    textNode("D"),
  ];
  const source = doc(paragraph(...runs));

  const { doc: edited, results, failed } = applyTextEdits(source, [
    { find: "BC", replace: "X" },
  ]);

  assert.deepEqual(results, [{ find: "BC", replacements: 1 }]);
  assert.deepEqual(failed, []);

  // Plain "A", plain "X" and plain "D" merge into one plain text node "AXD".
  const mergedRuns = edited.content[0].content;
  assert.equal(mergedRuns.length, 1);
  const [merged] = mergedRuns;
  assert.equal(merged.text, "AXD");
  assert.equal(merged.marks, undefined);
});
test("cross-run replace at block start inherits [] marks", () => {
  // The paragraph consists solely of the mixed-mark match: bold "B" followed
  // by italic "C". There is no left neighbor to inherit marks from, and the
  // test expects the inserted text to end up without marks.
  const boldB = textNode("B", { marks: [{ type: "bold" }] });
  const italicC = textNode("C", { marks: [{ type: "italic" }] });
  const source = doc(paragraph(boldB, italicC));

  const outcome = applyTextEdits(source, [{ find: "BC", replace: "X" }]);

  assert.deepEqual(outcome.results, [{ find: "BC", replacements: 1 }]);

  const remainingRuns = outcome.doc.content[0].content;
  assert.equal(remainingRuns.length, 1);
  const [only] = remainingRuns;
  assert.equal(only.text, "X");
  assert.equal(only.marks, undefined);
});
test("partial batch: good edits apply, the bad one goes to failed[]", () => {
  const source = doc(paragraph(textNode("alpha beta gamma")));
  // The middle edit targets text that is not in the document.
  const batch = [
    { find: "alpha", replace: "ALPHA" },
    { find: "absent", replace: "X" },
    { find: "gamma", replace: "GAMMA" },
  ];

  const { doc: edited, results, failed } = applyTextEdits(source, batch);

  // Both matching edits are reported as applied, in batch order.
  const expectedResults = [
    { find: "alpha", replacements: 1 },
    { find: "gamma", replacements: 1 },
  ];
  assert.deepEqual(results, expectedResults);

  // The missing locator is reported on its own, without blocking the others.
  assert.equal(failed.length, 1);
  const [miss] = failed;
  assert.equal(miss.find, "absent");
  assert.match(miss.reason, /not found/);

  assert.equal(edited.content[0].content[0].text, "ALPHA beta GAMMA");
});
test("a match that crosses an atom is refused, doc unchanged", () => {
  // Paragraph content is: "a", a hardBreak node, "b". Finding "ab" would need
  // the match to run across the hardBreak, which this test expects to be
  // refused: the edit must land in failed[] (with either an atom-specific or a
  // not-found reason) and the document must come back unchanged.
  const source = doc(
    paragraph(textNode("a"), { type: "hardBreak" }, textNode("b")),
  );
  const before = JSON.parse(JSON.stringify(source));

  const outcome = applyTextEdits(source, [{ find: "ab", replace: "z" }]);

  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /non-text inline node|not found/);
  assert.deepEqual(outcome.doc, before);
});
test("a TEXT node containing a literal U+FFFC matches/replaces normally", () => {
  // U+FFFC (OBJECT REPLACEMENT CHARACTER) is the placeholder used for atom
  // slots, but a real text node may legitimately contain that code unit. Such
  // a slot has no `.atom`, so it must match and replace like any other
  // character — proving atoms and literal-U+FFFC text are distinguished.
  //
  // The character is spelled as an escape sequence on purpose: as a raw
  // character it is invisible in most editors and easily dropped by tooling,
  // which would silently turn this into a plain "xy" test.
  const literal = "x\uFFFCy";
  const input = doc(paragraph(textNode(literal)));

  const { doc: out, results, failed } = applyTextEdits(input, [
    { find: literal, replace: "done" },
  ]);

  assert.deepEqual(results, [{ find: literal, replacements: 1 }]);
  assert.deepEqual(failed, []);
  assert.equal(out.content[0].content[0].text, "done");
});
test("a no-op edit (find === replace) produces a doc deep-equal to the input", () => {
  // Replacing text with itself counts as an applied edit, yet the resulting
  // document must be structurally identical to the input. Per the original
  // note, that is what lets the client skip the collaboration write and avoid
  // a spurious history version.
  const source = doc(paragraph(textNode("unchanged text")));
  const before = JSON.parse(JSON.stringify(source));
  const sameText = "unchanged";

  const outcome = applyTextEdits(source, [{ find: sameText, replace: sameText }]);

  // Reported as applied...
  assert.deepEqual(outcome.results, [{ find: "unchanged", replacements: 1 }]);
  // ...while the document is still deep-equal to what went in.
  assert.deepEqual(outcome.doc, before);
});
test("replaceAll replaces all occurrences", () => {
  // "foo" occurs twice in the first paragraph and once in the second.
  const firstPara = paragraph(textNode("foo and foo"));
  const secondPara = paragraph(textNode("more foo"));
  const source = doc(firstPara, secondPara);

  const { doc: edited, results } = applyTextEdits(source, [
    { find: "foo", replace: "bar", replaceAll: true },
  ]);

  // The replacement count is the document-wide total: 2 + 1.
  assert.deepEqual(results, [{ find: "foo", replacements: 3 }]);

  const [editedFirst, editedSecond] = edited.content;
  assert.equal(editedFirst.content[0].text, "bar and bar");
  assert.equal(editedSecond.content[0].text, "more bar");
});
test("replacement containing $&, $1, $$ is inserted LITERALLY (regression)", () => {
  // The replacement is full of sequences that String.prototype.replace would
  // treat as substitution patterns; none of them may be expanded.
  const replacement = "price $& cost $1 dollars $$ end";
  const source = doc(paragraph(textNode("token here")));

  const outcome = applyTextEdits(source, [
    { find: "token", replace: replacement },
  ]);
  const editedText = outcome.doc.content[0].content[0].text;

  // Verbatim insertion, followed by the untouched remainder of the node.
  assert.equal(editedText, `${replacement} here`);
  // "$&" survives as-is, and the matched text was not re-injected through it.
  assert.ok(editedText.includes("$&"));
  assert.ok(!editedText.includes("token"));
});
test("$ patterns are inserted literally under replaceAll too", () => {
  // Same literal-insertion guarantee, this time with two occurrences replaced.
  const source = doc(paragraph(textNode("x and x")));
  const edits = [{ find: "x", replace: "$&$1$$", replaceAll: true }];

  const outcome = applyTextEdits(source, edits);

  const editedText = outcome.doc.content[0].content[0].text;
  assert.equal(editedText, "$&$1$$ and $&$1$$");
});
test("empty replacement prunes the emptied text node", () => {
  // Deleting "DELETE" empties the italic run entirely; an empty text node must
  // not be left behind in the paragraph.
  const italicRun = textNode("DELETE", { marks: [{ type: "italic" }] });
  const keptRun = textNode(" kept");
  const source = doc(paragraph(italicRun, keptRun));

  const outcome = applyTextEdits(source, [{ find: "DELETE", replace: "" }]);

  assert.deepEqual(outcome.results, [{ find: "DELETE", replacements: 1 }]);

  // Only the " kept" run survives.
  const survivors = outcome.doc.content[0].content;
  assert.equal(survivors.length, 1);
  assert.equal(survivors[0].text, " kept");
});
test("multi-edit array applied in order", () => {
  const source = doc(paragraph(textNode("alpha beta")));
  // Two independent edits against the same text node.
  const batch = [
    { find: "alpha", replace: "ALPHA" },
    { find: "beta", replace: "BETA" },
  ];

  const outcome = applyTextEdits(source, batch);

  // One result per edit, in the order the edits were supplied.
  const expectedResults = [
    { find: "alpha", replacements: 1 },
    { find: "beta", replacements: 1 },
  ];
  assert.deepEqual(outcome.results, expectedResults);
  assert.equal(outcome.doc.content[0].content[0].text, "ALPHA BETA");
});
test("second edit can target text produced by the first (ordered application)", () => {
  // "two" only exists after the first edit has run, so the second edit can
  // succeed only if edits are applied sequentially.
  const source = doc(paragraph(textNode("one")));
  const chain = [
    { find: "one", replace: "two" },
    { find: "two", replace: "three" },
  ];

  const { doc: edited, results } = applyTextEdits(source, chain);

  const expectedResults = [
    { find: "one", replacements: 1 },
    { find: "two", replacements: 1 },
  ];
  assert.deepEqual(results, expectedResults);

  const finalText = edited.content[0].content[0].text;
  assert.equal(finalText, "three");
});
test("input doc is not mutated", () => {
  const source = doc(paragraph(textNode("immutable source")));
  // Deep copy captured up front so later mutation of `source` would show up.
  const before = JSON.parse(JSON.stringify(source));

  const outcome = applyTextEdits(source, [
    { find: "immutable", replace: "changed" },
  ]);
  const edited = outcome.doc;

  // The caller's document is exactly as it was...
  assert.deepEqual(source, before);
  // ...and the result is a different object that carries the edit.
  assert.notEqual(edited, source);
  assert.equal(edited.content[0].content[0].text, "changed source");
});
// ---------------------------------------------------------------------------
// Markdown-normalization fallback (locating only; replace is always literal)
// ---------------------------------------------------------------------------
test("markdown-wrapped find matches via normalization, preserving the mark", () => {
  // The document renders "Hello world" with "world" bold. The model's locator
  // "**world**" has no verbatim match, so the stripped form "world" is used.
  const input = doc(
    paragraph(
      textNode("Hello "),
      textNode("world", { marks: [{ type: "bold" }] }),
    ),
  );

  const { doc: out, results, failed } = applyTextEdits(input, [
    { find: "**world**", replace: "earth" },
  ]);

  assert.equal(failed.length, 0);
  assert.equal(results.length, 1);
  assert.equal(results[0].find, "**world**"); // original is reported back
  assert.equal(results[0].replacements, 1);
  assert.equal(results[0].normalized, true);

  // The bold mark is preserved on the replacement (inherited from the match).
  const para = out.content[0];
  const bold = para.content.find((n) => n.text === "earth");
  // Assert the node exists first: without this, a missing "earth" run would
  // surface as an opaque TypeError on `bold.marks` instead of a clear failure.
  assert.ok(bold, 'expected a text node with text "earth" in the edited paragraph');
  assert.deepEqual(bold.marks, [{ type: "bold" }]);
});
test("exact match wins: literal '2 * 3' matches without normalization", () => {
  // The locator contains an asterisk that is literal text, not markdown, and
  // it occurs verbatim in the document.
  const source = doc(paragraph(textNode("compute 2 * 3 now")));

  const outcome = applyTextEdits(source, [{ find: "2 * 3", replace: "6" }]);

  assert.equal(outcome.failed.length, 0);
  assert.equal(outcome.results.length, 1);

  const [applied] = outcome.results;
  assert.equal(applied.replacements, 1);
  // The verbatim match succeeded, so the result is not flagged as normalized.
  assert.ok(!applied.normalized);
});
test("normalization yielding >1 matches without replaceAll is an ambiguity failure", () => {
  // Two paragraphs, each a bold "world". "**world**" has no verbatim match, and
  // its stripped form "world" matches twice, so the edit must be refused
  // rather than applied to a guessed occurrence.
  const makeBoldWorld = () =>
    paragraph(textNode("world", { marks: [{ type: "bold" }] }));
  const source = doc(makeBoldWorld(), makeBoldWorld());

  const outcome = applyTextEdits(source, [
    { find: "**world**", replace: "earth" },
  ]);

  assert.equal(outcome.results.length, 0);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /matches/);
});
test("stripped locator that only matches across an atom is refused (atom-aware reason)", () => {
  // Paragraph content is: "a", a hardBreak node, "b" (the hardBreak is an atom;
  // U+FFFC is its placeholder in the block's plain text). The locator is
  // markdown-wrapped, so the verbatim "**ab**" cannot match, and the stripped
  // form "ab" could only match by running across the atom. This test pins the
  // outcome for that case on the STRIPPED needle: the edit is refused with the
  // atom-aware reason and the document comes back unchanged.
  const source = doc(
    paragraph(textNode("a"), { type: "hardBreak" }, textNode("b")),
  );
  const before = JSON.parse(JSON.stringify(source));

  const outcome = applyTextEdits(source, [{ find: "**ab**", replace: "z" }]);

  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  // Unlike the verbatim atom test, only the atom-specific reason is accepted.
  assert.match(outcome.failed[0].reason, /non-text inline node/);
  assert.deepEqual(outcome.doc, before);
});
test("genuine miss appends a 'Closest block text' hint", () => {
  const sentence = "The quick brown fox jumps over the lazy dog";
  const source = doc(paragraph(textNode(sentence)));

  // "fox jumps now" matches neither verbatim nor stripped, but per the original
  // note its longest token "jumps" does occur in the block, so the failure
  // reason is expected to carry a bounded "closest text" hint.
  const outcome = applyTextEdits(source, [
    { find: "fox jumps now", replace: "x" },
  ]);

  assert.equal(outcome.failed.length, 1);
  const { reason } = outcome.failed[0];
  assert.match(reason, /Closest block text/);
  assert.match(reason, /quick brown fox/);
});