Locators (edit_page_text `find`, insert_node `anchorText`) are matched against the document's plain text, so a model-supplied locator carrying markdown wrappers (**bold**, *italic*, `code`, [t](url)) or trailing emoji never matched and the edit/insert failed. Add stripInlineMarkdown() and a fallback: try the locator verbatim first (exact match wins, so literal asterisks/underscores still work), and only on zero matches retry with a markdown-stripped form. The ambiguity guard runs on the post-fallback count, and `replace` / inserted node content are never stripped, so no formatting is lost. Failed edits gain an atom-aware reason plus a bounded "closest block text" hint; the insert_node "anchor not found" error now points at plain-text anchors / anchorNodeId. New packages/mcp/src/lib/text-normalize.ts (+ unit tests); wired into json-edit.ts and node-ops.ts; tool descriptions updated. Tests: 212 pass.
425 lines
15 KiB
JavaScript
425 lines
15 KiB
JavaScript
import { test } from "node:test";
|
|
import assert from "node:assert/strict";
|
|
|
|
import { applyTextEdits } from "../../build/lib/json-edit.js";
|
|
|
|
// Helpers to build small ProseMirror docs.
|
|
/**
 * Build a ProseMirror-style text node.
 * `extra` is spread last, so it can add fields (e.g. `marks`) or override defaults.
 */
const textNode = (text, extra = {}) => {
  return { type: "text", text, ...extra };
};
|
|
/** Build a paragraph node whose content is exactly the given inline children. */
const paragraph = (...children) => {
  return { type: "paragraph", content: children };
};
|
|
/** Build a top-level doc node whose content is exactly the given block children. */
const doc = (...children) => {
  return { type: "doc", content: children };
};
|
|
|
|
test("single-match replace preserves ids/marks and reports replacements===1", () => {
  // One paragraph carrying an id attribute and holding a single bold text node.
  const boldText = textNode("Hello world", { marks: [{ type: "bold" }] });
  const source = doc({
    type: "paragraph",
    attrs: { id: "para-1" },
    content: [boldText],
  });
  const edits = [{ find: "world", replace: "there" }];

  const { doc: edited, results } = applyTextEdits(source, edits);

  assert.deepEqual(results, [{ find: "world", replacements: 1 }]);

  const firstBlock = edited.content[0];
  // The paragraph keeps its id attribute.
  assert.equal(firstBlock.attrs.id, "para-1");

  const firstText = firstBlock.content[0];
  // The text node keeps its marks and carries the replaced text.
  assert.deepEqual(firstText.marks, [{ type: "bold" }]);
  assert.equal(firstText.text, "Hello there");
});
|
|
|
|
test("zero match is reported via failed[], doc unchanged", () => {
  const source = doc(paragraph(textNode("Hello world")));
  // Deep copy taken before the call, used as the "unchanged" reference.
  const before = JSON.parse(JSON.stringify(source));
  const edits = [{ find: "absent", replace: "x" }];

  const outcome = applyTextEdits(source, edits);

  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /not found/);
  // The returned doc is structurally equal to the pre-call copy.
  assert.deepEqual(outcome.doc, before);
});
|
|
|
|
test("text split across two text nodes (one bold) now applies, marks preserved", () => {
  // "Hello world" lives in two runs: plain "Hello " followed by bold "world".
  // Neither run alone contains the locator; the match has to span both.
  const plainRun = textNode("Hello ");
  const boldRun = textNode("world", { marks: [{ type: "bold" }] });
  const source = doc(paragraph(plainRun, boldRun));
  const edits = [{ find: "Hello world", replace: "Hello there" }];

  const { doc: edited, results, failed } = applyTextEdits(source, edits);

  assert.deepEqual(results, [{ find: "Hello world", replacements: 1 }]);
  assert.deepEqual(failed, []);

  // Expected shape: the untouched prefix stays plain and the replacement of
  // the all-bold region stays bold.
  const runs = edited.content[0].content;
  assert.equal(runs.length, 2);
  assert.equal(runs[0].text, "Hello ");
  assert.equal(runs[0].marks, undefined);
  assert.equal(runs[1].text, "there");
  assert.deepEqual(runs[1].marks, [{ type: "bold" }]);
});
|
|
|
|
test("multi-match without replaceAll is reported via failed[], doc unchanged", () => {
  // The locator "ab" occurs twice within the same text node.
  const source = doc(paragraph(textNode("ab cd ab")));
  const before = JSON.parse(JSON.stringify(source));
  const edits = [{ find: "ab", replace: "x" }];

  const outcome = applyTextEdits(source, edits);

  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /matches/);
  assert.deepEqual(outcome.doc, before);
});
|
|
|
|
test("cross-run replace with mixed marks inherits left-neighbor marks", () => {
  // Runs: "A" plain, "B" bold, "C" italic, "D" plain. The match "BC" covers two
  // differently marked runs and shares no prefix/suffix with the replacement
  // "X", so the test expects "X" to take the marks of the run to its left
  // ("A", which is plain).
  const runs = [
    textNode("A"),
    textNode("B", { marks: [{ type: "bold" }] }),
    textNode("C", { marks: [{ type: "italic" }] }),
    textNode("D"),
  ];
  const source = doc(paragraph(...runs));
  const edits = [{ find: "BC", replace: "X" }];

  const { doc: edited, results, failed } = applyTextEdits(source, edits);

  assert.deepEqual(results, [{ find: "BC", replacements: 1 }]);
  assert.deepEqual(failed, []);

  // Plain "A", plain "X" and plain "D" are expected to merge into one node.
  const outRuns = edited.content[0].content;
  assert.equal(outRuns.length, 1);
  assert.equal(outRuns[0].text, "AXD");
  assert.equal(outRuns[0].marks, undefined);
});
|
|
|
|
test("cross-run replace at block start inherits [] marks", () => {
  // The paragraph consists solely of the mixed-mark match "BC" (bold "B",
  // italic "C"), so there is no run to the left of the match to inherit from.
  const boldB = textNode("B", { marks: [{ type: "bold" }] });
  const italicC = textNode("C", { marks: [{ type: "italic" }] });
  const source = doc(paragraph(boldB, italicC));
  const edits = [{ find: "BC", replace: "X" }];

  const { doc: edited, results } = applyTextEdits(source, edits);

  assert.deepEqual(results, [{ find: "BC", replacements: 1 }]);

  // A single unmarked "X" node is expected.
  const outRuns = edited.content[0].content;
  assert.equal(outRuns.length, 1);
  assert.equal(outRuns[0].text, "X");
  assert.equal(outRuns[0].marks, undefined);
});
|
|
|
|
test("partial batch: good edits apply, the bad one goes to failed[]", () => {
  const source = doc(paragraph(textNode("alpha beta gamma")));
  // The middle edit has no match; the edits around it do.
  const edits = [
    { find: "alpha", replace: "ALPHA" },
    { find: "absent", replace: "X" },
    { find: "gamma", replace: "GAMMA" },
  ];

  const { doc: edited, results, failed } = applyTextEdits(source, edits);

  // Both matching edits are reported as applied, in order.
  const expectedResults = [
    { find: "alpha", replacements: 1 },
    { find: "gamma", replacements: 1 },
  ];
  assert.deepEqual(results, expectedResults);

  // The unmatched edit is reported on its own.
  assert.equal(failed.length, 1);
  const [miss] = failed;
  assert.equal(miss.find, "absent");
  assert.match(miss.reason, /not found/);

  assert.equal(edited.content[0].content[0].text, "ALPHA beta GAMMA");
});
|
|
|
|
test("a match that crosses an atom is refused, doc unchanged", () => {
  // Paragraph layout: "a", a hardBreak node, then "b". The locator "ab" would
  // have to run across the hardBreak, which this test treats as an invalid
  // match. Either an atom-specific reason or a plain not-found reason is
  // accepted, and the document must come back unchanged.
  const source = doc(
    paragraph(textNode("a"), { type: "hardBreak" }, textNode("b")),
  );
  const before = JSON.parse(JSON.stringify(source));
  const edits = [{ find: "ab", replace: "z" }];

  const outcome = applyTextEdits(source, edits);

  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /non-text inline node|not found/);
  assert.deepEqual(outcome.doc, before);
});
|
|
|
|
test("a TEXT node containing a literal U+FFFC matches/replaces normally", () => {
  // U+FFFC (OBJECT REPLACEMENT CHARACTER) is the placeholder used for atom
  // slots, but a real text node may legitimately contain that code unit. Such
  // a character does not come from an atom, so it must match and be replaced
  // like any other character, showing that atoms and literal-U+FFFC text are
  // told apart.
  //
  // The character is written as an explicit escape: it is invisible and easily
  // lost in copy/paste or rendering, which would silently turn this into a
  // plain "xy" test that no longer exercises the U+FFFC path.
  const textWithPlaceholder = "x\uFFFCy";
  const input = doc(paragraph(textNode(textWithPlaceholder)));

  const { doc: out, results, failed } = applyTextEdits(input, [
    { find: textWithPlaceholder, replace: "done" },
  ]);

  assert.deepEqual(results, [{ find: textWithPlaceholder, replacements: 1 }]);
  assert.deepEqual(failed, []);
  assert.equal(out.content[0].content[0].text, "done");
});
|
|
|
|
test("a no-op edit (find === replace) produces a doc deep-equal to the input", () => {
  // Replacing a string with itself counts as applied but must leave the
  // document structurally identical to the input, so a caller can detect
  // that nothing changed and skip writing it back.
  const source = doc(paragraph(textNode("unchanged text")));
  const before = JSON.parse(JSON.stringify(source));
  const edits = [{ find: "unchanged", replace: "unchanged" }];

  const { doc: edited, results } = applyTextEdits(source, edits);

  assert.deepEqual(results, [{ find: "unchanged", replacements: 1 }]);
  // Reported as applied, yet the output still equals the pre-call copy.
  assert.deepEqual(edited, before);
});
|
|
|
|
test("replaceAll replaces all occurrences", () => {
  // Two occurrences in the first paragraph plus one in the second.
  const source = doc(
    paragraph(textNode("foo and foo")),
    paragraph(textNode("more foo")),
  );
  const edits = [{ find: "foo", replace: "bar", replaceAll: true }];

  const { doc: edited, results } = applyTextEdits(source, edits);

  // All three occurrences are counted under the single edit.
  assert.deepEqual(results, [{ find: "foo", replacements: 3 }]);

  const [firstPara, secondPara] = edited.content;
  assert.equal(firstPara.content[0].text, "bar and bar");
  assert.equal(secondPara.content[0].text, "more bar");
});
|
|
|
|
test("replacement containing $&, $1, $$ is inserted LITERALLY (regression)", () => {
  const source = doc(paragraph(textNode("token here")));
  // Contains every `$` sequence that String.prototype.replace would expand.
  const literal = "price $& cost $1 dollars $$ end";
  const edits = [{ find: "token", replace: literal }];

  const { doc: edited } = applyTextEdits(source, edits);

  const resultText = edited.content[0].content[0].text;
  // The replacement shows up byte-for-byte, with no regex-style expansion.
  assert.equal(resultText, `${literal} here`);
  // In particular "$&" survived and did not re-inject the matched text.
  assert.ok(resultText.includes("$&"));
  assert.ok(!resultText.includes("token"));
});
|
|
|
|
test("$ patterns are inserted literally under replaceAll too", () => {
  const source = doc(paragraph(textNode("x and x")));
  const edits = [{ find: "x", replace: "$&$1$$", replaceAll: true }];

  const { doc: edited } = applyTextEdits(source, edits);

  // Both occurrences receive the raw `$` sequences unchanged.
  const resultText = edited.content[0].content[0].text;
  assert.equal(resultText, "$&$1$$ and $&$1$$");
});
|
|
|
|
test("empty replacement prunes the emptied text node", () => {
  // Replacing the whole italic run with "" would leave an empty text node;
  // the test expects that node to be dropped rather than kept empty.
  const italicRun = textNode("DELETE", { marks: [{ type: "italic" }] });
  const plainRun = textNode(" kept");
  const source = doc(paragraph(italicRun, plainRun));
  const edits = [{ find: "DELETE", replace: "" }];

  const { doc: edited, results } = applyTextEdits(source, edits);

  assert.deepEqual(results, [{ find: "DELETE", replacements: 1 }]);

  // Only the " kept" run is left in the paragraph.
  const outRuns = edited.content[0].content;
  assert.equal(outRuns.length, 1);
  assert.equal(outRuns[0].text, " kept");
});
|
|
|
|
test("multi-edit array applied in order", () => {
  const source = doc(paragraph(textNode("alpha beta")));
  const edits = [
    { find: "alpha", replace: "ALPHA" },
    { find: "beta", replace: "BETA" },
  ];

  const { doc: edited, results } = applyTextEdits(source, edits);

  // One result per edit, in the same order the edits were supplied.
  const expectedResults = [
    { find: "alpha", replacements: 1 },
    { find: "beta", replacements: 1 },
  ];
  assert.deepEqual(results, expectedResults);
  assert.equal(edited.content[0].content[0].text, "ALPHA BETA");
});
|
|
|
|
test("second edit can target text produced by the first (ordered application)", () => {
  const source = doc(paragraph(textNode("one")));
  // The second locator ("two") only exists after the first edit has run.
  const edits = [
    { find: "one", replace: "two" },
    { find: "two", replace: "three" },
  ];

  const { doc: edited, results } = applyTextEdits(source, edits);

  const expectedResults = [
    { find: "one", replacements: 1 },
    { find: "two", replacements: 1 },
  ];
  assert.deepEqual(results, expectedResults);
  assert.equal(edited.content[0].content[0].text, "three");
});
|
|
|
|
test("input doc is not mutated", () => {
  const source = doc(paragraph(textNode("immutable source")));
  const before = JSON.parse(JSON.stringify(source));
  const edits = [{ find: "immutable", replace: "changed" }];

  const { doc: edited } = applyTextEdits(source, edits);

  // The caller's document still equals its pre-call copy...
  assert.deepEqual(source, before);
  // ...and the result is a different object that carries the edit.
  assert.notEqual(edited, source);
  assert.equal(edited.content[0].content[0].text, "changed source");
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Markdown-normalization fallback (locating only; replace is always literal)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
test("markdown-wrapped find matches via normalization, preserving the mark", () => {
  // The document reads "Hello world" with "world" in bold. The locator
  // "**world**" does not occur verbatim, so the match is expected to come from
  // its markdown-stripped form "world".
  const plainRun = textNode("Hello ");
  const boldRun = textNode("world", { marks: [{ type: "bold" }] });
  const source = doc(paragraph(plainRun, boldRun));
  const edits = [{ find: "**world**", replace: "earth" }];

  const { doc: edited, results, failed } = applyTextEdits(source, edits);

  assert.equal(failed.length, 0);
  assert.equal(results.length, 1);

  const [applied] = results;
  // The result echoes the locator exactly as supplied, not the stripped form.
  assert.equal(applied.find, "**world**");
  assert.equal(applied.replacements, 1);
  assert.equal(applied.normalized, true);

  // The replacement text keeps the bold mark of the matched run.
  const outRuns = edited.content[0].content;
  const replaced = outRuns.find((n) => n.text === "earth");
  assert.deepEqual(replaced.marks, [{ type: "bold" }]);
});
|
|
|
|
test("exact match wins: literal '2 * 3' matches without normalization", () => {
  // The asterisk here is literal text, and the locator occurs verbatim.
  const source = doc(paragraph(textNode("compute 2 * 3 now")));
  const edits = [{ find: "2 * 3", replace: "6" }];

  const { results, failed } = applyTextEdits(source, edits);

  assert.equal(failed.length, 0);
  assert.equal(results.length, 1);

  const [applied] = results;
  assert.equal(applied.replacements, 1);
  // The verbatim match must not be flagged as normalized.
  assert.ok(!applied.normalized);
});
|
|
|
|
test("normalization yielding >1 matches without replaceAll is an ambiguity failure", () => {
  // Two paragraphs, each a bold "world". "**world**" matches nothing verbatim,
  // while the stripped "world" would match twice; the edit must be rejected as
  // ambiguous instead of picking one.
  const makeBoldWorld = () =>
    paragraph(textNode("world", { marks: [{ type: "bold" }] }));
  const source = doc(makeBoldWorld(), makeBoldWorld());
  const edits = [{ find: "**world**", replace: "earth" }];

  const { results, failed } = applyTextEdits(source, edits);

  assert.equal(results.length, 0);
  assert.equal(failed.length, 1);
  assert.match(failed[0].reason, /matches/);
});
|
|
|
|
test("stripped locator that only matches across an atom is refused (atom-aware reason)", () => {
  // Paragraph layout: "a", a hardBreak node, then "b" (per the original note,
  // the block's plain text represents the atom with the \uFFFC placeholder).
  // The markdown-wrapped locator "**ab**" never matches verbatim, and its
  // stripped form "ab" could only match by running across the hardBreak.
  // This test requires the atom-specific failure reason for that stripped
  // locator, and an unchanged document.
  const source = doc(
    paragraph(textNode("a"), { type: "hardBreak" }, textNode("b")),
  );
  const before = JSON.parse(JSON.stringify(source));
  const edits = [{ find: "**ab**", replace: "z" }];

  const outcome = applyTextEdits(source, edits);

  assert.deepEqual(outcome.results, []);
  assert.equal(outcome.failed.length, 1);
  assert.match(outcome.failed[0].reason, /non-text inline node/);
  assert.deepEqual(outcome.doc, before);
});
|
|
|
|
test("genuine miss appends a 'Closest block text' hint", () => {
  const source = doc(
    paragraph(textNode("The quick brown fox jumps over the lazy dog")),
  );
  // The locator matches neither verbatim nor stripped, but some of its words
  // do occur in the block, so the failure reason is expected to carry a
  // "closest text" hint quoting the block.
  const edits = [{ find: "fox jumps now", replace: "x" }];

  const { failed } = applyTextEdits(source, edits);

  assert.equal(failed.length, 1);
  const { reason } = failed[0];
  assert.match(reason, /Closest block text/);
  assert.match(reason, /quick brown fox/);
});
|