diff --git a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts index 643b31df..7b3b0a02 100644 --- a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts +++ b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts @@ -622,13 +622,19 @@ export class AiChatToolsService { '(so editing plain text next to a bold word keeps it bold, and ' + 'editing inside a bold word keeps the new text bold). Each find must ' + 'match exactly once unless replaceAll is set. The batch applies what ' + - 'it can and returns applied[] + failed[]; a fully-unmatched batch ' + - 'writes nothing and errors. find should be the literal rendered text ' + - '(no markdown). Markdown wrappers (**bold**, *italic*, `code`) and ' + - 'trailing emoji are tolerated via a strip-and-retry fallback, but ' + - 'plain text is preferred. Examples: edits:[{find:"teh",replace:"the"}]; ' + - 'edits:[{find:"Hello world",replace:"Hello there"}] (crosses a bold ' + - 'boundary). Reversible: the previous version is kept in page history.', + 'it can and returns applied[] + failed[] plus a verify change-report ' + + '(the text/marks/structure that ACTUALLY changed — read it to confirm ' + + 'your edit landed; do not assume success); a fully-unmatched batch ' + + 'writes nothing and errors. find and replace are LITERAL text, not ' + + 'markdown. This tool edits plain text ONLY and CANNOT add or remove ' + + 'formatting marks: a formatting change — find/replace that differ only ' + + 'in markdown markers (e.g. find:"~~x~~", replace:"x"), or a replace ' + + 'containing **bold**/~~strike~~/`code` wrappers — is REFUSED into ' + + 'failed[]. To change bold/italic/strike/code/link, read the block with ' + + 'getPageJson and use patchNode (or updatePageJson) to set its marks. ' + + 'Examples: edits:[{find:"teh",replace:"the"}]; edits:[{find:"Hello ' + + 'world",replace:"Hello there"}] (crosses a bold boundary). Reversible: ' + + 'the previous version is kept in page history.', inputSchema: z.object({ pageId: z.string().describe('The id of the page to edit.'), edits: z diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index b8111cc9..e0366a30 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -14,7 +14,7 @@ import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getN import { withPageLock } from "./lib/page-lock.js"; import { applyTextEdits, } from "./lib/json-edit.js"; import { getCollabToken, performLogin } from "./lib/auth-utils.js"; -import { diffDocs } from "./lib/diff.js"; +import { diffDocs, summarizeChange } from "./lib/diff.js"; import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, } from "./lib/transforms.js"; import vm from "node:vm"; // Supported image types, kept as two lookup tables so both a local file @@ -234,6 +234,11 @@ export class DocmostClient { * `transform` receives the live ProseMirror doc and returns the NEW full doc * to write, or `null` to abort with no write. Errors thrown by `transform` * propagate to the caller. + * + * Resolves a `MutationResult { doc, verify }` mirroring mutatePageContent, so + * every content mutator (including replaceImage) can return a verifiable + * change report. The report is computed AFTER the atomic read->write and + * never throws. */ mutateLiveContentUnlocked(pageId, collabToken, transform) { const CONNECT_TIMEOUT_MS = 25000; @@ -248,7 +253,9 @@ export class DocmostClient { let connectTimer; let persistTimer; let unsyncedHandler; - let lastWrittenDoc; + // The verifiable result resolved on every success/abort path. Set on abort + // (no-op report) and after a real write (computed change report). + let mutationResult; const cleanup = () => { if (connectTimer) clearTimeout(connectTimer); @@ -288,7 +295,7 @@ export class DocmostClient { return; } if (provider.unsyncedChanges === 0) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); return; } persistTimer = setTimeout(() => { @@ -296,7 +303,7 @@ export class DocmostClient { }, PERSIST_TIMEOUT_MS); unsyncedHandler = (data) => { if (data.number === 0 && !connectionLost) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); } }; provider.on("unsyncedChanges", unsyncedHandler); @@ -323,6 +330,7 @@ export class DocmostClient { // CRITICAL: keep everything between reading and writing the live doc // synchronous (no await) so no remote update can interleave. let newDoc; + let beforeDoc; try { let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); if (!liveDoc || @@ -330,11 +338,24 @@ export class DocmostClient { !Array.isArray(liveDoc.content)) { liveDoc = { type: "doc", content: [] }; } + // Snapshot the before-doc for the change report (safe deep clone). + beforeDoc = JSON.parse(JSON.stringify(liveDoc)); newDoc = transform(liveDoc); if (newDoc == null) { - // Transform aborted — write nothing, return the live doc. - lastWrittenDoc = liveDoc; - finish(null, liveDoc); + // Transform aborted — write nothing, return the live doc with a + // no-op change report. + mutationResult = { + doc: liveDoc, + verify: { + changed: false, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "no changes (transform aborted)", + }, + }; + finish(null, mutationResult); return; } const tempDoc = TiptapTransformer.toYdoc(newDoc, "default", docmostExtensions); @@ -350,7 +371,13 @@ export class DocmostClient { finish(e instanceof Error ? e : new Error(String(e))); return; } - lastWrittenDoc = newDoc; + // Compute the verifiable change report AFTER the transact write: it + // only needs the JSON before/after, so it cannot affect the atomic + // read->write window, and summarizeChange never throws. + mutationResult = { + doc: newDoc, + verify: summarizeChange(beforeDoc, newDoc), + }; waitForPersistence(); }, onAuthenticationFailed: () => { @@ -628,7 +655,7 @@ export class DocmostClient { // Track insertion in an outer var, reset per-transform, so a collab retry // recomputes it cleanly (mirrors insertNode's pattern). let inserted = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { inserted = false; const { doc: nd, inserted: ins } = insertTableRow(liveDoc, tableRef, cells, index); inserted = ins; @@ -639,7 +666,7 @@ export class DocmostClient { if (!inserted) { throw new Error(`table_insert_row: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`); } - return { success: true, table: tableRef, inserted: true }; + return { success: true, table: tableRef, inserted: true, verify: mutation.verify }; } /** * Delete the row at 0-based `index` from a table on the LIVE collab document. @@ -650,7 +677,7 @@ export class DocmostClient { await this.ensureAuthenticated(); const collabToken = await this.getCollabTokenWithReauth(); let deleted = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { deleted = false; const { doc: nd, deleted: del } = deleteTableRow(liveDoc, tableRef, index); deleted = del; @@ -661,7 +688,7 @@ export class DocmostClient { if (!deleted) { throw new Error(`table_delete_row: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`); } - return { success: true, table: tableRef, deleted: true }; + return { success: true, table: tableRef, deleted: true, verify: mutation.verify }; } /** * Set the plain-text content of cell `[row, col]` (0-based) in a table on the @@ -674,7 +701,7 @@ export class DocmostClient { await this.ensureAuthenticated(); const collabToken = await this.getCollabTokenWithReauth(); let updated = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { updated = false; const { doc: nd, updated: upd } = updateTableCell(liveDoc, tableRef, row, col, text); updated = upd; @@ -685,7 +712,7 @@ export class DocmostClient { if (!updated) { throw new Error(`table_update_cell: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`); } - return { success: true, table: tableRef, row, col }; + return { success: true, table: tableRef, row, col, verify: mutation.verify }; } /** * Create a new page with title and content. @@ -781,9 +808,10 @@ export class DocmostClient { await this.client.post("/pages/update", { pageId, title }); } let collabToken = ""; + let mutation; try { collabToken = await this.getCollabTokenWithReauth(); - await updatePageContentRealtime(pageId, content, collabToken, this.apiUrl); + mutation = await updatePageContentRealtime(pageId, content, collabToken, this.apiUrl); } catch (error) { // Verbose diagnostics (incl. anything that could expose a token prefix) @@ -802,6 +830,7 @@ export class DocmostClient { modified: true, message: "Page updated successfully.", pageId: pageId, + verify: mutation.verify, }; } /** @@ -984,12 +1013,13 @@ export class DocmostClient { await this.client.post("/pages/update", { pageId, title }); } const collabToken = await this.getCollabTokenWithReauth(); - await replacePageContent(pageId, doc, collabToken, this.apiUrl); + const mutation = await replacePageContent(pageId, doc, collabToken, this.apiUrl); return { success: true, modified: true, message: "Page content replaced from ProseMirror JSON.", pageId, + verify: mutation.verify, }; } /** @@ -1037,7 +1067,7 @@ export class DocmostClient { const { meta, body, comments } = parseDocmostMarkdown(fullMarkdown); const doc = await markdownToProseMirror(body); const collabToken = await this.getCollabTokenWithReauth(); - await replacePageContent(pageId, doc, collabToken, this.apiUrl); + const mutation = await replacePageContent(pageId, doc, collabToken, this.apiUrl); // Collect distinct comment ids that actually became comment marks in the doc. const collectCommentIds = (node, acc) => { if (!node || typeof node !== "object") @@ -1064,6 +1094,7 @@ export class DocmostClient { pageId, anchoredCommentCount: anchoredIds.size, commentsInFile: Array.isArray(comments) ? comments.length : 0, + verify: mutation.verify, }; // Warn (non-fatal) if the file was exported from a DIFFERENT page. if (meta?.pageId && meta.pageId !== pageId) { @@ -1110,12 +1141,13 @@ export class DocmostClient { // (parity with updatePageJson; harmless for already-stored source content). this.validateDocUrls(content); const collabToken = await this.getCollabTokenWithReauth(); - await replacePageContent(targetPageId, content, collabToken, this.apiUrl); + const mutation = await replacePageContent(targetPageId, content, collabToken, this.apiUrl); return { success: true, sourcePageId, targetPageId, copiedNodes: content.content.length, + verify: mutation.verify, }; } /** @@ -1137,7 +1169,7 @@ export class DocmostClient { // we must NOT write (no spurious history version) and must not claim a write // happened. let wrote = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { wrote = false; const r = applyTextEdits(liveDoc, edits); results = r.results; @@ -1146,10 +1178,10 @@ export class DocmostClient { // return from the transform as "write nothing"). if (r.results.length === 0) return null; - // Edits "applied" but produced an identical document: skip the write so no - // new history version is created. Stable structural comparison via - // JSON.stringify (both docs come from the same deep-copied source, so key - // order is stable). + // Edits "applied" but produced an identical document: skip the write so + // no new history version is created. Stable structural comparison via + // JSON.stringify (both docs come from the same deep-copied source, so + // key order is stable). if (JSON.stringify(r.doc) === JSON.stringify(liveDoc)) return null; wrote = true; @@ -1170,9 +1202,10 @@ export class DocmostClient { applied: results, failed, message: "No changes written (edits produced identical content).", + verify: mutation.verify, }; } - return { + const result = { success: true, pageId, applied: results, @@ -1180,7 +1213,19 @@ export class DocmostClient { message: (failed?.length ?? 0) ? `Applied ${results?.length ?? 0} edit(s); ${failed.length} failed (see failed[]). Node ids and formatting preserved.` : "Text edits applied (node ids and formatting preserved).", + verify: mutation.verify, }; + // If any applied edit matched only after stripping markdown (the + // normalized fallback), warn that edit_page_text preserved existing marks + // and did NOT change formatting — so a caller who intended a formatting + // change is pointed at patch_node. + if (results?.some((r) => r.normalized === true)) { + result.warning = + "Some edits matched only after stripping markdown from your find string; " + + "edit_page_text preserved existing marks (it did not change bold/strike/etc.). " + + "If you intended a formatting change, use patch_node."; + } + return result; } /** * Replace EVERY node whose attrs.id === nodeId (recursively, including nodes @@ -1212,7 +1257,7 @@ export class DocmostClient { // Track the replacement count in an outer var, reset per-transform, so a // collab retry recomputes it cleanly (mirrors replaceImage's pattern). let replaced = 0; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { replaced = 0; const { doc: nd, replaced: r } = replaceNodeById(liveDoc, nodeId, target); replaced = r; @@ -1223,7 +1268,7 @@ export class DocmostClient { if (replaced === 0) { throw new Error(`patch_node: no node with id "${nodeId}" found on page ${pageId}`); } - return { success: true, replaced, nodeId }; + return { success: true, replaced, nodeId, verify: mutation.verify }; } /** * Insert a node relative to an anchor (or append it at the top level). @@ -1262,7 +1307,7 @@ export class DocmostClient { // Track insertion in an outer var, reset per-transform, so a collab retry // recomputes it cleanly (mirrors replaceImage's pattern). let inserted = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { inserted = false; const { doc: nd, inserted: ins } = insertNodeRelative(liveDoc, node, opts); inserted = ins; @@ -1282,7 +1327,12 @@ export class DocmostClient { : ""; throw new Error(`insert_node: anchor not found (${anchorDesc}) on page ${pageId}.${hint}`); } - return { success: true, inserted: true, position: opts.position }; + return { + success: true, + inserted: true, + position: opts.position, + verify: mutation.verify, + }; } /** * Remove EVERY node whose attrs.id === nodeId (recursively, including nodes @@ -1296,7 +1346,7 @@ export class DocmostClient { // Track the deletion count in an outer var, reset per-transform, so a // collab retry recomputes it cleanly (mirrors replaceImage's pattern). let deleted = 0; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { deleted = 0; const { doc: nd, deleted: d } = deleteNodeById(liveDoc, nodeId); deleted = d; @@ -1307,7 +1357,7 @@ export class DocmostClient { if (deleted === 0) { throw new Error(`delete_node: no node with id "${nodeId}" found on page ${pageId}`); } - return { success: true, deleted, nodeId }; + return { success: true, deleted, nodeId, verify: mutation.verify }; } /** Build the public share URL for a page. */ shareUrl(shareKey, slugId) { @@ -1479,7 +1529,7 @@ export class DocmostClient { let anchored = false; try { const collabToken = await this.getCollabTokenWithReauth(); - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { const doc = liveDoc && liveDoc.type === "doc" ? liveDoc : { type: "doc", content: [] }; @@ -1544,6 +1594,7 @@ export class DocmostClient { // exists). Abort the write so nothing changes. return null; }); + result.verify = mutation.verify; } catch (e) { // The comment record already exists; an anchoring failure must not turn @@ -1925,7 +1976,7 @@ export class DocmostClient { // concurrent edits/comments/images are preserved and parallel insert_image // calls (serialized by the per-page lock) each see the previous insertion. let placement; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { const doc = liveDoc && liveDoc.type === "doc" ? liveDoc : { type: "doc", content: [] }; @@ -1989,6 +2040,7 @@ export class DocmostClient { attachmentId: up.attachmentId, src: up.src, placement, + verify: mutation.verify, }; } /** @@ -2088,7 +2140,7 @@ export class DocmostClient { walk(node.content); } }; - await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => { + const mutation = await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => { // Reset per-transform so collab retries recompute cleanly (no double-count). replaced = 0; const doc = liveDoc && liveDoc.type === "doc" @@ -2101,6 +2153,11 @@ export class DocmostClient { return null; // no match -> skip the write entirely return doc; }); + // KNOWN LIMITATION: a same-count image SRC swap (image count unchanged, no + // text/mark change) may still report verify.changed === false, because the + // text+marks+integrity-count model in summarizeChange does not inspect + // image `src`/attachmentId attributes. That is acceptable here — the + // replace is confirmed by `replaced` below, and verify is supplementary. if (replaced === 0) { // The pass-1 SCAN found the target (matchFound was true) and we already // uploaded the new attachment, but pass-2 matched nothing — a concurrent @@ -2118,6 +2175,7 @@ export class DocmostClient { src: up.src, orphanedAttachmentId: up.attachmentId, warning: "target image was removed concurrently; uploaded attachment is unreferenced", + verify: mutation.verify, }; } return { @@ -2127,6 +2185,7 @@ export class DocmostClient { oldAttachmentId, newAttachmentId: up.attachmentId, src: up.src, + verify: mutation.verify, }; }); } @@ -2178,8 +2237,12 @@ export class DocmostClient { // JSON write path) before writing it back. this.validateDocUrls(version.content); const collabToken = await this.getCollabTokenWithReauth(); - await mutatePageContent(version.pageId, collabToken, this.apiUrl, () => version.content); - return { pageId: version.pageId, restoredFrom: historyId }; + const mutation = await mutatePageContent(version.pageId, collabToken, this.apiUrl, () => version.content); + return { + pageId: version.pageId, + restoredFrom: historyId, + verify: mutation.verify, + }; } /** * Diff two versions of a page and return a Docmost-equivalent change set. @@ -2332,7 +2395,7 @@ export class DocmostClient { } // Apply atomically against the live doc. const collabToken = await this.getCollabTokenWithReauth(); - await mutatePageContent(pageId, collabToken, this.apiUrl, runTransform); + const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, runTransform); // Optionally delete consumed comments (best-effort; a delete failure must // not undo the successful write). const deletedComments = []; @@ -2366,6 +2429,7 @@ export class DocmostClient { diff: diffDocs(oldDoc, newDoc), deletedComments, log: ctx.log, + verify: mutation.verify, }; } } diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js index d759a53a..b9ed854c 100644 --- a/packages/mcp/build/index.js +++ b/packages/mcp/build/index.js @@ -330,7 +330,11 @@ export function createDocmostMcpServer(config) { "text is preferred. Examples: edits:[{find:\"teh\"," + "replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " + "(crosses a bold boundary). This is the preferred tool for fixing " + - "wording, typos, numbers, names.", + "wording, typos, numbers, names. It edits plain text only and CANNOT " + + "change formatting marks: formatting changes (markdown markers in " + + "find/replace) are refused — use patch_node/update_page_json to change " + + "marks. The result includes a `verify` change-report of what actually " + + "changed (text/block/mark deltas).", inputSchema: { pageId: z.string().describe("ID of the page to edit"), edits: z diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index 6c7386e1..7b47b9e9 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -8,6 +8,7 @@ import { JSDOM } from "jsdom"; import { docmostExtensions } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; +import { summarizeChange } from "./diff.js"; // Setup DOM environment for Tiptap HTML parsing in Node.js const dom = new JSDOM(""); global.window = dom.window; @@ -345,7 +346,11 @@ const PERSIST_TIMEOUT_MS = 20000; * ProseMirror doc to write, or `null` to abort with no write (a no-op). If * `transform` throws, the error is propagated to the caller (not swallowed). * - * Returns the doc that was written, or the live doc when the transform aborted. + * Resolves a `MutationResult { doc, verify }`: `doc` is the doc that was + * written (or the live doc when the transform aborted), and `verify` is a + * verifiable change report (text/block/mark deltas) of what actually changed. + * The report is computed AFTER the atomic read->write, so it never widens the + * read->write window, and it never throws (it can NEVER break a write). */ export async function mutatePageContent(pageId, collabToken, baseUrl, transform) { return withPageLock(pageId, () => { @@ -415,7 +420,7 @@ export async function mutatePageContent(pageId, collabToken, baseUrl, transform) return; } if (provider.unsyncedChanges === 0) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); return; } persistTimer = setTimeout(() => { @@ -427,12 +432,14 @@ export async function mutatePageContent(pageId, collabToken, baseUrl, transform) // the counter back to 0 without our write being re-transmitted; in // that case let the disconnect/close error win instead. if (data.number === 0 && !connectionLost) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); } }; provider.on("unsyncedChanges", unsyncedHandler); }; - let lastWrittenDoc; + // The verifiable result resolved on every success/abort path. Set on + // abort (no-op report) and after a real write (computed change report). + let mutationResult; provider = new HocuspocusProvider({ url: wsUrl, name: `page.${pageId}`, @@ -478,6 +485,7 @@ export async function mutatePageContent(pageId, collabToken, baseUrl, transform) // not yielded, no incoming remote update can interleave, so any // already-synced concurrent edits are preserved in liveDoc. let newDoc; + let beforeDoc; try { let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); if (!liveDoc || @@ -485,11 +493,25 @@ export async function mutatePageContent(pageId, collabToken, baseUrl, transform) !Array.isArray(liveDoc.content)) { liveDoc = { type: "doc", content: [] }; } + // Snapshot the before-doc for the change report. Docs are + // JSON-serializable, so this is a safe deep clone. + beforeDoc = JSON.parse(JSON.stringify(liveDoc)); newDoc = transform(liveDoc); if (newDoc == null) { - // Transform aborted — write nothing, return the live doc. - lastWrittenDoc = liveDoc; - finish(null, liveDoc); + // Transform aborted — write nothing, return the live doc with a + // no-op change report. + mutationResult = { + doc: liveDoc, + verify: { + changed: false, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "no changes (transform aborted)", + }, + }; + finish(null, mutationResult); return; } const tempDoc = buildYDoc(newDoc); @@ -509,7 +531,13 @@ export async function mutatePageContent(pageId, collabToken, baseUrl, transform) finish(e instanceof Error ? e : new Error(String(e))); return; } - lastWrittenDoc = newDoc; + // Compute the verifiable change report AFTER the transact write: it + // only needs the JSON before/after, so it cannot affect the atomic + // read->write window, and summarizeChange never throws. + mutationResult = { + doc: newDoc, + verify: summarizeChange(beforeDoc, newDoc), + }; if (process.env.DEBUG) console.error("Content written, waiting for server to persist..."); waitForPersistence(); @@ -540,7 +568,7 @@ export async function replacePageContent(pageId, prosemirrorDoc, collabToken, ba prosemirrorDoc.type !== "doc") { throw new Error("replacePageContent: invalid ProseMirror document"); } - await mutatePageContent(pageId, collabToken, baseUrl, () => prosemirrorDoc); + return await mutatePageContent(pageId, collabToken, baseUrl, () => prosemirrorDoc); } /** * Markdown update path (kept for backwards compatibility). @@ -549,5 +577,5 @@ export async function replacePageContent(pageId, prosemirrorDoc, collabToken, ba */ export async function updatePageContentRealtime(pageId, markdownContent, collabToken, baseUrl) { const tiptapJson = await markdownToProseMirror(markdownContent); - await mutatePageContent(pageId, collabToken, baseUrl, () => tiptapJson); + return await mutatePageContent(pageId, collabToken, baseUrl, () => tiptapJson); } diff --git a/packages/mcp/build/lib/diff.js b/packages/mcp/build/lib/diff.js index 5205aff1..f5e7ab44 100644 --- a/packages/mcp/build/lib/diff.js +++ b/packages/mcp/build/lib/diff.js @@ -271,3 +271,131 @@ export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примеча }; return { ...partial, markdown: renderMarkdown(partial, fellBack) }; } +/** + * Recursively walk every `text` node and tally the count of each mark by + * `mark.type` (e.g. `{ bold: 5, strike: 3, link: 2 }`). Pure and never throws. + */ +function markCounts(doc) { + const counts = {}; + const visit = (node) => { + if (!node || typeof node !== "object") + return; + if (node.type === "text" && Array.isArray(node.marks)) { + for (const m of node.marks) { + if (m && typeof m.type === "string") { + counts[m.type] = (counts[m.type] || 0) + 1; + } + } + } + if (Array.isArray(node.content)) + for (const c of node.content) + visit(c); + }; + visit(doc); + return counts; +} +/** + * Build a VerifyReport for a content mutation. Pure and never throws — on any + * internal error it returns a minimal "changed (diff unavailable)" report so it + * can NEVER break a write. + * + * `changed` is VALUE-based, not JSON-string-based: it is derived from the actual + * deltas (text chars, blocks, mark counts, structural integrity counts), so two + * value-equal docs that differ only in JSON key order report cleanly as + * `changed:false` / "no content change" rather than a misleading +0/-0 change. + * + * The structural integrity delta (from diffDocs's `integrity` tuples) is what + * makes `changed` true for an image/table/callout/link count change that diffs + * to zero text — closing a verify blind spot for insert_image, delete_node on a + * table, etc. + */ +export function summarizeChange(before, after) { + try { + const diff = diffDocs(before, after); + // Per-mark-type delta: include a type only when its count actually changed. + const beforeMarks = markCounts(before); + const afterMarks = markCounts(after); + const marks = {}; + for (const type of new Set([ + ...Object.keys(beforeMarks), + ...Object.keys(afterMarks), + ])) { + const b = beforeMarks[type] || 0; + const a = afterMarks[type] || 0; + if (b !== a) + marks[type] = [b, a]; + } + // Structural integrity delta from diffDocs: count-based [old,new] tuples for + // images/links/tables/callouts. Include a type only when old != new. + const integrity = diff.integrity; + const structure = {}; + const countTypes = [ + "images", + "links", + "tables", + "callouts", + ]; + for (const type of countTypes) { + const [b, a] = integrity[type]; + if (b !== a) + structure[type] = [b, a]; + } + const textInserted = diff.summary.inserted; + const textDeleted = diff.summary.deleted; + const blocksChanged = diff.summary.blocksChanged; + const hasMarkDelta = Object.keys(marks).length > 0; + const hasStructureDelta = Object.keys(structure).length > 0; + // VALUE-based change decision: ignore JSON key-order no-ops entirely. + const changed = textInserted > 0 || + textDeleted > 0 || + blocksChanged > 0 || + hasMarkDelta || + hasStructureDelta; + if (!changed) { + return { + changed: false, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "no content change", + }; + } + const parts = []; + // Only mention text/blocks when they actually changed (avoid a misleading + // "+0/-0 chars, 0 block(s)" prefix on a pure mark/structure change). + if (textInserted > 0 || textDeleted > 0 || blocksChanged > 0) { + parts.push(`+${textInserted}/-${textDeleted} chars, ${blocksChanged} block(s)`); + } + const markParts = Object.entries(marks).map(([type, [b, a]]) => `${type} ${b}→${a}`); + if (markParts.length > 0) + parts.push(`marks: ${markParts.join(", ")}`); + const structureParts = Object.entries(structure).map(([type, [b, a]]) => `${type} ${b}→${a}`); + if (structureParts.length > 0) + parts.push(structureParts.join(", ")); + // `changed` is true here, so at least one group is present and parts is non-empty. + const summary = `changed: ${parts.join("; ")}`; + const report = { + changed: true, + textInserted, + textDeleted, + blocksChanged, + marks, + summary, + }; + if (hasStructureDelta) + report.structure = structure; + return report; + } + catch { + // A pathological pair must never break a write: degrade to a minimal report. + return { + changed: true, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "changed (diff unavailable)", + }; + } +} diff --git a/packages/mcp/build/lib/json-edit.js b/packages/mcp/build/lib/json-edit.js index c4d8a79e..4a98a4c5 100644 --- a/packages/mcp/build/lib/json-edit.js +++ b/packages/mcp/build/lib/json-edit.js @@ -11,7 +11,7 @@ * keeps the inserted text bold. This is the safe alternative to a full markdown * re-import for small wording fixes. */ -import { stripInlineMarkdown } from "./text-normalize.js"; +import { stripInlineMarkdown, stripBalancedWrappers } from "./text-normalize.js"; /** Placeholder code unit standing in for one opaque (non-text) inline node. */ const ATOM_PLACEHOLDER = ""; // OBJECT REPLACEMENT CHARACTER /** @@ -226,6 +226,29 @@ export function applyTextEdits(doc, edits) { for (const edit of edits) { if (!edit.find) throw new Error("edit.find must be a non-empty string"); + // HARD-REFUSE formatting changes. edit_page_text edits PLAIN TEXT only and + // writes the replacement verbatim, so it cannot add/remove marks. We refuse + // only a pure formatting TOGGLE: find and replace differ ONLY by balanced + // markdown markers (e.g. find:"~~$69~~" / replace:"$69", or find:"M5Stack" / + // replace:"**M5Stack**" which would write literal `**`). + // + // The detector is the STRICT stripBalancedWrappers, NOT the lenient locator + // stripInlineMarkdown: the lenient one also trims whitespace/emoji and + // collapses lone `*`/`_` runs, which gives false positives on ordinary + // plain-text edits (trailing-space trim, snake_case, `2 * 3 * 4`, URLs with + // underscores) and wrongly refuses them. Comparing the strict strip of both + // sides symmetrically catches every real formatting toggle while leaving + // plain text alone; a typo fix wrapped in markdown still applies because its + // stripped find != stripped replace. + const formattingOnly = edit.find !== edit.replace && + stripBalancedWrappers(edit.find) === stripBalancedWrappers(edit.replace); + if (formattingOnly) { + failed.push({ + find: edit.find, + reason: "edit_page_text edits plain text only and cannot add or remove formatting marks (bold/italic/strike/code/link); it writes the replacement as LITERAL text. This edit looks like a formatting change (markdown markers in find/replace). To change marks, read the block with get_page_json and use patch_node (or update_page_json) to set the node's marks array.", + }); + continue; + } // Gather every inline block in document order (recurse the whole tree so // nested containers — callouts, list items, table cells, blockquotes — are // all covered). diff --git a/packages/mcp/build/lib/text-normalize.js b/packages/mcp/build/lib/text-normalize.js index f1aebf4f..4db72e4b 100644 --- a/packages/mcp/build/lib/text-normalize.js +++ b/packages/mcp/build/lib/text-normalize.js @@ -24,6 +24,54 @@ const WRAPPER_PATTERNS = [ /``([^`]+?)``/g, // ``x`` /`([^`]+?)`/g, // `x` ]; +/** Links/images -> their visible text. `!?` covers both `[t](u)` and `![a](s)`. */ +const LINK_IMAGE_RE = /!?\[([^\]]*)\]\([^)]*\)/g; +/** + * Apply ONLY the two balanced/link passes shared by both normalizers: first + * collapse links/images to their visible text, then collapse balanced inline + * wrappers repeatedly until stable. Does NOT trim decoration, does NOT guard + * against an empty result — it returns exactly the transformed string. + */ +function stripWrappersAndLinks(s) { + // 1. Links/images -> their visible text. + let out = s.replace(LINK_IMAGE_RE, "$1"); + // 2. Strip balanced wrappers, repeating until the string is stable so nested + // wrappers (`**_x_**`) and adjacent runs both collapse. + for (let pass = 0; pass < MAX_PASSES; pass++) { + const before = out; + for (const re of WRAPPER_PATTERNS) { + out = out.replace(re, "$1"); + } + if (out === before) + break; + } + return out; +} +/** + * STRICT formatting detector — distinct from the lenient locator + * normalization below. It strips ONLY what unambiguously is markdown markup: + * 1. links/images `[text](url)` -> `text`, `![alt](src)` -> `alt`, and + * 2. balanced inline `**`/`__`/`~~`/`*`/`_`/`` ` `` wrappers (repeat-until-stable), + * and DELIBERATELY does NOT trim leading/trailing whitespace, emoji, or lone + * marker chars (the lenient extras `stripInlineMarkdown` does in its step 3). + * + * It exists ONLY to recognize formatting-vs-plain INTENT in `applyTextEdits` + * (deciding whether find/replace differ purely by markdown markers). Because it + * skips the lenient trimming, ordinary plain-text edits are NOT misread as + * formatting: a trailing-space trim, snake_case (`my_var_name`), math (`2 * 3`), + * and identifiers/URLs with underscores all stay untouched here (their `_x_` / + * `*x*` runs are only collapsed when actually balanced, and even then they are + * compared symmetrically, so plain text never collapses to a different string). + * + * Do NOT use this for LOCATING — the locator fallback must keep using the + * lenient `stripInlineMarkdown` (it trims stray decoration so a find still + * matches the document's plain text). + */ +export function stripBalancedWrappers(s) { + if (typeof s !== "string" || s.length === 0) + return s; + return stripWrappersAndLinks(s); +} /** * Conservatively strip inline markdown from a locator string. * @@ -42,19 +90,8 @@ const WRAPPER_PATTERNS = [ export function stripInlineMarkdown(s) { if (typeof s !== "string" || s.length === 0) return s; - let out = s; - // 1. Links/images -> their visible text. `!?` covers both forms. - out = out.replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1"); - // 2. Strip balanced wrappers, repeating until the string is stable so nested - // wrappers (`**_x_**`) and adjacent runs both collapse. - for (let pass = 0; pass < MAX_PASSES; pass++) { - const before = out; - for (const re of WRAPPER_PATTERNS) { - out = out.replace(re, "$1"); - } - if (out === before) - break; - } + // 1 + 2. Shared link/image and balanced-wrapper passes. + let out = stripWrappersAndLinks(s); // 3. Trim leading/trailing decoration: whitespace, leftover markdown markers, // and emoji (Extended_Pictographic plus the VS16 / ZWJ joiners, plus the // regional-indicator range U+1F1E6–U+1F1FF for flag emoji, which are NOT diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index cbf3a31e..488e5998 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -20,6 +20,7 @@ import { mutatePageContent, buildCollabWsUrl, assertYjsEncodable, + MutationResult, } from "./lib/collaboration.js"; import { docmostExtensions } from "./lib/docmost-schema.js"; import { @@ -45,7 +46,7 @@ import { TextEditFailure, } from "./lib/json-edit.js"; import { getCollabToken, performLogin } from "./lib/auth-utils.js"; -import { diffDocs } from "./lib/diff.js"; +import { diffDocs, summarizeChange } from "./lib/diff.js"; import { blockText, walk, @@ -328,18 +329,23 @@ export class DocmostClient { * `transform` receives the live ProseMirror doc and returns the NEW full doc * to write, or `null` to abort with no write. Errors thrown by `transform` * propagate to the caller. + * + * Resolves a `MutationResult { doc, verify }` mirroring mutatePageContent, so + * every content mutator (including replaceImage) can return a verifiable + * change report. The report is computed AFTER the atomic read->write and + * never throws. */ private mutateLiveContentUnlocked( pageId: string, collabToken: string, transform: (liveDoc: any) => any | null, - ): Promise { + ): Promise { const CONNECT_TIMEOUT_MS = 25000; const PERSIST_TIMEOUT_MS = 20000; const ydoc = new Y.Doc(); const wsUrl = buildCollabWsUrl(this.apiUrl); - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { let provider: HocuspocusProvider | undefined; let applied = false; // onSynced may fire again on reconnect — apply once. let settled = false; @@ -347,7 +353,9 @@ export class DocmostClient { let connectTimer: ReturnType | undefined; let persistTimer: ReturnType | undefined; let unsyncedHandler: ((data: { number: number }) => void) | undefined; - let lastWrittenDoc: any; + // The verifiable result resolved on every success/abort path. Set on abort + // (no-op report) and after a real write (computed change report). + let mutationResult: MutationResult; const cleanup = () => { if (connectTimer) clearTimeout(connectTimer); @@ -364,12 +372,12 @@ export class DocmostClient { } }; - const finish = (err: Error | null, value?: any) => { + const finish = (err: Error | null, value?: MutationResult) => { if (settled) return; settled = true; cleanup(); if (err) reject(err); - else resolve(value); + else resolve(value as MutationResult); }; connectTimer = setTimeout(() => { @@ -383,7 +391,7 @@ export class DocmostClient { return; } if (provider.unsyncedChanges === 0) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); return; } persistTimer = setTimeout(() => { @@ -395,7 +403,7 @@ export class DocmostClient { }, PERSIST_TIMEOUT_MS); unsyncedHandler = (data: { number: number }) => { if (data.number === 0 && !connectionLost) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); } }; provider.on("unsyncedChanges", unsyncedHandler); @@ -431,6 +439,7 @@ export class DocmostClient { // CRITICAL: keep everything between reading and writing the live doc // synchronous (no await) so no remote update can interleave. let newDoc: any; + let beforeDoc: any; try { let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); if ( @@ -441,12 +450,26 @@ export class DocmostClient { liveDoc = { type: "doc", content: [] }; } + // Snapshot the before-doc for the change report (safe deep clone). + beforeDoc = JSON.parse(JSON.stringify(liveDoc)); + newDoc = transform(liveDoc); if (newDoc == null) { - // Transform aborted — write nothing, return the live doc. - lastWrittenDoc = liveDoc; - finish(null, liveDoc); + // Transform aborted — write nothing, return the live doc with a + // no-op change report. + mutationResult = { + doc: liveDoc, + verify: { + changed: false, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "no changes (transform aborted)", + }, + }; + finish(null, mutationResult); return; } @@ -467,7 +490,13 @@ export class DocmostClient { return; } - lastWrittenDoc = newDoc; + // Compute the verifiable change report AFTER the transact write: it + // only needs the JSON before/after, so it cannot affect the atomic + // read->write window, and summarizeChange never throws. + mutationResult = { + doc: newDoc, + verify: summarizeChange(beforeDoc, newDoc), + }; waitForPersistence(); }, onAuthenticationFailed: () => { @@ -799,25 +828,30 @@ export class DocmostClient { // Track insertion in an outer var, reset per-transform, so a collab retry // recomputes it cleanly (mirrors insertNode's pattern). let inserted = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { - inserted = false; - const { doc: nd, inserted: ins } = insertTableRow( - liveDoc, - tableRef, - cells, - index, - ); - inserted = ins; - if (!inserted) return null; // table not found -> skip the write entirely - return nd; - }); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + inserted = false; + const { doc: nd, inserted: ins } = insertTableRow( + liveDoc, + tableRef, + cells, + index, + ); + inserted = ins; + if (!inserted) return null; // table not found -> skip the write entirely + return nd; + }, + ); if (!inserted) { throw new Error( `table_insert_row: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`, ); } - return { success: true, table: tableRef, inserted: true }; + return { success: true, table: tableRef, inserted: true, verify: mutation.verify }; } /** @@ -830,20 +864,25 @@ export class DocmostClient { const collabToken = await this.getCollabTokenWithReauth(); let deleted = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { - deleted = false; - const { doc: nd, deleted: del } = deleteTableRow(liveDoc, tableRef, index); - deleted = del; - if (!deleted) return null; // table not found -> skip the write entirely - return nd; - }); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + deleted = false; + const { doc: nd, deleted: del } = deleteTableRow(liveDoc, tableRef, index); + deleted = del; + if (!deleted) return null; // table not found -> skip the write entirely + return nd; + }, + ); if (!deleted) { throw new Error( `table_delete_row: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`, ); } - return { success: true, table: tableRef, deleted: true }; + return { success: true, table: tableRef, deleted: true, verify: mutation.verify }; } /** @@ -864,26 +903,31 @@ export class DocmostClient { const collabToken = await this.getCollabTokenWithReauth(); let updated = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { - updated = false; - const { doc: nd, updated: upd } = updateTableCell( - liveDoc, - tableRef, - row, - col, - text, - ); - updated = upd; - if (!updated) return null; // table not found -> skip the write entirely - return nd; - }); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + updated = false; + const { doc: nd, updated: upd } = updateTableCell( + liveDoc, + tableRef, + row, + col, + text, + ); + updated = upd; + if (!updated) return null; // table not found -> skip the write entirely + return nd; + }, + ); if (!updated) { throw new Error( `table_update_cell: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`, ); } - return { success: true, table: tableRef, row, col }; + return { success: true, table: tableRef, row, col, verify: mutation.verify }; } /** @@ -994,9 +1038,15 @@ export class DocmostClient { } let collabToken = ""; + let mutation; try { collabToken = await this.getCollabTokenWithReauth(); - await updatePageContentRealtime(pageId, content, collabToken, this.apiUrl); + mutation = await updatePageContentRealtime( + pageId, + content, + collabToken, + this.apiUrl, + ); } catch (error: any) { // Verbose diagnostics (incl. anything that could expose a token prefix) // are gated behind DEBUG; the thrown Error below carries no token data. @@ -1018,6 +1068,7 @@ export class DocmostClient { modified: true, message: "Page updated successfully.", pageId: pageId, + verify: mutation.verify, }; } @@ -1237,13 +1288,19 @@ export class DocmostClient { } const collabToken = await this.getCollabTokenWithReauth(); - await replacePageContent(pageId, doc, collabToken, this.apiUrl); + const mutation = await replacePageContent( + pageId, + doc, + collabToken, + this.apiUrl, + ); return { success: true, modified: true, message: "Page content replaced from ProseMirror JSON.", pageId, + verify: mutation.verify, }; } @@ -1291,7 +1348,12 @@ export class DocmostClient { const { meta, body, comments } = parseDocmostMarkdown(fullMarkdown); const doc = await markdownToProseMirror(body); const collabToken = await this.getCollabTokenWithReauth(); - await replacePageContent(pageId, doc, collabToken, this.apiUrl); + const mutation = await replacePageContent( + pageId, + doc, + collabToken, + this.apiUrl, + ); // Collect distinct comment ids that actually became comment marks in the doc. const collectCommentIds = (node: any, acc: Set): Set => { if (!node || typeof node !== "object") return acc; @@ -1316,6 +1378,7 @@ export class DocmostClient { pageId, anchoredCommentCount: anchoredIds.size, commentsInFile: Array.isArray(comments) ? comments.length : 0, + verify: mutation.verify, }; // Warn (non-fatal) if the file was exported from a DIFFERENT page. if (meta?.pageId && meta.pageId !== pageId) { @@ -1374,13 +1437,19 @@ export class DocmostClient { this.validateDocUrls(content); const collabToken = await this.getCollabTokenWithReauth(); - await replacePageContent(targetPageId, content, collabToken, this.apiUrl); + const mutation = await replacePageContent( + targetPageId, + content, + collabToken, + this.apiUrl, + ); return { success: true, sourcePageId, targetPageId, copiedNodes: content.content.length, + verify: mutation.verify, }; } @@ -1405,22 +1474,27 @@ export class DocmostClient { // we must NOT write (no spurious history version) and must not claim a write // happened. let wrote = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { - wrote = false; - const r = applyTextEdits(liveDoc, edits); - results = r.results; - failed = r.failed; - // Nothing applied -> abort the write (mutatePageContent treats a null - // return from the transform as "write nothing"). - if (r.results.length === 0) return null; - // Edits "applied" but produced an identical document: skip the write so no - // new history version is created. Stable structural comparison via - // JSON.stringify (both docs come from the same deep-copied source, so key - // order is stable). - if (JSON.stringify(r.doc) === JSON.stringify(liveDoc)) return null; - wrote = true; - return r.doc; - }); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + wrote = false; + const r = applyTextEdits(liveDoc, edits); + results = r.results; + failed = r.failed; + // Nothing applied -> abort the write (mutatePageContent treats a null + // return from the transform as "write nothing"). + if (r.results.length === 0) return null; + // Edits "applied" but produced an identical document: skip the write so + // no new history version is created. Stable structural comparison via + // JSON.stringify (both docs come from the same deep-copied source, so + // key order is stable). + if (JSON.stringify(r.doc) === JSON.stringify(liveDoc)) return null; + wrote = true; + return r.doc; + }, + ); if ((results?.length ?? 0) === 0 && (failed?.length ?? 0) > 0) { // No edit applied: surface an aggregated, actionable error so the caller @@ -1440,10 +1514,11 @@ export class DocmostClient { applied: results, failed, message: "No changes written (edits produced identical content).", + verify: mutation.verify, }; } - return { + const result: any = { success: true, pageId, applied: results, @@ -1451,7 +1526,21 @@ export class DocmostClient { message: (failed?.length ?? 0) ? `Applied ${results?.length ?? 0} edit(s); ${failed!.length} failed (see failed[]). Node ids and formatting preserved.` : "Text edits applied (node ids and formatting preserved).", + verify: mutation.verify, }; + + // If any applied edit matched only after stripping markdown (the + // normalized fallback), warn that edit_page_text preserved existing marks + // and did NOT change formatting — so a caller who intended a formatting + // change is pointed at patch_node. + if (results?.some((r) => r.normalized === true)) { + result.warning = + "Some edits matched only after stripping markdown from your find string; " + + "edit_page_text preserved existing marks (it did not change bold/strike/etc.). " + + "If you intended a formatting change, use patch_node."; + } + + return result; } /** @@ -1489,13 +1578,18 @@ export class DocmostClient { // Track the replacement count in an outer var, reset per-transform, so a // collab retry recomputes it cleanly (mirrors replaceImage's pattern). let replaced = 0; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { - replaced = 0; - const { doc: nd, replaced: r } = replaceNodeById(liveDoc, nodeId, target); - replaced = r; - if (replaced === 0) return null; // no match -> skip the write entirely - return nd; - }); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + replaced = 0; + const { doc: nd, replaced: r } = replaceNodeById(liveDoc, nodeId, target); + replaced = r; + if (replaced === 0) return null; // no match -> skip the write entirely + return nd; + }, + ); if (replaced === 0) { throw new Error( @@ -1503,7 +1597,7 @@ export class DocmostClient { ); } - return { success: true, replaced, nodeId }; + return { success: true, replaced, nodeId, verify: mutation.verify }; } /** @@ -1564,13 +1658,18 @@ export class DocmostClient { // Track insertion in an outer var, reset per-transform, so a collab retry // recomputes it cleanly (mirrors replaceImage's pattern). let inserted = false; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { - inserted = false; - const { doc: nd, inserted: ins } = insertNodeRelative(liveDoc, node, opts); - inserted = ins; - if (!inserted) return null; // anchor not found -> skip the write entirely - return nd; - }); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + inserted = false; + const { doc: nd, inserted: ins } = insertNodeRelative(liveDoc, node, opts); + inserted = ins; + if (!inserted) return null; // anchor not found -> skip the write entirely + return nd; + }, + ); if (!inserted) { const anchorDesc = opts.anchorNodeId @@ -1587,7 +1686,12 @@ export class DocmostClient { ); } - return { success: true, inserted: true, position: opts.position }; + return { + success: true, + inserted: true, + position: opts.position, + verify: mutation.verify, + }; } /** @@ -1604,13 +1708,18 @@ export class DocmostClient { // Track the deletion count in an outer var, reset per-transform, so a // collab retry recomputes it cleanly (mirrors replaceImage's pattern). let deleted = 0; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { - deleted = 0; - const { doc: nd, deleted: d } = deleteNodeById(liveDoc, nodeId); - deleted = d; - if (deleted === 0) return null; // no match -> skip the write entirely - return nd; - }); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + deleted = 0; + const { doc: nd, deleted: d } = deleteNodeById(liveDoc, nodeId); + deleted = d; + if (deleted === 0) return null; // no match -> skip the write entirely + return nd; + }, + ); if (deleted === 0) { throw new Error( @@ -1618,7 +1727,7 @@ export class DocmostClient { ); } - return { success: true, deleted, nodeId }; + return { success: true, deleted, nodeId, verify: mutation.verify }; } /** Build the public share URL for a page. */ @@ -1817,7 +1926,7 @@ export class DocmostClient { let anchored = false; try { const collabToken = await this.getCollabTokenWithReauth(); - await mutatePageContent( + const mutation = await mutatePageContent( pageId, collabToken, this.apiUrl, @@ -1894,6 +2003,7 @@ export class DocmostClient { return null; }, ); + result.verify = mutation.verify; } catch (e) { // The comment record already exists; an anchoring failure must not turn // a successful create into an error. Report anchored:false instead. @@ -2337,7 +2447,11 @@ export class DocmostClient { // concurrent edits/comments/images are preserved and parallel insert_image // calls (serialized by the per-page lock) each see the previous insertion. let placement: "replaced" | "after" | "appended" | undefined; - await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { const doc = liveDoc && liveDoc.type === "doc" ? liveDoc @@ -2408,7 +2522,8 @@ export class DocmostClient { } return doc; - }); + }, + ); return { success: true, @@ -2416,6 +2531,7 @@ export class DocmostClient { attachmentId: up.attachmentId, src: up.src, placement, + verify: mutation.verify, }; } @@ -2529,18 +2645,27 @@ export class DocmostClient { } }; - await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => { - // Reset per-transform so collab retries recompute cleanly (no double-count). - replaced = 0; - const doc = - liveDoc && liveDoc.type === "doc" - ? liveDoc - : { type: "doc", content: [] }; - if (!Array.isArray(doc.content)) doc.content = []; - walk(doc.content); - if (replaced === 0) return null; // no match -> skip the write entirely - return doc; - }); + const mutation = await this.mutateLiveContentUnlocked( + pageId, + collabToken, + (liveDoc) => { + // Reset per-transform so collab retries recompute cleanly (no double-count). + replaced = 0; + const doc = + liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + if (!Array.isArray(doc.content)) doc.content = []; + walk(doc.content); + if (replaced === 0) return null; // no match -> skip the write entirely + return doc; + }, + ); + // KNOWN LIMITATION: a same-count image SRC swap (image count unchanged, no + // text/mark change) may still report verify.changed === false, because the + // text+marks+integrity-count model in summarizeChange does not inspect + // image `src`/attachmentId attributes. That is acceptable here — the + // replace is confirmed by `replaced` below, and verify is supplementary. if (replaced === 0) { // The pass-1 SCAN found the target (matchFound was true) and we already @@ -2560,6 +2685,7 @@ export class DocmostClient { orphanedAttachmentId: up.attachmentId, warning: "target image was removed concurrently; uploaded attachment is unreferenced", + verify: mutation.verify, }; } @@ -2570,6 +2696,7 @@ export class DocmostClient { oldAttachmentId, newAttachmentId: up.attachmentId, src: up.src, + verify: mutation.verify, }; }); } @@ -2628,13 +2755,17 @@ export class DocmostClient { // JSON write path) before writing it back. this.validateDocUrls(version.content); const collabToken = await this.getCollabTokenWithReauth(); - await mutatePageContent( + const mutation = await mutatePageContent( version.pageId, collabToken, this.apiUrl, () => version.content, ); - return { pageId: version.pageId, restoredFrom: historyId }; + return { + pageId: version.pageId, + restoredFrom: historyId, + verify: mutation.verify, + }; } /** @@ -2812,7 +2943,12 @@ export class DocmostClient { // Apply atomically against the live doc. const collabToken = await this.getCollabTokenWithReauth(); - await mutatePageContent(pageId, collabToken, this.apiUrl, runTransform); + const mutation = await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + runTransform, + ); // Optionally delete consumed comments (best-effort; a delete failure must // not undo the successful write). @@ -2847,6 +2983,7 @@ export class DocmostClient { diff: diffDocs(oldDoc, newDoc), deletedComments, log: ctx.log, + verify: mutation.verify, }; } } diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index f573d530..00559734 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -467,7 +467,11 @@ server.registerTool( "text is preferred. Examples: edits:[{find:\"teh\"," + "replace:\"the\"}]; edits:[{find:\"Hello world\",replace:\"Hello there\"}] " + "(crosses a bold boundary). This is the preferred tool for fixing " + - "wording, typos, numbers, names.", + "wording, typos, numbers, names. It edits plain text only and CANNOT " + + "change formatting marks: formatting changes (markdown markers in " + + "find/replace) are refused — use patch_node/update_page_json to change " + + "marks. The result includes a `verify` change-report of what actually " + + "changed (text/block/mark deltas).", inputSchema: { pageId: z.string().describe("ID of the page to edit"), edits: z diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index e8840c6f..ca2114d9 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -8,6 +8,17 @@ import { JSDOM } from "jsdom"; import { docmostExtensions } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; +import { summarizeChange, VerifyReport } from "./diff.js"; + +/** + * The resolved value of every content-mutating collab write: the document that + * was written (or the live doc when the transform aborted) plus a verifiable + * change report describing what actually changed in the document. + */ +export interface MutationResult { + doc: any; + verify: VerifyReport; +} // Setup DOM environment for Tiptap HTML parsing in Node.js const dom = new JSDOM(""); @@ -375,14 +386,18 @@ const PERSIST_TIMEOUT_MS = 20000; * ProseMirror doc to write, or `null` to abort with no write (a no-op). If * `transform` throws, the error is propagated to the caller (not swallowed). * - * Returns the doc that was written, or the live doc when the transform aborted. + * Resolves a `MutationResult { doc, verify }`: `doc` is the doc that was + * written (or the live doc when the transform aborted), and `verify` is a + * verifiable change report (text/block/mark deltas) of what actually changed. + * The report is computed AFTER the atomic read->write, so it never widens the + * read->write window, and it never throws (it can NEVER break a write). */ export async function mutatePageContent( pageId: string, collabToken: string, baseUrl: string, transform: (liveDoc: any) => any | null, -): Promise { +): Promise { return withPageLock(pageId, () => { if (process.env.DEBUG) { console.error(`Starting realtime content mutate for page ${pageId}`); @@ -396,7 +411,7 @@ export async function mutatePageContent( const wsUrl = buildCollabWsUrl(baseUrl); if (process.env.DEBUG) console.error(`Connecting to WebSocket: ${wsUrl}`); - return new Promise((resolve, reject) => { + return new Promise((resolve, reject) => { let provider: HocuspocusProvider | undefined; let applied = false; // onSynced may fire again on reconnect — apply once. let settled = false; @@ -422,12 +437,12 @@ export async function mutatePageContent( } }; - const finish = (err: Error | null, value?: any) => { + const finish = (err: Error | null, value?: MutationResult) => { if (settled) return; settled = true; cleanup(); if (err) reject(err); - else resolve(value); + else resolve(value as MutationResult); }; connectTimer = setTimeout(() => { @@ -449,7 +464,7 @@ export async function mutatePageContent( return; } if (provider.unsyncedChanges === 0) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); return; } persistTimer = setTimeout(() => { @@ -465,13 +480,15 @@ export async function mutatePageContent( // the counter back to 0 without our write being re-transmitted; in // that case let the disconnect/close error win instead. if (data.number === 0 && !connectionLost) { - finish(null, lastWrittenDoc); + finish(null, mutationResult); } }; provider.on("unsyncedChanges", unsyncedHandler); }; - let lastWrittenDoc: any; + // The verifiable result resolved on every success/abort path. Set on + // abort (no-op report) and after a real write (computed change report). + let mutationResult: MutationResult; provider = new HocuspocusProvider({ url: wsUrl, @@ -522,6 +539,7 @@ export async function mutatePageContent( // not yielded, no incoming remote update can interleave, so any // already-synced concurrent edits are preserved in liveDoc. let newDoc: any; + let beforeDoc: any; try { let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); if ( @@ -532,12 +550,27 @@ export async function mutatePageContent( liveDoc = { type: "doc", content: [] }; } + // Snapshot the before-doc for the change report. Docs are + // JSON-serializable, so this is a safe deep clone. + beforeDoc = JSON.parse(JSON.stringify(liveDoc)); + newDoc = transform(liveDoc); if (newDoc == null) { - // Transform aborted — write nothing, return the live doc. - lastWrittenDoc = liveDoc; - finish(null, liveDoc); + // Transform aborted — write nothing, return the live doc with a + // no-op change report. + mutationResult = { + doc: liveDoc, + verify: { + changed: false, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "no changes (transform aborted)", + }, + }; + finish(null, mutationResult); return; } @@ -558,7 +591,13 @@ export async function mutatePageContent( return; } - lastWrittenDoc = newDoc; + // Compute the verifiable change report AFTER the transact write: it + // only needs the JSON before/after, so it cannot affect the atomic + // read->write window, and summarizeChange never throws. + mutationResult = { + doc: newDoc, + verify: summarizeChange(beforeDoc, newDoc), + }; if (process.env.DEBUG) console.error("Content written, waiting for server to persist..."); waitForPersistence(); @@ -588,7 +627,7 @@ export async function replacePageContent( prosemirrorDoc: any, collabToken: string, baseUrl: string, -): Promise { +): Promise { // Fail fast on a bad document instead of deferring the failure into the // collaboration write (where TiptapTransformer.toYdoc(undefined) used to // throw). The transform must return a valid ProseMirror doc. @@ -599,7 +638,12 @@ export async function replacePageContent( ) { throw new Error("replacePageContent: invalid ProseMirror document"); } - await mutatePageContent(pageId, collabToken, baseUrl, () => prosemirrorDoc); + return await mutatePageContent( + pageId, + collabToken, + baseUrl, + () => prosemirrorDoc, + ); } /** @@ -612,7 +656,12 @@ export async function updatePageContentRealtime( markdownContent: string, collabToken: string, baseUrl: string, -): Promise { +): Promise { const tiptapJson = await markdownToProseMirror(markdownContent); - await mutatePageContent(pageId, collabToken, baseUrl, () => tiptapJson); + return await mutatePageContent( + pageId, + collabToken, + baseUrl, + () => tiptapJson, + ); } diff --git a/packages/mcp/src/lib/diff.ts b/packages/mcp/src/lib/diff.ts index 25b19148..befe047c 100644 --- a/packages/mcp/src/lib/diff.ts +++ b/packages/mcp/src/lib/diff.ts @@ -317,3 +317,163 @@ export function diffDocs( }; return { ...partial, markdown: renderMarkdown(partial, fellBack) }; } + +/** + * Recursively walk every `text` node and tally the count of each mark by + * `mark.type` (e.g. `{ bold: 5, strike: 3, link: 2 }`). Pure and never throws. + */ +function markCounts(doc: any): Record { + const counts: Record = {}; + const visit = (node: any): void => { + if (!node || typeof node !== "object") return; + if (node.type === "text" && Array.isArray(node.marks)) { + for (const m of node.marks) { + if (m && typeof m.type === "string") { + counts[m.type] = (counts[m.type] || 0) + 1; + } + } + } + if (Array.isArray(node.content)) for (const c of node.content) visit(c); + }; + visit(doc); + return counts; +} + +/** + * A compact, machine-readable report of what actually changed between two + * ProseMirror docs. Unlike DiffResult it ALSO surfaces a per-mark-type count + * delta, because diffDocs diffs TEXT only (complexSteps:false) and so reports + * 0/0 chars for a pure MARK change (e.g. removing `strike` from unchanged text). + */ +export interface VerifyReport { + /** Did the document actually change at all. */ + changed: boolean; + /** Chars inserted (from diffDocs). */ + textInserted: number; + /** Chars deleted (from diffDocs). */ + textDeleted: number; + blocksChanged: number; + /** ONLY mark types whose count changed, as [before, after]. */ + marks: Record; + /** + * ONLY structural integrity types whose count changed, as [before, after] + * (images/links/tables/callouts). Surfaces structural mutations that touch + * neither text nor marks (e.g. insert_image, deleting a table) which diffDocs + * — being TEXT-only — would otherwise report as "no content change". + */ + structure?: Record; + /** One-line human/agent-readable summary. */ + summary: string; +} + +/** + * Build a VerifyReport for a content mutation. Pure and never throws — on any + * internal error it returns a minimal "changed (diff unavailable)" report so it + * can NEVER break a write. + * + * `changed` is VALUE-based, not JSON-string-based: it is derived from the actual + * deltas (text chars, blocks, mark counts, structural integrity counts), so two + * value-equal docs that differ only in JSON key order report cleanly as + * `changed:false` / "no content change" rather than a misleading +0/-0 change. + * + * The structural integrity delta (from diffDocs's `integrity` tuples) is what + * makes `changed` true for an image/table/callout/link count change that diffs + * to zero text — closing a verify blind spot for insert_image, delete_node on a + * table, etc. + */ +export function summarizeChange(before: any, after: any): VerifyReport { + try { + const diff = diffDocs(before, after); + + // Per-mark-type delta: include a type only when its count actually changed. + const beforeMarks = markCounts(before); + const afterMarks = markCounts(after); + const marks: Record = {}; + for (const type of new Set([ + ...Object.keys(beforeMarks), + ...Object.keys(afterMarks), + ])) { + const b = beforeMarks[type] || 0; + const a = afterMarks[type] || 0; + if (b !== a) marks[type] = [b, a]; + } + + // Structural integrity delta from diffDocs: count-based [old,new] tuples for + // images/links/tables/callouts. Include a type only when old != new. + const integrity = diff.integrity; + const structure: Record = {}; + const countTypes: ["images", "links", "tables", "callouts"] = [ + "images", + "links", + "tables", + "callouts", + ]; + for (const type of countTypes) { + const [b, a] = integrity[type]; + if (b !== a) structure[type] = [b, a]; + } + + const textInserted = diff.summary.inserted; + const textDeleted = diff.summary.deleted; + const blocksChanged = diff.summary.blocksChanged; + const hasMarkDelta = Object.keys(marks).length > 0; + const hasStructureDelta = Object.keys(structure).length > 0; + + // VALUE-based change decision: ignore JSON key-order no-ops entirely. + const changed = + textInserted > 0 || + textDeleted > 0 || + blocksChanged > 0 || + hasMarkDelta || + hasStructureDelta; + + if (!changed) { + return { + changed: false, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "no content change", + }; + } + + const parts: string[] = []; + // Only mention text/blocks when they actually changed (avoid a misleading + // "+0/-0 chars, 0 block(s)" prefix on a pure mark/structure change). + if (textInserted > 0 || textDeleted > 0 || blocksChanged > 0) { + parts.push(`+${textInserted}/-${textDeleted} chars, ${blocksChanged} block(s)`); + } + const markParts = Object.entries(marks).map( + ([type, [b, a]]) => `${type} ${b}→${a}`, + ); + if (markParts.length > 0) parts.push(`marks: ${markParts.join(", ")}`); + const structureParts = Object.entries(structure).map( + ([type, [b, a]]) => `${type} ${b}→${a}`, + ); + if (structureParts.length > 0) parts.push(structureParts.join(", ")); + // `changed` is true here, so at least one group is present and parts is non-empty. + const summary = `changed: ${parts.join("; ")}`; + + const report: VerifyReport = { + changed: true, + textInserted, + textDeleted, + blocksChanged, + marks, + summary, + }; + if (hasStructureDelta) report.structure = structure; + return report; + } catch { + // A pathological pair must never break a write: degrade to a minimal report. + return { + changed: true, + textInserted: 0, + textDeleted: 0, + blocksChanged: 0, + marks: {}, + summary: "changed (diff unavailable)", + }; + } +} diff --git a/packages/mcp/src/lib/json-edit.ts b/packages/mcp/src/lib/json-edit.ts index bfba6793..0238e3f7 100644 --- a/packages/mcp/src/lib/json-edit.ts +++ b/packages/mcp/src/lib/json-edit.ts @@ -12,7 +12,7 @@ * re-import for small wording fixes. */ -import { stripInlineMarkdown } from "./text-normalize.js"; +import { stripInlineMarkdown, stripBalancedWrappers } from "./text-normalize.js"; export interface TextEdit { find: string; @@ -279,6 +279,32 @@ export function applyTextEdits( for (const edit of edits) { if (!edit.find) throw new Error("edit.find must be a non-empty string"); + // HARD-REFUSE formatting changes. edit_page_text edits PLAIN TEXT only and + // writes the replacement verbatim, so it cannot add/remove marks. We refuse + // only a pure formatting TOGGLE: find and replace differ ONLY by balanced + // markdown markers (e.g. find:"~~$69~~" / replace:"$69", or find:"M5Stack" / + // replace:"**M5Stack**" which would write literal `**`). + // + // The detector is the STRICT stripBalancedWrappers, NOT the lenient locator + // stripInlineMarkdown: the lenient one also trims whitespace/emoji and + // collapses lone `*`/`_` runs, which gives false positives on ordinary + // plain-text edits (trailing-space trim, snake_case, `2 * 3 * 4`, URLs with + // underscores) and wrongly refuses them. Comparing the strict strip of both + // sides symmetrically catches every real formatting toggle while leaving + // plain text alone; a typo fix wrapped in markdown still applies because its + // stripped find != stripped replace. + const formattingOnly = + edit.find !== edit.replace && + stripBalancedWrappers(edit.find) === stripBalancedWrappers(edit.replace); + if (formattingOnly) { + failed.push({ + find: edit.find, + reason: + "edit_page_text edits plain text only and cannot add or remove formatting marks (bold/italic/strike/code/link); it writes the replacement as LITERAL text. This edit looks like a formatting change (markdown markers in find/replace). To change marks, read the block with get_page_json and use patch_node (or update_page_json) to set the node's marks array.", + }); + continue; + } + // Gather every inline block in document order (recurse the whole tree so // nested containers — callouts, list items, table cells, blockquotes — are // all covered). diff --git a/packages/mcp/src/lib/text-normalize.ts b/packages/mcp/src/lib/text-normalize.ts index 73a764e4..7feeb69f 100644 --- a/packages/mcp/src/lib/text-normalize.ts +++ b/packages/mcp/src/lib/text-normalize.ts @@ -27,6 +27,56 @@ const WRAPPER_PATTERNS: RegExp[] = [ /`([^`]+?)`/g, // `x` ]; +/** Links/images -> their visible text. `!?` covers both `[t](u)` and `![a](s)`. */ +const LINK_IMAGE_RE = /!?\[([^\]]*)\]\([^)]*\)/g; + +/** + * Apply ONLY the two balanced/link passes shared by both normalizers: first + * collapse links/images to their visible text, then collapse balanced inline + * wrappers repeatedly until stable. Does NOT trim decoration, does NOT guard + * against an empty result — it returns exactly the transformed string. + */ +function stripWrappersAndLinks(s: string): string { + // 1. Links/images -> their visible text. + let out = s.replace(LINK_IMAGE_RE, "$1"); + + // 2. Strip balanced wrappers, repeating until the string is stable so nested + // wrappers (`**_x_**`) and adjacent runs both collapse. + for (let pass = 0; pass < MAX_PASSES; pass++) { + const before = out; + for (const re of WRAPPER_PATTERNS) { + out = out.replace(re, "$1"); + } + if (out === before) break; + } + return out; +} + +/** + * STRICT formatting detector — distinct from the lenient locator + * normalization below. It strips ONLY what unambiguously is markdown markup: + * 1. links/images `[text](url)` -> `text`, `![alt](src)` -> `alt`, and + * 2. balanced inline `**`/`__`/`~~`/`*`/`_`/`` ` `` wrappers (repeat-until-stable), + * and DELIBERATELY does NOT trim leading/trailing whitespace, emoji, or lone + * marker chars (the lenient extras `stripInlineMarkdown` does in its step 3). + * + * It exists ONLY to recognize formatting-vs-plain INTENT in `applyTextEdits` + * (deciding whether find/replace differ purely by markdown markers). Because it + * skips the lenient trimming, ordinary plain-text edits are NOT misread as + * formatting: a trailing-space trim, snake_case (`my_var_name`), math (`2 * 3`), + * and identifiers/URLs with underscores all stay untouched here (their `_x_` / + * `*x*` runs are only collapsed when actually balanced, and even then they are + * compared symmetrically, so plain text never collapses to a different string). + * + * Do NOT use this for LOCATING — the locator fallback must keep using the + * lenient `stripInlineMarkdown` (it trims stray decoration so a find still + * matches the document's plain text). + */ +export function stripBalancedWrappers(s: string): string { + if (typeof s !== "string" || s.length === 0) return s; + return stripWrappersAndLinks(s); +} + /** * Conservatively strip inline markdown from a locator string. * @@ -45,20 +95,8 @@ const WRAPPER_PATTERNS: RegExp[] = [ export function stripInlineMarkdown(s: string): string { if (typeof s !== "string" || s.length === 0) return s; - let out = s; - - // 1. Links/images -> their visible text. `!?` covers both forms. - out = out.replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1"); - - // 2. Strip balanced wrappers, repeating until the string is stable so nested - // wrappers (`**_x_**`) and adjacent runs both collapse. - for (let pass = 0; pass < MAX_PASSES; pass++) { - const before = out; - for (const re of WRAPPER_PATTERNS) { - out = out.replace(re, "$1"); - } - if (out === before) break; - } + // 1 + 2. Shared link/image and balanced-wrapper passes. + let out = stripWrappersAndLinks(s); // 3. Trim leading/trailing decoration: whitespace, leftover markdown markers, // and emoji (Extended_Pictographic plus the VS16 / ZWJ joiners, plus the diff --git a/packages/mcp/test/unit/diff-verify.test.mjs b/packages/mcp/test/unit/diff-verify.test.mjs new file mode 100644 index 00000000..01af95dc --- /dev/null +++ b/packages/mcp/test/unit/diff-verify.test.mjs @@ -0,0 +1,146 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { summarizeChange } from "../../build/lib/diff.js"; + +// --------------------------------------------------------------------------- +// Builders +// --------------------------------------------------------------------------- +const t = (text, marks) => (marks ? { type: "text", text, marks } : { type: "text", text }); +const para = (...children) => ({ type: "paragraph", content: children }); +const doc = (...children) => ({ type: "doc", content: children }); + +// --------------------------------------------------------------------------- +// (i) Identical docs -> changed:false, marks {} +// --------------------------------------------------------------------------- +test("summarizeChange on identical docs reports no change", () => { + const d = doc(para(t("unchanged"))); + // Distinct deep clone so it is value-equal but not reference-equal. + const same = JSON.parse(JSON.stringify(d)); + const r = summarizeChange(d, same); + + assert.equal(r.changed, false); + assert.deepEqual(r.marks, {}); + assert.equal(r.textInserted, 0); + assert.equal(r.textDeleted, 0); + assert.equal(r.blocksChanged, 0); + assert.equal(r.summary, "no content change"); +}); + +// --------------------------------------------------------------------------- +// (ii) A pure text change -> textInserted/textDeleted > 0 +// --------------------------------------------------------------------------- +test("summarizeChange reports char counts for a text change", () => { + const before = doc(para(t("Hello world"))); + const after = doc(para(t("Hello brave world"))); + const r = summarizeChange(before, after); + + assert.equal(r.changed, true); + assert.ok(r.textInserted > 0, "reports inserted chars"); + // No marks changed in a pure text edit. + assert.deepEqual(r.marks, {}); + assert.match(r.summary, /chars/); +}); + +// --------------------------------------------------------------------------- +// (iii) CRITICAL: a mark-only change. Same text, one node loses its strike +// mark -> changed:true, marks.strike === [1,0], text counts are 0. +// This proves mark changes are surfaced even though diffDocs sees no text diff. +// --------------------------------------------------------------------------- +test("summarizeChange surfaces a pure mark removal (strike 1->0)", () => { + const before = doc(para(t("on sale", [{ type: "strike" }]))); + // Same characters, strike mark removed. + const after = doc(para(t("on sale"))); + const r = summarizeChange(before, after); + + assert.equal(r.changed, true); + // The whole point: a mark delta is surfaced as [before, after]. + assert.deepEqual(r.marks.strike, [1, 0]); + // No characters changed. + assert.equal(r.textInserted, 0); + assert.equal(r.textDeleted, 0); + // The summary mentions the mark delta. + assert.match(r.summary, /strike 1→0/); + // A pure mark change must not carry a misleading "+0/-0 chars" text clause. + assert.ok(!r.summary.includes("chars")); +}); + +// --------------------------------------------------------------------------- +// A mark addition is surfaced too (bold 0->1), and only changed types appear. +// --------------------------------------------------------------------------- +test("summarizeChange surfaces a mark addition and omits unchanged types", () => { + const before = doc(para(t("a", [{ type: "italic" }]), t("b"))); + // Same text + same italic on "a", but "b" gains bold. + const after = doc(para(t("a", [{ type: "italic" }]), t("b", [{ type: "bold" }]))); + const r = summarizeChange(before, after); + + assert.equal(r.changed, true); + assert.deepEqual(r.marks.bold, [0, 1]); + // italic count is unchanged (1 -> 1), so it must NOT appear in marks. + assert.equal("italic" in r.marks, false); +}); + +// --------------------------------------------------------------------------- +// (iv) VALUE-based change: two value-equal docs that differ ONLY in JSON key +// order must report changed:false / "no content change", not a +0/-0 change. +// --------------------------------------------------------------------------- +test("summarizeChange treats a key-order-only difference as no change", () => { + // Same node, but attrs/text written in a different key order on each side. + const before = { + type: "doc", + content: [ + { type: "paragraph", attrs: { a: 1, b: 2 }, content: [{ type: "text", text: "same" }] }, + ], + }; + const after = { + content: [ + { content: [{ text: "same", type: "text" }], attrs: { b: 2, a: 1 }, type: "paragraph" }, + ], + type: "doc", + }; + // JSON strings differ (key order), but the values are equal. + assert.notEqual(JSON.stringify(before), JSON.stringify(after)); + + const r = summarizeChange(before, after); + assert.equal(r.changed, false); + assert.equal(r.summary, "no content change"); + assert.equal(r.textInserted, 0); + assert.equal(r.textDeleted, 0); + assert.equal(r.blocksChanged, 0); + assert.deepEqual(r.marks, {}); +}); + +// --------------------------------------------------------------------------- +// (v) CRITICAL: a structural change that touches no text/marks — adding an +// image node (images 0 -> 1) — must report changed:true and surface the +// integrity delta in structure + summary, closing the verify blind spot for +// insert_image / delete_node on structural nodes. +// --------------------------------------------------------------------------- +test("summarizeChange surfaces an image-count change (0->1)", () => { + const before = doc(para(t("caption"))); + const after = doc( + para(t("caption")), + { type: "image", attrs: { src: "x.png", attachmentId: "a1" } }, + ); + const r = summarizeChange(before, after); + + assert.equal(r.changed, true, "an added image is a change"); + assert.deepEqual(r.structure.images, [0, 1]); + assert.match(r.summary, /images 0→1/); +}); + +// --------------------------------------------------------------------------- +// Robustness: a malformed pair must never throw; it degrades gracefully. +// --------------------------------------------------------------------------- +test("summarizeChange never throws on a pathological pair", () => { + const before = { type: "doc", content: [] }; + // A doc whose `content` array references itself makes the recursive walkers + // (diffDocs / markCounts / countNodes) recurse without bound and overflow the + // stack. The try/catch must keep summarizeChange safe and degrade to a + // minimal "changed (diff unavailable)" report instead of throwing. + const after = { type: "doc", content: [] }; + after.content.push(after); + const r = summarizeChange(before, after); + assert.equal(r.changed, true); + assert.equal(r.summary, "changed (diff unavailable)"); +}); diff --git a/packages/mcp/test/unit/json-edit-refuse.test.mjs b/packages/mcp/test/unit/json-edit-refuse.test.mjs new file mode 100644 index 00000000..04fb7a94 --- /dev/null +++ b/packages/mcp/test/unit/json-edit-refuse.test.mjs @@ -0,0 +1,153 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { applyTextEdits } from "../../build/lib/json-edit.js"; + +// Helpers to build small ProseMirror docs. +const textNode = (text, extra = {}) => ({ type: "text", text, ...extra }); +const paragraph = (...children) => ({ type: "paragraph", content: children }); +const doc = (...children) => ({ type: "doc", content: children }); + +// --------------------------------------------------------------------------- +// (i) formattingOnly: find and replace differ ONLY by markdown markers +// (find:"~~x~~" / replace:"x"). The text "x" exists, but the edit is a pure +// formatting toggle -> refused into failed[], nothing applied. +// --------------------------------------------------------------------------- +test("formatting-only edit (strip-toggle) is refused, not applied", () => { + const input = doc(paragraph(textNode("x", { marks: [{ type: "strike" }] }))); + const snapshot = JSON.parse(JSON.stringify(input)); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "~~x~~", replace: "x" }, + ]); + + assert.equal(results.length, 0, "nothing applied"); + assert.equal(failed.length, 1, "one refused edit"); + assert.equal(failed[0].find, "~~x~~"); + assert.match(failed[0].reason, /cannot add or remove formatting marks/); + assert.match(failed[0].reason, /patch_node/); + // The document is untouched (the strike mark is preserved). + assert.deepEqual(out, snapshot); +}); + +// --------------------------------------------------------------------------- +// (ii) formattingOnly via add-bold: a plain `find:"x"` whose `replace:"**x**"` +// only adds balanced markers. stripBalancedWrappers(replace) == find, find != +// replace -> formattingOnly -> refused (it would write a LITERAL `**x**`). +// --------------------------------------------------------------------------- +test("edit that only adds bold markers around plain text is refused", () => { + const input = doc(paragraph(textNode("x"))); + const snapshot = JSON.parse(JSON.stringify(input)); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "x", replace: "**x**" }, + ]); + + assert.equal(results.length, 0, "nothing applied"); + assert.equal(failed.length, 1, "one refused edit"); + assert.match(failed[0].reason, /cannot add or remove formatting marks/); + // No literal ** was written into the document. + assert.deepEqual(out, snapshot); +}); + +// --------------------------------------------------------------------------- +// (ii-b) More real formatting toggles are still caught by stripBalancedWrappers. +// --------------------------------------------------------------------------- +test("strike-toggle on a price is refused", () => { + const input = doc(paragraph(textNode("$69", { marks: [{ type: "strike" }] }))); + const snapshot = JSON.parse(JSON.stringify(input)); + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "~~$69~~", replace: "$69" }, + ]); + assert.equal(results.length, 0, "nothing applied"); + assert.equal(failed.length, 1, "one refused edit"); + assert.match(failed[0].reason, /cannot add or remove formatting marks/); + assert.deepEqual(out, snapshot); +}); + +test("nested-wrapper toggle (~~~~**M5Stack**~~~~ -> **M5Stack**) is refused", () => { + const input = doc( + paragraph(textNode("M5Stack", { marks: [{ type: "bold" }] })), + ); + const snapshot = JSON.parse(JSON.stringify(input)); + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "~~~~**M5Stack**~~~~", replace: "**M5Stack**" }, + ]); + assert.equal(results.length, 0, "nothing applied"); + assert.equal(failed.length, 1, "one refused edit"); + assert.match(failed[0].reason, /cannot add or remove formatting marks/); + assert.deepEqual(out, snapshot); +}); + +// --------------------------------------------------------------------------- +// (ii-c) REGRESSION: ordinary plain-text edits that the OLD lenient detector +// wrongly refused (false positives) now APPLY — they land in `results`, never +// in `failed`. Each `find` exists verbatim in the built doc. +// --------------------------------------------------------------------------- +test("plain-text edits formerly mis-flagged as formatting now apply", () => { + const cases = [ + // trailing-space trim: lenient strip trimmed the space -> equal -> refused. + { find: "tail ", replace: "tail", before: "head tail more" }, + // snake_case: `_case_` looked like `_x_` emphasis to the lenient detector. + { find: "oldname", replace: "snake_case_name", before: "the oldname here" }, + // math: `* 3 *` looked like `*x*` emphasis. + { find: "X", replace: "2 * 3 * 4", before: "value X end" }, + // identifier with underscores. + { find: "A", replace: "my_var_name", before: "set A now" }, + ]; + + for (const c of cases) { + const input = doc(paragraph(textNode(c.before))); + const { results, failed } = applyTextEdits(input, [ + { find: c.find, replace: c.replace }, + ]); + assert.equal( + failed.length, + 0, + `"${c.find}" -> "${c.replace}" must NOT be refused (got: ${JSON.stringify(failed)})`, + ); + assert.equal(results.length, 1, `"${c.find}" must apply once`); + assert.equal(results[0].find, c.find); + assert.equal(results[0].replacements, 1); + } +}); + +// --------------------------------------------------------------------------- +// (iii) Legit typo fix: find has markdown but replace differs in LETTERS and +// has no markers. stripped find != stripped replace AND replace has no markers +// -> neither flag trips -> the edit applies. +// --------------------------------------------------------------------------- +test("typo fix wrapped in markdown still applies (not refused)", () => { + // The document renders "M5Stack Atom Eco" with that span bold (misspelled). + const input = doc( + paragraph(textNode("M5Stack Atom Eco", { marks: [{ type: "bold" }] })), + ); + + const { doc: out, results, failed } = applyTextEdits(input, [ + { find: "**M5Stack Atom Eco**", replace: "M5Stack Atom Echo" }, + ]); + + assert.equal(failed.length, 0, "not refused"); + assert.equal(results.length, 1, "applied"); + assert.equal(results[0].find, "**M5Stack Atom Eco**"); + assert.equal(results[0].replacements, 1); + // It matched via the markdown-strip fallback. + assert.equal(results[0].normalized, true); + // The fix is applied AND the bold mark is preserved (text edit, not a + // formatting change). + const node = out.content[0].content.find((n) => n.text === "M5Stack Atom Echo"); + assert.ok(node, "the corrected text node exists"); + assert.deepEqual(node.marks, [{ type: "bold" }]); +}); + +// --------------------------------------------------------------------------- +// A plain text fix is unaffected by the refuse logic. +// --------------------------------------------------------------------------- +test("plain find/replace is not refused", () => { + const input = doc(paragraph(textNode("teh cat"))); + const { results, failed } = applyTextEdits(input, [ + { find: "teh", replace: "the" }, + ]); + assert.equal(failed.length, 0); + assert.deepEqual(results, [{ find: "teh", replacements: 1 }]); +});