From 48c1ec46f71446b4957aedf8e69de06f22b80df7 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Fri, 3 Jul 2026 20:29:42 +0300 Subject: [PATCH] fix(comment): store the real anchored substring as expectedText + pin authz (#318 F1/F2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F1 [blocking]: a suggestion whose anchor matched via normalization could never be applied (spurious 409). The comment mark lands on the doc's ACTUAL text (Docmost auto-converts to typographic quotes/dashes/nbsp), but the stored selection — used as expectedText at apply — was the raw ASCII agent input (+substring(0,250)). So replaceYjsMarkedText's strict joined!==expectedText always failed and threw "text changed" though nobody edited. Fix: new pure getAnchoredText(doc, selection) reconstructs the exact raw doc substring the mark covers (slicing identical to spliceCommentMark); on the suggestion path client.createComment stores THAT as selection, so expectedText equals the marked text and apply returns applied:true. Live anchoring still uses the raw agent selection (normalization still finds the anchor). Truncation raised 250->2000 (+ DTO @MaxLength(2000)) so the anchored substring is never cut below the mark span. Ordinary comments unchanged. AI-chat shares client.createComment, so covered. Regression tests: getAnchoredText raw-vs-ASCII; create payload selection is the typographic substring; apply with typographic expectedText -> applied. F2 [blocking]: added comment.controller.spec.ts pinning that validateCanEdit runs before applySuggestion (Forbidden -> applySuggestion never called; happy path -> called; missing comment -> 404 without authorizing). MCP 448 pass; server comment+yjs 54 pass. MCP build/ rebuilt. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../server/src/collaboration/yjs.util.spec.ts | 26 ++++ .../core/comment/comment.controller.spec.ts | 119 ++++++++++++++++++ .../src/core/comment/comment.service.ts | 10 +- .../core/comment/dto/create-comment.dto.ts | 7 ++ packages/mcp/build/client.js | 24 +++- packages/mcp/build/lib/comment-anchor.js | 61 +++++++++ packages/mcp/src/client.ts | 25 +++- packages/mcp/src/lib/comment-anchor.ts | 59 +++++++++ .../mcp/test/mock/create-comment.test.mjs | 96 ++++++++++++++ .../mcp/test/unit/comment-anchor.test.mjs | 34 +++++ 10 files changed, 457 insertions(+), 4 deletions(-) create mode 100644 apps/server/src/core/comment/comment.controller.spec.ts diff --git a/apps/server/src/collaboration/yjs.util.spec.ts b/apps/server/src/collaboration/yjs.util.spec.ts index eb0eb0f0..8f6fc2e4 100644 --- a/apps/server/src/collaboration/yjs.util.spec.ts +++ b/apps/server/src/collaboration/yjs.util.spec.ts @@ -379,6 +379,32 @@ describe('replaceYjsMarkedText', () => { expect(text.toDelta()).toEqual(before); }); + // F1 regression: the marked doc text is TYPOGRAPHIC (smart quotes / em-dash) + // and expectedText equals that raw typographic text — as it now does, because + // the MCP client stores the RAW anchored substring (getAnchoredText) rather + // than the agent's ASCII input. The strict `joinedText !== expectedText` + // compare must therefore MATCH and the suggestion apply (not a spurious 409). + it('typographic marked text applies when expectedText is the raw typographic text', () => { + const marked = '“hello”—world'; + const { fragment, text } = buildRuns([ + { text: 'say ' }, + { text: marked, comment: { commentId: 'c1', resolved: false } }, + { text: '!' }, + ]); + + const result = replaceYjsMarkedText(fragment, 'c1', marked, 'bye'); + + expect(result).toEqual({ applied: true, currentText: 'bye' }); + expect(text.toDelta()).toEqual([ + { insert: 'say ' }, + { + insert: 'bye', + attributes: { comment: { commentId: 'c1', resolved: false } }, + }, + { insert: '!' }, + ]); + }); + it('anchor deleted: no mark with that commentId → { applied: false, currentText: null }', () => { const { fragment, text } = buildWithComments([ { text: 'abc', comment: { commentId: 'c1', resolved: false } }, diff --git a/apps/server/src/core/comment/comment.controller.spec.ts b/apps/server/src/core/comment/comment.controller.spec.ts new file mode 100644 index 00000000..707e9687 --- /dev/null +++ b/apps/server/src/core/comment/comment.controller.spec.ts @@ -0,0 +1,119 @@ +import { + ForbiddenException, + NotFoundException, +} from '@nestjs/common'; +import { CommentController } from './comment.controller'; + +/** + * Authz-gate tests for the apply-suggestion route. Applying a suggestion + * rewrites the page text, so the route MUST call + * pageAccessService.validateCanEdit BEFORE handing off to + * commentService.applySuggestion (which performs the document mutation + stamp). + * That ordering is a security boundary: an unauthorized user must never reach + * the mutation. These tests pin it against a fully mocked controller so any + * regression that drops the gate (or reorders it after the mutation) fails here. + */ +describe('CommentController apply-suggestion authz', () => { + function makeController() { + const commentService = { + applySuggestion: jest.fn(async () => ({ id: 'c-1', applied: true })), + }; + const commentRepo = { findById: jest.fn() }; + const pageRepo = { findById: jest.fn() }; + const spaceAbility = {} as any; + const pageAccessService = { + validateCanEdit: jest.fn(async () => undefined), + }; + const wsService = {} as any; + const auditService = { log: jest.fn() }; + + const controller = new CommentController( + commentService as any, + commentRepo as any, + pageRepo as any, + spaceAbility, + pageAccessService as any, + wsService, + auditService as any, + ); + return { + controller, + commentService, + commentRepo, + pageRepo, + pageAccessService, + }; + } + + const user: any = { id: 'u-1' }; + const workspace: any = { id: 'ws-1' }; + const provenance: any = undefined; + const dto: any = { commentId: 'c-1' }; + + const comment = { + id: 'c-1', + pageId: 'p-1', + spaceId: 'sp-1', + suggestedText: 'new text', + selection: 'old text', + }; + const page = { id: 'p-1', spaceId: 'sp-1', deletedAt: null }; + + it('validateCanEdit throwing Forbidden rejects AND applySuggestion is never called', async () => { + const { controller, commentRepo, pageRepo, pageAccessService, commentService } = + makeController(); + commentRepo.findById.mockResolvedValue(comment); + pageRepo.findById.mockResolvedValue(page); + pageAccessService.validateCanEdit.mockRejectedValue( + new ForbiddenException('no edit access'), + ); + + await expect( + controller.applySuggestion(dto, user, workspace, provenance), + ).rejects.toBeInstanceOf(ForbiddenException); + + // The security boundary: the mutation/stamp must NOT run for an + // unauthorized user. + expect(pageAccessService.validateCanEdit).toHaveBeenCalledWith(page, user); + expect(commentService.applySuggestion).not.toHaveBeenCalled(); + }); + + it('happy path: validateCanEdit resolves → applySuggestion is called and its result returned', async () => { + const { controller, commentRepo, pageRepo, pageAccessService, commentService } = + makeController(); + commentRepo.findById.mockResolvedValue(comment); + pageRepo.findById.mockResolvedValue(page); + const applied = { id: 'c-1', applied: true }; + commentService.applySuggestion.mockResolvedValue(applied); + + const result = await controller.applySuggestion( + dto, + user, + workspace, + provenance, + ); + + // Authorization ran before the mutation, then the service was invoked. + expect(pageAccessService.validateCanEdit).toHaveBeenCalledWith(page, user); + expect(commentService.applySuggestion).toHaveBeenCalledWith( + comment, + user, + provenance, + ); + expect(result).toBe(applied); + }); + + it('missing comment: NotFound is thrown without authorizing or applying', async () => { + const { controller, commentRepo, pageRepo, pageAccessService, commentService } = + makeController(); + commentRepo.findById.mockResolvedValue(null); + + await expect( + controller.applySuggestion(dto, user, workspace, provenance), + ).rejects.toBeInstanceOf(NotFoundException); + + expect(pageRepo.findById).not.toHaveBeenCalled(); + expect(pageAccessService.validateCanEdit).not.toHaveBeenCalled(); + expect(commentService.applySuggestion).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/server/src/core/comment/comment.service.ts b/apps/server/src/core/comment/comment.service.ts index 3a251b58..cc46a002 100644 --- a/apps/server/src/core/comment/comment.service.ts +++ b/apps/server/src/core/comment/comment.service.ts @@ -87,7 +87,15 @@ export class CommentService { } } - const selection = createCommentDto?.selection?.substring(0, 250) ?? null; + // Do NOT lossily truncate at 250: for a suggestion the client sends the RAW + // anchored document substring (the exact text under the comment mark) as the + // selection, which can be LONGER than the agent's <=250-char typed input + // (normalization collapses whitespace/typographic runs, so the raw span can + // exceed the normalized selection). Truncating it shorter than the mark span + // would break the apply-time equality check and make the suggestion + // un-appliable. Keep a generous 2000-char safety bound (matching + // suggestedText) so a legitimate anchored substring is never cut. + const selection = createCommentDto?.selection?.substring(0, 2000) ?? null; // A suggested edit rewrites the exact text under an inline comment mark, so // it is only meaningful on a top-level inline comment that carries a diff --git a/apps/server/src/core/comment/dto/create-comment.dto.ts b/apps/server/src/core/comment/dto/create-comment.dto.ts index b556c627..dd75494b 100644 --- a/apps/server/src/core/comment/dto/create-comment.dto.ts +++ b/apps/server/src/core/comment/dto/create-comment.dto.ts @@ -33,8 +33,15 @@ export class CreateCommentDto { @IsJSON() content: any; + // The agent tool caps what it TYPES at 250 chars, but for a suggestion the + // client resolves and sends the RAW anchored document substring (the exact + // text under the mark), which can be longer once normalization is undone. Bound + // the stored value at 2000 (matching suggestedText) so a legitimate anchored + // substring is never rejected — the service used to lossily truncate at 250, + // which broke the apply-time equality check. @IsOptional() @IsString() + @MaxLength(2000) selection: string; @IsOptional() diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index d1cdc528..1b4b31d5 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -17,7 +17,7 @@ import { withPageLock } from "./lib/page-lock.js"; import { applyTextEdits, } from "./lib/json-edit.js"; import { getCollabToken, performLogin } from "./lib/auth-utils.js"; import { diffDocs, summarizeChange } from "./lib/diff.js"; -import { applyAnchorInDoc, canAnchorInDoc, countAnchorMatches, } from "./lib/comment-anchor.js"; +import { applyAnchorInDoc, canAnchorInDoc, countAnchorMatches, getAnchoredText, } from "./lib/comment-anchor.js"; import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, canonicalizeFootnotes, insertInlineFootnote, } from "./lib/transforms.js"; import vm from "node:vm"; // Supported image types, kept as two lookup tables so both a local file @@ -1936,6 +1936,16 @@ export class DocmostClient { if (!isReply && (!selection || !selection.trim())) { throw new Error("create_comment: an inline 'selection' (exact text to anchor on) is required for a top-level comment"); } + // For a SUGGESTION, the value we store as the comment's `selection` must be + // the RAW document substring the mark lands on (typographic quotes/dashes, + // nbsp, collapsed whitespace), NOT the agent's ASCII input. The anchor is + // placed via normalization, so when the doc was auto-converted to + // typographic the raw substring differs from the agent input; apply-time + // compares the stored selection to the marked doc text STRICTLY, so storing + // the raw substring is what makes "Apply" succeed instead of a spurious 409. + // Captured in the pre-check below (which already reads the page) and used as + // payload.selection. Ordinary comments keep sending the raw agent selection. + let anchoredSelection = null; // For a top-level comment, fail BEFORE creating anything when the selection // is not present in the persisted document — this avoids leaving an orphan // comment + notification behind. A read failure (network) is non-fatal: the @@ -1958,6 +1968,11 @@ export class DocmostClient { "A suggested edit must anchor to a UNIQUE location; expand the selection with surrounding context " + "(still <=250 chars) so it appears exactly once."); } + // Exactly one match: capture the RAW anchored substring to store as the + // comment selection (so apply-time equality holds). If this returns + // null despite countAnchorMatches===1 (shouldn't happen), fall back to + // the raw agent selection below rather than crash. + anchoredSelection = getAnchoredText(page.content, selection); } else if (!canAnchorInDoc(page.content, selection)) { throw new Error("create_comment: could not find the selection text in the page to anchor the comment. " + @@ -1987,8 +2002,13 @@ export class DocmostClient { content: JSON.stringify(jsonContent), type: effectiveType, }; + // For a suggestion, store the RAW anchored substring (anchoredSelection) so + // the stored selection === the text under the mark === apply-time + // expectedText. Ordinary comments (and the null fallback) keep the raw + // agent selection — their selection is only display/anchor and never used + // by apply, so their behavior is unchanged. if (!isReply && selection) - payload.selection = selection; + payload.selection = anchoredSelection ?? selection; if (parentCommentId) payload.parentCommentId = parentCommentId; // Only a top-level inline comment (with a selection) may carry a suggestion. diff --git a/packages/mcp/build/lib/comment-anchor.js b/packages/mcp/build/lib/comment-anchor.js index 03a50931..b5c26dc4 100644 --- a/packages/mcp/build/lib/comment-anchor.js +++ b/packages/mcp/build/lib/comment-anchor.js @@ -148,6 +148,67 @@ export function findAnchorInBlock(blockContent, selection) { } return null; } +/** + * Reconstruct the RAW text spanned by an AnchorMatch inside one block's + * `content` array. `startChild..endChild` are all text nodes (guaranteed by + * findAnchorInBlock, which only builds runs of `text` nodes), so concatenate + * each node's text slice: from `startOffset` on the first node, up to + * `endOffset` on the last, and the whole `.text` for any node fully inside the + * range. Mirrors spliceCommentMark's per-node slicing so the string returned + * here is EXACTLY the characters the comment mark will cover. + */ +function reconstructRawText(blockContent, match) { + const { startChild, startOffset, endChild, endOffset } = match; + let out = ""; + for (let k = startChild; k <= endChild; k++) { + const n = blockContent[k]; + const text = typeof n.text === "string" ? n.text : ""; + const sliceStart = k === startChild ? startOffset : 0; + const sliceEnd = k === endChild ? endOffset : text.length; + out += text.slice(sliceStart, sliceEnd); + } + return out; +} +/** + * Return the RAW document substring that `selection` would anchor to — the exact + * characters the comment mark will cover — or `null` when the selection cannot + * be anchored anywhere in `doc`. + * + * This mirrors canAnchorInDoc / applyAnchorInDoc EXACTLY (same depth-first, + * document-order traversal and the same findAnchorInBlock match on the FIRST + * matching block), but instead of a boolean / an in-place mutation it + * reconstructs the raw text spanned by the matched range. Because + * findAnchorInBlock maps the normalized selection back to raw text-node + * positions, the returned string is the document's ORIGINAL characters (smart + * quotes, em-dashes, nbsp, collapsed whitespace) — NOT the normalized ASCII + * agent input. + * + * Callers store THIS as the comment's `selection` so the stored value equals the + * text actually under the mark, which is what the apply-suggestion equality + * check (replaceYjsMarkedText's `joinedText !== expectedText`) compares against. + * Without it a suggestion whose anchor only matched via normalization would be + * un-appliable (spurious 409). + */ +export function getAnchoredText(doc, selection) { + const visit = (node, depth) => { + if (depth > MAX_DEPTH || !node || typeof node !== "object") + return null; + if (!Array.isArray(node.content)) + return null; + const match = findAnchorInBlock(node.content, selection); + if (match) + return reconstructRawText(node.content, match); + for (const child of node.content) { + if (child && typeof child === "object" && Array.isArray(child.content)) { + const found = visit(child, depth + 1); + if (found !== null) + return found; + } + } + return null; + }; + return visit(doc, 0); +} /** * Depth-first, document-order check for whether `selection` can be anchored * anywhere in `doc`. At each node with an array `content`, first try to match diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 9e9eb421..cd29d494 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -60,6 +60,7 @@ import { applyAnchorInDoc, canAnchorInDoc, countAnchorMatches, + getAnchoredText, } from "./lib/comment-anchor.js"; import { blockText, @@ -2433,6 +2434,17 @@ export class DocmostClient { ); } + // For a SUGGESTION, the value we store as the comment's `selection` must be + // the RAW document substring the mark lands on (typographic quotes/dashes, + // nbsp, collapsed whitespace), NOT the agent's ASCII input. The anchor is + // placed via normalization, so when the doc was auto-converted to + // typographic the raw substring differs from the agent input; apply-time + // compares the stored selection to the marked doc text STRICTLY, so storing + // the raw substring is what makes "Apply" succeed instead of a spurious 409. + // Captured in the pre-check below (which already reads the page) and used as + // payload.selection. Ordinary comments keep sending the raw agent selection. + let anchoredSelection: string | null = null; + // For a top-level comment, fail BEFORE creating anything when the selection // is not present in the persisted document — this avoids leaving an orphan // comment + notification behind. A read failure (network) is non-fatal: the @@ -2459,6 +2471,11 @@ export class DocmostClient { "(still <=250 chars) so it appears exactly once.", ); } + // Exactly one match: capture the RAW anchored substring to store as the + // comment selection (so apply-time equality holds). If this returns + // null despite countAnchorMatches===1 (shouldn't happen), fall back to + // the raw agent selection below rather than crash. + anchoredSelection = getAnchoredText(page.content, selection); } else if (!canAnchorInDoc(page.content, selection)) { throw new Error( "create_comment: could not find the selection text in the page to anchor the comment. " + @@ -2496,7 +2513,13 @@ export class DocmostClient { content: JSON.stringify(jsonContent), type: effectiveType, }; - if (!isReply && selection) payload.selection = selection; + // For a suggestion, store the RAW anchored substring (anchoredSelection) so + // the stored selection === the text under the mark === apply-time + // expectedText. Ordinary comments (and the null fallback) keep the raw + // agent selection — their selection is only display/anchor and never used + // by apply, so their behavior is unchanged. + if (!isReply && selection) + payload.selection = anchoredSelection ?? selection; if (parentCommentId) payload.parentCommentId = parentCommentId; // Only a top-level inline comment (with a selection) may carry a suggestion. if (!isReply && selection && hasSuggestion) { diff --git a/packages/mcp/src/lib/comment-anchor.ts b/packages/mcp/src/lib/comment-anchor.ts index 7deb6620..cb494b1b 100644 --- a/packages/mcp/src/lib/comment-anchor.ts +++ b/packages/mcp/src/lib/comment-anchor.ts @@ -171,6 +171,65 @@ export function findAnchorInBlock( return null; } +/** + * Reconstruct the RAW text spanned by an AnchorMatch inside one block's + * `content` array. `startChild..endChild` are all text nodes (guaranteed by + * findAnchorInBlock, which only builds runs of `text` nodes), so concatenate + * each node's text slice: from `startOffset` on the first node, up to + * `endOffset` on the last, and the whole `.text` for any node fully inside the + * range. Mirrors spliceCommentMark's per-node slicing so the string returned + * here is EXACTLY the characters the comment mark will cover. + */ +function reconstructRawText(blockContent: any[], match: AnchorMatch): string { + const { startChild, startOffset, endChild, endOffset } = match; + let out = ""; + for (let k = startChild; k <= endChild; k++) { + const n = blockContent[k]; + const text: string = typeof n.text === "string" ? n.text : ""; + const sliceStart = k === startChild ? startOffset : 0; + const sliceEnd = k === endChild ? endOffset : text.length; + out += text.slice(sliceStart, sliceEnd); + } + return out; +} + +/** + * Return the RAW document substring that `selection` would anchor to — the exact + * characters the comment mark will cover — or `null` when the selection cannot + * be anchored anywhere in `doc`. + * + * This mirrors canAnchorInDoc / applyAnchorInDoc EXACTLY (same depth-first, + * document-order traversal and the same findAnchorInBlock match on the FIRST + * matching block), but instead of a boolean / an in-place mutation it + * reconstructs the raw text spanned by the matched range. Because + * findAnchorInBlock maps the normalized selection back to raw text-node + * positions, the returned string is the document's ORIGINAL characters (smart + * quotes, em-dashes, nbsp, collapsed whitespace) — NOT the normalized ASCII + * agent input. + * + * Callers store THIS as the comment's `selection` so the stored value equals the + * text actually under the mark, which is what the apply-suggestion equality + * check (replaceYjsMarkedText's `joinedText !== expectedText`) compares against. + * Without it a suggestion whose anchor only matched via normalization would be + * un-appliable (spurious 409). + */ +export function getAnchoredText(doc: any, selection: string): string | null { + const visit = (node: any, depth: number): string | null => { + if (depth > MAX_DEPTH || !node || typeof node !== "object") return null; + if (!Array.isArray(node.content)) return null; + const match = findAnchorInBlock(node.content, selection); + if (match) return reconstructRawText(node.content, match); + for (const child of node.content) { + if (child && typeof child === "object" && Array.isArray(child.content)) { + const found = visit(child, depth + 1); + if (found !== null) return found; + } + } + return null; + }; + return visit(doc, 0); +} + /** * Depth-first, document-order check for whether `selection` can be anchored * anywhere in `doc`. At each node with an array `content`, first try to match diff --git a/packages/mcp/test/mock/create-comment.test.mjs b/packages/mcp/test/mock/create-comment.test.mjs index d854bdb0..bcf70c44 100644 --- a/packages/mcp/test/mock/create-comment.test.mjs +++ b/packages/mcp/test/mock/create-comment.test.mjs @@ -452,3 +452,99 @@ test("suggestedText with a unique selection succeeds and forwards the payload", assert.equal(createPayload.selection, "brave"); assert.equal(result.data.suggestedText, "bold", "filterComment surfaces suggestedText"); }); + +// ----------------------------------------------------------------------------- +// 8) suggestedText where the DOC has TYPOGRAPHIC text and the agent selection is +// ASCII: the stored selection sent to /comments/create MUST be the doc's RAW +// typographic substring (what the mark covers), NOT the agent's ASCII input. +// This is the F1 contract that makes "Apply" succeed instead of a spurious +// 409 (apply compares the stored selection to the marked doc text strictly). +// ----------------------------------------------------------------------------- +test("suggestedText: the stored selection is the doc's RAW typographic substring, not the ASCII input", async () => { + let createPayload = null; + + const { baseURL } = await spawn(async (req, res) => { + const raw = await readBody(req); + if (req.url === "/api/auth/login") { + sendJson(res, 200, { success: true }, { + "Set-Cookie": "authToken=t; Path=/; HttpOnly", + }); + return; + } + if (req.url === "/api/pages/info") { + // The doc holds SMART quotes; the agent will select the ASCII form. + sendJson(res, 200, { + data: { + id: "22222222-2222-2222-2222-222222222222", + content: { + type: "doc", + content: [ + { + type: "paragraph", + content: [{ type: "text", text: "he said “hello” loudly" }], + }, + ], + }, + }, + }); + return; + } + if (req.url === "/api/comments/create") { + createPayload = JSON.parse(raw); + sendJson(res, 200, { + data: { + id: "cmt-typo-1", + content: createPayload.content, + selection: createPayload.selection, + suggestedText: createPayload.suggestedText, + type: createPayload.type, + }, + }); + return; + } + sendJson(res, 404, { message: "not found" }); + }); + + class TestClient extends DocmostClient { + async getCollabTokenWithReauth() { + return "collab-token"; + } + async resolvePageId() { + return "22222222-2222-2222-2222-222222222222"; + } + async mutatePage(pageId, collabToken, apiUrl, transform) { + const doc = { + type: "doc", + content: [ + { + type: "paragraph", + content: [{ type: "text", text: "he said “hello” loudly" }], + }, + ], + }; + const out = transform(doc); + return { doc: out, verify: { ok: true } }; + } + } + + const client = new TestClient(baseURL, "user@example.com", "pw"); + + const result = await client.createComment( + "22222222-2222-2222-2222-222222222222", + "please change", + "inline", + '"hello"', // ASCII quotes — the doc has smart quotes + undefined, + "goodbye", + ); + + assert.equal(result.success, true); + assert.equal(result.anchored, true); + assert.ok(createPayload, "/comments/create must have been called"); + assert.equal( + createPayload.selection, + "“hello”", + "the stored selection must be the doc's RAW typographic substring, not the ASCII input", + ); + assert.equal(createPayload.suggestedText, "goodbye"); +}); diff --git a/packages/mcp/test/unit/comment-anchor.test.mjs b/packages/mcp/test/unit/comment-anchor.test.mjs index c35d853b..87e61def 100644 --- a/packages/mcp/test/unit/comment-anchor.test.mjs +++ b/packages/mcp/test/unit/comment-anchor.test.mjs @@ -7,6 +7,7 @@ import { canAnchorInDoc, applyAnchorInDoc, countAnchorMatches, + getAnchoredText, } from "../../build/lib/comment-anchor.js"; const COMMENT_ID = "cmt-123"; @@ -274,3 +275,36 @@ test("countAnchorMatches applies the same normalization as anchoring", () => { const doc = paragraphDoc([{ type: "text", text: "say “hi” now" }]); assert.equal(countAnchorMatches(doc, '"hi"'), 1); }); + +// ----------------------------------------------------------------------------- +// getAnchoredText: returns the RAW document substring the mark would cover (the +// doc's original typographic characters), not the normalized ASCII selection. +// This is what makes a suggestion's stored selection equal the apply-time +// expectedText, so the strict equality in replaceYjsMarkedText holds. +// ----------------------------------------------------------------------------- +test("getAnchoredText returns the RAW (typographic) doc substring for an ASCII selection", () => { + // Doc holds smart quotes; agent selection is the ASCII form. + const doc = paragraphDoc([{ type: "text", text: "he said “hello” loudly" }]); + assert.equal(getAnchoredText(doc, '"hello"'), "“hello”"); +}); + +test("getAnchoredText undoes whitespace/dash normalization to the raw span", () => { + // Em-dash + nbsp in the doc; ASCII hyphen + single space in the selection. + const doc = paragraphDoc([{ type: "text", text: "a—b c" }]); + // selection "a-b c" (ascii dash) matches, raw substring keeps the em-dash+nbsp. + assert.equal(getAnchoredText(doc, "a-b c"), "a—b c"); +}); + +test("getAnchoredText spans consecutive text nodes and returns their raw slices", () => { + const doc = paragraphDoc([ + { type: "text", text: "Hello " }, + { type: "text", text: "“brave”", marks: [{ type: "bold" }] }, + { type: "text", text: " world" }, + ]); + assert.equal(getAnchoredText(doc, '"brave" wor'), "“brave” wor"); +}); + +test("getAnchoredText returns null when the selection does not anchor", () => { + const doc = paragraphDoc([{ type: "text", text: "hello world" }]); + assert.equal(getAnchoredText(doc, "not present"), null); +});