fix(comment): store the real anchored substring as expectedText + pin authz (#318 F1/F2)

F1 [blocking]: a suggestion whose anchor matched via normalization could never
be applied (spurious 409). The comment mark lands on the doc's ACTUAL text
(Docmost auto-converts to typographic quotes/dashes/nbsp), but the stored
selection — used as expectedText at apply — was the raw ASCII agent input
(+substring(0,250)). So replaceYjsMarkedText's strict joined!==expectedText
always failed and threw "text changed" though nobody edited. Fix: new pure
getAnchoredText(doc, selection) reconstructs the exact raw doc substring the mark
covers (slicing identical to spliceCommentMark); on the suggestion path
client.createComment stores THAT as selection, so expectedText equals the marked
text and apply returns applied:true. Live anchoring still uses the raw agent
selection (normalization still finds the anchor). Truncation raised 250->2000
(+ DTO @MaxLength(2000)) so the anchored substring is never cut below the mark
span. Ordinary comments unchanged. AI-chat shares client.createComment, so
covered. Regression tests: getAnchoredText raw-vs-ASCII; create payload selection
is the typographic substring; apply with typographic expectedText -> applied.

F2 [blocking]: added comment.controller.spec.ts pinning that validateCanEdit runs
before applySuggestion (Forbidden -> applySuggestion never called; happy path ->
called; missing comment -> 404 without authorizing).

MCP 448 pass; server comment+yjs 54 pass. MCP build/ rebuilt.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-07-03 20:29:42 +03:00
parent cd539558ed
commit 48c1ec46f7
10 changed files with 457 additions and 4 deletions
@@ -379,6 +379,32 @@ describe('replaceYjsMarkedText', () => {
expect(text.toDelta()).toEqual(before);
});
// F1 regression: the marked doc text is TYPOGRAPHIC (smart quotes / em-dash)
// and expectedText equals that raw typographic text — as it now does, because
// the MCP client stores the RAW anchored substring (getAnchoredText) rather
// than the agent's ASCII input. The strict `joinedText !== expectedText`
// compare must therefore MATCH and the suggestion apply (not a spurious 409).
it('typographic marked text applies when expectedText is the raw typographic text', () => {
const marked = '“hello”—world';
const { fragment, text } = buildRuns([
{ text: 'say ' },
{ text: marked, comment: { commentId: 'c1', resolved: false } },
{ text: '!' },
]);
const result = replaceYjsMarkedText(fragment, 'c1', marked, 'bye');
expect(result).toEqual({ applied: true, currentText: 'bye' });
expect(text.toDelta()).toEqual([
{ insert: 'say ' },
{
insert: 'bye',
attributes: { comment: { commentId: 'c1', resolved: false } },
},
{ insert: '!' },
]);
});
it('anchor deleted: no mark with that commentId → { applied: false, currentText: null }', () => {
const { fragment, text } = buildWithComments([
{ text: 'abc', comment: { commentId: 'c1', resolved: false } },
@@ -0,0 +1,119 @@
import {
ForbiddenException,
NotFoundException,
} from '@nestjs/common';
import { CommentController } from './comment.controller';
/**
* Authz-gate tests for the apply-suggestion route. Applying a suggestion
* rewrites the page text, so the route MUST call
* pageAccessService.validateCanEdit BEFORE handing off to
* commentService.applySuggestion (which performs the document mutation + stamp).
* That ordering is a security boundary: an unauthorized user must never reach
* the mutation. These tests pin it against a fully mocked controller so any
* regression that drops the gate (or reorders it after the mutation) fails here.
*/
describe('CommentController apply-suggestion authz', () => {
function makeController() {
const commentService = {
applySuggestion: jest.fn(async () => ({ id: 'c-1', applied: true })),
};
const commentRepo = { findById: jest.fn() };
const pageRepo = { findById: jest.fn() };
const spaceAbility = {} as any;
const pageAccessService = {
validateCanEdit: jest.fn(async () => undefined),
};
const wsService = {} as any;
const auditService = { log: jest.fn() };
const controller = new CommentController(
commentService as any,
commentRepo as any,
pageRepo as any,
spaceAbility,
pageAccessService as any,
wsService,
auditService as any,
);
return {
controller,
commentService,
commentRepo,
pageRepo,
pageAccessService,
};
}
const user: any = { id: 'u-1' };
const workspace: any = { id: 'ws-1' };
const provenance: any = undefined;
const dto: any = { commentId: 'c-1' };
const comment = {
id: 'c-1',
pageId: 'p-1',
spaceId: 'sp-1',
suggestedText: 'new text',
selection: 'old text',
};
const page = { id: 'p-1', spaceId: 'sp-1', deletedAt: null };
it('validateCanEdit throwing Forbidden rejects AND applySuggestion is never called', async () => {
const { controller, commentRepo, pageRepo, pageAccessService, commentService } =
makeController();
commentRepo.findById.mockResolvedValue(comment);
pageRepo.findById.mockResolvedValue(page);
pageAccessService.validateCanEdit.mockRejectedValue(
new ForbiddenException('no edit access'),
);
await expect(
controller.applySuggestion(dto, user, workspace, provenance),
).rejects.toBeInstanceOf(ForbiddenException);
// The security boundary: the mutation/stamp must NOT run for an
// unauthorized user.
expect(pageAccessService.validateCanEdit).toHaveBeenCalledWith(page, user);
expect(commentService.applySuggestion).not.toHaveBeenCalled();
});
it('happy path: validateCanEdit resolves → applySuggestion is called and its result returned', async () => {
const { controller, commentRepo, pageRepo, pageAccessService, commentService } =
makeController();
commentRepo.findById.mockResolvedValue(comment);
pageRepo.findById.mockResolvedValue(page);
const applied = { id: 'c-1', applied: true };
commentService.applySuggestion.mockResolvedValue(applied);
const result = await controller.applySuggestion(
dto,
user,
workspace,
provenance,
);
// Authorization ran before the mutation, then the service was invoked.
expect(pageAccessService.validateCanEdit).toHaveBeenCalledWith(page, user);
expect(commentService.applySuggestion).toHaveBeenCalledWith(
comment,
user,
provenance,
);
expect(result).toBe(applied);
});
it('missing comment: NotFound is thrown without authorizing or applying', async () => {
const { controller, commentRepo, pageRepo, pageAccessService, commentService } =
makeController();
commentRepo.findById.mockResolvedValue(null);
await expect(
controller.applySuggestion(dto, user, workspace, provenance),
).rejects.toBeInstanceOf(NotFoundException);
expect(pageRepo.findById).not.toHaveBeenCalled();
expect(pageAccessService.validateCanEdit).not.toHaveBeenCalled();
expect(commentService.applySuggestion).not.toHaveBeenCalled();
});
});
@@ -87,7 +87,15 @@ export class CommentService {
}
}
const selection = createCommentDto?.selection?.substring(0, 250) ?? null;
// Do NOT lossily truncate at 250: for a suggestion the client sends the RAW
// anchored document substring (the exact text under the comment mark) as the
// selection, which can be LONGER than the agent's <=250-char typed input
// (normalization collapses whitespace/typographic runs, so the raw span can
// exceed the normalized selection). Truncating it shorter than the mark span
// would break the apply-time equality check and make the suggestion
// un-appliable. Keep a generous 2000-char safety bound (matching
// suggestedText) so a legitimate anchored substring is never cut.
const selection = createCommentDto?.selection?.substring(0, 2000) ?? null;
// A suggested edit rewrites the exact text under an inline comment mark, so
// it is only meaningful on a top-level inline comment that carries a
@@ -33,8 +33,15 @@ export class CreateCommentDto {
@IsJSON()
content: any;
// The agent tool caps what it TYPES at 250 chars, but for a suggestion the
// client resolves and sends the RAW anchored document substring (the exact
// text under the mark), which can be longer once normalization is undone. Bound
// the stored value at 2000 (matching suggestedText) so a legitimate anchored
// substring is never rejected — the service used to lossily truncate at 250,
// which broke the apply-time equality check.
@IsOptional()
@IsString()
@MaxLength(2000)
selection: string;
@IsOptional()
+22 -2
View File
@@ -17,7 +17,7 @@ import { withPageLock } from "./lib/page-lock.js";
import { applyTextEdits, } from "./lib/json-edit.js";
import { getCollabToken, performLogin } from "./lib/auth-utils.js";
import { diffDocs, summarizeChange } from "./lib/diff.js";
import { applyAnchorInDoc, canAnchorInDoc, countAnchorMatches, } from "./lib/comment-anchor.js";
import { applyAnchorInDoc, canAnchorInDoc, countAnchorMatches, getAnchoredText, } from "./lib/comment-anchor.js";
import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, canonicalizeFootnotes, insertInlineFootnote, } from "./lib/transforms.js";
import vm from "node:vm";
// Supported image types, kept as two lookup tables so both a local file
@@ -1936,6 +1936,16 @@ export class DocmostClient {
if (!isReply && (!selection || !selection.trim())) {
throw new Error("create_comment: an inline 'selection' (exact text to anchor on) is required for a top-level comment");
}
// For a SUGGESTION, the value we store as the comment's `selection` must be
// the RAW document substring the mark lands on (typographic quotes/dashes,
// nbsp, collapsed whitespace), NOT the agent's ASCII input. The anchor is
// placed via normalization, so when the doc was auto-converted to
// typographic the raw substring differs from the agent input; apply-time
// compares the stored selection to the marked doc text STRICTLY, so storing
// the raw substring is what makes "Apply" succeed instead of a spurious 409.
// Captured in the pre-check below (which already reads the page) and used as
// payload.selection. Ordinary comments keep sending the raw agent selection.
let anchoredSelection = null;
// For a top-level comment, fail BEFORE creating anything when the selection
// is not present in the persisted document — this avoids leaving an orphan
// comment + notification behind. A read failure (network) is non-fatal: the
@@ -1958,6 +1968,11 @@ export class DocmostClient {
"A suggested edit must anchor to a UNIQUE location; expand the selection with surrounding context " +
"(still <=250 chars) so it appears exactly once.");
}
// Exactly one match: capture the RAW anchored substring to store as the
// comment selection (so apply-time equality holds). If this returns
// null despite countAnchorMatches===1 (shouldn't happen), fall back to
// the raw agent selection below rather than crash.
anchoredSelection = getAnchoredText(page.content, selection);
}
else if (!canAnchorInDoc(page.content, selection)) {
throw new Error("create_comment: could not find the selection text in the page to anchor the comment. " +
@@ -1987,8 +2002,13 @@ export class DocmostClient {
content: JSON.stringify(jsonContent),
type: effectiveType,
};
// For a suggestion, store the RAW anchored substring (anchoredSelection) so
// the stored selection === the text under the mark === apply-time
// expectedText. Ordinary comments (and the null fallback) keep the raw
// agent selection — their selection is only display/anchor and never used
// by apply, so their behavior is unchanged.
if (!isReply && selection)
payload.selection = selection;
payload.selection = anchoredSelection ?? selection;
if (parentCommentId)
payload.parentCommentId = parentCommentId;
// Only a top-level inline comment (with a selection) may carry a suggestion.
+61
View File
@@ -148,6 +148,67 @@ export function findAnchorInBlock(blockContent, selection) {
}
return null;
}
/**
* Reconstruct the RAW text spanned by an AnchorMatch inside one block's
* `content` array. `startChild..endChild` are all text nodes (guaranteed by
* findAnchorInBlock, which only builds runs of `text` nodes), so concatenate
* each node's text slice: from `startOffset` on the first node, up to
* `endOffset` on the last, and the whole `.text` for any node fully inside the
* range. Mirrors spliceCommentMark's per-node slicing so the string returned
* here is EXACTLY the characters the comment mark will cover.
*/
function reconstructRawText(blockContent, match) {
const { startChild, startOffset, endChild, endOffset } = match;
let out = "";
for (let k = startChild; k <= endChild; k++) {
const n = blockContent[k];
const text = typeof n.text === "string" ? n.text : "";
const sliceStart = k === startChild ? startOffset : 0;
const sliceEnd = k === endChild ? endOffset : text.length;
out += text.slice(sliceStart, sliceEnd);
}
return out;
}
/**
* Return the RAW document substring that `selection` would anchor to — the exact
* characters the comment mark will cover — or `null` when the selection cannot
* be anchored anywhere in `doc`.
*
* This mirrors canAnchorInDoc / applyAnchorInDoc EXACTLY (same depth-first,
* document-order traversal and the same findAnchorInBlock match on the FIRST
* matching block), but instead of a boolean / an in-place mutation it
* reconstructs the raw text spanned by the matched range. Because
* findAnchorInBlock maps the normalized selection back to raw text-node
* positions, the returned string is the document's ORIGINAL characters (smart
* quotes, em-dashes, nbsp, collapsed whitespace) — NOT the normalized ASCII
* agent input.
*
* Callers store THIS as the comment's `selection` so the stored value equals the
* text actually under the mark, which is what the apply-suggestion equality
* check (replaceYjsMarkedText's `joinedText !== expectedText`) compares against.
* Without it a suggestion whose anchor only matched via normalization would be
* un-appliable (spurious 409).
*/
export function getAnchoredText(doc, selection) {
const visit = (node, depth) => {
if (depth > MAX_DEPTH || !node || typeof node !== "object")
return null;
if (!Array.isArray(node.content))
return null;
const match = findAnchorInBlock(node.content, selection);
if (match)
return reconstructRawText(node.content, match);
for (const child of node.content) {
if (child && typeof child === "object" && Array.isArray(child.content)) {
const found = visit(child, depth + 1);
if (found !== null)
return found;
}
}
return null;
};
return visit(doc, 0);
}
/**
* Depth-first, document-order check for whether `selection` can be anchored
* anywhere in `doc`. At each node with an array `content`, first try to match
+24 -1
View File
@@ -60,6 +60,7 @@ import {
applyAnchorInDoc,
canAnchorInDoc,
countAnchorMatches,
getAnchoredText,
} from "./lib/comment-anchor.js";
import {
blockText,
@@ -2433,6 +2434,17 @@ export class DocmostClient {
);
}
// For a SUGGESTION, the value we store as the comment's `selection` must be
// the RAW document substring the mark lands on (typographic quotes/dashes,
// nbsp, collapsed whitespace), NOT the agent's ASCII input. The anchor is
// placed via normalization, so when the doc was auto-converted to
// typographic the raw substring differs from the agent input; apply-time
// compares the stored selection to the marked doc text STRICTLY, so storing
// the raw substring is what makes "Apply" succeed instead of a spurious 409.
// Captured in the pre-check below (which already reads the page) and used as
// payload.selection. Ordinary comments keep sending the raw agent selection.
let anchoredSelection: string | null = null;
// For a top-level comment, fail BEFORE creating anything when the selection
// is not present in the persisted document — this avoids leaving an orphan
// comment + notification behind. A read failure (network) is non-fatal: the
@@ -2459,6 +2471,11 @@ export class DocmostClient {
"(still <=250 chars) so it appears exactly once.",
);
}
// Exactly one match: capture the RAW anchored substring to store as the
// comment selection (so apply-time equality holds). If this returns
// null despite countAnchorMatches===1 (shouldn't happen), fall back to
// the raw agent selection below rather than crash.
anchoredSelection = getAnchoredText(page.content, selection);
} else if (!canAnchorInDoc(page.content, selection)) {
throw new Error(
"create_comment: could not find the selection text in the page to anchor the comment. " +
@@ -2496,7 +2513,13 @@ export class DocmostClient {
content: JSON.stringify(jsonContent),
type: effectiveType,
};
if (!isReply && selection) payload.selection = selection;
// For a suggestion, store the RAW anchored substring (anchoredSelection) so
// the stored selection === the text under the mark === apply-time
// expectedText. Ordinary comments (and the null fallback) keep the raw
// agent selection — their selection is only display/anchor and never used
// by apply, so their behavior is unchanged.
if (!isReply && selection)
payload.selection = anchoredSelection ?? selection;
if (parentCommentId) payload.parentCommentId = parentCommentId;
// Only a top-level inline comment (with a selection) may carry a suggestion.
if (!isReply && selection && hasSuggestion) {
+59
View File
@@ -171,6 +171,65 @@ export function findAnchorInBlock(
return null;
}
/**
* Reconstruct the RAW text spanned by an AnchorMatch inside one block's
* `content` array. `startChild..endChild` are all text nodes (guaranteed by
* findAnchorInBlock, which only builds runs of `text` nodes), so concatenate
* each node's text slice: from `startOffset` on the first node, up to
* `endOffset` on the last, and the whole `.text` for any node fully inside the
* range. Mirrors spliceCommentMark's per-node slicing so the string returned
* here is EXACTLY the characters the comment mark will cover.
*/
function reconstructRawText(blockContent: any[], match: AnchorMatch): string {
const { startChild, startOffset, endChild, endOffset } = match;
let out = "";
for (let k = startChild; k <= endChild; k++) {
const n = blockContent[k];
const text: string = typeof n.text === "string" ? n.text : "";
const sliceStart = k === startChild ? startOffset : 0;
const sliceEnd = k === endChild ? endOffset : text.length;
out += text.slice(sliceStart, sliceEnd);
}
return out;
}
/**
* Return the RAW document substring that `selection` would anchor to — the exact
* characters the comment mark will cover — or `null` when the selection cannot
* be anchored anywhere in `doc`.
*
* This mirrors canAnchorInDoc / applyAnchorInDoc EXACTLY (same depth-first,
* document-order traversal and the same findAnchorInBlock match on the FIRST
* matching block), but instead of a boolean / an in-place mutation it
* reconstructs the raw text spanned by the matched range. Because
* findAnchorInBlock maps the normalized selection back to raw text-node
* positions, the returned string is the document's ORIGINAL characters (smart
* quotes, em-dashes, nbsp, collapsed whitespace) — NOT the normalized ASCII
* agent input.
*
* Callers store THIS as the comment's `selection` so the stored value equals the
* text actually under the mark, which is what the apply-suggestion equality
* check (replaceYjsMarkedText's `joinedText !== expectedText`) compares against.
* Without it a suggestion whose anchor only matched via normalization would be
* un-appliable (spurious 409).
*/
export function getAnchoredText(doc: any, selection: string): string | null {
const visit = (node: any, depth: number): string | null => {
if (depth > MAX_DEPTH || !node || typeof node !== "object") return null;
if (!Array.isArray(node.content)) return null;
const match = findAnchorInBlock(node.content, selection);
if (match) return reconstructRawText(node.content, match);
for (const child of node.content) {
if (child && typeof child === "object" && Array.isArray(child.content)) {
const found = visit(child, depth + 1);
if (found !== null) return found;
}
}
return null;
};
return visit(doc, 0);
}
/**
* Depth-first, document-order check for whether `selection` can be anchored
* anywhere in `doc`. At each node with an array `content`, first try to match
@@ -452,3 +452,99 @@ test("suggestedText with a unique selection succeeds and forwards the payload",
assert.equal(createPayload.selection, "brave");
assert.equal(result.data.suggestedText, "bold", "filterComment surfaces suggestedText");
});
// -----------------------------------------------------------------------------
// 8) suggestedText where the DOC has TYPOGRAPHIC text and the agent selection is
// ASCII: the stored selection sent to /comments/create MUST be the doc's RAW
// typographic substring (what the mark covers), NOT the agent's ASCII input.
// This is the F1 contract that makes "Apply" succeed instead of a spurious
// 409 (apply compares the stored selection to the marked doc text strictly).
// -----------------------------------------------------------------------------
test("suggestedText: the stored selection is the doc's RAW typographic substring, not the ASCII input", async () => {
let createPayload = null;
const { baseURL } = await spawn(async (req, res) => {
const raw = await readBody(req);
if (req.url === "/api/auth/login") {
sendJson(res, 200, { success: true }, {
"Set-Cookie": "authToken=t; Path=/; HttpOnly",
});
return;
}
if (req.url === "/api/pages/info") {
// The doc holds SMART quotes; the agent will select the ASCII form.
sendJson(res, 200, {
data: {
id: "22222222-2222-2222-2222-222222222222",
content: {
type: "doc",
content: [
{
type: "paragraph",
content: [{ type: "text", text: "he said “hello” loudly" }],
},
],
},
},
});
return;
}
if (req.url === "/api/comments/create") {
createPayload = JSON.parse(raw);
sendJson(res, 200, {
data: {
id: "cmt-typo-1",
content: createPayload.content,
selection: createPayload.selection,
suggestedText: createPayload.suggestedText,
type: createPayload.type,
},
});
return;
}
sendJson(res, 404, { message: "not found" });
});
class TestClient extends DocmostClient {
async getCollabTokenWithReauth() {
return "collab-token";
}
async resolvePageId() {
return "22222222-2222-2222-2222-222222222222";
}
async mutatePage(pageId, collabToken, apiUrl, transform) {
const doc = {
type: "doc",
content: [
{
type: "paragraph",
content: [{ type: "text", text: "he said “hello” loudly" }],
},
],
};
const out = transform(doc);
return { doc: out, verify: { ok: true } };
}
}
const client = new TestClient(baseURL, "user@example.com", "pw");
const result = await client.createComment(
"22222222-2222-2222-2222-222222222222",
"please change",
"inline",
'"hello"', // ASCII quotes — the doc has smart quotes
undefined,
"goodbye",
);
assert.equal(result.success, true);
assert.equal(result.anchored, true);
assert.ok(createPayload, "/comments/create must have been called");
assert.equal(
createPayload.selection,
"“hello”",
"the stored selection must be the doc's RAW typographic substring, not the ASCII input",
);
assert.equal(createPayload.suggestedText, "goodbye");
});
@@ -7,6 +7,7 @@ import {
canAnchorInDoc,
applyAnchorInDoc,
countAnchorMatches,
getAnchoredText,
} from "../../build/lib/comment-anchor.js";
const COMMENT_ID = "cmt-123";
@@ -274,3 +275,36 @@ test("countAnchorMatches applies the same normalization as anchoring", () => {
const doc = paragraphDoc([{ type: "text", text: "say “hi” now" }]);
assert.equal(countAnchorMatches(doc, '"hi"'), 1);
});
// -----------------------------------------------------------------------------
// getAnchoredText: returns the RAW document substring the mark would cover (the
// doc's original typographic characters), not the normalized ASCII selection.
// This is what makes a suggestion's stored selection equal the apply-time
// expectedText, so the strict equality in replaceYjsMarkedText holds.
// -----------------------------------------------------------------------------
test("getAnchoredText returns the RAW (typographic) doc substring for an ASCII selection", () => {
// Doc holds smart quotes; agent selection is the ASCII form.
const doc = paragraphDoc([{ type: "text", text: "he said “hello” loudly" }]);
assert.equal(getAnchoredText(doc, '"hello"'), "“hello”");
});
test("getAnchoredText undoes whitespace/dash normalization to the raw span", () => {
// Em-dash + nbsp in the doc; ASCII hyphen + single space in the selection.
const doc = paragraphDoc([{ type: "text", text: "a—b c" }]);
// selection "a-b c" (ascii dash) matches, raw substring keeps the em-dash+nbsp.
assert.equal(getAnchoredText(doc, "a-b c"), "a—b c");
});
test("getAnchoredText spans consecutive text nodes and returns their raw slices", () => {
const doc = paragraphDoc([
{ type: "text", text: "Hello " },
{ type: "text", text: "“brave”", marks: [{ type: "bold" }] },
{ type: "text", text: " world" },
]);
assert.equal(getAnchoredText(doc, '"brave" wor'), "“brave” wor");
});
test("getAnchoredText returns null when the selection does not anchor", () => {
const doc = paragraphDoc([{ type: "text", text: "hello world" }]);
assert.equal(getAnchoredText(doc, "not present"), null);
});