fix(comment): store the real anchored substring as expectedText + pin authz (#318 F1/F2)

F1 [blocking]: a suggestion whose anchor matched via normalization could never be applied (spurious 409). The comment mark lands on the doc's ACTUAL text (Docmost auto-converts to typographic quotes/dashes/nbsp), but the stored selection — used as expectedText at apply — was the raw ASCII agent input (+substring(0,250)). So replaceYjsMarkedText's strict joined!==expectedText always failed and threw "text changed" though nobody edited. Fix: new pure getAnchoredText(doc, selection) reconstructs the exact raw doc substring the mark covers (slicing identical to spliceCommentMark); on the suggestion path client.createComment stores THAT as selection, so expectedText equals the marked text and apply returns applied:true. Live anchoring still uses the raw agent selection (normalization still finds the anchor). Truncation raised 250->2000 (+ DTO @MaxLength(2000)) so the anchored substring is never cut below the mark span. Ordinary comments unchanged. AI-chat shares client.createComment, so covered. Regression tests: getAnchoredText raw-vs-ASCII; create payload selection is the typographic substring; apply with typographic expectedText -> applied. F2 [blocking]: added comment.controller.spec.ts pinning that validateCanEdit runs before applySuggestion (Forbidden -> applySuggestion never called; happy path -> called; missing comment -> 404 without authorizing). MCP 448 pass; server comment+yjs 54 pass. MCP build/ rebuilt. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-03 20:29:42 +03:00
parent cd539558ed
commit 48c1ec46f7
10 changed files with 457 additions and 4 deletions
@@ -379,6 +379,32 @@ describe('replaceYjsMarkedText', () => {
    expect(text.toDelta()).toEqual(before);
  });

+  // F1 regression: the marked doc text is TYPOGRAPHIC (smart quotes / em-dash)
+  // and expectedText equals that raw typographic text — as it now does, because
+  // the MCP client stores the RAW anchored substring (getAnchoredText) rather
+  // than the agent's ASCII input. The strict `joinedText !== expectedText`
+  // compare must therefore MATCH and the suggestion apply (not a spurious 409).
+  it('typographic marked text applies when expectedText is the raw typographic text', () => {
+    const marked = '“hello”—world';
+    const { fragment, text } = buildRuns([
+      { text: 'say ' },
+      { text: marked, comment: { commentId: 'c1', resolved: false } },
+      { text: '!' },
+    ]);
+
+    const result = replaceYjsMarkedText(fragment, 'c1', marked, 'bye');
+
+    expect(result).toEqual({ applied: true, currentText: 'bye' });
+    expect(text.toDelta()).toEqual([
+      { insert: 'say ' },
+      {
+        insert: 'bye',
+        attributes: { comment: { commentId: 'c1', resolved: false } },
+      },
+      { insert: '!' },
+    ]);
+  });
+
  it('anchor deleted: no mark with that commentId → { applied: false, currentText: null }', () => {
    const { fragment, text } = buildWithComments([
      { text: 'abc', comment: { commentId: 'c1', resolved: false } },
@@ -0,0 +1,119 @@
+import {
+  ForbiddenException,
+  NotFoundException,
+} from '@nestjs/common';
+import { CommentController } from './comment.controller';
+
+/**
+ * Authz-gate tests for the apply-suggestion route. Applying a suggestion
+ * rewrites the page text, so the route MUST call
+ * pageAccessService.validateCanEdit BEFORE handing off to
+ * commentService.applySuggestion (which performs the document mutation + stamp).
+ * That ordering is a security boundary: an unauthorized user must never reach
+ * the mutation. These tests pin it against a fully mocked controller so any
+ * regression that drops the gate (or reorders it after the mutation) fails here.
+ */
+describe('CommentController apply-suggestion authz', () => {
+  function makeController() {
+    const commentService = {
+      applySuggestion: jest.fn(async () => ({ id: 'c-1', applied: true })),
+    };
+    const commentRepo = { findById: jest.fn() };
+    const pageRepo = { findById: jest.fn() };
+    const spaceAbility = {} as any;
+    const pageAccessService = {
+      validateCanEdit: jest.fn(async () => undefined),
+    };
+    const wsService = {} as any;
+    const auditService = { log: jest.fn() };
+
+    const controller = new CommentController(
+      commentService as any,
+      commentRepo as any,
+      pageRepo as any,
+      spaceAbility,
+      pageAccessService as any,
+      wsService,
+      auditService as any,
+    );
+    return {
+      controller,
+      commentService,
+      commentRepo,
+      pageRepo,
+      pageAccessService,
+    };
+  }
+
+  const user: any = { id: 'u-1' };
+  const workspace: any = { id: 'ws-1' };
+  const provenance: any = undefined;
+  const dto: any = { commentId: 'c-1' };
+
+  const comment = {
+    id: 'c-1',
+    pageId: 'p-1',
+    spaceId: 'sp-1',
+    suggestedText: 'new text',
+    selection: 'old text',
+  };
+  const page = { id: 'p-1', spaceId: 'sp-1', deletedAt: null };
+
+  it('validateCanEdit throwing Forbidden rejects AND applySuggestion is never called', async () => {
+    const { controller, commentRepo, pageRepo, pageAccessService, commentService } =
+      makeController();
+    commentRepo.findById.mockResolvedValue(comment);
+    pageRepo.findById.mockResolvedValue(page);
+    pageAccessService.validateCanEdit.mockRejectedValue(
+      new ForbiddenException('no edit access'),
+    );
+
+    await expect(
+      controller.applySuggestion(dto, user, workspace, provenance),
+    ).rejects.toBeInstanceOf(ForbiddenException);
+
+    // The security boundary: the mutation/stamp must NOT run for an
+    // unauthorized user.
+    expect(pageAccessService.validateCanEdit).toHaveBeenCalledWith(page, user);
+    expect(commentService.applySuggestion).not.toHaveBeenCalled();
+  });
+
+  it('happy path: validateCanEdit resolves → applySuggestion is called and its result returned', async () => {
+    const { controller, commentRepo, pageRepo, pageAccessService, commentService } =
+      makeController();
+    commentRepo.findById.mockResolvedValue(comment);
+    pageRepo.findById.mockResolvedValue(page);
+    const applied = { id: 'c-1', applied: true };
+    commentService.applySuggestion.mockResolvedValue(applied);
+
+    const result = await controller.applySuggestion(
+      dto,
+      user,
+      workspace,
+      provenance,
+    );
+
+    // Authorization ran before the mutation, then the service was invoked.
+    expect(pageAccessService.validateCanEdit).toHaveBeenCalledWith(page, user);
+    expect(commentService.applySuggestion).toHaveBeenCalledWith(
+      comment,
+      user,
+      provenance,
+    );
+    expect(result).toBe(applied);
+  });
+
+  it('missing comment: NotFound is thrown without authorizing or applying', async () => {
+    const { controller, commentRepo, pageRepo, pageAccessService, commentService } =
+      makeController();
+    commentRepo.findById.mockResolvedValue(null);
+
+    await expect(
+      controller.applySuggestion(dto, user, workspace, provenance),
+    ).rejects.toBeInstanceOf(NotFoundException);
+
+    expect(pageRepo.findById).not.toHaveBeenCalled();
+    expect(pageAccessService.validateCanEdit).not.toHaveBeenCalled();
+    expect(commentService.applySuggestion).not.toHaveBeenCalled();
+  });
+});
@@ -87,7 +87,15 @@ export class CommentService {
      }
    }

-    const selection = createCommentDto?.selection?.substring(0, 250) ?? null;
+    // Do NOT lossily truncate at 250: for a suggestion the client sends the RAW
+    // anchored document substring (the exact text under the comment mark) as the
+    // selection, which can be LONGER than the agent's <=250-char typed input
+    // (normalization collapses whitespace/typographic runs, so the raw span can
+    // exceed the normalized selection). Truncating it shorter than the mark span
+    // would break the apply-time equality check and make the suggestion
+    // un-appliable. Keep a generous 2000-char safety bound (matching
+    // suggestedText) so a legitimate anchored substring is never cut.
+    const selection = createCommentDto?.selection?.substring(0, 2000) ?? null;

    // A suggested edit rewrites the exact text under an inline comment mark, so
    // it is only meaningful on a top-level inline comment that carries a
@@ -33,8 +33,15 @@ export class CreateCommentDto {
  @IsJSON()
  content: any;

+  // The agent tool caps what it TYPES at 250 chars, but for a suggestion the
+  // client resolves and sends the RAW anchored document substring (the exact
+  // text under the mark), which can be longer once normalization is undone. Bound
+  // the stored value at 2000 (matching suggestedText) so a legitimate anchored
+  // substring is never rejected — the service used to lossily truncate at 250,
+  // which broke the apply-time equality check.
  @IsOptional()
  @IsString()
+  @MaxLength(2000)
  selection: string;

  @IsOptional()
@@ -17,7 +17,7 @@ import { withPageLock } from "./lib/page-lock.js";
 import { applyTextEdits, } from "./lib/json-edit.js";
 import { getCollabToken, performLogin } from "./lib/auth-utils.js";
 import { diffDocs, summarizeChange } from "./lib/diff.js";
-import { applyAnchorInDoc, canAnchorInDoc, countAnchorMatches, } from "./lib/comment-anchor.js";
+import { applyAnchorInDoc, canAnchorInDoc, countAnchorMatches, getAnchoredText, } from "./lib/comment-anchor.js";
 import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, canonicalizeFootnotes, insertInlineFootnote, } from "./lib/transforms.js";
 import vm from "node:vm";
 // Supported image types, kept as two lookup tables so both a local file
@@ -1936,6 +1936,16 @@ export class DocmostClient {
        if (!isReply && (!selection || !selection.trim())) {
            throw new Error("create_comment: an inline 'selection' (exact text to anchor on) is required for a top-level comment");
        }
+        // For a SUGGESTION, the value we store as the comment's `selection` must be
+        // the RAW document substring the mark lands on (typographic quotes/dashes,
+        // nbsp, collapsed whitespace), NOT the agent's ASCII input. The anchor is
+        // placed via normalization, so when the doc was auto-converted to
+        // typographic the raw substring differs from the agent input; apply-time
+        // compares the stored selection to the marked doc text STRICTLY, so storing
+        // the raw substring is what makes "Apply" succeed instead of a spurious 409.
+        // Captured in the pre-check below (which already reads the page) and used as
+        // payload.selection. Ordinary comments keep sending the raw agent selection.
+        let anchoredSelection = null;
        // For a top-level comment, fail BEFORE creating anything when the selection
        // is not present in the persisted document — this avoids leaving an orphan
        // comment + notification behind. A read failure (network) is non-fatal: the
@@ -1958,6 +1968,11 @@ export class DocmostClient {
                            "A suggested edit must anchor to a UNIQUE location; expand the selection with surrounding context " +
                            "(still <=250 chars) so it appears exactly once.");
                    }
+                    // Exactly one match: capture the RAW anchored substring to store as the
+                    // comment selection (so apply-time equality holds). If this returns
+                    // null despite countAnchorMatches===1 (shouldn't happen), fall back to
+                    // the raw agent selection below rather than crash.
+                    anchoredSelection = getAnchoredText(page.content, selection);
                }
                else if (!canAnchorInDoc(page.content, selection)) {
                    throw new Error("create_comment: could not find the selection text in the page to anchor the comment. " +
@@ -1987,8 +2002,13 @@ export class DocmostClient {
            content: JSON.stringify(jsonContent),
            type: effectiveType,
        };
+        // For a suggestion, store the RAW anchored substring (anchoredSelection) so
+        // the stored selection === the text under the mark === apply-time
+        // expectedText. Ordinary comments (and the null fallback) keep the raw
+        // agent selection — their selection is only display/anchor and never used
+        // by apply, so their behavior is unchanged.
        if (!isReply && selection)
-            payload.selection = selection;
+            payload.selection = anchoredSelection ?? selection;
        if (parentCommentId)
            payload.parentCommentId = parentCommentId;
        // Only a top-level inline comment (with a selection) may carry a suggestion.
@@ -148,6 +148,67 @@ export function findAnchorInBlock(blockContent, selection) {
    }
    return null;
 }
+/**
+ * Reconstruct the RAW text spanned by an AnchorMatch inside one block's
+ * `content` array. `startChild..endChild` are all text nodes (guaranteed by
+ * findAnchorInBlock, which only builds runs of `text` nodes), so concatenate
+ * each node's text slice: from `startOffset` on the first node, up to
+ * `endOffset` on the last, and the whole `.text` for any node fully inside the
+ * range. Mirrors spliceCommentMark's per-node slicing so the string returned
+ * here is EXACTLY the characters the comment mark will cover.
+ */
+function reconstructRawText(blockContent, match) {
+    const { startChild, startOffset, endChild, endOffset } = match;
+    let out = "";
+    for (let k = startChild; k <= endChild; k++) {
+        const n = blockContent[k];
+        const text = typeof n.text === "string" ? n.text : "";
+        const sliceStart = k === startChild ? startOffset : 0;
+        const sliceEnd = k === endChild ? endOffset : text.length;
+        out += text.slice(sliceStart, sliceEnd);
+    }
+    return out;
+}
+/**
+ * Return the RAW document substring that `selection` would anchor to — the exact
+ * characters the comment mark will cover — or `null` when the selection cannot
+ * be anchored anywhere in `doc`.
+ *
+ * This mirrors canAnchorInDoc / applyAnchorInDoc EXACTLY (same depth-first,
+ * document-order traversal and the same findAnchorInBlock match on the FIRST
+ * matching block), but instead of a boolean / an in-place mutation it
+ * reconstructs the raw text spanned by the matched range. Because
+ * findAnchorInBlock maps the normalized selection back to raw text-node
+ * positions, the returned string is the document's ORIGINAL characters (smart
+ * quotes, em-dashes, nbsp, collapsed whitespace) — NOT the normalized ASCII
+ * agent input.
+ *
+ * Callers store THIS as the comment's `selection` so the stored value equals the
+ * text actually under the mark, which is what the apply-suggestion equality
+ * check (replaceYjsMarkedText's `joinedText !== expectedText`) compares against.
+ * Without it a suggestion whose anchor only matched via normalization would be
+ * un-appliable (spurious 409).
+ */
+export function getAnchoredText(doc, selection) {
+    const visit = (node, depth) => {
+        if (depth > MAX_DEPTH || !node || typeof node !== "object")
+            return null;
+        if (!Array.isArray(node.content))
+            return null;
+        const match = findAnchorInBlock(node.content, selection);
+        if (match)
+            return reconstructRawText(node.content, match);
+        for (const child of node.content) {
+            if (child && typeof child === "object" && Array.isArray(child.content)) {
+                const found = visit(child, depth + 1);
+                if (found !== null)
+                    return found;
+            }
+        }
+        return null;
+    };
+    return visit(doc, 0);
+}
 /**
 * Depth-first, document-order check for whether `selection` can be anchored
 * anywhere in `doc`. At each node with an array `content`, first try to match
@@ -60,6 +60,7 @@ import {
  applyAnchorInDoc,
  canAnchorInDoc,
  countAnchorMatches,
+  getAnchoredText,
 } from "./lib/comment-anchor.js";
 import {
  blockText,
@@ -2433,6 +2434,17 @@ export class DocmostClient {
      );
    }

+    // For a SUGGESTION, the value we store as the comment's `selection` must be
+    // the RAW document substring the mark lands on (typographic quotes/dashes,
+    // nbsp, collapsed whitespace), NOT the agent's ASCII input. The anchor is
+    // placed via normalization, so when the doc was auto-converted to
+    // typographic the raw substring differs from the agent input; apply-time
+    // compares the stored selection to the marked doc text STRICTLY, so storing
+    // the raw substring is what makes "Apply" succeed instead of a spurious 409.
+    // Captured in the pre-check below (which already reads the page) and used as
+    // payload.selection. Ordinary comments keep sending the raw agent selection.
+    let anchoredSelection: string | null = null;
+
    // For a top-level comment, fail BEFORE creating anything when the selection
    // is not present in the persisted document — this avoids leaving an orphan
    // comment + notification behind. A read failure (network) is non-fatal: the
@@ -2459,6 +2471,11 @@ export class DocmostClient {
                "(still <=250 chars) so it appears exactly once.",
            );
          }
+          // Exactly one match: capture the RAW anchored substring to store as the
+          // comment selection (so apply-time equality holds). If this returns
+          // null despite countAnchorMatches===1 (shouldn't happen), fall back to
+          // the raw agent selection below rather than crash.
+          anchoredSelection = getAnchoredText(page.content, selection);
        } else if (!canAnchorInDoc(page.content, selection)) {
          throw new Error(
            "create_comment: could not find the selection text in the page to anchor the comment. " +
@@ -2496,7 +2513,13 @@ export class DocmostClient {
      content: JSON.stringify(jsonContent),
      type: effectiveType,
    };
-    if (!isReply && selection) payload.selection = selection;
+    // For a suggestion, store the RAW anchored substring (anchoredSelection) so
+    // the stored selection === the text under the mark === apply-time
+    // expectedText. Ordinary comments (and the null fallback) keep the raw
+    // agent selection — their selection is only display/anchor and never used
+    // by apply, so their behavior is unchanged.
+    if (!isReply && selection)
+      payload.selection = anchoredSelection ?? selection;
    if (parentCommentId) payload.parentCommentId = parentCommentId;
    // Only a top-level inline comment (with a selection) may carry a suggestion.
    if (!isReply && selection && hasSuggestion) {
@@ -171,6 +171,65 @@ export function findAnchorInBlock(
  return null;
 }

+/**
+ * Reconstruct the RAW text spanned by an AnchorMatch inside one block's
+ * `content` array. `startChild..endChild` are all text nodes (guaranteed by
+ * findAnchorInBlock, which only builds runs of `text` nodes), so concatenate
+ * each node's text slice: from `startOffset` on the first node, up to
+ * `endOffset` on the last, and the whole `.text` for any node fully inside the
+ * range. Mirrors spliceCommentMark's per-node slicing so the string returned
+ * here is EXACTLY the characters the comment mark will cover.
+ */
+function reconstructRawText(blockContent: any[], match: AnchorMatch): string {
+  const { startChild, startOffset, endChild, endOffset } = match;
+  let out = "";
+  for (let k = startChild; k <= endChild; k++) {
+    const n = blockContent[k];
+    const text: string = typeof n.text === "string" ? n.text : "";
+    const sliceStart = k === startChild ? startOffset : 0;
+    const sliceEnd = k === endChild ? endOffset : text.length;
+    out += text.slice(sliceStart, sliceEnd);
+  }
+  return out;
+}
+
+/**
+ * Return the RAW document substring that `selection` would anchor to — the exact
+ * characters the comment mark will cover — or `null` when the selection cannot
+ * be anchored anywhere in `doc`.
+ *
+ * This mirrors canAnchorInDoc / applyAnchorInDoc EXACTLY (same depth-first,
+ * document-order traversal and the same findAnchorInBlock match on the FIRST
+ * matching block), but instead of a boolean / an in-place mutation it
+ * reconstructs the raw text spanned by the matched range. Because
+ * findAnchorInBlock maps the normalized selection back to raw text-node
+ * positions, the returned string is the document's ORIGINAL characters (smart
+ * quotes, em-dashes, nbsp, collapsed whitespace) — NOT the normalized ASCII
+ * agent input.
+ *
+ * Callers store THIS as the comment's `selection` so the stored value equals the
+ * text actually under the mark, which is what the apply-suggestion equality
+ * check (replaceYjsMarkedText's `joinedText !== expectedText`) compares against.
+ * Without it a suggestion whose anchor only matched via normalization would be
+ * un-appliable (spurious 409).
+ */
+export function getAnchoredText(doc: any, selection: string): string | null {
+  const visit = (node: any, depth: number): string | null => {
+    if (depth > MAX_DEPTH || !node || typeof node !== "object") return null;
+    if (!Array.isArray(node.content)) return null;
+    const match = findAnchorInBlock(node.content, selection);
+    if (match) return reconstructRawText(node.content, match);
+    for (const child of node.content) {
+      if (child && typeof child === "object" && Array.isArray(child.content)) {
+        const found = visit(child, depth + 1);
+        if (found !== null) return found;
+      }
+    }
+    return null;
+  };
+  return visit(doc, 0);
+}
+
 /**
 * Depth-first, document-order check for whether `selection` can be anchored
 * anywhere in `doc`. At each node with an array `content`, first try to match
@@ -452,3 +452,99 @@ test("suggestedText with a unique selection succeeds and forwards the payload",
  assert.equal(createPayload.selection, "brave");
  assert.equal(result.data.suggestedText, "bold", "filterComment surfaces suggestedText");
 });
+
+// -----------------------------------------------------------------------------
+// 8) suggestedText where the DOC has TYPOGRAPHIC text and the agent selection is
+//    ASCII: the stored selection sent to /comments/create MUST be the doc's RAW
+//    typographic substring (what the mark covers), NOT the agent's ASCII input.
+//    This is the F1 contract that makes "Apply" succeed instead of a spurious
+//    409 (apply compares the stored selection to the marked doc text strictly).
+// -----------------------------------------------------------------------------
+test("suggestedText: the stored selection is the doc's RAW typographic substring, not the ASCII input", async () => {
+  let createPayload = null;
+
+  const { baseURL } = await spawn(async (req, res) => {
+    const raw = await readBody(req);
+    if (req.url === "/api/auth/login") {
+      sendJson(res, 200, { success: true }, {
+        "Set-Cookie": "authToken=t; Path=/; HttpOnly",
+      });
+      return;
+    }
+    if (req.url === "/api/pages/info") {
+      // The doc holds SMART quotes; the agent will select the ASCII form.
+      sendJson(res, 200, {
+        data: {
+          id: "22222222-2222-2222-2222-222222222222",
+          content: {
+            type: "doc",
+            content: [
+              {
+                type: "paragraph",
+                content: [{ type: "text", text: "he said “hello” loudly" }],
+              },
+            ],
+          },
+        },
+      });
+      return;
+    }
+    if (req.url === "/api/comments/create") {
+      createPayload = JSON.parse(raw);
+      sendJson(res, 200, {
+        data: {
+          id: "cmt-typo-1",
+          content: createPayload.content,
+          selection: createPayload.selection,
+          suggestedText: createPayload.suggestedText,
+          type: createPayload.type,
+        },
+      });
+      return;
+    }
+    sendJson(res, 404, { message: "not found" });
+  });
+
+  class TestClient extends DocmostClient {
+    async getCollabTokenWithReauth() {
+      return "collab-token";
+    }
+    async resolvePageId() {
+      return "22222222-2222-2222-2222-222222222222";
+    }
+    async mutatePage(pageId, collabToken, apiUrl, transform) {
+      const doc = {
+        type: "doc",
+        content: [
+          {
+            type: "paragraph",
+            content: [{ type: "text", text: "he said “hello” loudly" }],
+          },
+        ],
+      };
+      const out = transform(doc);
+      return { doc: out, verify: { ok: true } };
+    }
+  }
+
+  const client = new TestClient(baseURL, "user@example.com", "pw");
+
+  const result = await client.createComment(
+    "22222222-2222-2222-2222-222222222222",
+    "please change",
+    "inline",
+    '"hello"', // ASCII quotes — the doc has smart quotes
+    undefined,
+    "goodbye",
+  );
+
+  assert.equal(result.success, true);
+  assert.equal(result.anchored, true);
+  assert.ok(createPayload, "/comments/create must have been called");
+  assert.equal(
+    createPayload.selection,
+    "“hello”",
+    "the stored selection must be the doc's RAW typographic substring, not the ASCII input",
+  );
+  assert.equal(createPayload.suggestedText, "goodbye");
+});
@@ -7,6 +7,7 @@ import {
  canAnchorInDoc,
  applyAnchorInDoc,
  countAnchorMatches,
+  getAnchoredText,
 } from "../../build/lib/comment-anchor.js";

 const COMMENT_ID = "cmt-123";
@@ -274,3 +275,36 @@ test("countAnchorMatches applies the same normalization as anchoring", () => {
  const doc = paragraphDoc([{ type: "text", text: "say “hi” now" }]);
  assert.equal(countAnchorMatches(doc, '"hi"'), 1);
 });
+
+// -----------------------------------------------------------------------------
+// getAnchoredText: returns the RAW document substring the mark would cover (the
+// doc's original typographic characters), not the normalized ASCII selection.
+// This is what makes a suggestion's stored selection equal the apply-time
+// expectedText, so the strict equality in replaceYjsMarkedText holds.
+// -----------------------------------------------------------------------------
+test("getAnchoredText returns the RAW (typographic) doc substring for an ASCII selection", () => {
+  // Doc holds smart quotes; agent selection is the ASCII form.
+  const doc = paragraphDoc([{ type: "text", text: "he said “hello” loudly" }]);
+  assert.equal(getAnchoredText(doc, '"hello"'), "“hello”");
+});
+
+test("getAnchoredText undoes whitespace/dash normalization to the raw span", () => {
+  // Em-dash + nbsp in the doc; ASCII hyphen + single space in the selection.
+  const doc = paragraphDoc([{ type: "text", text: "a—b c" }]);
+  // selection "a-b c" (ascii dash) matches, raw substring keeps the em-dash+nbsp.
+  assert.equal(getAnchoredText(doc, "a-b c"), "a—b c");
+});
+
+test("getAnchoredText spans consecutive text nodes and returns their raw slices", () => {
+  const doc = paragraphDoc([
+    { type: "text", text: "Hello " },
+    { type: "text", text: "“brave”", marks: [{ type: "bold" }] },
+    { type: "text", text: " world" },
+  ]);
+  assert.equal(getAnchoredText(doc, '"brave" wor'), "“brave” wor");
+});
+
+test("getAnchoredText returns null when the selection does not anchor", () => {
+  const doc = paragraphDoc([{ type: "text", text: "hello world" }]);
+  assert.equal(getAnchoredText(doc, "not present"), null);
+});