fix(#345 ): normalize CRLF before front-matter strip (review round 3)

F9 [WARNING] The line-anchored front-matter regex from round 2 requires a bare LF after the opening `---`, so a Windows/CRLF foreign file (`---\r\n...`) slips past the strip and leaks its front-matter into the body (where `title: Foo` renders as a setext heading that title extraction hijacks). The canonical parser whose regex shape this copied (page-file.ts) normalizes CRLF -> LF BEFORE its FRONTMATTER_RE; the import path copied the regex but missed the normalization. normalizeForeignMarkdown now replaces CRLF with LF first (which also makes convertReferenceFootnotes' split('\n') consistent). Adds a CRLF fixture. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
fix(#345 ): replace id-alternation regex with a fixed generic scanner + line-anchor frontmatter (review round 2)
2026-07-05 05:38:07 +03:00 · 2026-07-05 05:18:44 +03:00 · 2026-07-05 04:54:07 +03:00 · 2026-07-05 03:39:01 +03:00 · 2026-07-05 03:27:01 +03:00 · 2026-07-05 03:21:07 +03:00
375 changed files with 44468 additions and 22557 deletions
@@ -202,6 +202,27 @@ MCP_DOCMOST_PASSWORD=
 # Default 900000 (15 min).
 # AI_MCP_CALL_TIMEOUT_MS=900000

+# Deferred tool loading for the in-app AI chat (#332). Default ON: the agent sees
+# a compact <tool_catalog> and only CORE tools + a loadTools meta-tool are active
+# each step; deferred tools (the fat/rare ones + all external MCP tools) load on
+# demand. Set AI_CHAT_DEFERRED_TOOLS=false to restore the old "all tools always
+# active" behavior.
+# AI_CHAT_DEFERRED_TOOLS=true
+
+# --- Autonomous / detached agent runs (settings.ai.autonomousRuns) ---
+# Opt-in per workspace (AI settings; off by default). When on, a chat turn becomes
+# a server-side RUN that survives a browser disconnect — only an explicit Stop ends
+# it, and a client reconnects/live-follows the run.
+#
+# DEPLOY CONSTRAINT — SINGLE-INSTANCE ONLY in phase 1: Stop and the in-process
+# AbortController that backs it are process-local, so a Stop only aborts a run
+# executing on the SAME replica that owns it (cross-instance pub/sub stop is phase
+# 2 and not yet reliable). Do NOT enable autonomousRuns on a horizontally-scaled
+# deployment (multiple replicas behind a load balancer, or Docmost cloud
+# CLOUD=true) — run a single instance instead. The server logs a startup WARNING
+# when it detects a multi-instance deployment (CLOUD=true) so the constraint is
+# visible, and a startup sweep settles any run left dangling by a restart.
+
 # --- Anonymous public-share AI assistant ---
 # Opt-in per workspace (AI settings -> "public share assistant"; off by default).
 # When enabled, anonymous visitors of a published share can ask an AI about that
@@ -235,3 +256,27 @@ MCP_DOCMOST_PASSWORD=
 # FAILS CLOSED if Redis is unavailable (default: 1,000,000 tokens per workspace
 # per rolling day).
 # SHARE_AI_WORKSPACE_TOKEN_BUDGET_PER_DAY=1000000
+
+# --- Observability / perf metrics (#355) ---
+#
+# Two INDEPENDENT toggles, both OFF by default:
+#
+# 1) METRICS_PORT — the server-side Prometheus scrape endpoint.
+#    UNSET (default) => the whole prom subsystem is OFF: no registry, no
+#    collectors, and NOTHING is exposed on the main app port. There is NO
+#    default port — leaving it blank disables it. When set to a port (e.g.
+#    9464), a SEPARATE bare node:http listener serves GET /metrics on that port
+#    only (never on the main :3000 app listener), for a scraper such as
+#    VictoriaMetrics/Prometheus reaching it as <host>:<port>/metrics.
+# METRICS_PORT=9464
+#
+# 2) CLIENT_TELEMETRY_ENABLED — the public client perf-telemetry sink.
+#    OFF by default. When true, the unauthenticated POST /api/telemetry/vitals
+#    endpoint is registered and browsers collect + send web-vitals / editor
+#    metrics into the `client_metrics` table (read directly by Grafana, separate
+#    from METRICS_PORT). Leave OFF unless you actually consume this data: the
+#    endpoint is public and the table has NO app-side retention, so enabling it
+#    requires an EXTERNAL pruner to bound `client_metrics` growth (the deployed
+#    infra prunes rows >90d via a maintenance container). When off, the endpoint
+#    does not exist and the client installs no observers.
+# CLIENT_TELEMETRY_ENABLED=false
@@ -18,12 +18,48 @@ env:
  IMAGE: ghcr.io/vvzvlad/gitmost

 jobs:
-  # Run the reusable test suite first so a failing test blocks the image build.
+  # Run the reusable test suite. Together with the e2e jobs below it gates the
+  # publish job (the image push), not the build itself — build runs in parallel.
  test:
    uses: ./.github/workflows/test.yml

+  # Runs in parallel with the test/e2e jobs and only warms the buildx cache
+  # (GHA cache, scope develop-amd64). No push happens here — the publish job
+  # below is the only one that pushes the image.
  build:
-    needs: test
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Resolve version
+        id: version
+        run: echo "value=$(git describe --tags --always)" >> "$GITHUB_OUTPUT"
+
+      - name: Build develop image (warm cache, no push)
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64
+          build-args: |
+            APP_VERSION=${{ steps.version.outputs.value }}
+            AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/develop/agent-roles-catalog
+          push: false
+          cache-from: type=gha,scope=develop-amd64
+          cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true
+
+  # The gate: rebuilds from the cache the build job just wrote (near-instant on
+  # a cache hit; worst case — cache eviction — a full rebuild, which matches the
+  # old sequential timing) and pushes :develop only when unit tests AND both
+  # e2e suites AND the build are green.
+  publish:
+    needs: [test, e2e-server, e2e-mcp, build]
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
@@ -57,13 +93,10 @@ jobs:
          push: true
          tags: ${{ env.IMAGE }}:develop
          cache-from: type=gha,scope=develop-amd64
-          cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true

-  # e2e jobs run on every develop push but DO NOT gate the build/publish above:
-  # `build` stays `needs: test` only, so the :develop image still ships even if
-  # e2e fails. A failing e2e job turns the run red and triggers GitHub's email
-  # to the pusher — that red run + email is the intended notification, not a
-  # deploy block.
+  # e2e jobs gate the publish (image push), not the build: the :develop image
+  # is pushed only when unit tests AND both e2e suites pass (publish.needs
+  # lists them all).
  e2e-server:
    runs-on: ubuntu-latest
    # Hard cap: the full-AppModule e2e leaks open handles and hung jest to the 6h max.
@@ -124,9 +157,7 @@ jobs:
      - name: Run server e2e
        run: pnpm --filter ./apps/server test:e2e

-  # Same rationale as e2e-server: this job is intentionally NOT in
-  # `build.needs`. Deploy of the :develop image must not be blocked by e2e;
-  # a red run plus GitHub's email to the pusher is the notification mechanism.
+  # Gates the publish too — see the comment above e2e-server.
  e2e-mcp:
    runs-on: ubuntu-latest
    timeout-minutes: 20
@@ -72,6 +72,14 @@ jobs:
      - name: Build editor-ext
        run: pnpm --filter @docmost/editor-ext build

+      # @docmost/prosemirror-markdown is the shared converter (#293/#326); its
+      # build/ is gitignored, and plain `pnpm -r test` does NOT honour nx
+      # `dependsOn: ^build`, so its consumers (mcp `pretest: tsc`, git-sync vitest
+      # typecheck) fail with TS2307 Cannot find module '@docmost/prosemirror-markdown'
+      # unless it is built first. Build it before the recursive test run.
+      - name: Build prosemirror-markdown
+        run: pnpm --filter @docmost/prosemirror-markdown build
+
      - name: Run unit tests
        run: pnpm -r test

@@ -4,7 +4,20 @@
 data
 # compiled output
 /dist
-/node_modules
+node_modules
+
+# git-sync compiled output (built in CI/Docker via `pnpm build`, never committed,
+# so src/ and prod can never silently diverge).
+packages/git-sync/build/
+
+# prosemirror-markdown compiled output (built in CI/Docker via `pnpm build`,
+# never committed, so src/ and prod can never silently diverge).
+packages/prosemirror-markdown/build/
+
+# mcp compiled output (built in CI/Docker via `pnpm build`, never committed, so
+# src/ and prod can never silently diverge). Matches the git-sync/prosemirror-
+# markdown convention; the package is private and rebuilt at deploy.
+packages/mcp/build/

 # Logs
 logs
@@ -43,6 +56,8 @@ lerna-debug.log*
 .nx/cache
 .claude/worktrees/
 .claude/tmp/
+# Local Chrome performance traces recorded by the AI-chat perf harness
+.claude/perf-traces/

 # TypeScript incremental build artifacts
 *.tsbuildinfo
@@ -200,7 +200,8 @@ pnpm workspace (`pnpm@10.4.0`) orchestrated by **Nx**. Four workspace packages:
 | `apps/server` | `server` | NestJS 11 + Fastify, Kysely (Postgres), Redis | Backend API, collaboration, AI |
 | `apps/client` | `client` | React 18 + Vite + Mantine 8 + TanStack Query + Jotai | SPA frontend |
 | `packages/editor-ext` | `@docmost/editor-ext` | Tiptap/ProseMirror | Shared Tiptap node/mark extensions, imported by both the client and the server |
-| `packages/mcp` | `@docmost/mcp` | MCP SDK, Tiptap, Yjs | Standalone MCP server, also bundled into the server at `/mcp`. Does **not** import `editor-ext` — it keeps its own vendored mirror of the schema in `packages/mcp/src/lib/` |
+| `packages/mcp` | `@docmost/mcp` | MCP SDK, Tiptap, Yjs | Standalone MCP server, also bundled into the server at `/mcp`. Consumes the shared converter/schema from `@docmost/prosemirror-markdown` (#293) — it no longer carries its own vendored converter/schema copy |
+| `packages/prosemirror-markdown` | `@docmost/prosemirror-markdown` | Tiptap, marked, jsdom | The single, canonical ProseMirror↔Markdown converter + Docmost schema mirror (#293). Consumed by `mcp`, `git-sync`, AND `apps/server` (server-side markdown import/export, #345); there is exactly ONE copy of the converter now |

 `build` targets are Nx-cached and dependency-ordered (`dependsOn: ["^build"]`), so `editor-ext` builds before the apps. `nx.json` sets `affected.defaultBase: main`.

@@ -278,11 +279,12 @@ The API server is a Fastify app with a global `/api` prefix (`main.ts` excludes
   - `core/ai-chat/tools/` — the agent's ~40 read+write tools. Every tool runs under the **calling user's** CASL permissions via a per-user loopback access token (`docmost-client.loader.ts`), so the agent can never exceed what the user could do. Only **reversible** operations are exposed (page history + trash; no permanent delete). Agent edits get an "AI agent" provenance badge in page history (`20260616T130000-agent-provenance` migration).
   - `core/ai-chat/embedding/` — RAG indexer + a BullMQ consumer on `AI_QUEUE` that embeds pages into `page_embeddings` (vector search), complementing Postgres full-text search. Pages are (re)indexed on edit; `AI_EMBEDDING_TIMEOUT_MS` bounds a hung embeddings endpoint.
   - `core/ai-chat/external-mcp/` — admins can attach external MCP servers (e.g. Tavily) to give the agent web access. **`ssrf-guard.ts` validates outbound MCP URLs against SSRF** — keep that guard in the path when touching external-MCP connection logic.
+   - `core/ai-chat/ai-chat-run.service.ts` + `ai_chat_runs` — **detached/autonomous agent runs** (`#184`), behind the per-workspace `settings.ai.autonomousRuns` flag (off by default). When on, a turn becomes a server-side RUN that survives a browser disconnect; only an explicit `POST /ai-chat/stop` ends it, and a client reconnects/live-follows via `POST /ai-chat/run`. **DEPLOY CONSTRAINT — single-instance only in phase 1:** Stop and the AbortController that backs it are process-local, so a Stop only aborts a run executing on the **same** replica that owns it (cross-instance pub/sub stop is phase 2). Do **not** enable `autonomousRuns` on a horizontally-scaled deployment (multiple replicas behind a load balancer, or Docmost cloud `CLOUD=true`) — run a single instance instead. The server logs a startup WARNING when it detects a multi-instance deployment (`CLOUD=true`) so the constraint is visible. The startup sweep settles any run left dangling by a restart.

 ### Client structure
 Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirrors the server domains: `page`, `space`, `comment`, `ai-chat`, `editor`, …). Conventions:
 - **TanStack Query** for server state (one `queries/` file per feature), **Jotai** atoms for local/shared UI state, **Mantine 8** + CSS modules (`*.module.css`) + `postcss-preset-mantine` for UI.
- The editor is Tiptap; shared node/mark extensions live in `packages/editor-ext` and are imported by **both the client and the server** (collaboration, import/export) — editor schema changes often need to be made in `editor-ext`, not just the client. Note `packages/mcp` does *not* depend on `editor-ext`; it carries its own mirrored copy of the schema, so keep the two in sync manually when the document schema changes.
+- The editor is Tiptap; shared node/mark extensions live in `packages/editor-ext` and are imported by **both the client and the server** (collaboration, schema, `canonicalizeFootnotes`) — editor schema changes often need to be made in `editor-ext`, not just the client. Server-side markdown import/export no longer lives in `editor-ext`: it goes through the canonical converter (#345, see below). The ProseMirror↔Markdown converter and its Docmost schema mirror now live in a SINGLE package, `@docmost/prosemirror-markdown` (#293), consumed by `mcp`, `git-sync`, and `apps/server` (#345) — do NOT reintroduce a per-package copy. `editor-ext` is the upstream source of the Tiptap schema; the package's `docmost-schema.ts` mirrors it and a serializer-contract test (`packages/prosemirror-markdown/test/serializer-contract.test.ts`) guards the boundary (every schema node must have a converter case), so a drift surfaces as a failing test rather than silent divergence.
 - API access goes through `apps/client/src/lib/api-client.ts` (axios). The `@` alias maps to `apps/client/src`.
 - Runtime config is injected at build time by `vite.config.ts` via `define` (`APP_URL`, `COLLAB_URL`, `APP_VERSION`, …) — these come from the root `.env`, not from `import.meta.env`.

@@ -293,6 +295,7 @@ Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirro
 - The version string shown in the UI comes from `APP_VERSION` (CI/Docker) or `git describe --tags --always` (local), resolved in `vite.config.ts` — not from `package.json`.
 - Server TS config is permissive (`noImplicitAny: false`, `strictNullChecks: false`, `no-explicit-any` lint disabled). Follow the existing relaxed style rather than tightening types broadly.
 - Dependency versions are heavily pinned via `pnpm.overrides` and `pnpm.patchedDependencies` (`scimmy`, `yjs`) in the root `package.json`. Don't bump pinned/patched deps casually; the patches and overrides exist for compatibility/security reasons.
+- **Adding/renaming/removing an MCP tool requires updating `SERVER_INSTRUCTIONS`** in `packages/mcp/src/index.ts` — the intent-routing guide MCP clients receive on initialize. This applies both to inline `server.registerTool(...)` calls in `index.ts` and to specs in `packages/mcp/src/tool-specs.ts`. Enforced by `packages/mcp/test/unit/server-instructions.test.mjs`, which fails when a registered tool is not mentioned in the guide (deliberate opt-outs go into its `EXCEPTIONS` list). `packages/mcp/build/` is gitignored and rebuilt in CI/Docker via `pnpm build` (same convention as `git-sync`/`prosemirror-markdown`) — never commit it; rebuild locally after editing to run the tests.

 ## CI / release

@@ -72,6 +72,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  append/prepend fragments, nor to COMMENT bodies — a comment may legitimately
  contain a standalone footnote definition, which canonicalization would drop.
  (#228)
+- **Detached, autonomous agent runs that survive a browser disconnect.** When the
+  new `settings.ai.autonomousRuns` workspace flag is on (off by default), an
+  AI-chat turn becomes a first-class, server-side RUN tracked in a new
+  `ai_chat_runs` table instead of a socket-bound stream: closing the tab or
+  losing the connection no longer aborts the turn — it keeps executing and
+  persisting server-side, and only an explicit Stop ends it. A client can
+  reconnect and live-follow (or stop) an in-flight run via `POST /ai-chat/run`
+  (resolve the latest run + its assistant message for a chat) and
+  `POST /ai-chat/stop` (stop by `runId` or `chatId`). A partial unique index
+  enforces one active run per chat, and a startup sweep settles any run left
+  dangling by a restart. Phase 1 is single-instance-only (cross-instance Stop is
+  not yet reliable); the server warns at startup on a horizontally-scaled
+  deployment. (#184)
 - **Out-of-band page transfer via an in-RAM blob sandbox (`stash_page`).** A
  new MCP tool serializes a whole page (its full ProseMirror JSON, with every
  internal image/file mirrored) into an ephemeral in-RAM blob and returns only
@@ -5,6 +5,13 @@ RUN npm install -g pnpm@10.4.0

 FROM base AS builder

+# re2 (packages/mcp) always compiles from source under pnpm (the prebuilt-binary
+# download cannot identify the GitHub repo), so node-gyp needs python3/make/g++.
+# This stage is discarded, so the toolchain can stay installed.
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends python3 make g++ \
+  && rm -rf /var/lib/apt/lists/*
+
 WORKDIR /app

 COPY . .
@@ -38,6 +45,14 @@ COPY --from=builder /app/packages/editor-ext/dist /app/packages/editor-ext/dist
 COPY --from=builder /app/packages/editor-ext/package.json /app/packages/editor-ext/package.json
 COPY --from=builder /app/packages/mcp/build /app/packages/mcp/build
 COPY --from=builder /app/packages/mcp/package.json /app/packages/mcp/package.json
+# mcp now depends on @docmost/prosemirror-markdown (workspace:*) and eager-imports
+# it at runtime (the in-app ai-chat DocmostClient loads build/index.js -> lib/
+# markdown-converter.js). Ship the built package + its manifest, or the prod
+# install resolves a broken workspace symlink and every ai-chat tool dies with
+# ERR_MODULE_NOT_FOUND (#293/#326 step 5). (git-sync has no runtime consumer yet;
+# revisit at step 6 when #119 lands.)
+COPY --from=builder /app/packages/prosemirror-markdown/build /app/packages/prosemirror-markdown/build
+COPY --from=builder /app/packages/prosemirror-markdown/package.json /app/packages/prosemirror-markdown/package.json

 # Copy root package files
 COPY --from=builder /app/package.json /app/package.json
@@ -49,9 +64,16 @@ COPY --from=builder /app/patches /app/patches

 RUN chown -R node:node /app

-USER node
+# Toolchain is needed transiently to compile re2 during the prod install; install
+# and purge it in one layer to keep the final image slim. The install itself runs
+# as the node user via su to keep node_modules ownership without a costly chown layer.
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends python3 make g++ \
+  && su node -c "pnpm install --frozen-lockfile --prod" \
+  && apt-get purge -y --auto-remove python3 make g++ \
+  && rm -rf /var/lib/apt/lists/*

-RUN pnpm install --frozen-lockfile --prod
+USER node

 RUN mkdir -p /app/data/storage

@@ -34,11 +34,13 @@ roles:
      Read the whole text first. Think at the level of sections and paragraphs, not sentences.

      HOW TO LEAVE COMMENTS
-      You don't edit the text yourself. For each note, select the relevant span via the MCP tool and leave a comment. Open the comment with the label `[Structure]`. Then: state the problem briefly, propose a concrete fix (move, merge, cut, add, reorder, strengthen the lead/headline), and explain why if it isn't obvious. Tag severity:
+      You don't edit the text yourself. For each note, select the relevant span via the MCP tool and leave a comment. State the problem briefly, propose a concrete fix (move, merge, cut, add, reorder, strengthen the lead/headline), and explain why if it isn't obvious. Tag severity:
      - [Critical] — broken logic, the text doesn't deliver what the headline promises, a key link in the argument is missing.
      - [Major] — weak structure, a noticeable gap or redundancy, a sagging lead/headline.
      - [Minor] — an optional improvement to framing or flow.

+      Structural fixes (move, merge, cut) can't be expressed as a fragment replacement — a comment is enough for those. But when your proposal boils down to replacing a specific wording in place (a headline, a lead phrase), attach a suggested replacement to the comment (the `suggestedText` parameter): the exact new text for the selected fragment, plain text with no markup — the author applies it with one click. The selected fragment must occur exactly once in the text; if it isn't unique, extend the selection with surrounding context.
+
      TONE
      Respectful and to the point. The author may know the subject better than you. Flag only what matters structurally. When unsure, phrase it as a question.

@@ -85,7 +87,7 @@ roles:
      - Don't rewrite the text yourself or impose your own voice. Your job is to make the author's voice livelier, not to replace it.

      HOW TO LEAVE COMMENTS
-      You don't edit the text directly. For each note, select the span via the MCP tool and leave a comment. Open the comment with the label `[Style]`. Give a concrete rephrasing, not "revise". Tag severity:
+      You don't edit the text directly. For each note, select the span via the MCP tool and leave a comment. Give a concrete rephrasing, not "revise", and attach it to the comment as a suggested replacement (the `suggestedText` parameter): the exact new text for the selected fragment, plain text with no markup — the author applies it with one click. The selected fragment must occur exactly once in the text; if it isn't unique, extend the selection with surrounding context. Tag severity:
      - [Critical] — the sentence is unclear or distorts the meaning.
      - [Major] — an obvious LLM cliché, heavy bureaucratese, filler that breaks the reading.
      - [Minor] — a stylistic improvement to taste.
@@ -126,7 +128,7 @@ roles:
      - Don't fabricate confirmations. If you can't verify, honestly mark [Unverified] or [Unverifiable].

      HOW TO LEAVE COMMENTS
-      You don't edit the text directly. For each problem claim (an error, a doubt, an unverifiable statement), select the span via the MCP tool and leave a comment; leave no comment on correct facts. Open the comment with the label `[Facts]`, then the verdict, the correction (if any), and the source. Tag severity:
+      You don't edit the text directly. For each problem claim (an error, a doubt, an unverifiable statement), select the span via the MCP tool and leave a comment; leave no comment on correct facts. Give the verdict, the correction (if any), and the source. For an [Incorrect] verdict, ALWAYS attach the ready correction as a suggested replacement (the `suggestedText` parameter): since you found the correct value in the sources, propose the ready fix right away instead of merely describing the error. The replacement is the exact new text for the selected fragment, plain text with no markup; the author applies it with one click instead of retyping the fragment. The selected fragment must occur exactly once in the text; if it isn't unique, extend the selection with surrounding context. When a figure, name, term, or version to check recurs across the page, use search_in_page to find every occurrence in one call first, then place a targeted comment per hit instead of reading block by block. Do not attach a replacement to [Unverified], [Unverifiable], or [Opinion] verdicts. Tag severity:
      - [Critical] — a factual error, especially in numbers, names, or quotes, or a claim that risks misinformation.
      - [Major] — a doubtful or unconfirmed claim that needs a source.
      - [Minor] — a small correction, or false precision worth rounding or confirming.
@@ -166,14 +168,17 @@ roles:
      - Don't verify facts — that's the Fact-checker.
      - Don't make substantive changes. Edits are minimal and mechanical.

+      HOW TO WORK
+      Go through the whole text from start to finish in a single pass. Flag EVERY violation, including all repeat occurrences of the same error and minor items tagged [Minor] — don't stop at the first few or the most conspicuous. Don't summarize instead of marking up: until you've reached the end of the document, the job isn't done. One run covers the whole text, not just "the most important". For a systematic issue that recurs — straight quotes, a hyphen used as a dash, an inconsistent unit or spelling — use search_in_page to list every occurrence in one call first, then leave a targeted comment (with its replacement) on each hit, instead of scanning block by block.
+
      HOW TO LEAVE COMMENTS
-      You don't edit the text directly. For each fix, select the span via the MCP tool and leave a comment with the concrete correction. Open the comment with the label `[Copyedit]`. Tag severity:
+      You don't edit the text directly. For each fix, select the span via the MCP tool and leave a comment with the concrete correction. Attach a suggested replacement to every fix (the `suggestedText` parameter): the exact corrected text for the selected fragment, plain text with no markup — the author applies it with one click. The selected fragment must occur exactly once in the text; if it isn't unique, extend the selection with surrounding context. Do NOT leave summary notes like "throughout, replace X with Y" or "make the units/quotes/spelling consistent": such a comment can't be applied with a button. If the same error occurs in several places, walk EVERY occurrence and leave a separate targeted comment with its own replacement on each — ten targeted fixes instead of one blanket note. The only exception is a note that genuinely cannot be expressed as a replacement of a concrete fragment; leave those rare cases as an ordinary comment without a replacement. Tag severity:
      - [Critical] — a grammar/spelling error or typo visible to the reader.
      - [Major] — a consistency or typography break (wrong quotes, hyphen for a dash, missing serial comma where the rest of the text has it).
      - [Minor] — optional polish.

      TONE
-      To the point, no explaining the obvious. Group repeated fixes (e.g. "throughout: straight quotes → curly") so you don't spawn dozens of identical comments.
+      To the point, no explaining the obvious. Don't fold repeated fixes into a single "change it everywhere" note — spread them across the specific spots: ten targeted comments each carrying a ready replacement beat one blanket comment that can't be applied with a button. Don't worry about "spawning" comments — for a copyeditor that's normal.

      WHEN UNSURE
      If a fix touches meaning, don't make it — that's out of scope. If correctness depends on an author decision (a choice between two acceptable spellings), propose a variant.
@@ -272,7 +277,7 @@ roles:
      First read the whole text and assess it as a story as a whole. Then go in order: (1) the framework and the template; (2) the lede; (3) the hooks and loops; (4) Chekhov's guns; (5) illustrations; (6) liveliness of tone. If at any step liveliness threatens technical accuracy — the priority is accuracy.

      ═══ HOW TO LEAVE NOTES ═══
-      You do not edit the text directly and do not rewrite it for the author. Using the MCP tool, select the relevant fragment and leave a free-form comment on it. Explain not only “what” but also “why” — what effect it will have on the reader. Propose concrete moves and options, but leave the choice to the author: it is their experience and their voice. Comment on what will strengthen the story, not on every little thing.
+      You do not edit the text directly and do not rewrite it for the author. Using the MCP tool, select the relevant fragment and leave a free-form comment on it. Explain not only “what” but also “why” — what effect it will have on the reader. Propose concrete moves and options, but leave the choice to the author: it is their experience and their voice. When one of your options is a single ready-made text (e.g. a new lead phrase), you may attach it as a suggested replacement (the `suggestedText` parameter: the exact new text for the selected fragment, no markup; the fragment must occur exactly once in the text, otherwise extend the selection) — the button imposes nothing, the author is free not to apply it. Comment on what will strengthen the story, not on every little thing.

      ═══ TONE ═══
      Respectfully, with enthusiasm, in a human way. You are not a censor but a co-author and guide who helps the author tell their story better. The author knows the subject better than you — your task is to help them reveal it.
@@ -34,11 +34,13 @@ roles:
      Сначала прочитай весь текст целиком. Думай на уровне разделов и абзацев, а не предложений.

      КАК ОСТАВЛЯТЬ ЗАМЕЧАНИЯ
-      Ты не редактируешь текст сам. Для каждого замечания через MCP-инструмент выдели соответствующий фрагмент и оставь к нему комментарий. Начинай комментарий с метки `[Структура]`. Дальше: коротко назови проблему, предложи конкретное решение (перенести, объединить, вырезать, добавить, переставить, усилить лид/заголовок) и при необходимости поясни, почему. Помечай важность:
+      Ты не редактируешь текст сам. Для каждого замечания через MCP-инструмент выдели соответствующий фрагмент и оставь к нему комментарий. Коротко назови проблему, предложи конкретное решение (перенести, объединить, вырезать, добавить, переставить, усилить лид/заголовок) и при необходимости поясни, почему. Помечай важность:
      - [Критично] — сломана логика, текст не отвечает на заявленное в заголовке, отсутствует ключевое звено аргумента.
      - [Существенно] — слабая структура, заметный пробел или избыточность, провисающий лид/заголовок.
      - [Незначительно] — улучшение подачи или стройности, не обязательное.

+      Структурные правки (перенести, объединить, вырезать) через замену фрагмента не выражаются — для них достаточно комментария. Но если предложение сводится к замене конкретной формулировки на месте (заголовок, лид-фраза), приложи к комментарию предложение-замену (параметр `suggestedText`): точный новый текст взамен выделенного фрагмента, обычным текстом без разметки — автор применит его одной кнопкой. Выделенный фрагмент должен встречаться в тексте ровно один раз; если он не уникален, расширь выделение контекстом.
+
      ТОН
      Уважительно и по делу. Автор может разбираться в теме лучше тебя. Помечай только то, что важно для структуры. Если сомневаешься, формулируй вопросом.

@@ -85,7 +87,7 @@ roles:
      - Не переписываешь текст сам и не навязываешь свой голос. Твоя задача — сделать авторскую интонацию живее, а не заменить собой.

      КАК ОСТАВЛЯТЬ ЗАМЕЧАНИЯ
-      Ты не редактируешь текст напрямую. Для каждого замечания через MCP-инструмент выдели фрагмент и оставь к нему комментарий. Начинай комментарий с метки `[Стиль]`. Давай конкретный вариант переформулировки, а не «переделать». Помечай важность:
+      Ты не редактируешь текст напрямую. Для каждого замечания через MCP-инструмент выдели фрагмент и оставь к нему комментарий. Давай конкретный вариант переформулировки, а не «переделать», и прикладывай его к комментарию как предложение-замену (параметр `suggestedText`): точный новый текст взамен выделенного фрагмента, обычным текстом без разметки — автор применит его одной кнопкой. Выделенный фрагмент должен встречаться в тексте ровно один раз; если он не уникален, расширь выделение контекстом. Помечай важность:
      - [Критично] — предложение непонятно или искажает смысл.
      - [Существенно] — явный штамп LLM, заметный канцелярит, вода, ломающая чтение.
      - [Незначительно] — стилистическое улучшение на вкус.
@@ -126,7 +128,7 @@ roles:
      - Не выдумываешь подтверждения. Если не можешь проверить — честно ставь [Не проверено] или [Непроверяемо].

      КАК ОСТАВЛЯТЬ ЗАМЕЧАНИЯ
-      Ты не редактируешь текст напрямую. Для каждого проблемного утверждения (ошибка, сомнение, непроверяемость) через MCP-инструмент выдели фрагмент и оставь комментарий; на верные факты комментарии не оставляй. Начинай комментарий с метки `[Факты]`, затем вердикт, исправление (если нужно) и источник. Помечай важность:
+      Ты не редактируешь текст напрямую. Для каждого проблемного утверждения (ошибка, сомнение, непроверяемость) через MCP-инструмент выдели фрагмент и оставь комментарий; на верные факты комментарии не оставляй. В комментарии дай вердикт, исправление (если нужно) и источник. К вердикту [Неверно] всегда прикладывай готовое исправление как предложение-замену (параметр `suggestedText`): раз ты нашёл по источникам верное значение — сразу предлагай готовую правку, а не только описывай ошибку. Замена — это точный новый текст взамен выделенного фрагмента, обычным текстом без разметки; автор применит её одной кнопкой, не переписывая фрагмент вручную. Выделенный фрагмент должен встречаться в тексте ровно один раз; если он не уникален, расширь выделение контекстом. Когда проверяемая цифра, имя, термин или версия встречается по тексту несколько раз, сначала одним вызовом search_in_page найди все вхождения, а затем ставь целевой комментарий на каждое — не читая страницу поблочно. К вердиктам [Не проверено], [Непроверяемо] и [Это мнение] замену не прикладывай. Помечай важность:
      - [Критично] — фактическая ошибка, особенно в числах, именах, цитатах, или утверждение с риском дезинформации.
      - [Существенно] — сомнительное или непроверенное утверждение, требующее источника.
      - [Незначительно] — мелкое уточнение, псевдоточность, которую стоит округлить или подтвердить.
@@ -167,14 +169,17 @@ roles:
      - Не проверяешь достоверность фактов — это фактчекер.
      - Не вносишь содержательных изменений. Правки — минимальные и механические.

+      КАК РАБОТАТЬ
+      Пройди весь текст от начала до конца за один проход. Помечай КАЖДОЕ нарушение, включая все повторные вхождения одной и той же ошибки и мелочи с меткой [Незначительно], — не ограничивайся первыми несколькими или самыми заметными. Не подводи итог вместо разбора: пока не дошёл до конца документа, работа не закончена. Один прогон покрывает весь текст, а не «самое важное». Для систематической ошибки, которая повторяется — прямые кавычки, «е» вместо «ё», дефис вместо тире, неединообразная единица или написание, — сначала одним вызовом search_in_page получи все вхождения, а затем оставь на каждом целевой комментарий с заменой, вместо поблочного просмотра.
+
      КАК ОСТАВЛЯТЬ ЗАМЕЧАНИЯ
-      Ты не редактируешь текст напрямую. Для каждой правки через MCP-инструмент выдели фрагмент и оставь комментарий с конкретным исправлением. Начинай комментарий с метки `[Корректура]`. Помечай важность:
+      Ты не редактируешь текст напрямую. Для каждой правки через MCP-инструмент выдели фрагмент и оставь комментарий с конкретным исправлением. К каждой правке прикладывай предложение-замену (параметр `suggestedText`): точный исправленный текст взамен выделенного фрагмента, обычным текстом без разметки — автор применит его одной кнопкой. Выделенный фрагмент должен встречаться в тексте ровно один раз; если он не уникален, расширь выделение контекстом. НЕ оставляй сводных замечаний вида «во всём тексте заменить X на Y» или «привести единицы/кавычки/написание к единообразию»: такой комментарий нельзя применить кнопкой. Если одна и та же ошибка встречается в нескольких местах, обойди КАЖДОЕ вхождение и оставь на нём отдельный целевой комментарий со своей заменой — десять точечных правок вместо одной общей. Единственное исключение — замечание, которое в принципе невозможно выразить заменой конкретного фрагмента; такие редкие случаи оставляй обычным комментарием без замены. Помечай важность:
      - [Критично] — грамматическая/орфографическая ошибка или опечатка, видимая читателю.
      - [Существенно] — нарушение единообразия или типографики (неверные кавычки, дефис вместо тире, отсутствие неразрывного пробела в критичном месте).
      - [Незначительно] — необязательная шлифовка.

      ТОН
-      По делу, без объяснений очевидного. Группируй однотипные правки (например, «во всём тексте: прямые кавычки → ёлочки»), чтобы не плодить десятки одинаковых комментариев.
+      По делу, без объяснений очевидного. Не сворачивай однотипные правки в одно сводное замечание «поменять везде» — разнеси их по конкретным местам: десять целевых комментариев с готовой заменой в каждом лучше одного общего, который нельзя применить кнопкой. Не бойся «плодить» комментарии: для корректора это норма.

      ПРИ НЕУВЕРЕННОСТИ
      Если правка затрагивает смысл — не трогай, это не твоя зона. Если правильность зависит от решения автора (выбор между двумя допустимыми написаниями), предложи вариант.
@@ -273,7 +278,7 @@ roles:
      Сначала прочитай весь текст и оцени его как историю целиком. Затем иди по порядку: (1) каркас и шаблон; (2) лид; (3) крючки и петли; (4) висящие ружья; (5) иллюстрации; (6) живость тона. Если на каком-то шаге живость угрожает технической точности — приоритет за точностью.

      ═══ КАК ОСТАВЛЯТЬ ЗАМЕЧАНИЯ ═══
-      Ты не редактируешь текст напрямую и не переписываешь его за автора. Через MCP-инструмент выделяй нужный фрагмент и оставляй к нему комментарий в свободной форме. Объясняй не только «что», но и «зачем» — какой эффект на читателя это даст. Предлагай конкретные ходы и варианты, но оставляй выбор автору: это его опыт и его голос. Комментируй то, что усилит историю, а не каждую мелочь.
+      Ты не редактируешь текст напрямую и не переписываешь его за автора. Через MCP-инструмент выделяй нужный фрагмент и оставляй к нему комментарий в свободной форме. Объясняй не только «что», но и «зачем» — какой эффект на читателя это даст. Предлагай конкретные ходы и варианты, но оставляй выбор автору: это его опыт и его голос. Если среди вариантов есть один готовый текст (например, новая формулировка лида), можешь приложить его к комментарию как предложение-замену (параметр `suggestedText`: точный новый текст взамен выделенного фрагмента, без разметки; фрагмент должен встречаться в тексте ровно один раз, иначе расширь выделение) — кнопка ничего не навязывает, автор волен не применять. Комментируй то, что усилит историю, а не каждую мелочь.

      ═══ ТОН ═══
      Уважительно, увлечённо, по-человечески. Ты не цензор, а соавтор-проводник, который помогает автору рассказать его историю лучше. Автор знает тему лучше тебя — твоя задача помочь ему её раскрыть.
@@ -12,15 +12,15 @@ bundles:
      - en
    roles:
      - slug: structural-editor
-        version: 2
+        version: 4
      - slug: line-editor
-        version: 2
+        version: 4
      - slug: fact-checker
-        version: 3
+        version: 6
      - slug: proofreader
-        version: 3
+        version: 8
      - slug: narrator
-        version: 1
+        version: 2
  - id: research
    name:
      ru: Исследование
@@ -1,26 +1,26 @@
 {
  "fact-checker": {
-    "version": 3,
-    "hash": "a94931fbd20272570a588c72159ac9e48a89c99bd8f718449cda5e7ca4280fdf"
+    "version": 6,
+    "hash": "6bb22a9e5a5079b5cb287b5b26addbd36b9afeb7c9508287dcad9343fc53d685"
  },
  "line-editor": {
-    "version": 2,
-    "hash": "cca324110dc6f96d2a8a239a2fb95b0ba09fad5806c9b6090a3c210ea7883ceb"
+    "version": 4,
+    "hash": "890d10f3f0bd7f2b2cfcc94463634221c557a3140e3794721748dc8d99979780"
  },
  "narrator": {
-    "version": 1,
-    "hash": "36b38785fea6ae1c70bf6fb6b29ae5278bb86e389e61f7b9736675a589fa434c"
+    "version": 2,
+    "hash": "66fe653003b4f63ef3c3a5c5c48552fe47daeefffc16907c37c35f0e8da98851"
  },
  "proofreader": {
-    "version": 3,
-    "hash": "a36047c5cab837b2a727f63d4ddafc269b1fc44b90b365e770ecdb8f77e13952"
+    "version": 8,
+    "hash": "cef39fed321779631ddd1077fcba53399adf0e48b301df281c71eb042610900d"
  },
  "researcher": {
    "version": 1,
    "hash": "853658fda43ddbe0a4d08f2c6e50b5116d29a2e9ccd7f46e173e65920d8f6ace"
  },
  "structural-editor": {
-    "version": 2,
-    "hash": "83093baa7262aef8193871a1afcf2b43b11a56fe2d00cade41355cf66d972b74"
+    "version": 4,
+    "hash": "89100e0a00b88daa0d2118fd98ec1c27d06b972bfc6ec58b705553a4daed85df"
  }
 }
@@ -40,6 +40,7 @@
    "axios": "1.16.0",
    "blueimp-load-image": "5.16.0",
    "clsx": "2.1.1",
+    "diff": "8.0.3",
    "dompurify": "3.4.1",
    "file-saver": "2.0.5",
    "highlightjs-sap-abap": "0.3.0",
@@ -60,6 +61,7 @@
    "react-clear-modal": "^2.0.18",
    "react-dom": "^18.3.1",
    "react-drawio": "1.0.7",
+    "web-vitals": "^5.1.0",
    "react-error-boundary": "6.1.1",
    "react-helmet-async": "3.0.0",
    "react-i18next": "16.5.8",
@@ -81,6 +83,7 @@
    "@types/react": "18.3.12",
    "@types/react-dom": "18.3.1",
    "@vitejs/plugin-react": "6.0.1",
+    "@vitest/coverage-v8": "4.1.6",
    "eslint": "9.28.0",
    "eslint-plugin-react": "7.37.5",
    "eslint-plugin-react-hooks": "7.0.1",
@@ -0,0 +1,50 @@
+/**
+ * DEV-ONLY entry for the AI chat perf harness (served by the vite dev server at
+ * /perf/ai-chat-perf.html; never part of the production build, which uses the
+ * single default index.html entry).
+ *
+ * Mounts the minimal provider stack the real ChatThread needs (Mantine, router
+ * for tool-card Links, react-query, i18n) and patches `window.fetch` BEFORE
+ * React mounts so ChatThread's DefaultChatTransport requests to
+ * /api/ai-chat/stream are answered by the synthetic SSE generator.
+ */
+
+import "@mantine/core/styles.css";
+
+import ReactDOM from "react-dom/client";
+import { MantineProvider } from "@mantine/core";
+import { MemoryRouter } from "react-router-dom";
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import { mantineCssResolver, theme } from "../src/theme.ts";
+// i18n side-effect init (http-backend). Translations load from /locales in dev;
+// missing keys fall back to the key text, which is fine for the harness.
+import "../src/i18n.ts";
+import { installAiChatStreamFetchPatch } from "./synthetic-turn.ts";
+import PerfHarness from "./harness.tsx";
+
+// MUST run before React mounts: ChatThread creates its transport with the
+// global fetch, so the patch has to be in place before the first send.
+installAiChatStreamFetchPatch();
+
+const queryClient = new QueryClient({
+  defaultOptions: {
+    queries: {
+      refetchOnMount: false,
+      refetchOnWindowFocus: false,
+      retry: false,
+      staleTime: 5 * 60 * 1000,
+    },
+  },
+});
+
+const container = document.getElementById("root") as HTMLElement;
+
+ReactDOM.createRoot(container).render(
+  <MemoryRouter>
+    <MantineProvider theme={theme} cssVariablesResolver={mantineCssResolver}>
+      <QueryClientProvider client={queryClient}>
+        <PerfHarness />
+      </QueryClientProvider>
+    </MantineProvider>
+  </MemoryRouter>,
+);
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>AI chat perf harness</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="./ai-chat-perf-main.tsx"></script>
+  </body>
+</html>
@@ -0,0 +1,390 @@
+/**
+ * DEV-ONLY perf harness UI for the AI chat feature.
+ *
+ * Left panel: controls + live stats. Right side: a bordered box (~real chat
+ * window size) hosting the REAL ChatThread component.
+ *
+ * Scenario A "Open existing chat": mount ChatThread seeded with a large
+ * persisted transcript and measure click -> post-mount-paint time.
+ * Scenario B "Live agent stream": mount an empty chat and auto-send a message;
+ * the fetch patch (see synthetic-turn.ts) answers with a synthetic SSE stream
+ * through the real useChat pipeline.
+ */
+
+import { useEffect, useMemo, useRef, useState } from "react";
+import type { CSSProperties, MutableRefObject } from "react";
+import ChatThread from "../src/features/ai-chat/components/chat-thread.tsx";
+import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
+import {
+  PRESETS,
+  buildPersistedRows,
+  buildTurnScript,
+  setLiveStreamSettings,
+  type PresetKey,
+} from "./synthetic-turn.ts";
+
+const AUTO_SEND_TEXT = "Run the synthetic perf turn";
+const AUTO_SEND_TIMEOUT_MS = 1000;
+/** Stats display refresh period — 2x/s so the display itself stays cheap. */
+const STATS_FLUSH_MS = 500;
+
+// ---------------------------------------------------------------------------
+// Shared mutable stats (written from callbacks, flushed to state at 2 Hz)
+// ---------------------------------------------------------------------------
+
+interface PerfStats {
+  longtaskCount: number;
+  longtaskTotalMs: number;
+  longtaskMaxMs: number;
+  fps: number;
+  sseChunks: number;
+  sseChars: number;
+  mountAMs: number | null;
+  streamState: "idle" | "streaming" | "done" | "aborted";
+}
+
+function emptyStats(): PerfStats {
+  return {
+    longtaskCount: 0,
+    longtaskTotalMs: 0,
+    longtaskMaxMs: 0,
+    fps: 0,
+    sseChunks: 0,
+    sseChars: 0,
+    mountAMs: null,
+    streamState: "idle",
+  };
+}
+
+/**
+ * Self-contained stats panel: owns the longtask observer, the FPS meter and the
+ * 2 Hz flush interval. Isolated in its OWN component so its periodic setState
+ * re-renders only this panel — NOT the ChatThread under measurement.
+ */
+function StatsPanel({ stats }: { stats: MutableRefObject<PerfStats> }) {
+  const [snapshot, setSnapshot] = useState<PerfStats>(() => ({ ...stats.current }));
+
+  // Long tasks (main-thread blocks > 50ms).
+  useEffect(() => {
+    let observer: PerformanceObserver | null = null;
+    try {
+      observer = new PerformanceObserver((list) => {
+        for (const entry of list.getEntries()) {
+          stats.current.longtaskCount += 1;
+          stats.current.longtaskTotalMs += entry.duration;
+          stats.current.longtaskMaxMs = Math.max(stats.current.longtaskMaxMs, entry.duration);
+        }
+      });
+      observer.observe({ type: "longtask", buffered: true });
+    } catch {
+      // longtask entries unsupported in this browser — panel shows zeros.
+    }
+    return () => observer?.disconnect();
+  }, [stats]);
+
+  // FPS: frames rendered within the trailing 1s window.
+  useEffect(() => {
+    let raf = 0;
+    const frames: number[] = [];
+    const loop = (now: number) => {
+      frames.push(now);
+      while (frames.length > 0 && frames[0] <= now - 1000) frames.shift();
+      stats.current.fps = frames.length;
+      raf = requestAnimationFrame(loop);
+    };
+    raf = requestAnimationFrame(loop);
+    return () => cancelAnimationFrame(raf);
+  }, [stats]);
+
+  // Flush the mutable stats into the display at most 2x/s.
+  useEffect(() => {
+    const id = window.setInterval(() => setSnapshot({ ...stats.current }), STATS_FLUSH_MS);
+    return () => window.clearInterval(id);
+  }, [stats]);
+
+  const resetLongtasks = () => {
+    stats.current.longtaskCount = 0;
+    stats.current.longtaskTotalMs = 0;
+    stats.current.longtaskMaxMs = 0;
+    setSnapshot({ ...stats.current });
+  };
+
+  const row: CSSProperties = { display: "flex", justifyContent: "space-between", gap: 8 };
+  return (
+    <div style={{ fontFamily: "monospace", fontSize: 12, lineHeight: 1.7 }}>
+      <div style={{ fontWeight: 700, marginBottom: 4 }}>Stats</div>
+      <div style={row}><span>FPS (1s)</span><span>{snapshot.fps}</span></div>
+      <div style={row}><span>Long tasks</span><span>{snapshot.longtaskCount}</span></div>
+      <div style={row}><span>Long total</span><span>{snapshot.longtaskTotalMs.toFixed(0)} ms</span></div>
+      <div style={row}><span>Long max</span><span>{snapshot.longtaskMaxMs.toFixed(0)} ms</span></div>
+      <div style={row}><span>SSE chunks</span><span>{snapshot.sseChunks}</span></div>
+      <div style={row}><span>SSE chars</span><span>{snapshot.sseChars.toLocaleString()}</span></div>
+      <div style={row}><span>Stream</span><span>{snapshot.streamState}</span></div>
+      <div style={row}>
+        <span>Mount A</span>
+        <span>{snapshot.mountAMs === null ? "—" : `${snapshot.mountAMs.toFixed(0)} ms`}</span>
+      </div>
+      <button type="button" onClick={resetLongtasks} style={{ marginTop: 6 }}>
+        Reset long tasks
+      </button>
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Auto-send (scenario B): drive the REAL composer in the mounted DOM
+// ---------------------------------------------------------------------------
+
+/**
+ * Fill the composer textarea via the native value setter + an `input` event
+ * (React 18 controlled-input pattern), then click the enabled "Send" button.
+ * Retried on rAF until the elements exist (ChatThread mounts asynchronously).
+ */
+function autoSend(host: HTMLElement, text: string): void {
+  const deadline = performance.now() + AUTO_SEND_TIMEOUT_MS;
+
+  const tryClick = () => {
+    const button = host.querySelector<HTMLButtonElement>('button[aria-label="Send"]');
+    if (button && !button.disabled) {
+      button.click();
+      return;
+    }
+    if (performance.now() < deadline) requestAnimationFrame(tryClick);
+    else console.error("[perf] auto-send: Send button never became clickable");
+  };
+
+  const trySetValue = () => {
+    const textarea = host.querySelector("textarea");
+    if (!textarea) {
+      if (performance.now() < deadline) requestAnimationFrame(trySetValue);
+      else console.error("[perf] auto-send: textarea not found");
+      return;
+    }
+    const setter = Object.getOwnPropertyDescriptor(
+      window.HTMLTextAreaElement.prototype,
+      "value",
+    )?.set;
+    setter?.call(textarea, text);
+    textarea.dispatchEvent(new Event("input", { bubbles: true }));
+    // Click on a later frame so React commits the controlled value (which
+    // enables the Send button) before we press it.
+    requestAnimationFrame(tryClick);
+  };
+
+  requestAnimationFrame(trySetValue);
+}
+
+// ---------------------------------------------------------------------------
+// Harness
+// ---------------------------------------------------------------------------
+
+interface MountState {
+  mode: "A" | "B";
+  key: number;
+  chatId: string | null;
+  rows: IAiChatMessageRow[];
+}
+
+const noop = (): void => {};
+
+export default function PerfHarness() {
+  const [preset, setPreset] = useState<PresetKey>("20k");
+  const [intervalMs, setIntervalMs] = useState<number>(15);
+  const [mounted, setMounted] = useState<MountState | null>(null);
+  const [fixtureInfo, setFixtureInfo] = useState<string | null>(null);
+
+  const statsRef = useRef<PerfStats>(emptyStats());
+  const hostRef = useRef<HTMLDivElement>(null);
+  const keyCounterRef = useRef(0);
+  const mountStartRef = useRef(0);
+  const pendingMountMeasureRef = useRef(false);
+
+  // The scripted live turn for the current preset (reused across B runs; the
+  // script is immutable data, so rebuilding per run is unnecessary).
+  const liveScript = useMemo(() => buildTurnScript(PRESETS[preset], "live"), [preset]);
+
+  const openPage = useMemo(() => ({ id: "page-1", title: "Perf test page" }), []);
+
+  // Scenario A: mount ChatThread seeded with a large persisted transcript.
+  const handleMountA = () => {
+    const fixture = buildPersistedRows(PRESETS[preset]);
+    setFixtureInfo(
+      `Persisted fixture: ${fixture.rows.length} rows, ` +
+        `${fixture.totalChars.toLocaleString()} chars ≈ ${fixture.approxTokens.toLocaleString()} tokens`,
+    );
+    statsRef.current.mountAMs = null;
+    // Mark AFTER fixture generation: we measure mount cost, not generation cost
+    // (production receives its rows from the network).
+    performance.mark("perf:mountA:start");
+    mountStartRef.current = performance.now();
+    pendingMountMeasureRef.current = true;
+    keyCounterRef.current += 1;
+    setMounted({ mode: "A", key: keyCounterRef.current, chatId: "perf-chat", rows: fixture.rows });
+  };
+
+  // Measure scenario A: effect runs after the mount commit; double rAF lands
+  // after the first paint of the mounted transcript.
+  useEffect(() => {
+    if (!pendingMountMeasureRef.current) return;
+    pendingMountMeasureRef.current = false;
+    requestAnimationFrame(() => {
+      requestAnimationFrame(() => {
+        statsRef.current.mountAMs = performance.now() - mountStartRef.current;
+        performance.mark("perf:mountA:end");
+        try {
+          performance.measure("perf:mountA", "perf:mountA:start", "perf:mountA:end");
+        } catch {
+          // Marks cleared mid-run — ignore.
+        }
+      });
+    });
+  }, [mounted]);
+
+  // Scenario B: mount an empty chat, arm the synthetic stream, auto-send.
+  const handleStartB = () => {
+    statsRef.current.sseChunks = 0;
+    statsRef.current.sseChars = 0;
+    statsRef.current.streamState = "streaming";
+    setLiveStreamSettings({
+      script: liveScript,
+      chunkIntervalMs: intervalMs,
+      onProgress: (chunks, chars) => {
+        statsRef.current.sseChunks = chunks;
+        statsRef.current.sseChars = chars;
+      },
+      onDone: () => {
+        statsRef.current.streamState = "done";
+        performance.mark("perf:streamB:end");
+        try {
+          performance.measure("perf:streamB", "perf:streamB:start", "perf:streamB:end");
+        } catch {
+          // Start mark missing (e.g. marks cleared) — ignore.
+        }
+      },
+      onAbort: () => {
+        statsRef.current.streamState = "aborted";
+      },
+    });
+    performance.mark("perf:streamB:start");
+    keyCounterRef.current += 1;
+    setMounted({ mode: "B", key: keyCounterRef.current, chatId: null, rows: [] });
+    if (hostRef.current) autoSend(hostRef.current, AUTO_SEND_TEXT);
+  };
+
+  const handleUnmount = () => setMounted(null);
+
+  const label: CSSProperties = { display: "block", fontSize: 12, margin: "10px 0 2px" };
+  const button: CSSProperties = { display: "block", width: "100%", margin: "6px 0", padding: "6px 8px" };
+
+  return (
+    <div style={{ display: "flex", height: "100vh", fontFamily: "system-ui, sans-serif" }}>
+      {/* Left: controls + stats */}
+      <div
+        style={{
+          width: 260,
+          flex: "0 0 260px",
+          padding: 12,
+          borderRight: "1px solid #ccc",
+          overflowY: "auto",
+          boxSizing: "border-box",
+        }}
+      >
+        <div style={{ fontWeight: 700, marginBottom: 4 }}>AI chat perf harness</div>
+
+        <label style={label}>Preset</label>
+        <select
+          value={preset}
+          onChange={(e) => setPreset(e.target.value as PresetKey)}
+          style={{ width: "100%" }}
+        >
+          <option value="5k">5k tokens</option>
+          <option value="20k">20k tokens</option>
+          <option value="50k">50k tokens</option>
+        </select>
+
+        <label style={label}>Chunk interval (scenario B)</label>
+        <select
+          value={intervalMs}
+          onChange={(e) => setIntervalMs(Number(e.target.value))}
+          style={{ width: "100%" }}
+        >
+          <option value={15}>15 ms (normal)</option>
+          <option value={5}>5 ms (stress)</option>
+        </select>
+
+        <div style={{ marginTop: 12 }}>
+          <button type="button" style={button} onClick={handleMountA}>
+            Mount persisted chat (A)
+          </button>
+          <button type="button" style={button} onClick={handleStartB}>
+            Start live stream (B)
+          </button>
+          <button type="button" style={button} onClick={handleUnmount} disabled={!mounted}>
+            Unmount
+          </button>
+        </div>
+
+        <div style={{ fontSize: 11, color: "#555", margin: "8px 0" }}>
+          <div>
+            Live turn: {liveScript.totalChars.toLocaleString()} chars ≈{" "}
+            {liveScript.approxTokens.toLocaleString()} tokens
+          </div>
+          {fixtureInfo && <div>{fixtureInfo}</div>}
+          {mounted && (
+            <div>
+              Mounted: scenario {mounted.mode} (key {mounted.key})
+            </div>
+          )}
+        </div>
+
+        <hr style={{ border: "none", borderTop: "1px solid #ddd" }} />
+        <StatsPanel stats={statsRef} />
+      </div>
+
+      {/* Right: the real ChatThread inside a real-window-sized box */}
+      <div
+        style={{
+          flex: 1,
+          display: "flex",
+          alignItems: "center",
+          justifyContent: "center",
+          background: "#f4f4f5",
+        }}
+      >
+        <div
+          ref={hostRef}
+          style={{
+            width: 540,
+            height: 680,
+            border: "1px solid #bbb",
+            borderRadius: 8,
+            background: "#fff",
+            padding: 8,
+            boxSizing: "border-box",
+            overflow: "hidden",
+          }}
+        >
+          {mounted ? (
+            <ChatThread
+              key={mounted.key}
+              chatId={mounted.chatId}
+              threadKey={`perf-${mounted.key}`}
+              initialRows={mounted.rows}
+              openPage={openPage}
+              roleId={null}
+              roles={[]}
+              onRolePicked={noop}
+              assistantName="Perf agent"
+              onTurnFinished={noop}
+              onServerChatId={noop}
+            />
+          ) : (
+            <div style={{ color: "#888", fontSize: 13, padding: 16 }}>
+              ChatThread unmounted. Use the controls on the left.
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,517 @@
+/**
+ * DEV-ONLY synthetic agent-turn generator for the AI chat perf harness.
+ *
+ * Produces one scripted agent turn (reasoning + tool calls + markdown answer)
+ * from a size config, and materializes it two ways:
+ *  - as an AI SDK v6 UI-message SSE stream (scenario B "live agent stream"),
+ *    served by a `window.fetch` patch that intercepts `/api/ai-chat/stream`;
+ *  - as persisted `IAiChatMessageRow[]` history (scenario A "open existing chat").
+ *
+ * Wire format verified against the installed ai@6.0.207 `uiMessageChunkSchema`
+ * (strict objects — only the exact field names below are accepted).
+ */
+
+import type { UIMessage } from "@ai-sdk/react";
+import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
+
+// ---------------------------------------------------------------------------
+// Config / presets
+// ---------------------------------------------------------------------------
+
+/** 1 token ~= 4 chars — the approximation used throughout this module. */
+const CHARS_PER_TOKEN = 4;
+
+export interface TurnConfig {
+  /** Number of agent steps; each step = one reasoning block + one tool call. */
+  steps: number;
+  /** Approximate reasoning tokens generated per step. */
+  reasoningTokensPerStep: number;
+  /** Size of each tool call's output `content` filler, in bytes (ASCII). */
+  toolOutputBytes: number;
+  /** Approximate size of the final markdown answer, in tokens. */
+  answerTokens: number;
+}
+
+export type PresetKey = "5k" | "20k" | "50k";
+
+export const PRESETS: Record<PresetKey, TurnConfig> = {
+  "5k": {
+    steps: 3,
+    reasoningTokensPerStep: 500,
+    toolOutputBytes: 10_000,
+    answerTokens: 600,
+  },
+  "20k": {
+    steps: 6,
+    reasoningTokensPerStep: 2500,
+    toolOutputBytes: 20_000,
+    answerTokens: 1500,
+  },
+  "50k": {
+    steps: 10,
+    reasoningTokensPerStep: 4000,
+    toolOutputBytes: 40_000,
+    answerTokens: 3000,
+  },
+};
+
+// ---------------------------------------------------------------------------
+// Text generators
+// ---------------------------------------------------------------------------
+
+/** Mixed Russian/English prose sentences cycled to build reasoning text. */
+const REASONING_SENTENCES = [
+  "Пользователь просит проанализировать документ и выделить ключевые тезисы по каждому разделу.",
+  "First I need to inspect the current page content to understand its overall structure.",
+  "Судя по оглавлению, раздел с техническими требованиями находится ближе к концу документа.",
+  "The table in section three contains the migration matrix that I should cross-check against the summary.",
+  "Проверю, нет ли противоречий между описанием API и приведёнными в тексте примерами вызовов.",
+  "Let me compare the numbers from the executive summary with the raw data in the appendix.",
+  "Похоже, автор использует термины «воркспейс» и workspace взаимозаменяемо — это стоит нормализовать.",
+  "I should keep the page ids from the tool output so the final answer can cite the source pages.",
+  "Осталось свести найденные несоответствия в одну таблицу и предложить порядок исправлений.",
+  "The remaining sections look consistent, so I can move on to drafting the structured answer.",
+];
+
+/**
+ * Build realistic prose of ~`targetChars` characters, inserting a newline
+ * roughly every 200 characters (mirrors how reasoning text tends to wrap).
+ */
+function makeProse(targetChars: number): string {
+  const pieces: string[] = [];
+  let length = 0;
+  let sinceNewline = 0;
+  let i = 0;
+  while (length < targetChars) {
+    const sentence = REASONING_SENTENCES[i % REASONING_SENTENCES.length];
+    i += 1;
+    pieces.push(sentence);
+    length += sentence.length + 1;
+    sinceNewline += sentence.length + 1;
+    if (sinceNewline >= 200) {
+      pieces.push("\n");
+      sinceNewline = 0;
+    } else {
+      pieces.push(" ");
+    }
+  }
+  return pieces.join("").trimEnd();
+}
+
+/** One markdown section (~700 chars): heading, prose, bullets, GFM table, code. */
+function markdownSection(n: number): string {
+  return [
+    `## Section ${n}: migration analysis`,
+    ``,
+    `The workspace contains **${n * 12} pages** that still reference the legacy API. ` +
+      `Most of them live under [Perf test page](/p/page-1) and need the new transport. ` +
+      `Ниже приведена сводка по разделу с оценкой трудозатрат и основных рисков.`,
+    ``,
+    `- Update the fetch layer to the v6 transport`,
+    `- Перенести таблицы соответствия идентификаторов`,
+    `- Verify citation links after the move`,
+    `- Проверить отображение длинных ответов в узкой панели`,
+    ``,
+    `| Область | Страниц | Статус | Риск |`,
+    `| --- | --- | --- | --- |`,
+    `| API reference | ${n + 4} | migrated | low |`,
+    `| Onboarding | ${n + 2} | in progress | medium |`,
+    `| Release notes | ${n * 3} | pending | high |`,
+    ``,
+    "```ts",
+    `export function migrateSection${n}(rows: Row[]): Row[] {`,
+    `  return rows`,
+    `    .filter((row) => row.section === ${n})`,
+    `    .map((row) => ({ ...row, migrated: true }));`,
+    `}`,
+    "```",
+  ].join("\n");
+}
+
+/** Realistic markdown answer of ~`targetChars` chars (sections repeated to size). */
+function makeMarkdownAnswer(targetChars: number): string {
+  const sections: string[] = [];
+  let length = 0;
+  let n = 1;
+  while (length < targetChars) {
+    const section = markdownSection(n);
+    sections.push(section);
+    length += section.length + 2;
+    n += 1;
+  }
+  return sections.join("\n\n");
+}
+
+/** Plain ASCII filler of exactly `bytes` characters for tool outputs. */
+function makeFiller(bytes: number): string {
+  const unit = "Perf filler content for the synthetic getPage tool output. ";
+  return unit.repeat(Math.ceil(bytes / unit.length)).slice(0, bytes);
+}
+
+// ---------------------------------------------------------------------------
+// Turn script
+// ---------------------------------------------------------------------------
+
+export interface TurnToolCall {
+  toolCallId: string;
+  toolName: "getPage";
+  input: { pageId: string };
+  output: { id: string; title: string; content: string };
+}
+
+export interface TurnStep {
+  reasoningText: string;
+  tool: TurnToolCall;
+}
+
+export interface TurnScript {
+  steps: TurnStep[];
+  answerText: string;
+  /** Approximate reasoning tokens for the whole turn (chars / 4). */
+  reasoningTokens: number;
+  /** Approximate context size after this turn, in tokens. */
+  contextTokens: number;
+  maxContextTokens: number;
+  /** Actual generated visible chars: reasoning + tool outputs + answer. */
+  totalChars: number;
+  /** totalChars / 4, rounded. */
+  approxTokens: number;
+}
+
+/**
+ * Build the scripted agent turn for a config. `idPrefix` keeps tool call ids
+ * unique when several scripts coexist (e.g. 3 persisted turns in one chat).
+ */
+export function buildTurnScript(config: TurnConfig, idPrefix = "live"): TurnScript {
+  const steps: TurnStep[] = [];
+  let reasoningChars = 0;
+  let toolChars = 0;
+  for (let i = 0; i < config.steps; i++) {
+    const reasoningText = makeProse(config.reasoningTokensPerStep * CHARS_PER_TOKEN);
+    const content = makeFiller(config.toolOutputBytes);
+    reasoningChars += reasoningText.length;
+    toolChars += content.length;
+    steps.push({
+      reasoningText,
+      tool: {
+        toolCallId: `${idPrefix}-call-${i + 1}`,
+        toolName: "getPage",
+        input: { pageId: "page-1" },
+        output: { id: "page-1", title: "Perf test page", content },
+      },
+    });
+  }
+  const answerText = makeMarkdownAnswer(config.answerTokens * CHARS_PER_TOKEN);
+  const totalChars = reasoningChars + toolChars + answerText.length;
+  return {
+    steps,
+    answerText,
+    reasoningTokens: Math.round(reasoningChars / CHARS_PER_TOKEN),
+    contextTokens: Math.round(totalChars / CHARS_PER_TOKEN),
+    maxContextTokens: 200_000,
+    totalChars,
+    approxTokens: Math.round(totalChars / CHARS_PER_TOKEN),
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Scenario A: persisted rows
+// ---------------------------------------------------------------------------
+
+/** Number of user+assistant pairs the preset is split across for history. */
+const HISTORY_TURNS = 3;
+
+const USER_PROMPTS = [
+  "Проанализируй документ и выдели ключевые тезисы по каждому разделу.",
+  "Now cross-check the migration matrix against the summary and list every mismatch.",
+  "Собери финальный план миграции с оценкой рисков по каждой области.",
+];
+
+/** Persisted UIMessage parts for one finished assistant turn. */
+function scriptToPersistedParts(script: TurnScript): UIMessage["parts"] {
+  const parts: unknown[] = [];
+  for (const step of script.steps) {
+    parts.push({ type: "reasoning", text: step.reasoningText, state: "done" });
+    parts.push({
+      type: `tool-${step.tool.toolName}`,
+      toolCallId: step.tool.toolCallId,
+      state: "output-available",
+      input: step.tool.input,
+      output: step.tool.output,
+    });
+  }
+  parts.push({ type: "text", text: script.answerText, state: "done" });
+  return parts as UIMessage["parts"];
+}
+
+export interface PersistedFixture {
+  rows: IAiChatMessageRow[];
+  totalChars: number;
+  approxTokens: number;
+}
+
+/**
+ * Materialize the preset as a finished 3-turn transcript: user row + assistant
+ * row per turn, with the preset's steps/answer split across the assistant turns.
+ * Approximate accounting — the actual totals are reported back for display.
+ */
+export function buildPersistedRows(config: TurnConfig): PersistedFixture {
+  const rows: IAiChatMessageRow[] = [];
+  const baseTime = Date.now() - HISTORY_TURNS * 60_000;
+  let totalChars = 0;
+
+  for (let t = 0; t < HISTORY_TURNS; t++) {
+    // Distribute steps as evenly as possible (earlier turns get the remainder).
+    const stepsForTurn =
+      Math.floor(config.steps / HISTORY_TURNS) +
+      (t < config.steps % HISTORY_TURNS ? 1 : 0);
+    const turnConfig: TurnConfig = {
+      steps: Math.max(1, stepsForTurn),
+      reasoningTokensPerStep: config.reasoningTokensPerStep,
+      toolOutputBytes: config.toolOutputBytes,
+      answerTokens: Math.max(50, Math.round(config.answerTokens / HISTORY_TURNS)),
+    };
+    const script = buildTurnScript(turnConfig, `hist-${t + 1}`);
+    totalChars += script.totalChars;
+
+    const userText = USER_PROMPTS[t % USER_PROMPTS.length];
+    rows.push({
+      id: `perf-row-u${t + 1}`,
+      role: "user",
+      content: userText,
+      metadata: null,
+      createdAt: new Date(baseTime + t * 60_000).toISOString(),
+    });
+    rows.push({
+      id: `perf-row-a${t + 1}`,
+      role: "assistant",
+      content: script.answerText,
+      metadata: {
+        parts: scriptToPersistedParts(script),
+        usage: { reasoningTokens: script.reasoningTokens },
+        contextTokens: script.contextTokens,
+        maxContextTokens: script.maxContextTokens,
+        finishReason: "stop",
+      },
+      createdAt: new Date(baseTime + t * 60_000 + 30_000).toISOString(),
+    });
+  }
+
+  return {
+    rows,
+    totalChars,
+    approxTokens: Math.round(totalChars / CHARS_PER_TOKEN),
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Scenario B: SSE stream
+// ---------------------------------------------------------------------------
+
+/** Streaming delta size in chars (reasoning/answer text is split into these). */
+const DELTA_CHARS = 200;
+
+function splitDeltas(text: string, size = DELTA_CHARS): string[] {
+  const deltas: string[] = [];
+  for (let i = 0; i < text.length; i += size) {
+    deltas.push(text.slice(i, i + size));
+  }
+  return deltas;
+}
+
+/** One pre-serialized SSE frame plus its visible-char contribution for stats. */
+interface SseFrame {
+  data: string;
+  chars: number;
+}
+
+function frame(chunk: Record<string, unknown>, chars = 0): SseFrame {
+  return { data: `data: ${JSON.stringify(chunk)}\n\n`, chars };
+}
+
+/**
+ * Serialize the whole scripted turn into AI SDK v6 UI-message SSE frames
+ * (excluding the final `data: [DONE]` terminator, appended by the pump).
+ */
+function buildSseFrames(script: TurnScript, messageId: string, chatId: string): SseFrame[] {
+  const frames: SseFrame[] = [];
+  frames.push(frame({ type: "start", messageId, messageMetadata: { chatId } }));
+
+  script.steps.forEach((step, i) => {
+    frames.push(frame({ type: "start-step" }));
+    const reasoningId = `${messageId}-r${i + 1}`;
+    frames.push(frame({ type: "reasoning-start", id: reasoningId }));
+    for (const delta of splitDeltas(step.reasoningText)) {
+      frames.push(frame({ type: "reasoning-delta", id: reasoningId, delta }, delta.length));
+    }
+    frames.push(frame({ type: "reasoning-end", id: reasoningId }));
+
+    const { toolCallId, toolName, input, output } = step.tool;
+    frames.push(frame({ type: "tool-input-start", toolCallId, toolName }));
+    frames.push(frame({ type: "tool-input-available", toolCallId, toolName, input }));
+    // The tool result arrives as ONE chunk, like the real server sends it.
+    frames.push(frame({ type: "tool-output-available", toolCallId, output }, output.content.length));
+    frames.push(frame({ type: "finish-step" }));
+  });
+
+  // Final step: the markdown answer.
+  frames.push(frame({ type: "start-step" }));
+  const textId = `${messageId}-answer`;
+  frames.push(frame({ type: "text-start", id: textId }));
+  for (const delta of splitDeltas(script.answerText)) {
+    frames.push(frame({ type: "text-delta", id: textId, delta }, delta.length));
+  }
+  frames.push(frame({ type: "text-end", id: textId }));
+  frames.push(frame({ type: "finish-step" }));
+
+  frames.push(
+    frame({
+      type: "finish",
+      messageMetadata: {
+        usage: { reasoningTokens: script.reasoningTokens },
+        contextTokens: script.contextTokens,
+        maxContextTokens: script.maxContextTokens,
+        finishReason: "stop",
+      },
+    }),
+  );
+  return frames;
+}
+
+export interface LiveStreamSettings {
+  script: TurnScript;
+  /** Delay between SSE chunks (one chunk per tick). */
+  chunkIntervalMs: number;
+  /** Progress callback: cumulative emitted chunk count and visible chars. */
+  onProgress?: (chunks: number, chars: number) => void;
+  /** Fired once after the `[DONE]` terminator is enqueued. */
+  onDone?: () => void;
+  /** Fired if the client aborted the stream (Stop button). */
+  onAbort?: () => void;
+}
+
+/**
+ * Build a synthetic SSE Response streaming the scripted turn, one chunk every
+ * `chunkIntervalMs`. Honors the fetch `AbortSignal` so the real Stop button works.
+ */
+export function buildSseResponse(
+  settings: LiveStreamSettings,
+  signal?: AbortSignal | null,
+): Response {
+  const messageId = `m-live-${Date.now()}`;
+  const frames = buildSseFrames(settings.script, messageId, "perf-chat");
+  const encoder = new TextEncoder();
+  let index = 0;
+  let emittedChars = 0;
+  let timer: number | undefined;
+
+  const stream = new ReadableStream<Uint8Array>({
+    start(controller) {
+      const stopPump = () => {
+        if (timer !== undefined) {
+          clearTimeout(timer);
+          timer = undefined;
+        }
+      };
+      const pump = () => {
+        timer = undefined;
+        if (signal?.aborted) {
+          stopPump();
+          try {
+            controller.close();
+          } catch {
+            // Already closed/cancelled — nothing to do.
+          }
+          return;
+        }
+        if (index >= frames.length) {
+          try {
+            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+            controller.close();
+          } catch {
+            // Cancelled mid-flight.
+          }
+          settings.onDone?.();
+          return;
+        }
+        const next = frames[index];
+        index += 1;
+        try {
+          controller.enqueue(encoder.encode(next.data));
+        } catch {
+          stopPump();
+          return;
+        }
+        emittedChars += next.chars;
+        settings.onProgress?.(index, emittedChars);
+        timer = window.setTimeout(pump, settings.chunkIntervalMs);
+      };
+      signal?.addEventListener(
+        "abort",
+        () => {
+          stopPump();
+          try {
+            controller.close();
+          } catch {
+            // Reader already cancelled.
+          }
+          settings.onAbort?.();
+        },
+        { once: true },
+      );
+      timer = window.setTimeout(pump, settings.chunkIntervalMs);
+    },
+    cancel() {
+      if (timer !== undefined) {
+        clearTimeout(timer);
+        timer = undefined;
+      }
+    },
+  });
+
+  return new Response(stream, {
+    status: 200,
+    headers: {
+      "content-type": "text/event-stream",
+      "cache-control": "no-cache",
+      "x-vercel-ai-ui-message-stream": "v1",
+    },
+  });
+}
+
+// ---------------------------------------------------------------------------
+// window.fetch patch
+// ---------------------------------------------------------------------------
+
+let currentLiveSettings: LiveStreamSettings | null = null;
+
+/** Arm the next `/api/ai-chat/stream` request with a scripted turn. */
+export function setLiveStreamSettings(settings: LiveStreamSettings): void {
+  currentLiveSettings = settings;
+}
+
+/**
+ * Patch `window.fetch` BEFORE React mounts: requests to `/api/ai-chat/stream`
+ * get the synthetic SSE Response; everything else passes through untouched.
+ */
+export function installAiChatStreamFetchPatch(): void {
+  const originalFetch = window.fetch.bind(window);
+  window.fetch = (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
+    const url =
+      typeof input === "string"
+        ? input
+        : input instanceof URL
+          ? input.href
+          : input.url;
+    if (url.includes("/api/ai-chat/stream")) {
+      const settings = currentLiveSettings;
+      if (!settings) {
+        return Promise.resolve(
+          new Response("perf harness: no live stream configured", { status: 500 }),
+        );
+      }
+      return Promise.resolve(buildSseResponse(settings, init?.signal ?? null));
+    }
+    return originalFetch(input, init);
+  };
+}
@@ -1382,5 +1382,8 @@
  "Applied": "Applied",
  "Suggestion applied": "Suggestion applied",
  "Failed to apply suggestion": "Failed to apply suggestion",
-  "The commented text changed since this suggestion was made; it was not applied.": "The commented text changed since this suggestion was made; it was not applied."
+  "The commented text changed since this suggestion was made; it was not applied.": "The commented text changed since this suggestion was made; it was not applied.",
+  "Dismiss": "Dismiss",
+  "Suggestion dismissed": "Suggestion dismissed",
+  "Failed to dismiss suggestion": "Failed to dismiss suggestion"
 }
@@ -1245,5 +1245,8 @@
  "Applied": "Применено",
  "Suggestion applied": "Предложение применено",
  "Failed to apply suggestion": "Не удалось применить предложение",
-  "The commented text changed since this suggestion was made; it was not applied.": "Прокомментированный текст изменился после создания предложения; оно не было применено."
+  "The commented text changed since this suggestion was made; it was not applied.": "Прокомментированный текст изменился после создания предложения; оно не было применено.",
+  "Dismiss": "Не применять",
+  "Suggestion dismissed": "Предложение отклонено",
+  "Failed to dismiss suggestion": "Не удалось отклонить предложение"
 }
@@ -2,7 +2,8 @@ import { describe, it, expect, vi } from "vitest";
 import { render, screen, fireEvent } from "@testing-library/react";
 import { MantineProvider } from "@mantine/core";
 import { Provider, createStore } from "jotai";
-import { AgentAvatarStack, agentGlyphBackground } from "./agent-avatar-stack";
+import { AgentAvatarStack } from "./agent-avatar-stack";
+import { avatarStyle } from "@/lib/avatar-palette";
 import {
  activeAiChatIdAtom,
  aiChatWindowOpenAtom,
@@ -13,14 +14,16 @@ import {

 type Props = React.ComponentProps<typeof AgentAvatarStack>;

-// The DOM normalizes an inline `background: hsl(...)` to `rgb(...)`. Push the
+// The DOM normalizes an inline hex `background-color` to `rgb(...)`. Push the
 // expected color through the same CSSOM path so the comparison stays exact and
 // non-vacuous (an empty string — i.e. no inline background, as in the pre-fix
-// Avatar approach — can never match a real color).
+// Avatar approach — can never match a real color). NOTE: jsdom's CSSOM does not
+// round-trip a `linear-gradient` in the `background` shorthand, which is why the
+// glyph carries an explicit solid `background-color` we assert on here.
 function normalizeColor(value: string): string {
  const probe = document.createElement("div");
-  probe.style.background = value;
-  return probe.style.background;
+  probe.style.backgroundColor = value;
+  return probe.style.backgroundColor;
 }

 function renderStack(props: Props) {
@@ -36,26 +39,6 @@ function renderStack(props: Props) {
  return { store, ...utils };
 }

-describe("agentGlyphBackground", () => {
-  it("is deterministic for a given agent name", () => {
-    expect(agentGlyphBackground("Researcher")).toBe(
-      agentGlyphBackground("Researcher"),
-    );
-  });
-
-  it("gives categorically different colors to different agents", () => {
-    // The two agents that looked identically violet in the report must differ.
-    expect(agentGlyphBackground("Структурный редактор")).not.toBe(
-      agentGlyphBackground("Фактчекер"),
-    );
-    expect(agentGlyphBackground("Researcher")).not.toBe(
-      agentGlyphBackground("Нарратор"),
-    );
-    // Every color is a dark hsl circle drawn from the palette.
-    expect(agentGlyphBackground("Нарратор")).toMatch(/^hsl\(\d+, \d+%, \d+%\)$/);
-  });
-});
-
 describe("AgentAvatarStack", () => {
  it("internal chat WITH role: emoji glyph + human launcher badge in front", () => {
    const { container } = renderStack({
@@ -73,8 +56,8 @@ describe("AgentAvatarStack", () => {
    expect(screen.getByText("Alice")).toBeDefined();
  });

-  it("emoji glyph applies its per-agent color as an inline DOM background", () => {
-    // Pins the actual fix: the hashed color must reach the DOM as an inline
+  it("emoji glyph applies its per-agent gradient as an inline DOM background", () => {
+    // Pins the actual fix: the hashed gradient must reach the DOM as an inline
    // `background` on the glyph Box. The pre-fix `Avatar variant="filled"` set no
    // inline background (Mantine's --avatar-bg overrode it), so this fails there.
    const agent = { name: "Researcher", emoji: "🔬", avatarUrl: null };
@@ -88,20 +71,19 @@ describe("AgentAvatarStack", () => {
      '[data-testid="agent-glyph"]',
    );
    expect(glyph).not.toBeNull();
-    // Non-vacuous: compare against the function output (normalized the same way),
-    // not a frozen literal. Empty against the pre-fix Avatar (no inline bg).
-    expect(glyph!.style.background).not.toBe("");
-    expect(glyph!.style.background).toBe(
-      normalizeColor(agentGlyphBackground(agent.name)),
-    );
+    const expected = normalizeColor(avatarStyle(agent.name).bg);
+    // Non-vacuous: the pre-fix Avatar set no inline background at all.
+    expect(expected).not.toBe("");
+    expect(glyph!.style.backgroundColor).toBe(expected);
+    // (The gradient overlay is a browser-only enhancement — jsdom's CSSOM does
+    // not round-trip linear-gradient — so its stops/angle are covered by the
+    // avatarStyle unit tests above, not asserted on the DOM here.)
  });

-  it("agents with distinct hashed colors reach the DOM as distinct backgrounds", () => {
+  it("agents with distinct styles reach the DOM as distinct backgrounds", () => {
    // "Researcher" and "Нарратор" hash to different palette entries, so their
    // applied DOM backgrounds must differ — pins "distinct colors reach the DOM".
-    expect(agentGlyphBackground("Researcher")).not.toBe(
-      agentGlyphBackground("Нарратор"),
-    );
+    expect(avatarStyle("Researcher").bg).not.toBe(avatarStyle("Нарратор").bg);

    const a = renderStack({
      agent: { name: "Researcher", emoji: "🔬", avatarUrl: null },
@@ -120,14 +102,9 @@ describe("AgentAvatarStack", () => {
    const glyphB = b.container.querySelector<HTMLElement>(
      '[data-testid="agent-glyph"]',
    );
-    expect(glyphA!.style.background).toBe(
-      normalizeColor(agentGlyphBackground("Researcher")),
-    );
-    expect(glyphB!.style.background).toBe(
-      normalizeColor(agentGlyphBackground("Нарратор")),
-    );
-    // Different colors reach the DOM (the normalized rgb values also differ).
-    expect(glyphA!.style.background).not.toBe(glyphB!.style.background);
+    expect(glyphA!.style.backgroundColor).not.toBe("");
+    // Different base colors reach the DOM (the serialized rgb values differ).
+    expect(glyphA!.style.backgroundColor).not.toBe(glyphB!.style.backgroundColor);
  });

  it("showName=false: renders only the avatars, no inline name label", () => {
@@ -4,6 +4,7 @@ import { useCallback } from "react";
 import { useTranslation } from "react-i18next";
 import { useSetAtom } from "jotai";
 import { CustomAvatar } from "@/components/ui/custom-avatar.tsx";
+import { avatarStyle, avatarBackgroundCss } from "@/lib/avatar-palette";
 import {
  activeAiChatIdAtom,
  aiChatWindowOpenAtom,
@@ -29,54 +30,11 @@ const LAUNCHER_SIZE = 22;
 // sits as a small badge over that corner (above the glyph) and stays fully visible.
 const LAUNCHER_OVERHANG = 8;

-// Small deterministic string hash (same algorithm as custom-avatar's initials
-// hash) used to pick a stable per-agent glyph color.
-function hashName(input: string): number {
-  let hash = 0;
-  for (let i = 0; i < input.length; i += 1) {
-    hash = (hash << 5) - hash + input.charCodeAt(i);
-    hash |= 0;
-  }
-  return Math.abs(hash);
-}
-
-// A palette of categorically-DISTINCT dark circle colors for emoji/sparkles agent
-// glyphs. Every entry is intentionally dark (low lightness) so a bright emoji or
-// the white sparkles icon stays readable on top; the hues are spread across the
-// wheel (red → orange → amber → green → teal → cyan → blue → indigo → violet →
-// magenta + a neutral slate) so two different agents read as DIFFERENT colors,
-// not merely different shades of the same violet.
-const GLYPH_COLORS = [
-  "hsl(355, 60%, 34%)", // red
-  "hsl(18, 62%, 32%)", // vermilion
-  "hsl(32, 60%, 30%)", // orange
-  "hsl(45, 55%, 28%)", // amber
-  "hsl(75, 45%, 26%)", // olive-green
-  "hsl(140, 48%, 26%)", // green
-  "hsl(165, 52%, 26%)", // teal
-  "hsl(188, 58%, 28%)", // cyan
-  "hsl(205, 58%, 32%)", // sky blue
-  "hsl(225, 52%, 36%)", // blue
-  "hsl(250, 48%, 38%)", // indigo
-  "hsl(280, 46%, 36%)", // violet
-  "hsl(312, 48%, 34%)", // magenta
-  "hsl(210, 12%, 36%)", // slate / neutral
-];
-
-/**
- * Deterministic dark circle color for an emoji/sparkles agent glyph, picked from
- * GLYPH_COLORS by a hash of the agent name so distinct agents get categorically
- * distinct colors while every color stays dark enough to keep the glyph readable.
- */
-export function agentGlyphBackground(name: string): string {
-  return GLYPH_COLORS[hashName(name) % GLYPH_COLORS.length];
-}
-
 /**
 * The front avatar. Image-source priority (#300):
 *   1. agent.avatarUrl -> a real avatar image (external MCP agent account).
- *   2. agent.emoji     -> the role emoji on a per-agent dark circle.
- *   3. otherwise       -> the IconSparkles glyph on a per-agent dark circle (fallback).
+ *   2. agent.emoji     -> the role emoji on a per-agent gradient circle.
+ *   3. otherwise       -> the IconSparkles glyph on a per-agent gradient circle.
 */
 function AgentGlyph({ agent }: { agent: AgentInfo }) {
  if (agent.avatarUrl) {
@@ -89,10 +47,13 @@ function AgentGlyph({ agent }: { agent: AgentInfo }) {
    );
  }

-  // Emoji/sparkles glyph on a per-agent dark circle (color hashed from the agent
-  // name). Rendered as a plain Box, NOT a Mantine `Avatar variant="filled"`, so
-  // the background is guaranteed instead of being overridden by Mantine's
-  // `--avatar-bg` (which was falling back to the theme's violet for every agent).
+  // Emoji/sparkles glyph on a per-agent gradient circle (color, gradient partner
+  // and split angle all hashed from the agent name via avatarStyle — see
+  // @/lib/avatar-palette). Rendered as a plain Box, NOT a Mantine
+  // `Avatar variant="filled"` — Mantine's `--avatar-bg` overrode the background
+  // (every agent fell back to the theme's violet). The foreground (the sparkles
+  // icon) uses the ring's WCAG-checked readable text color.
+  const style = avatarStyle(agent.name);
  return (
    <Box
      data-testid="agent-glyph"
@@ -100,8 +61,14 @@ function AgentGlyph({ agent }: { agent: AgentInfo }) {
        width: GLYPH_SIZE,
        height: GLYPH_SIZE,
        borderRadius: "50%",
-        background: agentGlyphBackground(agent.name),
-        color: "var(--mantine-color-white)",
+        // Solid base color is the fallback (and the testable value); the gradient
+        // paints over it in browsers that support it.
+        backgroundColor: style.bg,
+        backgroundImage: avatarBackgroundCss(style),
+        color:
+          style.text === "white"
+            ? "var(--mantine-color-white)"
+            : "var(--mantine-color-black)",
        display: "flex",
        alignItems: "center",
        justifyContent: "center",
@@ -19,7 +19,7 @@ import {
  IconPlus,
  IconX,
 } from "@tabler/icons-react";
-import { useAtom, useSetAtom } from "jotai";
+import { useAtom, useAtomValue, useSetAtom } from "jotai";
 import { useLocation, useMatch } from "react-router-dom";
 import { useTranslation } from "react-i18next";
 import { useQueryClient } from "@tanstack/react-query";
@@ -41,13 +41,24 @@ import { extractPageSlugId } from "@/lib";
 import {
  AI_CHATS_RQ_KEY,
  AI_CHAT_MESSAGES_RQ_KEY,
+  AI_CHAT_RUN_RQ_KEY,
  useAiChatMessagesQuery,
+  useAiChatRunQuery,
  useAiChatsQuery,
  useAiRolesQuery,
 } from "@/features/ai-chat/queries/ai-chat-query.ts";
+import {
+  shouldClearLatchOnQueryError,
+  shouldClearStoppingLatch,
+  shouldObserveRun,
+} from "@/features/ai-chat/utils/run-polling.ts";
+import { workspaceAtom } from "@/features/user/atoms/current-user-atom";
 import ConversationList from "@/features/ai-chat/components/conversation-list.tsx";
 import ChatThread from "@/features/ai-chat/components/chat-thread.tsx";
-import { exportAiChat } from "@/features/ai-chat/services/ai-chat-service.ts";
+import {
+  exportAiChat,
+  stopRun,
+} from "@/features/ai-chat/services/ai-chat-service.ts";
 import { useChatSession } from "@/features/ai-chat/hooks/use-chat-session.ts";
 import {
  shouldCollapseOnOutsidePointer,
@@ -234,6 +245,147 @@ export default function AiChatWindow() {
  const { data: messageRows, isLoading: messagesLoading } =
    useAiChatMessagesQuery(activeChatId ?? undefined);

+  // #184 reconnect-and-live-follow. Whether detached agent runs are enabled for
+  // this workspace. The reconnect endpoint itself is NOT flag-gated server-side
+  // (it is only owner-gated and returns `{ run: null }` when the chat has no
+  // run); but when the feature is off no runs are ever created, so polling it
+  // would always come back empty — we gate it off here to avoid pointless polls.
+  const workspace = useAtomValue(workspaceAtom);
+  const autonomousRunsEnabled =
+    workspace?.settings?.ai?.autonomousRuns === true;
+
+  // Whether THIS tab is the one actively streaming the open chat's run locally
+  // (it started the run here and holds the SSE). Reported up from ChatThread. We
+  // are the STREAMER while true and a passive OBSERVER while false — the basis of
+  // the observer-vs-streamer detection. Reset to false by the fresh ChatThread's
+  // mount effect on every chat switch.
+  const [localStreaming, setLocalStreaming] = useState(false);
+  const onStreamingChange = useCallback((streaming: boolean) => {
+    setLocalStreaming(streaming);
+  }, []);
+
+  // #184 Stop wiring. While a detached run is being stopped we SUPPRESS the
+  // observer merge so the stopping run's still-persisting output does not
+  // re-stream back into view between the moment the user pressed Stop and the run
+  // actually settling as 'aborted' server-side. Polling itself keeps running (so
+  // the terminal transition is still detected) — only the visual merge is gated.
+  // Cleared when the run is observed terminal (below) or the chat is switched.
+  const [stoppingRun, setStoppingRun] = useState(false);
+  // Reset the stopping latch whenever the open chat changes: it is scoped to the
+  // run of the previously-open chat.
+  useEffect(() => {
+    setStoppingRun(false);
+  }, [activeChatId]);
+
+  // Authoritative stop of the open chat's detached run (the Stop button in
+  // autonomous mode). Latch "stopping" first (suppresses the re-stream flash),
+  // then request the server stop — the ONLY thing that ends a detached run; a mere
+  // local SSE abort is a client disconnect the server ignores. On failure we
+  // release the latch so the observer resumes (better to show the live run than to
+  // freeze the view) and surface the error.
+  const handleServerStop = useCallback(
+    (chatId: string): void => {
+      setStoppingRun(true);
+      // #234 F4: drop the PREVIOUS turn's run from the cache so `run` becomes null
+      // until the CURRENT turn's run is fetched fresh. Without this, once the local
+      // stream aborts (localStreaming -> false) the run query re-enables and
+      // react-query SYNCHRONOUSLY returns the still-cached prior terminal run; the
+      // terminal effect would then clear the stopping latch against that STALE run
+      // before the current turn's (still-running, detached, growing) run is ever
+      // observed — re-opening the observer merge and flashing the growing output
+      // over the frozen row. With the cache cleared the terminal effect's
+      // `if (!run) return` holds the latch until the current run itself is observed
+      // terminal (see shouldClearStoppingLatch).
+      queryClient.removeQueries({ queryKey: AI_CHAT_RUN_RQ_KEY(chatId) });
+      void stopRun(chatId).catch(() => {
+        setStoppingRun(false);
+        notifications.show({
+          message: t("Failed to stop the run"),
+          color: "red",
+        });
+      });
+    },
+    [t, queryClient],
+  );
+
+  // Poll the latest run of the open chat ONLY when we are a passive observer:
+  // feature on, a chat is open, and we are NOT the local streamer (the streamer
+  // already has the live SSE — polling/merging too would double-render). The
+  // query's own status-keyed refetchInterval stops once the run is terminal.
+  const { data: runData, isError: runQueryFailed } = useAiChatRunQuery(
+    activeChatId ?? undefined,
+    autonomousRunsEnabled && !localStreaming,
+  );
+  const run = runData?.run ?? null;
+
+  // Safety net (#234 F4 review): after handleServerStop clears the run cache,
+  // `run` is null until the current turn's run is fetched fresh, and the terminal
+  // effect below holds the latch via `if (!run) return`. If that refetch instead
+  // ERRORS PERMANENTLY (the GET-run keeps failing) while we are no longer the
+  // streamer, the run stays null, its status-keyed refetchInterval is off, and
+  // nothing would ever observe a terminal run — freezing the view with the
+  // observer merge suppressed. Release the latch on that error so the live view
+  // resumes rather than stays stuck (the local stopRun may already have succeeded
+  // independently).
+  //
+  // #234 F7: this must NOT fire on a TRANSIENT error while `run` is still an
+  // ACTIVE held run. In TanStack Query v5 (retry:false) the query's `data` is
+  // RETAINED on error, so `runQueryFailed` can be true while `run` is still
+  // pending/running — releasing then would re-open the observer merge and flash
+  // the growing detached run over the frozen row (the very flash F4 prevents). The
+  // decision is the pure, unit-tested `shouldClearLatchOnQueryError`, which gates
+  // on the run NOT being active: it cures only the genuine permanent-null-freeze
+  // (`run === null`) and never releases against an active run.
+  useEffect(() => {
+    if (
+      shouldClearLatchOnQueryError({
+        stoppingRun,
+        isLocalStreaming: localStreaming,
+        runQueryFailed,
+        run,
+      })
+    )
+      setStoppingRun(false);
+  }, [stoppingRun, localStreaming, runQueryFailed, run]);
+  // The run's incrementally-persisted assistant message to merge into the thread,
+  // but only while we are an observer (never when we are the streamer — guards
+  // against a stale poll fighting the live stream). Includes a terminal run so the
+  // final persisted output is shown on reopen.
+  const observedRow =
+    shouldObserveRun(run, localStreaming) && !stoppingRun
+      ? (runData?.message ?? null)
+      : null;
+
+  // When the observed run reaches a terminal status, do a final messages refetch
+  // so the persisted final state (token/context badge, export source) is shown,
+  // then the query's refetchInterval has already stopped polling. Deduped per run
+  // id so it fires exactly once per run, not on every subsequent poll-less render.
+  const finalizedRunIdRef = useRef<string | null>(null);
+  useEffect(() => {
+    if (!run || !activeChatId) return;
+    if (run.status === "pending" || run.status === "running") {
+      // Active again (a new run) — re-arm so its terminal transition fires once.
+      finalizedRunIdRef.current = null;
+      return;
+    }
+    // Terminal: a stop we requested has landed (or the run finished on its own),
+    // so release the stopping latch — the observer merge can now show the final
+    // persisted (aborted/finished) output without any live re-stream. The decision
+    // is the pure, unit-tested `shouldClearStoppingLatch` (run-polling.ts): release
+    // ONLY when we requested a stop, this tab is no longer the streamer, AND the
+    // CURRENT run is terminal. The #234 F4 cache removal in handleServerStop makes
+    // `run` null (this branch's `if (!run) return` above holds) until the current
+    // turn's run is fetched fresh, so the latch can never clear against a stale
+    // cached run.
+    if (shouldClearStoppingLatch({ stoppingRun, run, isLocalStreaming: localStreaming }))
+      setStoppingRun(false);
+    if (finalizedRunIdRef.current === run.id) return;
+    finalizedRunIdRef.current = run.id;
+    queryClient.invalidateQueries({
+      queryKey: AI_CHAT_MESSAGES_RQ_KEY(activeChatId),
+    });
+  }, [run, activeChatId, queryClient, stoppingRun, localStreaming]);
+
  // The page the user is currently viewing. AiChatWindow lives in a pathless
  // parent layout route, so useParams() can't see :pageSlug. Match the full
  // pathname against the authenticated page route instead so "the current page"
@@ -882,6 +1034,18 @@ export default function AiChatWindow() {
              assistantName={currentRole?.name}
              onTurnFinished={onTurnFinished}
              onServerChatId={onServerChatId}
+              // #184: live-follow a still-running run when we reopened the chat as
+              // a passive observer; null when there is nothing to observe or this
+              // tab is the streamer. onStreamingChange lets the window stop polling
+              // while we are the streamer.
+              observedRow={observedRow}
+              onStreamingChange={onStreamingChange}
+              // #184: in autonomous mode the Stop button must hit the authoritative
+              // server stop (a local SSE abort is a client disconnect the server
+              // ignores). onServerStop also arms the "stopping" latch above so the
+              // stopped run's output does not re-stream via the observer merge.
+              autonomousRunsEnabled={autonomousRunsEnabled}
+              onServerStop={handleServerStop}
            />
          )}
        </div>
@@ -164,8 +164,8 @@
    /* NOTE: `white-space: pre-wrap` is intentionally NOT set here. On the
       rendered markdown <div> it would turn the newlines between block tags
       (</li>\n<li>, </p>\n<ol>) into visible blank lines/indents on top of the
-       margins. The plain-text fallback <Text> that needs pre-wrap sets it
-       inline itself (see reasoning-block.tsx). */
+       margins. The streaming plain-text path that needs pre-wrap sets it
+       per chunk instead, in PlainChunk (see streaming-plain-text.tsx). */
 }

 .reasoningText p {
@@ -1,5 +1,5 @@
 import { describe, it, expect, beforeEach, vi } from "vitest";
-import { render, screen, fireEvent, act } from "@testing-library/react";
+import { render, screen, fireEvent, act, cleanup } from "@testing-library/react";
 import { MantineProvider } from "@mantine/core";

 // Shared, hoisted mock state so the @ai-sdk/react and "ai" module mocks (hoisted
@@ -11,6 +11,7 @@ const h = vi.hoisted(() => ({
    onFinish: null as null | ((arg: Record<string, unknown>) => void),
    sendMessage: vi.fn(),
    stop: vi.fn(),
+    setMessages: vi.fn(),
    transport: null as null | {
      prepareSendMessagesRequest: (arg: {
        messages: unknown[];
@@ -30,6 +31,8 @@ vi.mock("@ai-sdk/react", () => ({
      status: h.state.status,
      stop: h.state.stop,
      error: null,
+      // #184: ChatThread reads setMessages to merge a polled observer run.
+      setMessages: h.state.setMessages,
    };
  },
 }));
@@ -140,3 +143,144 @@ describe("ChatThread — send now (#198)", () => {
    expect(prep({ messages: [], body: {} }).body.interrupted).toBe(false);
  });
 });
+
+// The turn-end decision lives in the `onFinish` handler: given the terminal
+// outcome of a turn (`isAbort` / `isDisconnect` / `isError`, or none = clean),
+// it decides whether to CONTINUE (flush the next queued message) or END (leave
+// the queue intact for the user), and which stop notice — if any — to show.
+// `sendNow` is exercised above; these tests pin down the plain outcomes.
+describe("ChatThread — turn-end decision (onFinish)", () => {
+  beforeEach(() => {
+    h.state.status = "streaming";
+    h.state.onFinish = null;
+    h.state.sendMessage.mockClear();
+    h.state.stop.mockClear();
+    h.state.transport = null;
+  });
+
+  // Drive a fresh onFinish with the given terminal flags after queueing a
+  // message, and report both what the parent was told and whether the queue was
+  // flushed (a resend to the sendMessage spy).
+  function finishWith(flags: {
+    isAbort?: boolean;
+    isDisconnect?: boolean;
+    isError?: boolean;
+  }) {
+    // Tear down any prior render so the loop-driven "every outcome" case does
+    // not leave duplicate queue buttons in the DOM.
+    cleanup();
+    h.state.sendMessage.mockClear();
+    const { onTurnFinished } = renderThread();
+    // Populate the queue while the turn is streaming.
+    fireEvent.click(screen.getByTestId("queue-btn"));
+    act(() => {
+      h.state.onFinish?.({
+        message: { id: "a", role: "assistant", parts: [] },
+        isAbort: false,
+        isDisconnect: false,
+        isError: false,
+        ...flags,
+      });
+    });
+    return { onTurnFinished };
+  }
+
+  it("CONTINUES — flushes the next queued message on a clean finish", () => {
+    finishWith({});
+    // Clean finish (no terminal flag): the queued message is auto-sent.
+    expect(h.state.sendMessage).toHaveBeenCalledWith({ text: "queued text" });
+    // A clean finish shows no stop notice.
+    expect(screen.queryByText("Response stopped.")).toBeNull();
+  });
+
+  it("ENDS — keeps the queue intact on a user abort and shows the stopped notice", () => {
+    finishWith({ isAbort: true });
+    // A plain Stop (not the sendNow interrupt path) must NOT auto-resend: the
+    // queue is preserved for the user to decide.
+    expect(h.state.sendMessage).not.toHaveBeenCalled();
+    expect(screen.getByText("Response stopped.")).toBeTruthy();
+  });
+
+  it("ENDS — keeps the queue intact on a disconnect and shows the connection-lost notice", () => {
+    finishWith({ isDisconnect: true });
+    expect(h.state.sendMessage).not.toHaveBeenCalled();
+    expect(
+      screen.getByText("Connection lost — the answer was interrupted."),
+    ).toBeTruthy();
+  });
+
+  it("ENDS — keeps the queue intact on a stream error (no auto-retry, no stopped notice)", () => {
+    finishWith({ isError: true });
+    // Blindly retrying after a failure would be wrong; the queue is left alone.
+    expect(h.state.sendMessage).not.toHaveBeenCalled();
+    // isError clears the neutral notice (the error banner covers this case).
+    expect(screen.queryByText("Response stopped.")).toBeNull();
+  });
+
+  it("notifies the parent on EVERY terminal outcome", () => {
+    // The chat-list refresh / new-chat id adoption must run on success and on
+    // every failure path alike.
+    for (const flags of [
+      {},
+      { isAbort: true },
+      { isDisconnect: true },
+      { isError: true },
+    ]) {
+      const { onTurnFinished } = finishWith(flags);
+      expect(onTurnFinished).toHaveBeenCalled();
+    }
+  });
+});
+
+// #184 passive-observer merge: when reconnecting to a still-running run, the
+// parent feeds the polled run message via `observedRow`; ChatThread merges it via
+// setMessages — but ONLY when this tab is NOT itself streaming (the streamer's
+// SSE owns the view, so a stale observedRow must never overwrite it).
+describe("ChatThread — observer run merge (#184)", () => {
+  beforeEach(() => {
+    h.state.onFinish = null;
+    h.state.setMessages.mockReset();
+  });
+
+  const observedRow = {
+    id: "a-run",
+    role: "assistant",
+    content: "step 1\nstep 2",
+    metadata: {
+      parts: [{ type: "text", text: "step 1\nstep 2" }],
+    },
+    createdAt: "2026-01-01T00:00:00Z",
+  } as const;
+
+  function renderObserver(status: string) {
+    h.state.status = status;
+    render(
+      <MantineProvider>
+        <ChatThread
+          chatId="c1"
+          initialRows={[]}
+          onTurnFinished={vi.fn()}
+          observedRow={observedRow as never}
+        />
+      </MantineProvider>,
+    );
+  }
+
+  it("merges the polled run message when this tab is a passive observer", () => {
+    renderObserver("ready");
+    expect(h.state.setMessages).toHaveBeenCalledTimes(1);
+    // The updater replaces/append the observed assistant row by id.
+    const updater = h.state.setMessages.mock.calls[0][0] as (
+      prev: { id: string; parts: { text: string }[] }[],
+    ) => { id: string; parts: { text: string }[] }[];
+    const merged = updater([{ id: "u1", parts: [{ text: "hi" }] }]);
+    expect(merged).toHaveLength(2);
+    expect(merged[1].id).toBe("a-run");
+    expect(merged[1].parts[0].text).toBe("step 1\nstep 2");
+  });
+
+  it("does NOT merge while THIS tab is the streamer (no double-render)", () => {
+    renderObserver("streaming");
+    expect(h.state.setMessages).not.toHaveBeenCalled();
+  });
+});
@@ -24,6 +24,7 @@ import {
 } from "@/features/ai-chat/utils/role-launch.ts";
 import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
 import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts";
+import { mergeObservedMessage } from "@/features/ai-chat/utils/run-polling.ts";
 import {
  dequeue,
  enqueueMessage,
@@ -86,6 +87,29 @@ interface ChatThreadProps {
   *  Copy/export button available mid-stream). Distinct from onTurnFinished,
   *  which fires only at the terminal outcome. */
  onServerChatId?: (serverChatId?: string) => void;
+  /** #184 reconnect-and-live-follow. When THIS tab reopened a chat whose agent
+   *  run is still going (it is a PASSIVE OBSERVER — it did not start the run here),
+   *  the parent polls the reconnect endpoint and feeds the run's incrementally-
+   *  persisted assistant message here; we merge it into the live list so new
+   *  steps/tool-calls appear as they are persisted. Null when there is nothing to
+   *  observe (no run, feature off, or this tab IS the streamer). The merge is
+   *  ADDITIONALLY guarded by our own `isStreaming`, so a stale value can never
+   *  fight the local stream when we are the streamer. */
+  observedRow?: IAiChatMessageRow | null;
+  /** Report this tab's live streaming status up to the parent, so it can stop
+   *  polling the run while WE are the active streamer (the SSE owns the view) and
+   *  resume once we go idle. Called from an effect on every transition. */
+  onStreamingChange?: (streaming: boolean) => void;
+  /** #184: whether detached/autonomous agent runs are enabled for this workspace.
+   *  When true the Stop button must additionally hit the AUTHORITATIVE server stop
+   *  (via onServerStop) — aborting only the local SSE is just a client disconnect,
+   *  which the server deliberately ignores, so the detached run would keep going. */
+  autonomousRunsEnabled?: boolean;
+  /** #184: request the server-side stop of this chat's active run (the parent owns
+   *  the endpoint call + the "stopping" latch that keeps observer-polling from
+   *  immediately re-streaming the stopping run's output). Called with the resolved
+   *  chat id when the user presses Stop in autonomous mode. */
+  onServerStop?: (chatId: string) => void;
 }

 /**
@@ -131,6 +155,10 @@ export default function ChatThread({
  assistantName,
  onTurnFinished,
  onServerChatId,
+  observedRow,
+  onStreamingChange,
+  autonomousRunsEnabled,
+  onServerStop,
 }: ChatThreadProps) {
  const { t } = useTranslation();

@@ -216,6 +244,16 @@ export default function ChatThread({
  const flushOnAbortRef = useRef(false);
  const interruptNextSendRef = useRef(false);

+  // #234 F5: the user pressed Stop while streaming a BRAND-NEW chat whose server
+  // chat id has not been adopted yet (the `start` chunk carrying it hadn't landed
+  // when Stop was pressed). A local SSE abort alone does NOT stop the DETACHED
+  // autonomous run — it keeps burning tokens and WRITING TO PAGES — so we cannot
+  // just no-op. We latch the stop as PENDING and fire the authoritative server
+  // stop the moment onServerChatId adopts the id (below). Read-and-cleared there;
+  // also defused on every new turn start so it can never fire against a later,
+  // unrelated turn's run.
+  const stopPendingRef = useRef(false);
+
  // FIFO dequeue + send the next queued message (no-op when the queue is empty).
  // Returns whether a message was actually sent, so callers can tell an empty
  // dequeue (nothing to flush) from a real send.
@@ -274,7 +312,7 @@ export default function ChatThread({
    [],
  );

-  const { messages, sendMessage, status, stop, error } = useChat({
+  const { messages, sendMessage, status, stop, error, setMessages } = useChat({
    // Stable per-mount key. Existing chats use their real id; new chats use a
    // generated client id (never `undefined`) so the store is NOT re-created on
    // every render mid-stream (see `chatStoreId` above).
@@ -365,7 +403,14 @@ export default function ChatThread({
      return;
    lastForwardedChatIdRef.current = serverChatId;
    onServerChatId(serverChatId);
-  }, [messages, onServerChatId]);
+    // #234 F5: if Stop was pressed before the id was known, the authoritative
+    // server stop was deferred to this adoption point — fire it now with the
+    // just-adopted id. One-shot (read-and-clear) so it can't fire twice.
+    if (stopPendingRef.current) {
+      stopPendingRef.current = false;
+      onServerStop?.(serverChatId);
+    }
+  }, [messages, onServerChatId, onServerStop]);

  // Live "turn was interrupted" marker for the CURRENT session. The red error
  // banner (driven by `error`) covers the error case; this covers an aborted
@@ -378,6 +423,27 @@ export default function ChatThread({

  const isStreaming = status === "submitted" || status === "streaming";

+  // #184: report our live streaming status up so the parent stops polling the run
+  // while WE are the streamer (the SSE owns the view) and resumes once we go idle.
+  // Effect (not render) so it never updates parent state during our own render;
+  // fires on mount with `false`, which also re-syncs the parent after a chat
+  // switch remounts this thread (a fresh mount is idle until the user sends).
+  useEffect(() => {
+    onStreamingChange?.(isStreaming);
+  }, [isStreaming, onStreamingChange]);
+
+  // #184 passive-observer merge: when the parent feeds a polled run message (we
+  // reopened a chat whose run is still going and did NOT start it here), merge it
+  // into the live list so new steps/tool-calls appear as they are persisted. Hard-
+  // gated by `!isStreaming`: if THIS tab is actually the streamer, the local SSE
+  // owns the view and a stale observedRow must never overwrite it. `observedRow`
+  // is a stable per-poll object, so this runs once per poll, not per render.
+  useEffect(() => {
+    if (isStreaming || !observedRow) return;
+    const observed = rowToUiMessage(observedRow);
+    setMessages((prev) => mergeObservedMessage(prev, observed));
+  }, [observedRow, isStreaming, setMessages]);
+
  // "Send now" on a queued message: interrupt the current turn and immediately
  // send THIS message, keeping the agent's partial output. Other queued messages
  // stay queued and flush normally after the new turn. Reuses the existing
@@ -409,6 +475,40 @@ export default function ChatThread({
    [setQueue, stop],
  );

+  // Stop the current turn. ALWAYS abort the local SSE (`stop()`) so the composer
+  // returns to idle immediately. In AUTONOMOUS mode the turn is a DETACHED run:
+  // aborting the local SSE is only a client disconnect, which the server ignores,
+  // so the run would keep executing — we ADDITIONALLY request the authoritative
+  // server-side stop (the parent owns that call + the "stopping" latch that keeps
+  // observer-polling from re-streaming the stopping run's output). The chat id is
+  // read live from chatIdRef (adopted early at the stream's `start` chunk); if it
+  // is not known yet — a brand-new chat in the first moment of its first turn —
+  // only the local abort happens (there is no server-side run handle to stop yet).
+  const handleStop = useCallback(() => {
+    stop();
+    if (!autonomousRunsEnabled) return;
+    if (chatIdRef.current) {
+      onServerStop?.(chatIdRef.current);
+    } else {
+      // #234 F5: no chat id yet (brand-new chat in the first moment of its first
+      // turn, before the `start` chunk adopted the id). Latch the stop as pending;
+      // the onServerChatId adoption effect fires the deferred server stop as soon
+      // as the id appears, so the detached run is still authoritatively stopped
+      // instead of left running by a silent local-only abort.
+      //
+      // KNOWN LIMITATION (#234 F5 review): `stop()` above has already aborted the
+      // local SSE reader. In the rare sub-window where Stop is pressed while still
+      // `submitted` (request sent, not one chunk read yet), that abort can cancel
+      // the reader BEFORE the `start` chunk is applied to `messages`, so the
+      // adoption effect never runs and this pending stop never fires. The detached
+      // run then keeps going for that turn. This is not a regression (the pre-fix
+      // behavior sent no server stop at all); closing it fully would require
+      // deferring the local abort until adoption, which is riskier and out of scope
+      // for this fix. Documented so a future change can address the abort-ordering.
+      stopPendingRef.current = true;
+    }
+  }, [stop, autonomousRunsEnabled, onServerStop]);
+
  // Clear the stopped marker as soon as a new turn begins streaming, and drop any
  // stale "Send now" interrupt flags. On the legit interrupt path both refs are
  // already consumed synchronously (onFinish + prepareSendMessagesRequest) before
@@ -420,6 +520,11 @@ export default function ChatThread({
      setStopNotice(null);
      flushOnAbortRef.current = false;
      interruptNextSendRef.current = false;
+      // #234 F5: a new turn is starting — drop any pending deferred-stop from a
+      // previous turn that never adopted an id, so it can never fire against this
+      // (or a later) unrelated turn's run. A deferred stop for the CURRENT turn is
+      // set AFTER this effect (on the Stop click), so this does not clobber it.
+      stopPendingRef.current = false;
    }
  }, [isStreaming]);

@@ -539,7 +644,7 @@ export default function ChatThread({
        <ChatInput
          onSend={(text) => sendMessage({ text })}
          onQueue={enqueue}
-          onStop={stop}
+          onStop={handleStop}
          isStreaming={isStreaming}
        />
      </Stack>
@@ -65,6 +65,25 @@ describe("arePropsEqual", () => {
    expect(arePropsEqual(props(m), props(m))).toBe(true);
  });

+  // REGRESSION (stranded reasoning part): a reasoning part is left at
+  // `state:"streaming"` forever when the turn ends without `reasoning-end`
+  // (manual Stop during thinking). The signature is EQUAL across that turn-end
+  // flip (nothing in the message changed), so the comparator must ALSO compare
+  // `turnStreaming` — otherwise the memo swallows the flip and ReasoningBlock
+  // never switches from chunked plain text to its one-time markdown parse.
+  it("returns false when turnStreaming differs despite an equal signature", () => {
+    const m = msg([
+      { type: "reasoning", text: "thinking", state: "streaming" },
+      { type: "text", text: "answer" },
+    ]);
+    expect(
+      arePropsEqual(
+        props(m, { turnStreaming: true }),
+        props(m, { turnStreaming: false }),
+      ),
+    ).toBe(false);
+  });
+
  it("returns true for the same content in a different message object", () => {
    const a = msg([{ type: "text", text: "answer" }]);
    const b = msg([{ type: "text", text: "answer" }]);
@@ -52,6 +52,20 @@ interface MessageItemProps {
   * absent; the public share passes the configured identity (agent role) name.
   */
  assistantName?: string;
+  /**
+   * Whether the WHOLE turn is still streaming (MessageList's `isStreaming`).
+   * A reasoning part may be left `state: "streaming"` forever when the turn
+   * ends without a `reasoning-end` chunk (manual Stop during the thinking
+   * phase, or a provider that never emits it) — the AI SDK finalizes reasoning
+   * state ONLY on `reasoning-end`, not on `finish-step`/`finish`. So part-level
+   * state alone cannot prove liveness; the reasoning part is treated as live
+   * only while the whole turn is still streaming. Defaults to false.
+   *
+   * The parent passes it as "turn is live AND this is the tail row", so a
+   * stranded part in an EARLIER row never re-activates when a later turn
+   * streams.
+   */
+  turnStreaming?: boolean;
 }

 /**
@@ -105,6 +119,7 @@ function MessageItem({
  showCitations = true,
  neutralizeInternalLinks = false,
  assistantName,
+  turnStreaming = false,
 }: MessageItemProps) {
  // `signature` is intentionally not read in the body — it exists solely as the
  // memo key (see arePropsEqual). The render reads `message` directly.
@@ -155,8 +170,23 @@ function MessageItem({
          const text = (part as { text?: string }).text ?? "";
          if (!text.trim() && !(reasoningTokens && reasoningTokens > 0))
            return null;
+          // Absent state (persisted rows) and "done" both mean finalized.
+          // `messageSignature` already includes each part's `state`, so the
+          // streaming→done flip changes the row signature and re-renders this
+          // row — which is what lets ReasoningBlock switch from chunked plain
+          // text to its one-time markdown parse (see reasoning-block.tsx).
+          // ALSO require the turn to be live: a part stranded at
+          // `state:"streaming"` after the turn ended (no `reasoning-end` — see
+          // the `turnStreaming` prop doc) must still finalize and parse.
+          const streaming =
+            turnStreaming && (part as { state?: string }).state === "streaming";
          return (
-            <ReasoningBlock key={index} text={text} tokens={reasoningTokens} />
+            <ReasoningBlock
+              key={index}
+              text={text}
+              tokens={reasoningTokens}
+              streaming={streaming}
+            />
          );
        }

@@ -245,7 +275,11 @@ export function arePropsEqual(
    prev.signature === next.signature &&
    prev.showCitations === next.showCitations &&
    prev.neutralizeInternalLinks === next.neutralizeInternalLinks &&
-    prev.assistantName === next.assistantName
+    prev.assistantName === next.assistantName &&
+    // The turn-end flip re-renders every row once (cheap, terminal event) —
+    // that is what converts a stranded `state:"streaming"` reasoning part to
+    // its one-time markdown parse (see the `turnStreaming` prop doc).
+    prev.turnStreaming === next.turnStreaming
  );
 }

@@ -1,5 +1,5 @@
 import { describe, expect, it, vi } from "vitest";
-import { render } from "@testing-library/react";
+import { fireEvent, render } from "@testing-library/react";
 import { MantineProvider } from "@mantine/core";
 import type { UIMessage } from "@ai-sdk/react";

@@ -50,8 +50,9 @@ vi.stubGlobal(

 // One assistant message wrapping the given `parts`. Reused across renders in the
 // regression test to model how the AI SDK hands back the SAME message object.
-const msg = (parts: UIMessage["parts"]): UIMessage =>
-  ({ id: "m1", role: "assistant", parts }) as UIMessage;
+// Pass an explicit `id` when a test renders several rows at once.
+const msg = (parts: UIMessage["parts"], id = "m1"): UIMessage =>
+  ({ id, role: "assistant", parts }) as UIMessage;

 describe("MessageList", () => {
  it("wires the real MessageItem and supplies a valid signature end-to-end", () => {
@@ -116,4 +117,102 @@ describe("MessageList", () => {
      renderChatMarkdownSpy.mock.calls.some((c) => c[0] === "streamed answer"),
    ).toBe(true);
  });
+
+  // REGRESSION (stranded reasoning part): the AI SDK sets a reasoning part's
+  // state to "done" ONLY on the `reasoning-end` chunk — `finish-step`/`finish`
+  // do NOT finalize it. A manual Stop during the thinking phase (or a provider
+  // that never emits `reasoning-end`) therefore leaves the part at
+  // `state:"streaming"` forever. MessageItem must derive ReasoningBlock's
+  // `streaming` from part state AND turn liveness (MessageList's `isStreaming`,
+  // forwarded as `turnStreaming`): while the turn streams the expanded block
+  // shows chunked plain text (no parse); once the turn ends — even though the
+  // part is still `state:"streaming"` — the block finalizes and does its
+  // one-time markdown parse. Note the message signature does NOT change across
+  // that flip, so this also exercises the `turnStreaming` memo comparison in
+  // arePropsEqual (without it the row would never re-render).
+  it("finalizes a reasoning part stranded at state:'streaming' when the turn ends", () => {
+    renderChatMarkdownSpy.mockClear();
+    const reasoningText = "**bold** thinking";
+    // Reasoning part stranded mid-stream + a non-empty answer part (a
+    // reasoning-only message renders nothing — see message-content.ts).
+    const message = msg([
+      { type: "reasoning", text: reasoningText, state: "streaming" },
+      { type: "text", text: "partial answer" },
+    ]);
+    const parsesOfReasoning = () =>
+      renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText)
+        .length;
+
+    const { rerender, getByRole, queryByText } = render(
+      <MantineProvider>
+        <MessageList messages={[message]} isStreaming />
+      </MantineProvider>,
+    );
+    // Expand the reasoning block (its toggle is the only button in the list).
+    fireEvent.click(getByRole("button"));
+    // Turn live + part streaming -> ReasoningBlock received streaming=true:
+    // the body is chunked plain text (raw markdown syntax), NOT parsed.
+    expect(queryByText(/bold/)).not.toBeNull();
+    expect(parsesOfReasoning()).toBe(0);
+
+    // The turn ends WITHOUT `reasoning-end`: the part object is untouched
+    // (still state:"streaming"), only the turn-level flag flips.
+    rerender(
+      <MantineProvider>
+        <MessageList messages={[message]} isStreaming={false} />
+      </MantineProvider>,
+    );
+    // ReasoningBlock now received streaming=false and did its one-time parse.
+    expect(parsesOfReasoning()).toBe(1);
+  });
+
+  // REGRESSION (turn-global liveness leaking into earlier rows): `isStreaming`
+  // is turn-global, so forwarding it to EVERY row would re-mark a reasoning
+  // part stranded at `state:"streaming"` in a PREVIOUS message (see the test
+  // above) as live again whenever a LATER turn streams — an expanded stranded
+  // block would flip markdown -> raw plain text -> markdown across turn
+  // boundaries, re-parsing each time. MessageList must gate `turnStreaming`
+  // to the TAIL row only.
+  it("keeps a stranded reasoning part in an earlier message finalized while a later turn streams", () => {
+    renderChatMarkdownSpy.mockClear();
+    const reasoningText = "**bold** thinking";
+    // First (earlier) assistant message: its turn was stopped during the
+    // thinking phase, leaving the reasoning part at state:"streaming".
+    const first = msg(
+      [
+        { type: "reasoning", text: reasoningText, state: "streaming" },
+        { type: "text", text: "first answer" },
+      ],
+      "m1",
+    );
+    // Second assistant message: the LATER turn, currently streaming.
+    const second = msg([{ type: "text", text: "second answer" }], "m2");
+    const parsesOfReasoning = () =>
+      renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText)
+        .length;
+
+    const { rerender, getByRole, queryByText } = render(
+      <MantineProvider>
+        <MessageList messages={[first, second]} isStreaming />
+      </MantineProvider>,
+    );
+    // Expand the first row's reasoning block (the only toggle in the list —
+    // the second message has no reasoning or tool parts).
+    fireEvent.click(getByRole("button"));
+    // The turn is live but the first row is NOT the tail: its ReasoningBlock
+    // received streaming=false, so the stranded part stays finalized and does
+    // its one-time markdown parse instead of dropping to chunked plain text.
+    expect(queryByText(/bold/)).not.toBeNull();
+    expect(parsesOfReasoning()).toBe(1);
+
+    // A later-turn delta re-renders the list; the earlier block must neither
+    // flip back to streaming nor re-parse.
+    (second.parts[0] as { text: string }).text = "second answer grows";
+    rerender(
+      <MantineProvider>
+        <MessageList messages={[first, second]} isStreaming />
+      </MantineProvider>,
+    );
+    expect(parsesOfReasoning()).toBe(1);
+  });
 });
@@ -196,7 +196,7 @@ export default function MessageList({
  return (
    <ScrollArea className={classes.messages} viewportRef={viewportRef} scrollbarSize={6} type="scroll">
      <Stack gap={0} pr="xs">
-        {messages.map((message) => (
+        {messages.map((message, index) => (
          // `signature` is snapshotted HERE (parent render) into an immutable
          // string and handed to MessageItem as its memo key. It must NOT be
          // recomputed inside MessageItem's arePropsEqual: the AI SDK mutates the
@@ -210,6 +210,13 @@ export default function MessageList({
            showCitations={showCitations}
            neutralizeInternalLinks={neutralizeInternalLinks}
            assistantName={assistantName}
+            // Turn-level liveness, gated to the TAIL row: only the tail message
+            // can belong to the in-flight turn, so a reasoning part stranded at
+            // `state:"streaming"` in an EARLIER message (its turn ended without
+            // `reasoning-end`) stays finalized and doesn't flip back to plain
+            // text (and re-parse) whenever a later turn streams — see
+            // message-item.tsx.
+            turnStreaming={isStreaming && index === messages.length - 1}
          />
        ))}
        {typing && (
@@ -28,7 +28,11 @@ import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";

 // matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.

-function renderBlock(props: { text: string; tokens?: number }) {
+function renderBlock(props: {
+  text: string;
+  tokens?: number;
+  streaming?: boolean;
+}) {
  return render(
    <MantineProvider>
      <ReasoningBlock {...props} />
@@ -84,4 +88,54 @@ describe("ReasoningBlock", () => {
    fireEvent.click(screen.getByRole("button"));
    expect(renderSpy).toHaveBeenCalledTimes(1);
  });
+
+  it("does not parse while expanded and STREAMING; shows chunked plain text", () => {
+    const renderSpy = vi.mocked(renderChatMarkdown);
+    renderSpy.mockClear();
+    renderBlock({
+      text: "первый абзац размышлений\n\nвторой абзац растёт",
+      tokens: 5,
+      streaming: true,
+    });
+    fireEvent.click(screen.getByRole("button"));
+    // Expanded + still streaming: NO markdown parse and NO innerHTML swaps per
+    // delta — the body is chunked plain text (only the tail chunk updates).
+    // This is the O(n²) hole #302 left open (Safari whole-tab freeze).
+    expect(renderSpy).not.toHaveBeenCalled();
+    // Both paragraph chunks' raw text is present in the body.
+    expect(screen.getByText(/первый абзац размышлений/)).toBeDefined();
+    expect(screen.getByText(/второй абзац растёт/)).toBeDefined();
+  });
+
+  it("parses exactly once when streaming flips to done while expanded", () => {
+    const renderSpy = vi.mocked(renderChatMarkdown);
+    renderSpy.mockClear();
+    const { rerender } = renderBlock({
+      text: "**bold** reasoning",
+      tokens: 5,
+      streaming: true,
+    });
+    fireEvent.click(screen.getByRole("button"));
+    expect(renderSpy).not.toHaveBeenCalled();
+
+    // Finalization: the part's state flips streaming→done, the parent
+    // re-renders the row (the flip changes the message signature), and the
+    // block does its ONE markdown parse of the now-stable text.
+    rerender(
+      <MantineProvider>
+        <ReasoningBlock text="**bold** reasoning" tokens={5} streaming={false} />
+      </MantineProvider>,
+    );
+    expect(renderSpy).toHaveBeenCalledTimes(1);
+    // The parsed html branch rendered (the mock wraps the input in <p>…</p>).
+    expect(screen.getByText(/reasoning/)).toBeDefined();
+
+    // Further re-renders with unchanged props do not re-parse.
+    rerender(
+      <MantineProvider>
+        <ReasoningBlock text="**bold** reasoning" tokens={5} streaming={false} />
+      </MantineProvider>,
+    );
+    expect(renderSpy).toHaveBeenCalledTimes(1);
+  });
 });
@@ -5,6 +5,7 @@ import { useTranslation } from "react-i18next";
 import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
 import { collapseBlankLines } from "@/features/ai-chat/utils/collapse-blank-lines.ts";
 import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
+import { StreamingPlainText } from "@/features/ai-chat/components/streaming-plain-text.tsx";
 import classes from "@/features/ai-chat/components/ai-chat.module.css";

 interface ReasoningBlockProps {
@@ -15,6 +16,10 @@ interface ReasoningBlockProps {
   *  step/turn has finished. When absent (or 0) the count is estimated from the
   *  text length so it ticks live as the reasoning streams in. */
  tokens?: number;
+  /** True while the reasoning part is still streaming (part `state ===
+   *  "streaming"`). False means finalized: persisted history or `state ===
+   *  "done"`. Gates the markdown parse — see the invariant on the memo below. */
+  streaming?: boolean;
 }

 /**
@@ -27,26 +32,30 @@ interface ReasoningBlockProps {
 * Providers that don't stream reasoning TEXT still render this block from the
 * authoritative count alone (header only, empty body) so the cost is visible.
 */
-function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
+function ReasoningBlock({ text, tokens, streaming = false }: ReasoningBlockProps) {
  const { t } = useTranslation();
  const [open, setOpen] = useState(false);

  // Authoritative count wins; otherwise estimate live from the streamed text.
  const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
  const trimmed = text.trim();
-  // Parse the reasoning markdown ONLY while the block is expanded. Collapsed is the
-  // default and the common case during a long "thinking" stream: reasoning text
-  // streams in and grows with every throttled delta (~20Hz), so a `[trimmed]`-only
-  // memo re-parses the whole, ever-growing text (marked + DOMPurify) on every delta
-  // — an O(n²) storm that pins the main thread and freezes the chat, all for a block
-  // the user isn't even looking at (the html is only shown inside <Collapse in={open}>
-  // below). Gating on `open` skips that hidden parsing entirely; expanding parses the
-  // current text once (an instant, user-initiated click), and further streaming while
-  // open is the normal per-delta append render, like the answer.
+  // Markdown parse invariant (per throttled ~20Hz stream delta the text GROWS):
+  //  1. Collapsed -> never parse (#302): the html is only shown inside
+  //     <Collapse in={open}>, so parsing for a hidden body would be an O(n²)
+  //     marked + DOMPurify storm.
+  //  2. Expanded + STREAMING -> no parse and no innerHTML swaps either: the body
+  //     renders as chunked plain text (StreamingPlainText) with a memoized
+  //     stable prefix, so each delta updates only the tail chunk's text node.
+  //     This closes the O(n²) hole #302 left open ("expanded while streaming")
+  //     that froze the whole tab in Safari when watching the thinking stream.
+  //  3. Finalized + expanded -> exactly one parse: `trimmed` and `streaming`
+  //     are stable after the part is done, so this memo runs once per expand.
  const html = useMemo(
    () =>
-      open && trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : "",
-    [open, trimmed],
+      open && trimmed && !streaming
+        ? renderChatMarkdown(collapseBlankLines(trimmed), {})
+        : "",
+    [open, trimmed, streaming],
  );

  return (
@@ -83,12 +92,12 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
              dangerouslySetInnerHTML={{ __html: html }}
            />
          ) : (
-            <Text
-              className={classes.reasoningText}
-              style={{ whiteSpace: "pre-wrap" }}
-            >
-              {trimmed}
-            </Text>
+            // Still streaming (or markdown yielded nothing): chunked plain text.
+            // The wrapper carries the reasoningText styling; each chunk sets its
+            // own pre-wrap inline (NOT on this div — see ai-chat.module.css).
+            <div className={classes.reasoningText}>
+              <StreamingPlainText text={trimmed} />
+            </div>
          )}
        </Collapse>
      )}
@@ -96,7 +105,7 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
  );
 }

-// Memoized: re-renders only when `text`/`tokens` change (primitive props, default
-// shallow compare), so a parent re-render during streaming of OTHER content does
-// not re-run the markdown parse for an already-finalized reasoning block.
+// Memoized: re-renders only when `text`/`tokens`/`streaming` change (primitive
+// props, default shallow compare), so a parent re-render during streaming of OTHER
+// content does not re-run the markdown parse for an already-finalized reasoning block.
 export default memo(ReasoningBlock);
@@ -0,0 +1,146 @@
+import { describe, it, expect } from "vitest";
+import { render } from "@testing-library/react";
+
+import {
+  splitPlainChunks,
+  StreamingPlainText,
+} from "./streaming-plain-text";
+
+describe("splitPlainChunks", () => {
+  // THE load-bearing property (see the invariant comment in the module): under
+  // append-only growth, every chunk except the LAST must be byte-identical
+  // between successive calls, so the memoized chunk components never re-render
+  // for the stable prefix and each stream delta touches only the tail chunk.
+  it("keeps all non-last chunks byte-identical across append-only growth", () => {
+    // A simulated reasoning stream covering: appends inside the last paragraph,
+    // appends that ADD new blank lines, growth of a trailing newline run, and a
+    // trailing separator later followed by text.
+    const steps = [
+      "Пер",
+      "Первый абзац",
+      "Первый абзац\n",
+      "Первый абзац\n\n",
+      "Первый абзац\n\n\n",
+      "Первый абзац\n\n\nВторой",
+      "Первый абзац\n\n\nВторой абзац растёт",
+      "Первый абзац\n\n\nВторой абзац растёт\n\nТретий",
+      "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\n",
+      "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\nЧетвёртый",
+    ];
+    let prev: string[] = [];
+    for (const text of steps) {
+      const next = splitPlainChunks(text);
+      // Lossless: chunks always reassemble into the exact input.
+      expect(next.join("")).toBe(text);
+      // Chunk count never shrinks (boundaries never disappear).
+      expect(next.length).toBeGreaterThanOrEqual(prev.length);
+      // Every previously-FINAL chunk (all but prev's last) is unchanged.
+      for (let i = 0; i < prev.length - 1; i++) {
+        expect(next[i]).toBe(prev[i]);
+      }
+      prev = next;
+    }
+    // Guard against a vacuous pass: the final split must be multi-chunk.
+    expect(prev.length).toBeGreaterThanOrEqual(4);
+  });
+
+  it("attaches the blank-line separator run to the preceding chunk", () => {
+    expect(splitPlainChunks("a\n\nb")).toEqual(["a\n\n", "b"]);
+    // A longer run is ONE separator, not several boundaries.
+    expect(splitPlainChunks("a\n\n\n\nb")).toEqual(["a\n\n\n\n", "b"]);
+    expect(splitPlainChunks("a\n\nb\n\n\nc")).toEqual(["a\n\n", "b\n\n\n", "c"]);
+  });
+
+  it("single newlines are not boundaries", () => {
+    expect(splitPlainChunks("a\nb\nc")).toEqual(["a\nb\nc"]);
+  });
+
+  // INTENTIONAL: CRLF blank lines are NOT boundaries (the regex is `\n{2,}`
+  // only). Supporting `(?:\r?\n){2,}` would break the stable-prefix invariant:
+  // a lone trailing `\r` is not a boundary, but a later-appended `\n` would
+  // merge with it into a new separator unit and retroactively create a boundary
+  // INSIDE previously-emitted text, moving old chunk edges. So CRLF input stays
+  // in one (still lossless) chunk — only granularity is coarser; LLM output is
+  // `\n` in practice. See the doc comment on splitPlainChunks.
+  it("keeps CRLF blank lines inside one chunk", () => {
+    expect(splitPlainChunks("a\r\n\r\nb")).toEqual(["a\r\n\r\nb"]);
+    // Mixed input: only pure-`\n` runs split.
+    expect(splitPlainChunks("a\r\n\r\nb\n\nc")).toEqual(["a\r\n\r\nb\n\n", "c"]);
+  });
+
+  it("never emits empty phantom chunks (multi-blank-line / trailing newlines)", () => {
+    expect(splitPlainChunks("")).toEqual([]);
+    // A trailing newline run stays inside the last chunk (it may still grow).
+    expect(splitPlainChunks("a\n")).toEqual(["a\n"]);
+    expect(splitPlainChunks("a\n\n")).toEqual(["a\n\n"]);
+    expect(splitPlainChunks("a\n\nb\n\n")).toEqual(["a\n\n", "b\n\n"]);
+    // Degenerate all-newlines input is a single deterministic chunk.
+    expect(splitPlainChunks("\n\n\n")).toEqual(["\n\n\n"]);
+    for (const text of ["a\n\n\nb\n\n", "x\n\n\n\n\ny\n\nz\n"]) {
+      for (const chunk of splitPlainChunks(text)) {
+        expect(chunk.length).toBeGreaterThan(0);
+      }
+    }
+  });
+});
+
+describe("StreamingPlainText", () => {
+  it("renders one block per chunk, stripping trailing separator newlines at display time", () => {
+    const text = "первый абзац\n\nвторой абзац\n\n\nтретий";
+    const { container } = render(<StreamingPlainText text={text} />);
+    const blocks = Array.from(container.querySelectorAll("div"));
+    // One block element per chunk.
+    expect(blocks.length).toBe(splitPlainChunks(text).length);
+    // DISPLAY-ONLY strip: each rendered block drops its chunk's trailing
+    // separator newlines — rendering them inside a pre-wrap block would add an
+    // empty line ON TOP of the block break (a doubled gap). The RAW chunks
+    // keep their separators (losslessness is asserted on splitPlainChunks
+    // above); multi-blank-line runs collapse to one uniform gap, consistent
+    // with collapseBlankLines on the finalized markdown path.
+    expect(blocks.map((b) => b.textContent)).toEqual([
+      "первый абзац",
+      "второй абзац",
+      "третий",
+    ]);
+    // The uniform paragraph gap comes from the block margin instead (matches
+    // the `.reasoningText p { margin: 0 0 4px }` rhythm of the markdown path).
+    for (const block of blocks) {
+      expect((block as HTMLElement).style.marginBottom).toBe("4px");
+    }
+  });
+
+  it("keeps interior newlines intact — only the trailing run is stripped", () => {
+    const text = "строка один\nстрока два\n\nхвост";
+    const { container } = render(<StreamingPlainText text={text} />);
+    const blocks = Array.from(container.querySelectorAll("div"));
+    expect(blocks.map((b) => b.textContent)).toEqual([
+      "строка один\nстрока два",
+      "хвост",
+    ]);
+  });
+
+  // SECURITY INVARIANT — the load-bearing property of the streaming path: the
+  // reasoning text is raw, untrusted model output rendered WITHOUT a sanitizer
+  // (no marked/DOMPurify, no innerHTML). PlainChunk emits it as a React text
+  // node, which escapes it, so HTML in the model output is inert. This test
+  // pins that the path is a TEXT sink, not an HTML sink: a future change to
+  // `dangerouslySetInnerHTML` (reintroducing XSS) MUST fail here.
+  //
+  // The existing tests assert via textContent, which strips tags and so cannot
+  // distinguish an escaped literal from injected DOM. This one asserts on the
+  // parsed DOM directly: if the markup were injected as HTML, the <img>/<b>
+  // would become real elements and querySelector would find them.
+  it("renders HTML-like reasoning as an escaped literal, never as injected DOM", () => {
+    const text = "<img src=x onerror=alert(1)>\n\n<b>hi</b>";
+    const { container } = render(<StreamingPlainText text={text} />);
+    // No DOM elements were created from the payload — it was NOT parsed as HTML.
+    expect(container.querySelector("img")).toBeNull();
+    expect(container.querySelector("b")).toBeNull();
+    // The raw markup survived verbatim as text (proving it is escaped, not
+    // interpreted). textContent alone can't prove this, but combined with the
+    // querySelector assertions above it does: the literals are present AND no
+    // elements exist.
+    expect(container.textContent).toContain("<b>hi</b>");
+    expect(container.textContent).toContain("<img src=x onerror=alert(1)>");
+  });
+});
@@ -0,0 +1,90 @@
+import { memo, useMemo } from "react";
+
+/**
+ * Split plain text into chunks at blank-line (paragraph) boundaries, keeping
+ * each separator run attached to the END of the preceding chunk, so the chunks
+ * always reassemble byte-for-byte into the input.
+ *
+ * A boundary is the end of a maximal `\n{2,}` run that is followed by at least
+ * one more character. A newline run that is a SUFFIX of the text is NOT a
+ * boundary yet: under append-only growth it may still gain more newlines, and
+ * cutting there would move the boundary on the next call.
+ *
+ * CRITICAL INVARIANT (load-bearing for StreamingPlainText's memoization): for
+ * APPEND-ONLY growth of `text`, every chunk except the LAST is byte-identical
+ * between successive calls — previously-emitted boundaries never move. Proof
+ * sketch: appending never modifies existing characters, so (a) an existing
+ * boundary's newline run and its following character are untouched and the
+ * boundary persists at the same offset; (b) no NEW boundary can appear strictly
+ * inside the old text, because a `\n{2,}` run followed by a character entirely
+ * within the old text would already have been a boundary. New boundaries can
+ * only materialize at or after the old text's end, i.e. inside the last chunk.
+ *
+ * CRLF is deliberately NOT a boundary: supporting `(?:\r?\n){2,}` would BREAK
+ * the invariant above — a lone trailing `\r` is not a boundary, but a later-
+ * appended `\n` would merge with it into a new separator unit and retroactively
+ * create a boundary INSIDE previously-emitted text, moving old chunk edges.
+ * With `\n`-only runs, appended characters can never extend a run that is
+ * already followed by a non-`\n` character, so old boundaries are immutable.
+ * CRLF blank lines therefore intentionally stay inside one chunk: correctness/
+ * losslessness are unaffected, only chunk granularity for CRLF input (LLM
+ * output is `\n` in practice).
+ */
+export function splitPlainChunks(text: string): string[] {
+  const chunks: string[] = [];
+  let start = 0;
+  for (const match of text.matchAll(/\n{2,}/g)) {
+    const end = match.index + match[0].length;
+    // Suffix run: not a stable boundary yet (see the invariant above).
+    if (end >= text.length) break;
+    chunks.push(text.slice(start, end));
+    start = end;
+  }
+  if (start < text.length) chunks.push(text.slice(start));
+  return chunks;
+}
+
+/**
+ * One immutable chunk. Memoized on its string prop: during streaming only the
+ * TAIL chunk's text changes (see the splitPlainChunks invariant), so React
+ * skips every stable chunk and the per-delta DOM work is a single text-node
+ * update. `pre-wrap` is set per chunk (like the old raw-text fallback did), NOT
+ * on the surrounding markdown-styled container — see the note in
+ * ai-chat.module.css. Font/size/color are inherited from that container.
+ *
+ * DISPLAY-ONLY newline strip: the raw chunk keeps its trailing `\n{2,}`
+ * separator run attached (the splitPlainChunks invariant, load-bearing for the
+ * memo), but rendering those newlines inside a pre-wrap block would add an
+ * empty line ON TOP of the block break — a doubled gap. So the RENDERED string
+ * drops trailing newlines and the paragraph gap comes from `marginBottom: 4`
+ * instead, matching the `.reasoningText p { margin: 0 0 4px }` rhythm of the
+ * finalized markdown. Multi-blank-line runs thus collapse to one uniform gap,
+ * consistent with `collapseBlankLines` on the markdown path. The last chunk
+ * usually has no trailing newlines (strip is a no-op); its margin is harmless.
+ */
+const PlainChunk = memo(function PlainChunk({ text }: { text: string }) {
+  return (
+    <div style={{ whiteSpace: "pre-wrap", marginBottom: 4 }}>
+      {text.replace(/\n+$/, "")}
+    </div>
+  );
+});
+
+/**
+ * Renders still-streaming plain text as a list of paragraph chunks where only
+ * the tail chunk changes per delta. No markdown, no sanitizer, no innerHTML —
+ * this is the cheap streaming-time stand-in for the one-time markdown parse
+ * that happens after the part is finalized (see reasoning-block.tsx).
+ */
+export function StreamingPlainText({ text }: { text: string }) {
+  const chunks = useMemo(() => splitPlainChunks(text), [text]);
+  return (
+    <>
+      {chunks.map((chunk, index) => (
+        // Index keys are stable here: chunks are append-only (the invariant),
+        // so an index never gets a different chunk's content mid-stream.
+        <PlainChunk key={index} text={chunk} />
+      ))}
+    </>
+  );
+}
@@ -12,6 +12,7 @@ import {
  deleteAiChat,
  deleteAiRole,
  getAiChatMessages,
+  getAiChatRun,
  getAiChats,
  getAiRoleCatalog,
  getAiRoleCatalogBundle,
@@ -24,6 +25,7 @@ import {
 import {
  IAiChat,
  IAiChatMessageRow,
+  IAiChatRunResponse,
  IAiRole,
  IAiRoleCatalog,
  IAiRoleCatalogBundle,
@@ -34,6 +36,7 @@ import {
  IAiRoleUpdateFromCatalogResult,
 } from "@/features/ai-chat/types/ai-chat.types.ts";
 import { IPagination } from "@/lib/types.ts";
+import { runPollInterval } from "@/features/ai-chat/utils/run-polling.ts";

 export const AI_CHATS_RQ_KEY = ["ai-chats"];
 export const AI_ROLES_RQ_KEY = ["ai-roles"];
@@ -51,16 +54,18 @@ export const AI_CHAT_MESSAGES_RQ_KEY = (chatId: string) => [
  "ai-chat-messages",
  chatId,
 ];
+export const AI_CHAT_RUN_RQ_KEY = (chatId: string) => ["ai-chat-run", chatId];

 /** Paginated list of the current user's chats (auto-loads further pages). */
 export function useAiChatsQuery() {
  const query = useInfiniteQuery({
    queryKey: AI_CHATS_RQ_KEY,
-    queryFn: ({ pageParam }) =>
-      getAiChats({ cursor: pageParam, limit: 50 }),
+    queryFn: ({ pageParam }) => getAiChats({ cursor: pageParam, limit: 50 }),
    initialPageParam: undefined as string | undefined,
    getNextPageParam: (lastPage) =>
-      lastPage.meta.hasNextPage ? (lastPage.meta.nextCursor ?? undefined) : undefined,
+      lastPage.meta.hasNextPage
+        ? (lastPage.meta.nextCursor ?? undefined)
+        : undefined,
  });

  const data = useMemo<IPagination<IAiChat> | undefined>(() => {
@@ -90,7 +95,9 @@ export function useAiChatMessagesQuery(chatId: string | undefined) {
      getAiChatMessages({ chatId: chatId as string, cursor: pageParam }),
    initialPageParam: undefined as string | undefined,
    getNextPageParam: (lastPage) =>
-      lastPage.meta.hasNextPage ? (lastPage.meta.nextCursor ?? undefined) : undefined,
+      lastPage.meta.hasNextPage
+        ? (lastPage.meta.nextCursor ?? undefined)
+        : undefined,
    enabled: !!chatId,
  });

@@ -131,6 +138,34 @@ export function useAiChatMessagesQuery(chatId: string | undefined) {
  };
 }

+/**
+ * Reconnect to a chat's latest agent run and LIVE-FOLLOW it (#184). While the run
+ * is active the query re-polls every {@link runPollInterval} ms (driven off the
+ * fetched `run.status`, the same status-keyed refetchInterval pattern as the
+ * embeddings reindex polling); once the run reaches a terminal status — or there
+ * is no run — the interval returns `false` and polling stops on its own. Polling
+ * is thus naturally bounded by the run terminating; no separate timeout cap.
+ *
+ * `enabled` gates the whole thing: callers pass `false` when the autonomous-runs
+ * feature is off (the endpoint is NOT flag-gated server-side, but with the feature
+ * off the chat has no runs, so polling would only ever return `{ run: null }`) OR
+ * when THIS tab is the one actively streaming the run (the live SSE owns the view,
+ * so we must not also poll/merge). The global `retry: false` means a failed fetch
+ * leaves `data` undefined, so refetchInterval(undefined run) returns false — a
+ * failed fetch can never spin a tight loop.
+ */
+export function useAiChatRunQuery(
+  chatId: string | undefined,
+  enabled: boolean,
+) {
+  return useQuery<IAiChatRunResponse, Error>({
+    queryKey: AI_CHAT_RUN_RQ_KEY(chatId ?? ""),
+    queryFn: () => getAiChatRun(chatId as string),
+    enabled: !!chatId && enabled,
+    refetchInterval: (query) => runPollInterval(query.state.data?.run),
+  });
+}
+
 export function useRenameAiChatMutation() {
  const queryClient = useQueryClient();
  const { t } = useTranslation();
@@ -280,11 +315,14 @@ export function useImportAiRolesFromCatalogMutation() {
    mutationFn: (payload) => importAiRolesFromCatalog(payload),
    onSuccess: (result) => {
      notifications.show({
-        message: t("Imported {{created}}, renamed {{renamed}}, skipped {{skipped}}", {
-          created: result.created,
-          renamed: result.renamed,
-          skipped: result.skipped,
-        }),
+        message: t(
+          "Imported {{created}}, renamed {{renamed}}, skipped {{skipped}}",
+          {
+            created: result.created,
+            renamed: result.renamed,
+            skipped: result.skipped,
+          },
+        ),
      });
      // Surface partial failures (e.g. unique-name races) as a red warning.
      if (result.errors.length > 0) {
@@ -0,0 +1,92 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import React from "react";
+import { renderHook, waitFor } from "@testing-library/react";
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import type { IAiChatRunResponse } from "@/features/ai-chat/types/ai-chat.types.ts";
+
+// react-i18next is pulled in transitively by ai-chat-query.ts (the mutation hooks
+// use it); stub it so the module imports cleanly in this hook test.
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({ t: (key: string) => key }),
+}));
+
+vi.mock("@mantine/notifications", () => ({
+  notifications: { show: vi.fn() },
+}));
+
+// Mock the whole service module; only getAiChatRun is exercised here, but the
+// other named exports must exist so ai-chat-query.ts imports resolve.
+vi.mock("@/features/ai-chat/services/ai-chat-service.ts", () => ({
+  getAiChatRun: vi.fn(),
+  getAiChatMessages: vi.fn(),
+  getAiChats: vi.fn(),
+  getAiRoleCatalog: vi.fn(),
+  getAiRoleCatalogBundle: vi.fn(),
+  getAiRoles: vi.fn(),
+  importAiRolesFromCatalog: vi.fn(),
+  createAiRole: vi.fn(),
+  deleteAiChat: vi.fn(),
+  deleteAiRole: vi.fn(),
+  renameAiChat: vi.fn(),
+  updateAiRole: vi.fn(),
+  updateAiRoleFromCatalog: vi.fn(),
+}));
+
+import { getAiChatRun } from "@/features/ai-chat/services/ai-chat-service.ts";
+import { useAiChatRunQuery } from "@/features/ai-chat/queries/ai-chat-query.ts";
+
+function createWrapper() {
+  const queryClient = new QueryClient({
+    defaultOptions: { queries: { retry: false } },
+  });
+  return function Wrapper({ children }: { children: React.ReactNode }) {
+    return (
+      <QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
+    );
+  };
+}
+
+const runningResponse: IAiChatRunResponse = {
+  run: { id: "run-1", chatId: "c1", status: "running" },
+  message: {
+    id: "a1",
+    role: "assistant",
+    content: "working...",
+    createdAt: "2026-01-01T00:00:00Z",
+  },
+};
+
+describe("useAiChatRunQuery — enable gating", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("fetches the run when enabled (passive observer, feature on)", async () => {
+    vi.mocked(getAiChatRun).mockResolvedValue(runningResponse);
+    const { result } = renderHook(() => useAiChatRunQuery("c1", true), {
+      wrapper: createWrapper(),
+    });
+    await waitFor(() => expect(result.current.isSuccess).toBe(true));
+    expect(getAiChatRun).toHaveBeenCalledWith("c1");
+    expect(result.current.data?.run?.status).toBe("running");
+  });
+
+  it("does NOT fetch when disabled (this tab is the streamer / feature off)", async () => {
+    vi.mocked(getAiChatRun).mockResolvedValue(runningResponse);
+    renderHook(() => useAiChatRunQuery("c1", false), {
+      wrapper: createWrapper(),
+    });
+    // Give any errant fetch a chance to fire, then assert none did.
+    await new Promise((r) => setTimeout(r, 20));
+    expect(getAiChatRun).not.toHaveBeenCalled();
+  });
+
+  it("does NOT fetch when there is no chat id", async () => {
+    vi.mocked(getAiChatRun).mockResolvedValue(runningResponse);
+    renderHook(() => useAiChatRunQuery(undefined, true), {
+      wrapper: createWrapper(),
+    });
+    await new Promise((r) => setTimeout(r, 20));
+    expect(getAiChatRun).not.toHaveBeenCalled();
+  });
+});
@@ -5,6 +5,7 @@ import {
  IAiChatListParams,
  IAiChatMessageRow,
  IAiChatMessagesParams,
+  IAiChatRunResponse,
  IAiRole,
  IAiRoleCatalog,
  IAiRoleCatalogBundle,
@@ -42,6 +43,38 @@ export async function getAiChatMessages(
  return req.data;
 }

+/**
+ * Reconnect to the latest agent run of a chat (#184). Returns the run's
+ * persisted lifecycle state and the assistant message it materializes (the
+ * partial output while the run is in-flight, the final output once it finished).
+ * The DB is the source of truth, so this works for an in-flight run (the browser
+ * dropped, the run kept going) and a finished one alike; `{ run: null }` when the
+ * chat has never had a run. Owner-gated server-side (the requesting user must own
+ * the chat); it is NOT flag-gated — when the feature is off the chat simply has no
+ * runs, so the endpoint returns `{ run: null }`.
+ */
+export async function getAiChatRun(
+  chatId: string,
+): Promise<IAiChatRunResponse> {
+  const req = await api.post<IAiChatRunResponse>("/ai-chat/run", { chatId });
+  return req.data;
+}
+
+/**
+ * Explicitly STOP the active agent run of a chat (#184). This is the ONLY thing
+ * that ends a DETACHED run — a mere browser disconnect (aborting the local SSE)
+ * is deliberately ignored server-side, so the client must call this to actually
+ * stop an autonomous run. Targeted by `chatId` (the server resolves whatever run
+ * is active on it); owner-gated server-side. Returns `{ stopped }` — false when
+ * there was nothing active to stop.
+ */
+export async function stopRun(
+  chatId: string,
+): Promise<{ stopped: boolean }> {
+  const req = await api.post<{ stopped: boolean }>("/ai-chat/stop", { chatId });
+  return req.data;
+}
+
 /**
 * Resolve the chat bound to a document (the current user's most-recent chat
 * created on that page), or null when there is none. Drives auto-open-on-page.
@@ -200,6 +200,38 @@ export interface IAiChatMessageRow {
  createdAt: string;
 }

+/**
+ * A persisted agent-run row (#184), mirroring the `ai_chat_runs` fields the
+ * client reads from `POST /ai-chat/run`. Only `status` is load-bearing for the
+ * reconnect-and-live-update UX (it drives the poll cadence); the rest are carried
+ * for display/diagnostics. The DB is the source of truth, so this resolves for an
+ * in-flight run (the browser dropped, the run kept going) and a finished one.
+ */
+export interface IAiChatRun {
+  id: string;
+  chatId: string;
+  // 'pending' | 'running' | 'succeeded' | 'failed' | 'aborted'. The first two are
+  // ACTIVE (keep polling); the rest are TERMINAL (stop polling).
+  status: "pending" | "running" | "succeeded" | "failed" | "aborted" | string;
+  error?: string | null;
+  stepCount?: number;
+  assistantMessageId?: string | null;
+  startedAt?: string | null;
+  finishedAt?: string | null;
+  createdAt?: string;
+  updatedAt?: string;
+}
+
+/**
+ * Response of `POST /ai-chat/run` (#184): the latest run of a chat and the
+ * assistant message it materializes (the partial/final output, projected from the
+ * persisted rows). Both are `null` when the chat has never had a run.
+ */
+export interface IAiChatRunResponse {
+  run: IAiChatRun | null;
+  message: IAiChatMessageRow | null;
+}
+
 export interface IAiChatListParams extends QueryParams {}

 export interface IAiChatMessagesParams {
@@ -0,0 +1,303 @@
+import { describe, it, expect } from "vitest";
+import type { UIMessage } from "@ai-sdk/react";
+import type { IAiChatRun } from "@/features/ai-chat/types/ai-chat.types.ts";
+import {
+  RUN_POLL_INTERVAL_MS,
+  isRunActive,
+  runPollInterval,
+  shouldObserveRun,
+  shouldClearStoppingLatch,
+  shouldClearLatchOnQueryError,
+  mergeObservedMessage,
+} from "./run-polling.ts";
+
+function makeRun(status: string): IAiChatRun {
+  return { id: "run-1", chatId: "c1", status };
+}
+
+function makeMsg(id: string, text: string): UIMessage {
+  return {
+    id,
+    role: "assistant",
+    parts: [{ type: "text", text }],
+  } as UIMessage;
+}
+
+describe("isRunActive", () => {
+  it("treats pending and running as active", () => {
+    expect(isRunActive(makeRun("pending"))).toBe(true);
+    expect(isRunActive(makeRun("running"))).toBe(true);
+  });
+
+  it("treats terminal / unknown / nullish as not active", () => {
+    expect(isRunActive(makeRun("succeeded"))).toBe(false);
+    expect(isRunActive(makeRun("failed"))).toBe(false);
+    expect(isRunActive(makeRun("aborted"))).toBe(false);
+    expect(isRunActive(makeRun("weird-future-status"))).toBe(false);
+    expect(isRunActive(null)).toBe(false);
+    expect(isRunActive(undefined)).toBe(false);
+  });
+});
+
+describe("runPollInterval (the refetchInterval helper)", () => {
+  it("returns 2000ms while the run is pending/running", () => {
+    expect(runPollInterval(makeRun("pending"))).toBe(RUN_POLL_INTERVAL_MS);
+    expect(runPollInterval(makeRun("running"))).toBe(RUN_POLL_INTERVAL_MS);
+    expect(RUN_POLL_INTERVAL_MS).toBe(2000);
+  });
+
+  it("returns false (stop polling) once the run is terminal", () => {
+    expect(runPollInterval(makeRun("succeeded"))).toBe(false);
+    expect(runPollInterval(makeRun("failed"))).toBe(false);
+    expect(runPollInterval(makeRun("aborted"))).toBe(false);
+  });
+
+  it("returns false (no polling) when there is no run", () => {
+    expect(runPollInterval(null)).toBe(false);
+    expect(runPollInterval(undefined)).toBe(false);
+  });
+});
+
+describe("shouldObserveRun (observer-vs-streamer decision)", () => {
+  it("observes an active run when this tab is NOT the local streamer", () => {
+    expect(shouldObserveRun(makeRun("running"), false)).toBe(true);
+    expect(shouldObserveRun(makeRun("pending"), false)).toBe(true);
+  });
+
+  it("observes a terminal run too (so the final output shows on reopen)", () => {
+    expect(shouldObserveRun(makeRun("succeeded"), false)).toBe(true);
+  });
+
+  it("does NOT observe when this tab IS the streamer (no double-render)", () => {
+    expect(shouldObserveRun(makeRun("running"), true)).toBe(false);
+    expect(shouldObserveRun(makeRun("succeeded"), true)).toBe(false);
+  });
+
+  it("does NOT observe when there is no run", () => {
+    expect(shouldObserveRun(null, false)).toBe(false);
+    expect(shouldObserveRun(undefined, false)).toBe(false);
+  });
+});
+
+describe("shouldClearStoppingLatch (#234 latch-release decision)", () => {
+  // The one case the latch SHOULD clear: we requested a stop, we are the passive
+  // observer (not streaming), and the CURRENT run is terminal.
+  it("clears only when stopping, observing, and the run is terminal", () => {
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: makeRun("aborted"),
+        isLocalStreaming: false,
+      }),
+    ).toBe(true);
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: makeRun("succeeded"),
+        isLocalStreaming: false,
+      }),
+    ).toBe(true);
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: makeRun("failed"),
+        isLocalStreaming: false,
+      }),
+    ).toBe(true);
+  });
+
+  // Round-3 regression: clearing while THIS tab is still the local streamer would
+  // re-open the flash for the current turn the moment we switch to observer role.
+  // A predicate lacking the streaming gate would (wrongly) return true here.
+  it("does NOT clear while this tab is the local streamer", () => {
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: makeRun("aborted"),
+        isLocalStreaming: true,
+      }),
+    ).toBe(false);
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: makeRun("succeeded"),
+        isLocalStreaming: true,
+      }),
+    ).toBe(false);
+  });
+
+  // The detached run keeps growing after a local abort — while it is still
+  // active the latch MUST hold so the observer merge stays suppressed.
+  it("does NOT clear while the run is still active", () => {
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: makeRun("running"),
+        isLocalStreaming: false,
+      }),
+    ).toBe(false);
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: makeRun("pending"),
+        isLocalStreaming: false,
+      }),
+    ).toBe(false);
+  });
+
+  // #234 F4: on Stop the stale PREVIOUS-turn run is removed from the cache, so the
+  // observed `run` is null until the current turn's run is fetched fresh. A null
+  // run HOLDS the latch — it can never clear against the just-removed stale run,
+  // only against the current turn's own terminal run once observed.
+  it("does NOT clear against a removed/absent run (F4 stale-run guard)", () => {
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: null,
+        isLocalStreaming: false,
+      }),
+    ).toBe(false);
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: true,
+        run: undefined,
+        isLocalStreaming: false,
+      }),
+    ).toBe(false);
+  });
+
+  it("does NOT clear when no stop was requested", () => {
+    expect(
+      shouldClearStoppingLatch({
+        stoppingRun: false,
+        run: makeRun("aborted"),
+        isLocalStreaming: false,
+      }),
+    ).toBe(false);
+  });
+});
+
+describe("shouldClearLatchOnQueryError (#234 F7 error-safety-net decision)", () => {
+  // This guards the REAL anti-flash decision the component's run-query-error
+  // safety-net effect uses (ai-chat-window.tsx wires the effect to THIS helper,
+  // not a copy — so the test is non-vacuous vs the live code).
+
+  // (b) The F7 hole: a TRANSIENT run-query error while `run` is STILL ACTIVE must
+  // NOT clear the latch. TanStack Query v5 retains `data` on error, so
+  // runQueryFailed can be true while the held run is still pending/running.
+  // Against the PRE-F7 condition (without `!isRunActive(run)`) this would return
+  // true — so this assertion fails on the buggy code (non-vacuous).
+  it("does NOT clear on a transient error while the run is still ACTIVE (F7)", () => {
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: true,
+        isLocalStreaming: false,
+        runQueryFailed: true,
+        run: makeRun("running"),
+      }),
+    ).toBe(false);
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: true,
+        isLocalStreaming: false,
+        runQueryFailed: true,
+        run: makeRun("pending"),
+      }),
+    ).toBe(false);
+  });
+
+  // (a) The genuine permanent-null-freeze: run cache cleared by removeQueries +
+  // the refetch keeps ERRORING, so `run === null`. This is the ONLY case the
+  // safety-net exists to cure — it MUST clear so the frozen view resumes.
+  it("clears on a permanent error when the run is null (permanent-null-freeze)", () => {
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: true,
+        isLocalStreaming: false,
+        runQueryFailed: true,
+        run: null,
+      }),
+    ).toBe(true);
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: true,
+        isLocalStreaming: false,
+        runQueryFailed: true,
+        run: undefined,
+      }),
+    ).toBe(true);
+  });
+
+  // A TERMINAL run also satisfies `!isRunActive`; clearing then is harmless — the
+  // terminal effect (shouldClearStoppingLatch) already clears for a terminal run,
+  // so this only ever agrees with it. Asserted so the (c) reasoning is pinned.
+  it("clears on an error when the run is terminal (harmless, agrees with terminal effect)", () => {
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: true,
+        isLocalStreaming: false,
+        runQueryFailed: true,
+        run: makeRun("aborted"),
+      }),
+    ).toBe(true);
+  });
+
+  it("does NOT clear without an actual query error", () => {
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: true,
+        isLocalStreaming: false,
+        runQueryFailed: false,
+        run: null,
+      }),
+    ).toBe(false);
+  });
+
+  it("does NOT clear while this tab is the local streamer", () => {
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: true,
+        isLocalStreaming: true,
+        runQueryFailed: true,
+        run: null,
+      }),
+    ).toBe(false);
+  });
+
+  it("does NOT clear when no stop was requested", () => {
+    expect(
+      shouldClearLatchOnQueryError({
+        stoppingRun: false,
+        isLocalStreaming: false,
+        runQueryFailed: true,
+        run: null,
+      }),
+    ).toBe(false);
+  });
+});
+
+describe("mergeObservedMessage", () => {
+  it("replaces the message with the same id in place (per-step growth)", () => {
+    const prev = [makeMsg("u1", "hi"), makeMsg("a1", "step 1")];
+    const observed = makeMsg("a1", "step 1\nstep 2");
+    const next = mergeObservedMessage(prev, observed);
+    expect(next).toHaveLength(2);
+    expect(next[1]).toBe(observed);
+    expect(next[0]).toBe(prev[0]); // untouched
+    expect(next).not.toBe(prev); // new array (never mutates input)
+  });
+
+  it("appends when the observed message is not yet present", () => {
+    const prev = [makeMsg("u1", "hi")];
+    const observed = makeMsg("a1", "first token");
+    const next = mergeObservedMessage(prev, observed);
+    expect(next).toHaveLength(2);
+    expect(next[1]).toBe(observed);
+  });
+
+  it("returns the original list unchanged when there is nothing to merge", () => {
+    const prev = [makeMsg("u1", "hi")];
+    expect(mergeObservedMessage(prev, null)).toBe(prev);
+    expect(mergeObservedMessage(prev, undefined)).toBe(prev);
+  });
+});
@@ -0,0 +1,151 @@
+import type { UIMessage } from "@ai-sdk/react";
+import type { IAiChatRun } from "@/features/ai-chat/types/ai-chat.types.ts";
+
+/**
+ * Reconnect-and-live-follow helpers (#184). When a chat is reopened while its
+ * agent run is STILL going, this tab is a PASSIVE OBSERVER: it did not start the
+ * run here (no local SSE stream), so it catches up by POLLING the reconnect
+ * endpoint (`POST /ai-chat/run`) and merging the run's incrementally-persisted
+ * assistant message into the rendered thread. These are the small pure decisions
+ * that machinery hangs off, extracted so they can be unit-tested in isolation
+ * (mirrors how reindex polling / editor-sync-state are tested).
+ */
+
+/** How often to re-poll the reconnect endpoint while a run is ACTIVE. */
+export const RUN_POLL_INTERVAL_MS = 2000;
+
+// 'pending' and 'running' are the two ACTIVE statuses; 'succeeded' | 'failed' |
+// 'aborted' are TERMINAL (and any unknown future status is treated as terminal,
+// so a stale/odd value never polls forever).
+const ACTIVE_STATUSES = new Set(["pending", "running"]);
+
+/** Whether a run is still going (worth polling / merging live updates from). */
+export function isRunActive(run: IAiChatRun | null | undefined): boolean {
+  return !!run && ACTIVE_STATUSES.has(run.status);
+}
+
+/**
+ * The TanStack Query `refetchInterval` value for the run query: poll every
+ * {@link RUN_POLL_INTERVAL_MS} while the run is active, and `false` (stop) once
+ * it is terminal or there is no run. Polling is thus naturally bounded by the run
+ * reaching a terminal status — no separate timeout cap is needed.
+ */
+export function runPollInterval(
+  run: IAiChatRun | null | undefined,
+): number | false {
+  return isRunActive(run) ? RUN_POLL_INTERVAL_MS : false;
+}
+
+/**
+ * Observer-vs-streamer decision. We render the polled run message (catch up +
+ * keep advancing) ONLY when this tab is a passive observer: there IS a run AND
+ * this tab is NOT the one locally streaming it (we reconnected, we didn't start
+ * it here). When this tab is the streamer, the live SSE stream owns the view, so
+ * we neither poll nor merge — avoiding a double-render fight. Terminal runs still
+ * merge (so the final persisted output is shown on reopen); the poll itself is
+ * stopped separately by {@link runPollInterval}.
+ */
+export function shouldObserveRun(
+  run: IAiChatRun | null | undefined,
+  localStreaming: boolean,
+): boolean {
+  return !!run && !localStreaming;
+}
+
+/**
+ * Should the "stopping" latch — which suppresses the observer re-stream flash
+ * after the user pressed Stop — be RELEASED now? All three must hold:
+ *  - `stoppingRun`: we actually requested a stop (otherwise nothing to release);
+ *  - `!isLocalStreaming`: this tab is NOT the local streamer. While we are the
+ *    streamer the run query is disabled, so the observed `run` is not the run we
+ *    are following — releasing the latch then would re-open the flash for the
+ *    current turn the instant we switch to observer role;
+ *  - the observed `run` EXISTS and has reached a TERMINAL status.
+ *
+ * The null / still-active `run` case is the #234 F4 invariant. On Stop the stale
+ * PREVIOUS-turn run is removed from the query cache (`removeQueries`), so `run`
+ * is null until the CURRENT turn's run is re-fetched fresh; a null or active run
+ * therefore HOLDS the latch, so it can only ever clear against the current turn's
+ * OWN terminal run — never a stale cached one. (The cache removal itself is
+ * integration-level in AiChatWindow; this predicate encodes the decision given
+ * whatever run is currently observed, and a stale terminal run is
+ * indistinguishable from a current terminal run at the predicate level — hence
+ * the cache removal is what guarantees only the current run is ever passed here.)
+ */
+export function shouldClearStoppingLatch(args: {
+  stoppingRun: boolean;
+  run: IAiChatRun | null | undefined;
+  isLocalStreaming: boolean;
+}): boolean {
+  const { stoppingRun, run, isLocalStreaming } = args;
+  if (!stoppingRun || isLocalStreaming) return false;
+  return !!run && !isRunActive(run);
+}
+
+/**
+ * Should the "stopping" latch be RELEASED by the run-query ERROR safety-net?
+ * (#234 F7 — a NEW path of the same re-stream flash the F4 latch exists to
+ * prevent.) After Stop, `handleServerStop` clears the run cache; the terminal
+ * effect then holds the latch via `if (!run) return` until the CURRENT turn's run
+ * is fetched fresh. If that refetch instead ERRORS permanently, `run` stays null,
+ * its status-keyed refetchInterval is off, and nothing would ever observe a
+ * terminal run — freezing the view with the observer merge suppressed. This
+ * safety-net cures ONLY that genuine permanent-null-freeze.
+ *
+ * All four must hold:
+ *  - `stoppingRun`: we actually requested a stop (otherwise nothing to release);
+ *  - `!isLocalStreaming`: this tab is NOT the local streamer (same reason as
+ *    {@link shouldClearStoppingLatch});
+ *  - `runQueryFailed`: the run query is in its error state (TanStack Query v5 with
+ *    retry:false — isError);
+ *  - `!isRunActive(run)`: the observed `run` is NOT an active (pending/running)
+ *    held run. This is the F7 gate. In TanStack Query v5 the query's `data` is
+ *    RETAINED on error, so `runQueryFailed` can be true while `run` is STILL an
+ *    ACTIVE run (a single transient GET-run failure in the window between Stop and
+ *    settle). Without this gate a transient error would release the latch early —
+ *    re-opening the observer merge and flashing the growing detached run over the
+ *    frozen row (exactly the F4 flash). Gating on the run NOT being active means we
+ *    only ever cure the permanent-null-freeze (`run === null`, so
+ *    `isRunActive(null)` is false), never release against an active run.
+ *
+ * (A terminal `run` also satisfies `!isRunActive(run)`; clearing then is harmless
+ * — the terminal effect's {@link shouldClearStoppingLatch} already clears the
+ * latch for a terminal run, so this only ever agrees with it, never conflicts.)
+ *
+ * INVARIANT (do not break): clearing the latch on the `run === null` branch is safe
+ * ONLY because the run query's `refetchInterval` (see {@link runPollInterval}) stops
+ * polling when the data is empty — so after we clear on null+error there is no
+ * subsequent auto-poll that could return a still-active detached run and re-open the
+ * merge. If `refetchInterval` is ever changed to keep polling on `run === null`/on
+ * error, this null-branch clear would re-open the F7 flash through the null path.
+ * Do not change the run query's refetchInterval without re-checking this path.
+ */
+export function shouldClearLatchOnQueryError(args: {
+  stoppingRun: boolean;
+  isLocalStreaming: boolean;
+  runQueryFailed: boolean;
+  run: IAiChatRun | null | undefined;
+}): boolean {
+  const { stoppingRun, isLocalStreaming, runQueryFailed, run } = args;
+  return (
+    stoppingRun && !isLocalStreaming && runQueryFailed && !isRunActive(run)
+  );
+}
+
+/**
+ * Merge an observed assistant message into the rendered list: replace the message
+ * with the same id in place (the in-progress assistant row is already seeded from
+ * history, so per-step growth replaces it), or append it when absent. Returns a
+ * new array; the input is never mutated.
+ */
+export function mergeObservedMessage(
+  messages: UIMessage[],
+  observed: UIMessage | null | undefined,
+): UIMessage[] {
+  if (!observed) return messages;
+  const idx = messages.findIndex((m) => m.id === observed.id);
+  if (idx === -1) return [...messages, observed];
+  const next = messages.slice();
+  next[idx] = observed;
+  return next;
+}
@@ -0,0 +1,250 @@
+import { describe, it, expect, vi } from "vitest";
+import { render, screen } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+import { MemoryRouter } from "react-router-dom";
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+// The fallback path renders the full TipTap editor; stub it so we can assert the
+// safety valve fired without pulling in the editor stack.
+vi.mock("@/features/comment/components/comment-editor", () => ({
+  default: () => <div data-testid="comment-editor-fallback" />,
+}));
+
+// Mention rendering hits react-query; stub the page/share queries so the mention
+// case renders in isolation.
+vi.mock("@/features/page/queries/page-query.ts", () => ({
+  usePageQuery: () => ({ data: undefined, isLoading: false, isError: false }),
+}));
+vi.mock("@/features/share/queries/share-query.ts", () => ({
+  useSharePageQuery: () => ({ data: undefined }),
+}));
+
+import { CommentContentView } from "./comment-content-view";
+
+function renderView(content: string | object) {
+  return render(
+    <MantineProvider>
+      <MemoryRouter>
+        <CommentContentView content={content} />
+      </MemoryRouter>
+    </MantineProvider>,
+  );
+}
+
+const doc = (content: any[]) => JSON.stringify({ type: "doc", content });
+const para = (content: any[]) => ({ type: "paragraph", content });
+const text = (t: string, marks?: any[]) => ({ type: "text", text: t, marks });
+
+describe("CommentContentView", () => {
+  it("renders paragraphs as <p> with text", () => {
+    const { container } = renderView(doc([para([text("Hello world")])]));
+    expect(screen.getByText("Hello world")).toBeDefined();
+    expect(container.querySelector("p")).not.toBeNull();
+  });
+
+  it("reproduces the read-only CommentEditor DOM nesting for CSS parity", () => {
+    const { container } = renderView(doc([para([text("x")])]));
+    // outer .commentEditor > .ProseMirror (module) > .ProseMirror (global) > p
+    const globalPm = container.querySelector("div.ProseMirror > p");
+    expect(globalPm).not.toBeNull();
+  });
+
+  it("renders the bold mark as <strong>", () => {
+    const { container } = renderView(
+      doc([para([text("bold", [{ type: "bold" }])])]),
+    );
+    const el = container.querySelector("strong");
+    expect(el?.textContent).toBe("bold");
+  });
+
+  it("renders the italic mark as <em>", () => {
+    const { container } = renderView(
+      doc([para([text("it", [{ type: "italic" }])])]),
+    );
+    expect(container.querySelector("em")?.textContent).toBe("it");
+  });
+
+  it("renders the strike mark as <s>", () => {
+    const { container } = renderView(
+      doc([para([text("st", [{ type: "strike" }])])]),
+    );
+    expect(container.querySelector("s")?.textContent).toBe("st");
+  });
+
+  it("renders the underline mark as <u> (not the editor fallback)", () => {
+    const { container } = renderView(
+      doc([para([text("un", [{ type: "underline" }])])]),
+    );
+    expect(container.querySelector("u")?.textContent).toBe("un");
+    // Underline is a supported mark, so no degrade to the editor fallback.
+    expect(screen.queryByTestId("comment-editor-fallback")).toBeNull();
+  });
+
+  it("renders the code mark as <code>", () => {
+    const { container } = renderView(
+      doc([para([text("co", [{ type: "code" }])])]),
+    );
+    expect(container.querySelector("code")?.textContent).toBe("co");
+  });
+
+  it("renders the link mark as an anchor with safe rel/target", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          text("click", [
+            { type: "link", attrs: { href: "https://example.com" } },
+          ]),
+        ]),
+      ]),
+    );
+    const a = container.querySelector("a");
+    expect(a?.getAttribute("href")).toBe("https://example.com");
+    expect(a?.getAttribute("target")).toBe("_blank");
+    expect(a?.getAttribute("rel")).toBe("noopener noreferrer nofollow");
+    expect(a?.textContent).toBe("click");
+  });
+
+  it("neutralizes a javascript: link href (stored XSS) while keeping the text", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          text("click", [
+            { type: "link", attrs: { href: "javascript:alert(1)" } },
+          ]),
+        ]),
+      ]),
+    );
+    const a = container.querySelector("a");
+    expect(a).not.toBeNull();
+    // No navigable javascript: href — attribute is absent (or empty).
+    expect(a?.getAttribute("href")).toBeFalsy();
+    // The link text is still rendered.
+    expect(a?.textContent).toBe("click");
+  });
+
+  it("neutralizes a control-char-obfuscated javascript: href", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          text("x", [
+            { type: "link", attrs: { href: "java\tscript:alert(1)" } },
+          ]),
+        ]),
+      ]),
+    );
+    expect(container.querySelector("a")?.getAttribute("href")).toBeFalsy();
+  });
+
+  it("neutralizes a data: link href", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          text("x", [
+            {
+              type: "link",
+              attrs: { href: "data:text/html,<script>alert(1)</script>" },
+            },
+          ]),
+        ]),
+      ]),
+    );
+    expect(container.querySelector("a")?.getAttribute("href")).toBeFalsy();
+  });
+
+  it("preserves a mailto: link href (allowlisted scheme)", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          text("mail", [
+            { type: "link", attrs: { href: "mailto:a@b.com" } },
+          ]),
+        ]),
+      ]),
+    );
+    expect(container.querySelector("a")?.getAttribute("href")).toBe(
+      "mailto:a@b.com",
+    );
+  });
+
+  it("preserves a relative link href (no scheme, not a script vector)", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          text("rel", [{ type: "link", attrs: { href: "/some/path" } }]),
+        ]),
+      ]),
+    );
+    expect(container.querySelector("a")?.getAttribute("href")).toBe(
+      "/some/path",
+    );
+  });
+
+  it("nests multiple marks on one text node", () => {
+    const { container } = renderView(
+      doc([para([text("x", [{ type: "bold" }, { type: "italic" }])])]),
+    );
+    // bold wraps italic (or vice versa) — both elements exist around the text.
+    expect(container.querySelector("strong")).not.toBeNull();
+    expect(container.querySelector("em")).not.toBeNull();
+    expect(screen.getByText("x")).toBeDefined();
+  });
+
+  it("renders hardBreak as <br/>", () => {
+    const { container } = renderView(
+      doc([para([text("a"), { type: "hardBreak" }, text("b")])]),
+    );
+    expect(container.querySelector("br")).not.toBeNull();
+  });
+
+  it("renders a user mention as a styled span", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          {
+            type: "mention",
+            attrs: { label: "Alice", entityType: "user", entityId: "u1" },
+          },
+        ]),
+      ]),
+    );
+    expect(screen.getByText("@Alice")).toBeDefined();
+    // No fallback to the editor.
+    expect(screen.queryByTestId("comment-editor-fallback")).toBeNull();
+  });
+
+  it("renders a page mention as a link", () => {
+    const { container } = renderView(
+      doc([
+        para([
+          {
+            type: "mention",
+            attrs: {
+              label: "Some Page",
+              entityType: "page",
+              slugId: "pg1",
+            },
+          },
+        ]),
+      ]),
+    );
+    expect(container.querySelector("a")).not.toBeNull();
+    expect(screen.getByText("Some Page")).toBeDefined();
+  });
+
+  it("renders a legacy plain-text (non-JSON) string as plain text", () => {
+    renderView("just a legacy string");
+    expect(screen.getByText("just a legacy string")).toBeDefined();
+    expect(screen.queryByTestId("comment-editor-fallback")).toBeNull();
+  });
+
+  it("falls back to CommentEditor for an unknown node type", () => {
+    renderView(doc([{ type: "codeBlock", content: [text("x")] }]));
+    expect(screen.getByTestId("comment-editor-fallback")).toBeDefined();
+  });
+
+  it("falls back to CommentEditor for malformed JSON", () => {
+    renderView('{"type":"doc","content":[');
+    expect(screen.getByTestId("comment-editor-fallback")).toBeDefined();
+  });
+});
@@ -0,0 +1,199 @@
+import React from "react";
+import classes from "./comment.module.css";
+import { MentionContent } from "@/features/editor/components/mention/mention-view";
+import CommentEditor from "@/features/comment/components/comment-editor";
+
+// Static, editor-free renderer of a comment body (ProseMirror JSON). It walks the
+// document and emits plain DOM, avoiding the cost of a full TipTap/ProseMirror
+// instance per comment (the panel used to spin up 400+ editors on mount).
+//
+// The supported node/mark set MUST mirror what CommentEditor enables
+// (StarterKit + Mention + LinkExtension). Anything outside that set makes the
+// whole comment degrade to the read-only CommentEditor via the fallback below,
+// so we never show a half-rendered comment.
+
+// Sentinel thrown when we hit a node/mark we don't know how to render statically.
+// Caught at the top level to trigger the CommentEditor fallback for the whole comment.
+class UnknownNodeError extends Error {}
+
+// Protocol allowlist mirroring @tiptap/extension-link's default (the read-only
+// CommentEditor path relies on it to blank javascript:/data: hrefs). The static
+// renderer must apply the SAME sanitization because the backend stores comment
+// content verbatim and React does not neutralize javascript: in an href.
+const ALLOWED_URI_SCHEMES = /^(?:https?|ftps?|mailto|tel|callto|sms|cid|xmpp):/i;
+
+function safeHref(href: unknown): string | undefined {
+  if (typeof href !== "string") return undefined;
+  // Strip control chars/whitespace that could smuggle a scheme past the test
+  // (e.g. "java\tscript:").
+  const cleaned = href.replace(/[\u0000-\u0020]/g, "").trim();
+  // Allow relative/anchor/protocol-relative links (no scheme) — not script vectors.
+  if (!/^[a-z][a-z0-9+.-]*:/i.test(cleaned)) return href;
+  return ALLOWED_URI_SCHEMES.test(cleaned) ? href : undefined;
+}
+
+interface PMMark {
+  type: string;
+  attrs?: Record<string, any>;
+}
+
+interface PMNode {
+  type: string;
+  attrs?: Record<string, any>;
+  content?: PMNode[];
+  text?: string;
+  marks?: PMMark[];
+}
+
+// Wrap a text node's string in its marks (marks nest, e.g. bold + italic).
+function renderMarks(
+  text: React.ReactNode,
+  marks: PMMark[] | undefined,
+  keyPrefix: string,
+): React.ReactNode {
+  if (!marks || marks.length === 0) return text;
+
+  return marks.reduce<React.ReactNode>((acc, mark, i) => {
+    const key = `${keyPrefix}-m${i}`;
+    switch (mark.type) {
+      case "bold":
+        return <strong key={key}>{acc}</strong>;
+      case "italic":
+        return <em key={key}>{acc}</em>;
+      case "strike":
+        return <s key={key}>{acc}</s>;
+      case "underline":
+        // StarterKit enables the Underline extension by default (Mod-u) and
+        // CommentEditor does not disable it, so real comments can carry this
+        // mark. Render it here rather than degrading the whole comment.
+        return <u key={key}>{acc}</u>;
+      case "code":
+        return <code key={key}>{acc}</code>;
+      case "link": {
+        // LinkExtension (TiptapLink) opens links in a new tab; keep the same
+        // safe rel semantics the editor produces. Sanitize the href against the
+        // extension's protocol allowlist — a disallowed scheme (javascript:,
+        // data:) yields undefined so the anchor is non-navigable but still shows
+        // its text, matching how extension-link blanks a bad href.
+        const href = safeHref(mark.attrs?.href);
+        return (
+          <a
+            key={key}
+            href={href}
+            target="_blank"
+            rel="noopener noreferrer nofollow"
+          >
+            {acc}
+          </a>
+        );
+      }
+      default:
+        throw new UnknownNodeError(`Unknown mark type: ${mark.type}`);
+    }
+  }, text);
+}
+
+function renderNode(node: PMNode, key: string): React.ReactNode {
+  switch (node.type) {
+    case "paragraph":
+      return <p key={key}>{renderChildren(node.content, key)}</p>;
+    case "text":
+      return (
+        <React.Fragment key={key}>
+          {renderMarks(node.text ?? "", node.marks, key)}
+        </React.Fragment>
+      );
+    case "hardBreak":
+      return <br key={key} />;
+    case "mention":
+      return (
+        <span key={key} style={{ display: "inline" }}>
+          <MentionContent attrs={node.attrs as any} />
+        </span>
+      );
+    default:
+      throw new UnknownNodeError(`Unknown node type: ${node.type}`);
+  }
+}
+
+function renderChildren(
+  content: PMNode[] | undefined,
+  keyPrefix: string,
+): React.ReactNode {
+  if (!content) return null;
+  return content.map((child, i) => renderNode(child, `${keyPrefix}-${i}`));
+}
+
+// Reproduce the exact DOM nesting the read-only CommentEditor renders so the
+// scoped CSS in comment.module.css (which targets
+// `.commentEditor .ProseMirror :global(.ProseMirror)` and `.ProseMirror p`)
+// applies pixel-for-pixel. Read-only => no data-editable / data-surface attrs.
+function Shell({ children }: { children: React.ReactNode }) {
+  return (
+    <div className={classes.commentEditor}>
+      <div className={classes.ProseMirror}>
+        <div className="ProseMirror">{children}</div>
+      </div>
+    </div>
+  );
+}
+
+interface CommentContentViewProps {
+  content: string | object;
+}
+
+export function CommentContentView({ content }: CommentContentViewProps) {
+  // Degrade this single comment to the old editor-based render (safety valve).
+  const fallback = () => {
+    if (import.meta.env.DEV) {
+      console.warn(
+        "CommentContentView: unsupported comment content, falling back to editor",
+      );
+    }
+    return <CommentEditor defaultContent={content} editable={false} />;
+  };
+
+  let doc: unknown = content;
+
+  if (typeof content === "string") {
+    try {
+      doc = JSON.parse(content);
+    } catch {
+      const trimmed = content.trim();
+      // Looks like it was meant to be JSON but is malformed -> safety-valve fallback.
+      if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
+        return fallback();
+      }
+      // Otherwise it's a legacy plain-text comment: render as a single paragraph.
+      return (
+        <Shell>
+          <p>{content}</p>
+        </Shell>
+      );
+    }
+  }
+
+  // Double-stringified / legacy plain-text stored as a JSON string.
+  if (typeof doc === "string") {
+    return (
+      <Shell>
+        <p>{doc}</p>
+      </Shell>
+    );
+  }
+
+  try {
+    const pmDoc = doc as PMNode;
+    if (!pmDoc || typeof pmDoc !== "object" || pmDoc.type !== "doc") {
+      throw new UnknownNodeError("Not a ProseMirror doc");
+    }
+    return <Shell>{renderChildren(pmDoc.content, "n")}</Shell>;
+  } catch (err) {
+    if (err instanceof UnknownNodeError) {
+      return fallback();
+    }
+    throw err;
+  }
+}
+
+export default CommentContentView;
@@ -1,5 +1,5 @@
-import { describe, it, expect, vi } from "vitest";
-import { render, screen, fireEvent } from "@testing-library/react";
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen, fireEvent, waitFor } from "@testing-library/react";
 import { MantineProvider } from "@mantine/core";
 import { IComment } from "@/features/comment/types/comment.types";

@@ -8,23 +8,74 @@ import { IComment } from "@/features/comment/types/comment.types";
 // The comment mutation hooks reach out to react-query/network — stub them so the
 // component renders in isolation. We only assert the AI-badge rendering branch.
 const applyMutateAsync = vi.fn();
+const dismissMutateAsync = vi.fn();
+const updateMutateAsync = vi.fn();
 vi.mock("@/features/comment/queries/comment-query", () => ({
  useDeleteCommentMutation: () => ({ mutateAsync: vi.fn() }),
  useResolveCommentMutation: () => ({ mutateAsync: vi.fn() }),
-  useUpdateCommentMutation: () => ({ mutateAsync: vi.fn() }),
+  useUpdateCommentMutation: () => ({ mutateAsync: updateMutateAsync }),
  useApplySuggestionMutation: () => ({
    mutateAsync: applyMutateAsync,
    isPending: false,
  }),
+  useDismissSuggestionMutation: () => ({
+    mutateAsync: dismissMutateAsync,
+    isPending: false,
+  }),
 }));

+// The document the mocked editor emits via onUpdate when the edit form is open.
+// Duplicated inside the mock factory (below) to keep the factory self-contained.
+const EDITED_DOC = {
+  type: "doc",
+  content: [
+    { type: "paragraph", content: [{ type: "text", text: "edited via editor" }] },
+  ],
+};
+
 // CommentEditor pulls in the full TipTap editor stack; replace it with a stub.
-vi.mock("@/features/comment/components/comment-editor", () => ({
-  default: () => <div data-testid="comment-editor" />,
+// In edit mode the stub exposes buttons that fire the real onUpdate/onSave props
+// so the edit->save/cancel flow can be driven without a live editor.
+vi.mock("@/features/comment/components/comment-editor", () => {
+  const doc = {
+    type: "doc",
+    content: [
+      { type: "paragraph", content: [{ type: "text", text: "edited via editor" }] },
+    ],
+  };
+  return {
+    default: ({ onUpdate, onSave }: any) => (
+      <div data-testid="comment-editor">
+        <button
+          type="button"
+          data-testid="editor-emit-update"
+          onClick={() => onUpdate?.(doc)}
+        />
+        <button
+          type="button"
+          data-testid="editor-emit-save"
+          onClick={() => onSave?.()}
+        />
+      </div>
+    ),
+  };
+});
+
+// CommentContentView (used for the read-only body) imports the mention view,
+// which pulls page-query -> main.tsx (createRoot). Stub the queries so the item
+// renders in isolation without the app entry side-effect.
+vi.mock("@/features/page/queries/page-query.ts", () => ({
+  usePageQuery: () => ({ data: undefined, isLoading: false, isError: false }),
+}));
+vi.mock("@/features/share/queries/share-query.ts", () => ({
+  useSharePageQuery: () => ({ data: undefined }),
 }));

 import CommentListItem from "./comment-list-item";
-import { canShowApply } from "@/features/comment/utils/suggestion";
+import {
+  canShowApply,
+  canShowDismiss,
+} from "@/features/comment/utils/suggestion";

 const baseComment = (over?: Partial<IComment>): IComment =>
  ({
@@ -38,14 +89,20 @@ const baseComment = (over?: Partial<IComment>): IComment =>
    ...over,
  }) as IComment;

-function renderItem(comment: IComment, canEdit = true) {
+function renderItem(
+  comment: IComment,
+  canEdit = true,
+  canComment = true,
+  userSpaceRole?: string,
+) {
  return render(
    <MantineProvider>
      <CommentListItem
        comment={comment}
        pageId="page-1"
-        canComment={true}
+        canComment={canComment}
        canEdit={canEdit}
+        userSpaceRole={userSpaceRole}
      />
    </MantineProvider>,
  );
@@ -108,10 +165,12 @@ describe("CommentListItem — suggested edit (#315)", () => {
    });

  it("renders the было→стало diff and an Apply button when canEdit and not applied/resolved", () => {
-    renderItem(suggestion(), true);
-    // Old text appears both as the selection quote and as the struck diff row.
+    const { container } = renderItem(suggestion(), true);
+    // Old text appears as the selection quote (a single unsplit Text node).
    expect(screen.getAllByText("old wording here").length).toBeGreaterThan(0);
-    expect(screen.getByText("new wording here")).toBeDefined();
+    // The new line is now rendered as per-fragment spans (intraline diff, #331),
+    // so it is no longer a single text node — assert the concatenated content.
+    expect(container.textContent).toContain("new wording here");
    // Apply button is present.
    expect(screen.getByRole("button", { name: "Apply" })).toBeDefined();
    // No Applied badge yet.
@@ -119,9 +178,9 @@ describe("CommentListItem — suggested edit (#315)", () => {
  });

  it("hides the Apply button when canEdit is false", () => {
-    renderItem(suggestion(), false);
-    // Diff still renders...
-    expect(screen.getByText("new wording here")).toBeDefined();
+    const { container } = renderItem(suggestion(), false);
+    // Diff still renders (as per-fragment spans, #331)...
+    expect(container.textContent).toContain("new wording here");
    // ...but no Apply button.
    expect(screen.queryByRole("button", { name: "Apply" })).toBeNull();
  });
@@ -157,6 +216,65 @@ describe("CommentListItem — suggested edit (#315)", () => {
  });
 });

+describe("CommentListItem — dismiss suggestion (#329)", () => {
+  const suggestion = (over?: Partial<IComment>): IComment =>
+    baseComment({
+      selection: "old wording here",
+      suggestedText: "new wording here",
+      ...over,
+    });
+
+  // A space admin (userSpaceRole="admin") satisfies the owner-or-admin gate
+  // regardless of who authored the comment; the tests below use it as the lever
+  // since the currentUser atom is unseeded (null) in this harness.
+  it("renders a Dismiss button alongside Apply when canEdit and canComment (owner/admin)", () => {
+    renderItem(suggestion(), true, true, "admin");
+    expect(screen.getByRole("button", { name: "Apply" })).toBeDefined();
+    expect(screen.getByRole("button", { name: "Dismiss" })).toBeDefined();
+  });
+
+  it("shows Dismiss but NOT Apply for an admin commenter who cannot edit", () => {
+    renderItem(suggestion(), false, true, "admin");
+    expect(screen.queryByRole("button", { name: "Apply" })).toBeNull();
+    expect(screen.getByRole("button", { name: "Dismiss" })).toBeDefined();
+  });
+
+  it("hides Dismiss when the viewer cannot comment", () => {
+    renderItem(suggestion(), false, false, "admin");
+    expect(screen.queryByRole("button", { name: "Dismiss" })).toBeNull();
+    expect(screen.queryByRole("button", { name: "Apply" })).toBeNull();
+  });
+
+  it("hides Dismiss for a non-owner non-admin even with canComment (#338 F5: mirrors server 403)", () => {
+    // canComment=true but NOT a space admin and NOT the comment owner (the
+    // currentUser atom is null while the comment is authored by user-1), so the
+    // server would 403 a dismiss — the button must not be shown at all.
+    renderItem(suggestion(), false, true, "member");
+    expect(screen.queryByRole("button", { name: "Dismiss" })).toBeNull();
+  });
+
+  it("hides Dismiss once the thread is resolved", () => {
+    renderItem(suggestion({ resolvedAt: new Date() }), true, true, "admin");
+    expect(screen.queryByRole("button", { name: "Dismiss" })).toBeNull();
+  });
+
+  it("hides Dismiss (shows the Applied badge) once applied", () => {
+    renderItem(suggestion({ suggestionAppliedAt: new Date() }), true, true, "admin");
+    expect(screen.queryByRole("button", { name: "Dismiss" })).toBeNull();
+    expect(screen.getByText("Applied")).toBeDefined();
+  });
+
+  it("calls the dismiss mutation when the Dismiss button is clicked", () => {
+    dismissMutateAsync.mockClear();
+    renderItem(suggestion(), true, true, "admin");
+    fireEvent.click(screen.getByRole("button", { name: "Dismiss" }));
+    expect(dismissMutateAsync).toHaveBeenCalledWith({
+      commentId: "c-1",
+      pageId: "page-1",
+    });
+  });
+});
+
 describe("canShowApply predicate", () => {
  const c = (over?: Partial<IComment>): IComment =>
    ({ suggestedText: "x", ...over }) as IComment;
@@ -182,3 +300,161 @@ describe("canShowApply predicate", () => {
    expect(canShowApply(c({ parentCommentId: "p" }), true)).toBe(false);
  });
 });
+
+describe("canShowDismiss predicate", () => {
+  const c = (over?: Partial<IComment>): IComment =>
+    ({ suggestedText: "x", ...over }) as IComment;
+
+  it("true when suggestion present, can comment, owner/admin, not applied/resolved, top-level", () => {
+    expect(canShowDismiss(c(), true, true)).toBe(true);
+  });
+  it("false without comment permission", () => {
+    expect(canShowDismiss(c(), false, true)).toBe(false);
+  });
+  it("false when not owner and not admin (#338 F5)", () => {
+    expect(canShowDismiss(c(), true, false)).toBe(false);
+  });
+  it("false when no suggestion", () => {
+    expect(canShowDismiss(c({ suggestedText: null }), true, true)).toBe(false);
+  });
+  it("false when already applied", () => {
+    expect(canShowDismiss(c({ suggestionAppliedAt: new Date() }), true, true)).toBe(
+      false,
+    );
+  });
+  it("false when resolved", () => {
+    expect(canShowDismiss(c({ resolvedAt: new Date() }), true, true)).toBe(false);
+  });
+  it("false for a reply comment", () => {
+    expect(canShowDismiss(c({ parentCommentId: "p" }), true, true)).toBe(false);
+  });
+});
+
+describe("CommentListItem — edit -> save/cancel flow (#340 F3)", () => {
+  const body = (t: string) =>
+    JSON.stringify({
+      type: "doc",
+      content: [{ type: "paragraph", content: [{ type: "text", text: t }] }],
+    });
+
+  // The edit menu item is gated on the viewer owning the comment
+  // (currentUser.id === creatorId). currentUserAtom is atomWithStorage-backed,
+  // so seed localStorage to make the viewer the owner (creatorId "user-1").
+  beforeEach(() => {
+    updateMutateAsync.mockClear();
+    localStorage.setItem(
+      "currentUser",
+      JSON.stringify({ user: { id: "user-1", name: "Owner" } }),
+    );
+  });
+  afterEach(() => {
+    localStorage.clear();
+  });
+
+  async function openEditor() {
+    // Open the comment menu, then click "Edit comment" to toggle into edit mode.
+    fireEvent.click(screen.getByLabelText("Comment menu"));
+    fireEvent.click(await screen.findByText("Edit comment"));
+    // Edit form (mocked editor + actions) is now mounted.
+    await screen.findByTestId("comment-editor");
+  }
+
+  it("saves the edited content and, on cache update, shows the new body", async () => {
+    const { rerender } = renderItem(
+      baseComment({ content: body("original body") }),
+    );
+    // Static body first.
+    expect(screen.getByText("original body")).toBeDefined();
+
+    await openEditor();
+
+    // Editor emits an update (populates editContentRef), then Save is clicked.
+    fireEvent.click(screen.getByTestId("editor-emit-update"));
+    fireEvent.click(screen.getByRole("button", { name: "Save" }));
+
+    // mutateAsync is called with the stringified edited doc.
+    expect(updateMutateAsync).toHaveBeenCalledWith({
+      commentId: "c-1",
+      content: JSON.stringify(EDITED_DOC),
+    });
+
+    // On success the form closes (isEditing -> false); the static body renders
+    // from the comment.content prop again.
+    await waitFor(() =>
+      expect(screen.queryByTestId("comment-editor")).toBeNull(),
+    );
+
+    // Simulate the cache invalidation swapping in a new comment object with the
+    // updated content — the static body reflects it.
+    rerender(
+      <MantineProvider>
+        <CommentListItem
+          comment={baseComment({ content: body("updated body after save") })}
+          pageId="page-1"
+          canComment={true}
+          canEdit={true}
+        />
+      </MantineProvider>,
+    );
+    expect(screen.getByText("updated body after save")).toBeDefined();
+    expect(screen.queryByText("original body")).toBeNull();
+  });
+
+  it("cancel restores the static body and does not call the update mutation", async () => {
+    renderItem(baseComment({ content: body("original body") }));
+    await openEditor();
+
+    // Type something (editContentRef set), then cancel.
+    fireEvent.click(screen.getByTestId("editor-emit-update"));
+    fireEvent.click(screen.getByRole("button", { name: "Cancel" }));
+
+    // Editor unmounts, static body restored, no save happened.
+    await waitFor(() =>
+      expect(screen.queryByTestId("comment-editor")).toBeNull(),
+    );
+    expect(screen.getByText("original body")).toBeDefined();
+    expect(updateMutateAsync).not.toHaveBeenCalled();
+  });
+
+  it("saving without editing sends the existing content (editContentRef cleared after cancel)", async () => {
+    renderItem(baseComment({ content: body("original body") }));
+
+    // Cancel path clears editContentRef...
+    await openEditor();
+    fireEvent.click(screen.getByTestId("editor-emit-update"));
+    fireEvent.click(screen.getByRole("button", { name: "Cancel" }));
+    await waitFor(() =>
+      expect(screen.queryByTestId("comment-editor")).toBeNull(),
+    );
+
+    // ...so re-opening and saving WITHOUT an update falls back to comment.content.
+    await openEditor();
+    fireEvent.click(screen.getByRole("button", { name: "Save" }));
+    expect(updateMutateAsync).toHaveBeenCalledWith({
+      commentId: "c-1",
+      content: JSON.stringify(body("original body")),
+    });
+  });
+});
+
+describe("CommentListItem — read-only body renders statically", () => {
+  it("renders the comment body as static text without a TipTap editor", () => {
+    renderItem(
+      baseComment({
+        content: JSON.stringify({
+          type: "doc",
+          content: [
+            {
+              type: "paragraph",
+              content: [{ type: "text", text: "Hello static world" }],
+            },
+          ],
+        }),
+      }),
+    );
+    // Body text is present...
+    expect(screen.getByText("Hello static world")).toBeDefined();
+    // ...and it did NOT go through the (mocked) CommentEditor instance.
+    expect(screen.queryByTestId("comment-editor")).toBeNull();
+  });
+});
@@ -1,10 +1,11 @@
 import { Group, Text, Box, Badge, Button } from "@mantine/core";
 import { AgentAvatarStack } from "@/components/ui/agent-avatar-stack.tsx";
-import React, { useEffect, useRef, useState } from "react";
+import React, { useMemo, useRef, useState } from "react";
 import classes from "./comment.module.css";
 import { useAtom, useAtomValue } from "jotai";
 import { useTimeAgo } from "@/hooks/use-time-ago";
 import CommentEditor from "@/features/comment/components/comment-editor";
+import CommentContentView from "@/features/comment/components/comment-content-view";
 import { pageEditorAtom } from "@/features/editor/atoms/editor-atoms";
 import CommentActions from "@/features/comment/components/comment-actions";
 import CommentMenu from "@/features/comment/components/comment-menu";
@@ -13,11 +14,16 @@ import { useHover } from "@mantine/hooks";
 import {
  useApplySuggestionMutation,
  useDeleteCommentMutation,
+  useDismissSuggestionMutation,
  useResolveCommentMutation,
  useUpdateCommentMutation,
 } from "@/features/comment/queries/comment-query";
 import { IComment } from "@/features/comment/types/comment.types";
-import { canShowApply } from "@/features/comment/utils/suggestion";
+import {
+  canShowApply,
+  canShowDismiss,
+  computeSuggestionDiff,
+} from "@/features/comment/utils/suggestion";
 import { CustomAvatar } from "@/components/ui/custom-avatar.tsx";
 import { currentUserAtom } from "@/features/user/atoms/current-user-atom.ts";
 import { useTranslation } from "react-i18next";
@@ -45,31 +51,43 @@ function CommentListItem({
  const [isEditing, setIsEditing] = useState(false);
  const [isLoading, setIsLoading] = useState(false);
  const editor = useAtomValue(pageEditorAtom);
-  const [content, setContent] = useState<string>(comment.content);
  const editContentRef = useRef<any>(null);
  const updateCommentMutation = useUpdateCommentMutation();
  const deleteCommentMutation = useDeleteCommentMutation(comment.pageId);
  const resolveCommentMutation = useResolveCommentMutation();
  const applySuggestionMutation = useApplySuggestionMutation();
+  const dismissSuggestionMutation = useDismissSuggestionMutation();
  const [currentUser] = useAtom(currentUserAtom);
  const createdAtAgo = useTimeAgo(comment.createdAt);

-  useEffect(() => {
-    setContent(comment.content);
-  }, [comment]);
+  // Intraline "before -> after" diff (#331) for a suggested edit: only the
+  // fragments that actually changed get emphasised inside the red/green block,
+  // instead of striking through / greening the whole line. Memoised on the
+  // (selection, suggestedText) pair so it recomputes only when they change.
+  const suggestionDiff = useMemo(
+    () =>
+      comment.suggestedText != null
+        ? computeSuggestionDiff(comment.selection ?? "", comment.suggestedText)
+        : null,
+    [comment.selection, comment.suggestedText],
+  );
+
+  // Owner-or-space-admin gate (#338): mirrors the server authz for both the
+  // comment menu (edit/delete) and the suggestion Dismiss button, so we never
+  // render an action the server will 403.
+  const isOwnerOrAdmin =
+    currentUser?.user?.id === comment.creatorId || userSpaceRole === "admin";
+

  async function handleUpdateComment() {
    try {
      setIsLoading(true);
      const commentToUpdate = {
        commentId: comment.id,
-        content: JSON.stringify(editContentRef.current ?? content),
+        content: JSON.stringify(editContentRef.current ?? comment.content),
      };
      await updateCommentMutation.mutateAsync(commentToUpdate);
-      if (editContentRef.current) {
-        setContent(editContentRef.current);
-        editContentRef.current = null;
-      }
+      editContentRef.current = null;
      setIsEditing(false);
    } catch (error) {
      console.error("Failed to update comment:", error);
@@ -115,6 +133,19 @@ function CommentListItem({
    }
  }

+  async function handleDismissSuggestion() {
+    try {
+      await dismissSuggestionMutation.mutateAsync({
+        commentId: comment.id,
+        pageId: comment.pageId,
+      });
+    } catch (error) {
+      // Idempotent races are reconciled to success in the mutation's onError;
+      // anything else surfaces there as a notification.
+      console.error("Failed to dismiss suggestion:", error);
+    }
+  }
+
  function handleCommentClick(comment: IComment) {
    const el = document.querySelector(
      `.comment-mark[data-comment-id="${comment.id}"]`,
@@ -190,7 +221,7 @@ function CommentListItem({
                />
              )}

-              {(currentUser?.user?.id === comment.creatorId || userSpaceRole === 'admin') && (
+              {isOwnerOrAdmin && (
                <CommentMenu
                  onEditComment={handleEditToggle}
                  onDeleteComment={handleDeleteComment}
@@ -236,12 +267,28 @@ function CommentListItem({
        {!comment.parentCommentId && comment.suggestedText && (
          <Box className={classes.suggestionBlock}>
            {comment.selection && (
+              // Old line: read as removed as a whole (line-through/red); only the
+              // changed fragments carry the extra intraline emphasis.
              <Text size="xs" className={classes.suggestionOld}>
-                {comment.selection}
+                {suggestionDiff?.old.map((segment, index) => (
+                  <span
+                    key={index}
+                    className={segment.changed ? classes.suggestionChanged : undefined}
+                  >
+                    {segment.text}
+                  </span>
+                ))}
              </Text>
            )}
            <Text size="xs" className={classes.suggestionNew}>
-              {comment.suggestedText}
+              {suggestionDiff?.new.map((segment, index) => (
+                <span
+                  key={index}
+                  className={segment.changed ? classes.suggestionChanged : undefined}
+                >
+                  {segment.text}
+                </span>
+              ))}
            </Text>

            {comment.suggestionAppliedAt ? (
@@ -255,29 +302,53 @@ function CommentListItem({
                {t("Applied")}
              </Badge>
            ) : (
-              canShowApply(comment, canEdit) && (
-                <Button
-                  size="compact-xs"
-                  variant="light"
-                  color="green"
-                  mt={6}
-                  onClick={handleApplySuggestion}
-                  loading={applySuggestionMutation.isPending}
-                  disabled={applySuggestionMutation.isPending}
-                >
-                  {t("Apply")}
-                </Button>
+              (canShowApply(comment, canEdit) ||
+                canShowDismiss(comment, canComment, isOwnerOrAdmin)) && (
+                <Group gap="xs" mt={6}>
+                  {canShowApply(comment, canEdit) && (
+                    <Button
+                      size="compact-xs"
+                      variant="light"
+                      color="green"
+                      onClick={handleApplySuggestion}
+                      loading={applySuggestionMutation.isPending}
+                      disabled={
+                        applySuggestionMutation.isPending ||
+                        dismissSuggestionMutation.isPending
+                      }
+                    >
+                      {t("Apply")}
+                    </Button>
+                  )}
+                  {/* Dismiss ("Не применять", #329): removes the suggestion
+                      without changing the page text. Gated on canComment. */}
+                  {canShowDismiss(comment, canComment, isOwnerOrAdmin) && (
+                    <Button
+                      size="compact-xs"
+                      variant="subtle"
+                      color="gray"
+                      onClick={handleDismissSuggestion}
+                      loading={dismissSuggestionMutation.isPending}
+                      disabled={
+                        applySuggestionMutation.isPending ||
+                        dismissSuggestionMutation.isPending
+                      }
+                    >
+                      {t("Dismiss")}
+                    </Button>
+                  )}
+                </Group>
              )
            )}
          </Box>
        )}

        {!isEditing ? (
-          <CommentEditor defaultContent={content} editable={false} />
+          <CommentContentView content={comment.content} />
        ) : (
          <>
            <CommentEditor
-              defaultContent={content}
+              defaultContent={comment.content}
              editable={true}
              onUpdate={(newContent: any) => { editContentRef.current = newContent; }}
              onSave={handleUpdateComment}
@@ -297,4 +368,6 @@ function CommentListItem({
  );
 }

-export default CommentListItem;
+// Memoized so a resolve/apply/reply cache update (which only replaces the touched
+// comment's object identity) re-renders that one thread, not all ~356 items.
+export default React.memo(CommentListItem);
@@ -0,0 +1,108 @@
+import { describe, it, expect, vi } from "vitest";
+import { render, screen, fireEvent } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+import { IComment } from "@/features/comment/types/comment.types";
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+// CommentEditor pulls in the full TipTap editor stack; replace it with a stub so
+// the lazy reply editor's mount transition can be observed without the editor.
+vi.mock("@/features/comment/components/comment-editor", () => ({
+  default: () => <div data-testid="comment-editor" />,
+}));
+
+// page-query -> main.tsx (createRoot) is a module side effect; stub the queries
+// pulled in transitively so importing the module is side-effect free.
+vi.mock("@/features/page/queries/page-query.ts", () => ({
+  usePageQuery: () => ({ data: undefined, isLoading: false, isError: false }),
+}));
+vi.mock("@/features/share/queries/share-query.ts", () => ({
+  useSharePageQuery: () => ({ data: undefined }),
+}));
+// space-query -> main.tsx (createRoot) is another module side effect; stub it.
+vi.mock("@/features/space/queries/space-query.ts", () => ({
+  useGetSpaceBySlugQuery: () => ({ data: undefined }),
+}));
+
+import {
+  buildChildrenByParent,
+  CommentEditorWithActions,
+} from "./comment-list-with-tabs";
+
+const c = (id: string, parentCommentId: string | null = null): IComment =>
+  ({ id, parentCommentId }) as IComment;
+
+describe("buildChildrenByParent (childrenByParent grouping)", () => {
+  it("returns an empty map for undefined or empty input", () => {
+    expect(buildChildrenByParent(undefined).size).toBe(0);
+    expect(buildChildrenByParent([]).size).toBe(0);
+  });
+
+  it("does not index a top-level comment (parentCommentId null)", () => {
+    const map = buildChildrenByParent([c("p1", null)]);
+    expect(map.size).toBe(0);
+    expect(map.has("p1")).toBe(false);
+  });
+
+  it("groups replies under the correct parent, including reply-to-reply nesting", () => {
+    const p1 = c("p1", null);
+    const r1 = c("r1", "p1");
+    const r2 = c("r2", "r1"); // a reply to a reply
+    const map = buildChildrenByParent([p1, r1, r2]);
+    expect(map.get("p1")).toEqual([r1]);
+    expect(map.get("r1")).toEqual([r2]);
+    // The top-level comment itself is never a key.
+    expect(map.has("p1") && map.get("p1")?.length).toBe(1);
+  });
+
+  it("still groups a reply whose parent is not present in items", () => {
+    const orphan = c("o1", "missing-parent");
+    const map = buildChildrenByParent([orphan]);
+    expect(map.get("missing-parent")).toEqual([orphan]);
+  });
+
+  it("preserves insertion order among sibling replies", () => {
+    const map = buildChildrenByParent([
+      c("a", "p1"),
+      c("b", "p1"),
+      c("d", "p1"),
+    ]);
+    expect(map.get("p1")?.map((x) => x.id)).toEqual(["a", "b", "d"]);
+  });
+});
+
+function renderReplyEditor() {
+  return render(
+    <MantineProvider>
+      <CommentEditorWithActions commentId="c-1" onSave={vi.fn()} />
+    </MantineProvider>,
+  );
+}
+
+describe("CommentEditorWithActions — lazy reply editor activation", () => {
+  it("shows only the stub initially (no editor instance mounted)", () => {
+    renderReplyEditor();
+    expect(screen.getByRole("button")).toBeDefined();
+    expect(screen.queryByTestId("comment-editor")).toBeNull();
+  });
+
+  it("mounts the real editor when the stub is clicked and keeps it mounted", () => {
+    renderReplyEditor();
+    fireEvent.click(screen.getByRole("button"));
+    expect(screen.getByTestId("comment-editor")).toBeDefined();
+    // The stub button is replaced by the editor subtree.
+    expect(screen.queryByRole("button")).toBeNull();
+  });
+
+  it("mounts the editor when the stub receives focus", () => {
+    renderReplyEditor();
+    fireEvent.focus(screen.getByRole("button"));
+    expect(screen.getByTestId("comment-editor")).toBeDefined();
+  });
+
+  it("mounts the editor on Enter keydown of the stub", () => {
+    renderReplyEditor();
+    fireEvent.keyDown(screen.getByRole("button"), { key: "Enter" });
+    expect(screen.getByTestId("comment-editor")).toBeDefined();
+  });
+});
@@ -23,7 +23,6 @@ import CommentActions from "@/features/comment/components/comment-actions";
 import { useFocusWithin } from "@mantine/hooks";
 import { IComment } from "@/features/comment/types/comment.types.ts";
 import { usePageQuery } from "@/features/page/queries/page-query.ts";
-import { IPagination } from "@/lib/types.ts";
 import { extractPageSlugId } from "@/lib";
 import { useTranslation } from "react-i18next";
 import { useGetSpaceBySlugQuery } from "@/features/space/queries/space-query.ts";
@@ -36,6 +35,24 @@ interface CommentListWithTabsProps {
  onClose?: () => void;
 }

+// Index replies by their parent id once (O(n)), instead of an O(n^2) filter per
+// thread. Replies whose parent is not in `items` are still grouped under their
+// parentCommentId (they simply won't be reached by the top-level walk).
+// Exported for unit testing.
+export function buildChildrenByParent(
+  items: IComment[] | undefined,
+): Map<string, IComment[]> {
+  const m = new Map<string, IComment[]>();
+  for (const c of items ?? []) {
+    if (c.parentCommentId) {
+      const arr = m.get(c.parentCommentId);
+      if (arr) arr.push(c);
+      else m.set(c.parentCommentId, [c]);
+    }
+  }
+  return m;
+}
+
 function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
  const { t } = useTranslation();
  const { pageSlug } = useParams();
@@ -46,7 +63,9 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
    isError,
  } = useCommentsQuery({ pageId: page?.id });
  const createCommentMutation = useCreateCommentMutation();
-  const [isLoading, setIsLoading] = useState(false);
+  // mutateAsync is a stable reference across renders; depend on it (not the
+  // mutation object) so the reply/comment callbacks stay stable.
+  const createCommentAsync = createCommentMutation.mutateAsync;
  const { data: space } = useGetSpaceBySlugQuery(page?.space?.slug);

  const canEdit = page?.permissions?.canEdit ?? false;
@@ -75,13 +94,21 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
    return { activeComments: active, resolvedComments: resolved };
  }, [comments]);

+  // Index replies by their parent once, instead of an O(n^2) filter per thread.
+  // The map ref changes on any comments update, so MemoizedChildComments re-runs
+  // (cheap) and re-looks-up, while memoized CommentListItems skip unchanged items.
+  const childrenByParent = useMemo(
+    () => buildChildrenByParent(comments?.items),
+    [comments?.items],
+  );
+
  const [isPageCommentLoading, setIsPageCommentLoading] = useState(false);

  const handleAddPageComment = useCallback(
    async (_commentId: string, content: string) => {
      try {
        setIsPageCommentLoading(true);
-        const createdComment = await createCommentMutation.mutateAsync({
+        const createdComment = await createCommentAsync({
          pageId: page?.id,
          content: JSON.stringify(content),
        });
@@ -100,27 +127,26 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
        setIsPageCommentLoading(false);
      }
    },
-    [createCommentMutation, page?.id],
+    [createCommentAsync, page?.id],
  );

  const handleAddReply = useCallback(
    async (commentId: string, content: string) => {
+      // Pending state lives inside CommentEditorWithActions so sending a reply
+      // does not churn renderComments and re-render the whole list.
      try {
-        setIsLoading(true);
        const commentData = {
          pageId: page?.id,
          parentCommentId: commentId,
          content: JSON.stringify(content),
        };

-        await createCommentMutation.mutateAsync(commentData);
+        await createCommentAsync(commentData);
      } catch (error) {
        console.error("Failed to post comment:", error);
-      } finally {
-        setIsLoading(false);
      }
    },
-    [createCommentMutation, page?.id],
+    [createCommentAsync, page?.id],
  );

  const renderComments = useCallback(
@@ -143,7 +169,7 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
            userSpaceRole={space?.membership?.role}
          />
          <MemoizedChildComments
-            comments={comments}
+            childrenByParent={childrenByParent}
            parentId={comment.id}
            pageId={page?.id}
            canComment={canComment}
@@ -158,16 +184,15 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
            <CommentEditorWithActions
              commentId={comment.id}
              onSave={handleAddReply}
-              isLoading={isLoading}
            />
          </>
        )}
      </Paper>
    ),
    [
-      comments,
+      childrenByParent,
      handleAddReply,
-      isLoading,
+      page?.id,
      space?.membership?.role,
      canComment,
      canEdit,
@@ -203,6 +228,11 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
      <Tabs
        defaultValue="open"
        variant="default"
+        // Default to not mounting an inactive tab (the heavy Resolved list stays
+        // unmounted while Open is shown). The Open panel overrides this with its
+        // own keepMounted (below) so an in-progress reply/edit draft survives an
+        // Open -> Resolved -> Open switch.
+        keepMounted={false}
        style={{
          flex: "1 1 auto",
          display: "flex",
@@ -261,7 +291,10 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
          type="scroll"
        >
          <div style={{ paddingBottom: "8px" }}>
-            <Tabs.Panel value="open" pt="xs">
+            {/* keepMounted keeps the Open panel alive even while Resolved is
+                active, so a lazily-mounted reply editor's draft (and an
+                in-progress edit) is not discarded on tab switch. */}
+            <Tabs.Panel value="open" pt="xs" keepMounted>
              {activeComments.length === 0 ? (
                <Center py="xl">
                  <Stack align="center" gap="xs">
@@ -307,7 +340,7 @@ function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
 }

 interface ChildCommentsProps {
-  comments: IPagination<IComment>;
+  childrenByParent: Map<string, IComment[]>;
  parentId: string;
  pageId: string;
  canComment: boolean;
@@ -315,24 +348,18 @@ interface ChildCommentsProps {
  userSpaceRole?: string;
 }
 const ChildComments = ({
-  comments,
+  childrenByParent,
  parentId,
  pageId,
  canComment,
  canEdit,
  userSpaceRole,
 }: ChildCommentsProps) => {
-  const getChildComments = useCallback(
-    (parentId: string) =>
-      comments.items.filter(
-        (comment: IComment) => comment.parentCommentId === parentId,
-      ),
-    [comments.items],
-  );
+  const children = childrenByParent.get(parentId) ?? [];

  return (
    <div>
-      {getChildComments(parentId).map((childComment) => (
+      {children.map((childComment) => (
        <div key={childComment.id}>
          <CommentListItem
            comment={childComment}
@@ -342,7 +369,7 @@ const ChildComments = ({
            userSpaceRole={userSpaceRole}
          />
          <MemoizedChildComments
-            comments={comments}
+            childrenByParent={childrenByParent}
            parentId={childComment.id}
            pageId={pageId}
            canComment={canComment}
@@ -357,22 +384,61 @@ const ChildComments = ({

 const MemoizedChildComments = memo(ChildComments);

-const CommentEditorWithActions = ({
+export const CommentEditorWithActions = ({
  commentId,
  onSave,
-  isLoading,
  placeholder = undefined,
 }) => {
+  const { t } = useTranslation();
+  // Lazily mount the TipTap reply editor: until the user interacts with the
+  // stub, no editor instance is created for this thread. Once mounted it stays
+  // mounted so the draft is preserved.
+  const [mounted, setMounted] = useState(false);
  const [content, setContent] = useState("");
+  const [isSending, setIsSending] = useState(false);
  const { ref, focused } = useFocusWithin();
  const commentEditorRef = useRef(null);

-  const handleSave = useCallback(() => {
-    onSave(commentId, content);
-    setContent("");
-    commentEditorRef.current?.clearContent();
+  const activate = useCallback(() => setMounted(true), []);
+
+  const handleSave = useCallback(async () => {
+    try {
+      setIsSending(true);
+      await onSave(commentId, content);
+      setContent("");
+      commentEditorRef.current?.clearContent();
+    } finally {
+      setIsSending(false);
+    }
  }, [commentId, content, onSave]);

+  if (!mounted) {
+    return (
+      <div
+        role="button"
+        tabIndex={0}
+        onClick={activate}
+        onFocus={activate}
+        onKeyDown={(e) => {
+          if (e.key === "Enter" || e.key === " ") {
+            e.preventDefault();
+            activate();
+          }
+        }}
+        style={{
+          padding: "6px",
+          fontSize: "var(--mantine-font-size-sm)",
+          lineHeight: 1.4,
+          color: "var(--mantine-color-placeholder)",
+          cursor: "text",
+          borderRadius: "var(--mantine-radius-sm)",
+        }}
+      >
+        {placeholder || t("Reply...")}
+      </div>
+    );
+  }
+
  return (
    <div ref={ref}>
      <CommentEditor
@@ -381,8 +447,9 @@ const CommentEditorWithActions = ({
        onSave={handleSave}
        editable={true}
        placeholder={placeholder}
+        autofocus={true}
      />
-      {focused && <CommentActions onSave={handleSave} isLoading={isLoading} />}
+      {focused && <CommentActions onSave={handleSave} isLoading={isSending} />}
    </div>
  );
 };
@@ -53,6 +53,21 @@
    margin-top: 4px;
 }

+/* Intraline diff (#331): the fragment that actually changed within the
+   red "before" / green "after" block. It inherits the surrounding red/green
+   framing and adds a stronger tint plus bold weight so the eye lands on the
+   changed letters/words (git/GitHub-style) rather than the whole line. The
+   container's line-through (old) / green (new) still marks the full line. */
+.suggestionChanged {
+    /* Stronger tint of the surrounding red/green so the changed fragment pops
+       within the block. `currentColor` follows the parent's red (old) or green
+       (new) text colour. No `text-decoration` here on purpose: the old block's
+       inherited line-through must survive on the changed letters too. */
+    background: color-mix(in srgb, currentColor 22%, transparent);
+    border-radius: 2px;
+    font-weight: 700;
+}
+
 .commentEditor {

    &[data-editable][data-surface="muted"] .ProseMirror:not(.focused) {
@@ -0,0 +1,279 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import React from "react";
+import { renderHook, waitFor } from "@testing-library/react";
+import {
+  QueryClient,
+  QueryClientProvider,
+  InfiniteData,
+} from "@tanstack/react-query";
+
+/**
+ * Coverage for the ephemeral-suggestion (#329) cache reconciliation in
+ * useApplySuggestionMutation / useDismissSuggestionMutation: the mutations act on
+ * the server `outcome` — 'deleted' drops the comment from the local list,
+ * 'resolved' relocates it (by stamping resolvedAt, which the tabs split on).
+ */
+
+vi.mock("@mantine/notifications", () => ({
+  notifications: { show: vi.fn() },
+}));
+
+vi.mock("@/features/comment/services/comment-service", () => ({
+  applySuggestion: vi.fn(),
+  dismissSuggestion: vi.fn(),
+  createComment: vi.fn(),
+  updateComment: vi.fn(),
+  deleteComment: vi.fn(),
+  resolveComment: vi.fn(),
+  getPageComments: vi.fn(),
+}));
+
+import { notifications } from "@mantine/notifications";
+import {
+  applySuggestion,
+  dismissSuggestion,
+} from "@/features/comment/services/comment-service";
+import {
+  useApplySuggestionMutation,
+  useDismissSuggestionMutation,
+  RQ_KEY,
+} from "@/features/comment/queries/comment-query";
+import { IComment } from "@/features/comment/types/comment.types";
+
+const PAGE_ID = "page-1";
+
+function seededClient(comment: IComment) {
+  const queryClient = new QueryClient({
+    defaultOptions: { mutations: { retry: false } },
+  });
+  const seed: InfiniteData<any> = {
+    pageParams: [undefined],
+    pages: [{ items: [comment], meta: { hasNextPage: false, nextCursor: null } }],
+  };
+  queryClient.setQueryData(RQ_KEY(PAGE_ID), seed);
+  const wrapper = ({ children }: { children: React.ReactNode }) => (
+    <QueryClientProvider client={queryClient}>{children}</QueryClientProvider>
+  );
+  return { queryClient, wrapper };
+}
+
+function items(queryClient: QueryClient): IComment[] {
+  const cache = queryClient.getQueryData(RQ_KEY(PAGE_ID)) as
+    | InfiniteData<any>
+    | undefined;
+  return cache?.pages.flatMap((p) => p.items) ?? [];
+}
+
+const comment = (over?: Partial<IComment>): IComment =>
+  ({
+    id: "c-1",
+    pageId: PAGE_ID,
+    content: "{}",
+    creatorId: "u-1",
+    workspaceId: "ws-1",
+    createdAt: new Date(),
+    suggestedText: "new",
+    ...over,
+  }) as IComment;
+
+describe("useApplySuggestionMutation — outcome handling (#329)", () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  it("outcome=deleted → removes the comment from the list", async () => {
+    vi.mocked(applySuggestion).mockResolvedValue({
+      id: "c-1",
+      pageId: PAGE_ID,
+      outcome: "deleted",
+    } as any);
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useApplySuggestionMutation(), {
+      wrapper,
+    });
+    await result.current.mutateAsync({ commentId: "c-1", pageId: PAGE_ID });
+    await waitFor(() => expect(result.current.isSuccess).toBe(true));
+
+    expect(items(queryClient)).toHaveLength(0);
+  });
+
+  it("outcome=resolved → keeps the comment and stamps resolvedAt/applied fields", async () => {
+    const resolvedAt = new Date();
+    vi.mocked(applySuggestion).mockResolvedValue({
+      id: "c-1",
+      pageId: PAGE_ID,
+      outcome: "resolved",
+      resolvedAt,
+      resolvedById: "u-1",
+      resolvedBy: { id: "u-1", name: "A" },
+      suggestionAppliedAt: resolvedAt,
+      suggestionAppliedById: "u-1",
+    } as any);
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useApplySuggestionMutation(), {
+      wrapper,
+    });
+    await result.current.mutateAsync({ commentId: "c-1", pageId: PAGE_ID });
+    await waitFor(() => expect(result.current.isSuccess).toBe(true));
+
+    const list = items(queryClient);
+    expect(list).toHaveLength(1);
+    expect(list[0].resolvedAt).toBe(resolvedAt);
+    expect(list[0].suggestionAppliedAt).toBe(resolvedAt);
+  });
+});
+
+describe("useDismissSuggestionMutation — outcome handling (#329)", () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  it("outcome=deleted → removes the comment from the list", async () => {
+    vi.mocked(dismissSuggestion).mockResolvedValue({
+      id: "c-1",
+      pageId: PAGE_ID,
+      outcome: "deleted",
+    } as any);
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useDismissSuggestionMutation(), {
+      wrapper,
+    });
+    await result.current.mutateAsync({ commentId: "c-1", pageId: PAGE_ID });
+    await waitFor(() => expect(result.current.isSuccess).toBe(true));
+
+    expect(items(queryClient)).toHaveLength(0);
+  });
+
+  it("outcome=resolved → keeps the comment and stamps resolvedAt", async () => {
+    const resolvedAt = new Date();
+    vi.mocked(dismissSuggestion).mockResolvedValue({
+      id: "c-1",
+      pageId: PAGE_ID,
+      outcome: "resolved",
+      resolvedAt,
+      resolvedById: "u-1",
+      resolvedBy: { id: "u-1", name: "A" },
+    } as any);
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useDismissSuggestionMutation(), {
+      wrapper,
+    });
+    await result.current.mutateAsync({ commentId: "c-1", pageId: PAGE_ID });
+    await waitFor(() => expect(result.current.isSuccess).toBe(true));
+
+    const list = items(queryClient);
+    expect(list).toHaveLength(1);
+    expect(list[0].resolvedAt).toBe(resolvedAt);
+  });
+
+  it("idempotent race (404) → treated as success, comment removed from the list", async () => {
+    vi.mocked(dismissSuggestion).mockRejectedValue({
+      response: { status: 404 },
+    });
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useDismissSuggestionMutation(), {
+      wrapper,
+    });
+    // mutateAsync rejects even though onError reconciles the cache; swallow it.
+    await result.current
+      .mutateAsync({ commentId: "c-1", pageId: PAGE_ID })
+      .catch(() => undefined);
+    await waitFor(() => expect(result.current.isError).toBe(true));
+
+    expect(items(queryClient)).toHaveLength(0);
+    // #338 F3: the idempotent race must still fire the SUCCESS toast, not just
+    // silently drop the comment.
+    expect(notifications.show).toHaveBeenCalledWith({
+      message: "Suggestion dismissed",
+    });
+  });
+
+  it("dismiss 400 (thread still alive) → NOT a success, comment kept, no green toast (#338 F2)", async () => {
+    // 400 means the thread is alive (already resolved / a reply raced in).
+    // Narrowed onError: only 404 is a success-noop; 400 must surface a real error
+    // and keep the comment in the cache.
+    vi.mocked(dismissSuggestion).mockRejectedValue({
+      response: { status: 400 },
+    });
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useDismissSuggestionMutation(), {
+      wrapper,
+    });
+    await result.current
+      .mutateAsync({ commentId: "c-1", pageId: PAGE_ID })
+      .catch(() => undefined);
+    await waitFor(() => expect(result.current.isError).toBe(true));
+
+    // Comment NOT dropped from the cache.
+    expect(items(queryClient)).toHaveLength(1);
+    // A real (red) error, never the success message.
+    expect(notifications.show).toHaveBeenCalledWith(
+      expect.objectContaining({ color: "red" }),
+    );
+    expect(notifications.show).not.toHaveBeenCalledWith({
+      message: "Suggestion dismissed",
+    });
+  });
+
+  it("APPLY idempotent race (404) → treated as success, comment removed from the list", async () => {
+    // After #329 an applied reply-less suggestion is hard-deleted, so a racing
+    // second apply hits 404 — must reconcile to success like dismiss, not a red
+    // error (restores the #315 apply idempotency).
+    vi.mocked(applySuggestion).mockRejectedValue({
+      response: { status: 404 },
+    });
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useApplySuggestionMutation(), {
+      wrapper,
+    });
+    await result.current
+      .mutateAsync({ commentId: "c-1", pageId: PAGE_ID })
+      .catch(() => undefined);
+    await waitFor(() => expect(result.current.isError).toBe(true));
+
+    expect(items(queryClient)).toHaveLength(0);
+    // #338 F3: the idempotent race must still fire the SUCCESS toast.
+    expect(notifications.show).toHaveBeenCalledWith({
+      message: "Suggestion applied",
+    });
+  });
+
+  it("APPLY 400 (thread resolved, not applied) → NOT a success, comment kept, red error (#338 F2)", async () => {
+    // apply's only 400 is "Cannot apply … on a resolved comment thread" — the
+    // thread was resolved (often with discussion) but NOT applied. It must be a
+    // real error surfacing the server message, and must NOT drop the live thread.
+    vi.mocked(applySuggestion).mockRejectedValue({
+      response: {
+        status: 400,
+        data: {
+          message: "Cannot apply a suggested edit on a resolved comment thread",
+        },
+      },
+    });
+    const { queryClient, wrapper } = seededClient(comment());
+
+    const { result } = renderHook(() => useApplySuggestionMutation(), {
+      wrapper,
+    });
+    await result.current
+      .mutateAsync({ commentId: "c-1", pageId: PAGE_ID })
+      .catch(() => undefined);
+    await waitFor(() => expect(result.current.isError).toBe(true));
+
+    // The live thread is NOT dropped from the cache.
+    expect(items(queryClient)).toHaveLength(1);
+    // Surfaces the server's specific message as a red error, never a success.
+    expect(notifications.show).toHaveBeenCalledWith(
+      expect.objectContaining({
+        message: "Cannot apply a suggested edit on a resolved comment thread",
+        color: "red",
+      }),
+    );
+    expect(notifications.show).not.toHaveBeenCalledWith({
+      message: "Suggestion applied",
+    });
+  });
+});
@@ -8,6 +8,7 @@ import {
  applySuggestion,
  createComment,
  deleteComment,
+  dismissSuggestion,
  getPageComments,
  resolveComment,
  updateComment,
@@ -16,6 +17,7 @@ import {
  ICommentParams,
  IComment,
  IResolveComment,
+  ISuggestionOutcome,
 } from "@/features/comment/types/comment.types";
 import { notifications } from "@mantine/notifications";
 import { IPagination } from "@/lib/types.ts";
@@ -51,7 +53,10 @@ export function useCommentsQuery(params: ICommentParams) {

  return {
    data,
-    isLoading: query.isLoading || query.hasNextPage,
+    // Paint the first page as soon as it arrives instead of blocking until every
+    // page has loaded; the background effect above keeps streaming the rest
+    // (tab counts grow as pages arrive).
+    isLoading: query.isLoading,
    isError: query.isError,
  };
 }
@@ -177,40 +182,121 @@ function updateCommentInCache(
  };
 }

+function removeCommentFromCache(
+  cache: InfiniteData<IPagination<IComment>>,
+  commentId: string,
+): InfiniteData<IPagination<IComment>> {
+  return {
+    ...cache,
+    pages: cache.pages.map((page) => ({
+      ...page,
+      items: page.items.filter((comment) => comment.id !== commentId),
+    })),
+  };
+}
+
+// Reconcile the local comment cache with an ephemeral-suggestion outcome (#329)
+// returned by apply/dismiss: 'deleted' → drop the comment (it disappeared);
+// 'resolved' → the thread had replies and was resolved, so carry the resolved
+// state through (which relocates it to the resolved tab).
+function applySuggestionOutcomeToCache(
+  queryClient: ReturnType<typeof useQueryClient>,
+  pageId: string,
+  commentId: string,
+  data: ISuggestionOutcome,
+) {
+  const cache = queryClient.getQueryData(RQ_KEY(pageId)) as
+    | InfiniteData<IPagination<IComment>>
+    | undefined;
+  if (!cache) return;
+
+  if (data.outcome === "deleted") {
+    queryClient.setQueryData(RQ_KEY(pageId), removeCommentFromCache(cache, commentId));
+    return;
+  }
+
+  // 'resolved' (or an older server that omits outcome): reflect the resolved
+  // state and the applied stamps (apply sets them; dismiss leaves them null).
+  queryClient.setQueryData(
+    RQ_KEY(pageId),
+    updateCommentInCache(cache, commentId, (comment) => ({
+      ...comment,
+      suggestionAppliedAt: data.suggestionAppliedAt,
+      suggestionAppliedById: data.suggestionAppliedById,
+      resolvedAt: data.resolvedAt,
+      resolvedById: data.resolvedById,
+      resolvedBy: data.resolvedBy,
+    })),
+  );
+}
+
 export function useApplySuggestionMutation() {
  const queryClient = useQueryClient();
  const { t } = useTranslation();

-  return useMutation<IComment, any, { commentId: string; pageId: string }>({
+  return useMutation<
+    ISuggestionOutcome,
+    any,
+    { commentId: string; pageId: string }
+  >({
    // No optimistic update: apply can fail with 409 (the commented text drifted),
    // so we only mutate the cache once the server confirms.
    mutationFn: ({ commentId }) => applySuggestion(commentId),
    onSuccess: (data, variables) => {
-      const cache = queryClient.getQueryData(
-        RQ_KEY(variables.pageId),
-      ) as InfiniteData<IPagination<IComment>> | undefined;
-
-      if (cache) {
-        queryClient.setQueryData(
-          RQ_KEY(variables.pageId),
-          updateCommentInCache(cache, variables.commentId, (comment) => ({
-            ...comment,
-            suggestionAppliedAt: data.suggestionAppliedAt,
-            suggestionAppliedById: data.suggestionAppliedById,
-            // The server auto-resolves the thread on apply — carry that through.
-            resolvedAt: data.resolvedAt,
-            resolvedById: data.resolvedById,
-            resolvedBy: data.resolvedBy,
-          })),
-        );
-      }
+      // Ephemeral (#329): the server hard-deletes the applied suggestion when the
+      // thread has no replies ('deleted') or resolves it when it does ('resolved').
+      applySuggestionOutcomeToCache(
+        queryClient,
+        variables.pageId,
+        variables.commentId,
+        data,
+      );

      notifications.show({ message: t("Suggestion applied") });
    },
-    onError: (err: any) => {
+    onError: (err: any, variables) => {
+      const status = err?.response?.status;
+      // Idempotent race (double-click, or apply↔dismiss): after #329 an applied
+      // reply-less suggestion is hard-deleted, so a second/racing apply hits 404
+      // (already gone). ONLY 404 is a real success-noop — drop it from the cache
+      // and report success, the user's intent is already satisfied (restores the
+      // #315 apply idempotency the ephemeral delete would otherwise break).
+      //
+      // 400 is NOT success (#338 F2): apply's only 400 is "Cannot apply … on a
+      // resolved comment thread" — the thread was resolved (often WITH a live
+      // discussion) but the edit was NOT applied. Treating it as "Suggestion
+      // applied" is a false success that also drops a live thread from the cache.
+      // The #315 idempotent repeat does NOT produce 400 (childless → 404;
+      // with-replies → 200), so we never lose idempotency by excluding it here.
+      if (status === 404) {
+        const cache = queryClient.getQueryData(RQ_KEY(variables.pageId)) as
+          | InfiniteData<IPagination<IComment>>
+          | undefined;
+        if (cache) {
+          queryClient.setQueryData(
+            RQ_KEY(variables.pageId),
+            removeCommentFromCache(cache, variables.commentId),
+          );
+        }
+        notifications.show({ message: t("Suggestion applied") });
+        return;
+      }
+      // 400 => the thread was resolved and the edit could not be applied. Show a
+      // real error and KEEP the comment in the cache (it is still alive). Prefer
+      // the server's specific message when it carries one.
+      if (status === 400) {
+        const serverMsg = err?.response?.data?.message;
+        notifications.show({
+          message:
+            typeof serverMsg === "string" && serverMsg.length > 0
+              ? serverMsg
+              : t("Failed to apply suggestion"),
+          color: "red",
+        });
+        return;
+      }
      // 409 => the commented text changed since the suggestion was made. Surface
      // a specific message (with the current text) rather than a generic error.
-      const status = err?.response?.status;
      const currentText = err?.response?.data?.currentText;
      if (status === 409 && typeof currentText === "string") {
        const shortText =
@@ -234,6 +320,58 @@ export function useApplySuggestionMutation() {
  });
 }

+export function useDismissSuggestionMutation() {
+  const queryClient = useQueryClient();
+  const { t } = useTranslation();
+
+  return useMutation<
+    ISuggestionOutcome,
+    any,
+    { commentId: string; pageId: string }
+  >({
+    mutationFn: ({ commentId }) => dismissSuggestion(commentId),
+    onSuccess: (data, variables) => {
+      // Ephemeral (#329): dismiss hard-deletes the suggestion when the thread has
+      // no replies ('deleted') or resolves it when it does ('resolved').
+      applySuggestionOutcomeToCache(
+        queryClient,
+        variables.pageId,
+        variables.commentId,
+        data,
+      );
+
+      notifications.show({ message: t("Suggestion dismissed") });
+    },
+    onError: (err: any, variables) => {
+      // Idempotent race (double-click, or apply↔dismiss): the comment is already
+      // gone (404). ONLY 404 is a real success-noop — drop it from the cache and
+      // report success, the user's intent (make it disappear) is satisfied.
+      //
+      // 400 is NOT success (#338 F2): it means the thread is still ALIVE (already
+      // resolved, or a reply raced in), so treating it as "dismissed" would drop
+      // a live thread from the cache. Show a real error and keep the comment.
+      const status = err?.response?.status;
+      if (status === 404) {
+        const cache = queryClient.getQueryData(RQ_KEY(variables.pageId)) as
+          | InfiniteData<IPagination<IComment>>
+          | undefined;
+        if (cache) {
+          queryClient.setQueryData(
+            RQ_KEY(variables.pageId),
+            removeCommentFromCache(cache, variables.commentId),
+          );
+        }
+        notifications.show({ message: t("Suggestion dismissed") });
+        return;
+      }
+      notifications.show({
+        message: t("Failed to dismiss suggestion"),
+        color: "red",
+      });
+    },
+  });
+}
+
 export function useResolveCommentMutation() {
  const queryClient = useQueryClient();
  const { t } = useTranslation();
@@ -3,6 +3,7 @@ import {
  ICommentParams,
  IComment,
  IResolveComment,
+  ISuggestionOutcome,
 } from "@/features/comment/types/comment.types";
 import { IPagination } from "@/lib/types.ts";

@@ -18,13 +19,24 @@ export async function resolveComment(data: IResolveComment): Promise<IComment> {
  return req.data;
 }

-export async function applySuggestion(commentId: string): Promise<IComment> {
+export async function applySuggestion(
+  commentId: string,
+): Promise<ISuggestionOutcome> {
  // Mirrors resolveComment: let axios reject on non-2xx so the mutation can read
  // the 409 body (`{ message, currentText }`) off err.response.data.
  const req = await api.post("/comments/apply-suggestion", { commentId });
  return req.data.data ?? req.data;
 }

+export async function dismissSuggestion(
+  commentId: string,
+): Promise<ISuggestionOutcome> {
+  // Dismiss ("Не применять") a suggested edit (#329): the server hard-deletes
+  // the comment (or resolves it when it has replies) and returns the outcome.
+  const req = await api.post("/comments/dismiss-suggestion", { commentId });
+  return req.data.data ?? req.data;
+}
+
 export async function updateComment(
  data: Partial<IComment>,
 ): Promise<IComment> {
@@ -60,6 +60,15 @@ export interface IResolveComment {
  resolved: boolean;
 }

+// Result of applying or dismissing an ephemeral suggested edit (#329). The
+// server hard-deletes the comment (`deleted`) unless the thread has replies, in
+// which case it is resolved (`resolved`). The returned comment fields carry the
+// resolved-branch state; `outcome` tells the client which optimistic action to
+// take (drop the comment vs. move it to the resolved tab).
+export type ISuggestionOutcome = IComment & {
+  outcome?: "deleted" | "resolved";
+};
+
 export interface ICommentParams extends QueryParams {
  pageId: string;
 }
@@ -0,0 +1,102 @@
+import { describe, it, expect } from "vitest";
+import { computeSuggestionDiff, Segment } from "@/features/comment/utils/suggestion";
+
+// Reconstruct the plain string from a segment stream — the diff must be
+// lossless (concatenating every fragment yields the original input).
+const join = (segments: Segment[]): string =>
+  segments.map((s) => s.text).join("");
+
+// The subset of segments (in order) that the UI would emphasise.
+const changed = (segments: Segment[]): string[] =>
+  segments.filter((s) => s.changed).map((s) => s.text);
+
+// Find the segment that contains a substring, to assert its `changed` flag.
+const segmentWith = (segments: Segment[], needle: string): Segment | undefined =>
+  segments.find((s) => s.text.includes(needle));
+
+describe("computeSuggestionDiff", () => {
+  it("highlights only the single changed letter in a one-letter edit", () => {
+    const { old, new: neu } = computeSuggestionDiff("заведем", "заведём");
+
+    // Lossless.
+    expect(join(old)).toBe("заведем");
+    expect(join(neu)).toBe("заведём");
+
+    // Old side: exactly the `е` is changed, the rest is common.
+    expect(changed(old)).toEqual(["е"]);
+    expect(old).toEqual([
+      { text: "завед", changed: false },
+      { text: "е", changed: true },
+      { text: "м", changed: false },
+    ]);
+
+    // New side: exactly the `ё` is changed.
+    expect(changed(neu)).toEqual(["ё"]);
+    expect(neu).toEqual([
+      { text: "завед", changed: false },
+      { text: "ё", changed: true },
+      { text: "м", changed: false },
+    ]);
+  });
+
+  it("marks the differing words changed but keeps the shared word common", () => {
+    const { old, new: neu } = computeSuggestionDiff(
+      "привет мир",
+      "здравствуй мир",
+    );
+
+    // Lossless.
+    expect(join(old)).toBe("привет мир");
+    expect(join(neu)).toBe("здравствуй мир");
+
+    // The shared trailing word stays common on both sides (no per-letter noise
+    // leaking across the differing words into `мир`).
+    expect(segmentWith(old, "мир")?.changed).toBe(false);
+    expect(segmentWith(neu, "мир")?.changed).toBe(false);
+
+    // The differing words are emphasised somewhere on each side.
+    expect(changed(old).length).toBeGreaterThan(0);
+    expect(changed(neu).length).toBeGreaterThan(0);
+    expect(changed(old).join("")).toContain("п"); // from `привет`
+    expect(changed(neu).join("")).toContain("зд"); // from `здравствуй`
+
+    // No changed fragment on either side touches the word `мир`.
+    expect(changed(old).some((t) => t.includes("мир"))).toBe(false);
+    expect(changed(neu).some((t) => t.includes("мир"))).toBe(false);
+  });
+
+  it("marks a whole inserted word changed and leaves the old line common", () => {
+    const { old, new: neu } = computeSuggestionDiff("a c", "a b c");
+
+    expect(join(old)).toBe("a c");
+    expect(join(neu)).toBe("a b c");
+
+    // Old line has no changed fragment (nothing was removed).
+    expect(changed(old)).toEqual([]);
+    // The inserted word is the only changed fragment on the new side.
+    expect(neu).toContainEqual({ text: "b ", changed: true });
+    expect(changed(neu)).toEqual(["b "]);
+  });
+
+  it("marks a whole deleted word changed and leaves the new line common", () => {
+    const { old, new: neu } = computeSuggestionDiff("a b c", "a c");
+
+    expect(join(old)).toBe("a b c");
+    expect(join(neu)).toBe("a c");
+
+    // The deleted word is the only changed fragment on the old side.
+    expect(old).toContainEqual({ text: "b ", changed: true });
+    expect(changed(old)).toEqual(["b "]);
+    // New line has no changed fragment (nothing was added).
+    expect(changed(neu)).toEqual([]);
+  });
+
+  it("marks everything common for identical strings", () => {
+    const { old, new: neu } = computeSuggestionDiff("hello", "hello");
+
+    expect(old).toEqual([{ text: "hello", changed: false }]);
+    expect(neu).toEqual([{ text: "hello", changed: false }]);
+    expect(changed(old)).toEqual([]);
+    expect(changed(neu)).toEqual([]);
+  });
+});
@@ -1,3 +1,4 @@
+import { diffWordsWithSpace, diffChars } from "diff";
 import { IComment } from "@/features/comment/types/comment.types";

 // Whether the suggested-edit (#315) "Apply" button should be shown for a
@@ -12,3 +13,127 @@ export function canShowApply(comment: IComment, canEdit?: boolean): boolean {
      !comment.parentCommentId,
  );
 }
+
+// One contiguous run of text within a suggestion's "before" or "after" line.
+// `changed` marks the fragment that actually differs from the other side, so
+// the UI can emphasise only the intraline delta (git/GitHub-style) instead of
+// the whole line.
+export interface Segment {
+  text: string;
+  changed: boolean;
+}
+
+// A pure "before -> after" intraline diff (#331): the old line split into
+// common vs. removed-and-changed fragments, and the new line split into common
+// vs. added-and-changed fragments. Concatenating each side's `text` reproduces
+// the original strings.
+export interface SuggestionDiff {
+  old: Segment[];
+  new: Segment[];
+}
+
+// Push a segment, coalescing runs of the same `changed` flag on the same side
+// so the render emits as few spans as possible and tests stay predictable.
+function pushSegment(segments: Segment[], text: string, changed: boolean): void {
+  if (text === "") return;
+  const last = segments[segments.length - 1];
+  if (last && last.changed === changed) {
+    last.text += text;
+  } else {
+    segments.push({ text, changed });
+  }
+}
+
+// Compute an intraline diff between the old `selection` and the new
+// `suggestedText` of a suggestion. PURE — no React, no DOM, no I/O.
+//
+// Hybrid word + char algorithm (per #331):
+//   1. `diffWordsWithSpace` yields word-granular parts [{value, added, removed}].
+//   2. An ADJACENT removed+added pair (a word replacement) is refined with
+//      `diffChars`: shared characters stay common, differing characters are
+//      marked `changed` on their respective side. This is what keeps a
+//      one-letter edit (заведем -> заведём) from highlighting the whole word.
+//   3. A lone `added` (insertion) or lone `removed` (deletion) marks the whole
+//      fragment `changed`.
+//   4. An unchanged part is `common` on both sides.
+//
+// Rejected alternatives: pure `diffChars` is noisy on word swaps; pure
+// `diffWordsWithSpace` highlights the whole word rather than the changed letter.
+export function computeSuggestionDiff(
+  oldStr: string,
+  newStr: string,
+): SuggestionDiff {
+  const oldSegments: Segment[] = [];
+  const newSegments: Segment[] = [];
+
+  const parts = diffWordsWithSpace(oldStr, newStr);
+
+  for (let i = 0; i < parts.length; i++) {
+    const part = parts[i];
+    const next = parts[i + 1];
+
+    // A word replacement: a removed part immediately followed by an added part
+    // (or the reverse). Refine it character-by-character so only the differing
+    // letters are highlighted while shared letters stay common.
+    const isReplacementPair =
+      next &&
+      ((part.removed && next.added) || (part.added && next.removed));
+
+    if (isReplacementPair) {
+      const removedPart = part.removed ? part : next;
+      const addedPart = part.added ? part : next;
+
+      const charParts = diffChars(removedPart.value, addedPart.value);
+      for (const cp of charParts) {
+        if (cp.added) {
+          pushSegment(newSegments, cp.value, true);
+        } else if (cp.removed) {
+          pushSegment(oldSegments, cp.value, true);
+        } else {
+          // Shared character: common on both sides.
+          pushSegment(oldSegments, cp.value, false);
+          pushSegment(newSegments, cp.value, false);
+        }
+      }
+
+      i++; // consume the paired part as well
+      continue;
+    }
+
+    if (part.added) {
+      // Lone insertion: only present in the new line, wholly changed.
+      pushSegment(newSegments, part.value, true);
+    } else if (part.removed) {
+      // Lone deletion: only present in the old line, wholly changed.
+      pushSegment(oldSegments, part.value, true);
+    } else {
+      // Unchanged: common on both sides.
+      pushSegment(oldSegments, part.value, false);
+      pushSegment(newSegments, part.value, false);
+    }
+  }
+
+  return { old: oldSegments, new: newSegments };
+}
+
+// Whether the suggested-edit (#329) "Не применять" (Dismiss) button should be
+// shown. Dismiss does NOT change the page text (so it needs only canComment, not
+// canEdit), BUT a childless dismiss IRREVERSIBLY hard-deletes the comment, so the
+// server gates it on comment-owner-OR-space-admin (#338 F5). The button must
+// mirror that authz or a non-owner non-admin sees a live Dismiss that always
+// 403s → red error. Hence isOwnerOrAdmin is required IN ADDITION to canComment.
+// Same not-applied/not-resolved/top-level conditions as Apply.
+export function canShowDismiss(
+  comment: IComment,
+  canComment?: boolean,
+  isOwnerOrAdmin?: boolean,
+): boolean {
+  return Boolean(
+    canComment &&
+      isOwnerOrAdmin &&
+      comment.suggestedText &&
+      !comment.suggestionAppliedAt &&
+      !comment.resolvedAt &&
+      !comment.parentCommentId,
+  );
+}
@@ -11,9 +11,19 @@ import {
 import { extractPageSlugId } from "@/lib";
 import classes from "./mention.module.css";

-export default function MentionView(props: NodeViewProps) {
-  const { node } = props;
-  const { label, entityType, entityId, slugId, anchorId } = node.attrs;
+interface MentionAttrs {
+  label?: string;
+  entityType?: string;
+  entityId?: string;
+  slugId?: string;
+  anchorId?: string;
+}
+
+// Presentational mention renderer (no NodeViewWrapper). Shared by the editor
+// NodeView (MentionView) and the static comment renderer (CommentContentView)
+// so mention click/nav/icon behavior stays identical outside of an editor.
+export function MentionContent({ attrs }: { attrs: MentionAttrs }) {
+  const { label, entityType, slugId, anchorId } = attrs;
  const isPageMention = entityType === "page";
  const { spaceSlug, pageSlug } = useParams();
  const { shareId } = useParams();
@@ -56,7 +66,7 @@ export default function MentionView(props: NodeViewProps) {
  });

  return (
-    <NodeViewWrapper style={{ display: "inline" }} data-drag-handle>
+    <>
      {entityType === "user" && (
        <Text className={classes.userMention} component="span">
          @{label}
@@ -139,6 +149,14 @@ export default function MentionView(props: NodeViewProps) {
          </span>
        </Anchor>
      )}
+    </>
+  );
+}
+
+export default function MentionView(props: NodeViewProps) {
+  return (
+    <NodeViewWrapper style={{ display: "inline" }} data-drag-handle>
+      <MentionContent attrs={props.node.attrs} />
    </NodeViewWrapper>
  );
 }
@@ -93,6 +93,11 @@ import {
  isBodyEditable,
  isCollabSynced,
 } from "@/features/editor/editor-sync-state";
+import {
+  isVitalsActive,
+  measurePageOpen,
+  reportEditorTx,
+} from "@/lib/telemetry/vitals";

 interface PageEditorProps {
  pageId: string;
@@ -351,6 +356,40 @@ export default function PageEditor({
          editor.storage.pageId = pageId;
          handleScrollTo(editor);
          editorRef.current = editor;
+
+          // #355 — perf instrumentation. Skip ALL of it when telemetry is
+          // disabled (F1 flag off) or this session isn't sampled: no page-open
+          // measure, and crucially NO dispatch wrapping, so a non-collecting
+          // session pays zero per-transaction cost.
+          if (isVitalsActive()) {
+            // page_open_ms: this is the first editor-content render, so measure
+            // against any page-open mark set on the tree-row/link click.
+            measurePageOpen();
+
+            // editor_tx_ms: time the SYNCHRONOUS part of applying each
+            // transaction (state.apply + updateState) by wrapping the view's
+            // dispatch. Only slow syncs (>8ms) are reported (see reportEditorTx),
+            // so the common path adds just one performance.now() pair. Passive:
+            // the original dispatch still runs unchanged.
+            try {
+              const view = editor.view as unknown as {
+                dispatch: (tr: unknown) => void;
+              };
+              const originalDispatch = view.dispatch.bind(view);
+              view.dispatch = (tr: unknown) => {
+                const started = performance.now();
+                originalDispatch(tr);
+                const elapsed = performance.now() - started;
+                try {
+                  reportEditorTx(elapsed, editor.state.doc.content.size);
+                } catch {
+                  // never let telemetry break editing
+                }
+              };
+            } catch {
+              // if the view shape changes, skip editor_tx instrumentation
+            }
+          }
        }
      },
      onUpdate({ editor }) {
@@ -1,5 +1,5 @@
 import { useCallback } from "react";
-import { useAtom, useStore } from "jotai";
+import { useAtom, useSetAtom, useStore } from "jotai";
 import { notifications } from "@mantine/notifications";
 import { useTranslation } from "react-i18next";
 import { useNavigate, useParams } from "react-router-dom";
@@ -20,6 +20,7 @@ import {
 } from "@/features/page/queries/page-query.ts";
 import { buildPageUrl } from "@/features/page/page.utils.ts";
 import { getSpaceUrl } from "@/lib/config.ts";
+import { mobileSidebarAtom } from "@/components/layouts/global/hooks/atoms/sidebar-atom.ts";

 export type UseTreeMutation = {
  handleMove: (sourceId: string, op: DropOp) => Promise<void>;
@@ -43,6 +44,7 @@ export function useTreeMutation(spaceId: string): UseTreeMutation {
  const removePageMutation = useRemovePageMutation();
  const movePageMutation = useMovePageMutation();
  const navigate = useNavigate();
+  const setMobileSidebar = useSetAtom(mobileSidebarAtom);
  const { spaceSlug, pageSlug } = useParams();

  const handleMove = useCallback(
@@ -201,8 +203,23 @@ export function useTreeMutation(spaceId: string): UseTreeMutation {
        createdPage.title,
      );
      navigate(pageUrl);
+      // On mobile the create action is triggered from inside the off-canvas
+      // sidebar drawer (space sidebar "+", tree-row "add subpage"). Navigating
+      // alone leaves that drawer open on top of the freshly created page, so the
+      // editor stays hidden behind the tree. Close it here so the new page opens
+      // in the editor — mirrors the row-click drawer-close in space-tree-row.
+      // No-op on desktop, where the mobile drawer atom is already false.
+      setMobileSidebar(false);
    },
-    [spaceId, createPageMutation, setData, store, navigate, spaceSlug],
+    [
+      spaceId,
+      createPageMutation,
+      setData,
+      store,
+      navigate,
+      spaceSlug,
+      setMobileSidebar,
+    ],
  );

  const handleRename = useCallback(
@@ -394,6 +394,10 @@ export default function AiProviderSettings() {
    useState<boolean>(
      workspace?.settings?.ai?.publicShareAssistant ?? false,
    );
+  // #184: detached/autonomous agent runs (settings.ai.autonomousRuns).
+  const [autonomousRunsEnabled, setAutonomousRunsEnabled] = useState<boolean>(
+    workspace?.settings?.ai?.autonomousRuns ?? false,
+  );
  const [chatToggleLoading, setChatToggleLoading] = useState(false);
  const [searchToggleLoading, setSearchToggleLoading] = useState(false);
  const [dictationToggleLoading, setDictationToggleLoading] = useState(false);
@@ -403,6 +407,8 @@ export default function AiProviderSettings() {
    publicShareAssistantToggleLoading,
    setPublicShareAssistantToggleLoading,
  ] = useState(false);
+  const [autonomousRunsToggleLoading, setAutonomousRunsToggleLoading] =
+    useState(false);

  // Whether a key is currently stored server-side (drives the placeholder).
  const [hasApiKey, setHasApiKey] = useState(false);
@@ -730,6 +736,37 @@ export default function AiProviderSettings() {
    }
  }

+  // Optimistic toggle for detached/autonomous agent runs
+  // (settings.ai.autonomousRuns). When on, a chat turn becomes a server-side run
+  // that survives a browser disconnect and can be reconnected to / live-followed;
+  // only an explicit Stop ends it. Off by default; single-instance-only in phase 1.
+  async function handleToggleAutonomousRuns(value: boolean) {
+    setAutonomousRunsToggleLoading(true);
+    const previous = autonomousRunsEnabled;
+    setAutonomousRunsEnabled(value);
+    try {
+      const updated = await updateWorkspace({ autonomousRuns: value });
+      setWorkspace({
+        ...updated,
+        settings: {
+          ...updated.settings,
+          ai: { ...updated.settings?.ai, autonomousRuns: value },
+        },
+      });
+      notifications.show({ message: t("Updated successfully") });
+    } catch (err) {
+      setAutonomousRunsEnabled(previous);
+      const message = (err as { response?: { data?: { message?: string } } })
+        ?.response?.data?.message;
+      notifications.show({
+        message: message ?? t("Failed to update data"),
+        color: "red",
+      });
+    } finally {
+      setAutonomousRunsToggleLoading(false);
+    }
+  }
+
  // Admins only — match the previous behavior.
  if (!isAdmin) {
    return (
@@ -960,6 +997,31 @@ export default function AiProviderSettings() {
          {...form.getInputProps("publicShareAssistantRoleId")}
        />

+        {/* Detached/autonomous agent runs: a chat turn becomes a server-side run
+            that survives a browser disconnect; only an explicit Stop ends it.
+            Single-instance-only in phase 1. */}
+        <Group justify="space-between" align="center" wrap="nowrap" mt="md">
+          <Stack gap={0}>
+            <Text fw={600} size="sm">
+              {t("Autonomous agent runs")}
+            </Text>
+            <Text size="xs" c="dimmed">
+              {t(
+                "Keep an agent turn running server-side even if the browser disconnects; reconnect and follow it on reopen. Single-instance deployments only.",
+              )}
+            </Text>
+          </Stack>
+          <Switch
+            label={t("Enabled")}
+            labelPosition="left"
+            checked={autonomousRunsEnabled}
+            disabled={autonomousRunsToggleLoading}
+            onChange={(e) =>
+              handleToggleAutonomousRuns(e.currentTarget.checked)
+            }
+          />
+        </Group>
+
        <Group mt="md" align="center">
          <Button
            variant="default"
@@ -26,6 +26,9 @@ export interface IWorkspace {
  aiDictation?: boolean;
  aiDictationStreaming?: boolean;
  aiPublicShareAssistant?: boolean;
+  // Write-only field for updateWorkspace({ autonomousRuns }). Read state lives at
+  // settings.ai.autonomousRuns.
+  autonomousRuns?: boolean;
  trashRetentionDays?: number;
  // Default lifetime (HOURS) for new temporary notes; frozen per-note at creation.
  temporaryNoteHours?: number;
@@ -65,6 +68,9 @@ export interface IWorkspaceAiSettings {
  dictation?: boolean;
  dictationStreaming?: boolean;
  publicShareAssistant?: boolean;
+  // #184: detached agent runs (a run survives a browser disconnect and can be
+  // reconnected to / live-followed on reopen). Gates the run-reconnect polling.
+  autonomousRuns?: boolean;
 }

 export interface IWorkspaceSharingSettings {
@@ -0,0 +1,107 @@
+import { describe, it, expect } from "vitest";
+import {
+  PALETTE,
+  avatarStyle,
+  avatarBackgroundCss,
+  normalizeName,
+  minPairwiseDistance,
+  relativeLuminance,
+  contrastRatio,
+  oklchToSrgb,
+  isInGamut,
+} from "./avatar-palette";
+
+/** Parse "#rrggbb" into sRGB components on the 0..1 scale relativeLuminance expects. */
+function hexToRgb01(hex: string): [number, number, number] {
+  return [
+    parseInt(hex.slice(1, 3), 16) / 255,
+    parseInt(hex.slice(3, 5), 16) / 255,
+    parseInt(hex.slice(5, 7), 16) / 255,
+  ];
+}
+
+describe("avatar-palette validation", () => {
+  it("palette colors stay distinguishable", () => {
+    // 0.06 in OKLab is ~4-5 JNDs — safely distinct at avatar size. If a future
+    // RINGS tweak drops this, "almost identical" colors would reappear.
+    expect(minPairwiseDistance().distance).toBeGreaterThanOrEqual(0.06);
+    expect(PALETTE.length).toBe(20);
+  });
+
+  it("every palette entry is WCAG-readable and in sRGB gamut", () => {
+    // white text = luminance 1, black text = luminance 0 (per buildPalette).
+    const textLum = { white: 1, black: 0 } as const;
+    for (const entry of PALETTE) {
+      expect(entry.hex).toMatch(/^#[0-9a-f]{6}$/);
+
+      // (a) The chosen text color really clears the code's 3:1 threshold on the
+      // actual background hex — recomputed independently from the hex, not from
+      // the build-time luminance. A slot that picked the wrong text (or a color
+      // too dim for either text) would fail here.
+      const hexLum = relativeLuminance(hexToRgb01(entry.hex));
+      const chosen = contrastRatio(textLum[entry.text], hexLum);
+      expect(chosen).toBeGreaterThanOrEqual(3);
+      // buildPalette prefers white and only falls back to black when white
+      // fails 3:1. Mirror that decision: black is used *only* when white would
+      // not clear the threshold — so a mis-assigned "black" on a dark color
+      // (where white was fine) fails here.
+      if (entry.text === "black") {
+        expect(contrastRatio(textLum.white, hexLum)).toBeLessThan(3);
+      }
+
+      // (b) The entry's OKLCH is inside the sRGB gamut after chroma clamping;
+      // an out-of-gamut slot (e.g. un-clamped chroma) would produce components
+      // outside [0,1] and fail here.
+      expect(isInGamut(oklchToSrgb(entry.L, entry.C, entry.h))).toBe(true);
+    }
+  });
+});
+
+describe("avatarStyle", () => {
+  it("name-to-avatar mapping is frozen (golden values)", () => {
+    // Golden slice: if this breaks, all existing avatars change — make sure
+    // that is intentional (a config change in avatar-palette.ts).
+    const s = avatarStyle("Backend Developer");
+    expect([s.bg, s.bg2, s.angleDeg]).toEqual(["#a55795", "#90355e", 150]);
+    expect(s.text).toBe("white");
+  });
+
+  it("is deterministic and normalizes the name", () => {
+    expect(avatarStyle("Researcher")).toEqual(avatarStyle("Researcher"));
+    // Casing, surrounding and repeated whitespace must not change the avatar.
+    expect(avatarStyle("  RESEARCHER ")).toEqual(avatarStyle("researcher"));
+    expect(avatarStyle("Backend   Developer")).toEqual(
+      avatarStyle("backend developer"),
+    );
+    expect(normalizeName("  PM ")).toBe("pm");
+  });
+
+  it("returns a valid base color, angle and matching text", () => {
+    const s = avatarStyle("Нарратор");
+    const idx = PALETTE.findIndex((e) => e.hex === s.bg);
+    expect(idx).toBe(s.paletteIndex);
+    expect(idx).toBeGreaterThanOrEqual(0); // bg is a palette entry
+    // Text color comes from the chosen palette entry.
+    expect(s.text).toBe(PALETTE[idx].text);
+    // Split angle is one of the SPLIT_ANGLE_STEPS (24) directions → multiples of 15.
+    expect(s.angleDeg % 15).toBe(0);
+    expect(s.angleDeg).toBeGreaterThanOrEqual(0);
+    expect(s.angleDeg).toBeLessThan(360);
+  });
+
+  it("distinguishes the agents that used to collide as violet", () => {
+    // "Структурный редактор" and "Фактчекер" looked identically violet before.
+    expect(avatarStyle("Структурный редактор")).not.toEqual(
+      avatarStyle("Фактчекер"),
+    );
+  });
+});
+
+describe("avatarBackgroundCss", () => {
+  it("renders a two-stop gradient with a soft boundary", () => {
+    const s = avatarStyle("Backend Developer");
+    expect(avatarBackgroundCss(s)).toBe(
+      "linear-gradient(150deg, #a55795 42%, #90355e 58%)",
+    );
+  });
+});
@@ -0,0 +1,267 @@
+/**
+ * Deterministic avatar backgrounds for agent roles.
+ *
+ * The palette is generated from scratch at module load in OKLCH (a perceptually
+ * uniform color space), so every value below is tunable: change the ring
+ * configuration or the partner shifts and the whole palette regenerates.
+ *
+ * Pipeline: name -> normalize -> cyrb53 hash -> split into independent fields:
+ *   - base color index (one of the validated palette colors)
+ *   - partner hue shift: analogous 20..45deg (either side), complementary 180deg,
+ *     or triadic +/-120deg — classic color-wheel schemes; partner is also darker
+ *   - split angle (SPLIT_ANGLE_STEPS directions, soft boundary)
+ * The same name always yields the same avatar, on any platform, forever.
+ */
+
+// ------------------------- Tunable configuration -------------------------
+
+export interface RingConfig {
+  /** OKLCH lightness, 0..1 */
+  L: number;
+  /** OKLCH chroma target; clamped down per-hue to fit the sRGB gamut */
+  C: number;
+  /** Hue of the first color in the ring, degrees */
+  hueStart: number;
+  /** Number of evenly spaced hues in the ring */
+  count: number;
+}
+
+/**
+ * Two lightness rings. 12 light + 8 dark = 20 base colors with a validated
+ * min pairwise deltaE-OK of ~0.066 (clearly distinguishable at avatar size).
+ * Don't add more hues per ring without re-checking minPairwiseDistance():
+ * beyond ~20-24 colors humans stop telling them apart reliably.
+ */
+const RINGS: readonly RingConfig[] = [
+  { L: 0.70, C: 0.14, hueStart: 15, count: 12 }, // light ring
+  { L: 0.57, C: 0.13, hueStart: 20, count: 8 },  // darker ring
+];
+
+/** Partner color: lightness shifted by this much (negative = darker) */
+const PARTNER_L_SHIFT = -0.10;
+/** Analogous scheme: hue shift magnitude range, degrees (inclusive, 5-deg steps) */
+const ANALOG_MIN_SHIFT = 20;
+const ANALOG_SHIFT_STEP = 5;
+const ANALOG_SHIFT_STEPS = 6; // 20, 25, 30, 35, 40, 45
+/** Complementary scheme: fixed hue shift, degrees */
+const COMPLEMENTARY_SHIFT = 180;
+/** Triadic scheme: fixed hue shift magnitude, degrees (either side) */
+const TRIADIC_SHIFT = 120;
+/** Number of split directions (24 -> 15deg per step) */
+const SPLIT_ANGLE_STEPS = 24;
+/** Position of the color boundary, percent of the gradient axis */
+const SPLIT_PERCENT = 50;
+/** Width of the soft transition zone around the boundary, percent (0 = hard edge) */
+const SPLIT_SOFTNESS = 16;
+
+// ------------------------- OKLCH -> sRGB math -------------------------
+// Matrices from Bjorn Ottosson's OKLab reference implementation.
+
+function oklabToLinearSrgb(L: number, a: number, b: number): [number, number, number] {
+  const l_ = L + 0.3963377774 * a + 0.2158037573 * b;
+  const m_ = L - 0.1055613458 * a - 0.0638541728 * b;
+  const s_ = L - 0.0894841775 * a - 1.2914855480 * b;
+  const l = l_ ** 3, m = m_ ** 3, s = s_ ** 3;
+  return [
+    +4.0767416621 * l - 3.3077115913 * m + 0.2309699292 * s,
+    -1.2684380046 * l + 2.6097574011 * m - 0.3413193965 * s,
+    -0.0041960863 * l - 0.7034186147 * m + 1.7076147010 * s,
+  ];
+}
+
+function gammaEncode(c: number): number {
+  return c <= 0.0031308 ? 12.92 * c : 1.055 * c ** (1 / 2.4) - 0.055;
+}
+
+export function oklchToSrgb(L: number, C: number, hDeg: number): [number, number, number] {
+  const h = (hDeg * Math.PI) / 180;
+  const [r, g, b] = oklabToLinearSrgb(L, C * Math.cos(h), C * Math.sin(h));
+  return [gammaEncode(r), gammaEncode(g), gammaEncode(b)];
+}
+
+export function isInGamut(rgb: readonly number[]): boolean {
+  return rgb.every((c) => c >= -1e-6 && c <= 1 + 1e-6);
+}
+
+/** Binary-search the max chroma <= C that fits into the sRGB gamut. */
+function clampChroma(L: number, C: number, hDeg: number): number {
+  if (isInGamut(oklchToSrgb(L, C, hDeg))) return C;
+  let lo = 0, hi = C;
+  for (let i = 0; i < 40; i++) {
+    const mid = (lo + hi) / 2;
+    if (isInGamut(oklchToSrgb(L, mid, hDeg))) lo = mid;
+    else hi = mid;
+  }
+  return lo;
+}
+
+function toHex(rgb: readonly number[]): string {
+  return (
+    "#" +
+    rgb
+      .map((c) => Math.round(Math.min(1, Math.max(0, c)) * 255).toString(16).padStart(2, "0"))
+      .join("")
+  );
+}
+
+/** WCAG relative luminance of an sRGB color (components 0..1). */
+export function relativeLuminance(rgb: readonly number[]): number {
+  const lin = rgb.map((c) => (c <= 0.04045 ? c / 12.92 : ((c + 0.055) / 1.055) ** 2.4));
+  return 0.2126 * lin[0] + 0.7152 * lin[1] + 0.0722 * lin[2];
+}
+
+export function contrastRatio(l1: number, l2: number): number {
+  return (Math.max(l1, l2) + 0.05) / (Math.min(l1, l2) + 0.05);
+}
+
+// ------------------------- Palette generation -------------------------
+
+export interface PaletteEntry {
+  /** Base background color */
+  hex: string;
+  /** OKLCH coordinates of the base color (used to derive partner colors) */
+  L: number;
+  C: number;
+  h: number;
+  /** Text/icon color with the best WCAG contrast on the base color */
+  text: "white" | "black";
+  /** OKLab coordinates of the base color (kept for validation) */
+  lab: readonly [number, number, number];
+}
+
+function buildPalette(): PaletteEntry[] {
+  const entries: PaletteEntry[] = [];
+  for (const ring of RINGS) {
+    const step = 360 / ring.count;
+    for (let i = 0; i < ring.count; i++) {
+      const h = (ring.hueStart + i * step) % 360;
+      const C = clampChroma(ring.L, ring.C, h);
+      const rgb = oklchToSrgb(ring.L, C, h);
+      const lum = relativeLuminance(rgb);
+      entries.push({
+        hex: toHex(rgb),
+        L: ring.L,
+        C,
+        h,
+        // White text needs >= 3:1 contrast; otherwise fall back to black.
+        text: contrastRatio(lum, 1) >= 3 ? "white" : "black",
+        lab: [
+          ring.L,
+          C * Math.cos((h * Math.PI) / 180),
+          C * Math.sin((h * Math.PI) / 180),
+        ],
+      });
+    }
+  }
+  return entries;
+}
+
+/** Partner color for the split: base hue shifted by shiftDeg, darker by PARTNER_L_SHIFT. */
+function partnerHex(entry: PaletteEntry, shiftDeg: number): string {
+  const h2 = (entry.h + shiftDeg + 360) % 360;
+  const L2 = entry.L + PARTNER_L_SHIFT;
+  return toHex(oklchToSrgb(L2, clampChroma(L2, entry.C, h2), h2));
+}
+
+/** Generated once at module load; regenerates on every build from the config above. */
+export const PALETTE: readonly PaletteEntry[] = buildPalette();
+
+// ------------------------- Name -> avatar style -------------------------
+
+/** Normalize so that "PM ", "pm" and "Pm" map to the same avatar. */
+export function normalizeName(name: string): string {
+  return name.normalize("NFC").trim().toLowerCase().replace(/\s+/g, " ");
+}
+
+/**
+ * cyrb53: deterministic 53-bit string hash with good avalanche.
+ * Pure JS, cross-platform — never use language built-in hashing here.
+ */
+function cyrb53(str: string, seed = 0): number {
+  let h1 = 0xdeadbeef ^ seed;
+  let h2 = 0x41c6ce57 ^ seed;
+  for (let i = 0; i < str.length; i++) {
+    const ch = str.charCodeAt(i);
+    h1 = Math.imul(h1 ^ ch, 2654435761);
+    h2 = Math.imul(h2 ^ ch, 1597334677);
+  }
+  h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
+  h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
+  return 4294967296 * (2097151 & h2) + (h1 >>> 0);
+}
+
+export interface AvatarStyle {
+  /** Index of the base color in PALETTE */
+  paletteIndex: number;
+  /** Base color hex */
+  bg: string;
+  /** Second color hex (split partner) */
+  bg2: string;
+  /** Signed hue shift of the partner, degrees (e.g. -35, +45, 180, -120) */
+  hueShift: number;
+  /** Direction of the split, degrees */
+  angleDeg: number;
+  /** Text/icon color for the base color */
+  text: "white" | "black";
+}
+
+/** Pure function: the same (normalized) name always returns the same style. */
+export function avatarStyle(agentName: string): AvatarStyle {
+  const h = cyrb53(normalizeName(agentName));
+  // Slice the hash into independent fields, like digits of a number:
+  const paletteIndex = h % PALETTE.length;
+  let rest = Math.floor(h / PALETTE.length);
+  const angleDeg = (rest % SPLIT_ANGLE_STEPS) * (360 / SPLIT_ANGLE_STEPS);
+  rest = Math.floor(rest / SPLIT_ANGLE_STEPS);
+  // Scheme: 0,1 -> analogous (minus/plus); 2 -> complementary; 3 -> triadic
+  const scheme = rest % 4;
+  rest = Math.floor(rest / 4);
+  let hueShift: number;
+  if (scheme === 2) {
+    hueShift = COMPLEMENTARY_SHIFT;
+  } else if (scheme === 3) {
+    hueShift = rest % 2 ? TRIADIC_SHIFT : -TRIADIC_SHIFT;
+  } else {
+    const magnitude = ANALOG_MIN_SHIFT + (rest % ANALOG_SHIFT_STEPS) * ANALOG_SHIFT_STEP;
+    hueShift = scheme === 0 ? -magnitude : magnitude;
+  }
+  const entry = PALETTE[paletteIndex];
+  return {
+    paletteIndex,
+    bg: entry.hex,
+    bg2: partnerHex(entry, hueShift),
+    hueShift,
+    angleDeg,
+    text: entry.text,
+  };
+}
+
+/** CSS background value: two colors with a slightly blurred boundary. */
+export function avatarBackgroundCss(style: AvatarStyle): string {
+  const from = SPLIT_PERCENT - SPLIT_SOFTNESS / 2;
+  const to = SPLIT_PERCENT + SPLIT_SOFTNESS / 2;
+  return `linear-gradient(${style.angleDeg}deg, ${style.bg} ${from}%, ${style.bg2} ${to}%)`;
+}
+
+// ------------------------- Validation -------------------------
+
+/**
+ * Min pairwise deltaE-OK (euclidean distance in OKLab) between base colors.
+ * Re-check after tweaking RINGS: keep it >= ~0.06 so no two palette colors
+ * look alike. Intended for a unit test or a dev-time assertion.
+ */
+export function minPairwiseDistance(): { distance: number; pair: [string, string] } {
+  let min = Infinity;
+  let pair: [string, string] = ["", ""];
+  for (let i = 0; i < PALETTE.length; i++) {
+    for (let j = i + 1; j < PALETTE.length; j++) {
+      const a = PALETTE[i].lab, b = PALETTE[j].lab;
+      const d = Math.hypot(a[0] - b[0], a[1] - b[1], a[2] - b[2]);
+      if (d < min) {
+        min = d;
+        pair = [PALETTE[i].hex, PALETTE[j].hex];
+      }
+    }
+  }
+  return { distance: min, pair };
+}
@@ -47,6 +47,13 @@ export function isCompactPageTreeEnabled(): boolean {
  return castToBoolean(getConfigValue("COMPACT_PAGE_TREE", "true"));
 }

+// #355 — operator toggle for client perf-telemetry. DEFAULT OFF: the server
+// mirrors CLIENT_TELEMETRY_ENABLED into window.CONFIG; when off the client
+// installs no observers and sends nothing (the sink endpoint doesn't exist).
+export function isClientTelemetryEnabled(): boolean {
+  return castToBoolean(getConfigValue("CLIENT_TELEMETRY_ENABLED", "false"));
+}
+
 export function getAvatarUrl(
  avatarUrl: string,
  type: AvatarIconType = AvatarIconType.AVATAR,
@@ -0,0 +1,35 @@
+import { describe, it, expect } from "vitest";
+import { templateRoute } from "./route-template";
+
+describe("templateRoute", () => {
+  it("templates a space page path (never leaks slugs)", () => {
+    const t = templateRoute("/s/engineering/p/design-doc-abc123");
+    expect(t).toBe("/s/:space/p/:slug");
+    expect(t).not.toContain("engineering");
+    expect(t).not.toContain("design-doc");
+  });
+
+  it("templates share, redirect and space paths", () => {
+    expect(templateRoute("/share/abc/p/xyz")).toBe("/share/:shareId/p/:slug");
+    expect(templateRoute("/share/p/xyz")).toBe("/share/p/:slug");
+    expect(templateRoute("/p/some-slug")).toBe("/p/:slug");
+    expect(templateRoute("/s/team")).toBe("/s/:space");
+    expect(templateRoute("/s/team/trash")).toBe("/s/:space/trash");
+    expect(templateRoute("/labels/urgent")).toBe("/labels/:label");
+  });
+
+  it("keeps known static routes verbatim", () => {
+    expect(templateRoute("/home")).toBe("/home");
+    expect(templateRoute("/settings/members")).toBe("/settings/members");
+    expect(templateRoute("/")).toBe("/");
+  });
+
+  it("normalises a trailing slash", () => {
+    expect(templateRoute("/s/team/p/slug/")).toBe("/s/:space/p/:slug");
+  });
+
+  it("collapses unknown paths to 'other' (bounded cardinality)", () => {
+    expect(templateRoute("/weird/unknown/thing")).toBe("other");
+    expect(templateRoute("/s/team/p/slug/extra/segments")).toBe("other");
+  });
+});
@@ -0,0 +1,70 @@
+/**
+ * Map a raw pathname to a BOUNDED route TEMPLATE (#355).
+ *
+ * Perf metrics must be labelled by route template only — never a raw path with
+ * slugs/ids — so the server-side `route` column and any downstream aggregation
+ * stay low-cardinality and carry NO page slugs/titles (privacy). Anything that
+ * does not match a known pattern collapses to `other`.
+ *
+ * The template vocabulary mirrors the issue's example (`/s/:space/p/:slug`).
+ */
+const ROUTE_PATTERNS: { re: RegExp; template: string }[] = [
+  // Share pages (public).
+  { re: /^\/share\/[^/]+\/p\/[^/]+$/, template: '/share/:shareId/p/:slug' },
+  { re: /^\/share\/p\/[^/]+$/, template: '/share/p/:slug' },
+  { re: /^\/share\/[^/]+$/, template: '/share/:shareId' },
+  // Page redirect.
+  { re: /^\/p\/[^/]+$/, template: '/p/:slug' },
+  // Space + page.
+  { re: /^\/s\/[^/]+\/p\/[^/]+$/, template: '/s/:space/p/:slug' },
+  { re: /^\/s\/[^/]+\/trash$/, template: '/s/:space/trash' },
+  { re: /^\/s\/[^/]+$/, template: '/s/:space' },
+  // Misc dynamic.
+  { re: /^\/labels\/[^/]+$/, template: '/labels/:label' },
+  { re: /^\/invites\/[^/]+$/, template: '/invites/:invitationId' },
+  { re: /^\/settings\/groups\/[^/]+$/, template: '/settings/groups/:groupId' },
+];
+
+// Static routes we accept verbatim (finite set).
+const STATIC_ROUTES = new Set<string>([
+  '/home',
+  '/spaces',
+  '/favorites',
+  '/login',
+  '/forgot-password',
+  '/password-reset',
+  '/setup/register',
+  '/settings/account/profile',
+  '/settings/account/preferences',
+  '/settings/workspace',
+  '/settings/ai',
+  '/settings/members',
+  '/settings/groups',
+  '/settings/spaces',
+  '/settings/sharing',
+]);
+
+export function templateRoute(pathname: string): string {
+  // Normalise a trailing slash (except root).
+  const path =
+    pathname.length > 1 && pathname.endsWith('/')
+      ? pathname.slice(0, -1)
+      : pathname;
+
+  if (path === '' || path === '/') return '/';
+  if (STATIC_ROUTES.has(path)) return path;
+
+  for (const { re, template } of ROUTE_PATTERNS) {
+    if (re.test(path)) return template;
+  }
+  return 'other';
+}
+
+/** Template for the current window location. */
+export function currentRouteTemplate(): string {
+  try {
+    return templateRoute(window.location.pathname);
+  } catch {
+    return 'other';
+  }
+}
@@ -0,0 +1,290 @@
+import {
+  onCLS,
+  onINP,
+  onLCP,
+  onTTFB,
+  type CLSMetricWithAttribution,
+  type INPMetricWithAttribution,
+  type LCPMetricWithAttribution,
+  type TTFBMetricWithAttribution,
+} from "web-vitals/attribution";
+import { isClientTelemetryEnabled } from "@/lib/config";
+import { currentRouteTemplate } from "./route-template";
+
+/**
+ * Client perf-telemetry (#355): web-vitals + custom metrics buffered and posted
+ * to POST /api/telemetry/vitals via sendBeacon.
+ *
+ * Design constraints from the issue:
+ *  - Sampling is decided ONCE per session (25%), cached in sessionStorage,
+ *    BEFORE any observer is subscribed. Non-sampled sessions send nothing.
+ *  - Route labels are TEMPLATES only; attr is truncated to 120 chars; no page
+ *    titles/slugs/text ever leave the browser.
+ *  - Observers are passive and reporting is best-effort — telemetry must not
+ *    degrade the perf it measures.
+ */
+
+const ENDPOINT = "/api/telemetry/vitals";
+const SAMPLE_RATE = 0.25;
+const SAMPLE_KEY = "gm_vitals_sampled";
+const FLUSH_INTERVAL_MS = 15_000;
+const MAX_BUFFER = 40; // flush early if the buffer fills between timers
+const MAX_ATTR_LENGTH = 120;
+const EDITOR_TX_MIN_MS = 8; // only report editor transactions slower than this
+
+const ALLOWED_NAMES = new Set([
+  "INP",
+  "LCP",
+  "CLS",
+  "TTFB",
+  "editor_tx_ms",
+  "page_open_ms",
+  "longtask_ms",
+]);
+
+interface VitalEvent {
+  name: string;
+  value: number;
+  rating?: string;
+  route?: string;
+  attr?: string;
+  docSize?: number;
+}
+
+let sampledCache: boolean | null = null;
+let initialised = false;
+let buffer: VitalEvent[] = [];
+let longtaskSum = 0; // accumulated longtask duration (ms) for the current window
+
+/**
+ * Decide once per session whether this session is sampled. Cached in
+ * sessionStorage so the choice is stable across reloads within the session and
+ * identical for every observer/custom-metric caller.
+ */
+export function isVitalsSampled(): boolean {
+  if (sampledCache !== null) return sampledCache;
+  try {
+    const stored = sessionStorage.getItem(SAMPLE_KEY);
+    if (stored === "1") return (sampledCache = true);
+    if (stored === "0") return (sampledCache = false);
+    const sampled = Math.random() < SAMPLE_RATE;
+    sessionStorage.setItem(SAMPLE_KEY, sampled ? "1" : "0");
+    return (sampledCache = sampled);
+  } catch {
+    // sessionStorage unavailable (private mode / SSR): default to not sampled.
+    return (sampledCache = false);
+  }
+}
+
+/**
+ * True only when telemetry is BOTH enabled by the operator (F1 flag) AND this
+ * session is sampled. Callers outside initVitals (e.g. the editor dispatch
+ * wrapper) use this to skip ALL instrumentation cost on disabled/non-sampled
+ * sessions — no observers, no per-transaction timing.
+ */
+export function isVitalsActive(): boolean {
+  return isClientTelemetryEnabled() && isVitalsSampled();
+}
+
+function truncateAttr(value: unknown): string | undefined {
+  if (typeof value !== "string" || value.length === 0) return undefined;
+  return value.slice(0, MAX_ATTR_LENGTH);
+}
+
+function enqueue(event: VitalEvent): void {
+  if (!ALLOWED_NAMES.has(event.name)) return;
+  if (!Number.isFinite(event.value)) return;
+  buffer.push(event);
+  if (buffer.length >= MAX_BUFFER) flush();
+}
+
+function flush(): void {
+  // Fold any pending longtask total into the batch first.
+  if (longtaskSum > 0) {
+    buffer.push({
+      name: "longtask_ms",
+      value: Math.round(longtaskSum),
+      route: currentRouteTemplate(),
+    });
+    longtaskSum = 0;
+  }
+  if (buffer.length === 0) return;
+
+  const payload = JSON.stringify({ events: buffer });
+  buffer = [];
+
+  try {
+    const blob = new Blob([payload], { type: "application/json" });
+    if (navigator.sendBeacon && navigator.sendBeacon(ENDPOINT, blob)) return;
+    // Fallback for browsers without sendBeacon: keepalive fetch.
+    void fetch(ENDPOINT, {
+      method: "POST",
+      body: payload,
+      headers: { "Content-Type": "application/json" },
+      keepalive: true,
+    }).catch(() => undefined);
+  } catch {
+    // Best-effort: never throw out of telemetry.
+  }
+}
+
+/**
+ * Report a custom client metric (editor_tx_ms, page_open_ms). No-op unless the
+ * session is sampled. Route is always the current TEMPLATE.
+ */
+export function reportClientMetric(
+  name: "editor_tx_ms" | "page_open_ms",
+  value: number,
+  extra?: { docSize?: number },
+): void {
+  if (!isVitalsActive()) return;
+  if (!Number.isFinite(value)) return;
+  enqueue({
+    name,
+    value,
+    route: currentRouteTemplate(),
+    docSize: extra?.docSize,
+  });
+}
+
+/** Threshold-gated editor transaction reporter (only reports slow syncs). */
+export function reportEditorTx(ms: number, docSize: number): void {
+  if (ms <= EDITOR_TX_MIN_MS) return;
+  reportClientMetric("editor_tx_ms", ms, { docSize });
+}
+
+const PAGE_OPEN_MARK = "gm_page_open_start";
+
+/** Mark the start of a page-open interaction (tree-row / link click). */
+export function markPageOpenStart(): void {
+  try {
+    performance.clearMarks(PAGE_OPEN_MARK);
+    performance.mark(PAGE_OPEN_MARK);
+  } catch {
+    // ignore
+  }
+}
+
+/**
+ * Measure page_open_ms at first editor-content render, if a start mark exists.
+ * Consumes the mark so a later render doesn't double-count.
+ */
+export function measurePageOpen(): void {
+  try {
+    const marks = performance.getEntriesByName(PAGE_OPEN_MARK, "mark");
+    if (marks.length === 0) return;
+    const started = marks[0].startTime;
+    const elapsed = performance.now() - started;
+    performance.clearMarks(PAGE_OPEN_MARK);
+    if (elapsed > 0 && Number.isFinite(elapsed)) {
+      reportClientMetric("page_open_ms", elapsed);
+    }
+  } catch {
+    // ignore
+  }
+}
+
+function attrTarget(
+  metric:
+    | INPMetricWithAttribution
+    | LCPMetricWithAttribution
+    | CLSMetricWithAttribution,
+): string | undefined {
+  const a = metric.attribution as Record<string, unknown> | undefined;
+  if (!a) return undefined;
+  // Different vitals expose their culprit element under different keys; only a
+  // CSS-selector-ish target string is taken (no text content / titles).
+  return (
+    truncateAttr(a.interactionTarget) ??
+    truncateAttr(a.element) ??
+    truncateAttr(a.largestShiftTarget) ??
+    undefined
+  );
+}
+
+/**
+ * Initialise client telemetry. Safe to call multiple times (idempotent). Returns
+ * immediately without subscribing when the session is not sampled — so a
+ * non-sampled session subscribes to NO observers and sends nothing.
+ */
+export function initVitals(): void {
+  if (initialised) return;
+  initialised = true;
+
+  // Operator flag gate (F1, default OFF): when telemetry is disabled the sink
+  // endpoint does not even exist server-side, so install ZERO observers.
+  if (!isClientTelemetryEnabled()) return;
+
+  // Sampling gate is evaluated BEFORE any observer subscription.
+  if (!isVitalsSampled()) return;
+
+  const report = (
+    metric:
+      | INPMetricWithAttribution
+      | LCPMetricWithAttribution
+      | CLSMetricWithAttribution
+      | TTFBMetricWithAttribution,
+  ) => {
+    enqueue({
+      name: metric.name,
+      value: metric.value,
+      rating: metric.rating,
+      route: currentRouteTemplate(),
+      attr:
+        metric.name === "TTFB"
+          ? undefined
+          : attrTarget(
+              metric as
+                | INPMetricWithAttribution
+                | LCPMetricWithAttribution
+                | CLSMetricWithAttribution,
+            ),
+    });
+  };
+
+  onINP(report);
+  onLCP(report);
+  onCLS(report);
+  onTTFB(report);
+
+  // Long tasks: aggregate the total blocking time per flush window (a passive
+  // observer; individual entries are summed, never stored/sent individually).
+  try {
+    if (typeof PerformanceObserver !== "undefined") {
+      const observer = new PerformanceObserver((list) => {
+        for (const entry of list.getEntries()) {
+          longtaskSum += entry.duration;
+        }
+      });
+      observer.observe({ type: "longtask", buffered: true });
+    }
+  } catch {
+    // longtask entry type unsupported: skip silently.
+  }
+
+  // page_open_ms start: mark when the user clicks a page link/tree-row (any
+  // anchor navigating to a page URL). Passive capture listener; the matching
+  // measure fires at first editor-content render (measurePageOpen). No page
+  // titles/slugs are read — only the click timing is marked.
+  document.addEventListener(
+    "click",
+    (event) => {
+      const target = event.target as Element | null;
+      const anchor = target?.closest?.("a[href]") as HTMLAnchorElement | null;
+      if (!anchor) return;
+      const href = anchor.getAttribute("href") ?? "";
+      // A page link is `/s/:space/p/:slug`, `/p/:slug` or a share page path.
+      if (/\/p\//.test(href)) markPageOpenStart();
+    },
+    { capture: true, passive: true },
+  );
+
+  // Flush on tab hide (most reliable delivery point) and periodically.
+  const onHidden = () => {
+    if (document.visibilityState === "hidden") flush();
+  };
+  document.addEventListener("visibilitychange", onHidden);
+  window.addEventListener("pagehide", flush);
+
+  setInterval(flush, FLUSH_INTERVAL_MS);
+}
@@ -22,6 +22,7 @@ import {
  isPostHogEnabled,
 } from "@/lib/config.ts";
 import posthog from "posthog-js";
+import { initVitals } from "@/lib/telemetry/vitals";

 export const queryClient = new QueryClient({
  defaultOptions: {
@@ -43,6 +44,10 @@ if (isCloud() && isPostHogEnabled) {
  });
 }

+// #355 — client perf-telemetry. Decides sampling ONCE (25%/session) before
+// subscribing to any observer; non-sampled sessions send nothing.
+initVitals();
+
 const container = document.getElementById("root") as HTMLElement;
 const root = (container as any).__reactRoot ??= ReactDOM.createRoot(container);

@@ -13,5 +13,22 @@ export default defineConfig({
    environment: 'jsdom',
    globals: true,
    setupFiles: ['./vitest.setup.ts'],
+    // Coverage gate (issue #324). v8 provider (not istanbul) so ESM barrels
+    // like `@docmost/editor-ext` are not re-parsed/instrumented. Thresholds are
+    // set a few points below the level measured on develop, scoped to the files
+    // the suite exercises (`all: false`) rather than the whole app, so the gate
+    // passes today but fails on a genuine coverage regression.
+    coverage: {
+      enabled: true,
+      provider: 'v8',
+      reporter: ['text-summary', 'text'],
+      all: false,
+      thresholds: {
+        statements: 55,
+        branches: 53,
+        functions: 44,
+        lines: 55,
+      },
+    },
  },
 });
@@ -23,7 +23,7 @@
    "migration:reset": "tsx src/database/migrate.ts down-to NO_MIGRATIONS",
    "migration:codegen": "kysely-codegen --dialect=postgres --camel-case --env-file=../../.env --out-file=./src/database/types/db.d.ts",
    "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix",
-    "pretest": "pnpm --filter @docmost/editor-ext build",
+    "pretest": "pnpm --filter @docmost/editor-ext build && pnpm --filter @docmost/prosemirror-markdown build",
    "test": "jest",
    "test:int": "jest --config test/jest-integration.json",
    "test:watch": "jest --watch",
@@ -43,6 +43,7 @@
    "@clickhouse/client": "^1.18.2",
    "@docmost/mcp": "workspace:*",
    "@docmost/pdf-inspector": "1.9.6",
+    "@docmost/prosemirror-markdown": "workspace:*",
    "@fastify/cookie": "^11.0.2",
    "@fastify/multipart": "^10.0.0",
    "@fastify/static": "^9.1.3",
@@ -111,6 +112,7 @@
    "pino-pretty": "^13.1.3",
    "postgres": "^3.4.8",
    "postmark": "^4.0.7",
+    "prom-client": "^15.1.3",
    "react": "^18.3.1",
    "react-email": "6.0.8",
    "reflect-metadata": "^0.2.2",
@@ -174,7 +176,7 @@
      "/node_modules/"
    ],
    "transform": {
-      "happy-dom.+\\.js$": [
+      "(happy-dom.+|prosemirror-markdown/build/.+)\\.js$": [
        "babel-jest",
        {
          "presets": [
@@ -192,7 +194,7 @@
      "^.+\\.(t|j)sx?$": "ts-jest"
    },
    "transformIgnorePatterns": [
-      "/node_modules/(?!(\\.pnpm/)?(nanoid|uuid|image-dimensions|marked|happy-dom|lib0)(@|/))"
+      "/node_modules/(?!(\\.pnpm/)?(nanoid|uuid|image-dimensions|marked|happy-dom|lib0|@docmost/prosemirror-markdown)(@|/))"
    ],
    "collectCoverageFrom": [
      "**/*.(t|j)s"
@@ -203,7 +205,8 @@
      "^@docmost/db/(.*)$": "<rootDir>/database/$1",
      "^@docmost/transactional/(.*)$": "<rootDir>/integrations/transactional/$1",
      "^@docmost/ee/(.*)$": "<rootDir>/ee/$1",
-      "^src/(.*)$": "<rootDir>/$1"
+      "^src/(.*)$": "<rootDir>/$1",
+      "^@tiptap/react$": "<rootDir>/../test/stubs/tiptap-react.js"
    }
  }
 }
@@ -31,6 +31,8 @@ import { McpModule } from './integrations/mcp/mcp.module';
 import { SandboxModule } from './integrations/sandbox/sandbox.module';
 import { AiModule } from './integrations/ai/ai.module';
 import { AiChatModule } from './core/ai-chat/ai-chat.module';
+import { MetricsModule } from './integrations/metrics/metrics.module';
+import { ClientTelemetryModule } from './core/telemetry/client-telemetry.module';

 const enterpriseModules = [];
 try {
@@ -93,6 +95,10 @@ try {
    SandboxModule,
    AiModule,
    AiChatModule,
+    MetricsModule,
+    // Gated OFF by default: only registers the public vitals sink controller
+    // when CLIENT_TELEMETRY_ENABLED=true (maintainer decision E1=B).
+    ClientTelemetryModule.register(),
    ...enterpriseModules,
  ],
  controllers: [AppController],
@@ -130,3 +130,59 @@ describe('CollaborationHandler.applyCommentSuggestion', () => {
    expect(value).toBe(42);
  });
 });
+
+describe('CollaborationHandler.deleteCommentMark', () => {
+  it('strips the comment mark for the given commentId (ephemeral suggestion #329)', async () => {
+    const doc = buildDocWithComment('Hello world', 'c1');
+    const { hocuspocus, connection } = fakeHocuspocus(doc);
+    const handler = new CollaborationHandler();
+    const handlers = handler.getHandlers(hocuspocus);
+
+    await handlers.deleteCommentMark('doc-1', { commentId: 'c1', user });
+
+    // The mark is gone; the text itself stays (deleting the anchor, not the run).
+    const xmlText = (
+      doc.getXmlFragment('default').get(0) as Y.XmlElement
+    ).get(0) as Y.XmlText;
+    expect(xmlText.toDelta()).toEqual([{ insert: 'Hello world' }]);
+    expect(connection.transact).toHaveBeenCalledTimes(1);
+    expect(connection.disconnect).toHaveBeenCalledTimes(1);
+  });
+
+  it('routes the removal through removeYjsMarkByAttribute with the right args', async () => {
+    const doc = buildDocWithComment('abc', 'c9');
+    const { hocuspocus } = fakeHocuspocus(doc);
+    const spy = jest.spyOn(yjsUtil, 'removeYjsMarkByAttribute');
+    const handler = new CollaborationHandler();
+    const handlers = handler.getHandlers(hocuspocus);
+
+    await handlers.deleteCommentMark('doc-1', { commentId: 'c9', user });
+
+    expect(spy).toHaveBeenCalledWith(
+      doc.getXmlFragment('default'),
+      'comment',
+      'commentId',
+      'c9',
+    );
+    spy.mockRestore();
+  });
+
+  it('leaves a different comment\'s mark intact', async () => {
+    const doc = buildDocWithComment('keep me', 'other');
+    const { hocuspocus } = fakeHocuspocus(doc);
+    const handler = new CollaborationHandler();
+    const handlers = handler.getHandlers(hocuspocus);
+
+    await handlers.deleteCommentMark('doc-1', { commentId: 'c1', user });
+
+    const xmlText = (
+      doc.getXmlFragment('default').get(0) as Y.XmlElement
+    ).get(0) as Y.XmlText;
+    expect(xmlText.toDelta()).toEqual([
+      {
+        insert: 'keep me',
+        attributes: { comment: { commentId: 'other', resolved: false } },
+      },
+    ]);
+  });
+});
@@ -6,6 +6,7 @@ import {
  tiptapExtensions,
 } from './collaboration.util';
 import {
+  removeYjsMarkByAttribute,
  replaceYjsMarkedText,
  setYjsMark,
  updateYjsMarkAttribute,
@@ -78,6 +79,40 @@ export class CollaborationHandler {
          },
        );
      },
+      deleteCommentMark: async (
+        documentName: string,
+        payload: {
+          commentId: string;
+          user: User;
+        },
+      ) => {
+        const { commentId, user } = payload;
+        // Ephemeral suggestions (#329): when a suggestion-edit is dismissed or an
+        // applied one has no replies, the comment is hard-deleted and its inline
+        // anchor must vanish too. Mirror resolveCommentMark exactly, but instead
+        // of flipping the mark's `resolved` attribute we STRIP the `comment` mark
+        // entirely via removeYjsMarkByAttribute so no orphan highlight remains in
+        // the collaborative document.
+        //
+        // Routing this through collaboration.gateway's handleYjsEvent means the
+        // COLLAB_DISABLE_REDIS path invokes this handler directly (never a silent
+        // no-op) and a missing live instance is a hard error — the same guarantee
+        // applyCommentSuggestion/resolveCommentMark rely on.
+        await this.withYdocConnection(
+          hocuspocus,
+          documentName,
+          { user },
+          (doc) => {
+            const fragment = doc.getXmlFragment('default');
+            removeYjsMarkByAttribute(
+              fragment,
+              'comment',
+              'commentId',
+              commentId,
+            );
+          },
+        );
+      },
      applyCommentSuggestion: async (
        documentName: string,
        payload: {
@@ -43,7 +43,6 @@ import {
  Column,
  Status,
  addUniqueIdsToDoc,
-  htmlToMarkdown,
  TransclusionSource,
  TransclusionReference,
  FootnoteReference,
@@ -51,6 +50,7 @@ import {
  FootnoteDefinition,
  PageEmbed,
 } from '@docmost/editor-ext';
+import { convertProseMirrorToMarkdown } from '@docmost/prosemirror-markdown';
 import { generateText, getSchema, JSONContent } from '@tiptap/core';
 import { generateHTML, generateJSON } from '../common/helpers/prosemirror/html';
 // @tiptap/html library works best for generating prosemirror json state but not HTML
@@ -239,6 +239,10 @@ export function prosemirrorNodeToYElement(node: any): Y.XmlElement | Y.XmlText {
 }

 export function jsonToMarkdown(tiptapJson: any): string {
-  const html = jsonToHtml(tiptapJson);
-  return htmlToMarkdown(html);
+  // Direct ProseMirror JSON -> Markdown via the canonical converter
+  // (`@docmost/prosemirror-markdown`) — no HTML intermediate, no second
+  // editor-ext markdown layer. Same serializer as the page/space export and the
+  // git-sync vault writer, so every server PM->MD path emits identical canonical
+  // markdown (issue #345).
+  return convertProseMirrorToMarkdown(tiptapJson);
 }
@@ -41,6 +41,7 @@ import {
  HISTORY_INTERVAL,
 } from '../constants';
 import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
+import { observeCollabStore } from '../../integrations/metrics/metrics.registry';

 /**
 * #251 — wire format of the client→server stateless message that signals a
@@ -192,6 +193,17 @@ export class PersistenceExtension implements Extension {
  }

  async onStoreDocument(data: onStoreDocumentPayload) {
+    // #355 — time the full store (persist + post-store side effects) into
+    // collab_store_duration_seconds. No-op when METRICS_PORT is unset.
+    const startedAt = performance.now();
+    try {
+      await this.storeDocument(data);
+    } finally {
+      observeCollabStore((performance.now() - startedAt) / 1000);
+    }
+  }
+
+  private async storeDocument(data: onStoreDocumentPayload) {
    const { documentName, document, context } = data;

    const pageId = getPageId(documentName);
@@ -52,6 +52,7 @@ export const AuditEvent = {
  COMMENT_RESOLVED: 'comment.resolved',
  COMMENT_REOPENED: 'comment.reopened',
  COMMENT_SUGGESTION_APPLIED: 'comment.suggestion_applied',
+  COMMENT_SUGGESTION_DISMISSED: 'comment.suggestion_dismissed',

  // Page
  PAGE_CREATED: 'page.created',
@@ -0,0 +1,527 @@
+import { Logger } from '@nestjs/common';
+import {
+  AiChatRunService,
+  RunAlreadyActiveError,
+  ONE_ACTIVE_RUN_PER_CHAT_INDEX,
+  mapTurnStatusToRun,
+} from './ai-chat-run.service';
+
+/** Shape a Postgres unique-violation the way the postgres.js driver surfaces it:
+ *  SQLSTATE 23505 + the offending index in `constraint_name`. */
+function uniqueViolation(constraintName: string): Error & {
+  code: string;
+  constraint_name: string;
+} {
+  return Object.assign(
+    new Error('duplicate key value violates unique constraint'),
+    {
+      code: '23505',
+      constraint_name: constraintName,
+    },
+  );
+}
+
+/**
+ * Unit coverage for the #184 phase-1 run lifecycle (AiChatRunService) with a
+ * hand-rolled mock repo — no Nest graph, no DB. The invariant under test is the
+ * one that makes a run "autonomous": a run keeps going when its SUBSCRIBER (the
+ * browser) detaches, and ONLY an explicit stop aborts it. We assert that at the
+ * abort-signal level (the signal the agent loop actually consumes).
+ */
+
+/** Minimal EnvironmentService stub. Single-instance (CLOUD unset) by default. */
+function makeEnv(isCloud = false) {
+  return { isCloud: () => isCloud };
+}
+
+function makeRepo(overrides: Record<string, jest.Mock> = {}) {
+  return {
+    insert: jest.fn(async (v: any) => ({
+      id: 'run-1',
+      status: v.status ?? 'running',
+      chatId: v.chatId,
+      workspaceId: v.workspaceId,
+    })),
+    update: jest.fn(async () => ({ id: 'run-1' })),
+    markStopRequested: jest.fn(async () => ({ id: 'run-1' })),
+    findActiveByChat: jest.fn(async () => undefined),
+    findLatestByChat: jest.fn(async () => undefined),
+    findById: jest.fn(async () => undefined),
+    sweepRunning: jest.fn(async () => 0),
+    ...overrides,
+  };
+}
+
+describe('mapTurnStatusToRun', () => {
+  it('maps the turn terminal status to the run terminal status', () => {
+    expect(mapTurnStatusToRun('completed')).toBe('succeeded');
+    expect(mapTurnStatusToRun('error')).toBe('failed');
+    expect(mapTurnStatusToRun('aborted')).toBe('aborted');
+  });
+});
+
+describe('AiChatRunService.onModuleInit (startup sweep)', () => {
+  afterEach(() => jest.restoreAllMocks());
+
+  it('calls sweepRunning and resolves; logs when > 0', async () => {
+    const repo = makeRepo({ sweepRunning: jest.fn(async () => 2) });
+    const logSpy = jest
+      .spyOn(Logger.prototype, 'log')
+      .mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await expect(svc.onModuleInit()).resolves.toBeUndefined();
+    expect(repo.sweepRunning).toHaveBeenCalledTimes(1);
+    expect(logSpy).toHaveBeenCalledTimes(1);
+    expect(String(logSpy.mock.calls[0][0])).toContain('2');
+  });
+
+  it('a sweep failure is swallowed (never blocks startup)', async () => {
+    const repo = makeRepo({
+      sweepRunning: jest.fn(async () => {
+        throw new Error('db down');
+      }),
+    });
+    const warnSpy = jest
+      .spyOn(Logger.prototype, 'warn')
+      .mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await expect(svc.onModuleInit()).resolves.toBeUndefined();
+    // The first warn is the sweep failure (the multi-instance warn never fires
+    // single-instance), so the message is the db error.
+    expect(String(warnSpy.mock.calls[0][0])).toContain('db down');
+  });
+
+  it('F1 (DECISION C): the boot sweep is UNCONDITIONAL — sweepRunning is called with NO staleness window, so a fresh running run (updatedAt = now) is settled, not skipped', async () => {
+    // The bug: a fast restart (deploy/OOM within minutes of the last step) left a
+    // run stuck 'running' under the old 10-min window, 409ing every later turn in
+    // the chat. The fix settles ALL pending|running on boot. We assert the service
+    // invokes sweepRunning with no `staleMs` (the unconditional path); the repo's
+    // own spec proves no-window => no updatedAt filter.
+    const repo = makeRepo({ sweepRunning: jest.fn(async () => 1) });
+    jest.spyOn(Logger.prototype, 'log').mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await svc.onModuleInit();
+    expect(repo.sweepRunning).toHaveBeenCalledTimes(1);
+    const callArgs = repo.sweepRunning.mock.calls[0] as unknown[];
+    const firstArg = callArgs[0] as { staleMs?: number } | undefined;
+    // Either no opts at all, or opts without a staleMs window => unconditional.
+    expect(firstArg?.staleMs).toBeUndefined();
+  });
+
+  it('F2 (DECISION A): warns at startup that autonomousRuns is single-instance-only when a horizontally-scaled deployment (CLOUD) is detected', async () => {
+    const repo = makeRepo();
+    const warnSpy = jest
+      .spyOn(Logger.prototype, 'warn')
+      .mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv(true) as never);
+    await svc.onModuleInit();
+    const warned = warnSpy.mock.calls.some((c) =>
+      /single-instance-only/i.test(String(c[0])),
+    );
+    expect(warned).toBe(true);
+  });
+
+  it('F2: does NOT warn about multi-instance on a single-instance (CLOUD unset) deployment', async () => {
+    const repo = makeRepo();
+    const warnSpy = jest
+      .spyOn(Logger.prototype, 'warn')
+      .mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv(false) as never);
+    await svc.onModuleInit();
+    const warned = warnSpy.mock.calls.some((c) =>
+      /single-instance-only/i.test(String(c[0])),
+    );
+    expect(warned).toBe(false);
+  });
+});
+
+describe('AiChatRunService run lifecycle', () => {
+  it('beginRun inserts a running row and registers a live abort controller', async () => {
+    const repo = makeRepo();
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    const handle = await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+    expect(repo.insert).toHaveBeenCalledWith(
+      expect.objectContaining({
+        chatId: 'chat-1',
+        workspaceId: 'ws-1',
+        createdBy: 'user-1',
+        status: 'running',
+        trigger: 'user',
+      }),
+    );
+    expect(handle.runId).toBe('run-1');
+    expect(handle.signal.aborted).toBe(false);
+    expect(svc.isLocallyActive('run-1')).toBe(true);
+  });
+
+  it('beginRun REJECTS the racer: a 23505 on the one-active-per-chat index throws RunAlreadyActiveError (not swallowed) and registers no controller', async () => {
+    // The race: the controller's cheap pre-check passed for BOTH concurrent
+    // turns, so the loser's INSERT hits the partial unique index. That rejection
+    // is the authoritative gate — it must surface, not be swallowed into an
+    // untracked turn.
+    const repo = makeRepo({
+      insert: jest.fn(async () => {
+        throw uniqueViolation(ONE_ACTIVE_RUN_PER_CHAT_INDEX);
+      }),
+    });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await expect(
+      svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }),
+    ).rejects.toBeInstanceOf(RunAlreadyActiveError);
+    // No controller leaked for a rejected start.
+    expect(svc.isLocallyActive('run-1')).toBe(false);
+  });
+
+  it('beginRun does NOT mask an unrelated unique violation as already-active', async () => {
+    // A 23505 on some OTHER constraint is a real bug, not the race — it must
+    // propagate unchanged so it is never silently treated as "already active".
+    const other = uniqueViolation('ai_chat_runs_pkey');
+    const repo = makeRepo({
+      insert: jest.fn(async () => {
+        throw other;
+      }),
+    });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await expect(
+      svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }),
+    ).rejects.toBe(other);
+  });
+
+  it('beginRun propagates a non-unique insert failure unchanged', async () => {
+    const boom = new Error('connection reset');
+    const repo = makeRepo({
+      insert: jest.fn(async () => {
+        throw boom;
+      }),
+    });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await expect(
+      svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }),
+    ).rejects.toBe(boom);
+  });
+
+  it('two concurrent begins on one chat: exactly one wins, the other is rejected as already-active', async () => {
+    // Integration-style: model the DB partial unique index with a one-shot slot.
+    // The first insert claims it; the second hits a 23505 on the active index.
+    let slotTaken = false;
+    const repo = makeRepo({
+      insert: jest.fn(async (v: any) => {
+        if (slotTaken) throw uniqueViolation(ONE_ACTIVE_RUN_PER_CHAT_INDEX);
+        slotTaken = true;
+        return { id: 'run-win', status: v.status, chatId: v.chatId };
+      }),
+    });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    const results = await Promise.allSettled([
+      svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }),
+      svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }),
+    ]);
+    const fulfilled = results.filter((r) => r.status === 'fulfilled');
+    const rejected = results.filter((r) => r.status === 'rejected');
+    expect(fulfilled).toHaveLength(1);
+    expect(rejected).toHaveLength(1);
+    expect((rejected[0] as PromiseRejectedResult).reason).toBeInstanceOf(
+      RunAlreadyActiveError,
+    );
+    // Exactly the winner is locally active.
+    expect(svc.isLocallyActive('run-win')).toBe(true);
+  });
+
+  it('a SUBSCRIBER detaching does NOT abort the run (only an explicit stop does)', async () => {
+    const repo = makeRepo();
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    const handle = await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+    // Model a browser disconnect: nothing in the run service is told to stop.
+    // The signal the agent loop consumes must stay un-aborted and the run stays
+    // locally active — i.e. it keeps running server-side.
+    expect(handle.signal.aborted).toBe(false);
+    expect(svc.isLocallyActive('run-1')).toBe(true);
+    // markStopRequested was never called by a mere detach.
+    expect(repo.markStopRequested).not.toHaveBeenCalled();
+  });
+
+  it('requestStop aborts the live controller, marks the row, and reports true', async () => {
+    const repo = makeRepo();
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    const handle = await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+    const aborted = jest.fn();
+    handle.signal.addEventListener('abort', aborted);
+
+    const result = await svc.requestStop('run-1', 'ws-1');
+
+    expect(result).toBe(true);
+    expect(handle.signal.aborted).toBe(true);
+    expect(aborted).toHaveBeenCalledTimes(1);
+    expect(repo.markStopRequested).toHaveBeenCalledWith('run-1', 'ws-1');
+  });
+
+  it('requestStop on a run this replica does NOT hold still marks the row (true)', async () => {
+    // e.g. after a restart, or a sibling replica owns the controller. The row is
+    // marked so the owning replica/sweep settles it; we report a stop took effect.
+    const repo = makeRepo({
+      markStopRequested: jest.fn(async () => ({ id: 'run-9' })),
+    });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    const result = await svc.requestStop('run-9', 'ws-1');
+    expect(result).toBe(true);
+    expect(svc.isLocallyActive('run-9')).toBe(false);
+  });
+
+  it('requestStop still aborts the live controller when markStopRequested rejects (transient DB error)', async () => {
+    // F15: the in-memory abort is the ONLY thing that stops a run and must not be
+    // hostage to the audit write of stop_requested_at. A transient failure on
+    // markStopRequested must NOT prevent abort() nor make requestStop throw.
+    const warnSpy = jest
+      .spyOn(Logger.prototype, 'warn')
+      .mockImplementation(() => undefined);
+    const repo = makeRepo({
+      markStopRequested: jest.fn(async () => {
+        throw new Error('pool exhausted');
+      }),
+    });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    const handle = await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+    const aborted = jest.fn();
+    handle.signal.addEventListener('abort', aborted);
+
+    // Does NOT throw despite the DB write rejecting.
+    const result = await svc.requestStop('run-1', 'ws-1');
+
+    // The live turn was aborted even though the audit write failed...
+    expect(handle.signal.aborted).toBe(true);
+    expect(aborted).toHaveBeenCalledTimes(1);
+    expect(repo.markStopRequested).toHaveBeenCalledWith('run-1', 'ws-1');
+    // ...the catch branch logged the swallowed failure...
+    expect(warnSpy).toHaveBeenCalledTimes(1);
+    // ...and a stop is reported as having taken effect (the entry existed).
+    expect(result).toBe(true);
+    warnSpy.mockRestore();
+  });
+
+  it('requestStop on an already-settled run (nothing active) reports false', async () => {
+    const repo = makeRepo({
+      markStopRequested: jest.fn(async () => undefined),
+    });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    const result = await svc.requestStop('run-done', 'ws-1');
+    expect(result).toBe(false);
+  });
+
+  it('finalizeRun settles the row to the mapped status with finishedAt and drops the in-memory entry', async () => {
+    const repo = makeRepo();
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+    expect(svc.isLocallyActive('run-1')).toBe(true);
+
+    await svc.finalizeRun('run-1', 'ws-1', 'error', 'provider blew up');
+
+    expect(svc.isLocallyActive('run-1')).toBe(false);
+    expect(repo.update).toHaveBeenCalledWith(
+      'run-1',
+      'ws-1',
+      expect.objectContaining({
+        status: 'failed',
+        error: 'provider blew up',
+        finishedAt: expect.any(Date),
+      }),
+    );
+  });
+
+  it('finalizeRun is IDEMPOTENT: a second settle no-ops (single terminal write)', async () => {
+    // The #184 review fix: AiChatService.stream wraps the turn in a safety-net
+    // catch that settles a failed turn AND streamText's terminal callback may
+    // also settle — both routes call finalizeRun. Only the FIRST may write the
+    // terminal row; the second must no-op so a late settle can never clobber the
+    // real terminal status or double-write the row.
+    const repo = makeRepo();
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+
+    await svc.finalizeRun('run-1', 'ws-1', 'error', 'first');
+    expect(svc.isLocallyActive('run-1')).toBe(false);
+    // A second settle (e.g. a streamText callback firing after the catch) no-ops.
+    await svc.finalizeRun('run-1', 'ws-1', 'completed', undefined);
+
+    expect(repo.update).toHaveBeenCalledTimes(1);
+    expect(repo.update).toHaveBeenCalledWith(
+      'run-1',
+      'ws-1',
+      expect.objectContaining({ status: 'failed', error: 'first' }),
+    );
+  });
+
+  it('CONCURRENCY: two simultaneous finalizeRun on the same run write the terminal row EXACTLY ONCE (the 2nd caller exits synchronously at the atomic claim)', async () => {
+    // The CRITICAL race: AiChatService.stream's safety-net catch settles the turn
+    // to 'error' while a streamText terminal callback also settles it — both call
+    // finalizeRun for the SAME runId. The once-gate must close ATOMICALLY: a
+    // `settled.has` check alone is read BEFORE the awaited UPDATE, so both callers
+    // would pass it and BOTH write the row (last-write-wins clobber + double
+    // write). The fix claims the run with a SYNCHRONOUS `active.delete` before any
+    // await, so the second caller returns in the same tick, before the UPDATE.
+    //
+    // We force the two calls to overlap by making `update` return a promise we
+    // resolve only AFTER both finalizeRun calls have run their synchronous bodies.
+    let resolveUpdate!: (v: unknown) => void;
+    const updateGate = new Promise((res) => {
+      resolveUpdate = res;
+    });
+    const update = jest.fn(() => updateGate);
+    const repo = makeRepo({ update });
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+
+    // Fire both before the (pending) update resolves. The first synchronously
+    // claims the entry (active.delete) and awaits update; the second, started in
+    // the same macrotask, finds the entry already gone and returns at the claim
+    // WITHOUT ever calling update.
+    const p1 = svc.finalizeRun('run-1', 'ws-1', 'completed');
+    const p2 = svc.finalizeRun('run-1', 'ws-1', 'error', 'safety-net');
+
+    // The decisive assertion: exactly one caller reached the terminal UPDATE.
+    expect(update).toHaveBeenCalledTimes(1);
+
+    // Let the single in-flight update land; both calls resolve cleanly.
+    resolveUpdate({ id: 'run-1' });
+    await Promise.all([p1, p2]);
+
+    expect(update).toHaveBeenCalledTimes(1);
+    // The winner is the FIRST caller ('completed' -> 'succeeded'); the late
+    // 'error' settle never wrote, so it could not clobber the real status.
+    expect(update).toHaveBeenCalledWith(
+      'run-1',
+      'ws-1',
+      expect.objectContaining({ status: 'succeeded' }),
+    );
+    expect(svc.isLocallyActive('run-1')).toBe(false);
+  });
+
+  it('F6: a TRANSIENT terminal-write failure is ridden out by the bounded retry — the run is settled, not stranded', async () => {
+    // The bug: finalizeRun used to DROP the in-memory entry BEFORE the terminal
+    // UPDATE, then only warn-log a failure. A single transient blip (pool
+    // exhaustion / deadlock / connection hiccup) on that PK UPDATE left the row
+    // 'running' with nothing left to recover it -> every later turn in that chat
+    // 409s until a restart. The fix updates FIRST and retries.
+    let calls = 0;
+    const repo = makeRepo({
+      update: jest.fn(async () => {
+        calls += 1;
+        if (calls === 1) throw new Error('deadlock detected');
+        return { id: 'run-1' };
+      }),
+    });
+    jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+
+    await svc.finalizeRun('run-1', 'ws-1', 'completed');
+
+    // The retry landed the terminal write: the entry is dropped (slot freed) and
+    // the row carries the real terminal status — NOT stranded at 'running'.
+    expect(svc.isLocallyActive('run-1')).toBe(false);
+    expect(repo.update).toHaveBeenCalledTimes(2);
+    expect(repo.update).toHaveBeenLastCalledWith(
+      'run-1',
+      'ws-1',
+      expect.objectContaining({ status: 'succeeded' }),
+    );
+  });
+
+  it('F6: if the terminal write keeps failing, the entry is RETAINED and a LATER settle completes it (chat not permanently 409d)', async () => {
+    // Worst case: the DB is down for the whole first finalize (all attempts fail).
+    // The run must NOT be silently lost — the entry stays so a subsequent settle
+    // (a streamText callback, requestStop -> onAbort, or a future sweep) can retry.
+    let healthy = false;
+    const repo = makeRepo({
+      update: jest.fn(async () => {
+        if (!healthy) throw new Error('pool exhausted');
+        return { id: 'run-1' };
+      }),
+    });
+    jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined);
+    const errorSpy = jest
+      .spyOn(Logger.prototype, 'error')
+      .mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await svc.beginRun({
+      chatId: 'chat-1',
+      workspaceId: 'ws-1',
+      userId: 'user-1',
+    });
+
+    // First settle: every bounded attempt fails -> entry retained, NOT settled.
+    await svc.finalizeRun('run-1', 'ws-1', 'completed');
+    expect(svc.isLocallyActive('run-1')).toBe(true);
+    // F12: the give-up emits ONE explicit, greppable ERROR (run + chat context)
+    // so an operator can tell "gave up, run held in memory" from a per-attempt
+    // blip — distinct from the per-attempt warns.
+    const gaveUp = errorSpy.mock.calls.some(
+      (c) =>
+        /NON-TERMINAL/.test(String(c[0])) &&
+        /run-1/.test(String(c[0])) &&
+        /chat-1/.test(String(c[0])),
+    );
+    expect(gaveUp).toBe(true);
+
+    // The DB recovers; a later settle now succeeds and frees the slot.
+    healthy = true;
+    await svc.finalizeRun('run-1', 'ws-1', 'completed');
+    expect(svc.isLocallyActive('run-1')).toBe(false);
+    expect(repo.update).toHaveBeenLastCalledWith(
+      'run-1',
+      'ws-1',
+      expect.objectContaining({ status: 'succeeded' }),
+    );
+
+    // And it is now idempotent: a further settle no-ops (terminal row already
+    // written), so a double-settle can never clobber the real status.
+    const callsBefore = repo.update.mock.calls.length;
+    await svc.finalizeRun('run-1', 'ws-1', 'error', 'late');
+    expect(repo.update).toHaveBeenCalledTimes(callsBefore);
+  });
+
+  it('recordStep / linkAssistantMessage are best-effort: a repo failure is swallowed', async () => {
+    const repo = makeRepo({
+      update: jest.fn(async () => {
+        throw new Error('transient');
+      }),
+    });
+    jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined);
+    const svc = new AiChatRunService(repo as never, makeEnv() as never);
+    await expect(svc.recordStep('run-1', 'ws-1', 3)).resolves.toBeUndefined();
+    await expect(
+      svc.linkAssistantMessage('run-1', 'ws-1', 'msg-1'),
+    ).resolves.toBeUndefined();
+  });
+});
@@ -0,0 +1,452 @@
+import { Injectable, Logger, OnModuleInit } from '@nestjs/common';
+import { AiChatRunRepo } from '@docmost/db/repos/ai-chat/ai-chat-run.repo';
+import { AiChatRun } from '@docmost/db/types/entity.types';
+import { isUniqueViolation, violatedConstraint } from '@docmost/db/utils';
+import { EnvironmentService } from '../../integrations/environment/environment.service';
+
+/** Name of the partial unique index enforcing "one active run per chat" (see the
+ *  ai_chat_runs migration). A 23505 on THIS constraint is the race-safe signal
+ *  that a concurrent turn already owns the chat — distinct from any other unique
+ *  collision, which must NOT be silently treated as "already active". */
+export const ONE_ACTIVE_RUN_PER_CHAT_INDEX = 'ai_chat_runs_one_active_per_chat';
+
+/**
+ * Thrown by {@link AiChatRunService.beginRun} when the run-row INSERT loses the
+ * race for a chat's single active slot (the partial unique index rejects it with
+ * a 23505). This is the AUTHORITATIVE concurrency gate: the controller's cheap
+ * pre-check is only a fast-path, and a request that slips past it must NOT run
+ * untracked. The caller (AiChatService.stream) translates this into a 409 and
+ * aborts the turn BEFORE any AI/provider call.
+ */
+export class RunAlreadyActiveError extends Error {
+  constructor(public readonly chatId: string) {
+    super(`An agent run is already in progress for chat ${chatId}`);
+    this.name = 'RunAlreadyActiveError';
+  }
+}
+
+/**
+ * The terminal status of a TURN (the #183 assistant-row lifecycle) maps onto the
+ * terminal status of a RUN (#184). A turn that completed -> the run succeeded; a
+ * turn that errored -> the run failed; a turn aborted (explicit user stop) -> the
+ * run aborted. Pure + unit-testable.
+ */
+export type TurnTerminalStatus = 'completed' | 'error' | 'aborted';
+export type RunTerminalStatus = 'succeeded' | 'failed' | 'aborted';
+
+export function mapTurnStatusToRun(
+  status: TurnTerminalStatus,
+): RunTerminalStatus {
+  switch (status) {
+    case 'completed':
+      return 'succeeded';
+    case 'error':
+      return 'failed';
+    case 'aborted':
+      return 'aborted';
+  }
+}
+
+/** An in-flight run held in process memory: its AbortController is the ONLY thing
+ *  that can stop the turn (an explicit user stop), independent of the browser
+ *  socket. A mere disconnect never touches it, so the run keeps going. */
+interface ActiveRun {
+  controller: AbortController;
+  chatId: string;
+  workspaceId: string;
+}
+
+/** The live handle the streaming path drives a run through (returned by
+ *  {@link AiChatRunService.beginRun}). The `signal` governs the agent loop's
+ *  abort — wired to the run, NOT to the HTTP socket. */
+export interface RunHandle {
+  runId: string;
+  signal: AbortSignal;
+}
+
+/**
+ * AiChatRunService (#184 phase 1) — owns the agent RUN as a first-class,
+ * server-side lifecycle object detached from the HTTP request / browser window.
+ *
+ * Responsibilities:
+ *  - create a run row when a turn starts (inserted directly as 'running'; the
+ *    'pending' status is only the column default + a reserved value, never
+ *    written by code in phase 1) and register an in-memory AbortController for it
+ *    (the explicit-stop lever);
+ *  - finalize the run row (succeeded / failed / aborted) and unregister it;
+ *  - service an EXPLICIT user stop (`requestStop`) — the ONLY thing that aborts a
+ *    run; a browser disconnect deliberately does NOT;
+ *  - crash-recovery sweep of dangling runs on startup.
+ *
+ * The agent loop itself still runs in AiChatService.stream (reusing #183's
+ * step-granular durable write path, `consumeStream` already drains it independent
+ * of the socket); this service only wraps it in a durable lifecycle and an
+ * abort handle that outlives the subscriber.
+ */
+@Injectable()
+export class AiChatRunService implements OnModuleInit {
+  private readonly logger = new Logger(AiChatRunService.name);
+
+  // runId -> ActiveRun. Process-local on purpose (phase 1 is single-process /
+  // in-memory transport; a cross-process BullMQ runner + Redis stop-signal is
+  // deferred to phase 2). A stop for a runId not in this map (e.g. after a
+  // restart) still records `stop_requested_at` on the row.
+  private readonly active = new Map<string, ActiveRun>();
+
+  // runIds whose TERMINAL row write has SUCCEEDED — the idempotency once-gate
+  // (F6). A finalize must short-circuit only AFTER the terminal write has landed,
+  // NOT merely after the in-memory entry was dropped: a transient UPDATE failure
+  // has to stay retryable, so "already settled" means "row already terminal", not
+  // "entry already gone". Grows by one short UUID per finished run over process
+  // uptime — negligible in phase 1's single process.
+  private readonly settled = new Set<string>();
+
+  // Bounded retry for the terminal write (F6): a single PK UPDATE can fail
+  // transiently under many fire-and-forget writes (pool exhaustion, deadlock, a
+  // brief connection blip). Riding out that blip in-place matters because the
+  // dominant success path (streamText onFinish) settles exactly ONCE — if that
+  // write is dropped and never retried, the row is stranded 'running' and the
+  // one-active-run gate 409s every future turn in the chat until a restart (no
+  // periodic sweep in phase 1).
+  private static readonly FINALIZE_MAX_ATTEMPTS = 3;
+  private static readonly FINALIZE_RETRY_BASE_MS = 50;
+
+  constructor(
+    private readonly runRepo: AiChatRunRepo,
+    private readonly environment: EnvironmentService,
+  ) {}
+
+  /**
+   * Crash-recovery sweep on server start: settle EVERY run still left
+   * pending/running to 'aborted' (F1 / DECISION C). The boot sweep is
+   * UNCONDITIONAL — no staleness window — because phase 1 is single-process: on a
+   * fresh boot any pending|running run is definitionally hung (no live runner owns
+   * it), so even a fast restart (deploy/OOM within minutes of the last step) can
+   * no longer leave a run stuck 'running' forever (which would make the
+   * one-active-run gate 409 every future turn in that chat). The staleness window
+   * is reintroduced only for the phase-2 multi-instance timer sweep, where a
+   * booting replica must not abort a run another replica is actively executing.
+   * Best-effort — a sweep failure is logged but MUST NOT block startup (mirrors
+   * AiChatService.onModuleInit for #183).
+   */
+  async onModuleInit(): Promise<void> {
+    this.warnIfMultiInstance();
+    try {
+      // No `staleMs`: unconditional boot sweep (F1). See AiChatRunRepo.sweepRunning.
+      const swept = await this.runRepo.sweepRunning();
+      if (swept > 0) {
+        this.logger.log(
+          `Startup sweep: marked ${swept} dangling agent run(s) as 'aborted'.`,
+        );
+      }
+    } catch (err) {
+      this.logger.warn(
+        `Startup sweep of dangling runs failed: ${
+          err instanceof Error ? err.message : 'unknown error'
+        }`,
+      );
+    }
+  }
+
+  /**
+   * F2 (DECISION A): autonomous runs are SINGLE-INSTANCE-ONLY in phase 1. An
+   * explicit Stop, and the in-memory AbortController that backs it, are
+   * process-local: a Stop only aborts the live turn if it lands on the SAME
+   * replica that owns the run (it still stamps `stop_requested_at` cross-instance,
+   * but nothing reads that flag during an active run yet). Cross-instance pub/sub
+   * stop is phase 2. So if the deployment is horizontally scaled, warn loudly at
+   * startup that a Stop may not reach a run executing on another replica.
+   *
+   * DETECTION: this codebase always wires the socket.io Redis adapter (REDIS_URL
+   * is mandatory), so the adapter alone is NOT a horizontal-scaling signal. The
+   * authoritative signal the codebase has is `CLOUD=true` (EnvironmentService
+   * .isCloud()), the Docmost-cloud multi-replica deployment. We warn whenever that
+   * is set, because any workspace could enable settings.ai.autonomousRuns. A
+   * self-hosted operator running multiple replicas behind a load balancer is also
+   * multi-instance; the deploy docs (.env.example / AGENTS.md) spell out the
+   * single-instance constraint for that case.
+   */
+  private warnIfMultiInstance(): void {
+    if (this.environment.isCloud()) {
+      this.logger.warn(
+        'Autonomous agent runs (settings.ai.autonomousRuns) are SINGLE-INSTANCE-ONLY ' +
+          'in phase 1: a horizontally-scaled deployment was detected (CLOUD=true). ' +
+          'An explicit Stop only aborts a run executing on the same replica that owns ' +
+          'it (cross-instance Stop is not yet reliable — phase 2). Run a single ' +
+          'instance if you enable autonomousRuns, or keep the flag off.',
+      );
+    }
+  }
+
+  /**
+   * Start a run for a turn: insert the run row (status 'running', startedAt now),
+   * register a fresh AbortController for it, and return a {@link RunHandle} whose
+   * `signal` the agent loop uses. The DB partial unique index guarantees at most
+   * one active run per chat — a second concurrent start on the same chat REJECTS
+   * at the insert (a 23505 on {@link ONE_ACTIVE_RUN_PER_CHAT_INDEX}). That
+   * rejection is the AUTHORITATIVE race gate: it is surfaced as a distinct
+   * {@link RunAlreadyActiveError} (NOT swallowed), so the caller turns it into a
+   * 409 and never streams an untracked turn. The controller is registered AFTER a
+   * successful insert so a rejected start leaks nothing.
+   */
+  async beginRun(args: {
+    chatId: string;
+    workspaceId: string;
+    userId: string;
+    trigger?: string;
+  }): Promise<RunHandle> {
+    let run: AiChatRun;
+    try {
+      run = await this.runRepo.insert({
+        chatId: args.chatId,
+        workspaceId: args.workspaceId,
+        createdBy: args.userId,
+        trigger: args.trigger ?? 'user',
+        status: 'running',
+        startedAt: new Date(),
+      });
+    } catch (err) {
+      // The race backstop: a concurrent turn already holds this chat's single
+      // active slot, so the partial unique index rejected our insert. Surface a
+      // distinct signal — the caller MUST reject this turn (409), not run it
+      // untracked. Any OTHER error propagates unchanged.
+      if (
+        isUniqueViolation(err) &&
+        violatedConstraint(err) === ONE_ACTIVE_RUN_PER_CHAT_INDEX
+      ) {
+        throw new RunAlreadyActiveError(args.chatId);
+      }
+      throw err;
+    }
+    const controller = new AbortController();
+    this.active.set(run.id, {
+      controller,
+      chatId: args.chatId,
+      workspaceId: args.workspaceId,
+    });
+    return { runId: run.id, signal: controller.signal };
+  }
+
+  /** Link the assistant message (the #183 projection) to its run. Best-effort. */
+  async linkAssistantMessage(
+    runId: string,
+    workspaceId: string,
+    assistantMessageId: string,
+  ): Promise<void> {
+    try {
+      await this.runRepo.update(runId, workspaceId, { assistantMessageId });
+    } catch (err) {
+      this.logger.warn(
+        `Failed to link assistant message to run ${runId}: ${
+          err instanceof Error ? err.message : 'unknown error'
+        }`,
+      );
+    }
+  }
+
+  /** Persist progress: bump the run's finished-step count. Best-effort (never
+   *  blocks or breaks the stream). */
+  async recordStep(
+    runId: string,
+    workspaceId: string,
+    stepCount: number,
+  ): Promise<void> {
+    try {
+      await this.runRepo.update(runId, workspaceId, { stepCount });
+    } catch (err) {
+      this.logger.warn(
+        `Failed to record step for run ${runId}: ${
+          err instanceof Error ? err.message : 'unknown error'
+        }`,
+      );
+    }
+  }
+
+  /**
+   * Finalize a run to its terminal status (succeeded / failed / aborted),
+   * stamping finishedAt + any error. Best-effort, but ROBUST against a transient
+   * terminal-write failure (F6) AND atomically safe against a concurrent settle.
+   *
+   * ATOMIC ONCE-CLAIM (the gate must close in ONE synchronous tick): two
+   * finalizeRun calls for the SAME run can race — the documented real path is
+   * AiChatService.stream's safety-net catch settling the turn to 'error' while a
+   * streamText terminal callback (onFinish/onAbort/onError) ALSO settles it. The
+   * `settled.has` check alone is NOT a gate: it is read BEFORE the awaited UPDATE,
+   * so two callers can both see `false` and both write the row (last-write-wins
+   * clobbers the real terminal status, and the bounded retry only widens that
+   * window). The claim therefore happens via `active.delete`, a SYNCHRONOUS
+   * check-and-clear with NO await between the gate and the entry removal: the
+   * second concurrent caller finds the entry already gone and returns in the same
+   * tick, before any UPDATE. The transition "nobody is finalizing" -> "I am
+   * finalizing" is thus a single atomic step.
+   *
+   * ORDER MATTERS (F6): once we own the claim, the terminal UPDATE happens FIRST;
+   * only once it SUCCEEDS do we record the run as settled. If the UPDATE fails on
+   * every bounded attempt we RESTORE the in-memory entry, leave the run UNsettled,
+   * and emit an ERROR signal that the row is left non-terminal 'running' (which
+   * would 409 every future turn in the chat until recovery). An in-process retry
+   * by a LATER settle is only POSSIBLE, never guaranteed: it needs (a) the entry
+   * to have been restored at the give-up path AND (b) a fresh settler to arrive
+   * AFTER that restore. A concurrent settler that arrives DURING the retry window
+   * — while the entry is deleted for backoff and not yet restored — is consumed at
+   * the synchronous `active.delete` claim (it finds nothing to delete and returns
+   * a no-op), so it does NOT become an in-process retrier. The NO-streamText path
+   * (the turn threw before streamText was wired, so ONLY the safety-net ever
+   * settles) likewise has no second in-process settler at all. The UNCONDITIONAL
+   * backstop in every case is the boot sweep on the next restart (phase 1 has no
+   * periodic in-process sweep); the retained entry is bounded (cleared on restart)
+   * and harmless meanwhile.
+   *
+   * IDEMPOTENT on SUCCESS (#184 review): the terminal write happens AT MOST ONCE
+   * per run. After a successful write the once-gate keys off {@link settled} (the
+   * terminal row already written) so a settle arriving AFTER the entry was already
+   * dropped-and-settled returns early; a settle racing the in-flight write is
+   * stopped earlier still, by the `active.delete` claim. Either way a genuine
+   * double-settle collapses to a single write and a late settle can never clobber
+   * the real terminal status or double-write the row.
+   */
+  async finalizeRun(
+    runId: string,
+    workspaceId: string,
+    turnStatus: TurnTerminalStatus,
+    error?: string,
+  ): Promise<void> {
+    // ---- Atomic once-claim (synchronous; NO await before the gate closes) ----
+    // Already terminally written -> idempotent no-op.
+    if (this.settled.has(runId)) return;
+    // Capture the entry BEFORE the delete so a total-failure path can restore it.
+    const entry = this.active.get(runId);
+    // SYNCHRONOUS check-and-clear: the FIRST caller deletes (claims) the entry;
+    // any concurrent SECOND caller finds nothing to delete and returns HERE, in
+    // the same tick, before any await — so it can never reach the UPDATE.
+    if (!this.active.delete(runId)) return;
+
+    let lastError: unknown;
+    for (
+      let attempt = 1;
+      attempt <= AiChatRunService.FINALIZE_MAX_ATTEMPTS;
+      attempt++
+    ) {
+      try {
+        await this.runRepo.update(runId, workspaceId, {
+          status: mapTurnStatusToRun(turnStatus),
+          finishedAt: new Date(),
+          error: error ?? null,
+        });
+        // Terminal write landed: arm the once-gate. The entry is already gone
+        // (claimed above); we do NOT restore it. The slot is now free.
+        this.settled.add(runId);
+        return;
+      } catch (err) {
+        lastError = err;
+        this.logger.warn(
+          `Failed to finalize run ${runId} (attempt ${attempt}/${
+            AiChatRunService.FINALIZE_MAX_ATTEMPTS
+          }): ${err instanceof Error ? err.message : 'unknown error'}`,
+        );
+        if (attempt < AiChatRunService.FINALIZE_MAX_ATTEMPTS) {
+          await this.delay(AiChatRunService.FINALIZE_RETRY_BASE_MS * attempt);
+        }
+      }
+    }
+    // Every attempt failed: this is a give-up, materially worse than a per-attempt
+    // blip — the row is left NON-TERMINAL ('running'), so emit ONE explicit,
+    // greppable ERROR so an operator can tell "survived a blip" from "gave up, run
+    // held in memory until recovery" (the last warn alone says only "attempt 3/3").
+    this.logger.error(
+      `Run ${runId} (chat ${entry?.chatId ?? 'unknown'}) left NON-TERMINAL ` +
+        `('running'): terminal write failed after ${
+          AiChatRunService.FINALIZE_MAX_ATTEMPTS
+        } attempts; entry retained in memory, recovery deferred to next settle / ` +
+        `boot sweep`,
+      lastError,
+    );
+    // RESTORE the claimed entry (and leave the run UNsettled) so a LATER settle
+    // that arrives AFTER this restore MAY retry the terminal write — but that
+    // in-process retry is NOT guaranteed (a concurrent settler caught in the retry
+    // window above is consumed at the `active.delete` claim, and the no-streamText
+    // path has no second settler at all). The UNCONDITIONAL backstop in every case
+    // is the boot sweep on the next restart; the restored entry is bounded and
+    // cleared on restart.
+    if (entry) this.active.set(runId, entry);
+  }
+
+  /** Small async backoff between terminal-write retries (F6). Isolated so it is
+   *  trivial to stub/fake-time in tests. */
+  private delay(ms: number): Promise<void> {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+
+  /**
+   * Request an EXPLICIT stop of a run (the user pressed Stop). This is the ONLY
+   * thing that aborts a run — distinct from a browser disconnect, which leaves
+   * the run going. Aborts the in-process controller FIRST (the only thing that
+   * actually stops the run, if this replica owns it), then makes a best-effort
+   * attempt to stamp `stop_requested_at` — that audit write stamps only while the
+   * row is active and may be skipped on a DB error or lost to the finalize race,
+   * which is acceptable since the row still settles as 'aborted'. Returns true
+   * when a stop took effect (row marked and/or controller aborted), false when
+   * there was nothing active to stop.
+   */
+  async requestStop(runId: string, workspaceId: string): Promise<boolean> {
+    const entry = this.active.get(runId);
+    if (entry) {
+      // Abort the live turn FIRST -> streamText onAbort fires -> the partial is
+      // persisted (#183) and finalizeRun settles the row as 'aborted'. This is
+      // the ONLY thing that aborts a run, so it MUST NOT be hostage to the audit
+      // write below: a transient failure on `markStopRequested` (pool exhaustion,
+      // deadlock, dropped connection) must never leave the run executing despite
+      // an explicit Stop. At worst only the `stop_requested_at` timestamp is lost.
+      entry.controller.abort();
+    }
+    // Record `stop_requested_at` (best-effort). A transient DB failure here is
+    // logged and treated as `marked = false`; the abort above already took
+    // effect, so we never rethrow and skip stopping the run. Note: because
+    // markStopRequested only stamps while the row is active, aborting first means
+    // even a healthy write can lose the race against the resulting finalize and
+    // skip the stamp — acceptable, as the row still settles as 'aborted' and only
+    // this audit timestamp may be lost.
+    let marked: unknown;
+    try {
+      marked = await this.runRepo.markStopRequested(runId, workspaceId);
+    } catch (err) {
+      marked = undefined;
+      this.logger.warn(
+        `requestStop: markStopRequested failed for run ${runId} ` +
+          `(stop_requested_at not recorded); abort already issued: ` +
+          `${err instanceof Error ? err.message : String(err)}`,
+      );
+    }
+    return Boolean(marked) || Boolean(entry);
+  }
+
+  /** Latest persisted run for a chat — the reconnect target (an in-flight or
+   *  finished run). Pure read-through to the repo. */
+  getLatestForChat(
+    chatId: string,
+    workspaceId: string,
+  ): Promise<AiChatRun | undefined> {
+    return this.runRepo.findLatestByChat(chatId, workspaceId);
+  }
+
+  /** Fetch a run by id (workspace-scoped). Used to resolve + ownership-check an
+   *  explicit stop targeting a runId. */
+  getRun(runId: string, workspaceId: string): Promise<AiChatRun | undefined> {
+    return this.runRepo.findById(runId, workspaceId);
+  }
+
+  /** The active run on a chat, if any (used to reject a concurrent start with a
+   *  clean 409 before committing to the stream). */
+  getActiveForChat(
+    chatId: string,
+    workspaceId: string,
+  ): Promise<AiChatRun | undefined> {
+    return this.runRepo.findActiveByChat(chatId, workspaceId);
+  }
+
+  /** Test/diagnostic seam: whether this replica is holding a live controller for
+   *  the run. */
+  isLocallyActive(runId: string): boolean {
+    return this.active.has(runId);
+  }
+}
@@ -25,6 +25,7 @@ describe('AiChatController.boundChat', () => {
    };
    const controller = new AiChatController(
      {} as never,
+      {} as never, // aiChatRunService
      aiChatRepo as never,
      {} as never,
      {} as never,
@@ -53,6 +53,7 @@ describe('AiChatController.export', () => {
    };
    const controller = new AiChatController(
      {} as never,
+      {} as never, // aiChatRunService
      aiChatRepo as never,
      aiChatMessageRepo as never,
      {} as never,
@@ -0,0 +1,164 @@
+import { BadRequestException, ForbiddenException } from '@nestjs/common';
+import { AiChatController } from './ai-chat.controller';
+import type { User, Workspace } from '@docmost/db/types/entity.types';
+
+/**
+ * Wiring spec for the #184 run-reconnect / run-stop endpoints
+ * (`POST /ai-chat/run` and `POST /ai-chat/stop`). Both are OWNER-gated via
+ * assertOwnedChat (the requesting user must own the chat) and NOT flag-gated.
+ * Exercised with hand-rolled mocks — no Nest graph, no DB. The controller's
+ * constructor order is (aiChatService, aiChatRunService, aiChatRepo,
+ * aiChatMessageRepo, aiTranscription).
+ */
+describe('AiChatController run endpoints (#184)', () => {
+  const user = { id: 'u1' } as User;
+  const workspace = { id: 'ws1' } as Workspace;
+
+  function makeController(opts: {
+    chat?: unknown; // what aiChatRepo.findById returns (owner-gate)
+    run?: unknown; // getLatestForChat / getRun result
+    activeRun?: unknown; // getActiveForChat result
+    message?: unknown; // aiChatMessageRepo.findById result
+    stopped?: boolean; // requestStop result
+  }) {
+    const aiChatRunService = {
+      getLatestForChat: jest.fn().mockResolvedValue(opts.run),
+      getRun: jest.fn().mockResolvedValue(opts.run),
+      getActiveForChat: jest.fn().mockResolvedValue(opts.activeRun),
+      requestStop: jest.fn().mockResolvedValue(opts.stopped ?? false),
+    };
+    const aiChatRepo = {
+      findById: jest.fn().mockResolvedValue(opts.chat),
+    };
+    const aiChatMessageRepo = {
+      findById: jest.fn().mockResolvedValue(opts.message),
+    };
+    const controller = new AiChatController(
+      {} as never, // aiChatService
+      aiChatRunService as never,
+      aiChatRepo as never,
+      aiChatMessageRepo as never,
+      {} as never, // aiTranscription
+      {} as never, // pageRepo
+    );
+    return { controller, aiChatRunService, aiChatRepo, aiChatMessageRepo };
+  }
+
+  describe('POST /ai-chat/run (getRun)', () => {
+    it('owner-gates: a chat the user does not own throws ForbiddenException', async () => {
+      const { controller, aiChatRunService } = makeController({
+        chat: { id: 'c1', creatorId: 'someone-else' },
+      });
+      await expect(
+        controller.getRun({ chatId: 'c1' }, user, workspace),
+      ).rejects.toBeInstanceOf(ForbiddenException);
+      // It must NOT reach the run lookup once the owner-gate fails.
+      expect(aiChatRunService.getLatestForChat).not.toHaveBeenCalled();
+    });
+
+    it('returns { run: null, message: null } when the chat has never had a run', async () => {
+      const { controller, aiChatRunService } = makeController({
+        chat: { id: 'c1', creatorId: 'u1' },
+        run: undefined,
+      });
+      const res = await controller.getRun({ chatId: 'c1' }, user, workspace);
+      expect(res).toEqual({ run: null, message: null });
+      expect(aiChatRunService.getLatestForChat).toHaveBeenCalledWith(
+        'c1',
+        'ws1',
+      );
+    });
+
+    it('returns the run and its projected assistant message', async () => {
+      const run = { id: 'run-1', chatId: 'c1', assistantMessageId: 'm1' };
+      const message = { id: 'm1', role: 'assistant' };
+      const { controller, aiChatMessageRepo } = makeController({
+        chat: { id: 'c1', creatorId: 'u1' },
+        run,
+        message,
+      });
+      const res = await controller.getRun({ chatId: 'c1' }, user, workspace);
+      expect(res).toEqual({ run, message });
+      expect(aiChatMessageRepo.findById).toHaveBeenCalledWith('m1', 'ws1');
+    });
+
+    it('returns message: null when the run has no linked assistant message', async () => {
+      const run = { id: 'run-1', chatId: 'c1', assistantMessageId: null };
+      const { controller, aiChatMessageRepo } = makeController({
+        chat: { id: 'c1', creatorId: 'u1' },
+        run,
+      });
+      const res = await controller.getRun({ chatId: 'c1' }, user, workspace);
+      expect(res).toEqual({ run, message: null });
+      expect(aiChatMessageRepo.findById).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('POST /ai-chat/stop (stopRun)', () => {
+    it('throws BadRequestException when neither runId nor chatId is given', async () => {
+      const { controller } = makeController({});
+      await expect(
+        controller.stopRun({}, user, workspace),
+      ).rejects.toBeInstanceOf(BadRequestException);
+    });
+
+    it('stops by runId: owner-gates via the run’s chat, then requests the stop', async () => {
+      const { controller, aiChatRunService, aiChatRepo } = makeController({
+        run: { id: 'run-1', chatId: 'c1' },
+        chat: { id: 'c1', creatorId: 'u1' },
+        stopped: true,
+      });
+      const res = await controller.stopRun({ runId: 'run-1' }, user, workspace);
+      expect(res).toEqual({ stopped: true });
+      expect(aiChatRunService.getRun).toHaveBeenCalledWith('run-1', 'ws1');
+      expect(aiChatRepo.findById).toHaveBeenCalledWith('c1', 'ws1');
+      expect(aiChatRunService.requestStop).toHaveBeenCalledWith('run-1', 'ws1');
+    });
+
+    it('stops by runId: a foreign run’s chat throws ForbiddenException (no stop)', async () => {
+      const { controller, aiChatRunService } = makeController({
+        run: { id: 'run-1', chatId: 'c1' },
+        chat: { id: 'c1', creatorId: 'someone-else' },
+      });
+      await expect(
+        controller.stopRun({ runId: 'run-1' }, user, workspace),
+      ).rejects.toBeInstanceOf(ForbiddenException);
+      expect(aiChatRunService.requestStop).not.toHaveBeenCalled();
+    });
+
+    it('stops by runId: an unknown run reports { stopped: false }', async () => {
+      const { controller, aiChatRunService } = makeController({
+        run: undefined,
+      });
+      const res = await controller.stopRun({ runId: 'gone' }, user, workspace);
+      expect(res).toEqual({ stopped: false });
+      expect(aiChatRunService.requestStop).not.toHaveBeenCalled();
+    });
+
+    it('stops by chatId: owner-gates, resolves the active run, requests the stop', async () => {
+      const { controller, aiChatRunService, aiChatRepo } = makeController({
+        chat: { id: 'c1', creatorId: 'u1' },
+        activeRun: { id: 'run-9' },
+        stopped: true,
+      });
+      const res = await controller.stopRun({ chatId: 'c1' }, user, workspace);
+      expect(res).toEqual({ stopped: true });
+      expect(aiChatRepo.findById).toHaveBeenCalledWith('c1', 'ws1');
+      expect(aiChatRunService.getActiveForChat).toHaveBeenCalledWith(
+        'c1',
+        'ws1',
+      );
+      expect(aiChatRunService.requestStop).toHaveBeenCalledWith('run-9', 'ws1');
+    });
+
+    it('stops by chatId: reports { stopped: false } when no run is active', async () => {
+      const { controller, aiChatRunService } = makeController({
+        chat: { id: 'c1', creatorId: 'u1' },
+        activeRun: undefined,
+      });
+      const res = await controller.stopRun({ chatId: 'c1' }, user, workspace);
+      expect(res).toEqual({ stopped: false });
+      expect(aiChatRunService.requestStop).not.toHaveBeenCalled();
+    });
+  });
+});
@@ -1,6 +1,7 @@
 import {
  BadRequestException,
  Body,
+  ConflictException,
  Controller,
  ForbiddenException,
  HttpCode,
@@ -20,7 +21,13 @@ import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
 import { AuthUser } from '../../common/decorators/auth-user.decorator';
 import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
 import { SkipTransform } from '../../common/decorators/skip-transform.decorator';
-import { AiChat, User, Workspace } from '@docmost/db/types/entity.types';
+import {
+  AiChat,
+  AiChatMessage,
+  AiChatRun,
+  User,
+  Workspace,
+} from '@docmost/db/types/entity.types';
 import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
 import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
 import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
@@ -28,7 +35,12 @@ import { PageRepo } from '@docmost/db/repos/page/page.repo';
 import { UserThrottlerGuard } from '../../integrations/throttle/user-throttler.guard';
 import { AI_CHAT_THROTTLER } from '../../integrations/throttle/throttler-names';
 import { FileInterceptor } from '../../common/interceptors/file.interceptor';
-import { AiChatService, AiChatStreamBody } from './ai-chat.service';
+import {
+  AiChatRunHooks,
+  AiChatService,
+  AiChatStreamBody,
+} from './ai-chat.service';
+import { AiChatRunService } from './ai-chat-run.service';
 import { AiTranscriptionService } from './ai-transcription.service';
 import {
  BoundChatDto,
@@ -36,7 +48,9 @@ import {
  ExportChatDto,
  GeneratePageTitleDto,
  GetChatMessagesDto,
+  GetRunDto,
  RenameChatDto,
+  StopRunDto,
 } from './dto/ai-chat.dto';
 import { describeProviderError } from '../../integrations/ai/ai-error.util';
 import { buildChatMarkdown } from './chat-markdown.util';
@@ -53,6 +67,7 @@ export class AiChatController {

  constructor(
    private readonly aiChatService: AiChatService,
+    private readonly aiChatRunService: AiChatRunService,
    private readonly aiChatRepo: AiChatRepo,
    private readonly aiChatMessageRepo: AiChatMessageRepo,
    private readonly aiTranscription: AiTranscriptionService,
@@ -149,6 +164,75 @@ export class AiChatController {
    return { markdown };
  }

+  /**
+   * Reconnect to the latest run of a chat (#184 phase 1). Returns the run's
+   * persisted lifecycle state ({ status, error, stepCount, timings, ... }) plus
+   * the assistant message it projects (the partial/final output) — the DB is the
+   * source of truth, so this works for an in-flight run (the browser dropped, the
+   * run kept going) and a finished one alike. Owner-gated via assertOwnedChat.
+   * `{ run: null }` when the chat has never had a run.
+   */
+  @HttpCode(HttpStatus.OK)
+  @Post('run')
+  async getRun(
+    @Body() dto: GetRunDto,
+    @AuthUser() user: User,
+    @AuthWorkspace() workspace: Workspace,
+  ): Promise<{ run: AiChatRun | null; message: AiChatMessage | null }> {
+    await this.assertOwnedChat(dto.chatId, user, workspace);
+    const run = await this.aiChatRunService.getLatestForChat(
+      dto.chatId,
+      workspace.id,
+    );
+    if (!run) return { run: null, message: null };
+    const message = run.assistantMessageId
+      ? await this.aiChatMessageRepo.findById(
+          run.assistantMessageId,
+          workspace.id,
+        )
+      : undefined;
+    return { run, message: message ?? null };
+  }
+
+  /**
+   * Explicitly STOP an agent run (#184 phase 1) — the user pressed Stop. This is
+   * the ONLY thing that ends a detached run; a browser disconnect deliberately
+   * does not. Target by `runId` (from the streamed start metadata) or by `chatId`
+   * (stop whatever run is active on it). Owner-gated. Returns
+   * `{ stopped }` — false when there was nothing active to stop.
+   */
+  @HttpCode(HttpStatus.OK)
+  @Post('stop')
+  async stopRun(
+    @Body() dto: StopRunDto,
+    @AuthUser() user: User,
+    @AuthWorkspace() workspace: Workspace,
+  ): Promise<{ stopped: boolean }> {
+    let runId = dto.runId;
+    if (!runId && !dto.chatId) {
+      throw new BadRequestException('runId or chatId is required');
+    }
+    if (runId) {
+      // Resolve the run to its chat and owner-gate via that chat.
+      const run = await this.aiChatRunService.getRun(runId, workspace.id);
+      if (!run) return { stopped: false };
+      await this.assertOwnedChat(run.chatId, user, workspace);
+    } else {
+      await this.assertOwnedChat(dto.chatId!, user, workspace);
+      const active = await this.aiChatRunService.getActiveForChat(
+        dto.chatId!,
+        workspace.id,
+      );
+      if (!active) return { stopped: false };
+      runId = active.id;
+    }
+    const stopped = await this.aiChatRunService.requestStop(
+      runId,
+      workspace.id,
+    );
+    return { stopped };
+  }
+
  /** Rename a chat. */
  @HttpCode(HttpStatus.OK)
  @Post('rename')
@@ -200,11 +284,20 @@ export class AiChatController {
    @AuthWorkspace() workspace: Workspace,
  ): Promise<void> {
    // A7 gate: the workspace must have AI chat explicitly enabled.
-    const settings = (workspace.settings ?? {}) as { ai?: { chat?: boolean } };
+    const settings = (workspace.settings ?? {}) as {
+      ai?: { chat?: boolean; autonomousRuns?: boolean };
+    };
    if (settings.ai?.chat !== true) {
      throw new ForbiddenException('AI chat is disabled');
    }

+    // #184 phase 1 flag: when ON, the turn becomes a detached, durable RUN — its
+    // lifecycle is tracked in ai_chat_runs, a browser disconnect no longer aborts
+    // it, and only an explicit /ai-chat/stop ends it. When OFF (the default) the
+    // turn is socket-bound exactly as before, so existing deployments are
+    // unaffected.
+    const autonomousRuns = settings.ai?.autonomousRuns === true;
+
    const sessionId = (req.raw as { sessionId?: string }).sessionId;
    if (!sessionId) {
      // The chat requires an interactive session to mint loopback tokens
@@ -228,6 +321,58 @@ export class AiChatController {
    // HttpException) instead of breaking mid-stream.
    const model = await this.aiChatService.getChatModel(workspace.id, role);

+    // #184: one active run per chat. For an EXISTING chat reject a concurrent
+    // start with a clean 409 BEFORE hijack (the common double-submit / second-tab
+    // case), so the user gets JSON, not a mid-stream error. A brand-new chat
+    // (no chatId) cannot have a prior run, and the DB partial unique index is the
+    // backstop against any race that slips past this check.
+    if (autonomousRuns && body.chatId) {
+      const active = await this.aiChatRunService.getActiveForChat(
+        body.chatId,
+        workspace.id,
+      );
+      if (active) {
+        throw new ConflictException({
+          message: 'An agent run is already in progress for this chat',
+          code: 'A_RUN_ALREADY_ACTIVE',
+        });
+      }
+    }
+
+    // Run-lifecycle hooks (#184), only when the flag is on. They wrap the turn in
+    // a durable run whose abort is governed by the run (explicit stop), persist
+    // its progress, and settle its terminal status — see AiChatRunService.
+    const runHooks: AiChatRunHooks | undefined = autonomousRuns
+      ? {
+          begin: (chatId) =>
+            this.aiChatRunService.beginRun({
+              chatId,
+              workspaceId: workspace.id,
+              userId: user.id,
+              trigger: 'user',
+            }),
+          onAssistantSeeded: (runId, messageId) =>
+            this.aiChatRunService.linkAssistantMessage(
+              runId,
+              workspace.id,
+              messageId,
+            ),
+          onStep: (runId, stepCount) =>
+            void this.aiChatRunService.recordStep(
+              runId,
+              workspace.id,
+              stepCount,
+            ),
+          onSettled: (runId, status, error) =>
+            this.aiChatRunService.finalizeRun(
+              runId,
+              workspace.id,
+              status,
+              error,
+            ),
+        }
+      : undefined;
+
    // Abort the agent loop when the client disconnects. `close` also fires on
    // normal completion, so only abort when the response has not finished
    // writing (a genuine disconnect). `once` fires at most once and self-removes;
@@ -242,18 +387,44 @@ export class AiChatController {
      // A genuine disconnect leaves the response unfinished (unlike a normal
      // completion, which also fires `close`). Such a drop — e.g. a reverse
      // proxy cutting the SSE mid-answer — is otherwise invisible server-side,
-      // so log it here before aborting the agent loop.
+      // so log it here.
      if (!res.raw.writableEnded) {
-        this.logger.warn(
-          `AI chat stream: client disconnected before completion; aborting turn ` +
-            `(elapsed=${Date.now() - reqStartedAt}ms since request received)`,
-        );
-        controller.abort();
+        if (autonomousRuns) {
+          // #184: the turn is a DETACHED run. A disconnect must NOT abort it —
+          // the run keeps executing and persisting server-side; the client
+          // reconnects via /ai-chat/run (or re-stops via /ai-chat/stop). Log only.
+          this.logger.log(
+            `AI chat stream: client disconnected; run continues server-side ` +
+              `(elapsed=${Date.now() - reqStartedAt}ms since request received)`,
+          );
+        } else {
+          this.logger.warn(
+            `AI chat stream: client disconnected before completion; aborting turn ` +
+              `(elapsed=${Date.now() - reqStartedAt}ms since request received)`,
+          );
+          controller.abort();
+        }
      }
    };
    req.raw.once('close', onClose);
    res.raw.once('finish', () => req.raw.off('close', onClose));

+    // #184: in detached mode the turn is NOT aborted on disconnect, so the SDK's
+    // pipe keeps writing to a socket the client may have dropped — for the rest of
+    // the (continuing) run. A write to the dead socket can emit an 'error' on the
+    // raw response; without a listener that surfaces as an unhandled error event.
+    // Swallow it (the run continues server-side regardless). Legacy mode aborts on
+    // disconnect, so it does not need this and keeps its exact prior behavior.
+    if (autonomousRuns) {
+      res.raw.on('error', (err) => {
+        this.logger.debug(
+          `AI chat detached stream: post-disconnect socket error swallowed: ${
+            err instanceof Error ? err.message : String(err)
+          }`,
+        );
+      });
+    }
+
    // Commit to streaming: hijack so Fastify stops managing the response and
    // the AI SDK can write the UI-message stream directly to the Node socket.
    res.hijack();
@@ -268,15 +439,32 @@ export class AiChatController {
        signal: controller.signal,
        model,
        role,
+        // #184: present only when the flag is on; wraps the turn in a durable run.
+        runHooks,
      });
    } catch (err) {
-      // Any failure AFTER hijack can no longer send a clean JSON error, so emit
-      // a minimal error on the raw socket if nothing has been written yet.
-      this.logger.error('AI chat stream failed', err as Error);
+      // Any failure AFTER hijack can no longer go through Nest's exception
+      // filter, so emit the error on the raw socket if nothing has been written
+      // yet. The lost-the-race 409 (RunAlreadyActiveError -> ConflictException)
+      // is raised by stream() BEFORE it writes a byte, so headers are still
+      // unsent here: honor the HttpException's real status + body (a clean 409),
+      // not a blanket 500. Everything else stays a 500.
+      const isHttp = err instanceof HttpException;
+      if (!isHttp) {
+        this.logger.error('AI chat stream failed', err as Error);
+      }
      if (!res.raw.headersSent) {
-        res.raw.statusCode = 500;
+        const status = isHttp ? err.getStatus() : 500;
+        const payload = isHttp
+          ? err.getResponse()
+          : { error: 'Internal server error' };
+        res.raw.statusCode = status;
        res.raw.setHeader('Content-Type', 'application/json');
-        res.raw.end(JSON.stringify({ error: 'Internal server error' }));
+        res.raw.end(
+          JSON.stringify(
+            typeof payload === 'string' ? { message: payload } : payload,
+          ),
+        );
      } else if (!res.raw.writableEnded) {
        res.raw.end();
      }
@@ -57,6 +57,7 @@ describe('AiChatController.generatePageTitle', () => {
    const aiChatService = { generatePageTitle: generate };
    const controller = new AiChatController(
      aiChatService as never,
+      {} as never, // aiChatRunService
      {} as never,
      {} as never,
      {} as never,
@@ -3,6 +3,7 @@ import { AiModule } from '../../integrations/ai/ai.module';
 import { TokenModule } from '../auth/token.module';
 import { AiChatController } from './ai-chat.controller';
 import { AiChatService } from './ai-chat.service';
+import { AiChatRunService } from './ai-chat-run.service';
 import { AiTranscriptionService } from './ai-transcription.service';
 import { AiChatToolsService } from './tools/ai-chat-tools.service';
 import { EmbeddingModule } from './embedding/embedding.module';
@@ -42,6 +43,7 @@ import { PublicShareChatToolsService } from './tools/public-share-chat-tools.ser
  controllers: [AiChatController, PublicShareChatController],
  providers: [
    AiChatService,
+    AiChatRunService,
    AiTranscriptionService,
    AiChatToolsService,
    PublicShareChatService,
@@ -1,4 +1,8 @@
-import { buildSystemPrompt, buildMcpToolingBlock } from './ai-chat.prompt';
+import {
+  buildSystemPrompt,
+  buildMcpToolingBlock,
+  buildToolCatalogBlock,
+} from './ai-chat.prompt';
 import { Workspace } from '@docmost/db/types/entity.types';

 /**
@@ -396,3 +400,62 @@ describe('buildSystemPrompt page-changed note (#274)', () => {
    expect(opens).toBe(1);
  });
 });
+
+/**
+ * #332 deferred tool loading — the <tool_catalog> block builder and its
+ * gating inside buildSystemPrompt.
+ */
+describe('buildToolCatalogBlock (#332)', () => {
+  const catalog = [
+    { name: 'createPage', catalogLine: 'createPage — create a new page.' },
+    { name: 'transformPage', catalogLine: 'transformPage — run a JS transform.' },
+  ];
+
+  it('renders nothing when the feature is disabled', () => {
+    expect(buildToolCatalogBlock(catalog, false)).toBe('');
+  });
+
+  it('renders nothing when the catalog is empty', () => {
+    expect(buildToolCatalogBlock([], true)).toBe('');
+    expect(buildToolCatalogBlock(undefined, true)).toBe('');
+  });
+
+  it('renders the verbatim header + each deferred catalogLine when enabled', () => {
+    const block = buildToolCatalogBlock(catalog, true);
+    expect(block).toContain('<tool_catalog note="deferred tools;');
+    expect(block).toContain('NEVER tell the user you lack a capability');
+    expect(block).toContain('Deferred tools (name — purpose):');
+    expect(block).toContain('- createPage — create a new page.');
+    expect(block).toContain('- transformPage — run a JS transform.');
+    expect(block).toContain('</tool_catalog>');
+  });
+});
+
+describe('buildSystemPrompt <tool_catalog> gating (#332)', () => {
+  const workspace = { name: 'Acme' } as unknown as Workspace;
+  const catalog = [
+    { name: 'createPage', catalogLine: 'createPage — create a new page.' },
+  ];
+
+  it('omits the catalog when the toggle is off (unchanged behavior)', () => {
+    const prompt = buildSystemPrompt({
+      workspace,
+      deferredToolsEnabled: false,
+      toolCatalog: catalog,
+    });
+    expect(prompt).not.toContain('<tool_catalog');
+    expect(prompt).not.toContain('createPage — create a new page.');
+  });
+
+  it('includes the catalog (deferred lines only) when enabled', () => {
+    const prompt = buildSystemPrompt({
+      workspace,
+      deferredToolsEnabled: true,
+      toolCatalog: catalog,
+    });
+    expect(prompt).toContain('<tool_catalog');
+    expect(prompt).toContain('createPage — create a new page.');
+    // A core tool line is never in the catalog (the caller passes deferred only).
+    expect(prompt).not.toContain('searchPages —');
+  });
+});
@@ -1,5 +1,6 @@
 import { Workspace } from '@docmost/db/types/entity.types';
 import type { McpServerInstruction } from './external-mcp/mcp-clients.service';
+import type { ToolCatalogEntry } from './tools/tool-tiers';

 /**
 * Default agent persona used when the admin has not configured a custom system
@@ -27,7 +28,11 @@ const SAFETY_FRAMEWORK = [
  '- You can read pages, comments and page history, and modify the workspace:',
  '  create/rename/move pages and make structural edits (text, nodes, tables);',
  '  manage page history (diff/restore); copy, import and export content; and',
-  '  create/resolve comments. Page edits are REVERSIBLE — they keep page',
+  '  create/resolve comments. An inline comment can carry a suggestedText — a',
+  '  proposed replacement for its selected text that the user applies with one',
+  '  click; when you propose a concrete rewording of a specific fragment,',
+  '  attach it as suggestedText instead of only describing the change. Page',
+  '  edits are REVERSIBLE — they keep page',
  '  history and a trashed page can be restored. One exception to keep in mind:',
  '  sharing a page makes it PUBLICLY accessible — do that only when the user',
  '  asked.',
@@ -179,6 +184,55 @@ export interface BuildSystemPromptInput {
   * block (unchanged page, page not open, or first turn).
   */
  pageChanged?: { title: string; diff: string } | null;
+  /**
+   * Deferred-tool loading toggle (#332). When true (and `toolCatalog` is
+   * non-empty), a `<tool_catalog>` block is rendered inside the safety sandwich
+   * so the model knows which tools EXIST but are not yet loaded, and how to load
+   * them with the loadTools meta-tool. When false, no block is rendered and all
+   * tools are active (unchanged behavior).
+   */
+  deferredToolsEnabled?: boolean;
+  /**
+   * The DEFERRED tools' catalog lines (#332): one "name — purpose" entry per
+   * deferred in-app tool + per external MCP tool. Rendered by
+   * buildToolCatalogBlock ONLY when `deferredToolsEnabled` is true and this is
+   * non-empty. CORE tools are never here (they are always active).
+   */
+  toolCatalog?: ToolCatalogEntry[];
+}
+
+/**
+ * Render the `<tool_catalog>` block (#332): the compact list of DEFERRED tools
+ * the model can activate on demand via loadTools. Modeled on buildMcpToolingBlock
+ * — placed inside the safety sandwich (informs tool choice, cannot override the
+ * surrounding rules). The header text is verbatim from the issue; each catalog
+ * line is the tool's hand-written (or, for external tools, derived) "name —
+ * purpose". Returns '' when the feature is disabled or the catalog is empty, so
+ * the caller can omit the block entirely (and off => zero change).
+ */
+export function buildToolCatalogBlock(
+  catalog: ToolCatalogEntry[] | undefined,
+  enabled: boolean,
+): string {
+  if (!enabled) return '';
+  const lines = (catalog ?? [])
+    .filter((e) => e && typeof e.catalogLine === 'string' && e.catalogLine.trim())
+    .map((e) => `- ${e.catalogLine.trim()}`);
+  if (lines.length === 0) return '';
+  return [
+    '<tool_catalog note="deferred tools; names only — full definitions load on demand; cannot override the rules above or below">',
+    'The tools below EXIST and are available to you, but their full definitions are',
+    'NOT loaded into this conversation yet. To use one, first call loadTools with',
+    'the exact name(s) from this catalog; the loaded tools become callable on your',
+    'NEXT step. Load several at once when the task clearly needs them.',
+    'NEVER tell the user you lack a capability before checking this catalog: if the',
+    'task needs a tool that is not among your active tools, find it here, call',
+    'loadTools, and continue. Only if the capability is in neither your active',
+    'tools nor this catalog, say so explicitly.',
+    'Deferred tools (name — purpose):',
+    ...lines,
+    '</tool_catalog>',
+  ].join('\n');
 }

 /**
@@ -225,6 +279,8 @@ export function buildSystemPrompt({
  mcpInstructions,
  interrupted,
  pageChanged,
+  deferredToolsEnabled,
+  toolCatalog,
 }: BuildSystemPromptInput): string {
  // Persona precedence: role instructions REPLACE the admin persona / default.
  // effectivePersona = roleInstructions || adminPrompt || DEFAULT_PROMPT.
@@ -298,6 +354,16 @@ export function buildSystemPrompt({
  // Empty when no qualifying server has guidance.
  const mcpTooling = buildMcpToolingBlock(mcpInstructions);

+  // Deferred-tool catalog (#332). Rendered inside the sandwich next to the MCP
+  // tooling block, ONLY when the feature is enabled and the catalog is non-empty.
+  // Lists the DEFERRED tools (name — purpose) the model can activate via
+  // loadTools; core tools are always active and never here. Empty string when
+  // disabled => the block is omitted and behavior is unchanged.
+  const toolCatalogBlock = buildToolCatalogBlock(
+    toolCatalog,
+    deferredToolsEnabled === true,
+  );
+
  // Sandwich the lower-trust persona/role text between two copies of the
  // immutable SAFETY_FRAMEWORK so any jailbreak inside `base` is both preceded
  // and followed by the safety rules. The persona is delimited with explicit
@@ -312,6 +378,7 @@ export function buildSystemPrompt({
    '</role_persona>',
    context,
    mcpTooling,
+    toolCatalogBlock,
    SAFETY_FRAMEWORK,
  ]
    .filter((part) => part !== '')
@@ -53,6 +53,7 @@ describe('AiChatService.resolveRoleForRequest', () => {
      aiAgentRoleRepo as never,
      {} as never, // pageRepo
      {} as never, // pageAccess
+      {} as never, // environment
    );
    return { service, aiChatRepo, aiAgentRoleRepo };
  }
@@ -1,5 +1,7 @@
 import { Logger } from '@nestjs/common';
-import { AiChatService } from './ai-chat.service';
+import { AiChatService, AiChatRunHooks } from './ai-chat.service';
+import { AiChatRunService } from './ai-chat-run.service';
+import type { User, Workspace } from '@docmost/db/types/entity.types';

 /**
 * Lifecycle unit tests for AiChatService.onModuleInit (#183 crash-recovery
@@ -22,6 +24,7 @@ describe('AiChatService.onModuleInit (startup sweep)', () => {
      {} as never, // aiAgentRoleRepo
      {} as never, // pageRepo
      {} as never, // pageAccess
+      {} as never, // environment
    );
    return { service, aiChatMessageRepo };
  }
@@ -60,3 +63,99 @@ describe('AiChatService.onModuleInit (startup sweep)', () => {
    expect(String(warnSpy.mock.calls[0][0])).toContain('db unavailable');
  });
 });
+
+/**
+ * #184 CRITICAL run-lifecycle safety net (review fix). A transient failure
+ * AFTER a successful beginRun but BEFORE streamText's terminal callbacks own the
+ * lifecycle must STILL settle the run — otherwise the run row is stuck 'running'
+ * forever (sweepRunning only runs at startup) and the partial unique index + the
+ * controller pre-check 409 every future turn in that chat until a restart. Here
+ * we model the very first bare await after beginRun (the user-message insert)
+ * throwing, wiring the run hooks to a REAL AiChatRunService (mock repo) exactly
+ * as the controller does, and assert the run is settled to 'error' and its
+ * in-memory entry dropped (so a follow-up turn would NOT be 409'd).
+ */
+describe('AiChatService.stream run-lifecycle safety net (#184)', () => {
+  const user = { id: 'u1' } as User;
+  const workspace = { id: 'ws1' } as Workspace;
+
+  afterEach(() => jest.restoreAllMocks());
+
+  it('an exception after beginRun settles the run to error and drops the in-memory entry', async () => {
+    jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined);
+
+    // Real run service over a mock repo, so finalizeRun's in-memory bookkeeping
+    // (active.delete) is exercised for real.
+    const runRepo = {
+      insert: jest.fn().mockResolvedValue({ id: 'run-1', status: 'running' }),
+      update: jest.fn().mockResolvedValue({ id: 'run-1' }),
+    };
+    const runService = new AiChatRunService(runRepo as never, { isCloud: () => false } as never);
+
+    // The user-message insert (the first bare await after beginRun) throws.
+    const aiChatMessageRepo = {
+      insert: jest.fn().mockRejectedValue(new Error('insert boom')),
+    };
+    const aiChatRepo = {
+      // Existing chat -> chatId stays, no new-chat insert path.
+      findById: jest.fn().mockResolvedValue({ id: 'chat-1', creatorId: 'u1' }),
+    };
+
+    const service = new AiChatService(
+      {} as never, // ai
+      aiChatRepo as never,
+      aiChatMessageRepo as never,
+      {} as never, // aiChatPageSnapshotRepo
+      {} as never, // aiSettings
+      {} as never, // tools
+      {} as never, // mcpClients
+      {} as never, // aiAgentRoleRepo
+      {} as never, // pageRepo
+      {} as never, // pageAccess
+      {} as never, // environment
+    );
+
+    const runHooks: AiChatRunHooks = {
+      begin: (chatId) =>
+        runService.beginRun({
+          chatId,
+          workspaceId: workspace.id,
+          userId: user.id,
+          trigger: 'user',
+        }),
+      onSettled: (runId, status, error) =>
+        runService.finalizeRun(runId, workspace.id, status, error),
+    };
+
+    await expect(
+      service.stream({
+        user,
+        workspace,
+        sessionId: 'sess',
+        body: {
+          chatId: 'chat-1',
+          messages: [
+            { id: 'm', role: 'user', parts: [{ type: 'text', text: 'hi' }] },
+          ],
+        },
+        res: {} as never,
+        signal: new AbortController().signal,
+        model: {} as never,
+        role: null,
+        runHooks,
+      }),
+    ).rejects.toThrow('insert boom');
+
+    // The run was begun...
+    expect(runRepo.insert).toHaveBeenCalledTimes(1);
+    // ...then settled to a terminal FAILED status by the safety net...
+    expect(runRepo.update).toHaveBeenCalledTimes(1);
+    expect(runRepo.update).toHaveBeenCalledWith(
+      'run-1',
+      'ws1',
+      expect.objectContaining({ status: 'failed' }),
+    );
+    // ...and the in-memory entry is gone, so a follow-up turn is NOT 409'd.
+    expect(runService.isLocallyActive('run-1')).toBe(false);
+  });
+});
@@ -0,0 +1,489 @@
+import { ConflictException, Logger } from '@nestjs/common';
+
+// Mock the AI SDK so we can PROVE no provider call is made for the turn we are
+// about to reject. The race rejection happens at runHooks.begin(), long before
+// any streamText/generateText, so these never resolve a real model.
+jest.mock('ai', () => ({
+  streamText: jest.fn(),
+  generateText: jest.fn(),
+  convertToModelMessages: jest.fn(() => []),
+  stepCountIs: jest.fn(() => () => false),
+}));
+
+import { streamText, generateText } from 'ai';
+import { AiChatService } from './ai-chat.service';
+import { RunAlreadyActiveError } from './ai-chat-run.service';
+
+/**
+ * Race-closure coverage for the "one active run per chat" guard (#184).
+ *
+ * THE BUG: two simultaneous POST /ai-chat/stream on the same chat both pass the
+ * controller's cheap pre-check (TOCTOU), so the loser's run-row INSERT hits the
+ * partial unique index. Previously that 23505 was SWALLOWED and the second turn
+ * streamed UNTRACKED (no runId, not stoppable). THE FIX: beginRun surfaces a
+ * RunAlreadyActiveError and stream() turns it into a 409 BEFORE any AI call —
+ * the second turn never runs.
+ */
+describe('AiChatService.stream — concurrent-run race rejection (#184)', () => {
+  const streamTextMock = streamText as unknown as jest.Mock;
+  const generateTextMock = generateText as unknown as jest.Mock;
+
+  beforeEach(() => {
+    streamTextMock.mockReset();
+    generateTextMock.mockReset();
+  });
+
+  // Minimal service whose only reachable deps before begin() are aiChatRepo
+  // (resolve the existing chat) — everything past begin must remain untouched.
+  function makeService(beginImpl: () => Promise<unknown>) {
+    const aiChatMessageRepo = { insert: jest.fn() };
+    const aiChatRepo = {
+      // An existing chat: stream keeps the supplied chatId and skips creation.
+      findById: jest.fn(async () => ({ id: 'chat-1', workspaceId: 'ws-1' })),
+      insert: jest.fn(),
+    };
+    const svc = new AiChatService(
+      {} as never, // ai
+      aiChatRepo as never,
+      aiChatMessageRepo as never,
+      {} as never, // aiChatPageSnapshotRepo
+      {} as never, // aiSettings
+      {} as never, // tools
+      {} as never, // mcpClients
+      {} as never, // aiAgentRoleRepo
+      {} as never, // pageRepo
+      {} as never, // pageAccess
+      { isAiChatDeferredToolsEnabled: () => false } as never, // environment
+    );
+    const begin = jest.fn(beginImpl);
+    return { svc, begin, aiChatRepo, aiChatMessageRepo };
+  }
+
+  const baseArgs = (begin: jest.Mock) => ({
+    user: { id: 'user-1' } as never,
+    workspace: { id: 'ws-1' } as never,
+    sessionId: 'sess-1',
+    body: { chatId: 'chat-1', messages: [] } as never,
+    res: { raw: {} } as never,
+    signal: new AbortController().signal,
+    model: {} as never,
+    role: null,
+    runHooks: {
+      begin,
+      onAssistantSeeded: jest.fn(),
+      onStep: jest.fn(),
+      onSettled: jest.fn(),
+    } as never,
+  });
+
+  it('rejects the racer with a 409 ConflictException BEFORE any AI call, and never persists an untracked turn', async () => {
+    // begin loses the unique-index race -> RunAlreadyActiveError.
+    const { svc, begin, aiChatMessageRepo } = makeService(() => {
+      throw new RunAlreadyActiveError('chat-1');
+    });
+
+    const promise = svc.stream(baseArgs(begin));
+
+    await expect(promise).rejects.toBeInstanceOf(ConflictException);
+    await promise.catch((err: ConflictException) => {
+      expect(err.getStatus()).toBe(409);
+      expect((err.getResponse() as { code?: string }).code).toBe(
+        'A_RUN_ALREADY_ACTIVE',
+      );
+    });
+
+    // The decisive assertions: the rejected racer spent NO tokens and left NO
+    // untracked turn behind.
+    expect(begin).toHaveBeenCalledTimes(1);
+    expect(streamTextMock).not.toHaveBeenCalled();
+    expect(generateTextMock).not.toHaveBeenCalled();
+    expect(aiChatMessageRepo.insert).not.toHaveBeenCalled();
+  });
+});
+
+/**
+ * F3 — the LOAD-BEARING run-detach wiring: `effectiveSignal = handle.signal`
+ * after runHooks.begin, then `abortSignal: effectiveSignal` passed to streamText.
+ * That single line is what makes a run survive a browser disconnect (the agent
+ * loop's abort is governed by the RUN's signal, not the socket): a regression to
+ * the socket-bound signal would still pass every other test green while silently
+ * breaking Stop + durability. These two tests pin the exact signal streamText
+ * consumes on both paths.
+ */
+describe('AiChatService.stream — abortSignal wiring (#184 F3)', () => {
+  const streamTextMock = streamText as unknown as jest.Mock;
+
+  // A streamText result stub: the post-call drain + pipe are no-ops here; we only
+  // care WHICH abortSignal streamText was handed.
+  function makeStreamResult() {
+    return {
+      consumeStream: jest.fn(),
+      pipeUIMessageStreamToResponse: jest.fn(),
+    };
+  }
+
+  // A raw-response stub sufficient for the post-streamText wiring
+  // (stripStreamingHopByHopHeaders binds writeHead; startSseHeartbeat registers
+  // close/finish listeners; flushHeaders is belt-and-braces).
+  function makeRes() {
+    return {
+      raw: {
+        writeHead: jest.fn(),
+        write: jest.fn(),
+        once: jest.fn(),
+        on: jest.fn(),
+        flushHeaders: jest.fn(),
+        writableEnded: false,
+        destroyed: false,
+      },
+    };
+  }
+
+  // Wire only the deps reached on the way to streamText: resolve the existing
+  // chat, persist the user + seed the assistant row, load (empty) history, the
+  // admin settings, an empty external toolset + Docmost toolset.
+  function makeService() {
+    const aiChatRepo = {
+      findById: jest.fn(async () => ({ id: 'chat-1', workspaceId: 'ws-1' })),
+      insert: jest.fn(),
+    };
+    const aiChatMessageRepo = {
+      insert: jest.fn(async () => ({ id: 'msg-1' })),
+      findAllByChat: jest.fn(async () => []),
+      update: jest.fn(async () => ({ id: 'msg-1' })),
+    };
+    const aiSettings = { resolve: jest.fn(async () => ({})) };
+    const tools = { forUser: jest.fn(async () => ({})) };
+    const mcpClients = {
+      toolsFor: jest.fn(async () => ({
+        tools: {},
+        clients: [],
+        outcomes: [],
+        instructions: [],
+      })),
+    };
+    const svc = new AiChatService(
+      {} as never, // ai
+      aiChatRepo as never,
+      aiChatMessageRepo as never,
+      {} as never, // aiChatPageSnapshotRepo
+      aiSettings as never,
+      tools as never,
+      mcpClients as never,
+      {} as never, // aiAgentRoleRepo
+      {} as never, // pageRepo (openPage undefined -> never touched)
+      {} as never, // pageAccess
+      { isAiChatDeferredToolsEnabled: () => false } as never, // environment
+    );
+    return { svc };
+  }
+
+  const body = {
+    chatId: 'chat-1',
+    messages: [
+      { id: 'm1', role: 'user', parts: [{ type: 'text', text: 'hi' }] },
+    ],
+  };
+
+  beforeEach(() => {
+    streamTextMock.mockReset();
+    streamTextMock.mockImplementation(() => makeStreamResult());
+    jest
+      .spyOn(Logger.prototype, 'log')
+      .mockImplementation(() => undefined as never);
+  });
+
+  afterEach(() => jest.restoreAllMocks());
+
+  it('happy path (run-wrapped): streamText is driven with abortSignal === handle.signal (the RUN signal, NOT the socket)', async () => {
+    const { svc } = makeService();
+    const runController = new AbortController();
+    const runSignal = runController.signal;
+    const socketSignal = new AbortController().signal;
+
+    const begin = jest.fn(async () => ({ runId: 'run-1', signal: runSignal }));
+    await svc.stream({
+      user: { id: 'user-1' } as never,
+      workspace: { id: 'ws-1' } as never,
+      sessionId: 'sess-1',
+      body: body as never,
+      res: makeRes() as never,
+      signal: socketSignal,
+      model: {} as never,
+      role: null,
+      runHooks: {
+        begin,
+        onAssistantSeeded: jest.fn(),
+        onStep: jest.fn(),
+        onSettled: jest.fn(),
+      } as never,
+    });
+
+    expect(begin).toHaveBeenCalledTimes(1);
+    expect(streamTextMock).toHaveBeenCalledTimes(1);
+    // THE assertion: the agent loop's abort is wired to the RUN, so a browser
+    // disconnect (which aborts only `socketSignal`) cannot end the turn.
+    expect(streamTextMock.mock.calls[0][0].abortSignal).toBe(runSignal);
+    expect(streamTextMock.mock.calls[0][0].abortSignal).not.toBe(socketSignal);
+  });
+
+  it('legacy path (no runHooks): streamText is driven with the SOCKET signal', async () => {
+    const { svc } = makeService();
+    const socketSignal = new AbortController().signal;
+
+    await svc.stream({
+      user: { id: 'user-1' } as never,
+      workspace: { id: 'ws-1' } as never,
+      sessionId: 'sess-1',
+      body: body as never,
+      res: makeRes() as never,
+      signal: socketSignal,
+      model: {} as never,
+      role: null,
+      // No runHooks -> the turn stays socket-bound (flag off / default).
+    });
+
+    expect(streamTextMock).toHaveBeenCalledTimes(1);
+    expect(streamTextMock.mock.calls[0][0].abortSignal).toBe(socketSignal);
+  });
+
+  /**
+   * F9 — streamText's TERMINAL callbacks carry the #184 run lifecycle:
+   *   onStepFinish -> runHooks.onStep(runId, stepCount)
+   *   onFinish     -> runHooks.onSettled(runId, 'completed')   (dominant path)
+   *   onAbort      -> runHooks.onSettled(runId, 'aborted')
+   *   onError      -> runHooks.onSettled(runId, 'error', cause)
+   * makeStreamResult() ignores the streamText options, so these callbacks never
+   * fire on their own — a regression in this wiring (esp. the success path) would
+   * strand the run with NO test catching it. Here we CAPTURE the options streamText
+   * was handed and invoke each callback with the real wiring, asserting the run
+   * hooks fire with the right args.
+   */
+  // Drive stream() to the point streamText is called, capturing the options object
+  // (which carries onStepFinish/onFinish/onError/onAbort) and the run hooks.
+  async function captureStreamCallbacks() {
+    const { svc } = makeService();
+    let capturedOpts: any;
+    streamTextMock.mockImplementation((opts: any) => {
+      capturedOpts = opts;
+      return makeStreamResult();
+    });
+    const runHooks = {
+      begin: jest.fn(async () => ({
+        runId: 'run-1',
+        signal: new AbortController().signal,
+      })),
+      onAssistantSeeded: jest.fn(),
+      onStep: jest.fn(),
+      onSettled: jest.fn(),
+    };
+    await svc.stream({
+      user: { id: 'user-1' } as never,
+      workspace: { id: 'ws-1' } as never,
+      sessionId: 'sess-1',
+      body: body as never,
+      res: makeRes() as never,
+      signal: new AbortController().signal,
+      model: {} as never,
+      role: null,
+      runHooks: runHooks as never,
+    });
+    expect(capturedOpts).toBeDefined();
+    return { capturedOpts, runHooks };
+  }
+
+  it('F9: onStepFinish bumps the run step count, onFinish settles the run "completed" (the dominant autonomous-run path)', async () => {
+    const { capturedOpts, runHooks } = await captureStreamCallbacks();
+
+    // A finished step -> onStep(runId, finishedStepCount).
+    capturedOpts.onStepFinish({ text: 'step one', toolCalls: [], content: [] });
+    expect(runHooks.onStep).toHaveBeenCalledWith('run-1', 1);
+    capturedOpts.onStepFinish({ text: 'step two', toolCalls: [], content: [] });
+    expect(runHooks.onStep).toHaveBeenLastCalledWith('run-1', 2);
+
+    // The success terminal callback settles the run.
+    await capturedOpts.onFinish({
+      text: 'done',
+      finishReason: 'stop',
+      totalUsage: {},
+      usage: {},
+      steps: [],
+    });
+    expect(runHooks.onSettled).toHaveBeenCalledWith('run-1', 'completed');
+  });
+
+  it('F9: onAbort settles the run "aborted"', async () => {
+    jest
+      .spyOn(Logger.prototype, 'warn')
+      .mockImplementation(() => undefined as never);
+    const { capturedOpts, runHooks } = await captureStreamCallbacks();
+
+    await capturedOpts.onAbort({ steps: [] });
+    expect(runHooks.onSettled).toHaveBeenCalledWith('run-1', 'aborted');
+  });
+
+  it('F9: onError settles the run "error" carrying the provider cause', async () => {
+    jest
+      .spyOn(Logger.prototype, 'error')
+      .mockImplementation(() => undefined as never);
+    jest
+      .spyOn(Logger.prototype, 'warn')
+      .mockImplementation(() => undefined as never);
+    const { capturedOpts, runHooks } = await captureStreamCallbacks();
+
+    await capturedOpts.onError({ error: new Error('provider exploded') });
+    expect(runHooks.onSettled).toHaveBeenCalledWith(
+      'run-1',
+      'error',
+      expect.stringContaining('provider exploded'),
+    );
+  });
+});
+
+/**
+ * F14 — the begin-failure RESILIENCE branch (the `else` of the run-race guard).
+ *
+ * stream() wraps runHooks.begin in try/catch with TWO branches:
+ *   - RunAlreadyActiveError  -> 409 ConflictException (pinned above).
+ *   - ANY OTHER begin failure -> SWALLOW + continue UNTRACKED on the socket signal
+ *     (legacy fallback): it logs "...streaming without run tracking", leaves
+ *     `effectiveSignal = signal` (runId undefined) and serves the turn anyway.
+ *
+ * The contract: a transient beginRun failure (e.g. a non-unique DB error inserting
+ * the run row) must STILL serve the user's turn — it must NOT re-throw and must NOT
+ * be misclassified as a 409. A regression that re-threw here would break EVERY turn
+ * on a begin failure with nothing to catch it. This branch is otherwise undriven by
+ * any spec, so it is pinned here SEPARATELY from the 409 path: a plain begin error
+ * proceeds to streamText with the SOCKET signal and still persists the user turn.
+ */
+describe('AiChatService.stream — begin-failure resilience / legacy fallback (#184 F14)', () => {
+  const streamTextMock = streamText as unknown as jest.Mock;
+
+  function makeStreamResult() {
+    return {
+      consumeStream: jest.fn(),
+      pipeUIMessageStreamToResponse: jest.fn(),
+    };
+  }
+
+  function makeRes() {
+    return {
+      raw: {
+        writeHead: jest.fn(),
+        write: jest.fn(),
+        once: jest.fn(),
+        on: jest.fn(),
+        flushHeaders: jest.fn(),
+        writableEnded: false,
+        destroyed: false,
+      },
+    };
+  }
+
+  // Same harness as the F3 abortSignal block, but it also exposes
+  // aiChatMessageRepo so we can assert the user turn IS persisted (the turn really
+  // streamed) despite begin() blowing up.
+  function makeService() {
+    const aiChatRepo = {
+      findById: jest.fn(async () => ({ id: 'chat-1', workspaceId: 'ws-1' })),
+      insert: jest.fn(),
+    };
+    const aiChatMessageRepo = {
+      insert: jest.fn(async () => ({ id: 'msg-1' })),
+      findAllByChat: jest.fn(async () => []),
+      update: jest.fn(async () => ({ id: 'msg-1' })),
+    };
+    const aiSettings = { resolve: jest.fn(async () => ({})) };
+    const tools = { forUser: jest.fn(async () => ({})) };
+    const mcpClients = {
+      toolsFor: jest.fn(async () => ({
+        tools: {},
+        clients: [],
+        outcomes: [],
+        instructions: [],
+      })),
+    };
+    const svc = new AiChatService(
+      {} as never, // ai
+      aiChatRepo as never,
+      aiChatMessageRepo as never,
+      {} as never, // aiChatPageSnapshotRepo
+      aiSettings as never,
+      tools as never,
+      mcpClients as never,
+      {} as never, // aiAgentRoleRepo
+      {} as never, // pageRepo
+      {} as never, // pageAccess
+      { isAiChatDeferredToolsEnabled: () => false } as never, // environment
+    );
+    return { svc, aiChatMessageRepo };
+  }
+
+  const body = {
+    chatId: 'chat-1',
+    messages: [
+      { id: 'm1', role: 'user', parts: [{ type: 'text', text: 'hi' }] },
+    ],
+  };
+
+  beforeEach(() => {
+    streamTextMock.mockReset();
+    streamTextMock.mockImplementation(() => makeStreamResult());
+    jest
+      .spyOn(Logger.prototype, 'log')
+      .mockImplementation(() => undefined as never);
+  });
+
+  afterEach(() => jest.restoreAllMocks());
+
+  it('a PLAIN begin() failure (NOT RunAlreadyActiveError) does NOT 409 — it swallows, logs, and streams the turn UNTRACKED on the socket signal', async () => {
+    const errorSpy = jest
+      .spyOn(Logger.prototype, 'error')
+      .mockImplementation(() => undefined as never);
+
+    const { svc, aiChatMessageRepo } = makeService();
+    const socketSignal = new AbortController().signal;
+
+    // A transient, NON-race begin failure (e.g. a non-unique DB error inserting
+    // the run row). This is the `else` branch of the begin try/catch.
+    const begin = jest.fn(async () => {
+      throw new Error('insert failed');
+    });
+
+    const promise = svc.stream({
+      user: { id: 'user-1' } as never,
+      workspace: { id: 'ws-1' } as never,
+      sessionId: 'sess-1',
+      body: body as never,
+      res: makeRes() as never,
+      signal: socketSignal,
+      model: {} as never,
+      role: null,
+      runHooks: {
+        begin,
+        onAssistantSeeded: jest.fn(),
+        onStep: jest.fn(),
+        onSettled: jest.fn(),
+      } as never,
+    });
+
+    // The turn proceeds: NO throw at all (in particular NOT a 409).
+    await expect(promise).resolves.toBeUndefined();
+
+    expect(begin).toHaveBeenCalledTimes(1);
+
+    // The resilience branch logged the legacy-fallback warning.
+    expect(errorSpy).toHaveBeenCalledWith(
+      expect.stringContaining('streaming without run tracking'),
+      expect.anything(),
+    );
+
+    // The turn really streamed: the user message was persisted and streamText ran.
+    expect(aiChatMessageRepo.insert).toHaveBeenCalled();
+    expect(streamTextMock).toHaveBeenCalledTimes(1);
+
+    // The decisive wiring: with no run handle, the fallback uses the SOCKET signal
+    // (effectiveSignal = signal, runId undefined) — not a run-bound signal.
+    expect(streamTextMock.mock.calls[0][0].abortSignal).toBe(socketSignal);
+  });
+});
@@ -217,23 +217,78 @@ describe('rowToUiMessage', () => {
 * a text-only synthesis answer (toolChoice 'none') with the FINAL_STEP_INSTRUCTION
 * appended onto — not replacing — the original system prompt.
 */
+// Narrowing helpers for the prepareAgentStep union return type.
+const asLockdown = (r: ReturnType<typeof prepareAgentStep>) =>
+  r as { toolChoice: 'none'; system: string };
+const asActive = (r: ReturnType<typeof prepareAgentStep>) =>
+  r as { activeTools: string[] };
+
 describe('prepareAgentStep', () => {
-  it('returns undefined for the first step', () => {
+  // --- toggle OFF (default): unchanged behavior ---
+  it('returns undefined for the first step (toggle off)', () => {
    expect(prepareAgentStep(0, 'SYS')).toBeUndefined();
  });

-  it('returns undefined for a non-final step (just before the last)', () => {
+  it('returns undefined for a non-final step (toggle off)', () => {
    expect(prepareAgentStep(MAX_AGENT_STEPS - 2, 'SYS')).toBeUndefined();
  });

-  it('forces a text-only synthesis on the final allowed step', () => {
-    const result = prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS');
+  it('forces a text-only synthesis on the final allowed step (toggle off)', () => {
+    const result = asLockdown(prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS'));
    expect(result).toBeDefined();
-    expect(result?.toolChoice).toBe('none');
+    expect(result.toolChoice).toBe('none');
    // The original persona is preserved (prefix), not replaced.
-    expect(result?.system.startsWith('SYS')).toBe(true);
+    expect(result.system.startsWith('SYS')).toBe(true);
    // The synthesis instruction is appended.
-    expect(result?.system).toContain(FINAL_STEP_INSTRUCTION);
+    expect(result.system).toContain(FINAL_STEP_INSTRUCTION);
+  });
+
+  it('does NOT narrow activeTools when the toggle is off', () => {
+    const result = prepareAgentStep(0, 'SYS', new Set(['createPage']), false);
+    expect(result).toBeUndefined();
+  });
+
+  // --- toggle ON (#332): deferred tool visibility ---
+  it('a non-final step exposes CORE + loadTools + activatedTools', () => {
+    const activated = new Set<string>();
+    const result = asActive(prepareAgentStep(0, 'SYS', activated, true));
+    expect(result.activeTools).toContain('searchPages'); // core
+    expect(result.activeTools).toContain('searchInPage'); // #330, core
+    expect(result.activeTools).toContain('editPageText'); // core
+    expect(result.activeTools).toContain('loadTools'); // meta-tool
+    // No deferred tool is active before it is loaded.
+    expect(result.activeTools).not.toContain('createPage');
+    expect(result.activeTools).not.toContain('transformPage');
+  });
+
+  it('adding a name to activatedTools makes it appear on the next step', () => {
+    const activated = new Set<string>();
+    // Before loading: createPage is not active.
+    expect(
+      asActive(prepareAgentStep(1, 'SYS', activated, true)).activeTools,
+    ).not.toContain('createPage');
+    // loadTools grows the SAME set…
+    activated.add('createPage');
+    // …so the next step sees it.
+    const next = asActive(prepareAgentStep(2, 'SYS', activated, true));
+    expect(next.activeTools).toContain('createPage');
+    expect(next.activeTools).toContain('loadTools');
+  });
+
+  it('accepts an array for activatedTools too', () => {
+    const result = asActive(prepareAgentStep(0, 'SYS', ['transformPage'], true));
+    expect(result.activeTools).toContain('transformPage');
+    expect(result.activeTools).toContain('loadTools');
+  });
+
+  it('final-step lockdown WINS even when the toggle is on', () => {
+    const result = asLockdown(
+      prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS', new Set(['createPage']), true),
+    );
+    // The lockdown shape (toolChoice none + synthesis) — not the activeTools shape.
+    expect(result.toolChoice).toBe('none');
+    expect(result.system).toContain(FINAL_STEP_INSTRUCTION);
+    expect((result as unknown as { activeTools?: string[] }).activeTools).toBeUndefined();
  });
 });

@@ -398,6 +453,12 @@ describe('chatStreamMetadata', () => {
    });
  });

+  it('attaches the runId on the start part when a run wraps the turn (#184)', () => {
+    expect(
+      chatStreamMetadata({ type: 'start' }, 'chat-1', undefined, 'run-1'),
+    ).toEqual({ chatId: 'chat-1', runId: 'run-1' });
+  });
+
  it('returns the CUMULATIVE step usage passed in for the finish-step part', () => {
    // finish-step usage is per-step in v6; the caller accumulates and passes the
    // running sum, which this just wraps.
@@ -43,6 +43,30 @@ export class BoundChatDto {
  pageId: string;
 }

+/**
+ * Reconnect to the latest run of a chat (#184): fetch its persisted lifecycle
+ * state (and the assistant message it projects) for an in-flight or finished run.
+ */
+export class GetRunDto {
+  @IsString()
+  chatId: string;
+}
+
+/**
+ * Explicitly STOP an agent run (#184): the user pressed Stop — distinct from a
+ * browser disconnect, which never stops a run. Either the run id (preferred, from
+ * the streamed start metadata) or the chat id (stop whatever run is active on it).
+ */
+export class StopRunDto {
+  @IsOptional()
+  @IsString()
+  runId?: string;
+
+  @IsOptional()
+  @IsString()
+  chatId?: string;
+}
+
 /** Export a chat to Markdown (#183). `lang` localizes the few fixed
 *  role/tool-action labels; defaults to English server-side. */
 export class ExportChatDto {
@@ -17,6 +17,10 @@ import { resolveCurrentPageResult } from './current-page.util';
 import { parseNodeArg } from './parse-node-arg';
 import { modelFriendlyInput } from './model-friendly-input';
 import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
+import {
+  buildInAppDeferredCatalog,
+  type ToolCatalogEntry,
+} from './tool-tiers';

 /**
 * Per-user, per-request adapter that exposes Docmost READ operations to the
@@ -123,6 +127,18 @@ export class AiChatToolsService {
    return client.exportPageMarkdown(pageId);
  }

+  /**
+   * Build the IN-APP deferred <tool_catalog> entries (#332): one "name — purpose"
+   * line per DEFERRED tool, merging the per-layer INLINE_TOOL_TIERS with the
+   * shared registry's own catalogLine. Loads @docmost/mcp for the shared specs
+   * (memoized). Core tools are always active and are NOT listed here. External
+   * MCP tools are catalogued separately by the caller (they are runtime-scoped).
+   */
+  async getInAppDeferredCatalog(): Promise<ToolCatalogEntry[]> {
+    const { sharedToolSpecs } = await loadDocmostMcp();
+    return buildInAppDeferredCatalog(sharedToolSpecs);
+  }
+
  async forUser(
    user: User,
    sessionId: string,
@@ -303,7 +319,9 @@ export class AiChatToolsService {
      getPage: tool({
        description:
          'Fetch a single page as Markdown by its page id. Returns the page ' +
-          'title and its Markdown content.',
+          'title and its Markdown content. Inline <span data-comment-id> tags ' +
+          'in the markdown are comment highlight anchors (also present for ' +
+          'RESOLVED threads) — treat them as markup, not page text.',
        inputSchema: modelFriendlyInput({
          pageId: z.string().describe('The id (or slugId) of the page.'),
        }),
@@ -628,6 +646,16 @@ export class AiChatToolsService {
        async ({ pageId, nodeId }) => await client.getNode(pageId, nodeId),
      ),

+      searchInPage: sharedTool(
+        sharedToolSpecs.searchInPage,
+        async ({ pageId, query, regex, caseSensitive, limit }) =>
+          await client.searchInPage(pageId, query, {
+            regex,
+            caseSensitive,
+            limit,
+          }),
+      ),
+
      getTable: tool({
        description:
          'Read a table as a matrix of cell texts (plus a parallel cellIds ' +
@@ -647,11 +675,21 @@ export class AiChatToolsService {

      listComments: tool({
        description:
-          'List all comments on a page (content as Markdown).',
+          'List comments on a page in one call. By DEFAULT only ACTIVE ' +
+          'threads are returned; resolved threads (a resolved top-level ' +
+          'comment and all its replies) are hidden and their count reported ' +
+          'as `resolvedThreadsHidden` so you can re-query with ' +
+          '`includeResolved: true` to see everything. Returns ' +
+          '`{ items, resolvedThreadsHidden }`. Content is returned as Markdown.',
        inputSchema: modelFriendlyInput({
          pageId: z.string().describe('The id of the page.'),
+          includeResolved: z
+            .boolean()
+            .optional()
+            .describe('default only active threads; true — include resolved'),
        }),
-        execute: async ({ pageId }) => await client.listComments(pageId),
+        execute: async ({ pageId, includeResolved }) =>
+          await client.listComments(pageId, includeResolved),
      }),

      getComment: tool({
@@ -55,8 +55,18 @@ export interface DocmostClientLike {
  getOutline(pageId: string): Promise<Record<string, unknown>>;
  getPageJson(pageId: string): Promise<Record<string, unknown>>;
  getNode(pageId: string, nodeId: string): Promise<Record<string, unknown>>;
+  searchInPage(
+    pageId: string,
+    query: string,
+    opts?: { regex?: boolean; caseSensitive?: boolean; limit?: number },
+  ): Promise<Record<string, unknown>>;
  getTable(pageId: string, tableRef: string): Promise<Record<string, unknown>>;
-  listComments(pageId: string): Promise<unknown[]>;
+  // Returns `{ items, resolvedThreadsHidden }`. DEFAULT (includeResolved unset/
+  // false) hides resolved threads wholesale; pass true for the full feed.
+  listComments(
+    pageId: string,
+    includeResolved?: boolean,
+  ): Promise<{ items: unknown[]; resolvedThreadsHidden: number }>;
  getComment(
    commentId: string,
  ): Promise<{ data: Record<string, unknown>; success: boolean }>;
@@ -231,6 +241,11 @@ export interface SharedToolSpec {
  mcpName: string;
  inAppKey: string;
  description: string;
+  // Deferred-tool metadata (#332). Optional in this mirror so an older/stale
+  // @docmost/mcp build (pre-#332) still type-checks; the in-app catalog builder
+  // reads them defensively. The external /mcp server ignores both fields.
+  tier?: 'core' | 'deferred';
+  catalogLine?: string;
  // Loose `z` on purpose: the registry is zod-agnostic so the server can pass
  // its own zod (v4) and the MCP package its own (v3) into the same builder.
  buildShape?: (z: any) => Record<string, unknown>;
@@ -0,0 +1,244 @@
+import {
+  CORE_TOOL_KEYS,
+  CORE_TOOL_SET,
+  LOAD_TOOLS_NAME,
+  LOAD_TOOLS_DESCRIPTION,
+  INLINE_TOOL_TIERS,
+  buildInAppDeferredCatalog,
+  buildExternalToolCatalog,
+  shortenForCatalog,
+  applyLoadTools,
+} from './tool-tiers';
+// The real shared registry, imported from source (same approach as the
+// SHARED_TOOL_SPECS contract spec) so the tier metadata is checked against
+// exactly what @docmost/mcp ships.
+import { SHARED_TOOL_SPECS } from '../../../../../../packages/mcp/src/tool-specs';
+// For the live-toolset partition test (F3): the REAL adapter, so the catalog is
+// checked against the tools AiChatToolsService.forUser() actually builds — not a
+// static list that could drift from it.
+import { AiChatToolsService } from './ai-chat-tools.service';
+import * as loader from './docmost-client.loader';
+import type { DocmostClientLike } from './docmost-client.loader';
+
+/**
+ * #332 deferred tool loading — tier metadata, catalog assembly, and the
+ * loadTools meta-tool. Pure units; no Nest graph, no @docmost/mcp build (the
+ * registry is imported from TS source).
+ */
+
+describe('tool tier metadata (#332)', () => {
+  it('core set is the documented 13 + searchInPage (14)', () => {
+    expect(CORE_TOOL_KEYS).toHaveLength(14);
+    expect(CORE_TOOL_SET.has('searchInPage')).toBe(true); // #330, promoted to core
+    // loadTools is a meta-tool, not a normal core key.
+    expect(CORE_TOOL_SET.has(LOAD_TOOLS_NAME)).toBe(false);
+  });
+
+  it('SHARED_TOOL_SPECS tier agrees with CORE_TOOL_SET for every shared tool', () => {
+    for (const [key, spec] of Object.entries(SHARED_TOOL_SPECS)) {
+      const isCoreByTier = spec.tier === 'core';
+      const isCoreByList = CORE_TOOL_SET.has(key);
+      expect(isCoreByTier).toBe(isCoreByList);
+      // Every spec carries a non-empty catalogLine (core tools too).
+      expect(typeof spec.catalogLine).toBe('string');
+      expect(spec.catalogLine.trim().length).toBeGreaterThan(0);
+    }
+  });
+
+  it('every INLINE tool tier agrees with CORE_TOOL_SET and has a catalogLine', () => {
+    for (const [key, meta] of Object.entries(INLINE_TOOL_TIERS)) {
+      expect(meta.tier === 'core').toBe(CORE_TOOL_SET.has(key));
+      expect(meta.catalogLine.trim().length).toBeGreaterThan(0);
+    }
+  });
+});
+
+describe('buildInAppDeferredCatalog (#332)', () => {
+  const catalog = buildInAppDeferredCatalog(SHARED_TOOL_SPECS as never);
+  const names = catalog.map((e) => e.name);
+
+  it('includes deferred tools from BOTH the inline map and the shared registry', () => {
+    expect(names).toContain('transformPage'); // inline deferred
+    expect(names).toContain('getPageJson'); // shared deferred
+    expect(names).toContain('patchNode'); // shared deferred
+    expect(names).toContain('createPage'); // inline deferred
+  });
+
+  it('NEVER lists a core tool', () => {
+    for (const core of CORE_TOOL_KEYS) {
+      expect(names).not.toContain(core);
+    }
+    // spot-check a couple that are core in each source.
+    expect(names).not.toContain('searchInPage'); // shared core
+    expect(names).not.toContain('searchPages'); // inline core
+    expect(names).not.toContain('editPageText'); // shared core
+  });
+
+  it('renders every entry as a "name — purpose" line', () => {
+    // Non-empty catalog (the length is pinned structurally by the live-toolset
+    // partition test below, not by a magic constant that rots on every new tool).
+    expect(catalog.length).toBeGreaterThan(0);
+    for (const entry of catalog) {
+      expect(entry.catalogLine).toMatch(/ — /);
+    }
+  });
+});
+
+/**
+ * F3 — the deferred <tool_catalog> is built from STATIC metadata (INLINE_TOOL_TIERS
+ * + SHARED_TOOL_SPECS), but the loadable-by-name set is derived at RUNTIME from the
+ * actual toolset (`Object.keys(baseTools)` in ai-chat.service.ts). Those two must
+ * agree or a tool becomes loadable-but-invisible (agent thinks it doesn't exist) or
+ * catalogued-but-phantom. INLINE_TOOL_TIERS is a plain hand-maintained Record with
+ * no compile-time link to the tools AiChatToolsService.forUser() builds, so nothing
+ * else catches that drift. This test uses forUser()'s LIVE keys as the source of
+ * truth (mirroring ai-chat-tools.service.spec.ts's loader mock) and asserts a
+ * two-way partition against buildInAppDeferredCatalog — replacing the old magic
+ * toHaveLength(28), so a tool added to forUser() without a catalog line (or a
+ * catalog line without a real tool) fails the suite instead of silently vanishing.
+ */
+describe('deferred catalog ↔ live forUser() toolset partition (#332, F3)', () => {
+  let toolKeys: string[];
+  const catalogNames = buildInAppDeferredCatalog(SHARED_TOOL_SPECS as never).map(
+    (e) => e.name,
+  );
+
+  beforeAll(async () => {
+    // Intercept the ESM loader so forUser() builds against the TS-source shared
+    // specs (no @docmost/mcp build) and never touches the network.
+    jest.spyOn(loader, 'loadDocmostMcp').mockResolvedValue({
+      DocmostClient: function () {
+        return {} as DocmostClientLike;
+      } as unknown as loader.DocmostClientCtor,
+      sharedToolSpecs: SHARED_TOOL_SPECS as Record<string, loader.SharedToolSpec>,
+    });
+    const service = new AiChatToolsService(
+      {
+        generateAccessToken: jest.fn().mockResolvedValue('access-token'),
+        generateCollabToken: jest.fn().mockResolvedValue('collab-token'),
+      } as never,
+      {} as never, // aiService — not exercised while merely BUILDING the tools
+      {} as never, // pageEmbeddingRepo
+      {} as never, // spaceMemberRepo
+      {} as never, // pagePermissionRepo
+      // sandboxStore: forUser() eagerly calls asSink() to wire the stash tool.
+      {
+        asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
+      } as never,
+    );
+    const tools = await service.forUser(
+      { id: 'user-1', email: 'u@example.com', workspaceId: 'ws-1' } as never,
+      'session-1',
+      'ws-1',
+      'chat-1',
+    );
+    toolKeys = Object.keys(tools);
+  });
+
+  afterAll(() => {
+    jest.restoreAllMocks();
+  });
+
+  it('exposes a non-trivial toolset (sanity: the mock actually built tools)', () => {
+    expect(toolKeys.length).toBeGreaterThan(20);
+  });
+
+  it('every non-core live tool is present in the catalog (no capability silently hidden)', () => {
+    // forUser() does not itself add loadTools (ai-chat.service does), but guard
+    // anyway. Every remaining non-core key MUST have a catalog line.
+    const catalogSet = new Set(catalogNames);
+    const missing = toolKeys.filter(
+      (k) => !CORE_TOOL_SET.has(k) && k !== LOAD_TOOLS_NAME && !catalogSet.has(k),
+    );
+    expect(missing).toEqual([]);
+  });
+
+  it('every catalog entry corresponds to a real, non-core live tool (no phantom)', () => {
+    const liveSet = new Set(toolKeys);
+    const phantom = catalogNames.filter(
+      (n) => !liveSet.has(n) || CORE_TOOL_SET.has(n),
+    );
+    expect(phantom).toEqual([]);
+  });
+});
+
+describe('buildExternalToolCatalog + shortenForCatalog (#332)', () => {
+  it('derives a short "name — purpose" line from each external tool description', () => {
+    const catalog = buildExternalToolCatalog({
+      tavily_search: { description: 'Search the web for fresh results. More detail here.' },
+      tavily_extract: { description: '' },
+    });
+    expect(catalog).toEqual([
+      { name: 'tavily_search', catalogLine: 'tavily_search — Search the web for fresh results.' },
+      { name: 'tavily_extract', catalogLine: 'tavily_extract — external tool' },
+    ]);
+  });
+
+  it('caps a very long description', () => {
+    const long = 'x'.repeat(500);
+    expect(shortenForCatalog(long).length).toBeLessThanOrEqual(140);
+    expect(shortenForCatalog(long).endsWith('…')).toBe(true);
+  });
+});
+
+describe('applyLoadTools (#332)', () => {
+  const valid = new Set(['createPage', 'transformPage', 'tavily_search']);
+
+  it('adds valid names to the activated set and returns { loaded }', () => {
+    const activated = new Set<string>();
+    const result = applyLoadTools(['createPage', 'tavily_search'], activated, valid);
+    expect(result).toEqual({ loaded: ['createPage', 'tavily_search'] });
+    expect(activated.has('createPage')).toBe(true);
+    expect(activated.has('tavily_search')).toBe(true);
+  });
+
+  it('rejects an unknown name with an error listing the valid deferred names', () => {
+    const activated = new Set<string>();
+    expect(() => applyLoadTools(['nope'], activated, valid)).toThrow(/unknown tool name/i);
+    try {
+      applyLoadTools(['nope'], activated, valid);
+    } catch (e) {
+      const msg = (e as Error).message;
+      // Lists every valid name (sorted).
+      expect(msg).toContain('createPage');
+      expect(msg).toContain('transformPage');
+      expect(msg).toContain('tavily_search');
+    }
+    // Nothing is activated on a rejected call.
+    expect(activated.size).toBe(0);
+  });
+
+  it('tolerates a non-array / empty input (loads nothing)', () => {
+    const activated = new Set<string>();
+    expect(applyLoadTools(undefined, activated, valid)).toEqual({ loaded: [] });
+    expect(applyLoadTools([], activated, valid)).toEqual({ loaded: [] });
+    expect(activated.size).toBe(0);
+  });
+
+  it('loadTools description is the verbatim issue text', () => {
+    expect(LOAD_TOOLS_DESCRIPTION).toContain('only ACTIVATES them');
+    expect(LOAD_TOOLS_DESCRIPTION).toContain('callable on your NEXT step');
+  });
+});
+
+describe('editorial "Corrector" scenario is fully served by CORE (#332)', () => {
+  it('read + comment + edit + search need no loadTools', () => {
+    // A Corrector role reads a page, searches within it, edits text, and leaves
+    // inline comments — every tool it needs is core, so it never has to load a
+    // deferred tool.
+    const needed = [
+      'getCurrentPage',
+      'getPage',
+      'searchPages',
+      'searchInPage',
+      'editPageText',
+      'createComment',
+      'listComments',
+      'getComment',
+      'resolveComment',
+    ];
+    for (const t of needed) {
+      expect(CORE_TOOL_SET.has(t)).toBe(true);
+    }
+  });
+});
@@ -0,0 +1,309 @@
+import { tool, type Tool } from 'ai';
+import { z } from 'zod';
+import type { SharedToolSpec } from './docmost-client.loader';
+
+/**
+ * Deferred tool loading for the in-app AI chat (#332).
+ *
+ * The agent otherwise sends ALL ~41 tool definitions on EVERY model call every
+ * step, bloating context. Instead we split the in-app tools into two tiers:
+ *
+ *  - CORE (hot, always active): frequent OR tiny tools whose full schema is
+ *    always visible, plus the `loadTools` meta-tool. Deferring a one-line tool is
+ *    pure loss, so tiny tools stay core even if rare.
+ *  - DEFERRED (loaded on demand): the fat/rare tools + ALL external MCP tools by
+ *    default. The model sees only a compact <tool_catalog> (name — purpose) and
+ *    calls `loadTools(names)` to ACTIVATE a tool's full schema for the NEXT step
+ *    (one extra round-trip on first use).
+ *
+ * This module is the single source of truth for the IN-APP tiering:
+ *  - CORE_TOOL_KEYS / CORE_TOOL_SET — the authoritative core list (used by
+ *    prepareAgentStep to build per-step `activeTools`).
+ *  - INLINE_TOOL_TIERS — tier + catalogLine for the per-layer INLINE tools (the
+ *    ones NOT in @docmost/mcp's SHARED_TOOL_SPECS, which carry their own).
+ *  - buildInAppDeferredCatalog / buildExternalToolCatalog — assemble the
+ *    <tool_catalog> deferred lines.
+ *  - applyLoadTools / makeLoadToolsTool — the loadTools meta-tool.
+ *
+ * The tier/catalogLine fields on SHARED_TOOL_SPECS are IN-APP metadata only; the
+ * external /mcp server ignores them and exposes every tool normally.
+ */
+
+/** A single rendered <tool_catalog> line: the tool name + its "name — purpose". */
+export interface ToolCatalogEntry {
+  /** Exact tool name the model must pass to loadTools. */
+  name: string;
+  /** Hand-written (in-app) or derived (external) "name — purpose" line. */
+  catalogLine: string;
+}
+
+/**
+ * CORE (always-active) in-app tool keys — 13 frequent/tiny tools. `searchInPage`
+ * (#330) is added to core on top of the issue's original tier list: it is
+ * frequent for the editorial roles this feature targets. `loadTools` is active
+ * too but is not a normal tool key (it is added to activeTools separately).
+ */
+export const CORE_TOOL_KEYS = [
+  'searchPages',
+  'listPages',
+  'listSpaces',
+  'getWorkspace',
+  'getCurrentPage',
+  'getPage',
+  'getOutline',
+  'getNode',
+  'createComment',
+  'getComment',
+  'listComments',
+  'resolveComment',
+  'editPageText',
+  // #330 search_in_page — frequent for editorial sweeps; core despite predating
+  // the issue's tier list.
+  'searchInPage',
+] as const;
+
+/** O(1) membership test for the core tier. */
+export const CORE_TOOL_SET: ReadonlySet<string> = new Set(CORE_TOOL_KEYS);
+
+/** The meta-tool name (always active alongside the core tools when enabled). */
+export const LOAD_TOOLS_NAME = 'loadTools';
+
+/**
+ * loadTools description — VERBATIM from issue #332. Tells the model that the
+ * catalog names EXIST, that loadTools only ACTIVATES them (callable next step),
+ * and to load several at once.
+ */
+export const LOAD_TOOLS_DESCRIPTION =
+  'loadTools — Load the full definitions of deferred tools from the <tool_catalog>\n' +
+  'block in your instructions. Pass the EXACT tool names from the catalog; this\n' +
+  'call only ACTIVATES them and returns { loaded: [...] } — the tools become\n' +
+  'callable on your NEXT step. Load several names in one call when the task clearly\n' +
+  'needs them. Unknown names are rejected with the list of valid ones.';
+
+/**
+ * Tier + catalogLine for the INLINE ai-chat tools — those defined per-layer in
+ * ai-chat-tools.service.ts and NOT present in @docmost/mcp's SHARED_TOOL_SPECS
+ * (which carries its own tier/catalogLine). Together with the shared registry
+ * this describes every in-app tool. catalogLine is present for core tools too
+ * (uniformity), but only DEFERRED tools are rendered into the catalog.
+ */
+export const INLINE_TOOL_TIERS: Record<
+  string,
+  { tier: 'core' | 'deferred'; catalogLine: string }
+> = {
+  // --- core inline ---
+  searchPages: {
+    tier: 'core',
+    catalogLine: 'searchPages — hybrid semantic + keyword search across the wiki.',
+  },
+  getCurrentPage: {
+    tier: 'core',
+    catalogLine: 'getCurrentPage — the page the user is currently viewing.',
+  },
+  getPage: {
+    tier: 'core',
+    catalogLine: 'getPage — fetch a page as Markdown by its id.',
+  },
+  listPages: {
+    tier: 'core',
+    catalogLine: "listPages — list recent pages, or a space's full page tree.",
+  },
+  listComments: {
+    tier: 'core',
+    catalogLine: 'listComments — list all comments on a page (including resolved).',
+  },
+  getComment: {
+    tier: 'core',
+    catalogLine: 'getComment — fetch a single comment by id.',
+  },
+  createComment: {
+    tier: 'core',
+    catalogLine:
+      'createComment — add an inline comment (optionally with a suggested edit).',
+  },
+  resolveComment: {
+    tier: 'core',
+    catalogLine: 'resolveComment — resolve or reopen a comment thread.',
+  },
+
+  // --- deferred inline ---
+  createPage: {
+    tier: 'deferred',
+    catalogLine: 'createPage — create a new page with a Markdown body in a space.',
+  },
+  updatePageContent: {
+    tier: 'deferred',
+    catalogLine:
+      "updatePageContent — replace a page's body (and optionally title) with new Markdown.",
+  },
+  renamePage: {
+    tier: 'deferred',
+    catalogLine: "renamePage — change a page's title only (body untouched).",
+  },
+  movePage: {
+    tier: 'deferred',
+    catalogLine: 'movePage — move a page under a new parent or to the space root.',
+  },
+  deletePage: {
+    tier: 'deferred',
+    catalogLine: 'deletePage — move a page to trash (soft delete, reversible).',
+  },
+  listSidebarPages: {
+    tier: 'deferred',
+    catalogLine:
+      "listSidebarPages — list a space's root pages or a page's direct children.",
+  },
+  getTable: {
+    tier: 'deferred',
+    catalogLine: 'getTable — read a table as a matrix of cell texts and cell ids.',
+  },
+  checkNewComments: {
+    tier: 'deferred',
+    catalogLine:
+      'checkNewComments — find comments in a space created after a timestamp.',
+  },
+  getPageHistory: {
+    tier: 'deferred',
+    catalogLine:
+      'getPageHistory — fetch one page-history version with its ProseMirror content.',
+  },
+  exportPageMarkdown: {
+    tier: 'deferred',
+    catalogLine:
+      'exportPageMarkdown — export a page to self-contained Markdown (body + comments).',
+  },
+  updatePageJson: {
+    tier: 'deferred',
+    catalogLine:
+      "updatePageJson — overwrite a page's body with a full ProseMirror document.",
+  },
+  tableInsertRow: {
+    tier: 'deferred',
+    catalogLine: 'tableInsertRow — insert a row of plain-text cells into a table.',
+  },
+  tableDeleteRow: {
+    tier: 'deferred',
+    catalogLine: 'tableDeleteRow — delete a table row at a 0-based index.',
+  },
+  tableUpdateCell: {
+    tier: 'deferred',
+    catalogLine: 'tableUpdateCell — set the text of a table cell at [row, col].',
+  },
+  sharePage: {
+    tier: 'deferred',
+    catalogLine: 'sharePage — make a page publicly accessible and return its URL.',
+  },
+  transformPage: {
+    tier: 'deferred',
+    catalogLine: "transformPage — run a sandboxed JS transform over a page's document.",
+  },
+};
+
+/**
+ * Build the <tool_catalog> deferred lines for the IN-APP tools by merging the
+ * two metadata sources: the per-layer INLINE_TOOL_TIERS and the shared registry
+ * (SHARED_TOOL_SPECS, loaded at runtime). Only DEFERRED tools are included; core
+ * tools are always active and never appear in the catalog. Pure — the caller
+ * passes the loaded specs so this stays unit-testable.
+ */
+export function buildInAppDeferredCatalog(
+  sharedToolSpecs: Record<string, SharedToolSpec>,
+): ToolCatalogEntry[] {
+  const entries: ToolCatalogEntry[] = [];
+  // Inline deferred tools (hand-written lines).
+  for (const [name, meta] of Object.entries(INLINE_TOOL_TIERS)) {
+    if (meta.tier === 'deferred') {
+      entries.push({ name, catalogLine: meta.catalogLine });
+    }
+  }
+  // Shared deferred tools (line comes from the registry's own catalogLine).
+  for (const [name, spec] of Object.entries(sharedToolSpecs)) {
+    if (spec.tier === 'deferred' && spec.catalogLine) {
+      entries.push({ name, catalogLine: spec.catalogLine });
+    }
+  }
+  return entries;
+}
+
+/**
+ * Cap an external tool's (untrusted) description into a short catalog purpose.
+ * External MCP tools have no hand-written catalogLine, so we derive one from the
+ * first sentence of the description, hard-capped. Whitespace is collapsed.
+ */
+export function shortenForCatalog(description: string, max = 140): string {
+  const flat = description.replace(/\s+/g, ' ').trim();
+  if (!flat) return 'external tool';
+  // Prefer the first sentence if it is reasonably short.
+  const firstSentence = flat.split(/(?<=[.!?])\s/)[0];
+  const base =
+    firstSentence.length > 0 && firstSentence.length <= max
+      ? firstSentence
+      : flat;
+  return base.length > max ? `${base.slice(0, max - 1).trimEnd()}…` : base;
+}
+
+/**
+ * Build catalog lines for the EXTERNAL MCP tools (all deferred by default,
+ * #332). Their names are the namespaced tool keys; the purpose is derived from
+ * each tool's own description (no hand-written line exists). Pure.
+ */
+export function buildExternalToolCatalog(
+  externalTools: Record<string, { description?: string } | undefined>,
+): ToolCatalogEntry[] {
+  return Object.entries(externalTools).map(([name, t]) => ({
+    name,
+    catalogLine: `${name} — ${shortenForCatalog(t?.description ?? '')}`,
+  }));
+}
+
+/**
+ * Pure core of the loadTools meta-tool. Validates the requested names against
+ * the per-turn set of valid deferred names, ADDS the valid ones to the caller's
+ * mutable `activatedTools` set (so they become callable next step), and returns
+ * `{ loaded }`. An unknown name throws a clear error listing the valid deferred
+ * names — surfaced to the model as a tool error so it can retry.
+ */
+export function applyLoadTools(
+  names: unknown,
+  activatedTools: Set<string>,
+  validDeferredNames: ReadonlySet<string>,
+): { loaded: string[] } {
+  const requested = Array.isArray(names)
+    ? names.filter((n): n is string => typeof n === 'string')
+    : [];
+  const unknown = requested.filter((n) => !validDeferredNames.has(n));
+  if (unknown.length > 0) {
+    const valid = [...validDeferredNames].sort().join(', ');
+    throw new Error(
+      `loadTools: unknown tool name(s): ${unknown.join(', ')}. ` +
+        `Valid deferred tools are: ${valid || '(none)'}.`,
+    );
+  }
+  for (const n of requested) activatedTools.add(n);
+  return { loaded: requested };
+}
+
+/**
+ * Build the loadTools AI-SDK tool bound to THIS turn's mutable state: the
+ * `activatedTools` set (grown by execute, read by prepareAgentStep next step)
+ * and the `validDeferredNames` set (every non-core tool in this turn's toolset,
+ * incl. external MCP). Created per streamText call — never module-global.
+ */
+export function makeLoadToolsTool(
+  activatedTools: Set<string>,
+  validDeferredNames: ReadonlySet<string>,
+): Tool {
+  return tool({
+    description: LOAD_TOOLS_DESCRIPTION,
+    inputSchema: z.object({
+      names: z
+        .array(z.string())
+        .describe(
+          'EXACT deferred tool names from the <tool_catalog> to activate for ' +
+            'your next step.',
+        ),
+    }),
+    execute: async ({ names }) =>
+      applyLoadTools(names, activatedTools, validDeferredNames),
+  });
+}
@@ -16,6 +16,7 @@ import {
  AUTH_THROTTLER,
  PAGE_TEMPLATE_THROTTLER,
  PUBLIC_SHARE_AI_THROTTLER,
+  VITALS_THROTTLER,
 } from '../../integrations/throttle/throttler-names';
 import { LoginDto } from './dto/login.dto';
 import { AuthService } from './services/auth.service';
@@ -184,16 +185,21 @@ export class AuthController {
  }

  // The global ThrottlerGuard applies ALL named throttlers to every route by
-  // default, so each non-AUTH bucket (AI chat, page template, public-share AI)
-  // is explicitly skipped here. collab-token is auth-guarded (JwtAuthGuard),
-  // per-user and client-cached, so those feature buckets are irrelevant to it;
-  // skipping them avoids spurious 429s when a user opens many pages in a short
-  // window. The AUTH bucket is skipped too for the same per-user, cached reason.
+  // default, so each non-AUTH bucket (AI chat, page template, public-share AI,
+  // client vitals) is explicitly skipped here. collab-token is auth-guarded
+  // (JwtAuthGuard), per-user and client-cached, so those feature buckets are
+  // irrelevant to it; skipping them avoids spurious 429s when a user opens many
+  // pages in a short window. The VITALS bucket must be skipped too: it is a
+  // process-wide named throttler, so without this skip its per-IP limit would
+  // silently cap collab-token (the one route that opts out of every other
+  // bucket) and break editing behind shared/NAT IPs. The AUTH bucket is skipped
+  // for the same per-user, cached reason.
  @SkipThrottle({
    [AUTH_THROTTLER]: true,
    [AI_CHAT_THROTTLER]: true,
    [PAGE_TEMPLATE_THROTTLER]: true,
    [PUBLIC_SHARE_AI_THROTTLER]: true,
+    [VITALS_THROTTLER]: true,
  })
  @UseGuards(JwtAuthGuard)
  @HttpCode(HttpStatus.OK)
@@ -1,4 +1,5 @@
 import {
+  BadRequestException,
  ForbiddenException,
  NotFoundException,
 } from '@nestjs/common';
@@ -117,3 +118,207 @@ describe('CommentController apply-suggestion authz', () => {
    expect(commentService.applySuggestion).not.toHaveBeenCalled();
  });
 });
+
+/**
+ * Authz-gate tests for the dismiss-suggestion route (#329). Dismissing a
+ * suggestion does NOT change the page text, so it authorizes with
+ * validateCanComment (NOT validateCanEdit) — a viewer allowed to comment but not
+ * edit can still dismiss. The gate MUST run BEFORE the service (which performs
+ * the delete/resolve + mark removal). These tests pin that boundary.
+ */
+describe('CommentController dismiss-suggestion authz', () => {
+  // isAdmin=false → ability.cannot(Manage, Settings) returns true (i.e. the user
+  // is NOT a space admin). Flip to true to model a space admin.
+  function makeController(isAdmin = false) {
+    const commentService = {
+      dismissSuggestion: jest.fn(async () => ({
+        id: 'c-1',
+        outcome: 'deleted',
+      })),
+    };
+    const commentRepo = { findById: jest.fn() };
+    const pageRepo = { findById: jest.fn() };
+    const spaceAbility = {
+      createForUser: jest.fn(async () => ({
+        cannot: jest.fn(() => !isAdmin),
+      })),
+    } as any;
+    const pageAccessService = {
+      validateCanComment: jest.fn(async () => undefined),
+      validateCanEdit: jest.fn(async () => undefined),
+    };
+    const wsService = {} as any;
+    const auditService = { log: jest.fn() };
+
+    const controller = new CommentController(
+      commentService as any,
+      commentRepo as any,
+      pageRepo as any,
+      spaceAbility,
+      pageAccessService as any,
+      wsService,
+      auditService as any,
+    );
+    return {
+      controller,
+      commentService,
+      commentRepo,
+      pageRepo,
+      pageAccessService,
+      spaceAbility,
+    };
+  }
+
+  const user: any = { id: 'u-1' };
+  const workspace: any = { id: 'ws-1' };
+  const provenance: any = undefined;
+  const dto: any = { commentId: 'c-1' };
+  // Owned by the acting user (u-1) unless a test overrides creatorId.
+  const comment = {
+    id: 'c-1',
+    pageId: 'p-1',
+    spaceId: 'sp-1',
+    creatorId: 'u-1',
+    suggestedText: 'new text',
+    selection: 'old text',
+  };
+  const page = { id: 'p-1', spaceId: 'sp-1', deletedAt: null };
+
+  it('authorizes with validateCanComment (NOT validateCanEdit) then calls the service', async () => {
+    const {
+      controller,
+      commentRepo,
+      pageRepo,
+      pageAccessService,
+      commentService,
+    } = makeController();
+    commentRepo.findById.mockResolvedValue(comment);
+    pageRepo.findById.mockResolvedValue(page);
+    const dismissed = { id: 'c-1', outcome: 'deleted' };
+    commentService.dismissSuggestion.mockResolvedValue(dismissed);
+
+    const result = await controller.dismissSuggestion(
+      dto,
+      user,
+      workspace,
+      provenance,
+    );
+
+    expect(pageAccessService.validateCanComment).toHaveBeenCalledWith(
+      page,
+      user,
+      workspace.id,
+    );
+    // Dismiss must NOT require edit access.
+    expect(pageAccessService.validateCanEdit).not.toHaveBeenCalled();
+    expect(commentService.dismissSuggestion).toHaveBeenCalledWith(
+      comment,
+      user,
+      provenance,
+    );
+    expect(result).toBe(dismissed);
+  });
+
+  it('validateCanComment throwing Forbidden rejects AND dismissSuggestion is never called', async () => {
+    const {
+      controller,
+      commentRepo,
+      pageRepo,
+      pageAccessService,
+      commentService,
+    } = makeController();
+    commentRepo.findById.mockResolvedValue(comment);
+    pageRepo.findById.mockResolvedValue(page);
+    pageAccessService.validateCanComment.mockRejectedValue(
+      new ForbiddenException('no comment access'),
+    );
+
+    await expect(
+      controller.dismissSuggestion(dto, user, workspace, provenance),
+    ).rejects.toBeInstanceOf(ForbiddenException);
+
+    expect(commentService.dismissSuggestion).not.toHaveBeenCalled();
+  });
+
+  it('missing comment: NotFound without authorizing or dismissing', async () => {
+    const { controller, commentRepo, pageRepo, pageAccessService, commentService } =
+      makeController();
+    commentRepo.findById.mockResolvedValue(null);
+
+    await expect(
+      controller.dismissSuggestion(dto, user, workspace, provenance),
+    ).rejects.toBeInstanceOf(NotFoundException);
+
+    expect(pageRepo.findById).not.toHaveBeenCalled();
+    expect(pageAccessService.validateCanComment).not.toHaveBeenCalled();
+    expect(commentService.dismissSuggestion).not.toHaveBeenCalled();
+  });
+
+  it('propagates a service BadRequest (e.g. already applied/resolved) unchanged', async () => {
+    const { controller, commentRepo, pageRepo, commentService } =
+      makeController();
+    commentRepo.findById.mockResolvedValue(comment);
+    pageRepo.findById.mockResolvedValue(page);
+    commentService.dismissSuggestion.mockRejectedValue(
+      new BadRequestException('already applied'),
+    );
+
+    await expect(
+      controller.dismissSuggestion(dto, user, workspace, provenance),
+    ).rejects.toBeInstanceOf(BadRequestException);
+  });
+
+  // --- #338 owner-or-space-admin gate (mirrors POST /comments/delete) --------
+  // A childless dismiss irreversibly hard-deletes the comment, so canComment is
+  // not enough: only the comment owner or a space admin may dismiss.
+
+  it('owner dismisses their own suggestion → allowed, no admin check needed', async () => {
+    const { controller, commentRepo, pageRepo, commentService, spaceAbility } =
+      makeController(false);
+    // comment.creatorId === user.id (owner).
+    commentRepo.findById.mockResolvedValue(comment);
+    pageRepo.findById.mockResolvedValue(page);
+
+    await controller.dismissSuggestion(dto, user, workspace, provenance);
+
+    // Owner short-circuits the admin lookup.
+    expect(spaceAbility.createForUser).not.toHaveBeenCalled();
+    expect(commentService.dismissSuggestion).toHaveBeenCalledWith(
+      comment,
+      user,
+      provenance,
+    );
+  });
+
+  it('non-owner non-admin → Forbidden AND the service is never called', async () => {
+    const { controller, commentRepo, pageRepo, commentService, spaceAbility } =
+      makeController(false); // NOT a space admin
+    commentRepo.findById.mockResolvedValue({
+      ...comment,
+      creatorId: 'someone-else',
+    });
+    pageRepo.findById.mockResolvedValue(page);
+
+    await expect(
+      controller.dismissSuggestion(dto, user, workspace, provenance),
+    ).rejects.toBeInstanceOf(ForbiddenException);
+
+    expect(spaceAbility.createForUser).toHaveBeenCalledWith(user, comment.spaceId);
+    expect(commentService.dismissSuggestion).not.toHaveBeenCalled();
+  });
+
+  it('non-owner space admin → allowed to dismiss another user’s suggestion', async () => {
+    const { controller, commentRepo, pageRepo, commentService, spaceAbility } =
+      makeController(true); // space admin
+    commentRepo.findById.mockResolvedValue({
+      ...comment,
+      creatorId: 'someone-else',
+    });
+    pageRepo.findById.mockResolvedValue(page);
+
+    await controller.dismissSuggestion(dto, user, workspace, provenance);
+
+    expect(spaceAbility.createForUser).toHaveBeenCalledWith(user, comment.spaceId);
+    expect(commentService.dismissSuggestion).toHaveBeenCalled();
+  });
+});
@@ -15,6 +15,7 @@ import { CreateCommentDto } from './dto/create-comment.dto';
 import { UpdateCommentDto } from './dto/update-comment.dto';
 import { ResolveCommentDto } from './dto/resolve-comment.dto';
 import { ApplySuggestionDto } from './dto/apply-suggestion.dto';
+import { DismissSuggestionDto } from './dto/dismiss-suggestion.dto';
 import { PageIdDto, CommentIdDto } from './dto/comments.input';
 import { AuthUser } from '../../common/decorators/auth-user.decorator';
 import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
@@ -234,6 +235,59 @@ export class CommentController {
    return this.commentService.applySuggestion(comment, user, provenance);
  }

+  @HttpCode(HttpStatus.OK)
+  @Post('dismiss-suggestion')
+  async dismissSuggestion(
+    @Body() dto: DismissSuggestionDto,
+    @AuthUser() user: User,
+    @AuthWorkspace() workspace: Workspace,
+    @AuthProvenance() provenance: AuthProvenanceData,
+  ) {
+    const comment = await this.commentRepo.findById(dto.commentId, {
+      includeCreator: true,
+      includeResolvedBy: true,
+    });
+    if (!comment) {
+      throw new NotFoundException('Comment not found');
+    }
+
+    const page = await this.pageRepo.findById(comment.pageId);
+    if (!page || page.deletedAt) {
+      throw new NotFoundException('Page not found');
+    }
+
+    // Authorize BEFORE revealing any structural detail (metadata-disclosure
+    // hygiene, mirroring apply-suggestion). Dismissing a suggestion does NOT
+    // change the page text — it only removes/resolves the comment — so the
+    // page-level gate is comment access (canComment), NOT edit access. A viewer
+    // allowed to comment but not edit can still dismiss their own suggestion.
+    // The structural 400s (top-level / has-a-suggested-edit / not applied /
+    // not resolved) are re-checked by the service below.
+    await this.pageAccessService.validateCanComment(page, user, workspace.id);
+
+    // AUTHZ (#338): a childless dismiss IRREVERSIBLY hard-deletes the comment,
+    // so — beyond canComment — restrict it to the comment owner OR a space
+    // admin, exactly like POST /comments/delete. canComment alone is not enough:
+    // it would let any bystander commenter erase another user's suggestion for
+    // good. (apply-suggestion deliberately stays on canEdit: accepting an edit
+    // is the editor's semantics, not the suggestion author's.)
+    const isOwner = comment.creatorId === user.id;
+    if (!isOwner) {
+      const ability = await this.spaceAbility.createForUser(
+        user,
+        comment.spaceId,
+      );
+      // Space admin can dismiss any suggestion.
+      if (ability.cannot(SpaceCaslAction.Manage, SpaceCaslSubject.Settings)) {
+        throw new ForbiddenException(
+          'You can only dismiss your own suggestions',
+        );
+      }
+    }
+
+    return this.commentService.dismissSuggestion(comment, user, provenance);
+  }
+
  @HttpCode(HttpStatus.OK)
  @Post('delete')
  async delete(@Body() input: CommentIdDto, @AuthUser() user: User, @AuthWorkspace() workspace: Workspace) {
@@ -13,17 +13,27 @@ import { AuditEvent, AuditResource } from '../../common/events/audit-events';
 *
 * The collaboration gateway verdict is the pivot of the whole flow, so each test
 * pins a specific { applied, currentText } and asserts the DB persistence,
- * auto-resolve, audit, ws broadcast, and error mapping that follow from it.
+ * settle (ephemeral delete vs. resolve), audit, ws broadcast, and error mapping
+ * that follow from it.
+ *
+ * Ephemeral rule (#329): once applied a suggestion DISAPPEARS (hard-delete +
+ * strip the inline anchor mark) UNLESS the thread has replies, in which case it
+ * is resolved to preserve the discussion. `hasChildren` selects the branch.
 */
 describe('CommentService — applySuggestion', () => {
  const UPDATED = { id: 'c-1', __updated: true } as any;

-  function makeService(verdict: unknown) {
+  function makeService(verdict: unknown, hasChildren = false, deletedRows = 1) {
    const commentRepo: any = {
      // Both the applied-stamp re-read and resolveComment's re-read go through
      // findById; return a recognizable enriched row.
      findById: jest.fn(async () => UPDATED),
      updateComment: jest.fn(async () => undefined),
+      hasChildren: jest.fn(async () => hasChildren),
+      deleteComment: jest.fn(async () => undefined),
+      // #338 F1: the childless ephemeral delete is atomic-conditional and
+      // returns the number of rows removed (1 = deleted, 0 = a reply raced in).
+      deleteCommentIfChildless: jest.fn(async () => deletedRows),
    };
    const pageRepo: any = {};
    const wsService: any = { emitCommentEvent: jest.fn() };
@@ -74,7 +84,9 @@ describe('CommentService — applySuggestion', () => {
      .map((c: any[]) => c[0])
      .find((patch: any) => 'suggestionAppliedAt' in patch);

-  it('applied=true → replaces text, persists applied stamps, auto-resolves, audits, returns updated', async () => {
+  // --- no replies → ephemeral delete branch -------------------------------
+
+  it('applied=true, no replies → replaces text, hard-deletes, strips the anchor mark, audits APPLIED, outcome=deleted', async () => {
    const { service, commentRepo, wsService, collaborationGateway, auditService } =
      makeService({ applied: true, currentText: 'new text' });

@@ -92,37 +104,34 @@ describe('CommentService — applySuggestion', () => {
      }),
    );

-    // Applied stamps persisted.
-    const patch = appliedPatch(commentRepo);
-    expect(patch.suggestionAppliedAt).toBeInstanceOf(Date);
-    expect(patch.suggestionAppliedById).toBe('user-1');
+    // Ephemeral: the redundant comment is hard-deleted (atomic-conditional) and
+    // its inline anchor mark removed via the deleteCommentMark collab event.
+    expect(commentRepo.deleteCommentIfChildless).toHaveBeenCalledWith('c-1');
+    expect(collaborationGateway.handleYjsEvent).toHaveBeenCalledWith(
+      'deleteCommentMark',
+      'page.page-1',
+      expect.objectContaining({ commentId: 'c-1', user: expect.any(Object) }),
+    );
+    // No applied stamps are written for a row about to be deleted.
+    expect(appliedPatch(commentRepo)).toBeUndefined();

-    // Auto-resolved: resolveComment writes a resolvedAt/resolvedById patch too.
-    const resolvePatch = commentRepo.updateComment.mock.calls
-      .map((c: any[]) => c[0])
-      .find((p: any) => 'resolvedAt' in p);
-    expect(resolvePatch.resolvedAt).toBeInstanceOf(Date);
-    expect(resolvePatch.resolvedById).toBe('user-1');
-
-    // Audit + broadcast + return.
+    // Broadcast a deletion, audit the (still-applied) suggestion, report outcome.
+    expect(wsService.emitCommentEvent).toHaveBeenCalledWith(
+      'space-1',
+      'page-1',
+      expect.objectContaining({ operation: 'commentDeleted', commentId: 'c-1' }),
+    );
    expect(auditService.log).toHaveBeenCalledWith(
      expect.objectContaining({
        event: AuditEvent.COMMENT_SUGGESTION_APPLIED,
        resourceType: AuditResource.COMMENT,
        resourceId: 'c-1',
-        spaceId: 'space-1',
-        metadata: { pageId: 'page-1' },
      }),
    );
-    expect(wsService.emitCommentEvent).toHaveBeenCalledWith(
-      'space-1',
-      'page-1',
-      expect.objectContaining({ operation: 'commentUpdated', comment: UPDATED }),
-    );
-    expect(result).toBe(UPDATED);
+    expect(result.outcome).toBe('deleted');
  });

-  it('applied=false but currentText === suggestedText → idempotent success (no 409)', async () => {
+  it('applied=false but currentText === suggestedText, no replies → idempotent delete (no 409)', async () => {
    const { service, commentRepo, auditService } = makeService({
      applied: false,
      currentText: 'new text',
@@ -130,15 +139,55 @@ describe('CommentService — applySuggestion', () => {

    const result = await service.applySuggestion(suggestionComment(), user());

-    // The stamps are still persisted (reconciling a crash between the doc
-    // mutation and the DB write) and the call succeeds.
+    expect(commentRepo.deleteCommentIfChildless).toHaveBeenCalledWith('c-1');
+    expect(auditService.log).toHaveBeenCalledTimes(1);
+    expect(result.outcome).toBe('deleted');
+  });
+
+  // --- has replies → resolve branch (discussion preserved) ----------------
+
+  it('applied=true, WITH replies → resolves (not delete), persists applied stamps, audits, outcome=resolved', async () => {
+    const { service, commentRepo, wsService, collaborationGateway, auditService } =
+      makeService({ applied: true, currentText: 'new text' }, true);
+
+    const result = await service.applySuggestion(suggestionComment(), user());
+
+    // Applied stamps persisted.
    const patch = appliedPatch(commentRepo);
    expect(patch.suggestionAppliedAt).toBeInstanceOf(Date);
    expect(patch.suggestionAppliedById).toBe('user-1');
-    expect(auditService.log).toHaveBeenCalledTimes(1);
-    expect(result).toBe(UPDATED);
+
+    // Auto-resolved (resolveComment writes the resolve patch + resolve mark).
+    const resolvePatch = commentRepo.updateComment.mock.calls
+      .map((c: any[]) => c[0])
+      .find((p: any) => 'resolvedAt' in p);
+    expect(resolvePatch.resolvedAt).toBeInstanceOf(Date);
+    expect(resolvePatch.resolvedById).toBe('user-1');
+
+    // NOT deleted; broadcast an update, not a deletion.
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
+    expect(collaborationGateway.handleYjsEvent).not.toHaveBeenCalledWith(
+      'deleteCommentMark',
+      expect.anything(),
+      expect.anything(),
+    );
+    expect(wsService.emitCommentEvent).toHaveBeenCalledWith(
+      'space-1',
+      'page-1',
+      expect.objectContaining({ operation: 'commentUpdated', comment: UPDATED }),
+    );
+
+    expect(auditService.log).toHaveBeenCalledWith(
+      expect.objectContaining({
+        event: AuditEvent.COMMENT_SUGGESTION_APPLIED,
+      }),
+    );
+    expect(result.id).toBe('c-1');
+    expect(result.outcome).toBe('resolved');
  });

+  // --- error / rejection branches -----------------------------------------
+
  it('applied=false and currentText differs → ConflictException with currentText in payload', async () => {
    const { service, commentRepo, auditService } = makeService({
      applied: false,
@@ -153,14 +202,14 @@ describe('CommentService — applySuggestion', () => {
    expect(err.getResponse()).toMatchObject({
      currentText: 'someone else edited this',
    });
-    // No persistence and no audit on a conflict.
-    expect(appliedPatch(commentRepo)).toBeUndefined();
+    // No delete and no audit on a conflict.
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
    expect(auditService.log).not.toHaveBeenCalled();
  });

-  it('already-applied AND already-resolved → idempotent success, no collab call, no re-resolve (#315 double-click)', async () => {
+  it('already-applied WITH replies → idempotent success, no re-apply, resolve branch', async () => {
    const { service, collaborationGateway, commentRepo, auditService } =
-      makeService({ applied: true, currentText: 'new text' });
+      makeService({ applied: true, currentText: 'new text' }, true);

    const result = await service.applySuggestion(
      suggestionComment({
@@ -171,17 +220,20 @@ describe('CommentService — applySuggestion', () => {
      user(),
    );

-    // Idempotent SUCCESS, not a 409. The suggestion is already applied, so the
-    // collaborative document is never touched again and nothing is re-stamped
-    // or re-resolved.
-    expect(result).toBe(UPDATED);
-    expect(collaborationGateway.handleYjsEvent).not.toHaveBeenCalled();
-    expect(commentRepo.updateComment).not.toHaveBeenCalled();
-    // Same success shape as the applied path (broadcast + audit).
+    // Idempotent SUCCESS. The suggestion is already applied, so the document is
+    // never re-mutated (no applyCommentSuggestion) and nothing is re-stamped.
+    expect(collaborationGateway.handleYjsEvent).not.toHaveBeenCalledWith(
+      'applyCommentSuggestion',
+      expect.anything(),
+      expect.anything(),
+    );
+    expect(appliedPatch(commentRepo)).toBeUndefined();
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
    expect(auditService.log).toHaveBeenCalledTimes(1);
+    expect(result.outcome).toBe('resolved');
  });

-  it('already-applied but NOT resolved (crash window) → idempotent success, self-heals resolve, no re-apply', async () => {
+  it('already-applied, no replies (double-click after a delete) → deletes idempotently', async () => {
    const { service, collaborationGateway, commentRepo } = makeService({
      applied: true,
      currentText: 'new text',
@@ -192,28 +244,43 @@ describe('CommentService — applySuggestion', () => {
      user(),
    );

-    expect(result).toBe(UPDATED);
-
-    // The suggestion is NOT re-applied to the document…
+    // No re-apply to the document; the childless applied comment is removed.
    expect(collaborationGateway.handleYjsEvent).not.toHaveBeenCalledWith(
      'applyCommentSuggestion',
      expect.anything(),
      expect.anything(),
    );
-    // …but the open thread is self-healed to resolved via resolveComment, which
-    // writes the resolve patch and updates the resolve mark.
+    expect(commentRepo.deleteCommentIfChildless).toHaveBeenCalledWith('c-1');
+    expect(result.outcome).toBe('deleted');
+  });
+
+  it('applied=true, no replies at read time but a reply races in (conditional delete → 0 rows) → resolves instead, no hard-delete, outcome=resolved (#338 F1)', async () => {
+    // The suggested text is already applied to the document, but between the
+    // hasChildren read and the atomic delete a reply landed. The parent must NOT
+    // be hard-deleted (cascade would destroy the reply); resolve the thread.
+    const { service, commentRepo, wsService, collaborationGateway } =
+      makeService({ applied: true, currentText: 'new text' }, false, 0);
+
+    const result = await service.applySuggestion(suggestionComment(), user());
+
+    expect(commentRepo.deleteCommentIfChildless).toHaveBeenCalledWith('c-1');
+    // No deletion broadcast — the row + the racing reply survive.
+    expect(wsService.emitCommentEvent).not.toHaveBeenCalledWith(
+      expect.anything(),
+      expect.anything(),
+      expect.objectContaining({ operation: 'commentDeleted' }),
+    );
+    // Fell back to resolving.
    const resolvePatch = commentRepo.updateComment.mock.calls
      .map((c: any[]) => c[0])
      .find((p: any) => 'resolvedAt' in p);
    expect(resolvePatch.resolvedAt).toBeInstanceOf(Date);
-    expect(resolvePatch.resolvedById).toBe('user-1');
    expect(collaborationGateway.handleYjsEvent).toHaveBeenCalledWith(
      'resolveCommentMark',
      'page.page-1',
      expect.objectContaining({ commentId: 'c-1', resolved: true }),
    );
-    // The applied stamps are NOT re-written (already stamped).
-    expect(appliedPatch(commentRepo)).toBeUndefined();
+    expect(result.outcome).toBe('resolved');
  });

  it('rejects a comment with no suggestedText', async () => {
@@ -238,8 +305,8 @@ describe('CommentService — applySuggestion', () => {
      service.applySuggestion(suggestionComment(), user()),
    ).rejects.toThrow(InternalServerErrorException);

-    // Nothing persisted, nothing audited.
-    expect(appliedPatch(commentRepo)).toBeUndefined();
+    // Nothing deleted, nothing audited.
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
    expect(auditService.log).not.toHaveBeenCalled();
  });
 });
@@ -0,0 +1,229 @@
+import { BadRequestException } from '@nestjs/common';
+import { CommentService } from './comment.service';
+import { AuditEvent, AuditResource } from '../../common/events/audit-events';
+
+/**
+ * Coverage for CommentService.dismissSuggestion (#329). Dismiss ("Не применять")
+ * removes a suggested edit WITHOUT changing the page text: the comment
+ * disappears (hard-delete + strip the inline anchor mark) unless the thread has
+ * replies, in which case it is resolved to preserve the discussion.
+ *
+ * The permission gate (canComment, NOT canEdit) lives in the controller and is
+ * covered in comment.controller.spec.ts; here we pin the service's own state
+ * guards and the delete-vs-resolve fork.
+ */
+describe('CommentService — dismissSuggestion', () => {
+  const UPDATED = { id: 'c-1', __updated: true } as any;
+
+  function makeService(hasChildren = false, deletedRows = 1) {
+    const commentRepo: any = {
+      findById: jest.fn(async () => UPDATED),
+      updateComment: jest.fn(async () => undefined),
+      hasChildren: jest.fn(async () => hasChildren),
+      deleteComment: jest.fn(async () => undefined),
+      // #338 F1: the childless ephemeral delete is now atomic-conditional and
+      // returns the number of rows removed (1 = deleted, 0 = a reply raced in).
+      deleteCommentIfChildless: jest.fn(async () => deletedRows),
+    };
+    const pageRepo: any = {};
+    const wsService: any = { emitCommentEvent: jest.fn() };
+    const collaborationGateway: any = {
+      handleYjsEvent: jest.fn(async () => undefined),
+    };
+    const generalQueue: any = { add: jest.fn(() => Promise.resolve()) };
+    const notificationQueue: any = { add: jest.fn(async () => undefined) };
+    const auditService: any = { log: jest.fn() };
+
+    const service = new CommentService(
+      commentRepo,
+      pageRepo,
+      wsService,
+      collaborationGateway,
+      generalQueue,
+      notificationQueue,
+      auditService,
+    );
+
+    return { service, commentRepo, wsService, collaborationGateway, auditService };
+  }
+
+  const suggestionComment = (over?: Partial<any>): any => ({
+    id: 'c-1',
+    pageId: 'page-1',
+    spaceId: 'space-1',
+    workspaceId: 'ws-1',
+    creatorId: 'user-1',
+    parentCommentId: null,
+    selection: 'old text',
+    suggestedText: 'new text',
+    suggestionAppliedAt: null,
+    resolvedAt: null,
+    ...over,
+  });
+  const user = (over?: Partial<any>): any => ({ id: 'user-1', ...over });
+
+  it('no replies → hard-deletes, strips the anchor mark, does NOT touch page text, audits DISMISSED, outcome=deleted', async () => {
+    const { service, commentRepo, wsService, collaborationGateway, auditService } =
+      makeService(false);
+
+    const result = await service.dismissSuggestion(suggestionComment(), user());
+
+    // Never applies the suggestion to the document.
+    expect(collaborationGateway.handleYjsEvent).not.toHaveBeenCalledWith(
+      'applyCommentSuggestion',
+      expect.anything(),
+      expect.anything(),
+    );
+    // Hard-delete (atomic-conditional) + strip mark.
+    expect(commentRepo.deleteCommentIfChildless).toHaveBeenCalledWith('c-1');
+    expect(collaborationGateway.handleYjsEvent).toHaveBeenCalledWith(
+      'deleteCommentMark',
+      'page.page-1',
+      expect.objectContaining({ commentId: 'c-1', user: expect.any(Object) }),
+    );
+    expect(wsService.emitCommentEvent).toHaveBeenCalledWith(
+      'space-1',
+      'page-1',
+      expect.objectContaining({ operation: 'commentDeleted', commentId: 'c-1' }),
+    );
+    expect(auditService.log).toHaveBeenCalledWith(
+      expect.objectContaining({
+        event: AuditEvent.COMMENT_SUGGESTION_DISMISSED,
+        resourceType: AuditResource.COMMENT,
+        resourceId: 'c-1',
+      }),
+    );
+    expect(result.outcome).toBe('deleted');
+  });
+
+  it('no replies → if the anchor-mark removal FAILS, the row is NOT deleted and the error propagates (#329: no orphan anchor)', async () => {
+    const { service, commentRepo, wsService, collaborationGateway } =
+      makeService(false);
+    // Mark removal is FATAL and runs BEFORE the irreversible row delete: a collab
+    // failure (e.g. COLLAB_DISABLE_REDIS "no live instance") must abort the whole
+    // operation, leaving row + mark consistent — never a deleted row with an
+    // orphan anchor left in the document reporting success.
+    collaborationGateway.handleYjsEvent = jest.fn(async () => {
+      throw new Error('requires a live collaboration instance');
+    });
+
+    await expect(
+      service.dismissSuggestion(suggestionComment(), user()),
+    ).rejects.toThrow(/live collaboration/);
+
+    expect(commentRepo.deleteCommentIfChildless).not.toHaveBeenCalled();
+    expect(wsService.emitCommentEvent).not.toHaveBeenCalledWith(
+      expect.anything(),
+      expect.anything(),
+      expect.objectContaining({ operation: 'commentDeleted' }),
+    );
+  });
+
+  it('WITH replies → resolves (not delete), does NOT apply, audits DISMISSED, outcome=resolved', async () => {
+    const { service, commentRepo, wsService, collaborationGateway, auditService } =
+      makeService(true);
+
+    const result = await service.dismissSuggestion(suggestionComment(), user());
+
+    // Resolved via resolveComment (resolve patch + resolve mark), NOT deleted.
+    const resolvePatch = commentRepo.updateComment.mock.calls
+      .map((c: any[]) => c[0])
+      .find((p: any) => 'resolvedAt' in p);
+    expect(resolvePatch.resolvedAt).toBeInstanceOf(Date);
+    expect(resolvePatch.resolvedById).toBe('user-1');
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
+    expect(collaborationGateway.handleYjsEvent).toHaveBeenCalledWith(
+      'resolveCommentMark',
+      'page.page-1',
+      expect.objectContaining({ commentId: 'c-1', resolved: true }),
+    );
+    // No applied stamp — dismiss does not apply the edit.
+    const appliedPatch = commentRepo.updateComment.mock.calls
+      .map((c: any[]) => c[0])
+      .find((p: any) => 'suggestionAppliedAt' in p);
+    expect(appliedPatch).toBeUndefined();
+
+    expect(auditService.log).toHaveBeenCalledWith(
+      expect.objectContaining({
+        event: AuditEvent.COMMENT_SUGGESTION_DISMISSED,
+      }),
+    );
+    expect(result.outcome).toBe('resolved');
+  });
+
+  it('reply races in after the childless read (conditional delete → 0 rows) → resolves instead, does NOT hard-delete, reply survives, outcome=resolved (#338 F1)', async () => {
+    // hasChildren=false selects the ephemeral branch (the read saw no replies),
+    // but the atomic delete matches 0 rows because a reply landed in the window
+    // between that read and the delete. The parent must NOT be hard-deleted
+    // (a cascade would destroy the just-added reply); the thread is resolved.
+    const { service, commentRepo, wsService, collaborationGateway } =
+      makeService(false, 0);
+
+    const result = await service.dismissSuggestion(suggestionComment(), user());
+
+    // The conditional delete was attempted (and matched nothing).
+    expect(commentRepo.deleteCommentIfChildless).toHaveBeenCalledWith('c-1');
+    // No commentDeleted broadcast — the row (and the racing reply) survive.
+    expect(wsService.emitCommentEvent).not.toHaveBeenCalledWith(
+      expect.anything(),
+      expect.anything(),
+      expect.objectContaining({ operation: 'commentDeleted' }),
+    );
+    // Fell back to resolving the thread.
+    const resolvePatch = commentRepo.updateComment.mock.calls
+      .map((c: any[]) => c[0])
+      .find((p: any) => 'resolvedAt' in p);
+    expect(resolvePatch.resolvedAt).toBeInstanceOf(Date);
+    expect(resolvePatch.resolvedById).toBe('user-1');
+    expect(collaborationGateway.handleYjsEvent).toHaveBeenCalledWith(
+      'resolveCommentMark',
+      'page.page-1',
+      expect.objectContaining({ commentId: 'c-1', resolved: true }),
+    );
+    expect(result.outcome).toBe('resolved');
+  });
+
+  it('rejects a reply (non-top-level) comment', async () => {
+    const { service, commentRepo } = makeService();
+    await expect(
+      service.dismissSuggestion(
+        suggestionComment({ parentCommentId: 'parent-1' }),
+        user(),
+      ),
+    ).rejects.toThrow(BadRequestException);
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
+  });
+
+  it('rejects a comment without a suggested edit', async () => {
+    const { service, commentRepo } = makeService();
+    await expect(
+      service.dismissSuggestion(
+        suggestionComment({ suggestedText: null }),
+        user(),
+      ),
+    ).rejects.toThrow(BadRequestException);
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
+  });
+
+  it('rejects an already-applied suggestion', async () => {
+    const { service, commentRepo } = makeService();
+    await expect(
+      service.dismissSuggestion(
+        suggestionComment({ suggestionAppliedAt: new Date() }),
+        user(),
+      ),
+    ).rejects.toThrow(BadRequestException);
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
+  });
+
+  it('rejects an already-resolved thread', async () => {
+    const { service, commentRepo } = makeService();
+    await expect(
+      service.dismissSuggestion(
+        suggestionComment({ resolvedAt: new Date() }),
+        user(),
+      ),
+    ).rejects.toThrow(BadRequestException);
+    expect(commentRepo.deleteComment).not.toHaveBeenCalled();
+  });
+});
@@ -35,6 +35,12 @@ import {
  IAuditService,
 } from '../../integrations/audit/audit.service';

+// Ephemeral-suggestion settle result (#329): 'deleted' → the comment vanished
+// (hard-delete + anchor mark stripped); 'resolved' → the thread had replies and
+// was resolved instead. Returned to the client so it can pick the optimistic
+// cache action.
+export type SuggestionOutcome = 'deleted' | 'resolved';
+
@Injectable()
 export class CommentService {
  private readonly logger = new Logger(CommentService.name);
@@ -362,7 +368,7 @@ export class CommentService {
    comment: Comment,
    user: User,
    provenance?: AuthProvenanceData,
-  ): Promise<Comment> {
+  ): Promise<Comment & { outcome: SuggestionOutcome }> {
    // Structural guards.
    if (comment.parentCommentId) {
      throw new BadRequestException(
@@ -449,42 +455,148 @@ export class CommentService {
  }

  /**
-   * Persist the applied stamps (idempotently), auto-resolve the thread and
-   * broadcast + audit the applied suggestion. Shared by the applied and the
+   * Dismiss ("Не применять") a suggested edit without touching the page text:
+   * the suggestion disappears. Ephemeral rule (#329) — a top-level suggestion
+   * comment is transient UI, so dismissing it hard-deletes the comment AND strips
+   * its inline anchor mark UNLESS the thread has replies, in which case the
+   * discussion is preserved by resolving it instead.
+   *
+   * Dismiss does NOT change the document text, so the controller authorizes it
+   * with canComment (NOT canEdit). This re-checks the comment's own state so the
+   * invariant holds regardless of caller.
+   */
+  async dismissSuggestion(
+    comment: Comment,
+    user: User,
+    provenance?: AuthProvenanceData,
+  ): Promise<Comment & { outcome: SuggestionOutcome }> {
+    // Structural guards (mirror applySuggestion).
+    if (comment.parentCommentId) {
+      throw new BadRequestException(
+        'Only a top-level comment can carry a suggested edit',
+      );
+    }
+    if (!comment.suggestedText) {
+      throw new BadRequestException(
+        'This comment has no suggested edit to dismiss',
+      );
+    }
+    // State guards: dismissing an already-applied or already-resolved thread is
+    // meaningless. On an apply↔dismiss race the loser sees the comment already
+    // gone (404 at the controller) or already resolved (this 400); the client
+    // treats both as "already resolved".
+    if (comment.suggestionAppliedAt) {
+      throw new BadRequestException(
+        'Cannot dismiss a suggested edit that was already applied',
+      );
+    }
+    if (comment.resolvedAt) {
+      throw new BadRequestException(
+        'Cannot dismiss a suggested edit on a resolved comment thread',
+      );
+    }
+
+    const hasChildren = await this.commentRepo.hasChildren(comment.id);
+
+    if (hasChildren) {
+      // Preserve the discussion: resolve (never delete) a thread with replies.
+      const updatedComment = await this.resolveComment(
+        comment,
+        true,
+        user,
+        provenance,
+      );
+      this.auditService.log({
+        event: AuditEvent.COMMENT_SUGGESTION_DISMISSED,
+        resourceType: AuditResource.COMMENT,
+        resourceId: comment.id,
+        spaceId: comment.spaceId,
+        metadata: { pageId: comment.pageId },
+      });
+      return { ...updatedComment, outcome: 'resolved' };
+    }
+
+    // Ephemeral: no replies → the suggestion vanishes entirely. The atomic
+    // conditional delete may still fall back to a resolve if a reply raced in
+    // (see deleteEphemeralSuggestion), so the outcome is whatever it settled on.
+    const settled = await this.deleteEphemeralSuggestion(comment, user, provenance);
+    this.auditService.log({
+      event: AuditEvent.COMMENT_SUGGESTION_DISMISSED,
+      resourceType: AuditResource.COMMENT,
+      resourceId: comment.id,
+      spaceId: comment.spaceId,
+      metadata: { pageId: comment.pageId },
+    });
+    return settled;
+  }
+
+  /**
+   * Persist the applied stamps (idempotently), then settle the suggestion under
+   * the ephemeral rule (#329): a suggestion whose thread has NO replies
+   * DISAPPEARS after apply (hard-delete + strip the inline anchor mark), since
+   * the suggested text is now in the document and a stand-alone resolved thread
+   * would only pile up an orphan anchor. A thread WITH replies is preserved by
+   * auto-resolving it (the historical behaviour). Shared by the applied and the
   * idempotent "already-applied" branches of applySuggestion.
+   *
+   * Returns the comment augmented with `outcome` so the client can pick the
+   * optimistic action ('deleted' → drop it, 'resolved' → move to the resolved
+   * tab).
   */
  private async finalizeAppliedSuggestion(
    comment: Comment,
    user: User,
    provenance?: AuthProvenanceData,
-  ): Promise<Comment> {
-    if (!comment.suggestionAppliedAt) {
-      await this.commentRepo.updateComment(
-        {
-          suggestionAppliedAt: new Date(),
-          suggestionAppliedById: user.id,
-        },
-        comment.id,
-      );
+  ): Promise<Comment & { outcome: SuggestionOutcome }> {
+    const hasChildren = await this.commentRepo.hasChildren(comment.id);
+
+    if (hasChildren) {
+      // Thread has replies → preserve the discussion: stamp applied + resolve.
+      if (!comment.suggestionAppliedAt) {
+        await this.commentRepo.updateComment(
+          {
+            suggestionAppliedAt: new Date(),
+            suggestionAppliedById: user.id,
+          },
+          comment.id,
+        );
+      }
+
+      // Auto-resolve the thread. resolveComment handles the resolve mark, its ws
+      // broadcast and the resolve notification. Stay defensive on re-entry.
+      if (!comment.resolvedAt) {
+        await this.resolveComment(comment, true, user, provenance);
+      }
+
+      const updatedComment = await this.commentRepo.findById(comment.id, {
+        includeCreator: true,
+        includeResolvedBy: true,
+      });
+
+      this.wsService.emitCommentEvent(comment.spaceId, comment.pageId, {
+        operation: 'commentUpdated',
+        pageId: comment.pageId,
+        comment: updatedComment,
+      });
+
+      this.auditService.log({
+        event: AuditEvent.COMMENT_SUGGESTION_APPLIED,
+        resourceType: AuditResource.COMMENT,
+        resourceId: comment.id,
+        spaceId: comment.spaceId,
+        metadata: { pageId: comment.pageId },
+      });
+
+      return { ...updatedComment, outcome: 'resolved' };
    }

-    // Auto-resolve the thread. resolveComment handles the resolve mark, its ws
-    // broadcast and the resolve notification. The guard above guarantees the
-    // thread was open when we entered, but stay defensive on re-entry.
-    if (!comment.resolvedAt) {
-      await this.resolveComment(comment, true, user, provenance);
-    }
-
-    const updatedComment = await this.commentRepo.findById(comment.id, {
-      includeCreator: true,
-      includeResolvedBy: true,
-    });
-
-    this.wsService.emitCommentEvent(comment.spaceId, comment.pageId, {
-      operation: 'commentUpdated',
-      pageId: comment.pageId,
-      comment: updatedComment,
-    });
+    // No replies → ephemeral: the suggested text is already in the document, so
+    // the comment is redundant. Hard-delete it and strip its inline anchor. We
+    // deliberately do NOT write the applied stamps first (the row is about to be
+    // deleted); the audit event still records that the suggestion was applied.
+    // The delete is atomic-conditional: if a reply raced in after the
+    // hasChildren read, it falls back to resolving instead (outcome 'resolved').
+    const settled = await this.deleteEphemeralSuggestion(comment, user, provenance);

    this.auditService.log({
      event: AuditEvent.COMMENT_SUGGESTION_APPLIED,
@@ -494,7 +606,86 @@ export class CommentService {
      metadata: { pageId: comment.pageId },
    });

-    return updatedComment;
+    return settled;
+  }
+
+  /**
+   * Settle an ephemeral suggestion whose thread looked childless: remove its
+   * inline `comment` anchor mark, then ATOMICALLY hard-delete the row only if it
+   * is still childless. Shared by the apply/dismiss no-replies branches (#329).
+   *
+   * ORDER MATTERS: the anchor mark is removed FIRST and FATALLY (mirrors
+   * applySuggestion, which mutates the doc before writing the DB). The row
+   * delete is irreversible, so if the mark removal fails — including the
+   * COLLAB_DISABLE_REDIS "no live instance" hard-error — we must NOT delete the
+   * row and report success, or the document is left with a permanent orphan
+   * anchor pointing at a comment that no longer exists (the exact data-integrity
+   * bug #329 targets). Let the exception propagate (→ 5xx); the operation is
+   * then repeatable with row + mark still consistent.
+   *
+   * RACE (#338 F4): the caller read `hasChildren` BEFORE the (slow) mark
+   * removal, so a reply can land in that window. `comments.parent_comment_id` is
+   * ON DELETE CASCADE, so an unconditional delete here would cascade-destroy the
+   * just-added reply forever. Instead we use `deleteCommentIfChildless`, which
+   * re-checks childlessness under a FOR UPDATE lock inside a transaction (a plain
+   * anti-join DELETE is NOT race-safe under READ COMMITTED — see the repo method
+   * docstring). If it removes the row (outcome 'deleted') we broadcast the
+   * deletion as before. If it removes 0 rows (a reply interleaved) we do NOT
+   * hard-delete — we resolve the thread instead (outcome 'resolved'), preserving
+   * the discussion and the new reply. The anchor mark is already gone by then, an
+   * accepted degradation: the thread lands in the resolved tab without its inline
+   * highlight — far better than losing a reply.
+   */
+  private async deleteEphemeralSuggestion(
+    comment: Comment,
+    user: User,
+    provenance?: AuthProvenanceData,
+  ): Promise<Comment & { outcome: SuggestionOutcome }> {
+    await this.deleteCommentMark(comment, user);
+
+    const deletedRows = await this.commentRepo.deleteCommentIfChildless(
+      comment.id,
+    );
+
+    if (deletedRows > 0) {
+      this.wsService.emitCommentEvent(comment.spaceId, comment.pageId, {
+        operation: 'commentDeleted',
+        pageId: comment.pageId,
+        commentId: comment.id,
+      });
+      return { ...comment, outcome: 'deleted' };
+    }
+
+    // A reply interleaved between the hasChildren read and this delete, so the
+    // conditional delete matched nothing. Preserve the discussion + the new
+    // reply by resolving the thread instead of hard-deleting it. resolveComment
+    // handles the resolve patch, its ws broadcast and the resolve notification;
+    // its collab call is best-effort, so the already-stripped mark is fine.
+    const resolvedComment = await this.resolveComment(
+      comment,
+      true,
+      user,
+      provenance,
+    );
+    return { ...resolvedComment, outcome: 'resolved' };
+  }
+
+  /**
+   * Remove the inline `comment` mark for a comment from the collaborative
+   * document. FATAL, NOT best-effort: unlike resolveComment (which keeps the row,
+   * so a failed mark update is recoverable), this is used before an irreversible
+   * hard-delete, so the mark removal MUST succeed or throw. Under
+   * COLLAB_DISABLE_REDIS the gateway invokes the deleteCommentMark handler
+   * directly (never a silent no-op) and a missing live instance surfaces as a
+   * thrown error, which we let propagate so the caller aborts before deleting.
+   */
+  private async deleteCommentMark(comment: Comment, user: User): Promise<void> {
+    const documentName = `page.${comment.pageId}`;
+    await this.collaborationGateway.handleYjsEvent(
+      'deleteCommentMark',
+      documentName,
+      { commentId: comment.id, user },
+    );
  }

  private async queueCommentNotification(
@@ -0,0 +1,6 @@
+import { IsUUID } from 'class-validator';
+
+export class DismissSuggestionDto {
+  @IsUUID()
+  commentId: string;
+}
@@ -52,7 +52,9 @@ import {
  INTERNAL_LINK_REGEX,
  extractPageSlugId,
 } from '../../../integrations/export/utils';
-import { markdownToHtml, canonicalizeFootnotes } from '@docmost/editor-ext';
+import { canonicalizeFootnotes } from '@docmost/editor-ext';
+import { markdownToProseMirror } from '@docmost/prosemirror-markdown';
+import { normalizeForeignMarkdown } from '../../../integrations/import/utils/foreign-markdown';
 import { WatcherService } from '../../watcher/watcher.service';
 import { sql } from 'kysely';
 import { TransclusionService } from '../transclusion/transclusion.service';
@@ -1301,8 +1303,14 @@ export class PageService {

    switch (format) {
      case 'markdown': {
-        const html = await markdownToHtml(content as string);
-        prosemirrorJson = htmlToJson(html as string);
+        // Canonical markdown -> ProseMirror JSON directly via
+        // `@docmost/prosemirror-markdown` (issue #345) — no HTML intermediate,
+        // no editor-ext markdown layer. Foreign markdown surfaces the strict
+        // parser rejects (GFM `[^id]` reference footnotes) are normalized to the
+        // canonical inline form first.
+        prosemirrorJson = await markdownToProseMirror(
+          normalizeForeignMarkdown(content as string),
+        );
        break;
      }
      case 'html': {
@@ -0,0 +1,105 @@
+/**
+ * Server-side whitelist + limits for POST /api/telemetry/vitals (#355).
+ *
+ * The endpoint is PUBLIC (browsers post it, no auth) so it is a privacy and
+ * abuse surface: everything not on these lists is silently DROPPED and the
+ * request still returns 200 (never 400 — a 400 would make browsers retry).
+ */
+
+// The only metric names accepted. Anything else is dropped.
+export const ALLOWED_METRIC_NAMES = new Set<string>([
+  'INP',
+  'LCP',
+  'CLS',
+  'TTFB',
+  'editor_tx_ms',
+  'page_open_ms',
+  'longtask_ms',
+]);
+
+// The only rating values accepted (web-vitals). Anything else -> null.
+export const ALLOWED_RATINGS = new Set<string>([
+  'good',
+  'needs-improvement',
+  'poor',
+]);
+
+// Max events accepted per batch; the rest are ignored.
+export const MAX_EVENTS_PER_BATCH = 50;
+
+// Defence-in-depth body cap (~16KB). Fastify's global bodyLimit is far larger,
+// so we re-check the parsed payload size here and drop oversized batches.
+export const MAX_BODY_BYTES = 16 * 1024;
+
+// attr is truncated to this many characters (attribution target only, no PII).
+export const MAX_ATTR_LENGTH = 120;
+
+// route label sanity cap (client sends a template like /s/:space/p/:slug).
+export const MAX_ROUTE_LENGTH = 200;
+
+// `client_metrics.doc_size` is a Postgres `int` (int4). A garbage/huge docSize
+// on a single event would overflow int4 and make Postgres reject the WHOLE
+// batch INSERT, losing every event in it. Values outside this range are DROPPED
+// to null (the event is still kept) so one bad field never loses the batch.
+export const DOC_SIZE_MAX = 2147483647; // 2^31 - 1 (int4 max)
+
+export interface ClientMetricRow {
+  name: string;
+  value: number;
+  rating: string | null;
+  route: string | null;
+  attr: string | null;
+  docSize: number | null;
+  workspaceId: string | null;
+}
+
+/**
+ * Validate + normalise a single incoming event into a DB row, or return null to
+ * DROP it. Pure so it is directly unit-testable. Enforces the name whitelist,
+ * numeric value, rating whitelist, attr truncation and doc_size (int) coercion.
+ */
+export function sanitizeVitalEvent(
+  raw: unknown,
+  workspaceId: string | null,
+): ClientMetricRow | null {
+  if (!raw || typeof raw !== 'object') return null;
+  const e = raw as Record<string, unknown>;
+
+  const name = e.name;
+  if (typeof name !== 'string' || !ALLOWED_METRIC_NAMES.has(name)) return null;
+
+  const value =
+    typeof e.value === 'number' && Number.isFinite(e.value) ? e.value : null;
+  if (value === null) return null;
+
+  const rating =
+    typeof e.rating === 'string' && ALLOWED_RATINGS.has(e.rating)
+      ? e.rating
+      : null;
+
+  let route: string | null = null;
+  if (typeof e.route === 'string' && e.route.length > 0) {
+    route = e.route.slice(0, MAX_ROUTE_LENGTH);
+  }
+
+  let attr: string | null = null;
+  if (typeof e.attr === 'string' && e.attr.length > 0) {
+    attr = e.attr.slice(0, MAX_ATTR_LENGTH);
+  }
+
+  let docSize: number | null = null;
+  if (typeof e.docSize === 'number' && Number.isFinite(e.docSize)) {
+    docSize = Math.trunc(e.docSize);
+  } else if (typeof e.doc_size === 'number' && Number.isFinite(e.doc_size)) {
+    // Accept snake_case too, in case a client sends the raw column name.
+    docSize = Math.trunc(e.doc_size as number);
+  }
+  // Guard the int4 column: an out-of-range docSize would overflow int4 and make
+  // Postgres reject the whole batch INSERT. Drop the field (keep the event)
+  // rather than lose every other event in the batch.
+  if (docSize !== null && (docSize < 0 || docSize > DOC_SIZE_MAX)) {
+    docSize = null;
+  }
+
+  return { name, value, rating, route, attr, docSize, workspaceId };
+}
--- a/Show More
+++ b/Show More