0.94.1

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
style(editor): align footnote marker and center task checkbox
2026-06-26 19:33:57 +03:00 · 2026-06-26 19:24:13 +03:00 · 2026-06-26 19:16:55 +03:00 · 2026-06-26 19:13:22 +03:00 · 2026-06-26 18:54:42 +03:00 · 2026-06-26 18:34:56 +03:00
499 changed files with 42854 additions and 7131 deletions
--- a/.env.example
+++ b/.env.example
@@ -123,11 +123,45 @@ MCP_DOCMOST_PASSWORD=
 # expose the port publicly).
 # MCP_TOKEN=
 # MCP_SESSION_IDLE_MS=1800000
+#
+# AI-AGENT ATTRIBUTION (comments/pages written via MCP are badged as "AI"):
+# attribution is driven by a per-user `is_agent` flag on the users row. There is
+# NO admin UI/API for it — set it out-of-band with SQL. Use a DEDICATED service
+# account for the MCP fallback above and flag ONLY that account, e.g.:
+#     UPDATE users SET is_agent = true WHERE email = 'mcp-bot@your-domain';
+# NEVER set is_agent on a human or shared account — every action by that account
+# (including normal human edits) would then be mis-attributed as AI.

 # Per-embedding-call timeout in milliseconds for the RAG indexer.
 # A slow/hung embeddings endpoint fails after this and the batch continues.
 # AI_EMBEDDING_TIMEOUT_MS=120000

+# Silence timeout (ms) for streaming chat/agent AI calls AND external-MCP traffic.
+# Bounds time-to-first-byte and the gap BETWEEN chunks (NOT the total turn length),
+# so an arbitrarily long turn that keeps streaming is never cut. Finite so a hung
+# provider is eventually broken instead of leaking forever. Default 900000 (15 min).
+# AI_STREAM_TIMEOUT_MS=900000
+
+# Keep-alive recycle window (ms) for streaming chat/agent AI + external-MCP calls.
+# A pooled connection idle longer than this is closed instead of reused, so a
+# NAT / egress firewall / reverse proxy that silently drops idle connections
+# cannot poison a reused socket into a PRE-RESPONSE `read ECONNRESET`. Lower it if
+# your egress drops idle connections faster than ~10s. Default 10000 (10 s).
+# AI_STREAM_KEEPALIVE_MS=10000
+
+# Silence timeout (ms) for EXTERNAL-MCP transport ONLY (not the chat provider).
+# Tighter than AI_STREAM_TIMEOUT_MS so a byte-silent/hung MCP server is broken in
+# ~5 min instead of 15. Note it also cuts a legitimately long but byte-silent
+# single tool call (a slow crawl that emits nothing until done) and an SSE
+# transport idling >5 min BETWEEN tool calls. Default 300000 (5 min).
+# AI_MCP_STREAM_TIMEOUT_MS=300000
+
+# Total wall-clock cap (ms) for ONE external MCP tool call (app-level, not
+# transport). Aborts a tool that keeps the socket warm (SSE heartbeats / trickle)
+# but never returns a result — which the silence timeout above never breaks.
+# Default 900000 (15 min).
+# AI_MCP_CALL_TIMEOUT_MS=900000
+
 # --- Anonymous public-share AI assistant ---
 # Opt-in per workspace (AI settings -> "public share assistant"; off by default).
 # When enabled, anonymous visitors of a published share can ask an AI about that
@@ -147,9 +181,17 @@ MCP_DOCMOST_PASSWORD=
 # per-IP limit is fully evaded. It is a COST backstop, not an access control, and
 # FAILS CLOSED if Redis is unavailable (an optional assistant briefly going
 # offline is safer than an unbounded bill). Override the hourly cap below
-# (default: 300 calls per workspace per rolling hour).
-# SHARE_AI_WORKSPACE_MAX_PER_HOUR=300
+# (default: 100 calls per workspace per rolling hour).
+# SHARE_AI_WORKSPACE_MAX_PER_HOUR=100
 #
 # Per-request output-token ceiling for the anonymous assistant (default: 512).
 # Worst-case output per accepted call = agent steps (5) × this value.
 # SHARE_AI_MAX_OUTPUT_TOKENS=512
+#
+# Second cost backstop: a cluster-wide per-workspace rolling-DAY token budget
+# (input re-sent per step + output, summed across every accepted turn). The
+# hourly request cap above bounds how MANY calls run, not how expensive each is,
+# so this caps the owner's actual provider bill directly. Like the request cap it
+# FAILS CLOSED if Redis is unavailable (default: 1,000,000 tokens per workspace
+# per rolling day).
+# SHARE_AI_WORKSPACE_TOKEN_BUDGET_PER_DAY=1000000
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -56,3 +56,160 @@ jobs:
          tags: ${{ env.IMAGE }}:develop
          cache-from: type=gha,scope=develop-amd64
          cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true
+
+  # e2e jobs run on every develop push but DO NOT gate the build/publish above:
+  # `build` stays `needs: test` only, so the :develop image still ships even if
+  # e2e fails. A failing e2e job turns the run red and triggers GitHub's email
+  # to the pusher — that red run + email is the intended notification, not a
+  # deploy block.
+  e2e-server:
+    runs-on: ubuntu-latest
+    env:
+      DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost
+      REDIS_URL: redis://localhost:6379
+      APP_SECRET: ci-e2e-secret-change-me-min-32-characters
+      APP_URL: http://localhost:3000
+    services:
+      postgres:
+        image: pgvector/pgvector:pg18
+        env:
+          POSTGRES_DB: docmost
+          POSTGRES_USER: docmost
+          POSTGRES_PASSWORD: docmost
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U docmost"
+          --health-interval 5s
+          --health-timeout 5s
+          --health-retries 20
+      redis:
+        image: redis:7
+        ports:
+          - 6379:6379
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 5s
+          --health-timeout 5s
+          --health-retries 20
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up pnpm
+        uses: pnpm/action-setup@v4
+
+      - name: Set up Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Build editor-ext
+        run: pnpm --filter @docmost/editor-ext build
+
+      - name: Run migrations
+        run: pnpm --filter ./apps/server migration:latest
+
+      - name: Run server e2e
+        run: pnpm --filter ./apps/server test:e2e
+
+  # Same rationale as e2e-server: this job is intentionally NOT in
+  # `build.needs`. Deploy of the :develop image must not be blocked by e2e;
+  # a red run plus GitHub's email to the pusher is the notification mechanism.
+  e2e-mcp:
+    runs-on: ubuntu-latest
+    env:
+      DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost
+      REDIS_URL: redis://localhost:6379
+      APP_SECRET: ci-e2e-secret-change-me-min-32-characters
+      APP_URL: http://localhost:3000
+      NODE_ENV: production
+    services:
+      postgres:
+        image: pgvector/pgvector:pg18
+        env:
+          POSTGRES_DB: docmost
+          POSTGRES_USER: docmost
+          POSTGRES_PASSWORD: docmost
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U docmost"
+          --health-interval 5s
+          --health-timeout 5s
+          --health-retries 20
+      redis:
+        image: redis:7
+        ports:
+          - 6379:6379
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 5s
+          --health-timeout 5s
+          --health-retries 20
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up pnpm
+        uses: pnpm/action-setup@v4
+
+      - name: Set up Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile --prod=false # job-level NODE_ENV=production would otherwise make pnpm skip devDependencies needed by the build steps
+
+      - name: Build editor-ext
+        run: pnpm --filter @docmost/editor-ext build
+
+      - name: Build server
+        run: pnpm server:build
+
+      - name: Build mcp
+        run: pnpm --filter @docmost/mcp build
+
+      - name: Run migrations
+        run: pnpm --filter ./apps/server migration:latest
+
+      - name: Start server (prod)
+        # Capture stdout/stderr so a start-up crash (bind error, stack trace,
+        # migration mismatch) is diagnosable; without this the only signal is
+        # the generic health-loop timeout below, ~120s later.
+        run: pnpm --filter ./apps/server start:prod > /tmp/server.log 2>&1 &
+
+      - name: Wait for server health
+        run: |
+          for i in $(seq 1 60); do
+            if curl -fsS http://localhost:3000/api/health > /dev/null; then
+              echo "Server is healthy"
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Server did not become healthy in time"
+          exit 1
+
+      - name: Seed admin
+        run: |
+          curl -fsS -X POST http://localhost:3000/api/auth/setup \
+            -H "Content-Type: application/json" \
+            -d '{"name":"E2E","email":"e2e@example.com","password":"E2ePassword123","workspaceName":"E2E"}'
+
+      - name: Run mcp e2e
+        env:
+          DOCMOST_API_URL: http://localhost:3000/api
+          DOCMOST_EMAIL: e2e@example.com
+          DOCMOST_PASSWORD: E2ePassword123
+        run: pnpm --filter @docmost/mcp test:e2e
+
+      - name: Dump server log on failure
+        if: failure() # kept as the LAST step so it also fires when seeding or the e2e run fails, not only the health wait
+        run: cat /tmp/server.log || true
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -15,6 +15,38 @@ permissions:
 jobs:
  test:
    runs-on: ubuntu-latest
+    # Real Postgres + Redis so the server integration suite (`*.int-spec.ts`,
+    # behind `pnpm --filter server test:int`) runs in CI (red-team finding #7).
+    # Without it, cost-cap / FK-cascade / jsonb-round-trip / real-apply tests
+    # only ran locally, so regressions in those paths stayed green in CI.
+    # Postgres uses the pgvector image because migrations create vector columns
+    # and global-setup runs `CREATE EXTENSION vector`. Credentials/db match the
+    # defaults in apps/server/test/integration/db.ts + global-setup.ts
+    # (docmost / docmost_dev_pw, maintenance db `docmost`, redis on 6379), so no
+    # TEST_*_URL overrides are needed.
+    services:
+      postgres:
+        image: pgvector/pgvector:pg18
+        env:
+          POSTGRES_USER: docmost
+          POSTGRES_PASSWORD: docmost_dev_pw
+          POSTGRES_DB: docmost
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U docmost"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+      redis:
+        image: redis:7
+        ports:
+          - 6379:6379
+        options: >-
+          --health-cmd "redis-cli ping"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -36,5 +68,12 @@ jobs:
      - name: Build editor-ext
        run: pnpm --filter @docmost/editor-ext build

-      - name: Run tests
+      - name: Run unit tests
        run: pnpm -r test
+
+      # Integration suite against the real Postgres/Redis services above. Runs
+      # the FK-cascade, cost-cap, jsonb-round-trip and real-apply specs that the
+      # unit run (mocks only) cannot cover. global-setup drops/recreates the
+      # isolated `docmost_test` DB and migrates it to latest.
+      - name: Run server integration tests
+        run: pnpm --filter server test:int
--- a/.gitignore
+++ b/.gitignore
@@ -45,3 +45,6 @@ lerna-debug.log*

 # TypeScript incremental build artifacts
 *.tsbuildinfo
+
+# Self-hosted VAD / onnxruntime-web assets (copied from node_modules at dev/build time)
+apps/client/public/vad/
--- a/.vscode/tasks.json
+++ b/.vscode/tasks.json
@@ -0,0 +1,14 @@
+{
+  // VSCode tasks for this repo.
+  "version": "2.0.0",
+  "tasks": [
+    {
+      "label": "git push (github + gitea)",
+      "type": "shell",
+      "command": "git push github develop && git push gitea develop",
+      "options": { "cwd": "${workspaceFolder}" },
+      "presentation": { "reveal": "never", "focus": false, "panel": "shared", "showReuseMessage": false, "close": true },
+      "problemMatcher": []
+    }
+  ]
+}
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -5,45 +5,48 @@ repository. It has two layers: **how to run a task end-to-end** (the
 sections below), and **how the codebase is built** (the technical sections
 further down, formerly in `CLAUDE.md`).

-## Жизненный цикл задачи
+## Task lifecycle

-### 1. Старт: синхронизация с develop
+### 1. Start: sync with develop

-Перед началом **любой** работы обнови локальный `develop` и ветвись от него:
+Before starting **any** work, update your local `develop` and branch off it:

 ```bash
 git checkout develop
 git fetch gitea
 git pull --ff-only gitea develop
-git checkout -b <короткое-имя-фичи>
+git checkout -b <short-feature-name>
 ```

-Никогда не пилит фичу прямо в `develop` и не ветвись от устаревшего
-`develop` — иначе PR будет содержать лишние коммиты или конфликтовать.
+Never build a feature directly on `develop`, and never branch off a stale
+`develop` — otherwise the PR will carry extra commits or conflict.

-### 2. Реализация
+### 2. Implementation

-Веди задачу по workflow из системного промпта (Phase 1 анализ → Phase 3
-реализация → Phase 4 review → Phase 5 верификация → Phase 6 отчёт). Большие
-изменения делегируй в general subagent, ревьюй через review subagent.
+Run the task through the workflow from the system prompt (Phase 1 analysis →
+Phase 3 implementation → Phase 4 review → Phase 5 verification → Phase 6
+report). Delegate large changes to a general subagent; review via the review
+subagent.

-### 3. Коммит — ТОЛЬКО в Gitea и ТОЛЬКО от `claude_code`
+**Create worktrees only inside the `.claude` folder** (e.g.
+`.claude/worktrees/<name>`). Creating a git worktree anywhere else — the repo
+root, sibling directories, or temp folders — is forbidden.

-Это правило без исключений:
+### 3. Commit — ONLY to Gitea and ONLY as `claude_code`

- **Куда:** единственный remote для коммитов/пушей — **`gitea`**
-  (`gitea.vvzvlad.xyz`). **Никогда** не пушь в `origin` (GitHub-зеркало) и
-  тем более в `upstream` (оригинальный Docmost). GitHub-зеркало обновляется
-  CI-процессом владельца, не агентом.
- **От кого:** коммить **только** от агентского identity. Любой коммит,
-  у которого author или committer — `vvzvlad`, считается ошибкой и должен
-  быть переписан.
+This rule has no exceptions:
+
+- **Where:** the only remote for commits/pushes is **`gitea`**
+  (`gitea.vvzvlad.xyz`). **Never** push to `origin` (the GitHub mirror), and
+  especially not to `upstream` (the original Docmost). The GitHub mirror is
+  updated by the owner's CI process, not by the agent.
+- **Who:** commit **only** as the agent identity. Any commit whose author or
+  committer is `vvzvlad` is an error and must be rewritten.
  - **name:** `claude_code`
  - **email:** `claude_code@vvzvlad.xyz`

-Используй `--reset-author` при amend, иначе git оставит оригинального
-автора (по умолчанию config на этой машине — `vvzvlad`, поэтому проверяй
-после каждого коммита):
+Use `--reset-author` when amending, otherwise git keeps the original author
+(the default config on this machine is `vvzvlad`, so check after every commit):

 ```bash
 GIT_AUTHOR_NAME="claude_code" \
@@ -53,34 +56,33 @@ GIT_COMMITTER_EMAIL="claude_code@vvzvlad.xyz" \
 git commit --amend --no-edit --reset-author
 ```

-Для обычного нового коммита достаточно один раз выставить локальный
-config ветки и коммитить штатно:
+For a regular new commit, set the branch-local config once and commit normally:

 ```bash
 git config user.name "claude_code"
 git config user.email "claude_code@vvzvlad.xyz"
 ```

-Проверка перед push:
+Check before push:

 ```bash
 git log -1 --format='Author: %an <%ae>%nCommitter: %cn <%ce>'
-# обе строки должны показать claude_code <claude_code@vvzvlad.xyz>
+# both lines must show claude_code <claude_code@vvzvlad.xyz>
 ```

-### 4. Push и PR в develop
+### 4. Push and PR to develop

-PR всегда в `develop`. Пароль `claude_code` лежит в macOS keychain как
-**generic password** под service `gitea-claude-code` (не дублируй его как
-internet-password для `gitea.vvzvlad.xyz` — это создаст конфликт с учёткой
-владельца в git credential helper):
+PRs always target `develop`. The `claude_code` password lives in the macOS
+keychain as a **generic password** under service `gitea-claude-code` (do not
+duplicate it as an internet-password for `gitea.vvzvlad.xyz` — that creates a
+conflict with the owner's account in the git credential helper):

 ```bash
 AGENT_PASS=$(security find-generic-password -s gitea-claude-code -w)
 ```

-Push — через временную подстановку кредов в remote URL, после чего URL
-обязательно возвращается в чистый вид (пароль не должен оседать в git
+Push by temporarily injecting the credentials into the remote URL, then always
+restore the URL to its clean form (the password must not linger in git
 config / reflog):

 ```bash
@@ -92,7 +94,7 @@ git remote set-url gitea "$ORIG_URL"
 unset AGENT_PASS SAFE_PASS
 ```

-PR создаётся через Gitea REST API (Basic Auth от `claude_code`):
+The PR is created via the Gitea REST API (Basic Auth as `claude_code`):

 ```bash
 curl -s -X POST \
@@ -102,63 +104,75 @@ curl -s -X POST \
  "https://gitea.vvzvlad.xyz/api/v1/repos/vvzvlad/gitmost/pulls"
 ```

-`base: develop`, `head: <branch>`. В теле PR — что сделано, что вне scope,
-результаты верификации (tsc/lint/tests).
+`base: develop`, `head: <branch>`. In the PR body: what was done, what is out
+of scope, verification results (tsc/lint/tests).

-> Если push падает с `User permission denied for writing` — значит у
-> `claude_code` нет коллабораторских прав на репо. Попроси владельца
-> добавить (один раз, через Gitea UI или
-> `PUT /api/v1/repos/vvzvlad/gitmost/collaborators/claude_code` с
-> `{"permission":"write"}` от его учётки).
+> If push fails with `User permission denied for writing`, then `claude_code`
+> lacks collaborator rights on the repo. Ask the owner to add them (once, via
+> the Gitea UI or `PUT /api/v1/repos/vvzvlad/gitmost/collaborators/claude_code`
+> with `{"permission":"write"}` from their account).

-### 5. Мерж и cleanup
+### 5. Merge and cleanup

- **Мерж PR в develop делает пользователь** (не агент). Агент не жмёт
-  кнопку merge.
- **После реализации задачи удали её план из `docs/backlog/<task>.md`** —
-  это часть закрытия задачи, не пользовательская работа. Файлы в
-  `docs/backlog/` — это очередь работы, выполненное из неё вычищается.
-  Сделай это в отдельном коммите от того же `claude_code` в той же ветке
-  (или попроси пользователя удалить, если PR уже открыт и ты не хочешь
-  его перепушивать).
- Не закоммичен ли мусор в рабочем дереве? Проверь `git status` перед
-  финальным отчётом.
+- **The user merges the PR into develop** (not the agent). The agent does not
+  press the merge button.
+- **After implementing a task, delete its plan from `docs/backlog/<task>.md`** —
+  this is part of closing the task, not the user's work. Files in
+  `docs/backlog/` are the work queue; completed items get cleaned out of it.
+  Do this in a separate commit, made as the same `claude_code` identity, on the
+  same branch (or ask the user to delete it if the PR is already open and you
+  don't want to push to it again).
+- Any junk left uncommitted in the working tree? Check `git status` before the
+  final report.

-## Релизный цикл: набор на новую версию
+## Release cycle: staging a new version

-Когда в `develop` накопилось достаточно изменений для релиза, запускается
-**финальное ревью тремя скиллами-оркестраторами** перед мержем/тегом:
+When enough changes have accumulated on `develop` for a release, a **final
+review by three orchestrator skills** runs before the merge/tag:

-1. **test-orchestrator** (skill `code-review-orchestrator` с фокусом на
-   тестовом покрытии) — проверяет, что новый код покрыт тестами и нет
-   регрессий в существующих.
-2. **review-orchestrator** (skill `code-review-orchestrator`) —
-   мульти-аспектный код-ревью: безопасность, стабильность, соответствие
-   конвенциям, регрессии, перегруженность.
-3. **red-team-orchestrator** (red-team скилл) — адверсариальный анализ
-   атакующих сценариев на затронутые компоненты.
+1. **test-orchestrator** (the `code-review-orchestrator` skill focused on test
+   coverage) — verifies new code is covered by tests and there are no
+   regressions in existing ones.
+2. **review-orchestrator** (the `code-review-orchestrator` skill) —
+   multi-aspect code review: security, stability, convention conformance,
+   regressions, over-complexity.
+3. **red-team-orchestrator** (the red-team skill) — adversarial analysis of
+   attack scenarios against the affected components.

-Порядок: оркестраторы возвращают списки находок → агент правит всё, что
-они нашли (через subagent или сам, по правилам делегирования) → повторно
-прогоняет ревью затронутых мест → режет тег по процедуре «Cutting a
-release» ниже.
+Order: the orchestrators return finding lists → the agent fixes everything they
+found (via a subagent or itself, per the delegation rules) → re-runs the review
+on the affected areas → cuts the tag per the "Cutting a release" procedure
+below.

-## Шпаргалка по учёткам и endpoint'ам
+## Accounts & endpoints cheat sheet

-| Что | Значение |
+| Item | Value |
 | --- | --- |
-| Единственный remote для коммитов | `gitea` → `https://vvzvlad@gitea.vvzvlad.xyz/vvzvlad/gitmost.git` |
-| Агентский user (Gitea/git) | `claude_code` |
-| Агентский email | `claude_code@vvzvlad.xyz` |
-| Пароль в keychain | `security find-generic-password -s gitea-claude-code -w` |
-| PR API | `https://gitea.vvzvlad.xyz/api/v1/repos/vvzvlad/gitmost/pulls` (тут `gitmost` — реальный slug репо на сервере) |
-| Базовая ветка | `develop` |
-| `origin` | GitHub-зеркало `vvzvlad/gitmost` — **не пушить**, обновляется CI владельца |
-| `upstream` | Оригинальный Docmost — **не пушить никогда** |
+| Only remote for commits | `gitea` → `https://vvzvlad@gitea.vvzvlad.xyz/vvzvlad/gitmost.git` |
+| Agent user (Gitea/git) | `claude_code` |
+| Agent email | `claude_code@vvzvlad.xyz` |
+| Keychain password | `security find-generic-password -s gitea-claude-code -w` |
+| PR API | `https://gitea.vvzvlad.xyz/api/v1/repos/vvzvlad/gitmost/pulls` (here `gitmost` is the repo's real slug on the server) |
+| Base branch | `develop` |
+| `origin` | GitHub mirror `vvzvlad/gitmost` — **do not push**, updated by the owner's CI |
+| `upstream` | The original Docmost — **never push** |
+
+## Creating issues (Gitea `tea` CLI)
+
+Issues are filed with the official Gitea CLI `tea`, already logged in as
+`claude_code` (`tea logins list` shows the `gitea` login as default):
+
+```bash
+tea issues create --repo vvzvlad/gitmost --labels feature \
+  --title '<title>' --description "$(cat body.md)"
+```
+
+> Gotcha (tea 0.14.1): the issue body flag is `--description`/`-d`, **not**
+> `--body` — passing `--body` fails with `flag provided but not defined: -body`.

 ---

-# Архитектура и кодовая база
+# Architecture and codebase

 ## What this is

@@ -209,7 +223,7 @@ pnpm --filter @docmost/mcp test                  # node --test (unit + mock)
 pnpm --filter @docmost/mcp test:e2e              # MCP end-to-end against a live instance
 ```

-**Database migrations** (Kysely, run from `apps/server`; they auto-run on server startup too):
+**Database migrations** (Kysely, run from `apps/server`). **Where they auto-apply:** in **production** (the built image / `start:prod`) pending migrations run automatically on server boot. In **local dev** (the `pnpm dev` setup / `nest start --watch`) they do **NOT** auto-run — after you pull or switch branches you must apply them yourself with `pnpm --filter server migration:latest`; otherwise every endpoint that touches a new column/table returns HTTP 500 (e.g. a freshly added `ai_chats.page_id` column makes every AI-chat request fail with 500 until the migration is applied).
 ```bash
 pnpm --filter server migration:create --name=my_change   # new empty migration
 pnpm --filter server migration:latest                    # apply all pending
@@ -277,6 +291,29 @@ The git tag is the source of truth for the displayed version (UI reads `git desc
 4. Update `CHANGELOG.md` (Keep a Changelog format): add a `## [X.Y.Z] - YYYY-MM-DD` section summarising `git log vPREV..HEAD --no-merges` grouped by type (Breaking / Added / Changed / Fixed / Removed), and add the `compare/vPREV...vX.Y.Z` link at the bottom. Fold the bump + changelog into the release commit.
 5. Tag the release commit with a **lightweight** tag (existing release tags are lightweight): `git tag vX.Y.Z`.
 6. Push commit and tag: `git push origin main && git push origin vX.Y.Z`. Pushing the `v*` tag triggers `release.yml` (multi-arch GHCR images + a draft GitHub Release).
+7. **Back-merge the release into `develop`** so develop builds report the new version: `git checkout develop && git merge --no-ff main && git push origin develop` (push to Gitea as well if that is the canonical remote).
+
+#### Why develop keeps showing the *previous* version (and why step 7 matters)
+
+The UI version is `git describe --tags --always` (see `vite.config.ts`), which walks **backwards from the current commit** and picks the **nearest tag reachable in that commit's ancestry**, then appends `-<commits-since-tag>-g<short-hash>`.
+
+The release tag (`vX.Y.Z`) is created on **`main`'s release merge commit**, and that commit is **not** in `develop`'s history. So until the release is back-merged, `git describe` on `develop` cannot see the new tag and falls back to the *previous* reachable tag. Result: every develop build — and the `ghcr.io/vvzvlad/gitmost:develop` image — keeps reporting e.g. `v0.91.0-NNN-g<hash>` even though `main` is already tagged `v0.93.0`. This is the classic git-flow pitfall: the version on `develop` does **not** advance just because a release was tagged on `main`.
+
+Back-merging `main → develop` (step 7) pulls the tagged release commit into `develop`'s ancestry, after which develop builds correctly show `vX.Y.Z-NNN-g<hash>`. If `develop` already drifted (release tagged but never back-merged), just run step 7 now — no new tag is needed.
+
+##### The tag must also exist on the remote that CI builds from (multi-remote gotcha)
+
+`git describe` names a tag **ref**, not just a commit — so the back-merge is *necessary but not sufficient*. The develop image is built by GitHub Actions (`develop.yml`, `actions/checkout` with `fetch-depth: 0`, then `git describe --tags --always`), so the version it prints depends on which tags exist **on the `github` remote**, not on your local clone or on `gitea`.
+
+This repo has two writable remotes — `gitea` (canonical, where commits land) and `github` (where the `:develop` and release images are built) — plus `upstream` (docmost, never push). **`git push <branch>` does NOT push tags**; tags must be pushed explicitly and *to each remote separately*. A release tag that only lives on `gitea` is invisible to the GitHub Actions build: even with the tagged commit fully in `develop`'s history (step 7 done), `git describe` on the GitHub runner falls back to the previous tag it *does* have, so the develop image keeps showing e.g. `v0.91.0-NNN` while `git describe` locally already says `v0.93.0-NN`.
+
+Fix / checklist when develop still shows the old version after a back-merge:
+
+1. Confirm the tag is missing on github: `git ls-remote --tags github` (compare with `gitea`).
+2. Push it there: `git push github vX.Y.Z` (and `git push gitea vX.Y.Z` if it is missing on gitea too). Note: pushing a `v*` tag to `github` also triggers `release.yml` (multi-arch GHCR images + draft Release) — expected, but be aware.
+3. Re-run the develop build (`gh workflow run Develop`, or push any commit to `develop`) so `git describe` re-resolves with the tag now present.
+
+(The `git push origin ...` in steps 6–7 above is shorthand — there is no `origin` remote here; substitute `gitea` **and** `github` as appropriate, and always push release tags to both.)

 ## Planning docs

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,11 +10,191 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.94.0] - 2026-06-26
+
+This release makes AI chat durable and fast: assistant turns are persisted to
+the database step by step and exported server-side, the desktop app no longer
+freezes at 100% CPU on long agent runs, and MCP writes are badged with
+unspoofable AI attribution. It also reworks footnotes (Pandoc-style reuse and
+per-reference back-links), hardens page moves and duplication against cycles
+and lost edits, and caps the anonymous public-share assistant with a
+per-workspace rolling-day token budget.
+
 ### Added

- Admin-only "Analytics / tracker" workspace setting: a raw HTML/JS snippet
+- **Persistent AI-chat history as the source of truth + server-side export.**
+  An assistant turn is now persisted to the database step by step: the row is
+  inserted upfront as `streaming` and updated as each agent step finishes, then
+  finalized once to `completed`/`error`/`aborted`. A process that dies mid-turn
+  keeps every finished step, and a startup sweep flips any dangling `streaming`
+  row (untouched for 10 minutes) to `aborted`. Chat "Copy" now exports
+  server-side from these rows (`POST /ai-chat/export`) rather than from live
+  client state, so the export is identical whether a chat is freshly streaming,
+  just switched to, or reloaded — and is available from the first turn of a new
+  chat. (#183, #174)
+
+- **AI-agent attribution for MCP writes.** Comments (and pages) created through
+  the MCP endpoint by a dedicated agent account are now badged as "AI", with
+  unspoofable provenance derived from a per-user `is_agent` flag (not from the
+  request body). **Operator setup:** use a _dedicated_ service account for the
+  MCP fallback and set the flag with SQL —
+  `UPDATE users SET is_agent = true WHERE email = '<mcp-account>'`. Never flag a
+  human or shared account, or its normal edits get mis-attributed as AI. See the
+  AI-agent block in `.env.example`. (#143)
+- **Footnote import diagnostics.** The MCP page-write tools (`create_page`,
+  `update_page`, `import_page_markdown`) now return a `footnoteWarnings` array
+  flagging dangling references, empty or duplicate definitions, and `[^id]`
+  markers inside table rows, so an agent can fix its own markup. The page is
+  still created; the field is omitted when there are no problems. (#166)
+- **AI chat "Protocol" setting (`chatApiStyle`).** A new admin choice in AI
+  settings for the `openai` driver: `openai-compatible` (default) routes chat
+  through `@ai-sdk/openai-compatible`, which surfaces a provider's streamed
+  reasoning (`reasoning_content` → reasoning parts) for z.ai/GLM, DeepSeek,
+  OpenRouter, etc.; `openai` uses the official provider (real-OpenAI
+  reasoning-model request shaping). Chosen explicitly rather than inferred from
+  the base URL, since a custom URL can front real OpenAI too. (#175, #177)
+- **Per-MCP-server instructions in the agent prompt.** Each external MCP server
+  now has an admin-authored `instructions` field ("how/when to use this server's
+  tools") that is injected into the agent's system prompt next to that server's
+  tool descriptions. Trusted text, rendered inside the prompt safety sandwich;
+  shown only for a server that actually connected and contributed ≥1 callable
+  tool. (#180)
+- **Footnote multi-backlinks.** A footnote referenced more than once now shows a
+  back-link per reference (↩ a b c …), each scrolling to its own occurrence, like
+  Pandoc/Wikipedia; a single-reference footnote keeps the plain ↩. (#168)
+
+### Changed
+
+- **AI chat default provider is now `openai-compatible` (reasoning surfaced).**
+  For the `openai` driver the chat provider defaults to the openai-compatible
+  implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the
+  model's reasoning out of the box. An endpoint that is real OpenAI behind a
+  custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. (#177)
+
+- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the
+  same id are ONE footnote — one number, one definition, several back-references
+  — instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are
+  first-wins on import (the rest are dropped and reported via `footnoteWarnings`),
+  and a reference with no definition yields a single empty footnote rather than
+  one per occurrence. This supersedes the 0.93.0 "survive duplicate-id
+  definitions" behavior for the import path. (#166)
+
+- **Public share AI: default per-workspace hourly assistant cap lowered
+  300 → 100.** The limiter falls back to this default whenever
+  `SHARE_AI_WORKSPACE_MAX_PER_HOUR` is unset, so a `0.93.0` deployment that
+  never set the env var has its anonymous public-share assistant hourly cap
+  cut from 300 to 100 on upgrade. Set `SHARE_AI_WORKSPACE_MAX_PER_HOUR` to
+  keep the previous limit. (#62)
+
+### Fixed
+
+- **AI chat: the desktop app no longer freezes at 100% CPU on long agent runs.**
+  `useChat` re-rendered on every streamed token and `MessageItem`/`ReasoningBlock`
+  re-parsed the whole transcript markdown (marked + DOMPurify) on every delta, so
+  per-turn work grew quadratically and saturated the main thread. The stream is now
+  throttled (`experimental_throttle`) to ~20 Hz and each finalized message row /
+  markdown part / reasoning block is memoized, so a long turn no longer re-parses
+  already-finished content. (#182)
+- **Editor: caret/selection landed on the wrong line when clicking inside code
+  blocks and footnotes.** The affected NodeViews rendered their non-editable
+  chrome (language menu, footnotes heading, footnote number marker) before the
+  editable content, so the browser's click hit-testing missed the contentDOM and
+  snapped the caret to a previous node. Content now renders first in the DOM
+  (chrome is lifted back into place via CSS flex `order`), and scroll containers
+  are nudged after a paste to refresh stale hit-testing geometry. The caret
+  symptom is macOS-specific and was confirmed manually on macOS; the automated
+  guard pins the DOM-order invariant, not the caret behavior itself. (#146, #147)
+- **AI chat: the live token counter now ticks between agent steps.** During a
+  multi-step turn the header token badge (and the "Thinking… · N tokens" line)
+  no longer freezes on the previous step's authoritative usage; the current step's
+  estimate is combined per-component with `max`, so the count rises smoothly and
+  never jumps backwards. (#163)
+- **AI chat: "New chat" during a streaming first turn now resets the whole
+  chat, not just the role badge.** Starting a new chat mid-stream cleared the
+  header but left the in-flight turn's messages behind, so the fresh chat opened
+  pre-populated with the previous conversation; it now fully resets. (#161)
+- **AI chat: a dropped tool argument now yields an actionable error.** When the
+  model omitted a required parameter (typically `pageId`) in a parallel/batch
+  tool call, the assistant forwarded zod's raw "expected string, received
+  undefined" text; tool inputs now return a message naming each missing/invalid
+  parameter (the JSON Schema contract is unchanged and nothing is backfilled).
+  (#190)
+- **Page move: cycle checks are now atomic and depth-bounded.** Moving a page
+  under one of its own descendants is rejected in the same transaction as the
+  update (closing a TOCTOU window where two concurrent A→B / B→A moves could
+  form a cycle), and the recursive tree-traversal CTEs carry a cycle/depth guard
+  so a pre-existing cycle can no longer spin a query. (#207)
+- **Page/editor robustness batch.** Duplicating a page now copies shared
+  attachments for every referencing page (not just the first); colliding block
+  ids are de-duplicated on import/normalize so MCP addressed edits can't hit the
+  wrong node; transient collab store failures are retried so autosave edits
+  aren't lost; and an out-of-order tree move no longer drops the moved subtree.
+  (#206)
+
+### Security
+
+- **Public share AI: per-workspace rolling-day token budget.** The anonymous
+  share assistant now caps a workspace's actual token spend (input + output,
+  summed across every accepted turn) over a trailing day, on top of the hourly
+  request cap — so a caller who evades the per-IP throttle still cannot run up
+  the owner's provider bill without bound. Cluster-wide via Redis and FAILS
+  CLOSED if Redis is down; default 1,000,000 tokens/day, overridable via
+  `SHARE_AI_WORKSPACE_TOKEN_BUDGET_PER_DAY`. (#159)
+
+## [0.93.0] - 2026-06-21
+
+This release builds on the 0.91.0 AI foundation: admin-defined AI agent roles,
+an anonymous AI assistant on public shares, server-side voice dictation, an
+editor footnotes model, live page-template embeds, and sandboxed arbitrary-HTML
+embeds — plus a large batch of security hardening and test coverage.
+
+### Breaking Changes
+
+- **MCP shared-token auth moved to its own header.** The `/mcp` shared guard
+  no longer reads `Authorization: Bearer <MCP_TOKEN>`; it now reads only the
+  `X-MCP-Token` header. The `Authorization` header is now reserved for per-user
+  HTTP Basic / Bearer access-JWT credentials, so each `/mcp` request
+  authenticates as a specific user (the `MCP_DOCMOST_*` service account is only
+  a fallback). Existing MCP clients (e.g. Claude Desktop) configured with
+  `Authorization: Bearer <MCP_TOKEN>` must be reconfigured to send
+  `X-MCP-Token: <MCP_TOKEN>` instead. See `MCP_TOKEN` in `.env.example`. As a
+  one-time aid, the server logs a single migration warning when it sees the
+  old-style header.
+
+### Added
+
+- **AI agent roles**: admin-defined assistant personas with an optional
+  per-role model override, selectable in chat.
+- **Anonymous AI assistant on public shares**: public-share visitors can chat
+  with a selectable agent-role identity that reuses the internal chat
+  presentation, with per-request output-token caps and a fail-closed Redis
+  limiter.
+- **Voice dictation (STT)**: server-side speech-to-text with a mic button in
+  the chat and the editor, OpenRouter STT support, an endpoint test, and real
+  provider-error surfacing.
+- **Footnotes**: an editor footnotes model (inline references + a definitions
+  list).
+- **Page templates**: live whole-page embed (MVP) with a template-marker icon
+  in the page tree and a working Refresh action.
+- **Arbitrary HTML/CSS/JS embeds**: a sandboxed-iframe embed block gated by a
+  per-workspace toggle (default OFF); insertable by any member when the toggle
+  is on.
+- Admin-only **"Analytics / tracker"** workspace setting: a raw HTML/JS snippet
  injected into the `<head>` of public share pages only (for analytics such as
-  Google Analytics or Yandex.Metrika).
+  Google Analytics or Yandex.Metrika), kept separate from the member-facing
+  HTML-embed feature.
+- **MCP**: a hierarchical tree mode for `list_pages`, and per-user auth for the
+  embedded `/mcp` endpoint.
+- **Page tree**: Expand all / Collapse all for the space tree, and
+  server-authoritative realtime tree updates.
+- **AI chat UX**: a `get_current_page` tool for proxy-robust page context, a
+  current-context-size readout, an agent step cap raised 8→20 with a forced
+  final text answer, and auto-collapse of the chat window on page focus.
+- **AI settings**: a Clear control inside the API-key field and an endpoint
+  status dot bound to "configured × enabled".
+- **Client**: an always-visible space grid replacing the space-switcher popover,
+  removal of the sidebar Overview item, tighter comments-panel density, and no
+  auto-open of the comments panel when adding a comment.

 ### Changed

@@ -28,16 +208,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  server-side strip is the public-share read path, which still honors the
  workspace HTML-embed toggle.

-### Breaking Changes
+### Fixed

- **MCP shared-token auth moved to its own header.** The `/mcp` shared guard
-  no longer reads `Authorization: Bearer <MCP_TOKEN>`; it now reads only the
-  `X-MCP-Token` header. Existing MCP clients (e.g. Claude Desktop) configured
-  with `Authorization: Bearer <MCP_TOKEN>` must be reconfigured to send
-  `X-MCP-Token: <MCP_TOKEN>` instead. The `Authorization` header is now
-  reserved for per-user HTTP Basic / Bearer access JWT credentials. See
-  `MCP_TOKEN` in `.env.example`. As a one-time aid, the server logs a single
-  migration warning when it sees the old-style header.
+- AI chat: preserve scroll position during streaming, record chats that fail on
+  their first turn, and resolve the current page for agent context behind
+  proxies.
+- AI roles: guard `update()` against concurrent soft-delete; harden the model
+  override, role-name uniqueness, and id validation; sandwich the safety
+  framework around the role persona.
+- Auth: handle null-password (SSO/LDAP-only) accounts without a bcrypt throw.
+- Footnotes: survive duplicate-id definitions without collab divergence.
+- HTML embed: fix stale iframe height and damp the resize loop; strip embeds at
+  serve time on authenticated read paths and the plain page-create path.
+- Page templates: import `ThrottleModule` so collab boots, never strand an
+  in-flight page-embed id, and add defense-in-depth workspace checks.
+- Pages: `movePage` cycle guard with no phantom `PAGE_MOVED` event.
+- Import: surface the real error cause from `/pages/import` instead of a generic 400.
+
+### Security
+
+- MCP: close an SSO/MFA bypass on Basic auth and stop minting non-init sessions;
+  close a brute-force limiter check-then-act race.
+- Public share: block restricted descendants in the anonymous assistant, cap
+  per-request output, fail closed when Redis is unavailable, and reject non-text
+  message parts to close a size-cap bypass.
+- Make `trustProxy` env-configurable with a safe default.
+
+### Internal
+
+- CI: gate the `develop` and release image builds on the test suite, run the
+  suites on push/PR, and build the `:develop` image on push to `develop`.
+- Docs: replace `CLAUDE.md` with `AGENTS.md` codifying the agent workflow and
+  the release procedure, add migration-ordering guidance, and prune implemented
+  plans.
+- A large batch of new server/client test coverage.

 ## [0.91.0] - 2026-06-18

@@ -121,5 +325,7 @@ knowledge layer, an embedded MCP server, and the Gitmost rebrand.
 - Build: drop the private EE submodule, retarget CI to GHCR, and update the
  Docker image to the GHCR registry.

-[Unreleased]: https://github.com/vvzvlad/gitmost/compare/v0.91.0...HEAD
+[Unreleased]: https://github.com/vvzvlad/gitmost/compare/v0.94.0...HEAD
+[0.94.0]: https://github.com/vvzvlad/gitmost/compare/v0.93.0...v0.94.0
+[0.93.0]: https://github.com/vvzvlad/gitmost/compare/v0.91.0...v0.93.0
 [0.91.0]: https://github.com/vvzvlad/gitmost/compare/v0.90.1...v0.91.0
--- a/README.md
+++ b/README.md
@@ -114,7 +114,7 @@ community feature, with no enterprise license. Open it from the page header; the
 - 🔭 **Viewer comments** — let read-only viewers leave comments.
 - 🔭 **Password-protected pages** — protect individual pages / shares with a password.
 - 🔭 **Windows / Linux app** — native desktop app for Windows and Linux.
- 🔭 **Mobile app** — mobile apps (iOS first, Android to follow), reusing the existing responsive web UI and editor via a Capacitor wrapper, with offline planned for later. See [docs/mobile-app-plan.md](docs/mobile-app-plan.md).
+- 🔭 **Mobile app** — mobile apps (iOS first, Android to follow), reusing the existing responsive web UI and editor via a Capacitor wrapper, with offline planned for later. See [issue #195](https://gitea.vvzvlad.xyz/vvzvlad/gitmost/issues/195).
 - 🔭 **Offline mode** — offline sync & PWA support.
 - 🔭 **Editor & UX improvements** — blocks inside tables (lists, to-do items), column layout, additional heading levels, highlight blocks, custom emoji in callouts, floating images, anchor links for page mentions, toggles (shared-page width, aside/sidebar, spellcheck, ligatures), sanitized space-tree export, and mentions in breadcrumbs.

--- a/README.ru.md
+++ b/README.ru.md
@@ -115,7 +115,7 @@ real-time-коллаборации Docmost, поэтому запись нико
 - 🔭 **Комментарии зрителей** — возможность комментировать для пользователей с доступом только на чтение.
 - 🔭 **Защищённые паролем страницы** — защита отдельных страниц / шар паролем.
 - 🔭 **Приложение для Windows / Linux** — нативное десктоп-приложение для Windows и Linux.
- 🔭 **Мобильное приложение** — мобильные приложения (iOS обязательно, Android как пойдёт) на базе существующей адаптивной веб-версии и редактора через обёртку Capacitor; оффлайн запланирован на будущее. См. [docs/mobile-app-plan.md](docs/mobile-app-plan.md).
+- 🔭 **Мобильное приложение** — мобильные приложения (iOS обязательно, Android как пойдёт) на базе существующей адаптивной веб-версии и редактора через обёртку Capacitor; оффлайн запланирован на будущее. См. [issue #195](https://gitea.vvzvlad.xyz/vvzvlad/gitmost/issues/195).
 - 🔭 **Офлайн-режим** — офлайн-синхронизация и поддержка PWA.
 - 🔭 **Улучшения редактора и UX** — блоки внутри таблиц (списки, чек-листы), колоночная вёрстка, дополнительные уровни заголовков, highlight-блоки, кастомные эмодзи в callout-ах, плавающие изображения, anchor-ссылки на упоминания страниц, тоглы (ширина шары, aside/сайдбар, spellcheck, лигатуры), санитизация экспорта дерева спейса и mentions в хлебных крошках.

--- a/apps/client/package.json
+++ b/apps/client/package.json
@@ -1,10 +1,10 @@
 {
  "name": "client",
  "private": true,
-  "version": "0.93.0",
+  "version": "0.94.1",
  "scripts": {
-    "dev": "vite",
-    "build": "tsc && vite build",
+    "dev": "node scripts/copy-vad-assets.mjs && vite",
+    "build": "node scripts/copy-vad-assets.mjs && tsc && vite build",
    "lint": "eslint .",
    "preview": "vite preview",
    "format": "prettier --write \"src/**/*.tsx\" \"src/**/*.ts\"",
@@ -28,6 +28,7 @@
    "@mantine/modals": "8.3.18",
    "@mantine/notifications": "8.3.18",
    "@mantine/spotlight": "8.3.18",
+    "@ricky0123/vad-web": "^0.0.30",
    "@slidoapp/emoji-mart": "5.8.7",
    "@slidoapp/emoji-mart-data": "1.2.4",
    "@slidoapp/emoji-mart-react": "1.1.5",
@@ -53,6 +54,7 @@
    "mantine-form-zod-resolver": "1.3.0",
    "mermaid": "11.15.0",
    "mitt": "3.0.1",
+    "onnxruntime-web": "^1.27.0",
    "posthog-js": "1.372.2",
    "react": "18.3.1",
    "react-clear-modal": "^2.0.18",
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -119,6 +119,8 @@
  "Name": "Name",
  "New email": "New email",
  "New page": "New page",
+  "New note": "New note",
+  "Create in space": "Create in space",
  "New password": "New password",
  "No group found": "No group found",
  "No page history saved yet.": "No page history saved yet.",
@@ -256,6 +258,7 @@
  "Copy to space": "Copy to space",
  "Copy chat": "Copy chat",
  "Copied": "Copied",
+  "Failed to export chat": "Failed to export chat",
  "Duplicate": "Duplicate",
  "Select a user": "Select a user",
  "Select a group": "Select a group",
@@ -418,6 +421,8 @@
  "{{count}} command available_other": "{{count}} commands available",
  "{{count}} result available_one": "1 result available",
  "{{count}} result available_other": "{{count}} results available",
+  "{{count}} result found_one": "{{count}} result found",
+  "{{count}} result found_other": "{{count}} results found",
  "Equal columns": "Equal columns",
  "Left sidebar": "Left sidebar",
  "Right sidebar": "Right sidebar",
@@ -706,10 +711,12 @@
  "Authorization header": "Authorization header",
  "Tool allowlist": "Tool allowlist",
  "Optional. Leave empty to allow all tools the server exposes.": "Optional. Leave empty to allow all tools the server exposes.",
-  "Use Tavily preset": "Use Tavily preset",
+  "Optional guidance for the agent on how and when to use this server's tools. Injected into the system prompt. The server's tools are namespaced as \"<server name>_*\".": "Optional guidance for the agent on how and when to use this server's tools. Injected into the system prompt. The server's tools are namespaced as \"<server name>_*\".",
  "Test": "Test",
  "Available tools": "Available tools",
  "No tools available": "No tools available",
+  "Failed": "Failed",
+  "OK · {{n}}": "OK · {{n}}",
  "Created successfully": "Created successfully",
  "Deleted successfully": "Deleted successfully",
  "Clear": "Clear",
@@ -951,6 +958,7 @@
  "Try a different search term.": "Try a different search term.",
  "Try again": "Try again",
  "Untitled chat": "Untitled chat",
+  "No document": "No document",
  "You": "You",
  "What can I help you with?": "What can I help you with?",
  "Are you sure you want to revoke this {{credential}}": "Are you sure you want to revoke this {{credential}}",
@@ -1073,6 +1081,8 @@
  "Undo": "Undo",
  "Redo": "Redo",
  "Backlinks": "Backlinks",
+  "Back to references": "Back to references",
+  "Back to reference {{label}}": "Back to reference {{label}}",
  "Last updated by": "Last updated by",
  "Last updated": "Last updated",
  "Stats": "Stats",
@@ -1125,15 +1135,32 @@
  "Removed from favorites": "Removed from favorites",
  "Added {{name}} to favorites": "Added {{name}} to favorites",
  "Removed {{name}} from favorites": "Removed {{name}} from favorites",
+  "Label added": "Label added",
+  "Label removed": "Label removed",
+  "Image updated": "Image updated",
+  "Unsupported image type": "Unsupported image type",
+  "Member deactivated": "Member deactivated",
+  "Member activated": "Member activated",
+  "Name is required": "Name is required",
+  "Name must be 40 characters or fewer": "Name must be 40 characters or fewer",
+  "Group name must be at least 2 characters": "Group name must be at least 2 characters",
+  "Group name must be 100 characters or fewer": "Group name must be 100 characters or fewer",
+  "Description must be 500 characters or fewer": "Description must be 500 characters or fewer",
+  "Invalid invitation link": "Invalid invitation link",
  "Page menu for {{name}}": "Page menu for {{name}}",
  "Create subpage of {{name}}": "Create subpage of {{name}}",
  "AI chat": "AI chat",
  "Ask a question about this documentation.": "Ask a question about this documentation.",
  "Ask a question…": "Ask a question…",
  "Thinking…": "Thinking…",
+  "Thinking… · {{count}} tokens": "Thinking… · {{count}} tokens",
+  "Thinking… · {{count}} tokens_one": "Thinking… · {{count}} token",
+  "Thinking… · {{count}} tokens_other": "Thinking… · {{count}} tokens",
+  "Thinking · {{count}} tokens": "Thinking · {{count}} tokens",
+  "Thinking · {{count}} tokens_one": "Thinking · {{count}} token",
+  "Thinking · {{count}} tokens_other": "Thinking · {{count}} tokens",
  "The assistant is unavailable right now. Please try again.": "The assistant is unavailable right now. Please try again.",
  "Public share assistant": "Public share assistant",
-  "Enabled": "Enabled",
  "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. You pay for the tokens.": "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. You pay for the tokens.",
  "Public assistant model": "Public assistant model",
  "Defaults to the chat model": "Defaults to the chat model",
@@ -1142,12 +1169,21 @@
  "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.",
  "Built-in assistant persona": "Built-in assistant persona",
  "Minimize": "Minimize",
-  "Current context size": "Current context size",
+  "Context size / model limit": "Context size / model limit",
+  "Context window (tokens)": "Context window (tokens)",
+  "Shown as used / total in the chat header. Leave empty to hide the limit.": "Shown as used / total in the chat header. Leave empty to hide the limit.",
  "AI agent": "AI agent",
+  "Take a look at the current document": "Take a look at the current document",
  "AI agent is typing…": "AI agent is typing…",
  "{{name}} is typing…": "{{name}} is typing…",
  "Send": "Send",
+  "Send when the agent finishes": "Send when the agent finishes",
+  "Queue message": "Queue message",
+  "Remove queued message": "Remove queued message",
  "Stop": "Stop",
+  "Response stopped.": "Response stopped.",
+  "Connection lost — the answer was interrupted.": "Connection lost — the answer was interrupted.",
+  "Response stopped (manually or the connection dropped).": "Response stopped (manually or the connection dropped).",
  "Chat menu": "Chat menu",
  "No chats yet.": "No chats yet.",
  "Delete this chat?": "Delete this chat?",
@@ -1179,8 +1215,11 @@
  "Semantic search": "Semantic search",
  "Voice / STT": "Voice / STT",
  "Voice dictation": "Voice dictation",
+  "Streaming dictation": "Streaming dictation",
+  "Transcribe as you speak, cutting on pauses": "Transcribe as you speak, cutting on pauses",
  "Voice dictation is not available yet.": "Voice dictation is not available yet.",
  "Test endpoint": "Test endpoint",
+  "Save and test": "Save and test",
  "Save endpoints": "Save endpoints",
  "Configured and enabled": "Configured and enabled",
  "Configured but disabled": "Configured but disabled",
@@ -1213,6 +1252,8 @@
  "No microphone found": "No microphone found",
  "Could not start recording": "Could not start recording",
  "Transcription failed": "Transcription failed",
+  "Transcribe": "Transcribe",
+  "No speech detected": "No speech detected",
  "Voice dictation is not configured": "Voice dictation is not configured",
  "Microphone is unavailable or already in use": "Microphone is unavailable or already in use",
  "Audio recording is not available in this browser/context": "Audio recording is not available in this browser/context",
@@ -1220,6 +1261,9 @@
  "How transcription requests are sent to the endpoint": "How transcription requests are sent to the endpoint",
  "OpenAI-compatible (multipart/form-data)": "OpenAI-compatible (multipart/form-data)",
  "OpenRouter (JSON, base64 audio)": "OpenRouter (JSON, base64 audio)",
+  "Dictation language": "Dictation language",
+  "Auto-detect": "Auto-detect",
+  "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.": "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.",
  "Agent role": "Agent role",
  "Universal assistant": "Universal assistant",
  "Add role": "Add role",
@@ -1236,6 +1280,10 @@
  "Optional. Defaults to the workspace model.": "Optional. Defaults to the workspace model.",
  "e.g. gpt-4o-mini": "e.g. gpt-4o-mini",
  "If you choose a different provider, it must already be configured in AI settings.": "If you choose a different provider, it must already be configured in AI settings.",
+  "Start automatically": "Start automatically",
+  "When on, picking this role sends a launch message and starts the chat. When off, the role is selected and you type the first message yourself.": "When on, picking this role sends a launch message and starts the chat. When off, the role is selected and you type the first message yourself.",
+  "Launch message": "Launch message",
+  "Sent automatically when this role is picked. Leave empty to use the default text. Ignored when “Start automatically” is off.": "Sent automatically when this role is picked. Leave empty to use the default text. Ignored when “Start automatically” is off.",
  "Agent roles": "Agent roles",
  "Reusable presets that shape the agent's behavior (and optionally its model). Picked when starting a new chat.": "Reusable presets that shape the agent's behavior (and optionally its model). Picked when starting a new chat.",
  "No roles configured": "No roles configured",
@@ -1255,5 +1303,20 @@
  "Embeds run inside a sandboxed iframe with a separate origin, so they cannot read or modify the page they are embedded in.": "Embeds run inside a sandboxed iframe with a separate origin, so they cannot read or modify the page they are embedded in.",
  "Turning this off hides existing embeds (they render as a disabled placeholder) and stops serving them on public share pages.": "Turning this off hides existing embeds (they render as a disabled placeholder) and stops serving them on public share pages.",
  "Analytics / tracker": "Analytics / tracker",
-  "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only.": "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only."
+  "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only.": "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only.",
+  "Go to login page": "Go to login page",
+  "Move to space": "Move to space",
+  "Float left (wrap text)": "Float left (wrap text)",
+  "Float right (wrap text)": "Float right (wrap text)",
+  "Switch to tree": "Switch to tree",
+  "Switch to flat list": "Switch to flat list",
+  "Toggle subpages display mode": "Toggle subpages display mode",
+  "Page tree (child pages, recursive)": "Page tree (child pages, recursive)",
+  "Render the full nested tree of all descendant pages": "Render the full nested tree of all descendant pages",
+  "Showing {{count}} subpages_one": "Showing {{count}} subpage",
+  "Showing {{count}} subpages_other": "Showing {{count}} subpages",
+  "Protocol": "Protocol",
+  "How chat requests are sent and how reasoning is surfaced": "How chat requests are sent and how reasoning is surfaced",
+  "OpenAI-compatible (surfaces reasoning)": "OpenAI-compatible (surfaces reasoning)",
+  "OpenAI (official)": "OpenAI (official)"
 }
--- a/apps/client/public/locales/ru-RU/translation.json
+++ b/apps/client/public/locales/ru-RU/translation.json
@@ -119,6 +119,8 @@
  "Name": "Имя",
  "New email": "Новый электронный адрес",
  "New page": "Новая страница",
+  "New note": "Новая заметка",
+  "Create in space": "Создать в пространстве",
  "New password": "Новый пароль",
  "No group found": "Группа не найдена",
  "No page history saved yet.": "История страниц ещё не сохранена.",
@@ -255,6 +257,7 @@
  "Copy": "Копировать",
  "Copy to space": "Копировать в пространство",
  "Copied": "Скопировано",
+  "Failed to export chat": "Не удалось экспортировать чат",
  "Duplicate": "Дублировать",
  "Select a user": "Выберите пользователя",
  "Select a group": "Выберите группу",
@@ -383,6 +386,11 @@
  "Quote": "Цитата",
  "Image": "Изображение",
  "Audio": "Аудио",
+  "Transcribe": "Транскрибировать",
+  "Transcribing…": "Транскрибация…",
+  "No speech detected": "Речь не распознана",
+  "Transcription failed": "Не удалось распознать речь",
+  "Voice dictation is not configured": "Голосовой ввод не настроен",
  "Embed PDF": "Встроить PDF",
  "Upload and embed a PDF file.": "Загрузите и встроите PDF-файл.",
  "Embed as PDF": "Встроить как PDF",
@@ -398,6 +406,8 @@
  "Footnote {{number}}": "Сноска {{number}}",
  "Go to footnote": "Перейти к сноске",
  "Back to reference": "Вернуться к ссылке",
+  "Back to references": "Вернуться к ссылкам",
+  "Back to reference {{label}}": "Вернуться к ссылке {{label}}",
  "Empty footnote": "Пустая сноска",
  "Math inline": "Строчная формула",
  "Insert inline math equation.": "Вставить математическое выражение в строку.",
@@ -669,8 +679,56 @@
  "AI Answer": "Ответ ИИ",
  "Ask AI": "Спросить ИИ",
  "AI agent": "AI-агент",
+  "Take a look at the current document": "Посмотри текущий документ",
+  "Start automatically": "Запускать автоматически",
+  "When on, picking this role sends a launch message and starts the chat. When off, the role is selected and you type the first message yourself.": "Когда включено, выбор этой роли отправляет стартовое сообщение и начинает чат. Когда выключено, роль выбирается, а первое сообщение вы вводите сами.",
+  "Launch message": "Стартовое сообщение",
+  "Sent automatically when this role is picked. Leave empty to use the default text. Ignored when “Start automatically” is off.": "Отправляется автоматически при выборе этой роли. Оставьте пустым, чтобы использовать текст по умолчанию. Игнорируется, когда «Запускать автоматически» выключено.",
  "AI agent is typing…": "AI-агент печатает…",
  "{{name}} is typing…": "{{name}} печатает…",
+  "Thinking…": "Думаю…",
+  "Thinking… · {{count}} tokens": "Думаю… · {{count}} токенов",
+  "Thinking… · {{count}} tokens_one": "Думаю… · {{count}} токен",
+  "Thinking… · {{count}} tokens_few": "Думаю… · {{count}} токена",
+  "Thinking… · {{count}} tokens_many": "Думаю… · {{count}} токенов",
+  "Thinking · {{count}} tokens": "Размышления · {{count}} токенов",
+  "Thinking · {{count}} tokens_one": "Размышления · {{count}} токен",
+  "Thinking · {{count}} tokens_few": "Размышления · {{count}} токена",
+  "Thinking · {{count}} tokens_many": "Размышления · {{count}} токенов",
+  "Agent role": "Роль агента",
+  "AI chat": "AI-чат",
+  "AI chat is disabled for this workspace.": "AI-чат отключён для этого рабочего пространства.",
+  "Ask a question about this documentation.": "Задайте вопрос об этой документации.",
+  "Ask a question…": "Задайте вопрос…",
+  "Ask the AI agent anything about your workspace.": "Спросите AI-агента о чём угодно по вашему рабочему пространству.",
+  "Ask the AI agent…": "Спросите AI-агента…",
+  "Copy chat": "Копировать чат",
+  "Created successfully": "Успешно создано",
+  "Context size / model limit": "Размер контекста / лимит модели",
+  "Context window (tokens)": "Окно контекста (токены)",
+  "Shown as used / total in the chat header. Leave empty to hide the limit.": "Показывается в шапке чата как использовано / всего. Пусто — лимит скрыт.",
+  "Delete this chat?": "Удалить этот чат?",
+  "Deleted successfully": "Успешно удалено",
+  "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}",
+  "Failed to delete chat": "Не удалось удалить чат",
+  "Failed to rename chat": "Не удалось переименовать чат",
+  "Failed": "Ошибка",
+  "OK · {{n}}": "OK · {{n}}",
+  "Test": "Тест",
+  "No tools available": "Инструменты недоступны",
+  "Available tools": "Доступные инструменты",
+  "Minimize": "Свернуть",
+  "No chats yet.": "Чатов пока нет.",
+  "Send": "Отправить",
+  "Send when the agent finishes": "Отправить, когда агент закончит",
+  "Queue message": "Поставить в очередь",
+  "Remove queued message": "Убрать из очереди",
+  "Something went wrong": "Что-то пошло не так",
+  "Stop": "Стоп",
+  "The AI agent could not respond. Please try again.": "AI-агент не смог ответить. Попробуйте ещё раз.",
+  "The AI provider is not configured. Ask an administrator to set it up.": "AI-провайдер не настроен. Попросите администратора настроить его.",
+  "Universal assistant": "Универсальный ассистент",
+  "You": "Вы",
  "AI is thinking...": "ИИ обрабатывает запрос...",
  "Thinking": "Думаю",
  "Ask a question...": "Задайте вопрос...",
@@ -700,6 +758,8 @@
  "Manage API keys for all users in the workspace. View the <anchor>API documentation</anchor> for usage details.": "Управляйте API-ключами для всех пользователей в рабочем пространстве. Смотрите <anchor>документацию по API</anchor> для получения информации об использовании.",
  "View the <anchor>API documentation</anchor> for usage details.": "Смотрите <anchor>документацию по API</anchor> для получения информации об использовании.",
  "View the <anchor>MCP documentation</anchor>.": "Смотрите <anchor>документацию по MCP</anchor>.",
+  "Instructions": "Инструкции",
+  "Optional guidance for the agent on how and when to use this server's tools. Injected into the system prompt. The server's tools are namespaced as \"<server name>_*\".": "Необязательное указание агенту, как и когда использовать инструменты этого сервера. Добавляется в системный промпт. Инструменты сервера именуются с префиксом «<имя сервера>_*».",
  "Sources": "Источники",
  "AI Answers not available for attachments": "Ответы ИИ недоступны для вложений",
  "No answer available": "Ответ недоступен",
@@ -926,6 +986,7 @@
  "Try a different search term.": "Попробуйте другой поисковый запрос.",
  "Try again": "Попробовать снова",
  "Untitled chat": "Чат без названия",
+  "No document": "Без документа",
  "What can I help you with?": "Чем я могу вам помочь?",
  "Are you sure you want to revoke this {{credential}}": "Вы уверены, что хотите отозвать этот {{credential}}",
  "Automatically provision users and groups from your identity provider via SCIM.": "Автоматически предоставляйте доступ пользователям и группам из вашего провайдера удостоверений через SCIM.",
@@ -1097,5 +1158,22 @@
  "Added {{name}} to favorites": "{{name}} добавлено в избранное",
  "Removed {{name}} from favorites": "{{name}} удалено из избранного",
  "Page menu for {{name}}": "Меню страницы для {{name}}",
-  "Create subpage of {{name}}": "Создать подстраницу для {{name}}"
+  "Create subpage of {{name}}": "Создать подстраницу для {{name}}",
+  "Dictation language": "Язык диктовки",
+  "Auto-detect": "Автоопределение",
+  "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.": "Подсказка языка речи для модели транскрипции. «Автоопределение» оставляет выбор за моделью.",
+  "Float left (wrap text)": "Обтекание слева",
+  "Float right (wrap text)": "Обтекание справа",
+  "Switch to tree": "Переключить на дерево",
+  "Switch to flat list": "Переключить на плоский список",
+  "Toggle subpages display mode": "Переключить режим отображения подстраниц",
+  "Page tree (child pages, recursive)": "Дерево страниц (дочерние, рекурсивно)",
+  "Render the full nested tree of all descendant pages": "Показать полное вложенное дерево всех дочерних страниц",
+  "Showing {{count}} subpages_one": "Показана {{count}} подстраница",
+  "Showing {{count}} subpages_few": "Показано {{count}} подстраницы",
+  "Showing {{count}} subpages_many": "Показано {{count}} подстраниц",
+  "Protocol": "Протокол",
+  "How chat requests are sent and how reasoning is surfaced": "Как отправляются запросы чата и как показывается reasoning",
+  "OpenAI-compatible (surfaces reasoning)": "OpenAI-совместимый (показывает reasoning)",
+  "OpenAI (official)": "OpenAI (официальный)"
 }
--- a/apps/client/scripts/copy-vad-assets.mjs
+++ b/apps/client/scripts/copy-vad-assets.mjs
@@ -0,0 +1,70 @@
+// Self-host the @ricky0123/vad-web + onnxruntime-web runtime assets under
+// apps/client/public/vad/.
+//
+// WHY THIS EXISTS:
+// Both vad-web and onnxruntime-web resolve their assets by URL *at runtime* (the
+// VAD audio worklet + Silero model, and ORT's wasm/mjs backend). In vad-web
+// 0.0.30 the default baseAssetPath / onnxWASMBasePath is "./" — i.e. relative to
+// the current page URL — NOT a CDN. In this SPA that "./" request hits the
+// client-side catch-all route and gets served index.html (text/html), so the
+// onnxruntime ESM/wasm backend fails to initialize ("'text/html' is not a valid
+// JavaScript MIME type"). We fix that by copying the needed runtime files into
+// public/vad/ and pointing both path constants at the fixed absolute "/vad/".
+//
+// These copies are NOT committed (the ORT wasm is ~26 MB); this script runs
+// before `dev` and `build` (see package.json) to repopulate them from
+// node_modules. It is idempotent: it (re)creates the dir and overwrites.
+
+import { createRequire } from "node:module";
+import { fileURLToPath } from "node:url";
+import path from "node:path";
+import fs from "node:fs";
+
+const require = createRequire(import.meta.url);
+const here = path.dirname(fileURLToPath(import.meta.url));
+const outDir = path.join(here, "..", "public", "vad");
+
+// vad-web exposes ./package.json, so derive its dist dir from there.
+const vadDist = path.join(
+  path.dirname(require.resolve("@ricky0123/vad-web/package.json")),
+  "dist",
+);
+
+// onnxruntime-web's "exports" map does NOT expose ./package.json, so resolving
+// it would throw ERR_PACKAGE_PATH_NOT_EXPORTED. It DOES export the exact asset
+// subpaths we need, so resolve those files directly.
+//
+// ORT ships several wasm backends and which one the app bundle references depends
+// on the resolver: Vite dev resolves the JSEP build (ort-wasm-simd-threaded.jsep.*)
+// while the production rolldown build resolves the plain build
+// (ort-wasm-simd-threaded.*). Ship BOTH variants so the runtime fetch hits a real
+// file under /vad/ regardless of which the bundle picked (each .mjs proxy fetches
+// its matching .wasm at init).
+const ortJsepMjs = require.resolve(
+  "onnxruntime-web/ort-wasm-simd-threaded.jsep.mjs",
+);
+const ortJsepWasm = require.resolve(
+  "onnxruntime-web/ort-wasm-simd-threaded.jsep.wasm",
+);
+const ortMjs = require.resolve("onnxruntime-web/ort-wasm-simd-threaded.mjs");
+const ortWasm = require.resolve("onnxruntime-web/ort-wasm-simd-threaded.wasm");
+
+// [absolute source path, output filename]
+const files = [
+  [path.join(vadDist, "vad.worklet.bundle.min.js"), "vad.worklet.bundle.min.js"],
+  [path.join(vadDist, "silero_vad_v5.onnx"), "silero_vad_v5.onnx"],
+  [ortJsepMjs, "ort-wasm-simd-threaded.jsep.mjs"],
+  [ortJsepWasm, "ort-wasm-simd-threaded.jsep.wasm"],
+  [ortMjs, "ort-wasm-simd-threaded.mjs"],
+  [ortWasm, "ort-wasm-simd-threaded.wasm"],
+];
+
+fs.mkdirSync(outDir, { recursive: true });
+for (const [src, name] of files) {
+  if (!fs.existsSync(src)) {
+    console.error(`[copy-vad-assets] missing source: ${src}`);
+    process.exit(1);
+  }
+  fs.copyFileSync(src, path.join(outDir, name));
+  console.log(`[copy-vad-assets] ${name}`);
+}
--- a/apps/client/src/components/common/avatar-uploader.tsx
+++ b/apps/client/src/components/common/avatar-uploader.tsx
@@ -42,6 +42,23 @@ export default function AvatarUploader({
      return;
    }

+    // Validate file type. The `accept` attribute only filters the dialog;
+    // a user can still select a non-image file, which previously failed
+    // silently. Surface a visible error instead (issue #133). Accept any
+    // image/* MIME (png, jpeg, webp, gif, svg, ...) so we don't narrow below
+    // what the server accepts; only genuinely non-image files are rejected.
+    if (!file.type.startsWith("image/")) {
+      notifications.show({
+        message: t("Unsupported image type"),
+        color: "red",
+      });
+      // Reset the input
+      if (fileInputRef.current) {
+        fileInputRef.current.value = "";
+      }
+      return;
+    }
+
    // Validate file size (max 10MB)
    const maxSizeInBytes = 10 * 1024 * 1024;
    if (file.size > maxSizeInBytes) {
@@ -58,6 +75,8 @@ export default function AvatarUploader({

    try {
      await onUpload(file);
+      // Notify on success so the upload gives visible feedback (issue #128)
+      notifications.show({ message: t("Image updated") });
    } catch (error) {
      console.error(error);
      notifications.show({
@@ -117,7 +136,7 @@ export default function AvatarUploader({
        type="file"
        ref={fileInputRef}
        onChange={handleFileInputChange}
-        accept="image/png,image/jpeg,image/jpg"
+        accept="image/*"
        aria-label={ariaLabel}
        tabIndex={-1}
        style={{ display: "none" }}
--- a/apps/client/src/components/common/recent-changes.tsx
+++ b/apps/client/src/components/common/recent-changes.tsx
@@ -67,6 +67,7 @@ export default function RecentChanges({ spaceId }: Props) {
                    <Badge
                      color={getInitialsColor(page?.space.name)}
                      variant="light"
+                      tt="none"
                      component={Link}
                      to={getSpaceUrl(page?.space.slug)}
                      style={{ cursor: "pointer" }}
--- a/apps/client/src/components/icons/icon-columns-4.tsx
+++ b/apps/client/src/components/icons/icon-columns-4.tsx
@@ -9,8 +9,10 @@ export function IconColumns4({ size = 24, stroke = 2 }: Props) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
-      width={rem(size)}
-      height={rem(size)}
+      // rem(size) returns a `calc(...)` string, which is invalid for the raw
+      // SVG width/height length attributes ("Expected length, calc(...)"). Pass
+      // it via CSS style instead (matching the other icon components).
+      style={{ width: rem(size), height: rem(size) }}
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
--- a/apps/client/src/components/icons/icon-columns-5.tsx
+++ b/apps/client/src/components/icons/icon-columns-5.tsx
@@ -9,8 +9,10 @@ export function IconColumns5({ size = 24, stroke = 2 }: Props) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
-      width={rem(size)}
-      height={rem(size)}
+      // rem(size) returns a `calc(...)` string, which is invalid for the raw
+      // SVG width/height length attributes ("Expected length, calc(...)"). Pass
+      // it via CSS style instead (matching the other icon components).
+      style={{ width: rem(size), height: rem(size) }}
      viewBox="0 0 24 24"
      fill="none"
      stroke="currentColor"
--- a/apps/client/src/components/layouts/global/app-header.module.css
+++ b/apps/client/src/components/layouts/global/app-header.module.css
@@ -13,6 +13,7 @@
    text-decoration: none;
    color: inherit;
    cursor: pointer;
+    user-select: none;
 }

 .brandIcon {
@@ -33,21 +34,3 @@
       that is ~9.3px, minus the font descent (~2px) ≈ 7px. */
    margin-bottom: rem(7px);
 }
-
-.link {
-    display: block;
-    line-height: 1;
-    padding: rem(8px) rem(12px);
-    border-radius: var(--mantine-radius-sm);
-    text-decoration: none;
-    color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-dark-0));
-    font-size: var(--mantine-font-size-sm);
-    font-weight: 500;
-    user-select: none;
-    white-space: nowrap;
-    flex-shrink: 0;
-
-    @mixin hover {
-        background-color: light-dark(var(--mantine-color-gray-0), var(--mantine-color-dark-6));
-    }
-}
--- a/apps/client/src/components/layouts/global/app-header.tsx
+++ b/apps/client/src/components/layouts/global/app-header.tsx
@@ -5,12 +5,11 @@ import {
  Text,
  Tooltip,
 } from "@mantine/core";
-import { IconSparkles } from "@tabler/icons-react";
+import { IconMessage } from "@tabler/icons-react";
 import classes from "./app-header.module.css";
 import { BrandLogo } from "@/components/ui/brand-logo";
 import TopMenu from "@/components/layouts/global/top-menu.tsx";
 import { Link } from "react-router-dom";
-import APP_ROUTE from "@/lib/app-route.ts";
 import { useAtom, useSetAtom } from "jotai";
 import {
  desktopSidebarAtom,
@@ -30,10 +29,6 @@ import {
 } from "@/features/search/constants.ts";
 import { NotificationPopover } from "@/features/notification/components/notification-popover.tsx";

-const links = [
-  { link: APP_ROUTE.HOME, label: "Home" },
-];
-
 export function AppHeader() {
  const { t } = useTranslation();
  const [mobileOpened] = useAtom(mobileSidebarAtom);
@@ -47,12 +42,6 @@ export function AppHeader() {
  // AI chat entry point: only shown when the workspace enables it (A7 gate).
  const aiChatEnabled = workspace?.settings?.ai?.chat === true;

-  const items = links.map((link) => (
-    <Link key={link.label} to={link.link} className={classes.link}>
-      {t(link.label)}
-    </Link>
-  ));
-
  return (
    <>
      <Group h="100%" px="md" justify="space-between" wrap={"nowrap"}>
@@ -97,10 +86,6 @@ export function AppHeader() {
              </Text>
            </Tooltip>
          </Group>
-
-          <Group ml="xl" gap={5} className={classes.links} visibleFrom="sm">
-            {items}
-          </Group>
        </Group>

        <div>
@@ -122,7 +107,7 @@ export function AppHeader() {
                aria-label={t("AI chat")}
                onClick={() => setAiChatWindowOpen((v) => !v)}
              >
-                <IconSparkles size={20} />
+                <IconMessage size={20} />
              </ActionIcon>
            </Tooltip>
          )}
--- a/apps/client/src/components/layouts/global/aside.tsx
+++ b/apps/client/src/components/layouts/global/aside.tsx
@@ -27,7 +27,7 @@ export default function Aside() {

  switch (tab) {
    case "comments":
-      component = <CommentListWithTabs />;
+      component = <CommentListWithTabs onClose={closeAside} />;
      title = "Comments";
      break;
    case "toc":
@@ -44,26 +44,27 @@ export default function Aside() {
  }

  return (
-    <Box p="md" style={{ height: "100%", display: "flex", flexDirection: "column" }}>
-      {component && (
-        <>
-          <Group justify="space-between" wrap="nowrap" mb="md">
-            <Title order={2} size="h6" fw={500}>{t(title)}</Title>
-            <Tooltip label={t("Close")} withArrow>
-              <ActionIcon
-                variant="subtle"
-                color="gray"
-                onClick={closeAside}
-                aria-label={t("Close")}
-              >
-                <IconX size={18} />
-              </ActionIcon>
-            </Tooltip>
-          </Group>
-
-          {tab === "comments" ? (
-            component
-          ) : (
+    <Box p={0} style={{ height: "100%", display: "flex", flexDirection: "column" }}>
+      {component &&
+        (tab === "comments" ? (
+          component
+        ) : (
+          <>
+            <Group justify="space-between" wrap="nowrap" mb="sm">
+              <Title order={2} size="h6" fw={500}>
+                {t(title)}
+              </Title>
+              <Tooltip label={t("Close")} withArrow>
+                <ActionIcon
+                  variant="subtle"
+                  color="gray"
+                  onClick={closeAside}
+                  aria-label={t("Close")}
+                >
+                  <IconX size={18} />
+                </ActionIcon>
+              </Tooltip>
+            </Group>
            <ScrollArea
              style={{ height: "85vh" }}
              scrollbarSize={5}
@@ -71,9 +72,8 @@ export default function Aside() {
            >
              <div style={{ paddingBottom: "200px" }}>{component}</div>
            </ScrollArea>
-          )}
-        </>
-      )}
+          </>
+        ))}
    </Box>
  );
 }
--- a/apps/client/src/components/layouts/global/global-app-shell.tsx
+++ b/apps/client/src/components/layouts/global/global-app-shell.tsx
@@ -14,6 +14,7 @@ import { SpaceSidebar } from "@/features/space/components/sidebar/space-sidebar.
 import { AppHeader } from "@/components/layouts/global/app-header.tsx";
 import Aside from "@/components/layouts/global/aside.tsx";
 import AiChatWindow from "@/features/ai-chat/components/ai-chat-window.tsx";
+import GitmostGlobalBridge from "@/features/editor/gitmost/gitmost-global-bridge.tsx";
 import classes from "./app-shell.module.css";
 import { useToggleSidebar } from "@/components/layouts/global/hooks/hooks/use-toggle-sidebar.ts";
 import GlobalSidebar from "@/components/layouts/global/global-sidebar.tsx";
@@ -94,12 +95,12 @@ export default function GlobalAppShell({
      }}
      aside={
        isPageRoute && {
-          width: 350,
+          width: 420,
          breakpoint: "sm",
          collapsed: { mobile: !isAsideOpen, desktop: !isAsideOpen },
        }
      }
-      padding="md"
+      padding={{ base: "xs", sm: "md" }}
    >
      <AppShell.Header px="md" className={classes.header}>
        <AppHeader />
@@ -138,7 +139,7 @@ export default function GlobalAppShell({
          id={ASIDE_PANEL_ID}
          tabIndex={-1}
          className={classes.aside}
-          p="md"
+          p="sm"
          withBorder={false}
          aria-label={
            asideTab === "comments"
@@ -157,6 +158,10 @@ export default function GlobalAppShell({
    {/* Floating AI chat window. Mounted once globally; it is position: fixed
        and self-hides when closed, so its place in the tree is not critical. */}
    <AiChatWindow />
+      {/* Global gitmost native bridge: registers listSpaces / listPages /
+          createPageWithRecording on window.gitmost so the native host can
+          create a page with a recording even when no page editor is open. */}
+      <GitmostGlobalBridge />
    </>
  );
 }
--- a/apps/client/src/components/layouts/global/top-menu.tsx
+++ b/apps/client/src/components/layouts/global/top-menu.tsx
@@ -20,18 +20,29 @@ import {
 } from "@tabler/icons-react";
 import { useAtom } from "jotai";
 import { currentUserAtom } from "@/features/user/atoms/current-user-atom.ts";
-import { Link } from "react-router-dom";
+import { Link, useMatch } from "react-router-dom";
 import APP_ROUTE from "@/lib/app-route.ts";
 import useAuth from "@/features/auth/hooks/use-auth.ts";
 import { CustomAvatar } from "@/components/ui/custom-avatar.tsx";
 import { useTranslation } from "react-i18next";
 import { AvatarIconType } from "@/features/attachments/types/attachment.types.ts";
+import { useDisclosure } from "@mantine/hooks";
+import SpaceSettingsModal from "@/features/space/components/settings-modal.tsx";

 export default function TopMenu() {
  const { t } = useTranslation();
  const [currentUser] = useAtom(currentUserAtom);
  const { logout } = useAuth();
  const { colorScheme, setColorScheme } = useMantineColorScheme();
+  // Detect the currently viewed space so the "Space settings" item is only
+  // offered while the user is inside a space. The "/*" splat also matches the
+  // bare "/s/:spaceSlug" route (the splat matches an empty segment).
+  const spaceMatch = useMatch("/s/:spaceSlug/*");
+  const spaceSlug = spaceMatch?.params?.spaceSlug;
+  const [
+    spaceSettingsOpened,
+    { open: openSpaceSettings, close: closeSpaceSettings },
+  ] = useDisclosure(false);

  const user = currentUser?.user;
  const workspace = currentUser?.workspace;
@@ -41,124 +52,143 @@ export default function TopMenu() {
  }

  return (
-    <Menu width={250} position="bottom-end" withArrow shadow={"lg"}>
-      <Menu.Target>
-        <UnstyledButton>
-          <Group gap={7} wrap={"nowrap"}>
-            <CustomAvatar
-              avatarUrl={workspace?.logo}
-              name={workspace?.name}
-              variant="filled"
-              size="sm"
-              type={AvatarIconType.WORKSPACE_ICON}
-            />
-            <Text fw={500} size="sm" lh={1} mr={3} lineClamp={1}>
-              {workspace?.name}
-            </Text>
-            <IconChevronDown size={16} />
-          </Group>
-        </UnstyledButton>
-      </Menu.Target>
-      <Menu.Dropdown>
-        <Menu.Label>{t("Workspace")}</Menu.Label>
-
-        <Menu.Item
-          component={Link}
-          to={APP_ROUTE.SETTINGS.WORKSPACE.GENERAL}
-          leftSection={<IconSettings size={16} />}
-        >
-          {t("Workspace settings")}
-        </Menu.Item>
-
-        <Menu.Item
-          component={Link}
-          to={APP_ROUTE.SETTINGS.WORKSPACE.MEMBERS}
-          leftSection={<IconUsers size={16} />}
-        >
-          {t("Manage members")}
-        </Menu.Item>
-
-        <Menu.Divider />
-
-        <Menu.Label>{t("Account")}</Menu.Label>
-        <Menu.Item component={Link} to={APP_ROUTE.SETTINGS.ACCOUNT.PROFILE}>
-          <Group wrap={"nowrap"}>
-            <CustomAvatar
-              size={"sm"}
-              avatarUrl={user.avatarUrl}
-              name={user.name}
-            />
-
-            <div style={{ width: 190 }}>
-              <Text size="sm" fw={500} lineClamp={1}>
-                {user.name}
+    <>
+      <Menu width={250} position="bottom-end" withArrow shadow={"lg"}>
+        <Menu.Target>
+          <UnstyledButton>
+            <Group gap={7} wrap={"nowrap"}>
+              <CustomAvatar
+                avatarUrl={workspace?.logo}
+                name={workspace?.name}
+                variant="filled"
+                size="sm"
+                type={AvatarIconType.WORKSPACE_ICON}
+              />
+              <Text fw={500} size="sm" lh={1} mr={3} lineClamp={1}>
+                {workspace?.name}
              </Text>
-              <Text size="xs" c="dimmed" truncate="end">
-                {user.email}
-              </Text>
-            </div>
-          </Group>
-        </Menu.Item>
-        <Menu.Item
-          component={Link}
-          to={APP_ROUTE.SETTINGS.ACCOUNT.PROFILE}
-          leftSection={<IconUserCircle size={16} />}
-        >
-          {t("My profile")}
-        </Menu.Item>
+              <IconChevronDown size={16} />
+            </Group>
+          </UnstyledButton>
+        </Menu.Target>
+        <Menu.Dropdown>
+          <Menu.Label>{t("Workspace")}</Menu.Label>

-        <Menu.Item
-          component={Link}
-          to={APP_ROUTE.SETTINGS.ACCOUNT.PREFERENCES}
-          leftSection={<IconBrush size={16} />}
-        >
-          {t("My preferences")}
-        </Menu.Item>
+          <Menu.Item
+            component={Link}
+            to={APP_ROUTE.SETTINGS.WORKSPACE.GENERAL}
+            leftSection={<IconSettings size={16} />}
+          >
+            {t("Workspace settings")}
+          </Menu.Item>

-        <Menu.Sub>
-          <Menu.Sub.Target>
-            <Menu.Sub.Item leftSection={<IconBrightnessFilled size={16} />}>
-              {t("Theme")}
-            </Menu.Sub.Item>
-          </Menu.Sub.Target>
-
-          <Menu.Sub.Dropdown>
+          {spaceSlug && (
            <Menu.Item
-              onClick={() => setColorScheme("light")}
-              leftSection={<IconSun size={16} />}
-              rightSection={
-                colorScheme === "light" ? <IconCheck size={16} /> : null
-              }
+              onClick={openSpaceSettings}
+              leftSection={<IconSettings size={16} />}
            >
-              {t("Light")}
+              {t("Space settings")}
            </Menu.Item>
-            <Menu.Item
-              onClick={() => setColorScheme("dark")}
-              leftSection={<IconMoon size={16} />}
-              rightSection={
-                colorScheme === "dark" ? <IconCheck size={16} /> : null
-              }
-            >
-              {t("Dark")}
-            </Menu.Item>
-            <Menu.Item
-              onClick={() => setColorScheme("auto")}
-              leftSection={<IconDeviceDesktop size={16} />}
-              rightSection={
-                colorScheme === "auto" ? <IconCheck size={16} /> : null
-              }
-            >
-              {t("System settings")}
-            </Menu.Item>
-          </Menu.Sub.Dropdown>
-        </Menu.Sub>
+          )}

-        <Menu.Divider />
+          <Menu.Item
+            component={Link}
+            to={APP_ROUTE.SETTINGS.WORKSPACE.MEMBERS}
+            leftSection={<IconUsers size={16} />}
+          >
+            {t("Manage members")}
+          </Menu.Item>

-        <Menu.Item onClick={logout} leftSection={<IconLogout size={16} />}>
-          {t("Logout")}
-        </Menu.Item>
-      </Menu.Dropdown>
-    </Menu>
+          <Menu.Divider />
+
+          <Menu.Label>{t("Account")}</Menu.Label>
+          <Menu.Item component={Link} to={APP_ROUTE.SETTINGS.ACCOUNT.PROFILE}>
+            <Group wrap={"nowrap"}>
+              <CustomAvatar
+                size={"sm"}
+                avatarUrl={user.avatarUrl}
+                name={user.name}
+              />
+
+              <div style={{ width: 190 }}>
+                <Text size="sm" fw={500} lineClamp={1}>
+                  {user.name}
+                </Text>
+                <Text size="xs" c="dimmed" truncate="end">
+                  {user.email}
+                </Text>
+              </div>
+            </Group>
+          </Menu.Item>
+          <Menu.Item
+            component={Link}
+            to={APP_ROUTE.SETTINGS.ACCOUNT.PROFILE}
+            leftSection={<IconUserCircle size={16} />}
+          >
+            {t("My profile")}
+          </Menu.Item>
+
+          <Menu.Item
+            component={Link}
+            to={APP_ROUTE.SETTINGS.ACCOUNT.PREFERENCES}
+            leftSection={<IconBrush size={16} />}
+          >
+            {t("My preferences")}
+          </Menu.Item>
+
+          <Menu.Sub>
+            <Menu.Sub.Target>
+              <Menu.Sub.Item leftSection={<IconBrightnessFilled size={16} />}>
+                {t("Theme")}
+              </Menu.Sub.Item>
+            </Menu.Sub.Target>
+
+            <Menu.Sub.Dropdown>
+              <Menu.Item
+                onClick={() => setColorScheme("light")}
+                leftSection={<IconSun size={16} />}
+                rightSection={
+                  colorScheme === "light" ? <IconCheck size={16} /> : null
+                }
+              >
+                {t("Light")}
+              </Menu.Item>
+              <Menu.Item
+                onClick={() => setColorScheme("dark")}
+                leftSection={<IconMoon size={16} />}
+                rightSection={
+                  colorScheme === "dark" ? <IconCheck size={16} /> : null
+                }
+              >
+                {t("Dark")}
+              </Menu.Item>
+              <Menu.Item
+                onClick={() => setColorScheme("auto")}
+                leftSection={<IconDeviceDesktop size={16} />}
+                rightSection={
+                  colorScheme === "auto" ? <IconCheck size={16} /> : null
+                }
+              >
+                {t("System settings")}
+              </Menu.Item>
+            </Menu.Sub.Dropdown>
+          </Menu.Sub>
+
+          <Menu.Divider />
+
+          <Menu.Item onClick={logout} leftSection={<IconLogout size={16} />}>
+            {t("Logout")}
+          </Menu.Item>
+        </Menu.Dropdown>
+      </Menu>
+
+      {spaceSlug && (
+        <SpaceSettingsModal
+          spaceId={spaceSlug}
+          opened={spaceSettingsOpened}
+          onClose={closeSpaceSettings}
+        />
+      )}
+    </>
  );
 }
--- a/apps/client/src/components/settings/settings-sidebar.tsx
+++ b/apps/client/src/components/settings/settings-sidebar.tsx
@@ -20,7 +20,6 @@ import {
  prefetchSpaces,
  prefetchWorkspaceMembers,
 } from "@/components/settings/settings-queries.tsx";
-import AppVersion from "@/components/settings/app-version.tsx";
 import { mobileSidebarAtom } from "@/components/layouts/global/hooks/atoms/sidebar-atom.ts";
 import { useToggleSidebar } from "@/components/layouts/global/hooks/hooks/use-toggle-sidebar.ts";
 import { useSettingsNavigation } from "@/hooks/use-settings-navigation";
@@ -141,8 +140,6 @@ export default function SettingsSidebar() {
      </Group>

      <ScrollArea w="100%">{menuItems}</ScrollArea>
-
-      <AppVersion />
    </div>
  );
 }
--- a/apps/client/src/components/ui/ai-agent-badge.test.tsx
+++ b/apps/client/src/components/ui/ai-agent-badge.test.tsx
@@ -0,0 +1,96 @@
+import { describe, it, expect, vi } from "vitest";
+import { render, screen, fireEvent } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+import { Provider, createStore } from "jotai";
+import { AiAgentBadge } from "./ai-agent-badge";
+import {
+  activeAiChatIdAtom,
+  aiChatWindowOpenAtom,
+  aiChatDraftAtom,
+} from "@/features/ai-chat/atoms/ai-chat-atom.ts";
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+// Mounts a bare badge (implicit jotai store) under the Mantine provider.
+function renderBadge(props: { authorName?: string; aiChatId?: string | null }) {
+  const { authorName, aiChatId } = props;
+  const badge = <AiAgentBadge authorName={authorName} aiChatId={aiChatId} />;
+  const tree = <MantineProvider>{badge}</MantineProvider>;
+  return render(tree);
+}
+
+// Mounts a deep-linkable badge under a dedicated jotai store pre-seeded with a
+// stale draft, inside a <div> click spy; returns the store, spies and button.
+function setupClickable() {
+  const onActivate = vi.fn();
+  const onParentClick = vi.fn();
+  const store = createStore();
+  store.set(aiChatDraftAtom, "leftover draft from another chat");
+  const badgeElement = (
+    <AiAgentBadge authorName="Bot" aiChatId="chat-1" onActivate={onActivate} />
+  );
+  render(
+    <Provider store={store}>
+      <MantineProvider>
+        <div onClick={onParentClick}>{badgeElement}</div>
+      </MantineProvider>
+    </Provider>,
+  );
+  return { store, onActivate, onParentClick, badge: screen.getByRole("button") };
+}
+
+function expectDeepLinked(store: ReturnType<typeof createStore>, onActivate: ReturnType<typeof vi.fn>) {
+  expect(store.get(activeAiChatIdAtom)).toBe("chat-1"); // the badge's chat became the active one
+  expect(store.get(aiChatWindowOpenAtom)).toBe(true); // the floating chat window was opened
+  expect(store.get(aiChatDraftAtom)).toBe(""); // the seeded leftover draft was cleared
+  expect(onActivate).toHaveBeenCalledTimes(1); // fired exactly once so the caller can react (e.g. close its own modal)
+}
+
+describe("AiAgentBadge", () => {
+  it("renders the AI-agent label", () => {
+    renderBadge({ authorName: "Bot" });
+    expect(screen.getByText("AI-agent")).toBeDefined(); // getByText itself throws when the label is absent
+  });
+
+  it("is clickable (accessible button) when aiChatId is present", () => {
+    renderBadge({ authorName: "Bot", aiChatId: "chat-1" });
+    const badge = screen.getByRole("button"); // the button role is only exposed when a chat target exists
+    expect(badge).toBeDefined();
+    expect(badge.textContent).toContain("AI-agent");
+  });
+
+  it("click deep-links: sets active chat, clears draft, opens window, fires onActivate, stops propagation", () => {
+    const { store, onActivate, onParentClick, badge } = setupClickable();
+    fireEvent.click(badge);
+    expectDeepLinked(store, onActivate);
+    expect(onParentClick).not.toHaveBeenCalled(); // stopPropagation kept the click away from the enclosing <div>
+  });
+
+  it.each(["Enter", " "])(
+    "keyboard %j activates the deep-link (same side effects as click)",
+    (key) => {
+      const { store, onActivate, badge } = setupClickable();
+      fireEvent.keyDown(badge, { key }); // the badge listens on keydown
+      expectDeepLinked(store, onActivate);
+    },
+  );
+
+  it("an unrelated key does NOT activate the badge", () => {
+    const { store, onActivate, badge } = setupClickable();
+    fireEvent.keyDown(badge, { key: "Tab" }); // Tab is neither Enter nor Space
+    expect(store.get(activeAiChatIdAtom)).toBeNull(); // no chat was selected
+    expect(store.get(aiChatWindowOpenAtom)).toBe(false); // the window stayed closed
+    expect(store.get(aiChatDraftAtom)).toBe("leftover draft from another chat"); // seeded draft untouched
+    expect(onActivate).not.toHaveBeenCalled();
+  });
+
+  it.each([{ aiChatId: null }, {}])( // explicit null and omitted prop must behave the same
+    "is a plain non-clickable label without a chat target (%o)",
+    (props) => {
+      renderBadge({ authorName: "Bot", ...props });
+      expect(screen.getByText("AI-agent")).toBeDefined();
+      // No interactive role is exposed when there is no chat to deep-link into.
+      expect(screen.queryByRole("button")).toBeNull();
+    },
+  );
+});
--- a/apps/client/src/components/ui/ai-agent-badge.tsx
+++ b/apps/client/src/components/ui/ai-agent-badge.tsx
@@ -0,0 +1,99 @@
+import { Badge, Tooltip } from "@mantine/core";
+import { IconSparkles } from "@tabler/icons-react";
+import { useCallback } from "react";
+import { useTranslation } from "react-i18next";
+import { useSetAtom } from "jotai";
+import {
+  activeAiChatIdAtom,
+  aiChatWindowOpenAtom,
+  aiChatDraftAtom,
+} from "@/features/ai-chat/atoms/ai-chat-atom.ts";
+
+interface AiAgentBadgeProps {
+  authorName?: string; // interpolated as {{name}} in the tooltip; "" is used when omitted
+  aiChatId?: string | null; // chat to deep-link into; null/absent renders a plain, non-clickable label
+  // Fired after the badge deep-links into its chat. The caller handles its own
+  // context (e.g. the page-history row closes the history modal) so this generic
+  // ui/ primitive stays free of cross-feature coupling (#143 review Arch B).
+  onActivate?: () => void;
+}
+
+/**
+ * Provenance marker for AI-written content (provenance C3 / §7.4). The badge is
+ * ADDITIVE: it sits beside the human author and never replaces them. Shared by
+ * the page-history list and the comments sidebar.
+ *
+ * With an `aiChatId` (an internal AI-chat edit) a click, Enter or Space selects
+ * that chat, clears the composer draft, opens the floating AI-chat window and
+ * then calls `onActivate` (e.g. so the history modal can close itself). The
+ * event is contained (stopPropagation) so an enclosing row's handler stays
+ * silent. Without an `aiChatId` (an external MCP write with no internal
+ * ai_chats row) the badge is a plain, non-interactive label.
+ */
+export function AiAgentBadge({
+  authorName,
+  aiChatId,
+  onActivate,
+}: AiAgentBadgeProps) {
+  const { t } = useTranslation();
+  const showChatWindow = useSetAtom(aiChatWindowOpenAtom);
+  const selectChat = useSetAtom(activeAiChatIdAtom);
+  const resetDraft = useSetAtom(aiChatDraftAtom);
+
+  const activate = useCallback(
+    (event: React.SyntheticEvent) => {
+      event.stopPropagation();
+      if (aiChatId) {
+        selectChat(aiChatId);
+        // A different chat must open with an empty composer, so drop any
+        // unsent draft left over from the previously open chat.
+        resetDraft("");
+        showChatWindow(true);
+        onActivate?.();
+      }
+    },
+    [aiChatId, selectChat, resetDraft, showChatWindow, onActivate],
+  );
+
+  // Keyboard parity with a native button: only Enter and Space activate.
+  const handleKeyDown = (event: React.KeyboardEvent) => {
+    if (event.key !== "Enter" && event.key !== " ") return;
+    event.preventDefault();
+    activate(event);
+  };
+
+  // The root stays Mantine's default Badge element rather than a <button>: a
+  // <button> nested inside a row's UnstyledButton would be invalid markup, so
+  // button semantics are supplied through role, tabIndex and key handling.
+  const interactiveProps = aiChatId
+    ? {
+        role: "button",
+        tabIndex: 0,
+        onClick: activate,
+        onKeyDown: handleKeyDown,
+      }
+    : {};
+
+  return (
+    <Tooltip
+      label={t("Edited by AI agent on behalf of {{name}}", {
+        name: authorName ?? "",
+      })}
+      withArrow
+    >
+      <Badge
+        size="sm"
+        variant="light"
+        color="violet"
+        radius="sm"
+        leftSection={<IconSparkles size={12} stroke={2} />}
+        style={aiChatId ? { cursor: "pointer" } : undefined}
+        {...interactiveProps}
+      >
+        {t("AI-agent")}
+      </Badge>
+    </Tooltip>
+  );
+}
+
+export default AiAgentBadge;
--- a/apps/client/src/components/ui/brand-logo.tsx
+++ b/apps/client/src/components/ui/brand-logo.tsx
@@ -27,6 +27,7 @@ export function BrandLogo({
      src={src}
      alt="Gitmost"
      className={className}
+      draggable={false}
      style={{ height, width: "auto", display: "block", userSelect: "none" }}
    />
  );
--- a/apps/client/src/features/ai-chat/atoms/ai-chat-atom.ts
+++ b/apps/client/src/features/ai-chat/atoms/ai-chat-atom.ts
@@ -1,4 +1,22 @@
 import { atom } from "jotai";
+import { atomWithStorage } from "jotai/utils";
+
+/**
+ * Persisted floating AI chat window geometry (position + size). Held in
+ * localStorage so a drag/resize survives a full page reload. `null` means
+ * "never placed yet" — the window then computes an initial top-right placement.
+ * On restore the value is clamped to the current viewport (see AiChatWindow).
+ */
+export type AiChatWindowGeom = {
+  left: number; // horizontal position — presumably px from the viewport's left edge; confirm in AiChatWindow
+  top: number; // vertical position — presumably px from the viewport's top edge; confirm in AiChatWindow
+  width: number; // window width, kept in sync from the element's offsetWidth
+  height: number; // window height, kept in sync from the element's offsetHeight
+};
+export const aiChatWindowGeomAtom = atomWithStorage<AiChatWindowGeom | null>(
+  "ai-chat-window-geom", // storage key
+  null, // initial value: never placed yet
+);

 /**
 * The currently selected chat id. `null` means a fresh (not-yet-created) chat:
--- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
+++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
@@ -6,7 +6,7 @@ import {
  useRef,
  useState,
 } from "react";
-import { Group, Loader, Select, Tooltip } from "@mantine/core";
+import { Group, Loader, Tooltip } from "@mantine/core";
 import {
  IconArrowsDiagonal,
  IconCheck,
@@ -24,6 +24,7 @@ import { useQueryClient } from "@tanstack/react-query";
 import {
  activeAiChatIdAtom,
  aiChatWindowOpenAtom,
+  aiChatWindowGeomAtom,
  aiChatDraftAtom,
  selectedAiRoleIdAtom,
 } from "@/features/ai-chat/atoms/ai-chat-atom.ts";
@@ -31,17 +32,20 @@ import { usePageQuery } from "@/features/page/queries/page-query.ts";
 import { extractPageSlugId } from "@/lib";
 import {
  AI_CHATS_RQ_KEY,
+  AI_CHAT_MESSAGES_RQ_KEY,
  useAiChatMessagesQuery,
  useAiChatsQuery,
  useAiRolesQuery,
 } from "@/features/ai-chat/queries/ai-chat-query.ts";
 import ConversationList from "@/features/ai-chat/components/conversation-list.tsx";
 import ChatThread from "@/features/ai-chat/components/chat-thread.tsx";
-import { buildChatMarkdown } from "@/features/ai-chat/utils/chat-markdown.ts";
+import { exportAiChat } from "@/features/ai-chat/services/ai-chat-service.ts";
+import { useChatSession } from "@/features/ai-chat/hooks/use-chat-session.ts";
 import {
  shouldCollapseOnOutsidePointer,
  isHeaderClick,
 } from "@/features/ai-chat/utils/collapse-helpers.ts";
+import { selectContextBadge } from "@/features/ai-chat/utils/context-badge.ts";
 import { useClipboard } from "@/hooks/use-clipboard";
 import { notifications } from "@mantine/notifications";
 import classes from "@/features/ai-chat/components/ai-chat-window.module.css";
@@ -76,17 +80,31 @@ function computeInitialGeom() {
    Math.min(DEFAULT_HEIGHT, window.innerHeight - 2 * EDGE_MARGIN),
  );
  const left = Math.max(EDGE_MARGIN, window.innerWidth - width - 24);
-  const maxTop = Math.max(EDGE_MARGIN, window.innerHeight - height - EDGE_MARGIN);
+  const maxTop = Math.max(
+    EDGE_MARGIN,
+    window.innerHeight - height - EDGE_MARGIN,
+  );
  const top = Math.min(60, maxTop);
  return { left, top, width, height };
 }

 // Clamp a geometry so the window stays within the current viewport.
-function clampGeom(g: { left: number; top: number; width: number; height: number }) {
+function clampGeom(g: {
+  left: number;
+  top: number;
+  width: number;
+  height: number;
+}) {
  const effWidth = Math.max(g.width, MIN_WIDTH);
  const effHeight = Math.max(g.height, MIN_HEIGHT);
-  const maxLeft = Math.max(EDGE_MARGIN, window.innerWidth - effWidth - EDGE_MARGIN);
-  const maxTop = Math.max(EDGE_MARGIN, window.innerHeight - effHeight - EDGE_MARGIN);
+  const maxLeft = Math.max(
+    EDGE_MARGIN,
+    window.innerWidth - effWidth - EDGE_MARGIN,
+  );
+  const maxTop = Math.max(
+    EDGE_MARGIN,
+    window.innerHeight - effHeight - EDGE_MARGIN,
+  );
  return {
    ...g,
    left: Math.min(Math.max(EDGE_MARGIN, g.left), maxLeft),
@@ -97,12 +115,13 @@ function clampGeom(g: { left: number; top: number; width: number; height: number
 /**
 * Floating, draggable, resizable, minimizable AI chat window. Replaces the
 * former right-aside `AiChatPanel`: it owns ALL chat orchestration (active
- * chat, new chat, adopt-new-chat, open-page context, token sum) and wraps the
+ * chat, new chat, in-place id adoption from streamed metadata, open-page
+ * context, token sum) and wraps the
 * reused inner components (ConversationList + ChatThread) in window chrome
 * ported from the GitmostAgent.jsx design.
 */
 export default function AiChatWindow() {
-  const { t } = useTranslation();
+  const { t, i18n } = useTranslation();
  const clipboard = useClipboard({ timeout: 500 });
  const queryClient = useQueryClient();
  const [windowOpen, setWindowOpen] = useAtom(aiChatWindowOpenAtom);
@@ -120,19 +139,13 @@ export default function AiChatWindow() {
  minimizedRef.current = minimized;

  const winRef = useRef<HTMLDivElement>(null);
-  // Live window geometry (position + size); initialized lazily on first open so
-  // it is anchored to the current viewport (top-right corner). Kept in state so
-  // a user resize survives close/reopen and can be re-clamped to the viewport.
-  const [geom, setGeom] = useState<{
-    left: number;
-    top: number;
-    width: number;
-    height: number;
-  } | null>(null);
-
-  // Track whether we are awaiting the id of a just-created (new) chat, so we
-  // can adopt it once the chat list refreshes after the first turn finishes.
-  const adoptNewChat = useRef(false);
+  // Live window geometry (position + size); persisted to localStorage so a
+  // drag/resize survives a full page reload (and close/reopen). `null` means
+  // "never placed yet" — the layout effect below then computes an initial
+  // top-right placement anchored to the current viewport, and on restore it is
+  // re-clamped to the viewport (so a placement saved on a larger screen is not
+  // left partly off-screen).
+  const [geom, setGeom] = useAtom(aiChatWindowGeomAtom);

  const { data: chats } = useAiChatsQuery();
  // Roles for the new-chat picker (any member may list them). Only fetched while
@@ -145,6 +158,7 @@ export default function AiChatWindow() {
    () => (roles ?? []).filter((r) => r.enabled === true),
    [roles],
  );
+
  const { data: messageRows, isLoading: messagesLoading } =
    useAiChatMessagesQuery(activeChatId ?? undefined);

@@ -165,71 +179,115 @@ export default function AiChatWindow() {
    ? { id: openPageData.id, title: openPageData.title }
    : null;

+  // The AI-chat thread-identity lifecycle (mount key, both new-chat id adoption
+  // paths, the history-loaded latch, the render-phase reconciler) lives in this
+  // hook. See adopt-chat-id.ts for the canonical #137 two-tab race explanation.
+  // The invalidate closures are passed inline: `onTurnFinished` is read live by
+  // useChat's onFinish (never in an effect dep array), so their identity does not
+  // matter — no memoization ceremony needed.
+  const {
+    threadKey,
+    waitingForHistory,
+    startFreshThread,
+    onTurnFinished,
+    onServerChatId,
+    cancelPendingAdoption,
+  } = useChatSession({
+    activeChatId,
+    setActiveChatId,
+    chats,
+    messagesLoading,
+    onInvalidateChatList: () =>
+      queryClient.invalidateQueries({ queryKey: AI_CHATS_RQ_KEY }),
+    onInvalidateChatMessages: (id) =>
+      queryClient.invalidateQueries({ queryKey: AI_CHAT_MESSAGES_RQ_KEY(id) }),
+  });
+
+  // startNewChat/selectChat set the public atom; the hook's render-phase
+  // reconciler handles the remount when activeChatId actually CHANGES. But
+  // pressing "New chat" while already in a new chat leaves activeChatId === null
+  // (a no-op for the atom), so the reconciler never fires — explicitly disarm any
+  // armed error-path fallback here so a late refetch can't yank the user into a
+  // just-failed chat after they chose a fresh one.
  const startNewChat = useCallback((): void => {
+    cancelPendingAdoption();
+    // Force a fresh, empty thread UNCONDITIONALLY (#161). Pressing "New chat"
+    // while a brand-new chat's first turn is still streaming can find
+    // activeChatId still null (its server id not adopted yet), so
+    // setActiveChatId(null) alone is a no-op and the reconciler never remounts —
+    // the chat/stream/history would persist and only the role badge would drop.
+    // This always remounts the thread into a clean new chat.
+    startFreshThread();
    setActiveChatId(null);
    setHistoryOpen(false);
    setDraft("");
    // Default the picker back to "Universal assistant" for the fresh chat.
    setSelectedRoleId(null);
-  }, [setActiveChatId, setDraft, setSelectedRoleId]);
+  }, [
+    cancelPendingAdoption,
+    startFreshThread,
+    setActiveChatId,
+    setDraft,
+    setSelectedRoleId,
+  ]);

  const selectChat = useCallback(
    (chatId: string): void => {
+      cancelPendingAdoption();
      setActiveChatId(chatId);
      setHistoryOpen(false);
      setDraft("");
+      // Reset the card-picked role so a stale pick can't leak into the existing
+      // chat's header/assistant-name (which prefers the chat's persisted role).
+      setSelectedRoleId(null);
    },
-    [setActiveChatId, setDraft],
+    [cancelPendingAdoption, setActiveChatId, setDraft, setSelectedRoleId],
  );

-  // After a turn finishes, refresh the chat list. For a brand-new chat (no id
-  // yet), the server has just created the row; adopt the newest chat id so the
-  // thread switches from "new" to the persisted chat (and loads its history on
-  // later opens).
-  const onTurnFinished = useCallback(() => {
-    if (activeChatId === null) adoptNewChat.current = true;
-    queryClient.invalidateQueries({ queryKey: AI_CHATS_RQ_KEY });
-  }, [activeChatId, queryClient]);
-
-  // The active chat object (for its title) and an export gate: only enable the
-  // export button when an existing chat with loaded persisted rows is active.
+  // The active chat object (for its title) and an export gate. The export is now
+  // SERVER-sourced (the DB is the single source of truth — #183): the assistant
+  // row is persisted upfront + per step, so even a brand-new chat whose first
+  // turn is streaming/interrupted has a server row to render. Enable the button
+  // whenever a persisted chat is active (`activeChatId` is set). For a BRAND-NEW
+  // chat that id is adopted EARLY — at the stream's `start` chunk via
+  // onServerChatId (#174) — so the Copy button is available during the first
+  // turn's stream, not only after it terminates.
  const activeChat = useMemo(
    () => chats?.items?.find((c) => c.id === activeChatId) ?? null,
    [chats, activeChatId],
  );
-  const canExport = !!activeChatId && !!messageRows && messageRows.length > 0;
+  const canExport = !!activeChatId;

-  // Build a Markdown export from the already-loaded persisted rows (no network
-  // call) and copy it to the clipboard. The "Copied" notification is the
-  // feedback.
-  const handleCopy = useCallback(() => {
-    if (!activeChatId || !messageRows || messageRows.length === 0) return;
-    const markdown = buildChatMarkdown({
-      title: activeChat?.title ?? null,
-      chatId: activeChatId,
-      rows: messageRows,
-      t,
-    });
-    clipboard.copy(markdown);
-    notifications.show({ message: t("Copied") });
-  }, [activeChatId, messageRows, activeChat, clipboard, t]);
-
-  // When awaiting a new chat's id, adopt the most-recent chat (the list is
-  // ordered newest-first) once it appears.
-  useEffect(() => {
-    if (!adoptNewChat.current) return;
-    const newest = chats?.items?.[0];
-    if (newest) {
-      adoptNewChat.current = false;
-      setActiveChatId(newest.id);
+  // The role to display in the header and as the assistant's name. Prefer the
+  // persisted role of an existing chat (chat-list JOIN); fall back to the role
+  // picked via a card click for a brand-new or just-adopted chat. selectChat
+  // resets selectedRoleId, so this fallback never leaks into an unrelated chat.
+  const currentRole = useMemo<{
+    name: string;
+    emoji: string | null;
+  } | null>(() => {
+    if (activeChat?.roleName) {
+      return { name: activeChat.roleName, emoji: activeChat.roleEmoji ?? null };
    }
-  }, [chats, setActiveChatId]);
+    const picked = enabledRoles.find((r) => r.id === selectedRoleId);
+    return picked ? { name: picked.name, emoji: picked.emoji } : null;
+  }, [activeChat, enabledRoles, selectedRoleId]);

-  // The thread is remounted when the active chat changes so initial messages
-  // re-seed. For a new chat we key on "new"; adopting the id remounts the
-  // thread with the persisted history loaded.
-  const threadKey = activeChatId ?? "new";
-  const waitingForHistory = activeChatId !== null && messagesLoading;
+  // Fetch the server-rendered Markdown export and copy it to the clipboard. The
+  // server is the single source of truth (#183): it renders the transcript from
+  // the persisted rows — including an interrupted turn's in-progress row — so the
+  // export is identical whether the chat is freshly streaming, just switched to,
+  // or reloaded. The `lang` of the active i18n drives the few localized labels.
+  const handleCopy = useCallback(async () => {
+    if (!activeChatId) return;
+    try {
+      const markdown = await exportAiChat(activeChatId, i18n.language);
+      clipboard.copy(markdown);
+      notifications.show({ message: t("Copied") });
+    } catch {
+      notifications.show({ message: t("Failed to export chat"), color: "red" });
+    }
+  }, [activeChatId, clipboard, t, i18n.language]);

  // Current context size for the active chat: how much the conversation now
  // occupies in the model's context window — NOT the cumulative tokens spent.
@@ -238,24 +296,19 @@ export default function AiChatWindow() {
  // shipped; older rows fall back to that turn's `usage` total. NOTE: reflects
  // PERSISTED rows (updates on chat open/switch); it does not tick live
  // mid-stream — acceptable for v1.
-  const contextTokens = useMemo(() => {
-    if (!activeChatId || !messageRows) return 0;
-    for (let i = messageRows.length - 1; i >= 0; i--) {
-      const meta = messageRows[i].metadata;
-      if (!meta) continue;
-      if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
-        return meta.contextTokens;
-      }
-      const usage = meta.usage;
-      if (usage) {
-        const fallback =
-          usage.totalTokens ??
-          (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
-        if (fallback > 0) return fallback;
-      }
-    }
-    return 0;
-  }, [activeChatId, messageRows]);
+  //
+  // The denominator `maxContextTokens` (the model's configured max window) is
+  // derived in the SAME backward scan: it is stamped alongside `contextTokens`
+  // on a completed turn, but the numerator and denominator are taken from the
+  // most recent row carrying EACH value independently — they may land on
+  // different rows (e.g. a fresh error row can carry contextTokens but not
+  // maxContextTokens), so we keep scanning for whichever is still unset. 0 when
+  // no row has it (older rows, or no admin-configured limit) — the badge then
+  // shows just the current size with no denominator.
+  const { contextTokens, maxContextTokens } = useMemo(
+    () => selectContextBadge(activeChatId ? messageRows : undefined),
+    [activeChatId, messageRows],
+  );

  // On (re)open, settle the geometry before paint (useLayoutEffect → no
  // first-frame jump): compute an initial top-right placement the first time,
@@ -296,18 +349,23 @@ export default function AiChatWindow() {
  useEffect(() => {
    if (!windowOpen || minimized) return;
    const el = winRef.current;
+    // `geom` is in the deps so this re-runs once geometry is settled and the
+    // window is actually rendered (on the first open `geom` is still null on the
+    // render that flips windowOpen, so winRef.current is null then — without the
+    // geom dep the observer would never attach and resizes would not persist).
    if (!el) return;
    const ro = new ResizeObserver(() => {
      const width = el.offsetWidth;
      const height = el.offsetHeight;
      setGeom((prev) => {
-        if (!prev || (prev.width === width && prev.height === height)) return prev;
+        if (!prev || (prev.width === width && prev.height === height))
+          return prev;
        return { ...prev, width, height };
      });
    });
    ro.observe(el);
    return () => ro.disconnect();
-  }, [windowOpen, minimized]);
+  }, [windowOpen, minimized, geom !== null]);

  const startDrag = useCallback((e: React.MouseEvent): void => {
    // Ignore drags that originate on a button (minimize/close/new chat).
@@ -430,21 +488,31 @@ export default function AiChatWindow() {
          {t("AI chat")}
        </span>

-        {/* Role badge for the active chat (emoji + name). Shown only when the
-            chat is bound to a role that still exists. */}
-        {activeChat?.roleName && (
+        {/* Role badge (emoji + name). Shows the persisted role of an existing
+            chat, or the role picked via a card for a brand-new chat. Hidden for
+            a universal (no-role) chat. */}
+        {currentRole && (
          <span className={classes.badge} title={t("Agent role")}>
-            {activeChat.roleEmoji ? `${activeChat.roleEmoji} ` : ""}
-            {activeChat.roleName}
+            {currentRole.emoji ? `${currentRole.emoji} ` : ""}
+            {currentRole.name}
          </span>
        )}

        <div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
-          {contextTokens > 0 && (
-            <Tooltip label={t("Current context size")} withArrow>
-              <span className={classes.badge}>{formatTokens(contextTokens)}</span>
+          {/* Always show the persisted "current / max" context. The denominator
+              (the admin-configured model limit) is appended only when known;
+              not clamped when current > max (shown as-is, e.g. "210k / 200k").
+              Hidden entirely until a turn has recorded a context figure. */}
+          {contextTokens > 0 ? (
+            <Tooltip label={t("Context size / model limit")} withArrow>
+              <span className={classes.badge}>
+                {formatTokens(contextTokens)}
+                {maxContextTokens > 0
+                  ? ` / ${formatTokens(maxContextTokens)}`
+                  : ""}
+              </span>
            </Tooltip>
-          )}
+          ) : null}
        </div>

        <div style={{ display: "flex", alignItems: "center", gap: 1 }}>
@@ -456,7 +524,11 @@ export default function AiChatWindow() {
              aria-label={t("Copy chat")}
              onClick={handleCopy}
            >
-              {clipboard.copied ? <IconCheck size={14} /> : <IconCopy size={14} />}
+              {clipboard.copied ? (
+                <IconCheck size={14} />
+              ) : (
+                <IconCopy size={14} />
+              )}
            </button>
          )}
          <button
@@ -537,28 +609,10 @@ export default function AiChatWindow() {
          )}
        </div>

-        {/* Role picker — only for a NEW chat (before it is created). Once the
-            chat exists, its role is fixed and shown as a header badge instead.
-            Defaults to "Universal assistant" (no role). */}
-        {activeChatId === null && (enabledRoles?.length ?? 0) > 0 && (
-          <div style={{ padding: "4px 8px 0" }}>
-            <Select
-              size="xs"
-              label={t("Agent role")}
-              value={selectedRoleId ?? ""}
-              onChange={(value) => setSelectedRoleId(value || null)}
-              allowDeselect={false}
-              comboboxProps={{ withinPortal: true }}
-              data={[
-                { value: "", label: t("Universal assistant") },
-                ...enabledRoles.map((r) => ({
-                  value: r.id,
-                  label: `${r.emoji ? `${r.emoji} ` : ""}${r.name}`,
-                })),
-              ]}
-            />
-          </div>
-        )}
+        {/* The role picker for a NEW chat is rendered as the chat's empty-state
+            (colored role cards centered in the empty window) by ChatThread
+            itself — clicking a card starts the chat with that role. Once the
+            chat exists, its role is fixed and shown as a header badge instead. */}

        {/* body: active chat thread */}
        <div className={classes.body}>
@@ -569,12 +623,19 @@ export default function AiChatWindow() {
          ) : (
            <ChatThread
              key={threadKey}
+              threadKey={threadKey}
              chatId={activeChatId}
              initialRows={activeChatId ? messageRows : []}
              openPage={openPage}
              // Honoured only for a new chat; null = universal assistant.
              roleId={activeChatId === null ? selectedRoleId : null}
+              // Role cards are the new-chat empty-state; offered only when this
+              // is a brand-new chat. Clicking a card starts the chat with it.
+              roles={activeChatId === null ? enabledRoles : undefined}
+              onRolePicked={(role) => setSelectedRoleId(role.id)}
+              assistantName={currentRole?.name}
              onTurnFinished={onTurnFinished}
+              onServerChatId={onServerChatId}
            />
          )}
        </div>
--- a/apps/client/src/features/ai-chat/components/ai-chat.module.css
+++ b/apps/client/src/features/ai-chat/components/ai-chat.module.css
@@ -55,6 +55,45 @@
    padding-inline-start: 1.4em;
 }

+/* GFM tables in assistant markdown. The chat lives in a NARROW side panel, so a
+   wide LLM table must scroll horizontally instead of collapsing its columns:
+   `.markdown` sets `word-break: break-word`, which (with the default table
+   layout) shrinks columns to a single glyph and wraps headers mid-word
+   ("Секция" -> "Секци / я"). Make the table a horizontally scrollable block,
+   give cells a readable minimum width, and restore word-boundary wrapping. */
+.markdown table {
+    display: block;
+    /* lets the table scroll horizontally on its own */
+    max-width: 100%;
+    overflow-x: auto;
+    border-collapse: collapse;
+    margin-block-end: 0.5em;
+}
+
+.markdown th,
+.markdown td {
+    border: 1px solid light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-4));
+    padding: 3px 8px;
+    /* readable floor; the block scrolls when the row exceeds the panel */
+    min-width: 6em;
+    text-align: left;
+    vertical-align: top;
+    /* cancel the inherited break-word so words don't split mid-glyph */
+    word-break: normal;
+    /* still wrap genuinely long words / URLs at the cell edge */
+    overflow-wrap: break-word;
+}
+
+.markdown th {
+    background: light-dark(var(--mantine-color-gray-1), var(--mantine-color-dark-5));
+    font-weight: 600;
+}
+
+/* GFM wraps cell text in <p>; drop its default block margin inside cells. */
+.markdown table p {
+    margin: 0;
+}
+
 /* Animated three-dot "typing" indicator shown while the agent is thinking but
   has not yet produced any visible text/tool parts. */
 .typingDots {
@@ -88,16 +127,18 @@
        opacity: 0.4;
    }
    40% {
-        transform: translateY(-3px);
+        /* Bounce height is driven by --bounce so reduced-motion can dampen it
+           (below) without disabling the animation outright. */
+        transform: translateY(var(--bounce, -6px));
        opacity: 1;
    }
 }

-/* Respect reduced-motion preferences: fall back to a static dimmed state. */
+/* Respect reduced-motion preferences: keep a smaller bounce instead of a full
+   stop, so the "thinking" indicator still reads as active rather than frozen. */
@media (prefers-reduced-motion: reduce) {
    .typingDots span {
-        animation: none;
-        opacity: 0.6;
+        --bounce: -3px;
    }
 }

@@ -109,6 +150,28 @@
    background: light-dark(var(--mantine-color-gray-0), var(--mantine-color-dark-6));
 }

+/* Collapsible "Thinking" (reasoning) block: a subtle left rule, dimmer than the
+   answer so it reads as secondary thinking context above the real answer. */
+.reasoningBlock {
+    border-left: 2px solid light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-4));
+    padding-left: 8px;
+}
+
+.reasoningText {
+    margin-top: 4px;
+    font-size: var(--mantine-font-size-xs);
+    color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-dark-1));
+    /* NOTE: `white-space: pre-wrap` is intentionally NOT set here. On the
+       rendered markdown <div> it would turn the newlines between block tags
+       (</li>\n<li>, </p>\n<ol>) into visible blank lines/indents on top of the
+       margins. The plain-text fallback <Text> that needs pre-wrap sets it
+       inline itself (see reasoning-block.tsx). */
+}
+
+.reasoningText p {
+    margin: 0 0 4px;
+}
+
 .inputWrapper {
    flex: 0 0 auto;
    padding-top: var(--mantine-spacing-xs);
@@ -126,3 +189,29 @@
 .conversationItemActive {
    background: var(--mantine-color-gray-light);
 }
+
+/* Pending messages queued by the user while a turn is still streaming. They
+   are sent automatically, FIFO, only after the current turn finishes cleanly. */
+.queuedList {
+    padding-bottom: var(--mantine-spacing-xs);
+}
+
+.queuedItem {
+    background: var(--mantine-color-gray-light);
+    border-radius: var(--mantine-radius-sm);
+    padding: 4px 8px;
+}
+
+.queuedIcon {
+    flex: none;
+    color: var(--mantine-color-dimmed);
+}
+
+.queuedText {
+    flex: 1;
+    min-width: 0;
+    color: var(--mantine-color-dimmed);
+    white-space: pre-wrap;
+    overflow-wrap: break-word;
+    word-break: break-word;
+}
--- a/apps/client/src/features/ai-chat/components/chat-error-alert.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-error-alert.tsx
@@ -0,0 +1,49 @@
+import { Alert, Group, Text, type AlertProps } from "@mantine/core";
+import { IconAlertTriangle } from "@tabler/icons-react";
+
+/**
+ * A classified AI chat error banner: a warning icon + bold heading on the first
+ * row, with the detail text spanning the full width below. Rendered for BOTH the
+ * live stream error (ChatThread) and a persisted assistant error (MessageItem),
+ * so this markup lives in one place. The detail is full-width (no hanging indent
+ * under the heading) so it wraps less and leaves no stranded icon / empty gap.
+ * The heading reuses Mantine's adaptive red "light" colour so it stays correct
+ * in dark mode. Layout-only props (mb/mt/...) are forwarded to the Alert root.
+ */
+interface ChatErrorAlertProps extends Omit<AlertProps, "title" | "children"> {
+  title: string;
+  detail: string;
+}
+
+export default function ChatErrorAlert(props: ChatErrorAlertProps) {
+  const { title, detail, style, ...rootProps } = props;
+  // Heading + icon share Mantine's adaptive red "light" text colour.
+  const headingTint = { color: "var(--mantine-color-red-light-color)" };
+  // Shown first: warning icon next to the bold heading, on a single row.
+  const headingRow = (
+    <Group gap={8} wrap="nowrap" align="center" mb={4}>
+      <IconAlertTriangle size={18} style={{ flex: "none", ...headingTint }} />
+      <Text fw={700} size="sm" lh={1.2} style={headingTint}>
+        {title}
+      </Text>
+    </Group>
+  );
+  // flexShrink: 0 stops the chat panel's flex layout from squeezing the
+  // banner below its content height (Mantine's Alert root is
+  // `overflow: hidden`, so a squeezed banner would clip the detail text);
+  // the scrollable message list absorbs the height pressure instead.
+  return (
+    <Alert
+      {...rootProps}
+      variant="light"
+      color="red"
+      p="xs"
+      style={[{ flexShrink: 0 }, style]}
+    >
+      {headingRow}
+      <Text size="sm" lh={1.4}>
+        {detail}
+      </Text>
+    </Alert>
+  );
+}
--- a/apps/client/src/features/ai-chat/components/chat-input.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-input.tsx
@@ -9,18 +9,24 @@ import { MicButton } from "@/features/dictation/components/mic-button";

 interface ChatInputProps {
  onSend: (text: string) => void;
+  /** Called instead of `onSend` while a turn is streaming: the text is queued
+   *  and sent automatically once the current turn finishes. */
+  onQueue: (text: string) => void;
  onStop: () => void;
  isStreaming: boolean;
  disabled?: boolean;
 }

 /**
- * Message composer. Enter sends, Shift+Enter inserts a newline. While the agent
- * is streaming, the send button becomes a Stop button (calls `stop()`); the
- * textarea stays usable so the user can draft the next turn.
+ * Message composer. Enter submits, Shift+Enter inserts a newline. While the
+ * agent is streaming, submitting QUEUES the message (via `onQueue`) instead of
+ * dropping it — it is sent automatically once the current turn finishes; the
+ * Stop button (calls `stop()`) is also shown. The textarea stays usable so the
+ * user can draft / queue the next turn while the agent is busy.
 */
 export default function ChatInput({
  onSend,
+  onQueue,
  onStop,
  isStreaming,
  disabled,
@@ -29,18 +35,23 @@ export default function ChatInput({
  const [value, setValue] = useAtom(aiChatDraftAtom);
  const workspace = useAtomValue(workspaceAtom);
  const isDictationEnabled = workspace?.settings?.ai?.dictation === true;
+  // Streaming (silence-cut) dictation is opt-in per workspace; absent/false
+  // keeps the stable batch path.
+  const streamingDictation =
+    workspace?.settings?.ai?.dictationStreaming === true;

-  const send = (): void => {
+  const submit = (): void => {
    const text = value.trim();
-    if (!text || isStreaming || disabled) return;
-    onSend(text);
+    if (!text || disabled) return;
+    if (isStreaming) onQueue(text);
+    else onSend(text);
    setValue("");
  };

  const handleKeyDown = (e: KeyboardEvent<HTMLTextAreaElement>): void => {
    if (e.key === "Enter" && !e.shiftKey) {
      e.preventDefault();
-      send();
+      submit();
    }
  };

@@ -64,28 +75,43 @@ export default function ChatInput({
      {isDictationEnabled && (
        <MicButton
          size="lg"
+          streaming={streamingDictation}
          disabled={isStreaming || disabled}
          onText={(text) => setValue((v) => (v ? `${v} ${text}` : text))}
        />
      )}
      {isStreaming ? (
-        <Tooltip label={t("Stop")} withArrow>
-          <ActionIcon
-            size="lg"
-            color="red"
-            variant="light"
-            onClick={onStop}
-            aria-label={t("Stop")}
-          >
-            <IconPlayerStopFilled size={18} />
-          </ActionIcon>
-        </Tooltip>
+        <Group gap="xs" wrap="nowrap">
+          {value.trim().length > 0 && (
+            <Tooltip label={t("Send when the agent finishes")} withArrow>
+              <ActionIcon
+                size="lg"
+                variant="filled"
+                onClick={submit}
+                aria-label={t("Queue message")}
+              >
+                <IconSend size={18} />
+              </ActionIcon>
+            </Tooltip>
+          )}
+          <Tooltip label={t("Stop")} withArrow>
+            <ActionIcon
+              size="lg"
+              color="red"
+              variant="light"
+              onClick={onStop}
+              aria-label={t("Stop")}
+            >
+              <IconPlayerStopFilled size={18} />
+            </ActionIcon>
+          </Tooltip>
+        </Group>
      ) : (
        <Tooltip label={t("Send")} withArrow>
          <ActionIcon
            size="lg"
            variant="filled"
-            onClick={send}
+            onClick={submit}
            disabled={disabled || value.trim().length === 0}
            aria-label={t("Send")}
          >
--- a/apps/client/src/features/ai-chat/components/chat-stopped-notice.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-stopped-notice.tsx
@@ -0,0 +1,41 @@
+import { Alert, Group, Text, type AlertProps } from "@mantine/core";
+import { IconPlayerStopFilled } from "@tabler/icons-react";
+
+/**
+ * A neutral "turn was interrupted" notice (NOT an error). Rendered for an
+ * aborted turn — a manual Stop or a dropped connection — both live (ChatThread)
+ * and in reopened history (MessageItem). Deliberately gray/subtle so it reads as
+ * an informational marker, distinct from the red ChatErrorAlert. Layout-only
+ * props (mt/mb/...) are forwarded to the Alert root.
+ */
+interface ChatStoppedNoticeProps extends Omit<AlertProps, "title" | "children"> {
+  text: string;
+}
+
+export default function ChatStoppedNotice(props: ChatStoppedNoticeProps) {
+  const { text, style, ...rootProps } = props;
+  const stopGlyph = (
+    <IconPlayerStopFilled
+      size={16}
+      style={{ flex: "none", color: "var(--mantine-color-dimmed)" }}
+    />
+  );
+  // flexShrink: 0 (same as ChatErrorAlert) keeps the notice from being
+  // compressed as a flex child of the chat panel.
+  return (
+    <Alert
+      {...rootProps}
+      variant="light"
+      color="gray"
+      p="xs"
+      style={[{ flexShrink: 0 }, style]}
+    >
+      <Group gap={8} wrap="nowrap" align="center">
+        {stopGlyph}
+        <Text size="sm" lh={1.3} c="dimmed">
+          {text}
+        </Text>
+      </Group>
+    </Alert>
+  );
+}
--- a/apps/client/src/features/ai-chat/components/chat-thread.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx
@@ -1,16 +1,41 @@
-import { useMemo, useRef } from "react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import { generateId } from "ai";
-import { Alert, Box, Stack } from "@mantine/core";
-import { IconAlertTriangle } from "@tabler/icons-react";
+import { ActionIcon, Box, Group, Stack, Text } from "@mantine/core";
+import { IconClockHour4, IconX } from "@tabler/icons-react";
 import { useTranslation } from "react-i18next";
 import { useChat, type UIMessage } from "@ai-sdk/react";
 import { DefaultChatTransport } from "ai";
 import MessageList from "@/features/ai-chat/components/message-list.tsx";
 import ChatInput from "@/features/ai-chat/components/chat-input.tsx";
-import { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
+import RoleCards from "@/features/ai-chat/components/role-cards.tsx";
+import ChatErrorAlert from "@/features/ai-chat/components/chat-error-alert.tsx";
+import ChatStoppedNotice from "@/features/ai-chat/components/chat-stopped-notice.tsx";
+import {
+  IAiChatMessageRow,
+  IAiRole,
+} from "@/features/ai-chat/types/ai-chat.types.ts";
+import {
+  roleLaunchMessage,
+  shouldResetRolePicked,
+} from "@/features/ai-chat/utils/role-launch.ts";
 import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
+import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts";
+import {
+  dequeue,
+  enqueueMessage,
+  removeQueuedById,
+  type QueuedMessage,
+} from "@/features/ai-chat/utils/queue-helpers.ts";
 import classes from "@/features/ai-chat/components/ai-chat.module.css";

+// Throttle how often the streamed `messages` state triggers a re-render. Without
+// it, useChat updates state on EVERY token, so the whole transcript's markdown
+// (marked + DOMPurify) is re-parsed per token — on a long agent run that grows
+// into a quadratic CPU storm that pins the main thread and freezes the UI.
+// ~50ms (20 Hz) keeps streaming visually smooth while decoupling re-render cost
+// from the token rate.
+const STREAM_THROTTLE_MS = 50;
+
 /** The page the user is currently viewing, sent as chat context. */
 export interface OpenPageContext {
  id: string;
@@ -20,6 +45,11 @@ export interface OpenPageContext {
 interface ChatThreadProps {
  /** The open chat id, or null for a brand-new (not-yet-created) chat. */
  chatId: string | null;
+  /** This thread's mount key (the same value the parent uses as React `key`).
+   *  Forwarded to onTurnFinished so the session can tell a turn finishing on the
+   *  CURRENT thread from one ABANDONED by New chat mid-stream — whose onFinish/
+   *  onError still fire after unmount and must not adopt the abandoned chat (#161). */
+  threadKey?: string;
  /** Persisted rows to seed initial messages (existing chats only). */
  initialRows?: IAiChatMessageRow[];
  /** The page currently open in the workspace, or null on a non-page route.
@@ -29,9 +59,28 @@ interface ChatThreadProps {
   *  in the request body so the server persists it on chat creation; ignored by
   *  the server for existing chats (the role is read from the chat row). */
  roleId?: string | null;
-  /** Called when a turn finishes; the parent refreshes the chat list and, for
-   *  a new chat, adopts the freshly created chat id. */
-  onTurnFinished: () => void;
+  /** Enabled roles for the new-chat empty state (only meaningful when
+   *  `chatId === null`). Rendered as the colored role cards. */
+  roles?: IAiRole[];
+  /** Notify the parent which role was picked via a card, so it can update the
+   *  header badge / assistant name for the brand-new chat. */
+  onRolePicked?: (role: IAiRole) => void;
+  /** Display name for the assistant label / typing line (the role name);
+   *  forwarded to MessageList. Absent => the generic "AI agent". */
+  assistantName?: string;
+  /** Called when a turn finishes; the parent refreshes the chat list and, for a
+   *  new chat, adopts the freshly created chat id. `serverChatId` is the
+   *  authoritative id the server streamed on the assistant message metadata, or
+   *  undefined on a failed turn — see adopt-chat-id.ts for the full #137 design.
+   *  `finishingThreadKey` (this thread's mount key) lets the session ignore a turn
+   *  finishing on a thread already abandoned by New chat mid-stream (#161). */
+  onTurnFinished: (serverChatId?: string, finishingThreadKey?: string) => void;
+  /** Called EARLY (at the stream's `start` chunk) with the authoritative server
+   *  chat id streamed on the assistant message metadata, so a brand-new chat
+   *  adopts its real id WHILE the first turn is still streaming (#174 — makes the
+   *  Copy/export button available mid-stream). Distinct from onTurnFinished,
+   *  which fires only at the terminal outcome. */
+  onServerChatId?: (serverChatId?: string) => void;
 }

 /**
@@ -46,13 +95,18 @@ function rowToUiMessage(row: IAiChatMessageRow): UIMessage {
      ? row.metadata.parts
      : ([{ type: "text", text: row.content ?? "" }] as UIMessage["parts"]);
  const error = row.metadata?.error;
+  const finishReason = row.metadata?.finishReason;
+  const metadata: Record<string, unknown> = {};
+  if (error) metadata.error = error;
+  if (finishReason) metadata.finishReason = finishReason;
  return {
    id: row.id,
    role,
    parts,
-    // Carry a persisted turn error so MessageItem can render it after a remount
-    // (e.g. when a new chat adopts its id) and in reopened chat history.
-    ...(error ? { metadata: { error } } : {}),
+    // Carry persisted turn outcome (error text and/or finishReason) so MessageItem
+    // can render the error banner / "stopped" marker after a remount and in
+    // reopened history.
+    ...(Object.keys(metadata).length > 0 ? { metadata } : {}),
  } as UIMessage;
 }

@@ -63,10 +117,15 @@ function rowToUiMessage(row: IAiChatMessageRow): UIMessage {
 */
 export default function ChatThread({
  chatId,
+  threadKey,
  initialRows,
  openPage,
  roleId,
+  roles,
+  onRolePicked,
+  assistantName,
  onTurnFinished,
+  onServerChatId,
 }: ChatThreadProps) {
  const { t } = useTranslation();

@@ -113,7 +172,55 @@ export default function ChatThread({
  // The id only needs to be stable per mount — the parent remounts this via
  // `key` on chat switch, which re-seeds cleanly.
  const stableIdRef = useRef<string>(chatId ?? `new-${generateId()}`);
-  const chatStoreId = chatId ?? stableIdRef.current;
+  // Stable for the LIFETIME of this mount. When a brand-new chat adopts its
+  // server id, the parent now updates the `chatId` prop WITHOUT remounting this
+  // thread, so the store id must NOT follow `chatId`: recreating the useChat
+  // store would wipe the live (just-finished) turn. The server still resolves
+  // the real chat from `chatId` in the request body (see chatIdRef /
+  // prepareSendMessagesRequest), so this purely-client store key can stay fixed.
+  const chatStoreId = stableIdRef.current;
+
+  // Pending messages the user composed WHILE a turn was streaming. They are sent
+  // automatically, FIFO, on successful turn completion (`onFinish`). The queue is
+  // LOCAL state so it is scoped to this conversation: it is cleared when the user
+  // deliberately switches chat / starts a new chat (the parent remounts this via
+  // `key`), but it SURVIVES in-place new-chat id adoption (no remount), so a
+  // message queued during a brand-new chat's first turn is not lost. On Stop,
+  // disconnect or error the queue is intentionally preserved (`onFinish` fires
+  // but skips the flush) so the user decides what to do with pending messages.
+  const [queued, setQueued] = useState<QueuedMessage[]>([]);
+  // Mirror the queue in a ref so the `onFinish` flush always reads the latest
+  // queue without a stale closure; `setQueue` updates BOTH the ref and the state.
+  const queuedRef = useRef<QueuedMessage[]>([]);
+  const setQueue = useCallback((next: QueuedMessage[]) => {
+    queuedRef.current = next;
+    setQueued(next);
+  }, []);
+
+  // Capture the latest `sendMessage` (returned by useChat below) so the flush
+  // helper can call the current instance from the stable `onFinish` callback.
+  const sendMessageRef = useRef<((m: { text: string }) => void) | null>(null);
+
+  // FIFO dequeue + send the next queued message (no-op when the queue is empty).
+  const flushNext = useCallback(() => {
+    const { head, rest } = dequeue(queuedRef.current);
+    if (!head) return;
+    setQueue(rest);
+    sendMessageRef.current?.({ text: head.text });
+  }, [setQueue]);
+
+  const enqueue = useCallback(
+    (text: string) => {
+      setQueue(enqueueMessage(queuedRef.current, { id: generateId(), text }));
+    },
+    [setQueue],
+  );
+  const removeQueued = useCallback(
+    (id: string) => {
+      setQueue(removeQueuedById(queuedRef.current, id));
+    },
+    [setQueue],
+  );

  const transport = useMemo(
    () =>
@@ -147,37 +254,191 @@ export default function ChatThread({
    id: chatStoreId,
    messages: initialMessages,
    transport,
-    onFinish: () => onTurnFinished(),
-    // In AI SDK v6 `onFinish` does NOT fire when the stream errors, so a brand
-    // new chat that fails on its first turn would never invalidate the chat list
-    // nor adopt the server-created chat id (the server still creates the row and
-    // saves the error message). Run the same post-turn path on error so the
-    // failed chat appears in history immediately instead of after a manual
-    // refresh. The error itself is still surfaced via `error` below.
-    onError: () => onTurnFinished(),
+    // See STREAM_THROTTLE_MS — bounds re-render/markdown-reparse frequency.
+    experimental_throttle: STREAM_THROTTLE_MS,
+    // `onFinish` (ai@6 useChat) fires from a `finally` on EVERY terminal outcome
+    // — success, user Stop/abort (`isAbort`), network drop (`isDisconnect`), and
+    // stream error (`isError`). Keep calling `onTurnFinished()` on all of them
+    // (chat-list refresh + new-chat id adoption must happen even on a failed
+    // first turn), but flush the pending queue ONLY on a clean finish: auto-
+    // sending after the user hit Stop — or blindly retrying after a failure —
+    // would be wrong, so on Stop/disconnect/error the queue is left intact for
+    // the user to decide.
+    onFinish: ({ message, isAbort, isDisconnect, isError }) => {
+      // Forward the authoritative server chatId (streamed on the assistant
+      // message metadata) so the parent adopts the REAL created chat id for a new
+      // chat — see adopt-chat-id.ts for the full #137 design. `threadKey` lets the
+      // session ignore this finish if it belongs to a thread abandoned by New chat
+      // mid-stream (#161).
+      onTurnFinished(extractServerChatId(message), threadKey);
+      // Show a neutral "stopped" marker for an aborted turn; the red error banner
+      // (via `error`) already covers isError, and a clean finish clears any marker.
+      if (isError) setStopNotice(null);
+      else if (isAbort) setStopNotice("manual");
+      else if (isDisconnect) setStopNotice("disconnect");
+      else setStopNotice(null);
+      if (isAbort || isDisconnect || isError) return;
+      flushNext();
+    },
+    // `onError` runs in addition to `onFinish` (which ai@6 also calls on error).
+    // Log the raw failure here for devtools; the UI shows a friendly classified
+    // banner via `error` below. We still call `onTurnFinished()` with NO server id
+    // (idempotent with the onFinish call): for a brand-new chat that ARMS the
+    // bounded list-refetch fallback (adopt the single newly-appeared chat once the
+    // refetch lands); for an existing chat it just refreshes the chat list
+    // immediately rather than after a manual refresh.
+    onError: (streamError) => {
+      // Surface the raw failure in the browser console (devtools) for debugging;
+      // the UI separately shows a friendly classified banner (see errorView).
+      console.error("AI chat stream error:", streamError);
+      onTurnFinished(undefined, threadKey);
+    },
  });

+  // Keep the flush helper pointed at the latest sendMessage instance.
+  sendMessageRef.current = sendMessage;
+
+  // EARLY chat-id adoption (#174): the server streams the authoritative chat id
+  // on the assistant message metadata at the `start` chunk (message.metadata.
+  // chatId — see adopt-chat-id.ts / chatStreamMetadata). Forward it to the parent
+  // AS SOON AS it appears (mid-stream), so a brand-new chat adopts its real id
+  // WHILE the first turn is still streaming and activeChatId-gated affordances
+  // (the Copy/export button) light up immediately, instead of only at onFinish.
+  // Keyed by the last-seen id so we forward each distinct id exactly once. The
+  // parent's onServerChatId is idempotent and a no-op once the chat has an id.
+  const lastForwardedChatIdRef = useRef<string | undefined>(undefined);
+  useEffect(() => {
+    if (!onServerChatId) return;
+    const tail = messages[messages.length - 1];
+    if (tail?.role !== "assistant") return;
+    const serverChatId = extractServerChatId(tail);
+    if (!serverChatId || serverChatId === lastForwardedChatIdRef.current)
+      return;
+    lastForwardedChatIdRef.current = serverChatId;
+    onServerChatId(serverChatId);
+  }, [messages, onServerChatId]);
+
+  // Live "turn was interrupted" marker for the CURRENT session. The red error
+  // banner (driven by `error`) covers the error case; this covers an aborted
+  // turn, distinguishing a manual Stop (`isAbort`) from a dropped connection
+  // (`isDisconnect`) — a distinction only available live (the server persists
+  // both as finishReason 'aborted'). Cleared when the next turn starts.
+  const [stopNotice, setStopNotice] = useState<null | "manual" | "disconnect">(
+    null,
+  );
+
  const isStreaming = status === "submitted" || status === "streaming";

+  // Clear the stopped marker as soon as a new turn begins streaming.
+  useEffect(() => {
+    if (isStreaming) setStopNotice(null);
+  }, [isStreaming]);
+
+  // Classify the turn error into a heading + detail so the banner names the cause
+  // (connection reset, timeout, rate limit, context overflow, quota, ...) instead
+  // of a generic "Something went wrong". Computed here (not only in the JSX) so
+  // the SAME on-screen banner text can be mirrored into the export (issue #160).
+  const errorView = error ? describeChatError(error.message ?? "", t) : null;
+
+  // A role was picked with autoStart=false: the role is bound but NOTHING was
+  // sent, so chatId stays null and the empty state would keep showing the cards.
+  // This flag hides the cards and reveals the composer (with the role indicated)
+  // so the user can type the first message themselves. roleIdRef is already set,
+  // so that first manual message carries the roleId.
+  const [rolePickedNoSend, setRolePickedNoSend] = useState(false);
+
+  // Clicking a role card always binds the role to THIS new chat. Whether it also
+  // auto-starts the conversation is per-role (autoStart). roleIdRef is set
+  // synchronously here because the parent's selectedRoleId state update would
+  // only reach roleIdRef on the next render — after this synchronous sendMessage
+  // has already read it.
+  const handleRolePick = (role: IAiRole): void => {
+    roleIdRef.current = role.id;
+    onRolePicked?.(role);
+    const launch = roleLaunchMessage(
+      role,
+      t("Take a look at the current document"),
+    );
+    if (launch !== null) {
+      sendMessage({ text: launch });
+    } else {
+      // autoStart=false -> bind only: hide the cards, show the composer.
+      setRolePickedNoSend(true);
+    }
+  };
+  // Reset the "picked, not sent" flag when the thread returns to a truly empty,
+  // role-less state — e.g. the user hit "New chat" after picking an autoStart=false
+  // role. That path clears the parent's selectedRoleId (roleId -> null) but leaves
+  // chatId null, so the thread never remounts and the flag would stay set, hiding
+  // the cards forever. A picked-and-bound role keeps roleId non-null, so the cards
+  // correctly stay hidden then. Render-phase reset (React "adjust state on prop
+  // change"): one-shot — it re-renders with the flag false and the guard no longer
+  // matches, so it cannot loop. (Review of #149.)
+  if (shouldResetRolePicked(chatId, roleId, rolePickedNoSend)) {
+    setRolePickedNoSend(false);
+  }
+  const showRoleCards =
+    chatId === null && (roles?.length ?? 0) > 0 && !rolePickedNoSend;
+  const roleCardsEmptyState = showRoleCards ? (
+    <RoleCards roles={roles ?? []} onPick={handleRolePick} />
+  ) : undefined;
+
  return (
    <Box className={classes.panel}>
-      <MessageList messages={messages} isStreaming={isStreaming} />
+      <MessageList
+        messages={messages}
+        isStreaming={isStreaming}
+        emptyState={roleCardsEmptyState}
+        assistantName={assistantName}
+      />

-      {error && (
-        <Alert
-          variant="light"
-          color="red"
-          icon={<IconAlertTriangle size={16} />}
+      {errorView ? (
+        <ChatErrorAlert
+          title={errorView.title}
+          detail={errorView.detail}
          mb="xs"
-          title={t("Something went wrong")}
-        >
-          {describeChatError(error.message ?? "", t)}
-        </Alert>
-      )}
+        />
+      ) : stopNotice ? (
+        <ChatStoppedNotice
+          text={
+            stopNotice === "manual"
+              ? t("Response stopped.")
+              : t("Connection lost — the answer was interrupted.")
+          }
+          mb="xs"
+        />
+      ) : null}

      <Stack gap={0} className={classes.inputWrapper}>
+        {queued.length > 0 && (
+          <Stack gap={4} className={classes.queuedList}>
+            {queued.map((m) => (
+              <Group
+                key={m.id}
+                gap={6}
+                wrap="nowrap"
+                className={classes.queuedItem}
+              >
+                <IconClockHour4 size={14} className={classes.queuedIcon} />
+                <Text size="xs" lineClamp={2} className={classes.queuedText}>
+                  {m.text}
+                </Text>
+                <ActionIcon
+                  size="xs"
+                  variant="subtle"
+                  color="gray"
+                  onClick={() => removeQueued(m.id)}
+                  aria-label={t("Remove queued message")}
+                >
+                  <IconX size={12} />
+                </ActionIcon>
+              </Group>
+            ))}
+          </Stack>
+        )}
        <ChatInput
          onSend={(text) => sendMessage({ text })}
+          onQueue={enqueue}
          onStop={stop}
          isStreaming={isStreaming}
        />
--- a/apps/client/src/features/ai-chat/components/conversation-list.tsx
+++ b/apps/client/src/features/ai-chat/components/conversation-list.tsx
@@ -18,8 +18,31 @@ import {
  useRenameAiChatMutation,
 } from "@/features/ai-chat/queries/ai-chat-query.ts";
 import { IAiChat } from "@/features/ai-chat/types/ai-chat.types.ts";
+import { useTimeAgo } from "@/hooks/use-time-ago.tsx";
 import classes from "@/features/ai-chat/components/ai-chat.module.css";

+/**
+ * The dimmed second line of a chat row: how long ago the chat was created and
+ * the document it was created in. Its own component so the self-updating
+ * `useTimeAgo` hook is called per row legally (hooks cannot run inside `.map()`).
+ */
+function ChatMetaLine(props: {
+  createdAt: string;
+  pageTitle?: string | null;
+}) {
+  const { createdAt, pageTitle } = props;
+  const { t } = useTranslation();
+  const relativeAge = useTimeAgo(createdAt);
+  // Rendered as e.g. "2 hours ago · Onboarding guide"; a chat with no
+  // document title shows the translated "No document" placeholder instead.
+  const documentLabel = pageTitle || t("No document");
+  return (
+    <Text size="xs" c="dimmed" lineClamp={1}>
+      {relativeAge} · {documentLabel}
+    </Text>
+  );
+}
+
 interface ConversationListProps {
  activeChatId: string | null;
  onSelect: (chatId: string) => void;
@@ -127,16 +150,24 @@ export default function ConversationList({
              }
            }}
          >
-            <Group gap={4} wrap="nowrap" style={{ flex: 1, minWidth: 0 }}>
-              {chat.roleName && (
-                <Text size="sm" span title={chat.roleName} style={{ flex: "none" }}>
-                  {chat.roleEmoji || "🤖"}
+            <Box style={{ flex: 1, minWidth: 0 }}>
+              <Group gap={4} wrap="nowrap" style={{ minWidth: 0 }}>
+                {chat.roleName && (
+                  <Text
+                    size="sm"
+                    span
+                    title={chat.roleName}
+                    style={{ flex: "none" }}
+                  >
+                    {chat.roleEmoji || "🤖"}
+                  </Text>
+                )}
+                <Text size="sm" lineClamp={1} style={{ flex: 1, minWidth: 0 }}>
+                  {chat.title || t("Untitled chat")}
                </Text>
-              )}
-              <Text size="sm" lineClamp={1} style={{ flex: 1, minWidth: 0 }}>
-                {chat.title || t("Untitled chat")}
-              </Text>
-            </Group>
+              </Group>
+              <ChatMetaLine createdAt={chat.createdAt} pageTitle={chat.pageTitle} />
+            </Box>
            <Menu shadow="md" width={180} position="bottom-end">
              <Menu.Target>
                <ActionIcon
--- a/apps/client/src/features/ai-chat/components/message-item-memo.test.tsx
+++ b/apps/client/src/features/ai-chat/components/message-item-memo.test.tsx
@@ -0,0 +1,81 @@
+import { describe, expect, it, vi } from "vitest";
+import { render } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+import type { UIMessage } from "@ai-sdk/react";
+
+// Stub react-i18next (the component reads `useTranslation`). Mirrors the stub in
+// reasoning-block.test.tsx.
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({ t: (key: string) => key }),
+}));
+
+// Spy on `renderChatMarkdown` so we can count parse calls per text. We keep every
+// OTHER named export of markdown.ts intact via `importActual`, and override only
+// `renderChatMarkdown` with a `vi.fn()` that returns simple HTML so the component
+// still renders. This is the seam that proves the MarkdownPart memo works: a
+// finalized text part must NOT be re-parsed on a later streamed delta.
+// `vi.hoisted` so the spy exists when the hoisted `vi.mock` factory runs.
+const { renderChatMarkdownSpy } = vi.hoisted(() => ({
+  renderChatMarkdownSpy: vi.fn((text: string) => `<p>${text}</p>`),
+}));
+vi.mock("@/features/ai-chat/utils/markdown.ts", async () => {
+  const actual = await vi.importActual<
+    typeof import("@/features/ai-chat/utils/markdown.ts")
+  >("@/features/ai-chat/utils/markdown.ts");
+  return { ...actual, renderChatMarkdown: renderChatMarkdownSpy };
+});
+
+import MessageItem from "./message-item";
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+const msg = (parts: UIMessage["parts"]): UIMessage =>
+  ({ id: "m1", role: "assistant", parts }) as UIMessage;
+
+const renderRow = (message: UIMessage) =>
+  render(
+    <MantineProvider>
+      <MessageItem message={message} />
+    </MantineProvider>,
+  );
+
+/** Count how many spy calls parsed exactly `text` (filtering by the first arg). */
+const callsFor = (text: string) =>
+  renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === text).length;
+
+describe("MessageItem markdown memoization", () => {
+  it("does not re-parse finalized text parts when only a tail part grows", () => {
+    renderChatMarkdownSpy.mockClear();
+
+    // Two finalized text parts.
+    const first = msg([
+      { type: "text", text: "alpha" },
+      { type: "text", text: "beta" },
+    ]);
+    const { rerender } = renderRow(first);
+
+    // Both finalized parts parsed exactly once on the initial render.
+    expect(callsFor("alpha")).toBe(1);
+    expect(callsFor("beta")).toBe(1);
+
+    // A streamed delta: a NEW message object where only a third tail part grows;
+    // the first two parts' text is byte-identical.
+    const next = msg([
+      { type: "text", text: "alpha" },
+      { type: "text", text: "beta" },
+      { type: "text", text: "gamm" },
+    ]);
+    rerender(
+      <MantineProvider>
+        <MessageItem message={next} />
+      </MantineProvider>,
+    );
+
+    // The finalized parts hit the MarkdownPart memo: still parsed at most once
+    // each across BOTH renders (the resilient invariant). The only new parse is
+    // for the changed/added tail part.
+    expect(callsFor("alpha")).toBe(1);
+    expect(callsFor("beta")).toBe(1);
+    expect(callsFor("gamm")).toBe(1);
+  });
+});
--- a/apps/client/src/features/ai-chat/components/message-item.test.ts
+++ b/apps/client/src/features/ai-chat/components/message-item.test.ts
@@ -0,0 +1,73 @@
+import { describe, expect, it, vi } from "vitest";
+import type { UIMessage } from "@ai-sdk/react";
+
+// Stub react-i18next: importing the component module pulls in `useTranslation`,
+// and we only exercise the pure `arePropsEqual` comparator (no rendering), so a
+// minimal `t` that echoes the key is enough. Mirrors the stub in
+// reasoning-block.test.tsx.
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({ t: (key: string) => key }),
+}));
+
+import { arePropsEqual } from "./message-item";
+
+/**
+ * Tests for `arePropsEqual`, the `React.memo` comparator for MessageItem. It must
+ * return false on any visible prop/content change (so the row re-renders) and
+ * true when nothing visible changed (so a finalized row is skipped). A FIXED
+ * message id is used so a content-identical clone yields an equal signature.
+ */
+function msg(parts: UIMessage["parts"]): UIMessage {
+  return { id: "m1", role: "assistant", parts } as UIMessage;
+}
+
+/** Baseline MessageItem props for `message`, with `over` applied on top. */
+function props(message: UIMessage, over: Record<string, unknown> = {}) {
+  const base = {
+    message,
+    showCitations: true,
+    neutralizeInternalLinks: false,
+    assistantName: "AI",
+  };
+  // Later spread wins, so any key named in `over` replaces the baseline value.
+  return { ...base, ...over };
+}
+
+describe("arePropsEqual", () => {
+  // The next three cases keep the SAME message object and flip exactly one
+  // non-message prop, so a `false` result is attributable to that prop alone.
+  it("returns false when showCitations differs", () => {
+    const m = msg([{ type: "text", text: "answer" }]);
+    expect(
+      arePropsEqual(props(m), props(m, { showCitations: false })),
+    ).toBe(false);
+  });
+
+  it("returns false when neutralizeInternalLinks differs", () => {
+    const m = msg([{ type: "text", text: "answer" }]);
+    expect(
+      arePropsEqual(props(m), props(m, { neutralizeInternalLinks: true })),
+    ).toBe(false);
+  });
+
+  it("returns false when assistantName differs", () => {
+    const m = msg([{ type: "text", text: "answer" }]);
+    expect(
+      arePropsEqual(props(m), props(m, { assistantName: "Other" })),
+    ).toBe(false);
+  });
+
+  // Same object on both sides: equal without needing a content comparison.
+  it("returns true on the identity fast path (same message object, equal props)", () => {
+    const m = msg([{ type: "text", text: "answer" }]);
+    expect(arePropsEqual(props(m), props(m))).toBe(true);
+  });
+
+  // Distinct objects from here on, so the result depends on content, not identity.
+  it("returns true for the same content in a different message object", () => {
+    const a = msg([{ type: "text", text: "answer" }]);
+    const b = msg([{ type: "text", text: "answer" }]);
+    expect(a).not.toBe(b);
+    expect(arePropsEqual(props(a), props(b))).toBe(true);
+  });
+
+  it("returns false when content changed in a different message object", () => {
+    const a = msg([{ type: "text", text: "answer" }]);
+    const b = msg([{ type: "text", text: "answer grown" }]);
+    expect(arePropsEqual(props(a), props(b))).toBe(false);
+  });
+});
--- a/apps/client/src/features/ai-chat/components/message-item.tsx
+++ b/apps/client/src/features/ai-chat/components/message-item.tsx
@@ -1,11 +1,17 @@
-import { Alert, Box, Text } from "@mantine/core";
-import { IconAlertTriangle } from "@tabler/icons-react";
+import { memo } from "react";
+import { Box, Text } from "@mantine/core";
 import { useTranslation } from "react-i18next";
 import type { UIMessage } from "@ai-sdk/react";
 import ToolCallCard from "@/features/ai-chat/components/tool-call-card.tsx";
+import ReasoningBlock from "@/features/ai-chat/components/reasoning-block.tsx";
+import ChatErrorAlert from "@/features/ai-chat/components/chat-error-alert.tsx";
+import ChatStoppedNotice from "@/features/ai-chat/components/chat-stopped-notice.tsx";
 import { ToolUiPart, isToolPart } from "@/features/ai-chat/utils/tool-parts.tsx";
+import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts";
 import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
 import { resolveAssistantName } from "@/features/ai-chat/utils/assistant-name.ts";
+import { reasoningTokensForPart } from "@/features/ai-chat/utils/reasoning-tokens.ts";
+import { messageSignature } from "@/features/ai-chat/utils/message-signature.ts";
 import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
 import classes from "@/features/ai-chat/components/ai-chat.module.css";

@@ -30,6 +36,39 @@ interface MessageItemProps {
  assistantName?: string;
 }

+/**
+ * One assistant text part rendered as sanitized markdown. Memoized on its inputs
+ * so a finalized text part is NOT re-parsed on every streamed delta: during a
+ * turn only the actively-growing tail part changes its `text`, so every earlier
+ * part hits the memo and skips the expensive marked + DOMPurify pass. Props are
+ * primitives, so React.memo's default shallow compare is exactly right (the
+ * `text` string is compared by value).
+ */
+const MarkdownPart = memo(function MarkdownPart(props: {
+  text: string;
+  neutralizeInternalLinks: boolean;
+}) {
+  const { text, neutralizeInternalLinks } = props;
+  const rendered = renderChatMarkdown(text, { neutralizeInternalLinks });
+
+  // No HTML came back synchronously: show the raw text, whitespace preserved.
+  if (!rendered) {
+    return (
+      <Text className={classes.markdown} style={{ whiteSpace: "pre-wrap" }}>
+        {text}
+      </Text>
+    );
+  }
+
+  return (
+    <div
+      className={classes.markdown}
+      // Sanitized by renderChatMarkdown (DOMPurify) before insertion.
+      dangerouslySetInnerHTML={{ __html: rendered }}
+    />
+  );
+});
+
 /**
 * Render a single UIMessage by iterating its `parts`:
 *  - `text` parts -> sanitized markdown.
@@ -37,12 +76,13 @@ interface MessageItemProps {
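- * Other part kinds (reasoning, sources, files, step-start) are ignored for v1.
+ *  - `reasoning` parts -> a collapsible ReasoningBlock.
+ * Other part kinds (sources, files, step-start) are ignored for v1.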
 * User messages render their text as a right-aligned plain bubble.
 *
- * This component is intentionally NOT memoized: `useChat` replaces the streaming
- * assistant message with a freshly cloned object on every streamed delta, so the
- * `message` prop identity (and its `parts`) changes each tick. Re-rendering the
- * text parts on each delta is what makes the answer stream in progressively.
+ * This component is memoized (see `arePropsEqual` at the bottom) on a cheap
+ * per-message content signature: the streaming TAIL message's signature changes
+ * on each delta so it still re-renders and streams in, while finalized rows are
+ * skipped. Each text part's markdown is itself memoized via `MarkdownPart`, so a
+ * long turn no longer re-parses the whole transcript on every token.
 */
-export default function MessageItem({
+function MessageItem({
  message,
  showCitations = true,
  neutralizeInternalLinks = false,
@@ -65,35 +105,52 @@ export default function MessageItem({
    );
  }

+  // An assistant message with nothing visible to render yet (an empty streaming
+  // text part, or a reasoning/step-start part while the model is still thinking)
+  // renders nothing here. The standalone TypingIndicator stands in for the nascent
+  // bubble (name + dots) until real content arrives, so exactly one element owns
+  // the agent name during the pre-content gap and the layout never jumps. Persisted
+  // errored/aborted turns DO have visible content per the helper (metadata.error /
+  // finishReason === "aborted"), so their banners below still render — this early
+  // return won't fire for them.
+  if (!assistantMessageHasVisibleContent(message)) return null;
+
+  // Authoritative reasoning token count to attribute to a reasoning block, or
+  // undefined when the block must estimate on its own. See reasoningTokensForPart
+  // for the #151 anti-double-count rule (only a single reasoning part may carry
+  // the turn total). The authoritative turn total is still surfaced live in the
+  // header badge regardless.
+  const reasoningTokens = reasoningTokensForPart(message);
+
  return (
    <Box className={classes.messageRow}>
      <Text size="xs" c="dimmed" mb={4}>
        {resolveAssistantName(assistantName) ?? t("AI agent")}
      </Text>
      {message.parts.map((part, index) => {
+        if (part.type === "reasoning") {
+          // Reasoning ("thinking") -> a collapsible block with its own token
+          // count. Empty/whitespace reasoning with no authoritative count carries
+          // nothing to show, so skip it (avoids an empty 0-token block).
+          const text = (part as { text?: string }).text ?? "";
+          if (!text.trim() && !(reasoningTokens && reasoningTokens > 0))
+            return null;
+          return (
+            <ReasoningBlock key={index} text={text} tokens={reasoningTokens} />
+          );
+        }
+
        if (part.type === "text") {
          // Skip empty/whitespace-only text parts (a streaming message often
          // starts with an empty text part before the first token arrives); the
          // typing indicator covers that gap until real content streams in.
          if (!part.text.trim()) return null;
-          const html = renderChatMarkdown(part.text, {
-            neutralizeInternalLinks,
-          });
-          if (html) {
-            return (
-              <div
-                key={index}
-                className={classes.markdown}
-                // Sanitized by renderChatMarkdown (DOMPurify) before insertion.
-                dangerouslySetInnerHTML={{ __html: html }}
-              />
-            );
-          }
-          // Fallback when markdown could not render synchronously: raw text.
          return (
-            <Text key={index} className={classes.markdown} style={{ whiteSpace: "pre-wrap" }}>
-              {part.text}
-            </Text>
+            <MarkdownPart
+              key={index}
+              text={part.text}
+              neutralizeInternalLinks={neutralizeInternalLinks}
+            />
          );
        }

@@ -114,17 +171,56 @@ export default function MessageItem({
      {(() => {
        const errorText = (message.metadata as { error?: string } | undefined)?.error;
        if (!errorText) return null;
+        // Same classified-error banner as the live chat: a heading naming the
+        // cause plus a one-line detail.
+        const errorView = describeChatError(errorText, t);
        return (
-          <Alert
-            variant="light"
-            color="red"
-            icon={<IconAlertTriangle size={16} />}
+          <ChatErrorAlert
+            title={errorView.title}
+            detail={errorView.detail}
            mt={4}
-          >
-            {describeChatError(errorText, t)}
-          </Alert>
+          />
+        );
+      })()}
+      {/* A persisted turn that was aborted (manual Stop or a dropped connection)
+          with no error banner. The server cannot tell a manual Stop from a
+          connection drop (both persist as finishReason 'aborted'), so reopened
+          history uses a combined wording. */}
+      {(() => {
+        const meta = message.metadata as
+          | { error?: string; finishReason?: string }
+          | undefined;
+        if (meta?.error || meta?.finishReason !== "aborted") return null;
+        return (
+          <ChatStoppedNotice
+            text={t("Response stopped (manually or the connection dropped).")}
+            mt={4}
+          />
        );
      })()}
    </Box>
  );
 }
+
+/** Skip re-rendering a message whose visible content is unchanged. The streaming
+ *  TAIL message gets a fresh object whose signature changes each delta, so it
+ *  still re-renders and streams in; every FINALIZED message is skipped, turning a
+ *  per-token whole-transcript re-render into a tail-only one. */
+export function arePropsEqual(
+  prev: MessageItemProps,
+  next: MessageItemProps,
+): boolean {
+  // Any change to a non-message prop forces a re-render.
+  const sameOptions =
+    prev.showCitations === next.showCitations &&
+    prev.neutralizeInternalLinks === next.neutralizeInternalLinks &&
+    prev.assistantName === next.assistantName;
+  if (!sameOptions) return false;
+
+  const before = prev.message;
+  const after = next.message;
+  // Same object => nothing changed; no signature needs to be built.
+  if (before === after) return true;
+  // Different objects: equal only when their content signatures match.
+  return messageSignature(before) === messageSignature(after);
+}
+
+export default memo(MessageItem, arePropsEqual);
--- a/apps/client/src/features/ai-chat/components/message-list.tsx
+++ b/apps/client/src/features/ai-chat/components/message-list.tsx
@@ -4,7 +4,8 @@ import { useTranslation } from "react-i18next";
 import type { UIMessage } from "@ai-sdk/react";
 import MessageItem from "@/features/ai-chat/components/message-item.tsx";
 import TypingIndicator from "@/features/ai-chat/components/typing-indicator.tsx";
-import { isToolPart } from "@/features/ai-chat/utils/tool-parts.tsx";
+import { isToolPart, toolRunState, ToolUiPart } from "@/features/ai-chat/utils/tool-parts.tsx";
+import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts";
 import classes from "@/features/ai-chat/components/ai-chat.module.css";

 interface MessageListProps {
@@ -43,23 +44,68 @@ interface MessageListProps {
 const BOTTOM_THRESHOLD = 40;

 /**
- * Whether to show the standalone "AI agent is typing…" indicator. It bridges the
- * gap between sending and the first streamed content, so it shows only while a
- * turn is in flight AND the latest assistant message has nothing visible yet:
+ * Whether to show the standalone "Thinking…" indicator. It bridges every
+ * gap in a turn where the assistant is working but nothing visible is actively
+ * being produced yet — so it shows while a turn is in flight AND the latest
+ * assistant message's LAST part is not live output:
 *  - the last message is still the user's (assistant hasn't started a row), or
- *  - the last (assistant) message has no non-empty text and no tool part.
- * Once any text/tool part arrives, MessageItem renders it and this hides.
+ *  - the assistant row has no parts yet, or
+ *  - its last part is an empty/whitespace text part, or a finished ("done")
+ *    text part while the turn continues (the model paused after some narration
+ *    and is thinking about its next step), or
+ *  - its last part is a finished/errored tool (the model is thinking about the
+ *    next step between tool calls).
+ * It hides only while output is actively rendering: a non-empty streaming text
+ * part, or a tool that is still running (ToolCallCard shows its own Loader).
 */
 export function showTypingIndicator(messages: UIMessage[], isStreaming: boolean): boolean {
  if (!isStreaming) return false;
  const last = messages[messages.length - 1];
  if (!last) return true; // submitted with nothing rendered yet.
  if (last.role !== "assistant") return true; // assistant row not started.
-  const hasVisible = last.parts.some(
-    (p) =>
-      (p.type === "text" && p.text.trim().length > 0) || isToolPart(p.type),
-  );
-  return !hasVisible;
+  const lastPart = last.parts[last.parts.length - 1];
+  if (!lastPart) return true; // assistant row exists but has no parts yet.
+  // The answer text is actively streaming in -> MessageItem renders it; no dots.
+  // Only while it is STILL streaming, though: once a non-empty text part is
+  // finalized ("done") but the turn is still in flight, the model has paused
+  // after some narration and is working on its next step (e.g. about to call a
+  // tool) — nothing is visibly progressing, so the dots must show. A text part
+  // without a `state` is treated as still-rendering (kept suppressed); this
+  // branch only runs while streaming, where live parts always carry a state.
+  if (
+    lastPart.type === "text" &&
+    lastPart.text.trim().length > 0 &&
+    (lastPart as { state?: "streaming" | "done" }).state !== "done"
+  ) {
+    return false;
+  }
+  // A tool still in flight shows its own Loader in ToolCallCard -> no dots.
+  if (
+    isToolPart(lastPart.type) &&
+    toolRunState((lastPart as unknown as ToolUiPart).state) === "running"
+  ) {
+    return false;
+  }
+  // Otherwise the turn is in flight but nothing is actively producing visible
+  // output yet: a finished/errored tool with no follow-up content, or an empty
+  // trailing text part. The model is thinking between steps -> show the dots.
+  return true;
+}
+
+/**
+ * Whether the standalone typing indicator should render its own assistant-name
+ * label. The indicator OWNS the name while the tail assistant row has no visible
+ * content yet (an empty streaming text part, or reasoning/step-start while the
+ * model is still thinking): in that gap the assistant MessageItem renders nothing,
+ * so the indicator stands in for the nascent bubble (name + dots) at a constant
+ * gap. It hides the name only once that row shows visible content, because then
+ * MessageItem draws the same name — avoids a duplicate stacked label and the
+ * layout jump that switching owners mid-stream used to cause.
+ */
+export function typingIndicatorShowsName(messages: UIMessage[]): boolean {
+  const tail = messages[messages.length - 1];
+  // No message at all, or the tail is not an assistant row -> show the name.
+  if (tail?.role !== "assistant") return true;
+  return !assistantMessageHasVisibleContent(tail);
 }

 /**
@@ -158,7 +204,12 @@ export default function MessageList({
            assistantName={assistantName}
          />
        ))}
-        {typing && <TypingIndicator assistantName={assistantName} />}
+        {typing && (
+          <TypingIndicator
+            assistantName={assistantName}
+            showName={typingIndicatorShowsName(messages)}
+          />
+        )}
      </Stack>
    </ScrollArea>
  );
--- a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx
+++ b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx
@@ -0,0 +1,65 @@
+import { describe, it, expect, vi } from "vitest";
+import { render, screen } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+
+// Stub react-i18next so `t` returns the key with `{{count}}` interpolated. This
+// keeps the assertions on the component's OWN count logic (authoritative vs
+// estimate) rather than on translation, and mirrors the t-mock pattern used by
+// other component tests in the repo.
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({
+    t: (key: string, opts?: { count?: number }) =>
+      opts && typeof opts.count === "number"
+        ? key.replace("{{count}}", String(opts.count))
+        : key,
+  }),
+}));
+
+import ReasoningBlock from "./reasoning-block";
+import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+/** Mount a ReasoningBlock with the given props inside a MantineProvider. */
+const renderBlock = (props: { text: string; tokens?: number }) => {
+  const block = <ReasoningBlock {...props} />;
+  return render(<MantineProvider>{block}</MantineProvider>);
+};
+
+describe("ReasoningBlock", () => {
+  it("shows the authoritative count in the header when tokens > 0", () => {
+    // Text "thinking…" estimates to ceil(9/4) = 3, but the authoritative 42
+    // must win, so the header shows 42 (and NOT the 3-token estimate).
+    renderBlock({ text: "thinking…", tokens: 42 });
+    expect(screen.getByText("Thinking · 42 tokens")).toBeDefined();
+    expect(screen.queryByText("Thinking · 3 tokens")).toBeNull();
+  });
+
+  it("falls back to the text-length estimate when no authoritative tokens", () => {
+    const text = "some reasoning prose that streams in";
+    // Compute the expected value with the same helper the component imports,
+    // so the test does not hard-code the estimation formula.
+    const estimate = estimateTokens(text);
+    // No `tokens` prop is passed: the header must show the estimate.
+    renderBlock({ text });
+    expect(estimate).toBeGreaterThan(0);
+    expect(screen.getByText(new RegExp(`${estimate} tokens`))).toBeDefined();
+  });
+
+  it("header-only when text is empty but an authoritative count is present", () => {
+    renderBlock({ text: "", tokens: 17 });
+    expect(screen.getByText(/17 tokens/)).toBeDefined();
+    // No disclosure body to expand: the toggle button is disabled.
+    const button = screen.getByRole("button");
+    expect((button as HTMLButtonElement).disabled).toBe(true);
+  });
+
+  it("renders the reasoning body (markdown or raw-text fallback)", () => {
+    renderBlock({ text: "**bold** reasoning", tokens: 5 });
+    // The toggle is enabled because there IS body text to expand.
+    const button = screen.getByRole("button");
+    expect((button as HTMLButtonElement).disabled).toBe(false);
+    // The body prose renders (markdown -> sanitized html, or raw-text fallback);
+    // either way the text is present in the document.
+    expect(screen.getByText(/reasoning/)).toBeDefined();
+  });
+});
--- a/apps/client/src/features/ai-chat/components/reasoning-block.tsx
+++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx
@@ -0,0 +1,98 @@
+import { memo, useMemo, useState } from "react";
+import { Box, Collapse, Group, Text, UnstyledButton } from "@mantine/core";
+import { IconChevronDown } from "@tabler/icons-react";
+import { useTranslation } from "react-i18next";
+import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
+import { collapseBlankLines } from "@/features/ai-chat/utils/collapse-blank-lines.ts";
+import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
+import classes from "@/features/ai-chat/components/ai-chat.module.css";
+
+interface ReasoningBlockProps {
+  /** The streamed/persisted reasoning (thinking) text. May be empty when the
+   *  provider reports only a reasoning token COUNT without the text. */
+  text: string;
+  /** Authoritative reasoning token count from `usage.reasoningTokens`, when the
+   *  step/turn has finished. Only a positive value is used as-is; when it is
+   *  absent, 0, or otherwise not positive, the count is estimated from the text
+   *  so it ticks live as the reasoning streams in. */
+  tokens?: number;
+}
+
+/**
+ * Collapsible "Thinking" block for an assistant `reasoning` part. Mirrors Claude
+ * Code's surfacing of the model's thinking: a header that shows the thinking
+ * token count (authoritative when the step has reported usage, else a live
+ * estimate from the streamed text) and an expandable body with the reasoning
+ * prose. Collapsed by default so it never crowds out the answer.
+ *
+ * Providers that don't stream reasoning TEXT still render this block from the
+ * authoritative count alone (header only, empty body) so the cost is visible.
+ */
+function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
+  const { t } = useTranslation();
+  const [expanded, setExpanded] = useState(false);
+
+  const body = text.trim();
+  // A positive reported count is shown as-is; anything else falls back to an
+  // estimate computed from the (untrimmed) reasoning text.
+  const count =
+    tokens !== undefined && tokens > 0 ? tokens : estimateTokens(text);
+  const label =
+    count > 0 ? t("Thinking · {{count}} tokens", { count }) : t("Thinking");
+
+  // Parse once per distinct reasoning text: toggling the block or an unrelated
+  // parent re-render reuses the cached HTML. collapseBlankLines runs first so
+  // the blank-line gaps between list items / paragraphs are collapsed and the
+  // reasoning renders compactly (applied ONLY here, not to the normal answer).
+  const html = useMemo(() => {
+    if (!body) return "";
+    return renderChatMarkdown(collapseBlankLines(body), {});
+  }, [body]);
+
+  // Chevron points right while collapsed and is faded when nothing can expand.
+  const chevronStyle = {
+    transform: expanded ? "none" : "rotate(-90deg)",
+    transition: "transform 150ms ease",
+    opacity: body ? 1 : 0.4,
+  };
+
+  // Rendered HTML when available, otherwise the raw text with whitespace kept.
+  const content = html ? (
+    <div
+      className={classes.reasoningText}
+      // Sanitized by renderChatMarkdown (DOMPurify) before insertion.
+      dangerouslySetInnerHTML={{ __html: html }}
+    />
+  ) : (
+    <Text className={classes.reasoningText} style={{ whiteSpace: "pre-wrap" }}>
+      {body}
+    </Text>
+  );
+
+  return (
+    <Box className={classes.reasoningBlock} mb={6}>
+      <UnstyledButton
+        onClick={() => setExpanded((wasExpanded) => !wasExpanded)}
+        // With only a token count and no text there is nothing to expand.
+        disabled={!body}
+        aria-expanded={expanded}
+      >
+        <Group gap={6} wrap="nowrap" align="center">
+          <IconChevronDown size={12} style={chevronStyle} />
+          <Text size="xs" c="dimmed">
+            {label}
+          </Text>
+        </Group>
+      </UnstyledButton>
+
+      {body && <Collapse in={expanded}>{content}</Collapse>}
+    </Box>
+  );
+}
+
+// Memoized: re-renders only when `text`/`tokens` change (primitive props, default
+// shallow compare), so a parent re-render during streaming of OTHER content does
+// not re-run the markdown parse for an already-finalized reasoning block.
+export default memo(ReasoningBlock);
--- a/apps/client/src/features/ai-chat/components/role-cards.module.css
+++ b/apps/client/src/features/ai-chat/components/role-cards.module.css
@@ -0,0 +1,65 @@
+/* Layout only — per-card colors are injected inline via Mantine CSS vars. */
+
+.container {
+  display: flex;
+  flex-wrap: wrap;
+  justify-content: center;
+  /* flex-start keeps the first row reachable when the wrapped cards overflow and
+     the container scrolls. With align-content: center, an overflowing top row is
+     pushed out of the scrollable area and becomes unreachable. The parent Mantine
+     Center still vertically centers the whole block when it fits. */
+  align-content: flex-start;
+  gap: 10px;
+  /* Cap the height so a large number of roles scrolls instead of blowing out
+     the empty chat area. */
+  max-height: 100%;
+  overflow-y: auto;
+  padding: 8px;
+}
+
+.card {
+  position: relative;
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  justify-content: center;
+  gap: 4px;
+  /* Grow to fill the row so cards use the available window width instead of
+     leaving large side gaps; the flex-basis sets how many fit per row before
+     wrapping (≈2 columns at the default window width, more as it widens). */
+  flex: 1 1 240px;
+  min-width: 200px;
+  max-width: 360px;
+  min-height: 90px;
+  padding: 12px 10px;
+  border-radius: var(--mantine-radius-md);
+  border: 2px solid transparent;
+  cursor: pointer;
+  text-align: center;
+  transition:
+    transform 120ms ease,
+    box-shadow 120ms ease,
+    border-color 120ms ease;
+}
+
+.card:hover {
+  transform: translateY(-2px);
+  box-shadow: var(--mantine-shadow-sm);
+}
+
+.emoji {
+  font-size: 22px;
+  line-height: 1;
+}
+
+/* The description: small and slightly muted, inheriting the card's color. We
+   reduce opacity instead of using Mantine's `c="dimmed"` so it doesn't clash
+   with the card's inline color. */
+.description {
+  opacity: 0.8;
+  line-height: 1.3;
+  /* Break long unbreakable tokens (URLs, long foreign words) in the
+     admin-configured description so they wrap instead of overflowing the card
+     width now that the line clamp no longer caps the text. */
+  overflow-wrap: anywhere;
+}
--- a/apps/client/src/features/ai-chat/components/role-cards.test.tsx
+++ b/apps/client/src/features/ai-chat/components/role-cards.test.tsx
@@ -0,0 +1,59 @@
+import { describe, it, expect, vi } from "vitest";
+import { render, screen, fireEvent } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+import RoleCards from "./role-cards";
+import { IAiRole } from "@/features/ai-chat/types/ai-chat.types.ts";
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+// Two fixtures covering both card shapes: one with every optional field set,
+// one with the optional fields null.
+const roles: IAiRole[] = [
+  // Fully populated role: emoji and description are both present.
+  {
+    id: "r1",
+    name: "Pirate",
+    emoji: "🏴‍☠️",
+    description: "Talks like a pirate",
+    enabled: true,
+    autoStart: true,
+    launchMessage: null,
+  },
+  // Minimal role: no emoji and no description (both null).
+  {
+    id: "r2",
+    name: "Grandpa",
+    emoji: null,
+    description: null,
+    enabled: true,
+    autoStart: true,
+    launchMessage: null,
+  },
+];
+
+/** Render the fixture roles and hand back the `onPick` callback in use. */
+const renderCards = (onPick = vi.fn()) => {
+  const cards = <RoleCards roles={roles} onPick={onPick} />;
+  render(<MantineProvider>{cards}</MantineProvider>);
+  return onPick;
+};
+
+describe("RoleCards", () => {
+  it("renders one card per role with name, emoji, and description", () => {
+    renderCards();
+    expect(screen.getByText("Pirate")).toBeDefined();
+    expect(screen.getByText("Talks like a pirate")).toBeDefined();
+    // The second fixture has no emoji/description, so only its name is checked.
+    expect(screen.getByText("Grandpa")).toBeDefined();
+    // The emoji is shown for the role that has one.
+    expect(screen.getByText("🏴‍☠️")).toBeDefined();
+  });
+
+  it("does NOT render a Universal assistant card", () => {
+    renderCards();
+    expect(screen.queryByText("Universal assistant")).toBeNull();
+  });
+
+  it("calls onPick with the role object when a card is clicked", () => {
+    // renderCards returns the default vi.fn() spy it passed as `onPick`.
+    const onPick = renderCards();
+    fireEvent.click(screen.getByText("Pirate"));
+    // The whole role object is passed through, not just its id or name.
+    expect(onPick).toHaveBeenCalledWith(roles[0]);
+  });
+});
--- a/apps/client/src/features/ai-chat/components/role-cards.tsx
+++ b/apps/client/src/features/ai-chat/components/role-cards.tsx
@@ -0,0 +1,78 @@
+import { UnstyledButton, Text } from "@mantine/core";
+import { IAiRole } from "@/features/ai-chat/types/ai-chat.types.ts";
+import { roleCardColor } from "@/features/ai-chat/utils/role-card-color.ts";
+import classes from "@/features/ai-chat/components/role-cards.module.css";
+
+interface RoleCardsProps {
+  /** The enabled roles to render (one card each). */
+  roles: IAiRole[];
+  /** Called with the picked role when a card is clicked. The parent starts the
+   *  chat with this role (binds it and sends the opening message). */
+  onPick: (role: IAiRole) => void;
+}
+
+/**
+ * One role card. Colors are injected inline via theme-aware Mantine CSS vars so
+ * they render correctly in both light and dark themes; the CSS module owns only
+ * the layout. The card shows the emoji (if any), the role name, and a small
+ * dimmed description line (if any).
+ */
+function RoleCard({
+  color,
+  name,
+  emoji,
+  description,
+  onClick,
+}: {
+  /** Color name interpolated into the `--mantine-color-<color>-light*` vars. */
+  color: string;
+  /** Role name; always rendered, and the tooltip fallback. */
+  name: string;
+  /** Optional emoji shown above the name; omitted when null/empty. */
+  emoji?: string | null;
+  /** Optional description line; omitted when null/empty. */
+  description?: string | null;
+  /** Invoked when the card is clicked. */
+  onClick: () => void;
+}) {
+  return (
+    <UnstyledButton
+      className={classes.card}
+      style={{
+        backgroundColor: `var(--mantine-color-${color}-light)`,
+        color: `var(--mantine-color-${color}-light-color)`,
+      }}
+      // `||` rather than `??`: an empty-string description is treated as absent
+      // (the body below also skips it), so the tooltip falls back to the role
+      // name instead of being blank.
+      title={description || name}
+      onClick={onClick}
+    >
+      {emoji && <span className={classes.emoji}>{emoji}</span>}
+      <Text size="sm" fw={600} lineClamp={2}>
+        {name}
+      </Text>
+      {description && (
+        <Text size="xs" className={classes.description}>
+          {description}
+        </Text>
+      )}
+    </UnstyledButton>
+  );
+}
+
+/**
+ * Colored role cards rendered as the empty-state of a brand-new chat. There is
+ * no Universal assistant card — the universal assistant is the implicit default
+ * the user gets by simply typing into the composer without picking a card.
+ * Clicking a card immediately STARTS the chat with that role (the parent binds
+ * the role to the new chat and sends the opening message).
+ */
+export default function RoleCards({ roles, onPick }: RoleCardsProps) {
+  // One card per role; the color is chosen from the role's position in the list.
+  const cards = roles.map((role, position) => {
+    const { id, name, emoji, description } = role;
+    return (
+      <RoleCard
+        key={id}
+        color={roleCardColor(position)}
+        name={name}
+        emoji={emoji}
+        description={description}
+        onClick={() => onPick(role)}
+      />
+    );
+  });
+
+  return <div className={classes.container}>{cards}</div>;
+}
--- a/apps/client/src/features/ai-chat/components/show-typing-indicator.test.ts
+++ b/apps/client/src/features/ai-chat/components/show-typing-indicator.test.ts
@@ -5,7 +5,7 @@ import { showTypingIndicator } from "@/features/ai-chat/components/message-list.
 /**
 * Pure-helper tests for the typing-indicator bridging logic that the internal
 * chat and the public share widget now share. This is the behavior that decides
- * whether the animated "AI agent is typing…" placeholder shows in the gap
+ * whether the animated "Thinking…" placeholder shows in the gap
 * between sending and the first streamed token.
 */
 const msg = (
@@ -52,4 +52,44 @@ describe("showTypingIndicator", () => {
      showTypingIndicator([msg("assistant", [toolPart])], true),
    ).toBe(false);
  });
+
+  it("shows while streaming after a tool has finished (thinking between steps)", () => {
+    const doneTool = { type: "tool-getPage", state: "output-available" } as unknown as UIMessage["parts"][number];
+    expect(
+      showTypingIndicator([msg("assistant", [doneTool])], true),
+    ).toBe(true);
+  });
+
+  it("shows while streaming when a finished tool is the last part after some text", () => {
+    const text = { type: "text", text: "Let me check" } as unknown as UIMessage["parts"][number];
+    const doneTool = { type: "tool-getPage", state: "output-available" } as unknown as UIMessage["parts"][number];
+    expect(
+      showTypingIndicator([msg("assistant", [text, doneTool])], true),
+    ).toBe(true);
+  });
+
+  it("hides while a tool is still running", () => {
+    const runningTool = { type: "tool-getPage", state: "input-available" } as unknown as UIMessage["parts"][number];
+    expect(
+      showTypingIndicator([msg("assistant", [runningTool])], true),
+    ).toBe(false);
+  });
+
+  it("hides once the assistant streams non-empty text after a finished tool", () => {
+    const doneTool = { type: "tool-getPage", state: "output-available" } as unknown as UIMessage["parts"][number];
+    const text = { type: "text", text: "The answer is 42" } as unknown as UIMessage["parts"][number];
+    expect(
+      showTypingIndicator([msg("assistant", [doneTool, text])], true),
+    ).toBe(false);
+  });
+
+  it("shows while streaming after a text part is finalized (paused before the next step)", () => {
+    const doneText = { type: "text", text: "Now creating the page in", state: "done" } as unknown as UIMessage["parts"][number];
+    expect(showTypingIndicator([msg("assistant", [doneText])], true)).toBe(true);
+  });
+
+  it("hides while a text part is actively streaming (state: streaming)", () => {
+    const streamingText = { type: "text", text: "Now writ", state: "streaming" } as unknown as UIMessage["parts"][number];
+    expect(showTypingIndicator([msg("assistant", [streamingText])], true)).toBe(false);
+  });
 });
--- a/apps/client/src/features/ai-chat/components/typing-indicator-shows-name.test.ts
+++ b/apps/client/src/features/ai-chat/components/typing-indicator-shows-name.test.ts
@@ -0,0 +1,52 @@
+import { describe, expect, it } from "vitest";
+import type { UIMessage } from "@ai-sdk/react";
+import { typingIndicatorShowsName } from "@/features/ai-chat/components/message-list.tsx";
+
+/**
+ * Pure-helper tests for whether the standalone "Thinking…" indicator renders its
+ * own dimmed assistant-name label. The indicator OWNS the name while the tail
+ * assistant row has no visible content yet (an empty streaming text part, or
+ * reasoning/step-start while the model is still thinking) — in that gap the
+ * assistant MessageItem renders nothing, so the indicator stands in for the
+ * nascent bubble (name + dots). It hides the name only once the tail assistant
+ * row shows visible content, because then MessageItem draws the same name — this
+ * avoids a duplicate stacked label and the layout jump that switching owners
+ * mid-stream used to cause.
+ */
+const msg = (
+  role: "user" | "assistant",
+  parts: UIMessage["parts"],
+): UIMessage => ({ id: Math.random().toString(), role, parts }) as UIMessage;
+
+describe("typingIndicatorShowsName", () => {
+  it("shows the name with no messages yet (standalone, just submitted)", () => {
+    expect(typingIndicatorShowsName([])).toBe(true);
+  });
+
+  it("shows the name when the last message is still the user's", () => {
+    expect(
+      typingIndicatorShowsName([msg("user", [{ type: "text", text: "q" }])]),
+    ).toBe(true);
+  });
+
+  it("shows the name when the tail assistant row has no visible content yet (empty text part)", () => {
+    // The empty streaming text part has no visible content, so MessageItem renders
+    // nothing and the indicator owns the name (the nascent bubble).
+    expect(
+      typingIndicatorShowsName([msg("assistant", [{ type: "text", text: "" }])]),
+    ).toBe(true);
+  });
+
+  it("hides the name once the tail assistant row shows content (a tool part)", () => {
+    const doneTool = { type: "tool-getPage", state: "output-available" } as unknown as UIMessage["parts"][number];
+    expect(
+      typingIndicatorShowsName([msg("assistant", [doneTool])]),
+    ).toBe(false);
+  });
+
+  it("hides the name once the tail assistant row shows content (non-empty text)", () => {
+    expect(
+      typingIndicatorShowsName([msg("assistant", [{ type: "text", text: "answer" }])]),
+    ).toBe(false);
+  });
+});
--- a/apps/client/src/features/ai-chat/components/typing-indicator.tsx
+++ b/apps/client/src/features/ai-chat/components/typing-indicator.tsx
@@ -10,6 +10,12 @@ interface TypingIndicatorProps {
   * (agent role) name.
   */
  assistantName?: string;
+  /**
+   * Whether to render the dimmed assistant-name label. Defaults to true
+   * (standalone behavior preserved). Set false between agent steps where the
+   * assistant row above already shows the same name, to avoid a duplicate label.
+   */
+  showName?: boolean;
 }

 /**
@@ -19,27 +25,30 @@ interface TypingIndicatorProps {
 * the real assistant message once content starts arriving.
 *
 * Mirrors the assistant row layout in MessageItem (the dimmed label), so it reads
- * as the assistant's bubble taking shape. The label and typing line use the
- * configured identity name when provided, otherwise the generic "AI agent".
+ * as the assistant's bubble taking shape. The dimmed label uses the configured
+ * identity name when provided (otherwise the generic "AI agent"); below it the
+ * animated dots stand in for the nascent bubble until content arrives.
 */
-export default function TypingIndicator({ assistantName }: TypingIndicatorProps) {
+export default function TypingIndicator({ assistantName, showName = true }: TypingIndicatorProps) {
  const { t } = useTranslation();
  const name = resolveAssistantName(assistantName);

  return (
    <Box className={classes.messageRow}>
-      <Text size="xs" c="dimmed" mb={4}>
-        {name ?? t("AI agent")}
-      </Text>
+      {showName !== false && (
+        // Extra bottom gap (vs MessageItem's mb={4}) gives the small bouncing
+        // dots room below the name label; without it they crowd the label. Only
+        // applies when the name is shown — the nameless case spaces fine on its own.
+        <Text size="xs" c="dimmed" mb={8}>
+          {name ?? t("AI agent")}
+        </Text>
+      )}
      <Group gap={8} align="center">
        <span className={classes.typingDots} aria-hidden="true">
          <span />
          <span />
          <span />
        </span>
-        <Text size="sm" c="dimmed">
-          {name ? t("{{name}} is typing…", { name }) : t("AI agent is typing…")}
-        </Text>
      </Group>
    </Box>
  );
--- a/apps/client/src/features/ai-chat/hooks/use-chat-session.test.tsx
+++ b/apps/client/src/features/ai-chat/hooks/use-chat-session.test.tsx
@@ -0,0 +1,290 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { renderHook, act } from "@testing-library/react";
+import { useChatSession } from "./use-chat-session";
+import type { UseChatSessionOptions } from "./use-chat-session";
+
+// The props the test drives: the parent-owned subset of UseChatSessionOptions
+// (the spies are injected by setup, not per-render). messagesLoading is optional
+// here (defaulted to false in setup) for terser test call sites.
+type DriverProps = Pick<UseChatSessionOptions, "activeChatId" | "chats"> & {
+  messagesLoading?: boolean;
+};
+
+// Drive the hook the way the window does: the parent owns `activeChatId` and
+// passes it back in. `setActiveChatId` is a spy so we can assert the EXACT id the
+// hook adopts (the #137 regression: it must be the authoritative streamed id, not
+// the newest chat in the list).
+function setup(initial: DriverProps) {
+  const setActiveChatId = vi.fn();
+  const onInvalidateChatList = vi.fn();
+  const onInvalidateChatMessages = vi.fn();
+  const { result, rerender } = renderHook(
+    (props: DriverProps) =>
+      useChatSession({
+        activeChatId: props.activeChatId,
+        setActiveChatId,
+        chats: props.chats,
+        messagesLoading: props.messagesLoading ?? false,
+        onInvalidateChatList,
+        onInvalidateChatMessages,
+      }),
+    { initialProps: initial },
+  );
+  return {
+    result,
+    rerender,
+    setActiveChatId,
+    onInvalidateChatList,
+    onInvalidateChatMessages,
+  };
+}
+
+describe("useChatSession", () => {
+  beforeEach(() => vi.clearAllMocks());
+
+  it("#137 REGRESSION LOCK: adopts the authoritative streamed id, NOT items[0]", () => {
+    // Brand-new chat, list already holds a SIBLING chat B as items[0] (a second
+    // tab just created it). The server streams the real id "A" for THIS chat.
+    const { result, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [{ id: "B" }] },
+    });
+    result.current.onTurnFinished("A");
+    // Must adopt the authoritative id, not the newest-in-list guess.
+    expect(setActiveChatId).toHaveBeenCalledWith("A");
+    expect(setActiveChatId).not.toHaveBeenCalledWith("B");
+  });
+
+  it("fallback adopt: arms on a server-id-less finish, adopts the single new id after refetch", () => {
+    const { result, rerender, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }] },
+    });
+    // No server id => arm the fallback (no adoption yet).
+    result.current.onTurnFinished(undefined);
+    expect(setActiveChatId).not.toHaveBeenCalled();
+    // The refetch lands with the new row => adopt it.
+    rerender({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }, { id: "new" }] },
+    });
+    expect(setActiveChatId).toHaveBeenCalledWith("new");
+  });
+
+  it("fallback ambiguous: two new ids appear => no adoption", () => {
+    const { result, rerender, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }] },
+    });
+    result.current.onTurnFinished(undefined);
+    rerender({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }, { id: "n1" }, { id: "n2" }] },
+    });
+    expect(setActiveChatId).not.toHaveBeenCalled();
+  });
+
+  it("fallback add+delete in one window: adopts the new id (membership compare)", () => {
+    const { result, rerender, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [{ id: "a" }, { id: "b" }] },
+    });
+    result.current.onTurnFinished(undefined);
+    // a was deleted, new was added — same length, but membership changed.
+    rerender({
+      activeChatId: null,
+      chats: { items: [{ id: "b" }, { id: "new" }] },
+    });
+    expect(setActiveChatId).toHaveBeenCalledWith("new");
+  });
+
+  it("disarm on reconcile: a fallback armed then switched away is NOT adopted by a late refetch", () => {
+    // Arm the error-path fallback on a brand-new chat (snapshot before=["x"]).
+    const { result, rerender, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }] },
+    });
+    result.current.onTurnFinished(undefined);
+    // The user switches to an existing chat C BEFORE the refetch lands; the
+    // render-phase reconciler must DISARM the pending fallback.
+    rerender({ activeChatId: "C", chats: { items: [{ id: "x" }] } });
+    // ...then starts a fresh new chat again (back to null), without re-arming.
+    rerender({ activeChatId: null, chats: { items: [{ id: "x" }] } });
+    // A late refetch now brings a new row. Because the earlier fallback was
+    // disarmed on the switch (not left armed with the stale ["x"] snapshot), it
+    // must NOT be adopted. (Without the disarm this would wrongly adopt "new".)
+    rerender({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }, { id: "new" }] },
+    });
+    expect(setActiveChatId).not.toHaveBeenCalledWith("new");
+  });
+
+  it("startNewChat while already in a new chat: cancelPendingAdoption stops a late refetch adopting the failed chat", () => {
+    // The Warning path the render-phase reconciler can't catch: pressing "New
+    // chat" while already in a new chat keeps activeChatId === null (a no-op for
+    // the atom), so only the explicit cancelPendingAdoption() disarms.
+    const { result, rerender, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }] },
+    });
+    result.current.onTurnFinished(undefined); // first turn failed → arm (before=["x"])
+    result.current.cancelPendingAdoption(); // window calls this from startNewChat
+    // The just-failed row lands in a late refetch; it must NOT be adopted.
+    rerender({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }, { id: "failed" }] },
+    });
+    expect(setActiveChatId).not.toHaveBeenCalledWith("failed");
+  });
+
+  it("onTurnFinished for an existing chat: no adoption, invalidates that chat's messages", () => {
+    const {
+      result,
+      setActiveChatId,
+      onInvalidateChatList,
+      onInvalidateChatMessages,
+    } = setup({ activeChatId: "chat-1", chats: { items: [{ id: "chat-1" }] } });
+    result.current.onTurnFinished("chat-1");
+    expect(setActiveChatId).not.toHaveBeenCalled(); // existing chat is never re-adopted
+    expect(onInvalidateChatList).toHaveBeenCalled();
+    expect(onInvalidateChatMessages).toHaveBeenCalledWith("chat-1");
+  });
+
+  it("double onTurnFinished on a failed-after-start turn: primary adopt, 2nd no-id call does NOT re-arm the fallback", () => {
+    // ai@6 fires onFinish AND onError on a failed turn. If the failure happened
+    // AFTER the `start` chunk, onFinish carries the streamed id and onError does
+    // not — so onTurnFinished runs twice in one turn (id, then no-id) before any
+    // re-render. The 2nd call must NOT re-arm the fallback off the still-null
+    // closure; otherwise a late refetch (parent hasn't reflected the adoption yet)
+    // would wrongly adopt a sibling row.
+    const { result, rerender, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }] },
+    });
+    result.current.onTurnFinished("A"); // onFinish: primary adoption
+    expect(setActiveChatId).toHaveBeenCalledWith("A");
+    result.current.onTurnFinished(undefined); // onError: same turn, no id
+    // Even in the worst case (the parent has NOT yet reflected activeChatId="A"
+    // and a late refetch lands a new row), the just-failed sibling must NOT be
+    // adopted. Two layers guarantee this: the ref guard keeps the 2nd call from
+    // re-arming at the source, and the render-phase reconciler disarms anything
+    // stale once thread.chatId ("A") diverges from the still-null activeChatId.
+    rerender({
+      activeChatId: null,
+      chats: { items: [{ id: "x" }, { id: "late" }] },
+    });
+    expect(setActiveChatId).not.toHaveBeenCalledWith("late");
+  });
+
+  it("#174 early adopt: onServerChatId adopts the streamed id mid-stream (Copy button available during the first turn)", () => {
+    // Brand-new chat: no id yet. The server streams the real chat id "A" on the
+    // `start` chunk WHILE the first turn is still streaming (before onTurnFinished
+    // fires at the terminal outcome). The hook must adopt it immediately so the
+    // window's activeChatId-gated Copy/export button lights up during the stream.
+    const { result, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [] },
+    });
+    result.current.onServerChatId("A");
+    expect(setActiveChatId).toHaveBeenCalledWith("A");
+  });
+
+  it("#174 early adopt is in-place: threadKey stays stable (live stream not torn down)", () => {
+    const chats = { items: [] };
+    const { result, rerender } = setup({ activeChatId: null, chats });
+    const keyBefore = result.current.threadKey;
+    result.current.onServerChatId("A");
+    // Parent reflects the adopted id back in; the SAME mount key is kept so the
+    // in-flight useChat store (the streaming turn) is preserved.
+    rerender({ activeChatId: "A", chats });
+    expect(result.current.threadKey).toBe(keyBefore);
+  });
+
+  it("#174 early adopt: no-op for an existing chat and for a missing id", () => {
+    const { result, setActiveChatId } = setup({
+      activeChatId: "chat-1",
+      chats: { items: [{ id: "chat-1" }] },
+    });
+    result.current.onServerChatId("chat-1"); // already has an id
+    result.current.onServerChatId(undefined); // no streamed id
+    expect(setActiveChatId).not.toHaveBeenCalled();
+  });
+
+  it("in-place adopt keeps threadKey stable; an external switch remounts", () => {
+    const chats = { items: [{ id: "B" }] };
+    const { result, rerender } = setup({ activeChatId: null, chats });
+    const keyBefore = result.current.threadKey;
+    // Adopt the streamed id; the PARENT then reflects activeChatId="A" back in.
+    result.current.onTurnFinished("A");
+    rerender({ activeChatId: "A", chats });
+    // In-place adoption: SAME mount key (the live useChat store is preserved).
+    expect(result.current.threadKey).toBe(keyBefore);
+
+    // An EXTERNAL switch (not via adopt) to a different chat must remount: the
+    // key becomes the chat id.
+    rerender({ activeChatId: "C", chats });
+    expect(result.current.threadKey).toBe("C");
+  });
+
+  it("#161: New chat during a streaming first turn forces a fresh thread (remount), not just a no-op", () => {
+    // Brand-new chat whose first turn is still streaming: the id is adopted only
+    // at turn end, so activeChatId AND thread.chatId are both null. Pressing "New
+    // chat" must still remount to a clean thread even though the atom is unchanged
+    // — the render-phase reconciler (null === null) would otherwise do nothing,
+    // leaving the old chat/stream/history in place (the bug: only the role badge
+    // dropped).
+    const { result } = setup({ activeChatId: null, chats: { items: [] } });
+    const keyBefore = result.current.threadKey;
+    act(() => result.current.startFreshThread());
+    expect(result.current.threadKey).not.toBe(keyBefore);
+  });
+
+  it("#161: an abandoned thread's late onTurnFinished does NOT adopt its chat (thread-aware guard)", () => {
+    // New chat mid-stream remounts to a fresh thread, but @ai-sdk/react does not
+    // abort the abandoned stream on unmount: its onFinish still fires later with
+    // the real server id, tagged with the OLD (abandoned) mount key. That must not
+    // adopt — it would yank the user back into the chat they just left.
+    const { result, setActiveChatId, onInvalidateChatList } = setup({
+      activeChatId: null,
+      chats: { items: [] },
+    });
+    const abandonedKey = result.current.threadKey;
+    act(() => result.current.startFreshThread());
+    expect(result.current.threadKey).not.toBe(abandonedKey);
+    // The abandoned turn finishes in the background, streaming its real id "A".
+    result.current.onTurnFinished("A", abandonedKey);
+    expect(setActiveChatId).not.toHaveBeenCalledWith("A");
+    // It still refreshes the chat list so the left-behind chat shows in history.
+    expect(onInvalidateChatList).toHaveBeenCalled();
+  });
+
+  it("#161: a turn finishing on the CURRENT thread still adopts (guard is key-scoped, not blanket)", () => {
+    // The happy path must keep working: onTurnFinished tagged with the mounted
+    // thread's own key adopts in place as before.
+    const { result, setActiveChatId } = setup({
+      activeChatId: null,
+      chats: { items: [] },
+    });
+    const currentKey = result.current.threadKey;
+    result.current.onTurnFinished("A", currentKey);
+    expect(setActiveChatId).toHaveBeenCalledWith("A");
+  });
+
+  it("waitingForHistory gates the loader only while opening an unloaded existing chat", () => {
+    // Open an existing chat whose history is still loading => loader on.
+    const { result, rerender } = setup({
+      activeChatId: "chat-1",
+      chats: { items: [{ id: "chat-1" }] },
+      messagesLoading: true,
+    });
+    expect(result.current.waitingForHistory).toBe(true);
+    // Once loading finishes, the latch flips and the loader is off.
+    rerender({
+      activeChatId: "chat-1",
+      chats: { items: [{ id: "chat-1" }] },
+      messagesLoading: false,
+    });
+    expect(result.current.waitingForHistory).toBe(false);
+  });
+});
--- a/apps/client/src/features/ai-chat/hooks/use-chat-session.ts
+++ b/apps/client/src/features/ai-chat/hooks/use-chat-session.ts
@@ -0,0 +1,322 @@
+import { useCallback, useEffect, useReducer, useRef } from "react";
+import { generateId } from "ai";
+import {
+  resolveAdoptedChatId,
+  newlyAddedChatIds,
+} from "@/features/ai-chat/utils/adopt-chat-id.ts";
+import {
+  newThread,
+  switchThread,
+  threadSessionReducer,
+} from "@/features/ai-chat/utils/thread-identity.ts";
+
+/** Inputs to {@link useChatSession}. `activeChatId`/`setActiveChatId` are the
+ *  public selection atom (also written from outside the window, e.g. page
+ *  history); the rest is read-only list/loading state + invalidation callbacks. */
+export interface UseChatSessionOptions {
+  activeChatId: string | null; // null = brand-new chat that has no id yet
+  setActiveChatId: (id: string | null) => void;
+  chats: { items?: { id: string }[] } | undefined; // ids feed the fallback snapshot
+  messagesLoading: boolean; // true while the active chat's history is loading
+  /** Wraps queryClient.invalidateQueries(AI_CHATS_RQ_KEY). */
+  onInvalidateChatList: () => void;
+  /** Wraps the per-chat messages invalidation. */
+  onInvalidateChatMessages: (chatId: string) => void;
+}
+
+/** What the window needs from a chat session: the ChatThread mount key, the
+ *  history-loader gate, and the thread-lifecycle callbacks described below. */
+export interface UseChatSessionResult {
+  /** ChatThread mount key (was `thread.key`). */
+  threadKey: string;
+  /** Show the history loader instead of the live thread. */
+  waitingForHistory: boolean;
+  /** Force a brand-new, empty thread (new mount key, no chat id) UNCONDITIONALLY,
+   *  even when `activeChatId` is unchanged. The window calls this from
+   *  startNewChat so "New chat" pressed WHILE a brand-new chat's first turn is
+   *  still streaming (activeChatId still null, nothing to diverge) actually
+   *  resets the chat instead of only dropping the role badge (#161). */
+  startFreshThread: () => void;
+  /** Call when a turn finishes; `serverChatId` is the authoritative streamed id
+   *  (undefined when none was streamed, e.g. a failure before the `start`
+   *  chunk). `finishingThreadKey` is the mount key of the thread that produced
+   *  the turn (omit => "current thread", back-compatible): a turn ABANDONED by
+   *  New chat mid-stream still fires this after its thread unmounted, so adoption
+   *  is gated to the still-mounted thread (#161). Handles adoption + invalidations. */
+  onTurnFinished: (serverChatId?: string, finishingThreadKey?: string) => void;
+  /** Call EARLY (at the stream's `start` chunk) with the authoritative streamed
+   *  chat id so a brand-new chat adopts its real id WHILE its first turn is still
+   *  streaming — making `activeChatId`-gated affordances (e.g. the Copy/export
+   *  button, #174) available immediately. In-place adoption only (same mount key,
+   *  no list/messages invalidation — that is left to onTurnFinished at the end).
+   *  Idempotent and a no-op once the chat already has an id. */
+  onServerChatId: (serverChatId?: string) => void;
+  /** Disarm any pending error-path new-chat fallback. The window calls this from
+   *  startNewChat/selectChat so a late refetch can't yank the user back into a
+   *  just-failed chat after they explicitly moved on. */
+  cancelPendingAdoption: () => void;
+}
+
+/** Collect the ids of a chat list, in list order — the before/after snapshot
+ *  used by the error-path fallback. A missing list or `items` yields []. */
+function chatIdSnapshot(
+  chats: { items?: { id: string }[] } | undefined,
+): string[] {
+  return (chats?.items ?? []).map(({ id }) => id);
+}
+
+/**
+ * Owns the AI-chat thread-identity lifecycle: the single atomic thread identity,
+ * both new-chat id adoption paths (primary streamed-metadata + bounded error-path
+ * fallback), the history-loaded latch, and the render-phase reconciler that keeps
+ * the thread's mount key in sync with the public `activeChatId` atom.
+ *
+ * This is the twice-bugged area for the #137 two-tab adoption race; the canonical
+ * explanation of the adoption design lives in adopt-chat-id.ts.
+ */
+export function useChatSession(
+  params: UseChatSessionOptions,
+): UseChatSessionResult {
+  const {
+    activeChatId,
+    setActiveChatId,
+    chats,
+    messagesLoading,
+    onInvalidateChatList,
+    onInvalidateChatMessages,
+  } = params;
+
+  // Live mirror of `activeChatId`, read by onTurnFinished. ai@6 fires both
+  // onFinish AND onError on a failed turn, so onTurnFinished can run twice in one
+  // turn (once with the streamed id, once without) BEFORE a re-render. Reading
+  // the ref — which the primary-adoption branch updates imperatively — makes that
+  // second call see the just-adopted id, so it cannot re-arm the fallback. (A
+  // plain closure over `activeChatId` would still read null on the second call.)
+  const activeChatIdRef = useRef(activeChatId);
+  activeChatIdRef.current = activeChatId;
+
+  // The mounted thread's identity: ONE atomic value tying ChatThread's mount key
+  // (`thread.key`) to the chat id that mounted thread holds (`thread.chatId`).
+  // Consolidating these makes the "key vs chat id diverged" state unrepresentable
+  // — every change goes through an explicit transition (see thread-identity.ts):
+  // `newThread`/`switchThread` to (re)mount, `adoptThread` for in-place adoption.
+  // Initial: a non-null activeChatId switches to it; a null one gets a fresh
+  // session key with no chat id yet.
+  const [thread, dispatch] = useReducer(threadSessionReducer, undefined, () =>
+    activeChatId === null
+      ? newThread(`new-${generateId()}`)
+      : switchThread(activeChatId),
+  );
+
+  // Live mirror of the mounted thread's mount key, read by onTurnFinished to tell
+  // the CURRENT thread from one ABANDONED by New chat mid-stream. @ai-sdk/react
+  // does not abort a stream on unmount and proxies callbacks through a ref, so an
+  // abandoned turn's onFinish/onError still fires AFTER its ChatThread unmounted;
+  // matching its key against this ref keeps that late finish from adopting the
+  // abandoned chat and yanking the user out of the fresh chat they opened (#161).
+  const threadKeyRef = useRef(thread.key);
+  threadKeyRef.current = thread.key;
+
+  // Error-path fallback for new-chat id adoption. When a brand-new chat's first
+  // turn errors BEFORE the server's `start` chunk, no authoritative chatId ever
+  // reaches the client, so the primary metadata adoption cannot run. We then ARM
+  // this ref with a snapshot of the currently-known chat ids; once the list
+  // refetch lands with the just-created row, the fallback effect below adopts the
+  // SINGLE newly-appeared id. `null` = not armed. See adopt-chat-id.ts (#137).
+  const pendingNewChatRef = useRef<string[] | null>(null);
+
+  // Latch: the chat id whose full persisted history has finished loading while
+  // its thread is mounted. Used so a later BACKGROUND refetch (the post-turn
+  // messages invalidation) never tears the live thread back down to the loader.
+  const historyLoadedKeyRef = useRef<string | null>(null);
+
+  // After a turn finishes, refresh the chat list. For a brand-new chat (no id
+  // yet) we adopt the server's AUTHORITATIVE streamed id (never the newest in the
+  // list, which races a second tab — #137; see adopt-chat-id.ts).
+  const onTurnFinished = useCallback(
+    (serverChatId?: string, finishingThreadKey?: string) => {
+      // Thread-aware guard (#161). A turn ABANDONED by "New chat" mid-stream still
+      // fires onFinish/onError after its ChatThread unmounted (@ai-sdk/react does
+      // not abort on unmount and proxies callbacks through a ref). If that late
+      // finish ran the adoption path it would set activeChatId to the abandoned
+      // chat's real id and yank the user out of the fresh chat they just opened.
+      // So adopt / arm the fallback ONLY for the still-mounted thread; an
+      // abandoned one merely refreshes the chat list (so the left-behind chat
+      // surfaces in history) and does nothing else. A missing key (undefined)
+      // means "current thread" — keeps old call sites/tests working.
+      if (
+        finishingThreadKey !== undefined &&
+        finishingThreadKey !== threadKeyRef.current
+      ) {
+        onInvalidateChatList();
+        return;
+      }
+      // Read the live id from the ref, not the closure: on a failed turn this can
+      // run twice in one turn (onFinish + onError) before any re-render, and the
+      // primary branch below updates the ref so the second call sees the adopted id.
+      const current = activeChatIdRef.current;
+      const adopted = resolveAdoptedChatId(current, serverChatId);
+      if (adopted) {
+        // PRIMARY path. In-place adoption: set the public selection and the
+        // thread identity to the real id together. `adopt` keeps the SAME mount
+        // key, so the render-phase reconciler sees `activeChatId === thread.chatId`
+        // and keeps the SAME mounted thread (its useChat already holds the
+        // just-finished turn) instead of remounting + re-seeding from
+        // not-yet-persisted history.
+        activeChatIdRef.current = adopted; // a same-turn 2nd call now sees the id
+        setActiveChatId(adopted);
+        dispatch({ type: "adopt", chatId: adopted });
+        // Primary adoption won — disarm any previously-armed fallback.
+        pendingNewChatRef.current = null;
+      } else if (current === null) {
+        // FALLBACK path: a brand-new chat finished with NO server id (the first
+        // turn errored before the `start` chunk). Arm the bounded list-refetch
+        // fallback by snapshotting the currently-known chat ids. `chats` is still
+        // the pre-refetch list here, so the just-created row is NOT yet in it; the
+        // effect below adopts the single id that newly appears after the refetch.
+        pendingNewChatRef.current = chatIdSnapshot(chats);
+      }
+      onInvalidateChatList();
+      // Re-sync the persisted message rows for the active chat so the Markdown
+      // export and token counters reflect the just-finished turn. The live thread
+      // renders from its own useChat store (stable thread.key), so this never
+      // re-seeds or tears down the open thread. For a brand-new chat `current` is
+      // still null here; later turns hit this with the adopted id.
+      if (current) {
+        onInvalidateChatMessages(current);
+      }
+    },
+    [chats, setActiveChatId, onInvalidateChatList, onInvalidateChatMessages],
+  );
+
+  // EARLY adoption (#174): adopt the authoritative streamed chat id the moment
+  // the server emits it on the `start` chunk, so a brand-new chat gets its real
+  // `activeChatId` WHILE its first turn streams — not only at terminal
+  // onTurnFinished. This makes the activeChatId-gated Copy/export button
+  // available during the first turn. Pure in-place adoption (same mount key, like
+  // the primary path) with NO invalidation: the list/messages refresh stays on
+  // onTurnFinished at the end of the turn. Reads the live id from the ref so a
+  // repeat call after adoption is a no-op (resolveAdoptedChatId only fires for a
+  // still-new chat).
+  const onServerChatId = useCallback(
+    (serverChatId?: string) => {
+      const adopted = resolveAdoptedChatId(
+        activeChatIdRef.current,
+        serverChatId,
+      );
+      if (!adopted) return;
+      activeChatIdRef.current = adopted;
+      setActiveChatId(adopted);
+      dispatch({ type: "adopt", chatId: adopted });
+      // Early adoption beat the error-path fallback to it — disarm.
+      pendingNewChatRef.current = null;
+    },
+    [setActiveChatId],
+  );
+
+  // FALLBACK resolver. Armed only by onTurnFinished when a brand-new chat's first
+  // turn errored before the `start` chunk (no authoritative id streamed). Once
+  // the per-user list refetch lands with the just-created row, adopt the SINGLE
+  // id that newly appeared relative to the pre-refetch snapshot. Adoption is IN
+  // PLACE (set activeChatId + `adopt` together) like the primary path, so the
+  // render-phase reconciler does not remount.
+  useEffect(() => {
+    const before = pendingNewChatRef.current;
+    if (before === null || activeChatId !== null) return; // not armed / already adopted
+    const after = chatIdSnapshot(chats);
+    const added = newlyAddedChatIds(before, after);
+    // Keep waiting until a genuinely-new id appears. Set-based, so it is robust
+    // to an add+delete in the same window (a length compare would miss it), and
+    // it deliberately keeps waiting through an unrelated deletion (no new id yet)
+    // until the just-created row actually lands, rather than giving up early.
+    if (added.size === 0) return; // list not refetched yet — keep waiting
+    pendingNewChatRef.current = null; // resolved — disarm
+    if (added.size === 1) {
+      // single unambiguous new id; >1 = ambiguous → give up
+      const adopted = [...added][0];
+      setActiveChatId(adopted);
+      dispatch({ type: "adopt", chatId: adopted });
+    }
+  }, [chats, activeChatId, setActiveChatId]);
+
+  // Reconcile the thread identity against the active-chat atom during render when
+  // they diverge — the React-sanctioned alternative to an effect (re-renders
+  // before paint, no extra commit, and converges since the next render finds them
+  // equal). This reconciliation MUST remain: `activeChatId` is the public
+  // selection and is ALSO set from OUTSIDE this component (e.g. page-history opens
+  // a referenced chat via setActiveChatId). A divergence here is a genuine SWITCH
+  // (external atom change OR user switch via selectChat/startNewChat), so
+  // `reconcile` remounts + reseeds. In-place adoption never reaches this branch:
+  // it set activeChatId and thread.chatId to the same value.
+  if (activeChatId !== thread.chatId) {
+    // A genuine switch makes any pending error-path new-chat fallback moot.
+    pendingNewChatRef.current = null;
+    dispatch({
+      type: "reconcile",
+      chatId: activeChatId,
+      newKey: `new-${generateId()}`,
+    });
+  }
+
+  // Latch the active chat once its full history has loaded and its thread is
+  // mounted, so a later background refetch (the post-turn messages invalidation,
+  // which can transiently flip hasNextPage for a chat whose message count is an
+  // exact multiple of the server page size) does not tear the live thread down to
+  // a loader and lose its in-progress useChat state.
+  if (
+    activeChatId !== null &&
+    thread.key === activeChatId &&
+    !messagesLoading &&
+    historyLoadedKeyRef.current !== activeChatId
+  ) {
+    historyLoadedKeyRef.current = activeChatId;
+  }
+
+  // Show the history loader only when freshly OPENING an existing chat (the key
+  // equals the chat id) whose history has not been fully loaded yet. For a live
+  // in-place thread that adopted its id, the key is still the "new-…" session
+  // key, so the live thread keeps rendering; and once a chat's history has loaded,
+  // a later background refetch no longer tears it down (see the latch above).
+  const waitingForHistory =
+    activeChatId !== null &&
+    messagesLoading &&
+    thread.key === activeChatId &&
+    historyLoadedKeyRef.current !== activeChatId;
+
+  // Explicit disarm for startNewChat/selectChat. The render-phase reconciler only
+  // disarms when activeChatId actually changes, but "New chat" pressed while the
+  // user is ALREADY in a new chat is a no-op for the atom (activeChatId stays
+  // null), so the reconciler never fires — without this an armed fallback could
+  // adopt the just-failed chat from a late refetch and yank the user out of their
+  // fresh chat. Stable identity (writes a ref).
+  const cancelPendingAdoption = useCallback(() => {
+    pendingNewChatRef.current = null;
+  }, []);
+
+  // Force a fresh, empty thread regardless of `activeChatId` (#161). The render-
+  // phase reconciler only remounts when activeChatId diverges from thread.chatId,
+  // so "New chat" pressed before a brand-new chat's first turn has adopted its id
+  // (activeChatId AND thread.chatId both null — early adoption waits for the
+  // `start` chunk) is a no-op for it and the abandoned thread/stream/history
+  // would persist. Dispatching reconcile with a fresh key and chatId:null here
+  // always produces a new mount key, so React remounts ChatThread (a clean useChat
+  // store) and the post-dispatch state (activeChatId null === thread.chatId null)
+  // keeps the reconciler from interfering. Also disarms any pending fallback.
+  const startFreshThread = useCallback(() => {
+    pendingNewChatRef.current = null;
+    dispatch({
+      type: "reconcile",
+      chatId: null,
+      newKey: `new-${generateId()}`,
+    });
+  }, []);
+
+  return {
+    threadKey: thread.key,
+    waitingForHistory,
+    startFreshThread,
+    onTurnFinished,
+    onServerChatId,
+    cancelPendingAdoption,
+  };
+}
--- a/apps/client/src/features/ai-chat/queries/ai-chat-query.ts
+++ b/apps/client/src/features/ai-chat/queries/ai-chat-query.ts
@@ -4,7 +4,7 @@ import {
  useQuery,
  useQueryClient,
 } from "@tanstack/react-query";
-import { useMemo } from "react";
+import { useEffect, useMemo } from "react";
 import { useTranslation } from "react-i18next";
 import { notifications } from "@mantine/notifications";
 import {
@@ -75,6 +75,31 @@ export function useAiChatMessagesQuery(chatId: string | undefined) {
    enabled: !!chatId,
  });

+  // useInfiniteQuery only fetches the first page on its own. The hook's contract
+  // (and both the Markdown export and the model-history seed) require the
+  // COMPLETE thread, so keep pulling subsequent pages until the server reports
+  // none remain. The isFetchingNextPage guard issues one request at a time;
+  // when chatId is undefined the query is disabled and hasNextPage is false, so
+  // this is a no-op. The isFetchNextPageError guard is critical: the app sets a
+  // global `retry: false`, so a rejected fetchNextPage leaves hasNextPage true
+  // and isFetchingNextPage false — without this guard the effect would re-fire
+  // immediately and hammer the endpoint in a tight loop. isFetchNextPageError
+  // latches the last next-page failure and clears once a fetch succeeds.
+  useEffect(() => {
+    if (
+      query.hasNextPage &&
+      !query.isFetchingNextPage &&
+      !query.isFetchNextPageError
+    ) {
+      void query.fetchNextPage();
+    }
+  }, [
+    query.hasNextPage,
+    query.isFetchingNextPage,
+    query.isFetchNextPageError,
+    query.fetchNextPage,
+  ]);
+
  const data = useMemo<IAiChatMessageRow[] | undefined>(() => {
    if (!query.data) return undefined;
    return query.data.pages.flatMap((p) => p.items);
--- a/apps/client/src/features/ai-chat/services/ai-chat-service.ts
+++ b/apps/client/src/features/ai-chat/services/ai-chat-service.ts
@@ -50,6 +50,24 @@ export async function deleteAiChat(chatId: string): Promise<void> {
  await api.post("/ai-chat/delete", { chatId });
 }

+/**
+ * Export a chat to Markdown (#183). POSTs `{ chatId, lang }` to
+ * `/ai-chat/export`, where the transcript is rendered server-side from the
+ * persisted rows, and resolves with the response's `markdown` string for the
+ * client to copy. `lang` localizes the fixed labels (English when omitted).
+ */
+export async function exportAiChat(
+  chatId: string,
+  lang?: string,
+): Promise<string> {
+  const payload = { chatId, lang };
+  const { data } = await api.post<{ markdown: string }>(
+    "/ai-chat/export",
+    payload,
+  );
+  return data.markdown;
+}
+
 /**
 * Agent roles API (`/ai-chat/roles`). `list` is available to any workspace
 * member (for the chat-creation picker); create/update/delete are admin-only
@@ -76,6 +94,8 @@ export async function updateAiRole(data: IAiRoleUpdate): Promise<IAiRole> {

 /** Soft-delete a role (admin). */
 export async function deleteAiRole(id: string): Promise<{ success: true }> {
-  const req = await api.post<{ success: true }>("/ai-chat/roles/delete", { id });
+  const req = await api.post<{ success: true }>("/ai-chat/roles/delete", {
+    id,
+  });
  return req.data;
 }
--- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts
+++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts
@@ -19,6 +19,12 @@ export interface IAiChat {
  // Null when the chat has no role or the role was soft-deleted.
  roleName?: string | null;
  roleEmoji?: string | null;
+  // The document the chat was created in (ai_chats.page_id). Null when started
+  // outside any document.
+  pageId?: string | null;
+  // Denormalized via a JOIN in the chat list response: the origin page's title.
+  // Null when there is no origin page (or it was hard-deleted).
+  pageTitle?: string | null;
 }

 /** Supported model drivers (mirrors the server `AI_DRIVERS`). */
@@ -47,6 +53,10 @@ export interface IAiRole {
  instructions?: string;
  modelConfig?: IAiRoleModelConfig | null;
  enabled: boolean;
+  // Whether picking the role auto-sends a launch message and starts the chat.
+  autoStart: boolean;
+  // Custom auto-start text; null/empty => the default launch message is sent.
+  launchMessage: string | null;
  createdAt?: string;
  updatedAt?: string;
 }
@@ -59,6 +69,8 @@ export interface IAiRoleCreate {
  instructions: string;
  modelConfig?: IAiRoleModelConfig | null;
  enabled?: boolean;
+  autoStart?: boolean;
+  launchMessage?: string;
 }

 /** Admin update payload for a role (partial). */
@@ -70,6 +82,8 @@ export interface IAiRoleUpdate {
  instructions?: string;
  modelConfig?: IAiRoleModelConfig | null;
  enabled?: boolean;
+  autoStart?: boolean;
+  launchMessage?: string;
 }

 /**
@@ -92,15 +106,27 @@ export interface IAiChatMessageRow {
      inputTokens?: number;
      outputTokens?: number;
      totalTokens?: number;
+      // Reasoning (thinking) tokens, when the provider reports them. Optional so
+      // old history rows (recorded before this shipped) stay valid. Included in
+      // `outputTokens` per the AI SDK usage shape.
+      reasoningTokens?: number;
    };
    // Current context size for the turn = final-step (input+output) tokens, i.e.
    // how much the conversation occupies in the model's context window after this
    // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
    // floating window's header badge.
    contextTokens?: number;
+    // The model's max context window (denominator for the header badge); set
+    // alongside contextTokens on a completed turn; absent on older rows.
+    maxContextTokens?: number;
    // Set on an assistant row whose turn ended in a provider/stream error; the
    // raw provider error text (e.g. "402: ...") for inline display in the thread.
    error?: string;
+    // Terminal outcome of the assistant turn: 'error' (provider/stream error,
+    // paired with `error`), 'aborted' (client disconnect — a manual Stop or a
+    // dropped connection), or the SDK's finish reason on a clean turn. The UI
+    // renders a "stopped" marker on interrupted turns.
+    finishReason?: string;
  } | null;
  createdAt: string;
 }
--- a/apps/client/src/features/ai-chat/utils/adopt-chat-id.test.ts
+++ b/apps/client/src/features/ai-chat/utils/adopt-chat-id.test.ts
@@ -0,0 +1,72 @@
+import { describe, it, expect } from "vitest";
+import {
+  resolveAdoptedChatId,
+  newlyAddedChatIds,
+  extractServerChatId,
+} from "./adopt-chat-id";
+
+describe("resolveAdoptedChatId", () => {
+  it("adopts the server id for a brand-new chat (activeChatId null + id)", () => {
+    expect(resolveAdoptedChatId(null, "chat-1")).toBe("chat-1");
+  });
+
+  it("returns null for an existing chat even with a server id", () => {
+    expect(resolveAdoptedChatId("chat-existing", "chat-1")).toBeNull();
+  });
+
+  it("returns null for a new chat with no server id", () => {
+    expect(resolveAdoptedChatId(null, undefined)).toBeNull();
+    expect(resolveAdoptedChatId(null, null)).toBeNull();
+  });
+});
+
+describe("newlyAddedChatIds", () => {
+  it("returns the single new id", () => {
+    expect([...newlyAddedChatIds(["a", "b"], ["a", "b", "c"])]).toEqual(["c"]);
+  });
+
+  it("returns an empty set when nothing was added", () => {
+    expect(newlyAddedChatIds(["a", "b"], ["b", "a"]).size).toBe(0);
+  });
+
+  it("returns both new ids when two were added", () => {
+    expect(newlyAddedChatIds(["a"], ["a", "b", "c"])).toEqual(
+      new Set(["b", "c"]),
+    );
+  });
+
+  it("keeps only the new id across an add+delete in the same window", () => {
+    // before [a,b] -> after [b,new]: a was deleted, new was added.
+    expect([...newlyAddedChatIds(["a", "b"], ["b", "new"])]).toEqual(["new"]);
+  });
+
+  it("dedupes a repeated new id to a single entry", () => {
+    expect(newlyAddedChatIds(["a"], ["a", "new", "new"])).toEqual(
+      new Set(["new"]),
+    );
+  });
+});
+
+describe("extractServerChatId", () => {
+  it("returns the chatId when present on metadata", () => {
+    expect(extractServerChatId({ metadata: { chatId: "chat-1" } })).toBe(
+      "chat-1",
+    );
+  });
+
+  it("returns undefined when the message has no metadata", () => {
+    expect(extractServerChatId({})).toBeUndefined();
+  });
+
+  it("returns undefined when metadata lacks chatId", () => {
+    expect(extractServerChatId({ metadata: { other: 1 } })).toBeUndefined();
+  });
+
+  it("returns undefined for a non-string chatId", () => {
+    expect(extractServerChatId({ metadata: { chatId: 42 } })).toBeUndefined();
+  });
+
+  it("returns undefined for an undefined message", () => {
+    expect(extractServerChatId(undefined)).toBeUndefined();
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/adopt-chat-id.ts
+++ b/apps/client/src/features/ai-chat/utils/adopt-chat-id.ts
@@ -0,0 +1,70 @@
+/**
+ * Pure helpers for adopting a brand-new chat's authoritative server id.
+ *
+ * ============================ CANONICAL #137 NOTE ============================
+ * This docblock is the single authoritative explanation of the new-chat id
+ * adoption design and the #137 two-tab race it fixes. Other call sites
+ * (use-chat-session.ts, the server's `chatStreamMetadata`) reference here
+ * rather than restating it.
+ *
+ * When a user sends the first turn of a BRAND-NEW chat, the client has no chat
+ * id yet (`activeChatId === null`). The server creates the row and the client
+ * must "adopt" that row's real id so the SECOND turn targets the same chat.
+ *
+ * The OLD heuristic adopted `items[0]` — the newest chat in the refetched list.
+ * That races a second tab: if another tab created a chat in the same moment,
+ * its row could be `items[0]`, so this tab would adopt the SIBLING chat and
+ * leak its later turns into it (#137). We adopt by IDENTITY instead, two ways:
+ *
+ * PRIMARY path: the server streams the real chat id on the assistant message
+ * metadata's `start` part (see `chatStreamMetadata` server-side);
+ * `extractServerChatId` reads it off the finished message and
+ * `resolveAdoptedChatId` turns it into the id to adopt for a new chat. This is
+ * authoritative and immune to the race.
+ *
+ * FALLBACK path (only when a new chat's first turn errors BEFORE the `start`
+ * chunk, so no metadata id ever reached the client): adopt the single chat that
+ * NEWLY appeared in the per-user list relative to a pre-refetch snapshot —
+ * `newlyAddedChatIds` (the fallback effect adopts only when exactly one id is
+ * new). This is unambiguous and does not race a second tab the way the old
+ * "newest chat in the list" guess did.
+ * ============================================================================
+ */
+
+/**
+ * Id a brand-new chat (`activeChatId === null`) adopts from the streamed server
+ * id: `serverChatId` when truthy; null for an existing chat or a missing id.
+ */
+export function resolveAdoptedChatId(
+  activeChatId: string | null,
+  serverChatId: string | null | undefined,
+): string | null {
+  if (activeChatId !== null || !serverChatId) return null;
+  return serverChatId;
+}
+
+/**
+ * Read `metadata.chatId` off a finished assistant message — the field the
+ * server-streamed chat id is expected in (see `chatStreamMetadata`). Yields it
+ * only when it is a string; otherwise (no message/metadata, wrong type) undefined.
+ */
+export function extractServerChatId(
+  message: { metadata?: unknown } | undefined,
+): string | undefined {
+  const meta = message?.metadata as { chatId?: unknown } | null | undefined;
+  const chatId = meta?.chatId;
+  return typeof chatId === "string" ? chatId : undefined;
+}
+
+/**
+ * Ids that occur in `afterIds` but not in `beforeIds`, returned as a Set so an
+ * id repeated in `afterIds` (e.g. across paginated pages) is counted only once.
+ */
+export function newlyAddedChatIds(
+  beforeIds: readonly string[],
+  afterIds: readonly string[],
+): Set<string> {
+  const known = new Set(beforeIds);
+  const fresh = afterIds.filter((id) => !known.has(id));
+  return new Set(fresh);
+}
--- a/apps/client/src/features/ai-chat/utils/chat-markdown.ts
+++ b/apps/client/src/features/ai-chat/utils/chat-markdown.ts
@@ -1,165 +0,0 @@
-/**
- * Client-only Markdown builder for an AI agent chat. Serializes the already
- * persisted message rows (loaded via `useAiChatMessagesQuery`) into a single
- * Markdown string suitable for copying to the clipboard. NO network call is
- * made and NO server/DB code is touched — this reuses the rich "request
- * internals" (tool calls with input/output, per-message token usage,
- * finish/error info) that the chat already holds client-side.
- *
- * Only role labels and tool action labels are localized via the passed-in `t`
- * translator; the structural document words (Input/Output/Error/Tokens/...) are
- * plain English constants because the output is a technical artifact.
- */
-
-import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
-import {
-  ToolUiPart,
-  getToolName,
-  toolRunState,
-  toolLabelKey,
-} from "@/features/ai-chat/utils/tool-parts.tsx";
-
-// Minimal translator signature compatible with react-i18next's `t`.
-type Translate = (key: string, values?: Record<string, unknown>) => string;
-
-interface BuildChatMarkdownArgs {
-  title: string | null;
-  chatId: string;
-  rows: IAiChatMessageRow[];
-  t: Translate;
-}
-
-/** A single AI SDK UIMessage part (text part or other). */
-interface TextLikePart {
-  type: string;
-  text?: string;
-}
-
-/**
- * Stringify an arbitrary tool input/output value for a fenced block. Strings
- * pass through as-is; everything else is pretty-printed JSON, falling back to
- * `String(value)` if serialization throws (e.g. a circular structure).
- */
-function stringify(value: unknown): string {
-  if (typeof value === "string") return value;
-  try {
-    return JSON.stringify(value, null, 2);
-  } catch {
-    return String(value);
-  }
-}
-
-/**
- * Wrap `code` in a fenced code block whose backtick delimiter is LONGER than
- * the longest backtick run inside the content, so embedded backticks (or even
- * a literal ``` fence) never break out of the block. Minimum 3 backticks.
- */
-function fence(code: string, lang = ""): string {
-  const runs: string[] = code.match(/`+/g) ?? [];
-  const longest = runs.reduce((m, s) => Math.max(m, s.length), 0);
-  const delim = "`".repeat(Math.max(3, longest + 1));
-  return `${delim}${lang}\n${code}\n${delim}`;
-}
-
-/** Per-row token count, mirroring the header sum in ai-chat-window.tsx. */
-function rowTokens(usage: {
-  inputTokens?: number;
-  outputTokens?: number;
-  totalTokens?: number;
-}): number {
-  return (
-    usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
-  );
-}
-
-/**
- * Serialize a chat to a Markdown string. Pure (apart from `new Date()` for the
- * export timestamp), so it is straightforward to unit-test.
- */
-export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
-  const { title, chatId, rows, t } = args;
-  const blocks: string[] = [];
-
-  const heading = (title ?? "").trim() || t("Untitled chat");
-  blocks.push(`# ${heading}`);
-
-  // Metadata bullet list. Total tokens is only shown when there is a sum.
-  const totalTokens = rows.reduce((sum, row) => {
-    const usage = row.metadata?.usage;
-    return usage ? sum + rowTokens(usage) : sum;
-  }, 0);
-  const meta = [
-    `- Chat ID: \`${chatId}\``,
-    `- Exported: ${new Date().toISOString()}`,
-    `- Messages: ${rows.length}`,
-  ];
-  if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
-  blocks.push(meta.join("\n"));
-
-  rows.forEach((row, index) => {
-    blocks.push("---");
-
-    const roleLabel = row.role === "assistant" ? t("AI agent") : t("You");
-    blocks.push(`## ${index + 1}. ${roleLabel}`);
-
-    // Created-at kept in source as an HTML comment (out of the rendered prose).
-    blocks.push(`<!-- ${row.createdAt} -->`);
-
-    // Resolve parts: prefer the rich persisted parts, else a single text part
-    // built from the plain-text content (mirrors `rowToUiMessage`).
-    const parts: TextLikePart[] =
-      Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0
-        ? (row.metadata.parts as TextLikePart[])
-        : [{ type: "text", text: row.content ?? "" }];
-
-    for (const part of parts) {
-      if (part.type === "text") {
-        const text = (part.text ?? "").trim();
-        // Skip empty/whitespace-only text parts (matches MessageItem).
-        if (text.length > 0) blocks.push(text);
-        continue;
-      }
-
-      const isToolPart =
-        part.type.startsWith("tool-") || part.type === "dynamic-tool";
-      if (!isToolPart) continue;
-
-      const tp = part as unknown as ToolUiPart;
-      const name = getToolName(tp);
-      const { key, values } = toolLabelKey(name);
-      const label = t(key, values);
-      const state = toolRunState(tp.state);
-
-      const toolLines: string[] = [
-        `**Tool: ${label}** (\`${name}\`) — ${state}`,
-      ];
-      if (tp.input !== undefined) {
-        toolLines.push("Input:");
-        toolLines.push(fence(stringify(tp.input), "json"));
-      }
-      if (tp.output !== undefined) {
-        toolLines.push("Output:");
-        toolLines.push(fence(stringify(tp.output), "json"));
-      }
-      if (tp.errorText) {
-        toolLines.push(`**Error:** ${tp.errorText}`);
-      }
-      blocks.push(toolLines.join("\n\n"));
-    }
-
-    if (row.metadata?.error) {
-      blocks.push(`**⚠️ Error:** ${row.metadata.error}`);
-    }
-
-    const usage = row.metadata?.usage;
-    if (usage) {
-      const total = usage.totalTokens ?? rowTokens(usage);
-      blocks.push(
-        `_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}, total: ${total}_`,
-      );
-    }
-  });
-
-  // Blank line between blocks so the Markdown renders cleanly.
-  return blocks.join("\n\n");
-}
--- a/apps/client/src/features/ai-chat/utils/collapse-blank-lines.test.ts
+++ b/apps/client/src/features/ai-chat/utils/collapse-blank-lines.test.ts
@@ -0,0 +1,61 @@
+import { describe, it, expect } from "vitest";
+import { collapseBlankLines } from "@/features/ai-chat/utils/collapse-blank-lines.ts";
+import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
+
+describe("collapseBlankLines", () => {
+  it("collapses a run of 2+ newlines to a single newline", () => {
+    expect(collapseBlankLines("a\n\nb")).toBe("a\nb");
+    expect(collapseBlankLines("a\n\n\n\nb")).toBe("a\nb");
+  });
+
+  it("keeps single newlines untouched", () => {
+    expect(collapseBlankLines("a\nb\nc")).toBe("a\nb\nc");
+  });
+
+  it("preserves blank lines INSIDE a fenced code block", () => {
+    const src = "a\n\n\nb\n\n```\nx\n\n\ny\n```\n\nc";
+    // Prose blanks collapse; the blank lines between the ``` fences survive.
+    expect(collapseBlankLines(src)).toBe("a\nb\n```\nx\n\n\ny\n```\nc");
+  });
+
+  it("handles a tilde fence and preserves its interior blanks", () => {
+    const src = "p\n\n~~~\ncode\n\nmore\n~~~\n\nq";
+    expect(collapseBlankLines(src)).toBe("p\n~~~\ncode\n\nmore\n~~~\nq");
+  });
+
+  it("leaves an unclosed fence's remaining lines verbatim", () => {
+    const src = "intro\n\n```\nstill\n\nopen";
+    expect(collapseBlankLines(src)).toBe("intro\n```\nstill\n\nopen");
+  });
+
+  it("is a no-op for text with no blank lines", () => {
+    expect(collapseBlankLines("just one line")).toBe("just one line");
+  });
+});
+
+describe("collapseBlankLines + renderChatMarkdown (tight reasoning rendering)", () => {
+  it("renders a blank-line-separated list as a TIGHT list (no <li><p>)", () => {
+    const loose =
+      "Intro paragraph.\n\n- item one\n\n- item two\n\n- item three";
+    const html = renderChatMarkdown(collapseBlankLines(loose), {});
+    // Tight list: each <li> holds the text directly, not wrapped in a <p>.
+    expect(html).toContain("<li>item one</li>");
+    expect(html).not.toContain("<li><p>");
+    // The list still parses as a list after the paragraph (not a paragraph+<br>).
+    expect(html).toContain("<ul>");
+    expect(html).toContain("<p>Intro paragraph.</p>");
+  });
+
+  it("renders an ordered list (1. 2.) as tight after collapsing", () => {
+    const loose = "Intro.\n\n1. first\n\n2. second";
+    const html = renderChatMarkdown(collapseBlankLines(loose), {});
+    expect(html).toContain("<ol>");
+    expect(html).toContain("<li>first</li>");
+    expect(html).not.toContain("<li><p>");
+  });
+
+  it("the loose source WOULD render <li><p> without collapsing (control)", () => {
+    const loose = "- a\n\n- b";
+    expect(renderChatMarkdown(loose, {})).toContain("<li><p>");
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/collapse-blank-lines.ts
+++ b/apps/client/src/features/ai-chat/utils/collapse-blank-lines.ts
@@ -0,0 +1,56 @@
+// Pure helper for compact reasoning ("Thinking") rendering. Kept free of React
+// so it can be unit-tested in isolation (see collapse-blank-lines.test.ts).
+
+/**
+ * Collapse runs of 2+ newlines down to a single newline, EXCEPT inside fenced
+ * code blocks (``` ... ``` or ~~~ ... ~~~), where blank lines are significant.
+ *
+ * Why: reasoning models emit thinking with a blank line (`\n\n`) between every
+ * list item and paragraph. `marked` turns those into "loose" lists (each `<li>`
+ * wrapped in a `<p>`) and separate `<p>` paragraphs, each carrying a vertical
+ * margin — so the "Thinking" block renders with large, airy gaps. Removing the
+ * blank-line gaps yields tight lists (no `<li><p>`) and joined paragraphs. The
+ * chat markdown renderer runs with `breaks: true`, so a single `\n` still
+ * becomes a `<br>` — line breaks inside the reasoning are preserved; only the
+ * empty gaps between blocks disappear. Apply ONLY to reasoning text, never to a
+ * normal assistant answer (where paragraph spacing is intentional).
+ *
+ * Fenced code is preserved verbatim: a fence opens on a line whose first
+ * non-space characters are ``` or ~~~ and closes (CommonMark) on a line holding
+ * ONLY a run of the same character at least as long as the opener. A shorter
+ * run, or one followed by text (e.g. ```js), stays inside the block as content.
+ */
+export function collapseBlankLines(text: string): string {
+  // Leading run of 3+ backticks or tildes (a candidate code-fence line).
+  const fenceRun = /^\s*(`{3,}|~{3,})/;
+  const out: string[] = [];
+  // The opening fence run (e.g. "```" or "~~~~") while inside a fenced block;
+  // null when outside. Its character and length decide what may close it.
+  let openFence: string | null = null;
+
+  for (const line of text.split("\n")) {
+    const run = fenceRun.exec(line);
+    if (run && openFence === null) {
+      // Opening fence; anything after the run is the info string.
+      openFence = run[1];
+    } else if (
+      run &&
+      openFence !== null &&
+      run[1][0] === openFence[0] &&
+      run[1].length >= openFence.length &&
+      line.slice(run[0].length).trim() === ""
+    ) {
+      // Closing fence: same character, at least as long, nothing after it.
+      openFence = null;
+    } else if (openFence === null && line.trim() === "") {
+      // Outside fences: drop blank lines so a `\n\n+` gap collapses to a single
+      // `\n` between the surrounding content lines.
+      continue;
+    }
+    // Everything else is kept verbatim: fence lines, fenced content (blank
+    // lines included — they are significant inside code) and non-blank prose.
+    out.push(line);
+  }
+
+  return out.join("\n");
+}
--- a/apps/client/src/features/ai-chat/utils/context-badge.test.ts
+++ b/apps/client/src/features/ai-chat/utils/context-badge.test.ts
@@ -0,0 +1,90 @@
+import { describe, expect, it } from "vitest";
+import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
+import { selectContextBadge } from "@/features/ai-chat/utils/context-badge.ts";
+
+/**
+ * Pure-helper tests for the header context badge selection. Covers the two
+ * non-obvious rules: numerator and denominator are each taken from the most
+ * recent row carrying THAT value (they may live on different rows), and a fresh
+ * row with a zero/absent value must NOT shadow an older positive one.
+ */
+const row = (metadata: IAiChatMessageRow["metadata"]): IAiChatMessageRow => ({
+  id: Math.random().toString(),
+  role: "assistant",
+  content: null,
+  metadata,
+  createdAt: "2026-01-01T00:00:00.000Z",
+});
+
+describe("selectContextBadge", () => {
+  it("returns zeros for empty / nullish input", () => {
+    expect(selectContextBadge(undefined)).toEqual({
+      contextTokens: 0,
+      maxContextTokens: 0,
+    });
+    expect(selectContextBadge(null)).toEqual({
+      contextTokens: 0,
+      maxContextTokens: 0,
+    });
+    expect(selectContextBadge([])).toEqual({
+      contextTokens: 0,
+      maxContextTokens: 0,
+    });
+  });
+
+  it("reads both figures from the most recent row that carries them", () => {
+    expect(
+      selectContextBadge([
+        row({ contextTokens: 100, maxContextTokens: 200000 }),
+        row({ contextTokens: 1500, maxContextTokens: 200000 }),
+      ]),
+    ).toEqual({ contextTokens: 1500, maxContextTokens: 200000 });
+  });
+
+  it("falls back to legacy usage total for older rows without contextTokens", () => {
+    expect(
+      selectContextBadge([
+        row({ usage: { inputTokens: 30, outputTokens: 70 } }),
+      ]),
+    ).toEqual({ contextTokens: 100, maxContextTokens: 0 });
+
+    expect(
+      selectContextBadge([row({ usage: { totalTokens: 250 } })]),
+    ).toEqual({ contextTokens: 250, maxContextTokens: 0 });
+  });
+
+  it("takes numerator and denominator from different rows", () => {
+    // Freshest row (an error turn) carries contextTokens but no max; the older
+    // completed turn carries the max. Each is picked from its own latest row.
+    expect(
+      selectContextBadge([
+        row({ contextTokens: 800, maxContextTokens: 200000 }),
+        row({ contextTokens: 1200, error: "402: nope" }),
+      ]),
+    ).toEqual({ contextTokens: 1200, maxContextTokens: 200000 });
+  });
+
+  it("does not let a fresh zero/absent max shadow an older positive max", () => {
+    expect(
+      selectContextBadge([
+        row({ contextTokens: 100, maxContextTokens: 200000 }),
+        row({ contextTokens: 1200, maxContextTokens: 0 }),
+      ]),
+    ).toEqual({ contextTokens: 1200, maxContextTokens: 200000 });
+  });
+
+  it("skips rows with null metadata", () => {
+    expect(
+      selectContextBadge([
+        row({ contextTokens: 500, maxContextTokens: 200000 }),
+        row(null),
+      ]),
+    ).toEqual({ contextTokens: 500, maxContextTokens: 200000 });
+  });
+
+  it("reports current > max as-is (no clamp)", () => {
+    expect(
+      selectContextBadge([row({ contextTokens: 250000, maxContextTokens: 200000 })]),
+    ).toEqual({ contextTokens: 250000, maxContextTokens: 200000 });
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/context-badge.ts
+++ b/apps/client/src/features/ai-chat/utils/context-badge.ts
@@ -0,0 +1,49 @@
+import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
+
+/**
+ * Derive the header context badge figures from the persisted message rows.
+ *
+ * - `contextTokens` (numerator): how much the conversation now occupies in the
+ *   model's context window. Read from the most recent row carrying a context
+ *   figure — `contextTokens` (final-step input+output) on rows recorded after
+ *   this shipped, else that turn's legacy `usage` total for older rows.
+ * - `maxContextTokens` (denominator): the model's configured max window, stamped
+ *   alongside `contextTokens` on a completed turn.
+ *
+ * Each value is taken from the most recent row carrying THAT value
+ * independently — they may land on different rows (e.g. a fresh error row can
+ * carry `contextTokens` but not `maxContextTokens`), so the scan continues for
+ * whichever is still unset. `0` means "no row has it" (older rows, or no
+ * admin-configured limit); the badge then omits the value.
+ */
+export function selectContextBadge(
+  messageRows: readonly IAiChatMessageRow[] | undefined | null,
+): { contextTokens: number; maxContextTokens: number } {
+  let contextTokens = 0;
+  let maxContextTokens = 0;
+  if (!messageRows) return { contextTokens, maxContextTokens };
+  for (let i = messageRows.length - 1; i >= 0; i--) {
+    const meta = messageRows[i].metadata;
+    if (!meta) continue;
+    if (contextTokens === 0) {
+      if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
+        contextTokens = meta.contextTokens;
+      } else if (meta.usage) {
+        const usage = meta.usage;
+        const fallback =
+          usage.totalTokens ??
+          (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
+        if (fallback > 0) contextTokens = fallback;
+      }
+    }
+    if (
+      maxContextTokens === 0 &&
+      typeof meta.maxContextTokens === "number" &&
+      meta.maxContextTokens > 0
+    ) {
+      maxContextTokens = meta.maxContextTokens;
+    }
+    if (contextTokens !== 0 && maxContextTokens !== 0) break;
+  }
+  return { contextTokens, maxContextTokens };
+}
--- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts
+++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts
@@ -0,0 +1,15 @@
+import { describe, expect, it } from "vitest";
+import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
+
+describe("estimateTokens", () => {
+  it("returns 0 for the empty string", () => {
+    expect(estimateTokens("")).toBe(0);
+  });
+
+  it("ceils chars/4 so any non-empty text is at least 1 token", () => {
+    expect(estimateTokens("a")).toBe(1);
+    expect(estimateTokens("abcd")).toBe(1);
+    expect(estimateTokens("abcde")).toBe(2);
+    expect(estimateTokens("12345678")).toBe(2);
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts
+++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts
@@ -0,0 +1,19 @@
+/**
+ * Rough client-side token estimation for AI-chat UI affordances.
+ *
+ * No provider streams exact per-token usage mid-stream, so any in-flight figure
+ * is a CLIENT ESTIMATE (chars/≈4 heuristic). Pure + unit-testable: it never runs
+ * a real BPE tokenizer (that would be O(n²) on the hot path, bloat the bundle,
+ * and be wrong for Gemini/Ollama anyway). Used by the in-body reasoning counter
+ * ("Thinking · N tokens").
+ */
+
+/**
+ * Rough token estimate for a piece of text using the standard chars/≈4 heuristic.
+ * Returns 0 only for the empty string (whitespace-only text is still counted),
+ * and ceils so any non-empty text counts as at least one token.
+ */
+export function estimateTokens(text: string): number {
+  if (!text) return 0;
+  return Math.ceil(text.length / 4);
+}
--- a/apps/client/src/features/ai-chat/utils/error-message.test.ts
+++ b/apps/client/src/features/ai-chat/utils/error-message.test.ts
@@ -6,48 +6,163 @@ import { describeChatError } from "./error-message";
 const t = (key: string) => key;

 describe("describeChatError", () => {
-  it('surfaces a provider "402: ..." stream error verbatim', () => {
-    expect(describeChatError("402: Insufficient credits", t)).toBe(
-      "402: Insufficient credits",
-    );
-  });
-
-  it('does NOT misclassify a body that merely contains "403" (no "statusCode":403)', () => {
-    // A provider message mentioning the number 403 must be surfaced verbatim,
-    // never folded into the "AI chat is disabled" gating message.
-    const msg = "429: rate limited after 403 attempts";
-    expect(describeChatError(msg, t)).toBe(msg);
-  });
-
-  it('maps a {"statusCode":403} body to the disabled message', () => {
+  it('maps a {"statusCode":403} body to the disabled heading', () => {
    const body = '{"statusCode":403,"message":"Forbidden"}';
-    expect(describeChatError(body, t)).toBe(
-      "AI chat is disabled for this workspace.",
-    );
+    expect(describeChatError(body, t)).toEqual({
+      title: "AI chat is disabled",
+      detail: "AI chat is disabled for this workspace.",
+    });
  });

-  it('maps a {"statusCode":503} body to the not-configured message', () => {
+  it('maps a {"statusCode":503} body to the not-configured heading', () => {
    const body = '{"statusCode":503,"message":"Service Unavailable"}';
-    expect(describeChatError(body, t)).toBe(
-      "The AI provider is not configured. Ask an administrator to set it up.",
+    expect(describeChatError(body, t)).toEqual({
+      title: "AI provider not configured",
+      detail:
+        "The AI provider is not configured. Ask an administrator to set it up.",
+    });
+  });
+
+  it("classifies a dropped connection (ECONNRESET) as a lost-connection error", () => {
+    expect(
+      describeChatError("Cannot connect to API: read ECONNRESET", t).title,
+    ).toBe("Lost connection to the AI provider");
+  });
+
+  it('classifies "fetch failed" as a lost-connection error', () => {
+    expect(describeChatError("fetch failed", t).title).toBe(
+      "Lost connection to the AI provider",
    );
  });

-  it('falls back to the generic message for "An error occurred."', () => {
-    expect(describeChatError("An error occurred.", t)).toBe(
-      "The AI agent could not respond. Please try again.",
+  it("classifies ETIMEDOUT as a timeout", () => {
+    expect(describeChatError("ETIMEDOUT", t).title).toBe(
+      "The AI provider timed out",
    );
  });

-  it('falls back to the generic message for "Internal server error"', () => {
-    expect(describeChatError("Internal server error", t)).toBe(
-      "The AI agent could not respond. Please try again.",
+  it('classifies "504: Gateway Timeout" as a timeout', () => {
+    expect(describeChatError("504: Gateway Timeout", t).title).toBe(
+      "The AI provider timed out",
    );
  });

-  it("falls back to the generic message for empty input", () => {
-    expect(describeChatError("", t)).toBe(
-      "The AI agent could not respond. Please try again.",
+  it('classifies "429: Too Many Requests" as rate limited', () => {
+    expect(describeChatError("429: Too Many Requests", t).title).toBe(
+      "Rate limited by the AI provider",
+    );
+  });
+
+  it('does NOT misclassify a body that merely contains "403" as disabled', () => {
+    // Regression intent: a provider message mentioning the number 403 must never
+    // be folded into the "AI chat is disabled" gating heading. Here the leading
+    // 429 status wins (the classifier has no bare-403 rule at all), so it maps to
+    // the rate-limit category instead.
+    const view = describeChatError("429: rate limited after 403 attempts", t);
+    expect(view.title).toBe("Rate limited by the AI provider");
+    expect(view.title).not.toBe("AI chat is disabled");
+  });
+
+  it("classifies a context-window overflow as too-large", () => {
+    expect(
+      describeChatError(
+        "This model's maximum context length is 128000 tokens",
+        t,
+      ).title,
+    ).toBe("The conversation is too large");
+  });
+
+  it('classifies "402: Insufficient credits" as quota exceeded', () => {
+    expect(describeChatError("402: Insufficient credits", t).title).toBe(
+      "AI provider quota exceeded",
+    );
+  });
+
+  it('classifies "401: Unauthorized" as an auth failure', () => {
+    expect(describeChatError("401: Unauthorized", t).title).toBe(
+      "AI provider authentication failed",
+    );
+  });
+
+  it("falls back to the generic heading + detail for empty input", () => {
+    expect(describeChatError("", t)).toEqual({
+      title: "Something went wrong",
+      detail: "The AI agent could not respond. Please try again.",
+    });
+  });
+
+  it('falls back to the generic heading + detail for "An error occurred."', () => {
+    expect(describeChatError("An error occurred.", t)).toEqual({
+      title: "Something went wrong",
+      detail: "The AI agent could not respond. Please try again.",
+    });
+  });
+
+  it('falls back to the generic heading + detail for "Internal server error"', () => {
+    expect(describeChatError("Internal server error", t)).toEqual({
+      title: "Something went wrong",
+      detail: "The AI agent could not respond. Please try again.",
+    });
+  });
+
+  it("surfaces an unknown-but-informative provider detail verbatim under the generic heading", () => {
+    expect(describeChatError("418: I'm a teapot", t)).toEqual({
+      title: "Something went wrong",
+      detail: "418: I'm a teapot",
+    });
+  });
+
+  it("does NOT treat a number inside the response body as a leading status code (no auth)", () => {
+    // The real status (500) leads the string; the "401" lives in the snippet and
+    // must not trigger the auth category. The verbatim provider text is surfaced.
+    const body =
+      "500: Server error | response body: model gpt-4o-401-preview not found";
+    expect(describeChatError(body, t)).toEqual({
+      title: "Something went wrong",
+      detail: body,
+    });
+  });
+
+  it("does NOT treat a passing mention of billing as a quota error", () => {
+    // "billing" is no longer a quota signature; the verbatim text is surfaced.
+    const body = "502: Bad Gateway | response body: see our billing page";
+    expect(describeChatError(body, t)).toEqual({
+      title: "Something went wrong",
+      detail: body,
+    });
+  });
+
+  it('still rate-limits "429: rate limited after 403 attempts" and never disables', () => {
+    const view = describeChatError("429: rate limited after 403 attempts", t);
+    expect(view.title).toBe("Rate limited by the AI provider");
+    expect(view.title).not.toBe("AI chat is disabled");
+  });
+
+  it('does NOT treat "rate limit" inside the response body as a rate-limit error', () => {
+    // The textual rate-limit phrase lives only in the response-body snippet, and
+    // the leading 500 is not a classified numeric code, so it must not leak into
+    // the rate-limit category. (The detail itself falls back to the generic line
+    // here because the leading message contains "Internal Server Error", which
+    // providerDetail suppresses — the title is what this case pins.)
+    const body =
+      "500: Internal Server Error | response body: rate limit info: see our docs";
+    expect(describeChatError(body, t).title).toBe("Something went wrong");
+    expect(describeChatError(body, t).title).not.toBe(
+      "Rate limited by the AI provider",
+    );
+  });
+
+  it('does NOT treat ETIMEDOUT inside the response body as a timeout', () => {
+    // The 503 leads the string but is not a classified numeric code, and the
+    // ETIMEDOUT signature appears only in the body, so it must not leak into the
+    // timeout category; the verbatim text is surfaced under the generic heading.
+    const body = "503: x | response body: ETIMEDOUT appears in this log line";
+    expect(describeChatError(body, t)).toEqual({
+      title: "Something went wrong",
+      detail: body,
+    });
+    expect(describeChatError(body, t).title).not.toBe(
+      "The AI provider timed out",
    );
  });
 });
--- a/apps/client/src/features/ai-chat/utils/error-message.ts
+++ b/apps/client/src/features/ai-chat/utils/error-message.ts
@@ -1,24 +1,174 @@
 /**
- * Turn an AI chat error message into a friendly inline string. Used for BOTH the
- * live `useChat().error` (its `.message`) and a persisted assistant error stored
- * in `metadata.error`. Our own gating responses arrive as a raw NestJS JSON error
- * body carrying a numeric "statusCode" field (matched precisely, not by bare
- * substring, so a provider message that merely contains "403"/"503"/"disabled" is
- * never misclassified). Everything else — provider stream failures forwarded as
- * "<status>: <message>" (402 credits, 429 rate limit, ...) — is surfaced verbatim.
+ * A classified AI chat error: a short bold heading naming the cause category and
+ * a one-line human-readable detail / next step. Both strings are already passed
+ * through `t`, so callers render them directly.
+ */
+export interface ChatErrorView {
+  title: string;
+  detail: string;
+}
+
+/**
+ * Turn an AI chat error message into a friendly heading + detail. Used for BOTH
+ * the live `useChat().error` (its `.message`) and a persisted assistant error in
+ * `metadata.error`. Our own gating responses arrive as a raw NestJS JSON error
+ * body carrying a numeric "statusCode" (matched precisely, not by bare substring,
+ * so a provider message that merely contains "403"/"503" is never misclassified).
+ * Known provider/network failures (connection reset, timeout, rate limit, context
+ * overflow, quota, auth) are mapped to a clear category; anything else falls back
+ * to the raw provider detail (or a generic line) under "Something went wrong".
 */
 export function describeChatError(
  message: string,
  t: (key: string) => string,
-): string {
+): ChatErrorView {
  const msg = message ?? "";
+
  if (/"statusCode"\s*:\s*403\b/.test(msg)) {
-    return t("AI chat is disabled for this workspace.");
+    return {
+      title: t("AI chat is disabled"),
+      detail: t("AI chat is disabled for this workspace."),
+    };
  }
  if (/"statusCode"\s*:\s*503\b/.test(msg)) {
-    return t("The AI provider is not configured. Ask an administrator to set it up.");
+    return {
+      title: t("AI provider not configured"),
+      detail: t(
+        "The AI provider is not configured. Ask an administrator to set it up.",
+      ),
+    };
  }
-  return providerDetail(msg) ?? t("The AI agent could not respond. Please try again.");
+
+  const category = classifyProviderError(msg);
+  if (category) {
+    return { title: t(category.title), detail: t(category.detail) };
+  }
+
+  // Unknown error: surface the raw provider detail when it is informative,
+  // otherwise a generic line. The heading stays the original generic one.
+  return {
+    title: t("Something went wrong"),
+    detail:
+      providerDetail(msg) ??
+      t("The AI agent could not respond. Please try again."),
+  };
+}
+
+interface ErrorCategory {
+  /** English key for the bold heading. */
+  title: string;
+  /** English key for the one-line explanation. */
+  detail: string;
+}
+
+/**
+ * Map a provider/network error string to a friendly category. Order matters: the
+ * most specific signatures are tested first. Returns null when nothing matches,
+ * so the caller can fall back to the raw provider text. The English keys returned
+ * here are passed through `t` by the caller.
+ *
+ * The server formats provider errors as "<statusCode>: <message> | response body:
+ * <snippet>" (see server-side describeProviderError), so the HTTP status is always
+ * the LEADING token. We match a numeric code only when it leads the string, so a
+ * number inside the response-body snippet never triggers a category; textual
+ * signatures are matched only against the leading message (before the response
+ * body), so a phrase inside the snippet never triggers a category either.
+ */
+function classifyProviderError(msg: string): ErrorCategory | null {
+  const code = /^\s*(\d{3})\b/.exec(msg)?.[1] ?? "";
+  // The server appends "| response body: <snippet>" to provider errors; match
+  // textual signatures only against the leading provider message so a phrase
+  // inside the response-body snippet never triggers a wrong category. The numeric
+  // status code is read from the start of the full string above.
+  const head = msg.split(/\|\s*response body:/i)[0];
+
+  // The browser's OWN fetch-failure messages — WebKit/Safari "Load failed",
+  // Chrome "Failed to fetch", Firefox "NetworkError when attempting to fetch
+  // resource". These mean the streaming connection between the browser and THIS
+  // server (/api/ai-chat/stream) dropped mid-answer: the browser<->server link,
+  // NOT the server<->AI-provider link, so do NOT blame the provider. A failed
+  // fetch carries no status/body, so the browser has no further detail — the real
+  // cause is in the server logs (the stream controller logs the disconnect) and
+  // the reverse proxy (often buffering or timing out the long-lived SSE).
+  if (/failed to fetch|load failed|networkerror/i.test(head)) {
+    return {
+      title: "Lost connection to the server",
+      detail:
+        "The streaming connection to the server dropped before the answer finished. The browser reports no further detail — the cause is in the server logs and the reverse proxy (often buffering or timing out the stream). Reload and try again.",
+    };
+  }
+  // Connection dropped / provider unreachable. ECONNRESET is the production case:
+  // the LLM socket was reset mid-stream (surfaced by the server's error
+  // formatter). "terminated" is scoped to a connection/stream context so it does
+  // not match benign "... was terminated" messages.
+  if (
+    /ECONNRESET|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|EPIPE|socket hang up|cannot connect|fetch failed|network error|connection (?:error|closed|reset|terminated)|stream terminated/i.test(
+      head,
+    )
+  ) {
+    return {
+      title: "Lost connection to the AI provider",
+      detail:
+        "The connection to the AI provider dropped before the answer finished. Please try again.",
+    };
+  }
+  // Timeout.
+  if (
+    code === "504" ||
+    code === "408" ||
+    /ETIMEDOUT|timed[\s-]?out|\btimeout\b/i.test(head)
+  ) {
+    return {
+      title: "The AI provider timed out",
+      detail: "The AI provider took too long to respond. Please try again.",
+    };
+  }
+  // Rate limited.
+  if (code === "429" || /rate[\s-]?limit|too many requests/i.test(head)) {
+    return {
+      title: "Rate limited by the AI provider",
+      detail:
+        "The AI provider is rate-limiting requests. Wait a moment and try again.",
+    };
+  }
+  // Context window / token budget exceeded.
+  if (
+    code === "413" ||
+    /context[\s_-]?(?:length|window)|maximum context|context_length_exceeded|too many tokens|maximum[^.]*tokens|reduce the length/i.test(
+      head,
+    )
+  ) {
+    return {
+      title: "The conversation is too large",
+      detail:
+        "The document and search results exceeded the model's context window. Start a new chat or narrow the request.",
+    };
+  }
+  // Out of credits / quota / payment required.
+  if (
+    code === "402" ||
+    /payment required|insufficient (?:credits|quota|funds|balance)|out of credits|quota (?:exceeded|exhausted)/i.test(
+      head,
+    )
+  ) {
+    return {
+      title: "AI provider quota exceeded",
+      detail:
+        "The AI provider rejected the request because of credits or quota. Check the provider account.",
+    };
+  }
+  // Authentication / bad API key.
+  if (
+    code === "401" ||
+    /\bunauthorized\b|invalid api key|user not found|\bauthentication\b/i.test(head)
+  ) {
+    return {
+      title: "AI provider authentication failed",
+      detail:
+        "The AI provider rejected the credentials. Ask an administrator to check the API key.",
+    };
+  }
+  return null;
 }

 /**
--- a/apps/client/src/features/ai-chat/utils/message-content.test.ts
+++ b/apps/client/src/features/ai-chat/utils/message-content.test.ts
@@ -0,0 +1,94 @@
+import { describe, expect, it } from "vitest";
+import type { UIMessage } from "@ai-sdk/react";
+import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts";
+
+/**
+ * Pure-helper tests for `assistantMessageHasVisibleContent`, the single source of
+ * truth shared by MessageItem (whether to render the bubble) and
+ * typingIndicatorShowsName (whether the standalone indicator owns the name). It
+ * must mirror MessageItem's render decisions exactly so exactly one element owns
+ * the agent name during the pre-content "thinking" gap.
+ */
+const msg = (
+  parts: UIMessage["parts"],
+  metadata?: unknown,
+): UIMessage =>
+  ({
+    id: Math.random().toString(),
+    role: "assistant",
+    parts,
+    metadata,
+  }) as UIMessage;
+
+describe("assistantMessageHasVisibleContent", () => {
+  it("is false for an empty text part", () => {
+    expect(assistantMessageHasVisibleContent(msg([{ type: "text", text: "" }]))).toBe(false);
+  });
+
+  it("is false for a whitespace-only text part", () => {
+    expect(assistantMessageHasVisibleContent(msg([{ type: "text", text: "   " }]))).toBe(false);
+  });
+
+  it("is true for a non-empty text part", () => {
+    expect(assistantMessageHasVisibleContent(msg([{ type: "text", text: "answer" }]))).toBe(true);
+  });
+
+  it("is true for a tool part", () => {
+    const toolPart = { type: "tool-getPage", state: "output-available" } as unknown as UIMessage["parts"][number];
+    expect(assistantMessageHasVisibleContent(msg([toolPart]))).toBe(true);
+  });
+
+  it("is true when metadata.error is set (persisted error banner)", () => {
+    expect(
+      assistantMessageHasVisibleContent(msg([{ type: "text", text: "" }], { error: "boom" })),
+    ).toBe(true);
+  });
+
+  it("is true when metadata.finishReason is 'aborted' (persisted stopped notice)", () => {
+    expect(
+      assistantMessageHasVisibleContent(msg([], { finishReason: "aborted" })),
+    ).toBe(true);
+  });
+
+  it("is false for a message with no parts and no metadata", () => {
+    expect(assistantMessageHasVisibleContent(msg([]))).toBe(false);
+  });
+
+  it("is false for an unsupported part kind (reasoning)", () => {
+    const reasoning = { type: "reasoning", text: "let me think" } as unknown as UIMessage["parts"][number];
+    expect(assistantMessageHasVisibleContent(msg([reasoning]))).toBe(false);
+  });
+
+  it("is true for a running tool part (input-available)", () => {
+    // Tool visibility does not depend on tool state: MessageItem renders a
+    // ToolCallCard for any tool part, so a still-running tool is visible.
+    const runningTool = { type: "tool-getPage", state: "input-available" } as unknown as UIMessage["parts"][number];
+    expect(assistantMessageHasVisibleContent(msg([runningTool]))).toBe(true);
+  });
+
+  it("is true for an empty leading text part followed by a non-empty one", () => {
+    // An empty leading text part followed by a non-empty one is still visible
+    // (mirrors the real streaming sequence where text arrives incrementally).
+    expect(
+      assistantMessageHasVisibleContent(
+        msg([{ type: "text", text: "" }, { type: "text", text: "answer" }]),
+      ),
+    ).toBe(true);
+  });
+
+  it("is false for an empty completed turn (finishReason 'stop')", () => {
+    // A completed turn with no text/tools and a non-aborted finishReason renders
+    // nothing — this is intentional (hiding a dangling name-only row), distinct
+    // from the `aborted`/`error` cases which DO render.
+    expect(
+      assistantMessageHasVisibleContent(msg([{ type: "text", text: "" }], { finishReason: "stop" })),
+    ).toBe(false);
+  });
+
+  it("is false for a parts-less message (the `?? []` guard makes it safe)", () => {
+    // The `?? []` guard makes a parts-less object safe instead of throwing.
+    expect(
+      assistantMessageHasVisibleContent({ id: "x", role: "assistant" } as unknown as UIMessage),
+    ).toBe(false);
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/message-content.ts
+++ b/apps/client/src/features/ai-chat/utils/message-content.ts
@@ -0,0 +1,39 @@
+import type { UIMessage } from "@ai-sdk/react";
+import { isToolPart } from "@/features/ai-chat/utils/tool-parts.tsx";
+
+/**
+ * Whether an assistant `UIMessage` has anything visible to render in its bubble.
+ *
+ * This mirrors MessageItem's render decisions EXACTLY and is the single source of
+ * truth shared by both MessageItem (to decide whether to render the bubble at all)
+ * and typingIndicatorShowsName (to decide whether the standalone "Thinking…"
+ * indicator owns the dimmed agent-name label). Keeping one helper guarantees the
+ * two stay in lockstep, so exactly one element owns the name during the pre-content
+ * "thinking" gap and the layout never reflows mid-stream.
+ *
+ * An assistant message has visible content iff ANY of:
+ *  - a `text` part whose trimmed length > 0 (non-empty markdown), OR
+ *  - ANY tool part (`isToolPart(part.type)`), OR
+ *  - `metadata.error` is truthy (a persisted error banner renders), OR
+ *  - `metadata.finishReason === "aborted"` (a persisted "response stopped" notice).
+ * Empty/whitespace-only text parts and unsupported part kinds (reasoning, sources,
+ * files, step-start) are NOT visible.
+ */
+export function assistantMessageHasVisibleContent(message: UIMessage): boolean {
+  const meta = message.metadata as
+    | { error?: string; finishReason?: string }
+    | undefined;
+  // Persisted errored/aborted turns always render their banner/notice.
+  if (meta?.error) return true;
+  if (meta?.finishReason === "aborted") return true;
+
+  // `parts` may be empty (a nascent streaming message has no parts yet).
+  // `?? []` also guards a sparse/partial message object (metadata-only, no
+  // `parts`) so iterating cannot throw — it does not change behavior for any
+  // current input.
+  for (const part of message.parts ?? []) {
+    if (part.type === "text" && part.text.trim().length > 0) return true;
+    if (isToolPart(part.type)) return true;
+  }
+  return false;
+}
--- a/apps/client/src/features/ai-chat/utils/message-signature.test.ts
+++ b/apps/client/src/features/ai-chat/utils/message-signature.test.ts
@@ -0,0 +1,241 @@
+import { describe, expect, it } from "vitest";
+import type { UIMessage } from "@ai-sdk/react";
+import { messageSignature } from "@/features/ai-chat/utils/message-signature.ts";
+
+/**
+ * Pure-helper tests for `messageSignature`, the cheap per-message content
+ * signature that drives MessageItem's memo (a streaming row's signature must
+ * change on every delta so it re-renders, while a finalized row's stays stable
+ * so it is skipped). Each test exercises ONE change signal and asserts it flips
+ * the signature; a content-identical clone must keep an EQUAL signature.
+ *
+ * The signature embeds `message.id` and `message.role`, so the `msg` factory
+ * uses a FIXED id/role here (not `Math.random()`): otherwise two messages with
+ * identical content would get different signatures and the negative case would
+ * be impossible to express.
+ */
+const msg = (
+  parts: UIMessage["parts"],
+  metadata?: unknown,
+): UIMessage =>
+  ({
+    id: "m1",
+    role: "assistant",
+    parts,
+    metadata,
+  }) as UIMessage;
+
+describe("messageSignature", () => {
+  it("changes when a text part grows", () => {
+    const before = msg([{ type: "text", text: "alpha" }]);
+    const after = msg([{ type: "text", text: "alpha beta" }]);
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("changes when a new part is appended", () => {
+    const before = msg([{ type: "text", text: "alpha" }]);
+    const after = msg([
+      { type: "text", text: "alpha" },
+      { type: "text", text: "beta" },
+    ]);
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("changes when a part's state flips", () => {
+    const before = msg([
+      { type: "tool-getPage", state: "input-streaming" } as never,
+    ]);
+    const after = msg([
+      { type: "tool-getPage", state: "output-available" } as never,
+    ]);
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("changes when a tool part gains an output", () => {
+    const before = msg([
+      { type: "tool-getPage", state: "output-available" } as never,
+    ]);
+    const after = msg([
+      {
+        type: "tool-getPage",
+        state: "output-available",
+        output: { ok: true },
+      } as never,
+    ]);
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("changes when a part gains an errorText", () => {
+    const before = msg([
+      { type: "tool-getPage", state: "output-error" } as never,
+    ]);
+    const after = msg([
+      {
+        type: "tool-getPage",
+        state: "output-error",
+        errorText: "boom",
+      } as never,
+    ]);
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("changes when usage.reasoningTokens arrives on finish-step (text/state already frozen)", () => {
+    // The specifically-commented edge case: the authoritative turn total lands on
+    // the final finish-step AFTER the reasoning text length and state are frozen.
+    // Only the token count appears between these two snapshots, so the signature
+    // MUST still flip — otherwise the "Thinking · N tokens" header would never
+    // snap from the live estimate to the exact figure.
+    const before = msg([
+      { type: "reasoning", text: "thinking", state: "done" } as never,
+    ]);
+    const after = msg(
+      [{ type: "reasoning", text: "thinking", state: "done" } as never],
+      { usage: { reasoningTokens: 42 } },
+    );
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("changes when metadata.error appears", () => {
+    const before = msg([{ type: "text", text: "answer" }]);
+    const after = msg([{ type: "text", text: "answer" }], { error: "boom" });
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("changes when metadata.finishReason changes (e.g. to 'aborted')", () => {
+    const before = msg([{ type: "text", text: "answer" }], {
+      finishReason: "stop",
+    });
+    const after = msg([{ type: "text", text: "answer" }], {
+      finishReason: "aborted",
+    });
+    expect(messageSignature(before)).not.toBe(messageSignature(after));
+  });
+
+  it("is UNCHANGED for a content-identical clone (different object, same values)", () => {
+    // A finalized row that is re-created as a fresh object (different parts array
+    // by reference, same parts by value) must keep an EQUAL signature, so the
+    // memo skips re-rendering it.
+    const a = msg([
+      { type: "text", text: "alpha" },
+      { type: "tool-getPage", state: "output-available", output: { ok: true } } as never,
+    ]);
+    const b = msg([
+      { type: "text", text: "alpha" },
+      { type: "tool-getPage", state: "output-available", output: { ok: true } } as never,
+    ]);
+    expect(a).not.toBe(b);
+    expect(messageSignature(a)).toBe(messageSignature(b));
+  });
+});
+
+/**
+ * Per-part-kind coupling guard for the load-bearing invariant documented at the
+ * top of message-signature.ts: the signature MUST sample every VISIBLE field the
+ * MessageItem render body draws, or the memo freezes a stale row. This is an
+ * executable lock for the part kinds rendered TODAY — read alongside
+ * `MessageItem` (message-item.tsx) and the `assistantMessageHasVisibleContent`
+ * helper (message-content.ts), which "mirrors MessageItem's render decisions
+ * EXACTLY". For each kind, mutating a field the render body DRAWS must flip the
+ * signature, so dropping one of the fields covered below from the signature
+ * fails its assertion — that is the guard. It CANNOT catch a newly rendered
+ * field that has no assertion here yet. (This intentionally stops short of the
+ * render-descriptor refactor: adding a part kind or a visible field still
+ * requires a human to extend both the signature and this block.)
+ */
+describe("messageSignature ↔ render coupling (per visible part kind)", () => {
+  describe("text part — render draws part.text (MarkdownPart text={part.text})", () => {
+    it("flips when the visible text changes", () => {
+      // Streaming is append-only, so the visible text only grows; the signature
+      // samples its length, so the growth is the change signal.
+      const before = msg([{ type: "text", text: "answer" }]);
+      const after = msg([{ type: "text", text: "answer extended" }]);
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+  });
+
+  describe("reasoning part — render draws text + tokens (ReasoningBlock)", () => {
+    it("flips when the visible reasoning text changes", () => {
+      const before = msg([
+        { type: "reasoning", text: "think", state: "streaming" } as never,
+      ]);
+      const after = msg([
+        { type: "reasoning", text: "think harder", state: "streaming" } as never,
+      ]);
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+
+    it("flips when the visible token count (metadata.usage.reasoningTokens) lands", () => {
+      // The header's "Thinking · N tokens" reads reasoningTokensForPart, fed by
+      // metadata.usage.reasoningTokens — a VISIBLE field that arrives on the final
+      // finish-step after text length and state are frozen.
+      const before = msg([
+        { type: "reasoning", text: "think", state: "done" } as never,
+      ]);
+      const after = msg(
+        [{ type: "reasoning", text: "think", state: "done" } as never],
+        { usage: { reasoningTokens: 99 } },
+      );
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+  });
+
+  describe("tool-* part — render draws state/errorText/citations (ToolCallCard)", () => {
+    it("flips when the run state changes (running ↔ done icon + label)", () => {
+      // toolRunState(part.state) selects the spinner/check/error icon.
+      const before = msg([
+        { type: "tool-getPage", state: "input-available" } as never,
+      ]);
+      const after = msg([
+        { type: "tool-getPage", state: "output-available" } as never,
+      ]);
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+
+    it("flips when output arrives (drives the rendered citation links)", () => {
+      // toolCitations reads part.output to render the "/p/{id}" anchors.
+      const before = msg([
+        { type: "tool-getPage", state: "output-available" } as never,
+      ]);
+      const after = msg([
+        {
+          type: "tool-getPage",
+          state: "output-available",
+          output: { id: "page-1", title: "Doc" },
+        } as never,
+      ]);
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+
+    it("flips when errorText appears (the visible red error detail line)", () => {
+      const before = msg([
+        { type: "tool-getPage", state: "output-error" } as never,
+      ]);
+      const after = msg([
+        {
+          type: "tool-getPage",
+          state: "output-error",
+          errorText: "permission denied",
+        } as never,
+      ]);
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+  });
+
+  describe("metadata banners — render draws error / aborted notices", () => {
+    it("flips when metadata.error appears (ChatErrorAlert banner)", () => {
+      const before = msg([{ type: "text", text: "answer" }]);
+      const after = msg([{ type: "text", text: "answer" }], { error: "boom" });
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+
+    it("flips when metadata.finishReason becomes 'aborted' (ChatStoppedNotice)", () => {
+      const before = msg([{ type: "text", text: "answer" }], {
+        finishReason: "stop",
+      });
+      const after = msg([{ type: "text", text: "answer" }], {
+        finishReason: "aborted",
+      });
+      expect(messageSignature(before)).not.toBe(messageSignature(after));
+    });
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/message-signature.ts
+++ b/apps/client/src/features/ai-chat/utils/message-signature.ts
@@ -0,0 +1,44 @@
+import type { UIMessage } from "@ai-sdk/react";
+
+/** Compact change signature for one chat row, compared by the MessageItem memo:
+ *  it must differ whenever something VISIBLE in the row differs. It leans on
+ *  streaming being APPEND-ONLY (text parts only grow, parts are only appended, a
+ *  tool/text part flips state once), so each part is sampled as [type, text
+ *  length, state, errorText presence, output presence] and joined with the
+ *  persisted metadata (error, finishReason, usage.reasoningTokens) — no full
+ *  string comparison on every delta. WARNING — load-bearing: if VISIBLE content
+ *  can ever change WITHOUT moving one of those samples (e.g. a tool that streams
+ *  `preliminary` output, or a client-side regenerate that edits a finalized row
+ *  in place), extend this signature or the memo will freeze a stale row. */
+export function messageSignature(message: UIMessage): string {
+  type SampledPart = {
+    type: string;
+    text?: string;
+    state?: string;
+    errorText?: string;
+    output?: unknown;
+  };
+  type SampledMeta = {
+    error?: string;
+    finishReason?: string;
+    usage?: { reasoningTokens?: number };
+  };
+  // One "type:textLength:state:hasErrorText:hasOutput" tuple per part.
+  const samplePart = (raw: unknown): string => {
+    const part = raw as SampledPart;
+    const length = part.text?.length ?? 0;
+    const hasError = part.errorText ? 1 : 0;
+    const hasOutput = part.output !== undefined ? 1 : 0;
+    return [part.type, length, part.state ?? "", hasError, hasOutput].join(":");
+  };
+  const partsSignature = message.parts.map((p) => samplePart(p)).join("|");
+  const meta = message.metadata as SampledMeta | undefined;
+  // usage.reasoningTokens is sampled because the authoritative turn total lands
+  // on the final finish-step, after reasoning text length and state are frozen;
+  // leaving it out would skip the re-render that snaps the header to that total.
+  const error = meta?.error ?? "";
+  const finishReason = meta?.finishReason ?? "";
+  const reasoningTokens = meta?.usage?.reasoningTokens ?? "";
+  const head = `${message.id}#${message.role}#${partsSignature}`;
+  return `${head}#${error}#${finishReason}#${reasoningTokens}`;
+}
--- a/apps/client/src/features/ai-chat/utils/queue-helpers.test.ts
+++ b/apps/client/src/features/ai-chat/utils/queue-helpers.test.ts
@@ -0,0 +1,107 @@
+import { describe, it, expect } from "vitest";
+import {
+  enqueueMessage,
+  dequeue,
+  removeQueuedById,
+  type QueuedMessage,
+} from "./queue-helpers";
+
+describe("enqueueMessage", () => {
+  it("appends a message to the end of the queue", () => {
+    const first: QueuedMessage = { id: "a", text: "first" };
+    const second: QueuedMessage = { id: "b", text: "second" };
+    expect(enqueueMessage([first], second)).toEqual([
+      { id: "a", text: "first" },
+      { id: "b", text: "second" },
+    ]);
+  });
+
+  it("does not mutate the input queue", () => {
+    const original: QueuedMessage[] = [{ id: "a", text: "first" }];
+    enqueueMessage(original, { id: "b", text: "second" });
+    expect(original).toEqual([{ id: "a", text: "first" }]);
+  });
+});
+
+describe("dequeue", () => {
+  it("returns {head:null, rest:[]} for an empty queue", () => {
+    const emptied = dequeue([]);
+    expect(emptied).toEqual({ head: null, rest: [] });
+  });
+
+  it("returns the first item as head and the remainder as rest", () => {
+    const split = dequeue([
+      { id: "a", text: "first" },
+      { id: "b", text: "second" },
+      { id: "c", text: "third" },
+    ]);
+    expect(split.head).toEqual({ id: "a", text: "first" });
+    expect(split.rest).toEqual([
+      { id: "b", text: "second" },
+      { id: "c", text: "third" },
+    ]);
+  });
+
+  it("does not mutate the input queue", () => {
+    const original: QueuedMessage[] = [
+      { id: "a", text: "first" },
+      { id: "b", text: "second" },
+    ];
+    dequeue(original);
+    expect(original).toEqual([
+      { id: "a", text: "first" },
+      { id: "b", text: "second" },
+    ]);
+  });
+});
+
+describe("removeQueuedById", () => {
+  it("removes the matching id and leaves the others", () => {
+    const pending: QueuedMessage[] = [
+      { id: "a", text: "first" },
+      { id: "b", text: "second" },
+      { id: "c", text: "third" },
+    ];
+    const remaining = removeQueuedById(pending, "b");
+    expect(remaining).toEqual([
+      { id: "a", text: "first" },
+      { id: "c", text: "third" },
+    ]);
+  });
+
+  it("returns an equivalent list when the id is not present", () => {
+    const pending: QueuedMessage[] = [{ id: "a", text: "first" }];
+    const remaining = removeQueuedById(pending, "missing");
+    // No id matched, so every entry is still there.
+    expect(remaining).toEqual([{ id: "a", text: "first" }]);
+  });
+
+  it("does not mutate the input queue", () => {
+    const pending: QueuedMessage[] = [
+      { id: "a", text: "first" },
+      { id: "b", text: "second" },
+    ];
+    removeQueuedById(pending, "a");
+    expect(pending).toEqual([
+      { id: "a", text: "first" },
+      { id: "b", text: "second" },
+    ]);
+  });
+});
+
+describe("FIFO order", () => {
+  it("preserves order across enqueue -> dequeue", () => {
+    const one = enqueueMessage([], { id: "1", text: "one" });
+    const two = enqueueMessage(one, { id: "2", text: "two" });
+    let pending = enqueueMessage(two, { id: "3", text: "three" });
+
+    // Drain from the front, recording the order the texts come out in.
+    const drained: string[] = [];
+    while (pending.length > 0) {
+      const step = dequeue(pending);
+      if (step.head) drained.push(step.head.text);
+      pending = step.rest;
+    }
+    expect(drained).toEqual(["one", "two", "three"]);
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/queue-helpers.ts
+++ b/apps/client/src/features/ai-chat/utils/queue-helpers.ts
@@ -0,0 +1,34 @@
+// Pure FIFO helpers for the AI-chat "send while the agent is busy" queue.
+// Kept side-effect free so they can be unit-tested without React.
+
+export interface QueuedMessage { // one entry of the AI-chat FIFO queue
+  id: string; // removeQueuedById matches entries on this
+  text: string; // the queued message text
+}
+
+/** Returns a fresh queue with `message` placed after every existing entry. */
+export function enqueueMessage(
+  queue: QueuedMessage[],
+  message: QueuedMessage,
+): QueuedMessage[] {
+  return queue.concat([message]);
+}
+
+/** Peel the first queued item off as `head` and hand back everything after it
+ *  as `rest`; an empty queue yields a null `head`. The input is left untouched. */
+export function dequeue(queue: QueuedMessage[]): {
+  head: QueuedMessage | null;
+  rest: QueuedMessage[];
+} {
+  const head = queue.length === 0 ? null : queue[0];
+  const rest = queue.slice(1);
+  return { head, rest };
+}
+
+/** Returns a fresh queue without the entry whose id equals `id`, if any. */
+export function removeQueuedById(
+  queue: QueuedMessage[],
+  id: string,
+): QueuedMessage[] {
+  return queue.filter((queued) => !(queued.id === id));
+}
--- a/apps/client/src/features/ai-chat/utils/reasoning-tokens.test.ts
+++ b/apps/client/src/features/ai-chat/utils/reasoning-tokens.test.ts
@@ -0,0 +1,56 @@
+import { describe, expect, it } from "vitest";
+import type { UIMessage } from "@ai-sdk/react";
+import { reasoningTokensForPart } from "@/features/ai-chat/utils/reasoning-tokens.ts";
+
+/**
+ * Unit tests for the pure helper `reasoningTokensForPart` and its #151
+ * anti-double-count rule: the authoritative `usage.reasoningTokens` is the TURN
+ * TOTAL, so it is attributed only when the turn has exactly one reasoning part.
+ * With several reasoning parts (or no authoritative usage) each part falls back
+ * to its own per-part estimate, which the helper signals with `undefined`.
+ *
+ * `msg` builds a minimal assistant message from `parts` and optional `metadata`;
+ * the id is random because the helper under test never reads it.
+ */
+const msg = (
+  parts: UIMessage["parts"],
+  metadata?: unknown,
+): UIMessage => {
+  const id = Math.random().toString();
+  return { id, role: "assistant", parts, metadata } as UIMessage;
+};
+
+describe("reasoningTokensForPart", () => {
+  it("single reasoning part -> the authoritative turn total", () => {
+    // A lone reasoning part is allowed to show the exact turn total.
+    const parts: UIMessage["parts"] = [
+      { type: "reasoning", text: "thinking…" } as never,
+      { type: "text", text: "answer" },
+    ];
+    const metadata = { usage: { reasoningTokens: 42 } };
+    const message = msg(parts, metadata);
+    expect(reasoningTokensForPart(message)).toBe(42);
+  });
+
+  it("multiple reasoning parts -> undefined (each estimates on its own)", () => {
+    const parts: UIMessage["parts"] = [
+      { type: "reasoning", text: "step one" } as never,
+      { type: "reasoning", text: "step two" } as never,
+      { type: "text", text: "answer" },
+    ];
+    const metadata = { usage: { reasoningTokens: 99 } };
+    const message = msg(parts, metadata);
+    // An authoritative total exists, yet with two reasoning parts each one must
+    // estimate: handing the total to either would double-count against the
+    // other's estimate.
+    expect(reasoningTokensForPart(message)).toBeUndefined();
+  });
+
+  it("no authoritative usage -> undefined even for a single reasoning part", () => {
+    const parts: UIMessage["parts"] = [
+      { type: "reasoning", text: "thinking…" } as never,
+      { type: "text", text: "answer" },
+    ];
+    expect(reasoningTokensForPart(msg(parts))).toBeUndefined();
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/reasoning-tokens.ts
+++ b/apps/client/src/features/ai-chat/utils/reasoning-tokens.ts
@@ -0,0 +1,34 @@
+import type { UIMessage } from "@ai-sdk/react";
+
+/**
+ * Pick the authoritative reasoning-token figure that a `reasoning` part of an
+ * assistant message may display, or `undefined` when the part has to rely on
+ * its own per-part estimate instead.
+ *
+ * `usage.reasoningTokens` counts the WHOLE TURN. It can therefore be shown on a
+ * block only when the message holds exactly ONE reasoning part (the common
+ * one-step turn). When there are SEVERAL reasoning parts (a multi-step agent
+ * turn), handing the turn total to any one block would double-count against
+ * the other blocks' estimates (#151 review anti-double-count rule), so every
+ * block estimates. Without authoritative usage every part estimates as well.
+ *
+ * @param message - assistant message whose `parts` and `metadata.usage` are read
+ * @returns `metadata.usage.reasoningTokens` in the single-reasoning-part case;
+ *   `undefined` otherwise (the caller then estimates from the part text)
+ */
+export function reasoningTokensForPart(
+  message: UIMessage,
+): number | undefined {
+  type UsageMeta = { usage?: { reasoningTokens?: number } };
+  const parts = message.parts ?? [];
+  const reasoningParts = parts.filter((part) => part.type === "reasoning");
+
+  // Zero or several reasoning parts: nobody gets the turn total, so every
+  // block keeps its own estimate.
+  if (reasoningParts.length !== 1) return undefined;
+
+  // A lone reasoning part owns the whole turn total (itself possibly undefined
+  // when no authoritative usage was recorded).
+  const meta = message.metadata as UsageMeta | undefined;
+  return meta?.usage?.reasoningTokens;
+}
--- a/apps/client/src/features/ai-chat/utils/role-card-color.test.ts
+++ b/apps/client/src/features/ai-chat/utils/role-card-color.test.ts
@@ -0,0 +1,23 @@
+import { describe, it, expect } from "vitest";
+import { ROLE_CARD_PALETTE, roleCardColor } from "./role-card-color";
+
+describe("roleCardColor", () => {
+  it("has a 10-color palette", () => {
+    expect(ROLE_CARD_PALETTE.length).toBe(10);
+  });
+
+  it("maps index 0 to the first palette color (blue)", () => {
+    const picked = [0, 1].map((index) => roleCardColor(index));
+    expect(picked).toEqual(["blue", "grape"]);
+  });
+
+  it("wraps around at the end of the palette", () => {
+    const picked = [10, 11].map((index) => roleCardColor(index));
+    expect(picked).toEqual(["blue", "grape"]);
+  });
+
+  it("is safe for negative indices", () => {
+    const picked = [-1, -10].map((index) => roleCardColor(index));
+    expect(picked).toEqual(["violet", "blue"]);
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/role-card-color.ts
+++ b/apps/client/src/features/ai-chat/utils/role-card-color.ts
@@ -0,0 +1,25 @@
+// Fixed Mantine color palette for the new-chat role cards. Cards cycle through
+// these names by index; the colors are applied via theme-aware Mantine CSS vars
+// (`--mantine-color-<name>-light` etc.) so they are correct in both themes.
+// Universal assistant uses neutral `gray` separately (not part of this palette).
+export const ROLE_CARD_PALETTE = [
+  "blue",
+  "grape",
+  "teal",
+  "orange",
+  "pink",
+  "cyan",
+  "lime",
+  "indigo",
+  "red",
+  "violet",
+] as const;
+
+/** Palette color name for the role card at `index`. Cycles through the palette
+ *  and is safe for negative indices; a fractional index is floored and a
+ *  non-finite one (NaN/±Infinity) maps to the first color, never `undefined`. */
+export function roleCardColor(index: number): string {
+  const len = ROLE_CARD_PALETTE.length;
+  const slot = Number.isFinite(index) ? Math.floor(index) : 0;
+  return ROLE_CARD_PALETTE[((slot % len) + len) % len];
+}
--- a/apps/client/src/features/ai-chat/utils/role-launch.test.ts
+++ b/apps/client/src/features/ai-chat/utils/role-launch.test.ts
@@ -0,0 +1,72 @@
+import { describe, it, expect } from "vitest";
+import { roleLaunchMessage, shouldResetRolePicked } from "./role-launch.ts";
+
+const DEFAULT = "Take a look at the current document";
+
+// Exercises the three-way handleRolePick behavior (issue #149) through the pure
+// helpers that hold the logic, so the chat-thread component is never mounted.
+describe("roleLaunchMessage", () => {
+  it("autoStart=true + custom launchMessage -> the trimmed custom text", () => {
+    const role = {
+      autoStart: true,
+      launchMessage: "  Draft a plan  ",
+    };
+    const sent = roleLaunchMessage(role, DEFAULT);
+    expect(sent).toBe("Draft a plan");
+  });
+
+  it("autoStart=true + empty launchMessage -> the default fallback", () => {
+    const role = { autoStart: true, launchMessage: "" };
+    const sent = roleLaunchMessage(role, DEFAULT);
+    expect(sent).toBe(DEFAULT);
+  });
+
+  it("autoStart=true + whitespace-only launchMessage -> the default fallback", () => {
+    const role = { autoStart: true, launchMessage: "   " };
+    const sent = roleLaunchMessage(role, DEFAULT);
+    expect(sent).toBe(DEFAULT);
+  });
+
+  it("autoStart=true + null launchMessage -> the default fallback", () => {
+    const role = { autoStart: true, launchMessage: null };
+    const sent = roleLaunchMessage(role, DEFAULT);
+    expect(sent).toBe(DEFAULT);
+  });
+
+  it("autoStart=false -> null (bind only, send nothing) regardless of message", () => {
+    const roles = [
+      { autoStart: false, launchMessage: "ignored" },
+      { autoStart: false, launchMessage: null },
+    ];
+    // Bind-only roles never produce a message, whatever launchMessage holds.
+    for (const role of roles) {
+      const sent = roleLaunchMessage(role, DEFAULT);
+      expect(sent).toBeNull();
+    }
+  });
+});
+
+// Regression guard for #149: after an autoStart=false pick, starting a fresh
+// chat must clear the "picked, not sent" flag. The pre-fix code had no reset, so
+// the flag stuck and the role cards never came back (the `true` case below).
+describe("shouldResetRolePicked", () => {
+  it("resets when the thread is empty and the bound role was cleared (New chat)", () => {
+    // chatId still null, roleId cleared by the parent, flag stuck -> reset.
+    const whenNull = shouldResetRolePicked(null, null, true);
+    const whenUndefined = shouldResetRolePicked(null, undefined, true);
+    expect([whenNull, whenUndefined]).toEqual([true, true]);
+  });
+
+  it("does NOT reset while a role is still bound (cards stay hidden, composer shown)", () => {
+    const stillBound = "role-1"; // right after the autoStart=false pick -> keep hidden
+    expect(shouldResetRolePicked(null, stillBound, true)).toBe(false);
+  });
+
+  it("does NOT reset once the chat exists (a message was sent / chat created)", () => {
+    expect(shouldResetRolePicked("chat-1", null, true)).toBe(false);
+  });
+
+  it("is a no-op when the flag is already false", () => {
+    expect(shouldResetRolePicked(null, null, false)).toBe(false);
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/role-launch.ts
+++ b/apps/client/src/features/ai-chat/utils/role-launch.ts
@@ -0,0 +1,34 @@
+import type { IAiRole } from "@/features/ai-chat/types/ai-chat.types.ts";
+
+/**
+ * Work out the text to auto-send (if any) right after an agent role card is
+ * picked (issue #149). Kept pure so all three outcomes can be unit-tested
+ * without mounting the chat-thread component:
+ *   - autoStart=false              -> null  (bind the role only, send nothing)
+ *   - autoStart=true + message     -> the custom launchMessage, trimmed
+ *   - autoStart=true + empty/null  -> `defaultText`
+ */
+export function roleLaunchMessage(
+  role: Pick<IAiRole, "autoStart" | "launchMessage">,
+  defaultText: string,
+): string | null {
+  const { autoStart, launchMessage } = role;
+  return autoStart ? launchMessage?.trim() || defaultText : null;
+}
+
+/**
+ * True when the "role picked but nothing sent yet" flag (`rolePickedNoSend`)
+ * must drop back to false. An autoStart=false pick leaves the composer showing
+ * with chatId still null; a later fresh chat clears the bound role (roleId ->
+ * null) without changing chatId, so the thread never remounts and a stuck flag
+ * would hide the role cards forever. A still-bound role keeps them hidden. (#149)
+ */
+export function shouldResetRolePicked(
+  chatId: string | null,
+  roleId: string | null | undefined,
+  rolePickedNoSend: boolean,
+): boolean {
+  const threadEmpty = chatId === null;
+  const roleCleared = roleId === null || roleId === undefined;
+  return threadEmpty && roleCleared && rolePickedNoSend;
+}
--- a/apps/client/src/features/ai-chat/utils/thread-identity.test.ts
+++ b/apps/client/src/features/ai-chat/utils/thread-identity.test.ts
@@ -0,0 +1,79 @@
+import { describe, it, expect } from "vitest";
+import {
+  newThread,
+  switchThread,
+  adoptThread,
+  threadSessionReducer,
+} from "./thread-identity";
+
+describe("newThread", () => {
+  it("uses the supplied key and has no chat id yet", () => {
+    const thread = newThread("new-abc");
+    expect(thread).toEqual({ key: "new-abc", chatId: null });
+  });
+});
+
+describe("switchThread", () => {
+  it("switches to an existing chat: key becomes the chat id", () => {
+    // Key and chat id are the same string after a switch.
+    const thread = switchThread("chat-1");
+    expect(thread).toEqual({ key: "chat-1", chatId: "chat-1" });
+  });
+});
+
+describe("adoptThread", () => {
+  // Adoption leaves the key UNCHANGED (so no remount) and moves chatId from null
+  // to the real id. The stable key keeps the live useChat store alive; the
+  // matching chatId lets the window's render-phase reconciler (activeChatId !==
+  // thread.chatId) see the adopted thread as already in sync, not as a switch.
+  it("adopts in place for a new chat: keeps the key, sets the chat id", () => {
+    const fresh = newThread("new-abc");
+    const adopted = adoptThread(fresh, "chat-1");
+    expect(adopted).toEqual({ key: "new-abc", chatId: "chat-1" });
+  });
+
+  it("is a no-op for an already-persisted chat", () => {
+    const persisted: { key: string; chatId: string | null } = {
+      key: "chat-1",
+      chatId: "chat-1",
+    };
+    const result = adoptThread(persisted, "chat-2");
+    // The very same object comes back: nothing was rebuilt.
+    expect(result).toBe(persisted);
+  });
+});
+
+describe("threadSessionReducer", () => {
+  it("reconcile to an existing id switches (key becomes the id)", () => {
+    const action = {
+      type: "reconcile",
+      chatId: "chat-1",
+      newKey: "new-xyz",
+    } as const;
+    const next = threadSessionReducer(newThread("new-abc"), action);
+    expect(next).toEqual({ key: "chat-1", chatId: "chat-1" });
+  });
+
+  it("reconcile to null starts a fresh new thread with the supplied key", () => {
+    const action = {
+      type: "reconcile",
+      chatId: null,
+      newKey: "new-xyz",
+    } as const;
+    const next = threadSessionReducer(switchThread("chat-1"), action);
+    expect(next).toEqual({ key: "new-xyz", chatId: null });
+  });
+
+  it("adopt on a new thread keeps the key and sets the id", () => {
+    const action = { type: "adopt", chatId: "chat-1" } as const;
+    const next = threadSessionReducer(newThread("new-abc"), action);
+    expect(next).toEqual({ key: "new-abc", chatId: "chat-1" });
+  });
+
+  it("adopt on a persisted thread is a no-op", () => {
+    const persisted = switchThread("chat-1");
+    const action = { type: "adopt", chatId: "chat-2" } as const;
+    // Identity check: the reducer must hand the previous state straight back.
+    expect(threadSessionReducer(persisted, action)).toBe(persisted);
+  });
+});
--- a/apps/client/src/features/ai-chat/utils/thread-identity.ts
+++ b/apps/client/src/features/ai-chat/utils/thread-identity.ts
@@ -0,0 +1,73 @@
+/**
+ * Pure transitions for the AI-chat thread's identity: the single source of
+ * truth tying ChatThread's mount key to the chat id that mounted thread holds.
+ *
+ * The window keeps exactly ONE of these in state. Consolidating the mount key
+ * and the live thread's chat id into one atomic value makes the "stale chat id
+ * vs key" state unrepresentable: every change goes through one of the explicit
+ * transitions below, so the key and chatId can never silently diverge.
+ *
+ * - `newThread`/`switchThread` produce a key that forces a remount (+ reseed):
+ *   `newThread` for a brand-new (id-less) chat, `switchThread` for an existing
+ *   one. The caller picks which based on whether there is a chat id.
+ * - `adoptThread` keeps the SAME key so a brand-new chat learns its real id
+ *   WITHOUT remounting (the live useChat store, holding the just-finished turn,
+ *   is preserved and the next turn sends the real chatId).
+ *
+ * `newThread` takes the session key from the impure `generateId()` at the call
+ * site so these stay pure and unit-testable.
+ */
+export type ThreadIdentity = { key: string; chatId: string | null }; // chatId is null for a brand-new thread (see newThread)
+
+/** Identity for a brand-new chat: the caller-supplied session key and no chat
+ *  id yet. The key is a parameter (generateId() is impure) so this function
+ *  stays pure and unit-testable. */
+export function newThread(newKey: string): ThreadIdentity {
+  const fresh: ThreadIdentity = { key: newKey, chatId: null };
+  return fresh;
+}
+
+/** Identity for an EXISTING chat: the chat id doubles as the mount key, which
+ *  forces a remount + reseed from the persisted history. (Switching to a
+ *  brand-new chat is `newThread`'s job instead — there is no id to key on
+ *  yet.) */
+export function switchThread(chatId: string): ThreadIdentity {
+  const existing: ThreadIdentity = { key: chatId, chatId: chatId };
+  return existing;
+}
+
+/** In-place adoption: a brand-new thread (`prev.chatId === null`) learns its
+ *  real id WITHOUT remounting — the key is kept and only the chat id is set.
+ *  When `prev` already carries a chatId this is a no-op and `prev` itself is
+ *  returned, because adoption applies only to an as-yet-unadopted new
+ *  thread. */
+export function adoptThread(prev: ThreadIdentity, chatId: string): ThreadIdentity {
+  if (prev.chatId !== null) return prev;
+  return { key: prev.key, chatId };
+}
+
+/**
+ * Actions accepted by `threadSessionReducer`, discriminated by `type`.
+ */
+export type ThreadSessionAction =
+  | { type: "reconcile"; chatId: string | null; newKey: string } // newKey is used only when chatId is null
+  | { type: "adopt"; chatId: string }; // delegated to adoptThread
+
+/**
+ * The one place where thread identity changes. `adopt` gives a brand-new chat
+ * its real id in place (no remount); `reconcile` covers a genuine switch (user
+ * OR external atom write) and always produces a key that forces a remount.
+ */
+export function threadSessionReducer(
+  state: ThreadIdentity,
+  action: ThreadSessionAction,
+): ThreadIdentity {
+  switch (action.type) {
+    case "adopt":
+      return adoptThread(state, action.chatId);
+    case "reconcile": {
+      const { chatId, newKey } = action;
+      return chatId === null ? newThread(newKey) : switchThread(chatId);
+    }
+  }
+}
--- a/apps/client/src/features/auth/components/invite-sign-up-form.tsx
+++ b/apps/client/src/features/auth/components/invite-sign-up-form.tsx
@@ -10,9 +10,12 @@ import {
  PasswordInput,
  Box,
  Stack,
+  Group,
+  Text,
 } from "@mantine/core";
 import { zod4Resolver } from "mantine-form-zod-resolver";
-import { useParams, useSearchParams } from "react-router-dom";
+import { Link, useParams, useSearchParams } from "react-router-dom";
+import APP_ROUTE from "@/lib/app-route";
 import useAuth from "@/features/auth/hooks/use-auth";
 import classes from "@/features/auth/components/auth.module.css";
 import { useGetInvitationQuery } from "@/features/workspace/queries/workspace-query.ts";
@@ -58,7 +61,27 @@ export function InviteSignUpForm() {
  }

  if (isError) {
-    return <div>{t("invalid invitation link")}</div>;
+    // Styled error with a CTA to login, mirroring the password-reset
+    // error page and the 404 page (issue #133)
+    return (
+      <AuthLayout>
+        <Container my={40}>
+          <Text size="lg" ta="center">
+            {t("Invalid invitation link")}
+          </Text>
+          <Group justify="center">
+            <Button
+              component={Link}
+              to={APP_ROUTE.AUTH.LOGIN}
+              variant="subtle"
+              size="md"
+            >
+              {t("Go to login page")}
+            </Button>
+          </Group>
+        </Container>
+      </AuthLayout>
+    );
  }

  if (!invitation) {
--- a/apps/client/src/features/comment/components/comment-list-item.test.tsx
+++ b/apps/client/src/features/comment/components/comment-list-item.test.tsx
@@ -0,0 +1,59 @@
+import { describe, it, expect, vi } from "vitest";
+import { render, screen } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+import { IComment } from "@/features/comment/types/comment.types";
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+// The comment mutation hooks reach out to react-query/network — stub them so the
+// component renders in isolation. Each stub exposes only `mutateAsync` (a vi.fn()).
+vi.mock("@/features/comment/queries/comment-query", () => ({
+  useDeleteCommentMutation: () => ({ mutateAsync: vi.fn() }),
+  useResolveCommentMutation: () => ({ mutateAsync: vi.fn() }),
+  useUpdateCommentMutation: () => ({ mutateAsync: vi.fn() }),
+}));
+
+// CommentEditor pulls in the full TipTap editor stack; replace it with an empty div.
+vi.mock("@/features/comment/components/comment-editor", () => ({
+  default: () => <div data-testid="comment-editor" />,
+}));
+
+import CommentListItem from "./comment-list-item";
+
+const baseComment = (over?: Partial<IComment>): IComment => {
+  const defaults = {
+    id: "c-1",
+    content: JSON.stringify({ type: "doc", content: [] }),
+    creatorId: "user-1",
+    pageId: "page-1",
+    workspaceId: "ws-1",
+    createdAt: new Date(),
+    creator: { id: "user-1", name: "Service Bot", avatarUrl: null } as any,
+  };
+  return { ...defaults, ...over } as IComment;
+};
+
+function renderItem(comment: IComment) {
+  const item = <CommentListItem comment={comment} pageId="page-1" canComment />;
+  return render(<MantineProvider>{item}</MantineProvider>);
+}
+
+describe("CommentListItem — AI badge", () => {
+  it('renders the AI-agent badge when createdSource === "agent"', () => {
+    const agentComment = baseComment({ createdSource: "agent", aiChatId: null });
+    renderItem(agentComment);
+    expect(screen.getByText("AI-agent")).toBeDefined();
+    expect(screen.getByText("Service Bot")).toBeDefined();
+  });
+
+  it('does NOT render the badge for a normal user comment (createdSource "user")', () => {
+    const userComment = baseComment({ createdSource: "user" });
+    renderItem(userComment);
+    expect(screen.queryByText("AI-agent")).toBeNull();
+    expect(screen.getByText("Service Bot")).toBeDefined();
+  });
+
+  // Whether the badge is clickable (null aiChatId -> not clickable) belongs to
+  // AiAgentBadge and is covered in ai-agent-badge.test.tsx; this integration suite
+  // only checks the insertion gate above: agent -> badge, user -> none (#143 review).
+});
--- a/apps/client/src/features/comment/components/comment-list-item.tsx
+++ b/apps/client/src/features/comment/components/comment-list-item.tsx
@@ -1,4 +1,5 @@
-import { Group, Text, Box, Badge } from "@mantine/core";
+import { Group, Text, Box } from "@mantine/core";
+import { AiAgentBadge } from "@/components/ui/ai-agent-badge.tsx";
 import React, { useEffect, useRef, useState } from "react";
 import classes from "./comment.module.css";
 import { useAtom, useAtomValue } from "jotai";
@@ -126,9 +127,18 @@ function CommentListItem({

        <div style={{ flex: 1 }}>
          <Group justify="space-between" wrap="nowrap">
-            <Text size="xs" fw={500} lineClamp={1}>
-              {comment.creator.name}
-            </Text>
+            <Group gap={6} wrap="nowrap" style={{ minWidth: 0 }}>
+              <Text size="xs" fw={500} lineClamp={1} lh={1.2}>
+                {comment.creator.name}
+              </Text>
+
+              {comment.createdSource === "agent" && (
+                <AiAgentBadge
+                  authorName={comment.creator?.name}
+                  aiChatId={comment.aiChatId}
+                />
+              )}
+            </Group>

            <div style={{ visibility: hovered ? "visible" : "hidden" }}>
              {!comment.parentCommentId && canComment && (
@@ -155,7 +165,7 @@ function CommentListItem({
          </Group>

          <Group gap="xs">
-            <Text size="xs" fw={500} c="dimmed">
+            <Text size="xs" fw={500} c="dimmed" lh={1.1}>
              {createdAtAgo}
            </Text>
          </Group>
--- a/apps/client/src/features/comment/components/comment-list-with-tabs.tsx
+++ b/apps/client/src/features/comment/components/comment-list-with-tabs.tsx
@@ -11,6 +11,7 @@ import {
  Badge,
  Text,
  ScrollArea,
+  Tooltip,
 } from "@mantine/core";
 import CommentListItem from "@/features/comment/components/comment-list-item";
 import {
@@ -26,12 +27,16 @@ import { IPagination } from "@/lib/types.ts";
 import { extractPageSlugId } from "@/lib";
 import { useTranslation } from "react-i18next";
 import { useGetSpaceBySlugQuery } from "@/features/space/queries/space-query.ts";
-import { IconArrowUp, IconMessageOff } from "@tabler/icons-react";
+import { IconArrowUp, IconMessageOff, IconX } from "@tabler/icons-react";
 import { useAtom } from "jotai";
 import { currentUserAtom } from "@/features/user/atoms/current-user-atom";
 import { CustomAvatar } from "@/components/ui/custom-avatar.tsx";

-function CommentListWithTabs() {
+interface CommentListWithTabsProps {
+  onClose?: () => void;
+}
+
+function CommentListWithTabs({ onClose }: CommentListWithTabsProps) {
  const { t } = useTranslation();
  const { pageSlug } = useParams();
  const { data: page } = usePageQuery({ pageId: extractPageSlugId(pageSlug) });
@@ -194,28 +199,50 @@ function CommentListWithTabs() {
          overflow: "hidden",
        }}
      >
-        <Tabs.List justify="center">
-          <Tabs.Tab
-            value="open"
-            leftSection={
-              <Badge size="sm" variant="light" color="blue">
-                {activeComments.length}
-              </Badge>
-            }
-          >
-            {t("Open")}
-          </Tabs.Tab>
-          <Tabs.Tab
-            value="resolved"
-            leftSection={
-              <Badge size="sm" variant="light" color="green">
-                {resolvedComments.length}
-              </Badge>
-            }
-          >
-            {t("Resolved")}
-          </Tabs.Tab>
-        </Tabs.List>
+        {/* Header row: full-width centered tab list with the close button overlaid on the right. */}
+        <div style={{ position: "relative" }}>
+          <Tabs.List justify="center">
+            <Tabs.Tab
+              value="open"
+              leftSection={
+                <Badge size="sm" variant="light" color="blue">
+                  {activeComments.length}
+                </Badge>
+              }
+            >
+              {t("Open")}
+            </Tabs.Tab>
+            <Tabs.Tab
+              value="resolved"
+              leftSection={
+                <Badge size="sm" variant="light" color="green">
+                  {resolvedComments.length}
+                </Badge>
+              }
+            >
+              {t("Resolved")}
+            </Tabs.Tab>
+          </Tabs.List>
+          {onClose && (
+            <Tooltip label={t("Close")} withArrow>
+              <ActionIcon
+                variant="subtle"
+                color="gray"
+                onClick={onClose}
+                aria-label={t("Close")}
+                style={{
+                  position: "absolute",
+                  right: 0,
+                  top: "50%",
+                  // Nudge the close button slightly up to align with the tab labels.
+                  transform: "translateY(calc(-50% - 4px))",
+                }}
+              >
+                <IconX size={18} />
+              </ActionIcon>
+            </Tooltip>
+          )}
+        </div>

        <ScrollArea
          style={{ flex: "1 1 auto" }}
@@ -365,7 +392,7 @@ const PageCommentInput = ({ onSave, isLoading }) => {
        flex: "0 0 auto",
        borderTop: "1px solid var(--mantine-color-default-border)",
        paddingTop: "var(--mantine-spacing-sm)",
-        paddingBottom: 25,
+        paddingBottom: 10,
        position: "relative",
      }}
    >
@@ -374,7 +401,7 @@ const PageCommentInput = ({ onSave, isLoading }) => {
          size="sm"
          avatarUrl={currentUser?.user?.avatarUrl}
          name={currentUser?.user?.name}
-          style={{ flexShrink: 0, marginTop: 10 }}
+          style={{ flexShrink: 0, marginTop: 2 }}
        />
        <div style={{ flex: 1, minWidth: 0 }}>
          <CommentEditor
@@ -396,7 +423,7 @@ const PageCommentInput = ({ onSave, isLoading }) => {
          onClick={handleSave}
          onMouseDown={(e) => e.preventDefault()}
          loading={isLoading}
-          style={{ position: "absolute", right: 8, bottom: 30 }}
+          style={{ position: "absolute", right: 8, bottom: 15 }}
        >
          <IconArrowUp size={16} />
        </ActionIcon>
--- a/apps/client/src/features/comment/components/comment.module.css
+++ b/apps/client/src/features/comment/components/comment.module.css
@@ -3,7 +3,12 @@
 }

 .textSelection {
-    margin-top: 2px;
+    /* Breathing room below the comment header (author + timestamp) so the
+       quote does not stick to the timestamp when it is the first block. */
+    margin-top: 8px;
+    /* Align the quote's left bar with the comment body text left edge
+       (the comment editor insets its text by 6px). */
+    margin-left: 6px;
    border-left: 2px solid var(--mantine-color-gray-6);
    padding: 6px;
    background: var(--mantine-color-gray-light);
--- a/apps/client/src/features/comment/types/comment.types.ts
+++ b/apps/client/src/features/comment/types/comment.types.ts
@@ -17,6 +17,13 @@ export interface IComment {
  deletedAt?: Date;
  creator: IUser;
  resolvedBy?: IUser;
+  // Agent-edit provenance (returned by the backend via selectAll('comments')).
+  // createdSource === "agent" marks a comment authored via an AI agent (MCP /
+  // internal AI chat); aiChatId deep-links to the internal chat when present
+  // (null for an external MCP agent); resolvedSource marks an AI-resolved thread.
+  createdSource?: string;
+  aiChatId?: string | null;
+  resolvedSource?: string | null;
  yjsSelection?: {
    anchor: any;
    head: any;
--- a/apps/client/src/features/dictation/components/mic-button.module.css
+++ b/apps/client/src/features/dictation/components/mic-button.module.css
@@ -0,0 +1,24 @@
+.recordingWrap {
+  position: relative;
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+}
+
+/* Translucent red halo that sits behind the stop button and scales with the
+   live microphone level (scale set inline from audioLevel). Radius follows the
+   ActionIcon's own radius so the halo matches the button's rounded-square
+   outline instead of being a circle. */
+.pulse {
+  position: absolute;
+  inset: 0;
+  border-radius: var(--mantine-radius-default);
+  background-color: var(--mantine-color-red-5);
+  opacity: 0.35;
+  transform-origin: center;
+  transform: scale(1);
+  transition: transform 90ms linear;
+  pointer-events: none;
+  will-change: transform;
+  z-index: 0;
+}
--- a/apps/client/src/features/dictation/components/mic-button.tsx
+++ b/apps/client/src/features/dictation/components/mic-button.tsx
@@ -3,6 +3,8 @@ import { ActionIcon, Loader, Tooltip } from "@mantine/core";
 import { IconMicrophone, IconPlayerStopFilled } from "@tabler/icons-react";
 import { useTranslation } from "react-i18next";
 import { useDictation } from "@/features/dictation/hooks/use-dictation";
+import { useStreamingDictation } from "@/features/dictation/hooks/use-streaming-dictation";
+import classes from "./mic-button.module.css";

 interface MicButtonProps {
  onText: (text: string) => void;
@@ -11,6 +13,14 @@ interface MicButtonProps {
  // Mantine ActionIcon size token; "lg" matches the chat composer, "md" the
  // editor toolbar.
  size?: "md" | "lg";
+  // Optional Mantine color override for the idle/transcribing states (the
+  // recording state stays red). Defaults to the theme primary when omitted.
+  color?: string;
+  // Optional explicit glyph size override; defaults to the size-token value.
+  iconSize?: number;
+  // When true, use the streaming (Silero-VAD) dictation controller, which emits
+  // text progressively as the user pauses; otherwise use the batch controller.
+  streaming?: boolean;
 }

 /**
@@ -24,35 +34,64 @@ export const MicButton: FC<MicButtonProps> = ({
  onStart,
  disabled,
  size = "lg",
+  color,
+  iconSize,
+  streaming = false,
 }) => {
  const { t } = useTranslation();
-  const { status, start, stop } = useDictation({ onText, onStart });
-  const iconSize = size === "lg" ? 18 : 16;
+  // Call BOTH hooks unconditionally to respect the rules of hooks: which one is
+  // active is a render-time choice, but both must be invoked every render. This
+  // is safe because both controllers are inert until start() is called — neither
+  // opens the mic on mount — so the unused one costs nothing.
+  const batchCtl = useDictation({ onText, onStart });
+  const streamingCtl = useStreamingDictation({ onText, onStart });
+  const ctl = streaming ? streamingCtl : batchCtl;
+  const { status, start, stop, audioLevel } = ctl;
+  const resolvedIconSize = iconSize ?? (size === "lg" ? 18 : 16);

  if (status === "recording") {
+    // Live volume-driven halo: the scale follows the current mic level.
+    const haloScale = 1 + Math.min(1, audioLevel) * 0.9;
    return (
      <Tooltip label={t("Stop recording")} withArrow>
-        <ActionIcon
-          size={size}
-          color="red"
-          variant="light"
-          onClick={stop}
-          aria-label={t("Stop recording")}
-        >
-          <IconPlayerStopFilled size={iconSize} />
-        </ActionIcon>
+        <span className={classes.recordingWrap}>
+          <span
+            className={classes.pulse}
+            style={{ transform: `scale(${haloScale})` }}
+            aria-hidden="true"
+          />
+          <ActionIcon
+            size={size}
+            color="red"
+            variant="light"
+            onClick={stop}
+            aria-label={t("Stop recording")}
+            style={{ position: "relative", zIndex: 1 }}
+          >
+            <IconPlayerStopFilled size={resolvedIconSize} />
+          </ActionIcon>
+        </span>
      </Tooltip>
    );
  }

-  if (status === "transcribing" || status === "error") {
+  if (
+    status === "loading" ||
+    status === "transcribing" ||
+    status === "error"
+  ) {
+    // "loading" (streaming hook fetching the VAD model on first use) shows the
+    // same spinner+disabled state so the first click is visibly acknowledged and
+    // a confusing second click can't fire while the model loads.
+    const label = status === "loading" ? t("Preparing…") : t("Transcribing…");
    return (
-      <Tooltip label={t("Transcribing…")} withArrow>
+      <Tooltip label={label} withArrow>
        <ActionIcon
          size={size}
          variant="subtle"
+          color={color}
          disabled
-          aria-label={t("Transcribing…")}
+          aria-label={label}
        >
          <Loader size="xs" />
        </ActionIcon>
@@ -65,11 +104,12 @@ export const MicButton: FC<MicButtonProps> = ({
      <ActionIcon
        size={size}
        variant="subtle"
+        color={color}
        onClick={() => void start()}
        disabled={disabled}
        aria-label={t("Start dictation")}
      >
-        <IconMicrophone size={iconSize} />
+        <IconMicrophone size={resolvedIconSize} />
      </ActionIcon>
    </Tooltip>
  );
--- a/apps/client/src/features/dictation/hooks/use-dictation.ts
+++ b/apps/client/src/features/dictation/hooks/use-dictation.ts
@@ -3,7 +3,15 @@ import { notifications } from "@mantine/notifications";
 import { useTranslation } from "react-i18next";
 import { transcribeAudio } from "@/features/dictation/services/dictation-service";

-export type DictationStatus = "idle" | "recording" | "transcribing" | "error";
+// "loading" is set only by the streaming hook while it lazily loads the VAD
+// model on first use; the batch hook never sets it. It exists so the streaming
+// hook and the mic button can show immediate feedback during that load.
+export type DictationStatus =
+  | "idle"
+  | "recording"
+  | "transcribing"
+  | "error"
+  | "loading";

 interface UseDictationOptions {
  onText: (text: string) => void;
@@ -16,6 +24,8 @@ interface UseDictationResult {
  start: () => Promise<void>;
  stop: () => void;
  cancel: () => void;
+  // Smoothed live microphone level in the 0..1 range while recording (0 when idle).
+  audioLevel: number;
 }

 // Candidate container/codec combinations in preference order. The first one the
@@ -56,6 +66,7 @@ export function useDictation(
 ): UseDictationResult {
  const { t } = useTranslation();
  const [status, setStatus] = useState<DictationStatus>("idle");
+  const [audioLevel, setAudioLevel] = useState(0);

  // Keep the latest callbacks in a ref so the recorder's onstop closure always
  // calls the current handlers without re-creating the recorder.
@@ -70,6 +81,15 @@ export function useDictation(
  const canceledRef = useRef(false);
  const startingRef = useRef(false);

+  // Web Audio metering: derives a live input level from the captured stream.
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const analyserRef = useRef<AnalyserNode | null>(null);
+  const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
+  const rafRef = useRef<number | null>(null);
+  // Exponentially smoothed level, and the last value pushed to React state.
+  const smoothedLevelRef = useRef(0);
+  const emittedLevelRef = useRef(0);
+
  const clearTimer = useCallback(() => {
    if (timerRef.current !== null) {
      clearTimeout(timerRef.current);
@@ -82,6 +102,91 @@ export function useDictation(
    streamRef.current = null;
  }, []);

+  // Tear the audio meter down fully. Safe to call multiple times and on any exit
+  // path; defensive try/catch so cleanup never throws.
+  const stopMeter = useCallback(() => {
+    // Cancel the rAF first so getByteTimeDomainData can't run on a closed context.
+    if (rafRef.current !== null) {
+      cancelAnimationFrame(rafRef.current);
+      rafRef.current = null;
+    }
+    try {
+      sourceRef.current?.disconnect();
+      sourceRef.current = null;
+      analyserRef.current = null;
+      if (audioContextRef.current && audioContextRef.current.state !== "closed") {
+        void audioContextRef.current.close();
+      }
+      audioContextRef.current = null;
+    } catch (err) {
+      // Cleanup must never throw; just log for diagnosis.
+      console.warn("[dictation] audio meter teardown failed", err);
+    }
+    smoothedLevelRef.current = 0;
+    emittedLevelRef.current = 0;
+    setAudioLevel(0);
+  }, []);
+
+  // Set up Web Audio metering on the already-captured stream. Reuses the existing
+  // MediaStream — never requests a second mic. Failure here must not break
+  // recording: on any error we warn and return, leaving the recorder running.
+  const startMeter = useCallback((stream: MediaStream) => {
+    try {
+      const Ctor =
+        window.AudioContext ||
+        (window as unknown as { webkitAudioContext?: typeof AudioContext })
+          .webkitAudioContext;
+      if (!Ctor) return;
+
+      const audioContext = new Ctor();
+      // Some browsers start the context suspended; resume so the loop produces
+      // data. Swallow rejection (e.g. context already closed by a fast
+      // start/stop race) to avoid an unhandled promise rejection.
+      audioContext.resume().catch(() => {});
+      const source = audioContext.createMediaStreamSource(stream);
+      const analyser = audioContext.createAnalyser();
+      analyser.fftSize = 512;
+      analyser.smoothingTimeConstant = 0.5;
+      // Connect ONLY to the analyser — never to destination, which would echo the
+      // mic back to the speakers.
+      source.connect(analyser);
+
+      audioContextRef.current = audioContext;
+      sourceRef.current = source;
+      analyserRef.current = analyser;
+
+      // Allocate the time-domain buffer once and reuse it on every tick.
+      const data = new Uint8Array(analyser.fftSize);
+
+      const tick = () => {
+        const a = analyserRef.current;
+        if (!a) return;
+        a.getByteTimeDomainData(data);
+        // RMS of the centered waveform (samples are 0..255, midpoint 128).
+        let sumSquares = 0;
+        for (let i = 0; i < data.length; i++) {
+          const v = (data[i] - 128) / 128;
+          sumSquares += v * v;
+        }
+        const rms = Math.sqrt(sumSquares / data.length);
+        // Boost + clamp so normal speech maps to a visible 0..1 range.
+        const level = Math.min(1, rms * 3);
+        // Exponential smoothing to avoid jitter.
+        smoothedLevelRef.current = smoothedLevelRef.current * 0.8 + level * 0.2;
+        // Throttle React re-renders: only push when it changed meaningfully.
+        if (Math.abs(smoothedLevelRef.current - emittedLevelRef.current) > 0.01) {
+          emittedLevelRef.current = smoothedLevelRef.current;
+          setAudioLevel(smoothedLevelRef.current);
+        }
+        rafRef.current = requestAnimationFrame(tick);
+      };
+      rafRef.current = requestAnimationFrame(tick);
+    } catch (err) {
+      // Web Audio unavailable or threw: recording continues without the meter.
+      console.warn("[dictation] audio meter unavailable", err);
+    }
+  }, []);
+
  const start = useCallback(async (): Promise<void> => {
    // Synchronous live guard: status is stale between renders, so also block on
    // refs to prevent a double-click from opening two MediaStreams (the first
@@ -163,8 +268,9 @@ export function useDictation(
      const recordedMime = recorder.mimeType || mimeType || "audio/webm";
      const wasCanceled = canceledRef.current;

-      // Stop the mic tracks regardless of how we got here.
+      // Stop the mic tracks and the audio meter regardless of how we got here.
      stopTracks();
+      stopMeter();
      recorderRef.current = null;

      if (wasCanceled) {
@@ -237,34 +343,49 @@ export function useDictation(
    // Recording has truly begun; release the synchronous start guard.
    startingRef.current = false;

+    // Start the live audio meter on the stream we already acquired.
+    startMeter(stream);
+
    const maxDurationMs = optionsRef.current.maxDurationMs ?? 120000;
    timerRef.current = setTimeout(() => {
      if (recorderRef.current?.state === "recording") {
        recorderRef.current.stop();
      }
    }, maxDurationMs);
-  }, [status, t, clearTimer, stopTracks]);
+  }, [status, t, clearTimer, stopTracks, startMeter, stopMeter]);

  const stop = useCallback((): void => {
    clearTimer();
    const recorder = recorderRef.current;
    if (recorder && recorder.state === "recording") {
+      // Normal path: onstop tears down tracks + meter and runs transcription.
      recorder.stop();
+    } else {
+      // No live recorder (e.g. the track ended on its own): tear everything
+      // down directly so the meter/AudioContext and stream don't leak, and
+      // recover the UI to idle.
+      stopTracks();
+      stopMeter();
+      recorderRef.current = null;
+      chunksRef.current = [];
+      setStatus("idle");
    }
-  }, [clearTimer]);
+  }, [clearTimer, stopTracks, stopMeter]);

  const cancel = useCallback((): void => {
    clearTimer();
    canceledRef.current = true;
    const recorder = recorderRef.current;
    if (recorder && recorder.state === "recording") {
-      // onstop sees canceledRef and skips transcription; it also stops tracks.
+      // onstop sees canceledRef and skips transcription; it also stops tracks
+      // and the meter.
      recorder.stop();
    } else {
      stopTracks();
+      stopMeter();
    }
    setStatus("idle");
-  }, [clearTimer, stopTracks]);
+  }, [clearTimer, stopTracks, stopMeter]);

  // Clean up on unmount: stop any live recorder/stream and clear the timers.
  useEffect(() => {
@@ -280,8 +401,9 @@ export function useDictation(
        recorder.stop();
      }
      stopTracks();
+      stopMeter();
    };
-  }, [clearTimer, stopTracks]);
+  }, [clearTimer, stopTracks, stopMeter]);

-  return { status, start, stop, cancel };
+  return { status, start, stop, cancel, audioLevel };
 }
--- a/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
+++ b/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
@@ -0,0 +1,474 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import { notifications } from "@mantine/notifications";
+import { useTranslation } from "react-i18next";
+import { transcribeAudio } from "@/features/dictation/services/dictation-service";
+import { encodeWavPcm16 } from "@/features/dictation/utils/encode-wav";
+import type { DictationStatus } from "@/features/dictation/hooks/use-dictation";
+
+// Minimal local shape of a MicVAD instance. The runtime import happens inside
+// start() so the heavy onnxruntime-web / Silero model is code-split out of the
+// main bundle and only fetched when the user actually begins dictation.
+type MicVADInstance = {
+  start: () => Promise<void>;
+  pause: () => Promise<void>;
+  destroy: () => Promise<void>;
+};
+
+interface UseStreamingDictationOptions {
+  onText: (text: string) => void;
+  onStart?: () => void;
+  maxDurationMs?: number;
+}
+
+interface UseStreamingDictationResult {
+  status: DictationStatus;
+  start: () => Promise<void>;
+  stop: () => void;
+  cancel: () => void;
+  // Smoothed live speech level in the 0..1 range while recording (0 when idle).
+  audioLevel: number;
+}
+
+// Sample rate of the audio MicVAD hands to onSpeechEnd (Silero VAD runs at 16k).
+const VAD_SAMPLE_RATE = 16000;
+
+// Asset paths for the VAD worklet/Silero model and the onnxruntime-web WASM
+// binaries. vad-web 0.0.30's default asset path is "./" (relative to the current
+// page URL), NOT a CDN — in this SPA that request hits the client-side catch-all
+// route and returns index.html (text/html), so the onnxruntime ESM/wasm backend
+// fails to initialize. We instead self-host the four needed files (the vad-web
+// worklet + `silero_vad_v5.onnx` model and the onnxruntime-web `*.jsep.mjs`/
+// `*.jsep.wasm`) under `apps/client/public/vad/` — populated by
+// `scripts/copy-vad-assets.mjs`, which runs before `dev`/`build` — and point both
+// paths at the fixed absolute "/vad/".
+const VAD_BASE_ASSET_PATH: string | undefined = "/vad/";
+const VAD_ONNX_WASM_BASE_PATH: string | undefined = "/vad/";
+
+/**
+ * Streaming variant of useDictation. Detects speech with a real (Silero) VAD and,
+ * each time the speaker pauses, cuts that speech segment and POSTs it to the same
+ * batch transcription endpoint, so text appears progressively as the user speaks.
+ *
+ * Returns the SAME shape as useDictation ({ status, start, stop, cancel,
+ * audioLevel }) so MicButton can use either interchangeably. Refs hold the live
+ * VAD instance / counters / timer so component re-renders never lose them, and
+ * every exit path destroys the VAD and stops the MediaStream.
+ */
+export function useStreamingDictation(
+  options: UseStreamingDictationOptions,
+): UseStreamingDictationResult {
+  const { t } = useTranslation();
+  const [status, setStatus] = useState<DictationStatus>("idle");
+  const [audioLevel, setAudioLevel] = useState(0);
+
+  // Keep the latest callbacks in a ref so async VAD/HTTP closures always call the
+  // current handlers without re-creating the VAD.
+  const optionsRef = useRef(options);
+  optionsRef.current = options;
+
+  const vadRef = useRef<MicVADInstance | null>(null);
+  // AudioContext we create+resume inside the click gesture and inject into
+  // MicVAD (see start()). We own it; MicVAD does not close an injected context.
+  const audioContextRef = useRef<AudioContext | null>(null);
+  const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const canceledRef = useRef(false);
+  const startingRef = useRef(false);
+  // True while a recording session is active (VAD listening). Used to ignore late
+  // VAD callbacks that fire after stop()/cancel().
+  const activeRef = useRef(false);
+
+  // In-order emission: each segment gets a monotonically increasing seq when its
+  // speech ends; completed transcriptions are buffered by seq and flushed in
+  // order so out-of-order HTTP responses can't scramble the text.
+  const nextSeqRef = useRef(0);
+  const nextEmitSeqRef = useRef(0);
+  const resultsRef = useRef<Map<number, string>>(new Map());
+  // Number of transcription requests still in flight.
+  const inFlightRef = useRef(0);
+  // Session epoch: bumped when a NEW session starts (start) or everything is
+  // hard-discarded (cancel). Each in-flight request captures the epoch at send
+  // time; if the epoch has since changed, the request is stale and its
+  // then/catch/finally are skipped so old text can't leak into a new session and
+  // the in-flight counter can't be driven negative across sessions.
+  const epochRef = useRef(0);
+
+  // Exponentially smoothed speech level, and the last value pushed to React state.
+  const smoothedLevelRef = useRef(0);
+  const emittedLevelRef = useRef(0);
+
+  const clearTimer = useCallback(() => {
+    if (timerRef.current !== null) {
+      clearTimeout(timerRef.current);
+      timerRef.current = null;
+    }
+  }, []);
+
+  // Reset the level meter back to zero (refs + React state).
+  const resetLevel = useCallback(() => {
+    smoothedLevelRef.current = 0;
+    emittedLevelRef.current = 0;
+    setAudioLevel(0);
+  }, []);
+
+  // Destroy the live VAD instance (which also releases the mic stream it opened;
+  // the injected AudioContext is owned by this hook and is not closed here). Safe
+  // to call multiple times and on any exit path; teardown never throws.
+  const destroyVad = useCallback(() => {
+    const vad = vadRef.current;
+    vadRef.current = null;
+    if (vad) {
+      try {
+        // destroy() pauses + tears down the worklet/stream internally.
+        // It returns a promise, so attach a .catch too: the surrounding
+        // try/catch only catches synchronous throws, and a rejected destroy()
+        // would otherwise surface as an unhandled rejection.
+        void vad
+          .destroy()
+          .catch((err) =>
+            console.warn("[dictation] VAD teardown failed", err),
+          );
+      } catch (err) {
+        // Cleanup must never throw; just log for diagnosis.
+        console.warn("[dictation] VAD teardown failed", err);
+      }
+    }
+  }, []);
+
+  // Decide the status once recording has ended: stay "transcribing" while
+  // requests are in flight, otherwise return to "idle".
+  const settleAfterStop = useCallback(() => {
+    if (inFlightRef.current > 0) {
+      setStatus("transcribing");
+    } else {
+      setStatus("idle");
+    }
+  }, []);
+
+  // Drain the in-order result buffer: while the next expected seq is ready,
+  // trim it, emit it if non-empty, advance. Runs after each success or failure.
+  const drainResults = useCallback(() => {
+    const results = resultsRef.current;
+    while (results.has(nextEmitSeqRef.current)) {
+      const text = results.get(nextEmitSeqRef.current)!;
+      results.delete(nextEmitSeqRef.current);
+      nextEmitSeqRef.current += 1;
+      const trimmed = text.trim();
+      // Whisper often returns a leading space; emit the trimmed value.
+      if (trimmed.length > 0) optionsRef.current.onText(trimmed);
+    }
+  }, []);
+
+  // Map a transcription error to a user-facing message, mirroring the batch hook.
+  const transcriptionErrorMessage = useCallback(
+    (err: unknown): string => {
+      const resp = (
+        err as { response?: { status?: number; data?: { message?: string } } }
+      )?.response;
+      const serverMsg = resp?.data?.message;
+      if (serverMsg && serverMsg.trim().length > 0) {
+        // The server already explains the cause (e.g. provider 404, bad format,
+        // STT not configured) — show it verbatim.
+        return serverMsg;
+      }
+      if (resp?.status === 503 || resp?.status === 403) {
+        return t("Voice dictation is not configured");
+      }
+      return `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`;
+    },
+    [t],
+  );
+
+  // Handle one ended speech segment: encode to WAV and transcribe. Results are
+  // buffered by seq and flushed in order. A single failed segment does NOT kill
+  // the session: log + one notification, then advance past that seq so later
+  // segments still flush.
+  const handleSegment = useCallback(
+    (audio: Float32Array) => {
+      const seq = nextSeqRef.current;
+      nextSeqRef.current += 1;
+      inFlightRef.current += 1;
+      // Capture the epoch for this request synchronously at send time.
+      const epoch = epochRef.current;
+
+      const wavBlob = encodeWavPcm16(audio, VAD_SAMPLE_RATE);
+      void transcribeAudio(wavBlob, "speech.wav")
+        .then((text) => {
+          // Stale request from a previous session: drop it without touching any
+          // current-session state.
+          if (epoch !== epochRef.current) return;
+          // Defend against a non-string server value before drainResults trims.
+          resultsRef.current.set(seq, typeof text === "string" ? text : "");
+          drainResults();
+        })
+        .catch((err: unknown) => {
+          if (epoch !== epochRef.current) return;
+          // Log the full error for diagnosis (status + body + stack).
+          console.error("[dictation] segment transcription failed", err);
+          notifications.show({
+            color: "red",
+            message: transcriptionErrorMessage(err),
+          });
+          // Skip this seq so later segments can still flush in order.
+          if (nextEmitSeqRef.current === seq) {
+            nextEmitSeqRef.current += 1;
+            drainResults();
+          } else {
+            resultsRef.current.set(seq, "");
+            drainResults();
+          }
+        })
+        .finally(() => {
+          if (epoch !== epochRef.current) return;
+          inFlightRef.current -= 1;
+          // If recording already stopped, flip to idle once everything drained.
+          if (!activeRef.current && inFlightRef.current === 0) {
+            setStatus("idle");
+          }
+        });
+    },
+    [drainResults, transcriptionErrorMessage],
+  );
+
+  // Begin a dictation session: resume the AudioContext inside the click gesture,
+  // lazily load MicVAD, then open the mic via vad.start(). Every failure is
+  // logged, shown as a red notification, and returns the hook to "idle". If
+  // cancel()/unmount races the model load, or stop()/cancel()/unmount races the
+  // mic prompt, the session is abandoned instead of being revived.
+  const start = useCallback(async (): Promise<void> => {
+    // Synchronous live guard: status is stale between renders, so also block on
+    // refs to prevent a double-click from creating two VAD instances (the first
+    // would leak its mic stream).
+    if (startingRef.current || vadRef.current || activeRef.current) return;
+    if (status !== "idle") return;
+    startingRef.current = true;
+
+    // Notify the caller right when dictation begins (before any async work) so the
+    // editor can snapshot the caret position.
+    optionsRef.current.onStart?.();
+
+    // Reset per-session in-order emission state. Bump the epoch so any request
+    // still in flight from a previous (stopped) session becomes stale and its
+    // then/catch/finally are skipped — it can neither emit old text into this
+    // new session nor decrement this session's freshly-zeroed in-flight counter.
+    epochRef.current += 1;
+    canceledRef.current = false;
+    nextSeqRef.current = 0;
+    nextEmitSeqRef.current = 0;
+    resultsRef.current = new Map();
+    inFlightRef.current = 0;
+    resetLevel();
+
+    // Create and resume the AudioContext NOW, inside the click gesture, before
+    // the (first-time-slow) model load below. A context first touched outside a
+    // user gesture stays "suspended" and the VAD audio worklet never runs — that
+    // is exactly why the first click did nothing and only the second (model
+    // already cached, so MicVAD.new was fast enough to create the context inside
+    // the gesture) started recording. We own this context and inject it into
+    // MicVAD (which then will NOT close it); it is reused across start/stop and
+    // closed only on unmount.
+    const AudioCtor =
+      window.AudioContext ||
+      (window as unknown as { webkitAudioContext?: typeof AudioContext })
+        .webkitAudioContext;
+    if (AudioCtor && !audioContextRef.current) {
+      audioContextRef.current = new AudioCtor();
+    }
+    // Resume within the gesture; swallow rejection (e.g. already running/closed).
+    void audioContextRef.current?.resume().catch(() => {});
+    // Show immediate feedback while the model loads.
+    setStatus("loading");
+
+    // Abandon a session whose teardown raced one of the awaits below. The
+    // instance is routed through vadRef so destroyVad() disposes it (a repeated
+    // destroy is safe: destroyVad has its own try/catch), then the synchronous
+    // start guard is released. Status is left to whoever tore the session down.
+    const abandon = (instance: MicVADInstance): void => {
+      activeRef.current = false;
+      vadRef.current = instance;
+      destroyVad();
+      startingRef.current = false;
+    };
+
+    let vad: MicVADInstance;
+    try {
+      // Lazy import so the heavy onnx model/worklet are only fetched on first use
+      // and code-split out of the main bundle.
+      const { MicVAD } = await import("@ricky0123/vad-web");
+
+      vad = await MicVAD.new({
+        // Silero v5 model (smaller/faster than the legacy model).
+        model: "v5",
+        // vad-web 0.0.30 defaults startOnLoad:true, which opens the mic (calls
+        // getUserMedia) inside new() and leaves the later vad.start() a no-op —
+        // making its mic-permission error handling dead code. Force it off so the
+        // mic is opened only by the explicit vad.start() below, where the real
+        // getUserMedia errors are caught and mapped.
+        startOnLoad: false,
+        // Inject the AudioContext we created+resumed inside the click gesture so
+        // the VAD worklet runs on a "running" context. When provided, the library
+        // uses it and does NOT take ownership/close it.
+        ...(audioContextRef.current
+          ? { audioContext: audioContextRef.current }
+          : {}),
+        // Only pass asset paths when defined; otherwise the library uses its
+        // bundled CDN defaults.
+        ...(VAD_BASE_ASSET_PATH !== undefined
+          ? { baseAssetPath: VAD_BASE_ASSET_PATH }
+          : {}),
+        ...(VAD_ONNX_WASM_BASE_PATH !== undefined
+          ? { onnxWASMBasePath: VAD_ONNX_WASM_BASE_PATH }
+          : {}),
+        // --- VAD tuning (all tunable) ---
+        // Probability over which a frame counts as speech.
+        positiveSpeechThreshold: 0.5,
+        // Probability under which a frame counts as non-speech (~0.15 below the
+        // positive threshold, per Silero guidance).
+        negativeSpeechThreshold: 0.35,
+        // Silence to wait through before ending a segment (the "don't cut
+        // immediately" delay). Each ended segment is ONE transcription request, so
+        // cutting on short gaps over-fragments normal speech into a flood of tiny
+        // requests (and trips the server's per-user rate limit). Wait ~1.5s — a
+        // real sentence/thought boundary — so request count tracks actual pauses,
+        // not every inter-word gap. Higher = fewer requests but more latency
+        // before text appears. NOTE: vad-web 0.0.30 takes this in ms, not frames
+        // (one Silero frame is ~32ms at 16k).
+        redemptionMs: 1500,
+        // Audio kept before speech start (left padding so the first word isn't
+        // clipped) — ~0.3s.
+        preSpeechPadMs: 320,
+        // Ignore sub-100ms blips like clicks.
+        minSpeechMs: 96,
+        onFrameProcessed: (probabilities: { isSpeech: number }) => {
+          // Drive the level meter from the speech probability. Light exponential
+          // smoothing + a throttle so React state isn't updated every frame; this
+          // powers the existing button halo. Reuses the VAD's own frame
+          // probabilities — no second AudioContext/AnalyserNode.
+          if (!activeRef.current) return;
+          const level = Math.min(1, Math.max(0, probabilities.isSpeech));
+          smoothedLevelRef.current = smoothedLevelRef.current * 0.8 + level * 0.2;
+          if (Math.abs(smoothedLevelRef.current - emittedLevelRef.current) > 0.01) {
+            emittedLevelRef.current = smoothedLevelRef.current;
+            setAudioLevel(smoothedLevelRef.current);
+          }
+        },
+        onSpeechStart: () => {
+          // No-op: the segment is only handled once it ends.
+        },
+        onSpeechEnd: (audio: Float32Array) => {
+          // A pause was detected — cut this segment and transcribe it. Ignore late
+          // callbacks that fire after stop()/cancel().
+          if (!activeRef.current || canceledRef.current) return;
+          handleSegment(audio);
+        },
+      });
+    } catch (err) {
+      // With startOnLoad:false, new() loads the model/worklet/wasm but does NOT
+      // open the mic, so a throw here is an asset/init failure (model fetch,
+      // worklet, onnxruntime wasm), not a mic-permission error. Map it as a
+      // generic "could not start" with the underlying detail. (The mic-permission
+      // name checks are kept in the vad.start() catch below, where getUserMedia
+      // actually runs.)
+      console.error("[dictation] VAD init failed", err);
+      const detail = (err as { message?: string })?.message ?? String(err);
+      notifications.show({
+        color: "red",
+        message: `${t("Could not start recording")}: ${detail}`,
+      });
+      // Defensive: if MicVAD.new partially succeeded before throwing, make sure we
+      // don't leak it.
+      destroyVad();
+      setStatus("idle");
+      startingRef.current = false;
+      return;
+    }
+
+    // cancel()/unmount during the model load set canceledRef; it was cleared at
+    // the top of this call, so "true" can only come from them. Dispose the unused
+    // instance instead of opening the mic for a session nobody owns.
+    // NOTE(review): stop() while still loading only resets status and leaves no
+    // flag visible here, so that case still proceeds to record — confirm intent.
+    if (canceledRef.current) {
+      abandon(vad);
+      return;
+    }
+
+    vadRef.current = vad;
+    // Accept frames once start() resolves; the VAD callbacks already guard on
+    // activeRef, so setting it before start() is safe.
+    activeRef.current = true;
+
+    try {
+      // With startOnLoad:false this is where getUserMedia actually runs, so map
+      // mic-permission errors here the same way the batch hook does; otherwise
+      // fall back to a generic "could not start" message.
+      await vad.start();
+    } catch (err) {
+      // Always log the full error for diagnosis (name, message, stack).
+      console.error("[dictation] VAD.start failed", err);
+      const name = (err as { name?: string })?.name;
+      const detail = (err as { message?: string })?.message ?? String(err);
+      let message: string;
+      if (name === "NotAllowedError" || name === "SecurityError") {
+        message = t("Microphone access denied");
+      } else if (name === "NotFoundError" || name === "OverconstrainedError") {
+        message = t("No microphone found");
+      } else if (name === "NotReadableError" || name === "AbortError") {
+        message = t("Microphone is unavailable or already in use");
+      } else {
+        message = `${t("Could not start recording")}: ${detail}`;
+      }
+      notifications.show({ color: "red", message });
+      activeRef.current = false;
+      destroyVad();
+      setStatus("idle");
+      startingRef.current = false;
+      return;
+    }
+
+    // stop()/cancel()/unmount may have run while the mic prompt was pending; each
+    // of them clears activeRef and tears the VAD down. Release anything the late
+    // start() re-acquired and do NOT flip to "recording" or arm the timer for a
+    // session that no longer exists.
+    if (!activeRef.current) {
+      abandon(vad);
+      return;
+    }
+
+    setStatus("recording");
+    // Recording has truly begun; release the synchronous start guard.
+    startingRef.current = false;
+
+    // Optional overall safety cap: auto-stop after maxDurationMs like the batch
+    // hook does.
+    const maxDurationMs = optionsRef.current.maxDurationMs ?? 120000;
+    timerRef.current = setTimeout(() => {
+      if (activeRef.current) stopRef.current();
+    }, maxDurationMs);
+  }, [status, t, resetLevel, destroyVad, handleSegment]);
+
+  // Stop the current session; when none is live, just force the UI to idle.
+  const stop = useCallback((): void => {
+    clearTimer();
+    if (activeRef.current || vadRef.current) {
+      // Go inactive BEFORE teardown so late onSpeechEnd/onFrameProcessed
+      // callbacks are ignored. A segment still open when Stop is clicked
+      // mid-utterance is dropped — acceptable for v1; users normally pause first.
+      activeRef.current = false;
+      destroyVad();
+      resetLevel();
+      settleAfterStop();
+      return;
+    }
+    // Nothing is running; make sure the UI is idle.
+    setStatus("idle");
+  }, [clearTimer, destroyVad, resetLevel, settleAfterStop]);
+
+  // The max-duration timer armed in start() calls stop() through this ref: start
+  // is declared before stop, and the ref is re-pointed at the latest stop here.
+  const stopRef = useRef(stop);
+  stopRef.current = stop;
+
+  const cancel = useCallback((): void => {
+    clearTimer();
+    // Raise both flags before teardown so late VAD callbacks are ignored.
+    activeRef.current = false;
+    canceledRef.current = true;
+    // Hard discard: advancing the epoch makes every in-flight request stale, so
+    // its result is dropped the moment it resolves (no emit, no counter touch).
+    epochRef.current += 1;
+    // Forget buffered results and restart sequencing / in-flight accounting.
+    resultsRef.current = new Map();
+    nextEmitSeqRef.current = 0;
+    nextSeqRef.current = 0;
+    inFlightRef.current = 0;
+    destroyVad();
+    resetLevel();
+    setStatus("idle");
+  }, [clearTimer, destroyVad, resetLevel]);
+
+  // Unmount cleanup: clear the timer, destroy the VAD (which stops the mic
+  // stream) and close the AudioContext this hook owns. destroyVad carries its
+  // own defensive try/catch, so teardown never throws.
+  useEffect(() => {
+    return () => {
+      clearTimer();
+      canceledRef.current = true;
+      activeRef.current = false;
+      destroyVad();
+      // MicVAD never closes an injected context, so closing the one we own
+      // falls to this cleanup.
+      const ctx = audioContextRef.current;
+      audioContextRef.current = null;
+      if (ctx && ctx.state !== "closed") {
+        void ctx.close().catch(() => {});
+      }
+    };
+  }, [clearTimer, destroyVad]);
+
+  return { status, start, stop, cancel, audioLevel };
+}
--- a/apps/client/src/features/dictation/utils/encode-wav.test.ts
+++ b/apps/client/src/features/dictation/utils/encode-wav.test.ts
@@ -0,0 +1,87 @@
+import { describe, it, expect } from "vitest";
+import { encodeWavPcm16 } from "./encode-wav";
+
+// Contract tests for `encodeWavPcm16` (encode-wav.ts). The dictation feature
+// streams microphone audio as mono 16-bit PCM WAV to the STT endpoint, which
+// whitelists audio/wav. A regression in the WAV header or PCM16 clamping would
+// produce audio the server cannot decode (silence / garbled transcripts), so we
+// assert the canonical 44-byte header layout and the sample quantisation rails.
+
+// jsdom's Blob lacks `.arrayBuffer()`, so read the bytes back with
+// FileReader.readAsArrayBuffer and wrap them in a DataView for assertions.
+function readView(blob: Blob): Promise<DataView> {
+  return new Promise<DataView>((resolve, reject) => {
+    const fr = new FileReader();
+    fr.onerror = () => reject(fr.error);
+    fr.onload = () => resolve(new DataView(fr.result as ArrayBuffer));
+    fr.readAsArrayBuffer(blob);
+  });
+}
+
+function readStr(view: DataView, offset: number, length: number): string {
+  // Decode `length` bytes starting at `offset`, one char code per byte.
+  const codes = Array.from({ length }, (_, i) => view.getUint8(offset + i));
+  return String.fromCharCode(...codes);
+}
+
+describe("encodeWavPcm16", () => { // byte offsets below index the 44-byte header
+  it("writes the canonical RIFF/WAVE/fmt /data tags", async () => {
+    const view = await readView(encodeWavPcm16(new Float32Array(4)));
+    expect(readStr(view, 0, 4)).toBe("RIFF");
+    expect(readStr(view, 8, 4)).toBe("WAVE");
+    expect(readStr(view, 12, 4)).toBe("fmt ");
+    expect(readStr(view, 36, 4)).toBe("data");
+  });
+
+  it("writes a PCM fmt chunk (size=16, format=1, mono, 16-bit)", async () => {
+    const samples = new Float32Array(10);
+    const view = await readView(encodeWavPcm16(samples));
+    expect(view.getUint32(16, true)).toBe(16); // fmt chunk size
+    expect(view.getUint16(20, true)).toBe(1); // audioFormat = PCM
+    expect(view.getUint16(22, true)).toBe(1); // channels = mono
+    expect(view.getUint16(34, true)).toBe(16); // bits per sample
+  });
+
+  it("derives byteRate, blockAlign and dataSize from the sample rate and length", async () => {
+    const sampleRate = 16000;
+    const samples = new Float32Array(10);
+    const view = await readView(encodeWavPcm16(samples, sampleRate));
+    expect(view.getUint32(28, true)).toBe(sampleRate * 2); // byteRate = sampleRate * 2
+    expect(view.getUint16(32, true)).toBe(2); // blockAlign = 2 (mono * 16-bit)
+    expect(view.getUint32(40, true)).toBe(samples.length * 2); // dataSize
+    expect(view.getUint32(4, true)).toBe(36 + samples.length * 2); // RIFF chunk size
+  });
+
+  it("defaults the sample rate to 16000 at offset 24", async () => {
+    const view = await readView(encodeWavPcm16(new Float32Array(2)));
+    expect(view.getUint32(24, true)).toBe(16000); // sampleRate argument omitted
+  });
+
+  it("writes the overridden sample rate at offset 24 (8000 / 48000)", async () => {
+    const view8 = await readView(encodeWavPcm16(new Float32Array(2), 8000));
+    expect(view8.getUint32(24, true)).toBe(8000);
+    expect(view8.getUint32(28, true)).toBe(8000 * 2); // byteRate follows the override
+
+    const view48 = await readView(encodeWavPcm16(new Float32Array(2), 48000));
+    expect(view48.getUint32(24, true)).toBe(48000);
+    expect(view48.getUint32(28, true)).toBe(48000 * 2); // byteRate follows the override
+  });
+
+  it("clamps and quantises PCM16 samples to the asymmetric rails", async () => {
+    // +1.0 -> 32767 (clamped>=0 uses *0x7fff), -1.0 -> -32768 (clamped<0 uses *0x8000),
+    // 0 -> 0, and out-of-range values are clamped to the rails first.
+    const samples = new Float32Array([1.0, -1.0, 0, 1.5, -1.5]);
+    const view = await readView(encodeWavPcm16(samples));
+    expect(view.getInt16(44 + 0 * 2, true)).toBe(32767); // +1.0
+    expect(view.getInt16(44 + 1 * 2, true)).toBe(-32768); // -1.0
+    expect(view.getInt16(44 + 2 * 2, true)).toBe(0); // 0
+    expect(view.getInt16(44 + 3 * 2, true)).toBe(32767); // +1.5 -> clamped to +1.0
+    expect(view.getInt16(44 + 4 * 2, true)).toBe(-32768); // -1.5 -> clamped to -1.0
+  });
+
+  it("produces a mono blob of length 44 + samples.length * 2", () => {
+    expect(encodeWavPcm16(new Float32Array(0)).size).toBe(44); // header only
+    expect(encodeWavPcm16(new Float32Array(100)).size).toBe(44 + 100 * 2);
+    expect(encodeWavPcm16(new Float32Array(100)).type).toBe("audio/wav");
+  });
+});
--- a/apps/client/src/features/dictation/utils/encode-wav.ts
+++ b/apps/client/src/features/dictation/utils/encode-wav.ts
@@ -0,0 +1,32 @@
+// Encode mono Float32 PCM as a 16-bit little-endian PCM WAV blob ("audio/wav",
+// which the server STT endpoint whitelists): 44-byte header, then the samples.
+export function encodeWavPcm16(samples: Float32Array, sampleRate = 16000): Blob {
+  const HEADER_BYTES = 44;
+  const SAMPLE_BYTES = 2; // 16-bit
+  const frameBytes = SAMPLE_BYTES; // block align for a single (mono) channel
+  const payloadBytes = samples.length * SAMPLE_BYTES;
+  const buffer = new ArrayBuffer(HEADER_BYTES + payloadBytes);
+  const out = new DataView(buffer);
+  const putTag = (at: number, tag: string): void => {
+    tag.split("").forEach((ch, i) => out.setUint8(at + i, ch.charCodeAt(0)));
+  };
+  putTag(0, "RIFF");
+  out.setUint32(4, 36 + payloadBytes, true); // bytes following this field
+  putTag(8, "WAVE");
+  putTag(12, "fmt ");
+  out.setUint32(16, 16, true); // fmt chunk size for PCM
+  out.setUint16(20, 1, true); // format tag 1 = PCM
+  out.setUint16(22, 1, true); // one channel
+  out.setUint32(24, sampleRate, true);
+  out.setUint32(28, sampleRate * frameBytes, true); // bytes per second
+  out.setUint16(32, frameBytes, true);
+  out.setUint16(34, 16, true); // bits per sample
+  putTag(36, "data");
+  out.setUint32(40, payloadBytes, true);
+  // Clamp to [-1, 1]; negatives scale by 0x8000, the rest by 0x7fff.
+  samples.forEach((sample, i) => {
+    const s = Math.max(-1, Math.min(1, sample));
+    out.setInt16(HEADER_BYTES + i * SAMPLE_BYTES, s < 0 ? s * 0x8000 : s * 0x7fff, true);
+  });
+  return new Blob([buffer], { type: "audio/wav" });
+}
--- a/apps/client/src/features/editor/components/audio/audio-menu.tsx
+++ b/apps/client/src/features/editor/components/audio/audio-menu.tsx
@@ -1,23 +1,43 @@
 import { BubbleMenu as BaseBubbleMenu } from "@tiptap/react/menus";
 import { findParentNode, posToDOMRect, useEditorState } from "@tiptap/react";
-import { useCallback } from "react";
+import { useCallback, useState } from "react";
 import { Node as PMNode } from "@tiptap/pm/model";
 import { isEditorReady } from "@docmost/editor-ext";
 import {
  EditorMenuProps,
  ShouldShowProps,
 } from "@/features/editor/components/table/types/types.ts";
-import { ActionIcon, Tooltip } from "@mantine/core";
+import { ActionIcon, Loader, Tooltip } from "@mantine/core";
 import {
  IconDownload,
+  IconFileText,
  IconTrash,
 } from "@tabler/icons-react";
+import { notifications } from "@mantine/notifications";
+import { useAtomValue } from "jotai";
 import { useTranslation } from "react-i18next";
 import { getFileUrl } from "@/lib/config.ts";
+import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
+import { transcribeAudio } from "@/features/dictation/services/dictation-service";
 import classes from "../common/toolbar-menu.module.css";

+// STT-accepted audio MIME types (mirror of the server whitelist). A fetched
+// blob whose base type (parameters stripped, lower-cased) is in this set is
+// uploaded unchanged; AUDIO_MIME_BY_EXT supplies the type for any other blob.
+const RECOGNIZED_AUDIO_MIME = new Set([
+  "audio/webm", "audio/ogg", "audio/mp4", "audio/mpeg",
+  "audio/wav", "audio/x-wav", "audio/wave", "audio/m4a", "audio/x-m4a",
+]);
+const AUDIO_MIME_BY_EXT: Record<string, string> = { // key: lower-cased extension, no dot
+  mp3: "audio/mpeg", m4a: "audio/mp4", mp4: "audio/mp4",
+  wav: "audio/wav", ogg: "audio/ogg", oga: "audio/ogg", webm: "audio/webm",
+};
+
 export function AudioMenu({ editor }: EditorMenuProps) {
  const { t } = useTranslation();
+  const workspace = useAtomValue(workspaceAtom);
+  const dictationEnabled = workspace?.settings?.ai?.dictation === true;
+  const [isTranscribing, setIsTranscribing] = useState(false);

  const editorState = useEditorState({
    editor,
@@ -68,6 +88,100 @@ export function AudioMenu({ editor }: EditorMenuProps) {
    };
  }, [editor]);

+  // Transcribe the selected audio block via the STT endpoint and insert the text
+  // as a new paragraph right after that block. Failures are logged and shown as a
+  // red notification; an empty transcript only shows "No speech detected".
+  const handleTranscribe = useCallback(async () => {
+    const src = editorState?.src;
+    if (!src || isTranscribing) return;
+
+    // The bubble menu shows for the selected audio node, so selection.from is
+    // that node's start position. Capture it now to disambiguate duplicate-src
+    // blocks after the async transcription completes.
+    const selectedPos = editor.state.selection.from;
+
+    setIsTranscribing(true);
+    try {
+      const fileUrl = getFileUrl(src);
+      // Derive a filename from the internal src for the multipart part name and
+      // for MIME inference when the fetched blob has no usable type.
+      const rawName = src.split("?")[0].split("/").pop() || "audio";
+      let filename = rawName;
+      try {
+        filename = decodeURIComponent(rawName);
+      } catch {
+        // Malformed %-escape in the stored name: keep the raw segment instead of
+        // failing the whole transcription with a URIError.
+      }
+
+      const res = await fetch(fileUrl, { credentials: "include" });
+      if (!res.ok) {
+        throw new Error(`Failed to fetch audio file (HTTP ${res.status})`);
+      }
+      const blob = await res.blob();
+
+      // Ensure the upload's content-type is one the STT endpoint accepts; the
+      // server keys off the blob's MIME type.
+      let uploadBlob = blob;
+      const baseType = (blob.type || "").split(";")[0].trim().toLowerCase();
+      if (!RECOGNIZED_AUDIO_MIME.has(baseType)) {
+        const ext = filename.split(".").pop()?.toLowerCase() ?? "";
+        const inferred = AUDIO_MIME_BY_EXT[ext];
+        if (inferred) {
+          // Rebuild the blob with an accepted content-type; the server keys off it.
+          uploadBlob = new Blob([blob], { type: inferred });
+        }
+      }
+
+      const text = (await transcribeAudio(uploadBlob, filename)).trim();
+      if (text.length === 0) {
+        notifications.show({ message: t("No speech detected") });
+        return;
+      }
+
+      const paragraph = { type: "paragraph", content: [{ type: "text", text }] };
+      try {
+        // The editor may have been torn down while the network calls were
+        // pending; there is nowhere left to insert, and that is not a
+        // transcription failure.
+        if (editor.isDestroyed) return;
+
+        // Re-scan the doc at insert time so a collaborative edit during the async
+        // transcription can't misplace the text. Among audio nodes with this src
+        // (the same file may be embedded more than once), pick the occurrence
+        // closest to the originally-selected block. The scan lives inside this
+        // try so an editor-state error is logged rather than reported as a
+        // failed transcription.
+        let insertPos: number | null = null;
+        let bestDelta = Infinity;
+        editor.state.doc.descendants((node, pos) => {
+          if (node.type.name === "audio" && node.attrs.src === src) {
+            const delta = Math.abs(pos - selectedPos);
+            if (delta < bestDelta) {
+              bestDelta = delta;
+              insertPos = pos + node.nodeSize; // position just after the audio block
+            }
+          }
+          return true; // visit all nodes to find the closest match
+        });
+
+        if (insertPos !== null) {
+          editor.chain().focus().insertContentAt(insertPos, paragraph).run();
+        } else {
+          editor.chain().focus().insertContent(paragraph).run();
+        }
+      } catch (insertErr) {
+        // A destroyed editor or out-of-bounds position must not throw; log and
+        // ignore so the transcription itself is not reported as a failure.
+        console.error("[audio-transcribe] insert failed", insertErr);
+      }
+    } catch (err) {
+      console.error("[audio-transcribe] failed", err);
+      const resp = (
+        err as { response?: { status?: number; data?: { message?: string } } }
+      )?.response;
+      const serverMsg = resp?.data?.message;
+      let message: string;
+      if (serverMsg && serverMsg.trim().length > 0) {
+        // The server already explains the cause (e.g. provider error, bad
+        // format, STT not configured) — show it verbatim.
+        message = serverMsg;
+      } else if (resp?.status === 503 || resp?.status === 403) {
+        message = t("Voice dictation is not configured");
+      } else {
+        message = `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`;
+      }
+      notifications.show({ color: "red", message });
+    } finally {
+      setIsTranscribing(false);
+    }
+  }, [editor, editorState?.src, isTranscribing, t]);
+
  const handleDownload = useCallback(() => {
    if (!editorState?.src) return;
    const url = getFileUrl(editorState.src);
@@ -95,6 +209,20 @@ export function AudioMenu({ editor }: EditorMenuProps) {
      shouldShow={shouldShow}
    >
      <div className={classes.toolbar}>
+        {dictationEnabled && (
+          <Tooltip position="top" label={isTranscribing ? t("Transcribing…") : t("Transcribe")} withinPortal={false}>
+            <ActionIcon
+              onClick={handleTranscribe}
+              size="lg"
+              aria-label={t("Transcribe")}
+              variant="subtle"
+              disabled={isTranscribing}
+            >
+              {isTranscribing ? <Loader size={18} /> : <IconFileText size={18} />}
+            </ActionIcon>
+          </Tooltip>
+        )}
+
        <Tooltip position="top" label={t("Download")} withinPortal={false}>
          <ActionIcon
            onClick={handleDownload}
--- a/apps/client/src/features/editor/components/code-block/code-block-view.tsx
+++ b/apps/client/src/features/editor/components/code-block/code-block-view.tsx
@@ -47,6 +47,26 @@ export default function CodeBlockView(props: NodeViewProps) {

  return (
    <NodeViewWrapper className="codeBlock">
+      {/* #146: the editable <pre><code> (contentDOM) MUST come first in the DOM.
+          With the non-editable menu rendered before it, the browser's click
+          hit-testing snapped the caret up one line. Render content first; the
+          menu is rendered after it and lifted back above visually via flex
+          `order: -1` (the `.codeBlock` wrapper is a flex column — see
+          code-block.module.css). It stays fully in flow as a full-width row
+          above the code: no overlay/absolute positioning. The second #146
+          mitigation lives in editor-paste-handler.tsx (reflowAfterPaste). */}
+      <pre
+        spellCheck="false"
+        hidden={
+          ((language === "mermaid" && !editor.isEditable) ||
+            (language === "mermaid" && !isSelected)) &&
+          node.textContent.length > 0
+        }
+      >
+        {/* @ts-ignore */}
+        <NodeViewContent as="code" className={`language-${language}`} />
+      </pre>
+
      <Group
        justify="flex-end"
        contentEditable={false}
@@ -83,18 +103,6 @@ export default function CodeBlockView(props: NodeViewProps) {
        </CopyButton>
      </Group>

-      <pre
-        spellCheck="false"
-        hidden={
-          ((language === "mermaid" && !editor.isEditable) ||
-            (language === "mermaid" && !isSelected)) &&
-          node.textContent.length > 0
-        }
-      >
-        {/* @ts-ignore */}
-        <NodeViewContent as="code" className={`language-${language}`} />
-      </pre>
-
      {language === "mermaid" && (
        <Suspense fallback={null}>
          <MermaidView props={props} />
--- a/apps/client/src/features/editor/components/code-block/code-block.module.css
+++ b/apps/client/src/features/editor/components/code-block/code-block.module.css
@@ -17,7 +17,14 @@
    justify-content: center;
 }

+/* #146: the menu now follows the <pre> in the DOM (so the editable contentDOM is
+   FIRST and click hit-testing is correct). Lift it back ABOVE the code visually
+   with flex `order` — the .codeBlock wrapper is a flex column (see code.css) —
+   so the menu still reads as a row above the code, exactly as before, without
+   sitting in-flow before the contentDOM. */
 .menuGroup {
+    order: -1;
+
    @media print {
        display: none;
    }
--- a/apps/client/src/features/editor/components/common/editor-paste-handler.test.ts
+++ b/apps/client/src/features/editor/components/common/editor-paste-handler.test.ts
@@ -0,0 +1,160 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+import {
+  collectScrollAncestors,
+  reflowAfterPaste,
+} from "./editor-paste-handler";
+
+/**
+ * Unit tests for the #146 post-paste reflow helpers. jsdom does not compute
+ * styles or layout, so we stub getComputedStyle (per element via a Map) and the
+ * scroll/overflow geometry properties (per element via Object.defineProperty).
+ * Element trees are built DETACHED from `document`, so the ancestor walk only
+ * traverses the elements we create. collectScrollAncestors also appends a
+ * non-null document.scrollingElement, so most cases assert with
+ * toContain/not.toContain; exact equality is used only when it is stubbed null.
+ */
+
+type Overflow = { overflowX: string; overflowY: string }; // what the stubbed getComputedStyle returns
+const styleMap = new Map<Element, Overflow>(); // per-element styles; cleared in beforeEach
+
+// Detached <div> with stubbed scroll geometry; its overflow goes into styleMap.
+function makeScrollable(
+  overflowY: string,
+  {
+    sh = 0,
+    ch = 0,
+    sw = 0,
+    cw = 0,
+    left = 0,
+    top = 0,
+    overflowX = "visible",
+  }: Partial<Record<"sh" | "ch" | "sw" | "cw" | "left" | "top", number>> & {
+    overflowX?: string;
+  } = {},
+) {
+  const el = document.createElement("div");
+  const geometry: Array<[string, number]> = [
+    ["scrollHeight", sh],
+    ["clientHeight", ch],
+    ["scrollWidth", sw],
+    ["clientWidth", cw],
+    ["scrollLeft", left],
+    ["scrollTop", top],
+  ];
+  for (const [prop, value] of geometry) {
+    Object.defineProperty(el, prop, { configurable: true, value });
+  }
+  styleMap.set(el, { overflowX, overflowY });
+  return el;
+}
+
+// Create a leaf <div> inside `parent`. The ancestor walk starts at
+// node.parentElement, so `parent` is the first candidate ancestor.
+function makeNodeUnder(parent: HTMLElement) {
+  const leaf = document.createElement("div");
+  parent.append(leaf);
+  return leaf;
+}
+
+// Shadow `document.scrollingElement` with an own accessor on the instance; the
+// native getter on Document.prototype is never touched.
+function setScrollingElement(value: Element | null) {
+  const shadow: PropertyDescriptor = {
+    configurable: true,
+    get: () => value,
+  };
+  Object.defineProperty(document, "scrollingElement", shadow);
+}
+
+beforeEach(() => {
+  styleMap.clear(); // each test registers its own elements
+  vi.stubGlobal("getComputedStyle", (el: Element) => {
+    return styleMap.get(el) ?? { overflowX: "visible", overflowY: "visible" }; // unregistered => not scrollable
+  });
+});
+
+afterEach(() => {
+  vi.unstubAllGlobals();
+  // Drop the per-test instance override so the native prototype getter shows
+  // through again (it was never modified, so no further restore is needed).
+  delete (document as any).scrollingElement;
+});
+
+describe("collectScrollAncestors", () => {
+  it("includes an overflow:overlay ancestor that overflows (macOS case)", () => {
+    setScrollingElement(null);
+    const a = makeScrollable("overlay", { sh: 200, ch: 100 });
+    const node = makeNodeUnder(a);
+    expect(collectScrollAncestors(node)).toContain(a);
+  });
+
+  it("excludes an overflow:auto ancestor that does NOT overflow (gate fails)", () => {
+    setScrollingElement(null);
+    const a = makeScrollable("auto", { sh: 100, ch: 100 }); // scrollHeight === clientHeight
+    const node = makeNodeUnder(a);
+    expect(collectScrollAncestors(node)).not.toContain(a);
+  });
+
+  it("includes an overflow:auto ancestor that overflows", () => {
+    setScrollingElement(null);
+    const a = makeScrollable("auto", { sh: 200, ch: 100 });
+    const node = makeNodeUnder(a);
+    expect(collectScrollAncestors(node)).toContain(a);
+  });
+
+  it("excludes a non-scrollable overflow even when it overflows", () => {
+    setScrollingElement(null);
+    const a = makeScrollable("hidden", { sh: 200, ch: 100 }); // "hidden" is not a scrollable value
+    const node = makeNodeUnder(a);
+    expect(collectScrollAncestors(node)).not.toContain(a);
+  });
+
+  it("includes an X-axis overflow:scroll ancestor that overflows horizontally", () => {
+    setScrollingElement(null);
+    const a = makeScrollable("visible", {
+      overflowX: "scroll",
+      sw: 200,
+      cw: 100,
+    });
+    const node = makeNodeUnder(a);
+    expect(collectScrollAncestors(node)).toContain(a);
+  });
+
+  it("dedups: scrollingElement already in the walk is added exactly once", () => {
+    const a = makeScrollable("auto", { sh: 200, ch: 100 });
+    setScrollingElement(a); // the same element is both ancestor and document scroller
+    const node = makeNodeUnder(a);
+    const result = collectScrollAncestors(node);
+    expect(result.filter((x) => x === a).length).toBe(1);
+  });
+
+  it("does not throw and appends nothing when scrollingElement is null", () => {
+    setScrollingElement(null);
+    const a = makeScrollable("auto", { sh: 200, ch: 100 });
+    const node = makeNodeUnder(a);
+    const result = collectScrollAncestors(node);
+    // Only the qualifying ancestor we built — no trailing scrollingElement.
+    expect(result).toEqual([a]);
+  });
+});
+
+describe("reflowAfterPaste", () => {
+  it("runs the double rAF and nudges each ancestor with scrollTo(scrollLeft, scrollTop)", () => {
+    // Run the double-nested requestAnimationFrame synchronously.
+    vi.stubGlobal(
+      "requestAnimationFrame",
+      (cb: FrameRequestCallback) => {
+        cb(0);
+        return 0;
+      },
+    );
+    setScrollingElement(null);
+
+    const a = makeScrollable("auto", { sh: 200, ch: 100, left: 5, top: 10 });
+    const node = makeNodeUnder(a);
+    (a as any).scrollTo = vi.fn(); // spy used to observe the nudge
+
+    reflowAfterPaste({ view: { dom: node } } as any); // minimal stand-in for the editor
+
+    expect((a as any).scrollTo).toHaveBeenCalledWith(5, 10); // current offsets, i.e. zero delta
+  });
+});
--- a/apps/client/src/features/editor/components/common/editor-paste-handler.tsx
+++ b/apps/client/src/features/editor/components/common/editor-paste-handler.tsx
@@ -22,12 +22,81 @@ const ATTACHMENT_NODE_TYPES = [

 const ATTACHMENT_URL_RE = /\/api\/files\/([0-9a-f-]+)\//;

+const SCROLLABLE_OVERFLOW = new Set(["auto", "scroll", "overlay"]);
+
+/**
+ * Gather the scroll containers to nudge after a paste: every ancestor of
+ * `node` whose computed overflow is auto/scroll/overlay on an axis where its
+ * content is larger than its client box, followed by
+ * `document.scrollingElement` when it exists and is not already listed.
+ * `overlay` is accepted because #146 reproduces on macOS, where that value is
+ * common. The size comparison reads live layout, so call this only once the
+ * pasted content has committed.
+ *
+ * @param node Element whose ancestors are inspected (the editor DOM).
+ * @returns Matching ancestors, nearest first, then the scrolling element.
+ */
+export function collectScrollAncestors(node: HTMLElement): HTMLElement[] {
+  const found: HTMLElement[] = [];
+  // Climb through body/html as well: some layouts scroll on body, which is not
+  // necessarily the element document.scrollingElement reports.
+  const allows = (overflow: string) => SCROLLABLE_OVERFLOW.has(overflow);
+  for (let cur = node.parentElement; cur; cur = cur.parentElement) {
+    const css = getComputedStyle(cur);
+    const tall = allows(css.overflowY) && cur.scrollHeight > cur.clientHeight;
+    const wide = allows(css.overflowX) && cur.scrollWidth > cur.clientWidth;
+    if (tall || wide) found.push(cur);
+  }
+  const scroller = document.scrollingElement as HTMLElement | null;
+  if (scroller && !found.includes(scroller)) found.push(scroller);
+  return found;
+}
+
+/**
+ * Post-paste workaround for stale click hit-testing (#146).
+ *
+ * Pasted markdown/code brings in React NodeViews that mount asynchronously.
+ * Until the browser reflows, ProseMirror's posAtCoords/caretRangeFromPoint can
+ * resolve a click to an offset line; users noticed any scroll clears it. This
+ * imitates that scroll without moving the viewport: each container from
+ * collectScrollAncestors gets scrollTo() called with its own current
+ * scrollLeft/scrollTop. Lookup and nudge are both deferred by two animation
+ * frames so they run after the pasted content and its NodeViews have committed
+ * (only then is the real scroll container measurable).
+ *
+ * This is the SECOND of two #146 mitigations; the FIRST is the content-first
+ * DOM order in code-block-view.tsx, footnotes-list-view.tsx and
+ * footnote-definition-view.tsx. When editing one, check the other.
+ */
+export function reflowAfterPaste(editor: Editor) {
+  const root = editor.view.dom as HTMLElement;
+  const nudgeAll = () => {
+    // Resolve the containers only now, after both frames, then re-apply each
+    // one's current offsets. scrollTo(x, y) is the repo idiom and sidesteps the
+    // lint rule against self-assigning scrollTop/scrollLeft.
+    collectScrollAncestors(root).forEach((el) => {
+      el.scrollTo(el.scrollLeft, el.scrollTop);
+    });
+  };
+  requestAnimationFrame(() => {
+    requestAnimationFrame(nudgeAll);
+  });
+}
+
 export const handlePaste = (
  editor: Editor,
  event: ClipboardEvent,
  pageId: string,
  creatorId?: string,
 ) => {
+  // Schedule a post-paste reflow on EVERY paste path — intentionally. handlePaste
+  // returns BEFORE the markdown/code-insertion plugin runs, so it cannot know here
+  // whether async NodeViews will be inserted; the nudge is a cheap layout read on
+  // the next frames and a no-op for the viewport, so scheduling it unconditionally
+  // is simpler and harmless. Pairs with the content-first DOM order in the
+  // NodeViews — both address #146 from different angles.
+  reflowAfterPaste(editor);
+
  const clipboardData = event.clipboardData.getData("text/plain");

  if (INTERNAL_LINK_REGEX.test(clipboardData)) {
--- a/apps/client/src/features/editor/components/common/node-resize.module.css
+++ b/apps/client/src/features/editor/components/common/node-resize.module.css
@@ -73,3 +73,18 @@
    display: none !important;
  }
 }
+
+/* Float image (#145): on narrow screens a floated image would crowd the text to
+   an unreadable column, so collapse it to full width and drop the float.
+   `!important` is required because applyAlignment sets `float`/`padding` inline,
+   which a normal rule cannot override. Keys off the `data-image-align` attribute
+   the image node view mirrors onto its container. This module is the one actually
+   imported by the resize node views (node-resize-handles.ts), so the rule loads. */
+@media (max-width: 600px) {
+  .container:global([data-image-align="floatLeft"]),
+  .container:global([data-image-align="floatRight"]) {
+    float: none !important;
+    width: 100% !important;
+    padding: 0 !important;
+  }
+}
--- a/apps/client/src/features/editor/components/fixed-toolbar/fixed-toolbar.tsx
+++ b/apps/client/src/features/editor/components/fixed-toolbar/fixed-toolbar.tsx
@@ -13,7 +13,6 @@ import { QuickInsertsGroup } from "./groups/quick-inserts-group";
 import { MoreInsertsGroup } from "./groups/more-inserts-group";
 import { HistoryGroup } from "./groups/history-group";
 import { AskAiGroup } from "./groups/ask-ai-group";
-import { DictationGroup } from "./groups/dictation-group";
 import { workspaceAtom } from "@/features/user/atoms/current-user-atom";
 import classes from "./fixed-toolbar.module.css";

@@ -31,7 +30,6 @@ export const FixedToolbar: FC<FixedToolbarProps> = ({
  const state = useToolbarState(editor);
  const workspace = useAtomValue(workspaceAtom);
  const isGenerativeAiEnabled = workspace?.settings?.ai?.generative === true;
-  const isDictationEnabled = workspace?.settings?.ai?.dictation === true;

  if (!editor || !state) return null;

@@ -67,12 +65,6 @@ export const FixedToolbar: FC<FixedToolbarProps> = ({
          <MoreInsertsGroup editor={editor} templateMode={templateMode} />
          <div className={classes.divider} />
          <HistoryGroup editor={editor} state={state} />
-          {isDictationEnabled && (
-            <>
-              <div className={classes.divider} />
-              <DictationGroup editor={editor} />
-            </>
-          )}
        </div>
      </div>
      <div className={classes.spacer} aria-hidden />
--- a/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx
+++ b/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx
@@ -1,48 +1,72 @@
 import { FC, useRef } from "react";
 import type { Editor } from "@tiptap/react";
+import { useAtomValue } from "jotai";
+import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
 import { MicButton } from "@/features/dictation/components/mic-button";

 interface Props {
  editor: Editor;
+  color?: string;
+  iconSize?: number;
 }

-export const DictationGroup: FC<Props> = ({ editor }) => {
+export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
+  // Streaming (silence-cut) dictation is opt-in per workspace; absent/false
+  // keeps the stable batch path.
+  const workspace = useAtomValue(workspaceAtom);
+  const streamingDictation =
+    workspace?.settings?.ai?.dictationStreaming === true;
+  // Caret snapshot taken when dictation starts (where the first segment lands).
  const rangeRef = useRef<{ from: number; to: number } | null>(null);
+  // Running insertion point: after each inserted segment we remember the caret
+  // end so the NEXT segment appends right after it, contiguously, regardless of
+  // where the user's caret currently is. Null until the first segment lands.
+  const insertPosRef = useRef<number | null>(null);

  const handleStart = () => {
    const { from, to } = editor.state.selection;
    rangeRef.current = { from, to };
+    // New session: forget any insertion point from a previous dictation so the
+    // first segment uses the fresh snapshot above.
+    insertPosRef.current = null;
  };

  const handleText = (text: string) => {
    // The editor may be gone by the time async transcription returns; bail out
    // instead of operating on a destroyed instance.
    if (!editor || editor.isDestroyed) return;
-    const snapshot = rangeRef.current;
-    rangeRef.current = null;
    // The document may have shrunk during transcription (e.g. a collaborative
-    // edit), so clamp the snapshot into the current bounds before inserting.
+    // edit), so clamp any position into the current bounds before inserting.
    const docSize = editor.state.doc.content.size;
    const clamp = (p: number) => Math.max(0, Math.min(p, docSize));
+    // First segment lands at the snapshotted caret range; subsequent segments
+    // land at a zero-length range at the running insertion point so they stay
+    // contiguous even if the user clicked elsewhere mid-dictation.
+    const snapshot = rangeRef.current;
+    const range =
+      insertPosRef.current !== null
+        ? { from: clamp(insertPosRef.current), to: clamp(insertPosRef.current) }
+        : snapshot
+          ? { from: clamp(snapshot.from), to: clamp(snapshot.to) }
+          : null;
    try {
-      if (snapshot) {
-        // Insert at the snapshotted caret; a trailing space keeps words
-        // separated (the hook already trims the transcribed text).
-        editor
-          .chain()
-          .focus()
-          .insertContentAt(
-            { from: clamp(snapshot.from), to: clamp(snapshot.to) },
-            `${text} `,
-          )
-          .run();
+      if (range) {
+        // Insert at the resolved range; a trailing space keeps words separated
+        // (the hook already trims the transcribed text).
+        editor.chain().focus().insertContentAt(range, `${text} `).run();
      } else {
+        // No snapshot and no running point (shouldn't happen normally) — fall
+        // back to the current caret.
        editor.chain().focus().insertContent(`${text} `).run();
      }
+      // Remember where the inserted text ends so the next segment appends right
+      // after it, independent of later user caret moves.
+      insertPosRef.current = editor.state.selection.to;
    } catch {
-      // The snapshot drifted out of range; fall back to the current caret.
+      // The range drifted out of bounds; fall back to the current caret.
      try {
        editor.chain().focus().insertContent(`${text} `).run();
+        insertPosRef.current = editor.state.selection.to;
      } catch {
        // The editor may have been destroyed; ignore so a dead editor can't
        // surface an uncaught error.
@@ -53,9 +77,12 @@ export const DictationGroup: FC<Props> = ({ editor }) => {
  return (
    <MicButton
      size="md"
+      streaming={streamingDictation}
      onStart={handleStart}
      onText={handleText}
      disabled={!editor.isEditable}
+      color={color}
+      iconSize={iconSize}
    />
  );
 };
--- a/apps/client/src/features/editor/components/footnote/footnote-definition-view.tsx
+++ b/apps/client/src/features/editor/components/footnote/footnote-definition-view.tsx
@@ -1,25 +1,45 @@
 import { NodeViewContent, NodeViewProps, NodeViewWrapper } from "@tiptap/react";
 import { useTranslation } from "react-i18next";
-import { getFootnoteNumber } from "@docmost/editor-ext";
+import { getFootnoteNumber, getFootnoteRefCount } from "@docmost/editor-ext";
 import classes from "./footnote.module.css";

+/**
+ * 0-based backlink index -> lowercase letter label in bijective base 26
+ * (0 -> "a", 25 -> "z", 26 -> "aa", ...), per the Pandoc/Wikipedia "↩ a b c"
+ * convention. Negative, NaN or infinite -> "" (Infinity never terminated).
+ */
+export function backlinkLabel(index: number): string {
+  if (!Number.isFinite(index)) return "";
+  let label = "";
+  for (let n = index; n >= 0; n = Math.floor(n / 26) - 1) {
+    label = String.fromCharCode(97 + (n % 26)) + label;
+  }
+  return label;
+}
+
 /**
 * NodeView for a single footnote definition: a decorative number marker, the
 * editable content (NodeViewContent), and a "↩" back-link to its reference.
 * The number is derived from the document (not stored).
+ *
+ * After #166 a footnote can be referenced more than once (one number, one
+ * definition, N forward links). When it is, the back-link becomes a row of
+ * per-occurrence links — ↩ a b c … — each scrolling to its own reference (#168);
+ * a single-reference footnote keeps the plain ↩.
 */
 export default function FootnoteDefinitionView(props: NodeViewProps) {
  const { node, editor } = props;
  const { t } = useTranslation();
  const id = node.attrs.id as string;

-  // Read the cached number from the numbering plugin (computed once per doc
-  // change) rather than recomputing the whole map on every render.
+  // Read the cached number/ref-count from the numbering plugin (computed once
+  // per doc change) rather than recomputing the whole map on every render.
  const number = getFootnoteNumber(editor.state, id) ?? "?";
+  const refCount = getFootnoteRefCount(editor.state, id);

-  const handleBack = (e: React.MouseEvent) => {
+  const jumpTo = (e: React.MouseEvent, index: number) => {
    e.preventDefault();
-    editor.commands.scrollToReference(id);
+    editor.commands.scrollToReference(id, index);
  };

  return (
@@ -29,20 +49,60 @@ export default function FootnoteDefinitionView(props: NodeViewProps) {
      className={classes.definition}
      style={{ ["--footnote-number" as any]: `"${number}"` }}
    >
-      <span className={classes.definitionMarker} contentEditable={false}>
-        {number}.
-      </span>
+      {/* #146: contentDOM MUST be the first child — a non-editable marker before
+          it makes click hit-testing snap the caret above. Content first; the
+          marker + back-link follow in DOM and are placed left/right via CSS
+          flex `order`. The second #146 mitigation lives in
+          editor-paste-handler.tsx (reflowAfterPaste). */}
      <NodeViewContent className={classes.definitionContent} />
      <span
-        className={classes.backLink}
+        className={classes.definitionMarker}
        contentEditable={false}
-        onClick={handleBack}
-        role="button"
-        aria-label={t("Back to reference")}
-        title={t("Back to reference")}
+        aria-hidden="true"
      >
-        ↩
+        {number}.
      </span>
+      {refCount > 1 ? (
+        // Multiple references -> ↩ followed by one lettered link per occurrence.
+        <span
+          className={classes.backLinks}
+          contentEditable={false}
+          role="group"
+          aria-label={t("Back to references")}
+        >
+          <span className={classes.backLinkArrow} aria-hidden="true">
+            ↩
+          </span>
+          {Array.from({ length: refCount }, (_, i) => (
+            <span
+              key={i}
+              className={classes.backLink}
+              onClick={(e) => jumpTo(e, i)}
+              role="button"
+              aria-label={t("Back to reference {{label}}", {
+                label: backlinkLabel(i),
+              })}
+              title={t("Back to reference {{label}}", {
+                label: backlinkLabel(i),
+              })}
+            >
+              {backlinkLabel(i)}
+            </span>
+          ))}
+        </span>
+      ) : (
+        // Single reference -> the plain ↩ (unchanged behavior).
+        <span
+          className={classes.backLink}
+          contentEditable={false}
+          onClick={(e) => jumpTo(e, 0)}
+          role="button"
+          aria-label={t("Back to reference")}
+          title={t("Back to reference")}
+        >
+          ↩
+        </span>
+      )}
    </NodeViewWrapper>
  );
 }
--- a/apps/client/src/features/editor/components/footnote/footnote-views.structure.test.tsx
+++ b/apps/client/src/features/editor/components/footnote/footnote-views.structure.test.tsx
@@ -0,0 +1,231 @@
+import { describe, it, expect, vi, afterEach } from "vitest";
+import { render, fireEvent } from "@testing-library/react";
+
+/**
+ * Structural regression guard for #146 (PR #147).
+ *
+ * Guards ALL THREE editable NodeViews touched by the fix: the two footnote views
+ * (FootnotesListView, FootnoteDefinitionView) AND the code block (CodeBlockView).
+ *
+ * The caret/click-offset fix rests entirely on ONE invariant: in every editable
+ * NodeView the editable `NodeViewContent` (contentDOM) must come FIRST in the
+ * wrapper, with no non-editable (`contenteditable="false"`) element before it.
+ * If a future edit reinserts chrome (separator, heading, marker, back-link,
+ * language menu) ahead of the content, the macOS hit-testing bug returns
+ * silently — and the symptom needs a real browser to see. This test pins the
+ * DOM ORDER (the proxy that IS the fix) in the existing jsdom harness.
+ *
+ * We stub `@tiptap/react` so the views render as plain DOM and we can inspect
+ * the child order our JSX produces — that order is exactly what regresses, and
+ * it does not depend on a live editor. The stubbed `NodeViewContent` carries the
+ * real `data-node-view-content` marker tiptap uses, so the assertion mirrors
+ * production. This test passes on the fixed order and FAILS on the pre-fix order
+ * (chrome-before-content).
+ */
+vi.mock("@tiptap/react", () => ({
+  NodeViewWrapper: ({ children, ...props }: any) => (
+    <div data-testid="nvw" {...props}>
+      {children}
+    </div>
+  ),
+  // Mirror the real contentDOM marker so the guard matches production output.
+  NodeViewContent: (props: any) => <div data-node-view-content="" {...props} />,
+}));
+
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({ t: (key: string) => key }),
+}));
+
+// footnote-definition-view reads a cached number + reference count from the
+// numbering plugin; stub them so we don't need a live ProseMirror state. The
+// ref-count is a hoisted mutable so a test can drive the single-vs-multi
+// backlink branch (#168). Default 1 = single reference (the #146 cases).
+const { mockRefCount } = vi.hoisted(() => ({ mockRefCount: { value: 1 } }));
+vi.mock("@docmost/editor-ext", () => ({
+  getFootnoteNumber: () => 1,
+  getFootnoteRefCount: () => mockRefCount.value,
+}));
+
+// Mocks so CodeBlockView renders cheaply (no MantineProvider, no matchMedia).
+// The Group mock MUST forward contentEditable: React serializes
+// contentEditable={false} to the DOM attribute contenteditable="false", which
+// the structural guard selects on to identify non-editable chrome.
+vi.mock("@mantine/core", () => ({
+  Group: ({ children, className, contentEditable }: any) => (
+    <div className={className} contentEditable={contentEditable}>
+      {children}
+    </div>
+  ),
+  Select: () => null,
+  Tooltip: ({ children }: any) => <>{children}</>,
+  ActionIcon: ({ children, onClick }: any) => (
+    <button onClick={onClick}>{children}</button>
+  ),
+}));
+vi.mock("@/components/common/copy-button", () => ({
+  CopyButton: ({ children }: any) =>
+    children({ copied: false, copy: () => {} }),
+}));
+vi.mock("@tabler/icons-react", () => ({
+  IconCheck: () => null,
+  IconCopy: () => null,
+}));
+vi.mock("@/features/editor/components/code-block/mermaid-view.tsx", () => ({
+  default: () => null,
+}));
+
+import FootnotesListView from "./footnotes-list-view";
+import FootnoteDefinitionView, {
+  backlinkLabel,
+} from "./footnote-definition-view";
+import CodeBlockView from "../code-block/code-block-view";
+
+// Minimal NodeViewProps stub: definition view only touches node.attrs.id and
+// editor.state (the latter unused once getFootnoteNumber is mocked).
+const props = {
+  node: { attrs: { id: "fn-1" }, textContent: "" },
+  editor: { state: {}, isEditable: true, commands: {} },
+  getPos: () => 0,
+  updateAttributes: () => {},
+  deleteNode: () => {},
+} as any;
+
+// CodeBlockView needs more than the footnote stub: a language attr (non-mermaid
+// so MermaidView never renders), an editor with selection/on/off, and an
+// extension exposing lowlight.listLanguages.
+const codeBlockProps = {
+  node: { attrs: { language: "javascript" }, textContent: "", nodeSize: 1 },
+  editor: {
+    state: { selection: { from: 0, to: 0 } },
+    isEditable: true,
+    commands: {},
+    on: vi.fn(),
+    off: vi.fn(),
+  },
+  extension: {
+    options: { lowlight: { listLanguages: () => ["javascript", "python"] } },
+  },
+  getPos: () => 0,
+  updateAttributes: () => {},
+  deleteNode: () => {},
+} as any;
+
+const cases: Array<{ name: string; ui: React.ReactElement }> = [
+  { name: "FootnotesListView", ui: <FootnotesListView {...props} /> },
+  { name: "FootnoteDefinitionView", ui: <FootnoteDefinitionView {...props} /> },
+  { name: "CodeBlockView", ui: <CodeBlockView {...codeBlockProps} /> },
+];
+
+describe("#146 editable NodeView contentDOM-first invariant", () => {
+  it.each(cases)(
+    "$name renders the editable contentDOM ahead of all non-editable chrome",
+    ({ ui }) => {
+      const { getByTestId } = render(ui);
+      const wrapper = getByTestId("nvw");
+
+      const content = wrapper.querySelector("[data-node-view-content]");
+      expect(content).not.toBeNull();
+
+      // The contentDOM sits at the FRONT of the wrapper: it is either the
+      // wrapper's first child (footnote views) or nested in the first child
+      // (code-block wraps it in <pre>). Either way the first element child
+      // must contain it. (compareDocumentPosition below is NOT redundant here:
+      // for code-block the content is not the literal first child, so we keep
+      // the document-order check to prove no chrome precedes the content.)
+      const firstEl = wrapper.firstElementChild!;
+      expect(firstEl === content || firstEl.contains(content!)).toBe(true);
+
+      // Chrome exists (separator/heading/marker/back-link/menu)...
+      const nonEditable = wrapper.querySelectorAll('[contenteditable="false"]');
+      expect(nonEditable.length).toBeGreaterThan(0);
+
+      // ...and every non-editable element comes AFTER the contentDOM, so the
+      // browser's click hit-testing reaches the editable content first (#146).
+      for (const el of Array.from(nonEditable)) {
+        const pos = content!.compareDocumentPosition(el);
+        expect(pos & Node.DOCUMENT_POSITION_FOLLOWING).toBeTruthy();
+      }
+    },
+  );
+});
+
+// #168: a footnote referenced more than once shows one lettered backlink per
+// occurrence (↩ a b c), each scrolling to its own reference; a single-reference
+// footnote keeps the plain ↩.
+describe("#168 footnote definition multi-backlinks", () => {
+  afterEach(() => {
+    // Reset the shared ref-count mock so other tests see a single reference.
+    mockRefCount.value = 1;
+  });
+
+  const makeProps = () =>
+    ({
+      node: { attrs: { id: "fn-1" }, textContent: "" },
+      editor: {
+        state: {},
+        isEditable: true,
+        commands: { scrollToReference: vi.fn() },
+      },
+      getPos: () => 0,
+      updateAttributes: () => {},
+      deleteNode: () => {},
+    }) as any;
+
+  it("renders one lettered backlink per reference (a, b, c) plus the ↩ arrow", () => {
+    mockRefCount.value = 3;
+    const { getByTestId } = render(<FootnoteDefinitionView {...makeProps()} />);
+    const wrapper = getByTestId("nvw");
+
+    const links = wrapper.querySelectorAll('[role="button"]');
+    expect(Array.from(links).map((l) => l.textContent)).toEqual([
+      "a",
+      "b",
+      "c",
+    ]);
+    // The ↩ arrow is present (as decorative chrome, not a button).
+    expect(wrapper.textContent).toContain("↩");
+  });
+
+  it("clicking the n-th backlink scrolls to the n-th occurrence (0-based)", () => {
+    mockRefCount.value = 3;
+    const props = makeProps();
+    const { getByTestId } = render(<FootnoteDefinitionView {...props} />);
+    const links = getByTestId("nvw").querySelectorAll('[role="button"]');
+
+    fireEvent.click(links[1]); // "b"
+    expect(props.editor.commands.scrollToReference).toHaveBeenCalledWith(
+      "fn-1",
+      1,
+    );
+  });
+
+  it("a single-reference footnote renders just one ↩ (no letters)", () => {
+    mockRefCount.value = 1;
+    const props = makeProps();
+    const { getByTestId } = render(<FootnoteDefinitionView {...props} />);
+    const wrapper = getByTestId("nvw");
+
+    const links = wrapper.querySelectorAll('[role="button"]');
+    expect(links.length).toBe(1);
+    expect(links[0].textContent).toBe("↩");
+
+    fireEvent.click(links[0]);
+    expect(props.editor.commands.scrollToReference).toHaveBeenCalledWith(
+      "fn-1",
+      0,
+    );
+  });
+});
+
+// #185 re-review pt 7: backlinkLabel counts in base 26 (a..z, then aa…). The
+// component tests above only reach a, b, c (indexes 0-2), so this pins the
+// carry at 26 and at 52. Keys are indexes, values the expected labels; the
+// first mismatch fails the test.
+describe("backlinkLabel base-26 boundary (#168)", () => {
+  it("maps 0->a, 25->z, 26->aa, 27->ab, 51->az, 52->ba", () => {
+    const labels = { 0: "a", 25: "z", 26: "aa", 27: "ab", 51: "az", 52: "ba" };
+    for (const [index, label] of Object.entries(labels)) {
+      expect(backlinkLabel(Number(index))).toBe(label);
+    }
+  });
+});
--- a/apps/client/src/features/editor/components/footnote/footnote.module.css
+++ b/apps/client/src/features/editor/components/footnote/footnote.module.css
@@ -57,14 +57,19 @@
  word-break: break-word;
 }

-/* Bottom footnotes container. */
+/* Bottom footnotes container. Flex column so the heading (rendered AFTER the
+   editable NodeViewContent in the DOM for #146) is lifted back above the list
+   visually via `order`, instead of sitting in-flow before the contentDOM. */
 .list {
+  display: flex;
+  flex-direction: column;
  margin-top: var(--mantine-spacing-lg);
  padding-top: var(--mantine-spacing-md);
  border-top: 1px solid var(--mantine-color-default-border);
 }

 .listHeading {
+  order: -1; /* visually above the list, though it follows it in the DOM (#146) */
  font-weight: 600;
  font-size: var(--mantine-font-size-sm);
  color: var(--mantine-color-dimmed);
@@ -83,6 +88,7 @@
 }

 .definitionMarker {
+  order: -1; /* keep the "N." marker on the LEFT though it follows content in DOM (#146) */
  flex: 0 0 auto;
  min-width: 1.5em;
  /* Right-align within the narrow column so the period sits next to the text
@@ -98,6 +104,19 @@
  min-width: 0;
 }

+/* The inner editable paragraph inherits `.ProseMirror p { margin: 0.5em 0 }`,
+   which pushes the first text line ~0.5em below the "N." marker (aligned to
+   flex-start), making the number float above the text. Drop the outer margins
+   so the marker and the first line share the same top edge — same approach
+   used for callouts in core.css. */
+.definitionContent > :first-child {
+  margin-top: 0;
+}
+
+.definitionContent > :last-child {
+  margin-bottom: 0;
+}
+
 .backLink {
  flex: 0 0 auto;
  cursor: pointer;
@@ -109,3 +128,18 @@
 .backLink:hover {
  text-decoration: underline;
 }
+
+/* Multi-backlink row (#168): ↩ a b c — one lettered link per reference
+   occurrence. Sits on the right, after the content, like the single ↩. */
+.backLinks {
+  flex: 0 0 auto;
+  display: inline-flex;
+  align-items: baseline;
+  gap: 0.3em;
+  user-select: none;
+}
+
+.backLinkArrow {
+  color: var(--mantine-color-dimmed);
+  font-size: 0.9em;
+}
--- a/apps/client/src/features/editor/components/footnote/footnotes-list-view.tsx
+++ b/apps/client/src/features/editor/components/footnote/footnotes-list-view.tsx
@@ -3,18 +3,39 @@ import { useTranslation } from "react-i18next";
 import classes from "./footnote.module.css";

 /**
- * NodeView for the bottom footnotes container. Renders a visual separator and a
- * localized heading, then the editable list of definitions via NodeViewContent.
+ * NodeView for the bottom footnotes container: the editable list of definitions
+ * (NodeViewContent) plus a visual separator + localized heading.
+ *
+ * #146: the editable NodeViewContent MUST be the FIRST child in the DOM. A
+ * non-editable block rendered before it (the old separator + heading) makes the
+ * browser's click hit-testing (posAtCoords → caretRangeFromPoint) miss the
+ * contentDOM and snap the caret to the previous node (several lines above, into
+ * the body). So content goes first; the heading is rendered AFTER it and lifted
+ * back above visually with CSS flex `order` (the separator border lives on the
+ * flex container itself).
+ *
+ * The second #146 mitigation lives in editor-paste-handler.tsx (reflowAfterPaste).
 */
 export default function FootnotesListView(_props: NodeViewProps) {
  const { t } = useTranslation();

  return (
-    <NodeViewWrapper>
-      <div className={classes.list} contentEditable={false}>
-        <div className={classes.listHeading}>{t("Footnotes")}</div>
-      </div>
+    // role/aria-label preserve the section label for AT: the visible heading
+    // below is now aria-hidden, so without these the "Footnotes" label would be
+    // lost to a screen reader (WCAG 1.3.2 — DOM order has heading after content).
+    <NodeViewWrapper
+      className={classes.list}
+      role="group"
+      aria-label={t("Footnotes")}
+    >
      <NodeViewContent />
+      <div
+        className={classes.listHeading}
+        contentEditable={false}
+        aria-hidden="true"
+      >
+        {t("Footnotes")}
+      </div>
    </NodeViewWrapper>
  );
 }
--- a/Show More
+++ b/Show More