2026-06-25 12:40:36 +03:00
23 changed files with 2018 additions and 1482 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,10 +12,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - **Persistent AI-chat history as the source of truth + server-side export.**
  An assistant turn is now persisted to the database step by step: the row is
  inserted upfront as `streaming` and updated as each agent step finishes, then
  finalized once to `completed`/`error`/`aborted`. A process that dies mid-turn
  keeps every finished step, and a startup sweep flips any dangling `streaming`
  row (untouched for 10 minutes) to `aborted`. Chat "Copy" now exports
  server-side from these rows (`POST /ai-chat/export`) rather than from live
  client state, so the export is identical whether a chat is freshly streaming,
  just switched to, or reloaded — and is available from the first turn of a new
  chat. (#183, #174)
 - **AI-agent attribution for MCP writes.** Comments (and pages) created through
  the MCP endpoint by a dedicated agent account are now badged as "AI", with
  unspoofable provenance derived from a per-user `is_agent` flag (not from the
-  request body). **Operator setup:** use a *dedicated* service account for the
+  request body). **Operator setup:** use a _dedicated_ service account for the
  MCP fallback and set the flag with SQL —
  `UPDATE users SET is_agent = true WHERE email = '<mcp-account>'`. Never flag a
  human or shared account, or its normal edits get mis-attributed as AI. See the
@@ -150,8 +161,7 @@ embeds — plus a large batch of security hardening and test coverage.
 - Page templates: import `ThrottleModule` so collab boots, never strand an
  in-flight page-embed id, and add defense-in-depth workspace checks.
 - Pages: `movePage` cycle guard with no phantom `PAGE_MOVED` event.
- Import: surface the real error cause from `/pages/import` instead of a generic
+- Import: surface the real error cause from `/pages/import` instead of a generic 400.
  400.
 ### Security
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -258,6 +258,7 @@
  "Copy to space": "Copy to space",
  "Copy chat": "Copy chat",
  "Copied": "Copied",
  "Failed to export chat": "Failed to export chat",
  "Duplicate": "Duplicate",
  "Select a user": "Select a user",
  "Select a group": "Select a group",
--- a/apps/client/public/locales/ru-RU/translation.json
+++ b/apps/client/public/locales/ru-RU/translation.json
@@ -257,6 +257,7 @@
  "Copy": "Копировать",
  "Copy to space": "Копировать в пространство",
  "Copied": "Скопировано",
  "Failed to export chat": "Не удалось экспортировать чат",
  "Duplicate": "Дублировать",
  "Select a user": "Выберите пользователя",
  "Select a group": "Выберите группу",
--- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
+++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
@@ -6,7 +6,6 @@ import {
  useRef,
  useState,
 } from "react";
 import { type UIMessage } from "@ai-sdk/react";
 import { Group, Loader, Tooltip } from "@mantine/core";
 import {
  IconArrowsDiagonal,
@@ -40,7 +39,7 @@ import {
 } from "@/features/ai-chat/queries/ai-chat-query.ts";
 import ConversationList from "@/features/ai-chat/components/conversation-list.tsx";
 import ChatThread from "@/features/ai-chat/components/chat-thread.tsx";
-import { buildChatMarkdown } from "@/features/ai-chat/utils/chat-markdown.ts";
+import { exportAiChat } from "@/features/ai-chat/services/ai-chat-service.ts";
 import { useChatSession } from "@/features/ai-chat/hooks/use-chat-session.ts";
 import {
  shouldCollapseOnOutsidePointer,
@@ -121,7 +120,7 @@ function clampGeom(g: {
 * ported from the GitmostAgent.jsx design.
 */
 export default function AiChatWindow() {
-  const { t } = useTranslation();
+  const { t, i18n } = useTranslation();
  const clipboard = useClipboard({ timeout: 500 });
  const queryClient = useQueryClient();
  const [windowOpen, setWindowOpen] = useAtom(aiChatWindowOpenAtom);
@@ -162,30 +161,11 @@ export default function AiChatWindow() {
  const { data: messageRows, isLoading: messagesLoading } =
    useAiChatMessagesQuery(activeChatId ?? undefined);
  // Live snapshot of the active thread's useChat state, kept up to date by
  // ChatThread. Lets the export include the in-progress (not-yet-persisted)
  // streaming turn. A ref avoids re-rendering this window on every token.
  const liveThreadRef = useRef<{
    messages: UIMessage[];
    isStreaming: boolean;
    banner: string | null;
  }>({
    messages: [],
    isStreaming: false,
    banner: null,
  });
  // Live turn-token total (reasoning + output) for the in-flight turn, pushed up
  // (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream.
  // `null` means no turn is in flight -> the badge falls back to the persisted
  // context size below.
  const [liveTurnTokens, setLiveTurnTokens] = useState<number | null>(null);
  // Whether the on-screen thread currently holds at least one message. Reported
  // reactively by ChatThread (the live snapshot lives in a non-reactive ref). This
  // lets the "Copy chat" button stay available for a brand-new, not-yet-persisted
  // chat whose first turn is in flight or was interrupted — that case has no
  // persisted rows yet, so a persisted-rows-only gate would hide the button (#174).
  const [hasLiveContent, setHasLiveContent] = useState(false);
  // The page the user is currently viewing. AiChatWindow lives in a pathless
  // parent layout route, so useParams() can't see :pageSlug. Match the full
@@ -214,6 +194,7 @@ export default function AiChatWindow() {
    threadKey,
    waitingForHistory,
    onTurnFinished,
    onServerChatId,
    cancelPendingAdoption,
  } = useChatSession({
    activeChatId,
@@ -254,20 +235,19 @@ export default function AiChatWindow() {
    [cancelPendingAdoption, setActiveChatId, setDraft, setSelectedRoleId],
  );
-  // The active chat object (for its title) and an export gate: only enable the
+  // The active chat object (for its title) and an export gate. The export is now
-  // export button when an existing chat with loaded persisted rows is active.
+  // SERVER-sourced (the DB is the single source of truth — #183): the assistant
  // row is persisted upfront + per step, so even a brand-new chat whose first
  // turn is streaming/interrupted has a server row to render. Enable the button
  // whenever a persisted chat is active (`activeChatId` is set). For a BRAND-NEW
  // chat that id is adopted EARLY — at the stream's `start` chunk via
  // onServerChatId (#174) — so the Copy button is available during the first
  // turn's stream, not only after it terminates.
  const activeChat = useMemo(
    () => chats?.items?.find((c) => c.id === activeChatId) ?? null,
    [chats, activeChatId],
  );
-  // Export is available when there is anything to export: either persisted rows
+  const canExport = !!activeChatId;
  // for the active chat, OR a live on-screen thread with at least one message.
  // The live arm covers a brand-new chat whose first turn is streaming or was
  // interrupted before the server persisted any row (#174); the persisted arm is
  // the steady-state path for an already-saved chat (#160).
  const canExport =
    hasLiveContent ||
    (!!activeChatId && !!messageRows && messageRows.length > 0);
  // The role to display in the header and as the assistant's name. Prefer the
  // persisted role of an existing chat (chat-list JOIN); fall back to the role
@@ -284,53 +264,21 @@ export default function AiChatWindow() {
    return picked ? { name: picked.name, emoji: picked.emoji } : null;
  }, [activeChat, enabledRoles, selectedRoleId]);
-  // Build a Markdown export from the already-loaded persisted rows (no network
+  // Fetch the server-rendered Markdown export and copy it to the clipboard. The
-  // call) and copy it to the clipboard. The "Copied" notification is the
+  // server is the single source of truth (#183): it renders the transcript from
-  // feedback.
+  // the persisted rows — including an interrupted turn's in-progress row — so the
-  const handleCopy = useCallback(() => {
+  // export is identical whether the chat is freshly streaming, just switched to,
-    // Export gate. There must be SOMETHING to export — either a live on-screen
+  // or reloaded. The `lang` of the active i18n drives the few localized labels.
-    // message or a persisted row. A brand-new chat whose first turn is streaming
+  const handleCopy = useCallback(async () => {
-    // or was interrupted has live messages but no persisted rows yet; it still
+    if (!activeChatId) return;
-    // exports the on-screen thread WYSIWYG (#174). Only a truly empty chat (no
+    try {
-    // live messages and no rows) is non-exportable (the button is hidden too —
+      const markdown = await exportAiChat(activeChatId, i18n.language);
-    // see `canExport`).
+      clipboard.copy(markdown);
-    const live = liveThreadRef.current;
+      notifications.show({ message: t("Copied") });
-    const hasRows = !!messageRows && messageRows.length > 0;
+    } catch {
-    if (live.messages.length === 0 && !hasRows) return;
+      notifications.show({ message: t("Failed to export chat"), color: "red" });
-    // WYSIWYG export: the live on-screen messages ARE the document (so a partial
+    }
-    // reply from an interrupted turn — which never reached the persisted rows —
+  }, [activeChatId, clipboard, t, i18n.language]);
    // is exported just as it appears). The persisted rows enrich each live
    // message (token usage / error / timestamp) by id and serve as the fallback
    // when the live mirror is empty. The on-screen banner is appended too. See
    // issues #160 and #174. `chatId` may be null for a not-yet-saved chat — use a
    // placeholder so the header line still renders.
    const markdown = buildChatMarkdown({
      title: activeChat?.title ?? null,
      chatId: activeChatId ?? "unsaved",
      live: live.messages.map((m) => ({
        id: m.id,
        role: m.role,
        parts: (m.parts ?? []) as { type: string; text?: string }[],
        metadata: m.metadata as
          | {
              usage?: {
                inputTokens?: number;
                outputTokens?: number;
                totalTokens?: number;
                reasoningTokens?: number;
              };
              error?: string;
            }
          | undefined,
      })),
      rows: messageRows,
      isStreaming: live.isStreaming,
      banner: live.banner,
      t,
    });
    clipboard.copy(markdown);
    notifications.show({ message: t("Copied") });
  }, [activeChatId, messageRows, activeChat, clipboard, t]);
  // Current context size for the active chat: how much the conversation now
  // occupies in the model's context window — NOT the cumulative tokens spent.
@@ -685,9 +633,8 @@ export default function AiChatWindow() {
              onRolePicked={(role) => setSelectedRoleId(role.id)}
              assistantName={currentRole?.name}
              onTurnFinished={onTurnFinished}
-              liveStateRef={liveThreadRef}
+              onServerChatId={onServerChatId}
              onLiveTurnTokens={setLiveTurnTokens}
              onLiveContentChange={setHasLiveContent}
            />
          )}
        </div>
--- a/apps/client/src/features/ai-chat/components/chat-thread.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx
@@ -1,11 +1,4 @@
-import {
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
  useCallback,
  useEffect,
  useMemo,
  useRef,
  useState,
  type MutableRefObject,
 } from "react";
 import { generateId } from "ai";
 import { ActionIcon, Box, Group, Stack, Text } from "@mantine/core";
 import { IconClockHour4, IconX } from "@tabler/icons-react";
@@ -68,30 +61,18 @@ interface ChatThreadProps {
   *  authoritative id the server streamed on the assistant message metadata, or
   *  undefined on a failed turn — see adopt-chat-id.ts for the full #137 design. */
  onTurnFinished: (serverChatId?: string) => void;
-  /** Parent-owned ref that this thread keeps updated with its live useChat
+  /** Called EARLY (at the stream's `start` chunk) with the authoritative server
-   *  snapshot (full message list + streaming flag), so the header's
+   *  chat id streamed on the assistant message metadata, so a brand-new chat
-   *  "Copy chat" export can include the in-progress, not-yet-persisted
+   *  adopts its real id WHILE the first turn is still streaming (#174 — makes the
-   *  assistant message. A ref (not state) avoids re-rendering the parent on
+   *  Copy/export button available mid-stream). Distinct from onTurnFinished,
-   *  every streamed delta. */
+   *  which fires only at the terminal outcome. */
-  liveStateRef?: MutableRefObject<{
+  onServerChatId?: (serverChatId?: string) => void;
    messages: UIMessage[];
    isStreaming: boolean;
    banner: string | null;
  }>;
  /** Reports the live turn-token total (reasoning + output) for the in-flight
   *  turn so the parent can show a header badge that ticks mid-stream. THROTTLED
   *  here (~8 Hz) so the parent re-renders a handful of times a second, not on
   *  every streamed delta. Called with `null` when no turn is in flight (the
   *  parent then reverts the badge to the persisted context size). */
  onLiveTurnTokens?: (tokens: number | null) => void;
  /** Reports whether the live thread currently holds at least one message, so the
   *  parent can gate the "Copy chat" button on the on-screen thread rather than on
   *  the persisted rows alone. This stays truthy for a brand-new, not-yet-saved
   *  chat the moment its first user message appears — so an interrupted very first
   *  turn (no persisted rows yet) is still exportable (#174). Called with `false`
   *  on unmount so a thread torn down by `key` on chat switch can't leave the
   *  button enabled for the next, possibly empty, chat. */
  onLiveContentChange?: (hasContent: boolean) => void;
 }
 /**
@@ -135,9 +116,8 @@ export default function ChatThread({
  onRolePicked,
  assistantName,
  onTurnFinished,
-  liveStateRef,
+  onServerChatId,
  onLiveTurnTokens,
  onLiveContentChange,
 }: ChatThreadProps) {
  const { t } = useTranslation();
@@ -306,6 +286,26 @@ export default function ChatThread({
  // Keep the flush helper pointed at the latest sendMessage instance.
  sendMessageRef.current = sendMessage;
  // EARLY chat-id adoption (#174): the server streams the authoritative chat id
  // on the assistant message metadata at the `start` chunk (message.metadata.
  // chatId — see adopt-chat-id.ts / chatStreamMetadata). Forward it to the parent
  // AS SOON AS it appears (mid-stream), so a brand-new chat adopts its real id
  // WHILE the first turn is still streaming and activeChatId-gated affordances
  // (the Copy/export button) light up immediately, instead of only at onFinish.
  // Keyed by the last-seen id so we forward each distinct id exactly once. The
  // parent's onServerChatId is idempotent and a no-op once the chat has an id.
  const lastForwardedChatIdRef = useRef<string | undefined>(undefined);
  useEffect(() => {
    if (!onServerChatId) return;
    const tail = messages[messages.length - 1];
    if (tail?.role !== "assistant") return;
    const serverChatId = extractServerChatId(tail);
    if (!serverChatId || serverChatId === lastForwardedChatIdRef.current)
      return;
    lastForwardedChatIdRef.current = serverChatId;
    onServerChatId(serverChatId);
  }, [messages, onServerChatId]);
  // Live "turn was interrupted" marker for the CURRENT session. The red error
  // banner (driven by `error`) covers the error case; this covers an aborted
  // turn, distinguishing a manual Stop (`isAbort`) from a dropped connection
@@ -328,44 +328,6 @@ export default function ChatThread({
  // the SAME on-screen banner text can be mirrored into the export (issue #160).
  const errorView = error ? describeChatError(error.message ?? "", t) : null;
  // The exact banner the user sees under the message list, flattened to a single
  // string for the "Copy chat" export so the artifact records the interruption
  // WYSIWYG. Mirrors the JSX precedence below: error first, else the stop notice.
  const banner = errorView
    ? errorView.detail
      ? `${errorView.title} — ${errorView.detail}`
      : errorView.title
    : stopNotice === "manual"
      ? t("Response stopped.")
      : stopNotice === "disconnect"
        ? t("Connection lost — the answer was interrupted.")
        : null;
  // Mirror the live useChat snapshot into the parent-owned ref so the export
  // (handled in AiChatWindow) can include the in-progress streaming turn AND the
  // on-screen banner. The cleanup clears the ref on unmount so a thread torn down
  // by `key` on chat switch can't leak its (possibly still-streaming) tail into
  // the next chat's export before the new thread's effect repopulates the ref.
  useEffect(() => {
    if (!liveStateRef) return;
    liveStateRef.current = { messages, isStreaming, banner };
    return () => {
      liveStateRef.current = { messages: [], isStreaming: false, banner: null };
    };
  }, [liveStateRef, messages, isStreaming, banner]);
  // Reactively report "the live thread has content" to the parent. `liveStateRef`
  // above is a ref (deliberately non-reactive so streaming deltas don't re-render
  // the parent), so the export button needs a SEPARATE reactive signal to flip on
  // for a not-yet-persisted chat. Keyed on the boolean only — identical values are
  // a no-op setState in the parent, so this does not add per-delta re-renders.
  const hasLiveContent = messages.length > 0;
  useEffect(() => {
    if (!onLiveContentChange) return;
    onLiveContentChange(hasLiveContent);
    return () => onLiveContentChange(false);
  }, [onLiveContentChange, hasLiveContent]);
  // Report the live turn-token total to the parent header badge, THROTTLED to
  // ~8 Hz so the parent re-renders a few times a second instead of on every
  // streamed delta. The tail assistant message's reasoning+output (estimate while
--- a/apps/client/src/features/ai-chat/hooks/use-chat-session.test.tsx
+++ b/apps/client/src/features/ai-chat/hooks/use-chat-session.test.tsx
@@ -64,7 +64,10 @@ describe("useChatSession", () => {
    result.current.onTurnFinished(undefined);
    expect(setActiveChatId).not.toHaveBeenCalled();
    // The refetch lands with the new row => adopt it.
-    rerender({ activeChatId: null, chats: { items: [{ id: "x" }, { id: "new" }] } });
+    rerender({
      activeChatId: null,
      chats: { items: [{ id: "x" }, { id: "new" }] },
    });
    expect(setActiveChatId).toHaveBeenCalledWith("new");
  });
@@ -88,7 +91,10 @@ describe("useChatSession", () => {
    });
    result.current.onTurnFinished(undefined);
    // a was deleted, new was added — same length, but membership changed.
-    rerender({ activeChatId: null, chats: { items: [{ id: "b" }, { id: "new" }] } });
+    rerender({
      activeChatId: null,
      chats: { items: [{ id: "b" }, { id: "new" }] },
    });
    expect(setActiveChatId).toHaveBeenCalledWith("new");
  });
@@ -171,6 +177,40 @@ describe("useChatSession", () => {
    expect(setActiveChatId).not.toHaveBeenCalledWith("late");
  });
  it("#174 early adopt: onServerChatId adopts the streamed id mid-stream (Copy button available during the first turn)", () => {
    // Brand-new chat: no id yet. The server streams the real chat id "A" on the
    // `start` chunk WHILE the first turn is still streaming (before onTurnFinished
    // fires at the terminal outcome). The hook must adopt it immediately so the
    // window's activeChatId-gated Copy/export button lights up during the stream.
    const { result, setActiveChatId } = setup({
      activeChatId: null,
      chats: { items: [] },
    });
    result.current.onServerChatId("A");
    expect(setActiveChatId).toHaveBeenCalledWith("A");
  });
  it("#174 early adopt is in-place: threadKey stays stable (live stream not torn down)", () => {
    const chats = { items: [] };
    const { result, rerender } = setup({ activeChatId: null, chats });
    const keyBefore = result.current.threadKey;
    result.current.onServerChatId("A");
    // Parent reflects the adopted id back in; the SAME mount key is kept so the
    // in-flight useChat store (the streaming turn) is preserved.
    rerender({ activeChatId: "A", chats });
    expect(result.current.threadKey).toBe(keyBefore);
  });
  it("#174 early adopt: no-op for an existing chat and for a missing id", () => {
    const { result, setActiveChatId } = setup({
      activeChatId: "chat-1",
      chats: { items: [{ id: "chat-1" }] },
    });
    result.current.onServerChatId("chat-1"); // already has an id
    result.current.onServerChatId(undefined); // no streamed id
    expect(setActiveChatId).not.toHaveBeenCalled();
  });
  it("in-place adopt keeps threadKey stable; an external switch remounts", () => {
    const chats = { items: [{ id: "B" }] };
    const { result, rerender } = setup({ activeChatId: null, chats });
--- a/apps/client/src/features/ai-chat/hooks/use-chat-session.ts
+++ b/apps/client/src/features/ai-chat/hooks/use-chat-session.ts
@@ -34,6 +34,13 @@ export interface UseChatSessionResult {
  /** Call when a turn finishes; `serverChatId` is the authoritative streamed id
   *  (undefined on a failed turn). Handles new-chat id adoption + invalidations. */
  onTurnFinished: (serverChatId?: string) => void;
  /** Call EARLY (at the stream's `start` chunk) with the authoritative streamed
   *  chat id so a brand-new chat adopts its real id WHILE its first turn is still
   *  streaming — making `activeChatId`-gated affordances (e.g. the Copy/export
   *  button, #174) available immediately. In-place adoption only (same mount key,
   *  no list/messages invalidation — that is left to onTurnFinished at the end).
   *  Idempotent and a no-op once the chat already has an id. */
  onServerChatId: (serverChatId?: string) => void;
  /** Disarm any pending error-path new-chat fallback. The window calls this from
   *  startNewChat/selectChat so a late refetch can't yank the user back into a
   *  just-failed chat after they explicitly moved on. */
@@ -85,13 +92,10 @@ export function useChatSession(
  // `newThread`/`switchThread` to (re)mount, `adoptThread` for in-place adoption.
  // Initial: a non-null activeChatId switches to it; a null one gets a fresh
  // session key with no chat id yet.
-  const [thread, dispatch] = useReducer(
+  const [thread, dispatch] = useReducer(threadSessionReducer, undefined, () =>
-    threadSessionReducer,
+    activeChatId === null
-    undefined,
+      ? newThread(`new-${generateId()}`)
-    () =>
+      : switchThread(activeChatId),
      activeChatId === null
        ? newThread(`new-${generateId()}`)
        : switchThread(activeChatId),
  );
  // Error-path fallback for new-chat id adoption. When a brand-new chat's first
@@ -150,6 +154,31 @@ export function useChatSession(
    [chats, setActiveChatId, onInvalidateChatList, onInvalidateChatMessages],
  );
  // EARLY adoption (#174): adopt the authoritative streamed chat id the moment
  // the server emits it on the `start` chunk, so a brand-new chat gets its real
  // `activeChatId` WHILE its first turn streams — not only at terminal
  // onTurnFinished. This makes the activeChatId-gated Copy/export button
  // available during the first turn. Pure in-place adoption (same mount key, like
  // the primary path) with NO invalidation: the list/messages refresh stays on
  // onTurnFinished at the end of the turn. Reads the live id from the ref so a
  // repeat call after adoption is a no-op (resolveAdoptedChatId only fires for a
  // still-new chat).
  const onServerChatId = useCallback(
    (serverChatId?: string) => {
      const adopted = resolveAdoptedChatId(
        activeChatIdRef.current,
        serverChatId,
      );
      if (!adopted) return;
      activeChatIdRef.current = adopted;
      setActiveChatId(adopted);
      dispatch({ type: "adopt", chatId: adopted });
      // Early adoption beat the error-path fallback to it — disarm.
      pendingNewChatRef.current = null;
    },
    [setActiveChatId],
  );
  // FALLBACK resolver. Armed only by onTurnFinished when a brand-new chat's first
  // turn errored before the `start` chunk (no authoritative id streamed). Once
  // the per-user list refetch lands with the just-created row, adopt the SINGLE
@@ -233,6 +262,7 @@ export function useChatSession(
    threadKey: thread.key,
    waitingForHistory,
    onTurnFinished,
    onServerChatId,
    cancelPendingAdoption,
  };
 }
--- a/apps/client/src/features/ai-chat/services/ai-chat-service.ts
+++ b/apps/client/src/features/ai-chat/services/ai-chat-service.ts
@@ -50,6 +50,24 @@ export async function deleteAiChat(chatId: string): Promise<void> {
  await api.post("/ai-chat/delete", { chatId });
 }
 /**
 * Export a chat to Markdown (#183). The server renders the transcript from the
 * persisted rows (the DB is the single source of truth — including an
 * interrupted turn's in-progress row, persisted upfront + per step), so the
 * client just copies the returned string. `lang` localizes the few fixed
 * role/tool labels; defaults to English server-side when omitted.
 */
 export async function exportAiChat(
  chatId: string,
  lang?: string,
 ): Promise<string> {
  const req = await api.post<{ markdown: string }>("/ai-chat/export", {
    chatId,
    lang,
  });
  return req.data.markdown;
 }
 /**
 * Agent roles API (`/ai-chat/roles`). `list` is available to any workspace
 * member (for the chat-creation picker); create/update/delete are admin-only
@@ -76,6 +94,8 @@ export async function updateAiRole(data: IAiRoleUpdate): Promise<IAiRole> {
 /** Soft-delete a role (admin). */
 export async function deleteAiRole(id: string): Promise<{ success: true }> {
-  const req = await api.post<{ success: true }>("/ai-chat/roles/delete", { id });
+  const req = await api.post<{ success: true }>("/ai-chat/roles/delete", {
    id,
  });
  return req.data;
 }
--- a/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts
+++ b/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts
@@ -1,747 +0,0 @@
 import { describe, it, expect } from "vitest";
 import { buildChatMarkdown } from "@/features/ai-chat/utils/chat-markdown.ts";
 import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
 /**
 * Tests for the client-only Markdown export builder. The output embeds a live
 * `new Date().toISOString()` export timestamp; we never assert that value, only
 * the deterministic structure (headings, numbering, fenced blocks, totals).
 *
 * A pass-through translator keeps role/tool labels predictable so the
 * structural assertions are stable without an i18n runtime.
 */
 const t = (key: string, values?: Record<string, unknown>): string => {
  if (values && typeof values.name === "string") {
    return key.replace("{{name}}", values.name);
  }
  return key;
 };
 function row(partial: Partial<IAiChatMessageRow>): IAiChatMessageRow {
  return {
    id: partial.id ?? "id",
    role: partial.role ?? "user",
    content: partial.content ?? null,
    metadata: partial.metadata ?? null,
    createdAt: partial.createdAt ?? "2026-06-21T00:00:00.000Z",
  };
 }
 describe("buildChatMarkdown — structure", () => {
  it("emits the title heading, chat id and message count", () => {
    const md = buildChatMarkdown({
      title: "My chat",
      chatId: "chat-123",
      rows: [],
      t,
    });
    expect(md).toContain("# My chat");
    expect(md).toContain("- Chat ID: `chat-123`");
    expect(md).toContain("- Messages: 0");
    expect(md).toContain("- Exported:"); // timestamp present, value not asserted
  });
  it("falls back to the translated 'Untitled chat' for empty/blank titles", () => {
    expect(
      buildChatMarkdown({ title: null, chatId: "c", rows: [], t }),
    ).toContain("# Untitled chat");
    expect(
      buildChatMarkdown({ title: "   ", chatId: "c", rows: [], t }),
    ).toContain("# Untitled chat");
  });
  it("numbers rows sequentially with role headings", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({ role: "user", content: "hi" }),
        row({ role: "assistant", content: "hello" }),
        row({ role: "user", content: "again" }),
      ],
      t,
    });
    expect(md).toContain("## 1. You");
    expect(md).toContain("## 2. AI agent");
    expect(md).toContain("## 3. You");
    // Heading numbering is strictly index+1, not e.g. role-relative.
    expect(md).not.toContain("## 0.");
  });
  it("renders the per-row text content from `content` when no metadata.parts", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [row({ role: "user", content: "plain body" })],
      t,
    });
    expect(md).toContain("plain body");
  });
 });
 describe("buildChatMarkdown — text parts", () => {
  it("skips empty / whitespace-only text parts", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "ignored-content",
          metadata: {
            parts: [
              { type: "text", text: "   " },
              { type: "text", text: "" },
              { type: "text", text: "kept line" },
              // eslint-disable-next-line @typescript-eslint/no-explicit-any
            ] as any,
          },
        }),
      ],
      t,
    });
    expect(md).toContain("kept line");
    // Whitespace-only part contributed no block of its own.
    expect(md).not.toContain("   \n\n");
    // When metadata.parts exists, the plain `content` fallback is NOT used.
    expect(md).not.toContain("ignored-content");
  });
 });
 describe("buildChatMarkdown — tool parts", () => {
  it("renders a tool label, name, state and fenced Input/Output blocks", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "",
          metadata: {
            parts: [
              {
                type: "tool-getPage",
                state: "output-available",
                input: { pageId: "p1" },
                output: { id: "p1", title: "Home" },
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
              } as any,
            ],
          },
        }),
      ],
      t,
    });
    // Known tool name maps to its label key; raw name in backticks; done state.
    expect(md).toContain("**Tool: Read page** (`getPage`) — done");
    expect(md).toContain("Input:");
    expect(md).toContain("Output:");
    // Fenced JSON blocks contain the stringified payloads.
    expect(md).toContain('"pageId": "p1"');
    expect(md).toContain('"title": "Home"');
    expect(md).toContain("```json");
  });
  it("renders the generic label for an unknown tool and surfaces errorText", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "",
          metadata: {
            parts: [
              {
                type: "tool-mysteryTool",
                state: "output-error",
                input: { a: 1 },
                errorText: "boom",
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
              } as any,
            ],
          },
        }),
      ],
      t,
    });
    expect(md).toContain(
      "**Tool: Ran tool mysteryTool** (`mysteryTool`) — error",
    );
    expect(md).toContain("**Error:** boom");
  });
  it("does not throw on a circular tool input (falls back to String)", () => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const circular: any = {};
    circular.self = circular;
    expect(() =>
      buildChatMarkdown({
        title: "t",
        chatId: "c",
        rows: [
          row({
            role: "assistant",
            content: "",
            metadata: {
              parts: [
                {
                  type: "tool-getPage",
                  state: "input-available",
                  input: circular,
                  // eslint-disable-next-line @typescript-eslint/no-explicit-any
                } as any,
              ],
            },
          }),
        ],
        t,
      }),
    ).not.toThrow();
  });
 });
 describe("buildChatMarkdown — fence anti-breakout", () => {
  it("lengthens the delimiter so embedded ``` cannot break out of the block", () => {
    // Tool input whose stringified string form contains a literal ``` run.
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "",
          metadata: {
            parts: [
              {
                type: "tool-getPage",
                state: "output-available",
                // A bare string passes through stringify() verbatim.
                input: "before ``` after",
                output: "x",
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
              } as any,
            ],
          },
        }),
      ],
      t,
    });
    // The fence around the 3-backtick content must use at least 4 backticks so
    // the embedded ``` run cannot terminate the block.
    expect(md).toContain("````json\nbefore ``` after\n````");
    // Robust anti-breakout check: the opening fence delimiter is strictly
    // longer than the longest backtick run inside the wrapped content. (A naive
    // `not.toContain("```json...")` is a false negative — a 4-backtick fence
    // textually contains the 3-backtick substring.)
    const open = md.match(/(`{3,})json\nbefore/);
    expect(open).not.toBeNull();
    expect(open![1].length).toBeGreaterThan(3); // > the 3-backtick run in content
  });
  it("uses a 5-backtick fence when the content has a 4-backtick run", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "",
          metadata: {
            parts: [
              {
                type: "tool-getPage",
                state: "output-available",
                input: "a ```` b",
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
              } as any,
            ],
          },
        }),
      ],
      t,
    });
    expect(md).toContain("`````json\na ```` b\n`````");
  });
 });
 describe("buildChatMarkdown — token totals", () => {
  it("prints the total-tokens line only when the summed usage is > 0", () => {
    const withTokens = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "x",
          metadata: { usage: { inputTokens: 10, outputTokens: 5 } },
        }),
      ],
      t,
    });
    expect(withTokens).toContain("- Total tokens: 15");
    // Per-row usage footer too.
    expect(withTokens).toContain("_Tokens — in: 10, out: 5, total: 15_");
  });
  it("omits the total-tokens line when the sum is 0 / usage absent", () => {
    const noTokens = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({ role: "user", content: "hi" }),
        row({
          role: "assistant",
          content: "x",
          metadata: { usage: { inputTokens: 0, outputTokens: 0 } },
        }),
      ],
      t,
    });
    expect(noTokens).not.toContain("- Total tokens:");
  });
  it("uses totalTokens when present rather than summing in/out", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "x",
          metadata: {
            usage: { inputTokens: 3, outputTokens: 4, totalTokens: 99 },
          },
        }),
      ],
      t,
    });
    expect(md).toContain("- Total tokens: 99");
  });
  it("appends the reasoning figure to the row footer when reasoningTokens > 0", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "x",
          metadata: {
            usage: { inputTokens: 10, outputTokens: 8, reasoningTokens: 3 },
          },
        }),
      ],
      t,
    });
    expect(md).toContain("_Tokens — in: 10, out: 8, reasoning: 3, total: 18_");
  });
  it("omits the reasoning figure when reasoningTokens is 0 / absent", () => {
    const zero = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "x",
          metadata: {
            usage: { inputTokens: 10, outputTokens: 5, reasoningTokens: 0 },
          },
        }),
      ],
      t,
    });
    expect(zero).toContain("_Tokens — in: 10, out: 5, total: 15_");
    expect(zero).not.toContain("reasoning:");
    const absent = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          role: "assistant",
          content: "x",
          metadata: { usage: { inputTokens: 10, outputTokens: 5 } },
        }),
      ],
      t,
    });
    expect(absent).not.toContain("reasoning:");
  });
 });
 // A minimal on-screen (live) message, matching the subset buildChatMarkdown reads.
 function live(partial: {
  id?: string;
  role?: string;
  parts?: { type: string; text?: string }[];
  metadata?: { usage?: Record<string, number>; error?: string };
 }) {
  return {
    id: partial.id ?? "live-id",
    role: partial.role ?? "assistant",
    parts: partial.parts ?? [],
    metadata: partial.metadata,
  };
 }
 describe("buildChatMarkdown — live (WYSIWYG) source", () => {
  it("uses the live messages as the document (what's on screen), numbered from 1", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      // Persisted rows hold only the user turn; the assistant reply is live-only.
      rows: [row({ id: "u1", role: "user", content: "persisted user" })],
      live: [
        live({
          id: "u1",
          role: "user",
          parts: [{ type: "text", text: "on-screen user" }],
        }),
        live({
          id: "a1",
          role: "assistant",
          parts: [{ type: "text", text: "on-screen reply" }],
        }),
      ],
      isStreaming: false,
      t,
    });
    expect(md).toContain("## 1. You");
    expect(md).toContain("## 2. AI agent");
    expect(md).toContain("on-screen user");
    expect(md).toContain("on-screen reply");
    // Message count reflects the LIVE document, not rows + live.
    expect(md).toContain("- Messages: 2");
  });
  it("captures a partial reply from an interrupted (non-streaming) turn — no 'generating' note", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [row({ id: "u1", role: "user", content: "q" })],
      live: [
        live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
        live({
          id: "a-live",
          role: "assistant",
          parts: [{ type: "text", text: "partial plan before the drop" }],
        }),
      ],
      isStreaming: false, // the stream dropped — not streaming anymore
      banner: "Connection lost — the answer was interrupted.",
      t,
    });
    // The partial assistant answer that was on screen IS in the export.
    expect(md).toContain("partial plan before the drop");
    // It is NOT flagged still-generating (the turn is over, just interrupted).
    expect(md).not.toContain("still being generated");
    // The on-screen banner is recorded at the end.
    expect(md).toContain("Connection lost — the answer was interrupted.");
  });
  it("flags ONLY the tail assistant as still generating, and only while streaming", () => {
    const streaming = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [],
      live: [
        live({
          id: "a",
          role: "assistant",
          parts: [{ type: "text", text: "done earlier" }],
        }),
        live({
          id: "u",
          role: "user",
          parts: [{ type: "text", text: "next q" }],
        }),
        live({
          id: "b",
          role: "assistant",
          parts: [{ type: "text", text: "streaming now" }],
        }),
      ],
      isStreaming: true,
      t,
    });
    // Exactly one "still being generated" note (the tail assistant).
    expect(streaming.match(/still being generated/g)?.length).toBe(1);
    const idle = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [],
      live: [
        live({
          id: "b",
          role: "assistant",
          parts: [{ type: "text", text: "final" }],
        }),
      ],
      isStreaming: false,
      t,
    });
    expect(idle).not.toContain("still being generated");
  });
  it("does NOT flag a completed assistant as generating when the streaming tail is a user message", () => {
    // The `status === "submitted"` window: the user just sent, isStreaming is
    // already true, but the new assistant turn has no message yet so the tail is
    // the USER message. The previous assistant answer is complete on screen and
    // must not be marked still-generating (WYSIWYG; regression for #160 review).
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [],
      live: [
        live({
          id: "a",
          role: "assistant",
          parts: [{ type: "text", text: "completed answer" }],
        }),
        live({
          id: "u",
          role: "user",
          parts: [{ type: "text", text: "the new question" }],
        }),
      ],
      isStreaming: true,
      t,
    });
    expect(md).toContain("completed answer");
    expect(md).not.toContain("still being generated");
  });
  it("emits the heading + note for a streaming tail assistant with empty parts", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [row({ id: "u1", role: "user", content: "q" })],
      live: [
        live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
        live({ id: "a-live", role: "assistant", parts: [] }),
      ],
      isStreaming: true,
      t,
    });
    expect(md).toContain("## 2. AI agent");
    expect(md).toContain("still being generated");
  });
 });
 describe("buildChatMarkdown — live enrichment from persisted rows", () => {
  it("pulls usage / error / timestamp from the persisted row matched by id", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          id: "a1",
          role: "assistant",
          content: "x",
          createdAt: "2026-06-22T10:00:00.000Z",
          metadata: {
            usage: { inputTokens: 10, outputTokens: 5 },
            error: "rate limited",
          },
        }),
      ],
      live: [
        // Same id as the persisted row, but no usage/error/timestamp on the live msg.
        live({
          id: "a1",
          role: "assistant",
          parts: [{ type: "text", text: "reply" }],
        }),
      ],
      isStreaming: false,
      t,
    });
    expect(md).toContain("reply");
    // Token footer + total come from the enriched row.
    expect(md).toContain("_Tokens — in: 10, out: 5, total: 15_");
    expect(md).toContain("- Total tokens: 15");
    expect(md).toContain("**⚠️ Error:** rate limited");
    // The persisted timestamp is carried into the export.
    expect(md).toContain("<!-- 2026-06-22T10:00:00.000Z -->");
  });
  it("prefers authoritative usage already on the live message over the row's", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({
          id: "a1",
          role: "assistant",
          content: "x",
          metadata: {
            usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
          },
        }),
      ],
      live: [
        live({
          id: "a1",
          role: "assistant",
          parts: [{ type: "text", text: "reply" }],
          metadata: {
            usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
          },
        }),
      ],
      isStreaming: false,
      t,
    });
    // The live (authoritative, freshest) usage wins, not the stale row usage.
    expect(md).toContain("- Total tokens: 150");
    expect(md).not.toContain("- Total tokens: 2");
  });
  it("a current-turn live message with no matching row renders without a footer", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [row({ id: "u1", role: "user", content: "q" })],
      live: [
        live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
        live({
          id: "a-live",
          role: "assistant",
          parts: [{ type: "text", text: "fresh reply" }],
        }),
      ],
      isStreaming: false,
      t,
    });
    expect(md).toContain("fresh reply");
    // No persisted row for the live assistant -> no token footer, no timestamp.
    expect(md).not.toContain("_Tokens —");
    expect(md).not.toContain("<!-- undefined -->");
  });
 });
 describe("buildChatMarkdown — fallback + banner", () => {
  it("falls back to the persisted rows when there are no live messages", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [
        row({ role: "user", content: "from rows" }),
        row({
          role: "assistant",
          content: "answer",
          metadata: { usage: { inputTokens: 4, outputTokens: 6 } },
        }),
      ],
      live: [], // empty live mirror -> fallback path
      isStreaming: false,
      t,
    });
    expect(md).toContain("## 1. You");
    expect(md).toContain("## 2. AI agent");
    expect(md).toContain("from rows");
    expect(md).toContain("- Messages: 2");
    expect(md).toContain("- Total tokens: 10");
  });
  it("appends the on-screen banner once, after the messages", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [row({ role: "user", content: "q" })],
      live: [
        live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] }),
      ],
      isStreaming: false,
      banner: "Rate limit reached — try again shortly.",
      t,
    });
    expect(md).toContain("_⚠️ Rate limit reached — try again shortly._");
    // Banner comes after the (only) message block.
    expect(md.indexOf("Rate limit reached")).toBeGreaterThan(
      md.indexOf("## 1."),
    );
  });
  it("omits the banner block when there is no banner", () => {
    const md = buildChatMarkdown({
      title: "t",
      chatId: "c",
      rows: [row({ role: "user", content: "q" })],
      live: [
        live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] }),
      ],
      isStreaming: false,
      banner: null,
      t,
    });
    expect(md).not.toContain("_⚠️");
  });
 });
 // #174: a brand-new, not-yet-persisted chat whose first turn is streaming (or was
 // interrupted) has live messages but NO persisted rows yet, and its chat id is not
 // known (the caller passes a placeholder). The export must still capture the
 // on-screen thread WYSIWYG from the live messages alone.
 describe("buildChatMarkdown — first-turn export with no persisted base (#174)", () => {
  it("builds the document from live messages alone when rows are empty", () => {
    const md = buildChatMarkdown({
      title: null,
      chatId: "unsaved",
      rows: [],
      live: [
        live({
          id: "u1",
          role: "user",
          parts: [{ type: "text", text: "hello" }],
        }),
        live({
          id: "a1",
          role: "assistant",
          parts: [{ type: "text", text: "partial reply" }],
        }),
      ],
      isStreaming: true,
      t,
    });
    // Both on-screen messages are serialized, numbered from 1.
    expect(md).toContain("## 1. You");
    expect(md).toContain("hello");
    expect(md).toContain("## 2. AI agent");
    expect(md).toContain("partial reply");
    // The streaming tail assistant is flagged as in-progress.
    expect(md).toContain("still being generated");
    // The placeholder chat id and the live message count are recorded.
    expect(md).toContain("- Chat ID: `unsaved`");
    expect(md).toContain("- Messages: 2");
    // No persisted timestamp exists for a current-turn live message.
    expect(md).not.toContain("<!--");
  });
  it("captures an interrupted first turn (no rows, not streaming) without a generating note", () => {
    const md = buildChatMarkdown({
      title: null,
      chatId: "unsaved",
      rows: [],
      live: [
        live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
        live({
          id: "a1",
          role: "assistant",
          parts: [{ type: "text", text: "half an answer" }],
        }),
      ],
      isStreaming: false,
      banner: "Connection dropped — the response was cut off.",
      t,
    });
    expect(md).toContain("half an answer");
    // An interrupted (non-streaming) partial is exported as-is, no generating note.
    expect(md).not.toContain("still being generated");
    // The on-screen banner records the interruption.
    expect(md).toContain("_⚠️ Connection dropped — the response was cut off._");
  });
 });
--- a/apps/client/src/features/ai-chat/utils/chat-markdown.ts
+++ b/apps/client/src/features/ai-chat/utils/chat-markdown.ts
@@ -1,308 +0,0 @@
 /**
 * Client-only Markdown builder for an AI agent chat. Serializes the already
 * persisted message rows (loaded via `useAiChatMessagesQuery`) into a single
 * Markdown string suitable for copying to the clipboard. NO network call is
 * made and NO server/DB code is touched — this reuses the rich "request
 * internals" (tool calls with input/output, per-message token usage,
 * finish/error info) that the chat already holds client-side.
 *
 * Only role labels and tool action labels are localized via the passed-in `t`
 * translator; the structural document words (Input/Output/Error/Tokens/...) are
 * plain English constants because the output is a technical artifact.
 */
 import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
 import {
  ToolUiPart,
  getToolName,
  toolRunState,
  toolLabelKey,
 } from "@/features/ai-chat/utils/tool-parts.tsx";
 // Minimal translator signature compatible with react-i18next's `t`.
 type Translate = (key: string, values?: Record<string, unknown>) => string;
 interface BuildChatMarkdownArgs {
  title: string | null;
  chatId: string;
  /** The live, on-screen messages — the WYSIWYG source of the export. When
   *  present and non-empty these DRIVE the document (so it mirrors exactly what
   *  the user sees, including a partial reply from an interrupted turn). Each is
   *  matched to a persisted row by `id` to enrich it with token usage / error /
   *  timestamp. When absent or empty the builder falls back to `rows`. */
  live?: LiveMessage[];
  /** Persisted message rows. Enrichment source (matched to `live` by id) AND the
   *  fallback document source when `live` is empty. */
  rows: IAiChatMessageRow[];
  /** Whether the live thread is still streaming. Only then is the tail assistant
   *  message flagged "still generating"; an interrupted (non-streaming) partial
   *  reply is exported as-is and the `banner` explains the interruption. */
  isStreaming?: boolean;
  /** The on-screen banner text (error / dropped connection / manual stop),
   *  appended at the end of the export so the artifact records the interruption
   *  the user saw. */
  banner?: string | null;
  t: Translate;
 }
 /** A single AI SDK UIMessage part (text part or other). */
 interface TextLikePart {
  type: string;
  text?: string;
 }
 /** Authoritative per-turn usage the server attaches to a message / row. */
 interface UsageLike {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
 }
 /** A live, on-screen message (subset of the AI SDK UIMessage we consume). */
 interface LiveMessage {
  id: string;
  role: "user" | "assistant" | string;
  parts: TextLikePart[];
  metadata?: { usage?: UsageLike; error?: string };
 }
 /** One message normalized for rendering, regardless of live/persisted origin. */
 interface ExportItem {
  role: string;
  parts: TextLikePart[];
  usage?: UsageLike;
  error?: string;
  /** ISO timestamp from the persisted row, when one is known. */
  createdAt?: string;
  /** True only for the tail assistant message while the thread is streaming. */
  generating: boolean;
 }
 /**
 * Stringify an arbitrary tool input/output value for a fenced block. Strings
 * pass through as-is; everything else is pretty-printed JSON, falling back to
 * `String(value)` if serialization throws (e.g. a circular structure).
 */
 function stringify(value: unknown): string {
  if (typeof value === "string") return value;
  try {
    return JSON.stringify(value, null, 2);
  } catch {
    return String(value);
  }
 }
 /**
 * Wrap `code` in a fenced code block whose backtick delimiter is LONGER than
 * the longest backtick run inside the content, so embedded backticks (or even
 * a literal ``` fence) never break out of the block. Minimum 3 backticks.
 */
 function fence(code: string, lang = ""): string {
  const runs: string[] = code.match(/`+/g) ?? [];
  const longest = runs.reduce((m, s) => Math.max(m, s.length), 0);
  const delim = "`".repeat(Math.max(3, longest + 1));
  return `${delim}${lang}\n${code}\n${delim}`;
 }
 /** Per-row token count, mirroring the header sum in ai-chat-window.tsx. */
 function rowTokens(usage: {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
 }): number {
  return (
    usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
  );
 }
 /** Render one message's UIMessage parts into an array of Markdown blocks
 *  (text blocks + tool blocks). Mirrors MessageItem's part handling. */
 function renderMessageParts(parts: TextLikePart[], t: Translate): string[] {
  const out: string[] = [];
  for (const part of parts) {
    if (part.type === "text") {
      const text = (part.text ?? "").trim();
      // Skip empty/whitespace-only text parts (matches MessageItem).
      if (text.length > 0) out.push(text);
      continue;
    }
    const isToolPart =
      part.type.startsWith("tool-") || part.type === "dynamic-tool";
    if (!isToolPart) continue;
    const tp = part as unknown as ToolUiPart;
    const name = getToolName(tp);
    const { key, values } = toolLabelKey(name);
    const label = t(key, values);
    const state = toolRunState(tp.state);
    const toolLines: string[] = [
      `**Tool: ${label}** (\`${name}\`) — ${state}`,
    ];
    if (tp.input !== undefined) {
      toolLines.push("Input:");
      toolLines.push(fence(stringify(tp.input), "json"));
    }
    if (tp.output !== undefined) {
      toolLines.push("Output:");
      toolLines.push(fence(stringify(tp.output), "json"));
    }
    if (tp.errorText) {
      toolLines.push(`**Error:** ${tp.errorText}`);
    }
    out.push(toolLines.join("\n\n"));
  }
  return out;
 }
 /** Resolve a persisted row's parts: prefer the rich persisted parts, else a
 *  single text part built from the plain-text content (mirrors `rowToUiMessage`). */
 function rowParts(row: IAiChatMessageRow): TextLikePart[] {
  return Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0
    ? (row.metadata.parts as TextLikePart[])
    : [{ type: "text", text: row.content ?? "" }];
 }
 /**
 * Normalize the export to one ordered list of {@link ExportItem}, WYSIWYG-first:
 *
 * - When `live` messages are present, THEY are the document (what the user sees,
 *   incl. an interrupted turn's partial reply). Each is matched to a persisted
 *   row by `id` to pull token usage / error / timestamp — a live message of the
 *   CURRENT turn has no matching row yet, so it simply renders without a footer.
 *   Authoritative `usage`/`error` already on the live message metadata win over
 *   the row (the server attaches usage to the streamed message at a step
 *   boundary before the row is refetched). Only the tail assistant message is
 *   flagged `generating`, and only while `isStreaming`.
 * - When `live` is empty (e.g. the export runs before the live mirror is
 *   populated), fall back to the persisted `rows` so the format never regresses.
 */
 function resolveItems(
  live: LiveMessage[] | undefined,
  rows: IAiChatMessageRow[],
  isStreaming: boolean,
 ): ExportItem[] {
  if (live && live.length > 0) {
    const rowsById = new Map(rows.map((r) => [r.id, r]));
    // The "still generating" note may apply ONLY to an assistant message that is
    // the actual TAIL of the list — that is where the on-screen typing indicator
    // sits. While `status === "submitted"` (isStreaming true) right after the
    // user hit send, the tail is the USER message and the new assistant turn has
    // no message yet; the previous assistant answer is shown complete on screen,
    // so it must NOT be flagged (the indicator renders as a separate bottom
    // block, not on that answer).
    const lastIndex = live.length - 1;
    const tailIsStreamingAssistant =
      isStreaming && live[lastIndex]?.role === "assistant";
    return live.map((m, i) => {
      const row = rowsById.get(m.id);
      return {
        role: m.role,
        parts: m.parts ?? [],
        // Authoritative usage/error already on the live message (the server
        // attaches usage to the streamed message at a step boundary) wins over
        // the persisted row; a current-turn live message has no matching row yet
        // and simply renders without a token footer (the accepted WYSIWYG
        // tradeoff — an interrupted turn loses only its token footer, not text).
        usage: m.metadata?.usage ?? row?.metadata?.usage,
        error: m.metadata?.error ?? row?.metadata?.error ?? undefined,
        createdAt: row?.createdAt,
        generating: tailIsStreamingAssistant && i === lastIndex,
      };
    });
  }
  return rows.map((row) => ({
    role: row.role,
    parts: rowParts(row),
    usage: row.metadata?.usage,
    error: row.metadata?.error ?? undefined,
    createdAt: row.createdAt,
    generating: false,
  }));
 }
 /**
 * Serialize a chat to a Markdown string. Pure (apart from `new Date()` for the
 * export timestamp), so it is straightforward to unit-test.
 */
 export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
  const { title, chatId, live, rows, isStreaming, banner, t } = args;
  const blocks: string[] = [];
  const items = resolveItems(live, rows, isStreaming === true);
  const heading = (title ?? "").trim() || t("Untitled chat");
  blocks.push(`# ${heading}`);
  // Metadata bullet list. Total tokens is only shown when there is a sum.
  const totalTokens = items.reduce(
    (sum, item) => (item.usage ? sum + rowTokens(item.usage) : sum),
    0,
  );
  const meta = [
    `- Chat ID: \`${chatId}\``,
    `- Exported: ${new Date().toISOString()}`,
    `- Messages: ${items.length}`,
  ];
  if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
  blocks.push(meta.join("\n"));
  items.forEach((item, index) => {
    blocks.push("---");
    const roleLabel = item.role === "assistant" ? t("AI agent") : t("You");
    blocks.push(`## ${index + 1}. ${roleLabel}`);
    // Created-at kept in source as an HTML comment (out of the rendered prose).
    // A live message of the current turn has no persisted row yet — omit it.
    if (item.createdAt) blocks.push(`<!-- ${item.createdAt} -->`);
    blocks.push(...renderMessageParts(item.parts, t));
    // A generating assistant may have empty/no parts yet — the heading (above)
    // and this note still record the in-progress turn.
    if (item.generating) {
      blocks.push(
        "_⏳ This message is still being generated — the export captured a partial, in-progress response._",
      );
    }
    // A persisted per-message error (the raw provider text) may coexist with the
    // trailing `banner` (the classified on-screen alert) when the failed turn's
    // row has already been refetched by export time. They describe the same
    // failure at different fidelity; showing both is an accepted, minor redundancy.
    if (item.error) {
      blocks.push(`**⚠️ Error:** ${item.error}`);
    }
    const usage = item.usage;
    if (usage) {
      const total = usage.totalTokens ?? rowTokens(usage);
      // Reasoning (thinking) tokens are shown only when the provider reported a
      // positive count; old rows / non-reasoning providers omit it.
      const reasoning =
        usage.reasoningTokens && usage.reasoningTokens > 0
          ? `, reasoning: ${usage.reasoningTokens}`
          : "";
      blocks.push(
        `_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}${reasoning}, total: ${total}_`,
      );
    }
  });
  // Record the on-screen banner (error / dropped connection / manual stop) so
  // the export reflects exactly what the user saw, including an interruption.
  if (banner && banner.trim().length > 0) {
    blocks.push("---");
    blocks.push(`_⚠️ ${banner.trim()}_`);
  }
  // Blank line between blocks so the Markdown renders cleanly.
  return blocks.join("\n\n");
 }
--- a/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
@@ -0,0 +1,159 @@
 import { ForbiddenException } from '@nestjs/common';
 import { AiChatController } from './ai-chat.controller';
 import {
  planFinalizeAssistant,
  applyFinalize,
  flushAssistant,
  type AssistantFlush,
 } from './ai-chat.service';
 import type { User, Workspace } from '@docmost/db/types/entity.types';
 /**
 * Wiring spec for the #183 `POST /ai-chat/export` endpoint. It must: own-gate via
 * the chat lookup (workspace-scoped + creator-owned), load the FULL transcript
 * via findAllByChat, render server-side, and return `{ markdown }`. Exercised by
 * instantiating the controller with hand-rolled mocks — no Nest graph, no DB.
 */
 describe('AiChatController.export', () => {
  const user = { id: 'u1' } as User;
  const workspace = { id: 'ws1' } as Workspace;
  function makeController(
    over: {
      chat?: unknown;
      rows?: unknown[];
    } = {},
  ) {
    const chat =
      'chat' in over
        ? over.chat
        : { id: 'c1', creatorId: 'u1', title: 'My chat' };
    const aiChatRepo = {
      findById: jest.fn().mockResolvedValue(chat),
    };
    const aiChatMessageRepo = {
      findAllByChat: jest.fn().mockResolvedValue(
        over.rows ?? [
          {
            id: 'm1',
            role: 'user',
            content: 'hi',
            metadata: null,
            status: null,
          },
          {
            id: 'm2',
            role: 'assistant',
            content: 'hello',
            metadata: null,
            status: 'completed',
          },
        ],
      ),
    };
    const controller = new AiChatController(
      {} as never,
      aiChatRepo as never,
      aiChatMessageRepo as never,
      {} as never,
    );
    return { controller, aiChatRepo, aiChatMessageRepo };
  }
  it('renders the full transcript and returns { markdown }', async () => {
    const { controller, aiChatMessageRepo } = makeController();
    const res = await controller.export({ chatId: 'c1' }, user, workspace);
    expect(aiChatMessageRepo.findAllByChat).toHaveBeenCalledWith('c1', 'ws1');
    expect(res.markdown).toContain('# My chat');
    expect(res.markdown).toContain('## 1. You');
    expect(res.markdown).toContain('## 2. AI agent');
  });
  it('forbids a chat the user does not own', async () => {
    const { controller } = makeController({
      chat: { id: 'c1', creatorId: 'someone-else', title: 'X' },
    });
    await expect(
      controller.export({ chatId: 'c1' }, user, workspace),
    ).rejects.toBeInstanceOf(ForbiddenException);
  });
  it('forbids a missing / foreign-workspace chat', async () => {
    const { controller } = makeController({ chat: null });
    await expect(
      controller.export({ chatId: 'c1' }, user, workspace),
    ).rejects.toBeInstanceOf(ForbiddenException);
  });
  it('localizes labels when lang=ru is passed', async () => {
    const { controller } = makeController();
    const res = await controller.export(
      { chatId: 'c1', lang: 'ru' },
      user,
      workspace,
    );
    expect(res.markdown).toContain('## 1. Вы');
    expect(res.markdown).toContain('## 2. ИИ-агент');
  });
 });
 /**
 * The terminal-finalize dispatch (#183): the assistant row is INSERTed upfront
 * as 'streaming' and finalized once on the terminal callback. When the upfront
 * insert SUCCEEDED (we hold an id) finalize UPDATEs that row; when it FAILED
 * (assistantId is undefined) finalize falls back to INSERTing the terminal row
 * so the turn is not lost — the only safety against losing the turn entirely.
 *
 * `planFinalizeAssistant` is the pure decision; `applyFinalize` is the REAL
 * dispatch the service uses, exercised here over a mock repo (not a copy of the
 * logic) so a production drift would fail the test (#186 review).
 */
 describe('finalizeAssistant dispatch (planFinalizeAssistant + applyFinalize)', () => {
  const workspaceId = 'ws1';
  // Drive the SAME applyFinalize the service calls (no duplicated logic).
  async function dispatchFinalize(
    repo: { insert: jest.Mock; update: jest.Mock },
    assistantId: string | undefined,
    flushed: AssistantFlush,
  ): Promise<void> {
    await applyFinalize(
      repo,
      planFinalizeAssistant(assistantId),
      { chatId: 'c1', workspaceId, userId: 'u1' },
      flushed,
    );
  }
  it('plan: update when the upfront insert returned an id', () => {
    expect(planFinalizeAssistant('a1')).toEqual({ kind: 'update', id: 'a1' });
  });
  it('plan: insert (fallback) when there is no upfront id', () => {
    expect(planFinalizeAssistant(undefined)).toEqual({ kind: 'insert' });
  });
  it('(a) upfront insert succeeded -> finalize UPDATEs the row by id', async () => {
    const repo = { insert: jest.fn(), update: jest.fn() };
    const flushed = flushAssistant([], 'final answer', 'completed', {
      finishReason: 'stop',
    });
    await dispatchFinalize(repo, 'a1', flushed);
    expect(repo.update).toHaveBeenCalledWith('a1', workspaceId, flushed);
    expect(repo.insert).not.toHaveBeenCalled();
  });
  it('(b) upfront insert failed -> finalize INSERTs the terminal payload', async () => {
    const repo = { insert: jest.fn(), update: jest.fn() };
    const flushed = flushAssistant([], 'partial', 'error', { error: 'boom' });
    await dispatchFinalize(repo, undefined, flushed);
    expect(repo.update).not.toHaveBeenCalled();
    expect(repo.insert).toHaveBeenCalledTimes(1);
    const arg = repo.insert.mock.calls[0][0];
    // The fallback insert carries the terminal content/status/metadata.
    expect(arg.role).toBe('assistant');
    expect(arg.content).toBe('partial');
    expect(arg.status).toBe('error');
    expect((arg.metadata as { error?: string }).error).toBe('boom');
  });
 });
--- a/apps/server/src/core/ai-chat/ai-chat.controller.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts
@@ -20,7 +20,7 @@ import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
 import { AuthUser } from '../../common/decorators/auth-user.decorator';
 import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
 import { SkipTransform } from '../../common/decorators/skip-transform.decorator';
-import { User, Workspace } from '@docmost/db/types/entity.types';
+import { AiChat, User, Workspace } from '@docmost/db/types/entity.types';
 import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
 import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
 import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
@@ -31,10 +31,12 @@ import { AiChatService, AiChatStreamBody } from './ai-chat.service';
 import { AiTranscriptionService } from './ai-transcription.service';
 import {
  ChatIdDto,
  ExportChatDto,
  GetChatMessagesDto,
  RenameChatDto,
 } from './dto/ai-chat.dto';
 import { describeProviderError } from '../../integrations/ai/ai-error.util';
 import { buildChatMarkdown } from './chat-markdown.util';
 /**
 * Per-user AI chat API (§6.1). Routes are POST to match this codebase's
@@ -81,6 +83,36 @@ export class AiChatController {
    );
  }
  /**
   * Export a chat to Markdown (#183). The DB is the single source of truth: the
   * whole transcript is loaded (oldest -> newest) and rendered server-side. Now
   * that the assistant row is persisted upfront and per step, an interrupted
   * turn is included up to its last finished step. Workspace-scoped and owner-
   * gated via assertOwnedChat (same as the other read endpoints). Returns
   * `{ markdown }`. `lang` localizes the few fixed labels (default English).
   */
  @HttpCode(HttpStatus.OK)
  @Post('export')
  async export(
    @Body() dto: ExportChatDto,
    @AuthUser() user: User,
    @AuthWorkspace() workspace: Workspace,
  ): Promise<{ markdown: string }> {
    const chat = await this.assertOwnedChat(dto.chatId, user, workspace);
    const rows = await this.aiChatMessageRepo.findAllByChat(
      dto.chatId,
      workspace.id,
    );
    const markdown = buildChatMarkdown({
      title: chat.title ?? null,
      chatId: dto.chatId,
      rows,
      // normalizeLang(undefined) already yields 'en', so no `?? 'en'` is needed.
      lang: dto.lang,
    });
    return { markdown };
  }
  /** Rename a chat. */
  @HttpCode(HttpStatus.OK)
  @Post('rename')
@@ -90,7 +122,11 @@ export class AiChatController {
    @AuthWorkspace() workspace: Workspace,
  ) {
    await this.assertOwnedChat(dto.chatId, user, workspace);
-    await this.aiChatRepo.update(dto.chatId, { title: dto.title }, workspace.id);
+    await this.aiChatRepo.update(
      dto.chatId,
      { title: dto.title },
      workspace.id,
    );
    return { success: true };
  }
@@ -145,7 +181,10 @@ export class AiChatController {
    // Resolve the agent role for this turn BEFORE hijack: existing chats read it
    // from ai_chats.role_id (authoritative), a new chat from body.roleId. The
    // role drives both the persona and the optional model override below.
-    const role = await this.aiChatService.resolveRoleForRequest(workspace, body);
+    const role = await this.aiChatService.resolveRoleForRequest(
      workspace,
      body,
    );
    // Resolve the model (applying the role's optional override) BEFORE hijack so
    // an unconfigured provider — including a role pointing at an unconfigured
@@ -232,7 +271,9 @@ export class AiChatController {
    let file = null;
    try {
      // Whisper hard-caps uploads at 25MB; allow a single file.
-      file = await req.file({ limits: { fileSize: 25 * 1024 * 1024, files: 1 } });
+      file = await req.file({
        limits: { fileSize: 25 * 1024 * 1024, files: 1 },
      });
    } catch (err: any) {
      if (err?.statusCode === 413) {
        throw new BadRequestException('Audio file too large (max 25MB)');
@@ -283,11 +324,12 @@ export class AiChatController {
    chatId: string,
    user: User,
    workspace: Workspace,
-  ): Promise<void> {
+  ): Promise<AiChat> {
    const chat = await this.aiChatRepo.findById(chatId, workspace.id);
    if (!chat || chat.creatorId !== user.id) {
      throw new ForbiddenException();
    }
    return chat;
  }
 }
--- a/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts
@@ -0,0 +1,61 @@
 import { Logger } from '@nestjs/common';
 import { AiChatService } from './ai-chat.service';
 /**
 * Lifecycle unit tests for AiChatService.onModuleInit (#183 crash-recovery
 * sweep). The sweep is BEST-EFFORT: a failure must be logged (warn) but must
 * NEVER throw out of onModuleInit and block server startup. Exercised with a
 * hand-rolled mock repo — no Nest graph, no DB. Only `aiChatMessageRepo` is
 * touched by onModuleInit, so the other constructor deps are stubbed as never.
 */
 describe('AiChatService.onModuleInit (startup sweep)', () => {
  function makeService(sweepStreaming: jest.Mock) {
    const aiChatMessageRepo = { sweepStreaming };
    const service = new AiChatService(
      {} as never, // ai
      {} as never, // aiChatRepo
      aiChatMessageRepo as never,
      {} as never, // aiSettings
      {} as never, // tools
      {} as never, // mcpClients
      {} as never, // aiAgentRoleRepo
      {} as never, // pageRepo
      {} as never, // pageAccess
    );
    return { service, aiChatMessageRepo };
  }
  afterEach(() => jest.restoreAllMocks());
  it('happy path: calls sweepStreaming and resolves', async () => {
    const sweepStreaming = jest.fn().mockResolvedValue(0);
    const { service } = makeService(sweepStreaming);
    await expect(service.onModuleInit()).resolves.toBeUndefined();
    expect(sweepStreaming).toHaveBeenCalledTimes(1);
  });
  it('logs how many rows were swept when > 0', async () => {
    const sweepStreaming = jest.fn().mockResolvedValue(3);
    const logSpy = jest
      .spyOn(Logger.prototype, 'log')
      .mockImplementation(() => undefined);
    const { service } = makeService(sweepStreaming);
    await service.onModuleInit();
    expect(logSpy).toHaveBeenCalledTimes(1);
    expect(String(logSpy.mock.calls[0][0])).toContain('3');
  });
  it('sweepStreaming throws -> onModuleInit resolves (does NOT throw) and warns', async () => {
    const sweepStreaming = jest
      .fn()
      .mockRejectedValue(new Error('db unavailable'));
    const warnSpy = jest
      .spyOn(Logger.prototype, 'warn')
      .mockImplementation(() => undefined);
    const { service } = makeService(sweepStreaming);
    // Must not throw — a sweep failure may never block startup.
    await expect(service.onModuleInit()).resolves.toBeUndefined();
    expect(warnSpy).toHaveBeenCalledTimes(1);
    expect(String(warnSpy.mock.calls[0][0])).toContain('db unavailable');
  });
 });
--- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
@@ -4,7 +4,7 @@ import {
  serializeSteps,
  rowToUiMessage,
  prepareAgentStep,
-  buildPartialAssistantRecord,
+  flushAssistant,
  chatStreamMetadata,
  accumulateStepUsage,
  MAX_AGENT_STEPS,
@@ -94,8 +94,12 @@ describe('assistantParts', () => {
    const steps = [
      {
        text: '',
-        toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
+        toolCalls: [
-        toolResults: [{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } }],
+          { toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
        ],
        toolResults: [
          { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
        ],
      },
    ];
    const parts = assistantParts(steps, '') as AnyPart[];
@@ -109,7 +113,9 @@ describe('assistantParts', () => {
    const steps = [
      {
        text: '',
-        toolCalls: [{ toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } }],
+        toolCalls: [
          { toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } },
        ],
        toolResults: [],
      },
    ];
@@ -136,7 +142,8 @@ describe('assistantParts', () => {
    ];
    const parts = assistantParts(steps, '') as AnyPart[];
    const toolParts = parts.filter(
-      (p) => typeof p.type === 'string' && (p.type as string).startsWith('tool-'),
+      (p) =>
        typeof p.type === 'string' && (p.type as string).startsWith('tool-'),
    );
    expect(toolParts).toHaveLength(0);
  });
@@ -222,79 +229,108 @@ describe('prepareAgentStep', () => {
    // The synthesis instruction is appended.
    expect(result?.system).toContain(FINAL_STEP_INSTRUCTION);
  });
  it('pins the off-by-one boundary (MAX-2 is not final, MAX-1 is)', () => {
    // Boundary expressed via the constant, not a hardcoded 18/19, so the test
    // tracks MAX_AGENT_STEPS if the cap ever changes.
    expect(prepareAgentStep(MAX_AGENT_STEPS - 2, 'SYS')).toBeUndefined();
    const atBoundary = prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS');
    expect(atBoundary).toBeDefined();
    expect(atBoundary?.toolChoice).toBe('none');
  });
 });
 /**
- * Unit test for buildPartialAssistantRecord: the pure helper that shapes the
+ * flushAssistant (#183): the PURE row builder behind the step-granular durable
- * assistant-message record persisted on a partial/failed turn (the streamText
+ * write path. It runs identically for the upfront insert (empty steps,
- * onError / onAbort paths). It captures the PARTIAL answer the user already saw
+ * 'streaming'), every per-step update, and the terminal finalize — so a future
- * (finished steps' text + tool parts, plus the in-progress step's text) so a
+ * background worker can call the same function. These tests pin the four status
- * provider error / disconnect no longer throws the streamed answer away. Pinning
+ * shapes and the `metadata.parts` shape that rowToUiMessage/findRecent depend on
- * the record shape here covers the persist-partial logic without seaming
+ * (per-step text + tool parts via assistantParts, in-progress text appended).
 * streamText itself.
 */
-describe('buildPartialAssistantRecord', () => {
+describe('flushAssistant', () => {
  type AnyPart = Record<string, unknown>;
-  it('records an empty turn with the error text (preserves old behavior)', () => {
+  const toolStep = {
-    const rec = buildPartialAssistantRecord([], '', 'error', '401: Unauthorized');
+    text: 'looked it up',
-    expect(rec).toEqual({
+    toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
-      text: '',
+    toolResults: [
-      toolCalls: null,
+      { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
-      metadata: { finishReason: 'error', parts: [], error: '401: Unauthorized' },
+    ],
  };
  it('upfront seed: empty streaming row (no content, no toolCalls, empty parts)', () => {
    const f = flushAssistant([], '', 'streaming');
    expect(f.status).toBe('streaming');
    expect(f.content).toBe('');
    expect(f.toolCalls).toBeNull();
    expect(f.metadata.parts).toEqual([]);
    // No finishReason while streaming (it is not a terminal state).
    expect('finishReason' in f.metadata).toBe(false);
  });
  it('streaming update folds in finished steps but keeps status streaming', () => {
    const f = flushAssistant([toolStep], '', 'streaming');
    expect(f.status).toBe('streaming');
    expect(f.content).toBe('looked it up');
    const parts = f.metadata.parts as AnyPart[];
    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
    const toolPart = parts.find((p) => p.type === 'tool-getPage');
    expect(toolPart!.state).toBe('output-available');
    expect(f.toolCalls).not.toBeNull();
  });
  it('completed: attaches finishReason + normalized usage + contextTokens', () => {
    const f = flushAssistant([toolStep], '', 'completed', {
      finishReason: 'stop',
      usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
      contextTokens: 15,
    });
    expect(f.status).toBe('completed');
    expect(f.metadata.finishReason).toBe('stop');
    expect(f.metadata.usage).toEqual({
      inputTokens: 10,
      outputTokens: 5,
      totalTokens: 15,
      reasoningTokens: undefined,
    });
    expect(f.metadata.contextTokens).toBe(15);
  });
  it('error: records the error and a derived finishReason', () => {
    const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' });
    expect(f.status).toBe('error');
    expect(f.content).toBe('partial answer');
    expect(f.metadata.error).toBe('boom');
    // Derives finishReason from the terminal status when none is supplied.
    expect(f.metadata.finishReason).toBe('error');
    expect(f.metadata.parts).toEqual([
      { type: 'text', text: 'partial answer' },
    ]);
  });
  it('aborted: in-progress text appended last, no error key', () => {
    const f = flushAssistant([toolStep], ' and then', 'aborted');
    expect(f.status).toBe('aborted');
    expect(f.metadata.finishReason).toBe('aborted');
    expect('error' in f.metadata).toBe(false);
    expect(f.content).toBe('looked it up and then');
    const parts = f.metadata.parts as AnyPart[];
    expect(parts[parts.length - 1]).toEqual({
      type: 'text',
      text: ' and then',
    });
  });
-  it('persists in-progress text (no finished steps) as the partial answer', () => {
+  it('combines a finished tool step with trailing in-progress text (error path)', () => {
-    const rec = buildPartialAssistantRecord([], 'partial answer', 'error', 'boom');
+    // The error path captures the PARTIAL answer the user already saw: each
-    expect(rec.text).toBe('partial answer');
+    // finished step's text + tool parts, then the in-progress step's text last.
-    expect(rec.metadata.parts).toEqual([
+    const flushed = flushAssistant([toolStep], ' and then', 'error', {
-      { type: 'text', text: 'partial answer' },
+      error: 'boom',
-    ]);
+    });
-    expect(rec.metadata.error).toBe('boom');
+    const parts = flushed.metadata.parts as AnyPart[];
  });
  it('combines a finished tool step with trailing in-progress text', () => {
    const steps = [
      {
        text: 'looked it up',
        toolCalls: [
          { toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
        ],
        toolResults: [
          { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
        ],
      },
    ];
    const rec = buildPartialAssistantRecord(steps, ' and then', 'error', 'boom');
    const parts = rec.metadata.parts as AnyPart[];
    // The finished step's text part is present.
    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
    // The paired tool call+result becomes an output-available part.
    const toolPart = parts.find((p) => p.type === 'tool-getPage');
    expect(toolPart).toBeDefined();
    expect(toolPart!.state).toBe('output-available');
-    // The in-progress text is appended LAST so the parts match the stream order.
+    // In-progress text appended LAST so the parts match the stream order.
-    expect(parts[parts.length - 1]).toEqual({ type: 'text', text: ' and then' });
+    expect(parts[parts.length - 1]).toEqual({
-    expect(rec.text).toBe('looked it up and then');
+      type: 'text',
-    expect(rec.toolCalls).not.toBeNull();
+      text: ' and then',
-    expect(rec.metadata.error).toBe('boom');
+    });
-  });
+    expect(flushed.content).toBe('looked it up and then');
-
+    expect(flushed.toolCalls).not.toBeNull();
-  it('omits the error key on the abort path (no errorText)', () => {
+    expect(flushed.metadata.error).toBe('boom');
    const rec = buildPartialAssistantRecord([], 'half', 'aborted');
    expect(rec.metadata.finishReason).toBe('aborted');
    expect('error' in rec.metadata).toBe(false);
    expect(rec.text).toBe('half');
  });
 });
@@ -319,10 +355,20 @@ describe('chatStreamMetadata', () => {
      chatStreamMetadata(
        { type: 'finish-step', usage: { outputTokens: 100 } },
        'chat-1',
-        { inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 },
+        {
          inputTokens: 500,
          outputTokens: 220,
          totalTokens: 720,
          reasoningTokens: 30,
        },
      ),
    ).toEqual({
-      usage: { inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 },
+      usage: {
        inputTokens: 500,
        outputTokens: 220,
        totalTokens: 720,
        reasoningTokens: 30,
      },
    });
  });
@@ -394,8 +440,18 @@ describe('accumulateStepUsage', () => {
  it('sums every field across two steps', () => {
    expect(
      accumulateStepUsage(
-        { inputTokens: 500, outputTokens: 100, totalTokens: 600, reasoningTokens: 30 },
+        {
-        { inputTokens: 520, outputTokens: 80, totalTokens: 600, reasoningTokens: 10 },
+          inputTokens: 500,
          outputTokens: 100,
          totalTokens: 600,
          reasoningTokens: 30,
        },
        {
          inputTokens: 520,
          outputTokens: 80,
          totalTokens: 600,
          reasoningTokens: 10,
        },
      ),
    ).toEqual({
      inputTokens: 1020,
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -1,4 +1,9 @@
-import { ForbiddenException, Injectable, Logger } from '@nestjs/common';
+import {
  ForbiddenException,
  Injectable,
  Logger,
  OnModuleInit,
 } from '@nestjs/common';
 import { FastifyReply } from 'fastify';
 import {
  streamText,
@@ -60,7 +65,10 @@ export function prepareAgentStep(
  system: string,
 ): { toolChoice: 'none'; system: string } | undefined {
  if (stepNumber >= MAX_AGENT_STEPS - 1) {
-    return { toolChoice: 'none', system: `${system}\n\n${FINAL_STEP_INSTRUCTION}` };
+    return {
      toolChoice: 'none',
      system: `${system}\n\n${FINAL_STEP_INSTRUCTION}`,
    };
  }
  return undefined;
 }
@@ -121,7 +129,7 @@ export interface AiChatStreamArgs {
 *                    can be rebuilt for `convertToModelMessages`.
 */
@Injectable()
-export class AiChatService {
+export class AiChatService implements OnModuleInit {
  private readonly logger = new Logger(AiChatService.name);
  constructor(
@@ -136,6 +144,32 @@ export class AiChatService {
    private readonly pageAccess: PageAccessService,
  ) {}
  /**
   * Crash-recovery sweep on server start (#183): any assistant row left in the
   * 'streaming' state is the relic of a turn whose process died before it
   * reached a terminal status. Flip those to 'aborted' so history/export show
   * them settled (with whatever finished steps were already persisted) instead
   * of perpetually "streaming". Best-effort: a sweep failure is logged but must
   * never block server startup.
   */
  async onModuleInit(): Promise<void> {
    try {
      const swept = await this.aiChatMessageRepo.sweepStreaming();
      if (swept > 0) {
        this.logger.log(
          `Startup sweep: marked ${swept} dangling 'streaming' assistant ` +
            `message(s) as 'aborted'.`,
        );
      }
    } catch (err) {
      this.logger.warn(
        `Startup sweep of dangling 'streaming' messages failed: ${
          err instanceof Error ? err.message : 'unknown error'
        }`,
      );
    }
  }
  /**
   * Resolve the agent role that applies to this stream request, scoped to the
   * workspace and soft-delete aware. For an EXISTING chat the role is read from
@@ -259,9 +293,7 @@ export class AiChatService {
      content: incomingText,
      // jsonb column: UIMessage parts are JSON-serializable at runtime but not
      // structurally `JsonValue`, so cast through unknown.
-      metadata: (incoming?.parts
+      metadata: (incoming?.parts ? { parts: incoming.parts } : null) as never,
        ? { parts: incoming.parts }
        : null) as never,
    });
    // Rebuild the conversation from persisted history (not the client payload),
@@ -347,31 +379,6 @@ export class AiChatService {
      );
    };
    // Persist the assistant message. Used by onFinish (full result) and the
    // abort/error paths (partial result). Guarded so we persist at most once.
    let persisted = false;
    const persistAssistant = async (data: {
      text: string;
      toolCalls: unknown;
      metadata: Record<string, unknown>;
    }): Promise<void> => {
      if (persisted) return;
      persisted = true;
      try {
        await this.aiChatMessageRepo.insert({
          chatId,
          workspaceId: workspace.id,
          userId: user.id,
          role: 'assistant',
          content: data.text ?? '',
          toolCalls: (data.toolCalls ?? null) as never,
          metadata: data.metadata as never,
        });
      } catch (err) {
        this.logger.error('Failed to persist assistant message', err as Error);
      }
    };
    // Accumulate the turn's streamed output so a provider error / disconnect can
    // persist the PARTIAL answer the user already saw — the SDK's onError/onAbort
    // callbacks don't hand us the in-progress text. `capturedSteps` holds finished
@@ -380,6 +387,101 @@ export class AiChatService {
    const capturedSteps: StepLike[] = [];
    let inProgressText = '';
    // Step-granular durability (#183): create the assistant row UPFRONT in the
    // 'streaming' state (before any token), then UPDATE it as each step finishes
    // and finalize it once on the terminal callback. If the process dies
    // mid-turn the row survives with every finished step already persisted; the
    // startup sweep (sweepStreaming) later flips a dangling 'streaming' row to
    // 'aborted'. The DB is now the single source of truth for the turn — the
    // socket is never required for the write path. A failed upfront insert is
    // logged and leaves assistantId undefined; the per-step/terminal updates then
    // no-op (guarded below) so the turn still streams to the user.
    let assistantId: string | undefined;
    try {
      const seed = flushAssistant([], '', 'streaming');
      const seeded = await this.aiChatMessageRepo.insert({
        chatId,
        workspaceId: workspace.id,
        userId: user.id,
        role: 'assistant',
        content: seed.content,
        // jsonb columns: cast through never (same as the user insert above).
        toolCalls: (seed.toolCalls ?? null) as never,
        metadata: seed.metadata as never,
        status: seed.status,
      });
      assistantId = seeded?.id;
    } catch (err) {
      this.logger.error(
        `Failed to insert upfront assistant row (chat ${chatId}, workspace ${workspace.id})`,
        err as Error,
      );
    }
    // Per-step (non-terminal) update: persist the finished steps the moment a
    // step ends. Tolerant — a failed update is logged and swallowed so it never
    // throws into the stream. Keeps status 'streaming'.
    const updateStreaming = async (): Promise<void> => {
      if (!assistantId) return;
      // Cheap short-circuit once the turn is finalized (see `finalized` below).
      // The AUTHORITATIVE guard is `onlyIfStreaming` on the UPDATE: a late
      // fire-and-forget step update could still be in flight on another pool
      // connection when finalize runs, so the SQL `WHERE status='streaming'`
      // (not this flag) is what prevents it clobbering the terminal row.
      if (finalized) return;
      try {
        await this.aiChatMessageRepo.update(
          assistantId,
          workspace.id,
          flushAssistant(capturedSteps, '', 'streaming'),
          { onlyIfStreaming: true },
        );
      } catch (err) {
        this.logger.warn(
          `Failed to update streaming assistant row: ${
            err instanceof Error ? err.message : 'unknown error'
          }`,
        );
      }
    };
    // Serialize the per-step updates (#183 review): onStepFinish fires them
    // without await, so two could otherwise commit out of order on different pool
    // connections (step N landing after N+1). Chaining each onto the previous
    // keeps the persisted row monotonic with step order; each link short-circuits
    // on `finalized`, so a tail of late updates is cheap.
    let stepUpdateChain: Promise<void> = Promise.resolve();
    // Terminal finalize: write the completed/error/aborted row exactly once
    // across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort
    // callbacks — mirroring the pre-#183 persist-at-most-once guard for the
    // TERMINAL status (the row may be updated many times with 'streaming' before
    // this fires once).
    let finalized = false;
    const finalizeAssistant = async (
      flushed: AssistantFlush,
    ): Promise<void> => {
      if (finalized) return;
      finalized = true;
      const plan = planFinalizeAssistant(assistantId);
      try {
        // Shared dispatch (see applyFinalize): UPDATE the upfront row, or — when
        // the upfront insert failed (kind 'insert') — INSERT the terminal row as
        // the only safety against losing the turn entirely.
        await applyFinalize(
          this.aiChatMessageRepo,
          plan,
          { chatId, workspaceId: workspace.id, userId: user.id },
          flushed,
        );
      } catch (err) {
        this.logger.error(
          `Failed to finalize assistant message (kind=${plan.kind})`,
          err as Error,
        );
      }
    };
    // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure
    // first-chunk latency, the model-silent gap right before a disconnect, and
    // how many SSE heartbeats were written, so a Safari drop can be classified
@@ -395,144 +497,151 @@ export class AiChatService {
    let result: ReturnType<typeof streamText>;
    try {
      result = streamText({
-      model,
+        model,
-      system,
+        system,
-      messages,
+        messages,
-      tools,
+        tools,
-      // No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full
+        // No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full
-      // page body for the write tools) are emitted as OUTPUT tokens, so a fixed
+        // page body for the write tools) are emitted as OUTPUT tokens, so a fixed
-      // cap would truncate complex tool calls mid-argument. Let the model use its
+        // cap would truncate complex tool calls mid-argument. Let the model use its
-      // natural per-step budget. (Cost/credit limits are an account concern, not
+        // natural per-step budget. (Cost/credit limits are an account concern, not
-      // something to enforce by silently breaking the agent.)
+        // something to enforce by silently breaking the agent.)
-      stopWhen: stepCountIs(MAX_AGENT_STEPS),
+        stopWhen: stepCountIs(MAX_AGENT_STEPS),
-      // Forced finalization: reserve the LAST allowed step for a text-only
+        // Forced finalization: reserve the LAST allowed step for a text-only
-      // answer. Without this, a turn that spends all its steps on tool calls
+        // answer. Without this, a turn that spends all its steps on tool calls
-      // ends with no assistant text (an empty turn). prepareAgentStep forbids
+        // ends with no assistant text (an empty turn). prepareAgentStep forbids
-      // further tool calls and appends a synthesis instruction on that step,
+        // further tool calls and appends a synthesis instruction on that step,
-      // concatenated onto the original `system` so the persona is preserved.
+        // concatenated onto the original `system` so the persona is preserved.
-      prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
+        prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
-      abortSignal: signal,
+        abortSignal: signal,
-      onChunk: ({ chunk }) => {
+        onChunk: ({ chunk }) => {
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
+          // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
-        // output chunk means the stream is actively emitting bytes; track first
+          // output chunk means the stream is actively emitting bytes; track first
-        // + most-recent activity timestamps.
+          // + most-recent activity timestamps.
-        const now = Date.now();
+          const now = Date.now();
-        firstModelChunkAt ??= now;
+          firstModelChunkAt ??= now;
-        lastModelChunkAt = now;
+          lastModelChunkAt = now;
-        // 'text-delta' is the assistant's prose; tool-call args are separate chunk
+          // 'text-delta' is the assistant's prose; tool-call args are separate chunk
-        // types — so this mirrors exactly what streams to the client.
+          // types — so this mirrors exactly what streams to the client.
-        if (chunk.type === 'text-delta') inProgressText += chunk.text;
+          if (chunk.type === 'text-delta') inProgressText += chunk.text;
-      },
+        },
-      onStepFinish: (step) => {
+        onStepFinish: (step) => {
-        // The finished step's full text is now in `step.text`; fold it in and reset
+          // The finished step's full text is now in `step.text`; fold it in and reset
-        // the in-progress accumulator for the next step.
+          // the in-progress accumulator for the next step.
-        capturedSteps.push(step as StepLike);
+          capturedSteps.push(step as StepLike);
-        inProgressText = '';
+          inProgressText = '';
-      },
+          // Step-granular durability (#183): persist this finished step (its text +
-      onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
+          // tool calls + tool RESULTS) the moment it ends, so a process death after
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
+          // this point still recovers the step. Not awaited here (never block the
-        // baseline for Safari comparison.
+          // stream), but SERIALIZED via stepUpdateChain so the writes commit in
-        const diagNow = Date.now();
+          // step order; updateStreaming is error-tolerant (logs + swallows).
-        this.logger.log(
+          stepUpdateChain = stepUpdateChain.then(() => updateStreaming());
-          `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
+        },
-            `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
+        onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
-            `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
+          // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
-        );
+          // baseline for Safari comparison.
-        await persistAssistant({
+          const diagNow = Date.now();
-          text,
+          this.logger.log(
-          toolCalls: serializeSteps(steps),
+            `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
-          metadata: {
+              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
-            finishReason,
+              `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
            // Persist the turn's cumulative usage WITH reasoning tokens resolved
            // from either the new `outputTokenDetails` or the deprecated top-level
            // field, so reopened history / the Markdown export show the thinking
            // token cost too.
            usage: normalizeStreamUsage(totalUsage as StreamUsage) ?? totalUsage,
            // Final-step usage = the context actually fed to the model on the last LLM
            // call (full history + tool results) plus the answer it just generated.
            // input+output of the FINAL step ≈ the conversation's CURRENT context size,
            // distinct from totalUsage which sums every step (cumulative tokens spent).
            contextTokens:
              (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined,
            // Persist the FULL set of UIMessage parts for the turn (text +
            // tool-call/result), so the rebuilt history replays prior tool
            // context to the model on later turns.
            parts: assistantParts(steps, text),
          },
        });
        // Lifecycle: release the external MCP clients leased for this turn.
        await closeExternalClients();
        // Generate the chat title for a freshly created chat AFTER the stream's
        // provider call has completed — NOT concurrently with it. The z.ai coding
        // endpoint stalls one of two concurrent requests to the same plan, which
        // black-holed the chat stream (~300s headers timeout) when title
        // generation raced it. Running it here (solo, fire-and-forget) avoids the
        // race; never block the turn on it, swallow any error.
        if (isNewChat && incomingText) {
          void this.generateTitle(chatId, workspace.id, incomingText).catch(
            (err) => {
              this.logger.warn(
                `Title generation failed: ${(err as Error)?.message ?? err}`,
              );
            },
          );
-        }
+          // Finalize the assistant row (#183): the upfront 'streaming' row is
-      },
+          // UPDATEd to 'completed' with the turn's final text, cumulative usage and
-      onError: async ({ error }) => {
+          // full UIMessage parts. We pass the SDK `steps` (which carry the final
-        // NestJS Logger.error(message, stack?, context?): pass the real message
+          // step's text) as the captured steps so metadata.parts matches the
-        // (with statusCode when present) + the stack string, not the Error
+          // pre-#183 onFinish record exactly; `inProgressText` is '' here (the last
-        // object, so the actual provider cause is clearly logged. Reuse the
+          // step already finished). Final-step usage (usage.input+output) ≈ the
-        // shared formatter so provider error formatting stays unified.
+          // conversation's CURRENT context size, distinct from totalUsage.
-        const e = error as { stack?: string };
+          //
-        const errorText = describeProviderError(error, String(error));
+          // COLUMN-SEMANTICS NOTE (#183): `content` is built by flushAssistant as
-        this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
+          // the CONCATENATION of every step's text (stepsText), whereas pre-#183
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
+          // it stored only the FINAL step's text. This is a deliberate, harmless
-        // an error-terminated stream.
+          // change: the UI and the Markdown export render from `metadata.parts`
-        const diagNow = Date.now();
+          // (per-step text + tool parts), not from `content`; `content` is the
-        this.logger.warn(
+          // plain-text projection (full-text search / fallback). A multi-step
-          `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
+          // turn's `content` therefore now holds all steps' prose, not just the
-            `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
+          // last block.
-            `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
+          await finalizeAssistant(
-        );
+            flushAssistant(steps as StepLike[], '', 'completed', {
-        // Persist the PARTIAL answer streamed before the failure (text + any
+              finishReason: finishReason as string,
-        // finished tool steps) WITH the error in metadata, so the turn shows what
+              usage: totalUsage as StreamUsage,
-        // the user already saw plus the cause — not just a bare error.
+              contextTokens:
-        await persistAssistant(
+                (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
-          buildPartialAssistantRecord(
+                undefined,
-            capturedSteps,
+            }),
-            inProgressText,
+          );
-            'error',
+          // Lifecycle: release the external MCP clients leased for this turn.
-            errorText,
+          await closeExternalClients();
-          ),
+
-        );
+          // Generate the chat title for a freshly created chat AFTER the stream's
-        await closeExternalClients();
+          // provider call has completed — NOT concurrently with it. The z.ai coding
-      },
+          // endpoint stalls one of two concurrent requests to the same plan, which
-      onAbort: async ({ steps }) => {
+          // black-holed the chat stream (~300s headers timeout) when title
-        const partialChars =
+          // generation raced it. Running it here (solo, fire-and-forget) avoids the
-          capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) +
+          // race; never block the turn on it, swallow any error.
-          inProgressText.length;
+          if (isNewChat && incomingText) {
-        // Unlike onError/onFinish, this terminal path otherwise writes nothing, so
+            void this.generateTitle(chatId, workspace.id, incomingText).catch(
-        // an aborted turn (client disconnect / proxy drop / stop()) would be
+              (err) => {
-        // invisible in the logs. Log it (warn) so the abort is traceable.
+                this.logger.warn(
-        this.logger.warn(
+                  `Title generation failed: ${(err as Error)?.message ?? err}`,
-          `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
+                );
-            `step(s), ${partialChars} chars partial text; persisting partial turn.`,
+              },
-        );
+            );
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
+          }
-        // line — classifies the Safari drop.
+        },
-        const diagNow = Date.now();
+        onError: async ({ error }) => {
-        this.logger.warn(
+          // NestJS Logger.error(message, stack?, context?): pass the real message
-          `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
+          // (with statusCode when present) + the stack string, not the Error
-            `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
+          // object, so the actual provider cause is clearly logged. Reuse the
-            `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
+          // shared formatter so provider error formatting stays unified.
-            `steps=${steps.length}`,
+          const e = error as { stack?: string };
-        );
+          const errorText = describeProviderError(error, String(error));
-        await persistAssistant(
+          this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
-          buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),
+          // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
-        );
+          // an error-terminated stream.
-        await closeExternalClients();
+          const diagNow = Date.now();
-      },
+          this.logger.warn(
            `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
              `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
          );
          // Finalize the PARTIAL answer streamed before the failure (text + any
          // finished tool steps) WITH the error in metadata, so the turn shows what
          // the user already saw plus the cause — not just a bare error. Status
          // 'error' (#183).
          await finalizeAssistant(
            flushAssistant(capturedSteps, inProgressText, 'error', {
              error: errorText,
            }),
          );
          await closeExternalClients();
        },
        onAbort: async ({ steps }) => {
          const partialChars =
            capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) +
            inProgressText.length;
          // Unlike onError/onFinish, this terminal path otherwise writes nothing, so
          // an aborted turn (client disconnect / proxy drop / stop()) would be
          // invisible in the logs. Log it (warn) so the abort is traceable.
          this.logger.warn(
            `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
              `step(s), ${partialChars} chars partial text; persisting partial turn.`,
          );
          // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
          // line — classifies the Safari drop.
          const diagNow = Date.now();
          this.logger.warn(
            `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
              `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
              `steps=${steps.length}`,
          );
          await finalizeAssistant(
            flushAssistant(capturedSteps, inProgressText, 'aborted'),
          );
          await closeExternalClients();
        },
      });
      // Drain the stream independently of the client socket so the turn always
@@ -652,7 +761,10 @@ export class AiChatService {
        'punctuation at the end.',
      prompt: firstMessage.slice(0, 2000),
    });
-    const title = text.trim().replace(/^["']|["']$/g, '').slice(0, 120);
+    const title = text
      .trim()
      .replace(/^["']|["']$/g, '')
      .slice(0, 120);
    if (title) {
      await this.aiChatRepo.update(chatId, { title }, workspaceId);
    }
@@ -975,38 +1087,132 @@ export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
 }
 /**
- * Build the assistant-message record persisted on a partial/failed turn (the
+ * The persisted-row patch shape produced by {@link flushAssistant}. It is the
- * streamText onError / onAbort paths). Captures the partial answer the user
+ * SAME shape the assistant repo insert/update consume (content + toolCalls +
- * already saw: each finished step's text + tool parts (via assistantParts),
+ * metadata) plus the lifecycle `status` column added in #183.
 * then the in-progress step's text appended last. When `errorText` is provided
 * it is recorded in metadata.error so the cause shows in history; an aborted
 * turn passes none. Pure, so the partial-recording shape is unit-testable
 * without seaming streamText.
 */
-export function buildPartialAssistantRecord(
+export interface AssistantFlush {
-  steps: ReadonlyArray<StepLike> | undefined,
+  content: string;
  toolCalls: unknown;
  metadata: Record<string, unknown>;
  status: 'streaming' | 'completed' | 'error' | 'aborted';
 }
 /**
 * Pure decision for the terminal finalize (#183): given whether the upfront
 * assistant row exists (`assistantId`), choose whether the terminal payload is
 * written by UPDATEing that row or — when the upfront insert failed and there is
 * no id — by INSERTing a fresh terminal row so the turn is not lost entirely.
 * Returns `{ kind: 'update', id }` or `{ kind: 'insert' }`. Extracted so the
 * fallback-insert branch (the only safety against losing a turn whose upfront
 * insert failed) is unit-testable without seaming streamText.
 */
 export function planFinalizeAssistant(
  assistantId: string | undefined,
 ): { kind: 'update'; id: string } | { kind: 'insert' } {
  return assistantId ? { kind: 'update', id: assistantId } : { kind: 'insert' };
 }
 /** The repo surface the terminal finalize needs (structural — the real repo and
 *  a test mock both satisfy it). */
 export interface FinalizeRepo {
  insert(insertable: Record<string, unknown>): Promise<unknown>;
  update(
    id: string,
    workspaceId: string,
    patch: AssistantFlush,
  ): Promise<unknown>;
 }
 /**
 * Apply a finalize `plan` to the repo with the terminal `flushed` payload (#183):
 * UPDATE the upfront row, or INSERT a fresh terminal row as the fallback when the
 * upfront insert failed. The SINGLE dispatch shared by the service's
 * finalizeAssistant and its test, so the test exercises the real path instead of
 * a copy (#186 review). Pure of error handling — the caller wraps it.
 */
 export async function applyFinalize(
  repo: FinalizeRepo,
  plan: { kind: 'update'; id: string } | { kind: 'insert' },
  base: { chatId: string; workspaceId: string; userId: string },
  flushed: AssistantFlush,
 ): Promise<void> {
  if (plan.kind === 'update') {
    await repo.update(plan.id, base.workspaceId, flushed);
    return;
  }
  await repo.insert({
    chatId: base.chatId,
    workspaceId: base.workspaceId,
    userId: base.userId,
    role: 'assistant',
    content: flushed.content,
    toolCalls: flushed.toolCalls ?? null,
    metadata: flushed.metadata,
    status: flushed.status,
  });
 }
 /**
 * PURE assistant-row builder (#183 step-granular durability). Given the turn's
 * accumulated steps + the in-progress (not-yet-finished) text + the lifecycle
 * status, it returns the row patch to persist. The SAME path runs for the
 * upfront insert (empty steps, status 'streaming'), every per-step update, and
 * the terminal finalize (completed/error/aborted) — and a future background
 * worker can call it identically, so it must stay a pure function of its inputs
 * (NO `this`, no IO).
 *
 * `metadata.parts` is built by assistantParts over the finished steps, then the
 * in-progress text appended as a trailing text part, so rowToUiMessage /
 * findRecent keep replaying the turn unchanged. `metadata.finishReason`,
 * `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
 * only when provided/relevant, matching the pre-#183 onFinish/onError records.
 */
 export function flushAssistant(
  capturedSteps: ReadonlyArray<StepLike> | undefined,
  inProgressText: string,
-  finishReason: 'error' | 'aborted',
+  status: 'streaming' | 'completed' | 'error' | 'aborted',
-  errorText?: string,
+  extra?: {
-): { text: string; toolCalls: unknown; metadata: Record<string, unknown> } {
+    finishReason?: string;
-  const finished = steps ?? [];
+    usage?: ChatStreamUsage | StreamUsage | undefined;
    contextTokens?: number;
    error?: string;
  },
 ): AssistantFlush {
  const finished = capturedSteps ?? [];
  const stepsText = finished.map((s) => s.text ?? '').join('');
  const trailing = inProgressText ?? '';
  // assistantParts emits text parts only for FINISHED steps; append the
-  // in-progress step's text (the answer cut off by the error) as the last text
+  // in-progress step's text (the partial answer cut off by an error/abort, or
-  // part so the persisted parts match what streamed to the client.
+  // simply not yet flushed mid-stream) as the last text part so the persisted
  // parts match what streamed to the client.
  const parts = assistantParts(finished, '') as unknown as Array<
    Record<string, unknown>
  >;
  if (trailing) parts.push({ type: 'text', text: trailing });
  const metadata: Record<string, unknown> = {
    parts: parts as unknown as UIMessage['parts'],
  };
  // finishReason: prefer an explicit one; else derive a sensible value from the
  // terminal status (so onError/onAbort records keep their historical reason).
  if (extra?.finishReason) {
    metadata.finishReason = extra.finishReason;
  } else if (status === 'error' || status === 'aborted') {
    metadata.finishReason = status;
  }
  if (extra?.usage !== undefined) {
    metadata.usage =
      normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
  }
  if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
  if (extra?.error) metadata.error = extra.error;
  return {
-    text: stepsText + trailing,
+    content: stepsText + trailing,
    toolCalls: serializeSteps(finished),
-    metadata: {
+    metadata,
-      finishReason,
+    status,
      parts: parts as unknown as UIMessage['parts'],
      ...(errorText ? { error: errorText } : {}),
    },
  };
 }
--- a/apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
+++ b/apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
@@ -0,0 +1,295 @@
 import { buildChatMarkdown, normalizeLang } from './chat-markdown.util';
 import type { AiChatMessage } from '@docmost/db/types/entity.types';
 /**
 * normalizeLang: the client sends `i18n.language` — a FULL locale tag like
 * 'en-US' / 'ru-RU', NOT a bare 'en'/'ru'. A `@IsIn(['en','ru'])` DTO rejected
 * that with a 400 (caught in real-browser testing); the export now accepts any
 * string and normalizes here. Guards that regression.
 */
 describe('normalizeLang', () => {
  it("maps any 'ru…' locale tag to ru", () => {
    expect(normalizeLang('ru')).toBe('ru');
    expect(normalizeLang('ru-RU')).toBe('ru');
    expect(normalizeLang('RU-ru')).toBe('ru');
  });
  it('maps everything else (incl. region-qualified English) to en', () => {
    expect(normalizeLang('en')).toBe('en');
    expect(normalizeLang('en-US')).toBe('en');
    expect(normalizeLang('fr-FR')).toBe('en');
    expect(normalizeLang(undefined)).toBe('en');
    expect(normalizeLang('')).toBe('en');
  });
 });
 /**
 * Unit tests for the SERVER Markdown export (#183). Mirrors the coverage of the
 * (now-removed) client chat-markdown tests: heading/metadata, role labels, text
 * + tool blocks, token footers, the interrupted-turn note, and NULL-status
 * (legacy) rows. The export embeds a live `new Date().toISOString()` timestamp;
 * we never assert it, only the deterministic structure.
 */
 function row(partial: Partial<AiChatMessage>): AiChatMessage {
  return {
    id: partial.id ?? 'id',
    chatId: partial.chatId ?? 'chat-1',
    workspaceId: partial.workspaceId ?? 'ws-1',
    userId: partial.userId ?? null,
    role: partial.role ?? 'user',
    content: partial.content ?? null,
    toolCalls: partial.toolCalls ?? null,
    metadata: partial.metadata ?? null,
    status: partial.status ?? null,
    createdAt: partial.createdAt ?? ('2026-06-21T00:00:00.000Z' as never),
    updatedAt: partial.updatedAt ?? ('2026-06-21T00:00:00.000Z' as never),
    deletedAt: partial.deletedAt ?? null,
  } as AiChatMessage;
 }
 describe('buildChatMarkdown (server) — structure', () => {
  it('emits the title heading, chat id and message count', () => {
    const md = buildChatMarkdown({
      title: 'My chat',
      chatId: 'chat-123',
      rows: [],
    });
    expect(md).toContain('# My chat');
    expect(md).toContain('- Chat ID: `chat-123`');
    expect(md).toContain('- Messages: 0');
  });
  it('falls back to "Untitled chat" with no title (en)', () => {
    const md = buildChatMarkdown({ title: null, chatId: 'c', rows: [] });
    expect(md).toContain('# Untitled chat');
  });
  it('localizes fixed labels with lang=ru (structure stays English)', () => {
    const md = buildChatMarkdown({
      title: null,
      chatId: 'c',
      lang: 'ru',
      rows: [row({ role: 'assistant', content: 'hi' })],
    });
    expect(md).toContain('# Без названия');
    expect(md).toContain('## 1. ИИ-агент');
    // Structural words remain English.
    expect(md).toContain('- Chat ID:');
  });
  it('numbers messages and labels roles (You / AI agent)', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({ role: 'user', content: 'question' }),
        row({ role: 'assistant', content: 'answer' }),
      ],
    });
    expect(md).toContain('## 1. You');
    expect(md).toContain('question');
    expect(md).toContain('## 2. AI agent');
    expect(md).toContain('answer');
  });
  it('renders a tool part with fenced input/output and the friendly label', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({
          role: 'assistant',
          content: 'done',
          metadata: {
            parts: [
              {
                type: 'tool-getPage',
                state: 'output-available',
                input: { id: 'p1' },
                output: { title: 'Hello' },
              },
              { type: 'text', text: 'done' },
            ],
          } as never,
        }),
      ],
    });
    expect(md).toContain('**Tool: Read page** (`getPage`) — done');
    expect(md).toContain('Input:');
    expect(md).toContain('"id": "p1"');
    expect(md).toContain('Output:');
    expect(md).toContain('"title": "Hello"');
  });
  // #186 re-review pt 1: restore the parity coverage of the removed client spec —
  // error state, unknown-tool fallback (en + ru), and the circular-stringify catch.
  it('renders a tool part in the error state with its errorText', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({
          role: 'assistant',
          metadata: {
            parts: [
              {
                type: 'tool-getPage',
                state: 'output-error',
                input: { id: 'p1' },
                errorText: 'page not found',
              },
            ],
          } as never,
        }),
      ],
    });
    expect(md).toContain('**Tool: Read page** (`getPage`) — error');
    expect(md).toContain('**Error:** page not found');
  });
  it('falls back to "Ran tool <name>" for an unknown tool (en) and the ru variant', () => {
    const parts = [
      {
        type: 'tool-mysteryTool',
        state: 'output-available',
        output: { ok: 1 },
      },
    ];
    const en = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [row({ role: 'assistant', metadata: { parts } as never })],
    });
    expect(en).toContain('**Tool: Ran tool mysteryTool** (`mysteryTool`)');
    const ru = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      lang: 'ru',
      rows: [row({ role: 'assistant', metadata: { parts } as never })],
    });
    expect(ru).toContain('Выполнил инструмент mysteryTool');
  });
  it('does not throw on a circular tool output (falls back to String)', () => {
    const circular: Record<string, unknown> = {};
    circular.self = circular;
    expect(() =>
      buildChatMarkdown({
        title: 'T',
        chatId: 'c',
        rows: [
          row({
            role: 'assistant',
            metadata: {
              parts: [
                {
                  type: 'tool-getPage',
                  state: 'output-available',
                  output: circular,
                },
              ],
            } as never,
          }),
        ],
      }),
    ).not.toThrow();
  });
  it('emits a token footer + total when usage is present', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({
          role: 'assistant',
          content: 'a',
          metadata: {
            usage: {
              inputTokens: 100,
              outputTokens: 20,
              totalTokens: 120,
              reasoningTokens: 8,
            },
          } as never,
        }),
      ],
    });
    expect(md).toContain('- Total tokens: 120');
    expect(md).toContain(
      '_Tokens — in: 100, out: 20, reasoning: 8, total: 120_',
    );
  });
  it('flags a still-streaming (interrupted) row', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({ role: 'assistant', content: 'partial', status: 'streaming' }),
      ],
    });
    expect(md).toContain('still being generated');
  });
  it('does NOT flag a completed row', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [row({ role: 'assistant', content: 'final', status: 'completed' })],
    });
    expect(md).not.toContain('still being generated');
  });
  it('renders a legacy NULL-status row (no parts) from plain content', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({ role: 'assistant', content: 'legacy answer', status: null }),
      ],
    });
    expect(md).toContain('legacy answer');
    expect(md).not.toContain('still being generated');
  });
  it('renders a persisted error', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({
          role: 'assistant',
          content: '',
          status: 'error',
          metadata: { error: '401: Unauthorized' } as never,
        }),
      ],
    });
    expect(md).toContain('**⚠️ Error:** 401: Unauthorized');
  });
  it('escapes embedded triple-backtick fences with a longer delimiter', () => {
    const md = buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      rows: [
        row({
          role: 'assistant',
          content: 'x',
          metadata: {
            parts: [
              {
                type: 'tool-getPage',
                state: 'output-available',
                output: '```inner```',
              },
            ],
          } as never,
        }),
      ],
    });
    // A 4-backtick fence wraps content that itself contains a 3-backtick run.
    expect(md).toContain('````');
  });
 });
--- a/apps/server/src/core/ai-chat/chat-markdown.util.ts
+++ b/apps/server/src/core/ai-chat/chat-markdown.util.ts
@@ -0,0 +1,299 @@
 /**
 * Server-side Markdown export for an AI agent chat (#183). The DB is the single
 * source of truth: this renders a chat purely from its persisted message rows
 * (`AiChatMessage[]` — role / content / metadata.parts / toolCalls / usage).
 * Because the assistant row is now persisted UPFRONT and updated per step, an
 * interrupted turn is included up to its last finished step.
 *
 * Ported from the client `utils/chat-markdown.ts`. It is a PURE function (apart
 * from `new Date()` for the export timestamp), so it is straightforward to
 * unit-test and a future background worker can reuse it.
 *
 * Only a few fixed role/tool labels are localized via the `lang` param; the
 * structural document words (Input/Output/Error/Tokens/...) stay English because
 * the output is a technical artifact.
 */
 import type { AiChatMessage } from '@docmost/db/types/entity.types';
 /** Supported export label languages. Defaults to English. */
 export type ExportLang = 'en' | 'ru';
 /**
 * Normalize an arbitrary client locale code to a supported export language. The
 * client sends `i18n.language`, which is a FULL locale tag (e.g. `en-US`,
 * `ru-RU`), not a bare `en`/`ru` — so match on the language subtag and fall back
 * to English for anything non-Russian.
 */
 export function normalizeLang(lang?: string): ExportLang {
  return lang?.toLowerCase().startsWith('ru') ? 'ru' : 'en';
 }
 /** A single AI SDK UIMessage part (text part or a tool part). */
 interface ExportPart {
  type: string;
  text?: string;
  state?: string;
  toolName?: string;
  input?: unknown;
  output?: unknown;
  errorText?: string;
 }
 /** Authoritative per-turn usage the server attaches to a message row. */
 interface UsageLike {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
 }
 /** Localized label table. The client-side Markdown builder was removed by #183
 *  (the export is now server-side only), so this no longer mirrors a second
 *  exporter — instead the tool-action labels are kept in parity with the
 *  on-screen action-log labels in the client's `tool-parts.tsx` (`toolLabelKey`)
 *  so the export reads the same as the UI. Only role + tool-action labels are
 *  localized; everything structural is an English constant in the renderer. */
 const LABELS: Record<
  ExportLang,
  {
    untitled: string;
    aiAgent: string;
    you: string;
    tools: Record<string, string>;
    ranTool: (name: string) => string;
    stillGenerating: string;
  }
 > = {
  en: {
    untitled: 'Untitled chat',
    aiAgent: 'AI agent',
    you: 'You',
    tools: {
      searchPages: 'Searched pages',
      getPage: 'Read page',
      createPage: 'Created page',
      updatePageContent: 'Updated page',
      renamePage: 'Renamed page',
      movePage: 'Moved page',
      deletePage: 'Deleted page (to trash)',
      createComment: 'Commented',
      resolveComment: 'Resolved comment',
    },
    ranTool: (name) => `Ran tool ${name}`,
    stillGenerating:
      'This message is still being generated — the export captured a partial, in-progress response.',
  },
  ru: {
    untitled: 'Без названия',
    aiAgent: 'ИИ-агент',
    you: 'Вы',
    tools: {
      searchPages: 'Искал по страницам',
      getPage: 'Прочитал страницу',
      createPage: 'Создал страницу',
      updatePageContent: 'Обновил страницу',
      renamePage: 'Переименовал страницу',
      movePage: 'Переместил страницу',
      deletePage: 'Удалил страницу (в корзину)',
      createComment: 'Прокомментировал',
      resolveComment: 'Закрыл комментарий',
    },
    ranTool: (name) => `Выполнил инструмент ${name}`,
    stillGenerating:
      'Это сообщение всё ещё генерируется — экспорт захватил частичный, незавершённый ответ.',
  },
 };
 /** True for AI SDK tool parts (static `tool-*` or `dynamic-tool`). */
 function isToolPart(type: string): boolean {
  return type.startsWith('tool-') || type === 'dynamic-tool';
 }
 /** Extract the tool name from a part `type` of `tool-${name}` (or dynamic). */
 function getToolName(part: ExportPart): string {
  if (part.type === 'dynamic-tool') return part.toolName ?? '';
  return part.type.startsWith('tool-')
    ? part.type.slice('tool-'.length)
    : part.type;
 }
 /** Map an AI SDK tool-part state to the 3 states the action-log renders. */
 function toolRunState(state: string | undefined): 'running' | 'done' | 'error' {
  if (state === 'output-error' || state === 'output-denied') return 'error';
  if (state === 'output-available') return 'done';
  return 'running';
 }
 /** Resolve a tool's friendly action-log label (localized) from its name. */
 function toolLabel(name: string, lang: ExportLang): string {
  return LABELS[lang].tools[name] ?? LABELS[lang].ranTool(name);
 }
 /**
 * Stringify an arbitrary tool input/output value for a fenced block. Strings
 * pass through as-is; everything else is pretty-printed JSON, falling back to
 * `String(value)` if serialization throws (e.g. a circular structure).
 */
 function stringify(value: unknown): string {
  if (typeof value === 'string') return value;
  try {
    return JSON.stringify(value, null, 2);
  } catch {
    return String(value);
  }
 }
 /**
 * Wrap `code` in a fenced code block whose backtick delimiter is LONGER than the
 * longest backtick run inside the content, so embedded backticks (or a literal
 * ``` fence) never break out of the block. Minimum 3 backticks.
 */
 function fence(code: string, lang = ''): string {
  const runs: string[] = code.match(/`+/g) ?? [];
  const longest = runs.reduce((m, s) => Math.max(m, s.length), 0);
  const delim = '`'.repeat(Math.max(3, longest + 1));
  return `${delim}${lang}\n${code}\n${delim}`;
 }
 /** Per-row token count, mirroring the header sum in the client window. */
 function rowTokens(usage: UsageLike): number {
  return (
    usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
  );
 }
 /** Render one message's UIMessage parts into an array of Markdown blocks
 *  (text blocks + tool blocks). Mirrors the client renderer / MessageItem. */
 function renderMessageParts(parts: ExportPart[], lang: ExportLang): string[] {
  const out: string[] = [];
  for (const part of parts) {
    if (part.type === 'text') {
      const text = (part.text ?? '').trim();
      if (text.length > 0) out.push(text);
      continue;
    }
    if (!isToolPart(part.type)) continue;
    const name = getToolName(part);
    const label = toolLabel(name, lang);
    const state = toolRunState(part.state);
    const toolLines: string[] = [`**Tool: ${label}** (\`${name}\`) — ${state}`];
    if (part.input !== undefined) {
      toolLines.push('Input:');
      toolLines.push(fence(stringify(part.input), 'json'));
    }
    if (part.output !== undefined) {
      toolLines.push('Output:');
      toolLines.push(fence(stringify(part.output), 'json'));
    }
    if (part.errorText) {
      toolLines.push(`**Error:** ${part.errorText}`);
    }
    out.push(toolLines.join('\n\n'));
  }
  return out;
 }
 /** Resolve a persisted row's parts: prefer the rich persisted parts, else a
 *  single text part built from the plain-text content (mirrors rowToUiMessage). */
 function rowParts(row: AiChatMessage): ExportPart[] {
  const meta = (row.metadata ?? {}) as { parts?: ExportPart[] };
  return Array.isArray(meta.parts) && meta.parts.length > 0
    ? meta.parts
    : [{ type: 'text', text: row.content ?? '' }];
 }
 /**
 * Serialize a chat to a Markdown string from its persisted rows. Source = DB
 * ONLY (no live client state). A row whose `status` is still 'streaming' is an
 * interrupted turn that the export captured mid-flight; it is rendered up to its
 * last finished step and flagged "still generating".
 */
 export function buildChatMarkdown(args: {
  title: string | null;
  chatId: string;
  rows: AiChatMessage[];
  // Accepts a full client locale tag (e.g. 'en-US'/'ru-RU'); normalized below.
  lang?: string;
 }): string {
  const { title, chatId, rows } = args;
  const lang: ExportLang = normalizeLang(args.lang);
  const L = LABELS[lang];
  const blocks: string[] = [];
  const heading = (title ?? '').trim() || L.untitled;
  blocks.push(`# ${heading}`);
  const usageOf = (row: AiChatMessage): UsageLike | undefined => {
    const meta = (row.metadata ?? {}) as { usage?: UsageLike };
    return meta.usage;
  };
  const errorOf = (row: AiChatMessage): string | undefined => {
    const meta = (row.metadata ?? {}) as { error?: string };
    return meta.error;
  };
  // Metadata bullet list. Total tokens is only shown when there is a sum.
  const totalTokens = rows.reduce((sum, row) => {
    const usage = usageOf(row);
    return usage ? sum + rowTokens(usage) : sum;
  }, 0);
  const meta = [
    `- Chat ID: \`${chatId}\``,
    `- Exported: ${new Date().toISOString()}`,
    `- Messages: ${rows.length}`,
  ];
  if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
  blocks.push(meta.join('\n'));
  rows.forEach((row, index) => {
    blocks.push('---');
    const roleLabel = row.role === 'assistant' ? L.aiAgent : L.you;
    blocks.push(`## ${index + 1}. ${roleLabel}`);
    // Created-at kept in source as an HTML comment (out of the rendered prose).
    if (row.createdAt) {
      const iso =
        row.createdAt instanceof Date
          ? row.createdAt.toISOString()
          : String(row.createdAt);
      blocks.push(`<!-- ${iso} -->`);
    }
    blocks.push(...renderMessageParts(rowParts(row), lang));
    // A still-'streaming' row is an interrupted/in-progress turn captured by the
    // export; record that so the partial answer is not mistaken for complete.
    if (row.status === 'streaming') {
      blocks.push(`_⏳ ${L.stillGenerating}_`);
    }
    const error = errorOf(row);
    if (error) {
      blocks.push(`**⚠️ Error:** ${error}`);
    }
    const usage = usageOf(row);
    if (usage) {
      const total = usage.totalTokens ?? rowTokens(usage);
      const reasoning =
        usage.reasoningTokens && usage.reasoningTokens > 0
          ? `, reasoning: ${usage.reasoningTokens}`
          : '';
      blocks.push(
        `_Tokens — in: ${usage.inputTokens ?? '?'}, out: ${
          usage.outputTokens ?? '?'
        }${reasoning}, total: ${total}_`,
      );
    }
  });
  // Blank line between blocks so the Markdown renders cleanly.
  return blocks.join('\n\n');
 }
--- a/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts
+++ b/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts
@@ -26,3 +26,17 @@ export class GetChatMessagesDto {
  @IsString()
  cursor?: string;
 }
 /** Export a chat to Markdown (#183). `lang` localizes the few fixed
 *  role/tool-action labels; defaults to English server-side. */
 export class ExportChatDto {
  @IsString()
  chatId: string;
  // A full client locale tag (e.g. 'en-US', 'ru-RU') — normalized server-side to
  // a supported export language (see normalizeLang). Accept any string so a
  // region-qualified locale is not rejected (the 400 that broke the real client).
  @IsOptional()
  @IsString()
  lang?: string;
 }
--- a/apps/server/src/database/migrations/20260626T120000-ai-chat-message-status.ts
+++ b/apps/server/src/database/migrations/20260626T120000-ai-chat-message-status.ts
@@ -0,0 +1,18 @@
 import { type Kysely } from 'kysely';
 export async function up(db: Kysely<any>): Promise<void> {
  // Step-granular durability for the assistant turn (#183). The assistant row is
  // now created UPFRONT (status 'streaming') and UPDATEd as each step completes,
  // so a process death mid-turn no longer loses the whole answer. The column is
  // NULLABLE on purpose: rows written before this migration carry NULL, which the
  // app treats as 'completed' (a settled, pre-status message). Values written by
  // the app: 'streaming' | 'completed' | 'error' | 'aborted'.
  await db.schema
    .alterTable('ai_chat_messages')
    .addColumn('status', 'text', (col) => col)
    .execute();
 }
 export async function down(db: Kysely<any>): Promise<void> {
  await db.schema.alterTable('ai_chat_messages').dropColumn('status').execute();
 }
--- a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts
+++ b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts
@@ -1,4 +1,4 @@
-import { Injectable } from '@nestjs/common';
+import { Injectable, Logger } from '@nestjs/common';
 import { InjectKysely } from 'nestjs-kysely';
 import { KyselyDB, KyselyTransaction } from '../../types/kysely.types';
 import { dbOrTx } from '../../utils';
@@ -9,8 +9,24 @@ import {
 import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
 import { executeWithCursorPagination } from '@docmost/db/pagination/cursor-pagination';
 // Crash-recovery sweep recency threshold (#183 review): a 'streaming' row is
 // only swept to 'aborted' once it has been UNTOUCHED for this long. A live turn
 // bumps `updatedAt` on every step (well under this window), so its row never
 // matches; only a turn whose process truly died (no step update for >threshold)
 // is swept. Chosen safely ABOVE the longest realistic turn so a fresh replica's
 // boot-sweep can never abort a turn another replica is actively streaming
 // (multi-instance deploy).
 const SWEEP_STREAMING_STALE_MS = 10 * 60 * 1000; // 10 minutes
 // Hard upper bound on the rows materialized by `findAllByChat` (export path).
 // A generous cap so a pathologically huge chat cannot load an unbounded result
 // into memory; far above any realistic transcript length.
 const FIND_ALL_BY_CHAT_LIMIT = 5000;
@Injectable()
 export class AiChatMessageRepo {
  private readonly logger = new Logger(AiChatMessageRepo.name);
  constructor(@InjectKysely() private readonly db: KyselyDB) {}
  // The `tsv` column is a trigger-maintained tsvector used only for
@@ -25,6 +41,7 @@ export class AiChatMessageRepo {
    'content',
    'toolCalls',
    'metadata',
    'status',
    'createdAt',
    'updatedAt',
    'deletedAt',
@@ -60,6 +77,46 @@ export class AiChatMessageRepo {
    });
  }
  // Load ALL (non-deleted) messages of a chat in ascending chronological order
  // (oldest -> newest), unpaginated. Used by the server-side Markdown export
  // (#183), where the DB is the single source of truth and the whole transcript
  // must be rendered in one pass (findByChat is cursor-paginated and would only
  // return the first page).
  //
  // Hard-capped at FIND_ALL_BY_CHAT_LIMIT rows (a generous bound, far above any
  // realistic transcript) so exporting a pathologically huge chat cannot
  // materialize an unbounded result set in memory.
  async findAllByChat(
    chatId: string,
    workspaceId: string,
    // Injectable for tests so truncation can be exercised on a modest volume.
    limit: number = FIND_ALL_BY_CHAT_LIMIT,
  ): Promise<AiChatMessage[]> {
    // Fetch newest-first (+1 to DETECT truncation), so on overflow we keep the
    // NEWEST `limit` messages — the recent conversation matters most for an
    // export — rather than silently dropping the tail (#183 review). Reverse back
    // to chronological for rendering, like findRecent.
    const rows = await this.db
      .selectFrom('aiChatMessages')
      .select(this.baseFields)
      .where('chatId', '=', chatId)
      .where('workspaceId', '=', workspaceId)
      .where('deletedAt', 'is', null)
      .orderBy('createdAt', 'desc')
      .orderBy('id', 'desc')
      .limit(limit + 1)
      .execute();
    if (rows.length > limit) {
      rows.length = limit; // keep the newest `limit` (rows are newest-first here)
      this.logger.warn(
        `Chat ${chatId} export truncated to the newest ${limit} messages ` +
          `(older messages omitted).`,
      );
    }
    return rows.reverse();
  }
  // Load the most RECENT `limit` messages for a chat and return them in
  // ascending chronological order (oldest -> newest), as the model expects.
  // `findByChat` returns the FIRST page ASC (the OLDEST messages), which loses
@@ -96,4 +153,68 @@ export class AiChatMessageRepo {
      .returning(this.baseFields)
      .executeTakeFirst();
  }
  /**
   * Update a single message in place by id + workspace (#183 step-granular
   * durability). The assistant row is created UPFRONT (status 'streaming') and
   * patched as each step completes, then finalized once on the terminal status.
   * `updatedAt` is always bumped. Returns the updated row (baseFields) or
   * undefined when no row matched (e.g. a foreign workspace / deleted row).
   */
  async update(
    id: string,
    workspaceId: string,
    patch: Partial<{
      content: string | null;
      toolCalls: unknown;
      metadata: unknown;
      status: string | null;
    }>,
    opts?: { onlyIfStreaming?: boolean; trx?: KyselyTransaction },
  ): Promise<AiChatMessage | undefined> {
    const db = dbOrTx(this.db, opts?.trx);
    let query = db
      .updateTable('aiChatMessages')
      .set({ ...(patch as Record<string, unknown>), updatedAt: new Date() })
      .where('id', '=', id)
      .where('workspaceId', '=', workspaceId);
    // Concurrency guard (#183 review): a per-step 'streaming' update must NEVER
    // overwrite a row the terminal callback already finalized. onStepFinish
    // fires the streaming update fire-and-forget, so its UPDATE can land AFTER
    // finalize on a DIFFERENT pool connection (commit order is not guaranteed).
    // Scoping the streaming update to rows STILL in 'streaming' makes a late
    // update a no-op once the row is completed/error/aborted — regardless of
    // commit order. The terminal finalize runs WITHOUT this guard so it always
    // wins.
    if (opts?.onlyIfStreaming) {
      query = query.where('status', '=', 'streaming');
    }
    return query.returning(this.baseFields).executeTakeFirst();
  }
  /**
   * Crash-recovery sweep (#183): flip every assistant row still left in the
   * 'streaming' state (a turn that died mid-write before reaching a terminal
   * status) to 'aborted'. Run once on server start. Returns the number of rows
   * swept so the caller can log it. Workspace-wide on purpose — a crash can have
   * dangling streaming rows across any workspace.
   *
   * Bounded by recency (#183 review): only rows UNTOUCHED for
   * SWEEP_STREAMING_STALE_MS are swept. A live turn bumps `updatedAt` on every
   * step, so an actively-streaming row never matches; this prevents a fresh
   * replica's boot-sweep from aborting a turn another replica is still streaming
   * in a multi-instance deploy.
   */
  async sweepStreaming(trx?: KyselyTransaction): Promise<number> {
    const db = dbOrTx(this.db, trx);
    const staleBefore = new Date(Date.now() - SWEEP_STREAMING_STALE_MS);
    const rows = await db
      .updateTable('aiChatMessages')
      .set({ status: 'aborted', updatedAt: new Date() })
      .where('status', '=', 'streaming')
      .where('updatedAt', '<', staleBefore)
      .returning('id')
      .execute();
    return rows.length;
  }
 }
--- a/apps/server/src/database/types/db.d.ts
+++ b/apps/server/src/database/types/db.d.ts
@@ -620,6 +620,10 @@ export interface AiChatMessages {
  content: string | null;
  toolCalls: Json | null;
  metadata: Json | null;
  // Turn lifecycle status (#183): 'streaming' | 'completed' | 'error' |
  // 'aborted'. NULL on rows written before the status column existed; the app
  // treats NULL as 'completed' (a settled, pre-status message).
  status: string | null;
  tsv: string | null;
  createdAt: Generated<Timestamp>;
  updatedAt: Generated<Timestamp>;
--- a/apps/server/test/integration/ai-chat-message-status.int-spec.ts
+++ b/apps/server/test/integration/ai-chat-message-status.int-spec.ts
@@ -0,0 +1,270 @@
 import { Kysely } from 'kysely';
 import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
 import {
  getTestDb,
  destroyTestDb,
  createWorkspace,
  createUser,
  createChat,
  createMessage,
 } from './db';
 /**
 * Integration coverage for the #183 step-granular durability primitives on
 * AiChatMessageRepo: `update` (in-place patch by id+workspace, bumps updatedAt,
 * returns the row) and `sweepStreaming` (crash recovery: flip dangling
 * 'streaming' rows to 'aborted'). Real SQL against docmost_test, not a mock.
 */
 describe('AiChatMessageRepo.update + sweepStreaming [integration]', () => {
  let db: Kysely<any>;
  let repo: AiChatMessageRepo;
  let workspaceId: string;
  let otherWorkspaceId: string;
  let userId: string;
  let chatId: string;
  let otherChatId: string;
  beforeAll(async () => {
    db = getTestDb();
    repo = new AiChatMessageRepo(db as any);
    workspaceId = (await createWorkspace(db)).id;
    otherWorkspaceId = (await createWorkspace(db)).id;
    userId = (await createUser(db, workspaceId)).id;
    chatId = (await createChat(db, { workspaceId, creatorId: userId })).id;
    const otherUser = await createUser(db, otherWorkspaceId);
    otherChatId = (
      await createChat(db, {
        workspaceId: otherWorkspaceId,
        creatorId: otherUser.id,
      })
    ).id;
  });
  afterAll(async () => {
    await destroyTestDb();
  });
  it('update patches content/status/metadata and bumps updatedAt', async () => {
    const seeded = await repo.insert({
      chatId,
      workspaceId,
      userId,
      role: 'assistant',
      content: '',
      status: 'streaming',
      metadata: { parts: [] } as never,
    });
    const before = seeded.updatedAt;
    // Ensure a measurable timestamp delta.
    await new Promise((r) => setTimeout(r, 5));
    const updated = await repo.update(seeded.id, workspaceId, {
      content: 'final answer',
      status: 'completed',
      metadata: { parts: [{ type: 'text', text: 'final answer' }] },
    });
    expect(updated).toBeDefined();
    expect(updated!.content).toBe('final answer');
    expect(updated!.status).toBe('completed');
    expect((updated!.metadata as any).parts).toHaveLength(1);
    // The 5ms sleep above guarantees a strictly-later timestamp.
    expect(new Date(updated!.updatedAt).getTime()).toBeGreaterThan(
      new Date(before).getTime(),
    );
  });
  it('onlyIfStreaming update is a NO-OP once the row is finalized (race guard)', async () => {
    // Reproduce the step-update-vs-finalize race (#183 review): the row is
    // finalized to 'completed', then a LATE per-step 'streaming' update lands.
    // With `onlyIfStreaming` it must match nothing and leave the finalized row
    // untouched (no clobber back to 'streaming', no lost usage).
    const seeded = await repo.insert({
      chatId,
      workspaceId,
      userId,
      role: 'assistant',
      content: 'partial',
      status: 'streaming',
    });
    // Terminal finalize (unguarded) wins.
    await repo.update(seeded.id, workspaceId, {
      content: 'final answer',
      status: 'completed',
      metadata: { usage: { totalTokens: 42 } } as never,
    });
    // A straggler per-step update arrives AFTER finalize.
    const late = await repo.update(
      seeded.id,
      workspaceId,
      { content: 'partial', status: 'streaming', metadata: {} as never },
      { onlyIfStreaming: true },
    );
    expect(late).toBeUndefined(); // matched no 'streaming' row -> no-op
    const rows = await repo.findAllByChat(chatId, workspaceId);
    const row = rows.find((r) => r.id === seeded.id)!;
    expect(row.status).toBe('completed'); // NOT clobbered back to streaming
    expect(row.content).toBe('final answer');
    expect((row.metadata as any).usage.totalTokens).toBe(42); // usage preserved
  });
  it('update is workspace-scoped: a foreign workspace id matches nothing', async () => {
    const seeded = await repo.insert({
      chatId,
      workspaceId,
      userId,
      role: 'assistant',
      content: 'orig',
      status: 'streaming',
    });
    const res = await repo.update(seeded.id, otherWorkspaceId, {
      status: 'completed',
    });
    expect(res).toBeUndefined();
    // The row in the real workspace is untouched.
    const rows = await repo.findAllByChat(chatId, workspaceId);
    const stillThere = rows.find((r) => r.id === seeded.id);
    expect(stillThere!.status).toBe('streaming');
    // Clean up so it does not pollute the sweep test below.
    await repo.update(seeded.id, workspaceId, { status: 'completed' });
  });
  // Backdate a row's updatedAt so it qualifies as a STALE streaming row (the
  // sweep only flips rows untouched for >10 minutes — a live turn bumps
  // updatedAt every step, so it would never match).
  async function backdateUpdatedAt(
    id: string,
    minutesAgo: number,
  ): Promise<void> {
    await db
      .updateTable('aiChatMessages')
      .set({ updatedAt: new Date(Date.now() - minutesAgo * 60 * 1000) })
      .where('id', '=', id)
      .execute();
  }
  it('sweepStreaming flips STALE dangling streaming rows to aborted and counts them', async () => {
    // Two dangling streaming rows in our workspace + one in another workspace —
    // all backdated past the staleness threshold so the sweep picks them up.
    const a = await createMessage(db, {
      workspaceId,
      chatId,
      role: 'assistant',
      status: 'streaming',
    });
    const b = await createMessage(db, {
      workspaceId,
      chatId,
      role: 'assistant',
      status: 'streaming',
    });
    const other = await createMessage(db, {
      workspaceId: otherWorkspaceId,
      chatId: otherChatId,
      role: 'assistant',
      status: 'streaming',
    });
    await backdateUpdatedAt(a.id, 20);
    await backdateUpdatedAt(b.id, 20);
    await backdateUpdatedAt(other.id, 20);
    // A settled row must NOT be touched.
    const done = await createMessage(db, {
      workspaceId,
      chatId,
      role: 'assistant',
      status: 'completed',
    });
    // A legacy NULL-status row must NOT be touched.
    const legacy = await createMessage(db, {
      workspaceId,
      chatId,
      role: 'assistant',
      status: null,
    });
    const swept = await repo.sweepStreaming();
    // At least the 3 stale streaming rows we created (2 here + 1 in the other ws).
    expect(swept).toBeGreaterThanOrEqual(3);
    const rows = await repo.findAllByChat(chatId, workspaceId);
    const byId = new Map(rows.map((r) => [r.id, r]));
    expect(byId.get(a.id)!.status).toBe('aborted');
    expect(byId.get(b.id)!.status).toBe('aborted');
    expect(byId.get(done.id)!.status).toBe('completed');
    expect(byId.get(legacy.id)!.status).toBeNull();
    // Idempotent: a second sweep finds nothing left in our seeded set.
    const again = await repo.sweepStreaming();
    const rows2 = await repo.findAllByChat(chatId, workspaceId);
    // Our two rows stay aborted regardless of `again`'s global count.
    expect(rows2.find((r) => r.id === a.id)!.status).toBe('aborted');
    expect(again).toBeGreaterThanOrEqual(0);
  });
  it('sweepStreaming does NOT sweep a FRESH streaming row (recency bound, #183 review)', async () => {
    // A row that is actively streaming (recent updatedAt) must survive the sweep:
    // a fresh replica's boot-sweep must never abort a turn another replica is
    // still streaming in a multi-instance deploy.
    const fresh = await createMessage(db, {
      workspaceId,
      chatId,
      role: 'assistant',
      status: 'streaming',
    });
    // A STALE streaming row created alongside it IS swept — proving the sweep
    // ran and the only difference is recency.
    const stale = await createMessage(db, {
      workspaceId,
      chatId,
      role: 'assistant',
      status: 'streaming',
    });
    await backdateUpdatedAt(stale.id, 20);
    await repo.sweepStreaming();
    const rows = await repo.findAllByChat(chatId, workspaceId);
    const byId = new Map(rows.map((r) => [r.id, r]));
    // Fresh (recently-updated) streaming row is left untouched...
    expect(byId.get(fresh.id)!.status).toBe('streaming');
    // ...while the stale one alongside it was swept to 'aborted'.
    expect(byId.get(stale.id)!.status).toBe('aborted');
  });
  it('findAllByChat caps the result, keeping the NEWEST messages in order (#183 review)', async () => {
    // A dedicated chat so the cap test is independent of the rows above.
    const cappedChat = (
      await createChat(db, { workspaceId, creatorId: userId })
    ).id;
    const base = Date.now();
    // Three messages at strictly increasing timestamps.
    await createMessage(db, {
      workspaceId,
      chatId: cappedChat,
      content: 'm1-oldest',
      createdAt: new Date(base),
    });
    await createMessage(db, {
      workspaceId,
      chatId: cappedChat,
      content: 'm2',
      createdAt: new Date(base + 1000),
    });
    await createMessage(db, {
      workspaceId,
      chatId: cappedChat,
      content: 'm3-newest',
      createdAt: new Date(base + 2000),
    });
    // Cap of 2 -> the OLDEST message is dropped; the newest two stay, in
    // chronological order (oldest -> newest).
    const capped = await repo.findAllByChat(cappedChat, workspaceId, 2);
    expect(capped.map((r) => r.content)).toEqual(['m2', 'm3-newest']);
    // Without a cap (well above the row count) all three come back in order.
    const all = await repo.findAllByChat(cappedChat, workspaceId, 100);
    expect(all.map((r) => r.content)).toEqual(['m1-oldest', 'm2', 'm3-newest']);
  });
 });
--- a/apps/server/test/integration/db.ts
+++ b/apps/server/test/integration/db.ts
@@ -104,7 +104,8 @@ export async function createWorkspace(
      name: overrides.name ?? `ws-${suffix}`,
      // hostname is uniquely constrained; keep it unique per workspace.
      hostname: `host-${suffix}`,
-      settings: overrides.settings === undefined ? null : (overrides.settings as any),
+      settings:
        overrides.settings === undefined ? null : (overrides.settings as any),
    })
    .returning(['id', 'settings'])
    .executeTakeFirstOrThrow();
@@ -226,3 +227,37 @@ export async function createChat(
    .executeTakeFirstOrThrow();
  return { id: row.id as string };
 }
 export async function createMessage(
  db: Kysely<any>,
  args: {
    workspaceId: string;
    chatId: string;
    userId?: string | null;
    role?: string;
    content?: string | null;
    status?: string | null;
    metadata?: unknown;
    // Explicit timestamp so a test can control message ORDER (the default DB
    // now() can tie within a millisecond, and the v4 id is not time-ordered).
    createdAt?: Date;
  },
 ): Promise<{ id: string }> {
  const id = randomUUID();
  const row = await db
    .insertInto('aiChatMessages')
    .values({
      id,
      workspaceId: args.workspaceId,
      chatId: args.chatId,
      userId: args.userId ?? null,
      role: args.role ?? 'assistant',
      content: args.content ?? null,
      status: args.status ?? null,
      metadata: (args.metadata ?? null) as any,
      ...(args.createdAt ? { createdAt: args.createdAt } : {}),
    })
    .returning(['id'])
    .executeTakeFirstOrThrow();
  return { id: row.id as string };
 }