Merge remote-tracking branch 'gitea/develop' into batch/issues-2026-06-25

# Conflicts:
#	apps/server/src/core/ai-chat/ai-chat.service.spec.ts
#	apps/server/src/core/ai-chat/ai-chat.service.ts
2026-06-25 12:48:47 +03:00
parent 364838d0b2 de115ade1e
commit ed3b65c36b
100 changed files with 10109 additions and 1381 deletions
--- a/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
@@ -0,0 +1,159 @@
+import { ForbiddenException } from '@nestjs/common';
+import { AiChatController } from './ai-chat.controller';
+import {
+  planFinalizeAssistant,
+  applyFinalize,
+  flushAssistant,
+  type AssistantFlush,
+} from './ai-chat.service';
+import type { User, Workspace } from '@docmost/db/types/entity.types';
+
+/**
+ * Wiring spec for the #183 `POST /ai-chat/export` endpoint. It must: owner-gate via
+ * the chat lookup (workspace-scoped + creator-owned), load the FULL transcript
+ * via findAllByChat, render server-side, and return `{ markdown }`. Exercised by
+ * instantiating the controller with hand-rolled mocks — no Nest graph, no DB.
+ */
+describe('AiChatController.export', () => {
+  const user = { id: 'u1' } as User;
+  const workspace = { id: 'ws1' } as Workspace;
+
+  function makeController(
+    over: {
+      chat?: unknown;
+      rows?: unknown[];
+    } = {},
+  ) {
+    const chat =
+      'chat' in over
+        ? over.chat
+        : { id: 'c1', creatorId: 'u1', title: 'My chat' };
+    const aiChatRepo = {
+      findById: jest.fn().mockResolvedValue(chat),
+    };
+    const aiChatMessageRepo = {
+      findAllByChat: jest.fn().mockResolvedValue(
+        over.rows ?? [
+          {
+            id: 'm1',
+            role: 'user',
+            content: 'hi',
+            metadata: null,
+            status: null,
+          },
+          {
+            id: 'm2',
+            role: 'assistant',
+            content: 'hello',
+            metadata: null,
+            status: 'completed',
+          },
+        ],
+      ),
+    };
+    const controller = new AiChatController(
+      {} as never,
+      aiChatRepo as never,
+      aiChatMessageRepo as never,
+      {} as never,
+    );
+    return { controller, aiChatRepo, aiChatMessageRepo };
+  }
+
+  it('renders the full transcript and returns { markdown }', async () => {
+    const { controller, aiChatMessageRepo } = makeController();
+    const res = await controller.export({ chatId: 'c1' }, user, workspace);
+    expect(aiChatMessageRepo.findAllByChat).toHaveBeenCalledWith('c1', 'ws1');
+    expect(res.markdown).toContain('# My chat');
+    expect(res.markdown).toContain('## 1. You');
+    expect(res.markdown).toContain('## 2. AI agent');
+  });
+
+  it('forbids a chat the user does not own', async () => {
+    const { controller } = makeController({
+      chat: { id: 'c1', creatorId: 'someone-else', title: 'X' },
+    });
+    await expect(
+      controller.export({ chatId: 'c1' }, user, workspace),
+    ).rejects.toBeInstanceOf(ForbiddenException);
+  });
+
+  it('forbids a missing / foreign-workspace chat', async () => {
+    const { controller } = makeController({ chat: null });
+    await expect(
+      controller.export({ chatId: 'c1' }, user, workspace),
+    ).rejects.toBeInstanceOf(ForbiddenException);
+  });
+
+  it('localizes labels when lang=ru is passed', async () => {
+    const { controller } = makeController();
+    const res = await controller.export(
+      { chatId: 'c1', lang: 'ru' },
+      user,
+      workspace,
+    );
+    expect(res.markdown).toContain('## 1. Вы');
+    expect(res.markdown).toContain('## 2. ИИ-агент');
+  });
+});
+
+/**
+ * The terminal-finalize dispatch (#183): the assistant row is INSERTed upfront
+ * as 'streaming' and finalized once on the terminal callback. When the upfront
+ * insert SUCCEEDED (we hold an id) finalize UPDATEs that row; when it FAILED
+ * (assistantId is undefined) finalize falls back to INSERTing the terminal row
+ * so the turn is not lost — the only safety against losing the turn entirely.
+ *
+ * `planFinalizeAssistant` is the pure decision; `applyFinalize` is the REAL
+ * dispatch the service uses, exercised here over a mock repo (not a copy of the
+ * logic) so a production drift would fail the test (#186 review).
+ */
+describe('finalizeAssistant dispatch (planFinalizeAssistant + applyFinalize)', () => {
+  const workspaceId = 'ws1';
+
+  // Drive the SAME applyFinalize the service calls (no duplicated logic).
+  async function dispatchFinalize(
+    repo: { insert: jest.Mock; update: jest.Mock },
+    assistantId: string | undefined,
+    flushed: AssistantFlush,
+  ): Promise<void> {
+    await applyFinalize(
+      repo,
+      planFinalizeAssistant(assistantId),
+      { chatId: 'c1', workspaceId, userId: 'u1' },
+      flushed,
+    );
+  }
+
+  it('plan: update when the upfront insert returned an id', () => {
+    expect(planFinalizeAssistant('a1')).toEqual({ kind: 'update', id: 'a1' });
+  });
+
+  it('plan: insert (fallback) when there is no upfront id', () => {
+    expect(planFinalizeAssistant(undefined)).toEqual({ kind: 'insert' });
+  });
+
+  it('(a) upfront insert succeeded -> finalize UPDATEs the row by id', async () => {
+    const repo = { insert: jest.fn(), update: jest.fn() };
+    const flushed = flushAssistant([], 'final answer', 'completed', {
+      finishReason: 'stop',
+    });
+    await dispatchFinalize(repo, 'a1', flushed);
+    expect(repo.update).toHaveBeenCalledWith('a1', workspaceId, flushed);
+    expect(repo.insert).not.toHaveBeenCalled();
+  });
+
+  it('(b) upfront insert failed -> finalize INSERTs the terminal payload', async () => {
+    const repo = { insert: jest.fn(), update: jest.fn() };
+    const flushed = flushAssistant([], 'partial', 'error', { error: 'boom' });
+    await dispatchFinalize(repo, undefined, flushed);
+    expect(repo.update).not.toHaveBeenCalled();
+    expect(repo.insert).toHaveBeenCalledTimes(1);
+    const arg = repo.insert.mock.calls[0][0];
+    // The fallback insert carries the terminal content/status/metadata.
+    expect(arg.role).toBe('assistant');
+    expect(arg.content).toBe('partial');
+    expect(arg.status).toBe('error');
+    expect((arg.metadata as { error?: string }).error).toBe('boom');
+  });
+});
--- a/apps/server/src/core/ai-chat/ai-chat.controller.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts
@@ -20,7 +20,7 @@ import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
 import { AuthUser } from '../../common/decorators/auth-user.decorator';
 import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
 import { SkipTransform } from '../../common/decorators/skip-transform.decorator';
-import { User, Workspace } from '@docmost/db/types/entity.types';
+import { AiChat, User, Workspace } from '@docmost/db/types/entity.types';
 import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
 import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
 import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
@@ -31,10 +31,12 @@ import { AiChatService, AiChatStreamBody } from './ai-chat.service';
 import { AiTranscriptionService } from './ai-transcription.service';
 import {
  ChatIdDto,
+  ExportChatDto,
  GetChatMessagesDto,
  RenameChatDto,
 } from './dto/ai-chat.dto';
 import { describeProviderError } from '../../integrations/ai/ai-error.util';
+import { buildChatMarkdown } from './chat-markdown.util';

 /**
 * Per-user AI chat API (§6.1). Routes are POST to match this codebase's
@@ -81,6 +83,36 @@ export class AiChatController {
    );
  }

+  /**
+   * Export a chat to Markdown (#183). The DB is the single source of truth: the
+   * whole transcript is loaded (oldest -> newest) and rendered server-side. Now
+   * that the assistant row is persisted upfront and per step, an interrupted
+   * turn is included up to its last finished step. Workspace-scoped and owner-
+   * gated via assertOwnedChat (same as the other read endpoints). Returns
+   * `{ markdown }`. `lang` localizes the few fixed labels (default English).
+   */
+  @HttpCode(HttpStatus.OK)
+  @Post('export')
+  async export(
+    @Body() dto: ExportChatDto,
+    @AuthUser() user: User,
+    @AuthWorkspace() workspace: Workspace,
+  ): Promise<{ markdown: string }> {
+    const chat = await this.assertOwnedChat(dto.chatId, user, workspace);
+    const rows = await this.aiChatMessageRepo.findAllByChat(
+      dto.chatId,
+      workspace.id,
+    );
+    const markdown = buildChatMarkdown({
+      title: chat.title ?? null,
+      chatId: dto.chatId,
+      rows,
+      // normalizeLang(undefined) already yields 'en', so no `?? 'en'` is needed.
+      lang: dto.lang,
+    });
+    return { markdown };
+  }
+
  /** Rename a chat. */
  @HttpCode(HttpStatus.OK)
  @Post('rename')
@@ -90,7 +122,11 @@ export class AiChatController {
    @AuthWorkspace() workspace: Workspace,
  ) {
    await this.assertOwnedChat(dto.chatId, user, workspace);
-    await this.aiChatRepo.update(dto.chatId, { title: dto.title }, workspace.id);
+    await this.aiChatRepo.update(
+      dto.chatId,
+      { title: dto.title },
+      workspace.id,
+    );
    return { success: true };
  }

@@ -145,7 +181,10 @@ export class AiChatController {
    // Resolve the agent role for this turn BEFORE hijack: existing chats read it
    // from ai_chats.role_id (authoritative), a new chat from body.roleId. The
    // role drives both the persona and the optional model override below.
-    const role = await this.aiChatService.resolveRoleForRequest(workspace, body);
+    const role = await this.aiChatService.resolveRoleForRequest(
+      workspace,
+      body,
+    );

    // Resolve the model (applying the role's optional override) BEFORE hijack so
    // an unconfigured provider — including a role pointing at an unconfigured
@@ -232,7 +271,9 @@ export class AiChatController {
    let file = null;
    try {
      // Whisper hard-caps uploads at 25MB; allow a single file.
-      file = await req.file({ limits: { fileSize: 25 * 1024 * 1024, files: 1 } });
+      file = await req.file({
+        limits: { fileSize: 25 * 1024 * 1024, files: 1 },
+      });
    } catch (err: any) {
      if (err?.statusCode === 413) {
        throw new BadRequestException('Audio file too large (max 25MB)');
@@ -283,11 +324,12 @@ export class AiChatController {
    chatId: string,
    user: User,
    workspace: Workspace,
-  ): Promise<void> {
+  ): Promise<AiChat> {
    const chat = await this.aiChatRepo.findById(chatId, workspace.id);
    if (!chat || chat.creatorId !== user.id) {
      throw new ForbiddenException();
    }
+    return chat;
  }
 }

--- a/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts
@@ -0,0 +1,61 @@
+import { Logger } from '@nestjs/common';
+import { AiChatService } from './ai-chat.service';
+
+/**
+ * Lifecycle unit tests for AiChatService.onModuleInit (#183 crash-recovery
+ * sweep). The sweep is BEST-EFFORT: a failure must be logged (warn) but must
+ * NEVER throw out of onModuleInit and block server startup. Exercised with a
+ * hand-rolled mock repo — no Nest graph, no DB. Only `aiChatMessageRepo` is
+ * touched by onModuleInit, so the other constructor deps are stubbed as never.
+ */
+describe('AiChatService.onModuleInit (startup sweep)', () => {
+  function makeService(sweepStreaming: jest.Mock) {
+    const aiChatMessageRepo = { sweepStreaming };
+    const service = new AiChatService(
+      {} as never, // ai
+      {} as never, // aiChatRepo
+      aiChatMessageRepo as never,
+      {} as never, // aiSettings
+      {} as never, // tools
+      {} as never, // mcpClients
+      {} as never, // aiAgentRoleRepo
+      {} as never, // pageRepo
+      {} as never, // pageAccess
+    );
+    return { service, aiChatMessageRepo };
+  }
+
+  afterEach(() => jest.restoreAllMocks());
+
+  it('happy path: calls sweepStreaming and resolves', async () => {
+    const sweepStreaming = jest.fn().mockResolvedValue(0);
+    const { service } = makeService(sweepStreaming);
+    await expect(service.onModuleInit()).resolves.toBeUndefined();
+    expect(sweepStreaming).toHaveBeenCalledTimes(1);
+  });
+
+  it('logs how many rows were swept when > 0', async () => {
+    const sweepStreaming = jest.fn().mockResolvedValue(3);
+    const logSpy = jest
+      .spyOn(Logger.prototype, 'log')
+      .mockImplementation(() => undefined);
+    const { service } = makeService(sweepStreaming);
+    await service.onModuleInit();
+    expect(logSpy).toHaveBeenCalledTimes(1);
+    expect(String(logSpy.mock.calls[0][0])).toContain('3');
+  });
+
+  it('sweepStreaming throws -> onModuleInit resolves (does NOT throw) and warns', async () => {
+    const sweepStreaming = jest
+      .fn()
+      .mockRejectedValue(new Error('db unavailable'));
+    const warnSpy = jest
+      .spyOn(Logger.prototype, 'warn')
+      .mockImplementation(() => undefined);
+    const { service } = makeService(sweepStreaming);
+    // Must not throw — a sweep failure may never block startup.
+    await expect(service.onModuleInit()).resolves.toBeUndefined();
+    expect(warnSpy).toHaveBeenCalledTimes(1);
+    expect(String(warnSpy.mock.calls[0][0])).toContain('db unavailable');
+  });
+});
--- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
@@ -6,7 +6,7 @@ import {
  serializeSteps,
  rowToUiMessage,
  prepareAgentStep,
-  buildPartialAssistantRecord,
+  flushAssistant,
  chatStreamMetadata,
  accumulateStepUsage,
  MAX_AGENT_STEPS,
@@ -233,101 +233,108 @@ describe('prepareAgentStep', () => {
    // The synthesis instruction is appended.
    expect(result?.system).toContain(FINAL_STEP_INSTRUCTION);
  });
-
-  it('pins the off-by-one boundary (MAX-2 is not final, MAX-1 is)', () => {
-    // Boundary expressed via the constant, not a hardcoded 18/19, so the test
-    // tracks MAX_AGENT_STEPS if the cap ever changes.
-    expect(prepareAgentStep(MAX_AGENT_STEPS - 2, 'SYS')).toBeUndefined();
-    const atBoundary = prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS');
-    expect(atBoundary).toBeDefined();
-    expect(atBoundary?.toolChoice).toBe('none');
-  });
 });

 /**
- * Unit test for buildPartialAssistantRecord: the pure helper that shapes the
- * assistant-message record persisted on a partial/failed turn (the streamText
- * onError / onAbort paths). It captures the PARTIAL answer the user already saw
- * (finished steps' text + tool parts, plus the in-progress step's text) so a
- * provider error / disconnect no longer throws the streamed answer away. Pinning
- * the record shape here covers the persist-partial logic without seaming
- * streamText itself.
+ * flushAssistant (#183): the PURE row builder behind the step-granular durable
+ * write path. It runs identically for the upfront insert (empty steps,
+ * 'streaming'), every per-step update, and the terminal finalize — so a future
+ * background worker can call the same function. These tests pin the four status
+ * shapes and the `metadata.parts` shape that rowToUiMessage/findRecent depend on
+ * (per-step text + tool parts via assistantParts, in-progress text appended).
 */
-describe('buildPartialAssistantRecord', () => {
+describe('flushAssistant', () => {
  type AnyPart = Record<string, unknown>;

-  it('records an empty turn with the error text (preserves old behavior)', () => {
-    const rec = buildPartialAssistantRecord(
-      [],
-      '',
-      'error',
-      '401: Unauthorized',
-    );
-    expect(rec).toEqual({
-      text: '',
-      toolCalls: null,
-      metadata: {
-        finishReason: 'error',
-        parts: [],
-        error: '401: Unauthorized',
-      },
-    });
+  const toolStep = {
+    text: 'looked it up',
+    toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
+    toolResults: [
+      { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
+    ],
+  };
+
+  it('upfront seed: empty streaming row (no content, no toolCalls, empty parts)', () => {
+    const f = flushAssistant([], '', 'streaming');
+    expect(f.status).toBe('streaming');
+    expect(f.content).toBe('');
+    expect(f.toolCalls).toBeNull();
+    expect(f.metadata.parts).toEqual([]);
+    // No finishReason while streaming (it is not a terminal state).
+    expect('finishReason' in f.metadata).toBe(false);
  });

-  it('persists in-progress text (no finished steps) as the partial answer', () => {
-    const rec = buildPartialAssistantRecord(
-      [],
-      'partial answer',
-      'error',
-      'boom',
-    );
-    expect(rec.text).toBe('partial answer');
-    expect(rec.metadata.parts).toEqual([
+  it('streaming update folds in finished steps but keeps status streaming', () => {
+    const f = flushAssistant([toolStep], '', 'streaming');
+    expect(f.status).toBe('streaming');
+    expect(f.content).toBe('looked it up');
+    const parts = f.metadata.parts as AnyPart[];
+    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
+    const toolPart = parts.find((p) => p.type === 'tool-getPage');
+    expect(toolPart!.state).toBe('output-available');
+    expect(f.toolCalls).not.toBeNull();
+  });
+
+  it('completed: attaches finishReason + normalized usage + contextTokens', () => {
+    const f = flushAssistant([toolStep], '', 'completed', {
+      finishReason: 'stop',
+      usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
+      contextTokens: 15,
+    });
+    expect(f.status).toBe('completed');
+    expect(f.metadata.finishReason).toBe('stop');
+    expect(f.metadata.usage).toEqual({
+      inputTokens: 10,
+      outputTokens: 5,
+      totalTokens: 15,
+      reasoningTokens: undefined,
+    });
+    expect(f.metadata.contextTokens).toBe(15);
+  });
+
+  it('error: records the error and a derived finishReason', () => {
+    const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' });
+    expect(f.status).toBe('error');
+    expect(f.content).toBe('partial answer');
+    expect(f.metadata.error).toBe('boom');
+    // Derives finishReason from the terminal status when none is supplied.
+    expect(f.metadata.finishReason).toBe('error');
+    expect(f.metadata.parts).toEqual([
      { type: 'text', text: 'partial answer' },
    ]);
-    expect(rec.metadata.error).toBe('boom');
  });

-  it('combines a finished tool step with trailing in-progress text', () => {
-    const steps = [
-      {
-        text: 'looked it up',
-        toolCalls: [
-          { toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
-        ],
-        toolResults: [
-          { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
-        ],
-      },
-    ];
-    const rec = buildPartialAssistantRecord(
-      steps,
-      ' and then',
-      'error',
-      'boom',
-    );
-    const parts = rec.metadata.parts as AnyPart[];
-    // The finished step's text part is present.
-    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
-    // The paired tool call+result becomes an output-available part.
-    const toolPart = parts.find((p) => p.type === 'tool-getPage');
-    expect(toolPart).toBeDefined();
-    expect(toolPart!.state).toBe('output-available');
-    // The in-progress text is appended LAST so the parts match the stream order.
+  it('aborted: in-progress text appended last, no error key', () => {
+    const f = flushAssistant([toolStep], ' and then', 'aborted');
+    expect(f.status).toBe('aborted');
+    expect(f.metadata.finishReason).toBe('aborted');
+    expect('error' in f.metadata).toBe(false);
+    expect(f.content).toBe('looked it up and then');
+    const parts = f.metadata.parts as AnyPart[];
    expect(parts[parts.length - 1]).toEqual({
      type: 'text',
      text: ' and then',
    });
-    expect(rec.text).toBe('looked it up and then');
-    expect(rec.toolCalls).not.toBeNull();
-    expect(rec.metadata.error).toBe('boom');
  });

-  it('omits the error key on the abort path (no errorText)', () => {
-    const rec = buildPartialAssistantRecord([], 'half', 'aborted');
-    expect(rec.metadata.finishReason).toBe('aborted');
-    expect('error' in rec.metadata).toBe(false);
-    expect(rec.text).toBe('half');
+  it('combines a finished tool step with trailing in-progress text (error path)', () => {
+    // The error path captures the PARTIAL answer the user already saw: each
+    // finished step's text + tool parts, then the in-progress step's text last.
+    const flushed = flushAssistant([toolStep], ' and then', 'error', {
+      error: 'boom',
+    });
+    const parts = flushed.metadata.parts as AnyPart[];
+    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
+    const toolPart = parts.find((p) => p.type === 'tool-getPage');
+    expect(toolPart!.state).toBe('output-available');
+    // In-progress text appended LAST so the parts match the stream order.
+    expect(parts[parts.length - 1]).toEqual({
+      type: 'text',
+      text: ' and then',
+    });
+    expect(flushed.content).toBe('looked it up and then');
+    expect(flushed.toolCalls).not.toBeNull();
+    expect(flushed.metadata.error).toBe('boom');
  });
 });

--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -1,4 +1,9 @@
-import { ForbiddenException, Injectable, Logger } from '@nestjs/common';
+import {
+  ForbiddenException,
+  Injectable,
+  Logger,
+  OnModuleInit,
+} from '@nestjs/common';
 import { FastifyReply } from 'fastify';
 import {
  streamText,
@@ -124,7 +129,7 @@ export interface AiChatStreamArgs {
 *                    can be rebuilt for `convertToModelMessages`.
 */
@Injectable()
-export class AiChatService {
+export class AiChatService implements OnModuleInit {
  private readonly logger = new Logger(AiChatService.name);

  constructor(
@@ -139,6 +144,32 @@ export class AiChatService {
    private readonly pageAccess: PageAccessService,
  ) {}

+  /**
+   * Crash-recovery sweep on server start (#183): any assistant row left in the
+   * 'streaming' state is the relic of a turn whose process died before it
+   * reached a terminal status. Flip those to 'aborted' so history/export show
+   * them settled (with whatever finished steps were already persisted) instead
+   * of perpetually "streaming". Best-effort: a sweep failure is logged but must
+   * never block server startup.
+   */
+  async onModuleInit(): Promise<void> {
+    try {
+      const swept = await this.aiChatMessageRepo.sweepStreaming();
+      if (swept > 0) {
+        this.logger.log(
+          `Startup sweep: marked ${swept} dangling 'streaming' assistant ` +
+            `message(s) as 'aborted'.`,
+        );
+      }
+    } catch (err) {
+      this.logger.warn(
+        `Startup sweep of dangling 'streaming' messages failed: ${
+          err instanceof Error ? err.message : 'unknown error'
+        }`,
+      );
+    }
+  }
+
  /**
   * Resolve the agent role that applies to this stream request, scoped to the
   * workspace and soft-delete aware. For an EXISTING chat the role is read from
@@ -395,31 +426,6 @@ export class AiChatService {

    const tools = { ...external.tools, ...docmostTools };

-    // Persist the assistant message. Used by onFinish (full result) and the
-    // abort/error paths (partial result). Guarded so we persist at most once.
-    let persisted = false;
-    const persistAssistant = async (data: {
-      text: string;
-      toolCalls: unknown;
-      metadata: Record<string, unknown>;
-    }): Promise<void> => {
-      if (persisted) return;
-      persisted = true;
-      try {
-        await this.aiChatMessageRepo.insert({
-          chatId,
-          workspaceId: workspace.id,
-          userId: user.id,
-          role: 'assistant',
-          content: data.text ?? '',
-          toolCalls: (data.toolCalls ?? null) as never,
-          metadata: data.metadata as never,
-        });
-      } catch (err) {
-        this.logger.error('Failed to persist assistant message', err as Error);
-      }
-    };
-
    // Accumulate the turn's streamed output so a provider error / disconnect can
    // persist the PARTIAL answer the user already saw — the SDK's onError/onAbort
    // callbacks don't hand us the in-progress text. `capturedSteps` holds finished
@@ -428,6 +434,101 @@ export class AiChatService {
    const capturedSteps: StepLike[] = [];
    let inProgressText = '';

+    // Step-granular durability (#183): create the assistant row UPFRONT in the
+    // 'streaming' state (before any token), then UPDATE it as each step finishes
+    // and finalize it once on the terminal callback. If the process dies
+    // mid-turn the row survives with every finished step already persisted; the
+    // startup sweep (sweepStreaming) later flips a dangling 'streaming' row to
+    // 'aborted'. The DB is now the single source of truth for the turn — the
+    // socket is never required for the write path. A failed upfront insert is
+    // logged and leaves assistantId undefined: the turn still streams, per-step
+    // updates no-op, and the terminal finalize INSERTs the row (applyFinalize).
+    let assistantId: string | undefined;
+    try {
+      const seed = flushAssistant([], '', 'streaming');
+      const seeded = await this.aiChatMessageRepo.insert({
+        chatId,
+        workspaceId: workspace.id,
+        userId: user.id,
+        role: 'assistant',
+        content: seed.content,
+        // jsonb columns: cast through never (same as the user insert above).
+        toolCalls: (seed.toolCalls ?? null) as never,
+        metadata: seed.metadata as never,
+        status: seed.status,
+      });
+      assistantId = seeded?.id;
+    } catch (err) {
+      this.logger.error(
+        `Failed to insert upfront assistant row (chat ${chatId}, workspace ${workspace.id})`,
+        err as Error,
+      );
+    }
+
+    // Per-step (non-terminal) update: persist the finished steps the moment a
+    // step ends. Tolerant — a failed update is logged and swallowed so it never
+    // throws into the stream. Keeps status 'streaming'.
+    const updateStreaming = async (): Promise<void> => {
+      if (!assistantId) return;
+      // Cheap short-circuit once the turn is finalized (see `finalized` below).
+      // The AUTHORITATIVE guard is `onlyIfStreaming` on the UPDATE: a late
+      // fire-and-forget step update could still be in flight on another pool
+      // connection when finalize runs, so the SQL `WHERE status='streaming'`
+      // (not this flag) is what prevents it clobbering the terminal row.
+      if (finalized) return;
+      try {
+        await this.aiChatMessageRepo.update(
+          assistantId,
+          workspace.id,
+          flushAssistant(capturedSteps, '', 'streaming'),
+          { onlyIfStreaming: true },
+        );
+      } catch (err) {
+        this.logger.warn(
+          `Failed to update streaming assistant row: ${
+            err instanceof Error ? err.message : 'unknown error'
+          }`,
+        );
+      }
+    };
+
+    // Serialize the per-step updates (#183 review): onStepFinish fires them
+    // without await, so two could otherwise commit out of order on different pool
+    // connections (step N landing after N+1). Chaining each onto the previous
+    // keeps the persisted row monotonic with step order; each link short-circuits
+    // on `finalized`, so a tail of late updates is cheap.
+    let stepUpdateChain: Promise<void> = Promise.resolve();
+
+    // Terminal finalize: write the completed/error/aborted row exactly once
+    // across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort
+    // callbacks — mirroring the pre-#183 persist-at-most-once guard for the
+    // TERMINAL status (the row may be updated many times with 'streaming' before
+    // this fires once).
+    let finalized = false;
+    const finalizeAssistant = async (
+      flushed: AssistantFlush,
+    ): Promise<void> => {
+      if (finalized) return;
+      finalized = true;
+      const plan = planFinalizeAssistant(assistantId);
+      try {
+        // Shared dispatch (see applyFinalize): UPDATE the upfront row, or — when
+        // the upfront insert failed (kind 'insert') — INSERT the terminal row as
+        // the only safety against losing the turn entirely.
+        await applyFinalize(
+          this.aiChatMessageRepo,
+          plan,
+          { chatId, workspaceId: workspace.id, userId: user.id },
+          flushed,
+        );
+      } catch (err) {
+        this.logger.error(
+          `Failed to finalize assistant message (kind=${plan.kind})`,
+          err as Error,
+        );
+      }
+    };
+
    // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure
    // first-chunk latency, the model-silent gap right before a disconnect, and
    // how many SSE heartbeats were written, so a Safari drop can be classified
@@ -476,6 +577,12 @@ export class AiChatService {
          // the in-progress accumulator for the next step.
          capturedSteps.push(step as StepLike);
          inProgressText = '';
+          // Step-granular durability (#183): persist this finished step (its text +
+          // tool calls + tool RESULTS) the moment it ends, so a process death after
+          // this point still recovers the step. Not awaited here (never block the
+          // stream), but SERIALIZED via stepUpdateChain so the writes commit in
+          // step order; updateStreaming is error-tolerant (logs + swallows).
+          stepUpdateChain = stepUpdateChain.then(() => updateStreaming());
        },
        onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
          // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
@@ -486,30 +593,31 @@ export class AiChatService {
              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
              `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
          );
-          await persistAssistant({
-            text,
-            toolCalls: serializeSteps(steps),
-            metadata: {
-              finishReason,
-              // Persist the turn's cumulative usage WITH reasoning tokens resolved
-              // from either the new `outputTokenDetails` or the deprecated top-level
-              // field, so reopened history / the Markdown export show the thinking
-              // token cost too.
-              usage:
-                normalizeStreamUsage(totalUsage as StreamUsage) ?? totalUsage,
-              // Final-step usage = the context actually fed to the model on the last LLM
-              // call (full history + tool results) plus the answer it just generated.
-              // input+output of the FINAL step ≈ the conversation's CURRENT context size,
-              // distinct from totalUsage which sums every step (cumulative tokens spent).
+          // Finalize the assistant row (#183): the upfront 'streaming' row is
+          // UPDATEd to 'completed' with the turn's final text, cumulative usage and
+          // full UIMessage parts. We pass the SDK `steps` (which carry the final
+          // step's text) as the captured steps so metadata.parts matches the
+          // pre-#183 onFinish record exactly; `inProgressText` is '' here (the last
+          // step already finished). Final-step usage (usage.input+output) ≈ the
+          // conversation's CURRENT context size, distinct from totalUsage.
+          //
+          // COLUMN-SEMANTICS NOTE (#183): `content` is built by flushAssistant as
+          // the CONCATENATION of every step's text (stepsText), whereas pre-#183
+          // it stored only the FINAL step's text. This is a deliberate, harmless
+          // change: the UI and the Markdown export render from `metadata.parts`
+          // (per-step text + tool parts), not from `content`; `content` is the
+          // plain-text projection (full-text search / fallback). A multi-step
+          // turn's `content` therefore now holds all steps' prose, not just the
+          // last block.
+          await finalizeAssistant(
+            flushAssistant(steps as StepLike[], '', 'completed', {
+              finishReason: finishReason as string,
+              usage: totalUsage as StreamUsage,
              contextTokens:
                (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
                undefined,
-              // Persist the FULL set of UIMessage parts for the turn (text +
-              // tool-call/result), so the rebuilt history replays prior tool
-              // context to the model on later turns.
-              parts: assistantParts(steps, text),
-            },
-          });
+            }),
+          );
          // Lifecycle: release the external MCP clients leased for this turn.
          await closeExternalClients();

@@ -545,16 +653,14 @@ export class AiChatService {
              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
              `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
          );
-          // Persist the PARTIAL answer streamed before the failure (text + any
+          // Finalize the PARTIAL answer streamed before the failure (text + any
          // finished tool steps) WITH the error in metadata, so the turn shows what
-          // the user already saw plus the cause — not just a bare error.
-          await persistAssistant(
-            buildPartialAssistantRecord(
-              capturedSteps,
-              inProgressText,
-              'error',
-              errorText,
-            ),
+          // the user already saw plus the cause — not just a bare error. Status
+          // 'error' (#183).
+          await finalizeAssistant(
+            flushAssistant(capturedSteps, inProgressText, 'error', {
+              error: errorText,
+            }),
          );
          await closeExternalClients();
        },
@@ -578,12 +684,8 @@ export class AiChatService {
              `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
              `steps=${steps.length}`,
          );
-          await persistAssistant(
-            buildPartialAssistantRecord(
-              capturedSteps,
-              inProgressText,
-              'aborted',
-            ),
+          await finalizeAssistant(
+            flushAssistant(capturedSteps, inProgressText, 'aborted'),
          );
          await closeExternalClients();
        },
@@ -1032,38 +1134,132 @@ export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
 }

 /**
- * Build the assistant-message record persisted on a partial/failed turn (the
- * streamText onError / onAbort paths). Captures the partial answer the user
- * already saw: each finished step's text + tool parts (via assistantParts),
- * then the in-progress step's text appended last. When `errorText` is provided
- * it is recorded in metadata.error so the cause shows in history; an aborted
- * turn passes none. Pure, so the partial-recording shape is unit-testable
- * without seaming streamText.
+ * The persisted-row patch shape produced by {@link flushAssistant}. It is the
+ * SAME shape the assistant repo insert/update consume (content + toolCalls +
+ * metadata) plus the lifecycle `status` column added in #183.
 */
-export function buildPartialAssistantRecord(
-  steps: ReadonlyArray<StepLike> | undefined,
+export interface AssistantFlush {
+  /** Lifecycle state written to the `status` column. */
+  status: 'streaming' | 'completed' | 'error' | 'aborted';
+  /** Plain-text projection of the turn (finished steps' text + in-progress text). */
+  content: string;
+  /** Serialized finished steps; the shape is opaque at this level. */
+  toolCalls: unknown;
+  /** Metadata bag: always `parts`, plus finishReason/usage/contextTokens/error when set. */
+  metadata: Record<string, unknown>;
+}
+
+/**
+ * Decide how a turn's terminal payload reaches the database (#183). Pure: it
+ * only inspects whether the upfront assistant row exists.
+ *
+ * @param assistantId id of the upfront assistant row, or `undefined` when the
+ *   upfront insert failed. Any falsy id (including '') is treated as "no row".
+ * @returns `{ kind: 'update', id }` to UPDATE the upfront row, or
+ *   `{ kind: 'insert' }` to INSERT a fresh terminal row so the turn is not lost.
+ *   Kept as a standalone function so the fallback-insert branch can be
+ *   unit-tested without seaming streamText.
+ */
+export function planFinalizeAssistant(
+  assistantId: string | undefined,
+): { kind: 'update'; id: string } | { kind: 'insert' } {
+  if (!assistantId) {
+    // No upfront row to patch: fall back to inserting the terminal row.
+    return { kind: 'insert' };
+  }
+  return { kind: 'update', id: assistantId };
+}
+
+/** The repo surface the terminal finalize needs (structural — the real repo and
+ *  a test mock both satisfy it).
+ *
+ *  - `insert` receives a complete assistant row; applyFinalize calls it on the
+ *    fallback path, when there is no upfront row to update.
+ *  - `update` receives the upfront row's id, the workspace id and the flushed
+ *    terminal payload.
+ *
+ *  Both results are typed `unknown`: applyFinalize awaits them but never reads
+ *  the resolved value. */
+export interface FinalizeRepo {
+  insert(insertable: Record<string, unknown>): Promise<unknown>;
+  update(
+    id: string,
+    workspaceId: string,
+    patch: AssistantFlush,
+  ): Promise<unknown>;
+}
+
+/**
+ * Execute a finalize `plan` against the repo with the terminal `flushed`
+ * payload (#183). This is the single dispatch shared by the service's
+ * finalizeAssistant and its test, so the test exercises the real path rather
+ * than a copy (#186 review). It does no error handling — the caller wraps it.
+ *
+ * @param repo    structural repo exposing `insert` and `update`
+ * @param plan    outcome of planFinalizeAssistant
+ * @param base    identifiers of the turn (chat, workspace, user)
+ * @param flushed terminal row payload built by flushAssistant
+ */
+export async function applyFinalize(
+  repo: FinalizeRepo,
+  plan: { kind: 'update'; id: string } | { kind: 'insert' },
+  base: { chatId: string; workspaceId: string; userId: string },
+  flushed: AssistantFlush,
+): Promise<void> {
+  const { chatId, workspaceId, userId } = base;
+
+  if (plan.kind === 'insert') {
+    // Fallback: the upfront insert failed, so write a fresh terminal row.
+    const { content, toolCalls, metadata, status } = flushed;
+    await repo.insert({
+      chatId,
+      workspaceId,
+      userId,
+      role: 'assistant',
+      content,
+      toolCalls: toolCalls ?? null,
+      metadata,
+      status,
+    });
+    return;
+  }
+
+  // Normal path: patch the upfront row in place.
+  await repo.update(plan.id, workspaceId, flushed);
+}
+
+/**
+ * PURE assistant-row builder (#183 step-granular durability). Given the turn's
+ * accumulated steps + the in-progress (not-yet-finished) text + the lifecycle
+ * status, it returns the row patch to persist. The SAME path runs for the
+ * upfront insert (empty steps, status 'streaming'), every per-step update, and
+ * the terminal finalize (completed/error/aborted) — and a future background
+ * worker can call it identically, so it must stay a pure function of its inputs
+ * (NO `this`, no IO).
+ *
+ * `metadata.parts` is built by assistantParts over the finished steps, then the
+ * in-progress text appended as a trailing text part, so rowToUiMessage /
+ * findRecent keep replaying the turn unchanged. `metadata.finishReason`,
+ * `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
+ * only when provided/relevant, matching the pre-#183 onFinish/onError records.
+ *
+ * @param capturedSteps finished steps of the turn; `undefined` is treated as
+ *   an empty list.
+ * @param inProgressText text of the step that has not finished yet; appended
+ *   to `content` and, when non-empty, to `metadata.parts` as a last text part.
+ * @param status lifecycle status copied onto the returned patch.
+ * @param extra optional terminal details. `finishReason` and `error` are
+ *   attached only when truthy; `usage` whenever it is not `undefined` (the
+ *   normalizeStreamUsage result, or the raw value if that is nullish);
+ *   `contextTokens` only when truthy, so a value of 0 is omitted.
+ * @returns the row patch: `content` is every finished step's text concatenated
+ *   with the in-progress text, `toolCalls` is serializeSteps over the finished
+ *   steps. With no explicit `finishReason`, an 'error' / 'aborted' status is
+ *   used as `metadata.finishReason`; other statuses leave it unset.
+ */
+export function flushAssistant(
+  capturedSteps: ReadonlyArray<StepLike> | undefined,
   inProgressText: string,
-  finishReason: 'error' | 'aborted',
-  errorText?: string,
-): { text: string; toolCalls: unknown; metadata: Record<string, unknown> } {
-  const finished = steps ?? [];
+  status: 'streaming' | 'completed' | 'error' | 'aborted',
+  extra?: {
+    finishReason?: string;
+    usage?: ChatStreamUsage | StreamUsage | undefined;
+    contextTokens?: number;
+    error?: string;
+  },
+): AssistantFlush {
+  const finished = capturedSteps ?? [];
   const stepsText = finished.map((s) => s.text ?? '').join('');
   const trailing = inProgressText ?? '';
   // assistantParts emits text parts only for FINISHED steps; append the
-  // in-progress step's text (the answer cut off by the error) as the last text
-  // part so the persisted parts match what streamed to the client.
+  // in-progress step's text (the partial answer cut off by an error/abort, or
+  // simply not yet flushed mid-stream) as the last text part so the persisted
+  // parts match what streamed to the client.
   const parts = assistantParts(finished, '') as unknown as Array<
     Record<string, unknown>
   >;
   if (trailing) parts.push({ type: 'text', text: trailing });
+
+  const metadata: Record<string, unknown> = {
+    parts: parts as unknown as UIMessage['parts'],
+  };
+  // finishReason: prefer an explicit one; else derive a sensible value from the
+  // terminal status (so onError/onAbort records keep their historical reason).
+  if (extra?.finishReason) {
+    metadata.finishReason = extra.finishReason;
+  } else if (status === 'error' || status === 'aborted') {
+    metadata.finishReason = status;
+  }
+  if (extra?.usage !== undefined) {
+    metadata.usage =
+      normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
+  }
+  if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
+  if (extra?.error) metadata.error = extra.error;
+
   return {
-    text: stepsText + trailing,
+    content: stepsText + trailing,
     toolCalls: serializeSteps(finished),
-    metadata: {
-      finishReason,
-      parts: parts as unknown as UIMessage['parts'],
-      ...(errorText ? { error: errorText } : {}),
-    },
+    metadata,
+    status,
   };
 }

--- a/apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
+++ b/apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
@@ -0,0 +1,295 @@
+import { buildChatMarkdown, normalizeLang } from './chat-markdown.util';
+import type { AiChatMessage } from '@docmost/db/types/entity.types';
+
+/**
+ * Regression guard for normalizeLang. The client sends `i18n.language`, which
+ * is a FULL locale tag such as 'en-US' / 'ru-RU' rather than a bare 'en'/'ru'.
+ * A `@IsIn(['en','ru'])` DTO used to reject those with a 400 (found in
+ * real-browser testing); the export now accepts any string and normalizes it
+ * here.
+ */
+describe('normalizeLang', () => {
+  it("maps any 'ru…' locale tag to ru", () => {
+    const russianTags = ['ru', 'ru-RU', 'RU-ru'];
+    for (const tag of russianTags) {
+      expect(normalizeLang(tag)).toBe('ru');
+    }
+  });
+
+  it('maps everything else (incl. region-qualified English) to en', () => {
+    const otherTags: Array<string | undefined> = [
+      'en',
+      'en-US',
+      'fr-FR',
+      undefined,
+      '',
+    ];
+    for (const tag of otherTags) {
+      expect(normalizeLang(tag)).toBe('en');
+    }
+  });
+});
+
+/**
+ * Unit tests for the SERVER Markdown export (#183). Mirrors the coverage of the
+ * (now-removed) client chat-markdown tests: heading/metadata, role labels, text
+ * + tool blocks, token footers, the interrupted-turn note, and NULL-status
+ * (legacy) rows. The export embeds a live `new Date().toISOString()` timestamp;
+ * we never assert it, only the deterministic structure.
+ */
+
+// Test fixture: expand a partial message into a full AiChatMessage, filling
+// every omitted (or null) field with a fixed default.
+function row(partial: Partial<AiChatMessage>): AiChatMessage {
+  // One fixed timestamp for both date columns keeps the fixture deterministic.
+  const fixedTimestamp = '2026-06-21T00:00:00.000Z' as never;
+  const {
+    id,
+    chatId,
+    workspaceId,
+    userId,
+    role,
+    content,
+    toolCalls,
+    metadata,
+    status,
+    createdAt,
+    updatedAt,
+    deletedAt,
+  } = partial;
+
+  const filled = {
+    id: id ?? 'id',
+    chatId: chatId ?? 'chat-1',
+    workspaceId: workspaceId ?? 'ws-1',
+    userId: userId ?? null,
+    role: role ?? 'user',
+    content: content ?? null,
+    toolCalls: toolCalls ?? null,
+    metadata: metadata ?? null,
+    status: status ?? null,
+    createdAt: createdAt ?? fixedTimestamp,
+    updatedAt: updatedAt ?? fixedTimestamp,
+    deletedAt: deletedAt ?? null,
+  };
+  return filled as AiChatMessage;
+}
+
+describe('buildChatMarkdown (server) — structure', () => {
+  it('emits the title heading, chat id and message count', () => {
+    const md = buildChatMarkdown({
+      title: 'My chat',
+      chatId: 'chat-123',
+      rows: [],
+    });
+    expect(md).toContain('# My chat');
+    expect(md).toContain('- Chat ID: `chat-123`');
+    expect(md).toContain('- Messages: 0');
+  });
+
+  it('falls back to "Untitled chat" with no title (en)', () => {
+    const md = buildChatMarkdown({ title: null, chatId: 'c', rows: [] });
+    expect(md).toContain('# Untitled chat');
+  });
+
+  it('localizes fixed labels with lang=ru (structure stays English)', () => {
+    const md = buildChatMarkdown({
+      title: null,
+      chatId: 'c',
+      lang: 'ru',
+      rows: [row({ role: 'assistant', content: 'hi' })],
+    });
+    expect(md).toContain('# Без названия');
+    expect(md).toContain('## 1. ИИ-агент');
+    // Structural words (e.g. the "Chat ID" bullet) remain English under lang=ru.
+    expect(md).toContain('- Chat ID:');
+  });
+
+  it('numbers messages and labels roles (You / AI agent)', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({ role: 'user', content: 'question' }),
+        row({ role: 'assistant', content: 'answer' }),
+      ],
+    });
+    expect(md).toContain('## 1. You');
+    expect(md).toContain('question');
+    expect(md).toContain('## 2. AI agent');
+    expect(md).toContain('answer');
+  });
+
+  it('renders a tool part with fenced input/output and the friendly label', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: 'done',
+          metadata: {
+            parts: [
+              {
+                type: 'tool-getPage',
+                state: 'output-available',
+                input: { id: 'p1' },
+                output: { title: 'Hello' },
+              },
+              { type: 'text', text: 'done' },
+            ],
+          } as never,
+        }),
+      ],
+    });
+    expect(md).toContain('**Tool: Read page** (`getPage`) — done');
+    expect(md).toContain('Input:');
+    expect(md).toContain('"id": "p1"');
+    expect(md).toContain('Output:');
+    expect(md).toContain('"title": "Hello"');
+  });
+
+  // #186 re-review pt 1: the next three tests restore parity with the removed client
+  // spec: tool error state, unknown-tool fallback (en + ru), circular-output handling.
+  it('renders a tool part in the error state with its errorText', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          metadata: {
+            parts: [
+              {
+                type: 'tool-getPage',
+                state: 'output-error',
+                input: { id: 'p1' },
+                errorText: 'page not found',
+              },
+            ],
+          } as never,
+        }),
+      ],
+    });
+    expect(md).toContain('**Tool: Read page** (`getPage`) — error');
+    expect(md).toContain('**Error:** page not found');
+  });
+
+  it('falls back to "Ran tool <name>" for an unknown tool (en) and the ru variant', () => {
+    const parts = [
+      {
+        type: 'tool-mysteryTool',
+        state: 'output-available',
+        output: { ok: 1 },
+      },
+    ];
+    const en = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [row({ role: 'assistant', metadata: { parts } as never })],
+    });
+    expect(en).toContain('**Tool: Ran tool mysteryTool** (`mysteryTool`)');
+    const ru = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      lang: 'ru',
+      rows: [row({ role: 'assistant', metadata: { parts } as never })],
+    });
+    expect(ru).toContain('Выполнил инструмент mysteryTool');
+  });
+
+  it('does not throw on a circular tool output (falls back to String)', () => {
+    const circular: Record<string, unknown> = {};
+    circular.self = circular;
+    expect(() =>
+      buildChatMarkdown({
+        title: 'T',
+        chatId: 'c',
+        rows: [
+          row({
+            role: 'assistant',
+            metadata: {
+              parts: [
+                {
+                  type: 'tool-getPage',
+                  state: 'output-available',
+                  output: circular,
+                },
+              ],
+            } as never,
+          }),
+        ],
+      }),
+    ).not.toThrow();
+  });
+
+  it('emits a token footer + total when usage is present', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: 'a',
+          metadata: {
+            usage: {
+              inputTokens: 100,
+              outputTokens: 20,
+              totalTokens: 120,
+              reasoningTokens: 8,
+            },
+          } as never,
+        }),
+      ],
+    });
+    expect(md).toContain('- Total tokens: 120');
+    expect(md).toContain(
+      '_Tokens — in: 100, out: 20, reasoning: 8, total: 120_',
+    );
+  });
+
+  it('flags a still-streaming (interrupted) row', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({ role: 'assistant', content: 'partial', status: 'streaming' }),
+      ],
+    });
+    expect(md).toContain('still being generated');
+  });
+
+  it('does NOT flag a completed row', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [row({ role: 'assistant', content: 'final', status: 'completed' })],
+    });
+    expect(md).not.toContain('still being generated');
+  });
+
+  it('renders a legacy NULL-status row (no parts) from plain content', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({ role: 'assistant', content: 'legacy answer', status: null }),
+      ],
+    });
+    expect(md).toContain('legacy answer');
+    expect(md).not.toContain('still being generated');
+  });
+
+  it('renders a persisted error', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: '',
+          status: 'error',
+          metadata: { error: '401: Unauthorized' } as never,
+        }),
+      ],
+    });
+    expect(md).toContain('**⚠️ Error:** 401: Unauthorized');
+  });
+
+  it('escapes embedded triple-backtick fences with a longer delimiter', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: 'x',
+          metadata: {
+            parts: [
+              {
+                type: 'tool-getPage',
+                state: 'output-available',
+                output: '```inner```',
+              },
+            ],
+          } as never,
+        }),
+      ],
+    });
+    // The output contains a 3-backtick run, so the wrapping fence must use 4 backticks.
+    expect(md).toContain('````');
+  });
+});
--- a/apps/server/src/core/ai-chat/chat-markdown.util.ts
+++ b/apps/server/src/core/ai-chat/chat-markdown.util.ts
@@ -0,0 +1,299 @@
+/**
+ * Server-side Markdown export for an AI agent chat (#183). The DB is the single
+ * source of truth: this renders a chat purely from its persisted message rows
+ * (`AiChatMessage[]` — role / content / metadata.parts / toolCalls / usage).
+ * Because the assistant row is now persisted UPFRONT and updated per step, an
+ * interrupted turn is included up to its last finished step.
+ *
+ * Ported from the client `utils/chat-markdown.ts`. It is a PURE function (apart
+ * from `new Date()` for the export timestamp), so it is straightforward to
+ * unit-test and a future background worker can reuse it.
+ *
+ * Only a few fixed role/tool labels are localized via the `lang` param; the
+ * structural document words (Input/Output/Error/Tokens/...) stay English because
+ * the output is a technical artifact.
+ */
+
+import type { AiChatMessage } from '@docmost/db/types/entity.types';
+
+/** Supported export label languages. Defaults to English. */
+export type ExportLang = 'en' | 'ru';
+
+/**
+ * Normalize an arbitrary client locale code to a supported export language. The
+ * client sends `i18n.language`, which is a FULL locale tag (e.g. `en-US`,
+ * `ru-RU`), not a bare `en`/`ru`.
+ *
+ * Only the PRIMARY language subtag (the part before the first `-` or `_`) is
+ * compared, case-insensitively and exactly. A raw prefix test would also claim
+ * unrelated tags that merely start with "ru" (e.g. `rue`, `rup`) for Russian.
+ *
+ * @param lang locale tag from the client; may be missing or empty.
+ * @returns 'ru' for a Russian tag (`ru`, or the three-letter `rus`), otherwise
+ *   'en' — including for `undefined` and ''.
+ */
+export function normalizeLang(lang?: string): ExportLang {
+  // `split` always yields at least one element; the default only satisfies
+  // strict index checking.
+  const [primary = ''] = (lang ?? '').trim().toLowerCase().split(/[-_]/);
+  return primary === 'ru' || primary === 'rus' ? 'ru' : 'en';
+}
+
+/**
+ * One persisted UIMessage part as the exporter reads it: either a text part or
+ * a tool part. Every field except `type` is optional because the two kinds
+ * share this single loose shape.
+ */
+interface ExportPart {
+  /** 'text', a static tool part `tool-<name>`, or 'dynamic-tool'. */
+  type: string;
+  /** Body of a text part. */
+  text?: string;
+  /** Tool name; read only for 'dynamic-tool' parts. */
+  toolName?: string;
+  /** Tool part state (e.g. 'output-available', 'output-error', 'output-denied'). */
+  state?: string;
+  /** Tool input; rendered in a fenced block when not `undefined`. */
+  input?: unknown;
+  /** Tool output; rendered in a fenced block when not `undefined`. */
+  output?: unknown;
+  /** Tool error message; rendered when non-empty. */
+  errorText?: string;
+}
+
+/**
+ * Per-turn token usage as stored under `metadata.usage` of a message row. All
+ * counters are optional; the exporter substitutes '?' or a computed sum when
+ * one is missing.
+ */
+interface UsageLike {
+  /** Total for the turn; when absent the exporter uses input + output. */
+  totalTokens?: number;
+  /** Tokens on the input side. */
+  inputTokens?: number;
+  /** Tokens on the output side. */
+  outputTokens?: number;
+  /** Reasoning tokens; shown in the footer only when greater than 0. */
+  reasoningTokens?: number;
+}
+
+/** Shape of one language's label set. */
+type ExportLabels = {
+  untitled: string;
+  aiAgent: string;
+  you: string;
+  tools: Record<string, string>;
+  ranTool: (name: string) => string;
+  stillGenerating: string;
+};
+
+/** English labels. */
+const EN_LABELS: ExportLabels = {
+  untitled: 'Untitled chat',
+  aiAgent: 'AI agent',
+  you: 'You',
+  tools: {
+    searchPages: 'Searched pages',
+    getPage: 'Read page',
+    createPage: 'Created page',
+    updatePageContent: 'Updated page',
+    renamePage: 'Renamed page',
+    movePage: 'Moved page',
+    deletePage: 'Deleted page (to trash)',
+    createComment: 'Commented',
+    resolveComment: 'Resolved comment',
+  },
+  ranTool: (name) => `Ran tool ${name}`,
+  stillGenerating:
+    'This message is still being generated — the export captured a partial, in-progress response.',
+};
+
+/** Russian labels. */
+const RU_LABELS: ExportLabels = {
+  untitled: 'Без названия',
+  aiAgent: 'ИИ-агент',
+  you: 'Вы',
+  tools: {
+    searchPages: 'Искал по страницам',
+    getPage: 'Прочитал страницу',
+    createPage: 'Создал страницу',
+    updatePageContent: 'Обновил страницу',
+    renamePage: 'Переименовал страницу',
+    movePage: 'Переместил страницу',
+    deletePage: 'Удалил страницу (в корзину)',
+    createComment: 'Прокомментировал',
+    resolveComment: 'Закрыл комментарий',
+  },
+  ranTool: (name) => `Выполнил инструмент ${name}`,
+  stillGenerating:
+    'Это сообщение всё ещё генерируется — экспорт захватил частичный, незавершённый ответ.',
+};
+
+/** Localized label table, keyed by export language. The client-side Markdown
+ *  builder was removed by #183 (the export is server-side only), so this does
+ *  not mirror a second exporter. The tool-action labels are instead kept in
+ *  parity with the on-screen action-log labels in the client's
+ *  `tool-parts.tsx` (`toolLabelKey`), so the export reads the same as the UI.
+ *  Only role + tool-action labels are localized; everything structural is an
+ *  English constant in the renderer. */
+const LABELS: Record<ExportLang, ExportLabels> = {
+  en: EN_LABELS,
+  ru: RU_LABELS,
+};
+
+/**
+ * Whether a part `type` denotes an AI SDK tool part: the literal
+ * 'dynamic-tool', or any static tool type of the form `tool-<name>`.
+ */
+function isToolPart(type: string): boolean {
+  if (type === 'dynamic-tool') return true;
+  return type.startsWith('tool-');
+}
+
+/**
+ * Resolve the tool name of a part. A 'dynamic-tool' part carries it in
+ * `toolName` ('' when missing); a static part encodes it in its type as
+ * `tool-<name>`. Any other type is returned unchanged.
+ */
+function getToolName(part: ExportPart): string {
+  const staticPrefix = 'tool-';
+  const { type } = part;
+
+  if (type === 'dynamic-tool') {
+    return part.toolName ?? '';
+  }
+  if (type.startsWith(staticPrefix)) {
+    return type.slice(staticPrefix.length);
+  }
+  return type;
+}
+
+/**
+ * Collapse an AI SDK tool-part state into the three states the action log
+ * shows. 'output-available' is 'done'; 'output-error' and 'output-denied' are
+ * 'error'; anything else, including a missing state, is 'running'.
+ */
+function toolRunState(state: string | undefined): 'running' | 'done' | 'error' {
+  switch (state) {
+    case 'output-error':
+    case 'output-denied':
+      return 'error';
+    case 'output-available':
+      return 'done';
+    default:
+      return 'running';
+  }
+}
+
+/**
+ * Resolve a tool's friendly action-log label (localized) from its name.
+ *
+ * Known tools use the per-language `tools` table; any other name falls back to
+ * the generic "ran tool <name>" label.
+ *
+ * @param name tool name as extracted from the part (arbitrary text).
+ * @param lang export language.
+ */
+function toolLabel(name: string, lang: ExportLang): string {
+  const labels = LABELS[lang];
+  // `tools` is a plain object, so a bare `tools[name]` would also resolve
+  // inherited members ('constructor', 'toString', …) and return a function
+  // instead of a label. Only own entries count as known tools.
+  const known = Object.prototype.hasOwnProperty.call(labels.tools, name)
+    ? labels.tools[name]
+    : undefined;
+  return known ?? labels.ranTool(name);
+}
+
+/**
+ * Stringify an arbitrary tool input/output value for a fenced block. Strings
+ * pass through as-is; everything else is pretty-printed JSON (2-space indent).
+ *
+ * Falls back to `String(value)` in two cases, so the result is ALWAYS a string:
+ *  - serialization throws (e.g. a circular structure);
+ *  - `JSON.stringify` returns `undefined`, which it does for values with no
+ *    JSON representation (functions, symbols, `undefined`). The caller runs a
+ *    regex over the result and would crash on `undefined`.
+ *
+ * @param value any tool input/output value.
+ * @returns a printable string, never `undefined`.
+ */
+function stringify(value: unknown): string {
+  if (typeof value === 'string') return value;
+  try {
+    // The lib typing says `string`, but the runtime result can be `undefined`.
+    const json: string | undefined = JSON.stringify(value, null, 2);
+    return json ?? String(value);
+  } catch {
+    return String(value);
+  }
+}
+
+/**
+ * Wrap `code` in a fenced code block that embedded backticks cannot break out
+ * of. The delimiter is one backtick longer than the longest backtick run in
+ * the content, and never shorter than 3.
+ *
+ * @param code content to wrap.
+ * @param lang optional info string placed right after the opening fence.
+ */
+function fence(code: string, lang = ''): string {
+  let longestRun = 0;
+  for (const run of code.match(/`+/g) ?? []) {
+    if (run.length > longestRun) longestRun = run.length;
+  }
+  const delimiter = '`'.repeat(Math.max(3, longestRun + 1));
+  return [`${delimiter}${lang}`, code, delimiter].join('\n');
+}
+
+/**
+ * Token count attributed to one row: the reported `totalTokens` when present,
+ * otherwise input + output with each missing side counted as 0. Mirrors the
+ * header sum in the client window.
+ */
+function rowTokens(usage: UsageLike): number {
+  const { totalTokens, inputTokens, outputTokens } = usage;
+  if (totalTokens != null) return totalTokens;
+  return (inputTokens ?? 0) + (outputTokens ?? 0);
+}
+
+/** Render a single tool part as one Markdown block: a header line, then the
+ *  optional Input / Output fenced blocks and the optional error line. */
+function renderToolBlock(part: ExportPart, lang: ExportLang): string {
+  const toolName = getToolName(part);
+  const friendly = toolLabel(toolName, lang);
+  const runState = toolRunState(part.state);
+
+  const sections = [`**Tool: ${friendly}** (\`${toolName}\`) — ${runState}`];
+  if (part.input !== undefined) {
+    sections.push('Input:', fence(stringify(part.input), 'json'));
+  }
+  if (part.output !== undefined) {
+    sections.push('Output:', fence(stringify(part.output), 'json'));
+  }
+  if (part.errorText) {
+    sections.push(`**Error:** ${part.errorText}`);
+  }
+  return sections.join('\n\n');
+}
+
+/** Render one message's UIMessage parts into an array of Markdown blocks.
+ *  Text parts become their trimmed text (blank ones are dropped), tool parts
+ *  become a tool block, and any other part type is ignored. Mirrors the client
+ *  renderer / MessageItem. */
+function renderMessageParts(parts: ExportPart[], lang: ExportLang): string[] {
+  const blocks: string[] = [];
+
+  for (const part of parts) {
+    if (part.type === 'text') {
+      const body = (part.text ?? '').trim();
+      if (body.length > 0) blocks.push(body);
+    } else if (isToolPart(part.type)) {
+      blocks.push(renderToolBlock(part, lang));
+    }
+  }
+
+  return blocks;
+}
+
+/** Pick the parts to render for a persisted row: the rich `metadata.parts`
+ *  when it is a non-empty array, otherwise a single text part built from the
+ *  plain-text `content` ('' when null). Mirrors rowToUiMessage. */
+function rowParts(row: AiChatMessage): ExportPart[] {
+  const { parts: persisted } = (row.metadata ?? {}) as {
+    parts?: ExportPart[];
+  };
+  if (Array.isArray(persisted) && persisted.length > 0) {
+    return persisted;
+  }
+  return [{ type: 'text', text: row.content ?? '' }];
+}
+
+/**
+ * Serialize a chat to a Markdown string from its persisted rows. The source is
+ * the DB ONLY (no live client state).
+ *
+ * Layout: a title heading, a metadata bullet list (chat id, export timestamp,
+ * message count and, when positive, the summed tokens), then one section per
+ * row. A row still in status 'streaming' is a turn captured mid-flight; it is
+ * rendered from whatever was persisted and flagged "still generating".
+ *
+ * @param args.title  chat title; blank or null falls back to the localized
+ *   "untitled" label.
+ * @param args.chatId chat identifier printed in the metadata list.
+ * @param args.rows   persisted messages, rendered in the order given.
+ * @param args.lang   client locale tag (e.g. 'en-US'/'ru-RU'); normalized here.
+ * @returns the Markdown document, blocks separated by blank lines.
+ */
+export function buildChatMarkdown(args: {
+  title: string | null;
+  chatId: string;
+  rows: AiChatMessage[];
+  // Accepts a full client locale tag (e.g. 'en-US'/'ru-RU'); normalized below.
+  lang?: string;
+}): string {
+  const lang: ExportLang = normalizeLang(args.lang);
+  const labels = LABELS[lang];
+
+  // Loose view of the two metadata fields the exporter reads.
+  const readMeta = (row: AiChatMessage) =>
+    (row.metadata ?? {}) as { usage?: UsageLike; error?: string };
+
+  // Sum the tokens of every row that carries usage.
+  let tokenSum = 0;
+  for (const row of args.rows) {
+    const { usage } = readMeta(row);
+    if (usage) tokenSum += rowTokens(usage);
+  }
+
+  // Metadata bullet list. Total tokens is only shown when there is a sum.
+  const bullets = [
+    `- Chat ID: \`${args.chatId}\``,
+    `- Exported: ${new Date().toISOString()}`,
+    `- Messages: ${args.rows.length}`,
+  ];
+  if (tokenSum > 0) bullets.push(`- Total tokens: ${tokenSum}`);
+
+  const title = (args.title ?? '').trim() || labels.untitled;
+  const blocks: string[] = [`# ${title}`, bullets.join('\n')];
+
+  let ordinal = 0;
+  for (const row of args.rows) {
+    ordinal += 1;
+    const speaker = row.role === 'assistant' ? labels.aiAgent : labels.you;
+    blocks.push('---', `## ${ordinal}. ${speaker}`);
+
+    // Created-at kept in source as an HTML comment (out of the rendered prose).
+    const { createdAt } = row;
+    if (createdAt) {
+      const stamp =
+        createdAt instanceof Date ? createdAt.toISOString() : String(createdAt);
+      blocks.push(`<!-- ${stamp} -->`);
+    }
+
+    for (const rendered of renderMessageParts(rowParts(row), lang)) {
+      blocks.push(rendered);
+    }
+
+    // A still-'streaming' row is an interrupted/in-progress turn captured by
+    // the export; say so, so the partial answer is not mistaken for complete.
+    if (row.status === 'streaming') {
+      blocks.push(`_⏳ ${labels.stillGenerating}_`);
+    }
+
+    const { error, usage } = readMeta(row);
+    if (error) {
+      blocks.push(`**⚠️ Error:** ${error}`);
+    }
+
+    if (usage) {
+      // rowTokens already prefers `totalTokens` over the input + output sum.
+      const total = rowTokens(usage);
+      const reasoningTokens = usage.reasoningTokens;
+      const reasoning =
+        reasoningTokens && reasoningTokens > 0
+          ? `, reasoning: ${reasoningTokens}`
+          : '';
+      const tokensIn = usage.inputTokens ?? '?';
+      const tokensOut = usage.outputTokens ?? '?';
+      blocks.push(
+        `_Tokens — in: ${tokensIn}, out: ${tokensOut}${reasoning}, total: ${total}_`,
+      );
+    }
+  }
+
+  // Blank line between blocks so the Markdown renders cleanly.
+  return blocks.join('\n\n');
+}
--- a/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts
+++ b/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts
@@ -26,3 +26,17 @@ export class GetChatMessagesDto {
  @IsString()
  cursor?: string;
 }
+
+/** Export a chat to Markdown (#183). `lang` localizes the few fixed
+ *  role/tool-action labels; defaults to English server-side.
+ *
+ *  Fields:
+ *  - `chatId` (required): the chat to export; validated only as a string.
+ *  - `lang` (optional): client locale tag; validated only as a string. */
+export class ExportChatDto {
+  @IsString()
+  chatId: string;
+
+  // A full client locale tag (e.g. 'en-US', 'ru-RU') — normalized server-side to
+  // a supported export language (see normalizeLang). Accept any string so a
+  // region-qualified locale is not rejected (the 400 that broke the real client).
+  @IsOptional()
+  @IsString()
+  lang?: string;
+}
--- a/apps/server/src/database/migrations/20260626T120000-ai-chat-message-status.ts
+++ b/apps/server/src/database/migrations/20260626T120000-ai-chat-message-status.ts
@@ -0,0 +1,18 @@
+import { type Kysely } from 'kysely';
+
+export async function up(db: Kysely<any>): Promise<void> {
+  // Step-granular durability for the assistant turn (#183): the assistant row
+  // is created UPFRONT (status 'streaming') and UPDATEd as each step completes,
+  // so a process death mid-turn no longer loses the whole answer.
+  //
+  // The column gets no constraints and stays NULLABLE on purpose: rows written
+  // before this migration carry NULL, which the app treats as 'completed' (a
+  // settled, pre-status message). Values written by the app:
+  // 'streaming' | 'completed' | 'error' | 'aborted'.
+  const addStatusColumn = db.schema
+    .alterTable('ai_chat_messages')
+    .addColumn('status', 'text');
+  await addStatusColumn.execute();
+}
+
+export async function down(db: Kysely<any>): Promise<void> {
+  // Reverse of `up`: drop the `status` column again.
+  const messages = db.schema.alterTable('ai_chat_messages');
+  await messages.dropColumn('status').execute();
+}
--- a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts
+++ b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts
@@ -1,4 +1,4 @@
-import { Injectable } from '@nestjs/common';
+import { Injectable, Logger } from '@nestjs/common';
 import { InjectKysely } from 'nestjs-kysely';
 import { KyselyDB, KyselyTransaction } from '../../types/kysely.types';
 import { dbOrTx } from '../../utils';
@@ -9,8 +9,24 @@ import {
 import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
 import { executeWithCursorPagination } from '@docmost/db/pagination/cursor-pagination';

+// Crash-recovery sweep recency threshold (#183 review): a 'streaming' row is
+// only swept to 'aborted' once it has been UNTOUCHED for this long. A live turn
+// bumps `updatedAt` on every step (well under this window), so its row never
+// matches; only a turn whose process truly died (no step update for >threshold)
+// is swept. Chosen safely ABOVE the longest realistic turn so a fresh replica's
+// boot-sweep can never abort a turn another replica is actively streaming
+// (multi-instance deploy).
+const SWEEP_STREAMING_STALE_MS = 10 * 60 * 1000; // 10 minutes
+
+// Hard upper bound on the rows materialized by `findAllByChat` (export path).
+// A generous cap so a pathologically huge chat cannot load an unbounded result
+// into memory; far above any realistic transcript length.
+const FIND_ALL_BY_CHAT_LIMIT = 5000;
+
@Injectable()
 export class AiChatMessageRepo {
+  private readonly logger = new Logger(AiChatMessageRepo.name);
+
  constructor(@InjectKysely() private readonly db: KyselyDB) {}

  // The `tsv` column is a trigger-maintained tsvector used only for
@@ -25,6 +41,7 @@ export class AiChatMessageRepo {
    'content',
    'toolCalls',
    'metadata',
+    'status',
    'createdAt',
    'updatedAt',
    'deletedAt',
@@ -60,6 +77,46 @@ export class AiChatMessageRepo {
    });
  }

+  // Load the non-deleted messages of one chat (matched on chatId + workspaceId)
+  // in a single unpaginated query, returned oldest -> newest. Used by the
+  // server-side Markdown export (#183), where the DB is the single source of
+  // truth and the transcript is rendered in one pass (findByChat is
+  // cursor-paginated and would only return the first page).
+  //
+  // Returns at most `limit` rows (default FIND_ALL_BY_CHAT_LIMIT) so a huge chat
+  // cannot materialize an unbounded result set in memory. On overflow the
+  // OLDEST messages are dropped and a warning is logged; the call still succeeds.
+  async findAllByChat(
+    chatId: string,
+    workspaceId: string,
+    // Row cap; overridable so tests can exercise truncation with only a few rows.
+    limit: number = FIND_ALL_BY_CHAT_LIMIT,
+  ): Promise<AiChatMessage[]> {
+    // Query newest-first (createdAt, then id as tie-breaker) and ask for ONE row
+    // more than `limit`: the extra row exists only to DETECT truncation. On
+    // overflow this keeps the NEWEST `limit` messages — the recent conversation
+    // matters most for an export (#183 review). Reversed to chronological below.
+    const rows = await this.db
+      .selectFrom('aiChatMessages')
+      .select(this.baseFields)
+      .where('chatId', '=', chatId)
+      .where('workspaceId', '=', workspaceId)
+      .where('deletedAt', 'is', null)
+      .orderBy('createdAt', 'desc')
+      .orderBy('id', 'desc')
+      .limit(limit + 1)
+      .execute();
+
+    if (rows.length > limit) {
+      rows.length = limit; // drop the sentinel row; rows are newest-first here
+      this.logger.warn(
+        `Chat ${chatId} export truncated to the newest ${limit} messages ` +
+          `(older messages omitted).`,
+      );
+    }
+    return rows.reverse();
+  }
+
  // Load the most RECENT `limit` messages for a chat and return them in
  // ascending chronological order (oldest -> newest), as the model expects.
  // `findByChat` returns the FIRST page ASC (the OLDEST messages), which loses
@@ -96,4 +153,68 @@ export class AiChatMessageRepo {
      .returning(this.baseFields)
      .executeTakeFirst();
  }
+
+  /**
+   * Update a single message in place, matched on id + workspace (#183
+   * step-granular durability). The assistant row is created UPFRONT (status
+   * 'streaming') and patched as each step completes, then finalized once on the
+   * terminal status.
+   *
+   * `updatedAt` is always set to the current time, even when `patch` is empty.
+   * The match does NOT filter on `deletedAt`, so a soft-deleted row with a
+   * matching id + workspace is still updated.
+   *
+   * @param id - Id of the message row to patch.
+   * @param workspaceId - Workspace the row must belong to; a foreign workspace
+   *   id matches nothing.
+   * @param patch - Columns to overwrite (any subset of content, toolCalls,
+   *   metadata, status).
+   * @param opts - `onlyIfStreaming` additionally requires the row to still have
+   *   status 'streaming' (see the guard below); `trx` runs the UPDATE inside
+   *   the given transaction instead of the default connection.
+   * @returns The updated row (baseFields), or undefined when no row matched:
+   *   unknown id, foreign workspace, or — with `onlyIfStreaming` — a row that
+   *   is no longer 'streaming'.
+   */
+  async update(
+    id: string,
+    workspaceId: string,
+    patch: Partial<{
+      content: string | null;
+      toolCalls: unknown;
+      metadata: unknown;
+      status: string | null;
+    }>,
+    opts?: { onlyIfStreaming?: boolean; trx?: KyselyTransaction },
+  ): Promise<AiChatMessage | undefined> {
+    const db = dbOrTx(this.db, opts?.trx);
+    let query = db
+      .updateTable('aiChatMessages')
+      .set({ ...(patch as Record<string, unknown>), updatedAt: new Date() })
+      .where('id', '=', id)
+      .where('workspaceId', '=', workspaceId);
+    // Concurrency guard (#183 review): a per-step 'streaming' update must NEVER
+    // overwrite a row the terminal callback already finalized. onStepFinish
+    // fires the streaming update fire-and-forget, so its UPDATE can land AFTER
+    // finalize on a DIFFERENT pool connection (commit order is not guaranteed).
+    // Scoping the streaming update to rows STILL in 'streaming' makes a late
+    // update a no-op once the row is completed/error/aborted — regardless of
+    // commit order. The terminal finalize runs WITHOUT this guard so it always
+    // wins.
+    if (opts?.onlyIfStreaming) {
+      query = query.where('status', '=', 'streaming');
+    }
+    return query.returning(this.baseFields).executeTakeFirst();
+  }
+
+  /**
+   * Crash-recovery sweep (#183): flip rows left in the 'streaming' state (a
+   * turn that died mid-write before reaching a terminal status) to 'aborted',
+   * bumping `updatedAt`. Intended to run once on server start.
+   *
+   * Deliberately NOT workspace-scoped: the UPDATE matches on status and
+   * staleness only, because a crash can leave dangling streaming rows in any
+   * workspace.
+   *
+   * Bounded by recency (#183 review): only rows whose `updatedAt` is older than
+   * SWEEP_STREAMING_STALE_MS are swept. A live turn bumps `updatedAt` on every
+   * step, so an actively-streaming row never matches; this prevents a fresh
+   * replica's boot-sweep from aborting a turn another replica is still streaming
+   * in a multi-instance deploy.
+   *
+   * @param trx - Optional transaction to run the UPDATE in.
+   * @returns The number of rows swept, so the caller can log it.
+   */
+  async sweepStreaming(trx?: KyselyTransaction): Promise<number> {
+    const db = dbOrTx(this.db, trx);
+    const staleBefore = new Date(Date.now() - SWEEP_STREAMING_STALE_MS);
+    const rows = await db
+      .updateTable('aiChatMessages')
+      .set({ status: 'aborted', updatedAt: new Date() })
+      .where('status', '=', 'streaming')
+      .where('updatedAt', '<', staleBefore)
+      .returning('id')
+      .execute();
+    return rows.length;
+  }
 }
--- a/apps/server/src/database/types/db.d.ts
+++ b/apps/server/src/database/types/db.d.ts
@@ -620,6 +620,10 @@ export interface AiChatMessages {
  content: string | null;
  toolCalls: Json | null;
  metadata: Json | null;
+  // Turn lifecycle status (#183): 'streaming' | 'completed' | 'error' |
+  // 'aborted'. NULL on rows written before the status column existed; the app
+  // treats NULL as 'completed' (a settled, pre-status message).
+  status: string | null;
  tsv: string | null;
  createdAt: Generated<Timestamp>;
  updatedAt: Generated<Timestamp>;
--- a/apps/server/test/integration/ai-chat-message-status.int-spec.ts
+++ b/apps/server/test/integration/ai-chat-message-status.int-spec.ts
@@ -0,0 +1,270 @@
+import { Kysely } from 'kysely';
+import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
+import {
+  getTestDb,
+  destroyTestDb,
+  createWorkspace,
+  createUser,
+  createChat,
+  createMessage,
+} from './db';
+
+/**
+ * Integration coverage for the #183 step-granular durability primitives on
+ * AiChatMessageRepo: `update` (in-place patch by id+workspace, bumps updatedAt,
+ * returns the row; `onlyIfStreaming` race guard), `sweepStreaming` (crash
+ * recovery: flip STALE dangling 'streaming' rows to 'aborted') and the
+ * `findAllByChat` row cap. Real SQL against docmost_test, not a mock.
+ */
+describe('AiChatMessageRepo.update + sweepStreaming [integration]', () => {
+  let db: Kysely<any>;
+  let repo: AiChatMessageRepo;
+  let workspaceId: string;
+  let otherWorkspaceId: string;
+  let userId: string;
+  let chatId: string;
+  let otherChatId: string;
+
+  beforeAll(async () => {
+    db = getTestDb();
+    repo = new AiChatMessageRepo(db as any);
+    workspaceId = (await createWorkspace(db)).id;
+    otherWorkspaceId = (await createWorkspace(db)).id;
+    userId = (await createUser(db, workspaceId)).id;
+    chatId = (await createChat(db, { workspaceId, creatorId: userId })).id;
+    const otherUser = await createUser(db, otherWorkspaceId);
+    otherChatId = (
+      await createChat(db, {
+        workspaceId: otherWorkspaceId,
+        creatorId: otherUser.id,
+      })
+    ).id;
+  });
+
+  afterAll(async () => {
+    await destroyTestDb();
+  });
+
+  it('update patches content/status/metadata and bumps updatedAt', async () => {
+    const seeded = await repo.insert({
+      chatId,
+      workspaceId,
+      userId,
+      role: 'assistant',
+      content: '',
+      status: 'streaming',
+      metadata: { parts: [] } as never,
+    });
+    const before = seeded.updatedAt;
+    // Sleep briefly so the update's `updatedAt` is measurably later than the insert's.
+    await new Promise((r) => setTimeout(r, 5));
+
+    const updated = await repo.update(seeded.id, workspaceId, {
+      content: 'final answer',
+      status: 'completed',
+      metadata: { parts: [{ type: 'text', text: 'final answer' }] },
+    });
+
+    expect(updated).toBeDefined();
+    expect(updated!.content).toBe('final answer');
+    expect(updated!.status).toBe('completed');
+    expect((updated!.metadata as any).parts).toHaveLength(1);
+    // NOTE(review): strictly-later assumes the insert's updatedAt and the app clock agree — confirm.
+    expect(new Date(updated!.updatedAt).getTime()).toBeGreaterThan(
+      new Date(before).getTime(),
+    );
+  });
+
+  it('onlyIfStreaming update is a NO-OP once the row is finalized (race guard)', async () => {
+    // Reproduce the step-update-vs-finalize race (#183 review): the row is
+    // finalized to 'completed', then a LATE per-step 'streaming' update lands.
+    // With `onlyIfStreaming` it must match nothing and leave the finalized row
+    // untouched (no clobber back to 'streaming', no lost usage).
+    const seeded = await repo.insert({
+      chatId,
+      workspaceId,
+      userId,
+      role: 'assistant',
+      content: 'partial',
+      status: 'streaming',
+    });
+    // Terminal finalize (unguarded) wins.
+    await repo.update(seeded.id, workspaceId, {
+      content: 'final answer',
+      status: 'completed',
+      metadata: { usage: { totalTokens: 42 } } as never,
+    });
+    // A straggler per-step update arrives AFTER finalize.
+    const late = await repo.update(
+      seeded.id,
+      workspaceId,
+      { content: 'partial', status: 'streaming', metadata: {} as never },
+      { onlyIfStreaming: true },
+    );
+    expect(late).toBeUndefined(); // matched no 'streaming' row -> no-op
+    const rows = await repo.findAllByChat(chatId, workspaceId);
+    const row = rows.find((r) => r.id === seeded.id)!;
+    expect(row.status).toBe('completed'); // NOT clobbered back to streaming
+    expect(row.content).toBe('final answer');
+    expect((row.metadata as any).usage.totalTokens).toBe(42); // usage preserved
+  });
+
+  it('update is workspace-scoped: a foreign workspace id matches nothing', async () => {
+    const seeded = await repo.insert({
+      chatId,
+      workspaceId,
+      userId,
+      role: 'assistant',
+      content: 'orig',
+      status: 'streaming',
+    });
+    const res = await repo.update(seeded.id, otherWorkspaceId, {
+      status: 'completed',
+    });
+    expect(res).toBeUndefined();
+    // The row in the real workspace is untouched.
+    const rows = await repo.findAllByChat(chatId, workspaceId);
+    const stillThere = rows.find((r) => r.id === seeded.id);
+    expect(stillThere!.status).toBe('streaming');
+    // Clean up so it does not pollute the sweep test below.
+    await repo.update(seeded.id, workspaceId, { status: 'completed' });
+  });
+
+  // Backdate a row's updatedAt so it qualifies as a STALE streaming row (the
+  // sweep only flips rows untouched for >10 minutes — a live turn bumps
+  // updatedAt every step, so it would never match).
+  async function backdateUpdatedAt(
+    id: string,
+    minutesAgo: number,
+  ): Promise<void> {
+    await db
+      .updateTable('aiChatMessages')
+      .set({ updatedAt: new Date(Date.now() - minutesAgo * 60 * 1000) })
+      .where('id', '=', id)
+      .execute();
+  }
+
+  it('sweepStreaming flips STALE dangling streaming rows to aborted and counts them', async () => {
+    // Two dangling streaming rows in our workspace + one in another workspace —
+    // all backdated past the staleness threshold so the sweep picks them up.
+    const a = await createMessage(db, {
+      workspaceId,
+      chatId,
+      role: 'assistant',
+      status: 'streaming',
+    });
+    const b = await createMessage(db, {
+      workspaceId,
+      chatId,
+      role: 'assistant',
+      status: 'streaming',
+    });
+    const other = await createMessage(db, {
+      workspaceId: otherWorkspaceId,
+      chatId: otherChatId,
+      role: 'assistant',
+      status: 'streaming',
+    });
+    await backdateUpdatedAt(a.id, 20);
+    await backdateUpdatedAt(b.id, 20);
+    await backdateUpdatedAt(other.id, 20);
+
+    // A settled row must NOT be touched.
+    const done = await createMessage(db, {
+      workspaceId,
+      chatId,
+      role: 'assistant',
+      status: 'completed',
+    });
+    // A legacy NULL-status row must NOT be touched.
+    const legacy = await createMessage(db, {
+      workspaceId,
+      chatId,
+      role: 'assistant',
+      status: null,
+    });
+
+    const swept = await repo.sweepStreaming();
+    // At least the 3 stale streaming rows we created (2 here + 1 in the other ws).
+    expect(swept).toBeGreaterThanOrEqual(3);
+
+    const rows = await repo.findAllByChat(chatId, workspaceId);
+    const byId = new Map(rows.map((r) => [r.id, r]));
+    expect(byId.get(a.id)!.status).toBe('aborted');
+    expect(byId.get(b.id)!.status).toBe('aborted');
+    expect(byId.get(done.id)!.status).toBe('completed');
+    expect(byId.get(legacy.id)!.status).toBeNull();
+
+    // Idempotent: a second sweep must not re-touch rows that are already
+    // 'aborted'. The sweep's return value is a GLOBAL count (it may include
+    // rows this test did not seed), so it is not asserted; instead BOTH seeded
+    // rows must keep their status AND the updatedAt stamped by the first sweep.
+    await repo.sweepStreaming();
+    const rows2 = await repo.findAllByChat(chatId, workspaceId);
+    const byId2 = new Map(rows2.map((r) => [r.id, r]));
+    for (const id of [a.id, b.id]) {
+      expect(byId2.get(id)!.status).toBe('aborted');
+      expect(new Date(byId2.get(id)!.updatedAt).getTime()).toBe(
+        new Date(byId.get(id)!.updatedAt).getTime(),
+      );
+    }
+  });
+
+  it('sweepStreaming does NOT sweep a FRESH streaming row (recency bound, #183 review)', async () => {
+    // A row that is actively streaming (recent updatedAt) must survive the sweep:
+    // a fresh replica's boot-sweep must never abort a turn another replica is
+    // still streaming in a multi-instance deploy.
+    const fresh = await createMessage(db, {
+      workspaceId,
+      chatId,
+      role: 'assistant',
+      status: 'streaming',
+    });
+    // A STALE streaming row created alongside it IS swept — proving the sweep
+    // ran and the only difference is recency.
+    const stale = await createMessage(db, {
+      workspaceId,
+      chatId,
+      role: 'assistant',
+      status: 'streaming',
+    });
+    await backdateUpdatedAt(stale.id, 20);
+
+    await repo.sweepStreaming();
+
+    const rows = await repo.findAllByChat(chatId, workspaceId);
+    const byId = new Map(rows.map((r) => [r.id, r]));
+    // Fresh (recently-updated) streaming row is left untouched...
+    expect(byId.get(fresh.id)!.status).toBe('streaming');
+    // ...while the stale one alongside it was swept to 'aborted'.
+    expect(byId.get(stale.id)!.status).toBe('aborted');
+  });
+
+  it('findAllByChat caps the result, keeping the NEWEST messages in order (#183 review)', async () => {
+    // A dedicated chat so the cap test is independent of the rows above.
+    const cappedChat = (
+      await createChat(db, { workspaceId, creatorId: userId })
+    ).id;
+    const base = Date.now();
+    // Three messages at strictly increasing timestamps.
+    await createMessage(db, {
+      workspaceId,
+      chatId: cappedChat,
+      content: 'm1-oldest',
+      createdAt: new Date(base),
+    });
+    await createMessage(db, {
+      workspaceId,
+      chatId: cappedChat,
+      content: 'm2',
+      createdAt: new Date(base + 1000),
+    });
+    await createMessage(db, {
+      workspaceId,
+      chatId: cappedChat,
+      content: 'm3-newest',
+      createdAt: new Date(base + 2000),
+    });
+
+    // Cap of 2 -> the OLDEST message is dropped; the newest two stay, in
+    // chronological order (oldest -> newest).
+    const capped = await repo.findAllByChat(cappedChat, workspaceId, 2);
+    expect(capped.map((r) => r.content)).toEqual(['m2', 'm3-newest']);
+
+    // With a cap well above the row count, all three come back in order.
+    const all = await repo.findAllByChat(cappedChat, workspaceId, 100);
+    expect(all.map((r) => r.content)).toEqual(['m1-oldest', 'm2', 'm3-newest']);
+  });
+});
--- a/apps/server/test/integration/db.ts
+++ b/apps/server/test/integration/db.ts
@@ -104,7 +104,8 @@ export async function createWorkspace(
      name: overrides.name ?? `ws-${suffix}`,
      // hostname is uniquely constrained; keep it unique per workspace.
      hostname: `host-${suffix}`,
-      settings: overrides.settings === undefined ? null : (overrides.settings as any),
+      settings:
+        overrides.settings === undefined ? null : (overrides.settings as any),
    })
    .returning(['id', 'settings'])
    .executeTakeFirstOrThrow();
@@ -226,3 +227,37 @@ export async function createChat(
    .executeTakeFirstOrThrow();
  return { id: row.id as string };
 }
+
+export async function createMessage(
+  db: Kysely<any>,
+  args: {
+    workspaceId: string;
+    chatId: string;
+    userId?: string | null;
+    role?: string;
+    content?: string | null;
+    status?: string | null;
+    metadata?: unknown;
+    // Explicit timestamp so a test can control message ORDER (the default DB
+    // now() can tie within a millisecond, and the v4 id is not time-ordered).
+    createdAt?: Date;
+  },
+): Promise<{ id: string }> {
+  const id = randomUUID();
+  const row = await db
+    .insertInto('aiChatMessages')
+    .values({
+      id,
+      workspaceId: args.workspaceId,
+      chatId: args.chatId,
+      userId: args.userId ?? null,
+      role: args.role ?? 'assistant',
+      content: args.content ?? null,
+      status: args.status ?? null,
+      metadata: (args.metadata ?? null) as any,
+      ...(args.createdAt ? { createdAt: args.createdAt } : {}),
+    })
+    .returning(['id'])
+    .executeTakeFirstOrThrow();
+  return { id: row.id as string };
+}