feat(ai-chat): persistent history as source of truth — step durability + server export (#183)

The chat lived in inconsistent paradigms (in-memory stream + client export vs. DB-as-context), which made export flaky and lost the assistant answer if the process died mid-turn. Make the DB the single source of truth. A. STEP-GRANULAR DURABILITY (server) - ai_chat_messages gains a nullable `status` column (migration; NULL = legacy = completed). The assistant row is now INSERTED UPFRONT as `status:'streaming'` and UPDATEd on every onStepFinish with all finished steps (text + tool calls + tool RESULTS), then finalized once to completed/error/aborted on the terminal callback. So a process death mid-turn keeps every finished step; a startup sweep (OnModuleInit → sweepStreaming) flips any dangling 'streaming' row to 'aborted'. The write path no longer depends on a live socket. - Pure exported `flushAssistant(steps, inProgressText, status, extra?)` builds the persist payload (metadata.parts byte-identical to the old builder), so a future background worker can call the same path. AiChatMessageRepo gains `update`, `sweepStreaming`, and `findAllByChat`. - consumeStream drain, external-MCP client close-once, SSE heartbeat preserved. B. SERVER-SIDE EXPORT - New pure `chat-markdown.util.ts` renders Markdown from DB rows ONLY (server port of the client builder). Because A persists the in-progress row, the export now includes an interrupted turn up to its last finished step (flagged "still generating"). `POST /ai-chat/export` (owner-gated via assertOwnedChat, workspace-scoped) returns it; `lang` accepts a full client locale tag ('en-US'/'ru-RU') and is normalized server-side (normalizeLang) — a strict @IsIn(['en','ru']) DTO rejected the real client's i18n.language with a 400, caught in real-browser testing. - Client: handleCopy calls the endpoint; `canExport = !!activeChatId`. The whole liveThreadRef/liveStateRef/onLiveContentChange/hasLiveContent hybrid (and the client chat-markdown util + test) is removed — the server is now authoritative. Tests: flushAssistant unit (status shapes + parts parity), chat-markdown.util unit (incl. legacy NULL-status + interrupted note + ru + normalizeLang locale tags), controller export wiring + owner-gate, integration update/sweepStreaming. Verified: server build + 318 ai-chat unit + 3 integration; client tsc + 157 ai-chat unit; and END-TO-END in a real browser — a chat turn persists mid-stream and the Copy button exports the DB-sourced markdown (showing the in-progress row), HTTP 200 after the locale fix. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-25 06:05:26 +03:00
parent 27c91e4a69
commit e7b719bbb8
19 changed files with 1500 additions and 1408 deletions
--- a/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
@@ -0,0 +1,92 @@
+import { ForbiddenException } from '@nestjs/common';
+import { AiChatController } from './ai-chat.controller';
+import type { User, Workspace } from '@docmost/db/types/entity.types';
+
+/**
+ * Wiring spec for the #183 `POST /ai-chat/export` endpoint. It must: own-gate via
+ * the chat lookup (workspace-scoped + creator-owned), load the FULL transcript
+ * via findAllByChat, render server-side, and return `{ markdown }`. Exercised by
+ * instantiating the controller with hand-rolled mocks — no Nest graph, no DB.
+ */
+describe('AiChatController.export', () => {
+  const user = { id: 'u1' } as User;
+  const workspace = { id: 'ws1' } as Workspace;
+
+  function makeController(
+    over: {
+      chat?: unknown;
+      rows?: unknown[];
+    } = {},
+  ) {
+    const chat =
+      'chat' in over
+        ? over.chat
+        : { id: 'c1', creatorId: 'u1', title: 'My chat' };
+    const aiChatRepo = {
+      findById: jest.fn().mockResolvedValue(chat),
+    };
+    const aiChatMessageRepo = {
+      findAllByChat: jest.fn().mockResolvedValue(
+        over.rows ?? [
+          {
+            id: 'm1',
+            role: 'user',
+            content: 'hi',
+            metadata: null,
+            status: null,
+          },
+          {
+            id: 'm2',
+            role: 'assistant',
+            content: 'hello',
+            metadata: null,
+            status: 'completed',
+          },
+        ],
+      ),
+    };
+    const controller = new AiChatController(
+      {} as never,
+      aiChatRepo as never,
+      aiChatMessageRepo as never,
+      {} as never,
+    );
+    return { controller, aiChatRepo, aiChatMessageRepo };
+  }
+
+  it('renders the full transcript and returns { markdown }', async () => {
+    const { controller, aiChatMessageRepo } = makeController();
+    const res = await controller.export({ chatId: 'c1' }, user, workspace);
+    expect(aiChatMessageRepo.findAllByChat).toHaveBeenCalledWith('c1', 'ws1');
+    expect(res.markdown).toContain('# My chat');
+    expect(res.markdown).toContain('## 1. You');
+    expect(res.markdown).toContain('## 2. AI agent');
+  });
+
+  it('forbids a chat the user does not own', async () => {
+    const { controller } = makeController({
+      chat: { id: 'c1', creatorId: 'someone-else', title: 'X' },
+    });
+    await expect(
+      controller.export({ chatId: 'c1' }, user, workspace),
+    ).rejects.toBeInstanceOf(ForbiddenException);
+  });
+
+  it('forbids a missing / foreign-workspace chat', async () => {
+    const { controller } = makeController({ chat: null });
+    await expect(
+      controller.export({ chatId: 'c1' }, user, workspace),
+    ).rejects.toBeInstanceOf(ForbiddenException);
+  });
+
+  it('localizes labels when lang=ru is passed', async () => {
+    const { controller } = makeController();
+    const res = await controller.export(
+      { chatId: 'c1', lang: 'ru' },
+      user,
+      workspace,
+    );
+    expect(res.markdown).toContain('## 1. Вы');
+    expect(res.markdown).toContain('## 2. ИИ-агент');
+  });
+});
--- a/apps/server/src/core/ai-chat/ai-chat.controller.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts
@@ -20,7 +20,7 @@ import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
 import { AuthUser } from '../../common/decorators/auth-user.decorator';
 import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
 import { SkipTransform } from '../../common/decorators/skip-transform.decorator';
-import { User, Workspace } from '@docmost/db/types/entity.types';
+import { AiChat, User, Workspace } from '@docmost/db/types/entity.types';
 import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
 import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
 import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
@@ -31,10 +31,12 @@ import { AiChatService, AiChatStreamBody } from './ai-chat.service';
 import { AiTranscriptionService } from './ai-transcription.service';
 import {
  ChatIdDto,
+  ExportChatDto,
  GetChatMessagesDto,
  RenameChatDto,
 } from './dto/ai-chat.dto';
 import { describeProviderError } from '../../integrations/ai/ai-error.util';
+import { buildChatMarkdown } from './chat-markdown.util';

 /**
 * Per-user AI chat API (§6.1). Routes are POST to match this codebase's
@@ -81,6 +83,35 @@ export class AiChatController {
    );
  }

+  /**
+   * Export a chat to Markdown (#183). The DB is the single source of truth: the
+   * whole transcript is loaded (oldest -> newest) and rendered server-side. Now
+   * that the assistant row is persisted upfront and per step, an interrupted
+   * turn is included up to its last finished step. Workspace-scoped and owner-
+   * gated via assertOwnedChat (same as the other read endpoints). Returns
+   * `{ markdown }`. `lang` localizes the few fixed labels (default English).
+   */
+  @HttpCode(HttpStatus.OK)
+  @Post('export')
+  async export(
+    @Body() dto: ExportChatDto,
+    @AuthUser() user: User,
+    @AuthWorkspace() workspace: Workspace,
+  ): Promise<{ markdown: string }> {
+    const chat = await this.assertOwnedChat(dto.chatId, user, workspace);
+    const rows = await this.aiChatMessageRepo.findAllByChat(
+      dto.chatId,
+      workspace.id,
+    );
+    const markdown = buildChatMarkdown({
+      title: chat.title ?? null,
+      chatId: dto.chatId,
+      rows,
+      lang: dto.lang ?? 'en',
+    });
+    return { markdown };
+  }
+
  /** Rename a chat. */
  @HttpCode(HttpStatus.OK)
  @Post('rename')
@@ -90,7 +121,11 @@ export class AiChatController {
    @AuthWorkspace() workspace: Workspace,
  ) {
    await this.assertOwnedChat(dto.chatId, user, workspace);
-    await this.aiChatRepo.update(dto.chatId, { title: dto.title }, workspace.id);
+    await this.aiChatRepo.update(
+      dto.chatId,
+      { title: dto.title },
+      workspace.id,
+    );
    return { success: true };
  }

@@ -145,7 +180,10 @@ export class AiChatController {
    // Resolve the agent role for this turn BEFORE hijack: existing chats read it
    // from ai_chats.role_id (authoritative), a new chat from body.roleId. The
    // role drives both the persona and the optional model override below.
-    const role = await this.aiChatService.resolveRoleForRequest(workspace, body);
+    const role = await this.aiChatService.resolveRoleForRequest(
+      workspace,
+      body,
+    );

    // Resolve the model (applying the role's optional override) BEFORE hijack so
    // an unconfigured provider — including a role pointing at an unconfigured
@@ -232,7 +270,9 @@ export class AiChatController {
    let file = null;
    try {
      // Whisper hard-caps uploads at 25MB; allow a single file.
-      file = await req.file({ limits: { fileSize: 25 * 1024 * 1024, files: 1 } });
+      file = await req.file({
+        limits: { fileSize: 25 * 1024 * 1024, files: 1 },
+      });
    } catch (err: any) {
      if (err?.statusCode === 413) {
        throw new BadRequestException('Audio file too large (max 25MB)');
@@ -283,11 +323,12 @@ export class AiChatController {
    chatId: string,
    user: User,
    workspace: Workspace,
-  ): Promise<void> {
+  ): Promise<AiChat> {
    const chat = await this.aiChatRepo.findById(chatId, workspace.id);
    if (!chat || chat.creatorId !== user.id) {
      throw new ForbiddenException();
    }
+    return chat;
  }
 }

--- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
@@ -5,6 +5,7 @@ import {
  rowToUiMessage,
  prepareAgentStep,
  buildPartialAssistantRecord,
+  flushAssistant,
  chatStreamMetadata,
  accumulateStepUsage,
  MAX_AGENT_STEPS,
@@ -94,8 +95,12 @@ describe('assistantParts', () => {
    const steps = [
      {
        text: '',
-        toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
-        toolResults: [{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } }],
+        toolCalls: [
+          { toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
+        ],
+        toolResults: [
+          { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
+        ],
      },
    ];
    const parts = assistantParts(steps, '') as AnyPart[];
@@ -109,7 +114,9 @@ describe('assistantParts', () => {
    const steps = [
      {
        text: '',
-        toolCalls: [{ toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } }],
+        toolCalls: [
+          { toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } },
+        ],
        toolResults: [],
      },
    ];
@@ -136,7 +143,8 @@ describe('assistantParts', () => {
    ];
    const parts = assistantParts(steps, '') as AnyPart[];
    const toolParts = parts.filter(
-      (p) => typeof p.type === 'string' && (p.type as string).startsWith('tool-'),
+      (p) =>
+        typeof p.type === 'string' && (p.type as string).startsWith('tool-'),
    );
    expect(toolParts).toHaveLength(0);
  });
@@ -246,16 +254,30 @@ describe('buildPartialAssistantRecord', () => {
  type AnyPart = Record<string, unknown>;

  it('records an empty turn with the error text (preserves old behavior)', () => {
-    const rec = buildPartialAssistantRecord([], '', 'error', '401: Unauthorized');
+    const rec = buildPartialAssistantRecord(
+      [],
+      '',
+      'error',
+      '401: Unauthorized',
+    );
    expect(rec).toEqual({
      text: '',
      toolCalls: null,
-      metadata: { finishReason: 'error', parts: [], error: '401: Unauthorized' },
+      metadata: {
+        finishReason: 'error',
+        parts: [],
+        error: '401: Unauthorized',
+      },
    });
  });

  it('persists in-progress text (no finished steps) as the partial answer', () => {
-    const rec = buildPartialAssistantRecord([], 'partial answer', 'error', 'boom');
+    const rec = buildPartialAssistantRecord(
+      [],
+      'partial answer',
+      'error',
+      'boom',
+    );
    expect(rec.text).toBe('partial answer');
    expect(rec.metadata.parts).toEqual([
      { type: 'text', text: 'partial answer' },
@@ -275,7 +297,12 @@ describe('buildPartialAssistantRecord', () => {
        ],
      },
    ];
-    const rec = buildPartialAssistantRecord(steps, ' and then', 'error', 'boom');
+    const rec = buildPartialAssistantRecord(
+      steps,
+      ' and then',
+      'error',
+      'boom',
+    );
    const parts = rec.metadata.parts as AnyPart[];
    // The finished step's text part is present.
    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
@@ -284,7 +311,10 @@ describe('buildPartialAssistantRecord', () => {
    expect(toolPart).toBeDefined();
    expect(toolPart!.state).toBe('output-available');
    // The in-progress text is appended LAST so the parts match the stream order.
-    expect(parts[parts.length - 1]).toEqual({ type: 'text', text: ' and then' });
+    expect(parts[parts.length - 1]).toEqual({
+      type: 'text',
+      text: ' and then',
+    });
    expect(rec.text).toBe('looked it up and then');
    expect(rec.toolCalls).not.toBeNull();
    expect(rec.metadata.error).toBe('boom');
@@ -298,6 +328,107 @@ describe('buildPartialAssistantRecord', () => {
  });
 });

+/**
+ * flushAssistant (#183): the PURE row builder behind the step-granular durable
+ * write path. It runs identically for the upfront insert (empty steps,
+ * 'streaming'), every per-step update, and the terminal finalize — so a future
+ * background worker can call the same function. These tests pin the four status
+ * shapes and, critically, that `metadata.parts` stays IDENTICAL to the old
+ * buildPartialAssistantRecord / assistantParts output (rowToUiMessage/findRecent
+ * depend on it).
+ */
+describe('flushAssistant', () => {
+  type AnyPart = Record<string, unknown>;
+
+  const toolStep = {
+    text: 'looked it up',
+    toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
+    toolResults: [
+      { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
+    ],
+  };
+
+  it('upfront seed: empty streaming row (no content, no toolCalls, empty parts)', () => {
+    const f = flushAssistant([], '', 'streaming');
+    expect(f.status).toBe('streaming');
+    expect(f.content).toBe('');
+    expect(f.toolCalls).toBeNull();
+    expect(f.metadata.parts).toEqual([]);
+    // No finishReason while streaming (it is not a terminal state).
+    expect('finishReason' in f.metadata).toBe(false);
+  });
+
+  it('streaming update folds in finished steps but keeps status streaming', () => {
+    const f = flushAssistant([toolStep], '', 'streaming');
+    expect(f.status).toBe('streaming');
+    expect(f.content).toBe('looked it up');
+    const parts = f.metadata.parts as AnyPart[];
+    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
+    const toolPart = parts.find((p) => p.type === 'tool-getPage');
+    expect(toolPart!.state).toBe('output-available');
+    expect(f.toolCalls).not.toBeNull();
+  });
+
+  it('completed: attaches finishReason + normalized usage + contextTokens', () => {
+    const f = flushAssistant([toolStep], '', 'completed', {
+      finishReason: 'stop',
+      usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
+      contextTokens: 15,
+    });
+    expect(f.status).toBe('completed');
+    expect(f.metadata.finishReason).toBe('stop');
+    expect(f.metadata.usage).toEqual({
+      inputTokens: 10,
+      outputTokens: 5,
+      totalTokens: 15,
+      reasoningTokens: undefined,
+    });
+    expect(f.metadata.contextTokens).toBe(15);
+  });
+
+  it('error: records the error and a derived finishReason', () => {
+    const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' });
+    expect(f.status).toBe('error');
+    expect(f.content).toBe('partial answer');
+    expect(f.metadata.error).toBe('boom');
+    // Derives finishReason from the terminal status when none is supplied.
+    expect(f.metadata.finishReason).toBe('error');
+    expect(f.metadata.parts).toEqual([
+      { type: 'text', text: 'partial answer' },
+    ]);
+  });
+
+  it('aborted: in-progress text appended last, no error key', () => {
+    const f = flushAssistant([toolStep], ' and then', 'aborted');
+    expect(f.status).toBe('aborted');
+    expect(f.metadata.finishReason).toBe('aborted');
+    expect('error' in f.metadata).toBe(false);
+    expect(f.content).toBe('looked it up and then');
+    const parts = f.metadata.parts as AnyPart[];
+    expect(parts[parts.length - 1]).toEqual({
+      type: 'text',
+      text: ' and then',
+    });
+  });
+
+  it('metadata.parts parity with buildPartialAssistantRecord (error path)', () => {
+    const flushed = flushAssistant([toolStep], ' and then', 'error', {
+      error: 'boom',
+    });
+    const legacy = buildPartialAssistantRecord(
+      [toolStep],
+      ' and then',
+      'error',
+      'boom',
+    );
+    // The whole metadata block (parts + finishReason + error) must match the
+    // legacy partial-record shape so rebuilt history is unchanged.
+    expect(flushed.metadata).toEqual(legacy.metadata);
+    expect(flushed.content).toBe(legacy.text);
+    expect(flushed.toolCalls).toEqual(legacy.toolCalls);
+  });
+});
+
 /**
 * chatStreamMetadata: attach metadata to the streamed assistant UI message per
 * part type — `chatId` on `start` (so the client adopts the real created chat id
@@ -319,10 +450,20 @@ describe('chatStreamMetadata', () => {
      chatStreamMetadata(
        { type: 'finish-step', usage: { outputTokens: 100 } },
        'chat-1',
-        { inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 },
+        {
+          inputTokens: 500,
+          outputTokens: 220,
+          totalTokens: 720,
+          reasoningTokens: 30,
+        },
      ),
    ).toEqual({
-      usage: { inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 },
+      usage: {
+        inputTokens: 500,
+        outputTokens: 220,
+        totalTokens: 720,
+        reasoningTokens: 30,
+      },
    });
  });

@@ -394,8 +535,18 @@ describe('accumulateStepUsage', () => {
  it('sums every field across two steps', () => {
    expect(
      accumulateStepUsage(
-        { inputTokens: 500, outputTokens: 100, totalTokens: 600, reasoningTokens: 30 },
-        { inputTokens: 520, outputTokens: 80, totalTokens: 600, reasoningTokens: 10 },
+        {
+          inputTokens: 500,
+          outputTokens: 100,
+          totalTokens: 600,
+          reasoningTokens: 30,
+        },
+        {
+          inputTokens: 520,
+          outputTokens: 80,
+          totalTokens: 600,
+          reasoningTokens: 10,
+        },
      ),
    ).toEqual({
      inputTokens: 1020,
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -1,4 +1,9 @@
-import { ForbiddenException, Injectable, Logger } from '@nestjs/common';
+import {
+  ForbiddenException,
+  Injectable,
+  Logger,
+  OnModuleInit,
+} from '@nestjs/common';
 import { FastifyReply } from 'fastify';
 import {
  streamText,
@@ -60,7 +65,10 @@ export function prepareAgentStep(
  system: string,
 ): { toolChoice: 'none'; system: string } | undefined {
  if (stepNumber >= MAX_AGENT_STEPS - 1) {
-    return { toolChoice: 'none', system: `${system}\n\n${FINAL_STEP_INSTRUCTION}` };
+    return {
+      toolChoice: 'none',
+      system: `${system}\n\n${FINAL_STEP_INSTRUCTION}`,
+    };
  }
  return undefined;
 }
@@ -121,7 +129,7 @@ export interface AiChatStreamArgs {
 *                    can be rebuilt for `convertToModelMessages`.
 */
@Injectable()
-export class AiChatService {
+export class AiChatService implements OnModuleInit {
  private readonly logger = new Logger(AiChatService.name);

  constructor(
@@ -136,6 +144,32 @@ export class AiChatService {
    private readonly pageAccess: PageAccessService,
  ) {}

+  /**
+   * Crash-recovery sweep on server start (#183): any assistant row left in the
+   * 'streaming' state is the relic of a turn whose process died before it
+   * reached a terminal status. Flip those to 'aborted' so history/export show
+   * them settled (with whatever finished steps were already persisted) instead
+   * of perpetually "streaming". Best-effort: a sweep failure is logged but must
+   * never block server startup.
+   */
+  async onModuleInit(): Promise<void> {
+    try {
+      const swept = await this.aiChatMessageRepo.sweepStreaming();
+      if (swept > 0) {
+        this.logger.log(
+          `Startup sweep: marked ${swept} dangling 'streaming' assistant ` +
+            `message(s) as 'aborted'.`,
+        );
+      }
+    } catch (err) {
+      this.logger.warn(
+        `Startup sweep of dangling 'streaming' messages failed: ${
+          err instanceof Error ? err.message : 'unknown error'
+        }`,
+      );
+    }
+  }
+
  /**
   * Resolve the agent role that applies to this stream request, scoped to the
   * workspace and soft-delete aware. For an EXISTING chat the role is read from
@@ -259,9 +293,7 @@ export class AiChatService {
      content: incomingText,
      // jsonb column: UIMessage parts are JSON-serializable at runtime but not
      // structurally `JsonValue`, so cast through unknown.
-      metadata: (incoming?.parts
-        ? { parts: incoming.parts }
-        : null) as never,
+      metadata: (incoming?.parts ? { parts: incoming.parts } : null) as never,
    });

    // Rebuild the conversation from persisted history (not the client payload),
@@ -347,31 +379,6 @@ export class AiChatService {
      );
    };

-    // Persist the assistant message. Used by onFinish (full result) and the
-    // abort/error paths (partial result). Guarded so we persist at most once.
-    let persisted = false;
-    const persistAssistant = async (data: {
-      text: string;
-      toolCalls: unknown;
-      metadata: Record<string, unknown>;
-    }): Promise<void> => {
-      if (persisted) return;
-      persisted = true;
-      try {
-        await this.aiChatMessageRepo.insert({
-          chatId,
-          workspaceId: workspace.id,
-          userId: user.id,
-          role: 'assistant',
-          content: data.text ?? '',
-          toolCalls: (data.toolCalls ?? null) as never,
-          metadata: data.metadata as never,
-        });
-      } catch (err) {
-        this.logger.error('Failed to persist assistant message', err as Error);
-      }
-    };
-
    // Accumulate the turn's streamed output so a provider error / disconnect can
    // persist the PARTIAL answer the user already saw — the SDK's onError/onAbort
    // callbacks don't hand us the in-progress text. `capturedSteps` holds finished
@@ -380,6 +387,94 @@ export class AiChatService {
    const capturedSteps: StepLike[] = [];
    let inProgressText = '';

+    // Step-granular durability (#183): create the assistant row UPFRONT in the
+    // 'streaming' state (before any token), then UPDATE it as each step finishes
+    // and finalize it once on the terminal callback. If the process dies
+    // mid-turn the row survives with every finished step already persisted; the
+    // startup sweep (sweepStreaming) later flips a dangling 'streaming' row to
+    // 'aborted'. The DB is now the single source of truth for the turn — the
+    // socket is never required for the write path. A failed upfront insert is
+    // logged and leaves assistantId undefined; the per-step/terminal updates then
+    // no-op (guarded below) so the turn still streams to the user.
+    let assistantId: string | undefined;
+    try {
+      const seed = flushAssistant([], '', 'streaming');
+      const seeded = await this.aiChatMessageRepo.insert({
+        chatId,
+        workspaceId: workspace.id,
+        userId: user.id,
+        role: 'assistant',
+        content: seed.content,
+        // jsonb columns: cast through never (same as the user insert above).
+        toolCalls: (seed.toolCalls ?? null) as never,
+        metadata: seed.metadata as never,
+        status: seed.status,
+      });
+      assistantId = seeded?.id;
+    } catch (err) {
+      this.logger.error('Failed to insert upfront assistant row', err as Error);
+    }
+
+    // Per-step (non-terminal) update: persist the finished steps the moment a
+    // step ends. Tolerant — a failed update is logged and swallowed so it never
+    // throws into the stream. Keeps status 'streaming'.
+    const updateStreaming = async (): Promise<void> => {
+      if (!assistantId) return;
+      try {
+        await this.aiChatMessageRepo.update(
+          assistantId,
+          workspace.id,
+          flushAssistant(capturedSteps, '', 'streaming'),
+        );
+      } catch (err) {
+        this.logger.warn(
+          `Failed to update streaming assistant row: ${
+            err instanceof Error ? err.message : 'unknown error'
+          }`,
+        );
+      }
+    };
+
+    // Terminal finalize: write the completed/error/aborted row exactly once
+    // across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort
+    // callbacks — mirroring the pre-#183 persist-at-most-once guard for the
+    // TERMINAL status (the row may be updated many times with 'streaming' before
+    // this fires once).
+    let finalized = false;
+    const finalizeAssistant = async (
+      flushed: AssistantFlush,
+    ): Promise<void> => {
+      if (finalized) return;
+      finalized = true;
+      if (!assistantId) {
+        // The upfront insert failed: fall back to inserting the terminal row so
+        // the turn is not lost entirely.
+        try {
+          await this.aiChatMessageRepo.insert({
+            chatId,
+            workspaceId: workspace.id,
+            userId: user.id,
+            role: 'assistant',
+            content: flushed.content,
+            toolCalls: (flushed.toolCalls ?? null) as never,
+            metadata: flushed.metadata as never,
+            status: flushed.status,
+          });
+        } catch (err) {
+          this.logger.error(
+            'Failed to persist terminal assistant message',
+            err as Error,
+          );
+        }
+        return;
+      }
+      try {
+        await this.aiChatMessageRepo.update(assistantId, workspace.id, flushed);
+      } catch (err) {
+        this.logger.error('Failed to finalize assistant message', err as Error);
+      }
+    };
+
    // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure
    // first-chunk latency, the model-silent gap right before a disconnect, and
    // how many SSE heartbeats were written, so a Safari drop can be classified
@@ -395,144 +490,141 @@ export class AiChatService {
    let result: ReturnType<typeof streamText>;
    try {
      result = streamText({
-      model,
-      system,
-      messages,
-      tools,
-      // No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full
-      // page body for the write tools) are emitted as OUTPUT tokens, so a fixed
-      // cap would truncate complex tool calls mid-argument. Let the model use its
-      // natural per-step budget. (Cost/credit limits are an account concern, not
-      // something to enforce by silently breaking the agent.)
-      stopWhen: stepCountIs(MAX_AGENT_STEPS),
-      // Forced finalization: reserve the LAST allowed step for a text-only
-      // answer. Without this, a turn that spends all its steps on tool calls
-      // ends with no assistant text (an empty turn). prepareAgentStep forbids
-      // further tool calls and appends a synthesis instruction on that step,
-      // concatenated onto the original `system` so the persona is preserved.
-      prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
-      abortSignal: signal,
-      onChunk: ({ chunk }) => {
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
-        // output chunk means the stream is actively emitting bytes; track first
-        // + most-recent activity timestamps.
-        const now = Date.now();
-        firstModelChunkAt ??= now;
-        lastModelChunkAt = now;
-        // 'text-delta' is the assistant's prose; tool-call args are separate chunk
-        // types — so this mirrors exactly what streams to the client.
-        if (chunk.type === 'text-delta') inProgressText += chunk.text;
-      },
-      onStepFinish: (step) => {
-        // The finished step's full text is now in `step.text`; fold it in and reset
-        // the in-progress accumulator for the next step.
-        capturedSteps.push(step as StepLike);
-        inProgressText = '';
-      },
-      onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
-        // baseline for Safari comparison.
-        const diagNow = Date.now();
-        this.logger.log(
-          `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
-            `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
-            `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
-        );
-        await persistAssistant({
-          text,
-          toolCalls: serializeSteps(steps),
-          metadata: {
-            finishReason,
-            // Persist the turn's cumulative usage WITH reasoning tokens resolved
-            // from either the new `outputTokenDetails` or the deprecated top-level
-            // field, so reopened history / the Markdown export show the thinking
-            // token cost too.
-            usage: normalizeStreamUsage(totalUsage as StreamUsage) ?? totalUsage,
-            // Final-step usage = the context actually fed to the model on the last LLM
-            // call (full history + tool results) plus the answer it just generated.
-            // input+output of the FINAL step ≈ the conversation's CURRENT context size,
-            // distinct from totalUsage which sums every step (cumulative tokens spent).
-            contextTokens:
-              (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined,
-            // Persist the FULL set of UIMessage parts for the turn (text +
-            // tool-call/result), so the rebuilt history replays prior tool
-            // context to the model on later turns.
-            parts: assistantParts(steps, text),
-          },
-        });
-        // Lifecycle: release the external MCP clients leased for this turn.
-        await closeExternalClients();
-
-        // Generate the chat title for a freshly created chat AFTER the stream's
-        // provider call has completed — NOT concurrently with it. The z.ai coding
-        // endpoint stalls one of two concurrent requests to the same plan, which
-        // black-holed the chat stream (~300s headers timeout) when title
-        // generation raced it. Running it here (solo, fire-and-forget) avoids the
-        // race; never block the turn on it, swallow any error.
-        if (isNewChat && incomingText) {
-          void this.generateTitle(chatId, workspace.id, incomingText).catch(
-            (err) => {
-              this.logger.warn(
-                `Title generation failed: ${(err as Error)?.message ?? err}`,
-              );
-            },
+        model,
+        system,
+        messages,
+        tools,
+        // No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full
+        // page body for the write tools) are emitted as OUTPUT tokens, so a fixed
+        // cap would truncate complex tool calls mid-argument. Let the model use its
+        // natural per-step budget. (Cost/credit limits are an account concern, not
+        // something to enforce by silently breaking the agent.)
+        stopWhen: stepCountIs(MAX_AGENT_STEPS),
+        // Forced finalization: reserve the LAST allowed step for a text-only
+        // answer. Without this, a turn that spends all its steps on tool calls
+        // ends with no assistant text (an empty turn). prepareAgentStep forbids
+        // further tool calls and appends a synthesis instruction on that step,
+        // concatenated onto the original `system` so the persona is preserved.
+        prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
+        abortSignal: signal,
+        onChunk: ({ chunk }) => {
+          // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
+          // output chunk means the stream is actively emitting bytes; track first
+          // + most-recent activity timestamps.
+          const now = Date.now();
+          firstModelChunkAt ??= now;
+          lastModelChunkAt = now;
+          // 'text-delta' is the assistant's prose; tool-call args are separate chunk
+          // types — so this mirrors exactly what streams to the client.
+          if (chunk.type === 'text-delta') inProgressText += chunk.text;
+        },
+        onStepFinish: (step) => {
+          // The finished step's full text is now in `step.text`; fold it in and reset
+          // the in-progress accumulator for the next step.
+          capturedSteps.push(step as StepLike);
+          inProgressText = '';
+          // Step-granular durability (#183): persist this finished step (its text +
+          // tool calls + tool RESULTS) the moment it ends, so a process death after
+          // this point still recovers the step. Fire-and-forget but error-tolerant
+          // (updateStreaming logs + swallows) — never throw into the stream.
+          void updateStreaming();
+        },
+        onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
+          // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
+          // baseline for Safari comparison.
+          const diagNow = Date.now();
+          this.logger.log(
+            `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
+              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
+              `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
          );
-        }
-      },
-      onError: async ({ error }) => {
-        // NestJS Logger.error(message, stack?, context?): pass the real message
-        // (with statusCode when present) + the stack string, not the Error
-        // object, so the actual provider cause is clearly logged. Reuse the
-        // shared formatter so provider error formatting stays unified.
-        const e = error as { stack?: string };
-        const errorText = describeProviderError(error, String(error));
-        this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
-        // an error-terminated stream.
-        const diagNow = Date.now();
-        this.logger.warn(
-          `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
-            `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
-            `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
-        );
-        // Persist the PARTIAL answer streamed before the failure (text + any
-        // finished tool steps) WITH the error in metadata, so the turn shows what
-        // the user already saw plus the cause — not just a bare error.
-        await persistAssistant(
-          buildPartialAssistantRecord(
-            capturedSteps,
-            inProgressText,
-            'error',
-            errorText,
-          ),
-        );
-        await closeExternalClients();
-      },
-      onAbort: async ({ steps }) => {
-        const partialChars =
-          capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) +
-          inProgressText.length;
-        // Unlike onError/onFinish, this terminal path otherwise writes nothing, so
-        // an aborted turn (client disconnect / proxy drop / stop()) would be
-        // invisible in the logs. Log it (warn) so the abort is traceable.
-        this.logger.warn(
-          `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
-            `step(s), ${partialChars} chars partial text; persisting partial turn.`,
-        );
-        // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
-        // line — classifies the Safari drop.
-        const diagNow = Date.now();
-        this.logger.warn(
-          `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
-            `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
-            `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
-            `steps=${steps.length}`,
-        );
-        await persistAssistant(
-          buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),
-        );
-        await closeExternalClients();
-      },
+          // Finalize the assistant row (#183): the upfront 'streaming' row is
+          // UPDATEd to 'completed' with the turn's final text, cumulative usage and
+          // full UIMessage parts. We pass the SDK `steps` (which carry the final
+          // step's text) as the captured steps so metadata.parts matches the
+          // pre-#183 onFinish record exactly; `inProgressText` is '' here (the last
+          // step already finished). Final-step usage (usage.input+output) ≈ the
+          // conversation's CURRENT context size, distinct from totalUsage.
+          await finalizeAssistant(
+            flushAssistant(steps as StepLike[], '', 'completed', {
+              finishReason: finishReason as string,
+              usage: totalUsage as StreamUsage,
+              contextTokens:
+                (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
+                undefined,
+            }),
+          );
+          // Lifecycle: release the external MCP clients leased for this turn.
+          await closeExternalClients();
+
+          // Generate the chat title for a freshly created chat AFTER the stream's
+          // provider call has completed — NOT concurrently with it. The z.ai coding
+          // endpoint stalls one of two concurrent requests to the same plan, which
+          // black-holed the chat stream (~300s headers timeout) when title
+          // generation raced it. Running it here (solo, fire-and-forget) avoids the
+          // race; never block the turn on it, swallow any error.
+          if (isNewChat && incomingText) {
+            void this.generateTitle(chatId, workspace.id, incomingText).catch(
+              (err) => {
+                this.logger.warn(
+                  `Title generation failed: ${(err as Error)?.message ?? err}`,
+                );
+              },
+            );
+          }
+        },
+        onError: async ({ error }) => {
+          // NestJS Logger.error(message, stack?, context?): pass the real message
+          // (with statusCode when present) + the stack string, not the Error
+          // object, so the actual provider cause is clearly logged. Reuse the
+          // shared formatter so provider error formatting stays unified.
+          const e = error as { stack?: string };
+          const errorText = describeProviderError(error, String(error));
+          this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
+          // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
+          // an error-terminated stream.
+          const diagNow = Date.now();
+          this.logger.warn(
+            `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
+              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
+              `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
+          );
+          // Finalize the PARTIAL answer streamed before the failure (text + any
+          // finished tool steps) WITH the error in metadata, so the turn shows what
+          // the user already saw plus the cause — not just a bare error. Status
+          // 'error' (#183).
+          await finalizeAssistant(
+            flushAssistant(capturedSteps, inProgressText, 'error', {
+              error: errorText,
+            }),
+          );
+          await closeExternalClients();
+        },
+        onAbort: async ({ steps }) => {
+          const partialChars =
+            capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) +
+            inProgressText.length;
+          // Unlike onError/onFinish, this terminal path otherwise writes nothing, so
+          // an aborted turn (client disconnect / proxy drop / stop()) would be
+          // invisible in the logs. Log it (warn) so the abort is traceable.
+          this.logger.warn(
+            `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
+              `step(s), ${partialChars} chars partial text; persisting partial turn.`,
+          );
+          // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
+          // line — classifies the Safari drop.
+          const diagNow = Date.now();
+          this.logger.warn(
+            `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
+              `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
+              `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
+              `steps=${steps.length}`,
+          );
+          await finalizeAssistant(
+            flushAssistant(capturedSteps, inProgressText, 'aborted'),
+          );
+          await closeExternalClients();
+        },
      });

      // Drain the stream independently of the client socket so the turn always
@@ -652,7 +744,10 @@ export class AiChatService {
        'punctuation at the end.',
      prompt: firstMessage.slice(0, 2000),
    });
-    const title = text.trim().replace(/^["']|["']$/g, '').slice(0, 120);
+    const title = text
+      .trim()
+      .replace(/^["']|["']$/g, '')
+      .slice(0, 120);
    if (title) {
      await this.aiChatRepo.update(chatId, { title }, workspaceId);
    }
@@ -974,6 +1069,82 @@ export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
  return { id: row.id, role, parts: parts as UIMessage['parts'] };
 }

+/**
+ * The persisted-row patch shape produced by {@link flushAssistant}. It is the
+ * SAME shape the assistant repo insert/update consume (content + toolCalls +
+ * metadata) plus the lifecycle `status` column added in #183.
+ */
+export interface AssistantFlush {
+  content: string;
+  toolCalls: unknown;
+  metadata: Record<string, unknown>;
+  status: 'streaming' | 'completed' | 'error' | 'aborted';
+}
+
+/**
+ * PURE assistant-row builder (#183 step-granular durability). Given the turn's
+ * accumulated steps + the in-progress (not-yet-finished) text + the lifecycle
+ * status, it returns the row patch to persist. The SAME path runs for the
+ * upfront insert (empty steps, status 'streaming'), every per-step update, and
+ * the terminal finalize (completed/error/aborted) — and a future background
+ * worker can call it identically, so it must stay a pure function of its inputs
+ * (NO `this`, no IO).
+ *
+ * `metadata.parts` is built by the EXACT same logic the old
+ * buildPartialAssistantRecord used (assistantParts over finished steps, then the
+ * in-progress text appended as a trailing text part), so rowToUiMessage /
+ * findRecent keep replaying the turn unchanged. `metadata.finishReason`,
+ * `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
+ * only when provided/relevant, matching the pre-#183 onFinish/onError records.
+ */
+export function flushAssistant(
+  capturedSteps: ReadonlyArray<StepLike> | undefined,
+  inProgressText: string,
+  status: 'streaming' | 'completed' | 'error' | 'aborted',
+  extra?: {
+    finishReason?: string;
+    usage?: ChatStreamUsage | StreamUsage | undefined;
+    contextTokens?: number;
+    error?: string;
+  },
+): AssistantFlush {
+  const finished = capturedSteps ?? [];
+  const stepsText = finished.map((s) => s.text ?? '').join('');
+  const trailing = inProgressText ?? '';
+  // assistantParts emits text parts only for FINISHED steps; append the
+  // in-progress step's text (the partial answer cut off by an error/abort, or
+  // simply not yet flushed mid-stream) as the last text part so the persisted
+  // parts match what streamed to the client.
+  const parts = assistantParts(finished, '') as unknown as Array<
+    Record<string, unknown>
+  >;
+  if (trailing) parts.push({ type: 'text', text: trailing });
+
+  const metadata: Record<string, unknown> = {
+    parts: parts as unknown as UIMessage['parts'],
+  };
+  // finishReason: prefer an explicit one; else derive a sensible value from the
+  // terminal status (so onError/onAbort records keep their historical reason).
+  if (extra?.finishReason) {
+    metadata.finishReason = extra.finishReason;
+  } else if (status === 'error' || status === 'aborted') {
+    metadata.finishReason = status;
+  }
+  if (extra?.usage !== undefined) {
+    metadata.usage =
+      normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
+  }
+  if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
+  if (extra?.error) metadata.error = extra.error;
+
+  return {
+    content: stepsText + trailing,
+    toolCalls: serializeSteps(finished),
+    metadata,
+    status,
+  };
+}
+
 /**
 * Build the assistant-message record persisted on a partial/failed turn (the
 * streamText onError / onAbort paths). Captures the partial answer the user
@@ -982,6 +1153,9 @@ export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
 * it is recorded in metadata.error so the cause shows in history; an aborted
 * turn passes none. Pure, so the partial-recording shape is unit-testable
 * without seaming streamText.
+ *
+ * Thin wrapper over {@link flushAssistant} (retained for the existing unit
+ * tests and its historical `{ text, toolCalls, metadata }` shape).
 */
 export function buildPartialAssistantRecord(
  steps: ReadonlyArray<StepLike> | undefined,
@@ -989,24 +1163,13 @@ export function buildPartialAssistantRecord(
  finishReason: 'error' | 'aborted',
  errorText?: string,
 ): { text: string; toolCalls: unknown; metadata: Record<string, unknown> } {
-  const finished = steps ?? [];
-  const stepsText = finished.map((s) => s.text ?? '').join('');
-  const trailing = inProgressText ?? '';
-  // assistantParts emits text parts only for FINISHED steps; append the
-  // in-progress step's text (the answer cut off by the error) as the last text
-  // part so the persisted parts match what streamed to the client.
-  const parts = assistantParts(finished, '') as unknown as Array<
-    Record<string, unknown>
-  >;
-  if (trailing) parts.push({ type: 'text', text: trailing });
+  const flushed = flushAssistant(steps, inProgressText, finishReason, {
+    error: errorText,
+  });
  return {
-    text: stepsText + trailing,
-    toolCalls: serializeSteps(finished),
-    metadata: {
-      finishReason,
-      parts: parts as unknown as UIMessage['parts'],
-      ...(errorText ? { error: errorText } : {}),
-    },
+    text: flushed.content,
+    toolCalls: flushed.toolCalls,
+    metadata: flushed.metadata,
  };
 }

--- a/apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
+++ b/apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
@@ -0,0 +1,221 @@
+import { buildChatMarkdown, normalizeLang } from './chat-markdown.util';
+import type { AiChatMessage } from '@docmost/db/types/entity.types';
+
+/**
+ * normalizeLang: the client sends `i18n.language` — a FULL locale tag like
+ * 'en-US' / 'ru-RU', NOT a bare 'en'/'ru'. A `@IsIn(['en','ru'])` DTO rejected
+ * that with a 400 (caught in real-browser testing); the export now accepts any
+ * string and normalizes here. Guards that regression.
+ */
+describe('normalizeLang', () => {
+  it("maps any 'ru…' locale tag to ru", () => {
+    expect(normalizeLang('ru')).toBe('ru');
+    expect(normalizeLang('ru-RU')).toBe('ru');
+    expect(normalizeLang('RU-ru')).toBe('ru');
+  });
+
+  it('maps everything else (incl. region-qualified English) to en', () => {
+    expect(normalizeLang('en')).toBe('en');
+    expect(normalizeLang('en-US')).toBe('en');
+    expect(normalizeLang('fr-FR')).toBe('en');
+    expect(normalizeLang(undefined)).toBe('en');
+    expect(normalizeLang('')).toBe('en');
+  });
+});
+
+/**
+ * Unit tests for the SERVER Markdown export (#183). Mirrors the coverage of the
+ * (now-removed) client chat-markdown tests: heading/metadata, role labels, text
+ * + tool blocks, token footers, the interrupted-turn note, and NULL-status
+ * (legacy) rows. The export embeds a live `new Date().toISOString()` timestamp;
+ * we never assert it, only the deterministic structure.
+ */
+
+function row(partial: Partial<AiChatMessage>): AiChatMessage {
+  return {
+    id: partial.id ?? 'id',
+    chatId: partial.chatId ?? 'chat-1',
+    workspaceId: partial.workspaceId ?? 'ws-1',
+    userId: partial.userId ?? null,
+    role: partial.role ?? 'user',
+    content: partial.content ?? null,
+    toolCalls: partial.toolCalls ?? null,
+    metadata: partial.metadata ?? null,
+    status: partial.status ?? null,
+    createdAt: partial.createdAt ?? ('2026-06-21T00:00:00.000Z' as never),
+    updatedAt: partial.updatedAt ?? ('2026-06-21T00:00:00.000Z' as never),
+    deletedAt: partial.deletedAt ?? null,
+  } as AiChatMessage;
+}
+
+describe('buildChatMarkdown (server) — structure', () => {
+  it('emits the title heading, chat id and message count', () => {
+    const md = buildChatMarkdown({
+      title: 'My chat',
+      chatId: 'chat-123',
+      rows: [],
+    });
+    expect(md).toContain('# My chat');
+    expect(md).toContain('- Chat ID: `chat-123`');
+    expect(md).toContain('- Messages: 0');
+  });
+
+  it('falls back to "Untitled chat" with no title (en)', () => {
+    const md = buildChatMarkdown({ title: null, chatId: 'c', rows: [] });
+    expect(md).toContain('# Untitled chat');
+  });
+
+  it('localizes fixed labels with lang=ru (structure stays English)', () => {
+    const md = buildChatMarkdown({
+      title: null,
+      chatId: 'c',
+      lang: 'ru',
+      rows: [row({ role: 'assistant', content: 'hi' })],
+    });
+    expect(md).toContain('# Без названия');
+    expect(md).toContain('## 1. ИИ-агент');
+    // Structural words remain English.
+    expect(md).toContain('- Chat ID:');
+  });
+
+  it('numbers messages and labels roles (You / AI agent)', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({ role: 'user', content: 'question' }),
+        row({ role: 'assistant', content: 'answer' }),
+      ],
+    });
+    expect(md).toContain('## 1. You');
+    expect(md).toContain('question');
+    expect(md).toContain('## 2. AI agent');
+    expect(md).toContain('answer');
+  });
+
+  it('renders a tool part with fenced input/output and the friendly label', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: 'done',
+          metadata: {
+            parts: [
+              {
+                type: 'tool-getPage',
+                state: 'output-available',
+                input: { id: 'p1' },
+                output: { title: 'Hello' },
+              },
+              { type: 'text', text: 'done' },
+            ],
+          } as never,
+        }),
+      ],
+    });
+    expect(md).toContain('**Tool: Read page** (`getPage`) — done');
+    expect(md).toContain('Input:');
+    expect(md).toContain('"id": "p1"');
+    expect(md).toContain('Output:');
+    expect(md).toContain('"title": "Hello"');
+  });
+
+  it('emits a token footer + total when usage is present', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: 'a',
+          metadata: {
+            usage: {
+              inputTokens: 100,
+              outputTokens: 20,
+              totalTokens: 120,
+              reasoningTokens: 8,
+            },
+          } as never,
+        }),
+      ],
+    });
+    expect(md).toContain('- Total tokens: 120');
+    expect(md).toContain(
+      '_Tokens — in: 100, out: 20, reasoning: 8, total: 120_',
+    );
+  });
+
+  it('flags a still-streaming (interrupted) row', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({ role: 'assistant', content: 'partial', status: 'streaming' }),
+      ],
+    });
+    expect(md).toContain('still being generated');
+  });
+
+  it('does NOT flag a completed row', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [row({ role: 'assistant', content: 'final', status: 'completed' })],
+    });
+    expect(md).not.toContain('still being generated');
+  });
+
+  it('renders a legacy NULL-status row (no parts) from plain content', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({ role: 'assistant', content: 'legacy answer', status: null }),
+      ],
+    });
+    expect(md).toContain('legacy answer');
+    expect(md).not.toContain('still being generated');
+  });
+
+  it('renders a persisted error', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: '',
+          status: 'error',
+          metadata: { error: '401: Unauthorized' } as never,
+        }),
+      ],
+    });
+    expect(md).toContain('**⚠️ Error:** 401: Unauthorized');
+  });
+
+  it('escapes embedded triple-backtick fences with a longer delimiter', () => {
+    const md = buildChatMarkdown({
+      title: 'T',
+      chatId: 'c',
+      rows: [
+        row({
+          role: 'assistant',
+          content: 'x',
+          metadata: {
+            parts: [
+              {
+                type: 'tool-getPage',
+                state: 'output-available',
+                output: '```inner```',
+              },
+            ],
+          } as never,
+        }),
+      ],
+    });
+    // A 4-backtick fence wraps content that itself contains a 3-backtick run.
+    expect(md).toContain('````');
+  });
+});
--- a/apps/server/src/core/ai-chat/chat-markdown.util.ts
+++ b/apps/server/src/core/ai-chat/chat-markdown.util.ts
@@ -0,0 +1,296 @@
+/**
+ * Server-side Markdown export for an AI agent chat (#183). The DB is the single
+ * source of truth: this renders a chat purely from its persisted message rows
+ * (`AiChatMessage[]` — role / content / metadata.parts / toolCalls / usage).
+ * Because the assistant row is now persisted UPFRONT and updated per step, an
+ * interrupted turn is included up to its last finished step.
+ *
+ * Ported from the client `utils/chat-markdown.ts`. It is a PURE function (apart
+ * from `new Date()` for the export timestamp), so it is straightforward to
+ * unit-test and a future background worker can reuse it.
+ *
+ * Only a few fixed role/tool labels are localized via the `lang` param; the
+ * structural document words (Input/Output/Error/Tokens/...) stay English because
+ * the output is a technical artifact.
+ */
+
+import type { AiChatMessage } from '@docmost/db/types/entity.types';
+
+/** Supported export label languages. Defaults to English. */
+export type ExportLang = 'en' | 'ru';
+
+/**
+ * Normalize an arbitrary client locale code to a supported export language. The
+ * client sends `i18n.language`, which is a FULL locale tag (e.g. `en-US`,
+ * `ru-RU`), not a bare `en`/`ru` — so match on the language subtag and fall back
+ * to English for anything non-Russian.
+ */
+export function normalizeLang(lang?: string): ExportLang {
+  return lang?.toLowerCase().startsWith('ru') ? 'ru' : 'en';
+}
+
+/** A single AI SDK UIMessage part (text part or a tool part). */
+interface ExportPart {
+  type: string;
+  text?: string;
+  state?: string;
+  toolName?: string;
+  input?: unknown;
+  output?: unknown;
+  errorText?: string;
+}
+
+/** Authoritative per-turn usage the server attaches to a message row. */
+interface UsageLike {
+  inputTokens?: number;
+  outputTokens?: number;
+  totalTokens?: number;
+  reasoningTokens?: number;
+}
+
+/** Localized label table. Keep the keys identical to the client's i18n keys so
+ *  the two exports read the same. Only role + tool-action labels are localized;
+ *  everything structural is an English constant in the renderer. */
+const LABELS: Record<
+  ExportLang,
+  {
+    untitled: string;
+    aiAgent: string;
+    you: string;
+    tools: Record<string, string>;
+    ranTool: (name: string) => string;
+    stillGenerating: string;
+  }
+> = {
+  en: {
+    untitled: 'Untitled chat',
+    aiAgent: 'AI agent',
+    you: 'You',
+    tools: {
+      searchPages: 'Searched pages',
+      getPage: 'Read page',
+      createPage: 'Created page',
+      updatePageContent: 'Updated page',
+      renamePage: 'Renamed page',
+      movePage: 'Moved page',
+      deletePage: 'Deleted page (to trash)',
+      createComment: 'Commented',
+      resolveComment: 'Resolved comment',
+    },
+    ranTool: (name) => `Ran tool ${name}`,
+    stillGenerating:
+      'This message is still being generated — the export captured a partial, in-progress response.',
+  },
+  ru: {
+    untitled: 'Без названия',
+    aiAgent: 'ИИ-агент',
+    you: 'Вы',
+    tools: {
+      searchPages: 'Искал по страницам',
+      getPage: 'Прочитал страницу',
+      createPage: 'Создал страницу',
+      updatePageContent: 'Обновил страницу',
+      renamePage: 'Переименовал страницу',
+      movePage: 'Переместил страницу',
+      deletePage: 'Удалил страницу (в корзину)',
+      createComment: 'Прокомментировал',
+      resolveComment: 'Закрыл комментарий',
+    },
+    ranTool: (name) => `Выполнил инструмент ${name}`,
+    stillGenerating:
+      'Это сообщение всё ещё генерируется — экспорт захватил частичный, незавершённый ответ.',
+  },
+};
+
+/** True for AI SDK tool parts (static `tool-*` or `dynamic-tool`). */
+function isToolPart(type: string): boolean {
+  return type.startsWith('tool-') || type === 'dynamic-tool';
+}
+
+/** Extract the tool name from a part `type` of `tool-${name}` (or dynamic). */
+function getToolName(part: ExportPart): string {
+  if (part.type === 'dynamic-tool') return part.toolName ?? '';
+  return part.type.startsWith('tool-')
+    ? part.type.slice('tool-'.length)
+    : part.type;
+}
+
+/** Map an AI SDK tool-part state to the 3 states the action-log renders. */
+function toolRunState(state: string | undefined): 'running' | 'done' | 'error' {
+  if (state === 'output-error' || state === 'output-denied') return 'error';
+  if (state === 'output-available') return 'done';
+  return 'running';
+}
+
+/** Resolve a tool's friendly action-log label (localized) from its name. */
+function toolLabel(name: string, lang: ExportLang): string {
+  return LABELS[lang].tools[name] ?? LABELS[lang].ranTool(name);
+}
+
+/**
+ * Stringify an arbitrary tool input/output value for a fenced block. Strings
+ * pass through as-is; everything else is pretty-printed JSON, falling back to
+ * `String(value)` if serialization throws (e.g. a circular structure).
+ */
+function stringify(value: unknown): string {
+  if (typeof value === 'string') return value;
+  try {
+    return JSON.stringify(value, null, 2);
+  } catch {
+    return String(value);
+  }
+}
+
+/**
+ * Wrap `code` in a fenced code block whose backtick delimiter is LONGER than the
+ * longest backtick run inside the content, so embedded backticks (or a literal
+ * ``` fence) never break out of the block. Minimum 3 backticks.
+ */
+function fence(code: string, lang = ''): string {
+  const runs: string[] = code.match(/`+/g) ?? [];
+  const longest = runs.reduce((m, s) => Math.max(m, s.length), 0);
+  const delim = '`'.repeat(Math.max(3, longest + 1));
+  return `${delim}${lang}\n${code}\n${delim}`;
+}
+
+/** Per-row token count, mirroring the header sum in the client window. */
+function rowTokens(usage: UsageLike): number {
+  return (
+    usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
+  );
+}
+
+/** Render one message's UIMessage parts into an array of Markdown blocks
+ *  (text blocks + tool blocks). Mirrors the client renderer / MessageItem. */
+function renderMessageParts(parts: ExportPart[], lang: ExportLang): string[] {
+  const out: string[] = [];
+
+  for (const part of parts) {
+    if (part.type === 'text') {
+      const text = (part.text ?? '').trim();
+      if (text.length > 0) out.push(text);
+      continue;
+    }
+
+    if (!isToolPart(part.type)) continue;
+
+    const name = getToolName(part);
+    const label = toolLabel(name, lang);
+    const state = toolRunState(part.state);
+
+    const toolLines: string[] = [`**Tool: ${label}** (\`${name}\`) — ${state}`];
+    if (part.input !== undefined) {
+      toolLines.push('Input:');
+      toolLines.push(fence(stringify(part.input), 'json'));
+    }
+    if (part.output !== undefined) {
+      toolLines.push('Output:');
+      toolLines.push(fence(stringify(part.output), 'json'));
+    }
+    if (part.errorText) {
+      toolLines.push(`**Error:** ${part.errorText}`);
+    }
+    out.push(toolLines.join('\n\n'));
+  }
+
+  return out;
+}
+
+/** Resolve a persisted row's parts: prefer the rich persisted parts, else a
+ *  single text part built from the plain-text content (mirrors rowToUiMessage). */
+function rowParts(row: AiChatMessage): ExportPart[] {
+  const meta = (row.metadata ?? {}) as { parts?: ExportPart[] };
+  return Array.isArray(meta.parts) && meta.parts.length > 0
+    ? meta.parts
+    : [{ type: 'text', text: row.content ?? '' }];
+}
+
+/**
+ * Serialize a chat to a Markdown string from its persisted rows. Source = DB
+ * ONLY (no live client state). A row whose `status` is still 'streaming' is an
+ * interrupted turn that the export captured mid-flight; it is rendered up to its
+ * last finished step and flagged "still generating".
+ */
+export function buildChatMarkdown(args: {
+  title: string | null;
+  chatId: string;
+  rows: AiChatMessage[];
+  // Accepts a full client locale tag (e.g. 'en-US'/'ru-RU'); normalized below.
+  lang?: string;
+}): string {
+  const { title, chatId, rows } = args;
+  const lang: ExportLang = normalizeLang(args.lang);
+  const L = LABELS[lang];
+  const blocks: string[] = [];
+
+  const heading = (title ?? '').trim() || L.untitled;
+  blocks.push(`# ${heading}`);
+
+  const usageOf = (row: AiChatMessage): UsageLike | undefined => {
+    const meta = (row.metadata ?? {}) as { usage?: UsageLike };
+    return meta.usage;
+  };
+  const errorOf = (row: AiChatMessage): string | undefined => {
+    const meta = (row.metadata ?? {}) as { error?: string };
+    return meta.error ?? undefined;
+  };
+
+  // Metadata bullet list. Total tokens is only shown when there is a sum.
+  const totalTokens = rows.reduce((sum, row) => {
+    const usage = usageOf(row);
+    return usage ? sum + rowTokens(usage) : sum;
+  }, 0);
+  const meta = [
+    `- Chat ID: \`${chatId}\``,
+    `- Exported: ${new Date().toISOString()}`,
+    `- Messages: ${rows.length}`,
+  ];
+  if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
+  blocks.push(meta.join('\n'));
+
+  rows.forEach((row, index) => {
+    blocks.push('---');
+
+    const roleLabel = row.role === 'assistant' ? L.aiAgent : L.you;
+    blocks.push(`## ${index + 1}. ${roleLabel}`);
+
+    // Created-at kept in source as an HTML comment (out of the rendered prose).
+    if (row.createdAt) {
+      const iso =
+        row.createdAt instanceof Date
+          ? row.createdAt.toISOString()
+          : String(row.createdAt);
+      blocks.push(`<!-- ${iso} -->`);
+    }
+
+    blocks.push(...renderMessageParts(rowParts(row), lang));
+
+    // A still-'streaming' row is an interrupted/in-progress turn captured by the
+    // export; record that so the partial answer is not mistaken for complete.
+    if (row.status === 'streaming') {
+      blocks.push(`_⏳ ${L.stillGenerating}_`);
+    }
+
+    const error = errorOf(row);
+    if (error) {
+      blocks.push(`**⚠️ Error:** ${error}`);
+    }
+
+    const usage = usageOf(row);
+    if (usage) {
+      const total = usage.totalTokens ?? rowTokens(usage);
+      const reasoning =
+        usage.reasoningTokens && usage.reasoningTokens > 0
+          ? `, reasoning: ${usage.reasoningTokens}`
+          : '';
+      blocks.push(
+        `_Tokens — in: ${usage.inputTokens ?? '?'}, out: ${
+          usage.outputTokens ?? '?'
+        }${reasoning}, total: ${total}_`,
+      );
+    }
+  });
+
+  // Blank line between blocks so the Markdown renders cleanly.
+  return blocks.join('\n\n');
+}
--- a/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts
+++ b/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts
@@ -26,3 +26,17 @@ export class GetChatMessagesDto {
  @IsString()
  cursor?: string;
 }
+
+/** Export a chat to Markdown (#183). `lang` localizes the few fixed
+ *  role/tool-action labels; defaults to English server-side. */
+export class ExportChatDto {
+  @IsString()
+  chatId: string;
+
+  // A full client locale tag (e.g. 'en-US', 'ru-RU') — normalized server-side to
+  // a supported export language (see normalizeLang). Accept any string so a
+  // region-qualified locale is not rejected (the 400 that broke the real client).
+  @IsOptional()
+  @IsString()
+  lang?: string;
+}