feat(ai-chat): persistent history as source of truth — step durability + server export (#183)
The chat lived in inconsistent paradigms (in-memory stream + client export vs.
DB-as-context), which made export flaky and lost the assistant answer if the
process died mid-turn. Make the DB the single source of truth.
A. STEP-GRANULAR DURABILITY (server)
- ai_chat_messages gains a nullable `status` column (migration; NULL = legacy =
completed). The assistant row is now INSERTED UPFRONT as `status:'streaming'`
and UPDATEd on every onStepFinish with all finished steps (text + tool calls +
tool RESULTS), then finalized once to completed/error/aborted on the terminal
callback. So a process death mid-turn keeps every finished step; a startup
sweep (OnModuleInit → sweepStreaming) flips any dangling 'streaming' row to
'aborted'. The write path no longer depends on a live socket.
- Pure exported `flushAssistant(steps, inProgressText, status, extra?)` builds
the persist payload (metadata.parts byte-identical to the old builder), so a
future background worker can call the same path. AiChatMessageRepo gains
`update`, `sweepStreaming`, and `findAllByChat`.
- consumeStream drain, external-MCP client close-once, SSE heartbeat preserved.
B. SERVER-SIDE EXPORT
- New pure `chat-markdown.util.ts` renders Markdown from DB rows ONLY (server
port of the client builder). Because A persists the in-progress row, the
export now includes an interrupted turn up to its last finished step (flagged
"still generating"). `POST /ai-chat/export` (owner-gated via assertOwnedChat,
workspace-scoped) returns it; `lang` accepts a full client locale tag
('en-US'/'ru-RU') and is normalized server-side (normalizeLang) — a strict
@IsIn(['en','ru']) DTO rejected the real client's i18n.language with a 400,
caught in real-browser testing.
- Client: handleCopy calls the endpoint; `canExport = !!activeChatId`. The whole
liveThreadRef/liveStateRef/onLiveContentChange/hasLiveContent hybrid (and the
client chat-markdown util + test) is removed — the server is now authoritative.
Tests: flushAssistant unit (status shapes + parts parity), chat-markdown.util
unit (incl. legacy NULL-status + interrupted note + ru + normalizeLang locale
tags), controller export wiring + owner-gate, integration update/sweepStreaming.
Verified: server build + 318 ai-chat unit + 3 integration; client tsc + 157
ai-chat unit; and END-TO-END in a real browser — a chat turn persists mid-stream
and the Copy button exports the DB-sourced markdown (showing the in-progress
row), HTTP 200 after the locale fix.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
import { ForbiddenException } from '@nestjs/common';
|
||||
import { AiChatController } from './ai-chat.controller';
|
||||
import type { User, Workspace } from '@docmost/db/types/entity.types';
|
||||
|
||||
/**
|
||||
* Wiring spec for the #183 `POST /ai-chat/export` endpoint. It must: own-gate via
|
||||
* the chat lookup (workspace-scoped + creator-owned), load the FULL transcript
|
||||
* via findAllByChat, render server-side, and return `{ markdown }`. Exercised by
|
||||
* instantiating the controller with hand-rolled mocks — no Nest graph, no DB.
|
||||
*/
|
||||
describe('AiChatController.export', () => {
|
||||
const user = { id: 'u1' } as User;
|
||||
const workspace = { id: 'ws1' } as Workspace;
|
||||
|
||||
function makeController(
|
||||
over: {
|
||||
chat?: unknown;
|
||||
rows?: unknown[];
|
||||
} = {},
|
||||
) {
|
||||
const chat =
|
||||
'chat' in over
|
||||
? over.chat
|
||||
: { id: 'c1', creatorId: 'u1', title: 'My chat' };
|
||||
const aiChatRepo = {
|
||||
findById: jest.fn().mockResolvedValue(chat),
|
||||
};
|
||||
const aiChatMessageRepo = {
|
||||
findAllByChat: jest.fn().mockResolvedValue(
|
||||
over.rows ?? [
|
||||
{
|
||||
id: 'm1',
|
||||
role: 'user',
|
||||
content: 'hi',
|
||||
metadata: null,
|
||||
status: null,
|
||||
},
|
||||
{
|
||||
id: 'm2',
|
||||
role: 'assistant',
|
||||
content: 'hello',
|
||||
metadata: null,
|
||||
status: 'completed',
|
||||
},
|
||||
],
|
||||
),
|
||||
};
|
||||
const controller = new AiChatController(
|
||||
{} as never,
|
||||
aiChatRepo as never,
|
||||
aiChatMessageRepo as never,
|
||||
{} as never,
|
||||
);
|
||||
return { controller, aiChatRepo, aiChatMessageRepo };
|
||||
}
|
||||
|
||||
it('renders the full transcript and returns { markdown }', async () => {
|
||||
const { controller, aiChatMessageRepo } = makeController();
|
||||
const res = await controller.export({ chatId: 'c1' }, user, workspace);
|
||||
expect(aiChatMessageRepo.findAllByChat).toHaveBeenCalledWith('c1', 'ws1');
|
||||
expect(res.markdown).toContain('# My chat');
|
||||
expect(res.markdown).toContain('## 1. You');
|
||||
expect(res.markdown).toContain('## 2. AI agent');
|
||||
});
|
||||
|
||||
it('forbids a chat the user does not own', async () => {
|
||||
const { controller } = makeController({
|
||||
chat: { id: 'c1', creatorId: 'someone-else', title: 'X' },
|
||||
});
|
||||
await expect(
|
||||
controller.export({ chatId: 'c1' }, user, workspace),
|
||||
).rejects.toBeInstanceOf(ForbiddenException);
|
||||
});
|
||||
|
||||
it('forbids a missing / foreign-workspace chat', async () => {
|
||||
const { controller } = makeController({ chat: null });
|
||||
await expect(
|
||||
controller.export({ chatId: 'c1' }, user, workspace),
|
||||
).rejects.toBeInstanceOf(ForbiddenException);
|
||||
});
|
||||
|
||||
it('localizes labels when lang=ru is passed', async () => {
|
||||
const { controller } = makeController();
|
||||
const res = await controller.export(
|
||||
{ chatId: 'c1', lang: 'ru' },
|
||||
user,
|
||||
workspace,
|
||||
);
|
||||
expect(res.markdown).toContain('## 1. Вы');
|
||||
expect(res.markdown).toContain('## 2. ИИ-агент');
|
||||
});
|
||||
});
|
||||
@@ -20,7 +20,7 @@ import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
|
||||
import { AuthUser } from '../../common/decorators/auth-user.decorator';
|
||||
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
|
||||
import { SkipTransform } from '../../common/decorators/skip-transform.decorator';
|
||||
import { User, Workspace } from '@docmost/db/types/entity.types';
|
||||
import { AiChat, User, Workspace } from '@docmost/db/types/entity.types';
|
||||
import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
|
||||
import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
|
||||
import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
|
||||
@@ -31,10 +31,12 @@ import { AiChatService, AiChatStreamBody } from './ai-chat.service';
|
||||
import { AiTranscriptionService } from './ai-transcription.service';
|
||||
import {
|
||||
ChatIdDto,
|
||||
ExportChatDto,
|
||||
GetChatMessagesDto,
|
||||
RenameChatDto,
|
||||
} from './dto/ai-chat.dto';
|
||||
import { describeProviderError } from '../../integrations/ai/ai-error.util';
|
||||
import { buildChatMarkdown } from './chat-markdown.util';
|
||||
|
||||
/**
|
||||
* Per-user AI chat API (§6.1). Routes are POST to match this codebase's
|
||||
@@ -81,6 +83,35 @@ export class AiChatController {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Export a chat to Markdown (#183). The DB is the single source of truth: the
|
||||
* whole transcript is loaded (oldest -> newest) and rendered server-side. Now
|
||||
* that the assistant row is persisted upfront and per step, an interrupted
|
||||
* turn is included up to its last finished step. Workspace-scoped and owner-
|
||||
* gated via assertOwnedChat (same as the other read endpoints). Returns
|
||||
* `{ markdown }`. `lang` localizes the few fixed labels (default English).
|
||||
*/
|
||||
@HttpCode(HttpStatus.OK)
|
||||
@Post('export')
|
||||
async export(
|
||||
@Body() dto: ExportChatDto,
|
||||
@AuthUser() user: User,
|
||||
@AuthWorkspace() workspace: Workspace,
|
||||
): Promise<{ markdown: string }> {
|
||||
const chat = await this.assertOwnedChat(dto.chatId, user, workspace);
|
||||
const rows = await this.aiChatMessageRepo.findAllByChat(
|
||||
dto.chatId,
|
||||
workspace.id,
|
||||
);
|
||||
const markdown = buildChatMarkdown({
|
||||
title: chat.title ?? null,
|
||||
chatId: dto.chatId,
|
||||
rows,
|
||||
lang: dto.lang ?? 'en',
|
||||
});
|
||||
return { markdown };
|
||||
}
|
||||
|
||||
/** Rename a chat. */
|
||||
@HttpCode(HttpStatus.OK)
|
||||
@Post('rename')
|
||||
@@ -90,7 +121,11 @@ export class AiChatController {
|
||||
@AuthWorkspace() workspace: Workspace,
|
||||
) {
|
||||
await this.assertOwnedChat(dto.chatId, user, workspace);
|
||||
await this.aiChatRepo.update(dto.chatId, { title: dto.title }, workspace.id);
|
||||
await this.aiChatRepo.update(
|
||||
dto.chatId,
|
||||
{ title: dto.title },
|
||||
workspace.id,
|
||||
);
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
@@ -145,7 +180,10 @@ export class AiChatController {
|
||||
// Resolve the agent role for this turn BEFORE hijack: existing chats read it
|
||||
// from ai_chats.role_id (authoritative), a new chat from body.roleId. The
|
||||
// role drives both the persona and the optional model override below.
|
||||
const role = await this.aiChatService.resolveRoleForRequest(workspace, body);
|
||||
const role = await this.aiChatService.resolveRoleForRequest(
|
||||
workspace,
|
||||
body,
|
||||
);
|
||||
|
||||
// Resolve the model (applying the role's optional override) BEFORE hijack so
|
||||
// an unconfigured provider — including a role pointing at an unconfigured
|
||||
@@ -232,7 +270,9 @@ export class AiChatController {
|
||||
let file = null;
|
||||
try {
|
||||
// Whisper hard-caps uploads at 25MB; allow a single file.
|
||||
file = await req.file({ limits: { fileSize: 25 * 1024 * 1024, files: 1 } });
|
||||
file = await req.file({
|
||||
limits: { fileSize: 25 * 1024 * 1024, files: 1 },
|
||||
});
|
||||
} catch (err: any) {
|
||||
if (err?.statusCode === 413) {
|
||||
throw new BadRequestException('Audio file too large (max 25MB)');
|
||||
@@ -283,11 +323,12 @@ export class AiChatController {
|
||||
chatId: string,
|
||||
user: User,
|
||||
workspace: Workspace,
|
||||
): Promise<void> {
|
||||
): Promise<AiChat> {
|
||||
const chat = await this.aiChatRepo.findById(chatId, workspace.id);
|
||||
if (!chat || chat.creatorId !== user.id) {
|
||||
throw new ForbiddenException();
|
||||
}
|
||||
return chat;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
rowToUiMessage,
|
||||
prepareAgentStep,
|
||||
buildPartialAssistantRecord,
|
||||
flushAssistant,
|
||||
chatStreamMetadata,
|
||||
accumulateStepUsage,
|
||||
MAX_AGENT_STEPS,
|
||||
@@ -94,8 +95,12 @@ describe('assistantParts', () => {
|
||||
const steps = [
|
||||
{
|
||||
text: '',
|
||||
toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
|
||||
toolResults: [{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } }],
|
||||
toolCalls: [
|
||||
{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
|
||||
],
|
||||
toolResults: [
|
||||
{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
|
||||
],
|
||||
},
|
||||
];
|
||||
const parts = assistantParts(steps, '') as AnyPart[];
|
||||
@@ -109,7 +114,9 @@ describe('assistantParts', () => {
|
||||
const steps = [
|
||||
{
|
||||
text: '',
|
||||
toolCalls: [{ toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } }],
|
||||
toolCalls: [
|
||||
{ toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } },
|
||||
],
|
||||
toolResults: [],
|
||||
},
|
||||
];
|
||||
@@ -136,7 +143,8 @@ describe('assistantParts', () => {
|
||||
];
|
||||
const parts = assistantParts(steps, '') as AnyPart[];
|
||||
const toolParts = parts.filter(
|
||||
(p) => typeof p.type === 'string' && (p.type as string).startsWith('tool-'),
|
||||
(p) =>
|
||||
typeof p.type === 'string' && (p.type as string).startsWith('tool-'),
|
||||
);
|
||||
expect(toolParts).toHaveLength(0);
|
||||
});
|
||||
@@ -246,16 +254,30 @@ describe('buildPartialAssistantRecord', () => {
|
||||
type AnyPart = Record<string, unknown>;
|
||||
|
||||
it('records an empty turn with the error text (preserves old behavior)', () => {
|
||||
const rec = buildPartialAssistantRecord([], '', 'error', '401: Unauthorized');
|
||||
const rec = buildPartialAssistantRecord(
|
||||
[],
|
||||
'',
|
||||
'error',
|
||||
'401: Unauthorized',
|
||||
);
|
||||
expect(rec).toEqual({
|
||||
text: '',
|
||||
toolCalls: null,
|
||||
metadata: { finishReason: 'error', parts: [], error: '401: Unauthorized' },
|
||||
metadata: {
|
||||
finishReason: 'error',
|
||||
parts: [],
|
||||
error: '401: Unauthorized',
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('persists in-progress text (no finished steps) as the partial answer', () => {
|
||||
const rec = buildPartialAssistantRecord([], 'partial answer', 'error', 'boom');
|
||||
const rec = buildPartialAssistantRecord(
|
||||
[],
|
||||
'partial answer',
|
||||
'error',
|
||||
'boom',
|
||||
);
|
||||
expect(rec.text).toBe('partial answer');
|
||||
expect(rec.metadata.parts).toEqual([
|
||||
{ type: 'text', text: 'partial answer' },
|
||||
@@ -275,7 +297,12 @@ describe('buildPartialAssistantRecord', () => {
|
||||
],
|
||||
},
|
||||
];
|
||||
const rec = buildPartialAssistantRecord(steps, ' and then', 'error', 'boom');
|
||||
const rec = buildPartialAssistantRecord(
|
||||
steps,
|
||||
' and then',
|
||||
'error',
|
||||
'boom',
|
||||
);
|
||||
const parts = rec.metadata.parts as AnyPart[];
|
||||
// The finished step's text part is present.
|
||||
expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
|
||||
@@ -284,7 +311,10 @@ describe('buildPartialAssistantRecord', () => {
|
||||
expect(toolPart).toBeDefined();
|
||||
expect(toolPart!.state).toBe('output-available');
|
||||
// The in-progress text is appended LAST so the parts match the stream order.
|
||||
expect(parts[parts.length - 1]).toEqual({ type: 'text', text: ' and then' });
|
||||
expect(parts[parts.length - 1]).toEqual({
|
||||
type: 'text',
|
||||
text: ' and then',
|
||||
});
|
||||
expect(rec.text).toBe('looked it up and then');
|
||||
expect(rec.toolCalls).not.toBeNull();
|
||||
expect(rec.metadata.error).toBe('boom');
|
||||
@@ -298,6 +328,107 @@ describe('buildPartialAssistantRecord', () => {
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* flushAssistant (#183): the PURE row builder behind the step-granular durable
|
||||
* write path. It runs identically for the upfront insert (empty steps,
|
||||
* 'streaming'), every per-step update, and the terminal finalize — so a future
|
||||
* background worker can call the same function. These tests pin the four status
|
||||
* shapes and, critically, that `metadata.parts` stays IDENTICAL to the old
|
||||
* buildPartialAssistantRecord / assistantParts output (rowToUiMessage/findRecent
|
||||
* depend on it).
|
||||
*/
|
||||
describe('flushAssistant', () => {
|
||||
type AnyPart = Record<string, unknown>;
|
||||
|
||||
const toolStep = {
|
||||
text: 'looked it up',
|
||||
toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
|
||||
toolResults: [
|
||||
{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
|
||||
],
|
||||
};
|
||||
|
||||
it('upfront seed: empty streaming row (no content, no toolCalls, empty parts)', () => {
|
||||
const f = flushAssistant([], '', 'streaming');
|
||||
expect(f.status).toBe('streaming');
|
||||
expect(f.content).toBe('');
|
||||
expect(f.toolCalls).toBeNull();
|
||||
expect(f.metadata.parts).toEqual([]);
|
||||
// No finishReason while streaming (it is not a terminal state).
|
||||
expect('finishReason' in f.metadata).toBe(false);
|
||||
});
|
||||
|
||||
it('streaming update folds in finished steps but keeps status streaming', () => {
|
||||
const f = flushAssistant([toolStep], '', 'streaming');
|
||||
expect(f.status).toBe('streaming');
|
||||
expect(f.content).toBe('looked it up');
|
||||
const parts = f.metadata.parts as AnyPart[];
|
||||
expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
|
||||
const toolPart = parts.find((p) => p.type === 'tool-getPage');
|
||||
expect(toolPart!.state).toBe('output-available');
|
||||
expect(f.toolCalls).not.toBeNull();
|
||||
});
|
||||
|
||||
it('completed: attaches finishReason + normalized usage + contextTokens', () => {
|
||||
const f = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
|
||||
contextTokens: 15,
|
||||
});
|
||||
expect(f.status).toBe('completed');
|
||||
expect(f.metadata.finishReason).toBe('stop');
|
||||
expect(f.metadata.usage).toEqual({
|
||||
inputTokens: 10,
|
||||
outputTokens: 5,
|
||||
totalTokens: 15,
|
||||
reasoningTokens: undefined,
|
||||
});
|
||||
expect(f.metadata.contextTokens).toBe(15);
|
||||
});
|
||||
|
||||
it('error: records the error and a derived finishReason', () => {
|
||||
const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' });
|
||||
expect(f.status).toBe('error');
|
||||
expect(f.content).toBe('partial answer');
|
||||
expect(f.metadata.error).toBe('boom');
|
||||
// Derives finishReason from the terminal status when none is supplied.
|
||||
expect(f.metadata.finishReason).toBe('error');
|
||||
expect(f.metadata.parts).toEqual([
|
||||
{ type: 'text', text: 'partial answer' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('aborted: in-progress text appended last, no error key', () => {
|
||||
const f = flushAssistant([toolStep], ' and then', 'aborted');
|
||||
expect(f.status).toBe('aborted');
|
||||
expect(f.metadata.finishReason).toBe('aborted');
|
||||
expect('error' in f.metadata).toBe(false);
|
||||
expect(f.content).toBe('looked it up and then');
|
||||
const parts = f.metadata.parts as AnyPart[];
|
||||
expect(parts[parts.length - 1]).toEqual({
|
||||
type: 'text',
|
||||
text: ' and then',
|
||||
});
|
||||
});
|
||||
|
||||
it('metadata.parts parity with buildPartialAssistantRecord (error path)', () => {
|
||||
const flushed = flushAssistant([toolStep], ' and then', 'error', {
|
||||
error: 'boom',
|
||||
});
|
||||
const legacy = buildPartialAssistantRecord(
|
||||
[toolStep],
|
||||
' and then',
|
||||
'error',
|
||||
'boom',
|
||||
);
|
||||
// The whole metadata block (parts + finishReason + error) must match the
|
||||
// legacy partial-record shape so rebuilt history is unchanged.
|
||||
expect(flushed.metadata).toEqual(legacy.metadata);
|
||||
expect(flushed.content).toBe(legacy.text);
|
||||
expect(flushed.toolCalls).toEqual(legacy.toolCalls);
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* chatStreamMetadata: attach metadata to the streamed assistant UI message per
|
||||
* part type — `chatId` on `start` (so the client adopts the real created chat id
|
||||
@@ -319,10 +450,20 @@ describe('chatStreamMetadata', () => {
|
||||
chatStreamMetadata(
|
||||
{ type: 'finish-step', usage: { outputTokens: 100 } },
|
||||
'chat-1',
|
||||
{ inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 },
|
||||
{
|
||||
inputTokens: 500,
|
||||
outputTokens: 220,
|
||||
totalTokens: 720,
|
||||
reasoningTokens: 30,
|
||||
},
|
||||
),
|
||||
).toEqual({
|
||||
usage: { inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 },
|
||||
usage: {
|
||||
inputTokens: 500,
|
||||
outputTokens: 220,
|
||||
totalTokens: 720,
|
||||
reasoningTokens: 30,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -394,8 +535,18 @@ describe('accumulateStepUsage', () => {
|
||||
it('sums every field across two steps', () => {
|
||||
expect(
|
||||
accumulateStepUsage(
|
||||
{ inputTokens: 500, outputTokens: 100, totalTokens: 600, reasoningTokens: 30 },
|
||||
{ inputTokens: 520, outputTokens: 80, totalTokens: 600, reasoningTokens: 10 },
|
||||
{
|
||||
inputTokens: 500,
|
||||
outputTokens: 100,
|
||||
totalTokens: 600,
|
||||
reasoningTokens: 30,
|
||||
},
|
||||
{
|
||||
inputTokens: 520,
|
||||
outputTokens: 80,
|
||||
totalTokens: 600,
|
||||
reasoningTokens: 10,
|
||||
},
|
||||
),
|
||||
).toEqual({
|
||||
inputTokens: 1020,
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
import { ForbiddenException, Injectable, Logger } from '@nestjs/common';
|
||||
import {
|
||||
ForbiddenException,
|
||||
Injectable,
|
||||
Logger,
|
||||
OnModuleInit,
|
||||
} from '@nestjs/common';
|
||||
import { FastifyReply } from 'fastify';
|
||||
import {
|
||||
streamText,
|
||||
@@ -60,7 +65,10 @@ export function prepareAgentStep(
|
||||
system: string,
|
||||
): { toolChoice: 'none'; system: string } | undefined {
|
||||
if (stepNumber >= MAX_AGENT_STEPS - 1) {
|
||||
return { toolChoice: 'none', system: `${system}\n\n${FINAL_STEP_INSTRUCTION}` };
|
||||
return {
|
||||
toolChoice: 'none',
|
||||
system: `${system}\n\n${FINAL_STEP_INSTRUCTION}`,
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
@@ -121,7 +129,7 @@ export interface AiChatStreamArgs {
|
||||
* can be rebuilt for `convertToModelMessages`.
|
||||
*/
|
||||
@Injectable()
|
||||
export class AiChatService {
|
||||
export class AiChatService implements OnModuleInit {
|
||||
private readonly logger = new Logger(AiChatService.name);
|
||||
|
||||
constructor(
|
||||
@@ -136,6 +144,32 @@ export class AiChatService {
|
||||
private readonly pageAccess: PageAccessService,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Crash-recovery sweep on server start (#183): any assistant row left in the
|
||||
* 'streaming' state is the relic of a turn whose process died before it
|
||||
* reached a terminal status. Flip those to 'aborted' so history/export show
|
||||
* them settled (with whatever finished steps were already persisted) instead
|
||||
* of perpetually "streaming". Best-effort: a sweep failure is logged but must
|
||||
* never block server startup.
|
||||
*/
|
||||
async onModuleInit(): Promise<void> {
|
||||
try {
|
||||
const swept = await this.aiChatMessageRepo.sweepStreaming();
|
||||
if (swept > 0) {
|
||||
this.logger.log(
|
||||
`Startup sweep: marked ${swept} dangling 'streaming' assistant ` +
|
||||
`message(s) as 'aborted'.`,
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.warn(
|
||||
`Startup sweep of dangling 'streaming' messages failed: ${
|
||||
err instanceof Error ? err.message : 'unknown error'
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the agent role that applies to this stream request, scoped to the
|
||||
* workspace and soft-delete aware. For an EXISTING chat the role is read from
|
||||
@@ -259,9 +293,7 @@ export class AiChatService {
|
||||
content: incomingText,
|
||||
// jsonb column: UIMessage parts are JSON-serializable at runtime but not
|
||||
// structurally `JsonValue`, so cast through unknown.
|
||||
metadata: (incoming?.parts
|
||||
? { parts: incoming.parts }
|
||||
: null) as never,
|
||||
metadata: (incoming?.parts ? { parts: incoming.parts } : null) as never,
|
||||
});
|
||||
|
||||
// Rebuild the conversation from persisted history (not the client payload),
|
||||
@@ -347,31 +379,6 @@ export class AiChatService {
|
||||
);
|
||||
};
|
||||
|
||||
// Persist the assistant message. Used by onFinish (full result) and the
|
||||
// abort/error paths (partial result). Guarded so we persist at most once.
|
||||
let persisted = false;
|
||||
const persistAssistant = async (data: {
|
||||
text: string;
|
||||
toolCalls: unknown;
|
||||
metadata: Record<string, unknown>;
|
||||
}): Promise<void> => {
|
||||
if (persisted) return;
|
||||
persisted = true;
|
||||
try {
|
||||
await this.aiChatMessageRepo.insert({
|
||||
chatId,
|
||||
workspaceId: workspace.id,
|
||||
userId: user.id,
|
||||
role: 'assistant',
|
||||
content: data.text ?? '',
|
||||
toolCalls: (data.toolCalls ?? null) as never,
|
||||
metadata: data.metadata as never,
|
||||
});
|
||||
} catch (err) {
|
||||
this.logger.error('Failed to persist assistant message', err as Error);
|
||||
}
|
||||
};
|
||||
|
||||
// Accumulate the turn's streamed output so a provider error / disconnect can
|
||||
// persist the PARTIAL answer the user already saw — the SDK's onError/onAbort
|
||||
// callbacks don't hand us the in-progress text. `capturedSteps` holds finished
|
||||
@@ -380,6 +387,94 @@ export class AiChatService {
|
||||
const capturedSteps: StepLike[] = [];
|
||||
let inProgressText = '';
|
||||
|
||||
// Step-granular durability (#183): create the assistant row UPFRONT in the
|
||||
// 'streaming' state (before any token), then UPDATE it as each step finishes
|
||||
// and finalize it once on the terminal callback. If the process dies
|
||||
// mid-turn the row survives with every finished step already persisted; the
|
||||
// startup sweep (sweepStreaming) later flips a dangling 'streaming' row to
|
||||
// 'aborted'. The DB is now the single source of truth for the turn — the
|
||||
// socket is never required for the write path. A failed upfront insert is
|
||||
// logged and leaves assistantId undefined; the per-step/terminal updates then
|
||||
// no-op (guarded below) so the turn still streams to the user.
|
||||
let assistantId: string | undefined;
|
||||
try {
|
||||
const seed = flushAssistant([], '', 'streaming');
|
||||
const seeded = await this.aiChatMessageRepo.insert({
|
||||
chatId,
|
||||
workspaceId: workspace.id,
|
||||
userId: user.id,
|
||||
role: 'assistant',
|
||||
content: seed.content,
|
||||
// jsonb columns: cast through never (same as the user insert above).
|
||||
toolCalls: (seed.toolCalls ?? null) as never,
|
||||
metadata: seed.metadata as never,
|
||||
status: seed.status,
|
||||
});
|
||||
assistantId = seeded?.id;
|
||||
} catch (err) {
|
||||
this.logger.error('Failed to insert upfront assistant row', err as Error);
|
||||
}
|
||||
|
||||
// Per-step (non-terminal) update: persist the finished steps the moment a
|
||||
// step ends. Tolerant — a failed update is logged and swallowed so it never
|
||||
// throws into the stream. Keeps status 'streaming'.
|
||||
const updateStreaming = async (): Promise<void> => {
|
||||
if (!assistantId) return;
|
||||
try {
|
||||
await this.aiChatMessageRepo.update(
|
||||
assistantId,
|
||||
workspace.id,
|
||||
flushAssistant(capturedSteps, '', 'streaming'),
|
||||
);
|
||||
} catch (err) {
|
||||
this.logger.warn(
|
||||
`Failed to update streaming assistant row: ${
|
||||
err instanceof Error ? err.message : 'unknown error'
|
||||
}`,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Terminal finalize: write the completed/error/aborted row exactly once
|
||||
// across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort
|
||||
// callbacks — mirroring the pre-#183 persist-at-most-once guard for the
|
||||
// TERMINAL status (the row may be updated many times with 'streaming' before
|
||||
// this fires once).
|
||||
let finalized = false;
|
||||
const finalizeAssistant = async (
|
||||
flushed: AssistantFlush,
|
||||
): Promise<void> => {
|
||||
if (finalized) return;
|
||||
finalized = true;
|
||||
if (!assistantId) {
|
||||
// The upfront insert failed: fall back to inserting the terminal row so
|
||||
// the turn is not lost entirely.
|
||||
try {
|
||||
await this.aiChatMessageRepo.insert({
|
||||
chatId,
|
||||
workspaceId: workspace.id,
|
||||
userId: user.id,
|
||||
role: 'assistant',
|
||||
content: flushed.content,
|
||||
toolCalls: (flushed.toolCalls ?? null) as never,
|
||||
metadata: flushed.metadata as never,
|
||||
status: flushed.status,
|
||||
});
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
'Failed to persist terminal assistant message',
|
||||
err as Error,
|
||||
);
|
||||
}
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await this.aiChatMessageRepo.update(assistantId, workspace.id, flushed);
|
||||
} catch (err) {
|
||||
this.logger.error('Failed to finalize assistant message', err as Error);
|
||||
}
|
||||
};
|
||||
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure
|
||||
// first-chunk latency, the model-silent gap right before a disconnect, and
|
||||
// how many SSE heartbeats were written, so a Safari drop can be classified
|
||||
@@ -395,144 +490,141 @@ export class AiChatService {
|
||||
let result: ReturnType<typeof streamText>;
|
||||
try {
|
||||
result = streamText({
|
||||
model,
|
||||
system,
|
||||
messages,
|
||||
tools,
|
||||
// No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full
|
||||
// page body for the write tools) are emitted as OUTPUT tokens, so a fixed
|
||||
// cap would truncate complex tool calls mid-argument. Let the model use its
|
||||
// natural per-step budget. (Cost/credit limits are an account concern, not
|
||||
// something to enforce by silently breaking the agent.)
|
||||
stopWhen: stepCountIs(MAX_AGENT_STEPS),
|
||||
// Forced finalization: reserve the LAST allowed step for a text-only
|
||||
// answer. Without this, a turn that spends all its steps on tool calls
|
||||
// ends with no assistant text (an empty turn). prepareAgentStep forbids
|
||||
// further tool calls and appends a synthesis instruction on that step,
|
||||
// concatenated onto the original `system` so the persona is preserved.
|
||||
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
|
||||
abortSignal: signal,
|
||||
onChunk: ({ chunk }) => {
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
|
||||
// output chunk means the stream is actively emitting bytes; track first
|
||||
// + most-recent activity timestamps.
|
||||
const now = Date.now();
|
||||
firstModelChunkAt ??= now;
|
||||
lastModelChunkAt = now;
|
||||
// 'text-delta' is the assistant's prose; tool-call args are separate chunk
|
||||
// types — so this mirrors exactly what streams to the client.
|
||||
if (chunk.type === 'text-delta') inProgressText += chunk.text;
|
||||
},
|
||||
onStepFinish: (step) => {
|
||||
// The finished step's full text is now in `step.text`; fold it in and reset
|
||||
// the in-progress accumulator for the next step.
|
||||
capturedSteps.push(step as StepLike);
|
||||
inProgressText = '';
|
||||
},
|
||||
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
|
||||
// baseline for Safari comparison.
|
||||
const diagNow = Date.now();
|
||||
this.logger.log(
|
||||
`AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
|
||||
);
|
||||
await persistAssistant({
|
||||
text,
|
||||
toolCalls: serializeSteps(steps),
|
||||
metadata: {
|
||||
finishReason,
|
||||
// Persist the turn's cumulative usage WITH reasoning tokens resolved
|
||||
// from either the new `outputTokenDetails` or the deprecated top-level
|
||||
// field, so reopened history / the Markdown export show the thinking
|
||||
// token cost too.
|
||||
usage: normalizeStreamUsage(totalUsage as StreamUsage) ?? totalUsage,
|
||||
// Final-step usage = the context actually fed to the model on the last LLM
|
||||
// call (full history + tool results) plus the answer it just generated.
|
||||
// input+output of the FINAL step ≈ the conversation's CURRENT context size,
|
||||
// distinct from totalUsage which sums every step (cumulative tokens spent).
|
||||
contextTokens:
|
||||
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined,
|
||||
// Persist the FULL set of UIMessage parts for the turn (text +
|
||||
// tool-call/result), so the rebuilt history replays prior tool
|
||||
// context to the model on later turns.
|
||||
parts: assistantParts(steps, text),
|
||||
},
|
||||
});
|
||||
// Lifecycle: release the external MCP clients leased for this turn.
|
||||
await closeExternalClients();
|
||||
|
||||
// Generate the chat title for a freshly created chat AFTER the stream's
|
||||
// provider call has completed — NOT concurrently with it. The z.ai coding
|
||||
// endpoint stalls one of two concurrent requests to the same plan, which
|
||||
// black-holed the chat stream (~300s headers timeout) when title
|
||||
// generation raced it. Running it here (solo, fire-and-forget) avoids the
|
||||
// race; never block the turn on it, swallow any error.
|
||||
if (isNewChat && incomingText) {
|
||||
void this.generateTitle(chatId, workspace.id, incomingText).catch(
|
||||
(err) => {
|
||||
this.logger.warn(
|
||||
`Title generation failed: ${(err as Error)?.message ?? err}`,
|
||||
);
|
||||
},
|
||||
model,
|
||||
system,
|
||||
messages,
|
||||
tools,
|
||||
// No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full
|
||||
// page body for the write tools) are emitted as OUTPUT tokens, so a fixed
|
||||
// cap would truncate complex tool calls mid-argument. Let the model use its
|
||||
// natural per-step budget. (Cost/credit limits are an account concern, not
|
||||
// something to enforce by silently breaking the agent.)
|
||||
stopWhen: stepCountIs(MAX_AGENT_STEPS),
|
||||
// Forced finalization: reserve the LAST allowed step for a text-only
|
||||
// answer. Without this, a turn that spends all its steps on tool calls
|
||||
// ends with no assistant text (an empty turn). prepareAgentStep forbids
|
||||
// further tool calls and appends a synthesis instruction on that step,
|
||||
// concatenated onto the original `system` so the persona is preserved.
|
||||
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
|
||||
abortSignal: signal,
|
||||
onChunk: ({ chunk }) => {
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
|
||||
// output chunk means the stream is actively emitting bytes; track first
|
||||
// + most-recent activity timestamps.
|
||||
const now = Date.now();
|
||||
firstModelChunkAt ??= now;
|
||||
lastModelChunkAt = now;
|
||||
// 'text-delta' is the assistant's prose; tool-call args are separate chunk
|
||||
// types — so this mirrors exactly what streams to the client.
|
||||
if (chunk.type === 'text-delta') inProgressText += chunk.text;
|
||||
},
|
||||
onStepFinish: (step) => {
|
||||
// The finished step's full text is now in `step.text`; fold it in and reset
|
||||
// the in-progress accumulator for the next step.
|
||||
capturedSteps.push(step as StepLike);
|
||||
inProgressText = '';
|
||||
// Step-granular durability (#183): persist this finished step (its text +
|
||||
// tool calls + tool RESULTS) the moment it ends, so a process death after
|
||||
// this point still recovers the step. Fire-and-forget but error-tolerant
|
||||
// (updateStreaming logs + swallows) — never throw into the stream.
|
||||
void updateStreaming();
|
||||
},
|
||||
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
|
||||
// baseline for Safari comparison.
|
||||
const diagNow = Date.now();
|
||||
this.logger.log(
|
||||
`AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
|
||||
);
|
||||
}
|
||||
},
|
||||
onError: async ({ error }) => {
|
||||
// NestJS Logger.error(message, stack?, context?): pass the real message
|
||||
// (with statusCode when present) + the stack string, not the Error
|
||||
// object, so the actual provider cause is clearly logged. Reuse the
|
||||
// shared formatter so provider error formatting stays unified.
|
||||
const e = error as { stack?: string };
|
||||
const errorText = describeProviderError(error, String(error));
|
||||
this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
|
||||
// an error-terminated stream.
|
||||
const diagNow = Date.now();
|
||||
this.logger.warn(
|
||||
`AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
|
||||
);
|
||||
// Persist the PARTIAL answer streamed before the failure (text + any
|
||||
// finished tool steps) WITH the error in metadata, so the turn shows what
|
||||
// the user already saw plus the cause — not just a bare error.
|
||||
await persistAssistant(
|
||||
buildPartialAssistantRecord(
|
||||
capturedSteps,
|
||||
inProgressText,
|
||||
'error',
|
||||
errorText,
|
||||
),
|
||||
);
|
||||
await closeExternalClients();
|
||||
},
|
||||
onAbort: async ({ steps }) => {
|
||||
const partialChars =
|
||||
capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) +
|
||||
inProgressText.length;
|
||||
// Unlike onError/onFinish, this terminal path otherwise writes nothing, so
|
||||
// an aborted turn (client disconnect / proxy drop / stop()) would be
|
||||
// invisible in the logs. Log it (warn) so the abort is traceable.
|
||||
this.logger.warn(
|
||||
`AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
|
||||
`step(s), ${partialChars} chars partial text; persisting partial turn.`,
|
||||
);
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
|
||||
// line — classifies the Safari drop.
|
||||
const diagNow = Date.now();
|
||||
this.logger.warn(
|
||||
`AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
|
||||
`steps=${steps.length}`,
|
||||
);
|
||||
await persistAssistant(
|
||||
buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),
|
||||
);
|
||||
await closeExternalClients();
|
||||
},
|
||||
// Finalize the assistant row (#183): the upfront 'streaming' row is
|
||||
// UPDATEd to 'completed' with the turn's final text, cumulative usage and
|
||||
// full UIMessage parts. We pass the SDK `steps` (which carry the final
|
||||
// step's text) as the captured steps so metadata.parts matches the
|
||||
// pre-#183 onFinish record exactly; `inProgressText` is '' here (the last
|
||||
// step already finished). Final-step usage (usage.input+output) ≈ the
|
||||
// conversation's CURRENT context size, distinct from totalUsage.
|
||||
await finalizeAssistant(
|
||||
flushAssistant(steps as StepLike[], '', 'completed', {
|
||||
finishReason: finishReason as string,
|
||||
usage: totalUsage as StreamUsage,
|
||||
contextTokens:
|
||||
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
|
||||
undefined,
|
||||
}),
|
||||
);
|
||||
// Lifecycle: release the external MCP clients leased for this turn.
|
||||
await closeExternalClients();
|
||||
|
||||
// Generate the chat title for a freshly created chat AFTER the stream's
|
||||
// provider call has completed — NOT concurrently with it. The z.ai coding
|
||||
// endpoint stalls one of two concurrent requests to the same plan, which
|
||||
// black-holed the chat stream (~300s headers timeout) when title
|
||||
// generation raced it. Running it here (solo, fire-and-forget) avoids the
|
||||
// race; never block the turn on it, swallow any error.
|
||||
if (isNewChat && incomingText) {
|
||||
void this.generateTitle(chatId, workspace.id, incomingText).catch(
|
||||
(err) => {
|
||||
this.logger.warn(
|
||||
`Title generation failed: ${(err as Error)?.message ?? err}`,
|
||||
);
|
||||
},
|
||||
);
|
||||
}
|
||||
},
|
||||
onError: async ({ error }) => {
|
||||
// NestJS Logger.error(message, stack?, context?): pass the real message
|
||||
// (with statusCode when present) + the stack string, not the Error
|
||||
// object, so the actual provider cause is clearly logged. Reuse the
|
||||
// shared formatter so provider error formatting stays unified.
|
||||
const e = error as { stack?: string };
|
||||
const errorText = describeProviderError(error, String(error));
|
||||
this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
|
||||
// an error-terminated stream.
|
||||
const diagNow = Date.now();
|
||||
this.logger.warn(
|
||||
`AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
|
||||
);
|
||||
// Finalize the PARTIAL answer streamed before the failure (text + any
|
||||
// finished tool steps) WITH the error in metadata, so the turn shows what
|
||||
// the user already saw plus the cause — not just a bare error. Status
|
||||
// 'error' (#183).
|
||||
await finalizeAssistant(
|
||||
flushAssistant(capturedSteps, inProgressText, 'error', {
|
||||
error: errorText,
|
||||
}),
|
||||
);
|
||||
await closeExternalClients();
|
||||
},
|
||||
onAbort: async ({ steps }) => {
|
||||
const partialChars =
|
||||
capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) +
|
||||
inProgressText.length;
|
||||
// Unlike onError/onFinish, this terminal path otherwise writes nothing, so
|
||||
// an aborted turn (client disconnect / proxy drop / stop()) would be
|
||||
// invisible in the logs. Log it (warn) so the abort is traceable.
|
||||
this.logger.warn(
|
||||
`AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
|
||||
`step(s), ${partialChars} chars partial text; persisting partial turn.`,
|
||||
);
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
|
||||
// line — classifies the Safari drop.
|
||||
const diagNow = Date.now();
|
||||
this.logger.warn(
|
||||
`AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
|
||||
`steps=${steps.length}`,
|
||||
);
|
||||
await finalizeAssistant(
|
||||
flushAssistant(capturedSteps, inProgressText, 'aborted'),
|
||||
);
|
||||
await closeExternalClients();
|
||||
},
|
||||
});
|
||||
|
||||
// Drain the stream independently of the client socket so the turn always
|
||||
@@ -652,7 +744,10 @@ export class AiChatService {
|
||||
'punctuation at the end.',
|
||||
prompt: firstMessage.slice(0, 2000),
|
||||
});
|
||||
const title = text.trim().replace(/^["']|["']$/g, '').slice(0, 120);
|
||||
const title = text
|
||||
.trim()
|
||||
.replace(/^["']|["']$/g, '')
|
||||
.slice(0, 120);
|
||||
if (title) {
|
||||
await this.aiChatRepo.update(chatId, { title }, workspaceId);
|
||||
}
|
||||
@@ -974,6 +1069,82 @@ export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
|
||||
return { id: row.id, role, parts: parts as UIMessage['parts'] };
|
||||
}
|
||||
|
||||
/**
|
||||
* The persisted-row patch shape produced by {@link flushAssistant}. It is the
|
||||
* SAME shape the assistant repo insert/update consume (content + toolCalls +
|
||||
* metadata) plus the lifecycle `status` column added in #183.
|
||||
*/
|
||||
export interface AssistantFlush {
|
||||
content: string;
|
||||
toolCalls: unknown;
|
||||
metadata: Record<string, unknown>;
|
||||
status: 'streaming' | 'completed' | 'error' | 'aborted';
|
||||
}
|
||||
|
||||
/**
|
||||
* PURE assistant-row builder (#183 step-granular durability). Given the turn's
|
||||
* accumulated steps + the in-progress (not-yet-finished) text + the lifecycle
|
||||
* status, it returns the row patch to persist. The SAME path runs for the
|
||||
* upfront insert (empty steps, status 'streaming'), every per-step update, and
|
||||
* the terminal finalize (completed/error/aborted) — and a future background
|
||||
* worker can call it identically, so it must stay a pure function of its inputs
|
||||
* (NO `this`, no IO).
|
||||
*
|
||||
* `metadata.parts` is built by the EXACT same logic the old
|
||||
* buildPartialAssistantRecord used (assistantParts over finished steps, then the
|
||||
* in-progress text appended as a trailing text part), so rowToUiMessage /
|
||||
* findRecent keep replaying the turn unchanged. `metadata.finishReason`,
|
||||
* `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
|
||||
* only when provided/relevant, matching the pre-#183 onFinish/onError records.
|
||||
*/
|
||||
export function flushAssistant(
|
||||
capturedSteps: ReadonlyArray<StepLike> | undefined,
|
||||
inProgressText: string,
|
||||
status: 'streaming' | 'completed' | 'error' | 'aborted',
|
||||
extra?: {
|
||||
finishReason?: string;
|
||||
usage?: ChatStreamUsage | StreamUsage | undefined;
|
||||
contextTokens?: number;
|
||||
error?: string;
|
||||
},
|
||||
): AssistantFlush {
|
||||
const finished = capturedSteps ?? [];
|
||||
const stepsText = finished.map((s) => s.text ?? '').join('');
|
||||
const trailing = inProgressText ?? '';
|
||||
// assistantParts emits text parts only for FINISHED steps; append the
|
||||
// in-progress step's text (the partial answer cut off by an error/abort, or
|
||||
// simply not yet flushed mid-stream) as the last text part so the persisted
|
||||
// parts match what streamed to the client.
|
||||
const parts = assistantParts(finished, '') as unknown as Array<
|
||||
Record<string, unknown>
|
||||
>;
|
||||
if (trailing) parts.push({ type: 'text', text: trailing });
|
||||
|
||||
const metadata: Record<string, unknown> = {
|
||||
parts: parts as unknown as UIMessage['parts'],
|
||||
};
|
||||
// finishReason: prefer an explicit one; else derive a sensible value from the
|
||||
// terminal status (so onError/onAbort records keep their historical reason).
|
||||
if (extra?.finishReason) {
|
||||
metadata.finishReason = extra.finishReason;
|
||||
} else if (status === 'error' || status === 'aborted') {
|
||||
metadata.finishReason = status;
|
||||
}
|
||||
if (extra?.usage !== undefined) {
|
||||
metadata.usage =
|
||||
normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
|
||||
}
|
||||
if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
|
||||
if (extra?.error) metadata.error = extra.error;
|
||||
|
||||
return {
|
||||
content: stepsText + trailing,
|
||||
toolCalls: serializeSteps(finished),
|
||||
metadata,
|
||||
status,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the assistant-message record persisted on a partial/failed turn (the
|
||||
* streamText onError / onAbort paths). Captures the partial answer the user
|
||||
@@ -982,6 +1153,9 @@ export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
|
||||
* it is recorded in metadata.error so the cause shows in history; an aborted
|
||||
* turn passes none. Pure, so the partial-recording shape is unit-testable
|
||||
* without seaming streamText.
|
||||
*
|
||||
* Thin wrapper over {@link flushAssistant} (retained for the existing unit
|
||||
* tests and its historical `{ text, toolCalls, metadata }` shape).
|
||||
*/
|
||||
export function buildPartialAssistantRecord(
|
||||
steps: ReadonlyArray<StepLike> | undefined,
|
||||
@@ -989,24 +1163,13 @@ export function buildPartialAssistantRecord(
|
||||
finishReason: 'error' | 'aborted',
|
||||
errorText?: string,
|
||||
): { text: string; toolCalls: unknown; metadata: Record<string, unknown> } {
|
||||
const finished = steps ?? [];
|
||||
const stepsText = finished.map((s) => s.text ?? '').join('');
|
||||
const trailing = inProgressText ?? '';
|
||||
// assistantParts emits text parts only for FINISHED steps; append the
|
||||
// in-progress step's text (the answer cut off by the error) as the last text
|
||||
// part so the persisted parts match what streamed to the client.
|
||||
const parts = assistantParts(finished, '') as unknown as Array<
|
||||
Record<string, unknown>
|
||||
>;
|
||||
if (trailing) parts.push({ type: 'text', text: trailing });
|
||||
const flushed = flushAssistant(steps, inProgressText, finishReason, {
|
||||
error: errorText,
|
||||
});
|
||||
return {
|
||||
text: stepsText + trailing,
|
||||
toolCalls: serializeSteps(finished),
|
||||
metadata: {
|
||||
finishReason,
|
||||
parts: parts as unknown as UIMessage['parts'],
|
||||
...(errorText ? { error: errorText } : {}),
|
||||
},
|
||||
text: flushed.content,
|
||||
toolCalls: flushed.toolCalls,
|
||||
metadata: flushed.metadata,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
221
apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
Normal file
221
apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
Normal file
@@ -0,0 +1,221 @@
|
||||
import { buildChatMarkdown, normalizeLang } from './chat-markdown.util';
|
||||
import type { AiChatMessage } from '@docmost/db/types/entity.types';
|
||||
|
||||
/**
|
||||
* normalizeLang: the client sends `i18n.language` — a FULL locale tag like
|
||||
* 'en-US' / 'ru-RU', NOT a bare 'en'/'ru'. A `@IsIn(['en','ru'])` DTO rejected
|
||||
* that with a 400 (caught in real-browser testing); the export now accepts any
|
||||
* string and normalizes here. Guards that regression.
|
||||
*/
|
||||
describe('normalizeLang', () => {
|
||||
it("maps any 'ru…' locale tag to ru", () => {
|
||||
expect(normalizeLang('ru')).toBe('ru');
|
||||
expect(normalizeLang('ru-RU')).toBe('ru');
|
||||
expect(normalizeLang('RU-ru')).toBe('ru');
|
||||
});
|
||||
|
||||
it('maps everything else (incl. region-qualified English) to en', () => {
|
||||
expect(normalizeLang('en')).toBe('en');
|
||||
expect(normalizeLang('en-US')).toBe('en');
|
||||
expect(normalizeLang('fr-FR')).toBe('en');
|
||||
expect(normalizeLang(undefined)).toBe('en');
|
||||
expect(normalizeLang('')).toBe('en');
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Unit tests for the SERVER Markdown export (#183). Mirrors the coverage of the
|
||||
* (now-removed) client chat-markdown tests: heading/metadata, role labels, text
|
||||
* + tool blocks, token footers, the interrupted-turn note, and NULL-status
|
||||
* (legacy) rows. The export embeds a live `new Date().toISOString()` timestamp;
|
||||
* we never assert it, only the deterministic structure.
|
||||
*/
|
||||
|
||||
function row(partial: Partial<AiChatMessage>): AiChatMessage {
|
||||
return {
|
||||
id: partial.id ?? 'id',
|
||||
chatId: partial.chatId ?? 'chat-1',
|
||||
workspaceId: partial.workspaceId ?? 'ws-1',
|
||||
userId: partial.userId ?? null,
|
||||
role: partial.role ?? 'user',
|
||||
content: partial.content ?? null,
|
||||
toolCalls: partial.toolCalls ?? null,
|
||||
metadata: partial.metadata ?? null,
|
||||
status: partial.status ?? null,
|
||||
createdAt: partial.createdAt ?? ('2026-06-21T00:00:00.000Z' as never),
|
||||
updatedAt: partial.updatedAt ?? ('2026-06-21T00:00:00.000Z' as never),
|
||||
deletedAt: partial.deletedAt ?? null,
|
||||
} as AiChatMessage;
|
||||
}
|
||||
|
||||
describe('buildChatMarkdown (server) — structure', () => {
|
||||
it('emits the title heading, chat id and message count', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'My chat',
|
||||
chatId: 'chat-123',
|
||||
rows: [],
|
||||
});
|
||||
expect(md).toContain('# My chat');
|
||||
expect(md).toContain('- Chat ID: `chat-123`');
|
||||
expect(md).toContain('- Messages: 0');
|
||||
});
|
||||
|
||||
it('falls back to "Untitled chat" with no title (en)', () => {
|
||||
const md = buildChatMarkdown({ title: null, chatId: 'c', rows: [] });
|
||||
expect(md).toContain('# Untitled chat');
|
||||
});
|
||||
|
||||
it('localizes fixed labels with lang=ru (structure stays English)', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: null,
|
||||
chatId: 'c',
|
||||
lang: 'ru',
|
||||
rows: [row({ role: 'assistant', content: 'hi' })],
|
||||
});
|
||||
expect(md).toContain('# Без названия');
|
||||
expect(md).toContain('## 1. ИИ-агент');
|
||||
// Structural words remain English.
|
||||
expect(md).toContain('- Chat ID:');
|
||||
});
|
||||
|
||||
it('numbers messages and labels roles (You / AI agent)', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [
|
||||
row({ role: 'user', content: 'question' }),
|
||||
row({ role: 'assistant', content: 'answer' }),
|
||||
],
|
||||
});
|
||||
expect(md).toContain('## 1. You');
|
||||
expect(md).toContain('question');
|
||||
expect(md).toContain('## 2. AI agent');
|
||||
expect(md).toContain('answer');
|
||||
});
|
||||
|
||||
it('renders a tool part with fenced input/output and the friendly label', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [
|
||||
row({
|
||||
role: 'assistant',
|
||||
content: 'done',
|
||||
metadata: {
|
||||
parts: [
|
||||
{
|
||||
type: 'tool-getPage',
|
||||
state: 'output-available',
|
||||
input: { id: 'p1' },
|
||||
output: { title: 'Hello' },
|
||||
},
|
||||
{ type: 'text', text: 'done' },
|
||||
],
|
||||
} as never,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(md).toContain('**Tool: Read page** (`getPage`) — done');
|
||||
expect(md).toContain('Input:');
|
||||
expect(md).toContain('"id": "p1"');
|
||||
expect(md).toContain('Output:');
|
||||
expect(md).toContain('"title": "Hello"');
|
||||
});
|
||||
|
||||
it('emits a token footer + total when usage is present', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [
|
||||
row({
|
||||
role: 'assistant',
|
||||
content: 'a',
|
||||
metadata: {
|
||||
usage: {
|
||||
inputTokens: 100,
|
||||
outputTokens: 20,
|
||||
totalTokens: 120,
|
||||
reasoningTokens: 8,
|
||||
},
|
||||
} as never,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(md).toContain('- Total tokens: 120');
|
||||
expect(md).toContain(
|
||||
'_Tokens — in: 100, out: 20, reasoning: 8, total: 120_',
|
||||
);
|
||||
});
|
||||
|
||||
it('flags a still-streaming (interrupted) row', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [
|
||||
row({ role: 'assistant', content: 'partial', status: 'streaming' }),
|
||||
],
|
||||
});
|
||||
expect(md).toContain('still being generated');
|
||||
});
|
||||
|
||||
it('does NOT flag a completed row', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [row({ role: 'assistant', content: 'final', status: 'completed' })],
|
||||
});
|
||||
expect(md).not.toContain('still being generated');
|
||||
});
|
||||
|
||||
it('renders a legacy NULL-status row (no parts) from plain content', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [
|
||||
row({ role: 'assistant', content: 'legacy answer', status: null }),
|
||||
],
|
||||
});
|
||||
expect(md).toContain('legacy answer');
|
||||
expect(md).not.toContain('still being generated');
|
||||
});
|
||||
|
||||
it('renders a persisted error', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [
|
||||
row({
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
status: 'error',
|
||||
metadata: { error: '401: Unauthorized' } as never,
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(md).toContain('**⚠️ Error:** 401: Unauthorized');
|
||||
});
|
||||
|
||||
it('escapes embedded triple-backtick fences with a longer delimiter', () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: 'T',
|
||||
chatId: 'c',
|
||||
rows: [
|
||||
row({
|
||||
role: 'assistant',
|
||||
content: 'x',
|
||||
metadata: {
|
||||
parts: [
|
||||
{
|
||||
type: 'tool-getPage',
|
||||
state: 'output-available',
|
||||
output: '```inner```',
|
||||
},
|
||||
],
|
||||
} as never,
|
||||
}),
|
||||
],
|
||||
});
|
||||
// A 4-backtick fence wraps content that itself contains a 3-backtick run.
|
||||
expect(md).toContain('````');
|
||||
});
|
||||
});
|
||||
296
apps/server/src/core/ai-chat/chat-markdown.util.ts
Normal file
296
apps/server/src/core/ai-chat/chat-markdown.util.ts
Normal file
@@ -0,0 +1,296 @@
|
||||
/**
|
||||
* Server-side Markdown export for an AI agent chat (#183). The DB is the single
|
||||
* source of truth: this renders a chat purely from its persisted message rows
|
||||
* (`AiChatMessage[]` — role / content / metadata.parts / toolCalls / usage).
|
||||
* Because the assistant row is now persisted UPFRONT and updated per step, an
|
||||
* interrupted turn is included up to its last finished step.
|
||||
*
|
||||
* Ported from the client `utils/chat-markdown.ts`. It is a PURE function (apart
|
||||
* from `new Date()` for the export timestamp), so it is straightforward to
|
||||
* unit-test and a future background worker can reuse it.
|
||||
*
|
||||
* Only a few fixed role/tool labels are localized via the `lang` param; the
|
||||
* structural document words (Input/Output/Error/Tokens/...) stay English because
|
||||
* the output is a technical artifact.
|
||||
*/
|
||||
|
||||
import type { AiChatMessage } from '@docmost/db/types/entity.types';
|
||||
|
||||
/** Supported export label languages. Defaults to English. */
|
||||
export type ExportLang = 'en' | 'ru';
|
||||
|
||||
/**
|
||||
* Normalize an arbitrary client locale code to a supported export language. The
|
||||
* client sends `i18n.language`, which is a FULL locale tag (e.g. `en-US`,
|
||||
* `ru-RU`), not a bare `en`/`ru` — so match on the language subtag and fall back
|
||||
* to English for anything non-Russian.
|
||||
*/
|
||||
export function normalizeLang(lang?: string): ExportLang {
|
||||
return lang?.toLowerCase().startsWith('ru') ? 'ru' : 'en';
|
||||
}
|
||||
|
||||
/** A single AI SDK UIMessage part (text part or a tool part). */
|
||||
interface ExportPart {
|
||||
type: string;
|
||||
text?: string;
|
||||
state?: string;
|
||||
toolName?: string;
|
||||
input?: unknown;
|
||||
output?: unknown;
|
||||
errorText?: string;
|
||||
}
|
||||
|
||||
/** Authoritative per-turn usage the server attaches to a message row. */
|
||||
interface UsageLike {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
reasoningTokens?: number;
|
||||
}
|
||||
|
||||
/** Localized label table. Keep the keys identical to the client's i18n keys so
|
||||
* the two exports read the same. Only role + tool-action labels are localized;
|
||||
* everything structural is an English constant in the renderer. */
|
||||
const LABELS: Record<
|
||||
ExportLang,
|
||||
{
|
||||
untitled: string;
|
||||
aiAgent: string;
|
||||
you: string;
|
||||
tools: Record<string, string>;
|
||||
ranTool: (name: string) => string;
|
||||
stillGenerating: string;
|
||||
}
|
||||
> = {
|
||||
en: {
|
||||
untitled: 'Untitled chat',
|
||||
aiAgent: 'AI agent',
|
||||
you: 'You',
|
||||
tools: {
|
||||
searchPages: 'Searched pages',
|
||||
getPage: 'Read page',
|
||||
createPage: 'Created page',
|
||||
updatePageContent: 'Updated page',
|
||||
renamePage: 'Renamed page',
|
||||
movePage: 'Moved page',
|
||||
deletePage: 'Deleted page (to trash)',
|
||||
createComment: 'Commented',
|
||||
resolveComment: 'Resolved comment',
|
||||
},
|
||||
ranTool: (name) => `Ran tool ${name}`,
|
||||
stillGenerating:
|
||||
'This message is still being generated — the export captured a partial, in-progress response.',
|
||||
},
|
||||
ru: {
|
||||
untitled: 'Без названия',
|
||||
aiAgent: 'ИИ-агент',
|
||||
you: 'Вы',
|
||||
tools: {
|
||||
searchPages: 'Искал по страницам',
|
||||
getPage: 'Прочитал страницу',
|
||||
createPage: 'Создал страницу',
|
||||
updatePageContent: 'Обновил страницу',
|
||||
renamePage: 'Переименовал страницу',
|
||||
movePage: 'Переместил страницу',
|
||||
deletePage: 'Удалил страницу (в корзину)',
|
||||
createComment: 'Прокомментировал',
|
||||
resolveComment: 'Закрыл комментарий',
|
||||
},
|
||||
ranTool: (name) => `Выполнил инструмент ${name}`,
|
||||
stillGenerating:
|
||||
'Это сообщение всё ещё генерируется — экспорт захватил частичный, незавершённый ответ.',
|
||||
},
|
||||
};
|
||||
|
||||
/** True for AI SDK tool parts (static `tool-*` or `dynamic-tool`). */
|
||||
function isToolPart(type: string): boolean {
|
||||
return type.startsWith('tool-') || type === 'dynamic-tool';
|
||||
}
|
||||
|
||||
/** Extract the tool name from a part `type` of `tool-${name}` (or dynamic). */
|
||||
function getToolName(part: ExportPart): string {
|
||||
if (part.type === 'dynamic-tool') return part.toolName ?? '';
|
||||
return part.type.startsWith('tool-')
|
||||
? part.type.slice('tool-'.length)
|
||||
: part.type;
|
||||
}
|
||||
|
||||
/** Map an AI SDK tool-part state to the 3 states the action-log renders. */
|
||||
function toolRunState(state: string | undefined): 'running' | 'done' | 'error' {
|
||||
if (state === 'output-error' || state === 'output-denied') return 'error';
|
||||
if (state === 'output-available') return 'done';
|
||||
return 'running';
|
||||
}
|
||||
|
||||
/** Resolve a tool's friendly action-log label (localized) from its name. */
|
||||
function toolLabel(name: string, lang: ExportLang): string {
|
||||
return LABELS[lang].tools[name] ?? LABELS[lang].ranTool(name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stringify an arbitrary tool input/output value for a fenced block. Strings
|
||||
* pass through as-is; everything else is pretty-printed JSON, falling back to
|
||||
* `String(value)` if serialization throws (e.g. a circular structure).
|
||||
*/
|
||||
function stringify(value: unknown): string {
|
||||
if (typeof value === 'string') return value;
|
||||
try {
|
||||
return JSON.stringify(value, null, 2);
|
||||
} catch {
|
||||
return String(value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap `code` in a fenced code block whose backtick delimiter is LONGER than the
|
||||
* longest backtick run inside the content, so embedded backticks (or a literal
|
||||
* ``` fence) never break out of the block. Minimum 3 backticks.
|
||||
*/
|
||||
function fence(code: string, lang = ''): string {
|
||||
const runs: string[] = code.match(/`+/g) ?? [];
|
||||
const longest = runs.reduce((m, s) => Math.max(m, s.length), 0);
|
||||
const delim = '`'.repeat(Math.max(3, longest + 1));
|
||||
return `${delim}${lang}\n${code}\n${delim}`;
|
||||
}
|
||||
|
||||
/** Per-row token count, mirroring the header sum in the client window. */
|
||||
function rowTokens(usage: UsageLike): number {
|
||||
return (
|
||||
usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
|
||||
);
|
||||
}
|
||||
|
||||
/** Render one message's UIMessage parts into an array of Markdown blocks
|
||||
* (text blocks + tool blocks). Mirrors the client renderer / MessageItem. */
|
||||
function renderMessageParts(parts: ExportPart[], lang: ExportLang): string[] {
|
||||
const out: string[] = [];
|
||||
|
||||
for (const part of parts) {
|
||||
if (part.type === 'text') {
|
||||
const text = (part.text ?? '').trim();
|
||||
if (text.length > 0) out.push(text);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!isToolPart(part.type)) continue;
|
||||
|
||||
const name = getToolName(part);
|
||||
const label = toolLabel(name, lang);
|
||||
const state = toolRunState(part.state);
|
||||
|
||||
const toolLines: string[] = [`**Tool: ${label}** (\`${name}\`) — ${state}`];
|
||||
if (part.input !== undefined) {
|
||||
toolLines.push('Input:');
|
||||
toolLines.push(fence(stringify(part.input), 'json'));
|
||||
}
|
||||
if (part.output !== undefined) {
|
||||
toolLines.push('Output:');
|
||||
toolLines.push(fence(stringify(part.output), 'json'));
|
||||
}
|
||||
if (part.errorText) {
|
||||
toolLines.push(`**Error:** ${part.errorText}`);
|
||||
}
|
||||
out.push(toolLines.join('\n\n'));
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/** Resolve a persisted row's parts: prefer the rich persisted parts, else a
|
||||
* single text part built from the plain-text content (mirrors rowToUiMessage). */
|
||||
function rowParts(row: AiChatMessage): ExportPart[] {
|
||||
const meta = (row.metadata ?? {}) as { parts?: ExportPart[] };
|
||||
return Array.isArray(meta.parts) && meta.parts.length > 0
|
||||
? meta.parts
|
||||
: [{ type: 'text', text: row.content ?? '' }];
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize a chat to a Markdown string from its persisted rows. Source = DB
|
||||
* ONLY (no live client state). A row whose `status` is still 'streaming' is an
|
||||
* interrupted turn that the export captured mid-flight; it is rendered up to its
|
||||
* last finished step and flagged "still generating".
|
||||
*/
|
||||
export function buildChatMarkdown(args: {
|
||||
title: string | null;
|
||||
chatId: string;
|
||||
rows: AiChatMessage[];
|
||||
// Accepts a full client locale tag (e.g. 'en-US'/'ru-RU'); normalized below.
|
||||
lang?: string;
|
||||
}): string {
|
||||
const { title, chatId, rows } = args;
|
||||
const lang: ExportLang = normalizeLang(args.lang);
|
||||
const L = LABELS[lang];
|
||||
const blocks: string[] = [];
|
||||
|
||||
const heading = (title ?? '').trim() || L.untitled;
|
||||
blocks.push(`# ${heading}`);
|
||||
|
||||
const usageOf = (row: AiChatMessage): UsageLike | undefined => {
|
||||
const meta = (row.metadata ?? {}) as { usage?: UsageLike };
|
||||
return meta.usage;
|
||||
};
|
||||
const errorOf = (row: AiChatMessage): string | undefined => {
|
||||
const meta = (row.metadata ?? {}) as { error?: string };
|
||||
return meta.error ?? undefined;
|
||||
};
|
||||
|
||||
// Metadata bullet list. Total tokens is only shown when there is a sum.
|
||||
const totalTokens = rows.reduce((sum, row) => {
|
||||
const usage = usageOf(row);
|
||||
return usage ? sum + rowTokens(usage) : sum;
|
||||
}, 0);
|
||||
const meta = [
|
||||
`- Chat ID: \`${chatId}\``,
|
||||
`- Exported: ${new Date().toISOString()}`,
|
||||
`- Messages: ${rows.length}`,
|
||||
];
|
||||
if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
|
||||
blocks.push(meta.join('\n'));
|
||||
|
||||
rows.forEach((row, index) => {
|
||||
blocks.push('---');
|
||||
|
||||
const roleLabel = row.role === 'assistant' ? L.aiAgent : L.you;
|
||||
blocks.push(`## ${index + 1}. ${roleLabel}`);
|
||||
|
||||
// Created-at kept in source as an HTML comment (out of the rendered prose).
|
||||
if (row.createdAt) {
|
||||
const iso =
|
||||
row.createdAt instanceof Date
|
||||
? row.createdAt.toISOString()
|
||||
: String(row.createdAt);
|
||||
blocks.push(`<!-- ${iso} -->`);
|
||||
}
|
||||
|
||||
blocks.push(...renderMessageParts(rowParts(row), lang));
|
||||
|
||||
// A still-'streaming' row is an interrupted/in-progress turn captured by the
|
||||
// export; record that so the partial answer is not mistaken for complete.
|
||||
if (row.status === 'streaming') {
|
||||
blocks.push(`_⏳ ${L.stillGenerating}_`);
|
||||
}
|
||||
|
||||
const error = errorOf(row);
|
||||
if (error) {
|
||||
blocks.push(`**⚠️ Error:** ${error}`);
|
||||
}
|
||||
|
||||
const usage = usageOf(row);
|
||||
if (usage) {
|
||||
const total = usage.totalTokens ?? rowTokens(usage);
|
||||
const reasoning =
|
||||
usage.reasoningTokens && usage.reasoningTokens > 0
|
||||
? `, reasoning: ${usage.reasoningTokens}`
|
||||
: '';
|
||||
blocks.push(
|
||||
`_Tokens — in: ${usage.inputTokens ?? '?'}, out: ${
|
||||
usage.outputTokens ?? '?'
|
||||
}${reasoning}, total: ${total}_`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// Blank line between blocks so the Markdown renders cleanly.
|
||||
return blocks.join('\n\n');
|
||||
}
|
||||
@@ -26,3 +26,17 @@ export class GetChatMessagesDto {
|
||||
@IsString()
|
||||
cursor?: string;
|
||||
}
|
||||
|
||||
/** Export a chat to Markdown (#183). `lang` localizes the few fixed
|
||||
* role/tool-action labels; defaults to English server-side. */
|
||||
export class ExportChatDto {
|
||||
@IsString()
|
||||
chatId: string;
|
||||
|
||||
// A full client locale tag (e.g. 'en-US', 'ru-RU') — normalized server-side to
|
||||
// a supported export language (see normalizeLang). Accept any string so a
|
||||
// region-qualified locale is not rejected (the 400 that broke the real client).
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
lang?: string;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
import { type Kysely } from 'kysely';
|
||||
|
||||
export async function up(db: Kysely<any>): Promise<void> {
|
||||
// Step-granular durability for the assistant turn (#183). The assistant row is
|
||||
// now created UPFRONT (status 'streaming') and UPDATEd as each step completes,
|
||||
// so a process death mid-turn no longer loses the whole answer. The column is
|
||||
// NULLABLE on purpose: rows written before this migration carry NULL, which the
|
||||
// app treats as 'completed' (a settled, pre-status message). Values written by
|
||||
// the app: 'streaming' | 'completed' | 'error' | 'aborted'.
|
||||
await db.schema
|
||||
.alterTable('ai_chat_messages')
|
||||
.addColumn('status', 'text', (col) => col)
|
||||
.execute();
|
||||
}
|
||||
|
||||
export async function down(db: Kysely<any>): Promise<void> {
|
||||
await db.schema.alterTable('ai_chat_messages').dropColumn('status').execute();
|
||||
}
|
||||
@@ -25,6 +25,7 @@ export class AiChatMessageRepo {
|
||||
'content',
|
||||
'toolCalls',
|
||||
'metadata',
|
||||
'status',
|
||||
'createdAt',
|
||||
'updatedAt',
|
||||
'deletedAt',
|
||||
@@ -60,6 +61,26 @@ export class AiChatMessageRepo {
|
||||
});
|
||||
}
|
||||
|
||||
// Load ALL (non-deleted) messages of a chat in ascending chronological order
|
||||
// (oldest -> newest), unpaginated. Used by the server-side Markdown export
|
||||
// (#183), where the DB is the single source of truth and the whole transcript
|
||||
// must be rendered in one pass (findByChat is cursor-paginated and would only
|
||||
// return the first page).
|
||||
async findAllByChat(
|
||||
chatId: string,
|
||||
workspaceId: string,
|
||||
): Promise<AiChatMessage[]> {
|
||||
return this.db
|
||||
.selectFrom('aiChatMessages')
|
||||
.select(this.baseFields)
|
||||
.where('chatId', '=', chatId)
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.where('deletedAt', 'is', null)
|
||||
.orderBy('createdAt', 'asc')
|
||||
.orderBy('id', 'asc')
|
||||
.execute();
|
||||
}
|
||||
|
||||
// Load the most RECENT `limit` messages for a chat and return them in
|
||||
// ascending chronological order (oldest -> newest), as the model expects.
|
||||
// `findByChat` returns the FIRST page ASC (the OLDEST messages), which loses
|
||||
@@ -96,4 +117,50 @@ export class AiChatMessageRepo {
|
||||
.returning(this.baseFields)
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a single message in place by id + workspace (#183 step-granular
|
||||
* durability). The assistant row is created UPFRONT (status 'streaming') and
|
||||
* patched as each step completes, then finalized once on the terminal status.
|
||||
* `updatedAt` is always bumped. Returns the updated row (baseFields) or
|
||||
* undefined when no row matched (e.g. a foreign workspace / deleted row).
|
||||
*/
|
||||
async update(
|
||||
id: string,
|
||||
workspaceId: string,
|
||||
patch: Partial<{
|
||||
content: string | null;
|
||||
toolCalls: unknown;
|
||||
metadata: unknown;
|
||||
status: string | null;
|
||||
}>,
|
||||
trx?: KyselyTransaction,
|
||||
): Promise<AiChatMessage | undefined> {
|
||||
const db = dbOrTx(this.db, trx);
|
||||
return db
|
||||
.updateTable('aiChatMessages')
|
||||
.set({ ...(patch as Record<string, unknown>), updatedAt: new Date() })
|
||||
.where('id', '=', id)
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.returning(this.baseFields)
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Crash-recovery sweep (#183): flip every assistant row still left in the
|
||||
* 'streaming' state (a turn that died mid-write before reaching a terminal
|
||||
* status) to 'aborted'. Run once on server start. Returns the number of rows
|
||||
* swept so the caller can log it. Workspace-wide on purpose — a crash can have
|
||||
* dangling streaming rows across any workspace.
|
||||
*/
|
||||
async sweepStreaming(trx?: KyselyTransaction): Promise<number> {
|
||||
const db = dbOrTx(this.db, trx);
|
||||
const rows = await db
|
||||
.updateTable('aiChatMessages')
|
||||
.set({ status: 'aborted', updatedAt: new Date() })
|
||||
.where('status', '=', 'streaming')
|
||||
.returning('id')
|
||||
.execute();
|
||||
return rows.length;
|
||||
}
|
||||
}
|
||||
|
||||
4
apps/server/src/database/types/db.d.ts
vendored
4
apps/server/src/database/types/db.d.ts
vendored
@@ -620,6 +620,10 @@ export interface AiChatMessages {
|
||||
content: string | null;
|
||||
toolCalls: Json | null;
|
||||
metadata: Json | null;
|
||||
// Turn lifecycle status (#183): 'streaming' | 'completed' | 'error' |
|
||||
// 'aborted'. NULL on rows written before the status column existed; the app
|
||||
// treats NULL as 'completed' (a settled, pre-status message).
|
||||
status: string | null;
|
||||
tsv: string | null;
|
||||
createdAt: Generated<Timestamp>;
|
||||
updatedAt: Generated<Timestamp>;
|
||||
|
||||
150
apps/server/test/integration/ai-chat-message-status.int-spec.ts
Normal file
150
apps/server/test/integration/ai-chat-message-status.int-spec.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
import { Kysely } from 'kysely';
|
||||
import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
|
||||
import {
|
||||
getTestDb,
|
||||
destroyTestDb,
|
||||
createWorkspace,
|
||||
createUser,
|
||||
createChat,
|
||||
createMessage,
|
||||
} from './db';
|
||||
|
||||
/**
|
||||
* Integration coverage for the #183 step-granular durability primitives on
|
||||
* AiChatMessageRepo: `update` (in-place patch by id+workspace, bumps updatedAt,
|
||||
* returns the row) and `sweepStreaming` (crash recovery: flip dangling
|
||||
* 'streaming' rows to 'aborted'). Real SQL against docmost_test, not a mock.
|
||||
*/
|
||||
describe('AiChatMessageRepo.update + sweepStreaming [integration]', () => {
|
||||
let db: Kysely<any>;
|
||||
let repo: AiChatMessageRepo;
|
||||
let workspaceId: string;
|
||||
let otherWorkspaceId: string;
|
||||
let userId: string;
|
||||
let chatId: string;
|
||||
let otherChatId: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
db = getTestDb();
|
||||
repo = new AiChatMessageRepo(db as any);
|
||||
workspaceId = (await createWorkspace(db)).id;
|
||||
otherWorkspaceId = (await createWorkspace(db)).id;
|
||||
userId = (await createUser(db, workspaceId)).id;
|
||||
chatId = (await createChat(db, { workspaceId, creatorId: userId })).id;
|
||||
const otherUser = await createUser(db, otherWorkspaceId);
|
||||
otherChatId = (
|
||||
await createChat(db, {
|
||||
workspaceId: otherWorkspaceId,
|
||||
creatorId: otherUser.id,
|
||||
})
|
||||
).id;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await destroyTestDb();
|
||||
});
|
||||
|
||||
it('update patches content/status/metadata and bumps updatedAt', async () => {
|
||||
const seeded = await repo.insert({
|
||||
chatId,
|
||||
workspaceId,
|
||||
userId,
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
status: 'streaming',
|
||||
metadata: { parts: [] } as never,
|
||||
});
|
||||
const before = seeded.updatedAt;
|
||||
// Ensure a measurable timestamp delta.
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
|
||||
const updated = await repo.update(seeded.id, workspaceId, {
|
||||
content: 'final answer',
|
||||
status: 'completed',
|
||||
metadata: { parts: [{ type: 'text', text: 'final answer' }] },
|
||||
});
|
||||
|
||||
expect(updated).toBeDefined();
|
||||
expect(updated!.content).toBe('final answer');
|
||||
expect(updated!.status).toBe('completed');
|
||||
expect((updated!.metadata as any).parts).toHaveLength(1);
|
||||
expect(new Date(updated!.updatedAt).getTime()).toBeGreaterThanOrEqual(
|
||||
new Date(before).getTime(),
|
||||
);
|
||||
});
|
||||
|
||||
it('update is workspace-scoped: a foreign workspace id matches nothing', async () => {
|
||||
const seeded = await repo.insert({
|
||||
chatId,
|
||||
workspaceId,
|
||||
userId,
|
||||
role: 'assistant',
|
||||
content: 'orig',
|
||||
status: 'streaming',
|
||||
});
|
||||
const res = await repo.update(seeded.id, otherWorkspaceId, {
|
||||
status: 'completed',
|
||||
});
|
||||
expect(res).toBeUndefined();
|
||||
// The row in the real workspace is untouched.
|
||||
const rows = await repo.findAllByChat(chatId, workspaceId);
|
||||
const stillThere = rows.find((r) => r.id === seeded.id);
|
||||
expect(stillThere!.status).toBe('streaming');
|
||||
// Clean up so it does not pollute the sweep test below.
|
||||
await repo.update(seeded.id, workspaceId, { status: 'completed' });
|
||||
});
|
||||
|
||||
it('sweepStreaming flips dangling streaming rows to aborted and counts them', async () => {
|
||||
// Two dangling streaming rows in our workspace + one in another workspace.
|
||||
const a = await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId,
|
||||
role: 'assistant',
|
||||
status: 'streaming',
|
||||
});
|
||||
const b = await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId,
|
||||
role: 'assistant',
|
||||
status: 'streaming',
|
||||
});
|
||||
// A settled row must NOT be touched.
|
||||
const done = await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId,
|
||||
role: 'assistant',
|
||||
status: 'completed',
|
||||
});
|
||||
// A legacy NULL-status row must NOT be touched.
|
||||
const legacy = await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId,
|
||||
role: 'assistant',
|
||||
status: null,
|
||||
});
|
||||
await createMessage(db, {
|
||||
workspaceId: otherWorkspaceId,
|
||||
chatId: otherChatId,
|
||||
role: 'assistant',
|
||||
status: 'streaming',
|
||||
});
|
||||
|
||||
const swept = await repo.sweepStreaming();
|
||||
// At least the 3 streaming rows we created (2 here + 1 in the other ws).
|
||||
expect(swept).toBeGreaterThanOrEqual(3);
|
||||
|
||||
const rows = await repo.findAllByChat(chatId, workspaceId);
|
||||
const byId = new Map(rows.map((r) => [r.id, r]));
|
||||
expect(byId.get(a.id)!.status).toBe('aborted');
|
||||
expect(byId.get(b.id)!.status).toBe('aborted');
|
||||
expect(byId.get(done.id)!.status).toBe('completed');
|
||||
expect(byId.get(legacy.id)!.status).toBeNull();
|
||||
|
||||
// Idempotent: a second sweep finds nothing left in our seeded set.
|
||||
const again = await repo.sweepStreaming();
|
||||
const rows2 = await repo.findAllByChat(chatId, workspaceId);
|
||||
// Our two rows stay aborted regardless of `again`'s global count.
|
||||
expect(rows2.find((r) => r.id === a.id)!.status).toBe('aborted');
|
||||
expect(again).toBeGreaterThanOrEqual(0);
|
||||
});
|
||||
});
|
||||
@@ -104,7 +104,8 @@ export async function createWorkspace(
|
||||
name: overrides.name ?? `ws-${suffix}`,
|
||||
// hostname is uniquely constrained; keep it unique per workspace.
|
||||
hostname: `host-${suffix}`,
|
||||
settings: overrides.settings === undefined ? null : (overrides.settings as any),
|
||||
settings:
|
||||
overrides.settings === undefined ? null : (overrides.settings as any),
|
||||
})
|
||||
.returning(['id', 'settings'])
|
||||
.executeTakeFirstOrThrow();
|
||||
@@ -226,3 +227,33 @@ export async function createChat(
|
||||
.executeTakeFirstOrThrow();
|
||||
return { id: row.id as string };
|
||||
}
|
||||
|
||||
export async function createMessage(
|
||||
db: Kysely<any>,
|
||||
args: {
|
||||
workspaceId: string;
|
||||
chatId: string;
|
||||
userId?: string | null;
|
||||
role?: string;
|
||||
content?: string | null;
|
||||
status?: string | null;
|
||||
metadata?: unknown;
|
||||
},
|
||||
): Promise<{ id: string }> {
|
||||
const id = randomUUID();
|
||||
const row = await db
|
||||
.insertInto('aiChatMessages')
|
||||
.values({
|
||||
id,
|
||||
workspaceId: args.workspaceId,
|
||||
chatId: args.chatId,
|
||||
userId: args.userId ?? null,
|
||||
role: args.role ?? 'assistant',
|
||||
content: args.content ?? null,
|
||||
status: args.status ?? null,
|
||||
metadata: (args.metadata ?? null) as any,
|
||||
})
|
||||
.returning(['id'])
|
||||
.executeTakeFirstOrThrow();
|
||||
return { id: row.id as string };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user