Files
gitmost/apps/server/src/core/ai-chat/chat-markdown.util.ts
claude code agent 227 ea61c96a7c refactor(review): address PR #186 review (#183 — recency sweep, #174 export, tests, cleanups)
15-point review of the persistent-history PR. Architecture decisions: crash
recovery = recency threshold; tool-label duplication = leave as-is.

Must-fix:
1. Boot-sweep bounded by recency. sweepStreaming now also requires
   `updatedAt < now() - SWEEP_STREAMING_STALE_MS` (10 min), so a fresh replica's
   startup sweep can't abort a turn another replica is actively streaming
   (multi-instance deploy). Int-spec: a FRESH 'streaming' row is NOT swept, a
   STALE one IS.
2. Restore export during the FIRST streaming turn of a new chat (#174). The
   server chatId is now adopted EARLY (in-place, on the start-chunk metadata) via
   a new `onServerChatId` callback wired through use-chat-session → chat-thread,
   so `activeChatId` is set at turn start and the Copy button is live mid-first-
   turn (canExport = !!activeChatId). Hook tests for early/in-place/no-op adopt.
3. Cover finalizeAssistant's fallback-insert branch: extracted pure
   `planFinalizeAssistant(assistantId)` (update when id present, insert when the
   upfront insert failed) + a dispatch harness test for both arms.

Tests: onModuleInit lifecycle spec (sweep called; throw → resolves + warns);
int-spec updatedAt assertion → toBeGreaterThan.

Cleanups: cap findAllByChat at 5000 rows; upfront-insert-failure log carries
chatId+workspaceId; removed the now-dead buildPartialAssistantRecord (only the
spec consumed it; shapes still pinned by the flushAssistant suite); controller
passes `lang: dto.lang` (normalizeLang handles undefined); dropped a no-op
`?? undefined` in errorOf; documented the content-column semantics change
(concatenated step text, UI renders from metadata.parts); CHANGELOG [Unreleased]
entry (#183, #174); reworded the stale LABELS parity comment.

Verified: server build + 323 ai-chat unit + 5 integration; client tsc + 160
ai-chat unit; prettier clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-25 11:53:25 +03:00

300 lines
10 KiB
TypeScript

/**
* Server-side Markdown export for an AI agent chat (#183). The DB is the single
* source of truth: this renders a chat purely from its persisted message rows
* (`AiChatMessage[]` — role / content / metadata.parts / toolCalls / usage).
* Because the assistant row is now persisted UPFRONT and updated per step, an
* interrupted turn is included up to its last finished step.
*
* Ported from the client `utils/chat-markdown.ts`. It is a PURE function (apart
* from `new Date()` for the export timestamp), so it is straightforward to
* unit-test and a future background worker can reuse it.
*
* Only a few fixed role/tool labels are localized via the `lang` param; the
* structural document words (Input/Output/Error/Tokens/...) stay English because
* the output is a technical artifact.
*/
import type { AiChatMessage } from '@docmost/db/types/entity.types';
/** Supported export label languages. Defaults to English. */
export type ExportLang = 'en' | 'ru';
/**
* Normalize an arbitrary client locale code to a supported export language. The
* client sends `i18n.language`, which is a FULL locale tag (e.g. `en-US`,
* `ru-RU`), not a bare `en`/`ru` — so match on the language subtag and fall back
* to English for anything non-Russian.
*/
export function normalizeLang(lang?: string): ExportLang {
return lang?.toLowerCase().startsWith('ru') ? 'ru' : 'en';
}
/** A single AI SDK UIMessage part (text part or a tool part). */
interface ExportPart {
type: string;
text?: string;
state?: string;
toolName?: string;
input?: unknown;
output?: unknown;
errorText?: string;
}
/** Authoritative per-turn usage the server attaches to a message row. */
interface UsageLike {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
reasoningTokens?: number;
}
/** Localized label table. The client-side Markdown builder was removed by #183
* (the export is now server-side only), so this no longer mirrors a second
* exporter — instead the tool-action labels are kept in parity with the
* on-screen action-log labels in the client's `tool-parts.tsx` (`toolLabelKey`)
* so the export reads the same as the UI. Only role + tool-action labels are
* localized; everything structural is an English constant in the renderer. */
const LABELS: Record<
ExportLang,
{
untitled: string;
aiAgent: string;
you: string;
tools: Record<string, string>;
ranTool: (name: string) => string;
stillGenerating: string;
}
> = {
en: {
untitled: 'Untitled chat',
aiAgent: 'AI agent',
you: 'You',
tools: {
searchPages: 'Searched pages',
getPage: 'Read page',
createPage: 'Created page',
updatePageContent: 'Updated page',
renamePage: 'Renamed page',
movePage: 'Moved page',
deletePage: 'Deleted page (to trash)',
createComment: 'Commented',
resolveComment: 'Resolved comment',
},
ranTool: (name) => `Ran tool ${name}`,
stillGenerating:
'This message is still being generated — the export captured a partial, in-progress response.',
},
ru: {
untitled: 'Без названия',
aiAgent: 'ИИ-агент',
you: 'Вы',
tools: {
searchPages: 'Искал по страницам',
getPage: 'Прочитал страницу',
createPage: 'Создал страницу',
updatePageContent: 'Обновил страницу',
renamePage: 'Переименовал страницу',
movePage: 'Переместил страницу',
deletePage: 'Удалил страницу (в корзину)',
createComment: 'Прокомментировал',
resolveComment: 'Закрыл комментарий',
},
ranTool: (name) => `Выполнил инструмент ${name}`,
stillGenerating:
'Это сообщение всё ещё генерируется — экспорт захватил частичный, незавершённый ответ.',
},
};
/** True for AI SDK tool parts (static `tool-*` or `dynamic-tool`). */
function isToolPart(type: string): boolean {
return type.startsWith('tool-') || type === 'dynamic-tool';
}
/** Extract the tool name from a part `type` of `tool-${name}` (or dynamic). */
function getToolName(part: ExportPart): string {
if (part.type === 'dynamic-tool') return part.toolName ?? '';
return part.type.startsWith('tool-')
? part.type.slice('tool-'.length)
: part.type;
}
/** Map an AI SDK tool-part state to the 3 states the action-log renders. */
function toolRunState(state: string | undefined): 'running' | 'done' | 'error' {
if (state === 'output-error' || state === 'output-denied') return 'error';
if (state === 'output-available') return 'done';
return 'running';
}
/** Resolve a tool's friendly action-log label (localized) from its name. */
function toolLabel(name: string, lang: ExportLang): string {
return LABELS[lang].tools[name] ?? LABELS[lang].ranTool(name);
}
/**
* Stringify an arbitrary tool input/output value for a fenced block. Strings
* pass through as-is; everything else is pretty-printed JSON, falling back to
* `String(value)` if serialization throws (e.g. a circular structure).
*/
function stringify(value: unknown): string {
if (typeof value === 'string') return value;
try {
return JSON.stringify(value, null, 2);
} catch {
return String(value);
}
}
/**
* Wrap `code` in a fenced code block whose backtick delimiter is LONGER than the
* longest backtick run inside the content, so embedded backticks (or a literal
* ``` fence) never break out of the block. Minimum 3 backticks.
*/
function fence(code: string, lang = ''): string {
const runs: string[] = code.match(/`+/g) ?? [];
const longest = runs.reduce((m, s) => Math.max(m, s.length), 0);
const delim = '`'.repeat(Math.max(3, longest + 1));
return `${delim}${lang}\n${code}\n${delim}`;
}
/** Per-row token count, mirroring the header sum in the client window. */
function rowTokens(usage: UsageLike): number {
return (
usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
);
}
/** Render one message's UIMessage parts into an array of Markdown blocks
* (text blocks + tool blocks). Mirrors the client renderer / MessageItem. */
function renderMessageParts(parts: ExportPart[], lang: ExportLang): string[] {
const out: string[] = [];
for (const part of parts) {
if (part.type === 'text') {
const text = (part.text ?? '').trim();
if (text.length > 0) out.push(text);
continue;
}
if (!isToolPart(part.type)) continue;
const name = getToolName(part);
const label = toolLabel(name, lang);
const state = toolRunState(part.state);
const toolLines: string[] = [`**Tool: ${label}** (\`${name}\`) — ${state}`];
if (part.input !== undefined) {
toolLines.push('Input:');
toolLines.push(fence(stringify(part.input), 'json'));
}
if (part.output !== undefined) {
toolLines.push('Output:');
toolLines.push(fence(stringify(part.output), 'json'));
}
if (part.errorText) {
toolLines.push(`**Error:** ${part.errorText}`);
}
out.push(toolLines.join('\n\n'));
}
return out;
}
/** Resolve a persisted row's parts: prefer the rich persisted parts, else a
* single text part built from the plain-text content (mirrors rowToUiMessage). */
function rowParts(row: AiChatMessage): ExportPart[] {
const meta = (row.metadata ?? {}) as { parts?: ExportPart[] };
return Array.isArray(meta.parts) && meta.parts.length > 0
? meta.parts
: [{ type: 'text', text: row.content ?? '' }];
}
/**
* Serialize a chat to a Markdown string from its persisted rows. Source = DB
* ONLY (no live client state). A row whose `status` is still 'streaming' is an
* interrupted turn that the export captured mid-flight; it is rendered up to its
* last finished step and flagged "still generating".
*/
export function buildChatMarkdown(args: {
title: string | null;
chatId: string;
rows: AiChatMessage[];
// Accepts a full client locale tag (e.g. 'en-US'/'ru-RU'); normalized below.
lang?: string;
}): string {
const { title, chatId, rows } = args;
const lang: ExportLang = normalizeLang(args.lang);
const L = LABELS[lang];
const blocks: string[] = [];
const heading = (title ?? '').trim() || L.untitled;
blocks.push(`# ${heading}`);
const usageOf = (row: AiChatMessage): UsageLike | undefined => {
const meta = (row.metadata ?? {}) as { usage?: UsageLike };
return meta.usage;
};
const errorOf = (row: AiChatMessage): string | undefined => {
const meta = (row.metadata ?? {}) as { error?: string };
return meta.error;
};
// Metadata bullet list. Total tokens is only shown when there is a sum.
const totalTokens = rows.reduce((sum, row) => {
const usage = usageOf(row);
return usage ? sum + rowTokens(usage) : sum;
}, 0);
const meta = [
`- Chat ID: \`${chatId}\``,
`- Exported: ${new Date().toISOString()}`,
`- Messages: ${rows.length}`,
];
if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
blocks.push(meta.join('\n'));
rows.forEach((row, index) => {
blocks.push('---');
const roleLabel = row.role === 'assistant' ? L.aiAgent : L.you;
blocks.push(`## ${index + 1}. ${roleLabel}`);
// Created-at kept in source as an HTML comment (out of the rendered prose).
if (row.createdAt) {
const iso =
row.createdAt instanceof Date
? row.createdAt.toISOString()
: String(row.createdAt);
blocks.push(`<!-- ${iso} -->`);
}
blocks.push(...renderMessageParts(rowParts(row), lang));
// A still-'streaming' row is an interrupted/in-progress turn captured by the
// export; record that so the partial answer is not mistaken for complete.
if (row.status === 'streaming') {
blocks.push(`_⏳ ${L.stillGenerating}_`);
}
const error = errorOf(row);
if (error) {
blocks.push(`**⚠️ Error:** ${error}`);
}
const usage = usageOf(row);
if (usage) {
const total = usage.totalTokens ?? rowTokens(usage);
const reasoning =
usage.reasoningTokens && usage.reasoningTokens > 0
? `, reasoning: ${usage.reasoningTokens}`
: '';
blocks.push(
`_Tokens — in: ${usage.inputTokens ?? '?'}, out: ${
usage.outputTokens ?? '?'
}${reasoning}, total: ${total}_`,
);
}
});
// Blank line between blocks so the Markdown renders cleanly.
return blocks.join('\n\n');
}