Files
gitmost/apps/client/src/features/ai-chat/utils/chat-markdown.ts
claude code agent 227 0ebb1adce8 feat(ai-chat): realtime token counter + reasoning tokens, Claude-Code style (#151)
Tokens were only counted post-hoc (onFinish) and the header badge updated only on
chat open/switch; reasoning wasn't requested or shown. Now a counter ticks LIVE
during generation and surfaces reasoning ("thinking") tokens separately, like
Claude Code's `Thinking… · N tokens`.

Architecture (AI SDK v6): no provider gives exact per-token usage mid-stream, so
the live number is a cheap client estimate (chars/≈4) reconciled to AUTHORITATIVE
provider usage at step boundaries and turn end. The useChat per-delta re-render is
the existing realtime engine.

- server: `chatStreamMetadata` now also forwards usage on `finish-step` + `finish`;
  `sendReasoning: true`; persisted `metadata.usage` carries `reasoningTokens`
  (normalized from `outputTokenDetails` or the deprecated field).
- client: pure `count-stream-tokens` (estimateTokens / liveTurnTokens, prefers
  authoritative usage else estimate); `Thinking… · N tokens` in the typing
  indicator; collapsible "Thinking" reasoning block; throttled (~8 Hz) live
  turn-token header badge; `reasoningTokens` in types + Markdown export.

Review fixes folded in:
- v6 `finish-step.usage` is PER-STEP, not cumulative — the server now ACCUMULATES
  a running sum (new pure `accumulateStepUsage`) and sends the cumulative, which
  converges to `finish.totalUsage`, so the live counter never jumps DOWN on a
  multi-step agent turn.
- reasoning double-count: the authoritative turn-total is attributed to a block
  ONLY for a single-reasoning-part (one-step) turn; multi-step blocks each show
  their own estimate (the authoritative total stays in the header).
- no "0" badge flash at turn start (require live > 0, else show context size).
- comment refreshed (finish-step trigger).

Tests: server `accumulateStepUsage` + updated `chatStreamMetadata` (34 in the
suite); client pure-fn tests. Both tsc clean; 162 client ai-chat + the ai-chat
server suite pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-24 06:56:14 +03:00

216 lines
7.4 KiB
TypeScript

/**
* Client-only Markdown builder for an AI agent chat. Serializes the already
* persisted message rows (loaded via `useAiChatMessagesQuery`) into a single
* Markdown string suitable for copying to the clipboard. NO network call is
* made and NO server/DB code is touched — this reuses the rich "request
* internals" (tool calls with input/output, per-message token usage,
* finish/error info) that the chat already holds client-side.
*
* Only role labels and tool action labels are localized via the passed-in `t`
* translator; the structural document words (Input/Output/Error/Tokens/...) are
* plain English constants because the output is a technical artifact.
*/
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
import {
ToolUiPart,
getToolName,
toolRunState,
toolLabelKey,
} from "@/features/ai-chat/utils/tool-parts.tsx";
// Minimal translator signature compatible with react-i18next's `t`.
type Translate = (key: string, values?: Record<string, unknown>) => string;
interface BuildChatMarkdownArgs {
title: string | null;
chatId: string;
rows: IAiChatMessageRow[];
/** In-progress, not-yet-persisted live messages (the current streaming
* turn) to append after the persisted rows. `generating: true` adds a
* note that the message is still being produced. */
pending?: PendingMessage[];
t: Translate;
}
/** A single AI SDK UIMessage part (text part or other). */
interface TextLikePart {
type: string;
text?: string;
}
/** A live, not-yet-persisted message (current streaming turn) to append. */
interface PendingMessage {
role: "user" | "assistant" | string;
parts: TextLikePart[];
generating: boolean;
}
/**
* Stringify an arbitrary tool input/output value for a fenced block. Strings
* pass through as-is; everything else is pretty-printed JSON, falling back to
* `String(value)` if serialization throws (e.g. a circular structure).
*/
function stringify(value: unknown): string {
if (typeof value === "string") return value;
try {
return JSON.stringify(value, null, 2);
} catch {
return String(value);
}
}
/**
* Wrap `code` in a fenced code block whose backtick delimiter is LONGER than
* the longest backtick run inside the content, so embedded backticks (or even
* a literal ``` fence) never break out of the block. Minimum 3 backticks.
*/
function fence(code: string, lang = ""): string {
const runs: string[] = code.match(/`+/g) ?? [];
const longest = runs.reduce((m, s) => Math.max(m, s.length), 0);
const delim = "`".repeat(Math.max(3, longest + 1));
return `${delim}${lang}\n${code}\n${delim}`;
}
/** Per-row token count, mirroring the header sum in ai-chat-window.tsx. */
function rowTokens(usage: {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
reasoningTokens?: number;
}): number {
return (
usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
);
}
/** Render one message's UIMessage parts into an array of Markdown blocks
* (text blocks + tool blocks). Mirrors MessageItem's part handling. */
function renderMessageParts(parts: TextLikePart[], t: Translate): string[] {
const out: string[] = [];
for (const part of parts) {
if (part.type === "text") {
const text = (part.text ?? "").trim();
// Skip empty/whitespace-only text parts (matches MessageItem).
if (text.length > 0) out.push(text);
continue;
}
const isToolPart =
part.type.startsWith("tool-") || part.type === "dynamic-tool";
if (!isToolPart) continue;
const tp = part as unknown as ToolUiPart;
const name = getToolName(tp);
const { key, values } = toolLabelKey(name);
const label = t(key, values);
const state = toolRunState(tp.state);
const toolLines: string[] = [
`**Tool: ${label}** (\`${name}\`) — ${state}`,
];
if (tp.input !== undefined) {
toolLines.push("Input:");
toolLines.push(fence(stringify(tp.input), "json"));
}
if (tp.output !== undefined) {
toolLines.push("Output:");
toolLines.push(fence(stringify(tp.output), "json"));
}
if (tp.errorText) {
toolLines.push(`**Error:** ${tp.errorText}`);
}
out.push(toolLines.join("\n\n"));
}
return out;
}
/**
* Serialize a chat to a Markdown string. Pure (apart from `new Date()` for the
* export timestamp), so it is straightforward to unit-test.
*/
export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
const { title, chatId, rows, pending, t } = args;
const blocks: string[] = [];
const heading = (title ?? "").trim() || t("Untitled chat");
blocks.push(`# ${heading}`);
// Metadata bullet list. Total tokens is only shown when there is a sum.
const totalTokens = rows.reduce((sum, row) => {
const usage = row.metadata?.usage;
return usage ? sum + rowTokens(usage) : sum;
}, 0);
const meta = [
`- Chat ID: \`${chatId}\``,
`- Exported: ${new Date().toISOString()}`,
`- Messages: ${rows.length + (pending?.length ?? 0)}`,
];
if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
blocks.push(meta.join("\n"));
rows.forEach((row, index) => {
blocks.push("---");
const roleLabel = row.role === "assistant" ? t("AI agent") : t("You");
blocks.push(`## ${index + 1}. ${roleLabel}`);
// Created-at kept in source as an HTML comment (out of the rendered prose).
blocks.push(`<!-- ${row.createdAt} -->`);
// Resolve parts: prefer the rich persisted parts, else a single text part
// built from the plain-text content (mirrors `rowToUiMessage`).
const parts: TextLikePart[] =
Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0
? (row.metadata.parts as TextLikePart[])
: [{ type: "text", text: row.content ?? "" }];
blocks.push(...renderMessageParts(parts, t));
if (row.metadata?.error) {
blocks.push(`**⚠️ Error:** ${row.metadata.error}`);
}
const usage = row.metadata?.usage;
if (usage) {
const total = usage.totalTokens ?? rowTokens(usage);
// Reasoning (thinking) tokens are shown only when the provider reported a
// positive count; old rows / non-reasoning providers omit it.
const reasoning =
usage.reasoningTokens && usage.reasoningTokens > 0
? `, reasoning: ${usage.reasoningTokens}`
: "";
blocks.push(
`_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}${reasoning}, total: ${total}_`,
);
}
});
// Append the in-progress, not-yet-persisted live messages (the current
// streaming turn) after the persisted rows. Heading numbering CONTINUES from
// the persisted rows. A `generating` assistant gets a note that the captured
// response is partial; pending messages carry no usage/token footer yet.
(pending ?? []).forEach((message, p) => {
blocks.push("---");
const num = rows.length + p + 1;
const roleLabel = message.role === "assistant" ? t("AI agent") : t("You");
blocks.push(`## ${num}. ${roleLabel}`);
blocks.push(...renderMessageParts(message.parts, t));
// A generating assistant may have empty/no parts yet — still emit the
// heading (above) and this note so the export shows the in-progress turn.
if (message.generating === true) {
blocks.push(
"_⏳ This message is still being generated — the export captured a partial, in-progress response._",
);
}
});
// Blank line between blocks so the Markdown renders cleanly.
return blocks.join("\n\n");
}