Tokens were only counted post-hoc (onFinish) and the header badge updated only on
chat open/switch; reasoning wasn't requested or shown. Now a counter ticks LIVE
during generation and surfaces reasoning ("thinking") tokens separately, like
Claude Code's `Thinking… · N tokens`.
Architecture (AI SDK v6): no provider gives exact per-token usage mid-stream, so
the live number is a cheap client estimate (chars/≈4) reconciled to AUTHORITATIVE
provider usage at step boundaries and turn end. The useChat per-delta re-render is
the existing realtime engine.
- server: `chatStreamMetadata` now also forwards usage on `finish-step` + `finish`;
`sendReasoning: true`; persisted `metadata.usage` carries `reasoningTokens`
(normalized from `outputTokenDetails` or the deprecated field).
- client: pure `count-stream-tokens` (estimateTokens / liveTurnTokens, prefers
authoritative usage else estimate); `Thinking… · N tokens` in the typing
indicator; collapsible "Thinking" reasoning block; throttled (~8 Hz) live
turn-token header badge; `reasoningTokens` in types + Markdown export.
Review fixes folded in:
- v6 `finish-step.usage` is PER-STEP, not cumulative — the server now ACCUMULATES
a running sum (new pure `accumulateStepUsage`) and sends the cumulative, which
converges to `finish.totalUsage`, so the live counter never jumps DOWN on a
multi-step agent turn.
- reasoning double-count: the authoritative turn-total is attributed to a block
ONLY for a single-reasoning-part (one-step) turn; multi-step blocks each show
their own estimate (the authoritative total stays in the header).
- no "0" badge flash at turn start (require live > 0, else show context size).
- comment refreshed (finish-step trigger).
Tests: server `accumulateStepUsage` + updated `chatStreamMetadata` (34 in the
suite); client pure-fn tests. Both tsc clean; 162 client ai-chat + the ai-chat
server suite pass.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
216 lines
7.4 KiB
TypeScript
216 lines
7.4 KiB
TypeScript
/**
 * Client-only Markdown builder for an AI agent chat. Serializes the already
 * persisted message rows (loaded via `useAiChatMessagesQuery`), plus any
 * optional in-progress "pending" messages, into a single Markdown string
 * suitable for copying to the clipboard. NO network call is made and NO
 * server/DB code is touched — this reuses the rich "request internals" (tool
 * calls with input/output, per-message token usage, finish/error info) that
 * the chat already holds client-side.
 *
 * Only role labels, tool action labels and the fallback heading for an
 * untitled chat are localized via the passed-in `t` translator; the structural
 * document words (Input/Output/Error/Tokens/...) are plain English constants
 * because the output is a technical artifact.
 */
|
|
|
|
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
|
|
import {
|
|
ToolUiPart,
|
|
getToolName,
|
|
toolRunState,
|
|
toolLabelKey,
|
|
} from "@/features/ai-chat/utils/tool-parts.tsx";
|
|
|
|
/**
 * Minimal translator signature, compatible with react-i18next's `t`:
 * takes a translation key plus optional interpolation values and returns the
 * translated string.
 */
type Translate = (key: string, values?: Record<string, unknown>) => string;
|
|
|
|
/** Arguments accepted by `buildChatMarkdown`. */
interface BuildChatMarkdownArgs {
  /** Chat title; `null` (or blank) falls back to a translated placeholder. */
  title: string | null;
  /** Chat identifier, printed in the metadata list of the export. */
  chatId: string;
  /** Persisted message rows, rendered in the order given. */
  rows: IAiChatMessageRow[];
  /** In-progress, not-yet-persisted live messages (the current streaming
   *  turn) to append after the persisted rows. `generating: true` adds a
   *  note that the message is still being produced. */
  pending?: PendingMessage[];
  /** Translator used for role labels, tool labels and the fallback title. */
  t: Translate;
}
|
|
|
|
/**
 * A single AI SDK UIMessage part, reduced to the fields this module reads.
 * `type` discriminates the part; `text` is only meaningful for text parts and
 * may be absent on any other kind of part.
 */
interface TextLikePart {
  type: string;
  text?: string;
}
|
|
|
|
/** A live, not-yet-persisted message (current streaming turn) to append. */
interface PendingMessage {
  /** Message author. The literals document the expected values; because of
   *  the trailing `string` the type accepts any string. */
  role: "user" | "assistant" | string;
  /** UIMessage parts received so far (may be empty while generating). */
  parts: TextLikePart[];
  /** `true` while the message is still being streamed. */
  generating: boolean;
}
|
|
|
|
/**
 * Stringify an arbitrary tool input/output value for a fenced block.
 *
 * - Strings pass through as-is.
 * - Everything else is pretty-printed JSON (2-space indent).
 * - When `JSON.stringify` throws (e.g. a circular structure or a BigInt) or
 *   produces no text at all (`undefined`, a function or a symbol at the top
 *   level), the result falls back to `String(value)`.
 *
 * @param value - The value to render.
 * @returns A string in every case, never `undefined`.
 */
function stringify(value: unknown): string {
  if (typeof value === "string") return value;
  try {
    // The lib typings declare a `string` return, but at runtime
    // JSON.stringify yields `undefined` for non-serializable top-level values.
    const json: string | undefined = JSON.stringify(value, null, 2);
    return json ?? String(value);
  } catch {
    return String(value);
  }
}
|
|
|
|
/**
 * Wrap `code` in a fenced code block whose backtick delimiter is LONGER than
 * the longest backtick run inside the content, so embedded backticks (or even
 * a literal ``` fence) never break out of the block. Minimum 3 backticks.
 *
 * @param code - Raw content placed between the fences, unmodified.
 * @param lang - Optional info string appended to the opening fence.
 * @returns The opening fence, the content and the closing fence, each
 *          separated by a single newline.
 */
function fence(code: string, lang = ""): string {
  let longestRun = 0;
  for (const run of code.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  // One backtick more than the longest run in the content, but never under 3.
  const delimiter = "`".repeat(Math.max(3, longestRun + 1));
  return [delimiter + lang, code, delimiter].join("\n");
}
|
|
|
|
/**
 * Per-row token count. The original note says this mirrors the header sum in
 * ai-chat-window.tsx (not visible from this file).
 *
 * Prefers the reported `totalTokens`; when that is absent the count is
 * `inputTokens + outputTokens`, each defaulting to 0. `reasoningTokens` is
 * accepted in the shape but does not take part in the sum.
 *
 * @param usage - Token usage recorded for one message.
 * @returns The token count attributed to that message.
 */
function rowTokens(usage: {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
}): number {
  // `+` binds tighter than `??`, so the fallback is the whole in+out sum.
  const inPlusOut = (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
  return usage.totalTokens ?? inPlusOut;
}
|
|
|
|
/**
 * Render one message's UIMessage parts into an array of Markdown blocks
 * (text blocks + tool blocks). The original note says this mirrors
 * MessageItem's part handling (not visible from this file).
 *
 * - `text` parts are trimmed and emitted verbatim; empty or whitespace-only
 *   ones are dropped.
 * - Parts whose type starts with `tool-`, or equals `dynamic-tool`, become a
 *   tool block: a bold header line, then the optional input, output and error
 *   sections, separated by blank lines.
 * - Every other part type is skipped.
 *
 * @param parts - The message parts, in display order.
 * @param t - Translator used for the tool's action label.
 * @returns One Markdown block per rendered part, in the original order.
 */
function renderMessageParts(parts: TextLikePart[], t: Translate): string[] {
  const out: string[] = [];

  for (const part of parts) {
    if (part.type === "text") {
      const text = (part.text ?? "").trim();
      // Skip empty/whitespace-only text parts.
      if (text.length > 0) out.push(text);
      continue;
    }

    const isToolPart =
      part.type.startsWith("tool-") || part.type === "dynamic-tool";
    if (!isToolPart) continue;

    // The part is only known structurally here; the type check above is the
    // runtime guard backing this cast.
    const tp = part as unknown as ToolUiPart;
    const name = getToolName(tp);
    const { key, values } = toolLabelKey(name);
    const label = t(key, values);
    const state = toolRunState(tp.state);

    const toolLines: string[] = [
      `**Tool: ${label}** (\`${name}\`) — ${state}`,
    ];
    if (tp.input !== undefined) {
      toolLines.push("Input:");
      toolLines.push(fence(stringify(tp.input), "json"));
    }
    if (tp.output !== undefined) {
      toolLines.push("Output:");
      toolLines.push(fence(stringify(tp.output), "json"));
    }
    if (tp.errorText) {
      toolLines.push(`**Error:** ${tp.errorText}`);
    }
    out.push(toolLines.join("\n\n"));
  }

  return out;
}
|
|
|
|
/**
 * Serialize a chat to a Markdown string. Pure (apart from `new Date()` for the
 * export timestamp), so it is straightforward to unit-test.
 *
 * Document layout, blocks separated by a blank line:
 *  1. `# <title>` — the trimmed title, or the translated "Untitled chat".
 *  2. A metadata bullet list (chat id, export time, message count and, when
 *     the sum is positive, total tokens over the persisted rows).
 *  3. One section per persisted row: `---`, a numbered role heading, the
 *     creation time as an HTML comment, the rendered parts, an optional error
 *     line and an optional token footer.
 *  4. One section per pending message, numbered on from the persisted rows,
 *     with a "still being generated" note when `generating` is `true`.
 *
 * @param args - Title, chat id, persisted rows, optional pending messages and
 *               the translator.
 * @returns The complete Markdown document.
 */
export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
  const { title, chatId, rows, pending, t } = args;
  const pendingMessages = pending ?? [];
  const blocks: string[] = [];

  const heading = (title ?? "").trim() || t("Untitled chat");
  blocks.push(`# ${heading}`);

  // Metadata bullet list. Total tokens is only shown when there is a sum.
  const totalTokens = rows.reduce((sum, row) => {
    const usage = row.metadata?.usage;
    return usage ? sum + rowTokens(usage) : sum;
  }, 0);
  const meta = [
    `- Chat ID: \`${chatId}\``,
    `- Exported: ${new Date().toISOString()}`,
    `- Messages: ${rows.length + pendingMessages.length}`,
  ];
  if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
  blocks.push(meta.join("\n"));

  rows.forEach((row, index) => {
    blocks.push("---");

    const roleLabel = row.role === "assistant" ? t("AI agent") : t("You");
    blocks.push(`## ${index + 1}. ${roleLabel}`);

    // Created-at kept in source as an HTML comment (out of the rendered prose).
    blocks.push(`<!-- ${row.createdAt} -->`);

    // Resolve parts: prefer the rich persisted parts, else a single text part
    // built from the plain-text content.
    const parts: TextLikePart[] =
      Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0
        ? (row.metadata.parts as TextLikePart[])
        : [{ type: "text", text: row.content ?? "" }];

    blocks.push(...renderMessageParts(parts, t));

    if (row.metadata?.error) {
      blocks.push(`**⚠️ Error:** ${row.metadata.error}`);
    }

    const usage = row.metadata?.usage;
    if (usage) {
      // rowTokens already prefers `totalTokens` and otherwise sums in + out.
      const total = rowTokens(usage);
      // Reasoning (thinking) tokens are shown only when a positive count is
      // present; rows without one omit the segment entirely.
      const reasoning =
        usage.reasoningTokens && usage.reasoningTokens > 0
          ? `, reasoning: ${usage.reasoningTokens}`
          : "";
      blocks.push(
        `_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}${reasoning}, total: ${total}_`,
      );
    }
  });

  // Append the in-progress, not-yet-persisted live messages (the current
  // streaming turn) after the persisted rows. Heading numbering CONTINUES from
  // the persisted rows; pending messages get no usage/token footer.
  pendingMessages.forEach((message, offset) => {
    blocks.push("---");

    const num = rows.length + offset + 1;
    const roleLabel = message.role === "assistant" ? t("AI agent") : t("You");
    blocks.push(`## ${num}. ${roleLabel}`);

    blocks.push(...renderMessageParts(message.parts, t));

    // A generating message may have empty/no parts yet — still emit the
    // heading (above) and this note so the export shows the in-progress turn.
    if (message.generating === true) {
      blocks.push(
        "_⏳ This message is still being generated — the export captured a partial, in-progress response._",
      );
    }
  });

  // Blank line between blocks so the Markdown renders cleanly.
  return blocks.join("\n\n");
}
|