From 0ebb1adce8c068099f70d254232fb2e213cead57 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 06:56:14 +0300 Subject: [PATCH 1/2] feat(ai-chat): realtime token counter + reasoning tokens, Claude-Code style (#151) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tokens were only counted post-hoc (onFinish) and the header badge updated only on chat open/switch; reasoning wasn't requested or shown. Now a counter ticks LIVE during generation and surfaces reasoning ("thinking") tokens separately, like Claude Code's `Thinking… · N tokens`. Architecture (AI SDK v6): no provider gives exact per-token usage mid-stream, so the live number is a cheap client estimate (chars/≈4) reconciled to AUTHORITATIVE provider usage at step boundaries and turn end. The useChat per-delta re-render is the existing realtime engine. - server: `chatStreamMetadata` now also forwards usage on `finish-step` + `finish`; `sendReasoning: true`; persisted `metadata.usage` carries `reasoningTokens` (normalized from `outputTokenDetails` or the deprecated field). - client: pure `count-stream-tokens` (estimateTokens / liveTurnTokens, prefers authoritative usage else estimate); `Thinking… · N tokens` in the typing indicator; collapsible "Thinking" reasoning block; throttled (~8 Hz) live turn-token header badge; `reasoningTokens` in types + Markdown export. Review fixes folded in: - v6 `finish-step.usage` is PER-STEP, not cumulative — the server now ACCUMULATES a running sum (new pure `accumulateStepUsage`) and sends the cumulative, which converges to `finish.totalUsage`, so the live counter never jumps DOWN on a multi-step agent turn. - reasoning double-count: the authoritative turn-total is attributed to a block ONLY for a single-reasoning-part (one-step) turn; multi-step blocks each show their own estimate (the authoritative total stays in the header). - no "0" badge flash at turn start (require live > 0, else show context size). 
- comment refreshed (finish-step trigger). Tests: server `accumulateStepUsage` + updated `chatStreamMetadata` (34 in the suite); client pure-fn tests. Both tsc clean; 162 client ai-chat + the ai-chat server suite pass. Co-Authored-By: Claude Opus 4.8 --- .../public/locales/en-US/translation.json | 4 + .../public/locales/ru-RU/translation.json | 4 + .../ai-chat/components/ai-chat-window.tsx | 19 ++- .../ai-chat/components/ai-chat.module.css | 18 +++ .../ai-chat/components/chat-thread.tsx | 56 +++++++ .../ai-chat/components/message-item.tsx | 34 +++++ .../ai-chat/components/message-list.tsx | 22 ++- .../ai-chat/components/reasoning-block.tsx | 83 +++++++++++ .../components/tail-thinking-tokens.test.ts | 50 +++++++ .../ai-chat/components/typing-indicator.tsx | 15 +- .../features/ai-chat/types/ai-chat.types.ts | 4 + .../features/ai-chat/utils/chat-markdown.ts | 9 +- .../ai-chat/utils/count-stream-tokens.test.ts | 119 +++++++++++++++ .../ai-chat/utils/count-stream-tokens.ts | 94 ++++++++++++ .../src/core/ai-chat/ai-chat.service.spec.ts | 134 ++++++++++++++++- .../src/core/ai-chat/ai-chat.service.ts | 138 ++++++++++++++++-- 16 files changed, 775 insertions(+), 28 deletions(-) create mode 100644 apps/client/src/features/ai-chat/components/reasoning-block.tsx create mode 100644 apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts create mode 100644 apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts create mode 100644 apps/client/src/features/ai-chat/utils/count-stream-tokens.ts diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index a4dd886b..fcb10ab2 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -1147,6 +1147,9 @@ "Ask a question about this documentation.": "Ask a question about this documentation.", "Ask a question…": "Ask a question…", "Thinking…": "Thinking…", + "Thinking… · {{count}} tokens": 
"Thinking… · {{count}} tokens", + "Thinking": "Thinking", + "Thinking · {{count}} tokens": "Thinking · {{count}} tokens", "The assistant is unavailable right now. Please try again.": "The assistant is unavailable right now. Please try again.", "Public share assistant": "Public share assistant", "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. You pay for the tokens.": "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. You pay for the tokens.", @@ -1158,6 +1161,7 @@ "Built-in assistant persona": "Built-in assistant persona", "Minimize": "Minimize", "Current context size": "Current context size", + "Tokens generated this turn": "Tokens generated this turn", "AI agent": "AI agent", "Take a look at the current document": "Take a look at the current document", "AI agent is typing…": "AI agent is typing…", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index ca14b406..87064523 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -680,6 +680,9 @@ "AI agent is typing…": "AI-агент печатает…", "{{name}} is typing…": "{{name}} печатает…", "Thinking…": "Думаю…", + "Thinking… · {{count}} tokens": "Думаю… · {{count}} токенов", + "Thinking": "Размышления", + "Thinking · {{count}} tokens": "Размышления · {{count}} токенов", "Agent role": "Роль агента", "AI chat": "AI-чат", "AI chat is disabled for this workspace.": "AI-чат отключён для этого рабочего пространства.", @@ -690,6 +693,7 @@ "Copy chat": "Копировать чат", "Created successfully": "Успешно создано", "Current context size": "Текущий размер контекста", + "Tokens generated this turn": "Токенов сгенерировано за ход", "Delete this chat?": "Удалить этот чат?", "Deleted successfully": "Успешно удалено", "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}", diff --git 
a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index 5ec80874..5f6b1dde 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -156,6 +156,12 @@ export default function AiChatWindow() { isStreaming: false, }); + // Live turn-token total (reasoning + output) for the in-flight turn, pushed up + // (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream. + // `null` means no turn is in flight -> the badge falls back to the persisted + // context size below. + const [liveTurnTokens, setLiveTurnTokens] = useState(null); + // The page the user is currently viewing. AiChatWindow lives in a pathless // parent layout route, so useParams() can't see :pageSlug. Match the full // pathname against the authenticated page route instead so "the current page" @@ -485,11 +491,19 @@ export default function AiChatWindow() { )}
- {contextTokens > 0 && ( + {/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz); + once it finishes, fall back to the persisted context size. Require + > 0 so the very first emit (an empty tail message, count 0) does not + flash a "0" badge before any token streams in (#151 review). */} + {liveTurnTokens !== null && liveTurnTokens > 0 ? ( + + {formatTokens(liveTurnTokens)} + + ) : contextTokens > 0 ? ( {formatTokens(contextTokens)} - )} + ) : null}
@@ -608,6 +622,7 @@ export default function AiChatWindow() { assistantName={currentRole?.name} onTurnFinished={onTurnFinished} liveStateRef={liveThreadRef} + onLiveTurnTokens={setLiveTurnTokens} /> )}
diff --git a/apps/client/src/features/ai-chat/components/ai-chat.module.css b/apps/client/src/features/ai-chat/components/ai-chat.module.css index 7680e9ec..6b7aac64 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat.module.css +++ b/apps/client/src/features/ai-chat/components/ai-chat.module.css @@ -111,6 +111,24 @@ background: light-dark(var(--mantine-color-gray-0), var(--mantine-color-dark-6)); } +/* Collapsible "Thinking" (reasoning) block: a subtle left rule, dimmer than the + answer so it reads as secondary thinking context above the real answer. */ +.reasoningBlock { + border-left: 2px solid light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-4)); + padding-left: 8px; +} + +.reasoningText { + margin-top: 4px; + font-size: var(--mantine-font-size-xs); + color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-dark-1)); + white-space: pre-wrap; +} + +.reasoningText p { + margin: 0 0 4px; +} + .inputWrapper { flex: 0 0 auto; padding-top: var(--mantine-spacing-xs); diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index b5dc6d48..ea41c6b0 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -23,6 +23,7 @@ import { } from "@/features/ai-chat/types/ai-chat.types.ts"; import { describeChatError } from "@/features/ai-chat/utils/error-message.ts"; import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts"; +import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import { dequeue, enqueueMessage, @@ -69,6 +70,12 @@ interface ChatThreadProps { * assistant message. A ref (not state) avoids re-rendering the parent on * every streamed delta. 
*/ liveStateRef?: MutableRefObject<{ messages: UIMessage[]; isStreaming: boolean }>; + /** Reports the live turn-token total (reasoning + output) for the in-flight + * turn so the parent can show a header badge that ticks mid-stream. THROTTLED + * here (~8 Hz) so the parent re-renders a handful of times a second, not on + * every streamed delta. Called with `null` when no turn is in flight (the + * parent then reverts the badge to the persisted context size). */ + onLiveTurnTokens?: (tokens: number | null) => void; } /** @@ -113,6 +120,7 @@ export default function ChatThread({ assistantName, onTurnFinished, liveStateRef, + onLiveTurnTokens, }: ChatThreadProps) { const { t } = useTranslation(); @@ -310,6 +318,54 @@ export default function ChatThread({ }; }, [liveStateRef, messages, isStreaming]); + // Report the live turn-token total to the parent header badge, THROTTLED to + // ~8 Hz so the parent re-renders a few times a second instead of on every + // streamed delta. The tail assistant message's reasoning+output (estimate while + // streaming, authoritative once a step reports usage) is the live figure. When + // the turn ends we emit a final exact value, then `null` so the parent reverts + // the badge to the persisted context size. + const lastEmitRef = useRef(0); + const emitTimerRef = useRef | null>(null); + useEffect(() => { + if (!onLiveTurnTokens) return; + if (!isStreaming) { + // Turn ended (or never started): clear any pending throttle and revert. + if (emitTimerRef.current) { + clearTimeout(emitTimerRef.current); + emitTimerRef.current = null; + } + lastEmitRef.current = 0; + onLiveTurnTokens(null); + return; + } + const tail = messages[messages.length - 1]; + const live = + tail?.role === "assistant" ? liveTurnTokens(tail) : null; + const total = live ? 
live.reasoning + live.output : 0; + const now = Date.now(); + const MIN_INTERVAL = 120; // ms (~8 Hz) + const elapsed = now - lastEmitRef.current; + if (elapsed >= MIN_INTERVAL) { + lastEmitRef.current = now; + onLiveTurnTokens(total); + } else if (!emitTimerRef.current) { + // Schedule a trailing emit so the FINAL value of a burst is not dropped. + emitTimerRef.current = setTimeout(() => { + emitTimerRef.current = null; + lastEmitRef.current = Date.now(); + onLiveTurnTokens(total); + }, MIN_INTERVAL - elapsed); + } + }, [messages, isStreaming, onLiveTurnTokens]); + + // Clear any pending throttle timer on unmount (chat switch via `key`) so a + // trailing emit can't fire into a torn-down thread's parent. + useEffect(() => { + return () => { + if (emitTimerRef.current) clearTimeout(emitTimerRef.current); + }; + }, []); + // Classify the turn error into a heading + detail so the banner names the cause // (connection reset, timeout, rate limit, context overflow, quota, ...) instead // of a generic "Something went wrong". diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx index 255f5722..53d666f9 100644 --- a/apps/client/src/features/ai-chat/components/message-item.tsx +++ b/apps/client/src/features/ai-chat/components/message-item.tsx @@ -2,6 +2,7 @@ import { Box, Text } from "@mantine/core"; import { useTranslation } from "react-i18next"; import type { UIMessage } from "@ai-sdk/react"; import ToolCallCard from "@/features/ai-chat/components/tool-call-card.tsx"; +import ReasoningBlock from "@/features/ai-chat/components/reasoning-block.tsx"; import ChatErrorAlert from "@/features/ai-chat/components/chat-error-alert.tsx"; import ChatStoppedNotice from "@/features/ai-chat/components/chat-stopped-notice.tsx"; import { ToolUiPart, isToolPart } from "@/features/ai-chat/utils/tool-parts.tsx"; @@ -77,12 +78,45 @@ export default function MessageItem({ // return won't fire for them. 
if (!assistantMessageHasVisibleContent(message)) return null; + // Authoritative reasoning token count for the turn, if the server attached it + // (incl. providers that report a reasoning COUNT without streaming the text). + // It is the TURN TOTAL, so it may only be attributed to a block when there is a + // SINGLE reasoning part (the common one-step turn) — then that block shows the + // exact figure. With multiple reasoning parts (multi-step agent turn) every + // block falls back to its own per-part estimate; attributing the turn total to + // one of them would double-count against the others' estimates (#151 review). + // The authoritative turn total is still surfaced live in the header badge. + const reasoningTokens = ( + message.metadata as { usage?: { reasoningTokens?: number } } | undefined + )?.usage?.reasoningTokens; + const reasoningPartCount = message.parts.reduce( + (acc, p) => (p.type === "reasoning" ? acc + 1 : acc), + 0, + ); + const lastReasoningIndex = message.parts.reduce( + (acc, p, i) => (p.type === "reasoning" ? i : acc), + -1, + ); + return ( {resolveAssistantName(assistantName) ?? t("AI agent")} {message.parts.map((part, index) => { + if (part.type === "reasoning") { + // Reasoning ("thinking") -> a collapsible block with its own token + // count. Empty/whitespace reasoning with no authoritative count carries + // nothing to show, so skip it (avoids an empty 0-token block). + const text = (part as { text?: string }).text ?? ""; + const tokens = + reasoningPartCount === 1 && index === lastReasoningIndex + ? 
reasoningTokens + : undefined; + if (!text.trim() && !(tokens && tokens > 0)) return null; + return ; + } + if (part.type === "text") { // Skip empty/whitespace-only text parts (a streaming message often // starts with an empty text part before the first token arrives); the diff --git a/apps/client/src/features/ai-chat/components/message-list.tsx b/apps/client/src/features/ai-chat/components/message-list.tsx index a1bc14f5..d9995cda 100644 --- a/apps/client/src/features/ai-chat/components/message-list.tsx +++ b/apps/client/src/features/ai-chat/components/message-list.tsx @@ -6,6 +6,7 @@ import MessageItem from "@/features/ai-chat/components/message-item.tsx"; import TypingIndicator from "@/features/ai-chat/components/typing-indicator.tsx"; import { isToolPart, toolRunState, ToolUiPart } from "@/features/ai-chat/utils/tool-parts.tsx"; import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts"; +import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import classes from "@/features/ai-chat/components/ai-chat.module.css"; interface MessageListProps { @@ -94,6 +95,19 @@ export function typingIndicatorShowsName(messages: UIMessage[]): boolean { return !assistantMessageHasVisibleContent(last); } +/** + * The live thinking-token count to show on the standalone typing indicator. It + * is the reasoning split of the tail assistant message (estimate while streaming, + * authoritative once the server attaches usage at a step/turn boundary). Returns + * 0 when the turn has produced no reasoning yet — the indicator then shows the + * plain "Thinking…" line. + */ +export function tailThinkingTokens(messages: UIMessage[]): number { + const last = messages[messages.length - 1]; + if (!last || last.role !== "assistant") return 0; + return liveTurnTokens(last).reasoning; +} + /** * Scrollable transcript. 
Auto-scrolls to the newest message as it streams in, * but only while the user is pinned to the bottom — if they scrolled up to read @@ -190,7 +204,13 @@ export default function MessageList({ assistantName={assistantName} /> ))} - {typing && } + {typing && ( + + )} ); diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.tsx new file mode 100644 index 00000000..43e88a69 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx @@ -0,0 +1,83 @@ +import { useState } from "react"; +import { Box, Collapse, Group, Text, UnstyledButton } from "@mantine/core"; +import { IconChevronDown } from "@tabler/icons-react"; +import { useTranslation } from "react-i18next"; +import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; +import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts"; +import classes from "@/features/ai-chat/components/ai-chat.module.css"; + +interface ReasoningBlockProps { + /** The streamed/persisted reasoning (thinking) text. May be empty when the + * provider reports only a reasoning token COUNT without the text. */ + text: string; + /** Authoritative reasoning token count from `usage.reasoningTokens`, when the + * step/turn has finished. When absent (or 0) the count is estimated from the + * text length so it ticks live as the reasoning streams in. */ + tokens?: number; +} + +/** + * Collapsible "Thinking" block for an assistant `reasoning` part. Mirrors Claude + * Code's surfacing of the model's thinking: a header that shows the thinking + * token count (authoritative when the step has reported usage, else a live + * estimate from the streamed text) and an expandable body with the reasoning + * prose. Collapsed by default so it never crowds out the answer. 
+ * + * Providers that don't stream reasoning TEXT still render this block from the + * authoritative count alone (header only, empty body) so the cost is visible. + */ +export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { + const { t } = useTranslation(); + const [open, setOpen] = useState(false); + + // Authoritative count wins; otherwise estimate live from the streamed text. + const count = tokens && tokens > 0 ? tokens : estimateTokens(text); + const trimmed = text.trim(); + const html = trimmed ? renderChatMarkdown(trimmed, {}) : ""; + + return ( + + setOpen((o) => !o)} + // No body to expand when the provider reported only a token count. + disabled={!trimmed} + aria-expanded={open} + > + + + + {count > 0 + ? t("Thinking · {{count}} tokens", { count }) + : t("Thinking")} + + + + + {trimmed && ( + + {html ? ( +
+ ) : ( + + {trimmed} + + )} + + )} + + ); +} diff --git a/apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts b/apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts new file mode 100644 index 00000000..5f421aec --- /dev/null +++ b/apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from "vitest"; +import type { UIMessage } from "@ai-sdk/react"; +import { tailThinkingTokens } from "@/features/ai-chat/components/message-list.tsx"; + +/** + * Pure-helper tests for `tailThinkingTokens`: the live thinking-token count the + * standalone typing indicator shows. It is the reasoning split of the tail + * assistant message (estimate while streaming, authoritative once usage arrives). + */ +const msg = ( + role: "user" | "assistant", + parts: unknown[], + metadata?: unknown, +): UIMessage => + ({ id: Math.random().toString(), role, parts, metadata }) as UIMessage; + +describe("tailThinkingTokens", () => { + it("is 0 when there are no messages", () => { + expect(tailThinkingTokens([])).toBe(0); + }); + + it("is 0 when the tail message is the user's", () => { + expect(tailThinkingTokens([msg("user", [{ type: "text", text: "q" }])])).toBe(0); + }); + + it("is 0 when the assistant has produced no reasoning yet", () => { + expect( + tailThinkingTokens([msg("assistant", [{ type: "text", text: "answer" }])]), + ).toBe(0); + }); + + it("estimates reasoning tokens from streamed reasoning text", () => { + // 8 chars -> 2 tokens. 
+ expect( + tailThinkingTokens([ + msg("assistant", [{ type: "reasoning", text: "12345678" }]), + ]), + ).toBe(2); + }); + + it("uses authoritative usage.reasoningTokens once the server attaches it", () => { + expect( + tailThinkingTokens([ + msg("assistant", [{ type: "reasoning", text: "x" }], { + usage: { outputTokens: 100, reasoningTokens: 42 }, + }), + ]), + ).toBe(42); + }); +}); diff --git a/apps/client/src/features/ai-chat/components/typing-indicator.tsx b/apps/client/src/features/ai-chat/components/typing-indicator.tsx index c811c2bd..72ac3179 100644 --- a/apps/client/src/features/ai-chat/components/typing-indicator.tsx +++ b/apps/client/src/features/ai-chat/components/typing-indicator.tsx @@ -16,6 +16,12 @@ interface TypingIndicatorProps { * assistant row above already shows the same name, to avoid a duplicate label. */ showName?: boolean; + /** + * Live thinking/reasoning token count for the in-flight turn. When > 0 the + * typing line becomes `Thinking… · {count} tokens` (like Claude Code). Omitted + * / 0 keeps the plain `Thinking…` line. + */ + thinkingTokens?: number; } /** @@ -30,9 +36,14 @@ interface TypingIndicatorProps { * typing line is always the generic "Thinking…" (it never includes the * role/identity name). */ -export default function TypingIndicator({ assistantName, showName = true }: TypingIndicatorProps) { +export default function TypingIndicator({ assistantName, showName = true, thinkingTokens }: TypingIndicatorProps) { const { t } = useTranslation(); const name = resolveAssistantName(assistantName); + // Show the running thinking-token count only once there is something to count. + const thinkingLine = + thinkingTokens && thinkingTokens > 0 + ? 
t("Thinking… · {{count}} tokens", { count: thinkingTokens }) + : t("Thinking…"); return ( @@ -48,7 +59,7 @@ export default function TypingIndicator({ assistantName, showName = true }: Typi - {t("Thinking…")} + {thinkingLine} diff --git a/apps/client/src/features/ai-chat/types/ai-chat.types.ts b/apps/client/src/features/ai-chat/types/ai-chat.types.ts index f4b0ccb6..afc9cae6 100644 --- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts +++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts @@ -98,6 +98,10 @@ export interface IAiChatMessageRow { inputTokens?: number; outputTokens?: number; totalTokens?: number; + // Reasoning (thinking) tokens, when the provider reports them. Optional so + // old history rows (recorded before this shipped) stay valid. Included in + // `outputTokens` per the AI SDK usage shape. + reasoningTokens?: number; }; // Current context size for the turn = final-step (input+output) tokens, i.e. // how much the conversation occupies in the model's context window after this diff --git a/apps/client/src/features/ai-chat/utils/chat-markdown.ts b/apps/client/src/features/ai-chat/utils/chat-markdown.ts index f54d012a..c3c3b3b2 100644 --- a/apps/client/src/features/ai-chat/utils/chat-markdown.ts +++ b/apps/client/src/features/ai-chat/utils/chat-markdown.ts @@ -77,6 +77,7 @@ function rowTokens(usage: { inputTokens?: number; outputTokens?: number; totalTokens?: number; + reasoningTokens?: number; }): number { return ( usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0) @@ -175,8 +176,14 @@ export function buildChatMarkdown(args: BuildChatMarkdownArgs): string { const usage = row.metadata?.usage; if (usage) { const total = usage.totalTokens ?? rowTokens(usage); + // Reasoning (thinking) tokens are shown only when the provider reported a + // positive count; old rows / non-reasoning providers omit it. + const reasoning = + usage.reasoningTokens && usage.reasoningTokens > 0 + ? 
`, reasoning: ${usage.reasoningTokens}` + : ""; blocks.push( - `_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}, total: ${total}_`, + `_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}${reasoning}, total: ${total}_`, ); } }); diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts new file mode 100644 index 00000000..62256bc3 --- /dev/null +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts @@ -0,0 +1,119 @@ +import { describe, expect, it } from "vitest"; +import type { UIMessage } from "@ai-sdk/react"; +import { + estimateTokens, + liveTurnTokens, +} from "@/features/ai-chat/utils/count-stream-tokens.ts"; + +const msg = (parts: unknown[], metadata?: unknown): UIMessage => + ({ + id: Math.random().toString(), + role: "assistant", + parts, + metadata, + }) as UIMessage; + +describe("estimateTokens", () => { + it("returns 0 for the empty string", () => { + expect(estimateTokens("")).toBe(0); + }); + + it("ceils chars/4 so any non-empty text is at least 1 token", () => { + expect(estimateTokens("a")).toBe(1); + expect(estimateTokens("abcd")).toBe(1); + expect(estimateTokens("abcde")).toBe(2); + expect(estimateTokens("12345678")).toBe(2); + }); +}); + +describe("liveTurnTokens — estimate path", () => { + it("is all zeros for an undefined message", () => { + expect(liveTurnTokens(undefined)).toEqual({ + reasoning: 0, + output: 0, + authoritative: false, + }); + }); + + it("is all zeros for a parts-less message", () => { + expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({ + reasoning: 0, + output: 0, + authoritative: false, + }); + }); + + it("estimates output from text parts", () => { + // 8 chars -> 2 tokens. 
+ const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }])); + expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false }); + }); + + it("estimates reasoning from reasoning parts (kept separate from output)", () => { + const r = liveTurnTokens( + msg([ + { type: "reasoning", text: "12345678" }, + { type: "text", text: "abcd" }, + ]), + ); + expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false }); + }); + + it("accumulates across multiple text + reasoning parts (multi-step)", () => { + const r = liveTurnTokens( + msg([ + { type: "reasoning", text: "abcd" }, // 1 + { type: "text", text: "abcd" }, // 1 + { type: "tool-getPage", state: "output-available" }, // ignored + { type: "reasoning", text: "abcd" }, // 1 + { type: "text", text: "abcdefgh" }, // 2 + ]), + ); + expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false }); + }); + + it("ignores non text/reasoning parts (tools, step-start)", () => { + const r = liveTurnTokens( + msg([ + { type: "step-start" }, + { type: "tool-getPage", state: "input-available" }, + ]), + ); + expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false }); + }); +}); + +describe("liveTurnTokens — authoritative path", () => { + it("returns authoritative usage verbatim, splitting reasoning out of output", () => { + // outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30. 
+ const r = liveTurnTokens( + msg([{ type: "text", text: "estimate would be tiny" }], { + usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 }, + }), + ); + expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true }); + }); + + it("treats missing reasoningTokens as 0 and keeps full output", () => { + const r = liveTurnTokens( + msg([{ type: "text", text: "x" }], { + usage: { inputTokens: 10, outputTokens: 42 }, + }), + ); + expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true }); + }); + + it("never returns a negative output when reasoning exceeds reported output", () => { + const r = liveTurnTokens( + msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }), + ); + expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true }); + }); + + it("falls back to the estimate when metadata has no usage object", () => { + const r = liveTurnTokens( + msg([{ type: "text", text: "abcd" }], { chatId: "c1" }), + ); + expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false }); + }); +}); diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts new file mode 100644 index 00000000..e9cca6bb --- /dev/null +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts @@ -0,0 +1,94 @@ +import type { UIMessage } from "@ai-sdk/react"; + +/** + * Live token counting for a streaming AI-chat turn — split into REASONING + * (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows + * `Thinking… · 60 tokens` next to its thinking indicator. + * + * No provider streams exact per-token usage mid-stream, so the live number is a + * CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage + * once the server attaches it on a step/turn boundary (see the server's + * `chatStreamMetadata` + the client's read of `message.metadata.usage`). 
When + * authoritative usage is present we return it verbatim (the number "jumps to + * exact"); otherwise we return the running estimate. Pure + unit-testable: it + * never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the + * bundle, and be wrong for Gemini/Ollama anyway). + */ + +/** + * Rough token estimate for a piece of text using the standard chars/≈4 heuristic. + * Returns 0 only for the empty string, and ceils so any non-empty text + * (whitespace included) counts as at least one token. + */ +export function estimateTokens(text: string): number { + if (!text) return 0; + return Math.ceil(text.length / 4); +} + +/** Authoritative per-step/turn usage the server attaches to message metadata. */ +export interface AuthoritativeUsage { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + reasoningTokens?: number; +} + +/** Live token split for a turn's tail (streaming) assistant message. */ +export interface LiveTurnTokens { + /** Thinking/reasoning tokens (estimate, or authoritative when available). */ + reasoning: number; + /** Answer/output tokens (estimate, or authoritative when available). */ + output: number; + /** True when the numbers come from authoritative server usage, not estimate. */ + authoritative: boolean; +} + +/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */ +function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined { + const meta = message?.metadata as + | { usage?: AuthoritativeUsage } + | undefined; + const usage = meta?.usage; + if (!usage || typeof usage !== "object") return undefined; + return usage; +} + +/** + * Token split for the given (streaming) assistant message. + * + * Prefers AUTHORITATIVE `metadata.usage` when the server has attached it (at a + * step/turn boundary, incl. `reasoningTokens`) — so the live counter snaps to the + * provider's exact figures. 
Until then it returns a running ESTIMATE summed over + * the message parts: `reasoning` parts feed the reasoning estimate, `text` parts + * feed the output estimate. Multi-part / multi-step turns accumulate naturally + * because every part of the turn is summed. + * + * Providers that don't stream reasoning text still surface a reasoning count once + * the authoritative usage arrives (`usage.reasoningTokens`); on the pure estimate + * path such a turn simply shows `reasoning: 0` until then. + */ +export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens { + if (!message) return { reasoning: 0, output: 0, authoritative: false }; + + const usage = metadataUsage(message); + if (usage) { + // Authoritative branch: outputTokens already INCLUDES reasoning tokens in the + // AI SDK usage shape, so subtract reasoning out for the "answer" figure (never + // go negative if a provider reports them inconsistently). + const reasoning = usage.reasoningTokens ?? 0; + const totalOutput = usage.outputTokens ?? 0; + const output = Math.max(0, totalOutput - reasoning); + return { reasoning, output, authoritative: true }; + } + + let reasoning = 0; + let output = 0; + for (const part of message.parts ?? []) { + if (part.type === "reasoning") { + reasoning += estimateTokens((part as { text?: string }).text ?? ""); + } else if (part.type === "text") { + output += estimateTokens((part as { text?: string }).text ?? 
""); + } + } + return { reasoning, output, authoritative: false }; +} diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts index 8f6e48d5..bd0bb2e3 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts @@ -5,7 +5,8 @@ import { rowToUiMessage, prepareAgentStep, buildPartialAssistantRecord, - chatStreamStartMetadata, + chatStreamMetadata, + accumulateStepUsage, MAX_AGENT_STEPS, FINAL_STEP_INSTRUCTION, } from './ai-chat.service'; @@ -298,18 +299,135 @@ describe('buildPartialAssistantRecord', () => { }); /** - * chatStreamStartMetadata: attach the authoritative chatId to the streamed - * assistant UI message ONLY on the `start` part (so the client adopts the real - * created chat id at the first chunk — see #137). Any non-start part adds none. + * chatStreamMetadata: attach metadata to the streamed assistant UI message per + * part type — `chatId` on `start` (so the client adopts the real created chat id + * at the first chunk — see #137), and AUTHORITATIVE usage (incl. reasoning + * tokens) on `finish-step` and `finish` so the client's live token counter snaps + * to exact at each step/turn boundary. */ -describe('chatStreamStartMetadata', () => { +describe('chatStreamMetadata', () => { it('returns { chatId } for the start part', () => { - expect(chatStreamStartMetadata({ type: 'start' }, 'chat-1')).toEqual({ + expect(chatStreamMetadata({ type: 'start' }, 'chat-1')).toEqual({ chatId: 'chat-1', }); }); - it('returns undefined for a finish part (any non-start part)', () => { - expect(chatStreamStartMetadata({ type: 'finish' }, 'chat-1')).toBeUndefined(); + it('returns the CUMULATIVE step usage passed in for the finish-step part', () => { + // finish-step usage is per-step in v6; the caller accumulates and passes the + // running sum, which this just wraps. 
+ expect( + chatStreamMetadata( + { type: 'finish-step', usage: { outputTokens: 100 } }, + 'chat-1', + { inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 }, + ), + ).toEqual({ + usage: { inputTokens: 500, outputTokens: 220, totalTokens: 720, reasoningTokens: 30 }, + }); + }); + + it('returns turn usage for the finish part (reasoning from deprecated top-level field)', () => { + expect( + chatStreamMetadata( + { + type: 'finish', + totalUsage: { + inputTokens: 1000, + outputTokens: 250, + totalTokens: 1250, + reasoningTokens: 50, + }, + }, + 'chat-1', + ), + ).toEqual({ + usage: { + inputTokens: 1000, + outputTokens: 250, + totalTokens: 1250, + reasoningTokens: 50, + }, + }); + }); + + it('prefers outputTokenDetails.reasoningTokens over the deprecated field (finish)', () => { + expect( + chatStreamMetadata( + { + type: 'finish', + totalUsage: { + outputTokens: 100, + reasoningTokens: 5, + outputTokenDetails: { reasoningTokens: 30 }, + }, + }, + 'chat-1', + ), + ).toEqual({ + usage: { + inputTokens: undefined, + outputTokens: 100, + totalTokens: undefined, + reasoningTokens: 30, + }, + }); + }); + + it('returns undefined for a finish-step with no accumulated usage', () => { + expect( + chatStreamMetadata({ type: 'finish-step' }, 'chat-1'), + ).toBeUndefined(); + }); + + it('returns undefined for an unrelated part (e.g. text-delta)', () => { + expect( + chatStreamMetadata({ type: 'text-delta' }, 'chat-1'), + ).toBeUndefined(); + }); +}); + +/** + * accumulateStepUsage: sums per-step usage into a running cumulative total so the + * client never sees the live counter jump DOWN on a multi-step agent turn (#151). 
+ */ +describe('accumulateStepUsage', () => { + it('sums every field across two steps', () => { + expect( + accumulateStepUsage( + { inputTokens: 500, outputTokens: 100, totalTokens: 600, reasoningTokens: 30 }, + { inputTokens: 520, outputTokens: 80, totalTokens: 600, reasoningTokens: 10 }, + ), + ).toEqual({ + inputTokens: 1020, + outputTokens: 180, + totalTokens: 1200, + reasoningTokens: 40, + }); + }); + + it('returns the step as-is when there is no accumulator yet', () => { + expect(accumulateStepUsage(undefined, { outputTokens: 10 })).toEqual({ + outputTokens: 10, + }); + }); + + it('returns the accumulator unchanged when the step usage is absent', () => { + const acc = { outputTokens: 10 }; + expect(accumulateStepUsage(acc, undefined)).toBe(acc); + }); + + it('returns undefined when both sides are absent', () => { + expect(accumulateStepUsage(undefined, undefined)).toBeUndefined(); + }); + + it('keeps a field undefined only when neither side has it', () => { + expect( + accumulateStepUsage({ outputTokens: 5 }, { outputTokens: 7 }), + ).toEqual({ + inputTokens: undefined, + outputTokens: 12, + totalTokens: undefined, + reasoningTokens: undefined, + }); }); }); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index a96a4437..91cb64af 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -420,7 +420,11 @@ export class AiChatService { toolCalls: serializeSteps(steps), metadata: { finishReason, - usage: totalUsage, + // Persist the turn's cumulative usage WITH reasoning tokens resolved + // from either the new `outputTokenDetails` or the deprecated top-level + // field, so reopened history / the Markdown export show the thinking + // token cost too. + usage: normalizeStreamUsage(totalUsage as StreamUsage) ?? 
totalUsage, // Final-step usage = the context actually fed to the model on the last LLM // call (full history + tool results) plus the answer it just generated. // input+output of the FINAL step ≈ the conversation's CURRENT context size, @@ -512,17 +516,42 @@ export class AiChatService { // does not buffer responses by default. // Scrub the SDK's hop-by-hop Connection header before it writes the head (Safari/HTTP2). stripStreamingHopByHopHeaders(res.raw); + // Running sum of per-step usage (v6 `finish-step.usage` is per-step). Sent + // as the cumulative authoritative usage so the client never jumps DOWN. + let cumulativeStepUsage: ChatStreamUsage | undefined; result.pipeUIMessageStreamToResponse(res.raw, { headers: { 'X-Accel-Buffering': 'no' }, // Surface the authoritative chatId on the streamed assistant UI message so // the client adopts the REAL id of the row we created, instead of guessing // the newest chat in its list. `messageMetadata` is invoked by the AI SDK - // on the `start` and `finish` stream parts (ai@6); we attach `chatId` on the - // `start` part so it reaches the client (as message.metadata.chatId) at the - // very first chunk — before any second tab can race a newer chat into the - // list. This fixes the two-tab "adoption race" (#137) where a new chat in - // tab A could adopt tab B's id and leak its turns into the wrong row. - messageMetadata: ({ part }) => chatStreamStartMetadata(part, chatId), + // on the `start`, `finish-step` and `finish` stream parts (ai@6 — note the + // `finish-step` trigger relies on it being delivered as its own + // message-metadata chunk); we attach `chatId` on the `start` part so it + // reaches the client (as message.metadata.chatId) at the very first chunk — + // before any second tab can race a newer chat into the list. This fixes the + // two-tab "adoption race" (#137). 
+ // + // `finish-step.usage` is PER-STEP (not cumulative) in v6, and the client + // merges each metadata.usage by replacement — so on a multi-step agent turn + // (up to MAX_AGENT_STEPS) the naive per-step value would make the live + // counter jump DOWN at each boundary. We keep a running sum here and send + // the CUMULATIVE usage, which converges to `finish.totalUsage` (#151). + messageMetadata: ({ part }) => { + const p = part as StreamMetadataPart; + if (p.type === 'finish-step') { + cumulativeStepUsage = accumulateStepUsage( + cumulativeStepUsage, + normalizeStreamUsage(p.usage), + ); + } + return chatStreamMetadata(p, chatId, cumulativeStepUsage); + }, + // Stream reasoning (thinking) parts to the client so the live counter can + // estimate reasoning tokens from streamed text. v6 default is already + // true; set explicitly so the intent survives any future SDK default + // change. Providers that don't emit reasoning text still surface the + // count via the authoritative `usage.reasoningTokens` on finish-step. + sendReasoning: true, onError: (error: unknown) => { // Reuse the shared formatter so provider error formatting stays // unified between the log line and the streamed error message. @@ -573,16 +602,97 @@ export class AiChatService { } } +/** Shape of the AI SDK v6 LanguageModelUsage we forward to the client. The SDK + * exposes `reasoningTokens` both as a (deprecated) top-level field and under + * `outputTokenDetails.reasoningTokens`; we normalize to a single field so the + * client gets one stable usage shape regardless of provider/SDK version. */ +interface StreamUsage { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + reasoningTokens?: number; + outputTokenDetails?: { reasoningTokens?: number }; +} + +/** A streamed part the messageMetadata callback can receive (only the fields we read). 
*/ +interface StreamMetadataPart { + type: string; + usage?: StreamUsage; + totalUsage?: StreamUsage; +} + +/** Authoritative usage we attach to a streamed assistant message's metadata. */ +export interface ChatStreamUsage { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + reasoningTokens?: number; +} + +/** Normalize an AI SDK usage object to our flat client-facing shape, resolving + * reasoning tokens from either the new `outputTokenDetails` or the deprecated + * top-level field. Returns undefined for a missing usage object. */ +function normalizeStreamUsage( + usage: StreamUsage | undefined, +): ChatStreamUsage | undefined { + if (!usage) return undefined; + const reasoningTokens = + usage.outputTokenDetails?.reasoningTokens ?? usage.reasoningTokens; + return { + inputTokens: usage.inputTokens, + outputTokens: usage.outputTokens, + totalTokens: usage.totalTokens, + reasoningTokens, + }; +} + +/** Sum a (normalized) per-step usage into a running cumulative usage. v6's + * `finish-step.usage` is PER-STEP, so the caller accumulates across steps; the + * cumulative sum converges to the turn's `totalUsage` (no down-jump on the + * client). Returns undefined only when both sides are absent. Pure. */ +export function accumulateStepUsage( + acc: ChatStreamUsage | undefined, + step: ChatStreamUsage | undefined, +): ChatStreamUsage | undefined { + if (!acc) return step; + if (!step) return acc; + const add = (a?: number, b?: number): number | undefined => + a == null && b == null ? undefined : (a ?? 0) + (b ?? 0); + return { + inputTokens: add(acc.inputTokens, step.inputTokens), + outputTokens: add(acc.outputTokens, step.outputTokens), + totalTokens: add(acc.totalTokens, step.totalTokens), + reasoningTokens: add(acc.reasoningTokens, step.reasoningTokens), + }; +} + /** - * Attach the authoritative `chatId` to the streamed assistant message's `start` - * part (as `message.metadata.chatId`) so the client can adopt the real id for a - * new chat. 
See the client's adopt-chat-id.ts for the full #137 design. + * Pure metadata builder for the streamed assistant UI message. The AI SDK calls + * `messageMetadata` on the `start`, `finish-step` and `finish` stream parts; we + * attach (as `message.metadata`): + * - `start` -> `{ chatId }` so the client adopts the real created chat id + * at the first chunk (see adopt-chat-id.ts / #137). + * - `finish-step` -> `{ usage }` the CUMULATIVE authoritative usage so far + * (incl. reasoning tokens) — the caller passes the running + * sum (`cumulativeStepUsage`), since v6 per-step usage is not + * cumulative; the client snaps to exact without jumping down. + * - `finish` -> `{ usage }` from the turn's `totalUsage` (final reconcile). + * Any other part type contributes no metadata. Pure + unit-testable. */ -export function chatStreamStartMetadata( - part: { type: string }, +export function chatStreamMetadata( + part: StreamMetadataPart, chatId: string, -): { chatId: string } | undefined { - return part.type === 'start' ? { chatId } : undefined; + cumulativeStepUsage?: ChatStreamUsage, +): { chatId: string } | { usage: ChatStreamUsage } | undefined { + if (part.type === 'start') return { chatId }; + if (part.type === 'finish-step') { + return cumulativeStepUsage ? { usage: cumulativeStepUsage } : undefined; + } + if (part.type === 'finish') { + const usage = normalizeStreamUsage(part.totalUsage); + return usage ? { usage } : undefined; + } + return undefined; } /** The last message with role 'user' from a useChat payload, if any. 
*/ -- 2.49.1 From 044e3f7e6a5985d20f22e10217e388c99e5091fb Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 13:05:07 +0300 Subject: [PATCH 2/2] fix(ai-chat): plural token strings + cover reasoning UI + cleanups (#151 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review of #158 (Request changes) — core logic verified correct; addressed the test-coverage + localization items: 1. i18n pluralization: the token-count keys were called with {count} but had one form, so ru-RU always rendered the genitive ("1 токенов"). Added _one/_other (en) and _one/_few/_many (ru: токен/токена/токенов) for both "Thinking… · {{count}} tokens" and "Thinking · {{count}} tokens"; de-duped the PR-added duplicate "Thinking" key. Call sites unchanged. 2. ReasoningBlock: new reasoning-block.test.tsx (4 branches: authoritative count wins / estimate fallback / header-only when count-but-no-text / body render). 3. Reasoning-token attribution: extracted the #151 anti-double-count rule into a pure `reasoningTokensForPart(message)` (single reasoning part -> authoritative turn total; multiple/none -> undefined so each estimates). message-item uses it; removed the now-dead lastReasoningIndex reduce (review #5). Unit-tested. 6. adopt-chat-id.ts: refreshed 3 stale `chatStreamStartMetadata` -> `chatStreamMetadata` comment references. 7. chat-markdown.test.ts: assert the export footer's `reasoning: N` line appears when reasoningTokens>0 and is absent at 0/undefined. Skipped optional #4 (mantine useThrottledCallback): the manual throttle has two distinct exit paths (turn-end revert-to-null + the captured-total trailing emit) with no guarding test; remapping risks the streaming behavior — non-blocking. Client tsc clean; ai-chat suite green (171 tests). 
Co-Authored-By: Claude Opus 4.8 --- .../public/locales/en-US/translation.json | 5 +- .../public/locales/ru-RU/translation.json | 7 +- .../ai-chat/components/message-item.tsx | 37 ++++------- .../components/reasoning-block.test.tsx | 65 +++++++++++++++++++ .../features/ai-chat/utils/adopt-chat-id.ts | 6 +- .../ai-chat/utils/chat-markdown.test.ts | 51 +++++++++++++++ .../ai-chat/utils/reasoning-tokens.test.ts | 56 ++++++++++++++++ .../ai-chat/utils/reasoning-tokens.ts | 34 ++++++++++ 8 files changed, 231 insertions(+), 30 deletions(-) create mode 100644 apps/client/src/features/ai-chat/components/reasoning-block.test.tsx create mode 100644 apps/client/src/features/ai-chat/utils/reasoning-tokens.test.ts create mode 100644 apps/client/src/features/ai-chat/utils/reasoning-tokens.ts diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index fcb10ab2..e1f4aa55 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -1148,8 +1148,11 @@ "Ask a question…": "Ask a question…", "Thinking…": "Thinking…", "Thinking… · {{count}} tokens": "Thinking… · {{count}} tokens", - "Thinking": "Thinking", + "Thinking… · {{count}} tokens_one": "Thinking… · {{count}} token", + "Thinking… · {{count}} tokens_other": "Thinking… · {{count}} tokens", "Thinking · {{count}} tokens": "Thinking · {{count}} tokens", + "Thinking · {{count}} tokens_one": "Thinking · {{count}} token", + "Thinking · {{count}} tokens_other": "Thinking · {{count}} tokens", "The assistant is unavailable right now. Please try again.": "The assistant is unavailable right now. Please try again.", "Public share assistant": "Public share assistant", "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. You pay for the tokens.": "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. 
You pay for the tokens.", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index 87064523..6c121f15 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -681,8 +681,13 @@ "{{name}} is typing…": "{{name}} печатает…", "Thinking…": "Думаю…", "Thinking… · {{count}} tokens": "Думаю… · {{count}} токенов", - "Thinking": "Размышления", + "Thinking… · {{count}} tokens_one": "Думаю… · {{count}} токен", + "Thinking… · {{count}} tokens_few": "Думаю… · {{count}} токена", + "Thinking… · {{count}} tokens_many": "Думаю… · {{count}} токенов", "Thinking · {{count}} tokens": "Размышления · {{count}} токенов", + "Thinking · {{count}} tokens_one": "Размышления · {{count}} токен", + "Thinking · {{count}} tokens_few": "Размышления · {{count}} токена", + "Thinking · {{count}} tokens_many": "Размышления · {{count}} токенов", "Agent role": "Роль агента", "AI chat": "AI-чат", "AI chat is disabled for this workspace.": "AI-чат отключён для этого рабочего пространства.", diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx index 53d666f9..6436b4d6 100644 --- a/apps/client/src/features/ai-chat/components/message-item.tsx +++ b/apps/client/src/features/ai-chat/components/message-item.tsx @@ -9,6 +9,7 @@ import { ToolUiPart, isToolPart } from "@/features/ai-chat/utils/tool-parts.tsx" import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts"; import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts"; import { resolveAssistantName } from "@/features/ai-chat/utils/assistant-name.ts"; +import { reasoningTokensForPart } from "@/features/ai-chat/utils/reasoning-tokens.ts"; import { describeChatError } from "@/features/ai-chat/utils/error-message.ts"; import classes from "@/features/ai-chat/components/ai-chat.module.css"; @@ -78,25 
+79,12 @@ export default function MessageItem({ // return won't fire for them. if (!assistantMessageHasVisibleContent(message)) return null; - // Authoritative reasoning token count for the turn, if the server attached it - // (incl. providers that report a reasoning COUNT without streaming the text). - // It is the TURN TOTAL, so it may only be attributed to a block when there is a - // SINGLE reasoning part (the common one-step turn) — then that block shows the - // exact figure. With multiple reasoning parts (multi-step agent turn) every - // block falls back to its own per-part estimate; attributing the turn total to - // one of them would double-count against the others' estimates (#151 review). - // The authoritative turn total is still surfaced live in the header badge. - const reasoningTokens = ( - message.metadata as { usage?: { reasoningTokens?: number } } | undefined - )?.usage?.reasoningTokens; - const reasoningPartCount = message.parts.reduce( - (acc, p) => (p.type === "reasoning" ? acc + 1 : acc), - 0, - ); - const lastReasoningIndex = message.parts.reduce( - (acc, p, i) => (p.type === "reasoning" ? i : acc), - -1, - ); + // Authoritative reasoning token count to attribute to a reasoning block, or + // undefined when the block must estimate on its own. See reasoningTokensForPart + // for the #151 anti-double-count rule (only a single reasoning part may carry + // the turn total). The authoritative turn total is still surfaced live in the + // header badge regardless. + const reasoningTokens = reasoningTokensForPart(message); return ( @@ -109,12 +97,11 @@ export default function MessageItem({ // count. Empty/whitespace reasoning with no authoritative count carries // nothing to show, so skip it (avoids an empty 0-token block). const text = (part as { text?: string }).text ?? ""; - const tokens = - reasoningPartCount === 1 && index === lastReasoningIndex - ? 
reasoningTokens - : undefined; - if (!text.trim() && !(tokens && tokens > 0)) return null; - return ; + if (!text.trim() && !(reasoningTokens && reasoningTokens > 0)) + return null; + return ( + + ); } if (part.type === "text") { diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx new file mode 100644 index 00000000..7d325391 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx @@ -0,0 +1,65 @@ +import { describe, it, expect, vi } from "vitest"; +import { render, screen } from "@testing-library/react"; +import { MantineProvider } from "@mantine/core"; + +// Stub react-i18next so `t` returns the key with `{{count}}` interpolated. This +// keeps the assertions on the component's OWN count logic (authoritative vs +// estimate) rather than on translation, and mirrors the t-mock pattern used by +// other component tests in the repo. +vi.mock("react-i18next", () => ({ + useTranslation: () => ({ + t: (key: string, opts?: { count?: number }) => + opts && typeof opts.count === "number" + ? key.replace("{{count}}", String(opts.count)) + : key, + }), +})); + +import ReasoningBlock from "./reasoning-block"; +import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; + +// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. + +function renderBlock(props: { text: string; tokens?: number }) { + return render( + + + , + ); +} + +describe("ReasoningBlock", () => { + it("shows the authoritative count in the header when tokens > 0", () => { + // Text "thinking…" estimates to ceil(9/4) = 3, but the authoritative 42 + // must win, so the header shows 42 (and NOT the 3-token estimate). 
+ renderBlock({ text: "thinking…", tokens: 42 }); + expect(screen.getByText("Thinking · 42 tokens")).toBeDefined(); + expect(screen.queryByText("Thinking · 3 tokens")).toBeNull(); + }); + + it("falls back to the text-length estimate when no authoritative tokens", () => { + const text = "some reasoning prose that streams in"; + const estimate = estimateTokens(text); + renderBlock({ text }); + expect(estimate).toBeGreaterThan(0); + expect(screen.getByText(new RegExp(`${estimate} tokens`))).toBeDefined(); + }); + + it("header-only when text is empty but an authoritative count is present", () => { + renderBlock({ text: "", tokens: 17 }); + expect(screen.getByText(/17 tokens/)).toBeDefined(); + // No disclosure body to expand: the toggle button is disabled. + const button = screen.getByRole("button"); + expect((button as HTMLButtonElement).disabled).toBe(true); + }); + + it("renders the reasoning body (markdown or raw-text fallback)", () => { + renderBlock({ text: "**bold** reasoning", tokens: 5 }); + // The toggle is enabled because there IS body text to expand. + const button = screen.getByRole("button"); + expect((button as HTMLButtonElement).disabled).toBe(false); + // The body prose renders (markdown -> sanitized html, or raw-text fallback); + // either way the text is present in the document. + expect(screen.getByText(/reasoning/)).toBeDefined(); + }); +}); diff --git a/apps/client/src/features/ai-chat/utils/adopt-chat-id.ts b/apps/client/src/features/ai-chat/utils/adopt-chat-id.ts index 1993dccc..0c01dd91 100644 --- a/apps/client/src/features/ai-chat/utils/adopt-chat-id.ts +++ b/apps/client/src/features/ai-chat/utils/adopt-chat-id.ts @@ -4,7 +4,7 @@ * ============================ CANONICAL #137 NOTE ============================ * This docblock is the single authoritative explanation of the new-chat id * adoption design and the #137 two-tab race it fixes. 
Other call sites - * (use-chat-session.ts, the server's `chatStreamStartMetadata`) reference here + * (use-chat-session.ts, the server's `chatStreamMetadata`) reference here * rather than restating it. * * When a user sends the first turn of a BRAND-NEW chat, the client has no chat @@ -17,7 +17,7 @@ * leak its later turns into it (#137). We adopt by IDENTITY instead, two ways: * * PRIMARY path: the server streams the real chat id on the assistant message - * metadata's `start` part (see `chatStreamStartMetadata` server-side); + * metadata's `start` part (see `chatStreamMetadata` server-side); * `extractServerChatId` reads it off the finished message and * `resolveAdoptedChatId` turns it into the id to adopt for a new chat. This is * authoritative and immune to the race. @@ -46,7 +46,7 @@ export function resolveAdoptedChatId( /** * Read the authoritative server chat id off a finished assistant message. The * server attaches it as `message.metadata.chatId` on the `start` part (see - * `chatStreamStartMetadata`). Returns it only when it is a string; undefined for + * `chatStreamMetadata`). Returns it only when it is a string; undefined for * a missing message, missing metadata, or a non-string `chatId`. 
*/ export function extractServerChatId( diff --git a/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts b/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts index 79eb6023..651d1d26 100644 --- a/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts +++ b/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts @@ -314,6 +314,57 @@ describe("buildChatMarkdown — token totals", () => { }); expect(md).toContain("- Total tokens: 99"); }); + + it("appends the reasoning figure to the row footer when reasoningTokens > 0", () => { + const md = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [ + row({ + role: "assistant", + content: "x", + metadata: { + usage: { inputTokens: 10, outputTokens: 8, reasoningTokens: 3 }, + }, + }), + ], + t, + }); + expect(md).toContain("_Tokens — in: 10, out: 8, reasoning: 3, total: 18_"); + }); + + it("omits the reasoning figure when reasoningTokens is 0 / absent", () => { + const zero = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [ + row({ + role: "assistant", + content: "x", + metadata: { + usage: { inputTokens: 10, outputTokens: 5, reasoningTokens: 0 }, + }, + }), + ], + t, + }); + expect(zero).toContain("_Tokens — in: 10, out: 5, total: 15_"); + expect(zero).not.toContain("reasoning:"); + + const absent = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [ + row({ + role: "assistant", + content: "x", + metadata: { usage: { inputTokens: 10, outputTokens: 5 } }, + }), + ], + t, + }); + expect(absent).not.toContain("reasoning:"); + }); }); describe("buildChatMarkdown — pending / in-progress messages", () => { diff --git a/apps/client/src/features/ai-chat/utils/reasoning-tokens.test.ts b/apps/client/src/features/ai-chat/utils/reasoning-tokens.test.ts new file mode 100644 index 00000000..6e7e30a5 --- /dev/null +++ b/apps/client/src/features/ai-chat/utils/reasoning-tokens.test.ts @@ -0,0 +1,56 @@ +import { describe, expect, it } from "vitest"; +import type { UIMessage } from 
"@ai-sdk/react"; +import { reasoningTokensForPart } from "@/features/ai-chat/utils/reasoning-tokens.ts"; + +/** + * Pure-helper tests for `reasoningTokensForPart`, the #151 anti-double-count + * rule: the authoritative `usage.reasoningTokens` is the TURN TOTAL, so it may + * only be attributed when the turn has exactly one reasoning part. With multiple + * reasoning parts (or no authoritative usage) every part falls back to its own + * per-part estimate, signalled here by `undefined`. + */ +const msg = ( + parts: UIMessage["parts"], + metadata?: unknown, +): UIMessage => + ({ + id: Math.random().toString(), + role: "assistant", + parts, + metadata, + }) as UIMessage; + +describe("reasoningTokensForPart", () => { + it("single reasoning part -> the authoritative turn total", () => { + const m = msg( + [ + { type: "reasoning", text: "thinking…" } as never, + { type: "text", text: "answer" }, + ], + { usage: { reasoningTokens: 42 } }, + ); + expect(reasoningTokensForPart(m)).toBe(42); + }); + + it("multiple reasoning parts -> undefined (each estimates on its own)", () => { + const m = msg( + [ + { type: "reasoning", text: "step one" } as never, + { type: "reasoning", text: "step two" } as never, + { type: "text", text: "answer" }, + ], + { usage: { reasoningTokens: 99 } }, + ); + // Even with an authoritative total, two reasoning parts must each estimate + // (attributing the total to one would double-count against the other). 
+ expect(reasoningTokensForPart(m)).toBeUndefined(); + }); + + it("no authoritative usage -> undefined even for a single reasoning part", () => { + const m = msg([ + { type: "reasoning", text: "thinking…" } as never, + { type: "text", text: "answer" }, + ]); + expect(reasoningTokensForPart(m)).toBeUndefined(); + }); +}); diff --git a/apps/client/src/features/ai-chat/utils/reasoning-tokens.ts b/apps/client/src/features/ai-chat/utils/reasoning-tokens.ts new file mode 100644 index 00000000..ab21d4b2 --- /dev/null +++ b/apps/client/src/features/ai-chat/utils/reasoning-tokens.ts @@ -0,0 +1,34 @@ +import type { UIMessage } from "@ai-sdk/react"; + +/** + * Decide the authoritative reasoning token count to attribute to a single + * `reasoning` part of an assistant message — or `undefined` when the part should + * fall back to its own per-part estimate. + * + * `usage.reasoningTokens` is the TURN TOTAL, so it may only be attributed to a + * block when the turn has exactly ONE reasoning part (the common one-step turn): + * then that block can show the exact figure. With MULTIPLE reasoning parts (a + * multi-step agent turn) every block must fall back to its own estimate — + * attributing the turn total to one of them would double-count against the + * others' estimates (#151 review anti-double-count rule). When there is no + * authoritative usage at all, every part estimates. + * + * Returns the authoritative `reasoningTokens` only for the single-reasoning-part + * case; `undefined` otherwise (the caller estimates from the part text). + */ +export function reasoningTokensForPart( + message: UIMessage, +): number | undefined { + const reasoningTokens = ( + message.metadata as { usage?: { reasoningTokens?: number } } | undefined + )?.usage?.reasoningTokens; + + const reasoningPartCount = (message.parts ?? []).reduce( + (acc, p) => (p.type === "reasoning" ? acc + 1 : acc), + 0, + ); + + // Exactly one reasoning part -> attribute the authoritative turn total to it. 
+ // Otherwise (zero or multiple) each part estimates on its own. + return reasoningPartCount === 1 ? reasoningTokens : undefined; +} -- 2.49.1