feat(ai-chat): realtime token counter + reasoning tokens, Claude-Code style (#151)

Tokens were only counted post-hoc (onFinish) and the header badge updated only on
chat open/switch; reasoning wasn't requested or shown. Now a counter ticks LIVE
during generation and surfaces reasoning ("thinking") tokens separately, like
Claude Code's `Thinking… · N tokens`.

Architecture (AI SDK v6): no provider gives exact per-token usage mid-stream, so
the live number is a cheap client estimate (chars/≈4) reconciled to AUTHORITATIVE
provider usage at step boundaries and turn end. The useChat per-delta re-render is
the existing realtime engine.

- server: `chatStreamMetadata` now also forwards usage on `finish-step` + `finish`;
  `sendReasoning: true`; persisted `metadata.usage` carries `reasoningTokens`
  (normalized from `outputTokenDetails` or the deprecated field).
- client: pure `count-stream-tokens` (estimateTokens / liveTurnTokens, prefers
  authoritative usage else estimate); `Thinking… · N tokens` in the typing
  indicator; collapsible "Thinking" reasoning block; throttled (~8 Hz) live
  turn-token header badge; `reasoningTokens` in types + Markdown export.

Review fixes folded in:
- v6 `finish-step.usage` is PER-STEP, not cumulative — the server now ACCUMULATES
  a running sum (new pure `accumulateStepUsage`) and sends the cumulative, which
  converges to `finish.totalUsage`, so the live counter never jumps DOWN on a
  multi-step agent turn.
- reasoning double-count: the authoritative turn-total is attributed to a block
  ONLY for a single-reasoning-part (one-step) turn; multi-step blocks each show
  their own estimate (the authoritative total stays in the header).
- no "0" badge flash at turn start (require live > 0, else show context size).
- comment refreshed (finish-step trigger).

Tests: server `accumulateStepUsage` + updated `chatStreamMetadata` (34 in the
suite); client pure-fn tests. Both tsc clean; 162 client ai-chat + the ai-chat
server suite pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-24 06:56:14 +03:00
parent acf6d85b07
commit 0ebb1adce8
16 changed files with 775 additions and 28 deletions

View File

@@ -156,6 +156,12 @@ export default function AiChatWindow() {
isStreaming: false,
});
// Live turn-token total (reasoning + output) for the in-flight turn, pushed up
// (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream.
// `null` means no turn is in flight -> the badge falls back to the persisted
// context size below.
const [liveTurnTokens, setLiveTurnTokens] = useState<number | null>(null);
// The page the user is currently viewing. AiChatWindow lives in a pathless
// parent layout route, so useParams() can't see :pageSlug. Match the full
// pathname against the authenticated page route instead so "the current page"
@@ -485,11 +491,19 @@ export default function AiChatWindow() {
)}
<div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
{contextTokens > 0 && (
{/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz);
once it finishes, fall back to the persisted context size. Require
> 0 so the very first emit (an empty tail message, count 0) does not
flash a "0" badge before any token streams in (#151 review). */}
{liveTurnTokens !== null && liveTurnTokens > 0 ? (
<Tooltip label={t("Tokens generated this turn")} withArrow>
<span className={classes.badge}>{formatTokens(liveTurnTokens)}</span>
</Tooltip>
) : contextTokens > 0 ? (
<Tooltip label={t("Current context size")} withArrow>
<span className={classes.badge}>{formatTokens(contextTokens)}</span>
</Tooltip>
)}
) : null}
</div>
<div style={{ display: "flex", alignItems: "center", gap: 1 }}>
@@ -608,6 +622,7 @@ export default function AiChatWindow() {
assistantName={currentRole?.name}
onTurnFinished={onTurnFinished}
liveStateRef={liveThreadRef}
onLiveTurnTokens={setLiveTurnTokens}
/>
)}
</div>

View File

@@ -111,6 +111,24 @@
background: light-dark(var(--mantine-color-gray-0), var(--mantine-color-dark-6));
}
/* Collapsible "Thinking" (reasoning) block: a subtle left rule, dimmer than the
answer so it reads as secondary thinking context above the real answer. */
.reasoningBlock {
border-left: 2px solid light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-4));
padding-left: 8px;
}
.reasoningText {
margin-top: 4px;
font-size: var(--mantine-font-size-xs);
color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-dark-1));
white-space: pre-wrap;
}
.reasoningText p {
margin: 0 0 4px;
}
.inputWrapper {
flex: 0 0 auto;
padding-top: var(--mantine-spacing-xs);

View File

@@ -23,6 +23,7 @@ import {
} from "@/features/ai-chat/types/ai-chat.types.ts";
import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts";
import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
import {
dequeue,
enqueueMessage,
@@ -69,6 +70,12 @@ interface ChatThreadProps {
* assistant message. A ref (not state) avoids re-rendering the parent on
* every streamed delta. */
liveStateRef?: MutableRefObject<{ messages: UIMessage[]; isStreaming: boolean }>;
/** Reports the live turn-token total (reasoning + output) for the in-flight
* turn so the parent can show a header badge that ticks mid-stream. THROTTLED
* here (~8 Hz) so the parent re-renders a handful of times a second, not on
* every streamed delta. Called with `null` when no turn is in flight (the
* parent then reverts the badge to the persisted context size). */
onLiveTurnTokens?: (tokens: number | null) => void;
}
/**
@@ -113,6 +120,7 @@ export default function ChatThread({
assistantName,
onTurnFinished,
liveStateRef,
onLiveTurnTokens,
}: ChatThreadProps) {
const { t } = useTranslation();
@@ -310,6 +318,54 @@ export default function ChatThread({
};
}, [liveStateRef, messages, isStreaming]);
// Report the live turn-token total to the parent header badge, THROTTLED to
// ~8 Hz so the parent re-renders a few times a second instead of on every
// streamed delta. The tail assistant message's reasoning+output (estimate while
// streaming, authoritative once a step reports usage) is the live figure. When
// the turn ends we emit a final exact value, then `null` so the parent reverts
// the badge to the persisted context size.
const lastEmitRef = useRef(0);
const emitTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
useEffect(() => {
if (!onLiveTurnTokens) return;
if (!isStreaming) {
// Turn ended (or never started): clear any pending throttle and revert.
if (emitTimerRef.current) {
clearTimeout(emitTimerRef.current);
emitTimerRef.current = null;
}
lastEmitRef.current = 0;
onLiveTurnTokens(null);
return;
}
const tail = messages[messages.length - 1];
const live =
tail?.role === "assistant" ? liveTurnTokens(tail) : null;
const total = live ? live.reasoning + live.output : 0;
const now = Date.now();
const MIN_INTERVAL = 120; // ms (~8 Hz)
const elapsed = now - lastEmitRef.current;
if (elapsed >= MIN_INTERVAL) {
lastEmitRef.current = now;
onLiveTurnTokens(total);
} else if (!emitTimerRef.current) {
// Schedule a trailing emit so the FINAL value of a burst is not dropped.
emitTimerRef.current = setTimeout(() => {
emitTimerRef.current = null;
lastEmitRef.current = Date.now();
onLiveTurnTokens(total);
}, MIN_INTERVAL - elapsed);
}
}, [messages, isStreaming, onLiveTurnTokens]);
// Clear any pending throttle timer on unmount (chat switch via `key`) so a
// trailing emit can't fire into a torn-down thread's parent.
useEffect(() => {
return () => {
if (emitTimerRef.current) clearTimeout(emitTimerRef.current);
};
}, []);
// Classify the turn error into a heading + detail so the banner names the cause
// (connection reset, timeout, rate limit, context overflow, quota, ...) instead
// of a generic "Something went wrong".

View File

@@ -2,6 +2,7 @@ import { Box, Text } from "@mantine/core";
import { useTranslation } from "react-i18next";
import type { UIMessage } from "@ai-sdk/react";
import ToolCallCard from "@/features/ai-chat/components/tool-call-card.tsx";
import ReasoningBlock from "@/features/ai-chat/components/reasoning-block.tsx";
import ChatErrorAlert from "@/features/ai-chat/components/chat-error-alert.tsx";
import ChatStoppedNotice from "@/features/ai-chat/components/chat-stopped-notice.tsx";
import { ToolUiPart, isToolPart } from "@/features/ai-chat/utils/tool-parts.tsx";
@@ -77,12 +78,45 @@ export default function MessageItem({
// return won't fire for them.
if (!assistantMessageHasVisibleContent(message)) return null;
// Authoritative reasoning token count for the turn, if the server attached it
// (incl. providers that report a reasoning COUNT without streaming the text).
// It is the TURN TOTAL, so it may only be attributed to a block when there is a
// SINGLE reasoning part (the common one-step turn) — then that block shows the
// exact figure. With multiple reasoning parts (multi-step agent turn) every
// block falls back to its own per-part estimate; attributing the turn total to
// one of them would double-count against the others' estimates (#151 review).
// The authoritative turn total is still surfaced live in the header badge.
const reasoningTokens = (
message.metadata as { usage?: { reasoningTokens?: number } } | undefined
)?.usage?.reasoningTokens;
const reasoningPartCount = message.parts.reduce(
(acc, p) => (p.type === "reasoning" ? acc + 1 : acc),
0,
);
const lastReasoningIndex = message.parts.reduce(
(acc, p, i) => (p.type === "reasoning" ? i : acc),
-1,
);
return (
<Box className={classes.messageRow}>
<Text size="xs" c="dimmed" mb={4}>
{resolveAssistantName(assistantName) ?? t("AI agent")}
</Text>
{message.parts.map((part, index) => {
if (part.type === "reasoning") {
// Reasoning ("thinking") -> a collapsible block with its own token
// count. Empty/whitespace reasoning with no authoritative count carries
// nothing to show, so skip it (avoids an empty 0-token block).
const text = (part as { text?: string }).text ?? "";
const tokens =
reasoningPartCount === 1 && index === lastReasoningIndex
? reasoningTokens
: undefined;
if (!text.trim() && !(tokens && tokens > 0)) return null;
return <ReasoningBlock key={index} text={text} tokens={tokens} />;
}
if (part.type === "text") {
// Skip empty/whitespace-only text parts (a streaming message often
// starts with an empty text part before the first token arrives); the

View File

@@ -6,6 +6,7 @@ import MessageItem from "@/features/ai-chat/components/message-item.tsx";
import TypingIndicator from "@/features/ai-chat/components/typing-indicator.tsx";
import { isToolPart, toolRunState, ToolUiPart } from "@/features/ai-chat/utils/tool-parts.tsx";
import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts";
import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
import classes from "@/features/ai-chat/components/ai-chat.module.css";
interface MessageListProps {
@@ -94,6 +95,19 @@ export function typingIndicatorShowsName(messages: UIMessage[]): boolean {
return !assistantMessageHasVisibleContent(last);
}
/**
* The live thinking-token count to show on the standalone typing indicator. It
* is the reasoning split of the tail assistant message (estimate while streaming,
* authoritative once the server attaches usage at a step/turn boundary). Returns
* 0 when the turn has produced no reasoning yet — the indicator then shows the
* plain "Thinking…" line.
*/
export function tailThinkingTokens(messages: UIMessage[]): number {
const last = messages[messages.length - 1];
if (!last || last.role !== "assistant") return 0;
return liveTurnTokens(last).reasoning;
}
/**
* Scrollable transcript. Auto-scrolls to the newest message as it streams in,
* but only while the user is pinned to the bottom — if they scrolled up to read
@@ -190,7 +204,13 @@ export default function MessageList({
assistantName={assistantName}
/>
))}
{typing && <TypingIndicator assistantName={assistantName} showName={typingIndicatorShowsName(messages)} />}
{typing && (
<TypingIndicator
assistantName={assistantName}
showName={typingIndicatorShowsName(messages)}
thinkingTokens={tailThinkingTokens(messages)}
/>
)}
</Stack>
</ScrollArea>
);

View File

@@ -0,0 +1,83 @@
import { useState } from "react";
import { Box, Collapse, Group, Text, UnstyledButton } from "@mantine/core";
import { IconChevronDown } from "@tabler/icons-react";
import { useTranslation } from "react-i18next";
import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
import classes from "@/features/ai-chat/components/ai-chat.module.css";
interface ReasoningBlockProps {
/** The streamed/persisted reasoning (thinking) text. May be empty when the
* provider reports only a reasoning token COUNT without the text. */
text: string;
/** Authoritative reasoning token count from `usage.reasoningTokens`, when the
* step/turn has finished. When absent (or 0) the count is estimated from the
* text length so it ticks live as the reasoning streams in. */
tokens?: number;
}
/**
* Collapsible "Thinking" block for an assistant `reasoning` part. Mirrors Claude
* Code's surfacing of the model's thinking: a header that shows the thinking
* token count (authoritative when the step has reported usage, else a live
* estimate from the streamed text) and an expandable body with the reasoning
* prose. Collapsed by default so it never crowds out the answer.
*
* Providers that don't stream reasoning TEXT still render this block from the
* authoritative count alone (header only, empty body) so the cost is visible.
*/
export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
const { t } = useTranslation();
const [open, setOpen] = useState(false);
// Authoritative count wins; otherwise estimate live from the streamed text.
const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
const trimmed = text.trim();
const html = trimmed ? renderChatMarkdown(trimmed, {}) : "";
return (
<Box className={classes.reasoningBlock} mb={6}>
<UnstyledButton
onClick={() => setOpen((o) => !o)}
// No body to expand when the provider reported only a token count.
disabled={!trimmed}
aria-expanded={open}
>
<Group gap={6} wrap="nowrap" align="center">
<IconChevronDown
size={12}
style={{
transform: open ? "none" : "rotate(-90deg)",
transition: "transform 150ms ease",
opacity: trimmed ? 1 : 0.4,
}}
/>
<Text size="xs" c="dimmed">
{count > 0
? t("Thinking · {{count}} tokens", { count })
: t("Thinking")}
</Text>
</Group>
</UnstyledButton>
{trimmed && (
<Collapse in={open}>
{html ? (
<div
className={classes.reasoningText}
// Sanitized by renderChatMarkdown (DOMPurify) before insertion.
dangerouslySetInnerHTML={{ __html: html }}
/>
) : (
<Text
className={classes.reasoningText}
style={{ whiteSpace: "pre-wrap" }}
>
{trimmed}
</Text>
)}
</Collapse>
)}
</Box>
);
}

View File

@@ -0,0 +1,50 @@
import { describe, expect, it } from "vitest";
import type { UIMessage } from "@ai-sdk/react";
import { tailThinkingTokens } from "@/features/ai-chat/components/message-list.tsx";
/**
* Pure-helper tests for `tailThinkingTokens`: the live thinking-token count the
* standalone typing indicator shows. It is the reasoning split of the tail
* assistant message (estimate while streaming, authoritative once usage arrives).
*/
const msg = (
role: "user" | "assistant",
parts: unknown[],
metadata?: unknown,
): UIMessage =>
({ id: Math.random().toString(), role, parts, metadata }) as UIMessage;
describe("tailThinkingTokens", () => {
it("is 0 when there are no messages", () => {
expect(tailThinkingTokens([])).toBe(0);
});
it("is 0 when the tail message is the user's", () => {
expect(tailThinkingTokens([msg("user", [{ type: "text", text: "q" }])])).toBe(0);
});
it("is 0 when the assistant has produced no reasoning yet", () => {
expect(
tailThinkingTokens([msg("assistant", [{ type: "text", text: "answer" }])]),
).toBe(0);
});
it("estimates reasoning tokens from streamed reasoning text", () => {
// 8 chars -> 2 tokens.
expect(
tailThinkingTokens([
msg("assistant", [{ type: "reasoning", text: "12345678" }]),
]),
).toBe(2);
});
it("uses authoritative usage.reasoningTokens once the server attaches it", () => {
expect(
tailThinkingTokens([
msg("assistant", [{ type: "reasoning", text: "x" }], {
usage: { outputTokens: 100, reasoningTokens: 42 },
}),
]),
).toBe(42);
});
});

View File

@@ -16,6 +16,12 @@ interface TypingIndicatorProps {
* assistant row above already shows the same name, to avoid a duplicate label.
*/
showName?: boolean;
/**
* Live thinking/reasoning token count for the in-flight turn. When > 0 the
* typing line becomes `Thinking… · {count} tokens` (like Claude Code). Omitted
* / 0 keeps the plain `Thinking…` line.
*/
thinkingTokens?: number;
}
/**
@@ -30,9 +36,14 @@ interface TypingIndicatorProps {
* typing line is always the generic "Thinking…" (it never includes the
* role/identity name).
*/
export default function TypingIndicator({ assistantName, showName = true }: TypingIndicatorProps) {
export default function TypingIndicator({ assistantName, showName = true, thinkingTokens }: TypingIndicatorProps) {
const { t } = useTranslation();
const name = resolveAssistantName(assistantName);
// Show the running thinking-token count only once there is something to count.
const thinkingLine =
thinkingTokens && thinkingTokens > 0
? t("Thinking… · {{count}} tokens", { count: thinkingTokens })
: t("Thinking…");
return (
<Box className={classes.messageRow}>
@@ -48,7 +59,7 @@ export default function TypingIndicator({ assistantName, showName = true }: Typi
<span />
</span>
<Text size="sm" c="dimmed">
{t("Thinking…")}
{thinkingLine}
</Text>
</Group>
</Box>

View File

@@ -98,6 +98,10 @@ export interface IAiChatMessageRow {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
// Reasoning (thinking) tokens, when the provider reports them. Optional so
// old history rows (recorded before this shipped) stay valid. Included in
// `outputTokens` per the AI SDK usage shape.
reasoningTokens?: number;
};
// Current context size for the turn = final-step (input+output) tokens, i.e.
// how much the conversation occupies in the model's context window after this

View File

@@ -77,6 +77,7 @@ function rowTokens(usage: {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
reasoningTokens?: number;
}): number {
return (
usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0)
@@ -175,8 +176,14 @@ export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
const usage = row.metadata?.usage;
if (usage) {
const total = usage.totalTokens ?? rowTokens(usage);
// Reasoning (thinking) tokens are shown only when the provider reported a
// positive count; old rows / non-reasoning providers omit it.
const reasoning =
usage.reasoningTokens && usage.reasoningTokens > 0
? `, reasoning: ${usage.reasoningTokens}`
: "";
blocks.push(
`_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}, total: ${total}_`,
`_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}${reasoning}, total: ${total}_`,
);
}
});

View File

@@ -0,0 +1,119 @@
import { describe, expect, it } from "vitest";
import type { UIMessage } from "@ai-sdk/react";
import {
estimateTokens,
liveTurnTokens,
} from "@/features/ai-chat/utils/count-stream-tokens.ts";
const msg = (parts: unknown[], metadata?: unknown): UIMessage =>
({
id: Math.random().toString(),
role: "assistant",
parts,
metadata,
}) as UIMessage;
describe("estimateTokens", () => {
it("returns 0 for the empty string", () => {
expect(estimateTokens("")).toBe(0);
});
it("ceils chars/4 so any non-empty text is at least 1 token", () => {
expect(estimateTokens("a")).toBe(1);
expect(estimateTokens("abcd")).toBe(1);
expect(estimateTokens("abcde")).toBe(2);
expect(estimateTokens("12345678")).toBe(2);
});
});
describe("liveTurnTokens — estimate path", () => {
it("is all zeros for an undefined message", () => {
expect(liveTurnTokens(undefined)).toEqual({
reasoning: 0,
output: 0,
authoritative: false,
});
});
it("is all zeros for a parts-less message", () => {
expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({
reasoning: 0,
output: 0,
authoritative: false,
});
});
it("estimates output from text parts", () => {
// 8 chars -> 2 tokens.
const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }]));
expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false });
});
it("estimates reasoning from reasoning parts (kept separate from output)", () => {
const r = liveTurnTokens(
msg([
{ type: "reasoning", text: "12345678" },
{ type: "text", text: "abcd" },
]),
);
expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false });
});
it("accumulates across multiple text + reasoning parts (multi-step)", () => {
const r = liveTurnTokens(
msg([
{ type: "reasoning", text: "abcd" }, // 1
{ type: "text", text: "abcd" }, // 1
{ type: "tool-getPage", state: "output-available" }, // ignored
{ type: "reasoning", text: "abcd" }, // 1
{ type: "text", text: "abcdefgh" }, // 2
]),
);
expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false });
});
it("ignores non text/reasoning parts (tools, step-start)", () => {
const r = liveTurnTokens(
msg([
{ type: "step-start" },
{ type: "tool-getPage", state: "input-available" },
]),
);
expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false });
});
});
describe("liveTurnTokens — authoritative path", () => {
it("returns authoritative usage verbatim, splitting reasoning out of output", () => {
// outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30.
const r = liveTurnTokens(
msg([{ type: "text", text: "estimate would be tiny" }], {
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
}),
);
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
});
it("treats missing reasoningTokens as 0 and keeps full output", () => {
const r = liveTurnTokens(
msg([{ type: "text", text: "x" }], {
usage: { inputTokens: 10, outputTokens: 42 },
}),
);
expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true });
});
it("never returns a negative output when reasoning exceeds reported output", () => {
const r = liveTurnTokens(
msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }),
);
expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true });
});
it("falls back to the estimate when metadata has no usage object", () => {
const r = liveTurnTokens(
msg([{ type: "text", text: "abcd" }], { chatId: "c1" }),
);
expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false });
});
});

View File

@@ -0,0 +1,94 @@
import type { UIMessage } from "@ai-sdk/react";
/**
* Live token counting for a streaming AI-chat turn — split into REASONING
* (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows
* `Thinking… · 60 tokens` next to its thinking indicator.
*
* No provider streams exact per-token usage mid-stream, so the live number is a
* CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage
* once the server attaches it on a step/turn boundary (see the server's
* `chatStreamMetadata` + the client's read of `message.metadata.usage`). When
* authoritative usage is present we return it verbatim (the number "jumps to
* exact"); otherwise we return the running estimate. Pure + unit-testable: it
* never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the
* bundle, and be wrong for Gemini/Ollama anyway).
*/
/**
* Rough token estimate for a piece of text using the standard chars/≈4 heuristic.
* Returns 0 for empty/whitespace-free-of-content input, and ceils so any
* non-empty text counts as at least one token.
*/
export function estimateTokens(text: string): number {
if (!text) return 0;
return Math.ceil(text.length / 4);
}
/** Authoritative per-step/turn usage the server attaches to message metadata. */
export interface AuthoritativeUsage {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
reasoningTokens?: number;
}
/** Live token split for a turn's tail (streaming) assistant message. */
export interface LiveTurnTokens {
/** Thinking/reasoning tokens (estimate, or authoritative when available). */
reasoning: number;
/** Answer/output tokens (estimate, or authoritative when available). */
output: number;
/** True when the numbers come from authoritative server usage, not estimate. */
authoritative: boolean;
}
/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */
function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined {
const meta = message?.metadata as
| { usage?: AuthoritativeUsage }
| undefined;
const usage = meta?.usage;
if (!usage || typeof usage !== "object") return undefined;
return usage;
}
/**
* Token split for the given (streaming) assistant message.
*
* Prefers AUTHORITATIVE `metadata.usage` when the server has attached it (at a
* step/turn boundary, incl. `reasoningTokens`) — so the live counter snaps to the
* provider's exact figures. Until then it returns a running ESTIMATE summed over
* the message parts: `reasoning` parts feed the reasoning estimate, `text` parts
* feed the output estimate. Multi-part / multi-step turns accumulate naturally
* because every part of the turn is summed.
*
* Providers that don't stream reasoning text still surface a reasoning count once
* the authoritative usage arrives (`usage.reasoningTokens`); on the pure estimate
* path such a turn simply shows `reasoning: 0` until then.
*/
export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens {
if (!message) return { reasoning: 0, output: 0, authoritative: false };
const usage = metadataUsage(message);
if (usage) {
// Authoritative branch: outputTokens already INCLUDES reasoning tokens in the
// AI SDK usage shape, so subtract reasoning out for the "answer" figure (never
// go negative if a provider reports them inconsistently).
const reasoning = usage.reasoningTokens ?? 0;
const totalOutput = usage.outputTokens ?? 0;
const output = Math.max(0, totalOutput - reasoning);
return { reasoning, output, authoritative: true };
}
let reasoning = 0;
let output = 0;
for (const part of message.parts ?? []) {
if (part.type === "reasoning") {
reasoning += estimateTokens((part as { text?: string }).text ?? "");
} else if (part.type === "text") {
output += estimateTokens((part as { text?: string }).text ?? "");
}
}
return { reasoning, output, authoritative: false };
}