feat(ai-chat): show current context size instead of total tokens spent

The floating AI-chat header badge summed metadata.usage (the AI SDK's
totalUsage, covering all steps of a turn) across every assistant row, so
it showed the cumulative tokens SPENT — a number that grows each turn as
the history is re-sent.
Replace it with the conversation's CURRENT context size.

- server: persist metadata.contextTokens in streamText onFinish from the
  final-step `usage` (inputTokens + outputTokens ≈ current context
  window occupancy); keep usage: totalUsage for back-compat/fallback
- client: derive the badge from the most recent row that carries a
  token figure — its contextTokens, or that row's usage total for
  older chats that lack it — instead of summing all rows
- types: add metadata.contextTokens to IAiChatMessageRow
- i18n: rename badge label "Tokens used in this chat" -> "Current
  context size" (en-US)

No DB migration needed (metadata is a JSON column).
This commit is contained in:
vvzvlad
2026-06-18 19:54:34 +03:00
parent 411671bad2
commit f96df1c540
4 changed files with 42 additions and 19 deletions

View File

@@ -1123,7 +1123,7 @@
"Create subpage of {{name}}": "Create subpage of {{name}}",
"AI chat": "AI chat",
"Minimize": "Minimize",
"Tokens used in this chat": "Tokens used in this chat",
"Current context size": "Current context size",
"AI agent": "AI agent",
"AI agent is typing…": "AI agent is typing…",
"Send": "Send",

View File

@@ -204,19 +204,30 @@ export default function AiChatWindow() {
const threadKey = activeChatId ?? "new";
const waitingForHistory = activeChatId !== null && messagesLoading;
// Sum of persisted token usage for the active chat. NOTE: this reflects the
// PERSISTED rows for the active chat (updates on chat open/switch); it does
// not tick live mid-stream — acceptable for v1.
const totalTokens = useMemo(() => {
// Current context size for the active chat: how much the conversation now
// occupies in the model's context window — NOT the cumulative tokens spent.
// We scan the persisted rows from newest to oldest and use the first one that
// carries a positive figure: `contextTokens` (final-step input+output) for
// rows recorded after that field was introduced; older rows fall back to that
// turn's `usage` total, which sums every step of the turn and can therefore
// overstate the context size for multi-step turns. NOTE: reflects PERSISTED
// rows (updates on chat open/switch); it does not tick live mid-stream —
// acceptable for v1.
const contextTokens = useMemo(() => {
if (!activeChatId || !messageRows) return 0;
return messageRows.reduce((sum, row) => {
const usage = row.metadata?.usage;
if (!usage) return sum;
const rowTokens =
usage.totalTokens ??
(usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
return sum + rowTokens;
}, 0);
for (let i = messageRows.length - 1; i >= 0; i--) {
const meta = messageRows[i].metadata;
if (!meta) continue;
if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
return meta.contextTokens;
}
const usage = meta.usage;
if (usage) {
const fallback =
usage.totalTokens ??
(usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
if (fallback > 0) return fallback;
}
}
return 0;
}, [activeChatId, messageRows]);
// On (re)open, settle the geometry before paint (useLayoutEffect → no
@@ -333,9 +344,9 @@ export default function AiChatWindow() {
<span className={classes.title}>{t("AI chat")}</span>
<div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
{totalTokens > 0 && (
<Tooltip label={t("Tokens used in this chat")} withArrow>
<span className={classes.badge}>{formatTokens(totalTokens)}</span>
{contextTokens > 0 && (
<Tooltip label={t("Current context size")} withArrow>
<span className={classes.badge}>{formatTokens(contextTokens)}</span>
</Tooltip>
)}
</div>

View File

@@ -28,13 +28,19 @@ export interface IAiChatMessageRow {
toolCalls?: unknown;
metadata?: {
parts?: UIMessage["parts"];
// AI SDK v6 `totalUsage` persisted on assistant rows. Used to sum the token
// count shown in the floating window's header badge.
// AI SDK v6 `totalUsage` persisted on assistant rows. Legacy cumulative
// figure (sum of every step's usage for the turn); kept for back-compat and
// as the fallback for older rows that have no `contextTokens`.
usage?: {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
};
// Current context size for the turn = final-step (input+output) tokens, i.e.
// how much the conversation occupies in the model's context window after this
// turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
// floating window's header badge.
contextTokens?: number;
// Set on an assistant row whose turn ended in a provider/stream error; the
// raw provider error text (e.g. "402: ...") for inline display in the thread.
error?: string;

View File

@@ -246,13 +246,19 @@ export class AiChatService {
// something to enforce by silently breaking the agent.)
stopWhen: stepCountIs(8),
abortSignal: signal,
onFinish: async ({ text, finishReason, totalUsage, steps }) => {
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
await persistAssistant({
text,
toolCalls: serializeSteps(steps),
metadata: {
finishReason,
usage: totalUsage,
// Final-step usage = the context actually fed to the model on the last LLM
// call (full history + tool results) plus the answer it just generated.
// input+output of the FINAL step ≈ the conversation's CURRENT context size,
// distinct from totalUsage which sums every step (cumulative tokens spent).
contextTokens:
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined,
// Persist the FULL set of UIMessage parts for the turn (text +
// tool-call/result), so the rebuilt history replays prior tool
// context to the model on later turns.