diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index 87e8af42..21f7c5f7 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -1123,7 +1123,7 @@ "Create subpage of {{name}}": "Create subpage of {{name}}", "AI chat": "AI chat", "Minimize": "Minimize", - "Tokens used in this chat": "Tokens used in this chat", + "Current context size": "Current context size", "AI agent": "AI agent", "AI agent is typing…": "AI agent is typing…", "Send": "Send", diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index 664aa6ff..122f80ff 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -204,19 +204,30 @@ export default function AiChatWindow() { const threadKey = activeChatId ?? "new"; const waitingForHistory = activeChatId !== null && messagesLoading; - // Sum of persisted token usage for the active chat. NOTE: this reflects the - // PERSISTED rows for the active chat (updates on chat open/switch); it does - // not tick live mid-stream — acceptable for v1. - const totalTokens = useMemo(() => { + // Current context size for the active chat: how much the conversation now + // occupies in the model's context window — NOT the cumulative tokens spent. + // We read the most recent assistant row that carries a context figure: + // `contextTokens` (final-step input+output) for chats recorded after this + // shipped; older rows fall back to that turn's `usage` total. NOTE: reflects + // PERSISTED rows (updates on chat open/switch); it does not tick live + // mid-stream — acceptable for v1. + const contextTokens = useMemo(() => { if (!activeChatId || !messageRows) return 0; - return messageRows.reduce((sum, row) => { - const usage = row.metadata?.usage; - if (!usage) return sum; - const rowTokens = - usage.totalTokens ?? - (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0); - return sum + rowTokens; - }, 0); + for (let i = messageRows.length - 1; i >= 0; i--) { + const meta = messageRows[i].metadata; + if (!meta) continue; + if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) { + return meta.contextTokens; + } + const usage = meta.usage; + if (usage) { + const fallback = + usage.totalTokens ?? + (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0); + if (fallback > 0) return fallback; + } + } + return 0; }, [activeChatId, messageRows]); // On (re)open, settle the geometry before paint (useLayoutEffect → no @@ -333,9 +344,9 @@ export default function AiChatWindow() { {t("AI chat")}
- {totalTokens > 0 && ( - - {formatTokens(totalTokens)} + {contextTokens > 0 && ( + + {formatTokens(contextTokens)} )}
diff --git a/apps/client/src/features/ai-chat/types/ai-chat.types.ts b/apps/client/src/features/ai-chat/types/ai-chat.types.ts index d225242d..21740da5 100644 --- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts +++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts @@ -28,13 +28,19 @@ export interface IAiChatMessageRow { toolCalls?: unknown; metadata?: { parts?: UIMessage["parts"]; - // AI SDK v6 `totalUsage` persisted on assistant rows. Used to sum the token - // count shown in the floating window's header badge. + // AI SDK v6 `totalUsage` persisted on assistant rows. Legacy cumulative + // figure (sum of every step's usage for the turn); kept for back-compat and + // as the fallback for older rows that have no `contextTokens`. usage?: { inputTokens?: number; outputTokens?: number; totalTokens?: number; }; + // Current context size for the turn = final-step (input+output) tokens, i.e. + // how much the conversation occupies in the model's context window after this + // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the + // floating window's header badge. + contextTokens?: number; // Set on an assistant row whose turn ended in a provider/stream error; the // raw provider error text (e.g. "402: ...") for inline display in the thread. error?: string; diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 20105169..3119c3c4 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -246,13 +246,19 @@ export class AiChatService { // something to enforce by silently breaking the agent.) stopWhen: stepCountIs(8), abortSignal: signal, - onFinish: async ({ text, finishReason, totalUsage, steps }) => { + onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => { await persistAssistant({ text, toolCalls: serializeSteps(steps), metadata: { finishReason, usage: totalUsage, + // Final-step usage = the context actually fed to the model on the last LLM + // call (full history + tool results) plus the answer it just generated. + // input+output of the FINAL step ≈ the conversation's CURRENT context size, + // distinct from totalUsage which sums every step (cumulative tokens spent). + contextTokens: + (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined, // Persist the FULL set of UIMessage parts for the turn (text + // tool-call/result), so the rebuilt history replays prior tool // context to the model on later turns.