feat(ai-chat): show current context size instead of total tokens spent

The floating AI-chat header badge summed metadata.usage (AI SDK totalUsage, all steps) across every assistant row, showing the cumulative tokens SPENT — which grows each turn as history is re-sent. Replace it with the conversation's CURRENT context size.

- server: persist metadata.contextTokens in streamText onFinish from the final-step `usage` (inputTokens + outputTokens ≈ current context window occupancy); keep usage: totalUsage for back-compat/fallback
- client: derive the badge from the most recent assistant row's contextTokens (fallback to that row's usage total for older chats) instead of summing all rows
- types: add metadata.contextTokens to IAiChatMessageRow
- i18n: rename badge label "Tokens used in this chat" -> "Current context size" (en-US)

No DB migration needed (metadata is a JSON column).
2026-06-18 19:54:34 +03:00
parent 411671bad2
commit f96df1c540
4 changed files with 42 additions and 19 deletions
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -1123,7 +1123,7 @@
  "Create subpage of {{name}}": "Create subpage of {{name}}",
  "AI chat": "AI chat",
  "Minimize": "Minimize",
-  "Tokens used in this chat": "Tokens used in this chat",
+  "Current context size": "Current context size",
  "AI agent": "AI agent",
  "AI agent is typing…": "AI agent is typing…",
  "Send": "Send",
--- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
+++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
@@ -204,19 +204,30 @@ export default function AiChatWindow() {
  const threadKey = activeChatId ?? "new";
  const waitingForHistory = activeChatId !== null && messagesLoading;

-  // Sum of persisted token usage for the active chat. NOTE: this reflects the
-  // PERSISTED rows for the active chat (updates on chat open/switch); it does
-  // not tick live mid-stream — acceptable for v1.
-  const totalTokens = useMemo(() => {
+  // Current context size for the active chat: how much the conversation now
+  // occupies in the model's context window — NOT the cumulative tokens spent.
+  // We read the most recent assistant row that carries a context figure:
+  // `contextTokens` (final-step input+output) for chats recorded after this
+  // shipped; older rows fall back to that turn's `usage` total. NOTE: reflects
+  // PERSISTED rows (updates on chat open/switch); it does not tick live
+  // mid-stream — acceptable for v1.
+  const contextTokens = useMemo(() => {
    if (!activeChatId || !messageRows) return 0;
-    return messageRows.reduce((sum, row) => {
-      const usage = row.metadata?.usage;
-      if (!usage) return sum;
-      const rowTokens =
-        usage.totalTokens ??
-        (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
-      return sum + rowTokens;
-    }, 0);
+    for (let i = messageRows.length - 1; i >= 0; i--) {
+      const meta = messageRows[i].metadata;
+      if (!meta) continue;
+      if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
+        return meta.contextTokens;
+      }
+      const usage = meta.usage;
+      if (usage) {
+        const fallback =
+          usage.totalTokens ??
+          (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
+        if (fallback > 0) return fallback;
+      }
+    }
+    return 0;
  }, [activeChatId, messageRows]);

  // On (re)open, settle the geometry before paint (useLayoutEffect → no
@@ -333,9 +344,9 @@ export default function AiChatWindow() {
        <span className={classes.title}>{t("AI chat")}</span>

        <div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
-          {totalTokens > 0 && (
-            <Tooltip label={t("Tokens used in this chat")} withArrow>
-              <span className={classes.badge}>{formatTokens(totalTokens)}</span>
+          {contextTokens > 0 && (
+            <Tooltip label={t("Current context size")} withArrow>
+              <span className={classes.badge}>{formatTokens(contextTokens)}</span>
            </Tooltip>
          )}
        </div>
--- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts
+++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts
@@ -28,13 +28,19 @@ export interface IAiChatMessageRow {
  toolCalls?: unknown;
  metadata?: {
    parts?: UIMessage["parts"];
-    // AI SDK v6 `totalUsage` persisted on assistant rows. Used to sum the token
-    // count shown in the floating window's header badge.
+    // AI SDK v6 `totalUsage` persisted on assistant rows. Legacy cumulative
+    // figure (sum of every step's usage for the turn); kept for back-compat and
+    // as the fallback for older rows that have no `contextTokens`.
    usage?: {
      inputTokens?: number;
      outputTokens?: number;
      totalTokens?: number;
    };
+    // Current context size for the turn = final-step (input+output) tokens, i.e.
+    // how much the conversation occupies in the model's context window after this
+    // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
+    // floating window's header badge.
+    contextTokens?: number;
    // Set on an assistant row whose turn ended in a provider/stream error; the
    // raw provider error text (e.g. "402: ...") for inline display in the thread.
    error?: string;
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -246,13 +246,19 @@ export class AiChatService {
      // something to enforce by silently breaking the agent.)
      stopWhen: stepCountIs(8),
      abortSignal: signal,
-      onFinish: async ({ text, finishReason, totalUsage, steps }) => {
+      onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
        await persistAssistant({
          text,
          toolCalls: serializeSteps(steps),
          metadata: {
            finishReason,
            usage: totalUsage,
+            // Final-step usage = the context actually fed to the model on the last LLM
+            // call (full history + tool results) plus the answer it just generated.
+            // input+output of the FINAL step ≈ the conversation's CURRENT context size,
+            // distinct from totalUsage which sums every step (cumulative tokens spent).
+            contextTokens:
+              (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined,
            // Persist the FULL set of UIMessage parts for the turn (text +
            // tool-call/result), so the rebuilt history replays prior tool
            // context to the model on later turns.