diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json
index 87e8af42..21f7c5f7 100644
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -1123,7 +1123,7 @@
   "Create subpage of {{name}}": "Create subpage of {{name}}",
   "AI chat": "AI chat",
   "Minimize": "Minimize",
-  "Tokens used in this chat": "Tokens used in this chat",
+  "Current context size": "Current context size",
   "AI agent": "AI agent",
   "AI agent is typing…": "AI agent is typing…",
   "Send": "Send",
diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
index 664aa6ff..122f80ff 100644
--- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
+++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx
@@ -204,19 +204,30 @@ export default function AiChatWindow() {
   const threadKey = activeChatId ?? "new";
   const waitingForHistory = activeChatId !== null && messagesLoading;
 
-  // Sum of persisted token usage for the active chat. NOTE: this reflects the
-  // PERSISTED rows for the active chat (updates on chat open/switch); it does
-  // not tick live mid-stream — acceptable for v1.
-  const totalTokens = useMemo(() => {
+  // Current context size for the active chat: how much the conversation now
+  // occupies in the model's context window — NOT the cumulative tokens spent.
+  // We read the most recent assistant row that carries a context figure:
+  // `contextTokens` (final-step input+output) for chats recorded after this
+  // shipped; older rows fall back to that turn's `usage` total. NOTE: reflects
+  // PERSISTED rows (updates on chat open/switch); it does not tick live
+  // mid-stream — acceptable for v1.
+  const contextTokens = useMemo(() => {
     if (!activeChatId || !messageRows) return 0;
-    return messageRows.reduce((sum, row) => {
-      const usage = row.metadata?.usage;
-      if (!usage) return sum;
-      const rowTokens =
-        usage.totalTokens ??
-        (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
-      return sum + rowTokens;
-    }, 0);
+    for (let i = messageRows.length - 1; i >= 0; i--) {
+      const meta = messageRows[i].metadata;
+      if (!meta) continue;
+      if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
+        return meta.contextTokens;
+      }
+      const usage = meta.usage;
+      if (usage) {
+        const fallback =
+          usage.totalTokens ??
+          (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
+        if (fallback > 0) return fallback;
+      }
+    }
+    return 0;
   }, [activeChatId, messageRows]);
 
   // On (re)open, settle the geometry before paint (useLayoutEffect → no
@@ -333,9 +344,9 @@ export default function AiChatWindow() {
         <span className={classes.title}>{t("AI chat")}</span>
 
         <div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
-          {totalTokens > 0 && (
-            <Tooltip label={t("Tokens used in this chat")} withArrow>
-              <span className={classes.badge}>{formatTokens(totalTokens)}</span>
+          {contextTokens > 0 && (
+            <Tooltip label={t("Current context size")} withArrow>
+              <span className={classes.badge}>{formatTokens(contextTokens)}</span>
             </Tooltip>
           )}
         </div>
diff --git a/apps/client/src/features/ai-chat/types/ai-chat.types.ts b/apps/client/src/features/ai-chat/types/ai-chat.types.ts
index d225242d..21740da5 100644
--- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts
+++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts
@@ -28,13 +28,19 @@ export interface IAiChatMessageRow {
   toolCalls?: unknown;
   metadata?: {
     parts?: UIMessage["parts"];
-    // AI SDK v6 `totalUsage` persisted on assistant rows. Used to sum the token
-    // count shown in the floating window's header badge.
+    // AI SDK v6 `totalUsage` persisted on assistant rows. Legacy cumulative
+    // figure (sum of every step's usage for the turn); kept for back-compat and
+    // as the fallback for older rows that have no `contextTokens`.
     usage?: {
       inputTokens?: number;
       outputTokens?: number;
       totalTokens?: number;
     };
+    // Current context size for the turn = final-step (input+output) tokens, i.e.
+    // how much the conversation occupies in the model's context window after this
+    // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
+    // floating window's header badge.
+    contextTokens?: number;
     // Set on an assistant row whose turn ended in a provider/stream error; the
     // raw provider error text (e.g. "402: ...") for inline display in the thread.
     error?: string;
diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts
index 20105169..3119c3c4 100644
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -246,13 +246,19 @@ export class AiChatService {
       // something to enforce by silently breaking the agent.)
       stopWhen: stepCountIs(8),
       abortSignal: signal,
-      onFinish: async ({ text, finishReason, totalUsage, steps }) => {
+      onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
         await persistAssistant({
           text,
           toolCalls: serializeSteps(steps),
           metadata: {
             finishReason,
             usage: totalUsage,
+            // Final-step usage = the context actually fed to the model on the last LLM
+            // call (full history + tool results) plus the answer it just generated, so
+            // input+output ≈ the conversation's CURRENT context size (totalUsage instead
+            // sums every step). `|| undefined` turns a 0 sum into undefined, not a 0.
+            contextTokens:
+              (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined,
             // Persist the FULL set of UIMessage parts for the turn (text +
             // tool-call/result), so the rebuilt history replays prior tool
             // context to the model on later turns.