feat(ai-chat): show current context size instead of total tokens spent
The floating AI-chat header badge summed metadata.usage (AI SDK totalUsage, all steps) across every assistant row, showing the cumulative tokens SPENT — which grows each turn as history is re-sent. Replace it with the conversation's CURRENT context size.

- server: persist metadata.contextTokens in streamText onFinish from the final-step `usage` (inputTokens + outputTokens ≈ current context window occupancy); keep usage: totalUsage for back-compat/fallback
- client: derive the badge from the most recent assistant row's contextTokens (fallback to that row's usage total for older chats) instead of summing all rows
- types: add metadata.contextTokens to IAiChatMessageRow
- i18n: rename badge label "Tokens used in this chat" -> "Current context size" (en-US)

No DB migration needed (metadata is a JSON column).
This commit is contained in:
@@ -1123,7 +1123,7 @@
|
||||
"Create subpage of {{name}}": "Create subpage of {{name}}",
|
||||
"AI chat": "AI chat",
|
||||
"Minimize": "Minimize",
|
||||
"Tokens used in this chat": "Tokens used in this chat",
|
||||
"Current context size": "Current context size",
|
||||
"AI agent": "AI agent",
|
||||
"AI agent is typing…": "AI agent is typing…",
|
||||
"Send": "Send",
|
||||
|
||||
@@ -204,19 +204,30 @@ export default function AiChatWindow() {
|
||||
const threadKey = activeChatId ?? "new";
|
||||
const waitingForHistory = activeChatId !== null && messagesLoading;
|
||||
|
||||
// Sum of persisted token usage for the active chat. NOTE: this reflects the
|
||||
// PERSISTED rows for the active chat (updates on chat open/switch); it does
|
||||
// not tick live mid-stream — acceptable for v1.
|
||||
const totalTokens = useMemo(() => {
|
||||
// Current context size for the active chat: how much the conversation now
|
||||
// occupies in the model's context window — NOT the cumulative tokens spent.
|
||||
// We read the most recent assistant row that carries a context figure:
|
||||
// `contextTokens` (final-step input+output) for chats recorded after this
|
||||
// shipped; older rows fall back to that turn's `usage` total. NOTE: reflects
|
||||
// PERSISTED rows (updates on chat open/switch); it does not tick live
|
||||
// mid-stream — acceptable for v1.
|
||||
const contextTokens = useMemo(() => {
|
||||
if (!activeChatId || !messageRows) return 0;
|
||||
return messageRows.reduce((sum, row) => {
|
||||
const usage = row.metadata?.usage;
|
||||
if (!usage) return sum;
|
||||
const rowTokens =
|
||||
for (let i = messageRows.length - 1; i >= 0; i--) {
|
||||
const meta = messageRows[i].metadata;
|
||||
if (!meta) continue;
|
||||
if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
|
||||
return meta.contextTokens;
|
||||
}
|
||||
const usage = meta.usage;
|
||||
if (usage) {
|
||||
const fallback =
|
||||
usage.totalTokens ??
|
||||
(usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
|
||||
return sum + rowTokens;
|
||||
}, 0);
|
||||
if (fallback > 0) return fallback;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}, [activeChatId, messageRows]);
|
||||
|
||||
// On (re)open, settle the geometry before paint (useLayoutEffect → no
|
||||
@@ -333,9 +344,9 @@ export default function AiChatWindow() {
|
||||
<span className={classes.title}>{t("AI chat")}</span>
|
||||
|
||||
<div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
|
||||
{totalTokens > 0 && (
|
||||
<Tooltip label={t("Tokens used in this chat")} withArrow>
|
||||
<span className={classes.badge}>{formatTokens(totalTokens)}</span>
|
||||
{contextTokens > 0 && (
|
||||
<Tooltip label={t("Current context size")} withArrow>
|
||||
<span className={classes.badge}>{formatTokens(contextTokens)}</span>
|
||||
</Tooltip>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -28,13 +28,19 @@ export interface IAiChatMessageRow {
|
||||
toolCalls?: unknown;
|
||||
metadata?: {
|
||||
parts?: UIMessage["parts"];
|
||||
// AI SDK v6 `totalUsage` persisted on assistant rows. Used to sum the token
|
||||
// count shown in the floating window's header badge.
|
||||
// AI SDK v6 `totalUsage` persisted on assistant rows. Legacy cumulative
|
||||
// figure (sum of every step's usage for the turn); kept for back-compat and
|
||||
// as the fallback for older rows that have no `contextTokens`.
|
||||
usage?: {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
};
|
||||
// Current context size for the turn = final-step (input+output) tokens, i.e.
|
||||
// how much the conversation occupies in the model's context window after this
|
||||
// turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
|
||||
// floating window's header badge.
|
||||
contextTokens?: number;
|
||||
// Set on an assistant row whose turn ended in a provider/stream error; the
|
||||
// raw provider error text (e.g. "402: ...") for inline display in the thread.
|
||||
error?: string;
|
||||
|
||||
@@ -246,13 +246,19 @@ export class AiChatService {
|
||||
// something to enforce by silently breaking the agent.)
|
||||
stopWhen: stepCountIs(8),
|
||||
abortSignal: signal,
|
||||
onFinish: async ({ text, finishReason, totalUsage, steps }) => {
|
||||
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
|
||||
await persistAssistant({
|
||||
text,
|
||||
toolCalls: serializeSteps(steps),
|
||||
metadata: {
|
||||
finishReason,
|
||||
usage: totalUsage,
|
||||
// Final-step usage = the context actually fed to the model on the last LLM
|
||||
// call (full history + tool results) plus the answer it just generated.
|
||||
// input+output of the FINAL step ≈ the conversation's CURRENT context size,
|
||||
// distinct from totalUsage which sums every step (cumulative tokens spent).
|
||||
contextTokens:
|
||||
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined,
|
||||
// Persist the FULL set of UIMessage parts for the turn (text +
|
||||
// tool-call/result), so the rebuilt history replays prior tool
|
||||
// context to the model on later turns.
|
||||
|
||||
Reference in New Issue
Block a user