Compare commits

...

1 Commits

Author SHA1 Message Date
claude code agent 227
21a9c4d89d fix(ai-chat): tick the live token counter between agent steps (#163)
The header token badge (and the "Thinking… · N tokens" line) froze between
agent steps and jumped in chunks instead of ticking smoothly. liveTurnTokens
returned the authoritative server `usage` VERBATIM as soon as it appeared, but
the server only attaches usage at a step boundary and it is cumulative over
COMPLETED steps — so during the next (in-flight) step the figure stayed frozen
at the previous boundary and the running text estimate was ignored.

Combine both sources per component via max: always compute the running estimate
(chars/≈4 over the message's reasoning/text parts, which includes the in-flight
step) and take max(authoritativeBase, estimate). Between boundaries the estimate
ticks the number up; at a boundary the authoritative figure snaps it exact; and
because the server usage is cumulative and we only ever take the max, the counter
is monotonic (never drops). Reasoning/output stay split; the #151 reasoning-only
authoritative count is preserved.

Backward compatible: in every existing test the estimate is <= the authoritative
figure, so max returns the same value. +4 tests for the in-flight-step-exceeds-
base case (output + reasoning), the authoritative-wins case, and monotonicity.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 15:12:21 +03:00
2 changed files with 95 additions and 24 deletions

View File

@@ -117,3 +117,55 @@ describe("liveTurnTokens — authoritative path", () => {
expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false }); expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false });
}); });
}); });
describe("liveTurnTokens — combined authoritative + estimate (#163)", () => {
it("ticks the in-flight step above the completed-steps authoritative base", () => {
// The authoritative usage is the sum over COMPLETED steps (step 1). The
// CURRENT step is streaming and its text is NOT in `usage` yet, but it IS in
// the parts -> the running estimate must push the live figure above the base
// so the badge keeps growing between step boundaries.
const longText = "x".repeat(800); // 800 chars -> 200 est output tokens
const r = liveTurnTokens(
msg([{ type: "text", text: longText }], {
usage: { inputTokens: 500, outputTokens: 40 }, // step-1 base: 40 output
}),
);
// max(authOutput=40, estOutput=200) = 200 -> the counter ticks, not frozen.
expect(r.output).toBe(200);
expect(r.authoritative).toBe(true);
});
it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => {
const longReasoning = "r".repeat(400); // 400 chars -> 100 est reasoning
const r = liveTurnTokens(
msg([{ type: "reasoning", text: longReasoning }], {
usage: { inputTokens: 100, outputTokens: 20, reasoningTokens: 20 },
}),
);
// reasoning: max(20, 100) = 100 ; output: max(max(0,20-20)=0, 0) = 0.
expect(r.reasoning).toBe(100);
expect(r.output).toBe(0);
expect(r.authoritative).toBe(true);
});
it("snaps to the authoritative figure once it exceeds the rough estimate", () => {
// Short on-screen text (estimate tiny) but a large authoritative output:
// the exact figure wins at the boundary (the counter never under-reports).
const r = liveTurnTokens(
msg([{ type: "text", text: "abcd" }], {
usage: { inputTokens: 10, outputTokens: 5000 },
}),
);
expect(r.output).toBe(5000);
});
it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => {
// Mirrors the legacy 'verbatim' tests: estimate < authoritative -> unchanged.
const r = liveTurnTokens(
msg([{ type: "text", text: "tiny" }], {
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
}),
);
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
});
});

View File

@@ -56,39 +56,58 @@ function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined {
/** /**
* Token split for the given (streaming) assistant message. * Token split for the given (streaming) assistant message.
* *
* Prefers AUTHORITATIVE `metadata.usage` when the server has attached it (at a * COMBINES the authoritative server usage with the running text estimate so the
* step/turn boundary, incl. `reasoningTokens`) — so the live counter snaps to the * counter ticks in real time AND lands exact. The server only attaches
* provider's exact figures. Until then it returns a running ESTIMATE summed over * `metadata.usage` at a step/turn boundary (`finish-step`/`finish`) and it is
* the message parts: `reasoning` parts feed the reasoning estimate, `text` parts * CUMULATIVE over COMPLETED steps — it does NOT yet include the in-flight step.
* feed the output estimate. Multi-part / multi-step turns accumulate naturally * So a multi-step turn that returned the authoritative figure verbatim would
* because every part of the turn is summed. * FREEZE between boundaries and jump in steps (issue #163).
*
* Instead we always compute the running ESTIMATE (chars/≈4 over the message's
* `reasoning`/`text` parts, which grows on every streamed delta) and take the
* per-component MAX of the authoritative base and the estimate:
* - between boundaries the estimate of the in-flight step ticks the number up;
* - at a boundary the authoritative figure snaps it to exact;
* - because the server's usage is cumulative and we only ever take the max, the
* number is MONOTONIC — it never drops.
* *
* Providers that don't stream reasoning text still surface a reasoning count once * Providers that don't stream reasoning text still surface a reasoning count once
* the authoritative usage arrives (`usage.reasoningTokens`); on the pure estimate * the authoritative usage arrives (`max(reasoningTokens, 0)`); on the pure
* path such a turn simply shows `reasoning: 0` until then. * estimate path (no usage yet) such a turn shows `reasoning: 0` until then.
*/ */
export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens { export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens {
if (!message) return { reasoning: 0, output: 0, authoritative: false }; if (!message) return { reasoning: 0, output: 0, authoritative: false };
const usage = metadataUsage(message); // Running ESTIMATE over every reasoning/text part — grows on each delta. This
if (usage) { // includes the IN-FLIGHT step, which the authoritative usage does not cover yet.
// Authoritative branch: outputTokens already INCLUDES reasoning tokens in the let estReasoning = 0;
// AI SDK usage shape, so subtract reasoning out for the "answer" figure (never let estOutput = 0;
// go negative if a provider reports them inconsistently).
const reasoning = usage.reasoningTokens ?? 0;
const totalOutput = usage.outputTokens ?? 0;
const output = Math.max(0, totalOutput - reasoning);
return { reasoning, output, authoritative: true };
}
let reasoning = 0;
let output = 0;
for (const part of message.parts ?? []) { for (const part of message.parts ?? []) {
if (part.type === "reasoning") { if (part.type === "reasoning") {
reasoning += estimateTokens((part as { text?: string }).text ?? ""); estReasoning += estimateTokens((part as { text?: string }).text ?? "");
} else if (part.type === "text") { } else if (part.type === "text") {
output += estimateTokens((part as { text?: string }).text ?? ""); estOutput += estimateTokens((part as { text?: string }).text ?? "");
} }
} }
return { reasoning, output, authoritative: false };
const usage = metadataUsage(message);
if (!usage) {
// No authoritative usage streamed yet: the estimate IS the live figure.
return { reasoning: estReasoning, output: estOutput, authoritative: false };
}
// Authoritative sum over COMPLETED steps. `outputTokens` already INCLUDES
// reasoning in the AI SDK usage shape, so subtract it out for the "answer"
// figure (never go negative if a provider reports them inconsistently).
const authReasoning = usage.reasoningTokens ?? 0;
const authOutput = Math.max(0, (usage.outputTokens ?? 0) - authReasoning);
// Per-component max: the in-flight step's estimate ticks above the completed-
// steps base between boundaries, and the authoritative figure wins once it
// exceeds the (rough) estimate at the next boundary. Monotonic by construction.
return {
reasoning: Math.max(authReasoning, estReasoning),
output: Math.max(authOutput, estOutput),
authoritative: true,
};
} }