// Regression suite for issue #163: when `metadata.usage` is present,
// `liveTurnTokens` must report the per-component maximum of the authoritative
// usage and the running text estimate, and still flag the result authoritative.
describe("liveTurnTokens — combined authoritative + estimate (#163)", () => {
  it("ticks the in-flight step above the completed-steps authoritative base", () => {
    // Scenario: `usage` covers only the already-completed step (40 output
    // tokens) while the step currently streaming is visible in the parts only.
    // The 800 streamed characters are expected to estimate to 200 tokens, so
    // the live figure has to rise above the base instead of staying frozen.
    const completedStepsUsage = { inputTokens: 500, outputTokens: 40 };
    const tokens = liveTurnTokens(
      msg([{ type: "text", text: "x".repeat(800) }], {
        usage: completedStepsUsage,
      }),
    );

    // max(authoritative 40, estimate 200) -> 200.
    expect(tokens.output).toBe(200);
    expect(tokens.authoritative).toBe(true);
  });

  it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => {
    // 400 reasoning characters are expected to estimate to 100 tokens.
    const completedStepsUsage = {
      inputTokens: 100,
      outputTokens: 20,
      reasoningTokens: 20,
    };
    const tokens = liveTurnTokens(
      msg([{ type: "reasoning", text: "r".repeat(400) }], {
        usage: completedStepsUsage,
      }),
    );

    // reasoning: max(20, 100) = 100.
    expect(tokens.reasoning).toBe(100);
    // output: the authoritative answer share is max(0, 20 - 20) = 0 and there
    // is no text part, so the estimate is 0 as well.
    expect(tokens.output).toBe(0);
    expect(tokens.authoritative).toBe(true);
  });

  it("snaps to the authoritative figure once it exceeds the rough estimate", () => {
    // Four visible characters give a tiny estimate, whereas the server reports
    // 5000 output tokens: the larger authoritative value must be returned.
    const largeUsage = { inputTokens: 10, outputTokens: 5000 };
    const tokens = liveTurnTokens(
      msg([{ type: "text", text: "abcd" }], { usage: largeUsage }),
    );

    expect(tokens.output).toBe(5000);
  });

  it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => {
    // Same expectation as the older "verbatim" cases: with an estimate below
    // the authoritative numbers, the authoritative split comes back unchanged
    // (reasoning 30, answer 100 - 30 = 70).
    const usageWithReasoning = {
      inputTokens: 500,
      outputTokens: 100,
      reasoningTokens: 30,
    };
    const tokens = liveTurnTokens(
      msg([{ type: "text", text: "tiny" }], { usage: usageWithReasoning }),
    );

    expect(tokens).toEqual({ reasoning: 30, output: 70, authoritative: true });
  });
});
/**
 * Computes the live reasoning/answer token split for a (possibly still
 * streaming) assistant message.
 *
 * Two sources are merged:
 *  1. A running ESTIMATE: `estimateTokens` applied to the text of every
 *     `reasoning` part (reasoning figure) and every `text` part (answer
 *     figure). It is recomputed from all parts on each call, so it grows as
 *     streamed deltas extend the parts.
 *  2. The AUTHORITATIVE usage returned by `metadataUsage(message)`, when
 *     present. The design premise (issue #163) is that the server attaches it
 *     only at step/turn boundaries and that it is cumulative over COMPLETED
 *     steps, i.e. it does not yet cover the step currently streaming.
 *
 * With no authoritative usage the estimate is returned as-is and
 * `authoritative` is `false`. Otherwise each component is the MAX of the
 * authoritative value and the estimate, and `authoritative` is `true`:
 *  - while a step streams, the estimate can lift the figure above the base
 *    reported for the completed steps, so the counter is not frozen between
 *    boundaries;
 *  - whenever the authoritative value is at least the estimate, the exact
 *    value is what is reported;
 *  - as long as neither input shrinks between calls, the result never drops.
 *
 * A provider that streams no reasoning text has a reasoning estimate of 0, so
 * its reasoning figure stays 0 until `usage.reasoningTokens` arrives.
 *
 * @param message The assistant message to inspect; `undefined` yields zeros.
 * @returns The reasoning/output token figures plus whether authoritative usage
 *   was available for this message.
 */
export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens {
  if (!message) {
    return { reasoning: 0, output: 0, authoritative: false };
  }

  // Parts without a `text` field contribute an empty string to the estimate.
  const partText = (part: unknown): string =>
    (part as { text?: string }).text ?? "";

  // Sum the estimate over the whole message, which includes the in-flight
  // step that the authoritative usage is assumed not to cover yet.
  let reasoningEstimate = 0;
  let answerEstimate = 0;
  for (const part of message.parts ?? []) {
    switch (part.type) {
      case "reasoning":
        reasoningEstimate += estimateTokens(partText(part));
        break;
      case "text":
        answerEstimate += estimateTokens(partText(part));
        break;
      default:
        // Every other part kind is ignored by the counter.
        break;
    }
  }

  const serverUsage = metadataUsage(message);
  if (!serverUsage) {
    // Nothing authoritative yet: the estimate is the live figure.
    return {
      reasoning: reasoningEstimate,
      output: answerEstimate,
      authoritative: false,
    };
  }

  // `outputTokens` is treated as already INCLUDING reasoning tokens (AI SDK
  // usage shape), so the answer share is the remainder, clamped at zero in
  // case a provider reports the two numbers inconsistently.
  const reasoningBase = serverUsage.reasoningTokens ?? 0;
  const totalOutput = serverUsage.outputTokens ?? 0;
  const answerBase = Math.max(0, totalOutput - reasoningBase);

  // Per-component max of the authoritative base and the running estimate.
  return {
    reasoning: Math.max(reasoningBase, reasoningEstimate),
    output: Math.max(answerBase, answerEstimate),
    authoritative: true,
  };
}