From d88fe4cde7defdaeac65ee583558ad00356acf31 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Fri, 26 Jun 2026 06:27:45 +0300 Subject: [PATCH] feat(ai-chat): context badge shows current/max (#189) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header badge in the floating AI-chat window flipped meaning between states (a live per-turn token counter while streaming vs. the context size at rest), which made it "reset to 1" on each prompt and confused users. Make it consistently show the current context size, with the model's context window as an optional "/ max" denominator. The max comes from a new admin-set AI setting (chatContextWindow, in tokens) — provider-independent and always exact. The server stamps it onto the assistant message metadata (maxContextTokens) next to contextTokens, so the client reads both from the last row with no client-side model resolution (survives shares / future per-role models). - server: chatContextWindow in AiProviderSettings/keys/masked/resolved, DTO (@IsInt @Min(0)), settings-service resolve/getMasked, repo parity allowlist; flushAssistant writes metadata.maxContextTokens when > 0. - client: ContextBadge component (extracted, shows "current [/ max]", no live mode); removed the liveTurnTokens header path + dead util fn; Context-window NumberInput in AI settings; i18n strings. - live "Thinking · N tokens" feedback in the chat body is unchanged. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../public/locales/en-US/translation.json | 5 +- .../public/locales/ru-RU/translation.json | 5 +- .../ai-chat/components/ai-chat-window.tsx | 57 +++---- .../ai-chat/components/chat-thread.tsx | 55 ------ .../ai-chat/components/context-badge.test.tsx | 69 ++++++++ .../ai-chat/components/context-badge.tsx | 61 +++++++ .../features/ai-chat/types/ai-chat.types.ts | 9 +- .../ai-chat/utils/count-stream-tokens.test.ts | 158 +----------------- .../ai-chat/utils/count-stream-tokens.ts | 107 +----------- .../components/ai-provider-settings.tsx | 30 ++++ .../workspace/services/ai-settings-service.ts | 5 + .../src/core/ai-chat/ai-chat.service.spec.ts | 20 +++ .../src/core/ai-chat/ai-chat.service.ts | 10 ++ .../repos/workspace/workspace.repo.ts | 1 + .../ai/ai-provider-settings-keys.spec.ts | 32 ++++ .../integrations/ai/ai-settings.service.ts | 5 + apps/server/src/integrations/ai/ai.types.ts | 14 ++ .../ai/dto/update-ai-settings.dto.ts | 9 +- 18 files changed, 305 insertions(+), 347 deletions(-) create mode 100644 apps/client/src/features/ai-chat/components/context-badge.test.tsx create mode 100644 apps/client/src/features/ai-chat/components/context-badge.tsx diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index bd8c4ed3..b9f4fc17 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -1168,7 +1168,10 @@ "Built-in assistant persona": "Built-in assistant persona", "Minimize": "Minimize", "Current context size": "Current context size", - "Tokens generated this turn": "Tokens generated this turn", + "Context size / model limit": "Context size / model limit", + "Context window (tokens)": "Context window (tokens)", + "Shows used / total in the chat header badge; empty hides the total.": "Shows used / total in the chat header badge; empty hides the total.", + "e.g. 200000": "e.g. 
200000", "AI agent": "AI agent", "Take a look at the current document": "Take a look at the current document", "AI agent is typing…": "AI agent is typing…", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index f8c59436..1845bc2b 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -705,7 +705,10 @@ "Copy chat": "Копировать чат", "Created successfully": "Успешно создано", "Current context size": "Текущий размер контекста", - "Tokens generated this turn": "Токенов сгенерировано за ход", + "Context size / model limit": "Размер контекста / лимит модели", + "Context window (tokens)": "Размер окна контекста (токены)", + "Shows used / total in the chat header badge; empty hides the total.": "Показывает использовано/всего в шапке чата; пусто — скрыть лимит.", + "e.g. 200000": "напр. 200000", "Delete this chat?": "Удалить этот чат?", "Deleted successfully": "Успешно удалено", "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}", diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index de0b9923..ecdf1397 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -6,7 +6,7 @@ import { useRef, useState, } from "react"; -import { Group, Loader, Tooltip } from "@mantine/core"; +import { Group, Loader } from "@mantine/core"; import { IconArrowsDiagonal, IconCheck, @@ -39,6 +39,7 @@ import { } from "@/features/ai-chat/queries/ai-chat-query.ts"; import ConversationList from "@/features/ai-chat/components/conversation-list.tsx"; import ChatThread from "@/features/ai-chat/components/chat-thread.tsx"; +import { ContextBadge } from "@/features/ai-chat/components/context-badge.tsx"; import { exportAiChat } from 
"@/features/ai-chat/services/ai-chat-service.ts"; import { useChatSession } from "@/features/ai-chat/hooks/use-chat-session.ts"; import { @@ -60,13 +61,6 @@ const MIN_HEIGHT = 400; // Margin kept between the window and the viewport edges while dragging. const EDGE_MARGIN = 8; -/** Compact token formatter: 1.2M / 3.4k / 950. */ -function formatTokens(n: number): string { - if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; - if (n >= 1_000) return `${(n / 1_000).toFixed(1)}k`; - return String(n); -} - // Compute the initial top-right placement at the default size, fitted to the // current viewport. Reads `window` only when called (inside an effect). function computeInitialGeom() { @@ -161,12 +155,6 @@ export default function AiChatWindow() { const { data: messageRows, isLoading: messagesLoading } = useAiChatMessagesQuery(activeChatId ?? undefined); - // Live turn-token total (reasoning + output) for the in-flight turn, pushed up - // (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream. - // `null` means no turn is in flight -> the badge falls back to the persisted - // context size below. - const [liveTurnTokens, setLiveTurnTokens] = useState(null); - // The page the user is currently viewing. AiChatWindow lives in a pathless // parent layout route, so useParams() can't see :pageSlug. Match the full // pathname against the authenticated page route instead so "the current page" @@ -306,6 +294,21 @@ export default function AiChatWindow() { return 0; }, [activeChatId, messageRows]); + // The model's context-window size (badge denominator), read from the most + // recent assistant row that carries it. Admin-configured in AI settings and + // stamped onto the turn server-side, so it travels with the message metadata — + // no client-side model resolution, and it survives public shares / per-role + // models automatically. 
0 (no limit configured, or older rows) → the badge + // hides the denominator and shows only the current context size. + const maxContextTokens = useMemo(() => { + if (!activeChatId || !messageRows) return 0; + for (let i = messageRows.length - 1; i >= 0; i--) { + const max = messageRows[i].metadata?.maxContextTokens; + if (typeof max === "number" && max > 0) return max; + } + return 0; + }, [activeChatId, messageRows]); + // On (re)open, settle the geometry before paint (useLayoutEffect → no // first-frame jump): compute an initial top-right placement the first time, // and re-clamp an existing geometry to the current viewport on later opens @@ -495,23 +498,14 @@ export default function AiChatWindow() { )}
- {/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz); - once it finishes, fall back to the persisted context size. Require - > 0 so the very first emit (an empty tail message, count 0) does not - flash a "0" badge before any token streams in (#151 review). */} - {liveTurnTokens !== null && liveTurnTokens > 0 ? ( - - - {formatTokens(liveTurnTokens)} - - - ) : contextTokens > 0 ? ( - - - {formatTokens(contextTokens)} - - - ) : null} + {/* Context badge: always "current / max" context size (or just current + when no model limit is configured). It no longer flips to a live + per-turn generation counter mid-stream — that live feedback lives in + the chat body's "Thinking · N tokens" block. */} +
@@ -634,7 +628,6 @@ export default function AiChatWindow() { assistantName={currentRole?.name} onTurnFinished={onTurnFinished} onServerChatId={onServerChatId} - onLiveTurnTokens={setLiveTurnTokens} /> )}
diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index c906a940..14f9a2ad 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -20,7 +20,6 @@ import { } from "@/features/ai-chat/utils/role-launch.ts"; import { describeChatError } from "@/features/ai-chat/utils/error-message.ts"; import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts"; -import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import { dequeue, enqueueMessage, @@ -67,12 +66,6 @@ interface ChatThreadProps { * Copy/export button available mid-stream). Distinct from onTurnFinished, * which fires only at the terminal outcome. */ onServerChatId?: (serverChatId?: string) => void; - /** Reports the live turn-token total (reasoning + output) for the in-flight - * turn so the parent can show a header badge that ticks mid-stream. THROTTLED - * here (~8 Hz) so the parent re-renders a handful of times a second, not on - * every streamed delta. Called with `null` when no turn is in flight (the - * parent then reverts the badge to the persisted context size). */ - onLiveTurnTokens?: (tokens: number | null) => void; } /** @@ -117,7 +110,6 @@ export default function ChatThread({ assistantName, onTurnFinished, onServerChatId, - onLiveTurnTokens, }: ChatThreadProps) { const { t } = useTranslation(); @@ -328,53 +320,6 @@ export default function ChatThread({ // the SAME on-screen banner text can be mirrored into the export (issue #160). const errorView = error ? describeChatError(error.message ?? "", t) : null; - // Report the live turn-token total to the parent header badge, THROTTLED to - // ~8 Hz so the parent re-renders a few times a second instead of on every - // streamed delta. 
The tail assistant message's reasoning+output (estimate while - // streaming, authoritative once a step reports usage) is the live figure. When - // the turn ends we emit a final exact value, then `null` so the parent reverts - // the badge to the persisted context size. - const lastEmitRef = useRef(0); - const emitTimerRef = useRef | null>(null); - useEffect(() => { - if (!onLiveTurnTokens) return; - if (!isStreaming) { - // Turn ended (or never started): clear any pending throttle and revert. - if (emitTimerRef.current) { - clearTimeout(emitTimerRef.current); - emitTimerRef.current = null; - } - lastEmitRef.current = 0; - onLiveTurnTokens(null); - return; - } - const tail = messages[messages.length - 1]; - const live = tail?.role === "assistant" ? liveTurnTokens(tail) : null; - const total = live ? live.reasoning + live.output : 0; - const now = Date.now(); - const MIN_INTERVAL = 120; // ms (~8 Hz) - const elapsed = now - lastEmitRef.current; - if (elapsed >= MIN_INTERVAL) { - lastEmitRef.current = now; - onLiveTurnTokens(total); - } else if (!emitTimerRef.current) { - // Schedule a trailing emit so the FINAL value of a burst is not dropped. - emitTimerRef.current = setTimeout(() => { - emitTimerRef.current = null; - lastEmitRef.current = Date.now(); - onLiveTurnTokens(total); - }, MIN_INTERVAL - elapsed); - } - }, [messages, isStreaming, onLiveTurnTokens]); - - // Clear any pending throttle timer on unmount (chat switch via `key`) so a - // trailing emit can't fire into a torn-down thread's parent. - useEffect(() => { - return () => { - if (emitTimerRef.current) clearTimeout(emitTimerRef.current); - }; - }, []); - // A role was picked with autoStart=false: the role is bound but NOTHING was // sent, so chatId stays null and the empty state would keep showing the cards. 
// This flag hides the cards and reveals the composer (with the role indicated) diff --git a/apps/client/src/features/ai-chat/components/context-badge.test.tsx b/apps/client/src/features/ai-chat/components/context-badge.test.tsx new file mode 100644 index 00000000..f92ef2a7 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/context-badge.test.tsx @@ -0,0 +1,69 @@ +import { describe, it, expect } from "vitest"; +import { render, screen, fireEvent } from "@testing-library/react"; +import { MantineProvider } from "@mantine/core"; +import { ContextBadge, formatTokens } from "./context-badge"; + +// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. +// Without an I18nextProvider, `t(key)` returns the key verbatim, so tooltip +// labels assert against their English source strings. + +function renderBadge(props: { + contextTokens: number; + maxContextTokens?: number; +}) { + return render( + + + , + ); +} + +describe("formatTokens", () => { + it("formats with k / M suffixes", () => { + expect(formatTokens(572)).toBe("572"); + expect(formatTokens(200_000)).toBe("200.0k"); + expect(formatTokens(1_500_000)).toBe("1.5M"); + }); +}); + +describe("ContextBadge", () => { + it("shows `current / max` when a limit is configured", () => { + renderBadge({ contextTokens: 572, maxContextTokens: 200_000 }); + expect(screen.getByText("572 / 200.0k")).toBeDefined(); + }); + + it("shows only the current size when no limit is configured", () => { + renderBadge({ contextTokens: 572, maxContextTokens: 0 }); + expect(screen.getByText("572")).toBeDefined(); + // No denominator rendered. 
+ expect(screen.queryByText(/\//)).toBeNull(); + }); + + it("treats an undefined limit as no limit", () => { + renderBadge({ contextTokens: 1234 }); + expect(screen.getByText("1.2k")).toBeDefined(); + expect(screen.queryByText(/\//)).toBeNull(); + }); + + it("renders nothing until there is a current context size", () => { + const { container } = renderBadge({ + contextTokens: 0, + maxContextTokens: 200_000, + }); + expect(container.querySelector("span")).toBeNull(); + }); + + it("never flips to a live per-turn counter (no live mode); shows context as-is even above max", () => { + // `current > max` (estimate drift / smaller-model role) is shown unclamped. + renderBadge({ contextTokens: 210_000, maxContextTokens: 200_000 }); + expect(screen.getByText("210.0k / 200.0k")).toBeDefined(); + }); + + it("exposes the limit tooltip label on hover", async () => { + renderBadge({ contextTokens: 572, maxContextTokens: 200_000 }); + fireEvent.mouseEnter(screen.getByText("572 / 200.0k")); + expect( + await screen.findByText("Context size / model limit"), + ).toBeDefined(); + }); +}); diff --git a/apps/client/src/features/ai-chat/components/context-badge.tsx b/apps/client/src/features/ai-chat/components/context-badge.tsx new file mode 100644 index 00000000..0f2538d7 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/context-badge.tsx @@ -0,0 +1,61 @@ +import { Tooltip } from "@mantine/core"; +import { useTranslation } from "react-i18next"; +import classes from "@/features/ai-chat/components/ai-chat-window.module.css"; + +/** Compact token formatter: 1.2M / 3.4k / 950. */ +export function formatTokens(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; + if (n >= 1_000) return `${(n / 1_000).toFixed(1)}k`; + return String(n); +} + +interface ContextBadgeProps { + // Current context size for the active chat (tokens occupied in the model's + // window). 0 = unknown → nothing is rendered. 
+ contextTokens: number; + // The model's context-window size (tokens), from AI settings. 0/undefined = + // no limit known → only the current size is shown (no denominator). + maxContextTokens?: number; +} + +/** + * Header badge that ALWAYS shows the current context size, and — when the model's + * context-window size is configured — appends "/ max" so the badge reads + * "current / max" (e.g. `572 / 200.0k`). This is a single, stable meaning: unlike + * the previous design it never flips to a live per-turn generation counter while + * streaming (that live feedback lives in the chat body's "Thinking · N tokens"). + * + * No limit configured (or older history rows without it) → the denominator is + * hidden and the badge shows the current size only, matching the prior at-rest + * behaviour. `context > max` (estimate drift, or a role on a smaller model) is + * shown as-is, without clamping. + */ +export function ContextBadge({ + contextTokens, + maxContextTokens, +}: ContextBadgeProps) { + const { t } = useTranslation(); + + // Nothing to show until the first persisted context figure exists. + if (!(contextTokens > 0)) return null; + + const hasMax = typeof maxContextTokens === "number" && maxContextTokens > 0; + const label = hasMax + ? `${formatTokens(contextTokens)} / ${formatTokens(maxContextTokens)}` + : formatTokens(contextTokens); + + return ( + + {label} + + ); +} + +export default ContextBadge; diff --git a/apps/client/src/features/ai-chat/types/ai-chat.types.ts b/apps/client/src/features/ai-chat/types/ai-chat.types.ts index af595917..22e32f15 100644 --- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts +++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts @@ -113,9 +113,14 @@ export interface IAiChatMessageRow { }; // Current context size for the turn = final-step (input+output) tokens, i.e. // how much the conversation occupies in the model's context window after this - // turn. Distinct from `usage` (legacy cumulative totalUsage). 
Shown in the - // floating window's header badge. + // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown as the + // numerator of the floating window's "current / max" header badge. contextTokens?: number; + // The model's context-window size (tokens), admin-configured in AI settings + // and stamped onto the turn server-side. The denominator of the header badge. + // Absent/0 (older rows, or no limit configured) → the badge hides the + // denominator and shows only the current context size (`contextTokens`). + maxContextTokens?: number; // Set on an assistant row whose turn ended in a provider/stream error; the // raw provider error text (e.g. "402: ...") for inline display in the thread. error?: string; diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts index 3e650f0d..6b00fbc4 100644 --- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts @@ -1,17 +1,5 @@ import { describe, expect, it } from "vitest"; -import type { UIMessage } from "@ai-sdk/react"; -import { - estimateTokens, - liveTurnTokens, -} from "@/features/ai-chat/utils/count-stream-tokens.ts"; - -const msg = (parts: unknown[], metadata?: unknown): UIMessage => - ({ - id: Math.random().toString(), - role: "assistant", - parts, - metadata, - }) as UIMessage; +import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; describe("estimateTokens", () => { it("returns 0 for the empty string", () => { @@ -25,147 +13,3 @@ describe("estimateTokens", () => { expect(estimateTokens("12345678")).toBe(2); }); }); - -describe("liveTurnTokens — estimate path", () => { - it("is all zeros for an undefined message", () => { - expect(liveTurnTokens(undefined)).toEqual({ - reasoning: 0, - output: 0, - authoritative: false, - }); - }); - - it("is all zeros for a parts-less message", () => { - 
expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({ - reasoning: 0, - output: 0, - authoritative: false, - }); - }); - - it("estimates output from text parts", () => { - // 8 chars -> 2 tokens. - const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }])); - expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false }); - }); - - it("estimates reasoning from reasoning parts (kept separate from output)", () => { - const r = liveTurnTokens( - msg([ - { type: "reasoning", text: "12345678" }, - { type: "text", text: "abcd" }, - ]), - ); - expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false }); - }); - - it("accumulates across multiple text + reasoning parts (multi-step)", () => { - const r = liveTurnTokens( - msg([ - { type: "reasoning", text: "abcd" }, // 1 - { type: "text", text: "abcd" }, // 1 - { type: "tool-getPage", state: "output-available" }, // ignored - { type: "reasoning", text: "abcd" }, // 1 - { type: "text", text: "abcdefgh" }, // 2 - ]), - ); - expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false }); - }); - - it("ignores non text/reasoning parts (tools, step-start)", () => { - const r = liveTurnTokens( - msg([ - { type: "step-start" }, - { type: "tool-getPage", state: "input-available" }, - ]), - ); - expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false }); - }); -}); - -describe("liveTurnTokens — authoritative path", () => { - it("returns authoritative usage verbatim, splitting reasoning out of output", () => { - // outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30. 
- const r = liveTurnTokens( - msg([{ type: "text", text: "estimate would be tiny" }], { - usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 }, - }), - ); - expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true }); - }); - - it("treats missing reasoningTokens as 0 and keeps full output", () => { - const r = liveTurnTokens( - msg([{ type: "text", text: "x" }], { - usage: { inputTokens: 10, outputTokens: 42 }, - }), - ); - expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true }); - }); - - it("never returns a negative output when reasoning exceeds reported output", () => { - const r = liveTurnTokens( - msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }), - ); - expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true }); - }); - - it("falls back to the estimate when metadata has no usage object", () => { - const r = liveTurnTokens( - msg([{ type: "text", text: "abcd" }], { chatId: "c1" }), - ); - expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false }); - }); -}); - -describe("liveTurnTokens — combined authoritative + estimate (#163)", () => { - it("ticks the in-flight step above the completed-steps authoritative base", () => { - // The authoritative usage is the sum over COMPLETED steps (step 1). The - // CURRENT step is streaming and its text is NOT in `usage` yet, but it IS in - // the parts -> the running estimate must push the live figure above the base - // so the badge keeps growing between step boundaries. - const longText = "x".repeat(800); // 800 chars -> 200 est output tokens - const r = liveTurnTokens( - msg([{ type: "text", text: longText }], { - usage: { inputTokens: 500, outputTokens: 40 }, // step-1 base: 40 output - }), - ); - // max(authOutput=40, estOutput=200) = 200 -> the counter ticks, not frozen. 
- expect(r.output).toBe(200); - expect(r.authoritative).toBe(true); - }); - - it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => { - const longReasoning = "r".repeat(400); // 400 chars -> 100 est reasoning - const r = liveTurnTokens( - msg([{ type: "reasoning", text: longReasoning }], { - usage: { inputTokens: 100, outputTokens: 20, reasoningTokens: 20 }, - }), - ); - // reasoning: max(20, 100) = 100 ; output: max(max(0,20-20)=0, 0) = 0. - expect(r.reasoning).toBe(100); - expect(r.output).toBe(0); - expect(r.authoritative).toBe(true); - }); - - it("snaps to the authoritative figure once it exceeds the rough estimate", () => { - // Short on-screen text (estimate tiny) but a large authoritative output: - // the exact figure wins at the boundary (the counter never under-reports). - const r = liveTurnTokens( - msg([{ type: "text", text: "abcd" }], { - usage: { inputTokens: 10, outputTokens: 5000 }, - }), - ); - expect(r.output).toBe(5000); - }); - - it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => { - // Mirrors the legacy 'verbatim' tests: estimate < authoritative -> unchanged. 
- const r = liveTurnTokens( - msg([{ type: "text", text: "tiny" }], { - usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 }, - }), - ); - expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true }); - }); -}); diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts index 9a900996..16cbaec4 100644 --- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts @@ -1,18 +1,16 @@ -import type { UIMessage } from "@ai-sdk/react"; - /** - * Live token counting for a streaming AI-chat turn — split into REASONING - * (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows - * `Thinking… · 60 tokens` next to its thinking indicator. + * Live token ESTIMATION for a streaming AI-chat turn. * * No provider streams exact per-token usage mid-stream, so the live number is a - * CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage - * once the server attaches it on a step/turn boundary (see the server's - * `chatStreamMetadata` + the client's read of `message.metadata.usage`). When - * authoritative usage is present we return it verbatim (the number "jumps to - * exact"); otherwise we return the running estimate. Pure + unit-testable: it - * never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the + * CLIENT ESTIMATE (chars/≈4 heuristic). It powers the chat body's + * `Thinking… · N tokens` indicator (see `ReasoningBlock`), which reconciles to + * the authoritative server usage once it lands. Pure + unit-testable: it never + * runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the * bundle, and be wrong for Gemini/Ollama anyway). 
+ * + * The former header-badge `liveTurnTokens()` split was removed with #189 (the + * header badge now shows the stable "current / max" context size, not a live + * per-turn counter); the live feedback remains in `ReasoningBlock`. */ /** @@ -24,90 +22,3 @@ export function estimateTokens(text: string): number { if (!text) return 0; return Math.ceil(text.length / 4); } - -/** Authoritative per-step/turn usage the server attaches to message metadata. */ -export interface AuthoritativeUsage { - inputTokens?: number; - outputTokens?: number; - totalTokens?: number; - reasoningTokens?: number; -} - -/** Live token split for a turn's tail (streaming) assistant message. */ -export interface LiveTurnTokens { - /** Thinking/reasoning tokens (estimate, or authoritative when available). */ - reasoning: number; - /** Answer/output tokens (estimate, or authoritative when available). */ - output: number; - /** True when the numbers come from authoritative server usage, not estimate. */ - authoritative: boolean; -} - -/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */ -function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined { - const meta = message?.metadata as - | { usage?: AuthoritativeUsage } - | undefined; - const usage = meta?.usage; - if (!usage || typeof usage !== "object") return undefined; - return usage; -} - -/** - * Token split for the given (streaming) assistant message. - * - * COMBINES the authoritative server usage with the running text estimate so the - * counter ticks in real time AND lands exact. The server only attaches - * `metadata.usage` at a step/turn boundary (`finish-step`/`finish`) and it is - * CUMULATIVE over COMPLETED steps — it does NOT yet include the in-flight step. - * So a multi-step turn that returned the authoritative figure verbatim would - * FREEZE between boundaries and jump in steps (issue #163). 
- * - * Instead we always compute the running ESTIMATE (chars/≈4 over the message's - * `reasoning`/`text` parts, which grows on every streamed delta) and take the - * per-component MAX of the authoritative base and the estimate: - * - between boundaries the estimate of the in-flight step ticks the number up; - * - at a boundary the authoritative figure snaps it to exact; - * - because the server's usage is cumulative and we only ever take the max, the - * number is MONOTONIC — it never drops. - * - * Providers that don't stream reasoning text still surface a reasoning count once - * the authoritative usage arrives (`max(reasoningTokens, 0)`); on the pure - * estimate path (no usage yet) such a turn shows `reasoning: 0` until then. - */ -export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens { - if (!message) return { reasoning: 0, output: 0, authoritative: false }; - - // Running ESTIMATE over every reasoning/text part — grows on each delta. This - // includes the IN-FLIGHT step, which the authoritative usage does not cover yet. - let estReasoning = 0; - let estOutput = 0; - for (const part of message.parts ?? []) { - if (part.type === "reasoning") { - estReasoning += estimateTokens((part as { text?: string }).text ?? ""); - } else if (part.type === "text") { - estOutput += estimateTokens((part as { text?: string }).text ?? ""); - } - } - - const usage = metadataUsage(message); - if (!usage) { - // No authoritative usage streamed yet: the estimate IS the live figure. - return { reasoning: estReasoning, output: estOutput, authoritative: false }; - } - - // Authoritative sum over COMPLETED steps. `outputTokens` already INCLUDES - // reasoning in the AI SDK usage shape, so subtract it out for the "answer" - // figure (never go negative if a provider reports them inconsistently). - const authReasoning = usage.reasoningTokens ?? 0; - const authOutput = Math.max(0, (usage.outputTokens ?? 
0) - authReasoning); - - // Per-component max: the in-flight step's estimate ticks above the completed- - // steps base between boundaries, and the authoritative figure wins once it - // exceeds the (rough) estimate at the next boundary. Monotonic by construction. - return { - reasoning: Math.max(authReasoning, estReasoning), - output: Math.max(authOutput, estOutput), - authoritative: true, - }; -} diff --git a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx index 08348756..ba98539a 100644 --- a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx +++ b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx @@ -7,6 +7,7 @@ import { Button, Group, Modal, + NumberInput, Paper, PasswordInput, Select, @@ -85,6 +86,9 @@ const formSchema = z.object({ chatModel: z.string(), // Chat provider implementation (reasoning surfacing). Default openai-compatible. chatApiStyle: z.enum(["openai-compatible", "openai"]), + // Model context-window size (tokens) shown as the chat header badge's "max". + // Empty string = no limit (NumberInput emits "" when cleared). + chatContextWindow: z.union([z.number(), z.literal("")]), // Cheap model id for the anonymous public-share assistant; empty = use chatModel. publicShareChatModel: z.string(), // Agent-role id whose persona the public-share assistant adopts; empty = @@ -312,6 +316,7 @@ export default function AiProviderSettings() { initialValues: { chatModel: "", chatApiStyle: "openai-compatible" as ChatApiStyle, + chatContextWindow: "" as number | "", publicShareChatModel: "", publicShareAssistantRoleId: "", embeddingModel: "", @@ -335,6 +340,10 @@ export default function AiProviderSettings() { form.setValues({ chatModel: settings.chatModel ?? "", chatApiStyle: settings.chatApiStyle ?? 
"openai-compatible", + // 0/unset = no limit → show an empty field (not a literal "0"). + chatContextWindow: settings.chatContextWindow + ? settings.chatContextWindow + : "", publicShareChatModel: settings.publicShareChatModel ?? "", publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "", embeddingModel: settings.embeddingModel ?? "", @@ -365,6 +374,11 @@ export default function AiProviderSettings() { driver: "openai", chatModel: values.chatModel, chatApiStyle: values.chatApiStyle, + // Empty → 0, which clears the limit server-side (badge shows current only). + chatContextWindow: + typeof values.chatContextWindow === "number" + ? values.chatContextWindow + : 0, // Cheap model id for the anonymous public-share assistant; empty falls // back to chatModel server-side. publicShareChatModel: values.publicShareChatModel, @@ -785,6 +799,22 @@ export default function AiProviderSettings() { {...form.getInputProps("chatApiStyle")} /> + + {/* Anonymous public-share assistant: a single master toggle + an optional cheaper model id. Reuses this card's driver/URL/key. */} diff --git a/apps/client/src/features/workspace/services/ai-settings-service.ts b/apps/client/src/features/workspace/services/ai-settings-service.ts index 189589b0..28afd9f0 100644 --- a/apps/client/src/features/workspace/services/ai-settings-service.ts +++ b/apps/client/src/features/workspace/services/ai-settings-service.ts @@ -23,6 +23,9 @@ export interface IAiSettings { driver?: AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Chat model context-window size (tokens); shown as the "max" in the chat + // header context badge. 0/unset = no limit (badge shows the current size only). + chatContextWindow?: number; // Cheap model id for the anonymous public-share assistant; empty = chatModel. 
publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = @@ -57,6 +60,8 @@ export interface IAiSettingsUpdate { driver?: AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Chat model context-window size (tokens); 0 clears the limit. + chatContextWindow?: number; publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = // built-in locked persona. diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts index bfeafb97..7514a557 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts @@ -292,6 +292,26 @@ describe('flushAssistant', () => { expect(f.metadata.contextTokens).toBe(15); }); + it('completed: writes maxContextTokens when the model limit is > 0', () => { + const f = flushAssistant([toolStep], '', 'completed', { + contextTokens: 15, + maxContextTokens: 200_000, + }); + expect(f.metadata.maxContextTokens).toBe(200_000); + }); + + it('omits maxContextTokens when the limit is unset or 0', () => { + const unset = flushAssistant([toolStep], '', 'completed', { + contextTokens: 15, + }); + expect('maxContextTokens' in unset.metadata).toBe(false); + const zero = flushAssistant([toolStep], '', 'completed', { + contextTokens: 15, + maxContextTokens: 0, + }); + expect('maxContextTokens' in zero.metadata).toBe(false); + }); + it('error: records the error and a derived finishReason', () => { const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' }); expect(f.status).toBe('error'); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 5c4b1f0e..1a53ff1f 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -616,6 +616,9 @@ export class AiChatService implements OnModuleInit { contextTokens: 
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined, + // Admin-configured context-window size for this model (badge max). + // Resolved once per turn above; written to metadata only when > 0. + maxContextTokens: resolved?.chatContextWindow, }), ); // Lifecycle: release the external MCP clients leased for this turn. @@ -1223,6 +1226,10 @@ export function flushAssistant( finishReason?: string; usage?: ChatStreamUsage | StreamUsage | undefined; contextTokens?: number; + // Admin-configured context-window size (tokens) for this turn's model; the + // denominator of the client's "current / max" header badge. Written only + // when > 0 (0/unset = no limit known → the badge shows current only). + maxContextTokens?: number; error?: string; }, ): AssistantFlush { @@ -1253,6 +1260,9 @@ export function flushAssistant( normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage; } if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens; + if (extra?.maxContextTokens && extra.maxContextTokens > 0) { + metadata.maxContextTokens = extra.maxContextTokens; + } if (extra?.error) metadata.error = extra.error; return { diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 60e0a66e..3b5d1955 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -21,6 +21,7 @@ export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [ 'driver', 'chatModel', 'chatApiStyle', + 'chatContextWindow', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', diff --git a/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts b/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts index 64a4dbea..04d4705b 100644 --- a/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts +++ b/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts @@ -41,3 +41,35 @@ 
describe('UpdateAiSettingsDto.chatApiStyle', () => { expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined(); }); }); + +/** DTO validation for chatContextWindow (@IsOptional @IsInt @Min(0)). */ +describe('UpdateAiSettingsDto.chatContextWindow', () => { + const errorsFor = async (chatContextWindow: unknown) => + validate(plainToInstance(UpdateAiSettingsDto, { chatContextWindow })); + + it('accepts a non-negative integer (incl. 0 = clear the limit)', async () => { + for (const v of [0, 200000]) { + const errs = await errorsFor(v); + expect( + errs.find((e) => e.property === 'chatContextWindow'), + ).toBeUndefined(); + } + }); + + it('rejects a negative value', async () => { + const errs = await errorsFor(-1); + expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined(); + }); + + it('rejects a non-integer value', async () => { + const errs = await errorsFor(1.5); + expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined(); + }); + + it('accepts the field being omitted (optional)', async () => { + const errs = await validate(plainToInstance(UpdateAiSettingsDto, {})); + expect( + errs.find((e) => e.property === 'chatContextWindow'), + ).toBeUndefined(); + }); +}); diff --git a/apps/server/src/integrations/ai/ai-settings.service.ts b/apps/server/src/integrations/ai/ai-settings.service.ts index 05020fa9..2c68ad2c 100644 --- a/apps/server/src/integrations/ai/ai-settings.service.ts +++ b/apps/server/src/integrations/ai/ai-settings.service.ts @@ -27,6 +27,8 @@ export interface UpdateAiSettingsInput { driver?: AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Chat context-window size (tokens); 0/empty clears the limit. + chatContextWindow?: number; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; @@ -162,6 +164,8 @@ export class AiSettingsService { chatModel: provider.chatModel, // Plain passthrough; getChatModel defaults unset to 'openai-compatible'. 
chatApiStyle: provider.chatApiStyle, + // Admin-configured context-window size; 0/unset = no limit (badge denominator). + chatContextWindow: provider.chatContextWindow, // Cheap model id for the anonymous public-share assistant; reuses the chat // driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel. publicShareChatModel: provider.publicShareChatModel, @@ -244,6 +248,7 @@ export class AiSettingsService { driver: provider.driver, chatModel: provider.chatModel, chatApiStyle: provider.chatApiStyle, + chatContextWindow: provider.chatContextWindow, embeddingModel: provider.embeddingModel, baseUrl: provider.baseUrl, embeddingBaseUrl: provider.embeddingBaseUrl, diff --git a/apps/server/src/integrations/ai/ai.types.ts b/apps/server/src/integrations/ai/ai.types.ts index 29c8d6f2..7c11f55e 100644 --- a/apps/server/src/integrations/ai/ai.types.ts +++ b/apps/server/src/integrations/ai/ai.types.ts @@ -35,6 +35,13 @@ export interface AiProviderSettings { // Chat provider implementation for the `openai` driver. Unset → defaults to // 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle. chatApiStyle?: ChatApiStyle; + // Admin-configured chat model context-window size, in tokens. There is no + // provider-independent way to discover this (OpenAI's /v1/models usually omits + // it, Gemini/Ollama/OpenRouter each expose it differently), so it is entered + // manually. Surfaced to the chat client (via assistant message metadata) as the + // denominator of the header "current / max" context badge. Empty/0 = no limit + // known → the badge shows only the current context size. + chatContextWindow?: number; embeddingModel?: string; baseUrl?: string; // Embedding-specific base URL. Falls back to `baseUrl` when empty/unset. 
@@ -73,6 +80,7 @@ export const PROVIDER_SETTINGS_KEYS = [ 'driver', 'chatModel', 'chatApiStyle', + 'chatContextWindow', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', @@ -98,6 +106,10 @@ export const PROVIDER_SETTINGS_KEYS = [ export interface ResolvedAiConfig extends Partial { driver?: AiDriver; chatModel?: string; + // Admin-configured chat context-window size (tokens); 0/unset = no limit. Used + // as the header context-badge denominator. Re-declared for parity with the + // explicit fields above. + chatContextWindow?: number; // Cheap model id for the public-share assistant; reuses the chat creds. publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts (empty/unset @@ -117,6 +129,8 @@ export interface MaskedAiSettings { driver?: AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Admin-configured chat context-window size (tokens); 0/unset = no limit. + chatContextWindow?: number; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; diff --git a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts index 53aa8220..f2156213 100644 --- a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts +++ b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts @@ -1,4 +1,4 @@ -import { IsIn, IsOptional, IsString } from 'class-validator'; +import { IsIn, IsInt, IsOptional, IsString, Min } from 'class-validator'; import { AI_DRIVERS, AiDriver, @@ -29,6 +29,13 @@ export class UpdateAiSettingsDto { @IsIn(CHAT_API_STYLES) chatApiStyle?: ChatApiStyle; + // Chat model context-window size in tokens (header context-badge denominator). + // 0 (or empty) clears the limit so the badge shows only the current context. + @IsOptional() + @IsInt() + @Min(0) + chatContextWindow?: number; + @IsOptional() @IsString() embeddingModel?: string;