diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c2aa9c9..9eaf9757 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 OpenRouter, etc.; `openai` uses the official provider (real-OpenAI reasoning-model request shaping). Chosen explicitly rather than inferred from the base URL, since a custom URL can front real OpenAI too. (#175, #177) +- **AI chat "Context window (tokens)" setting (`chatContextWindow`).** A new + admin field in AI settings that records the chat model's context-window size. + When set (> 0) it becomes the denominator of the header context-badge, which + now reads "used / max"; `0`/empty clears the limit and the badge shows only + the current context as before. There is no provider-independent way to read a + model's window automatically, so it is an explicit workspace-level value. + (#189) - **Per-MCP-server instructions in the agent prompt.** Each external MCP server now has an admin-authored `instructions` field ("how/when to use this server's tools") that is injected into the agent's system prompt next to that server's @@ -61,6 +68,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 model's reasoning out of the box. An endpoint that is real OpenAI behind a custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. (#177) +- **AI chat header context-badge now shows "used / max".** When an admin sets + the new `chatContextWindow`, the badge displays the current context size over + the configured window (e.g. `120.0k / 200.0k`) instead of switching to a live + per-turn token counter during streaming. With no window configured the badge + keeps showing just the current context. (#189) + - **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the same id are ONE footnote — one number, one definition, several back-references — instead of being renamed to `a__2`, `a__3`. 
Duplicate `[^a]:` definitions are diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index bd8c4ed3..b9f4fc17 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -1168,7 +1168,10 @@ "Built-in assistant persona": "Built-in assistant persona", "Minimize": "Minimize", "Current context size": "Current context size", - "Tokens generated this turn": "Tokens generated this turn", + "Context size / model limit": "Context size / model limit", + "Context window (tokens)": "Context window (tokens)", + "Shows used / total in the chat header badge; empty hides the total.": "Shows used / total in the chat header badge; empty hides the total.", + "e.g. 200000": "e.g. 200000", "AI agent": "AI agent", "Take a look at the current document": "Take a look at the current document", "AI agent is typing…": "AI agent is typing…", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index f8c59436..1845bc2b 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -705,7 +705,10 @@ "Copy chat": "Копировать чат", "Created successfully": "Успешно создано", "Current context size": "Текущий размер контекста", - "Tokens generated this turn": "Токенов сгенерировано за ход", + "Context size / model limit": "Размер контекста / лимит модели", + "Context window (tokens)": "Размер окна контекста (токены)", + "Shows used / total in the chat header badge; empty hides the total.": "Показывает использовано/всего в шапке чата; пусто — скрыть лимит.", + "e.g. 200000": "напр. 
200000", "Delete this chat?": "Удалить этот чат?", "Deleted successfully": "Успешно удалено", "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}", diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index de0b9923..ecdf1397 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -6,7 +6,7 @@ import { useRef, useState, } from "react"; -import { Group, Loader, Tooltip } from "@mantine/core"; +import { Group, Loader } from "@mantine/core"; import { IconArrowsDiagonal, IconCheck, @@ -39,6 +39,7 @@ import { } from "@/features/ai-chat/queries/ai-chat-query.ts"; import ConversationList from "@/features/ai-chat/components/conversation-list.tsx"; import ChatThread from "@/features/ai-chat/components/chat-thread.tsx"; +import { ContextBadge } from "@/features/ai-chat/components/context-badge.tsx"; import { exportAiChat } from "@/features/ai-chat/services/ai-chat-service.ts"; import { useChatSession } from "@/features/ai-chat/hooks/use-chat-session.ts"; import { @@ -60,13 +61,6 @@ const MIN_HEIGHT = 400; // Margin kept between the window and the viewport edges while dragging. const EDGE_MARGIN = 8; -/** Compact token formatter: 1.2M / 3.4k / 950. */ -function formatTokens(n: number): string { - if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; - if (n >= 1_000) return `${(n / 1_000).toFixed(1)}k`; - return String(n); -} - // Compute the initial top-right placement at the default size, fitted to the // current viewport. Reads `window` only when called (inside an effect). function computeInitialGeom() { @@ -161,12 +155,6 @@ export default function AiChatWindow() { const { data: messageRows, isLoading: messagesLoading } = useAiChatMessagesQuery(activeChatId ?? 
undefined); - // Live turn-token total (reasoning + output) for the in-flight turn, pushed up - // (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream. - // `null` means no turn is in flight -> the badge falls back to the persisted - // context size below. - const [liveTurnTokens, setLiveTurnTokens] = useState(null); - // The page the user is currently viewing. AiChatWindow lives in a pathless // parent layout route, so useParams() can't see :pageSlug. Match the full // pathname against the authenticated page route instead so "the current page" @@ -306,6 +294,21 @@ export default function AiChatWindow() { return 0; }, [activeChatId, messageRows]); + // The model's context-window size (badge denominator), read from the most + // recent assistant row that carries it. Admin-configured in AI settings and + // stamped onto the turn server-side, so it travels with the message metadata — + // no client-side model resolution, and it survives public shares / per-role + // models automatically. 0 (no limit configured, or older rows) → the badge + // hides the denominator and shows only the current context size. + const maxContextTokens = useMemo(() => { + if (!activeChatId || !messageRows) return 0; + for (let i = messageRows.length - 1; i >= 0; i--) { + const max = messageRows[i].metadata?.maxContextTokens; + if (typeof max === "number" && max > 0) return max; + } + return 0; + }, [activeChatId, messageRows]); + // On (re)open, settle the geometry before paint (useLayoutEffect → no // first-frame jump): compute an initial top-right placement the first time, // and re-clamp an existing geometry to the current viewport on later opens @@ -495,23 +498,14 @@ export default function AiChatWindow() { )}
- {/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz); - once it finishes, fall back to the persisted context size. Require - > 0 so the very first emit (an empty tail message, count 0) does not - flash a "0" badge before any token streams in (#151 review). */} - {liveTurnTokens !== null && liveTurnTokens > 0 ? ( - - - {formatTokens(liveTurnTokens)} - - - ) : contextTokens > 0 ? ( - - - {formatTokens(contextTokens)} - - - ) : null} + {/* Context badge: always "current / max" context size (or just current + when no model limit is configured). It no longer flips to a live + per-turn generation counter mid-stream — that live feedback lives in + the chat body's "Thinking · N tokens" block. */} +
@@ -634,7 +628,6 @@ export default function AiChatWindow() { assistantName={currentRole?.name} onTurnFinished={onTurnFinished} onServerChatId={onServerChatId} - onLiveTurnTokens={setLiveTurnTokens} /> )}
diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index c906a940..14f9a2ad 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -20,7 +20,6 @@ import { } from "@/features/ai-chat/utils/role-launch.ts"; import { describeChatError } from "@/features/ai-chat/utils/error-message.ts"; import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts"; -import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import { dequeue, enqueueMessage, @@ -67,12 +66,6 @@ interface ChatThreadProps { * Copy/export button available mid-stream). Distinct from onTurnFinished, * which fires only at the terminal outcome. */ onServerChatId?: (serverChatId?: string) => void; - /** Reports the live turn-token total (reasoning + output) for the in-flight - * turn so the parent can show a header badge that ticks mid-stream. THROTTLED - * here (~8 Hz) so the parent re-renders a handful of times a second, not on - * every streamed delta. Called with `null` when no turn is in flight (the - * parent then reverts the badge to the persisted context size). */ - onLiveTurnTokens?: (tokens: number | null) => void; } /** @@ -117,7 +110,6 @@ export default function ChatThread({ assistantName, onTurnFinished, onServerChatId, - onLiveTurnTokens, }: ChatThreadProps) { const { t } = useTranslation(); @@ -328,53 +320,6 @@ export default function ChatThread({ // the SAME on-screen banner text can be mirrored into the export (issue #160). const errorView = error ? describeChatError(error.message ?? "", t) : null; - // Report the live turn-token total to the parent header badge, THROTTLED to - // ~8 Hz so the parent re-renders a few times a second instead of on every - // streamed delta. 
The tail assistant message's reasoning+output (estimate while - // streaming, authoritative once a step reports usage) is the live figure. When - // the turn ends we emit a final exact value, then `null` so the parent reverts - // the badge to the persisted context size. - const lastEmitRef = useRef(0); - const emitTimerRef = useRef | null>(null); - useEffect(() => { - if (!onLiveTurnTokens) return; - if (!isStreaming) { - // Turn ended (or never started): clear any pending throttle and revert. - if (emitTimerRef.current) { - clearTimeout(emitTimerRef.current); - emitTimerRef.current = null; - } - lastEmitRef.current = 0; - onLiveTurnTokens(null); - return; - } - const tail = messages[messages.length - 1]; - const live = tail?.role === "assistant" ? liveTurnTokens(tail) : null; - const total = live ? live.reasoning + live.output : 0; - const now = Date.now(); - const MIN_INTERVAL = 120; // ms (~8 Hz) - const elapsed = now - lastEmitRef.current; - if (elapsed >= MIN_INTERVAL) { - lastEmitRef.current = now; - onLiveTurnTokens(total); - } else if (!emitTimerRef.current) { - // Schedule a trailing emit so the FINAL value of a burst is not dropped. - emitTimerRef.current = setTimeout(() => { - emitTimerRef.current = null; - lastEmitRef.current = Date.now(); - onLiveTurnTokens(total); - }, MIN_INTERVAL - elapsed); - } - }, [messages, isStreaming, onLiveTurnTokens]); - - // Clear any pending throttle timer on unmount (chat switch via `key`) so a - // trailing emit can't fire into a torn-down thread's parent. - useEffect(() => { - return () => { - if (emitTimerRef.current) clearTimeout(emitTimerRef.current); - }; - }, []); - // A role was picked with autoStart=false: the role is bound but NOTHING was // sent, so chatId stays null and the empty state would keep showing the cards. 
// This flag hides the cards and reveals the composer (with the role indicated) diff --git a/apps/client/src/features/ai-chat/components/context-badge.test.tsx b/apps/client/src/features/ai-chat/components/context-badge.test.tsx new file mode 100644 index 00000000..f92ef2a7 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/context-badge.test.tsx @@ -0,0 +1,69 @@ +import { describe, it, expect } from "vitest"; +import { render, screen, fireEvent } from "@testing-library/react"; +import { MantineProvider } from "@mantine/core"; +import { ContextBadge, formatTokens } from "./context-badge"; + +// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. +// Without an I18nextProvider, `t(key)` returns the key verbatim, so tooltip +// labels assert against their English source strings. + +function renderBadge(props: { + contextTokens: number; + maxContextTokens?: number; +}) { + return render( + + + , + ); +} + +describe("formatTokens", () => { + it("formats with k / M suffixes", () => { + expect(formatTokens(572)).toBe("572"); + expect(formatTokens(200_000)).toBe("200.0k"); + expect(formatTokens(1_500_000)).toBe("1.5M"); + }); +}); + +describe("ContextBadge", () => { + it("shows `current / max` when a limit is configured", () => { + renderBadge({ contextTokens: 572, maxContextTokens: 200_000 }); + expect(screen.getByText("572 / 200.0k")).toBeDefined(); + }); + + it("shows only the current size when no limit is configured", () => { + renderBadge({ contextTokens: 572, maxContextTokens: 0 }); + expect(screen.getByText("572")).toBeDefined(); + // No denominator rendered. 
+ expect(screen.queryByText(/\//)).toBeNull(); + }); + + it("treats an undefined limit as no limit", () => { + renderBadge({ contextTokens: 1234 }); + expect(screen.getByText("1.2k")).toBeDefined(); + expect(screen.queryByText(/\//)).toBeNull(); + }); + + it("renders nothing until there is a current context size", () => { + const { container } = renderBadge({ + contextTokens: 0, + maxContextTokens: 200_000, + }); + expect(container.querySelector("span")).toBeNull(); + }); + + it("never flips to a live per-turn counter (no live mode); shows context as-is even above max", () => { + // `current > max` (estimate drift / smaller-model role) is shown unclamped. + renderBadge({ contextTokens: 210_000, maxContextTokens: 200_000 }); + expect(screen.getByText("210.0k / 200.0k")).toBeDefined(); + }); + + it("exposes the limit tooltip label on hover", async () => { + renderBadge({ contextTokens: 572, maxContextTokens: 200_000 }); + fireEvent.mouseEnter(screen.getByText("572 / 200.0k")); + expect( + await screen.findByText("Context size / model limit"), + ).toBeDefined(); + }); +}); diff --git a/apps/client/src/features/ai-chat/components/context-badge.tsx b/apps/client/src/features/ai-chat/components/context-badge.tsx new file mode 100644 index 00000000..0f2538d7 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/context-badge.tsx @@ -0,0 +1,61 @@ +import { Tooltip } from "@mantine/core"; +import { useTranslation } from "react-i18next"; +import classes from "@/features/ai-chat/components/ai-chat-window.module.css"; + +/** Compact token formatter: 1.2M / 3.4k / 950. */ +export function formatTokens(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; + if (n >= 1_000) return `${(n / 1_000).toFixed(1)}k`; + return String(n); +} + +interface ContextBadgeProps { + // Current context size for the active chat (tokens occupied in the model's + // window). 0 = unknown → nothing is rendered. 
+ contextTokens: number; + // The model's context-window size (tokens), from AI settings. 0/undefined = + // no limit known → only the current size is shown (no denominator). + maxContextTokens?: number; +} + +/** + * Header badge that ALWAYS shows the current context size, and — when the model's + * context-window size is configured — appends "/ max" so the badge reads + * "current / max" (e.g. `572 / 200k`). This is a single, stable meaning: unlike + * the previous design it never flips to a live per-turn generation counter while + * streaming (that live feedback lives in the chat body's "Thinking · N tokens"). + * + * No limit configured (or older history rows without it) → the denominator is + * hidden and the badge shows the current size only, matching the prior at-rest + * behaviour. `context > max` (estimate drift, or a role on a smaller model) is + * shown as-is, without clamping. + */ +export function ContextBadge({ + contextTokens, + maxContextTokens, +}: ContextBadgeProps) { + const { t } = useTranslation(); + + // Nothing to show until the first persisted context figure exists. + if (!(contextTokens > 0)) return null; + + const hasMax = typeof maxContextTokens === "number" && maxContextTokens > 0; + const label = hasMax + ? `${formatTokens(contextTokens)} / ${formatTokens(maxContextTokens)}` + : formatTokens(contextTokens); + + return ( + + {label} + + ); +} + +export default ContextBadge; diff --git a/apps/client/src/features/ai-chat/types/ai-chat.types.ts b/apps/client/src/features/ai-chat/types/ai-chat.types.ts index af595917..22e32f15 100644 --- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts +++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts @@ -113,9 +113,14 @@ export interface IAiChatMessageRow { }; // Current context size for the turn = final-step (input+output) tokens, i.e. // how much the conversation occupies in the model's context window after this - // turn. Distinct from `usage` (legacy cumulative totalUsage). 
Shown in the - // floating window's header badge. + // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown as the + // numerator of the floating window's "current / max" header badge. contextTokens?: number; + // The model's context-window size (tokens), admin-configured in AI settings + // and stamped onto the turn server-side. The denominator of the header badge. + // Absent/0 (older rows, or no limit configured) → the badge hides the + // denominator and shows only the current context size (`contextTokens`). + maxContextTokens?: number; // Set on an assistant row whose turn ended in a provider/stream error; the // raw provider error text (e.g. "402: ...") for inline display in the thread. error?: string; diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts index 3e650f0d..6b00fbc4 100644 --- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts @@ -1,17 +1,5 @@ import { describe, expect, it } from "vitest"; -import type { UIMessage } from "@ai-sdk/react"; -import { - estimateTokens, - liveTurnTokens, -} from "@/features/ai-chat/utils/count-stream-tokens.ts"; - -const msg = (parts: unknown[], metadata?: unknown): UIMessage => - ({ - id: Math.random().toString(), - role: "assistant", - parts, - metadata, - }) as UIMessage; +import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; describe("estimateTokens", () => { it("returns 0 for the empty string", () => { @@ -25,147 +13,3 @@ describe("estimateTokens", () => { expect(estimateTokens("12345678")).toBe(2); }); }); - -describe("liveTurnTokens — estimate path", () => { - it("is all zeros for an undefined message", () => { - expect(liveTurnTokens(undefined)).toEqual({ - reasoning: 0, - output: 0, - authoritative: false, - }); - }); - - it("is all zeros for a parts-less message", () => { - 
expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({ - reasoning: 0, - output: 0, - authoritative: false, - }); - }); - - it("estimates output from text parts", () => { - // 8 chars -> 2 tokens. - const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }])); - expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false }); - }); - - it("estimates reasoning from reasoning parts (kept separate from output)", () => { - const r = liveTurnTokens( - msg([ - { type: "reasoning", text: "12345678" }, - { type: "text", text: "abcd" }, - ]), - ); - expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false }); - }); - - it("accumulates across multiple text + reasoning parts (multi-step)", () => { - const r = liveTurnTokens( - msg([ - { type: "reasoning", text: "abcd" }, // 1 - { type: "text", text: "abcd" }, // 1 - { type: "tool-getPage", state: "output-available" }, // ignored - { type: "reasoning", text: "abcd" }, // 1 - { type: "text", text: "abcdefgh" }, // 2 - ]), - ); - expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false }); - }); - - it("ignores non text/reasoning parts (tools, step-start)", () => { - const r = liveTurnTokens( - msg([ - { type: "step-start" }, - { type: "tool-getPage", state: "input-available" }, - ]), - ); - expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false }); - }); -}); - -describe("liveTurnTokens — authoritative path", () => { - it("returns authoritative usage verbatim, splitting reasoning out of output", () => { - // outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30. 
- const r = liveTurnTokens( - msg([{ type: "text", text: "estimate would be tiny" }], { - usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 }, - }), - ); - expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true }); - }); - - it("treats missing reasoningTokens as 0 and keeps full output", () => { - const r = liveTurnTokens( - msg([{ type: "text", text: "x" }], { - usage: { inputTokens: 10, outputTokens: 42 }, - }), - ); - expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true }); - }); - - it("never returns a negative output when reasoning exceeds reported output", () => { - const r = liveTurnTokens( - msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }), - ); - expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true }); - }); - - it("falls back to the estimate when metadata has no usage object", () => { - const r = liveTurnTokens( - msg([{ type: "text", text: "abcd" }], { chatId: "c1" }), - ); - expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false }); - }); -}); - -describe("liveTurnTokens — combined authoritative + estimate (#163)", () => { - it("ticks the in-flight step above the completed-steps authoritative base", () => { - // The authoritative usage is the sum over COMPLETED steps (step 1). The - // CURRENT step is streaming and its text is NOT in `usage` yet, but it IS in - // the parts -> the running estimate must push the live figure above the base - // so the badge keeps growing between step boundaries. - const longText = "x".repeat(800); // 800 chars -> 200 est output tokens - const r = liveTurnTokens( - msg([{ type: "text", text: longText }], { - usage: { inputTokens: 500, outputTokens: 40 }, // step-1 base: 40 output - }), - ); - // max(authOutput=40, estOutput=200) = 200 -> the counter ticks, not frozen. 
- expect(r.output).toBe(200); - expect(r.authoritative).toBe(true); - }); - - it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => { - const longReasoning = "r".repeat(400); // 400 chars -> 100 est reasoning - const r = liveTurnTokens( - msg([{ type: "reasoning", text: longReasoning }], { - usage: { inputTokens: 100, outputTokens: 20, reasoningTokens: 20 }, - }), - ); - // reasoning: max(20, 100) = 100 ; output: max(max(0,20-20)=0, 0) = 0. - expect(r.reasoning).toBe(100); - expect(r.output).toBe(0); - expect(r.authoritative).toBe(true); - }); - - it("snaps to the authoritative figure once it exceeds the rough estimate", () => { - // Short on-screen text (estimate tiny) but a large authoritative output: - // the exact figure wins at the boundary (the counter never under-reports). - const r = liveTurnTokens( - msg([{ type: "text", text: "abcd" }], { - usage: { inputTokens: 10, outputTokens: 5000 }, - }), - ); - expect(r.output).toBe(5000); - }); - - it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => { - // Mirrors the legacy 'verbatim' tests: estimate < authoritative -> unchanged. 
- const r = liveTurnTokens( - msg([{ type: "text", text: "tiny" }], { - usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 }, - }), - ); - expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true }); - }); -}); diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts index 9a900996..16cbaec4 100644 --- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts @@ -1,18 +1,16 @@ -import type { UIMessage } from "@ai-sdk/react"; - /** - * Live token counting for a streaming AI-chat turn — split into REASONING - * (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows - * `Thinking… · 60 tokens` next to its thinking indicator. + * Live token ESTIMATION for a streaming AI-chat turn. * * No provider streams exact per-token usage mid-stream, so the live number is a - * CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage - * once the server attaches it on a step/turn boundary (see the server's - * `chatStreamMetadata` + the client's read of `message.metadata.usage`). When - * authoritative usage is present we return it verbatim (the number "jumps to - * exact"); otherwise we return the running estimate. Pure + unit-testable: it - * never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the + * CLIENT ESTIMATE (chars/≈4 heuristic). It powers the chat body's + * `Thinking… · N tokens` indicator (see `ReasoningBlock`), which reconciles to + * the authoritative server usage once it lands. Pure + unit-testable: it never + * runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the * bundle, and be wrong for Gemini/Ollama anyway). 
+ * + * The former header-badge `liveTurnTokens()` split was removed with #189 (the + * header badge now shows the stable "current / max" context size, not a live + * per-turn counter); the live feedback remains in `ReasoningBlock`. */ /** @@ -24,90 +22,3 @@ export function estimateTokens(text: string): number { if (!text) return 0; return Math.ceil(text.length / 4); } - -/** Authoritative per-step/turn usage the server attaches to message metadata. */ -export interface AuthoritativeUsage { - inputTokens?: number; - outputTokens?: number; - totalTokens?: number; - reasoningTokens?: number; -} - -/** Live token split for a turn's tail (streaming) assistant message. */ -export interface LiveTurnTokens { - /** Thinking/reasoning tokens (estimate, or authoritative when available). */ - reasoning: number; - /** Answer/output tokens (estimate, or authoritative when available). */ - output: number; - /** True when the numbers come from authoritative server usage, not estimate. */ - authoritative: boolean; -} - -/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */ -function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined { - const meta = message?.metadata as - | { usage?: AuthoritativeUsage } - | undefined; - const usage = meta?.usage; - if (!usage || typeof usage !== "object") return undefined; - return usage; -} - -/** - * Token split for the given (streaming) assistant message. - * - * COMBINES the authoritative server usage with the running text estimate so the - * counter ticks in real time AND lands exact. The server only attaches - * `metadata.usage` at a step/turn boundary (`finish-step`/`finish`) and it is - * CUMULATIVE over COMPLETED steps — it does NOT yet include the in-flight step. - * So a multi-step turn that returned the authoritative figure verbatim would - * FREEZE between boundaries and jump in steps (issue #163). 
- * - * Instead we always compute the running ESTIMATE (chars/≈4 over the message's - * `reasoning`/`text` parts, which grows on every streamed delta) and take the - * per-component MAX of the authoritative base and the estimate: - * - between boundaries the estimate of the in-flight step ticks the number up; - * - at a boundary the authoritative figure snaps it to exact; - * - because the server's usage is cumulative and we only ever take the max, the - * number is MONOTONIC — it never drops. - * - * Providers that don't stream reasoning text still surface a reasoning count once - * the authoritative usage arrives (`max(reasoningTokens, 0)`); on the pure - * estimate path (no usage yet) such a turn shows `reasoning: 0` until then. - */ -export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens { - if (!message) return { reasoning: 0, output: 0, authoritative: false }; - - // Running ESTIMATE over every reasoning/text part — grows on each delta. This - // includes the IN-FLIGHT step, which the authoritative usage does not cover yet. - let estReasoning = 0; - let estOutput = 0; - for (const part of message.parts ?? []) { - if (part.type === "reasoning") { - estReasoning += estimateTokens((part as { text?: string }).text ?? ""); - } else if (part.type === "text") { - estOutput += estimateTokens((part as { text?: string }).text ?? ""); - } - } - - const usage = metadataUsage(message); - if (!usage) { - // No authoritative usage streamed yet: the estimate IS the live figure. - return { reasoning: estReasoning, output: estOutput, authoritative: false }; - } - - // Authoritative sum over COMPLETED steps. `outputTokens` already INCLUDES - // reasoning in the AI SDK usage shape, so subtract it out for the "answer" - // figure (never go negative if a provider reports them inconsistently). - const authReasoning = usage.reasoningTokens ?? 0; - const authOutput = Math.max(0, (usage.outputTokens ?? 
0) - authReasoning); - - // Per-component max: the in-flight step's estimate ticks above the completed- - // steps base between boundaries, and the authoritative figure wins once it - // exceeds the (rough) estimate at the next boundary. Monotonic by construction. - return { - reasoning: Math.max(authReasoning, estReasoning), - output: Math.max(authOutput, estOutput), - authoritative: true, - }; -} diff --git a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx index 08348756..ba98539a 100644 --- a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx +++ b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx @@ -7,6 +7,7 @@ import { Button, Group, Modal, + NumberInput, Paper, PasswordInput, Select, @@ -85,6 +86,9 @@ const formSchema = z.object({ chatModel: z.string(), // Chat provider implementation (reasoning surfacing). Default openai-compatible. chatApiStyle: z.enum(["openai-compatible", "openai"]), + // Model context-window size (tokens) shown as the chat header badge's "max". + // Empty string = no limit (NumberInput emits "" when cleared). + chatContextWindow: z.union([z.number(), z.literal("")]), // Cheap model id for the anonymous public-share assistant; empty = use chatModel. publicShareChatModel: z.string(), // Agent-role id whose persona the public-share assistant adopts; empty = @@ -312,6 +316,7 @@ export default function AiProviderSettings() { initialValues: { chatModel: "", chatApiStyle: "openai-compatible" as ChatApiStyle, + chatContextWindow: "" as number | "", publicShareChatModel: "", publicShareAssistantRoleId: "", embeddingModel: "", @@ -335,6 +340,10 @@ export default function AiProviderSettings() { form.setValues({ chatModel: settings.chatModel ?? "", chatApiStyle: settings.chatApiStyle ?? 
"openai-compatible", + // 0/unset = no limit → show an empty field (not a literal "0"). + chatContextWindow: settings.chatContextWindow + ? settings.chatContextWindow + : "", publicShareChatModel: settings.publicShareChatModel ?? "", publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "", embeddingModel: settings.embeddingModel ?? "", @@ -365,6 +374,11 @@ export default function AiProviderSettings() { driver: "openai", chatModel: values.chatModel, chatApiStyle: values.chatApiStyle, + // Empty → 0, which clears the limit server-side (badge shows current only). + chatContextWindow: + typeof values.chatContextWindow === "number" + ? values.chatContextWindow + : 0, // Cheap model id for the anonymous public-share assistant; empty falls // back to chatModel server-side. publicShareChatModel: values.publicShareChatModel, @@ -785,6 +799,22 @@ export default function AiProviderSettings() { {...form.getInputProps("chatApiStyle")} /> + + {/* Anonymous public-share assistant: a single master toggle + an optional cheaper model id. Reuses this card's driver/URL/key. */} diff --git a/apps/client/src/features/workspace/services/ai-settings-service.ts b/apps/client/src/features/workspace/services/ai-settings-service.ts index 189589b0..28afd9f0 100644 --- a/apps/client/src/features/workspace/services/ai-settings-service.ts +++ b/apps/client/src/features/workspace/services/ai-settings-service.ts @@ -23,6 +23,9 @@ export interface IAiSettings { driver?: AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Chat model context-window size (tokens); shown as the "max" in the chat + // header context badge. 0/unset = no limit (badge shows the current size only). + chatContextWindow?: number; // Cheap model id for the anonymous public-share assistant; empty = chatModel. 
publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = @@ -57,6 +60,8 @@ export interface IAiSettingsUpdate { driver?: AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Chat model context-window size (tokens); 0 clears the limit. + chatContextWindow?: number; publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = // built-in locked persona. diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts index bfeafb97..7514a557 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts @@ -292,6 +292,26 @@ describe('flushAssistant', () => { expect(f.metadata.contextTokens).toBe(15); }); + it('completed: writes maxContextTokens when the model limit is > 0', () => { + const f = flushAssistant([toolStep], '', 'completed', { + contextTokens: 15, + maxContextTokens: 200_000, + }); + expect(f.metadata.maxContextTokens).toBe(200_000); + }); + + it('omits maxContextTokens when the limit is unset or 0', () => { + const unset = flushAssistant([toolStep], '', 'completed', { + contextTokens: 15, + }); + expect('maxContextTokens' in unset.metadata).toBe(false); + const zero = flushAssistant([toolStep], '', 'completed', { + contextTokens: 15, + maxContextTokens: 0, + }); + expect('maxContextTokens' in zero.metadata).toBe(false); + }); + it('error: records the error and a derived finishReason', () => { const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' }); expect(f.status).toBe('error'); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 5c4b1f0e..1a53ff1f 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -616,6 +616,9 @@ export class AiChatService implements OnModuleInit { contextTokens: 
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined, + // Admin-configured context-window size for this model (badge max). + // Resolved once per turn above; written to metadata only when > 0. + maxContextTokens: resolved?.chatContextWindow, }), ); // Lifecycle: release the external MCP clients leased for this turn. @@ -1223,6 +1226,10 @@ export function flushAssistant( finishReason?: string; usage?: ChatStreamUsage | StreamUsage | undefined; contextTokens?: number; + // Admin-configured context-window size (tokens) for this turn's model; the + // denominator of the client's "current / max" header badge. Written only + // when > 0 (0/unset = no limit known → the badge shows current only). + maxContextTokens?: number; error?: string; }, ): AssistantFlush { @@ -1253,6 +1260,9 @@ export function flushAssistant( normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage; } if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens; + if (extra?.maxContextTokens && extra.maxContextTokens > 0) { + metadata.maxContextTokens = extra.maxContextTokens; + } if (extra?.error) metadata.error = extra.error; return { diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 60e0a66e..52a4de13 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -21,6 +21,7 @@ export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [ 'driver', 'chatModel', 'chatApiStyle', + 'chatContextWindow', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', @@ -255,11 +256,17 @@ export class WorkspaceRepo { ): Promise { const db = dbOrTx(this.db, trx); // Assemble the provider object IN SQL. 
Keys are fixed provider field names - // (sql.lit -> inlined literals, no injection); values are bound params cast - // to ::text — postgres.js sends bound params untyped, and jsonb_build_object's - // value args are polymorphic ("any"), so without the explicit ::text cast - // Postgres throws "could not determine data type of parameter $1". The result - // is a real jsonb object, never a double-encoded string. The CASE self-heals + // (sql.lit -> inlined literals, no injection); values are bound params with + // an explicit cast — postgres.js sends bound params untyped, and + // jsonb_build_object's value args are polymorphic ("any"), so without the + // cast Postgres throws "could not determine data type of parameter $1". The + // cast is branched by the JS runtime type so the value lands in jsonb with + // the matching JSON type: a number stays a JSON number (e.g. + // chatContextWindow → `{"chatContextWindow":200000}`, jsonb_typeof 'number'), + // a boolean a JSON boolean, everything else a JSON string. A plain `::text` + // for all would store a numeric field as the JSON STRING `"200000"`, which + // the client's `typeof === "number"` guards reject. The result is a real + // jsonb object, never a double-encoded string. The CASE self-heals // workspaces whose settings.ai.provider was previously corrupted into an // array/string. const entries = Object.entries(provider).filter( @@ -267,7 +274,14 @@ export class WorkspaceRepo { ); const patch = entries.length ? sql`jsonb_build_object(${sql.join( - entries.flatMap(([k, v]) => [sql.lit(k), sql`${v}::text`]), + entries.flatMap(([k, v]) => [ + sql.lit(k), + typeof v === 'number' + ? sql`${v}::numeric` + : typeof v === 'boolean' + ? 
sql`${v}::boolean` + : sql`${v}::text`, + ]), )})` : sql`'{}'::jsonb`; return db diff --git a/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts b/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts index 64a4dbea..04d4705b 100644 --- a/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts +++ b/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts @@ -41,3 +41,35 @@ describe('UpdateAiSettingsDto.chatApiStyle', () => { expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined(); }); }); + +/** DTO validation for chatContextWindow (@IsOptional @IsInt @Min(0)). */ +describe('UpdateAiSettingsDto.chatContextWindow', () => { + const errorsFor = async (chatContextWindow: unknown) => + validate(plainToInstance(UpdateAiSettingsDto, { chatContextWindow })); + + it('accepts a non-negative integer (incl. 0 = clear the limit)', async () => { + for (const v of [0, 200000]) { + const errs = await errorsFor(v); + expect( + errs.find((e) => e.property === 'chatContextWindow'), + ).toBeUndefined(); + } + }); + + it('rejects a negative value', async () => { + const errs = await errorsFor(-1); + expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined(); + }); + + it('rejects a non-integer value', async () => { + const errs = await errorsFor(1.5); + expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined(); + }); + + it('accepts the field being omitted (optional)', async () => { + const errs = await validate(plainToInstance(UpdateAiSettingsDto, {})); + expect( + errs.find((e) => e.property === 'chatContextWindow'), + ).toBeUndefined(); + }); +}); diff --git a/apps/server/src/integrations/ai/ai-settings.service.ts b/apps/server/src/integrations/ai/ai-settings.service.ts index 05020fa9..2c68ad2c 100644 --- a/apps/server/src/integrations/ai/ai-settings.service.ts +++ b/apps/server/src/integrations/ai/ai-settings.service.ts @@ -27,6 +27,8 @@ export interface UpdateAiSettingsInput { driver?: 
AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Chat context-window size (tokens); 0/empty clears the limit. + chatContextWindow?: number; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; @@ -162,6 +164,8 @@ export class AiSettingsService { chatModel: provider.chatModel, // Plain passthrough; getChatModel defaults unset to 'openai-compatible'. chatApiStyle: provider.chatApiStyle, + // Admin-configured context-window size; 0/unset = no limit (badge denominator). + chatContextWindow: provider.chatContextWindow, // Cheap model id for the anonymous public-share assistant; reuses the chat // driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel. publicShareChatModel: provider.publicShareChatModel, @@ -244,6 +248,7 @@ export class AiSettingsService { driver: provider.driver, chatModel: provider.chatModel, chatApiStyle: provider.chatApiStyle, + chatContextWindow: provider.chatContextWindow, embeddingModel: provider.embeddingModel, baseUrl: provider.baseUrl, embeddingBaseUrl: provider.embeddingBaseUrl, diff --git a/apps/server/src/integrations/ai/ai.types.ts b/apps/server/src/integrations/ai/ai.types.ts index 29c8d6f2..7c11f55e 100644 --- a/apps/server/src/integrations/ai/ai.types.ts +++ b/apps/server/src/integrations/ai/ai.types.ts @@ -35,6 +35,13 @@ export interface AiProviderSettings { // Chat provider implementation for the `openai` driver. Unset → defaults to // 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle. chatApiStyle?: ChatApiStyle; + // Admin-configured chat model context-window size, in tokens. There is no + // provider-independent way to discover this (OpenAI's /v1/models usually omits + // it, Gemini/Ollama/OpenRouter each expose it differently), so it is entered + // manually. Surfaced to the chat client (via assistant message metadata) as the + // denominator of the header "current / max" context badge. 
Empty/0 = no limit + // known → the badge shows only the current context size. + chatContextWindow?: number; embeddingModel?: string; baseUrl?: string; // Embedding-specific base URL. Falls back to `baseUrl` when empty/unset. @@ -73,6 +80,7 @@ export const PROVIDER_SETTINGS_KEYS = [ 'driver', 'chatModel', 'chatApiStyle', + 'chatContextWindow', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', @@ -98,6 +106,10 @@ export const PROVIDER_SETTINGS_KEYS = [ export interface ResolvedAiConfig extends Partial { driver?: AiDriver; chatModel?: string; + // Admin-configured chat context-window size (tokens); 0/unset = no limit. Used + // as the header context-badge denominator. Re-declared for parity with the + // explicit fields above. + chatContextWindow?: number; // Cheap model id for the public-share assistant; reuses the chat creds. publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts (empty/unset @@ -117,6 +129,8 @@ export interface MaskedAiSettings { driver?: AiDriver; chatModel?: string; chatApiStyle?: ChatApiStyle; + // Admin-configured chat context-window size (tokens); 0/unset = no limit. + chatContextWindow?: number; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; diff --git a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts index 53aa8220..f2156213 100644 --- a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts +++ b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts @@ -1,4 +1,4 @@ -import { IsIn, IsOptional, IsString } from 'class-validator'; +import { IsIn, IsInt, IsOptional, IsString, Min } from 'class-validator'; import { AI_DRIVERS, AiDriver, @@ -29,6 +29,13 @@ export class UpdateAiSettingsDto { @IsIn(CHAT_API_STYLES) chatApiStyle?: ChatApiStyle; + // Chat model context-window size in tokens (header context-badge denominator). 
+ // 0 (or empty) clears the limit so the badge shows only the current context. + @IsOptional() + @IsInt() + @Min(0) + chatContextWindow?: number; + @IsOptional() @IsString() embeddingModel?: string; diff --git a/apps/server/test/integration/workspace-repo-ai-provider-settings.int-spec.ts b/apps/server/test/integration/workspace-repo-ai-provider-settings.int-spec.ts new file mode 100644 index 00000000..90afc036 --- /dev/null +++ b/apps/server/test/integration/workspace-repo-ai-provider-settings.int-spec.ts @@ -0,0 +1,91 @@ +import { Kysely, sql } from 'kysely'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { getTestDb, destroyTestDb, createWorkspace } from './db'; + +/** + * WorkspaceRepo.updateAiProviderSettings numeric round-trip (#189, #213). + * + * `chatContextWindow` is the first NUMERIC provider field routed through this + * generic SQL layer. The patch builder must cast a JS number so it lands in + * jsonb as a JSON NUMBER, not the JSON STRING `"200000"` — the client guards + * (`typeof === "number"`) reject a string, silently killing the `/ max` badge + * denominator. A plain `::text` cast (the prior code) regressed exactly this. + * These specs are real SQL and assert both the JS value type and the on-disk + * `jsonb_typeof`. + */ +describe('WorkspaceRepo.updateAiProviderSettings (numeric round-trip) [integration]', () => { + let db: Kysely; + let repo: WorkspaceRepo; + + beforeAll(() => { + db = getTestDb(); + repo = new WorkspaceRepo(db as any); + }); + + afterAll(async () => { + await destroyTestDb(); + }); + + it('stores chatContextWindow as a JSON number (not a "200000" string)', async () => { + const ws = await createWorkspace(db, { settings: undefined }); + + const updated = await repo.updateAiProviderSettings(ws.id, { + driver: 'openai', + chatModel: 'gpt-4o', + chatContextWindow: 200000, + }); + + // Returned row: the number survives as a real JS number, alongside the + // string fields which stay strings. 
+ const provider = (updated.settings as any)?.ai?.provider; + expect(provider.chatContextWindow).toBe(200000); + expect(typeof provider.chatContextWindow).toBe('number'); + expect(provider.driver).toBe('openai'); + expect(provider.chatModel).toBe('gpt-4o'); + + // On disk: the jsonb value is typed 'number' (the must-fix assertion), and + // sibling string fields are typed 'string'. + const typed = await db + .selectFrom('workspaces') + .select([ + sql`jsonb_typeof(settings->'ai'->'provider'->'chatContextWindow')`.as( + 'windowType', + ), + sql`jsonb_typeof(settings->'ai'->'provider'->'chatModel')`.as( + 'modelType', + ), + ]) + .where('id', '=', ws.id) + .executeTakeFirstOrThrow(); + + expect(typed.windowType).toBe('number'); + expect(typed.modelType).toBe('string'); + }); + + it('re-reads chatContextWindow as a number after a partial-merge update', async () => { + const ws = await createWorkspace(db, { + settings: { ai: { provider: { driver: 'openai', chatModel: 'x' } } }, + }); + + // Merge in only the numeric field; siblings must be preserved and the value + // must still be a JSON number, not a string. + await repo.updateAiProviderSettings(ws.id, { chatContextWindow: 128000 }); + + const row = await db + .selectFrom('workspaces') + .select([ + 'settings', + sql`jsonb_typeof(settings->'ai'->'provider'->'chatContextWindow')`.as( + 'windowType', + ), + ]) + .where('id', '=', ws.id) + .executeTakeFirstOrThrow(); + + expect(row.windowType).toBe('number'); + const provider = (row.settings as any)?.ai?.provider; + expect(provider.chatContextWindow).toBe(128000); + expect(provider.driver).toBe('openai'); + expect(provider.chatModel).toBe('x'); + }); +});