feat(ai-chat): header badge shows current/max context, max from AI settings (#189)
The floating chat window's header badge flipped meaning — a live per-turn token counter while streaming, the persisted context size at rest — so it "reset to 1" on each prompt and conflated two different numbers. Replace it with a stable "current / max" context badge (e.g. `572 / 200k`). The live "Thinking · N tokens" inside the chat body stays; only the duplicate live counter is removed from the header.

Max comes from a new admin setting "Context window (tokens)". The server resolves it and attaches `maxContextTokens` to the completed assistant turn's metadata (next to `contextTokens`), so the badge needs no client-side model resolution and this survives public shares / per-role models.

Server:
- ai.types: chatContextWindow on AiProviderSettings + PROVIDER_SETTINGS_KEYS + ResolvedAiConfig + MaskedAiSettings.
- workspace.repo: chatContextWindow in AI_PROVIDER_SETTINGS_ALLOWED (parity).
- update-ai-settings.dto: @IsInt @Min(0) chatContextWindow.
- ai-settings.service: coerce the ::text-stored value to a positive int (or undefined) in resolve()/getMasked().
- ai-chat.service: flushAssistant writes metadata.maxContextTokens (>0); the completed turn passes resolved.chatContextWindow.

Client:
- ai-chat.types: maxContextTokens on the message-row metadata.
- ai-chat-window: read maxContextTokens; render "current [/ max]"; drop the liveTurnTokens state/branch and the onLiveTurnTokens prop; new tooltip.
- chat-thread: remove the live-turn-token throttle effect and plumbing.
- count-stream-tokens: drop the now-dead liveTurnTokens()/types; keep estimateTokens.
- settings: chatContextWindow on IAiSettings(+Update) + a NumberInput in the AI provider settings form.

i18n: add the badge/settings keys (en, ru); remove the two now-unused keys.

Tests: flushAssistant maxContextTokens, DTO validation, trim token tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -275,11 +275,12 @@ describe('flushAssistant', () => {
|
||||
expect(f.toolCalls).not.toBeNull();
|
||||
});
|
||||
|
||||
it('completed: attaches finishReason + normalized usage + contextTokens', () => {
|
||||
it('completed: attaches finishReason + normalized usage + contextTokens + maxContextTokens', () => {
|
||||
const f = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
|
||||
contextTokens: 15,
|
||||
maxContextTokens: 200000,
|
||||
});
|
||||
expect(f.status).toBe('completed');
|
||||
expect(f.metadata.finishReason).toBe('stop');
|
||||
@@ -290,6 +291,23 @@ describe('flushAssistant', () => {
|
||||
reasoningTokens: undefined,
|
||||
});
|
||||
expect(f.metadata.contextTokens).toBe(15);
|
||||
expect(f.metadata.maxContextTokens).toBe(200000);
|
||||
});
|
||||
|
||||
it('completed: omits maxContextTokens when unset or 0', () => {
|
||||
// No maxContextTokens in the extra (admin set no context window).
|
||||
const f = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
contextTokens: 15,
|
||||
});
|
||||
expect('maxContextTokens' in f.metadata).toBe(false);
|
||||
// Explicit 0 is treated the same as unset (no limit -> key omitted).
|
||||
const f0 = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
contextTokens: 15,
|
||||
maxContextTokens: 0,
|
||||
});
|
||||
expect('maxContextTokens' in f0.metadata).toBe(false);
|
||||
});
|
||||
|
||||
it('error: records the error and a derived finishReason', () => {
|
||||
|
||||
@@ -616,6 +616,10 @@ export class AiChatService implements OnModuleInit {
|
||||
contextTokens:
|
||||
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
|
||||
undefined,
|
||||
// Max context window for the chat header badge denominator;
|
||||
// resolved from the admin-configured provider settings (in
|
||||
// closure scope here). Omitted/0 = no limit.
|
||||
maxContextTokens: resolved?.chatContextWindow,
|
||||
}),
|
||||
);
|
||||
// Lifecycle: release the external MCP clients leased for this turn.
|
||||
@@ -1212,8 +1216,9 @@ export async function applyFinalize(
|
||||
* `metadata.parts` is built by assistantParts over the finished steps, then the
|
||||
* in-progress text appended as a trailing text part, so rowToUiMessage /
|
||||
* findRecent keep replaying the turn unchanged. `metadata.finishReason`,
|
||||
* `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
|
||||
* only when provided/relevant, matching the pre-#183 onFinish/onError records.
|
||||
* `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
|
||||
* `metadata.maxContextTokens` are attached only when provided/relevant, matching
|
||||
* the pre-#183 onFinish/onError records.
|
||||
*/
|
||||
export function flushAssistant(
|
||||
capturedSteps: ReadonlyArray<StepLike> | undefined,
|
||||
@@ -1223,6 +1228,7 @@ export function flushAssistant(
|
||||
finishReason?: string;
|
||||
usage?: ChatStreamUsage | StreamUsage | undefined;
|
||||
contextTokens?: number;
|
||||
maxContextTokens?: number;
|
||||
error?: string;
|
||||
},
|
||||
): AssistantFlush {
|
||||
@@ -1253,6 +1259,8 @@ export function flushAssistant(
|
||||
normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
|
||||
}
|
||||
if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
|
||||
if (extra?.maxContextTokens)
|
||||
metadata.maxContextTokens = extra.maxContextTokens;
|
||||
if (extra?.error) metadata.error = extra.error;
|
||||
|
||||
return {
|
||||
|
||||
@@ -20,6 +20,7 @@ import { DB, Workspaces } from '@docmost/db/types/db';
|
||||
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatContextWindow',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
|
||||
@@ -41,3 +41,35 @@ describe('UpdateAiSettingsDto.chatApiStyle', () => {
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
/** DTO validation for the new chatContextWindow field (@IsInt @Min(0)). */
|
||||
describe('UpdateAiSettingsDto.chatContextWindow', () => {
|
||||
const errorsFor = async (chatContextWindow: unknown) =>
|
||||
validate(plainToInstance(UpdateAiSettingsDto, { chatContextWindow }));
|
||||
|
||||
it('accepts a non-negative integer (incl. 0 = clear the limit)', async () => {
|
||||
for (const v of [0, 200000]) {
|
||||
const errs = await errorsFor(v);
|
||||
expect(
|
||||
errs.find((e) => e.property === 'chatContextWindow'),
|
||||
).toBeUndefined();
|
||||
}
|
||||
});
|
||||
|
||||
it('rejects a negative value', async () => {
|
||||
const errs = await errorsFor(-1);
|
||||
expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined();
|
||||
});
|
||||
|
||||
it('rejects a non-integer value', async () => {
|
||||
const errs = await errorsFor(1.5);
|
||||
expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined();
|
||||
});
|
||||
|
||||
it('accepts the field being omitted (optional)', async () => {
|
||||
const errs = await validate(plainToInstance(UpdateAiSettingsDto, {}));
|
||||
expect(
|
||||
errs.find((e) => e.property === 'chatContextWindow'),
|
||||
).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -26,6 +26,8 @@ import {
|
||||
export interface UpdateAiSettingsInput {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens for the chat header badge. 0/empty = no limit.
|
||||
chatContextWindow?: number;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
@@ -157,9 +159,20 @@ export class AiSettingsService {
|
||||
const provider = await this.readProvider(workspaceId);
|
||||
if (!provider.driver) return null;
|
||||
|
||||
// Provider values are stored as ::text (see workspace.repo.ts), so
|
||||
// chatContextWindow arrives as a string here; parse it back to a positive
|
||||
// integer or undefined.
|
||||
const ctxWindow = Number(provider.chatContextWindow);
|
||||
|
||||
const config: ResolvedAiConfig = {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
// Max context window for the chat header badge denominator. 0/unset = no
|
||||
// limit.
|
||||
chatContextWindow:
|
||||
Number.isFinite(ctxWindow) && ctxWindow > 0
|
||||
? Math.floor(ctxWindow)
|
||||
: undefined,
|
||||
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
// Cheap model id for the anonymous public-share assistant; reuses the chat
|
||||
@@ -219,6 +232,15 @@ export class AiSettingsService {
|
||||
async getMasked(workspaceId: string): Promise<MaskedAiSettings> {
|
||||
const provider = await this.readProvider(workspaceId);
|
||||
|
||||
// Provider values are stored as ::text (see workspace.repo.ts), so
|
||||
// chatContextWindow arrives as a string; coerce it to a positive integer or
|
||||
// undefined so the client receives a real number.
|
||||
const ctxWindow = Number(provider.chatContextWindow);
|
||||
const chatContextWindow =
|
||||
Number.isFinite(ctxWindow) && ctxWindow > 0
|
||||
? Math.floor(ctxWindow)
|
||||
: undefined;
|
||||
|
||||
let hasApiKey = false;
|
||||
let hasEmbeddingApiKey = false;
|
||||
let hasSttApiKey = false;
|
||||
@@ -243,6 +265,7 @@ export class AiSettingsService {
|
||||
return {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
chatContextWindow,
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
embeddingModel: provider.embeddingModel,
|
||||
baseUrl: provider.baseUrl,
|
||||
|
||||
@@ -32,6 +32,9 @@ export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai'];
|
||||
export interface AiProviderSettings {
|
||||
driver: AiDriver;
|
||||
chatModel: string;
|
||||
// Max context window in tokens; surfaced to the chat header badge as the
|
||||
// denominator ("current / max"). 0/unset = no limit (badge shows no denominator).
|
||||
chatContextWindow?: number;
|
||||
// Chat provider implementation for the `openai` driver. Unset → defaults to
|
||||
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
@@ -72,6 +75,7 @@ export interface AiProviderSettings {
|
||||
export const PROVIDER_SETTINGS_KEYS = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatContextWindow',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
@@ -98,6 +102,9 @@ export const PROVIDER_SETTINGS_KEYS = [
|
||||
export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens; surfaced to the chat header badge as the
|
||||
// "current / max" denominator. 0/unset = no limit.
|
||||
chatContextWindow?: number;
|
||||
// Cheap model id for the public-share assistant; reuses the chat creds.
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts (empty/unset
|
||||
@@ -116,6 +123,9 @@ export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||
export interface MaskedAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens; the chat header badge denominator. 0/unset =
|
||||
// no limit.
|
||||
chatContextWindow?: number;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { IsIn, IsOptional, IsString } from 'class-validator';
|
||||
import { IsIn, IsInt, IsOptional, IsString, Min } from 'class-validator';
|
||||
import {
|
||||
AI_DRIVERS,
|
||||
AiDriver,
|
||||
@@ -25,6 +25,13 @@ export class UpdateAiSettingsDto {
|
||||
@IsString()
|
||||
chatModel?: string;
|
||||
|
||||
// Max context window in tokens shown in the chat header badge. 0/empty =
|
||||
// clear the limit (no denominator shown).
|
||||
@IsOptional()
|
||||
@IsInt()
|
||||
@Min(0)
|
||||
chatContextWindow?: number;
|
||||
|
||||
@IsOptional()
|
||||
@IsIn(CHAT_API_STYLES)
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
|
||||
Reference in New Issue
Block a user