feat(ai-chat): header badge shows current/max context, max from AI settings (#189)
The floating chat window's header badge changed meaning depending on state — a live per-turn token counter while streaming, the persisted context size at rest — so it appeared to "reset to 1" on each prompt and conflated two different numbers.

Replace it with a stable "current / max" context badge (e.g. `572 / 200k`). The live "Thinking · N tokens" counter inside the chat body stays; only the duplicate live counter is removed from the header.

The max comes from a new admin setting, "Context window (tokens)". The server resolves it and attaches `maxContextTokens` to the completed assistant turn's metadata (next to `contextTokens`), so the badge needs no client-side model resolution, and the value survives public shares and per-role models.

Server:
- ai.types: chatContextWindow on AiProviderSettings + PROVIDER_SETTINGS_KEYS + ResolvedAiConfig + MaskedAiSettings.
- workspace.repo: chatContextWindow in AI_PROVIDER_SETTINGS_ALLOWED (parity).
- update-ai-settings.dto: @IsInt @Min(0) chatContextWindow.
- ai-settings.service: coerce the ::text-stored value to a positive int in resolve()/getMasked().
- ai-chat.service: flushAssistant writes metadata.maxContextTokens (only when > 0); the completed turn passes resolved.chatContextWindow.

Client:
- ai-chat.types: maxContextTokens on the message-row metadata.
- ai-chat-window: read maxContextTokens; render "current [/ max]"; drop the liveTurnTokens state/branch and the onLiveTurnTokens prop; new tooltip.
- chat-thread: remove the live-turn-token throttle effect and plumbing.
- count-stream-tokens: drop the now-dead liveTurnTokens() and its types; keep estimateTokens.
- settings: chatContextWindow on IAiSettings(+Update) + a NumberInput in the AI provider settings form.

i18n: add the badge/settings keys (en, ru); remove the two now-unused keys.

Tests: flushAssistant maxContextTokens, DTO validation, trim the token tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1169,8 +1169,9 @@
|
|||||||
"Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.",
|
"Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.",
|
||||||
"Built-in assistant persona": "Built-in assistant persona",
|
"Built-in assistant persona": "Built-in assistant persona",
|
||||||
"Minimize": "Minimize",
|
"Minimize": "Minimize",
|
||||||
"Current context size": "Current context size",
|
"Context size / model limit": "Context size / model limit",
|
||||||
"Tokens generated this turn": "Tokens generated this turn",
|
"Context window (tokens)": "Context window (tokens)",
|
||||||
|
"Shown as used / total in the chat header. Leave empty to hide the limit.": "Shown as used / total in the chat header. Leave empty to hide the limit.",
|
||||||
"AI agent": "AI agent",
|
"AI agent": "AI agent",
|
||||||
"Take a look at the current document": "Take a look at the current document",
|
"Take a look at the current document": "Take a look at the current document",
|
||||||
"AI agent is typing…": "AI agent is typing…",
|
"AI agent is typing…": "AI agent is typing…",
|
||||||
|
|||||||
@@ -704,8 +704,9 @@
|
|||||||
"Ask the AI agent…": "Спросите AI-агента…",
|
"Ask the AI agent…": "Спросите AI-агента…",
|
||||||
"Copy chat": "Копировать чат",
|
"Copy chat": "Копировать чат",
|
||||||
"Created successfully": "Успешно создано",
|
"Created successfully": "Успешно создано",
|
||||||
"Current context size": "Текущий размер контекста",
|
"Context size / model limit": "Размер контекста / лимит модели",
|
||||||
"Tokens generated this turn": "Токенов сгенерировано за ход",
|
"Context window (tokens)": "Окно контекста (токены)",
|
||||||
|
"Shown as used / total in the chat header. Leave empty to hide the limit.": "Показывается в шапке чата как использовано / всего. Пусто — лимит скрыт.",
|
||||||
"Delete this chat?": "Удалить этот чат?",
|
"Delete this chat?": "Удалить этот чат?",
|
||||||
"Deleted successfully": "Успешно удалено",
|
"Deleted successfully": "Успешно удалено",
|
||||||
"Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}",
|
"Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}",
|
||||||
|
|||||||
@@ -161,12 +161,6 @@ export default function AiChatWindow() {
|
|||||||
const { data: messageRows, isLoading: messagesLoading } =
|
const { data: messageRows, isLoading: messagesLoading } =
|
||||||
useAiChatMessagesQuery(activeChatId ?? undefined);
|
useAiChatMessagesQuery(activeChatId ?? undefined);
|
||||||
|
|
||||||
// Live turn-token total (reasoning + output) for the in-flight turn, pushed up
|
|
||||||
// (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream.
|
|
||||||
// `null` means no turn is in flight -> the badge falls back to the persisted
|
|
||||||
// context size below.
|
|
||||||
const [liveTurnTokens, setLiveTurnTokens] = useState<number | null>(null);
|
|
||||||
|
|
||||||
// The page the user is currently viewing. AiChatWindow lives in a pathless
|
// The page the user is currently viewing. AiChatWindow lives in a pathless
|
||||||
// parent layout route, so useParams() can't see :pageSlug. Match the full
|
// parent layout route, so useParams() can't see :pageSlug. Match the full
|
||||||
// pathname against the authenticated page route instead so "the current page"
|
// pathname against the authenticated page route instead so "the current page"
|
||||||
@@ -306,6 +300,25 @@ export default function AiChatWindow() {
|
|||||||
return 0;
|
return 0;
|
||||||
}, [activeChatId, messageRows]);
|
}, [activeChatId, messageRows]);
|
||||||
|
|
||||||
|
// The model's max context window (badge denominator). Read the most recent row
|
||||||
|
// carrying `maxContextTokens` (set alongside contextTokens on a completed
|
||||||
|
// turn); 0 when no row has it (older rows, or no admin-configured limit) — the
|
||||||
|
// badge then shows just the current size with no denominator.
|
||||||
|
const maxContextTokens = useMemo(() => {
|
||||||
|
if (!activeChatId || !messageRows) return 0;
|
||||||
|
for (let i = messageRows.length - 1; i >= 0; i--) {
|
||||||
|
const meta = messageRows[i].metadata;
|
||||||
|
if (!meta) continue;
|
||||||
|
if (
|
||||||
|
typeof meta.maxContextTokens === "number" &&
|
||||||
|
meta.maxContextTokens > 0
|
||||||
|
) {
|
||||||
|
return meta.maxContextTokens;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}, [activeChatId, messageRows]);
|
||||||
|
|
||||||
// On (re)open, settle the geometry before paint (useLayoutEffect → no
|
// On (re)open, settle the geometry before paint (useLayoutEffect → no
|
||||||
// first-frame jump): compute an initial top-right placement the first time,
|
// first-frame jump): compute an initial top-right placement the first time,
|
||||||
// and re-clamp an existing geometry to the current viewport on later opens
|
// and re-clamp an existing geometry to the current viewport on later opens
|
||||||
@@ -495,20 +508,17 @@ export default function AiChatWindow() {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
<div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
|
<div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
|
||||||
{/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz);
|
{/* Always show the persisted "current / max" context. The denominator
|
||||||
once it finishes, fall back to the persisted context size. Require
|
(the admin-configured model limit) is appended only when known;
|
||||||
> 0 so the very first emit (an empty tail message, count 0) does not
|
not clamped when current > max (shown as-is, e.g. "210k / 200k").
|
||||||
flash a "0" badge before any token streams in (#151 review). */}
|
Hidden entirely until a turn has recorded a context figure. */}
|
||||||
{liveTurnTokens !== null && liveTurnTokens > 0 ? (
|
{contextTokens > 0 ? (
|
||||||
<Tooltip label={t("Tokens generated this turn")} withArrow>
|
<Tooltip label={t("Context size / model limit")} withArrow>
|
||||||
<span className={classes.badge}>
|
|
||||||
{formatTokens(liveTurnTokens)}
|
|
||||||
</span>
|
|
||||||
</Tooltip>
|
|
||||||
) : contextTokens > 0 ? (
|
|
||||||
<Tooltip label={t("Current context size")} withArrow>
|
|
||||||
<span className={classes.badge}>
|
<span className={classes.badge}>
|
||||||
{formatTokens(contextTokens)}
|
{formatTokens(contextTokens)}
|
||||||
|
{maxContextTokens > 0
|
||||||
|
? ` / ${formatTokens(maxContextTokens)}`
|
||||||
|
: ""}
|
||||||
</span>
|
</span>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
) : null}
|
) : null}
|
||||||
@@ -634,7 +644,6 @@ export default function AiChatWindow() {
|
|||||||
assistantName={currentRole?.name}
|
assistantName={currentRole?.name}
|
||||||
onTurnFinished={onTurnFinished}
|
onTurnFinished={onTurnFinished}
|
||||||
onServerChatId={onServerChatId}
|
onServerChatId={onServerChatId}
|
||||||
onLiveTurnTokens={setLiveTurnTokens}
|
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ import {
|
|||||||
} from "@/features/ai-chat/utils/role-launch.ts";
|
} from "@/features/ai-chat/utils/role-launch.ts";
|
||||||
import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
|
import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
|
||||||
import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts";
|
import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts";
|
||||||
import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
|
||||||
import {
|
import {
|
||||||
dequeue,
|
dequeue,
|
||||||
enqueueMessage,
|
enqueueMessage,
|
||||||
@@ -67,12 +66,6 @@ interface ChatThreadProps {
|
|||||||
* Copy/export button available mid-stream). Distinct from onTurnFinished,
|
* Copy/export button available mid-stream). Distinct from onTurnFinished,
|
||||||
* which fires only at the terminal outcome. */
|
* which fires only at the terminal outcome. */
|
||||||
onServerChatId?: (serverChatId?: string) => void;
|
onServerChatId?: (serverChatId?: string) => void;
|
||||||
/** Reports the live turn-token total (reasoning + output) for the in-flight
|
|
||||||
* turn so the parent can show a header badge that ticks mid-stream. THROTTLED
|
|
||||||
* here (~8 Hz) so the parent re-renders a handful of times a second, not on
|
|
||||||
* every streamed delta. Called with `null` when no turn is in flight (the
|
|
||||||
* parent then reverts the badge to the persisted context size). */
|
|
||||||
onLiveTurnTokens?: (tokens: number | null) => void;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -117,7 +110,6 @@ export default function ChatThread({
|
|||||||
assistantName,
|
assistantName,
|
||||||
onTurnFinished,
|
onTurnFinished,
|
||||||
onServerChatId,
|
onServerChatId,
|
||||||
onLiveTurnTokens,
|
|
||||||
}: ChatThreadProps) {
|
}: ChatThreadProps) {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
|
|
||||||
@@ -328,53 +320,6 @@ export default function ChatThread({
|
|||||||
// the SAME on-screen banner text can be mirrored into the export (issue #160).
|
// the SAME on-screen banner text can be mirrored into the export (issue #160).
|
||||||
const errorView = error ? describeChatError(error.message ?? "", t) : null;
|
const errorView = error ? describeChatError(error.message ?? "", t) : null;
|
||||||
|
|
||||||
// Report the live turn-token total to the parent header badge, THROTTLED to
|
|
||||||
// ~8 Hz so the parent re-renders a few times a second instead of on every
|
|
||||||
// streamed delta. The tail assistant message's reasoning+output (estimate while
|
|
||||||
// streaming, authoritative once a step reports usage) is the live figure. When
|
|
||||||
// the turn ends we emit a final exact value, then `null` so the parent reverts
|
|
||||||
// the badge to the persisted context size.
|
|
||||||
const lastEmitRef = useRef(0);
|
|
||||||
const emitTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
|
||||||
useEffect(() => {
|
|
||||||
if (!onLiveTurnTokens) return;
|
|
||||||
if (!isStreaming) {
|
|
||||||
// Turn ended (or never started): clear any pending throttle and revert.
|
|
||||||
if (emitTimerRef.current) {
|
|
||||||
clearTimeout(emitTimerRef.current);
|
|
||||||
emitTimerRef.current = null;
|
|
||||||
}
|
|
||||||
lastEmitRef.current = 0;
|
|
||||||
onLiveTurnTokens(null);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const tail = messages[messages.length - 1];
|
|
||||||
const live = tail?.role === "assistant" ? liveTurnTokens(tail) : null;
|
|
||||||
const total = live ? live.reasoning + live.output : 0;
|
|
||||||
const now = Date.now();
|
|
||||||
const MIN_INTERVAL = 120; // ms (~8 Hz)
|
|
||||||
const elapsed = now - lastEmitRef.current;
|
|
||||||
if (elapsed >= MIN_INTERVAL) {
|
|
||||||
lastEmitRef.current = now;
|
|
||||||
onLiveTurnTokens(total);
|
|
||||||
} else if (!emitTimerRef.current) {
|
|
||||||
// Schedule a trailing emit so the FINAL value of a burst is not dropped.
|
|
||||||
emitTimerRef.current = setTimeout(() => {
|
|
||||||
emitTimerRef.current = null;
|
|
||||||
lastEmitRef.current = Date.now();
|
|
||||||
onLiveTurnTokens(total);
|
|
||||||
}, MIN_INTERVAL - elapsed);
|
|
||||||
}
|
|
||||||
}, [messages, isStreaming, onLiveTurnTokens]);
|
|
||||||
|
|
||||||
// Clear any pending throttle timer on unmount (chat switch via `key`) so a
|
|
||||||
// trailing emit can't fire into a torn-down thread's parent.
|
|
||||||
useEffect(() => {
|
|
||||||
return () => {
|
|
||||||
if (emitTimerRef.current) clearTimeout(emitTimerRef.current);
|
|
||||||
};
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
// A role was picked with autoStart=false: the role is bound but NOTHING was
|
// A role was picked with autoStart=false: the role is bound but NOTHING was
|
||||||
// sent, so chatId stays null and the empty state would keep showing the cards.
|
// sent, so chatId stays null and the empty state would keep showing the cards.
|
||||||
// This flag hides the cards and reveals the composer (with the role indicated)
|
// This flag hides the cards and reveals the composer (with the role indicated)
|
||||||
|
|||||||
@@ -116,6 +116,9 @@ export interface IAiChatMessageRow {
|
|||||||
// turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
|
// turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
|
||||||
// floating window's header badge.
|
// floating window's header badge.
|
||||||
contextTokens?: number;
|
contextTokens?: number;
|
||||||
|
// The model's max context window (denominator for the header badge); set
|
||||||
|
// alongside contextTokens on a completed turn; absent on older rows.
|
||||||
|
maxContextTokens?: number;
|
||||||
// Set on an assistant row whose turn ended in a provider/stream error; the
|
// Set on an assistant row whose turn ended in a provider/stream error; the
|
||||||
// raw provider error text (e.g. "402: ...") for inline display in the thread.
|
// raw provider error text (e.g. "402: ...") for inline display in the thread.
|
||||||
error?: string;
|
error?: string;
|
||||||
|
|||||||
@@ -1,17 +1,5 @@
|
|||||||
import { describe, expect, it } from "vitest";
|
import { describe, expect, it } from "vitest";
|
||||||
import type { UIMessage } from "@ai-sdk/react";
|
import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
||||||
import {
|
|
||||||
estimateTokens,
|
|
||||||
liveTurnTokens,
|
|
||||||
} from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
|
||||||
|
|
||||||
const msg = (parts: unknown[], metadata?: unknown): UIMessage =>
|
|
||||||
({
|
|
||||||
id: Math.random().toString(),
|
|
||||||
role: "assistant",
|
|
||||||
parts,
|
|
||||||
metadata,
|
|
||||||
}) as UIMessage;
|
|
||||||
|
|
||||||
describe("estimateTokens", () => {
|
describe("estimateTokens", () => {
|
||||||
it("returns 0 for the empty string", () => {
|
it("returns 0 for the empty string", () => {
|
||||||
@@ -25,147 +13,3 @@ describe("estimateTokens", () => {
|
|||||||
expect(estimateTokens("12345678")).toBe(2);
|
expect(estimateTokens("12345678")).toBe(2);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("liveTurnTokens — estimate path", () => {
|
|
||||||
it("is all zeros for an undefined message", () => {
|
|
||||||
expect(liveTurnTokens(undefined)).toEqual({
|
|
||||||
reasoning: 0,
|
|
||||||
output: 0,
|
|
||||||
authoritative: false,
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("is all zeros for a parts-less message", () => {
|
|
||||||
expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({
|
|
||||||
reasoning: 0,
|
|
||||||
output: 0,
|
|
||||||
authoritative: false,
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("estimates output from text parts", () => {
|
|
||||||
// 8 chars -> 2 tokens.
|
|
||||||
const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }]));
|
|
||||||
expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("estimates reasoning from reasoning parts (kept separate from output)", () => {
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([
|
|
||||||
{ type: "reasoning", text: "12345678" },
|
|
||||||
{ type: "text", text: "abcd" },
|
|
||||||
]),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("accumulates across multiple text + reasoning parts (multi-step)", () => {
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([
|
|
||||||
{ type: "reasoning", text: "abcd" }, // 1
|
|
||||||
{ type: "text", text: "abcd" }, // 1
|
|
||||||
{ type: "tool-getPage", state: "output-available" }, // ignored
|
|
||||||
{ type: "reasoning", text: "abcd" }, // 1
|
|
||||||
{ type: "text", text: "abcdefgh" }, // 2
|
|
||||||
]),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("ignores non text/reasoning parts (tools, step-start)", () => {
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([
|
|
||||||
{ type: "step-start" },
|
|
||||||
{ type: "tool-getPage", state: "input-available" },
|
|
||||||
]),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false });
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("liveTurnTokens — authoritative path", () => {
|
|
||||||
it("returns authoritative usage verbatim, splitting reasoning out of output", () => {
|
|
||||||
// outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30.
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([{ type: "text", text: "estimate would be tiny" }], {
|
|
||||||
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("treats missing reasoningTokens as 0 and keeps full output", () => {
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([{ type: "text", text: "x" }], {
|
|
||||||
usage: { inputTokens: 10, outputTokens: 42 },
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("never returns a negative output when reasoning exceeds reported output", () => {
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("falls back to the estimate when metadata has no usage object", () => {
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([{ type: "text", text: "abcd" }], { chatId: "c1" }),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false });
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("liveTurnTokens — combined authoritative + estimate (#163)", () => {
|
|
||||||
it("ticks the in-flight step above the completed-steps authoritative base", () => {
|
|
||||||
// The authoritative usage is the sum over COMPLETED steps (step 1). The
|
|
||||||
// CURRENT step is streaming and its text is NOT in `usage` yet, but it IS in
|
|
||||||
// the parts -> the running estimate must push the live figure above the base
|
|
||||||
// so the badge keeps growing between step boundaries.
|
|
||||||
const longText = "x".repeat(800); // 800 chars -> 200 est output tokens
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([{ type: "text", text: longText }], {
|
|
||||||
usage: { inputTokens: 500, outputTokens: 40 }, // step-1 base: 40 output
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
// max(authOutput=40, estOutput=200) = 200 -> the counter ticks, not frozen.
|
|
||||||
expect(r.output).toBe(200);
|
|
||||||
expect(r.authoritative).toBe(true);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => {
|
|
||||||
const longReasoning = "r".repeat(400); // 400 chars -> 100 est reasoning
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([{ type: "reasoning", text: longReasoning }], {
|
|
||||||
usage: { inputTokens: 100, outputTokens: 20, reasoningTokens: 20 },
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
// reasoning: max(20, 100) = 100 ; output: max(max(0,20-20)=0, 0) = 0.
|
|
||||||
expect(r.reasoning).toBe(100);
|
|
||||||
expect(r.output).toBe(0);
|
|
||||||
expect(r.authoritative).toBe(true);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("snaps to the authoritative figure once it exceeds the rough estimate", () => {
|
|
||||||
// Short on-screen text (estimate tiny) but a large authoritative output:
|
|
||||||
// the exact figure wins at the boundary (the counter never under-reports).
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([{ type: "text", text: "abcd" }], {
|
|
||||||
usage: { inputTokens: 10, outputTokens: 5000 },
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
expect(r.output).toBe(5000);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => {
|
|
||||||
// Mirrors the legacy 'verbatim' tests: estimate < authoritative -> unchanged.
|
|
||||||
const r = liveTurnTokens(
|
|
||||||
msg([{ type: "text", text: "tiny" }], {
|
|
||||||
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|||||||
@@ -1,18 +1,11 @@
|
|||||||
import type { UIMessage } from "@ai-sdk/react";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Live token counting for a streaming AI-chat turn — split into REASONING
|
* Rough client-side token estimation for AI-chat UI affordances.
|
||||||
* (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows
|
|
||||||
* `Thinking… · 60 tokens` next to its thinking indicator.
|
|
||||||
*
|
*
|
||||||
* No provider streams exact per-token usage mid-stream, so the live number is a
|
* No provider streams exact per-token usage mid-stream, so any in-flight figure
|
||||||
* CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage
|
* is a CLIENT ESTIMATE (chars/≈4 heuristic). Pure + unit-testable: it never runs
|
||||||
* once the server attaches it on a step/turn boundary (see the server's
|
* a real BPE tokenizer (that would be O(n²) on the hot path, bloat the bundle,
|
||||||
* `chatStreamMetadata` + the client's read of `message.metadata.usage`). When
|
* and be wrong for Gemini/Ollama anyway). Used by the in-body reasoning counter
|
||||||
* authoritative usage is present we return it verbatim (the number "jumps to
|
* ("Thinking · N tokens").
|
||||||
* exact"); otherwise we return the running estimate. Pure + unit-testable: it
|
|
||||||
* never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the
|
|
||||||
* bundle, and be wrong for Gemini/Ollama anyway).
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -24,90 +17,3 @@ export function estimateTokens(text: string): number {
|
|||||||
if (!text) return 0;
|
if (!text) return 0;
|
||||||
return Math.ceil(text.length / 4);
|
return Math.ceil(text.length / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Authoritative per-step/turn usage the server attaches to message metadata. */
|
|
||||||
export interface AuthoritativeUsage {
|
|
||||||
inputTokens?: number;
|
|
||||||
outputTokens?: number;
|
|
||||||
totalTokens?: number;
|
|
||||||
reasoningTokens?: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Live token split for a turn's tail (streaming) assistant message. */
|
|
||||||
export interface LiveTurnTokens {
|
|
||||||
/** Thinking/reasoning tokens (estimate, or authoritative when available). */
|
|
||||||
reasoning: number;
|
|
||||||
/** Answer/output tokens (estimate, or authoritative when available). */
|
|
||||||
output: number;
|
|
||||||
/** True when the numbers come from authoritative server usage, not estimate. */
|
|
||||||
authoritative: boolean;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */
|
|
||||||
function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined {
|
|
||||||
const meta = message?.metadata as
|
|
||||||
| { usage?: AuthoritativeUsage }
|
|
||||||
| undefined;
|
|
||||||
const usage = meta?.usage;
|
|
||||||
if (!usage || typeof usage !== "object") return undefined;
|
|
||||||
return usage;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Token split for the given (streaming) assistant message.
|
|
||||||
*
|
|
||||||
* COMBINES the authoritative server usage with the running text estimate so the
|
|
||||||
* counter ticks in real time AND lands exact. The server only attaches
|
|
||||||
* `metadata.usage` at a step/turn boundary (`finish-step`/`finish`) and it is
|
|
||||||
* CUMULATIVE over COMPLETED steps — it does NOT yet include the in-flight step.
|
|
||||||
* So a multi-step turn that returned the authoritative figure verbatim would
|
|
||||||
* FREEZE between boundaries and jump in steps (issue #163).
|
|
||||||
*
|
|
||||||
* Instead we always compute the running ESTIMATE (chars/≈4 over the message's
|
|
||||||
* `reasoning`/`text` parts, which grows on every streamed delta) and take the
|
|
||||||
* per-component MAX of the authoritative base and the estimate:
|
|
||||||
* - between boundaries the estimate of the in-flight step ticks the number up;
|
|
||||||
* - at a boundary the authoritative figure snaps it to exact;
|
|
||||||
* - because the server's usage is cumulative and we only ever take the max, the
|
|
||||||
* number is MONOTONIC — it never drops.
|
|
||||||
*
|
|
||||||
* Providers that don't stream reasoning text still surface a reasoning count once
|
|
||||||
* the authoritative usage arrives (`max(reasoningTokens, 0)`); on the pure
|
|
||||||
* estimate path (no usage yet) such a turn shows `reasoning: 0` until then.
|
|
||||||
*/
|
|
||||||
export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens {
|
|
||||||
if (!message) return { reasoning: 0, output: 0, authoritative: false };
|
|
||||||
|
|
||||||
// Running ESTIMATE over every reasoning/text part — grows on each delta. This
|
|
||||||
// includes the IN-FLIGHT step, which the authoritative usage does not cover yet.
|
|
||||||
let estReasoning = 0;
|
|
||||||
let estOutput = 0;
|
|
||||||
for (const part of message.parts ?? []) {
|
|
||||||
if (part.type === "reasoning") {
|
|
||||||
estReasoning += estimateTokens((part as { text?: string }).text ?? "");
|
|
||||||
} else if (part.type === "text") {
|
|
||||||
estOutput += estimateTokens((part as { text?: string }).text ?? "");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const usage = metadataUsage(message);
|
|
||||||
if (!usage) {
|
|
||||||
// No authoritative usage streamed yet: the estimate IS the live figure.
|
|
||||||
return { reasoning: estReasoning, output: estOutput, authoritative: false };
|
|
||||||
}
|
|
||||||
|
|
||||||
// Authoritative sum over COMPLETED steps. `outputTokens` already INCLUDES
|
|
||||||
// reasoning in the AI SDK usage shape, so subtract it out for the "answer"
|
|
||||||
// figure (never go negative if a provider reports them inconsistently).
|
|
||||||
const authReasoning = usage.reasoningTokens ?? 0;
|
|
||||||
const authOutput = Math.max(0, (usage.outputTokens ?? 0) - authReasoning);
|
|
||||||
|
|
||||||
// Per-component max: the in-flight step's estimate ticks above the completed-
|
|
||||||
// steps base between boundaries, and the authoritative figure wins once it
|
|
||||||
// exceeds the (rough) estimate at the next boundary. Monotonic by construction.
|
|
||||||
return {
|
|
||||||
reasoning: Math.max(authReasoning, estReasoning),
|
|
||||||
output: Math.max(authOutput, estOutput),
|
|
||||||
authoritative: true,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import {
|
|||||||
Button,
|
Button,
|
||||||
Group,
|
Group,
|
||||||
Modal,
|
Modal,
|
||||||
|
NumberInput,
|
||||||
Paper,
|
Paper,
|
||||||
PasswordInput,
|
PasswordInput,
|
||||||
Select,
|
Select,
|
||||||
@@ -83,6 +84,9 @@ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [
|
|||||||
// (empty means "leave unchanged" unless explicitly cleared).
|
// (empty means "leave unchanged" unless explicitly cleared).
|
||||||
const formSchema = z.object({
|
const formSchema = z.object({
|
||||||
chatModel: z.string(),
|
chatModel: z.string(),
|
||||||
|
// Max context window in tokens shown in the chat header badge. A number, or ""
|
||||||
|
// when the NumberInput is empty (no limit).
|
||||||
|
chatContextWindow: z.union([z.number(), z.literal("")]),
|
||||||
// Chat provider implementation (reasoning surfacing). Default openai-compatible.
|
// Chat provider implementation (reasoning surfacing). Default openai-compatible.
|
||||||
chatApiStyle: z.enum(["openai-compatible", "openai"]),
|
chatApiStyle: z.enum(["openai-compatible", "openai"]),
|
||||||
// Cheap model id for the anonymous public-share assistant; empty = use chatModel.
|
// Cheap model id for the anonymous public-share assistant; empty = use chatModel.
|
||||||
@@ -311,6 +315,7 @@ export default function AiProviderSettings() {
|
|||||||
validate: zod4Resolver(formSchema),
|
validate: zod4Resolver(formSchema),
|
||||||
initialValues: {
|
initialValues: {
|
||||||
chatModel: "",
|
chatModel: "",
|
||||||
|
chatContextWindow: "",
|
||||||
chatApiStyle: "openai-compatible" as ChatApiStyle,
|
chatApiStyle: "openai-compatible" as ChatApiStyle,
|
||||||
publicShareChatModel: "",
|
publicShareChatModel: "",
|
||||||
publicShareAssistantRoleId: "",
|
publicShareAssistantRoleId: "",
|
||||||
@@ -334,6 +339,7 @@ export default function AiProviderSettings() {
|
|||||||
if (!settings) return;
|
if (!settings) return;
|
||||||
form.setValues({
|
form.setValues({
|
||||||
chatModel: settings.chatModel ?? "",
|
chatModel: settings.chatModel ?? "",
|
||||||
|
chatContextWindow: settings.chatContextWindow ?? "",
|
||||||
chatApiStyle: settings.chatApiStyle ?? "openai-compatible",
|
chatApiStyle: settings.chatApiStyle ?? "openai-compatible",
|
||||||
publicShareChatModel: settings.publicShareChatModel ?? "",
|
publicShareChatModel: settings.publicShareChatModel ?? "",
|
||||||
publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "",
|
publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "",
|
||||||
@@ -364,6 +370,12 @@ export default function AiProviderSettings() {
|
|||||||
// Everything is OpenAI-compatible.
|
// Everything is OpenAI-compatible.
|
||||||
driver: "openai",
|
driver: "openai",
|
||||||
chatModel: values.chatModel,
|
chatModel: values.chatModel,
|
||||||
|
// Max context window for the chat header badge; empty NumberInput ("") →
|
||||||
|
// 0, which clears the limit server-side (no denominator shown).
|
||||||
|
chatContextWindow:
|
||||||
|
typeof values.chatContextWindow === "number"
|
||||||
|
? values.chatContextWindow
|
||||||
|
: 0,
|
||||||
chatApiStyle: values.chatApiStyle,
|
chatApiStyle: values.chatApiStyle,
|
||||||
// Cheap model id for the anonymous public-share assistant; empty falls
|
// Cheap model id for the anonymous public-share assistant; empty falls
|
||||||
// back to chatModel server-side.
|
// back to chatModel server-side.
|
||||||
@@ -767,6 +779,18 @@ export default function AiProviderSettings() {
|
|||||||
{t("Resolves to {{url}}", { url: chatResolved })}
|
{t("Resolves to {{url}}", { url: chatResolved })}
|
||||||
</Text>
|
</Text>
|
||||||
|
|
||||||
|
<NumberInput
|
||||||
|
mt="sm"
|
||||||
|
label={t("Context window (tokens)")}
|
||||||
|
description={t(
|
||||||
|
"Shown as used / total in the chat header. Leave empty to hide the limit.",
|
||||||
|
)}
|
||||||
|
min={0}
|
||||||
|
allowDecimal={false}
|
||||||
|
disabled={isLoading}
|
||||||
|
{...form.getInputProps("chatContextWindow")}
|
||||||
|
/>
|
||||||
|
|
||||||
<Select
|
<Select
|
||||||
mt="sm"
|
mt="sm"
|
||||||
label={t("Protocol")}
|
label={t("Protocol")}
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ export type ChatApiStyle = "openai-compatible" | "openai";
|
|||||||
export interface IAiSettings {
|
export interface IAiSettings {
|
||||||
driver?: AiDriver;
|
driver?: AiDriver;
|
||||||
chatModel?: string;
|
chatModel?: string;
|
||||||
|
// Max context window in tokens shown in the chat header badge; 0/unset = no limit.
|
||||||
|
chatContextWindow?: number;
|
||||||
chatApiStyle?: ChatApiStyle;
|
chatApiStyle?: ChatApiStyle;
|
||||||
// Cheap model id for the anonymous public-share assistant; empty = chatModel.
|
// Cheap model id for the anonymous public-share assistant; empty = chatModel.
|
||||||
publicShareChatModel?: string;
|
publicShareChatModel?: string;
|
||||||
@@ -56,6 +58,8 @@ export interface IAiSettings {
|
|||||||
export interface IAiSettingsUpdate {
|
export interface IAiSettingsUpdate {
|
||||||
driver?: AiDriver;
|
driver?: AiDriver;
|
||||||
chatModel?: string;
|
chatModel?: string;
|
||||||
|
// Max context window in tokens for the chat header badge; 0 = clear the limit.
|
||||||
|
chatContextWindow?: number;
|
||||||
chatApiStyle?: ChatApiStyle;
|
chatApiStyle?: ChatApiStyle;
|
||||||
publicShareChatModel?: string;
|
publicShareChatModel?: string;
|
||||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||||
|
|||||||
@@ -275,11 +275,12 @@ describe('flushAssistant', () => {
|
|||||||
expect(f.toolCalls).not.toBeNull();
|
expect(f.toolCalls).not.toBeNull();
|
||||||
});
|
});
|
||||||
|
|
||||||
it('completed: attaches finishReason + normalized usage + contextTokens', () => {
|
it('completed: attaches finishReason + normalized usage + contextTokens + maxContextTokens', () => {
|
||||||
const f = flushAssistant([toolStep], '', 'completed', {
|
const f = flushAssistant([toolStep], '', 'completed', {
|
||||||
finishReason: 'stop',
|
finishReason: 'stop',
|
||||||
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
|
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
|
||||||
contextTokens: 15,
|
contextTokens: 15,
|
||||||
|
maxContextTokens: 200000,
|
||||||
});
|
});
|
||||||
expect(f.status).toBe('completed');
|
expect(f.status).toBe('completed');
|
||||||
expect(f.metadata.finishReason).toBe('stop');
|
expect(f.metadata.finishReason).toBe('stop');
|
||||||
@@ -290,6 +291,23 @@ describe('flushAssistant', () => {
|
|||||||
reasoningTokens: undefined,
|
reasoningTokens: undefined,
|
||||||
});
|
});
|
||||||
expect(f.metadata.contextTokens).toBe(15);
|
expect(f.metadata.contextTokens).toBe(15);
|
||||||
|
expect(f.metadata.maxContextTokens).toBe(200000);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('completed: omits maxContextTokens when unset or 0', () => {
|
||||||
|
// No maxContextTokens in the extra (admin set no context window).
|
||||||
|
const f = flushAssistant([toolStep], '', 'completed', {
|
||||||
|
finishReason: 'stop',
|
||||||
|
contextTokens: 15,
|
||||||
|
});
|
||||||
|
expect('maxContextTokens' in f.metadata).toBe(false);
|
||||||
|
// Explicit 0 is treated the same as unset (no limit -> key omitted).
|
||||||
|
const f0 = flushAssistant([toolStep], '', 'completed', {
|
||||||
|
finishReason: 'stop',
|
||||||
|
contextTokens: 15,
|
||||||
|
maxContextTokens: 0,
|
||||||
|
});
|
||||||
|
expect('maxContextTokens' in f0.metadata).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('error: records the error and a derived finishReason', () => {
|
it('error: records the error and a derived finishReason', () => {
|
||||||
|
|||||||
@@ -616,6 +616,10 @@ export class AiChatService implements OnModuleInit {
|
|||||||
contextTokens:
|
contextTokens:
|
||||||
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
|
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
|
||||||
undefined,
|
undefined,
|
||||||
|
// Max context window for the chat header badge denominator;
|
||||||
|
// resolved from the admin-configured provider settings (in
|
||||||
|
// closure scope here). Omitted/0 = no limit.
|
||||||
|
maxContextTokens: resolved?.chatContextWindow,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
// Lifecycle: release the external MCP clients leased for this turn.
|
// Lifecycle: release the external MCP clients leased for this turn.
|
||||||
@@ -1212,8 +1216,9 @@ export async function applyFinalize(
|
|||||||
* `metadata.parts` is built by assistantParts over the finished steps, then the
|
* `metadata.parts` is built by assistantParts over the finished steps, then the
|
||||||
* in-progress text appended as a trailing text part, so rowToUiMessage /
|
* in-progress text appended as a trailing text part, so rowToUiMessage /
|
||||||
* findRecent keep replaying the turn unchanged. `metadata.finishReason`,
|
* findRecent keep replaying the turn unchanged. `metadata.finishReason`,
|
||||||
* `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
|
* `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
|
||||||
* only when provided/relevant, matching the pre-#183 onFinish/onError records.
|
* `metadata.maxContextTokens` are attached only when provided/relevant, matching
|
||||||
|
* the pre-#183 onFinish/onError records.
|
||||||
*/
|
*/
|
||||||
export function flushAssistant(
|
export function flushAssistant(
|
||||||
capturedSteps: ReadonlyArray<StepLike> | undefined,
|
capturedSteps: ReadonlyArray<StepLike> | undefined,
|
||||||
@@ -1223,6 +1228,7 @@ export function flushAssistant(
|
|||||||
finishReason?: string;
|
finishReason?: string;
|
||||||
usage?: ChatStreamUsage | StreamUsage | undefined;
|
usage?: ChatStreamUsage | StreamUsage | undefined;
|
||||||
contextTokens?: number;
|
contextTokens?: number;
|
||||||
|
maxContextTokens?: number;
|
||||||
error?: string;
|
error?: string;
|
||||||
},
|
},
|
||||||
): AssistantFlush {
|
): AssistantFlush {
|
||||||
@@ -1253,6 +1259,8 @@ export function flushAssistant(
|
|||||||
normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
|
normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
|
||||||
}
|
}
|
||||||
if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
|
if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
|
||||||
|
if (extra?.maxContextTokens)
|
||||||
|
metadata.maxContextTokens = extra.maxContextTokens;
|
||||||
if (extra?.error) metadata.error = extra.error;
|
if (extra?.error) metadata.error = extra.error;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import { DB, Workspaces } from '@docmost/db/types/db';
|
|||||||
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
|
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
|
||||||
'driver',
|
'driver',
|
||||||
'chatModel',
|
'chatModel',
|
||||||
|
'chatContextWindow',
|
||||||
'chatApiStyle',
|
'chatApiStyle',
|
||||||
'embeddingModel',
|
'embeddingModel',
|
||||||
'baseUrl',
|
'baseUrl',
|
||||||
|
|||||||
@@ -41,3 +41,35 @@ describe('UpdateAiSettingsDto.chatApiStyle', () => {
|
|||||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/** DTO validation for the new chatContextWindow field (@IsInt @Min(0)). */
|
||||||
|
describe('UpdateAiSettingsDto.chatContextWindow', () => {
|
||||||
|
const errorsFor = async (chatContextWindow: unknown) =>
|
||||||
|
validate(plainToInstance(UpdateAiSettingsDto, { chatContextWindow }));
|
||||||
|
|
||||||
|
it('accepts a non-negative integer (incl. 0 = clear the limit)', async () => {
|
||||||
|
for (const v of [0, 200000]) {
|
||||||
|
const errs = await errorsFor(v);
|
||||||
|
expect(
|
||||||
|
errs.find((e) => e.property === 'chatContextWindow'),
|
||||||
|
).toBeUndefined();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects a negative value', async () => {
|
||||||
|
const errs = await errorsFor(-1);
|
||||||
|
expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects a non-integer value', async () => {
|
||||||
|
const errs = await errorsFor(1.5);
|
||||||
|
expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts the field being omitted (optional)', async () => {
|
||||||
|
const errs = await validate(plainToInstance(UpdateAiSettingsDto, {}));
|
||||||
|
expect(
|
||||||
|
errs.find((e) => e.property === 'chatContextWindow'),
|
||||||
|
).toBeUndefined();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -26,6 +26,8 @@ import {
|
|||||||
export interface UpdateAiSettingsInput {
|
export interface UpdateAiSettingsInput {
|
||||||
driver?: AiDriver;
|
driver?: AiDriver;
|
||||||
chatModel?: string;
|
chatModel?: string;
|
||||||
|
// Max context window in tokens for the chat header badge. 0/empty = no limit.
|
||||||
|
chatContextWindow?: number;
|
||||||
chatApiStyle?: ChatApiStyle;
|
chatApiStyle?: ChatApiStyle;
|
||||||
embeddingModel?: string;
|
embeddingModel?: string;
|
||||||
baseUrl?: string;
|
baseUrl?: string;
|
||||||
@@ -157,9 +159,20 @@ export class AiSettingsService {
|
|||||||
const provider = await this.readProvider(workspaceId);
|
const provider = await this.readProvider(workspaceId);
|
||||||
if (!provider.driver) return null;
|
if (!provider.driver) return null;
|
||||||
|
|
||||||
|
// Provider values are stored as ::text (see workspace.repo.ts), so
|
||||||
|
// chatContextWindow arrives as a string here; parse it back to a positive
|
||||||
|
// integer or undefined.
|
||||||
|
const ctxWindow = Number(provider.chatContextWindow);
|
||||||
|
|
||||||
const config: ResolvedAiConfig = {
|
const config: ResolvedAiConfig = {
|
||||||
driver: provider.driver,
|
driver: provider.driver,
|
||||||
chatModel: provider.chatModel,
|
chatModel: provider.chatModel,
|
||||||
|
// Max context window for the chat header badge denominator. 0/unset = no
|
||||||
|
// limit.
|
||||||
|
chatContextWindow:
|
||||||
|
Number.isFinite(ctxWindow) && ctxWindow > 0
|
||||||
|
? Math.floor(ctxWindow)
|
||||||
|
: undefined,
|
||||||
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
|
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
|
||||||
chatApiStyle: provider.chatApiStyle,
|
chatApiStyle: provider.chatApiStyle,
|
||||||
// Cheap model id for the anonymous public-share assistant; reuses the chat
|
// Cheap model id for the anonymous public-share assistant; reuses the chat
|
||||||
@@ -219,6 +232,15 @@ export class AiSettingsService {
|
|||||||
async getMasked(workspaceId: string): Promise<MaskedAiSettings> {
|
async getMasked(workspaceId: string): Promise<MaskedAiSettings> {
|
||||||
const provider = await this.readProvider(workspaceId);
|
const provider = await this.readProvider(workspaceId);
|
||||||
|
|
||||||
|
// Provider values are stored as ::text (see workspace.repo.ts), so
|
||||||
|
// chatContextWindow arrives as a string; coerce it to a positive integer or
|
||||||
|
// undefined so the client receives a real number.
|
||||||
|
const ctxWindow = Number(provider.chatContextWindow);
|
||||||
|
const chatContextWindow =
|
||||||
|
Number.isFinite(ctxWindow) && ctxWindow > 0
|
||||||
|
? Math.floor(ctxWindow)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
let hasApiKey = false;
|
let hasApiKey = false;
|
||||||
let hasEmbeddingApiKey = false;
|
let hasEmbeddingApiKey = false;
|
||||||
let hasSttApiKey = false;
|
let hasSttApiKey = false;
|
||||||
@@ -243,6 +265,7 @@ export class AiSettingsService {
|
|||||||
return {
|
return {
|
||||||
driver: provider.driver,
|
driver: provider.driver,
|
||||||
chatModel: provider.chatModel,
|
chatModel: provider.chatModel,
|
||||||
|
chatContextWindow,
|
||||||
chatApiStyle: provider.chatApiStyle,
|
chatApiStyle: provider.chatApiStyle,
|
||||||
embeddingModel: provider.embeddingModel,
|
embeddingModel: provider.embeddingModel,
|
||||||
baseUrl: provider.baseUrl,
|
baseUrl: provider.baseUrl,
|
||||||
|
|||||||
@@ -32,6 +32,9 @@ export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai'];
|
|||||||
export interface AiProviderSettings {
|
export interface AiProviderSettings {
|
||||||
driver: AiDriver;
|
driver: AiDriver;
|
||||||
chatModel: string;
|
chatModel: string;
|
||||||
|
// Max context window in tokens; surfaced to the chat header badge as the
|
||||||
|
// denominator ("current / max"). 0/unset = no limit (badge shows no denominator).
|
||||||
|
chatContextWindow?: number;
|
||||||
// Chat provider implementation for the `openai` driver. Unset → defaults to
|
// Chat provider implementation for the `openai` driver. Unset → defaults to
|
||||||
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
|
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
|
||||||
chatApiStyle?: ChatApiStyle;
|
chatApiStyle?: ChatApiStyle;
|
||||||
@@ -72,6 +75,7 @@ export interface AiProviderSettings {
|
|||||||
export const PROVIDER_SETTINGS_KEYS = [
|
export const PROVIDER_SETTINGS_KEYS = [
|
||||||
'driver',
|
'driver',
|
||||||
'chatModel',
|
'chatModel',
|
||||||
|
'chatContextWindow',
|
||||||
'chatApiStyle',
|
'chatApiStyle',
|
||||||
'embeddingModel',
|
'embeddingModel',
|
||||||
'baseUrl',
|
'baseUrl',
|
||||||
@@ -98,6 +102,9 @@ export const PROVIDER_SETTINGS_KEYS = [
|
|||||||
export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||||
driver?: AiDriver;
|
driver?: AiDriver;
|
||||||
chatModel?: string;
|
chatModel?: string;
|
||||||
|
// Max context window in tokens; surfaced to the chat header badge as the
|
||||||
|
// "current / max" denominator. 0/unset = no limit.
|
||||||
|
chatContextWindow?: number;
|
||||||
// Cheap model id for the public-share assistant; reuses the chat creds.
|
// Cheap model id for the public-share assistant; reuses the chat creds.
|
||||||
publicShareChatModel?: string;
|
publicShareChatModel?: string;
|
||||||
// Agent-role id whose persona the public-share assistant adopts (empty/unset
|
// Agent-role id whose persona the public-share assistant adopts (empty/unset
|
||||||
@@ -116,6 +123,9 @@ export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
|||||||
export interface MaskedAiSettings {
|
export interface MaskedAiSettings {
|
||||||
driver?: AiDriver;
|
driver?: AiDriver;
|
||||||
chatModel?: string;
|
chatModel?: string;
|
||||||
|
// Max context window in tokens; the chat header badge denominator. 0/unset =
|
||||||
|
// no limit.
|
||||||
|
chatContextWindow?: number;
|
||||||
chatApiStyle?: ChatApiStyle;
|
chatApiStyle?: ChatApiStyle;
|
||||||
embeddingModel?: string;
|
embeddingModel?: string;
|
||||||
baseUrl?: string;
|
baseUrl?: string;
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { IsIn, IsOptional, IsString } from 'class-validator';
|
import { IsIn, IsInt, IsOptional, IsString, Min } from 'class-validator';
|
||||||
import {
|
import {
|
||||||
AI_DRIVERS,
|
AI_DRIVERS,
|
||||||
AiDriver,
|
AiDriver,
|
||||||
@@ -25,6 +25,13 @@ export class UpdateAiSettingsDto {
|
|||||||
@IsString()
|
@IsString()
|
||||||
chatModel?: string;
|
chatModel?: string;
|
||||||
|
|
||||||
|
// Max context window in tokens shown in the chat header badge. 0/empty =
|
||||||
|
// clear the limit (no denominator shown).
|
||||||
|
@IsOptional()
|
||||||
|
@IsInt()
|
||||||
|
@Min(0)
|
||||||
|
chatContextWindow?: number;
|
||||||
|
|
||||||
@IsOptional()
|
@IsOptional()
|
||||||
@IsIn(CHAT_API_STYLES)
|
@IsIn(CHAT_API_STYLES)
|
||||||
chatApiStyle?: ChatApiStyle;
|
chatApiStyle?: ChatApiStyle;
|
||||||
|
|||||||
Reference in New Issue
Block a user