Merge pull request 'Батч: бейдж контекста (#189) + e2e в CI (#187) + inline-тест MCP (#170)' (#197) from batch/issues-189-187-170 into develop

Reviewed-on: #197
This commit was merged in pull request #197.
This commit is contained in:
2026-06-26 18:09:47 +03:00
23 changed files with 837 additions and 408 deletions

View File

@@ -56,3 +56,160 @@ jobs:
tags: ${{ env.IMAGE }}:develop tags: ${{ env.IMAGE }}:develop
cache-from: type=gha,scope=develop-amd64 cache-from: type=gha,scope=develop-amd64
cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true
# e2e jobs run on every develop push but DO NOT gate the build/publish above:
# `build` stays `needs: test` only, so the :develop image still ships even if
# e2e fails. A failing e2e job turns the run red and triggers GitHub's email
# to the pusher — that red run + email is the intended notification, not a
# deploy block.
e2e-server:
  # Server e2e suite, run against throwaway Postgres + Redis service containers.
  runs-on: ubuntu-latest
  env:
    # Connection settings point at the service containers published on localhost.
    DATABASE_URL: 'postgresql://docmost:docmost@localhost:5432/docmost'
    REDIS_URL: 'redis://localhost:6379'
    APP_SECRET: 'ci-e2e-secret-change-me-min-32-characters'
    APP_URL: 'http://localhost:3000'
  services:
    postgres:
      image: 'pgvector/pgvector:pg18'
      env:
        POSTGRES_DB: docmost
        POSTGRES_USER: docmost
        POSTGRES_PASSWORD: docmost
      ports:
        - '5432:5432'
      # Container health check: poll pg_isready every 5s, up to 20 attempts.
      options: '--health-cmd "pg_isready -U docmost" --health-interval 5s --health-timeout 5s --health-retries 20'
    redis:
      image: 'redis:7'
      ports:
        - '6379:6379'
      # Container health check: poll `redis-cli ping` every 5s, up to 20 attempts.
      options: '--health-cmd "redis-cli ping" --health-interval 5s --health-timeout 5s --health-retries 20'
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Set up pnpm
      uses: pnpm/action-setup@v4

    - name: Set up Node
      uses: actions/setup-node@v4
      with:
        node-version: 22
        cache: pnpm

    - name: Install dependencies
      run: pnpm install --frozen-lockfile

    # editor-ext is built before the server steps run.
    - name: Build editor-ext
      run: pnpm --filter @docmost/editor-ext build

    # Bring the schema up to date before the tests touch the database.
    - name: Run migrations
      run: pnpm --filter ./apps/server migration:latest

    - name: Run server e2e
      run: pnpm --filter ./apps/server test:e2e
# Same rationale as e2e-server: this job is intentionally NOT in
# `build.needs`. Deploy of the :develop image must not be blocked by e2e;
# a red run plus GitHub's email to the pusher is the notification mechanism.
e2e-mcp:
  # MCP e2e suite: builds the server and the MCP package, boots the server in
  # production mode against Postgres + Redis service containers, seeds an admin
  # account, then runs the MCP tests against the live API.
  runs-on: ubuntu-latest
  env:
    DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost
    REDIS_URL: redis://localhost:6379
    APP_SECRET: ci-e2e-secret-change-me-min-32-characters
    APP_URL: http://localhost:3000
    # Job-wide, so every step below (install included) runs with it set.
    NODE_ENV: production
  services:
    postgres:
      image: pgvector/pgvector:pg18
      env:
        POSTGRES_DB: docmost
        POSTGRES_USER: docmost
        POSTGRES_PASSWORD: docmost
      ports:
        - 5432:5432
      options: >-
        --health-cmd "pg_isready -U docmost"
        --health-interval 5s
        --health-timeout 5s
        --health-retries 20
    redis:
      image: redis:7
      ports:
        - 6379:6379
      options: >-
        --health-cmd "redis-cli ping"
        --health-interval 5s
        --health-timeout 5s
        --health-retries 20
  steps:
    - name: Checkout
      uses: actions/checkout@v4
    - name: Set up pnpm
      uses: pnpm/action-setup@v4
    - name: Set up Node
      uses: actions/setup-node@v4
      with:
        node-version: 22
        cache: pnpm
    - name: Install dependencies
      # With NODE_ENV=production pnpm omits devDependencies unless told
      # otherwise. The build and test steps below presumably need dev tooling,
      # so request the full dependency set explicitly.
      run: pnpm install --frozen-lockfile --prod=false
    - name: Build editor-ext
      run: pnpm --filter @docmost/editor-ext build
    - name: Build server
      run: pnpm server:build
    - name: Build mcp
      run: pnpm --filter @docmost/mcp build
    - name: Run migrations
      run: pnpm --filter ./apps/server migration:latest
    - name: Start server (prod)
      # Capture stdout/stderr so a start-up crash (bind error, stack trace,
      # migration mismatch) is diagnosable; without this the only signal is
      # the generic health-loop timeout below, ~120s later.
      run: pnpm --filter ./apps/server start:prod > /tmp/server.log 2>&1 &
    - name: Wait for server health
      # Poll the health endpoint up to 60 times, 2s apart, then give up.
      run: |
        for i in $(seq 1 60); do
          if curl -fsS http://localhost:3000/api/health > /dev/null; then
            echo "Server is healthy"
            exit 0
          fi
          sleep 2
        done
        echo "Server did not become healthy in time"
        exit 1
    - name: Seed admin
      # Creates the account whose credentials the MCP tests log in with.
      run: |
        curl -fsS -X POST http://localhost:3000/api/auth/setup \
          -H "Content-Type: application/json" \
          -d '{"name":"E2E","email":"e2e@example.com","password":"E2ePassword123","workspaceName":"E2E"}'
    - name: Run mcp e2e
      env:
        DOCMOST_API_URL: http://localhost:3000/api
        DOCMOST_EMAIL: e2e@example.com
        DOCMOST_PASSWORD: E2ePassword123
      run: pnpm --filter @docmost/mcp test:e2e
    - name: Dump server log on failure
      # Must stay LAST: `failure()` only sees steps that have already run, so
      # placing this earlier would hide the log when seeding or the e2e suite
      # itself fails. `|| true` keeps a missing log file from masking the
      # original error.
      if: failure()
      run: cat /tmp/server.log || true

View File

@@ -715,6 +715,8 @@
"Test": "Test", "Test": "Test",
"Available tools": "Available tools", "Available tools": "Available tools",
"No tools available": "No tools available", "No tools available": "No tools available",
"Failed": "Failed",
"OK · {{n}}": "OK · {{n}}",
"Created successfully": "Created successfully", "Created successfully": "Created successfully",
"Deleted successfully": "Deleted successfully", "Deleted successfully": "Deleted successfully",
"Clear": "Clear", "Clear": "Clear",
@@ -1167,8 +1169,9 @@
"Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.", "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.",
"Built-in assistant persona": "Built-in assistant persona", "Built-in assistant persona": "Built-in assistant persona",
"Minimize": "Minimize", "Minimize": "Minimize",
"Current context size": "Current context size", "Context size / model limit": "Context size / model limit",
"Tokens generated this turn": "Tokens generated this turn", "Context window (tokens)": "Context window (tokens)",
"Shown as used / total in the chat header. Leave empty to hide the limit.": "Shown as used / total in the chat header. Leave empty to hide the limit.",
"AI agent": "AI agent", "AI agent": "AI agent",
"Take a look at the current document": "Take a look at the current document", "Take a look at the current document": "Take a look at the current document",
"AI agent is typing…": "AI agent is typing…", "AI agent is typing…": "AI agent is typing…",

View File

@@ -704,13 +704,19 @@
"Ask the AI agent…": "Спросите AI-агента…", "Ask the AI agent…": "Спросите AI-агента…",
"Copy chat": "Копировать чат", "Copy chat": "Копировать чат",
"Created successfully": "Успешно создано", "Created successfully": "Успешно создано",
"Current context size": "Текущий размер контекста", "Context size / model limit": "Размер контекста / лимит модели",
"Tokens generated this turn": "Токенов сгенерировано за ход", "Context window (tokens)": "Окно контекста (токены)",
"Shown as used / total in the chat header. Leave empty to hide the limit.": "Показывается в шапке чата как использовано / всего. Пусто — лимит скрыт.",
"Delete this chat?": "Удалить этот чат?", "Delete this chat?": "Удалить этот чат?",
"Deleted successfully": "Успешно удалено", "Deleted successfully": "Успешно удалено",
"Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}", "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}",
"Failed to delete chat": "Не удалось удалить чат", "Failed to delete chat": "Не удалось удалить чат",
"Failed to rename chat": "Не удалось переименовать чат", "Failed to rename chat": "Не удалось переименовать чат",
"Failed": "Ошибка",
"OK · {{n}}": "OK · {{n}}",
"Test": "Тест",
"No tools available": "Инструменты недоступны",
"Available tools": "Доступные инструменты",
"Minimize": "Свернуть", "Minimize": "Свернуть",
"No chats yet.": "Чатов пока нет.", "No chats yet.": "Чатов пока нет.",
"Send": "Отправить", "Send": "Отправить",

View File

@@ -45,6 +45,7 @@ import {
shouldCollapseOnOutsidePointer, shouldCollapseOnOutsidePointer,
isHeaderClick, isHeaderClick,
} from "@/features/ai-chat/utils/collapse-helpers.ts"; } from "@/features/ai-chat/utils/collapse-helpers.ts";
import { selectContextBadge } from "@/features/ai-chat/utils/context-badge.ts";
import { useClipboard } from "@/hooks/use-clipboard"; import { useClipboard } from "@/hooks/use-clipboard";
import { notifications } from "@mantine/notifications"; import { notifications } from "@mantine/notifications";
import classes from "@/features/ai-chat/components/ai-chat-window.module.css"; import classes from "@/features/ai-chat/components/ai-chat-window.module.css";
@@ -161,12 +162,6 @@ export default function AiChatWindow() {
const { data: messageRows, isLoading: messagesLoading } = const { data: messageRows, isLoading: messagesLoading } =
useAiChatMessagesQuery(activeChatId ?? undefined); useAiChatMessagesQuery(activeChatId ?? undefined);
// Live turn-token total (reasoning + output) for the in-flight turn, pushed up
// (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream.
// `null` means no turn is in flight -> the badge falls back to the persisted
// context size below.
const [liveTurnTokens, setLiveTurnTokens] = useState<number | null>(null);
// The page the user is currently viewing. AiChatWindow lives in a pathless // The page the user is currently viewing. AiChatWindow lives in a pathless
// parent layout route, so useParams() can't see :pageSlug. Match the full // parent layout route, so useParams() can't see :pageSlug. Match the full
// pathname against the authenticated page route instead so "the current page" // pathname against the authenticated page route instead so "the current page"
@@ -301,24 +296,19 @@ export default function AiChatWindow() {
// shipped; older rows fall back to that turn's `usage` total. NOTE: reflects // shipped; older rows fall back to that turn's `usage` total. NOTE: reflects
// PERSISTED rows (updates on chat open/switch); it does not tick live // PERSISTED rows (updates on chat open/switch); it does not tick live
// mid-stream — acceptable for v1. // mid-stream — acceptable for v1.
const contextTokens = useMemo(() => { //
if (!activeChatId || !messageRows) return 0; // The denominator `maxContextTokens` (the model's configured max window) is
for (let i = messageRows.length - 1; i >= 0; i--) { // derived in the SAME backward scan: it is stamped alongside `contextTokens`
const meta = messageRows[i].metadata; // on a completed turn, but the numerator and denominator are taken from the
if (!meta) continue; // most recent row carrying EACH value independently — they may land on
if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) { // different rows (e.g. a fresh error row can carry contextTokens but not
return meta.contextTokens; // maxContextTokens), so we keep scanning for whichever is still unset. 0 when
} // no row has it (older rows, or no admin-configured limit) — the badge then
const usage = meta.usage; // shows just the current size with no denominator.
if (usage) { const { contextTokens, maxContextTokens } = useMemo(
const fallback = () => selectContextBadge(activeChatId ? messageRows : undefined),
usage.totalTokens ?? [activeChatId, messageRows],
(usage.inputTokens ?? 0) + (usage.outputTokens ?? 0); );
if (fallback > 0) return fallback;
}
}
return 0;
}, [activeChatId, messageRows]);
// On (re)open, settle the geometry before paint (useLayoutEffect → no // On (re)open, settle the geometry before paint (useLayoutEffect → no
// first-frame jump): compute an initial top-right placement the first time, // first-frame jump): compute an initial top-right placement the first time,
@@ -509,20 +499,17 @@ export default function AiChatWindow() {
)} )}
<div style={{ flex: 1, display: "flex", justifyContent: "center" }}> <div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
{/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz); {/* Always show the persisted "current / max" context. The denominator
once it finishes, fall back to the persisted context size. Require (the admin-configured model limit) is appended only when known;
> 0 so the very first emit (an empty tail message, count 0) does not not clamped when current > max (shown as-is, e.g. "210k / 200k").
flash a "0" badge before any token streams in (#151 review). */} Hidden entirely until a turn has recorded a context figure. */}
{liveTurnTokens !== null && liveTurnTokens > 0 ? ( {contextTokens > 0 ? (
<Tooltip label={t("Tokens generated this turn")} withArrow> <Tooltip label={t("Context size / model limit")} withArrow>
<span className={classes.badge}>
{formatTokens(liveTurnTokens)}
</span>
</Tooltip>
) : contextTokens > 0 ? (
<Tooltip label={t("Current context size")} withArrow>
<span className={classes.badge}> <span className={classes.badge}>
{formatTokens(contextTokens)} {formatTokens(contextTokens)}
{maxContextTokens > 0
? ` / ${formatTokens(maxContextTokens)}`
: ""}
</span> </span>
</Tooltip> </Tooltip>
) : null} ) : null}
@@ -649,7 +636,6 @@ export default function AiChatWindow() {
assistantName={currentRole?.name} assistantName={currentRole?.name}
onTurnFinished={onTurnFinished} onTurnFinished={onTurnFinished}
onServerChatId={onServerChatId} onServerChatId={onServerChatId}
onLiveTurnTokens={setLiveTurnTokens}
/> />
)} )}
</div> </div>

View File

@@ -20,7 +20,6 @@ import {
} from "@/features/ai-chat/utils/role-launch.ts"; } from "@/features/ai-chat/utils/role-launch.ts";
import { describeChatError } from "@/features/ai-chat/utils/error-message.ts"; import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts"; import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts";
import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
import { import {
dequeue, dequeue,
enqueueMessage, enqueueMessage,
@@ -82,12 +81,6 @@ interface ChatThreadProps {
* Copy/export button available mid-stream). Distinct from onTurnFinished, * Copy/export button available mid-stream). Distinct from onTurnFinished,
* which fires only at the terminal outcome. */ * which fires only at the terminal outcome. */
onServerChatId?: (serverChatId?: string) => void; onServerChatId?: (serverChatId?: string) => void;
/** Reports the live turn-token total (reasoning + output) for the in-flight
* turn so the parent can show a header badge that ticks mid-stream. THROTTLED
* here (~8 Hz) so the parent re-renders a handful of times a second, not on
* every streamed delta. Called with `null` when no turn is in flight (the
* parent then reverts the badge to the persisted context size). */
onLiveTurnTokens?: (tokens: number | null) => void;
} }
/** /**
@@ -133,7 +126,6 @@ export default function ChatThread({
assistantName, assistantName,
onTurnFinished, onTurnFinished,
onServerChatId, onServerChatId,
onLiveTurnTokens,
}: ChatThreadProps) { }: ChatThreadProps) {
const { t } = useTranslation(); const { t } = useTranslation();
@@ -348,53 +340,6 @@ export default function ChatThread({
// the SAME on-screen banner text can be mirrored into the export (issue #160). // the SAME on-screen banner text can be mirrored into the export (issue #160).
const errorView = error ? describeChatError(error.message ?? "", t) : null; const errorView = error ? describeChatError(error.message ?? "", t) : null;
// Report the live turn-token total to the parent header badge, THROTTLED to
// ~8 Hz so the parent re-renders a few times a second instead of on every
// streamed delta. The tail assistant message's reasoning+output (estimate while
// streaming, authoritative once a step reports usage) is the live figure. When
// the turn ends we emit a final exact value, then `null` so the parent reverts
// the badge to the persisted context size.
const lastEmitRef = useRef(0);
const emitTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
useEffect(() => {
if (!onLiveTurnTokens) return;
if (!isStreaming) {
// Turn ended (or never started): clear any pending throttle and revert.
if (emitTimerRef.current) {
clearTimeout(emitTimerRef.current);
emitTimerRef.current = null;
}
lastEmitRef.current = 0;
onLiveTurnTokens(null);
return;
}
const tail = messages[messages.length - 1];
const live = tail?.role === "assistant" ? liveTurnTokens(tail) : null;
const total = live ? live.reasoning + live.output : 0;
const now = Date.now();
const MIN_INTERVAL = 120; // ms (~8 Hz)
const elapsed = now - lastEmitRef.current;
if (elapsed >= MIN_INTERVAL) {
lastEmitRef.current = now;
onLiveTurnTokens(total);
} else if (!emitTimerRef.current) {
// Schedule a trailing emit so the FINAL value of a burst is not dropped.
emitTimerRef.current = setTimeout(() => {
emitTimerRef.current = null;
lastEmitRef.current = Date.now();
onLiveTurnTokens(total);
}, MIN_INTERVAL - elapsed);
}
}, [messages, isStreaming, onLiveTurnTokens]);
// Clear any pending throttle timer on unmount (chat switch via `key`) so a
// trailing emit can't fire into a torn-down thread's parent.
useEffect(() => {
return () => {
if (emitTimerRef.current) clearTimeout(emitTimerRef.current);
};
}, []);
// A role was picked with autoStart=false: the role is bound but NOTHING was // A role was picked with autoStart=false: the role is bound but NOTHING was
// sent, so chatId stays null and the empty state would keep showing the cards. // sent, so chatId stays null and the empty state would keep showing the cards.
// This flag hides the cards and reveals the composer (with the role indicated) // This flag hides the cards and reveals the composer (with the role indicated)

View File

@@ -116,6 +116,9 @@ export interface IAiChatMessageRow {
// turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
// floating window's header badge. // floating window's header badge.
contextTokens?: number; contextTokens?: number;
// The model's max context window (denominator for the header badge); set
// alongside contextTokens on a completed turn; absent on older rows.
maxContextTokens?: number;
// Set on an assistant row whose turn ended in a provider/stream error; the // Set on an assistant row whose turn ended in a provider/stream error; the
// raw provider error text (e.g. "402: ...") for inline display in the thread. // raw provider error text (e.g. "402: ...") for inline display in the thread.
error?: string; error?: string;

View File

@@ -0,0 +1,90 @@
import { describe, expect, it } from "vitest";
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
import { selectContextBadge } from "@/features/ai-chat/utils/context-badge.ts";
/**
* Pure-helper tests for the header context badge selection. Covers the two
* non-obvious rules: numerator and denominator are each taken from the most
* recent row carrying THAT value (they may live on different rows), and a fresh
* row with a zero/absent value must NOT shadow an older positive one.
*/
// Fixture factory: wraps the given metadata in an otherwise-constant assistant
// row. Only `metadata` matters to the helper under test; `id` is random so rows
// never collide.
const row = (metadata: IAiChatMessageRow["metadata"]): IAiChatMessageRow => {
  const fixture: IAiChatMessageRow = {
    createdAt: "2026-01-01T00:00:00.000Z",
    metadata,
    content: null,
    role: "assistant",
    id: Math.random().toString(),
  };
  return fixture;
};
// Rows are listed oldest -> newest in every case below; the helper scans from
// the end of the array, so the LAST row is the "freshest" one.
describe("selectContextBadge", () => {
  it("returns zeros for empty / nullish input", () => {
    expect(selectContextBadge(undefined)).toEqual({
      contextTokens: 0,
      maxContextTokens: 0,
    });
    expect(selectContextBadge(null)).toEqual({
      contextTokens: 0,
      maxContextTokens: 0,
    });
    expect(selectContextBadge([])).toEqual({
      contextTokens: 0,
      maxContextTokens: 0,
    });
  });

  it("reads both figures from the most recent row that carries them", () => {
    expect(
      selectContextBadge([
        row({ contextTokens: 100, maxContextTokens: 200000 }),
        row({ contextTokens: 1500, maxContextTokens: 200000 }),
      ]),
    ).toEqual({ contextTokens: 1500, maxContextTokens: 200000 });
  });

  it("falls back to legacy usage total for older rows without contextTokens", () => {
    expect(
      selectContextBadge([
        row({ usage: { inputTokens: 30, outputTokens: 70 } }),
      ]),
    ).toEqual({ contextTokens: 100, maxContextTokens: 0 });
    expect(
      selectContextBadge([row({ usage: { totalTokens: 250 } })]),
    ).toEqual({ contextTokens: 250, maxContextTokens: 0 });
  });

  it("prefers a row's contextTokens over its legacy usage total", () => {
    // Both are present on the same row: the dedicated figure wins.
    expect(
      selectContextBadge([
        row({ contextTokens: 400, usage: { totalTokens: 9999 } }),
      ]),
    ).toEqual({ contextTokens: 400, maxContextTokens: 0 });
  });

  it("uses the usage total when the same row's contextTokens is zero", () => {
    expect(
      selectContextBadge([
        row({ contextTokens: 0, usage: { totalTokens: 300 } }),
      ]),
    ).toEqual({ contextTokens: 300, maxContextTokens: 0 });
  });

  it("takes numerator and denominator from different rows", () => {
    // Freshest row (an error turn) carries contextTokens but no max; the older
    // completed turn carries the max. Each is picked from its own latest row.
    expect(
      selectContextBadge([
        row({ contextTokens: 800, maxContextTokens: 200000 }),
        row({ contextTokens: 1200, error: "402: nope" }),
      ]),
    ).toEqual({ contextTokens: 1200, maxContextTokens: 200000 });
  });

  it("does not let a fresh zero/absent max shadow an older positive max", () => {
    expect(
      selectContextBadge([
        row({ contextTokens: 100, maxContextTokens: 200000 }),
        row({ contextTokens: 1200, maxContextTokens: 0 }),
      ]),
    ).toEqual({ contextTokens: 1200, maxContextTokens: 200000 });
  });

  it("does not let a fresh zero/absent contextTokens shadow an older positive one", () => {
    // Mirror of the max case above, for the numerator: the freshest row only
    // contributes the max, and the scan continues back to the older size.
    expect(
      selectContextBadge([
        row({ contextTokens: 900, maxContextTokens: 200000 }),
        row({ contextTokens: 0, maxContextTokens: 128000 }),
      ]),
    ).toEqual({ contextTokens: 900, maxContextTokens: 128000 });
    expect(
      selectContextBadge([
        row({ contextTokens: 900 }),
        row({ maxContextTokens: 128000 }),
      ]),
    ).toEqual({ contextTokens: 900, maxContextTokens: 128000 });
  });

  it("skips rows with null metadata", () => {
    expect(
      selectContextBadge([
        row({ contextTokens: 500, maxContextTokens: 200000 }),
        row(null),
      ]),
    ).toEqual({ contextTokens: 500, maxContextTokens: 200000 });
  });

  it("reports current > max as-is (no clamp)", () => {
    expect(
      selectContextBadge([row({ contextTokens: 250000, maxContextTokens: 200000 })]),
    ).toEqual({ contextTokens: 250000, maxContextTokens: 200000 });
  });
});

View File

@@ -0,0 +1,49 @@
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
/**
 * Pick the two numbers shown in the chat header's context badge out of the
 * persisted message rows.
 *
 * The rows are walked from the last element to the first, and each figure is
 * filled from the first row (in that order) that offers a positive value:
 *
 * - `contextTokens`: the row's `metadata.contextTokens` when it is a positive
 *   number; otherwise, if the row has `metadata.usage`, that usage's
 *   `totalTokens` (or `inputTokens + outputTokens` when `totalTokens` is
 *   null/undefined), provided the result is positive.
 * - `maxContextTokens`: the row's `metadata.maxContextTokens` when it is a
 *   positive number.
 *
 * The two figures are resolved independently, so they may come from different
 * rows; rows without metadata are ignored. The walk ends early once both are
 * set.
 *
 * @param messageRows Persisted rows of the chat, or `null`/`undefined` when
 *   there is nothing to inspect.
 * @returns Both figures; `0` stands for "no row supplied a positive value".
 */
export function selectContextBadge(
  messageRows: readonly IAiChatMessageRow[] | undefined | null,
): { contextTokens: number; maxContextTokens: number } {
  const badge = { contextTokens: 0, maxContextTokens: 0 };
  if (!messageRows) return badge;

  for (let idx = messageRows.length - 1; idx >= 0; idx -= 1) {
    const metadata = messageRows[idx].metadata;
    if (!metadata) continue;

    // Numerator: still unresolved, so try this row.
    if (badge.contextTokens === 0) {
      const stamped = metadata.contextTokens;
      if (typeof stamped === "number" && stamped > 0) {
        badge.contextTokens = stamped;
      } else if (metadata.usage) {
        // Legacy rows: derive the size from the turn's usage record instead.
        const legacy = metadata.usage;
        const legacyTotal =
          legacy.totalTokens ??
          (legacy.inputTokens ?? 0) + (legacy.outputTokens ?? 0);
        if (legacyTotal > 0) badge.contextTokens = legacyTotal;
      }
    }

    // Denominator: resolved separately from the numerator.
    if (badge.maxContextTokens === 0) {
      const limit = metadata.maxContextTokens;
      if (typeof limit === "number" && limit > 0) {
        badge.maxContextTokens = limit;
      }
    }

    // Nothing left to look for.
    if (badge.contextTokens !== 0 && badge.maxContextTokens !== 0) break;
  }

  return badge;
}

View File

@@ -1,17 +1,5 @@
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import type { UIMessage } from "@ai-sdk/react"; import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
import {
estimateTokens,
liveTurnTokens,
} from "@/features/ai-chat/utils/count-stream-tokens.ts";
const msg = (parts: unknown[], metadata?: unknown): UIMessage =>
({
id: Math.random().toString(),
role: "assistant",
parts,
metadata,
}) as UIMessage;
describe("estimateTokens", () => { describe("estimateTokens", () => {
it("returns 0 for the empty string", () => { it("returns 0 for the empty string", () => {
@@ -25,147 +13,3 @@ describe("estimateTokens", () => {
expect(estimateTokens("12345678")).toBe(2); expect(estimateTokens("12345678")).toBe(2);
}); });
}); });
describe("liveTurnTokens — estimate path", () => {
it("is all zeros for an undefined message", () => {
expect(liveTurnTokens(undefined)).toEqual({
reasoning: 0,
output: 0,
authoritative: false,
});
});
it("is all zeros for a parts-less message", () => {
expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({
reasoning: 0,
output: 0,
authoritative: false,
});
});
it("estimates output from text parts", () => {
// 8 chars -> 2 tokens.
const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }]));
expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false });
});
it("estimates reasoning from reasoning parts (kept separate from output)", () => {
const r = liveTurnTokens(
msg([
{ type: "reasoning", text: "12345678" },
{ type: "text", text: "abcd" },
]),
);
expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false });
});
it("accumulates across multiple text + reasoning parts (multi-step)", () => {
const r = liveTurnTokens(
msg([
{ type: "reasoning", text: "abcd" }, // 1
{ type: "text", text: "abcd" }, // 1
{ type: "tool-getPage", state: "output-available" }, // ignored
{ type: "reasoning", text: "abcd" }, // 1
{ type: "text", text: "abcdefgh" }, // 2
]),
);
expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false });
});
it("ignores non text/reasoning parts (tools, step-start)", () => {
const r = liveTurnTokens(
msg([
{ type: "step-start" },
{ type: "tool-getPage", state: "input-available" },
]),
);
expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false });
});
});
describe("liveTurnTokens — authoritative path", () => {
it("returns authoritative usage verbatim, splitting reasoning out of output", () => {
// outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30.
const r = liveTurnTokens(
msg([{ type: "text", text: "estimate would be tiny" }], {
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
}),
);
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
});
it("treats missing reasoningTokens as 0 and keeps full output", () => {
const r = liveTurnTokens(
msg([{ type: "text", text: "x" }], {
usage: { inputTokens: 10, outputTokens: 42 },
}),
);
expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true });
});
it("never returns a negative output when reasoning exceeds reported output", () => {
const r = liveTurnTokens(
msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }),
);
expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true });
});
it("falls back to the estimate when metadata has no usage object", () => {
const r = liveTurnTokens(
msg([{ type: "text", text: "abcd" }], { chatId: "c1" }),
);
expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false });
});
});
describe("liveTurnTokens — combined authoritative + estimate (#163)", () => {
it("ticks the in-flight step above the completed-steps authoritative base", () => {
// The authoritative usage is the sum over COMPLETED steps (step 1). The
// CURRENT step is streaming and its text is NOT in `usage` yet, but it IS in
// the parts -> the running estimate must push the live figure above the base
// so the badge keeps growing between step boundaries.
const longText = "x".repeat(800); // 800 chars -> 200 est output tokens
const r = liveTurnTokens(
msg([{ type: "text", text: longText }], {
usage: { inputTokens: 500, outputTokens: 40 }, // step-1 base: 40 output
}),
);
// max(authOutput=40, estOutput=200) = 200 -> the counter ticks, not frozen.
expect(r.output).toBe(200);
expect(r.authoritative).toBe(true);
});
it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => {
const longReasoning = "r".repeat(400); // 400 chars -> 100 est reasoning
const r = liveTurnTokens(
msg([{ type: "reasoning", text: longReasoning }], {
usage: { inputTokens: 100, outputTokens: 20, reasoningTokens: 20 },
}),
);
// reasoning: max(20, 100) = 100 ; output: max(max(0,20-20)=0, 0) = 0.
expect(r.reasoning).toBe(100);
expect(r.output).toBe(0);
expect(r.authoritative).toBe(true);
});
it("snaps to the authoritative figure once it exceeds the rough estimate", () => {
// Short on-screen text (estimate tiny) but a large authoritative output:
// the exact figure wins at the boundary (the counter never under-reports).
const r = liveTurnTokens(
msg([{ type: "text", text: "abcd" }], {
usage: { inputTokens: 10, outputTokens: 5000 },
}),
);
expect(r.output).toBe(5000);
});
it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => {
// Mirrors the legacy 'verbatim' tests: estimate < authoritative -> unchanged.
const r = liveTurnTokens(
msg([{ type: "text", text: "tiny" }], {
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
}),
);
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
});
});

View File

@@ -1,18 +1,11 @@
import type { UIMessage } from "@ai-sdk/react";
/** /**
* Live token counting for a streaming AI-chat turn — split into REASONING * Rough client-side token estimation for AI-chat UI affordances.
* (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows
* `Thinking… · 60 tokens` next to its thinking indicator.
* *
* No provider streams exact per-token usage mid-stream, so the live number is a * No provider streams exact per-token usage mid-stream, so any in-flight figure
* CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage * is a CLIENT ESTIMATE (chars/≈4 heuristic). Pure + unit-testable: it never runs
* once the server attaches it on a step/turn boundary (see the server's * a real BPE tokenizer (that would be O(n²) on the hot path, bloat the bundle,
* `chatStreamMetadata` + the client's read of `message.metadata.usage`). When * and be wrong for Gemini/Ollama anyway). Used by the in-body reasoning counter
* authoritative usage is present we return it verbatim (the number "jumps to * ("Thinking · N tokens").
* exact"); otherwise we return the running estimate. Pure + unit-testable: it
* never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the
* bundle, and be wrong for Gemini/Ollama anyway).
*/ */
/** /**
@@ -24,90 +17,3 @@ export function estimateTokens(text: string): number {
if (!text) return 0; if (!text) return 0;
return Math.ceil(text.length / 4); return Math.ceil(text.length / 4);
} }
/** Authoritative per-step/turn usage the server attaches to message metadata. */
export interface AuthoritativeUsage {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
reasoningTokens?: number;
}
/** Live token split for a turn's tail (streaming) assistant message. */
export interface LiveTurnTokens {
/** Thinking/reasoning tokens (estimate, or authoritative when available). */
reasoning: number;
/** Answer/output tokens (estimate, or authoritative when available). */
output: number;
/** True when the numbers come from authoritative server usage, not estimate. */
authoritative: boolean;
}
/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */
function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined {
const meta = message?.metadata as
| { usage?: AuthoritativeUsage }
| undefined;
const usage = meta?.usage;
if (!usage || typeof usage !== "object") return undefined;
return usage;
}
/**
 * Reasoning/answer token split for a (streaming) assistant message.
 *
 * Two sources are merged so the counter both ticks live and ends up exact:
 *
 * - a running ESTIMATE, summed via `estimateTokens` over the text of every
 *   `reasoning` / `text` part, which grows with each streamed delta;
 * - the AUTHORITATIVE usage from `metadata.usage`, which the server attaches
 *   only at a step/turn boundary (`finish-step` / `finish`) and which is
 *   cumulative over COMPLETED steps, so it lags the in-flight step.
 *
 * Returning the authoritative figure verbatim would freeze the counter between
 * boundaries and make it jump in steps (issue #163). Instead each component is
 * the MAX of the two sources: the estimate carries the number between
 * boundaries, the authoritative figure takes over once it exceeds the (rough)
 * estimate, and because the usage is cumulative the result never decreases.
 *
 * A provider that streams no reasoning text shows `reasoning: 0` until the
 * authoritative usage arrives, at which point `reasoningTokens` is surfaced.
 *
 * @param message - The tail assistant message, or `undefined` if none exists.
 * @returns The split; all zeros and `authoritative: false` when there is no
 *   message, `authoritative: true` as soon as usage metadata is present.
 */
export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens {
  if (!message) return { reasoning: 0, output: 0, authoritative: false };

  // Text payload of a part; parts without text contribute an empty string.
  const textOf = (part: unknown): string =>
    (part as { text?: string }).text ?? "";

  // Running estimate across all parts, including the step still in flight
  // (which the authoritative usage does not cover yet).
  let reasoningEstimate = 0;
  let outputEstimate = 0;
  for (const part of message.parts ?? []) {
    switch (part.type) {
      case "reasoning":
        reasoningEstimate += estimateTokens(textOf(part));
        break;
      case "text":
        outputEstimate += estimateTokens(textOf(part));
        break;
      default:
        break;
    }
  }

  const usage = metadataUsage(message);
  if (usage) {
    // In the AI SDK usage shape `outputTokens` already contains the reasoning
    // tokens, so the "answer" figure is the difference, clamped at zero in
    // case a provider reports the two inconsistently.
    const reasoningBase = usage.reasoningTokens ?? 0;
    const outputBase = Math.max(0, (usage.outputTokens ?? 0) - reasoningBase);
    return {
      reasoning: Math.max(reasoningBase, reasoningEstimate),
      output: Math.max(outputBase, outputEstimate),
      authoritative: true,
    };
  }

  // Nothing authoritative yet: the estimate is the live figure.
  return {
    reasoning: reasoningEstimate,
    output: outputEstimate,
    authoritative: false,
  };
}

View File

@@ -0,0 +1,87 @@
import { describe, expect, it } from "vitest";
import { mcpTestButtonView } from "./ai-mcp-server-test-view";
/**
 * Pure-helper tests for the inline "Test" button presentation. The loading
 * state is handled by the component's `isPending`, so the helper is exercised
 * here for: idle / ok-with-tools / ok-without-tools / failed (both a
 * `{ ok: false }` payload and a rejected request), plus the tooltip text
 * branches that are easiest to break silently.
 */
// Stand-in for the i18n `t`: returns the key unchanged, except that when the
// options carry an `n` it substitutes the first "{{n}}" in the key. This keeps
// the label/tooltip branches observable without loading the real i18n bundle.
const t = (key: string, options?: Record<string, unknown>): string => {
  if (!options || !("n" in options)) return key;
  const { n } = options as { n: unknown };
  return key.replace("{{n}}", String(n));
};
describe("mcpTestButtonView", () => {
  // Expected presentation of a successful test; label and tooltip vary.
  const okView = (label: string, tooltip: string) => ({
    state: "ok",
    color: "green",
    variant: "light",
    label,
    tooltip,
  });

  // Expected presentation of any failure; only the tooltip varies.
  const failedView = (tooltip: string) => ({
    state: "failed",
    color: "red",
    variant: "light",
    label: "Failed",
    tooltip,
  });

  it("idle when there is no result", () => {
    const view = mcpTestButtonView(undefined, t);
    expect(view).toEqual({
      state: "idle",
      color: undefined,
      variant: "default",
      label: "Test",
      tooltip: "",
    });
  });

  it("ok with tools lists them in the tooltip", () => {
    const view = mcpTestButtonView({ ok: true, tools: ["a", "b"] }, t);
    expect(view).toEqual(okView("OK · 2", "a, b"));
  });

  it('ok with zero tools shows "No tools available"', () => {
    const view = mcpTestButtonView({ ok: true, tools: [] }, t);
    expect(view).toEqual(okView("OK · 0", "No tools available"));
  });

  it("failed surfaces the error text in the tooltip", () => {
    const view = mcpTestButtonView({ ok: false, error: "402: nope" }, t);
    expect(view).toEqual(failedView("402: nope"));
  });

  it("failed when the request itself rejects (no result payload)", () => {
    // A 401/403/500/network rejection carries no { ok } body, only a thrown
    // error. The row must still go red "Failed" instead of falling back to the
    // idle "Test".
    const rejection = { response: { data: { message: "Unauthorized" } } };
    const view = mcpTestButtonView(undefined, t, rejection);
    expect(view).toEqual(failedView("Unauthorized"));
  });

  it("reject without a server message falls back to the generic label", () => {
    // A bare network error has no response body; it is still a failure, and the
    // tooltip comes from the i18n fallback key.
    const view = mcpTestButtonView(undefined, t, new Error("network down"));
    expect(view).toEqual(failedView("Failed to update data"));
  });
});

View File

@@ -0,0 +1,90 @@
import type { IAiMcpServerTestResult } from "@/features/workspace/services/ai-mcp-server-service.ts";
/**
 * Minimal translator shape (i18next `t`): takes a key plus optional
 * interpolation values and returns the translated string. Declared locally so
 * this module stays a pure helper that can be unit-tested with a stub
 * translator.
 */
type Translate = (key: string, options?: Record<string, unknown>) => string;
/**
 * Subset of an axios-style rejection we read for the reject tooltip. `message`
 * comes off the wire, so it is typed `unknown` and narrowed at runtime rather
 * than trusted to be a string.
 */
type McpTestRequestError = {
  response?: { data?: { message?: unknown } };
};
/**
 * Best-effort extraction of a server-sent message from a rejected test request
 * (axios stores it at `error.response.data.message`).
 *
 * The value is validated instead of cast, so the declared `string | undefined`
 * return type actually holds:
 * - a non-blank string is returned as-is;
 * - an array (e.g. a list of validation messages) is reduced to its non-blank
 *   string entries joined with ", ";
 * - anything else (missing, blank, number, object, ...) yields `undefined`.
 *
 * @param error - Whatever the request rejected with; may be any value.
 * @returns The message text, or `undefined` for a bare/network error or an
 *   unusable message, so the caller can fall back to a generic label.
 */
function readRequestErrorMessage(error: unknown): string | undefined {
  if (!error || typeof error !== "object" || !("response" in error)) {
    return undefined;
  }
  const message = (error as McpTestRequestError).response?.data?.message;
  if (typeof message === "string") {
    // A blank message is as good as none: let the caller's fallback show.
    return message.trim() !== "" ? message : undefined;
  }
  if (Array.isArray(message)) {
    const parts = message.filter(
      (entry): entry is string =>
        typeof entry === "string" && entry.trim() !== "",
    );
    return parts.length > 0 ? parts.join(", ") : undefined;
  }
  return undefined;
}
/**
 * Presentation for the inline "Test" button, derived from the current test
 * result tristate (no result yet / ok / failed). Color is never the only signal
 * — the label and icon change too (a11y / colorblind-friendly). Kept as a single
 * pure derivation (rather than two parallel if/else chains) so the button and
 * tooltip can never drift apart, and so the text branches are unit-testable
 * without rendering the row.
 */
export interface McpTestButtonView {
/** Tristate; the component maps this to the leftSection icon. */
state: "idle" | "ok" | "failed";
/** Mantine Button color; undefined = theme default (idle). */
color?: string;
/** Mantine Button variant. */
variant: string;
/** Translated button label. */
label: string;
/**
 * Tooltip text; "" while there is no result (tooltip disabled). Not always a
 * translation: on success it is the comma-joined tool names (or a translated
 * "no tools" notice), on failure the raw error text from the result or the
 * rejected request (or a translated generic fallback).
 */
tooltip: string;
}
/**
 * Derive the inline "Test" button presentation from the latest test outcome.
 *
 * Branches, in priority order:
 * 1. `result.ok` truthy: green "OK · n" with the tool names (or a "no tools"
 *    notice) as the tooltip.
 * 2. `result.ok === false`: red "Failed" with the result's error text.
 * 3. `error` truthy (the request itself rejected, so there is no result
 *    payload): red "Failed" with the server-sent message if one can be read,
 *    else the generic translated fallback.
 * 4. Otherwise: the idle "Test" button with an empty tooltip.
 *
 * @param result - Payload of the last completed test, if any.
 * @param t - Translator used for labels and fallback tooltips.
 * @param error - Rejection of the test request, if it failed outright.
 * @returns The state, color, variant, label and tooltip for the button.
 */
export function mcpTestButtonView(
  result: IAiMcpServerTestResult | undefined,
  t: Translate,
  error?: unknown,
): McpTestButtonView {
  // Everything the two failure outcomes have in common; callers add `tooltip`.
  const failedBase = (): Omit<McpTestButtonView, "tooltip"> => ({
    state: "failed",
    color: "red",
    variant: "light",
    label: t("Failed"),
  });

  if (result?.ok) {
    const { tools } = result;
    return {
      state: "ok",
      color: "green",
      variant: "light",
      label: t("OK · {{n}}", { n: tools.length }),
      tooltip: tools.length > 0 ? tools.join(", ") : t("No tools available"),
    };
  }

  if (result && result.ok === false) {
    return { ...failedBase(), tooltip: result.error };
  }

  if (error) {
    // The request rejected (401/403/500/network), so no `{ ok }` payload
    // exists. Without this branch the row would quietly return to the idle
    // "Test" instead of reporting the failure.
    return {
      ...failedBase(),
      tooltip: readRequestErrorMessage(error) ?? t("Failed to update data"),
    };
  }

  return {
    state: "idle",
    color: undefined,
    variant: "default",
    label: t("Test"),
    tooltip: "",
  };
}

View File

@@ -1,4 +1,4 @@
import { useState } from "react"; import { useEffect, useState } from "react";
import { import {
ActionIcon, ActionIcon,
Badge, Badge,
@@ -10,18 +10,28 @@ import {
Stack, Stack,
Switch, Switch,
Text, Text,
Tooltip,
} from "@mantine/core"; } from "@mantine/core";
import { useDisclosure } from "@mantine/hooks"; import { useDisclosure } from "@mantine/hooks";
import { modals } from "@mantine/modals"; import { modals } from "@mantine/modals";
import { IconPencil, IconPlus, IconTrash } from "@tabler/icons-react"; import {
IconCheck,
IconPencil,
IconPlugConnected,
IconPlus,
IconTrash,
IconX,
} from "@tabler/icons-react";
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import useUserRole from "@/hooks/use-user-role.tsx"; import useUserRole from "@/hooks/use-user-role.tsx";
import { import {
useAiMcpServersQuery, useAiMcpServersQuery,
useDeleteAiMcpServerMutation, useDeleteAiMcpServerMutation,
useTestAiMcpServerMutation,
useUpdateAiMcpServerMutation, useUpdateAiMcpServerMutation,
} from "@/features/workspace/queries/ai-mcp-server-query.ts"; } from "@/features/workspace/queries/ai-mcp-server-query.ts";
import { IAiMcpServer } from "@/features/workspace/services/ai-mcp-server-service.ts"; import { IAiMcpServer } from "@/features/workspace/services/ai-mcp-server-service.ts";
import { mcpTestButtonView } from "@/features/workspace/components/settings/components/ai-mcp-server-test-view.ts";
import AiMcpServerForm from "./ai-mcp-server-form.tsx"; import AiMcpServerForm from "./ai-mcp-server-form.tsx";
/** /**
@@ -112,55 +122,15 @@ export default function AiMcpServers() {
<Stack gap="xs" mt="sm"> <Stack gap="xs" mt="sm">
{servers?.map((server) => ( {servers?.map((server) => (
<Group key={server.id} justify="space-between" wrap="nowrap"> <AiMcpServerRow
<Stack gap={2} style={{ minWidth: 0 }}> key={server.id}
<Group gap="xs"> server={server}
<Text fw={500} truncate> onEdit={openEdit}
{server.name} onDelete={confirmDelete}
</Text> onToggleEnabled={(enabled) =>
<Badge size="xs" variant="light"> updateMutation.mutate({ id: server.id, enabled })
{server.transport.toUpperCase()} }
</Badge> />
</Group>
<Text
size="xs"
c="dimmed"
truncate
style={{ fontFamily: "ui-monospace, Menlo, monospace" }}
>
{server.url}
</Text>
</Stack>
<Group gap="xs" wrap="nowrap">
<Switch
size="sm"
checked={server.enabled}
aria-label={t("Enabled")}
onChange={(event) =>
updateMutation.mutate({
id: server.id,
enabled: event.currentTarget.checked,
})
}
/>
<ActionIcon
variant="subtle"
aria-label={t("Edit")}
onClick={() => openEdit(server)}
>
<IconPencil size={16} />
</ActionIcon>
<ActionIcon
variant="subtle"
color="red"
aria-label={t("Delete")}
onClick={() => confirmDelete(server)}
>
<IconTrash size={16} />
</ActionIcon>
</Group>
</Group>
))} ))}
</Stack> </Stack>
@@ -180,3 +150,127 @@ export default function AiMcpServers() {
</Paper> </Paper>
); );
} }
/** Props for a single external MCP server row (`AiMcpServerRow`). */
interface AiMcpServerRowProps {
/** The server rendered by this row (name, transport badge, url, enabled). */
server: IAiMcpServer;
/** Called with the row's server when the Edit icon is clicked. */
onEdit: (server: IAiMcpServer) => void;
/** Called with the row's server when the Delete icon is clicked. */
onDelete: (server: IAiMcpServer) => void;
/** Called with the Switch's new checked state when it is toggled. */
onToggleEnabled: (enabled: boolean) => void;
}
/**
 * One external MCP server row: name, transport badge and url on the left; the
 * Test button, enabled Switch, Edit and Delete controls on the right.
 *
 * The row calls `useTestAiMcpServerMutation()` itself so that the inline Test
 * result and loading state belong to this row only (a mutation shared by all
 * rows would make `isPending` global and every row would flicker).
 */
function AiMcpServerRow(props: AiMcpServerRowProps) {
  const { server, onEdit, onDelete, onToggleEnabled } = props;
  const { t } = useTranslation();
  const testMutation = useTestAiMcpServerMutation();

  // Rows are keyed by `server.id`, so an edit to the connection-relevant fields
  // (url/transport/headers) does not remount the row and a stale ok/failed
  // result would linger. Drop the result whenever one of those fields changes.
  useEffect(() => {
    testMutation.reset();
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [server.url, server.transport, server.hasHeaders]);

  // One derivation feeds the button and the tooltip so they cannot disagree.
  // A rejected request (401/403/500/network) leaves no `data`, so the mutation
  // error is passed along too; otherwise the row would fall back to "Test"
  // instead of showing a red "Failed".
  const requestError = testMutation.isError ? testMutation.error : undefined;
  const view = mcpTestButtonView(testMutation.data, t, requestError);
  const testing = testMutation.isPending;

  // Icon for the current tristate; color is never the only signal.
  const renderStateIcon = () => {
    switch (view.state) {
      case "ok":
        return <IconCheck size={16} />;
      case "failed":
        return <IconX size={16} />;
      default:
        return <IconPlugConnected size={16} />;
    }
  };

  return (
    <Group justify="space-between" wrap="nowrap">
      <Stack gap={2} style={{ minWidth: 0 }}>
        <Group gap="xs">
          <Text fw={500} truncate>
            {server.name}
          </Text>
          <Badge size="xs" variant="light">
            {server.transport.toUpperCase()}
          </Badge>
        </Group>
        <Text
          size="xs"
          c="dimmed"
          truncate
          style={{ fontFamily: "ui-monospace, Menlo, monospace" }}
        >
          {server.url}
        </Text>
      </Stack>

      <Group gap="xs" wrap="nowrap">
        {/* Never disabled: trying a server out before enabling it is useful. */}
        <Tooltip
          label={view.tooltip}
          disabled={view.state === "idle"}
          multiline
          maw={320}
          withinPortal
        >
          <Button
            size="xs"
            miw={88}
            color={view.color}
            variant={view.variant}
            leftSection={testing ? undefined : renderStateIcon()}
            loading={testing}
            onClick={() => testMutation.mutate(server.id)}
          >
            {view.label}
          </Button>
        </Tooltip>
        <Switch
          size="sm"
          checked={server.enabled}
          aria-label={t("Enabled")}
          onChange={(event) => onToggleEnabled(event.currentTarget.checked)}
        />
        <ActionIcon
          variant="subtle"
          aria-label={t("Edit")}
          onClick={() => onEdit(server)}
        >
          <IconPencil size={16} />
        </ActionIcon>
        <ActionIcon
          variant="subtle"
          color="red"
          aria-label={t("Delete")}
          onClick={() => onDelete(server)}
        >
          <IconTrash size={16} />
        </ActionIcon>
      </Group>
    </Group>
  );
}

View File

@@ -7,6 +7,7 @@ import {
Button, Button,
Group, Group,
Modal, Modal,
NumberInput,
Paper, Paper,
PasswordInput, PasswordInput,
Select, Select,
@@ -83,6 +84,9 @@ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [
// (empty means "leave unchanged" unless explicitly cleared). // (empty means "leave unchanged" unless explicitly cleared).
const formSchema = z.object({ const formSchema = z.object({
chatModel: z.string(), chatModel: z.string(),
// Max context window in tokens shown in the chat header badge. A number, or ""
// when the NumberInput is empty (no limit).
chatContextWindow: z.union([z.number(), z.literal("")]),
// Chat provider implementation (reasoning surfacing). Default openai-compatible. // Chat provider implementation (reasoning surfacing). Default openai-compatible.
chatApiStyle: z.enum(["openai-compatible", "openai"]), chatApiStyle: z.enum(["openai-compatible", "openai"]),
// Cheap model id for the anonymous public-share assistant; empty = use chatModel. // Cheap model id for the anonymous public-share assistant; empty = use chatModel.
@@ -311,6 +315,7 @@ export default function AiProviderSettings() {
validate: zod4Resolver(formSchema), validate: zod4Resolver(formSchema),
initialValues: { initialValues: {
chatModel: "", chatModel: "",
chatContextWindow: "",
chatApiStyle: "openai-compatible" as ChatApiStyle, chatApiStyle: "openai-compatible" as ChatApiStyle,
publicShareChatModel: "", publicShareChatModel: "",
publicShareAssistantRoleId: "", publicShareAssistantRoleId: "",
@@ -334,6 +339,7 @@ export default function AiProviderSettings() {
if (!settings) return; if (!settings) return;
form.setValues({ form.setValues({
chatModel: settings.chatModel ?? "", chatModel: settings.chatModel ?? "",
chatContextWindow: settings.chatContextWindow ?? "",
chatApiStyle: settings.chatApiStyle ?? "openai-compatible", chatApiStyle: settings.chatApiStyle ?? "openai-compatible",
publicShareChatModel: settings.publicShareChatModel ?? "", publicShareChatModel: settings.publicShareChatModel ?? "",
publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "", publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "",
@@ -364,6 +370,12 @@ export default function AiProviderSettings() {
// Everything is OpenAI-compatible. // Everything is OpenAI-compatible.
driver: "openai", driver: "openai",
chatModel: values.chatModel, chatModel: values.chatModel,
// Max context window for the chat header badge; empty NumberInput ("") →
// 0, which clears the limit server-side (no denominator shown).
chatContextWindow:
typeof values.chatContextWindow === "number"
? values.chatContextWindow
: 0,
chatApiStyle: values.chatApiStyle, chatApiStyle: values.chatApiStyle,
// Cheap model id for the anonymous public-share assistant; empty falls // Cheap model id for the anonymous public-share assistant; empty falls
// back to chatModel server-side. // back to chatModel server-side.
@@ -767,6 +779,18 @@ export default function AiProviderSettings() {
{t("Resolves to {{url}}", { url: chatResolved })} {t("Resolves to {{url}}", { url: chatResolved })}
</Text> </Text>
<NumberInput
mt="sm"
label={t("Context window (tokens)")}
description={t(
"Shown as used / total in the chat header. Leave empty to hide the limit.",
)}
min={0}
allowDecimal={false}
disabled={isLoading}
{...form.getInputProps("chatContextWindow")}
/>
<Select <Select
mt="sm" mt="sm"
label={t("Protocol")} label={t("Protocol")}

View File

@@ -22,6 +22,8 @@ export type ChatApiStyle = "openai-compatible" | "openai";
export interface IAiSettings { export interface IAiSettings {
driver?: AiDriver; driver?: AiDriver;
chatModel?: string; chatModel?: string;
// Max context window in tokens shown in the chat header badge; 0/unset = no limit.
chatContextWindow?: number;
chatApiStyle?: ChatApiStyle; chatApiStyle?: ChatApiStyle;
// Cheap model id for the anonymous public-share assistant; empty = chatModel. // Cheap model id for the anonymous public-share assistant; empty = chatModel.
publicShareChatModel?: string; publicShareChatModel?: string;
@@ -56,6 +58,8 @@ export interface IAiSettings {
export interface IAiSettingsUpdate { export interface IAiSettingsUpdate {
driver?: AiDriver; driver?: AiDriver;
chatModel?: string; chatModel?: string;
// Max context window in tokens for the chat header badge; 0 = clear the limit.
chatContextWindow?: number;
chatApiStyle?: ChatApiStyle; chatApiStyle?: ChatApiStyle;
publicShareChatModel?: string; publicShareChatModel?: string;
// Agent-role id whose persona the public-share assistant adopts; empty = // Agent-role id whose persona the public-share assistant adopts; empty =

View File

@@ -275,11 +275,12 @@ describe('flushAssistant', () => {
expect(f.toolCalls).not.toBeNull(); expect(f.toolCalls).not.toBeNull();
}); });
it('completed: attaches finishReason + normalized usage + contextTokens', () => { it('completed: attaches finishReason + normalized usage + contextTokens + maxContextTokens', () => {
const f = flushAssistant([toolStep], '', 'completed', { const f = flushAssistant([toolStep], '', 'completed', {
finishReason: 'stop', finishReason: 'stop',
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
contextTokens: 15, contextTokens: 15,
maxContextTokens: 200000,
}); });
expect(f.status).toBe('completed'); expect(f.status).toBe('completed');
expect(f.metadata.finishReason).toBe('stop'); expect(f.metadata.finishReason).toBe('stop');
@@ -290,6 +291,23 @@ describe('flushAssistant', () => {
reasoningTokens: undefined, reasoningTokens: undefined,
}); });
expect(f.metadata.contextTokens).toBe(15); expect(f.metadata.contextTokens).toBe(15);
expect(f.metadata.maxContextTokens).toBe(200000);
});
it('completed: omits maxContextTokens when unset or 0', () => {
// No maxContextTokens in the extra (admin set no context window).
const f = flushAssistant([toolStep], '', 'completed', {
finishReason: 'stop',
contextTokens: 15,
});
expect('maxContextTokens' in f.metadata).toBe(false);
// Explicit 0 is treated the same as unset (no limit -> key omitted).
const f0 = flushAssistant([toolStep], '', 'completed', {
finishReason: 'stop',
contextTokens: 15,
maxContextTokens: 0,
});
expect('maxContextTokens' in f0.metadata).toBe(false);
}); });
it('error: records the error and a derived finishReason', () => { it('error: records the error and a derived finishReason', () => {

View File

@@ -616,6 +616,10 @@ export class AiChatService implements OnModuleInit {
contextTokens: contextTokens:
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
undefined, undefined,
// Max context window for the chat header badge denominator;
// resolved from the admin-configured provider settings (in
// closure scope here). Omitted/0 = no limit.
maxContextTokens: resolved?.chatContextWindow,
}), }),
); );
// Lifecycle: release the external MCP clients leased for this turn. // Lifecycle: release the external MCP clients leased for this turn.
@@ -1212,8 +1216,9 @@ export async function applyFinalize(
* `metadata.parts` is built by assistantParts over the finished steps, then the * `metadata.parts` is built by assistantParts over the finished steps, then the
* in-progress text appended as a trailing text part, so rowToUiMessage / * in-progress text appended as a trailing text part, so rowToUiMessage /
* findRecent keep replaying the turn unchanged. `metadata.finishReason`, * findRecent keep replaying the turn unchanged. `metadata.finishReason`,
* `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached * `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
* only when provided/relevant, matching the pre-#183 onFinish/onError records. * `metadata.maxContextTokens` are attached only when provided/relevant, matching
* the pre-#183 onFinish/onError records.
*/ */
export function flushAssistant( export function flushAssistant(
capturedSteps: ReadonlyArray<StepLike> | undefined, capturedSteps: ReadonlyArray<StepLike> | undefined,
@@ -1223,6 +1228,7 @@ export function flushAssistant(
finishReason?: string; finishReason?: string;
usage?: ChatStreamUsage | StreamUsage | undefined; usage?: ChatStreamUsage | StreamUsage | undefined;
contextTokens?: number; contextTokens?: number;
maxContextTokens?: number;
error?: string; error?: string;
}, },
): AssistantFlush { ): AssistantFlush {
@@ -1253,6 +1259,8 @@ export function flushAssistant(
normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage; normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
} }
if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens; if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
if (extra?.maxContextTokens)
metadata.maxContextTokens = extra.maxContextTokens;
if (extra?.error) metadata.error = extra.error; if (extra?.error) metadata.error = extra.error;
return { return {

View File

@@ -20,6 +20,7 @@ import { DB, Workspaces } from '@docmost/db/types/db';
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [ export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
'driver', 'driver',
'chatModel', 'chatModel',
'chatContextWindow',
'chatApiStyle', 'chatApiStyle',
'embeddingModel', 'embeddingModel',
'baseUrl', 'baseUrl',

View File

@@ -41,3 +41,35 @@ describe('UpdateAiSettingsDto.chatApiStyle', () => {
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined(); expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
}); });
}); });
/**
 * Validation of the `chatContextWindow` field on UpdateAiSettingsDto: it must
 * be an integer that is not negative, and it may be left out entirely.
 */
describe('UpdateAiSettingsDto.chatContextWindow', () => {
  // Validate a DTO built from `payload` and return the validation error
  // reported for chatContextWindow, or undefined when there is none.
  const fieldError = async (payload: Record<string, unknown>) => {
    const dto = plainToInstance(UpdateAiSettingsDto, payload);
    const errors = await validate(dto);
    return errors.find((e) => e.property === 'chatContextWindow');
  };

  it('accepts a non-negative integer (incl. 0 = clear the limit)', async () => {
    for (const chatContextWindow of [0, 200000]) {
      expect(await fieldError({ chatContextWindow })).toBeUndefined();
    }
  });

  it('rejects a negative value', async () => {
    expect(await fieldError({ chatContextWindow: -1 })).toBeDefined();
  });

  it('rejects a non-integer value', async () => {
    expect(await fieldError({ chatContextWindow: 1.5 })).toBeDefined();
  });

  it('accepts the field being omitted (optional)', async () => {
    expect(await fieldError({})).toBeUndefined();
  });
});

View File

@@ -0,0 +1,43 @@
import { parsePositiveInt } from './ai-settings.service';
/**
 * Read-path coercion for numeric provider settings that are stored as `::text`
 * (e.g. chatContextWindow) and therefore come back as strings. The DTO only
 * validates writes, so these cases pin the read side: losing the `Math.floor`
 * or loosening `> 0` to `>= 0` would otherwise slip through unnoticed.
 */
describe('parsePositiveInt', () => {
  // Assert that each raw value is coerced to undefined.
  const expectRejected = (...raws: unknown[]) => {
    for (const raw of raws) {
      expect(parsePositiveInt(raw)).toBeUndefined();
    }
  };

  it('keeps a valid positive integer string', () => {
    expect(parsePositiveInt('200000')).toBe(200000);
  });

  it('floors a fractional string', () => {
    const cases: Array<[string, number]> = [
      ['1.9', 1],
      ['1.0', 1],
    ];
    for (const [raw, expected] of cases) {
      expect(parsePositiveInt(raw)).toBe(expected);
    }
  });

  it('returns undefined for zero', () => {
    expectRejected('0');
  });

  it('returns undefined for a negative value', () => {
    expectRejected('-5');
  });

  it('returns undefined for an empty string', () => {
    expectRejected('');
  });

  it('returns undefined for a non-numeric string', () => {
    expectRejected('abc');
  });

  it('returns undefined for undefined / null', () => {
    expectRejected(undefined, null);
  });

  it('accepts a real number too (not only ::text strings)', () => {
    expect(parsePositiveInt(42)).toBe(42);
  });
});

View File

@@ -18,6 +18,18 @@ import {
PROVIDER_SETTINGS_KEYS, PROVIDER_SETTINGS_KEYS,
} from './ai.types'; } from './ai.types';
/**
 * Coerce a raw provider value (stored as `::text`, so it arrives as a string —
 * see workspace.repo.ts) into a positive integer, or `undefined` when no
 * positive integer can be derived from it. Used for numeric `::text` settings
 * such as `chatContextWindow`.
 *
 * The value is converted with `Number`, floored, and only then checked for
 * positivity, so the result is always an integer >= 1:
 * `"200000" → 200000`, `"1.9" → 1`, `42 → 42`;
 * `"0.5"`/`"0"`/`"-5"`/`""`/`"abc"`/`undefined`/`null` → `undefined`.
 *
 * @param raw - The stored value; typically a string, possibly a number.
 * @returns The floored positive integer, or `undefined`.
 */
export function parsePositiveInt(raw: unknown): number | undefined {
  const n = Number(raw);
  if (!Number.isFinite(n)) return undefined;
  // Floor BEFORE the positivity check: a fraction in (0, 1) floors to 0, and 0
  // means "no limit", so it must come back as undefined rather than as 0.
  const floored = Math.floor(n);
  return floored > 0 ? floored : undefined;
}
/** /**
* Shape of the partial update accepted by `update`. Mirrors the validated * Shape of the partial update accepted by `update`. Mirrors the validated
* controller DTO. `apiKey` / `embeddingApiKey` are write-only: undefined = * controller DTO. `apiKey` / `embeddingApiKey` are write-only: undefined =
@@ -26,6 +38,8 @@ import {
export interface UpdateAiSettingsInput { export interface UpdateAiSettingsInput {
driver?: AiDriver; driver?: AiDriver;
chatModel?: string; chatModel?: string;
// Max context window in tokens for the chat header badge. 0/empty = no limit.
chatContextWindow?: number;
chatApiStyle?: ChatApiStyle; chatApiStyle?: ChatApiStyle;
embeddingModel?: string; embeddingModel?: string;
baseUrl?: string; baseUrl?: string;
@@ -160,6 +174,9 @@ export class AiSettingsService {
const config: ResolvedAiConfig = { const config: ResolvedAiConfig = {
driver: provider.driver, driver: provider.driver,
chatModel: provider.chatModel, chatModel: provider.chatModel,
// Max context window for the chat header badge denominator. Stored as
// ::text; 0/unset/invalid = no limit (undefined).
chatContextWindow: parsePositiveInt(provider.chatContextWindow),
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'. // Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
chatApiStyle: provider.chatApiStyle, chatApiStyle: provider.chatApiStyle,
// Cheap model id for the anonymous public-share assistant; reuses the chat // Cheap model id for the anonymous public-share assistant; reuses the chat
@@ -219,6 +236,10 @@ export class AiSettingsService {
async getMasked(workspaceId: string): Promise<MaskedAiSettings> { async getMasked(workspaceId: string): Promise<MaskedAiSettings> {
const provider = await this.readProvider(workspaceId); const provider = await this.readProvider(workspaceId);
// Stored as ::text; coerce to a positive integer (or undefined) so the
// client receives a real number.
const chatContextWindow = parsePositiveInt(provider.chatContextWindow);
let hasApiKey = false; let hasApiKey = false;
let hasEmbeddingApiKey = false; let hasEmbeddingApiKey = false;
let hasSttApiKey = false; let hasSttApiKey = false;
@@ -243,6 +264,7 @@ export class AiSettingsService {
return { return {
driver: provider.driver, driver: provider.driver,
chatModel: provider.chatModel, chatModel: provider.chatModel,
chatContextWindow,
chatApiStyle: provider.chatApiStyle, chatApiStyle: provider.chatApiStyle,
embeddingModel: provider.embeddingModel, embeddingModel: provider.embeddingModel,
baseUrl: provider.baseUrl, baseUrl: provider.baseUrl,

View File

@@ -32,6 +32,9 @@ export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai'];
export interface AiProviderSettings { export interface AiProviderSettings {
driver: AiDriver; driver: AiDriver;
chatModel: string; chatModel: string;
// Max context window in tokens; surfaced to the chat header badge as the
// denominator ("current / max"). 0/unset = no limit (badge shows no denominator).
chatContextWindow?: number;
// Chat provider implementation for the `openai` driver. Unset → defaults to // Chat provider implementation for the `openai` driver. Unset → defaults to
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle. // 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
chatApiStyle?: ChatApiStyle; chatApiStyle?: ChatApiStyle;
@@ -72,6 +75,7 @@ export interface AiProviderSettings {
export const PROVIDER_SETTINGS_KEYS = [ export const PROVIDER_SETTINGS_KEYS = [
'driver', 'driver',
'chatModel', 'chatModel',
'chatContextWindow',
'chatApiStyle', 'chatApiStyle',
'embeddingModel', 'embeddingModel',
'baseUrl', 'baseUrl',
@@ -98,6 +102,9 @@ export const PROVIDER_SETTINGS_KEYS = [
export interface ResolvedAiConfig extends Partial<AiProviderSettings> { export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
driver?: AiDriver; driver?: AiDriver;
chatModel?: string; chatModel?: string;
// Max context window in tokens; surfaced to the chat header badge as the
// "current / max" denominator. 0/unset = no limit.
chatContextWindow?: number;
// Cheap model id for the public-share assistant; reuses the chat creds. // Cheap model id for the public-share assistant; reuses the chat creds.
publicShareChatModel?: string; publicShareChatModel?: string;
// Agent-role id whose persona the public-share assistant adopts (empty/unset // Agent-role id whose persona the public-share assistant adopts (empty/unset
@@ -116,6 +123,9 @@ export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
export interface MaskedAiSettings { export interface MaskedAiSettings {
driver?: AiDriver; driver?: AiDriver;
chatModel?: string; chatModel?: string;
// Max context window in tokens; the chat header badge denominator. 0/unset =
// no limit.
chatContextWindow?: number;
chatApiStyle?: ChatApiStyle; chatApiStyle?: ChatApiStyle;
embeddingModel?: string; embeddingModel?: string;
baseUrl?: string; baseUrl?: string;

View File

@@ -1,4 +1,4 @@
import { IsIn, IsOptional, IsString } from 'class-validator'; import { IsIn, IsInt, IsOptional, IsString, Min } from 'class-validator';
import { import {
AI_DRIVERS, AI_DRIVERS,
AiDriver, AiDriver,
@@ -25,6 +25,13 @@ export class UpdateAiSettingsDto {
@IsString() @IsString()
chatModel?: string; chatModel?: string;
// Max context window in tokens shown in the chat header badge. 0/empty =
// clear the limit (no denominator shown).
@IsOptional()
@IsInt()
@Min(0)
chatContextWindow?: number;
@IsOptional() @IsOptional()
@IsIn(CHAT_API_STYLES) @IsIn(CHAT_API_STYLES)
chatApiStyle?: ChatApiStyle; chatApiStyle?: ChatApiStyle;