From 993f884e64108347a495a956fb350a4dd8b4f37d Mon Sep 17 00:00:00 2001 From: claude_code Date: Thu, 25 Jun 2026 22:22:48 +0300 Subject: [PATCH 1/5] ci(develop): run server + mcp e2e on every develop push without blocking deploy (#187) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two independent jobs to develop.yml — e2e-server and e2e-mcp — that run on each push to develop alongside test/build. `build` stays `needs: test` only, so a failing e2e never blocks the :develop image build/publish; the red run plus GitHub's email to the pusher is the notification. - e2e-server: pgvector + redis services, migrations, apps/server test:e2e. - e2e-mcp: build editor-ext/server/mcp, migrate, start the prod server (REST + /collab in one process), wait for /api/health, seed the admin via /api/auth/setup, then run @docmost/mcp test:e2e. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/develop.yml | 150 ++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 2d81467c..957def23 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -56,3 +56,153 @@ jobs: tags: ${{ env.IMAGE }}:develop cache-from: type=gha,scope=develop-amd64 cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true + + # e2e jobs run on every develop push but DO NOT gate the build/publish above: + # `build` stays `needs: test` only, so the :develop image still ships even if + # e2e fails. A failing e2e job turns the run red and triggers GitHub's email + # to the pusher — that red run + email is the intended notification, not a + # deploy block. + e2e-server: + runs-on: ubuntu-latest + env: + DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost + REDIS_URL: redis://localhost:6379 + APP_SECRET: ci-e2e-secret-change-me-min-32-characters + APP_URL: http://localhost:3000 + services: + postgres: + image: pgvector/pgvector:pg18 + env: + POSTGRES_DB: docmost + POSTGRES_USER: docmost + POSTGRES_PASSWORD: docmost + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U docmost" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + redis: + image: redis:7 + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build editor-ext + run: pnpm --filter @docmost/editor-ext build + + - name: Run migrations + run: pnpm --filter ./apps/server migration:latest + + - name: Run server e2e + run: pnpm --filter ./apps/server test:e2e + + # Same rationale as e2e-server: this job is intentionally NOT in + # `build.needs`. Deploy of the :develop image must not be blocked by e2e; + # a red run plus GitHub's email to the pusher is the notification mechanism. + e2e-mcp: + runs-on: ubuntu-latest + env: + DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost + REDIS_URL: redis://localhost:6379 + APP_SECRET: ci-e2e-secret-change-me-min-32-characters + APP_URL: http://localhost:3000 + NODE_ENV: production + services: + postgres: + image: pgvector/pgvector:pg18 + env: + POSTGRES_DB: docmost + POSTGRES_USER: docmost + POSTGRES_PASSWORD: docmost + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U docmost" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + redis: + image: redis:7 + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build editor-ext + run: pnpm --filter @docmost/editor-ext build + + - name: Build server + run: pnpm server:build + + - name: Build mcp + run: pnpm --filter @docmost/mcp build + + - name: Run migrations + run: pnpm --filter ./apps/server migration:latest + + - name: Start server (prod) + run: pnpm --filter ./apps/server start:prod & + + - name: Wait for server health + run: | + for i in $(seq 1 60); do + if curl -fsS http://localhost:3000/api/health > /dev/null; then + echo "Server is healthy" + exit 0 + fi + sleep 2 + done + echo "Server did not become healthy in time" + exit 1 + + - name: Seed admin + run: | + curl -fsS -X POST http://localhost:3000/api/auth/setup \ + -H "Content-Type: application/json" \ + -d '{"name":"E2E","email":"e2e@example.com","password":"E2ePassword123","workspaceName":"E2E"}' + + - name: Run mcp e2e + env: + DOCMOST_API_URL: http://localhost:3000/api + DOCMOST_EMAIL: e2e@example.com + DOCMOST_PASSWORD: E2ePassword123 + run: pnpm --filter @docmost/mcp test:e2e From 2644fe6a831bb0bb245e75d5eb106527b4af342c Mon Sep 17 00:00:00 2001 From: claude_code Date: Thu, 25 Jun 2026 22:22:48 +0300 Subject: [PATCH 2/5] feat(ai-chat): inline Test button per external MCP server row (#170) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a per-row Test button to the external MCP servers list that shows the connection result inline (no toasts). Extract the row into AiMcpServerRow so each row owns its own useTestAiMcpServerMutation instance — independent loading and result, no cross-row flicker. States: idle (Test), pending (loading), success (green, "OK · N" with the tool count), failure (red, "Failed"); a tooltip shows the tool list or the error. The result resets when url/transport/headers change (the row is keyed by id, so it does not remount). Backend, service and mutation are unchanged. - ai-mcp-servers.tsx: AiMcpServerRow + Test button + reset effect + tooltip. - i18n: add Failed / "OK · {{n}}" (en, ru) and ru Test / tool-list keys. Co-Authored-By: Claude Opus 4.8 --- .../public/locales/en-US/translation.json | 2 + .../public/locales/ru-RU/translation.json | 5 + .../settings/components/ai-mcp-servers.tsx | 200 +++++++++++++----- 3 files changed, 156 insertions(+), 51 deletions(-) diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index bd8c4ed3..971cbc11 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -715,6 +715,8 @@ "Test": "Test", "Available tools": "Available tools", "No tools available": "No tools available", + "Failed": "Failed", + "OK · {{n}}": "OK · {{n}}", "Created successfully": "Created successfully", "Deleted successfully": "Deleted successfully", "Clear": "Clear", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index f8c59436..349dc227 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -711,6 +711,11 @@ "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}", "Failed to delete chat": "Не удалось удалить чат", "Failed to rename chat": "Не удалось переименовать чат", + "Failed": "Ошибка", + "OK · {{n}}": "OK · {{n}}", + "Test": "Тест", + "No tools available": "Инструменты недоступны", + "Available tools": "Доступные инструменты", "Minimize": "Свернуть", "No chats yet.": "Чатов пока нет.", "Send": "Отправить", diff --git a/apps/client/src/features/workspace/components/settings/components/ai-mcp-servers.tsx b/apps/client/src/features/workspace/components/settings/components/ai-mcp-servers.tsx index 15db8c22..5dabd174 100644 --- a/apps/client/src/features/workspace/components/settings/components/ai-mcp-servers.tsx +++ b/apps/client/src/features/workspace/components/settings/components/ai-mcp-servers.tsx @@ -1,4 +1,4 @@ -import { useState } from "react"; +import { useEffect, useState } from "react"; import { ActionIcon, Badge, @@ -10,15 +10,24 @@ import { Stack, Switch, Text, + Tooltip, } from "@mantine/core"; import { useDisclosure } from "@mantine/hooks"; import { modals } from "@mantine/modals"; -import { IconPencil, IconPlus, IconTrash } from "@tabler/icons-react"; +import { + IconCheck, + IconPencil, + IconPlugConnected, + IconPlus, + IconTrash, + IconX, +} from "@tabler/icons-react"; import { useTranslation } from "react-i18next"; import useUserRole from "@/hooks/use-user-role.tsx"; import { useAiMcpServersQuery, useDeleteAiMcpServerMutation, + useTestAiMcpServerMutation, useUpdateAiMcpServerMutation, } from "@/features/workspace/queries/ai-mcp-server-query.ts"; import { IAiMcpServer } from "@/features/workspace/services/ai-mcp-server-service.ts"; @@ -112,55 +121,15 @@ export default function AiMcpServers() { {servers?.map((server) => ( - - - - - {server.name} - - - {server.transport.toUpperCase()} - - - - {server.url} - - - - - - updateMutation.mutate({ - id: server.id, - enabled: event.currentTarget.checked, - }) - } - /> - openEdit(server)} - > - - - confirmDelete(server)} - > - - - - + + updateMutation.mutate({ id: server.id, enabled }) + } + /> ))} @@ -180,3 +149,132 @@ export default function AiMcpServers() { ); } + +interface AiMcpServerRowProps { + server: IAiMcpServer; + onEdit: (server: IAiMcpServer) => void; + onDelete: (server: IAiMcpServer) => void; + onToggleEnabled: (enabled: boolean) => void; +} + +/** + * A single external MCP server row: name/badge/url on the left and the + * Test / Switch / Edit / Delete controls on the right. Each row owns its own + * `useTestAiMcpServerMutation()` so the inline Test result and loading state are + * independent per row (a shared mutation would make `isPending` global and make + * every row flicker). + */ +function AiMcpServerRow({ + server, + onEdit, + onDelete, + onToggleEnabled, +}: AiMcpServerRowProps) { + const { t } = useTranslation(); + const testMutation = useTestAiMcpServerMutation(); + const result = testMutation.data; + + // The row is keyed by `server.id`, so editing the connection-relevant fields + // (url/transport/headers) does NOT remount it — an old success/failure result + // would otherwise stick. Clear the result when those fields change. + useEffect(() => { + testMutation.reset(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [server.url, server.transport, server.hasHeaders]); + + // Tooltip text describes the cause/details; disabled while there is no result. + let tooltipLabel = ""; + if (result?.ok) { + tooltipLabel = + result.tools.length > 0 + ? result.tools.join(", ") + : t("No tools available"); + } else if (result && result.ok === false) { + tooltipLabel = result.error; + } + + // Pick the button presentation from the current test state. Color is never the + // only signal — the label changes too (a11y / colorblind-friendly). + let buttonColor: string | undefined; + let buttonVariant = "default"; + let buttonIcon = ; + let buttonLabel = t("Test"); + if (result?.ok) { + buttonColor = "green"; + buttonVariant = "light"; + buttonIcon = ; + buttonLabel = t("OK · {{n}}", { n: result.tools.length }); + } else if (result && result.ok === false) { + buttonColor = "red"; + buttonVariant = "light"; + buttonIcon = ; + buttonLabel = t("Failed"); + } + + return ( + + + + + {server.name} + + + {server.transport.toUpperCase()} + + + + {server.url} + + + + + {/* Always clickable: testing a disabled server before enabling it is useful. */} + + + + onToggleEnabled(event.currentTarget.checked)} + /> + onEdit(server)} + > + + + onDelete(server)} + > + + + + + ); +} From 9b61024b9575a818102e86264acf700d7173c48a Mon Sep 17 00:00:00 2001 From: claude_code Date: Thu, 25 Jun 2026 22:39:09 +0300 Subject: [PATCH 3/5] feat(ai-chat): header badge shows current/max context, max from AI settings (#189) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The floating chat window's header badge flipped meaning — a live per-turn token counter while streaming, the persisted context size at rest — so it "reset to 1" on each prompt and conflated two different numbers. Replace it with a stable "current / max" context badge (e.g. `572 / 200k`). The live "Thinking · N tokens" inside the chat body stays; only the duplicate live counter is removed from the header. Max comes from a new admin setting "Context window (tokens)". The server resolves it and attaches `maxContextTokens` to the completed assistant turn's metadata (next to contextTokens), so the badge needs no client-side model resolution and this survives public shares / per-role models. Server: - ai.types: chatContextWindow on AiProviderSettings + PROVIDER_SETTINGS_KEYS + ResolvedAiConfig + MaskedAiSettings. - workspace.repo: chatContextWindow in AI_PROVIDER_SETTINGS_ALLOWED (parity). - update-ai-settings.dto: @IsInt @Min(0) chatContextWindow. - ai-settings.service: coerce the ::text-stored value to a positive int in resolve()/getMasked(). - ai-chat.service: flushAssistant writes metadata.maxContextTokens (>0); the completed turn passes resolved.chatContextWindow. Client: - ai-chat.types: maxContextTokens on the message-row metadata. - ai-chat-window: read maxContextTokens; render "current [/ max]"; drop the liveTurnTokens state/branch and the onLiveTurnTokens prop; new tooltip. - chat-thread: remove the live-turn-token throttle effect and plumbing. - count-stream-tokens: drop the now-dead liveTurnTokens()/types; keep estimateTokens. - settings: chatContextWindow on IAiSettings(+Update) + a NumberInput in the AI provider settings form. i18n: add the badge/settings keys (en, ru); remove the two now-unused keys. Tests: flushAssistant maxContextTokens, DTO validation, trim token tests. Co-Authored-By: Claude Opus 4.8 --- .../public/locales/en-US/translation.json | 5 +- .../public/locales/ru-RU/translation.json | 5 +- .../ai-chat/components/ai-chat-window.tsx | 47 +++--- .../ai-chat/components/chat-thread.tsx | 55 ------ .../features/ai-chat/types/ai-chat.types.ts | 3 + .../ai-chat/utils/count-stream-tokens.test.ts | 158 +----------------- .../ai-chat/utils/count-stream-tokens.ts | 106 +----------- .../components/ai-provider-settings.tsx | 24 +++ .../workspace/services/ai-settings-service.ts | 4 + .../src/core/ai-chat/ai-chat.service.spec.ts | 20 ++- .../src/core/ai-chat/ai-chat.service.ts | 12 +- .../repos/workspace/workspace.repo.ts | 1 + .../ai/ai-provider-settings-keys.spec.ts | 32 ++++ .../integrations/ai/ai-settings.service.ts | 23 +++ apps/server/src/integrations/ai/ai.types.ts | 10 ++ .../ai/dto/update-ai-settings.dto.ts | 9 +- 16 files changed, 175 insertions(+), 339 deletions(-) diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index 971cbc11..ad884ddb 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -1169,8 +1169,9 @@ "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.", "Built-in assistant persona": "Built-in assistant persona", "Minimize": "Minimize", - "Current context size": "Current context size", - "Tokens generated this turn": "Tokens generated this turn", + "Context size / model limit": "Context size / model limit", + "Context window (tokens)": "Context window (tokens)", + "Shown as used / total in the chat header. Leave empty to hide the limit.": "Shown as used / total in the chat header. Leave empty to hide the limit.", "AI agent": "AI agent", "Take a look at the current document": "Take a look at the current document", "AI agent is typing…": "AI agent is typing…", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index 349dc227..c6cb7c6a 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -704,8 +704,9 @@ "Ask the AI agent…": "Спросите AI-агента…", "Copy chat": "Копировать чат", "Created successfully": "Успешно создано", - "Current context size": "Текущий размер контекста", - "Tokens generated this turn": "Токенов сгенерировано за ход", + "Context size / model limit": "Размер контекста / лимит модели", + "Context window (tokens)": "Окно контекста (токены)", + "Shown as used / total in the chat header. Leave empty to hide the limit.": "Показывается в шапке чата как использовано / всего. Пусто — лимит скрыт.", "Delete this chat?": "Удалить этот чат?", "Deleted successfully": "Успешно удалено", "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}", diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index de0b9923..abd38952 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -161,12 +161,6 @@ export default function AiChatWindow() { const { data: messageRows, isLoading: messagesLoading } = useAiChatMessagesQuery(activeChatId ?? undefined); - // Live turn-token total (reasoning + output) for the in-flight turn, pushed up - // (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream. - // `null` means no turn is in flight -> the badge falls back to the persisted - // context size below. - const [liveTurnTokens, setLiveTurnTokens] = useState(null); - // The page the user is currently viewing. AiChatWindow lives in a pathless // parent layout route, so useParams() can't see :pageSlug. Match the full // pathname against the authenticated page route instead so "the current page" @@ -306,6 +300,25 @@ export default function AiChatWindow() { return 0; }, [activeChatId, messageRows]); + // The model's max context window (badge denominator). Read the most recent row + // carrying `maxContextTokens` (set alongside contextTokens on a completed + // turn); 0 when no row has it (older rows, or no admin-configured limit) — the + // badge then shows just the current size with no denominator. + const maxContextTokens = useMemo(() => { + if (!activeChatId || !messageRows) return 0; + for (let i = messageRows.length - 1; i >= 0; i--) { + const meta = messageRows[i].metadata; + if (!meta) continue; + if ( + typeof meta.maxContextTokens === "number" && + meta.maxContextTokens > 0 + ) { + return meta.maxContextTokens; + } + } + return 0; + }, [activeChatId, messageRows]); + // On (re)open, settle the geometry before paint (useLayoutEffect → no // first-frame jump): compute an initial top-right placement the first time, // and re-clamp an existing geometry to the current viewport on later opens @@ -495,20 +508,17 @@ export default function AiChatWindow() { )}
- {/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz); - once it finishes, fall back to the persisted context size. Require - > 0 so the very first emit (an empty tail message, count 0) does not - flash a "0" badge before any token streams in (#151 review). */} - {liveTurnTokens !== null && liveTurnTokens > 0 ? ( - - - {formatTokens(liveTurnTokens)} - - - ) : contextTokens > 0 ? ( - + {/* Always show the persisted "current / max" context. The denominator + (the admin-configured model limit) is appended only when known; + not clamped when current > max (shown as-is, e.g. "210k / 200k"). + Hidden entirely until a turn has recorded a context figure. */} + {contextTokens > 0 ? ( + {formatTokens(contextTokens)} + {maxContextTokens > 0 + ? ` / ${formatTokens(maxContextTokens)}` + : ""} ) : null} @@ -634,7 +644,6 @@ export default function AiChatWindow() { assistantName={currentRole?.name} onTurnFinished={onTurnFinished} onServerChatId={onServerChatId} - onLiveTurnTokens={setLiveTurnTokens} /> )}
diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index c906a940..14f9a2ad 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -20,7 +20,6 @@ import { } from "@/features/ai-chat/utils/role-launch.ts"; import { describeChatError } from "@/features/ai-chat/utils/error-message.ts"; import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts"; -import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import { dequeue, enqueueMessage, @@ -67,12 +66,6 @@ interface ChatThreadProps { * Copy/export button available mid-stream). Distinct from onTurnFinished, * which fires only at the terminal outcome. */ onServerChatId?: (serverChatId?: string) => void; - /** Reports the live turn-token total (reasoning + output) for the in-flight - * turn so the parent can show a header badge that ticks mid-stream. THROTTLED - * here (~8 Hz) so the parent re-renders a handful of times a second, not on - * every streamed delta. Called with `null` when no turn is in flight (the - * parent then reverts the badge to the persisted context size). */ - onLiveTurnTokens?: (tokens: number | null) => void; } /** @@ -117,7 +110,6 @@ export default function ChatThread({ assistantName, onTurnFinished, onServerChatId, - onLiveTurnTokens, }: ChatThreadProps) { const { t } = useTranslation(); @@ -328,53 +320,6 @@ export default function ChatThread({ // the SAME on-screen banner text can be mirrored into the export (issue #160). const errorView = error ? describeChatError(error.message ?? "", t) : null; - // Report the live turn-token total to the parent header badge, THROTTLED to - // ~8 Hz so the parent re-renders a few times a second instead of on every - // streamed delta. The tail assistant message's reasoning+output (estimate while - // streaming, authoritative once a step reports usage) is the live figure. When - // the turn ends we emit a final exact value, then `null` so the parent reverts - // the badge to the persisted context size. - const lastEmitRef = useRef(0); - const emitTimerRef = useRef | null>(null); - useEffect(() => { - if (!onLiveTurnTokens) return; - if (!isStreaming) { - // Turn ended (or never started): clear any pending throttle and revert. - if (emitTimerRef.current) { - clearTimeout(emitTimerRef.current); - emitTimerRef.current = null; - } - lastEmitRef.current = 0; - onLiveTurnTokens(null); - return; - } - const tail = messages[messages.length - 1]; - const live = tail?.role === "assistant" ? liveTurnTokens(tail) : null; - const total = live ? live.reasoning + live.output : 0; - const now = Date.now(); - const MIN_INTERVAL = 120; // ms (~8 Hz) - const elapsed = now - lastEmitRef.current; - if (elapsed >= MIN_INTERVAL) { - lastEmitRef.current = now; - onLiveTurnTokens(total); - } else if (!emitTimerRef.current) { - // Schedule a trailing emit so the FINAL value of a burst is not dropped. - emitTimerRef.current = setTimeout(() => { - emitTimerRef.current = null; - lastEmitRef.current = Date.now(); - onLiveTurnTokens(total); - }, MIN_INTERVAL - elapsed); - } - }, [messages, isStreaming, onLiveTurnTokens]); - - // Clear any pending throttle timer on unmount (chat switch via `key`) so a - // trailing emit can't fire into a torn-down thread's parent. - useEffect(() => { - return () => { - if (emitTimerRef.current) clearTimeout(emitTimerRef.current); - }; - }, []); - // A role was picked with autoStart=false: the role is bound but NOTHING was // sent, so chatId stays null and the empty state would keep showing the cards. // This flag hides the cards and reveals the composer (with the role indicated) diff --git a/apps/client/src/features/ai-chat/types/ai-chat.types.ts b/apps/client/src/features/ai-chat/types/ai-chat.types.ts index af595917..22a51058 100644 --- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts +++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts @@ -116,6 +116,9 @@ export interface IAiChatMessageRow { // turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the // floating window's header badge. contextTokens?: number; + // The model's max context window (denominator for the header badge); set + // alongside contextTokens on a completed turn; absent on older rows. + maxContextTokens?: number; // Set on an assistant row whose turn ended in a provider/stream error; the // raw provider error text (e.g. "402: ...") for inline display in the thread. error?: string; diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts index 3e650f0d..6b00fbc4 100644 --- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.test.ts @@ -1,17 +1,5 @@ import { describe, expect, it } from "vitest"; -import type { UIMessage } from "@ai-sdk/react"; -import { - estimateTokens, - liveTurnTokens, -} from "@/features/ai-chat/utils/count-stream-tokens.ts"; - -const msg = (parts: unknown[], metadata?: unknown): UIMessage => - ({ - id: Math.random().toString(), - role: "assistant", - parts, - metadata, - }) as UIMessage; +import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; describe("estimateTokens", () => { it("returns 0 for the empty string", () => { @@ -25,147 +13,3 @@ describe("estimateTokens", () => { expect(estimateTokens("12345678")).toBe(2); }); }); - -describe("liveTurnTokens — estimate path", () => { - it("is all zeros for an undefined message", () => { - expect(liveTurnTokens(undefined)).toEqual({ - reasoning: 0, - output: 0, - authoritative: false, - }); - }); - - it("is all zeros for a parts-less message", () => { - expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({ - reasoning: 0, - output: 0, - authoritative: false, - }); - }); - - it("estimates output from text parts", () => { - // 8 chars -> 2 tokens. - const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }])); - expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false }); - }); - - it("estimates reasoning from reasoning parts (kept separate from output)", () => { - const r = liveTurnTokens( - msg([ - { type: "reasoning", text: "12345678" }, - { type: "text", text: "abcd" }, - ]), - ); - expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false }); - }); - - it("accumulates across multiple text + reasoning parts (multi-step)", () => { - const r = liveTurnTokens( - msg([ - { type: "reasoning", text: "abcd" }, // 1 - { type: "text", text: "abcd" }, // 1 - { type: "tool-getPage", state: "output-available" }, // ignored - { type: "reasoning", text: "abcd" }, // 1 - { type: "text", text: "abcdefgh" }, // 2 - ]), - ); - expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false }); - }); - - it("ignores non text/reasoning parts (tools, step-start)", () => { - const r = liveTurnTokens( - msg([ - { type: "step-start" }, - { type: "tool-getPage", state: "input-available" }, - ]), - ); - expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false }); - }); -}); - -describe("liveTurnTokens — authoritative path", () => { - it("returns authoritative usage verbatim, splitting reasoning out of output", () => { - // outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30. - const r = liveTurnTokens( - msg([{ type: "text", text: "estimate would be tiny" }], { - usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 }, - }), - ); - expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true }); - }); - - it("treats missing reasoningTokens as 0 and keeps full output", () => { - const r = liveTurnTokens( - msg([{ type: "text", text: "x" }], { - usage: { inputTokens: 10, outputTokens: 42 }, - }), - ); - expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true }); - }); - - it("never returns a negative output when reasoning exceeds reported output", () => { - const r = liveTurnTokens( - msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }), - ); - expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true }); - }); - - it("falls back to the estimate when metadata has no usage object", () => { - const r = liveTurnTokens( - msg([{ type: "text", text: "abcd" }], { chatId: "c1" }), - ); - expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false }); - }); -}); - -describe("liveTurnTokens — combined authoritative + estimate (#163)", () => { - it("ticks the in-flight step above the completed-steps authoritative base", () => { - // The authoritative usage is the sum over COMPLETED steps (step 1). The - // CURRENT step is streaming and its text is NOT in `usage` yet, but it IS in - // the parts -> the running estimate must push the live figure above the base - // so the badge keeps growing between step boundaries. - const longText = "x".repeat(800); // 800 chars -> 200 est output tokens - const r = liveTurnTokens( - msg([{ type: "text", text: longText }], { - usage: { inputTokens: 500, outputTokens: 40 }, // step-1 base: 40 output - }), - ); - // max(authOutput=40, estOutput=200) = 200 -> the counter ticks, not frozen. - expect(r.output).toBe(200); - expect(r.authoritative).toBe(true); - }); - - it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => { - const longReasoning = "r".repeat(400); // 400 chars -> 100 est reasoning - const r = liveTurnTokens( - msg([{ type: "reasoning", text: longReasoning }], { - usage: { inputTokens: 100, outputTokens: 20, reasoningTokens: 20 }, - }), - ); - // reasoning: max(20, 100) = 100 ; output: max(max(0,20-20)=0, 0) = 0. - expect(r.reasoning).toBe(100); - expect(r.output).toBe(0); - expect(r.authoritative).toBe(true); - }); - - it("snaps to the authoritative figure once it exceeds the rough estimate", () => { - // Short on-screen text (estimate tiny) but a large authoritative output: - // the exact figure wins at the boundary (the counter never under-reports). - const r = liveTurnTokens( - msg([{ type: "text", text: "abcd" }], { - usage: { inputTokens: 10, outputTokens: 5000 }, - }), - ); - expect(r.output).toBe(5000); - }); - - it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => { - // Mirrors the legacy 'verbatim' tests: estimate < authoritative -> unchanged. - const r = liveTurnTokens( - msg([{ type: "text", text: "tiny" }], { - usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 }, - }), - ); - expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true }); - }); -}); diff --git a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts index 9a900996..aaf99599 100644 --- a/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts +++ b/apps/client/src/features/ai-chat/utils/count-stream-tokens.ts @@ -1,18 +1,11 @@ -import type { UIMessage } from "@ai-sdk/react"; - /** - * Live token counting for a streaming AI-chat turn — split into REASONING - * (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows - * `Thinking… · 60 tokens` next to its thinking indicator. + * Rough client-side token estimation for AI-chat UI affordances. * - * No provider streams exact per-token usage mid-stream, so the live number is a - * CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage - * once the server attaches it on a step/turn boundary (see the server's - * `chatStreamMetadata` + the client's read of `message.metadata.usage`). When - * authoritative usage is present we return it verbatim (the number "jumps to - * exact"); otherwise we return the running estimate. Pure + unit-testable: it - * never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the - * bundle, and be wrong for Gemini/Ollama anyway). + * No provider streams exact per-token usage mid-stream, so any in-flight figure + * is a CLIENT ESTIMATE (chars/≈4 heuristic). Pure + unit-testable: it never runs + * a real BPE tokenizer (that would be O(n²) on the hot path, bloat the bundle, + * and be wrong for Gemini/Ollama anyway). Used by the in-body reasoning counter + * ("Thinking · N tokens"). */ /** @@ -24,90 +17,3 @@ export function estimateTokens(text: string): number { if (!text) return 0; return Math.ceil(text.length / 4); } - -/** Authoritative per-step/turn usage the server attaches to message metadata. */ -export interface AuthoritativeUsage { - inputTokens?: number; - outputTokens?: number; - totalTokens?: number; - reasoningTokens?: number; -} - -/** Live token split for a turn's tail (streaming) assistant message. */ -export interface LiveTurnTokens { - /** Thinking/reasoning tokens (estimate, or authoritative when available). */ - reasoning: number; - /** Answer/output tokens (estimate, or authoritative when available). */ - output: number; - /** True when the numbers come from authoritative server usage, not estimate. */ - authoritative: boolean; -} - -/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */ -function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined { - const meta = message?.metadata as - | { usage?: AuthoritativeUsage } - | undefined; - const usage = meta?.usage; - if (!usage || typeof usage !== "object") return undefined; - return usage; -} - -/** - * Token split for the given (streaming) assistant message. - * - * COMBINES the authoritative server usage with the running text estimate so the - * counter ticks in real time AND lands exact. The server only attaches - * `metadata.usage` at a step/turn boundary (`finish-step`/`finish`) and it is - * CUMULATIVE over COMPLETED steps — it does NOT yet include the in-flight step. - * So a multi-step turn that returned the authoritative figure verbatim would - * FREEZE between boundaries and jump in steps (issue #163). - * - * Instead we always compute the running ESTIMATE (chars/≈4 over the message's - * `reasoning`/`text` parts, which grows on every streamed delta) and take the - * per-component MAX of the authoritative base and the estimate: - * - between boundaries the estimate of the in-flight step ticks the number up; - * - at a boundary the authoritative figure snaps it to exact; - * - because the server's usage is cumulative and we only ever take the max, the - * number is MONOTONIC — it never drops. - * - * Providers that don't stream reasoning text still surface a reasoning count once - * the authoritative usage arrives (`max(reasoningTokens, 0)`); on the pure - * estimate path (no usage yet) such a turn shows `reasoning: 0` until then. - */ -export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens { - if (!message) return { reasoning: 0, output: 0, authoritative: false }; - - // Running ESTIMATE over every reasoning/text part — grows on each delta. This - // includes the IN-FLIGHT step, which the authoritative usage does not cover yet. - let estReasoning = 0; - let estOutput = 0; - for (const part of message.parts ?? []) { - if (part.type === "reasoning") { - estReasoning += estimateTokens((part as { text?: string }).text ?? ""); - } else if (part.type === "text") { - estOutput += estimateTokens((part as { text?: string }).text ?? ""); - } - } - - const usage = metadataUsage(message); - if (!usage) { - // No authoritative usage streamed yet: the estimate IS the live figure. - return { reasoning: estReasoning, output: estOutput, authoritative: false }; - } - - // Authoritative sum over COMPLETED steps. `outputTokens` already INCLUDES - // reasoning in the AI SDK usage shape, so subtract it out for the "answer" - // figure (never go negative if a provider reports them inconsistently). - const authReasoning = usage.reasoningTokens ?? 0; - const authOutput = Math.max(0, (usage.outputTokens ?? 0) - authReasoning); - - // Per-component max: the in-flight step's estimate ticks above the completed- - // steps base between boundaries, and the authoritative figure wins once it - // exceeds the (rough) estimate at the next boundary. Monotonic by construction. - return { - reasoning: Math.max(authReasoning, estReasoning), - output: Math.max(authOutput, estOutput), - authoritative: true, - }; -} diff --git a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx index 08348756..811c2610 100644 --- a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx +++ b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx @@ -7,6 +7,7 @@ import { Button, Group, Modal, + NumberInput, Paper, PasswordInput, Select, @@ -83,6 +84,9 @@ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [ // (empty means "leave unchanged" unless explicitly cleared). const formSchema = z.object({ chatModel: z.string(), + // Max context window in tokens shown in the chat header badge. A number, or "" + // when the NumberInput is empty (no limit). + chatContextWindow: z.union([z.number(), z.literal("")]), // Chat provider implementation (reasoning surfacing). Default openai-compatible. chatApiStyle: z.enum(["openai-compatible", "openai"]), // Cheap model id for the anonymous public-share assistant; empty = use chatModel. @@ -311,6 +315,7 @@ export default function AiProviderSettings() { validate: zod4Resolver(formSchema), initialValues: { chatModel: "", + chatContextWindow: "", chatApiStyle: "openai-compatible" as ChatApiStyle, publicShareChatModel: "", publicShareAssistantRoleId: "", @@ -334,6 +339,7 @@ export default function AiProviderSettings() { if (!settings) return; form.setValues({ chatModel: settings.chatModel ?? "", + chatContextWindow: settings.chatContextWindow ?? "", chatApiStyle: settings.chatApiStyle ?? "openai-compatible", publicShareChatModel: settings.publicShareChatModel ?? "", publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "", @@ -364,6 +370,12 @@ export default function AiProviderSettings() { // Everything is OpenAI-compatible. driver: "openai", chatModel: values.chatModel, + // Max context window for the chat header badge; empty NumberInput ("") → + // 0, which clears the limit server-side (no denominator shown). + chatContextWindow: + typeof values.chatContextWindow === "number" + ? values.chatContextWindow + : 0, chatApiStyle: values.chatApiStyle, // Cheap model id for the anonymous public-share assistant; empty falls // back to chatModel server-side. @@ -767,6 +779,18 @@ export default function AiProviderSettings() { {t("Resolves to {{url}}", { url: chatResolved })} + +