Compare commits
5 Commits
fix/ai-cha
...
batch/issu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b392219659 | ||
|
|
ba5cd02439 | ||
|
|
9b61024b95 | ||
|
|
2644fe6a83 | ||
|
|
993f884e64 |
157
.github/workflows/develop.yml
vendored
157
.github/workflows/develop.yml
vendored
@@ -56,3 +56,160 @@ jobs:
|
||||
tags: ${{ env.IMAGE }}:develop
|
||||
cache-from: type=gha,scope=develop-amd64
|
||||
cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true
|
||||
|
||||
# e2e jobs run on every develop push but DO NOT gate the build/publish above:
|
||||
# `build` stays `needs: test` only, so the :develop image still ships even if
|
||||
# e2e fails. A failing e2e job turns the run red and triggers GitHub's email
|
||||
# to the pusher — that red run + email is the intended notification, not a
|
||||
# deploy block.
|
||||
e2e-server:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost
|
||||
REDIS_URL: redis://localhost:6379
|
||||
APP_SECRET: ci-e2e-secret-change-me-min-32-characters
|
||||
APP_URL: http://localhost:3000
|
||||
services:
|
||||
postgres:
|
||||
image: pgvector/pgvector:pg18
|
||||
env:
|
||||
POSTGRES_DB: docmost
|
||||
POSTGRES_USER: docmost
|
||||
POSTGRES_PASSWORD: docmost
|
||||
ports:
|
||||
- 5432:5432
|
||||
options: >-
|
||||
--health-cmd "pg_isready -U docmost"
|
||||
--health-interval 5s
|
||||
--health-timeout 5s
|
||||
--health-retries 20
|
||||
redis:
|
||||
image: redis:7
|
||||
ports:
|
||||
- 6379:6379
|
||||
options: >-
|
||||
--health-cmd "redis-cli ping"
|
||||
--health-interval 5s
|
||||
--health-timeout 5s
|
||||
--health-retries 20
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: pnpm
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build editor-ext
|
||||
run: pnpm --filter @docmost/editor-ext build
|
||||
|
||||
- name: Run migrations
|
||||
run: pnpm --filter ./apps/server migration:latest
|
||||
|
||||
- name: Run server e2e
|
||||
run: pnpm --filter ./apps/server test:e2e
|
||||
|
||||
# Same rationale as e2e-server: this job is intentionally NOT in
|
||||
# `build.needs`. Deploy of the :develop image must not be blocked by e2e;
|
||||
# a red run plus GitHub's email to the pusher is the notification mechanism.
|
||||
e2e-mcp:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost
|
||||
REDIS_URL: redis://localhost:6379
|
||||
APP_SECRET: ci-e2e-secret-change-me-min-32-characters
|
||||
APP_URL: http://localhost:3000
|
||||
NODE_ENV: production
|
||||
services:
|
||||
postgres:
|
||||
image: pgvector/pgvector:pg18
|
||||
env:
|
||||
POSTGRES_DB: docmost
|
||||
POSTGRES_USER: docmost
|
||||
POSTGRES_PASSWORD: docmost
|
||||
ports:
|
||||
- 5432:5432
|
||||
options: >-
|
||||
--health-cmd "pg_isready -U docmost"
|
||||
--health-interval 5s
|
||||
--health-timeout 5s
|
||||
--health-retries 20
|
||||
redis:
|
||||
image: redis:7
|
||||
ports:
|
||||
- 6379:6379
|
||||
options: >-
|
||||
--health-cmd "redis-cli ping"
|
||||
--health-interval 5s
|
||||
--health-timeout 5s
|
||||
--health-retries 20
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 22
|
||||
cache: pnpm
|
||||
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
|
||||
- name: Build editor-ext
|
||||
run: pnpm --filter @docmost/editor-ext build
|
||||
|
||||
- name: Build server
|
||||
run: pnpm server:build
|
||||
|
||||
- name: Build mcp
|
||||
run: pnpm --filter @docmost/mcp build
|
||||
|
||||
- name: Run migrations
|
||||
run: pnpm --filter ./apps/server migration:latest
|
||||
|
||||
- name: Start server (prod)
|
||||
# Capture stdout/stderr so a start-up crash (bind error, stack trace,
|
||||
# migration mismatch) is diagnosable; without this the only signal is
|
||||
# the generic health-loop timeout below, ~120s later.
|
||||
run: pnpm --filter ./apps/server start:prod > /tmp/server.log 2>&1 &
|
||||
|
||||
- name: Wait for server health
|
||||
run: |
|
||||
for i in $(seq 1 60); do
|
||||
if curl -fsS http://localhost:3000/api/health > /dev/null; then
|
||||
echo "Server is healthy"
|
||||
exit 0
|
||||
fi
|
||||
sleep 2
|
||||
done
|
||||
echo "Server did not become healthy in time"
|
||||
exit 1
|
||||
|
||||
- name: Dump server log on failure
|
||||
if: failure()
|
||||
run: cat /tmp/server.log || true
|
||||
|
||||
- name: Seed admin
|
||||
run: |
|
||||
curl -fsS -X POST http://localhost:3000/api/auth/setup \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name":"E2E","email":"e2e@example.com","password":"E2ePassword123","workspaceName":"E2E"}'
|
||||
|
||||
- name: Run mcp e2e
|
||||
env:
|
||||
DOCMOST_API_URL: http://localhost:3000/api
|
||||
DOCMOST_EMAIL: e2e@example.com
|
||||
DOCMOST_PASSWORD: E2ePassword123
|
||||
run: pnpm --filter @docmost/mcp test:e2e
|
||||
|
||||
@@ -78,13 +78,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Fixed
|
||||
|
||||
- **AI chat: the desktop app no longer freezes at 100% CPU on long agent runs.**
|
||||
`useChat` re-rendered on every streamed token and `MessageItem`/`ReasoningBlock`
|
||||
re-parsed the whole transcript markdown (marked + DOMPurify) on every delta, so
|
||||
per-turn work grew quadratically and saturated the main thread. The stream is now
|
||||
throttled (`experimental_throttle`) to ~20 Hz and each finalized message row /
|
||||
markdown part / reasoning block is memoized, so a long turn no longer re-parses
|
||||
already-finished content. (#182)
|
||||
- **Editor: caret/selection landed on the wrong line when clicking inside code
|
||||
blocks and footnotes.** The affected NodeViews rendered their non-editable
|
||||
chrome (language menu, footnotes heading, footnote number marker) before the
|
||||
|
||||
@@ -715,6 +715,8 @@
|
||||
"Test": "Test",
|
||||
"Available tools": "Available tools",
|
||||
"No tools available": "No tools available",
|
||||
"Failed": "Failed",
|
||||
"OK · {{n}}": "OK · {{n}}",
|
||||
"Created successfully": "Created successfully",
|
||||
"Deleted successfully": "Deleted successfully",
|
||||
"Clear": "Clear",
|
||||
@@ -1167,8 +1169,9 @@
|
||||
"Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.",
|
||||
"Built-in assistant persona": "Built-in assistant persona",
|
||||
"Minimize": "Minimize",
|
||||
"Current context size": "Current context size",
|
||||
"Tokens generated this turn": "Tokens generated this turn",
|
||||
"Context size / model limit": "Context size / model limit",
|
||||
"Context window (tokens)": "Context window (tokens)",
|
||||
"Shown as used / total in the chat header. Leave empty to hide the limit.": "Shown as used / total in the chat header. Leave empty to hide the limit.",
|
||||
"AI agent": "AI agent",
|
||||
"Take a look at the current document": "Take a look at the current document",
|
||||
"AI agent is typing…": "AI agent is typing…",
|
||||
|
||||
@@ -704,13 +704,19 @@
|
||||
"Ask the AI agent…": "Спросите AI-агента…",
|
||||
"Copy chat": "Копировать чат",
|
||||
"Created successfully": "Успешно создано",
|
||||
"Current context size": "Текущий размер контекста",
|
||||
"Tokens generated this turn": "Токенов сгенерировано за ход",
|
||||
"Context size / model limit": "Размер контекста / лимит модели",
|
||||
"Context window (tokens)": "Окно контекста (токены)",
|
||||
"Shown as used / total in the chat header. Leave empty to hide the limit.": "Показывается в шапке чата как использовано / всего. Пусто — лимит скрыт.",
|
||||
"Delete this chat?": "Удалить этот чат?",
|
||||
"Deleted successfully": "Успешно удалено",
|
||||
"Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}",
|
||||
"Failed to delete chat": "Не удалось удалить чат",
|
||||
"Failed to rename chat": "Не удалось переименовать чат",
|
||||
"Failed": "Ошибка",
|
||||
"OK · {{n}}": "OK · {{n}}",
|
||||
"Test": "Тест",
|
||||
"No tools available": "Инструменты недоступны",
|
||||
"Available tools": "Доступные инструменты",
|
||||
"Minimize": "Свернуть",
|
||||
"No chats yet.": "Чатов пока нет.",
|
||||
"Send": "Отправить",
|
||||
|
||||
@@ -45,6 +45,7 @@ import {
|
||||
shouldCollapseOnOutsidePointer,
|
||||
isHeaderClick,
|
||||
} from "@/features/ai-chat/utils/collapse-helpers.ts";
|
||||
import { selectContextBadge } from "@/features/ai-chat/utils/context-badge.ts";
|
||||
import { useClipboard } from "@/hooks/use-clipboard";
|
||||
import { notifications } from "@mantine/notifications";
|
||||
import classes from "@/features/ai-chat/components/ai-chat-window.module.css";
|
||||
@@ -161,12 +162,6 @@ export default function AiChatWindow() {
|
||||
const { data: messageRows, isLoading: messagesLoading } =
|
||||
useAiChatMessagesQuery(activeChatId ?? undefined);
|
||||
|
||||
// Live turn-token total (reasoning + output) for the in-flight turn, pushed up
|
||||
// (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream.
|
||||
// `null` means no turn is in flight -> the badge falls back to the persisted
|
||||
// context size below.
|
||||
const [liveTurnTokens, setLiveTurnTokens] = useState<number | null>(null);
|
||||
|
||||
// The page the user is currently viewing. AiChatWindow lives in a pathless
|
||||
// parent layout route, so useParams() can't see :pageSlug. Match the full
|
||||
// pathname against the authenticated page route instead so "the current page"
|
||||
@@ -287,24 +282,19 @@ export default function AiChatWindow() {
|
||||
// shipped; older rows fall back to that turn's `usage` total. NOTE: reflects
|
||||
// PERSISTED rows (updates on chat open/switch); it does not tick live
|
||||
// mid-stream — acceptable for v1.
|
||||
const contextTokens = useMemo(() => {
|
||||
if (!activeChatId || !messageRows) return 0;
|
||||
for (let i = messageRows.length - 1; i >= 0; i--) {
|
||||
const meta = messageRows[i].metadata;
|
||||
if (!meta) continue;
|
||||
if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
|
||||
return meta.contextTokens;
|
||||
}
|
||||
const usage = meta.usage;
|
||||
if (usage) {
|
||||
const fallback =
|
||||
usage.totalTokens ??
|
||||
(usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
|
||||
if (fallback > 0) return fallback;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}, [activeChatId, messageRows]);
|
||||
//
|
||||
// The denominator `maxContextTokens` (the model's configured max window) is
|
||||
// derived in the SAME backward scan: it is stamped alongside `contextTokens`
|
||||
// on a completed turn, but the numerator and denominator are taken from the
|
||||
// most recent row carrying EACH value independently — they may land on
|
||||
// different rows (e.g. a fresh error row can carry contextTokens but not
|
||||
// maxContextTokens), so we keep scanning for whichever is still unset. 0 when
|
||||
// no row has it (older rows, or no admin-configured limit) — the badge then
|
||||
// shows just the current size with no denominator.
|
||||
const { contextTokens, maxContextTokens } = useMemo(
|
||||
() => selectContextBadge(activeChatId ? messageRows : undefined),
|
||||
[activeChatId, messageRows],
|
||||
);
|
||||
|
||||
// On (re)open, settle the geometry before paint (useLayoutEffect → no
|
||||
// first-frame jump): compute an initial top-right placement the first time,
|
||||
@@ -495,20 +485,17 @@ export default function AiChatWindow() {
|
||||
)}
|
||||
|
||||
<div style={{ flex: 1, display: "flex", justifyContent: "center" }}>
|
||||
{/* While a turn streams, show the LIVE turn-token count (ticks ~8 Hz);
|
||||
once it finishes, fall back to the persisted context size. Require
|
||||
> 0 so the very first emit (an empty tail message, count 0) does not
|
||||
flash a "0" badge before any token streams in (#151 review). */}
|
||||
{liveTurnTokens !== null && liveTurnTokens > 0 ? (
|
||||
<Tooltip label={t("Tokens generated this turn")} withArrow>
|
||||
<span className={classes.badge}>
|
||||
{formatTokens(liveTurnTokens)}
|
||||
</span>
|
||||
</Tooltip>
|
||||
) : contextTokens > 0 ? (
|
||||
<Tooltip label={t("Current context size")} withArrow>
|
||||
{/* Always show the persisted "current / max" context. The denominator
|
||||
(the admin-configured model limit) is appended only when known;
|
||||
not clamped when current > max (shown as-is, e.g. "210k / 200k").
|
||||
Hidden entirely until a turn has recorded a context figure. */}
|
||||
{contextTokens > 0 ? (
|
||||
<Tooltip label={t("Context size / model limit")} withArrow>
|
||||
<span className={classes.badge}>
|
||||
{formatTokens(contextTokens)}
|
||||
{maxContextTokens > 0
|
||||
? ` / ${formatTokens(maxContextTokens)}`
|
||||
: ""}
|
||||
</span>
|
||||
</Tooltip>
|
||||
) : null}
|
||||
@@ -634,7 +621,6 @@ export default function AiChatWindow() {
|
||||
assistantName={currentRole?.name}
|
||||
onTurnFinished={onTurnFinished}
|
||||
onServerChatId={onServerChatId}
|
||||
onLiveTurnTokens={setLiveTurnTokens}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -20,7 +20,6 @@ import {
|
||||
} from "@/features/ai-chat/utils/role-launch.ts";
|
||||
import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
|
||||
import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts";
|
||||
import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
||||
import {
|
||||
dequeue,
|
||||
enqueueMessage,
|
||||
@@ -29,14 +28,6 @@ import {
|
||||
} from "@/features/ai-chat/utils/queue-helpers.ts";
|
||||
import classes from "@/features/ai-chat/components/ai-chat.module.css";
|
||||
|
||||
// Throttle how often the streamed `messages` state triggers a re-render. Without
|
||||
// it, useChat updates state on EVERY token, so the whole transcript's markdown
|
||||
// (marked + DOMPurify) is re-parsed per token — on a long agent run that grows
|
||||
// into a quadratic CPU storm that pins the main thread and freezes the UI.
|
||||
// ~50ms (20 Hz) keeps streaming visually smooth while decoupling re-render cost
|
||||
// from the token rate.
|
||||
const STREAM_THROTTLE_MS = 50;
|
||||
|
||||
/** The page the user is currently viewing, sent as chat context. */
|
||||
export interface OpenPageContext {
|
||||
id: string;
|
||||
@@ -75,12 +66,6 @@ interface ChatThreadProps {
|
||||
* Copy/export button available mid-stream). Distinct from onTurnFinished,
|
||||
* which fires only at the terminal outcome. */
|
||||
onServerChatId?: (serverChatId?: string) => void;
|
||||
/** Reports the live turn-token total (reasoning + output) for the in-flight
|
||||
* turn so the parent can show a header badge that ticks mid-stream. THROTTLED
|
||||
* here (~8 Hz) so the parent re-renders a handful of times a second, not on
|
||||
* every streamed delta. Called with `null` when no turn is in flight (the
|
||||
* parent then reverts the badge to the persisted context size). */
|
||||
onLiveTurnTokens?: (tokens: number | null) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -125,7 +110,6 @@ export default function ChatThread({
|
||||
assistantName,
|
||||
onTurnFinished,
|
||||
onServerChatId,
|
||||
onLiveTurnTokens,
|
||||
}: ChatThreadProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
@@ -254,8 +238,6 @@ export default function ChatThread({
|
||||
id: chatStoreId,
|
||||
messages: initialMessages,
|
||||
transport,
|
||||
// See STREAM_THROTTLE_MS — bounds re-render/markdown-reparse frequency.
|
||||
experimental_throttle: STREAM_THROTTLE_MS,
|
||||
// `onFinish` (ai@6 useChat) fires from a `finally` on EVERY terminal outcome
|
||||
// — success, user Stop/abort (`isAbort`), network drop (`isDisconnect`), and
|
||||
// stream error (`isError`). Keep calling `onTurnFinished()` on all of them
|
||||
@@ -338,53 +320,6 @@ export default function ChatThread({
|
||||
// the SAME on-screen banner text can be mirrored into the export (issue #160).
|
||||
const errorView = error ? describeChatError(error.message ?? "", t) : null;
|
||||
|
||||
// Report the live turn-token total to the parent header badge, THROTTLED to
|
||||
// ~8 Hz so the parent re-renders a few times a second instead of on every
|
||||
// streamed delta. The tail assistant message's reasoning+output (estimate while
|
||||
// streaming, authoritative once a step reports usage) is the live figure. When
|
||||
// the turn ends we emit a final exact value, then `null` so the parent reverts
|
||||
// the badge to the persisted context size.
|
||||
const lastEmitRef = useRef(0);
|
||||
const emitTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
|
||||
useEffect(() => {
|
||||
if (!onLiveTurnTokens) return;
|
||||
if (!isStreaming) {
|
||||
// Turn ended (or never started): clear any pending throttle and revert.
|
||||
if (emitTimerRef.current) {
|
||||
clearTimeout(emitTimerRef.current);
|
||||
emitTimerRef.current = null;
|
||||
}
|
||||
lastEmitRef.current = 0;
|
||||
onLiveTurnTokens(null);
|
||||
return;
|
||||
}
|
||||
const tail = messages[messages.length - 1];
|
||||
const live = tail?.role === "assistant" ? liveTurnTokens(tail) : null;
|
||||
const total = live ? live.reasoning + live.output : 0;
|
||||
const now = Date.now();
|
||||
const MIN_INTERVAL = 120; // ms (~8 Hz)
|
||||
const elapsed = now - lastEmitRef.current;
|
||||
if (elapsed >= MIN_INTERVAL) {
|
||||
lastEmitRef.current = now;
|
||||
onLiveTurnTokens(total);
|
||||
} else if (!emitTimerRef.current) {
|
||||
// Schedule a trailing emit so the FINAL value of a burst is not dropped.
|
||||
emitTimerRef.current = setTimeout(() => {
|
||||
emitTimerRef.current = null;
|
||||
lastEmitRef.current = Date.now();
|
||||
onLiveTurnTokens(total);
|
||||
}, MIN_INTERVAL - elapsed);
|
||||
}
|
||||
}, [messages, isStreaming, onLiveTurnTokens]);
|
||||
|
||||
// Clear any pending throttle timer on unmount (chat switch via `key`) so a
|
||||
// trailing emit can't fire into a torn-down thread's parent.
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (emitTimerRef.current) clearTimeout(emitTimerRef.current);
|
||||
};
|
||||
}, []);
|
||||
|
||||
// A role was picked with autoStart=false: the role is bound but NOTHING was
|
||||
// sent, so chatId stays null and the empty state would keep showing the cards.
|
||||
// This flag hides the cards and reveals the composer (with the role indicated)
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { render } from "@testing-library/react";
|
||||
import { MantineProvider } from "@mantine/core";
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
|
||||
// Stub react-i18next (the component reads `useTranslation`). Mirrors the stub in
|
||||
// reasoning-block.test.tsx.
|
||||
vi.mock("react-i18next", () => ({
|
||||
useTranslation: () => ({ t: (key: string) => key }),
|
||||
}));
|
||||
|
||||
// Spy on `renderChatMarkdown` so we can count parse calls per text. We keep every
|
||||
// OTHER named export of markdown.ts intact via `importActual`, and override only
|
||||
// `renderChatMarkdown` with a `vi.fn()` that returns simple HTML so the component
|
||||
// still renders. This is the seam that proves the MarkdownPart memo works: a
|
||||
// finalized text part must NOT be re-parsed on a later streamed delta.
|
||||
// `vi.hoisted` so the spy exists when the hoisted `vi.mock` factory runs.
|
||||
const { renderChatMarkdownSpy } = vi.hoisted(() => ({
|
||||
renderChatMarkdownSpy: vi.fn((text: string) => `<p>${text}</p>`),
|
||||
}));
|
||||
vi.mock("@/features/ai-chat/utils/markdown.ts", async () => {
|
||||
const actual = await vi.importActual<
|
||||
typeof import("@/features/ai-chat/utils/markdown.ts")
|
||||
>("@/features/ai-chat/utils/markdown.ts");
|
||||
return { ...actual, renderChatMarkdown: renderChatMarkdownSpy };
|
||||
});
|
||||
|
||||
import MessageItem from "./message-item";
|
||||
|
||||
// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
|
||||
|
||||
const msg = (parts: UIMessage["parts"]): UIMessage =>
|
||||
({ id: "m1", role: "assistant", parts }) as UIMessage;
|
||||
|
||||
const renderRow = (message: UIMessage) =>
|
||||
render(
|
||||
<MantineProvider>
|
||||
<MessageItem message={message} />
|
||||
</MantineProvider>,
|
||||
);
|
||||
|
||||
/** Count how many spy calls parsed exactly `text` (filtering by the first arg). */
|
||||
const callsFor = (text: string) =>
|
||||
renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === text).length;
|
||||
|
||||
describe("MessageItem markdown memoization", () => {
|
||||
it("does not re-parse finalized text parts when only a tail part grows", () => {
|
||||
renderChatMarkdownSpy.mockClear();
|
||||
|
||||
// Two finalized text parts.
|
||||
const first = msg([
|
||||
{ type: "text", text: "alpha" },
|
||||
{ type: "text", text: "beta" },
|
||||
]);
|
||||
const { rerender } = renderRow(first);
|
||||
|
||||
// Both finalized parts parsed exactly once on the initial render.
|
||||
expect(callsFor("alpha")).toBe(1);
|
||||
expect(callsFor("beta")).toBe(1);
|
||||
|
||||
// A streamed delta: a NEW message object where only a third tail part grows;
|
||||
// the first two parts' text is byte-identical.
|
||||
const next = msg([
|
||||
{ type: "text", text: "alpha" },
|
||||
{ type: "text", text: "beta" },
|
||||
{ type: "text", text: "gamm" },
|
||||
]);
|
||||
rerender(
|
||||
<MantineProvider>
|
||||
<MessageItem message={next} />
|
||||
</MantineProvider>,
|
||||
);
|
||||
|
||||
// The finalized parts hit the MarkdownPart memo: still parsed at most once
|
||||
// each across BOTH renders (the resilient invariant). The only new parse is
|
||||
// for the changed/added tail part.
|
||||
expect(callsFor("alpha")).toBe(1);
|
||||
expect(callsFor("beta")).toBe(1);
|
||||
expect(callsFor("gamm")).toBe(1);
|
||||
});
|
||||
});
|
||||
@@ -1,73 +0,0 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
|
||||
// Stub react-i18next: importing the component module pulls in `useTranslation`,
|
||||
// and we only exercise the pure `arePropsEqual` comparator (no rendering), so a
|
||||
// minimal `t` that echoes the key is enough. Mirrors the stub in
|
||||
// reasoning-block.test.tsx.
|
||||
vi.mock("react-i18next", () => ({
|
||||
useTranslation: () => ({ t: (key: string) => key }),
|
||||
}));
|
||||
|
||||
import { arePropsEqual } from "./message-item";
|
||||
|
||||
/**
|
||||
* Tests for `arePropsEqual`, the `React.memo` comparator for MessageItem. It must
|
||||
* return false on any visible prop/content change (so the row re-renders) and
|
||||
* true when nothing visible changed (so a finalized row is skipped). A FIXED
|
||||
* message id is used so a content-identical clone yields an equal signature.
|
||||
*/
|
||||
const msg = (parts: UIMessage["parts"]): UIMessage =>
|
||||
({ id: "m1", role: "assistant", parts }) as UIMessage;
|
||||
|
||||
const props = (
|
||||
message: UIMessage,
|
||||
over: Record<string, unknown> = {},
|
||||
) => ({
|
||||
message,
|
||||
showCitations: true,
|
||||
neutralizeInternalLinks: false,
|
||||
assistantName: "AI",
|
||||
...over,
|
||||
});
|
||||
|
||||
describe("arePropsEqual", () => {
|
||||
it("returns false when showCitations differs", () => {
|
||||
const m = msg([{ type: "text", text: "answer" }]);
|
||||
expect(
|
||||
arePropsEqual(props(m), props(m, { showCitations: false })),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("returns false when neutralizeInternalLinks differs", () => {
|
||||
const m = msg([{ type: "text", text: "answer" }]);
|
||||
expect(
|
||||
arePropsEqual(props(m), props(m, { neutralizeInternalLinks: true })),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("returns false when assistantName differs", () => {
|
||||
const m = msg([{ type: "text", text: "answer" }]);
|
||||
expect(
|
||||
arePropsEqual(props(m), props(m, { assistantName: "Other" })),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("returns true on the identity fast path (same message object, equal props)", () => {
|
||||
const m = msg([{ type: "text", text: "answer" }]);
|
||||
expect(arePropsEqual(props(m), props(m))).toBe(true);
|
||||
});
|
||||
|
||||
it("returns true for the same content in a different message object", () => {
|
||||
const a = msg([{ type: "text", text: "answer" }]);
|
||||
const b = msg([{ type: "text", text: "answer" }]);
|
||||
expect(a).not.toBe(b);
|
||||
expect(arePropsEqual(props(a), props(b))).toBe(true);
|
||||
});
|
||||
|
||||
it("returns false when content changed in a different message object", () => {
|
||||
const a = msg([{ type: "text", text: "answer" }]);
|
||||
const b = msg([{ type: "text", text: "answer grown" }]);
|
||||
expect(arePropsEqual(props(a), props(b))).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -1,4 +1,3 @@
|
||||
import { memo } from "react";
|
||||
import { Box, Text } from "@mantine/core";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
@@ -11,7 +10,6 @@ import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/mess
|
||||
import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
|
||||
import { resolveAssistantName } from "@/features/ai-chat/utils/assistant-name.ts";
|
||||
import { reasoningTokensForPart } from "@/features/ai-chat/utils/reasoning-tokens.ts";
|
||||
import { messageSignature } from "@/features/ai-chat/utils/message-signature.ts";
|
||||
import { describeChatError } from "@/features/ai-chat/utils/error-message.ts";
|
||||
import classes from "@/features/ai-chat/components/ai-chat.module.css";
|
||||
|
||||
@@ -36,39 +34,6 @@ interface MessageItemProps {
|
||||
assistantName?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* One assistant text part rendered as sanitized markdown. Memoized on its inputs
|
||||
* so a finalized text part is NOT re-parsed on every streamed delta: during a
|
||||
* turn only the actively-growing tail part changes its `text`, so every earlier
|
||||
* part hits the memo and skips the expensive marked + DOMPurify pass. Props are
|
||||
* primitives, so React.memo's default shallow compare is exactly right (the
|
||||
* `text` string is compared by value).
|
||||
*/
|
||||
const MarkdownPart = memo(function MarkdownPart({
|
||||
text,
|
||||
neutralizeInternalLinks,
|
||||
}: {
|
||||
text: string;
|
||||
neutralizeInternalLinks: boolean;
|
||||
}) {
|
||||
const html = renderChatMarkdown(text, { neutralizeInternalLinks });
|
||||
if (html) {
|
||||
return (
|
||||
<div
|
||||
className={classes.markdown}
|
||||
// Sanitized by renderChatMarkdown (DOMPurify) before insertion.
|
||||
dangerouslySetInnerHTML={{ __html: html }}
|
||||
/>
|
||||
);
|
||||
}
|
||||
// Fallback when markdown could not render synchronously: raw text.
|
||||
return (
|
||||
<Text className={classes.markdown} style={{ whiteSpace: "pre-wrap" }}>
|
||||
{text}
|
||||
</Text>
|
||||
);
|
||||
});
|
||||
|
||||
/**
|
||||
* Render a single UIMessage by iterating its `parts`:
|
||||
* - `text` parts -> sanitized markdown.
|
||||
@@ -76,13 +41,12 @@ const MarkdownPart = memo(function MarkdownPart({
|
||||
* Other part kinds (reasoning, sources, files, step-start) are ignored for v1.
|
||||
* User messages render their text as a right-aligned plain bubble.
|
||||
*
|
||||
* This component is memoized (see `arePropsEqual` at the bottom) on a cheap
|
||||
* per-message content signature: the streaming TAIL message's signature changes
|
||||
* on each delta so it still re-renders and streams in, while finalized rows are
|
||||
* skipped. Each text part's markdown is itself memoized via `MarkdownPart`, so a
|
||||
* long turn no longer re-parses the whole transcript on every token.
|
||||
* This component is intentionally NOT memoized: `useChat` replaces the streaming
|
||||
* assistant message with a freshly cloned object on every streamed delta, so the
|
||||
* `message` prop identity (and its `parts`) changes each tick. Re-rendering the
|
||||
* text parts on each delta is what makes the answer stream in progressively.
|
||||
*/
|
||||
function MessageItem({
|
||||
export default function MessageItem({
|
||||
message,
|
||||
showCitations = true,
|
||||
neutralizeInternalLinks = false,
|
||||
@@ -145,12 +109,24 @@ function MessageItem({
|
||||
// starts with an empty text part before the first token arrives); the
|
||||
// typing indicator covers that gap until real content streams in.
|
||||
if (!part.text.trim()) return null;
|
||||
const html = renderChatMarkdown(part.text, {
|
||||
neutralizeInternalLinks,
|
||||
});
|
||||
if (html) {
|
||||
return (
|
||||
<div
|
||||
key={index}
|
||||
className={classes.markdown}
|
||||
// Sanitized by renderChatMarkdown (DOMPurify) before insertion.
|
||||
dangerouslySetInnerHTML={{ __html: html }}
|
||||
/>
|
||||
);
|
||||
}
|
||||
// Fallback when markdown could not render synchronously: raw text.
|
||||
return (
|
||||
<MarkdownPart
|
||||
key={index}
|
||||
text={part.text}
|
||||
neutralizeInternalLinks={neutralizeInternalLinks}
|
||||
/>
|
||||
<Text key={index} className={classes.markdown} style={{ whiteSpace: "pre-wrap" }}>
|
||||
{part.text}
|
||||
</Text>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -201,26 +177,3 @@ function MessageItem({
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
/** Skip re-rendering a message whose visible content is unchanged. The streaming
|
||||
* TAIL message gets a fresh object whose signature changes each delta, so it
|
||||
* still re-renders and streams in; every FINALIZED message is skipped, turning a
|
||||
* per-token whole-transcript re-render into a tail-only one. */
|
||||
export function arePropsEqual(
|
||||
prev: MessageItemProps,
|
||||
next: MessageItemProps,
|
||||
): boolean {
|
||||
if (
|
||||
prev.showCitations !== next.showCitations ||
|
||||
prev.neutralizeInternalLinks !== next.neutralizeInternalLinks ||
|
||||
prev.assistantName !== next.assistantName
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
// Fast path: identical message object (finalized rows keep their identity
|
||||
// across deltas) — skip without building signatures.
|
||||
if (prev.message === next.message) return true;
|
||||
return messageSignature(prev.message) === messageSignature(next.message);
|
||||
}
|
||||
|
||||
export default memo(MessageItem, arePropsEqual);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { memo, useMemo, useState } from "react";
|
||||
import { useState } from "react";
|
||||
import { Box, Collapse, Group, Text, UnstyledButton } from "@mantine/core";
|
||||
import { IconChevronDown } from "@tabler/icons-react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
@@ -27,23 +27,19 @@ interface ReasoningBlockProps {
|
||||
* Providers that don't stream reasoning TEXT still render this block from the
|
||||
* authoritative count alone (header only, empty body) so the cost is visible.
|
||||
*/
|
||||
function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
|
||||
export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
|
||||
const { t } = useTranslation();
|
||||
const [open, setOpen] = useState(false);
|
||||
|
||||
// Authoritative count wins; otherwise estimate live from the streamed text.
|
||||
const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
|
||||
const trimmed = text.trim();
|
||||
// Memoize the markdown render so toggling `open` (or a parent re-render caused
|
||||
// by an unrelated streamed delta) does not re-parse the reasoning text; it
|
||||
// recomputes only when the reasoning text itself changes (while it streams in).
|
||||
// collapseBlankLines collapses the blank-line gaps the model emits between every
|
||||
// list item / paragraph so the reasoning renders compactly (tight lists, joined
|
||||
// paragraphs) — ONLY here, not in the normal answer.
|
||||
const html = useMemo(
|
||||
() => (trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : ""),
|
||||
[trimmed],
|
||||
);
|
||||
// Collapse the blank-line gaps the model emits between every list item /
|
||||
// paragraph so the reasoning renders compactly (tight lists, joined
|
||||
// paragraphs) — see collapseBlankLines. ONLY here, not in the normal answer.
|
||||
const html = trimmed
|
||||
? renderChatMarkdown(collapseBlankLines(trimmed), {})
|
||||
: "";
|
||||
|
||||
return (
|
||||
<Box className={classes.reasoningBlock} mb={6}>
|
||||
@@ -91,8 +87,3 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
// Memoized: re-renders only when `text`/`tokens` change (primitive props, default
|
||||
// shallow compare), so a parent re-render during streaming of OTHER content does
|
||||
// not re-run the markdown parse for an already-finalized reasoning block.
|
||||
export default memo(ReasoningBlock);
|
||||
|
||||
@@ -116,6 +116,9 @@ export interface IAiChatMessageRow {
|
||||
// turn. Distinct from `usage` (legacy cumulative totalUsage). Shown in the
|
||||
// floating window's header badge.
|
||||
contextTokens?: number;
|
||||
// The model's max context window (denominator for the header badge); set
|
||||
// alongside contextTokens on a completed turn; absent on older rows.
|
||||
maxContextTokens?: number;
|
||||
// Set on an assistant row whose turn ended in a provider/stream error; the
|
||||
// raw provider error text (e.g. "402: ...") for inline display in the thread.
|
||||
error?: string;
|
||||
|
||||
90
apps/client/src/features/ai-chat/utils/context-badge.test.ts
Normal file
90
apps/client/src/features/ai-chat/utils/context-badge.test.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
|
||||
import { selectContextBadge } from "@/features/ai-chat/utils/context-badge.ts";
|
||||
|
||||
/**
|
||||
* Pure-helper tests for the header context badge selection. Covers the two
|
||||
* non-obvious rules: numerator and denominator are each taken from the most
|
||||
* recent row carrying THAT value (they may live on different rows), and a fresh
|
||||
* row with a zero/absent value must NOT shadow an older positive one.
|
||||
*/
|
||||
const row = (metadata: IAiChatMessageRow["metadata"]): IAiChatMessageRow => ({
|
||||
id: Math.random().toString(),
|
||||
role: "assistant",
|
||||
content: null,
|
||||
metadata,
|
||||
createdAt: "2026-01-01T00:00:00.000Z",
|
||||
});
|
||||
|
||||
describe("selectContextBadge", () => {
|
||||
it("returns zeros for empty / nullish input", () => {
|
||||
expect(selectContextBadge(undefined)).toEqual({
|
||||
contextTokens: 0,
|
||||
maxContextTokens: 0,
|
||||
});
|
||||
expect(selectContextBadge(null)).toEqual({
|
||||
contextTokens: 0,
|
||||
maxContextTokens: 0,
|
||||
});
|
||||
expect(selectContextBadge([])).toEqual({
|
||||
contextTokens: 0,
|
||||
maxContextTokens: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it("reads both figures from the most recent row that carries them", () => {
|
||||
expect(
|
||||
selectContextBadge([
|
||||
row({ contextTokens: 100, maxContextTokens: 200000 }),
|
||||
row({ contextTokens: 1500, maxContextTokens: 200000 }),
|
||||
]),
|
||||
).toEqual({ contextTokens: 1500, maxContextTokens: 200000 });
|
||||
});
|
||||
|
||||
it("falls back to legacy usage total for older rows without contextTokens", () => {
|
||||
expect(
|
||||
selectContextBadge([
|
||||
row({ usage: { inputTokens: 30, outputTokens: 70 } }),
|
||||
]),
|
||||
).toEqual({ contextTokens: 100, maxContextTokens: 0 });
|
||||
|
||||
expect(
|
||||
selectContextBadge([row({ usage: { totalTokens: 250 } })]),
|
||||
).toEqual({ contextTokens: 250, maxContextTokens: 0 });
|
||||
});
|
||||
|
||||
it("takes numerator and denominator from different rows", () => {
|
||||
// Freshest row (an error turn) carries contextTokens but no max; the older
|
||||
// completed turn carries the max. Each is picked from its own latest row.
|
||||
expect(
|
||||
selectContextBadge([
|
||||
row({ contextTokens: 800, maxContextTokens: 200000 }),
|
||||
row({ contextTokens: 1200, error: "402: nope" }),
|
||||
]),
|
||||
).toEqual({ contextTokens: 1200, maxContextTokens: 200000 });
|
||||
});
|
||||
|
||||
it("does not let a fresh zero/absent max shadow an older positive max", () => {
|
||||
expect(
|
||||
selectContextBadge([
|
||||
row({ contextTokens: 100, maxContextTokens: 200000 }),
|
||||
row({ contextTokens: 1200, maxContextTokens: 0 }),
|
||||
]),
|
||||
).toEqual({ contextTokens: 1200, maxContextTokens: 200000 });
|
||||
});
|
||||
|
||||
it("skips rows with null metadata", () => {
|
||||
expect(
|
||||
selectContextBadge([
|
||||
row({ contextTokens: 500, maxContextTokens: 200000 }),
|
||||
row(null),
|
||||
]),
|
||||
).toEqual({ contextTokens: 500, maxContextTokens: 200000 });
|
||||
});
|
||||
|
||||
it("reports current > max as-is (no clamp)", () => {
|
||||
expect(
|
||||
selectContextBadge([row({ contextTokens: 250000, maxContextTokens: 200000 })]),
|
||||
).toEqual({ contextTokens: 250000, maxContextTokens: 200000 });
|
||||
});
|
||||
});
|
||||
49
apps/client/src/features/ai-chat/utils/context-badge.ts
Normal file
49
apps/client/src/features/ai-chat/utils/context-badge.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
|
||||
|
||||
/**
|
||||
* Derive the header context badge figures from the persisted message rows.
|
||||
*
|
||||
* - `contextTokens` (numerator): how much the conversation now occupies in the
|
||||
* model's context window. Read from the most recent row carrying a context
|
||||
* figure — `contextTokens` (final-step input+output) on rows recorded after
|
||||
* this shipped, else that turn's legacy `usage` total for older rows.
|
||||
* - `maxContextTokens` (denominator): the model's configured max window, stamped
|
||||
* alongside `contextTokens` on a completed turn.
|
||||
*
|
||||
* Each value is taken from the most recent row carrying THAT value
|
||||
* independently — they may land on different rows (e.g. a fresh error row can
|
||||
* carry `contextTokens` but not `maxContextTokens`), so the scan continues for
|
||||
* whichever is still unset. `0` means "no row has it" (older rows, or no
|
||||
* admin-configured limit); the badge then omits the value.
|
||||
*/
|
||||
export function selectContextBadge(
|
||||
messageRows: readonly IAiChatMessageRow[] | undefined | null,
|
||||
): { contextTokens: number; maxContextTokens: number } {
|
||||
let contextTokens = 0;
|
||||
let maxContextTokens = 0;
|
||||
if (!messageRows) return { contextTokens, maxContextTokens };
|
||||
for (let i = messageRows.length - 1; i >= 0; i--) {
|
||||
const meta = messageRows[i].metadata;
|
||||
if (!meta) continue;
|
||||
if (contextTokens === 0) {
|
||||
if (typeof meta.contextTokens === "number" && meta.contextTokens > 0) {
|
||||
contextTokens = meta.contextTokens;
|
||||
} else if (meta.usage) {
|
||||
const usage = meta.usage;
|
||||
const fallback =
|
||||
usage.totalTokens ??
|
||||
(usage.inputTokens ?? 0) + (usage.outputTokens ?? 0);
|
||||
if (fallback > 0) contextTokens = fallback;
|
||||
}
|
||||
}
|
||||
if (
|
||||
maxContextTokens === 0 &&
|
||||
typeof meta.maxContextTokens === "number" &&
|
||||
meta.maxContextTokens > 0
|
||||
) {
|
||||
maxContextTokens = meta.maxContextTokens;
|
||||
}
|
||||
if (contextTokens !== 0 && maxContextTokens !== 0) break;
|
||||
}
|
||||
return { contextTokens, maxContextTokens };
|
||||
}
|
||||
@@ -1,17 +1,5 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
import {
|
||||
estimateTokens,
|
||||
liveTurnTokens,
|
||||
} from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
||||
|
||||
const msg = (parts: unknown[], metadata?: unknown): UIMessage =>
|
||||
({
|
||||
id: Math.random().toString(),
|
||||
role: "assistant",
|
||||
parts,
|
||||
metadata,
|
||||
}) as UIMessage;
|
||||
import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
||||
|
||||
describe("estimateTokens", () => {
|
||||
it("returns 0 for the empty string", () => {
|
||||
@@ -25,147 +13,3 @@ describe("estimateTokens", () => {
|
||||
expect(estimateTokens("12345678")).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("liveTurnTokens — estimate path", () => {
|
||||
it("is all zeros for an undefined message", () => {
|
||||
expect(liveTurnTokens(undefined)).toEqual({
|
||||
reasoning: 0,
|
||||
output: 0,
|
||||
authoritative: false,
|
||||
});
|
||||
});
|
||||
|
||||
it("is all zeros for a parts-less message", () => {
|
||||
expect(liveTurnTokens({ id: "x", role: "assistant" } as UIMessage)).toEqual({
|
||||
reasoning: 0,
|
||||
output: 0,
|
||||
authoritative: false,
|
||||
});
|
||||
});
|
||||
|
||||
it("estimates output from text parts", () => {
|
||||
// 8 chars -> 2 tokens.
|
||||
const r = liveTurnTokens(msg([{ type: "text", text: "12345678" }]));
|
||||
expect(r).toEqual({ reasoning: 0, output: 2, authoritative: false });
|
||||
});
|
||||
|
||||
it("estimates reasoning from reasoning parts (kept separate from output)", () => {
|
||||
const r = liveTurnTokens(
|
||||
msg([
|
||||
{ type: "reasoning", text: "12345678" },
|
||||
{ type: "text", text: "abcd" },
|
||||
]),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 2, output: 1, authoritative: false });
|
||||
});
|
||||
|
||||
it("accumulates across multiple text + reasoning parts (multi-step)", () => {
|
||||
const r = liveTurnTokens(
|
||||
msg([
|
||||
{ type: "reasoning", text: "abcd" }, // 1
|
||||
{ type: "text", text: "abcd" }, // 1
|
||||
{ type: "tool-getPage", state: "output-available" }, // ignored
|
||||
{ type: "reasoning", text: "abcd" }, // 1
|
||||
{ type: "text", text: "abcdefgh" }, // 2
|
||||
]),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 2, output: 3, authoritative: false });
|
||||
});
|
||||
|
||||
it("ignores non text/reasoning parts (tools, step-start)", () => {
|
||||
const r = liveTurnTokens(
|
||||
msg([
|
||||
{ type: "step-start" },
|
||||
{ type: "tool-getPage", state: "input-available" },
|
||||
]),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 0, output: 0, authoritative: false });
|
||||
});
|
||||
});
|
||||
|
||||
describe("liveTurnTokens — authoritative path", () => {
|
||||
it("returns authoritative usage verbatim, splitting reasoning out of output", () => {
|
||||
// outputTokens INCLUDES reasoning in the AI SDK shape -> answer = 100 - 30.
|
||||
const r = liveTurnTokens(
|
||||
msg([{ type: "text", text: "estimate would be tiny" }], {
|
||||
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
|
||||
}),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
|
||||
});
|
||||
|
||||
it("treats missing reasoningTokens as 0 and keeps full output", () => {
|
||||
const r = liveTurnTokens(
|
||||
msg([{ type: "text", text: "x" }], {
|
||||
usage: { inputTokens: 10, outputTokens: 42 },
|
||||
}),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 0, output: 42, authoritative: true });
|
||||
});
|
||||
|
||||
it("never returns a negative output when reasoning exceeds reported output", () => {
|
||||
const r = liveTurnTokens(
|
||||
msg([], { usage: { outputTokens: 10, reasoningTokens: 40 } }),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 40, output: 0, authoritative: true });
|
||||
});
|
||||
|
||||
it("falls back to the estimate when metadata has no usage object", () => {
|
||||
const r = liveTurnTokens(
|
||||
msg([{ type: "text", text: "abcd" }], { chatId: "c1" }),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 0, output: 1, authoritative: false });
|
||||
});
|
||||
});
|
||||
|
||||
describe("liveTurnTokens — combined authoritative + estimate (#163)", () => {
|
||||
it("ticks the in-flight step above the completed-steps authoritative base", () => {
|
||||
// The authoritative usage is the sum over COMPLETED steps (step 1). The
|
||||
// CURRENT step is streaming and its text is NOT in `usage` yet, but it IS in
|
||||
// the parts -> the running estimate must push the live figure above the base
|
||||
// so the badge keeps growing between step boundaries.
|
||||
const longText = "x".repeat(800); // 800 chars -> 200 est output tokens
|
||||
const r = liveTurnTokens(
|
||||
msg([{ type: "text", text: longText }], {
|
||||
usage: { inputTokens: 500, outputTokens: 40 }, // step-1 base: 40 output
|
||||
}),
|
||||
);
|
||||
// max(authOutput=40, estOutput=200) = 200 -> the counter ticks, not frozen.
|
||||
expect(r.output).toBe(200);
|
||||
expect(r.authoritative).toBe(true);
|
||||
});
|
||||
|
||||
it("ticks reasoning of the in-flight step above the authoritative reasoning base", () => {
|
||||
const longReasoning = "r".repeat(400); // 400 chars -> 100 est reasoning
|
||||
const r = liveTurnTokens(
|
||||
msg([{ type: "reasoning", text: longReasoning }], {
|
||||
usage: { inputTokens: 100, outputTokens: 20, reasoningTokens: 20 },
|
||||
}),
|
||||
);
|
||||
// reasoning: max(20, 100) = 100 ; output: max(max(0,20-20)=0, 0) = 0.
|
||||
expect(r.reasoning).toBe(100);
|
||||
expect(r.output).toBe(0);
|
||||
expect(r.authoritative).toBe(true);
|
||||
});
|
||||
|
||||
it("snaps to the authoritative figure once it exceeds the rough estimate", () => {
|
||||
// Short on-screen text (estimate tiny) but a large authoritative output:
|
||||
// the exact figure wins at the boundary (the counter never under-reports).
|
||||
const r = liveTurnTokens(
|
||||
msg([{ type: "text", text: "abcd" }], {
|
||||
usage: { inputTokens: 10, outputTokens: 5000 },
|
||||
}),
|
||||
);
|
||||
expect(r.output).toBe(5000);
|
||||
});
|
||||
|
||||
it("is monotonic: max never drops below the authoritative base when the estimate is smaller", () => {
|
||||
// Mirrors the legacy 'verbatim' tests: estimate < authoritative -> unchanged.
|
||||
const r = liveTurnTokens(
|
||||
msg([{ type: "text", text: "tiny" }], {
|
||||
usage: { inputTokens: 500, outputTokens: 100, reasoningTokens: 30 },
|
||||
}),
|
||||
);
|
||||
expect(r).toEqual({ reasoning: 30, output: 70, authoritative: true });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,18 +1,11 @@
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
|
||||
/**
|
||||
* Live token counting for a streaming AI-chat turn — split into REASONING
|
||||
* (thinking) and OUTPUT (answer) tokens, mirroring how Claude Code shows
|
||||
* `Thinking… · 60 tokens` next to its thinking indicator.
|
||||
* Rough client-side token estimation for AI-chat UI affordances.
|
||||
*
|
||||
* No provider streams exact per-token usage mid-stream, so the live number is a
|
||||
* CLIENT ESTIMATE (chars/≈4 heuristic) that is reconciled to AUTHORITATIVE usage
|
||||
* once the server attaches it on a step/turn boundary (see the server's
|
||||
* `chatStreamMetadata` + the client's read of `message.metadata.usage`). When
|
||||
* authoritative usage is present we return it verbatim (the number "jumps to
|
||||
* exact"); otherwise we return the running estimate. Pure + unit-testable: it
|
||||
* never runs a real BPE tokenizer (that would be O(n²) on the hot path, bloat the
|
||||
* bundle, and be wrong for Gemini/Ollama anyway).
|
||||
* No provider streams exact per-token usage mid-stream, so any in-flight figure
|
||||
* is a CLIENT ESTIMATE (chars/≈4 heuristic). Pure + unit-testable: it never runs
|
||||
* a real BPE tokenizer (that would be O(n²) on the hot path, bloat the bundle,
|
||||
* and be wrong for Gemini/Ollama anyway). Used by the in-body reasoning counter
|
||||
* ("Thinking · N tokens").
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -24,90 +17,3 @@ export function estimateTokens(text: string): number {
|
||||
if (!text) return 0;
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/** Authoritative per-step/turn usage the server attaches to message metadata. */
|
||||
export interface AuthoritativeUsage {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
reasoningTokens?: number;
|
||||
}
|
||||
|
||||
/** Live token split for a turn's tail (streaming) assistant message. */
|
||||
export interface LiveTurnTokens {
|
||||
/** Thinking/reasoning tokens (estimate, or authoritative when available). */
|
||||
reasoning: number;
|
||||
/** Answer/output tokens (estimate, or authoritative when available). */
|
||||
output: number;
|
||||
/** True when the numbers come from authoritative server usage, not estimate. */
|
||||
authoritative: boolean;
|
||||
}
|
||||
|
||||
/** Read the authoritative usage off a UIMessage's metadata, if the server set it. */
|
||||
function metadataUsage(message: UIMessage): AuthoritativeUsage | undefined {
|
||||
const meta = message?.metadata as
|
||||
| { usage?: AuthoritativeUsage }
|
||||
| undefined;
|
||||
const usage = meta?.usage;
|
||||
if (!usage || typeof usage !== "object") return undefined;
|
||||
return usage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Token split for the given (streaming) assistant message.
|
||||
*
|
||||
* COMBINES the authoritative server usage with the running text estimate so the
|
||||
* counter ticks in real time AND lands exact. The server only attaches
|
||||
* `metadata.usage` at a step/turn boundary (`finish-step`/`finish`) and it is
|
||||
* CUMULATIVE over COMPLETED steps — it does NOT yet include the in-flight step.
|
||||
* So a multi-step turn that returned the authoritative figure verbatim would
|
||||
* FREEZE between boundaries and jump in steps (issue #163).
|
||||
*
|
||||
* Instead we always compute the running ESTIMATE (chars/≈4 over the message's
|
||||
* `reasoning`/`text` parts, which grows on every streamed delta) and take the
|
||||
* per-component MAX of the authoritative base and the estimate:
|
||||
* - between boundaries the estimate of the in-flight step ticks the number up;
|
||||
* - at a boundary the authoritative figure snaps it to exact;
|
||||
* - because the server's usage is cumulative and we only ever take the max, the
|
||||
* number is MONOTONIC — it never drops.
|
||||
*
|
||||
* Providers that don't stream reasoning text still surface a reasoning count once
|
||||
* the authoritative usage arrives (`max(reasoningTokens, 0)`); on the pure
|
||||
* estimate path (no usage yet) such a turn shows `reasoning: 0` until then.
|
||||
*/
|
||||
export function liveTurnTokens(message: UIMessage | undefined): LiveTurnTokens {
|
||||
if (!message) return { reasoning: 0, output: 0, authoritative: false };
|
||||
|
||||
// Running ESTIMATE over every reasoning/text part — grows on each delta. This
|
||||
// includes the IN-FLIGHT step, which the authoritative usage does not cover yet.
|
||||
let estReasoning = 0;
|
||||
let estOutput = 0;
|
||||
for (const part of message.parts ?? []) {
|
||||
if (part.type === "reasoning") {
|
||||
estReasoning += estimateTokens((part as { text?: string }).text ?? "");
|
||||
} else if (part.type === "text") {
|
||||
estOutput += estimateTokens((part as { text?: string }).text ?? "");
|
||||
}
|
||||
}
|
||||
|
||||
const usage = metadataUsage(message);
|
||||
if (!usage) {
|
||||
// No authoritative usage streamed yet: the estimate IS the live figure.
|
||||
return { reasoning: estReasoning, output: estOutput, authoritative: false };
|
||||
}
|
||||
|
||||
// Authoritative sum over COMPLETED steps. `outputTokens` already INCLUDES
|
||||
// reasoning in the AI SDK usage shape, so subtract it out for the "answer"
|
||||
// figure (never go negative if a provider reports them inconsistently).
|
||||
const authReasoning = usage.reasoningTokens ?? 0;
|
||||
const authOutput = Math.max(0, (usage.outputTokens ?? 0) - authReasoning);
|
||||
|
||||
// Per-component max: the in-flight step's estimate ticks above the completed-
|
||||
// steps base between boundaries, and the authoritative figure wins once it
|
||||
// exceeds the (rough) estimate at the next boundary. Monotonic by construction.
|
||||
return {
|
||||
reasoning: Math.max(authReasoning, estReasoning),
|
||||
output: Math.max(authOutput, estOutput),
|
||||
authoritative: true,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,241 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
import { messageSignature } from "@/features/ai-chat/utils/message-signature.ts";
|
||||
|
||||
/**
|
||||
* Pure-helper tests for `messageSignature`, the cheap per-message content
|
||||
* signature that drives MessageItem's memo (a streaming row's signature must
|
||||
* change on every delta so it re-renders, while a finalized row's stays stable
|
||||
* so it is skipped). Each test exercises ONE change signal and asserts it flips
|
||||
* the signature; a content-identical clone must keep an EQUAL signature.
|
||||
*
|
||||
* The signature embeds `message.id` and `message.role`, so the `msg` factory
|
||||
* uses a FIXED id/role here (not `Math.random()`): otherwise two messages with
|
||||
* identical content would get different signatures and the negative case would
|
||||
* be impossible to express.
|
||||
*/
|
||||
const msg = (
|
||||
parts: UIMessage["parts"],
|
||||
metadata?: unknown,
|
||||
): UIMessage =>
|
||||
({
|
||||
id: "m1",
|
||||
role: "assistant",
|
||||
parts,
|
||||
metadata,
|
||||
}) as UIMessage;
|
||||
|
||||
describe("messageSignature", () => {
|
||||
it("changes when a text part grows", () => {
|
||||
const before = msg([{ type: "text", text: "alpha" }]);
|
||||
const after = msg([{ type: "text", text: "alpha beta" }]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("changes when a new part is appended", () => {
|
||||
const before = msg([{ type: "text", text: "alpha" }]);
|
||||
const after = msg([
|
||||
{ type: "text", text: "alpha" },
|
||||
{ type: "text", text: "beta" },
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("changes when a part's state flips", () => {
|
||||
const before = msg([
|
||||
{ type: "tool-getPage", state: "input-streaming" } as never,
|
||||
]);
|
||||
const after = msg([
|
||||
{ type: "tool-getPage", state: "output-available" } as never,
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("changes when a tool part gains an output", () => {
|
||||
const before = msg([
|
||||
{ type: "tool-getPage", state: "output-available" } as never,
|
||||
]);
|
||||
const after = msg([
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "output-available",
|
||||
output: { ok: true },
|
||||
} as never,
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("changes when a part gains an errorText", () => {
|
||||
const before = msg([
|
||||
{ type: "tool-getPage", state: "output-error" } as never,
|
||||
]);
|
||||
const after = msg([
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "output-error",
|
||||
errorText: "boom",
|
||||
} as never,
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("changes when usage.reasoningTokens arrives on finish-step (text/state already frozen)", () => {
|
||||
// The specifically-commented edge case: the authoritative turn total lands on
|
||||
// the final finish-step AFTER the reasoning text length and state are frozen.
|
||||
// Only the token count appears between these two snapshots, so the signature
|
||||
// MUST still flip — otherwise the "Thinking · N tokens" header would never
|
||||
// snap from the live estimate to the exact figure.
|
||||
const before = msg([
|
||||
{ type: "reasoning", text: "thinking", state: "done" } as never,
|
||||
]);
|
||||
const after = msg(
|
||||
[{ type: "reasoning", text: "thinking", state: "done" } as never],
|
||||
{ usage: { reasoningTokens: 42 } },
|
||||
);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("changes when metadata.error appears", () => {
|
||||
const before = msg([{ type: "text", text: "answer" }]);
|
||||
const after = msg([{ type: "text", text: "answer" }], { error: "boom" });
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("changes when metadata.finishReason changes (e.g. to 'aborted')", () => {
|
||||
const before = msg([{ type: "text", text: "answer" }], {
|
||||
finishReason: "stop",
|
||||
});
|
||||
const after = msg([{ type: "text", text: "answer" }], {
|
||||
finishReason: "aborted",
|
||||
});
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("is UNCHANGED for a content-identical clone (different object, same values)", () => {
|
||||
// A finalized row that is re-created as a fresh object (different parts array
|
||||
// by reference, same parts by value) must keep an EQUAL signature, so the
|
||||
// memo skips re-rendering it.
|
||||
const a = msg([
|
||||
{ type: "text", text: "alpha" },
|
||||
{ type: "tool-getPage", state: "output-available", output: { ok: true } } as never,
|
||||
]);
|
||||
const b = msg([
|
||||
{ type: "text", text: "alpha" },
|
||||
{ type: "tool-getPage", state: "output-available", output: { ok: true } } as never,
|
||||
]);
|
||||
expect(a).not.toBe(b);
|
||||
expect(messageSignature(a)).toBe(messageSignature(b));
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Per-part-kind coupling guard for the load-bearing invariant documented at the
|
||||
* top of message-signature.ts: the signature MUST sample every VISIBLE field the
|
||||
* MessageItem render body draws, or the memo freezes a stale row. This is an
|
||||
* executable lock for the part kinds rendered TODAY — read alongside
|
||||
* `MessageItem` (message-item.tsx) and the `assistantMessageHasVisibleContent`
|
||||
* helper (message-content.ts), which "mirrors MessageItem's render decisions
|
||||
* EXACTLY". For each kind, mutating a field the render body DRAWS must flip the
|
||||
* signature. If a new visible field is rendered without being added here AND to
|
||||
* the signature, the corresponding assertion below should fail — that is the
|
||||
* guard. (This intentionally stops short of the render-descriptor refactor:
|
||||
* adding a part kind or a visible field still requires a human to extend both
|
||||
* the signature and this block.)
|
||||
*/
|
||||
describe("messageSignature ↔ render coupling (per visible part kind)", () => {
|
||||
describe("text part — render draws part.text (MarkdownPart text={part.text})", () => {
|
||||
it("flips when the visible text changes", () => {
|
||||
// Streaming is append-only, so the visible text only grows; the signature
|
||||
// samples its length, so the growth is the change signal.
|
||||
const before = msg([{ type: "text", text: "answer" }]);
|
||||
const after = msg([{ type: "text", text: "answer extended" }]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
});
|
||||
|
||||
describe("reasoning part — render draws text + tokens (ReasoningBlock)", () => {
|
||||
it("flips when the visible reasoning text changes", () => {
|
||||
const before = msg([
|
||||
{ type: "reasoning", text: "think", state: "streaming" } as never,
|
||||
]);
|
||||
const after = msg([
|
||||
{ type: "reasoning", text: "think harder", state: "streaming" } as never,
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("flips when the visible token count (metadata.usage.reasoningTokens) lands", () => {
|
||||
// The header's "Thinking · N tokens" reads reasoningTokensForPart, fed by
|
||||
// metadata.usage.reasoningTokens — a VISIBLE field that arrives on the final
|
||||
// finish-step after text length and state are frozen.
|
||||
const before = msg([
|
||||
{ type: "reasoning", text: "think", state: "done" } as never,
|
||||
]);
|
||||
const after = msg(
|
||||
[{ type: "reasoning", text: "think", state: "done" } as never],
|
||||
{ usage: { reasoningTokens: 99 } },
|
||||
);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
});
|
||||
|
||||
describe("tool-* part — render draws state/errorText/citations (ToolCallCard)", () => {
|
||||
it("flips when the run state changes (running ↔ done icon + label)", () => {
|
||||
// toolRunState(part.state) selects the spinner/check/error icon.
|
||||
const before = msg([
|
||||
{ type: "tool-getPage", state: "input-available" } as never,
|
||||
]);
|
||||
const after = msg([
|
||||
{ type: "tool-getPage", state: "output-available" } as never,
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("flips when output arrives (drives the rendered citation links)", () => {
|
||||
// toolCitations reads part.output to render the "/p/{id}" anchors.
|
||||
const before = msg([
|
||||
{ type: "tool-getPage", state: "output-available" } as never,
|
||||
]);
|
||||
const after = msg([
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "output-available",
|
||||
output: { id: "page-1", title: "Doc" },
|
||||
} as never,
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("flips when errorText appears (the visible red error detail line)", () => {
|
||||
const before = msg([
|
||||
{ type: "tool-getPage", state: "output-error" } as never,
|
||||
]);
|
||||
const after = msg([
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "output-error",
|
||||
errorText: "permission denied",
|
||||
} as never,
|
||||
]);
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
});
|
||||
|
||||
describe("metadata banners — render draws error / aborted notices", () => {
|
||||
it("flips when metadata.error appears (ChatErrorAlert banner)", () => {
|
||||
const before = msg([{ type: "text", text: "answer" }]);
|
||||
const after = msg([{ type: "text", text: "answer" }], { error: "boom" });
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
|
||||
it("flips when metadata.finishReason becomes 'aborted' (ChatStoppedNotice)", () => {
|
||||
const before = msg([{ type: "text", text: "answer" }], {
|
||||
finishReason: "stop",
|
||||
});
|
||||
const after = msg([{ type: "text", text: "answer" }], {
|
||||
finishReason: "aborted",
|
||||
});
|
||||
expect(messageSignature(before)).not.toBe(messageSignature(after));
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1,44 +0,0 @@
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
|
||||
/** Cheap content signature for one message: changes iff something VISIBLE in the
|
||||
* row changed. Streaming is APPEND-ONLY (text parts only grow, parts are only
|
||||
* appended, a tool/text part flips state once), so a per-part [type, text
|
||||
* length, state, error/output presence] tuple + the persisted metadata
|
||||
* (error/finishReason) is a sufficient change signal without comparing full
|
||||
* strings on every delta. WARNING — load-bearing for the MessageItem memo:
|
||||
* if a future part kind's VISIBLE content can change WITHOUT changing [type,
|
||||
* text length, state, error/output presence] (e.g. a tool that streams
|
||||
* `preliminary` output, or a client-side regenerate that edits a finalized
|
||||
* row in place), extend this signature or the memo will freeze a stale row. */
|
||||
export function messageSignature(message: UIMessage): string {
|
||||
const parts = message.parts
|
||||
.map((p) => {
|
||||
const any = p as {
|
||||
type: string;
|
||||
text?: string;
|
||||
state?: string;
|
||||
errorText?: string;
|
||||
output?: unknown;
|
||||
};
|
||||
return [
|
||||
any.type,
|
||||
any.text?.length ?? 0,
|
||||
any.state ?? "",
|
||||
any.errorText ? 1 : 0,
|
||||
any.output !== undefined ? 1 : 0,
|
||||
].join(":");
|
||||
})
|
||||
.join("|");
|
||||
const meta = message.metadata as
|
||||
| { error?: string; finishReason?: string; usage?: { reasoningTokens?: number } }
|
||||
| undefined;
|
||||
// `usage.reasoningTokens` is neither append-only nor part-bound: the authoritative
|
||||
// turn total arrives on the final `finish-step` AFTER the reasoning text length and
|
||||
// state are already frozen. Without it in the signature the row's signature would be
|
||||
// unchanged at that point and the re-render skipped, so the "Thinking · N tokens"
|
||||
// header (reasoningTokensForPart) would keep the live estimate instead of snapping
|
||||
// to the exact figure.
|
||||
return `${message.id}#${message.role}#${parts}#${meta?.error ?? ""}#${
|
||||
meta?.finishReason ?? ""
|
||||
}#${meta?.usage?.reasoningTokens ?? ""}`;
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { mcpTestButtonView } from "./ai-mcp-server-test-view";
|
||||
|
||||
/**
|
||||
* Pure-helper tests for the inline "Test" button presentation. Covers the four
|
||||
* states (idle / loading is handled by the component's `isPending`, so here:
|
||||
* idle / ok-with-tools / ok-without-tools / failed) and the tooltip text
|
||||
* branches that are easiest to break silently.
|
||||
*/
|
||||
// Identity-ish translator that echoes the key and interpolates {{n}} so the
|
||||
// label/tooltip branches are observable without the real i18n bundle.
|
||||
const t = (key: string, options?: Record<string, unknown>): string =>
|
||||
options && "n" in options
|
||||
? key.replace("{{n}}", String((options as { n: unknown }).n))
|
||||
: key;
|
||||
|
||||
describe("mcpTestButtonView", () => {
|
||||
it("idle when there is no result", () => {
|
||||
expect(mcpTestButtonView(undefined, t)).toEqual({
|
||||
state: "idle",
|
||||
color: undefined,
|
||||
variant: "default",
|
||||
label: "Test",
|
||||
tooltip: "",
|
||||
});
|
||||
});
|
||||
|
||||
it("ok with tools lists them in the tooltip", () => {
|
||||
expect(mcpTestButtonView({ ok: true, tools: ["a", "b"] }, t)).toEqual({
|
||||
state: "ok",
|
||||
color: "green",
|
||||
variant: "light",
|
||||
label: "OK · 2",
|
||||
tooltip: "a, b",
|
||||
});
|
||||
});
|
||||
|
||||
it('ok with zero tools shows "No tools available"', () => {
|
||||
expect(mcpTestButtonView({ ok: true, tools: [] }, t)).toEqual({
|
||||
state: "ok",
|
||||
color: "green",
|
||||
variant: "light",
|
||||
label: "OK · 0",
|
||||
tooltip: "No tools available",
|
||||
});
|
||||
});
|
||||
|
||||
it("failed surfaces the error text in the tooltip", () => {
|
||||
expect(
|
||||
mcpTestButtonView({ ok: false, error: "402: nope" }, t),
|
||||
).toEqual({
|
||||
state: "failed",
|
||||
color: "red",
|
||||
variant: "light",
|
||||
label: "Failed",
|
||||
tooltip: "402: nope",
|
||||
});
|
||||
});
|
||||
|
||||
it("failed when the request itself rejects (no result payload)", () => {
|
||||
// 401/403/500/network: there is no { ok } body, only a thrown error. The
|
||||
// row must still show a red "Failed" rather than reverting to idle "Test".
|
||||
expect(
|
||||
mcpTestButtonView(undefined, t, {
|
||||
response: { data: { message: "Unauthorized" } },
|
||||
}),
|
||||
).toEqual({
|
||||
state: "failed",
|
||||
color: "red",
|
||||
variant: "light",
|
||||
label: "Failed",
|
||||
tooltip: "Unauthorized",
|
||||
});
|
||||
});
|
||||
|
||||
it("reject without a server message falls back to the generic label", () => {
|
||||
// A bare network error (no response body) still surfaces as failed, using
|
||||
// the i18n fallback for the tooltip.
|
||||
expect(mcpTestButtonView(undefined, t, new Error("network down"))).toEqual({
|
||||
state: "failed",
|
||||
color: "red",
|
||||
variant: "light",
|
||||
label: "Failed",
|
||||
tooltip: "Failed to update data",
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,90 @@
|
||||
import type { IAiMcpServerTestResult } from "@/features/workspace/services/ai-mcp-server-service.ts";
|
||||
|
||||
/** Minimal translator shape (i18next `t`): key + optional interpolation. */
|
||||
type Translate = (key: string, options?: Record<string, unknown>) => string;
|
||||
|
||||
/** Subset of an axios-style rejection we read for the reject tooltip. */
|
||||
type McpTestRequestError = {
|
||||
response?: { data?: { message?: string } };
|
||||
};
|
||||
|
||||
/**
|
||||
* Best-effort extraction of a server-sent message from a rejected test request
|
||||
* (axios stores it at `error.response.data.message`). Returns undefined for a
|
||||
* bare/network error so the caller can fall back to a generic label.
|
||||
*/
|
||||
function readRequestErrorMessage(error: unknown): string | undefined {
|
||||
if (error && typeof error === "object" && "response" in error) {
|
||||
return (error as McpTestRequestError).response?.data?.message;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Presentation for the inline "Test" button, derived from the current test
|
||||
* result tristate (no result yet / ok / failed). Color is never the only signal
|
||||
* — the label and icon change too (a11y / colorblind-friendly). Kept as a single
|
||||
* pure derivation (rather than two parallel if/else chains) so the button and
|
||||
* tooltip can never drift apart, and so the text branches are unit-testable
|
||||
* without rendering the row.
|
||||
*/
|
||||
export interface McpTestButtonView {
|
||||
/** Tristate; the component maps this to the leftSection icon. */
|
||||
state: "idle" | "ok" | "failed";
|
||||
/** Mantine Button color; undefined = theme default (idle). */
|
||||
color?: string;
|
||||
/** Mantine Button variant. */
|
||||
variant: string;
|
||||
/** Translated button label. */
|
||||
label: string;
|
||||
/** Translated tooltip text; "" while there is no result (tooltip disabled). */
|
||||
tooltip: string;
|
||||
}
|
||||
|
||||
export function mcpTestButtonView(
|
||||
result: IAiMcpServerTestResult | undefined,
|
||||
t: Translate,
|
||||
error?: unknown,
|
||||
): McpTestButtonView {
|
||||
if (result?.ok) {
|
||||
return {
|
||||
state: "ok",
|
||||
color: "green",
|
||||
variant: "light",
|
||||
label: t("OK · {{n}}", { n: result.tools.length }),
|
||||
tooltip:
|
||||
result.tools.length > 0
|
||||
? result.tools.join(", ")
|
||||
: t("No tools available"),
|
||||
};
|
||||
}
|
||||
if (result && result.ok === false) {
|
||||
return {
|
||||
state: "failed",
|
||||
color: "red",
|
||||
variant: "light",
|
||||
label: t("Failed"),
|
||||
tooltip: result.error,
|
||||
};
|
||||
}
|
||||
if (error) {
|
||||
// The test request itself rejected (401/403/500/network) — there is no
|
||||
// `{ ok }` payload, so without this branch the row would silently revert to
|
||||
// the idle "Test" instead of reporting the failure. Tooltip prefers the
|
||||
// server-sent message, else the generic i18n fallback.
|
||||
return {
|
||||
state: "failed",
|
||||
color: "red",
|
||||
variant: "light",
|
||||
label: t("Failed"),
|
||||
tooltip: readRequestErrorMessage(error) ?? t("Failed to update data"),
|
||||
};
|
||||
}
|
||||
return {
|
||||
state: "idle",
|
||||
color: undefined,
|
||||
variant: "default",
|
||||
label: t("Test"),
|
||||
tooltip: "",
|
||||
};
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useState } from "react";
|
||||
import { useEffect, useState } from "react";
|
||||
import {
|
||||
ActionIcon,
|
||||
Badge,
|
||||
@@ -10,18 +10,28 @@ import {
|
||||
Stack,
|
||||
Switch,
|
||||
Text,
|
||||
Tooltip,
|
||||
} from "@mantine/core";
|
||||
import { useDisclosure } from "@mantine/hooks";
|
||||
import { modals } from "@mantine/modals";
|
||||
import { IconPencil, IconPlus, IconTrash } from "@tabler/icons-react";
|
||||
import {
|
||||
IconCheck,
|
||||
IconPencil,
|
||||
IconPlugConnected,
|
||||
IconPlus,
|
||||
IconTrash,
|
||||
IconX,
|
||||
} from "@tabler/icons-react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import useUserRole from "@/hooks/use-user-role.tsx";
|
||||
import {
|
||||
useAiMcpServersQuery,
|
||||
useDeleteAiMcpServerMutation,
|
||||
useTestAiMcpServerMutation,
|
||||
useUpdateAiMcpServerMutation,
|
||||
} from "@/features/workspace/queries/ai-mcp-server-query.ts";
|
||||
import { IAiMcpServer } from "@/features/workspace/services/ai-mcp-server-service.ts";
|
||||
import { mcpTestButtonView } from "@/features/workspace/components/settings/components/ai-mcp-server-test-view.ts";
|
||||
import AiMcpServerForm from "./ai-mcp-server-form.tsx";
|
||||
|
||||
/**
|
||||
@@ -112,55 +122,15 @@ export default function AiMcpServers() {
|
||||
|
||||
<Stack gap="xs" mt="sm">
|
||||
{servers?.map((server) => (
|
||||
<Group key={server.id} justify="space-between" wrap="nowrap">
|
||||
<Stack gap={2} style={{ minWidth: 0 }}>
|
||||
<Group gap="xs">
|
||||
<Text fw={500} truncate>
|
||||
{server.name}
|
||||
</Text>
|
||||
<Badge size="xs" variant="light">
|
||||
{server.transport.toUpperCase()}
|
||||
</Badge>
|
||||
</Group>
|
||||
<Text
|
||||
size="xs"
|
||||
c="dimmed"
|
||||
truncate
|
||||
style={{ fontFamily: "ui-monospace, Menlo, monospace" }}
|
||||
>
|
||||
{server.url}
|
||||
</Text>
|
||||
</Stack>
|
||||
|
||||
<Group gap="xs" wrap="nowrap">
|
||||
<Switch
|
||||
size="sm"
|
||||
checked={server.enabled}
|
||||
aria-label={t("Enabled")}
|
||||
onChange={(event) =>
|
||||
updateMutation.mutate({
|
||||
id: server.id,
|
||||
enabled: event.currentTarget.checked,
|
||||
})
|
||||
}
|
||||
/>
|
||||
<ActionIcon
|
||||
variant="subtle"
|
||||
aria-label={t("Edit")}
|
||||
onClick={() => openEdit(server)}
|
||||
>
|
||||
<IconPencil size={16} />
|
||||
</ActionIcon>
|
||||
<ActionIcon
|
||||
variant="subtle"
|
||||
color="red"
|
||||
aria-label={t("Delete")}
|
||||
onClick={() => confirmDelete(server)}
|
||||
>
|
||||
<IconTrash size={16} />
|
||||
</ActionIcon>
|
||||
</Group>
|
||||
</Group>
|
||||
<AiMcpServerRow
|
||||
key={server.id}
|
||||
server={server}
|
||||
onEdit={openEdit}
|
||||
onDelete={confirmDelete}
|
||||
onToggleEnabled={(enabled) =>
|
||||
updateMutation.mutate({ id: server.id, enabled })
|
||||
}
|
||||
/>
|
||||
))}
|
||||
</Stack>
|
||||
|
||||
@@ -180,3 +150,127 @@ export default function AiMcpServers() {
|
||||
</Paper>
|
||||
);
|
||||
}
|
||||
|
||||
interface AiMcpServerRowProps {
|
||||
server: IAiMcpServer;
|
||||
onEdit: (server: IAiMcpServer) => void;
|
||||
onDelete: (server: IAiMcpServer) => void;
|
||||
onToggleEnabled: (enabled: boolean) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
* A single external MCP server row: name/badge/url on the left and the
|
||||
* Test / Switch / Edit / Delete controls on the right. Each row owns its own
|
||||
* `useTestAiMcpServerMutation()` so the inline Test result and loading state are
|
||||
* independent per row (a shared mutation would make `isPending` global and make
|
||||
* every row flicker).
|
||||
*/
|
||||
function AiMcpServerRow({
|
||||
server,
|
||||
onEdit,
|
||||
onDelete,
|
||||
onToggleEnabled,
|
||||
}: AiMcpServerRowProps) {
|
||||
const { t } = useTranslation();
|
||||
const testMutation = useTestAiMcpServerMutation();
|
||||
const result = testMutation.data;
|
||||
|
||||
// The row is keyed by `server.id`, so editing the connection-relevant fields
|
||||
// (url/transport/headers) does NOT remount it — an old success/failure result
|
||||
// would otherwise stick. Clear the result when those fields change.
|
||||
useEffect(() => {
|
||||
testMutation.reset();
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [server.url, server.transport, server.hasHeaders]);
|
||||
|
||||
// Single derivation of the button/tooltip presentation from the test tristate
|
||||
// (idle / ok / failed), so the two can never drift apart. Tooltip is "" while
|
||||
// there is no result; the icon is mapped from `view.state` below. When the
|
||||
// request itself rejects (401/403/500/network) there is no `data` payload, so
|
||||
// we feed the mutation error in too — otherwise the row would silently revert
|
||||
// to "Test" instead of showing a red "Failed".
|
||||
const view = mcpTestButtonView(
|
||||
result,
|
||||
t,
|
||||
testMutation.isError ? testMutation.error : undefined,
|
||||
);
|
||||
const tooltipLabel = view.tooltip;
|
||||
const buttonColor = view.color;
|
||||
const buttonVariant = view.variant;
|
||||
const buttonLabel = view.label;
|
||||
const buttonIcon =
|
||||
view.state === "ok" ? (
|
||||
<IconCheck size={16} />
|
||||
) : view.state === "failed" ? (
|
||||
<IconX size={16} />
|
||||
) : (
|
||||
<IconPlugConnected size={16} />
|
||||
);
|
||||
|
||||
return (
|
||||
<Group justify="space-between" wrap="nowrap">
|
||||
<Stack gap={2} style={{ minWidth: 0 }}>
|
||||
<Group gap="xs">
|
||||
<Text fw={500} truncate>
|
||||
{server.name}
|
||||
</Text>
|
||||
<Badge size="xs" variant="light">
|
||||
{server.transport.toUpperCase()}
|
||||
</Badge>
|
||||
</Group>
|
||||
<Text
|
||||
size="xs"
|
||||
c="dimmed"
|
||||
truncate
|
||||
style={{ fontFamily: "ui-monospace, Menlo, monospace" }}
|
||||
>
|
||||
{server.url}
|
||||
</Text>
|
||||
</Stack>
|
||||
|
||||
<Group gap="xs" wrap="nowrap">
|
||||
{/* Always clickable: testing a disabled server before enabling it is useful. */}
|
||||
<Tooltip
|
||||
label={tooltipLabel}
|
||||
disabled={view.state === "idle"}
|
||||
multiline
|
||||
maw={320}
|
||||
withinPortal
|
||||
>
|
||||
<Button
|
||||
size="xs"
|
||||
miw={88}
|
||||
color={buttonColor}
|
||||
variant={buttonVariant}
|
||||
leftSection={testMutation.isPending ? undefined : buttonIcon}
|
||||
loading={testMutation.isPending}
|
||||
onClick={() => testMutation.mutate(server.id)}
|
||||
>
|
||||
{buttonLabel}
|
||||
</Button>
|
||||
</Tooltip>
|
||||
<Switch
|
||||
size="sm"
|
||||
checked={server.enabled}
|
||||
aria-label={t("Enabled")}
|
||||
onChange={(event) => onToggleEnabled(event.currentTarget.checked)}
|
||||
/>
|
||||
<ActionIcon
|
||||
variant="subtle"
|
||||
aria-label={t("Edit")}
|
||||
onClick={() => onEdit(server)}
|
||||
>
|
||||
<IconPencil size={16} />
|
||||
</ActionIcon>
|
||||
<ActionIcon
|
||||
variant="subtle"
|
||||
color="red"
|
||||
aria-label={t("Delete")}
|
||||
onClick={() => onDelete(server)}
|
||||
>
|
||||
<IconTrash size={16} />
|
||||
</ActionIcon>
|
||||
</Group>
|
||||
</Group>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
Button,
|
||||
Group,
|
||||
Modal,
|
||||
NumberInput,
|
||||
Paper,
|
||||
PasswordInput,
|
||||
Select,
|
||||
@@ -83,6 +84,9 @@ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [
|
||||
// (empty means "leave unchanged" unless explicitly cleared).
|
||||
const formSchema = z.object({
|
||||
chatModel: z.string(),
|
||||
// Max context window in tokens shown in the chat header badge. A number, or ""
|
||||
// when the NumberInput is empty (no limit).
|
||||
chatContextWindow: z.union([z.number(), z.literal("")]),
|
||||
// Chat provider implementation (reasoning surfacing). Default openai-compatible.
|
||||
chatApiStyle: z.enum(["openai-compatible", "openai"]),
|
||||
// Cheap model id for the anonymous public-share assistant; empty = use chatModel.
|
||||
@@ -311,6 +315,7 @@ export default function AiProviderSettings() {
|
||||
validate: zod4Resolver(formSchema),
|
||||
initialValues: {
|
||||
chatModel: "",
|
||||
chatContextWindow: "",
|
||||
chatApiStyle: "openai-compatible" as ChatApiStyle,
|
||||
publicShareChatModel: "",
|
||||
publicShareAssistantRoleId: "",
|
||||
@@ -334,6 +339,7 @@ export default function AiProviderSettings() {
|
||||
if (!settings) return;
|
||||
form.setValues({
|
||||
chatModel: settings.chatModel ?? "",
|
||||
chatContextWindow: settings.chatContextWindow ?? "",
|
||||
chatApiStyle: settings.chatApiStyle ?? "openai-compatible",
|
||||
publicShareChatModel: settings.publicShareChatModel ?? "",
|
||||
publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "",
|
||||
@@ -364,6 +370,12 @@ export default function AiProviderSettings() {
|
||||
// Everything is OpenAI-compatible.
|
||||
driver: "openai",
|
||||
chatModel: values.chatModel,
|
||||
// Max context window for the chat header badge; empty NumberInput ("") →
|
||||
// 0, which clears the limit server-side (no denominator shown).
|
||||
chatContextWindow:
|
||||
typeof values.chatContextWindow === "number"
|
||||
? values.chatContextWindow
|
||||
: 0,
|
||||
chatApiStyle: values.chatApiStyle,
|
||||
// Cheap model id for the anonymous public-share assistant; empty falls
|
||||
// back to chatModel server-side.
|
||||
@@ -767,6 +779,18 @@ export default function AiProviderSettings() {
|
||||
{t("Resolves to {{url}}", { url: chatResolved })}
|
||||
</Text>
|
||||
|
||||
<NumberInput
|
||||
mt="sm"
|
||||
label={t("Context window (tokens)")}
|
||||
description={t(
|
||||
"Shown as used / total in the chat header. Leave empty to hide the limit.",
|
||||
)}
|
||||
min={0}
|
||||
allowDecimal={false}
|
||||
disabled={isLoading}
|
||||
{...form.getInputProps("chatContextWindow")}
|
||||
/>
|
||||
|
||||
<Select
|
||||
mt="sm"
|
||||
label={t("Protocol")}
|
||||
|
||||
@@ -22,6 +22,8 @@ export type ChatApiStyle = "openai-compatible" | "openai";
|
||||
export interface IAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens shown in the chat header badge; 0/unset = no limit.
|
||||
chatContextWindow?: number;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
// Cheap model id for the anonymous public-share assistant; empty = chatModel.
|
||||
publicShareChatModel?: string;
|
||||
@@ -56,6 +58,8 @@ export interface IAiSettings {
|
||||
export interface IAiSettingsUpdate {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens for the chat header badge; 0 = clear the limit.
|
||||
chatContextWindow?: number;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||
|
||||
@@ -275,11 +275,12 @@ describe('flushAssistant', () => {
|
||||
expect(f.toolCalls).not.toBeNull();
|
||||
});
|
||||
|
||||
it('completed: attaches finishReason + normalized usage + contextTokens', () => {
|
||||
it('completed: attaches finishReason + normalized usage + contextTokens + maxContextTokens', () => {
|
||||
const f = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
|
||||
contextTokens: 15,
|
||||
maxContextTokens: 200000,
|
||||
});
|
||||
expect(f.status).toBe('completed');
|
||||
expect(f.metadata.finishReason).toBe('stop');
|
||||
@@ -290,6 +291,23 @@ describe('flushAssistant', () => {
|
||||
reasoningTokens: undefined,
|
||||
});
|
||||
expect(f.metadata.contextTokens).toBe(15);
|
||||
expect(f.metadata.maxContextTokens).toBe(200000);
|
||||
});
|
||||
|
||||
it('completed: omits maxContextTokens when unset or 0', () => {
|
||||
// No maxContextTokens in the extra (admin set no context window).
|
||||
const f = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
contextTokens: 15,
|
||||
});
|
||||
expect('maxContextTokens' in f.metadata).toBe(false);
|
||||
// Explicit 0 is treated the same as unset (no limit -> key omitted).
|
||||
const f0 = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
contextTokens: 15,
|
||||
maxContextTokens: 0,
|
||||
});
|
||||
expect('maxContextTokens' in f0.metadata).toBe(false);
|
||||
});
|
||||
|
||||
it('error: records the error and a derived finishReason', () => {
|
||||
|
||||
@@ -616,6 +616,10 @@ export class AiChatService implements OnModuleInit {
|
||||
contextTokens:
|
||||
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
|
||||
undefined,
|
||||
// Max context window for the chat header badge denominator;
|
||||
// resolved from the admin-configured provider settings (in
|
||||
// closure scope here). Omitted/0 = no limit.
|
||||
maxContextTokens: resolved?.chatContextWindow,
|
||||
}),
|
||||
);
|
||||
// Lifecycle: release the external MCP clients leased for this turn.
|
||||
@@ -1212,8 +1216,9 @@ export async function applyFinalize(
|
||||
* `metadata.parts` is built by assistantParts over the finished steps, then the
|
||||
* in-progress text appended as a trailing text part, so rowToUiMessage /
|
||||
* findRecent keep replaying the turn unchanged. `metadata.finishReason`,
|
||||
* `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
|
||||
* only when provided/relevant, matching the pre-#183 onFinish/onError records.
|
||||
* `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
|
||||
* `metadata.maxContextTokens` are attached only when provided/relevant, matching
|
||||
* the pre-#183 onFinish/onError records.
|
||||
*/
|
||||
export function flushAssistant(
|
||||
capturedSteps: ReadonlyArray<StepLike> | undefined,
|
||||
@@ -1223,6 +1228,7 @@ export function flushAssistant(
|
||||
finishReason?: string;
|
||||
usage?: ChatStreamUsage | StreamUsage | undefined;
|
||||
contextTokens?: number;
|
||||
maxContextTokens?: number;
|
||||
error?: string;
|
||||
},
|
||||
): AssistantFlush {
|
||||
@@ -1253,6 +1259,8 @@ export function flushAssistant(
|
||||
normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
|
||||
}
|
||||
if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
|
||||
if (extra?.maxContextTokens)
|
||||
metadata.maxContextTokens = extra.maxContextTokens;
|
||||
if (extra?.error) metadata.error = extra.error;
|
||||
|
||||
return {
|
||||
|
||||
@@ -20,6 +20,7 @@ import { DB, Workspaces } from '@docmost/db/types/db';
|
||||
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatContextWindow',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
|
||||
@@ -41,3 +41,35 @@ describe('UpdateAiSettingsDto.chatApiStyle', () => {
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
/** DTO validation for the new chatContextWindow field (@IsInt @Min(0)). */
|
||||
describe('UpdateAiSettingsDto.chatContextWindow', () => {
|
||||
const errorsFor = async (chatContextWindow: unknown) =>
|
||||
validate(plainToInstance(UpdateAiSettingsDto, { chatContextWindow }));
|
||||
|
||||
it('accepts a non-negative integer (incl. 0 = clear the limit)', async () => {
|
||||
for (const v of [0, 200000]) {
|
||||
const errs = await errorsFor(v);
|
||||
expect(
|
||||
errs.find((e) => e.property === 'chatContextWindow'),
|
||||
).toBeUndefined();
|
||||
}
|
||||
});
|
||||
|
||||
it('rejects a negative value', async () => {
|
||||
const errs = await errorsFor(-1);
|
||||
expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined();
|
||||
});
|
||||
|
||||
it('rejects a non-integer value', async () => {
|
||||
const errs = await errorsFor(1.5);
|
||||
expect(errs.find((e) => e.property === 'chatContextWindow')).toBeDefined();
|
||||
});
|
||||
|
||||
it('accepts the field being omitted (optional)', async () => {
|
||||
const errs = await validate(plainToInstance(UpdateAiSettingsDto, {}));
|
||||
expect(
|
||||
errs.find((e) => e.property === 'chatContextWindow'),
|
||||
).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
43
apps/server/src/integrations/ai/ai-settings.service.spec.ts
Normal file
43
apps/server/src/integrations/ai/ai-settings.service.spec.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { parsePositiveInt } from './ai-settings.service';
|
||||
|
||||
/**
|
||||
* Round-trip coercion for numeric `::text` provider settings (e.g.
|
||||
* chatContextWindow). Values are stored as text and read back as strings, so
|
||||
* this guards the read path the DTO write-validation does not cover: a silent
|
||||
* loss of `Math.floor` or a `> 0` → `>= 0` drift would otherwise go unnoticed.
|
||||
*/
|
||||
describe('parsePositiveInt', () => {
|
||||
it('keeps a valid positive integer string', () => {
|
||||
expect(parsePositiveInt('200000')).toBe(200000);
|
||||
});
|
||||
|
||||
it('floors a fractional string', () => {
|
||||
expect(parsePositiveInt('1.9')).toBe(1);
|
||||
expect(parsePositiveInt('1.0')).toBe(1);
|
||||
});
|
||||
|
||||
it('returns undefined for zero', () => {
|
||||
expect(parsePositiveInt('0')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for a negative value', () => {
|
||||
expect(parsePositiveInt('-5')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for an empty string', () => {
|
||||
expect(parsePositiveInt('')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for a non-numeric string', () => {
|
||||
expect(parsePositiveInt('abc')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('returns undefined for undefined / null', () => {
|
||||
expect(parsePositiveInt(undefined)).toBeUndefined();
|
||||
expect(parsePositiveInt(null)).toBeUndefined();
|
||||
});
|
||||
|
||||
it('accepts a real number too (not only ::text strings)', () => {
|
||||
expect(parsePositiveInt(42)).toBe(42);
|
||||
});
|
||||
});
|
||||
@@ -18,6 +18,18 @@ import {
|
||||
PROVIDER_SETTINGS_KEYS,
|
||||
} from './ai.types';
|
||||
|
||||
/**
|
||||
* Coerce a raw provider value (stored as `::text`, so it arrives as a string —
|
||||
* see workspace.repo.ts) into a positive integer, or `undefined` when it is not
|
||||
* a finite number greater than zero. Used for numeric `::text` settings such as
|
||||
* `chatContextWindow`. Fractions are floored: `"1.9" → 1`, `"0"`/`"-5"`/`""`/
|
||||
* `"abc"`/`undefined` → `undefined`.
|
||||
*/
|
||||
export function parsePositiveInt(raw: unknown): number | undefined {
|
||||
const n = Number(raw);
|
||||
return Number.isFinite(n) && n > 0 ? Math.floor(n) : undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shape of the partial update accepted by `update`. Mirrors the validated
|
||||
* controller DTO. `apiKey` / `embeddingApiKey` are write-only: undefined =
|
||||
@@ -26,6 +38,8 @@ import {
|
||||
export interface UpdateAiSettingsInput {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens for the chat header badge. 0/empty = no limit.
|
||||
chatContextWindow?: number;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
@@ -160,6 +174,9 @@ export class AiSettingsService {
|
||||
const config: ResolvedAiConfig = {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
// Max context window for the chat header badge denominator. Stored as
|
||||
// ::text; 0/unset/invalid = no limit (undefined).
|
||||
chatContextWindow: parsePositiveInt(provider.chatContextWindow),
|
||||
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
// Cheap model id for the anonymous public-share assistant; reuses the chat
|
||||
@@ -219,6 +236,10 @@ export class AiSettingsService {
|
||||
async getMasked(workspaceId: string): Promise<MaskedAiSettings> {
|
||||
const provider = await this.readProvider(workspaceId);
|
||||
|
||||
// Stored as ::text; coerce to a positive integer (or undefined) so the
|
||||
// client receives a real number.
|
||||
const chatContextWindow = parsePositiveInt(provider.chatContextWindow);
|
||||
|
||||
let hasApiKey = false;
|
||||
let hasEmbeddingApiKey = false;
|
||||
let hasSttApiKey = false;
|
||||
@@ -243,6 +264,7 @@ export class AiSettingsService {
|
||||
return {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
chatContextWindow,
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
embeddingModel: provider.embeddingModel,
|
||||
baseUrl: provider.baseUrl,
|
||||
|
||||
@@ -32,6 +32,9 @@ export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai'];
|
||||
export interface AiProviderSettings {
|
||||
driver: AiDriver;
|
||||
chatModel: string;
|
||||
// Max context window in tokens; surfaced to the chat header badge as the
|
||||
// denominator ("current / max"). 0/unset = no limit (badge shows no denominator).
|
||||
chatContextWindow?: number;
|
||||
// Chat provider implementation for the `openai` driver. Unset → defaults to
|
||||
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
@@ -72,6 +75,7 @@ export interface AiProviderSettings {
|
||||
export const PROVIDER_SETTINGS_KEYS = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatContextWindow',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
@@ -98,6 +102,9 @@ export const PROVIDER_SETTINGS_KEYS = [
|
||||
export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens; surfaced to the chat header badge as the
|
||||
// "current / max" denominator. 0/unset = no limit.
|
||||
chatContextWindow?: number;
|
||||
// Cheap model id for the public-share assistant; reuses the chat creds.
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts (empty/unset
|
||||
@@ -116,6 +123,9 @@ export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||
export interface MaskedAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
// Max context window in tokens; the chat header badge denominator. 0/unset =
|
||||
// no limit.
|
||||
chatContextWindow?: number;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { IsIn, IsOptional, IsString } from 'class-validator';
|
||||
import { IsIn, IsInt, IsOptional, IsString, Min } from 'class-validator';
|
||||
import {
|
||||
AI_DRIVERS,
|
||||
AiDriver,
|
||||
@@ -25,6 +25,13 @@ export class UpdateAiSettingsDto {
|
||||
@IsString()
|
||||
chatModel?: string;
|
||||
|
||||
// Max context window in tokens shown in the chat header badge. 0/empty =
|
||||
// clear the limit (no denominator shown).
|
||||
@IsOptional()
|
||||
@IsInt()
|
||||
@Min(0)
|
||||
chatContextWindow?: number;
|
||||
|
||||
@IsOptional()
|
||||
@IsIn(CHAT_API_STYLES)
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
|
||||
Reference in New Issue
Block a user