Compare commits
13 Commits
fix/ai-str
...
fix/ai-cha
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
df81851eb3 | ||
|
|
4597183a1e | ||
|
|
5aa199660d | ||
|
|
bf2ebb9d47 | ||
|
|
ad90e2290e | ||
| e262f1695c | |||
|
|
c065e26d14 | ||
|
|
91e7335d54 | ||
|
|
b0faa2fe32 | ||
|
|
d1fbcc1bfa | ||
|
|
6edbbab43b | ||
|
|
59190148db | ||
| 80a4b5a1b0 |
@@ -142,6 +142,13 @@ MCP_DOCMOST_PASSWORD=
|
||||
# provider is eventually broken instead of leaking forever. Default 900000 (15 min).
|
||||
# AI_STREAM_TIMEOUT_MS=900000
|
||||
|
||||
# Keep-alive recycle window (ms) for streaming chat/agent AI + external-MCP calls.
|
||||
# A pooled connection idle longer than this is closed instead of reused, so a
|
||||
# NAT / egress firewall / reverse proxy that silently drops idle connections
|
||||
# cannot poison a reused socket into a PRE-RESPONSE `read ECONNRESET`. Lower it if
|
||||
# your egress drops idle connections faster than ~10s. Default 10000 (10 s).
|
||||
# AI_STREAM_KEEPALIVE_MS=10000
|
||||
|
||||
# --- Anonymous public-share AI assistant ---
|
||||
# Opt-in per workspace (AI settings -> "public share assistant"; off by default).
|
||||
# When enabled, anonymous visitors of a published share can ask an AI about that
|
||||
|
||||
13
CHANGELOG.md
13
CHANGELOG.md
@@ -25,9 +25,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
flagging dangling references, empty or duplicate definitions, and `[^id]`
|
||||
markers inside table rows, so an agent can fix its own markup. The page is
|
||||
still created; the field is omitted when there are no problems. (#166)
|
||||
- **AI chat "Protocol" setting (`chatApiStyle`).** A new admin choice in AI
|
||||
settings for the `openai` driver: `openai-compatible` (default) routes chat
|
||||
through `@ai-sdk/openai-compatible`, which surfaces a provider's streamed
|
||||
reasoning (`reasoning_content` → reasoning parts) for z.ai/GLM, DeepSeek,
|
||||
OpenRouter, etc.; `openai` uses the official provider (real-OpenAI
|
||||
reasoning-model request shaping). Chosen explicitly rather than inferred from
|
||||
the base URL, since a custom URL can front real OpenAI too. (#175, #177)
|
||||
|
||||
### Changed
|
||||
|
||||
- **AI chat default provider is now `openai-compatible` (reasoning surfaced).**
|
||||
For the `openai` driver the chat provider defaults to the openai-compatible
|
||||
implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the
|
||||
model's reasoning out of the box. An endpoint that is real OpenAI behind a
|
||||
custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. (#177)
|
||||
|
||||
- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the
|
||||
same id are ONE footnote — one number, one definition, several back-references
|
||||
— instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are
|
||||
|
||||
@@ -1307,5 +1307,9 @@
|
||||
"Page tree (child pages, recursive)": "Page tree (child pages, recursive)",
|
||||
"Render the full nested tree of all descendant pages": "Render the full nested tree of all descendant pages",
|
||||
"Showing {{count}} subpages_one": "Showing {{count}} subpage",
|
||||
"Showing {{count}} subpages_other": "Showing {{count}} subpages"
|
||||
"Showing {{count}} subpages_other": "Showing {{count}} subpages",
|
||||
"Protocol": "Protocol",
|
||||
"How chat requests are sent and how reasoning is surfaced": "How chat requests are sent and how reasoning is surfaced",
|
||||
"OpenAI-compatible (surfaces reasoning)": "OpenAI-compatible (surfaces reasoning)",
|
||||
"OpenAI (official)": "OpenAI (official)"
|
||||
}
|
||||
|
||||
@@ -1160,5 +1160,9 @@
|
||||
"Render the full nested tree of all descendant pages": "Показать полное вложенное дерево всех дочерних страниц",
|
||||
"Showing {{count}} subpages_one": "Показано {{count}} подстраница",
|
||||
"Showing {{count}} subpages_few": "Показано {{count}} подстраницы",
|
||||
"Showing {{count}} subpages_many": "Показано {{count}} подстраниц"
|
||||
"Showing {{count}} subpages_many": "Показано {{count}} подстраниц",
|
||||
"Protocol": "Протокол",
|
||||
"How chat requests are sent and how reasoning is surfaced": "Как отправляются запросы чата и как показывается reasoning",
|
||||
"OpenAI-compatible (surfaces reasoning)": "OpenAI-совместимый (показывает reasoning)",
|
||||
"OpenAI (official)": "OpenAI (официальный)"
|
||||
}
|
||||
|
||||
@@ -80,17 +80,31 @@ function computeInitialGeom() {
|
||||
Math.min(DEFAULT_HEIGHT, window.innerHeight - 2 * EDGE_MARGIN),
|
||||
);
|
||||
const left = Math.max(EDGE_MARGIN, window.innerWidth - width - 24);
|
||||
const maxTop = Math.max(EDGE_MARGIN, window.innerHeight - height - EDGE_MARGIN);
|
||||
const maxTop = Math.max(
|
||||
EDGE_MARGIN,
|
||||
window.innerHeight - height - EDGE_MARGIN,
|
||||
);
|
||||
const top = Math.min(60, maxTop);
|
||||
return { left, top, width, height };
|
||||
}
|
||||
|
||||
// Clamp a geometry so the window stays within the current viewport.
|
||||
function clampGeom(g: { left: number; top: number; width: number; height: number }) {
|
||||
function clampGeom(g: {
|
||||
left: number;
|
||||
top: number;
|
||||
width: number;
|
||||
height: number;
|
||||
}) {
|
||||
const effWidth = Math.max(g.width, MIN_WIDTH);
|
||||
const effHeight = Math.max(g.height, MIN_HEIGHT);
|
||||
const maxLeft = Math.max(EDGE_MARGIN, window.innerWidth - effWidth - EDGE_MARGIN);
|
||||
const maxTop = Math.max(EDGE_MARGIN, window.innerHeight - effHeight - EDGE_MARGIN);
|
||||
const maxLeft = Math.max(
|
||||
EDGE_MARGIN,
|
||||
window.innerWidth - effWidth - EDGE_MARGIN,
|
||||
);
|
||||
const maxTop = Math.max(
|
||||
EDGE_MARGIN,
|
||||
window.innerHeight - effHeight - EDGE_MARGIN,
|
||||
);
|
||||
return {
|
||||
...g,
|
||||
left: Math.min(Math.max(EDGE_MARGIN, g.left), maxLeft),
|
||||
@@ -151,9 +165,14 @@ export default function AiChatWindow() {
|
||||
// Live snapshot of the active thread's useChat state, kept up to date by
|
||||
// ChatThread. Lets the export include the in-progress (not-yet-persisted)
|
||||
// streaming turn. A ref avoids re-rendering this window on every token.
|
||||
const liveThreadRef = useRef<{ messages: UIMessage[]; isStreaming: boolean }>({
|
||||
const liveThreadRef = useRef<{
|
||||
messages: UIMessage[];
|
||||
isStreaming: boolean;
|
||||
banner: string | null;
|
||||
}>({
|
||||
messages: [],
|
||||
isStreaming: false,
|
||||
banner: null,
|
||||
});
|
||||
|
||||
// Live turn-token total (reasoning + output) for the in-flight turn, pushed up
|
||||
@@ -161,6 +180,12 @@ export default function AiChatWindow() {
|
||||
// `null` means no turn is in flight -> the badge falls back to the persisted
|
||||
// context size below.
|
||||
const [liveTurnTokens, setLiveTurnTokens] = useState<number | null>(null);
|
||||
// Whether the on-screen thread currently holds at least one message. Reported
|
||||
// reactively by ChatThread (the live snapshot lives in a non-reactive ref). This
|
||||
// lets the "Copy chat" button stay available for a brand-new, not-yet-persisted
|
||||
// chat whose first turn is in flight or was interrupted — that case has no
|
||||
// persisted rows yet, so a persisted-rows-only gate would hide the button (#174).
|
||||
const [hasLiveContent, setHasLiveContent] = useState(false);
|
||||
|
||||
// The page the user is currently viewing. AiChatWindow lives in a pathless
|
||||
// parent layout route, so useParams() can't see :pageSlug. Match the full
|
||||
@@ -185,8 +210,12 @@ export default function AiChatWindow() {
|
||||
// The invalidate closures are passed inline: `onTurnFinished` is read live by
|
||||
// useChat's onFinish (never in an effect dep array), so their identity does not
|
||||
// matter — no memoization ceremony needed.
|
||||
const { threadKey, waitingForHistory, onTurnFinished, cancelPendingAdoption } =
|
||||
useChatSession({
|
||||
const {
|
||||
threadKey,
|
||||
waitingForHistory,
|
||||
onTurnFinished,
|
||||
cancelPendingAdoption,
|
||||
} = useChatSession({
|
||||
activeChatId,
|
||||
setActiveChatId,
|
||||
chats,
|
||||
@@ -231,13 +260,23 @@ export default function AiChatWindow() {
|
||||
() => chats?.items?.find((c) => c.id === activeChatId) ?? null,
|
||||
[chats, activeChatId],
|
||||
);
|
||||
const canExport = !!activeChatId && !!messageRows && messageRows.length > 0;
|
||||
// Export is available when there is anything to export: either persisted rows
|
||||
// for the active chat, OR a live on-screen thread with at least one message.
|
||||
// The live arm covers a brand-new chat whose first turn is streaming or was
|
||||
// interrupted before the server persisted any row (#174); the persisted arm is
|
||||
// the steady-state path for an already-saved chat (#160).
|
||||
const canExport =
|
||||
hasLiveContent ||
|
||||
(!!activeChatId && !!messageRows && messageRows.length > 0);
|
||||
|
||||
// The role to display in the header and as the assistant's name. Prefer the
|
||||
// persisted role of an existing chat (chat-list JOIN); fall back to the role
|
||||
// picked via a card click for a brand-new or just-adopted chat. selectChat
|
||||
// resets selectedRoleId, so this fallback never leaks into an unrelated chat.
|
||||
const currentRole = useMemo<{ name: string; emoji: string | null } | null>(() => {
|
||||
const currentRole = useMemo<{
|
||||
name: string;
|
||||
emoji: string | null;
|
||||
} | null>(() => {
|
||||
if (activeChat?.roleName) {
|
||||
return { name: activeChat.roleName, emoji: activeChat.roleEmoji ?? null };
|
||||
}
|
||||
@@ -249,28 +288,44 @@ export default function AiChatWindow() {
|
||||
// call) and copy it to the clipboard. The "Copied" notification is the
|
||||
// feedback.
|
||||
const handleCopy = useCallback(() => {
|
||||
if (!activeChatId || !messageRows || messageRows.length === 0) return;
|
||||
// While the active thread is streaming, the current user message and the
|
||||
// in-progress assistant reply are NOT yet in messageRows (the persisted
|
||||
// query is only refetched after the turn finishes). Pull the live tail —
|
||||
// messages whose id is not among the persisted rows — and append them,
|
||||
// flagging the streaming assistant message as still generating.
|
||||
// Export gate. There must be SOMETHING to export — either a live on-screen
|
||||
// message or a persisted row. A brand-new chat whose first turn is streaming
|
||||
// or was interrupted has live messages but no persisted rows yet; it still
|
||||
// exports the on-screen thread WYSIWYG (#174). Only a truly empty chat (no
|
||||
// live messages and no rows) is non-exportable (the button is hidden too —
|
||||
// see `canExport`).
|
||||
const live = liveThreadRef.current;
|
||||
const rowIds = new Set(messageRows.map((r) => r.id));
|
||||
const pending = live.isStreaming
|
||||
? live.messages
|
||||
.filter((m) => !rowIds.has(m.id))
|
||||
.map((m) => ({
|
||||
role: m.role,
|
||||
parts: (m.parts ?? []) as { type: string; text?: string }[],
|
||||
generating: m.role === "assistant",
|
||||
}))
|
||||
: [];
|
||||
const hasRows = !!messageRows && messageRows.length > 0;
|
||||
if (live.messages.length === 0 && !hasRows) return;
|
||||
// WYSIWYG export: the live on-screen messages ARE the document (so a partial
|
||||
// reply from an interrupted turn — which never reached the persisted rows —
|
||||
// is exported just as it appears). The persisted rows enrich each live
|
||||
// message (token usage / error / timestamp) by id and serve as the fallback
|
||||
// when the live mirror is empty. The on-screen banner is appended too. See
|
||||
// issues #160 and #174. `chatId` may be null for a not-yet-saved chat — use a
|
||||
// placeholder so the header line still renders.
|
||||
const markdown = buildChatMarkdown({
|
||||
title: activeChat?.title ?? null,
|
||||
chatId: activeChatId,
|
||||
chatId: activeChatId ?? "unsaved",
|
||||
live: live.messages.map((m) => ({
|
||||
id: m.id,
|
||||
role: m.role,
|
||||
parts: (m.parts ?? []) as { type: string; text?: string }[],
|
||||
metadata: m.metadata as
|
||||
| {
|
||||
usage?: {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
reasoningTokens?: number;
|
||||
};
|
||||
error?: string;
|
||||
}
|
||||
| undefined,
|
||||
})),
|
||||
rows: messageRows,
|
||||
pending,
|
||||
isStreaming: live.isStreaming,
|
||||
banner: live.banner,
|
||||
t,
|
||||
});
|
||||
clipboard.copy(markdown);
|
||||
@@ -351,7 +406,8 @@ export default function AiChatWindow() {
|
||||
const width = el.offsetWidth;
|
||||
const height = el.offsetHeight;
|
||||
setGeom((prev) => {
|
||||
if (!prev || (prev.width === width && prev.height === height)) return prev;
|
||||
if (!prev || (prev.width === width && prev.height === height))
|
||||
return prev;
|
||||
return { ...prev, width, height };
|
||||
});
|
||||
});
|
||||
@@ -497,11 +553,15 @@ export default function AiChatWindow() {
|
||||
flash a "0" badge before any token streams in (#151 review). */}
|
||||
{liveTurnTokens !== null && liveTurnTokens > 0 ? (
|
||||
<Tooltip label={t("Tokens generated this turn")} withArrow>
|
||||
<span className={classes.badge}>{formatTokens(liveTurnTokens)}</span>
|
||||
<span className={classes.badge}>
|
||||
{formatTokens(liveTurnTokens)}
|
||||
</span>
|
||||
</Tooltip>
|
||||
) : contextTokens > 0 ? (
|
||||
<Tooltip label={t("Current context size")} withArrow>
|
||||
<span className={classes.badge}>{formatTokens(contextTokens)}</span>
|
||||
<span className={classes.badge}>
|
||||
{formatTokens(contextTokens)}
|
||||
</span>
|
||||
</Tooltip>
|
||||
) : null}
|
||||
</div>
|
||||
@@ -515,7 +575,11 @@ export default function AiChatWindow() {
|
||||
aria-label={t("Copy chat")}
|
||||
onClick={handleCopy}
|
||||
>
|
||||
{clipboard.copied ? <IconCheck size={14} /> : <IconCopy size={14} />}
|
||||
{clipboard.copied ? (
|
||||
<IconCheck size={14} />
|
||||
) : (
|
||||
<IconCopy size={14} />
|
||||
)}
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
@@ -623,6 +687,7 @@ export default function AiChatWindow() {
|
||||
onTurnFinished={onTurnFinished}
|
||||
liveStateRef={liveThreadRef}
|
||||
onLiveTurnTokens={setLiveTurnTokens}
|
||||
onLiveContentChange={setHasLiveContent}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -73,13 +73,25 @@ interface ChatThreadProps {
|
||||
* "Copy chat" export can include the in-progress, not-yet-persisted
|
||||
* assistant message. A ref (not state) avoids re-rendering the parent on
|
||||
* every streamed delta. */
|
||||
liveStateRef?: MutableRefObject<{ messages: UIMessage[]; isStreaming: boolean }>;
|
||||
liveStateRef?: MutableRefObject<{
|
||||
messages: UIMessage[];
|
||||
isStreaming: boolean;
|
||||
banner: string | null;
|
||||
}>;
|
||||
/** Reports the live turn-token total (reasoning + output) for the in-flight
|
||||
* turn so the parent can show a header badge that ticks mid-stream. THROTTLED
|
||||
* here (~8 Hz) so the parent re-renders a handful of times a second, not on
|
||||
* every streamed delta. Called with `null` when no turn is in flight (the
|
||||
* parent then reverts the badge to the persisted context size). */
|
||||
onLiveTurnTokens?: (tokens: number | null) => void;
|
||||
/** Reports whether the live thread currently holds at least one message, so the
|
||||
* parent can gate the "Copy chat" button on the on-screen thread rather than on
|
||||
* the persisted rows alone. This stays truthy for a brand-new, not-yet-saved
|
||||
* chat the moment its first user message appears — so an interrupted very first
|
||||
* turn (no persisted rows yet) is still exportable (#174). Called with `false`
|
||||
* on unmount so a thread torn down by `key` on chat switch can't leave the
|
||||
* button enabled for the next, possibly empty, chat. */
|
||||
onLiveContentChange?: (hasContent: boolean) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -125,6 +137,7 @@ export default function ChatThread({
|
||||
onTurnFinished,
|
||||
liveStateRef,
|
||||
onLiveTurnTokens,
|
||||
onLiveContentChange,
|
||||
}: ChatThreadProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
@@ -309,18 +322,49 @@ export default function ChatThread({
|
||||
if (isStreaming) setStopNotice(null);
|
||||
}, [isStreaming]);
|
||||
|
||||
// Classify the turn error into a heading + detail so the banner names the cause
|
||||
// (connection reset, timeout, rate limit, context overflow, quota, ...) instead
|
||||
// of a generic "Something went wrong". Computed here (not only in the JSX) so
|
||||
// the SAME on-screen banner text can be mirrored into the export (issue #160).
|
||||
const errorView = error ? describeChatError(error.message ?? "", t) : null;
|
||||
|
||||
// The exact banner the user sees under the message list, flattened to a single
|
||||
// string for the "Copy chat" export so the artifact records the interruption
|
||||
// WYSIWYG. Mirrors the JSX precedence below: error first, else the stop notice.
|
||||
const banner = errorView
|
||||
? errorView.detail
|
||||
? `${errorView.title} — ${errorView.detail}`
|
||||
: errorView.title
|
||||
: stopNotice === "manual"
|
||||
? t("Response stopped.")
|
||||
: stopNotice === "disconnect"
|
||||
? t("Connection lost — the answer was interrupted.")
|
||||
: null;
|
||||
|
||||
// Mirror the live useChat snapshot into the parent-owned ref so the export
|
||||
// (handled in AiChatWindow) can include the in-progress streaming turn. The
|
||||
// cleanup clears the ref on unmount so a thread torn down by `key` on chat
|
||||
// switch can't leak its (possibly still-streaming) tail into the next chat's
|
||||
// export before the new thread's effect repopulates the ref.
|
||||
// (handled in AiChatWindow) can include the in-progress streaming turn AND the
|
||||
// on-screen banner. The cleanup clears the ref on unmount so a thread torn down
|
||||
// by `key` on chat switch can't leak its (possibly still-streaming) tail into
|
||||
// the next chat's export before the new thread's effect repopulates the ref.
|
||||
useEffect(() => {
|
||||
if (!liveStateRef) return;
|
||||
liveStateRef.current = { messages, isStreaming };
|
||||
liveStateRef.current = { messages, isStreaming, banner };
|
||||
return () => {
|
||||
liveStateRef.current = { messages: [], isStreaming: false };
|
||||
liveStateRef.current = { messages: [], isStreaming: false, banner: null };
|
||||
};
|
||||
}, [liveStateRef, messages, isStreaming]);
|
||||
}, [liveStateRef, messages, isStreaming, banner]);
|
||||
|
||||
// Reactively report "the live thread has content" to the parent. `liveStateRef`
|
||||
// above is a ref (deliberately non-reactive so streaming deltas don't re-render
|
||||
// the parent), so the export button needs a SEPARATE reactive signal to flip on
|
||||
// for a not-yet-persisted chat. Keyed on the boolean only — identical values are
|
||||
// a no-op setState in the parent, so this does not add per-delta re-renders.
|
||||
const hasLiveContent = messages.length > 0;
|
||||
useEffect(() => {
|
||||
if (!onLiveContentChange) return;
|
||||
onLiveContentChange(hasLiveContent);
|
||||
return () => onLiveContentChange(false);
|
||||
}, [onLiveContentChange, hasLiveContent]);
|
||||
|
||||
// Report the live turn-token total to the parent header badge, THROTTLED to
|
||||
// ~8 Hz so the parent re-renders a few times a second instead of on every
|
||||
@@ -343,8 +387,7 @@ export default function ChatThread({
|
||||
return;
|
||||
}
|
||||
const tail = messages[messages.length - 1];
|
||||
const live =
|
||||
tail?.role === "assistant" ? liveTurnTokens(tail) : null;
|
||||
const live = tail?.role === "assistant" ? liveTurnTokens(tail) : null;
|
||||
const total = live ? live.reasoning + live.output : 0;
|
||||
const now = Date.now();
|
||||
const MIN_INTERVAL = 120; // ms (~8 Hz)
|
||||
@@ -370,11 +413,6 @@ export default function ChatThread({
|
||||
};
|
||||
}, []);
|
||||
|
||||
// Classify the turn error into a heading + detail so the banner names the cause
|
||||
// (connection reset, timeout, rate limit, context overflow, quota, ...) instead
|
||||
// of a generic "Something went wrong".
|
||||
const errorView = error ? describeChatError(error.message ?? "", t) : null;
|
||||
|
||||
// A role was picked with autoStart=false: the role is bound but NOTHING was
|
||||
// sent, so chatId stays null and the empty state would keep showing the cards.
|
||||
// This flag hides the cards and reveals the composer (with the role indicated)
|
||||
|
||||
@@ -6,7 +6,6 @@ import MessageItem from "@/features/ai-chat/components/message-item.tsx";
|
||||
import TypingIndicator from "@/features/ai-chat/components/typing-indicator.tsx";
|
||||
import { isToolPart, toolRunState, ToolUiPart } from "@/features/ai-chat/utils/tool-parts.tsx";
|
||||
import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts";
|
||||
import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
||||
import classes from "@/features/ai-chat/components/ai-chat.module.css";
|
||||
|
||||
interface MessageListProps {
|
||||
@@ -51,7 +50,9 @@ const BOTTOM_THRESHOLD = 40;
|
||||
* assistant message's LAST part is not live output:
|
||||
* - the last message is still the user's (assistant hasn't started a row), or
|
||||
* - the assistant row has no parts yet, or
|
||||
* - its last part is an empty/whitespace text part, or
|
||||
* - its last part is an empty/whitespace text part, or a finished ("done")
|
||||
* text part while the turn continues (the model paused after some narration
|
||||
* and is thinking about its next step), or
|
||||
* - its last part is a finished/errored tool (the model is thinking about the
|
||||
* next step between tool calls).
|
||||
* It hides only while output is actively rendering: a non-empty streaming text
|
||||
@@ -65,7 +66,19 @@ export function showTypingIndicator(messages: UIMessage[], isStreaming: boolean)
|
||||
const lastPart = last.parts[last.parts.length - 1];
|
||||
if (!lastPart) return true; // assistant row exists but has no parts yet.
|
||||
// The answer text is actively streaming in -> MessageItem renders it; no dots.
|
||||
if (lastPart.type === "text" && lastPart.text.trim().length > 0) return false;
|
||||
// Only while it is STILL streaming, though: once a non-empty text part is
|
||||
// finalized ("done") but the turn is still in flight, the model has paused
|
||||
// after some narration and is working on its next step (e.g. about to call a
|
||||
// tool) — nothing is visibly progressing, so the dots must show. A text part
|
||||
// without a `state` is treated as still-rendering (kept suppressed); this
|
||||
// branch only runs while streaming, where live parts always carry a state.
|
||||
if (
|
||||
lastPart.type === "text" &&
|
||||
lastPart.text.trim().length > 0 &&
|
||||
(lastPart as { state?: "streaming" | "done" }).state !== "done"
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
// A tool still in flight shows its own Loader in ToolCallCard -> no dots.
|
||||
if (
|
||||
isToolPart(lastPart.type) &&
|
||||
@@ -95,19 +108,6 @@ export function typingIndicatorShowsName(messages: UIMessage[]): boolean {
|
||||
return !assistantMessageHasVisibleContent(last);
|
||||
}
|
||||
|
||||
/**
|
||||
* The live thinking-token count to show on the standalone typing indicator. It
|
||||
* is the reasoning split of the tail assistant message (estimate while streaming,
|
||||
* authoritative once the server attaches usage at a step/turn boundary). Returns
|
||||
* 0 when the turn has produced no reasoning yet — the indicator then shows the
|
||||
* plain "Thinking…" line.
|
||||
*/
|
||||
export function tailThinkingTokens(messages: UIMessage[]): number {
|
||||
const last = messages[messages.length - 1];
|
||||
if (!last || last.role !== "assistant") return 0;
|
||||
return liveTurnTokens(last).reasoning;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scrollable transcript. Auto-scrolls to the newest message as it streams in,
|
||||
* but only while the user is pinned to the bottom — if they scrolled up to read
|
||||
@@ -208,7 +208,6 @@ export default function MessageList({
|
||||
<TypingIndicator
|
||||
assistantName={assistantName}
|
||||
showName={typingIndicatorShowsName(messages)}
|
||||
thinkingTokens={tailThinkingTokens(messages)}
|
||||
/>
|
||||
)}
|
||||
</Stack>
|
||||
|
||||
@@ -82,4 +82,14 @@ describe("showTypingIndicator", () => {
|
||||
showTypingIndicator([msg("assistant", [doneTool, text])], true),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("shows while streaming after a text part is finalized (paused before the next step)", () => {
|
||||
const doneText = { type: "text", text: "Now creating the page in", state: "done" } as unknown as UIMessage["parts"][number];
|
||||
expect(showTypingIndicator([msg("assistant", [doneText])], true)).toBe(true);
|
||||
});
|
||||
|
||||
it("hides while a text part is actively streaming (state: streaming)", () => {
|
||||
const streamingText = { type: "text", text: "Now writ", state: "streaming" } as unknown as UIMessage["parts"][number];
|
||||
expect(showTypingIndicator([msg("assistant", [streamingText])], true)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
import { tailThinkingTokens } from "@/features/ai-chat/components/message-list.tsx";
|
||||
|
||||
/**
|
||||
* Pure-helper tests for `tailThinkingTokens`: the live thinking-token count the
|
||||
* standalone typing indicator shows. It is the reasoning split of the tail
|
||||
* assistant message (estimate while streaming, authoritative once usage arrives).
|
||||
*/
|
||||
const msg = (
|
||||
role: "user" | "assistant",
|
||||
parts: unknown[],
|
||||
metadata?: unknown,
|
||||
): UIMessage =>
|
||||
({ id: Math.random().toString(), role, parts, metadata }) as UIMessage;
|
||||
|
||||
describe("tailThinkingTokens", () => {
|
||||
it("is 0 when there are no messages", () => {
|
||||
expect(tailThinkingTokens([])).toBe(0);
|
||||
});
|
||||
|
||||
it("is 0 when the tail message is the user's", () => {
|
||||
expect(tailThinkingTokens([msg("user", [{ type: "text", text: "q" }])])).toBe(0);
|
||||
});
|
||||
|
||||
it("is 0 when the assistant has produced no reasoning yet", () => {
|
||||
expect(
|
||||
tailThinkingTokens([msg("assistant", [{ type: "text", text: "answer" }])]),
|
||||
).toBe(0);
|
||||
});
|
||||
|
||||
it("estimates reasoning tokens from streamed reasoning text", () => {
|
||||
// 8 chars -> 2 tokens.
|
||||
expect(
|
||||
tailThinkingTokens([
|
||||
msg("assistant", [{ type: "reasoning", text: "12345678" }]),
|
||||
]),
|
||||
).toBe(2);
|
||||
});
|
||||
|
||||
it("uses authoritative usage.reasoningTokens once the server attaches it", () => {
|
||||
expect(
|
||||
tailThinkingTokens([
|
||||
msg("assistant", [{ type: "reasoning", text: "x" }], {
|
||||
usage: { outputTokens: 100, reasoningTokens: 42 },
|
||||
}),
|
||||
]),
|
||||
).toBe(42);
|
||||
});
|
||||
});
|
||||
@@ -16,12 +16,6 @@ interface TypingIndicatorProps {
|
||||
* assistant row above already shows the same name, to avoid a duplicate label.
|
||||
*/
|
||||
showName?: boolean;
|
||||
/**
|
||||
* Live thinking/reasoning token count for the in-flight turn. When > 0 the
|
||||
* typing line becomes `Thinking… · {count} tokens` (like Claude Code). Omitted
|
||||
* / 0 keeps the plain `Thinking…` line.
|
||||
*/
|
||||
thinkingTokens?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -32,23 +26,20 @@ interface TypingIndicatorProps {
|
||||
*
|
||||
* Mirrors the assistant row layout in MessageItem (the dimmed label), so it reads
|
||||
* as the assistant's bubble taking shape. The dimmed label uses the configured
|
||||
* identity name when provided (otherwise the generic "AI agent"), while the
|
||||
* typing line is always the generic "Thinking…" (it never includes the
|
||||
* role/identity name).
|
||||
* identity name when provided (otherwise the generic "AI agent"); below it the
|
||||
* animated dots stand in for the nascent bubble until content arrives.
|
||||
*/
|
||||
export default function TypingIndicator({ assistantName, showName = true, thinkingTokens }: TypingIndicatorProps) {
|
||||
export default function TypingIndicator({ assistantName, showName = true }: TypingIndicatorProps) {
|
||||
const { t } = useTranslation();
|
||||
const name = resolveAssistantName(assistantName);
|
||||
// Show the running thinking-token count only once there is something to count.
|
||||
const thinkingLine =
|
||||
thinkingTokens && thinkingTokens > 0
|
||||
? t("Thinking… · {{count}} tokens", { count: thinkingTokens })
|
||||
: t("Thinking…");
|
||||
|
||||
return (
|
||||
<Box className={classes.messageRow}>
|
||||
{showName !== false && (
|
||||
<Text size="xs" c="dimmed" mb={4}>
|
||||
// Extra bottom gap (vs MessageItem's mb={4}) gives the small bouncing
|
||||
// dots room below the name label; without it they crowd the label. Only
|
||||
// applies when the name is shown — the nameless case spaces fine on its own.
|
||||
<Text size="xs" c="dimmed" mb={8}>
|
||||
{name ?? t("AI agent")}
|
||||
</Text>
|
||||
)}
|
||||
@@ -58,9 +49,6 @@ export default function TypingIndicator({ assistantName, showName = true, thinki
|
||||
<span />
|
||||
<span />
|
||||
</span>
|
||||
<Text size="sm" c="dimmed">
|
||||
{thinkingLine}
|
||||
</Text>
|
||||
</Group>
|
||||
</Box>
|
||||
);
|
||||
|
||||
@@ -165,7 +165,9 @@ describe("buildChatMarkdown — tool parts", () => {
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("**Tool: Ran tool mysteryTool** (`mysteryTool`) — error");
|
||||
expect(md).toContain(
|
||||
"**Tool: Ran tool mysteryTool** (`mysteryTool`) — error",
|
||||
);
|
||||
expect(md).toContain("**Error:** boom");
|
||||
});
|
||||
|
||||
@@ -307,7 +309,9 @@ describe("buildChatMarkdown — token totals", () => {
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: { usage: { inputTokens: 3, outputTokens: 4, totalTokens: 99 } },
|
||||
metadata: {
|
||||
usage: { inputTokens: 3, outputTokens: 4, totalTokens: 99 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
@@ -367,125 +371,377 @@ describe("buildChatMarkdown — token totals", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — pending / in-progress messages", () => {
|
||||
it("continues the heading numbering after the persisted rows", () => {
|
||||
// A minimal on-screen (live) message, matching the subset buildChatMarkdown reads.
|
||||
function live(partial: {
|
||||
id?: string;
|
||||
role?: string;
|
||||
parts?: { type: string; text?: string }[];
|
||||
metadata?: { usage?: Record<string, number>; error?: string };
|
||||
}) {
|
||||
return {
|
||||
id: partial.id ?? "live-id",
|
||||
role: partial.role ?? "assistant",
|
||||
parts: partial.parts ?? [],
|
||||
metadata: partial.metadata,
|
||||
};
|
||||
}
|
||||
|
||||
describe("buildChatMarkdown — live (WYSIWYG) source", () => {
|
||||
it("uses the live messages as the document (what's on screen), numbered from 1", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "persisted" })],
|
||||
pending: [
|
||||
{
|
||||
// Persisted rows hold only the user turn; the assistant reply is live-only.
|
||||
rows: [row({ id: "u1", role: "user", content: "persisted user" })],
|
||||
live: [
|
||||
live({
|
||||
id: "u1",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "live question" }],
|
||||
generating: false,
|
||||
},
|
||||
{
|
||||
parts: [{ type: "text", text: "on-screen user" }],
|
||||
}),
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "live answer" }],
|
||||
generating: true,
|
||||
},
|
||||
parts: [{ type: "text", text: "on-screen reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("## 1. You");
|
||||
expect(md).toContain("## 2. You");
|
||||
expect(md).toContain("## 3. AI agent");
|
||||
expect(md).toContain("live question");
|
||||
expect(md).toContain("live answer");
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("on-screen user");
|
||||
expect(md).toContain("on-screen reply");
|
||||
// Message count reflects the LIVE document, not rows + live.
|
||||
expect(md).toContain("- Messages: 2");
|
||||
});
|
||||
|
||||
it("flags a generating assistant pending message as still being generated", () => {
|
||||
it("captures a partial reply from an interrupted (non-streaming) turn — no 'generating' note", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "persisted" })],
|
||||
pending: [
|
||||
{
|
||||
rows: [row({ id: "u1", role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({
|
||||
id: "a-live",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "partial reply" }],
|
||||
generating: true,
|
||||
},
|
||||
parts: [{ type: "text", text: "partial plan before the drop" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false, // the stream dropped — not streaming anymore
|
||||
banner: "Connection lost — the answer was interrupted.",
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("partial reply");
|
||||
expect(md).toContain("still being generated");
|
||||
// The partial assistant answer that was on screen IS in the export.
|
||||
expect(md).toContain("partial plan before the drop");
|
||||
// It is NOT flagged still-generating (the turn is over, just interrupted).
|
||||
expect(md).not.toContain("still being generated");
|
||||
// The on-screen banner is recorded at the end.
|
||||
expect(md).toContain("Connection lost — the answer was interrupted.");
|
||||
});
|
||||
|
||||
it("renders a non-generating user pending message without the note", () => {
|
||||
it("flags ONLY the tail assistant as still generating, and only while streaming", () => {
|
||||
const streaming = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "a",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "done earlier" }],
|
||||
}),
|
||||
live({
|
||||
id: "u",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "next q" }],
|
||||
}),
|
||||
live({
|
||||
id: "b",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "streaming now" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
// Exactly one "still being generated" note (the tail assistant).
|
||||
expect(streaming.match(/still being generated/g)?.length).toBe(1);
|
||||
|
||||
const idle = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "b",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "final" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(idle).not.toContain("still being generated");
|
||||
});
|
||||
|
||||
it("does NOT flag a completed assistant as generating when the streaming tail is a user message", () => {
|
||||
// The `status === "submitted"` window: the user just sent, isStreaming is
|
||||
// already true, but the new assistant turn has no message yet so the tail is
|
||||
// the USER message. The previous assistant answer is complete on screen and
|
||||
// must not be marked still-generating (WYSIWYG; regression for #160 review).
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "persisted" })],
|
||||
pending: [
|
||||
{
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "a",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "completed answer" }],
|
||||
}),
|
||||
live({
|
||||
id: "u",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "my live message" }],
|
||||
generating: false,
|
||||
},
|
||||
parts: [{ type: "text", text: "the new question" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("my live message");
|
||||
expect(md).toContain("completed answer");
|
||||
expect(md).not.toContain("still being generated");
|
||||
});
|
||||
|
||||
it("includes the pending messages in the metadata message count", () => {
|
||||
it("emits the heading + note for a streaming tail assistant with empty parts", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({ role: "user", content: "a" }),
|
||||
row({ role: "assistant", content: "b" }),
|
||||
],
|
||||
pending: [
|
||||
{
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "c" }],
|
||||
generating: false,
|
||||
},
|
||||
{
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "d" }],
|
||||
generating: true,
|
||||
},
|
||||
],
|
||||
t,
|
||||
});
|
||||
// 2 persisted rows + 2 pending = 4.
|
||||
expect(md).toContain("- Messages: 4");
|
||||
});
|
||||
|
||||
it("emits the heading and note for a generating assistant with empty parts", () => {
|
||||
expect(() =>
|
||||
buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "persisted" })],
|
||||
pending: [
|
||||
{
|
||||
role: "assistant",
|
||||
parts: [],
|
||||
generating: true,
|
||||
},
|
||||
],
|
||||
t,
|
||||
}),
|
||||
).not.toThrow();
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "persisted" })],
|
||||
pending: [
|
||||
{
|
||||
role: "assistant",
|
||||
parts: [],
|
||||
generating: true,
|
||||
},
|
||||
rows: [row({ id: "u1", role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({ id: "a-live", role: "assistant", parts: [] }),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("still being generated");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — live enrichment from persisted rows", () => {
|
||||
it("pulls usage / error / timestamp from the persisted row matched by id", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
createdAt: "2026-06-22T10:00:00.000Z",
|
||||
metadata: {
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
error: "rate limited",
|
||||
},
|
||||
}),
|
||||
],
|
||||
live: [
|
||||
// Same id as the persisted row, but no usage/error/timestamp on the live msg.
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("reply");
|
||||
// Token footer + total come from the enriched row.
|
||||
expect(md).toContain("_Tokens — in: 10, out: 5, total: 15_");
|
||||
expect(md).toContain("- Total tokens: 15");
|
||||
expect(md).toContain("**⚠️ Error:** rate limited");
|
||||
// The persisted timestamp is carried into the export.
|
||||
expect(md).toContain("<!-- 2026-06-22T10:00:00.000Z -->");
|
||||
});
|
||||
|
||||
it("prefers authoritative usage already on the live message over the row's", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: {
|
||||
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
live: [
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "reply" }],
|
||||
metadata: {
|
||||
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
// The live (authoritative, freshest) usage wins, not the stale row usage.
|
||||
expect(md).toContain("- Total tokens: 150");
|
||||
expect(md).not.toContain("- Total tokens: 2");
|
||||
});
|
||||
|
||||
it("a current-turn live message with no matching row renders without a footer", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ id: "u1", role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({
|
||||
id: "a-live",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "fresh reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("fresh reply");
|
||||
// No persisted row for the live assistant -> no token footer, no timestamp.
|
||||
expect(md).not.toContain("_Tokens —");
|
||||
expect(md).not.toContain("<!-- undefined -->");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — fallback + banner", () => {
|
||||
it("falls back to the persisted rows when there are no live messages", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({ role: "user", content: "from rows" }),
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "answer",
|
||||
metadata: { usage: { inputTokens: 4, outputTokens: 6 } },
|
||||
}),
|
||||
],
|
||||
live: [], // empty live mirror -> fallback path
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("## 1. You");
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("from rows");
|
||||
expect(md).toContain("- Messages: 2");
|
||||
expect(md).toContain("- Total tokens: 10");
|
||||
});
|
||||
|
||||
it("appends the on-screen banner once, after the messages", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
],
|
||||
isStreaming: false,
|
||||
banner: "Rate limit reached — try again shortly.",
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("_⚠️ Rate limit reached — try again shortly._");
|
||||
// Banner comes after the (only) message block.
|
||||
expect(md.indexOf("Rate limit reached")).toBeGreaterThan(
|
||||
md.indexOf("## 1."),
|
||||
);
|
||||
});
|
||||
|
||||
it("omits the banner block when there is no banner", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
],
|
||||
isStreaming: false,
|
||||
banner: null,
|
||||
t,
|
||||
});
|
||||
expect(md).not.toContain("_⚠️");
|
||||
});
|
||||
});
|
||||
|
||||
// #174: a brand-new, not-yet-persisted chat whose first turn is streaming (or was
|
||||
// interrupted) has live messages but NO persisted rows yet, and its chat id is not
|
||||
// known (the caller passes a placeholder). The export must still capture the
|
||||
// on-screen thread WYSIWYG from the live messages alone.
|
||||
describe("buildChatMarkdown — first-turn export with no persisted base (#174)", () => {
|
||||
it("builds the document from live messages alone when rows are empty", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: null,
|
||||
chatId: "unsaved",
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "u1",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "hello" }],
|
||||
}),
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "partial reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
// Both on-screen messages are serialized, numbered from 1.
|
||||
expect(md).toContain("## 1. You");
|
||||
expect(md).toContain("hello");
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("partial reply");
|
||||
// The streaming tail assistant is flagged as in-progress.
|
||||
expect(md).toContain("still being generated");
|
||||
// The placeholder chat id and the live message count are recorded.
|
||||
expect(md).toContain("- Chat ID: `unsaved`");
|
||||
expect(md).toContain("- Messages: 2");
|
||||
// No persisted timestamp exists for a current-turn live message.
|
||||
expect(md).not.toContain("<!--");
|
||||
});
|
||||
|
||||
it("captures an interrupted first turn (no rows, not streaming) without a generating note", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: null,
|
||||
chatId: "unsaved",
|
||||
rows: [],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "half an answer" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
banner: "Connection dropped — the response was cut off.",
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("half an answer");
|
||||
// An interrupted (non-streaming) partial is exported as-is, no generating note.
|
||||
expect(md).not.toContain("still being generated");
|
||||
// The on-screen banner records the interruption.
|
||||
expect(md).toContain("_⚠️ Connection dropped — the response was cut off._");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -25,11 +25,23 @@ type Translate = (key: string, values?: Record<string, unknown>) => string;
|
||||
interface BuildChatMarkdownArgs {
|
||||
title: string | null;
|
||||
chatId: string;
|
||||
/** The live, on-screen messages — the WYSIWYG source of the export. When
|
||||
* present and non-empty these DRIVE the document (so it mirrors exactly what
|
||||
* the user sees, including a partial reply from an interrupted turn). Each is
|
||||
* matched to a persisted row by `id` to enrich it with token usage / error /
|
||||
* timestamp. When absent or empty the builder falls back to `rows`. */
|
||||
live?: LiveMessage[];
|
||||
/** Persisted message rows. Enrichment source (matched to `live` by id) AND the
|
||||
* fallback document source when `live` is empty. */
|
||||
rows: IAiChatMessageRow[];
|
||||
/** In-progress, not-yet-persisted live messages (the current streaming
|
||||
* turn) to append after the persisted rows. `generating: true` adds a
|
||||
* note that the message is still being produced. */
|
||||
pending?: PendingMessage[];
|
||||
/** Whether the live thread is still streaming. Only then is the tail assistant
|
||||
* message flagged "still generating"; an interrupted (non-streaming) partial
|
||||
* reply is exported as-is and the `banner` explains the interruption. */
|
||||
isStreaming?: boolean;
|
||||
/** The on-screen banner text (error / dropped connection / manual stop),
|
||||
* appended at the end of the export so the artifact records the interruption
|
||||
* the user saw. */
|
||||
banner?: string | null;
|
||||
t: Translate;
|
||||
}
|
||||
|
||||
@@ -39,10 +51,31 @@ interface TextLikePart {
|
||||
text?: string;
|
||||
}
|
||||
|
||||
/** A live, not-yet-persisted message (current streaming turn) to append. */
|
||||
interface PendingMessage {
|
||||
/** Authoritative per-turn usage the server attaches to a message / row. */
|
||||
interface UsageLike {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
reasoningTokens?: number;
|
||||
}
|
||||
|
||||
/** A live, on-screen message (subset of the AI SDK UIMessage we consume). */
|
||||
interface LiveMessage {
|
||||
id: string;
|
||||
role: "user" | "assistant" | string;
|
||||
parts: TextLikePart[];
|
||||
metadata?: { usage?: UsageLike; error?: string };
|
||||
}
|
||||
|
||||
/** One message normalized for rendering, regardless of live/persisted origin. */
|
||||
interface ExportItem {
|
||||
role: string;
|
||||
parts: TextLikePart[];
|
||||
usage?: UsageLike;
|
||||
error?: string;
|
||||
/** ISO timestamp from the persisted row, when one is known. */
|
||||
createdAt?: string;
|
||||
/** True only for the tail assistant message while the thread is streaming. */
|
||||
generating: boolean;
|
||||
}
|
||||
|
||||
@@ -127,53 +160,128 @@ function renderMessageParts(parts: TextLikePart[], t: Translate): string[] {
|
||||
return out;
|
||||
}
|
||||
|
||||
/** Resolve a persisted row's parts: prefer the rich persisted parts, else a
|
||||
* single text part built from the plain-text content (mirrors `rowToUiMessage`). */
|
||||
function rowParts(row: IAiChatMessageRow): TextLikePart[] {
|
||||
return Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0
|
||||
? (row.metadata.parts as TextLikePart[])
|
||||
: [{ type: "text", text: row.content ?? "" }];
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize the export to one ordered list of {@link ExportItem}, WYSIWYG-first:
|
||||
*
|
||||
* - When `live` messages are present, THEY are the document (what the user sees,
|
||||
* incl. an interrupted turn's partial reply). Each is matched to a persisted
|
||||
* row by `id` to pull token usage / error / timestamp — a live message of the
|
||||
* CURRENT turn has no matching row yet, so it simply renders without a footer.
|
||||
* Authoritative `usage`/`error` already on the live message metadata win over
|
||||
* the row (the server attaches usage to the streamed message at a step
|
||||
* boundary before the row is refetched). Only the tail assistant message is
|
||||
* flagged `generating`, and only while `isStreaming`.
|
||||
* - When `live` is empty (e.g. the export runs before the live mirror is
|
||||
* populated), fall back to the persisted `rows` so the format never regresses.
|
||||
*/
|
||||
function resolveItems(
|
||||
live: LiveMessage[] | undefined,
|
||||
rows: IAiChatMessageRow[],
|
||||
isStreaming: boolean,
|
||||
): ExportItem[] {
|
||||
if (live && live.length > 0) {
|
||||
const rowsById = new Map(rows.map((r) => [r.id, r]));
|
||||
// The "still generating" note may apply ONLY to an assistant message that is
|
||||
// the actual TAIL of the list — that is where the on-screen typing indicator
|
||||
// sits. While `status === "submitted"` (isStreaming true) right after the
|
||||
// user hit send, the tail is the USER message and the new assistant turn has
|
||||
// no message yet; the previous assistant answer is shown complete on screen,
|
||||
// so it must NOT be flagged (the indicator renders as a separate bottom
|
||||
// block, not on that answer).
|
||||
const lastIndex = live.length - 1;
|
||||
const tailIsStreamingAssistant =
|
||||
isStreaming && live[lastIndex]?.role === "assistant";
|
||||
return live.map((m, i) => {
|
||||
const row = rowsById.get(m.id);
|
||||
return {
|
||||
role: m.role,
|
||||
parts: m.parts ?? [],
|
||||
// Authoritative usage/error already on the live message (the server
|
||||
// attaches usage to the streamed message at a step boundary) wins over
|
||||
// the persisted row; a current-turn live message has no matching row yet
|
||||
// and simply renders without a token footer (the accepted WYSIWYG
|
||||
// tradeoff — an interrupted turn loses only its token footer, not text).
|
||||
usage: m.metadata?.usage ?? row?.metadata?.usage,
|
||||
error: m.metadata?.error ?? row?.metadata?.error ?? undefined,
|
||||
createdAt: row?.createdAt,
|
||||
generating: tailIsStreamingAssistant && i === lastIndex,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
return rows.map((row) => ({
|
||||
role: row.role,
|
||||
parts: rowParts(row),
|
||||
usage: row.metadata?.usage,
|
||||
error: row.metadata?.error ?? undefined,
|
||||
createdAt: row.createdAt,
|
||||
generating: false,
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize a chat to a Markdown string. Pure (apart from `new Date()` for the
|
||||
* export timestamp), so it is straightforward to unit-test.
|
||||
*/
|
||||
export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
|
||||
const { title, chatId, rows, pending, t } = args;
|
||||
const { title, chatId, live, rows, isStreaming, banner, t } = args;
|
||||
const blocks: string[] = [];
|
||||
|
||||
const items = resolveItems(live, rows, isStreaming === true);
|
||||
|
||||
const heading = (title ?? "").trim() || t("Untitled chat");
|
||||
blocks.push(`# ${heading}`);
|
||||
|
||||
// Metadata bullet list. Total tokens is only shown when there is a sum.
|
||||
const totalTokens = rows.reduce((sum, row) => {
|
||||
const usage = row.metadata?.usage;
|
||||
return usage ? sum + rowTokens(usage) : sum;
|
||||
}, 0);
|
||||
const totalTokens = items.reduce(
|
||||
(sum, item) => (item.usage ? sum + rowTokens(item.usage) : sum),
|
||||
0,
|
||||
);
|
||||
const meta = [
|
||||
`- Chat ID: \`${chatId}\``,
|
||||
`- Exported: ${new Date().toISOString()}`,
|
||||
`- Messages: ${rows.length + (pending?.length ?? 0)}`,
|
||||
`- Messages: ${items.length}`,
|
||||
];
|
||||
if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);
|
||||
blocks.push(meta.join("\n"));
|
||||
|
||||
rows.forEach((row, index) => {
|
||||
items.forEach((item, index) => {
|
||||
blocks.push("---");
|
||||
|
||||
const roleLabel = row.role === "assistant" ? t("AI agent") : t("You");
|
||||
const roleLabel = item.role === "assistant" ? t("AI agent") : t("You");
|
||||
blocks.push(`## ${index + 1}. ${roleLabel}`);
|
||||
|
||||
// Created-at kept in source as an HTML comment (out of the rendered prose).
|
||||
blocks.push(`<!-- ${row.createdAt} -->`);
|
||||
// A live message of the current turn has no persisted row yet — omit it.
|
||||
if (item.createdAt) blocks.push(`<!-- ${item.createdAt} -->`);
|
||||
|
||||
// Resolve parts: prefer the rich persisted parts, else a single text part
|
||||
// built from the plain-text content (mirrors `rowToUiMessage`).
|
||||
const parts: TextLikePart[] =
|
||||
Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0
|
||||
? (row.metadata.parts as TextLikePart[])
|
||||
: [{ type: "text", text: row.content ?? "" }];
|
||||
blocks.push(...renderMessageParts(item.parts, t));
|
||||
|
||||
blocks.push(...renderMessageParts(parts, t));
|
||||
|
||||
if (row.metadata?.error) {
|
||||
blocks.push(`**⚠️ Error:** ${row.metadata.error}`);
|
||||
// A generating assistant may have empty/no parts yet — the heading (above)
|
||||
// and this note still record the in-progress turn.
|
||||
if (item.generating) {
|
||||
blocks.push(
|
||||
"_⏳ This message is still being generated — the export captured a partial, in-progress response._",
|
||||
);
|
||||
}
|
||||
|
||||
const usage = row.metadata?.usage;
|
||||
// A persisted per-message error (the raw provider text) may coexist with the
|
||||
// trailing `banner` (the classified on-screen alert) when the failed turn's
|
||||
// row has already been refetched by export time. They describe the same
|
||||
// failure at different fidelity; showing both is an accepted, minor redundancy.
|
||||
if (item.error) {
|
||||
blocks.push(`**⚠️ Error:** ${item.error}`);
|
||||
}
|
||||
|
||||
const usage = item.usage;
|
||||
if (usage) {
|
||||
const total = usage.totalTokens ?? rowTokens(usage);
|
||||
// Reasoning (thinking) tokens are shown only when the provider reported a
|
||||
@@ -188,27 +296,12 @@ export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
|
||||
}
|
||||
});
|
||||
|
||||
// Append the in-progress, not-yet-persisted live messages (the current
|
||||
// streaming turn) after the persisted rows. Heading numbering CONTINUES from
|
||||
// the persisted rows. A `generating` assistant gets a note that the captured
|
||||
// response is partial; pending messages carry no usage/token footer yet.
|
||||
(pending ?? []).forEach((message, p) => {
|
||||
// Record the on-screen banner (error / dropped connection / manual stop) so
|
||||
// the export reflects exactly what the user saw, including an interruption.
|
||||
if (banner && banner.trim().length > 0) {
|
||||
blocks.push("---");
|
||||
|
||||
const num = rows.length + p + 1;
|
||||
const roleLabel = message.role === "assistant" ? t("AI agent") : t("You");
|
||||
blocks.push(`## ${num}. ${roleLabel}`);
|
||||
|
||||
blocks.push(...renderMessageParts(message.parts, t));
|
||||
|
||||
// A generating assistant may have empty/no parts yet — still emit the
|
||||
// heading (above) and this note so the export shows the in-progress turn.
|
||||
if (message.generating === true) {
|
||||
blocks.push(
|
||||
"_⏳ This message is still being generated — the export captured a partial, in-progress response._",
|
||||
);
|
||||
blocks.push(`_⚠️ ${banner.trim()}_`);
|
||||
}
|
||||
});
|
||||
|
||||
// Blank line between blocks so the Markdown renders cleanly.
|
||||
return blocks.join("\n\n");
|
||||
|
||||
@@ -38,6 +38,7 @@ import {
|
||||
AiTestCapability,
|
||||
IAiSettingsUpdate,
|
||||
SttApiStyle,
|
||||
ChatApiStyle,
|
||||
} from "@/features/workspace/services/ai-settings-service.ts";
|
||||
import { useAiRolesQuery } from "@/features/ai-chat/queries/ai-chat-query.ts";
|
||||
import { IAiRole } from "@/features/ai-chat/types/ai-chat.types.ts";
|
||||
@@ -82,6 +83,8 @@ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [
|
||||
// (empty means "leave unchanged" unless explicitly cleared).
|
||||
const formSchema = z.object({
|
||||
chatModel: z.string(),
|
||||
// Chat provider implementation (reasoning surfacing). Default openai-compatible.
|
||||
chatApiStyle: z.enum(["openai-compatible", "openai"]),
|
||||
// Cheap model id for the anonymous public-share assistant; empty = use chatModel.
|
||||
publicShareChatModel: z.string(),
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||
@@ -308,6 +311,7 @@ export default function AiProviderSettings() {
|
||||
validate: zod4Resolver(formSchema),
|
||||
initialValues: {
|
||||
chatModel: "",
|
||||
chatApiStyle: "openai-compatible" as ChatApiStyle,
|
||||
publicShareChatModel: "",
|
||||
publicShareAssistantRoleId: "",
|
||||
embeddingModel: "",
|
||||
@@ -330,6 +334,7 @@ export default function AiProviderSettings() {
|
||||
if (!settings) return;
|
||||
form.setValues({
|
||||
chatModel: settings.chatModel ?? "",
|
||||
chatApiStyle: settings.chatApiStyle ?? "openai-compatible",
|
||||
publicShareChatModel: settings.publicShareChatModel ?? "",
|
||||
publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "",
|
||||
embeddingModel: settings.embeddingModel ?? "",
|
||||
@@ -359,6 +364,7 @@ export default function AiProviderSettings() {
|
||||
// Everything is OpenAI-compatible.
|
||||
driver: "openai",
|
||||
chatModel: values.chatModel,
|
||||
chatApiStyle: values.chatApiStyle,
|
||||
// Cheap model id for the anonymous public-share assistant; empty falls
|
||||
// back to chatModel server-side.
|
||||
publicShareChatModel: values.publicShareChatModel,
|
||||
@@ -761,6 +767,24 @@ export default function AiProviderSettings() {
|
||||
{t("Resolves to {{url}}", { url: chatResolved })}
|
||||
</Text>
|
||||
|
||||
<Select
|
||||
mt="sm"
|
||||
label={t("Protocol")}
|
||||
description={t(
|
||||
"How chat requests are sent and how reasoning is surfaced",
|
||||
)}
|
||||
data={[
|
||||
{
|
||||
value: "openai-compatible",
|
||||
label: t("OpenAI-compatible (surfaces reasoning)"),
|
||||
},
|
||||
{ value: "openai", label: t("OpenAI (official)") },
|
||||
]}
|
||||
allowDeselect={false}
|
||||
disabled={isLoading}
|
||||
{...form.getInputProps("chatApiStyle")}
|
||||
/>
|
||||
|
||||
{/* Anonymous public-share assistant: a single master toggle + an
|
||||
optional cheaper model id. Reuses this card's driver/URL/key. */}
|
||||
<Group justify="space-between" align="center" wrap="nowrap" mt="md">
|
||||
|
||||
@@ -9,6 +9,12 @@ export type AiDriver = "openai" | "gemini" | "ollama";
|
||||
// - 'json' -> JSON body with base64-encoded audio (OpenRouter)
|
||||
export type SttApiStyle = "multipart" | "json";
|
||||
|
||||
// Chat provider implementation for the `openai` driver (chosen explicitly):
|
||||
// - 'openai-compatible' -> maps streamed reasoning_content to reasoning parts
|
||||
// (z.ai/GLM, DeepSeek, OpenRouter, ...). Default.
|
||||
// - 'openai' -> official provider; real-OpenAI reasoning-model shaping.
|
||||
export type ChatApiStyle = "openai-compatible" | "openai";
|
||||
|
||||
// Masked AI provider settings returned by the server.
|
||||
// No API key is ever returned; only `hasApiKey` / `hasEmbeddingApiKey` indicate
|
||||
// whether one is stored. `embeddingBaseUrl` is the RAW stored value (empty means
|
||||
@@ -16,6 +22,7 @@ export type SttApiStyle = "multipart" | "json";
|
||||
export interface IAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
// Cheap model id for the anonymous public-share assistant; empty = chatModel.
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||
@@ -49,6 +56,7 @@ export interface IAiSettings {
|
||||
export interface IAiSettingsUpdate {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||
// built-in locked persona.
|
||||
|
||||
@@ -10,6 +10,29 @@ import {
|
||||
import { ExpressionBuilder, sql } from 'kysely';
|
||||
import { DB, Workspaces } from '@docmost/db/types/db';
|
||||
|
||||
/**
|
||||
* Writable `settings.ai.provider` keys, enforced at this generic SQL layer. This
|
||||
* repo cannot import AI-feature types, so this list is its own copy; a parity
|
||||
* test (ai-provider-settings-keys.spec.ts) asserts it equals
|
||||
* PROVIDER_SETTINGS_KEYS in ai.types so a future drift fails in CI rather than
|
||||
* silently dropping a field at this boundary.
|
||||
*/
|
||||
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
'embeddingBaseUrl',
|
||||
'sttModel',
|
||||
'sttBaseUrl',
|
||||
'sttApiStyle',
|
||||
'sttLanguage',
|
||||
'systemPrompt',
|
||||
'publicShareChatModel',
|
||||
'publicShareAssistantRoleId',
|
||||
];
|
||||
|
||||
@Injectable()
|
||||
export class WorkspaceRepo {
|
||||
public baseFields: Array<keyof Workspaces> = [
|
||||
@@ -239,9 +262,8 @@ export class WorkspaceRepo {
|
||||
// is a real jsonb object, never a double-encoded string. The CASE self-heals
|
||||
// workspaces whose settings.ai.provider was previously corrupted into an
|
||||
// array/string.
|
||||
const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId'];
|
||||
const entries = Object.entries(provider).filter(
|
||||
([k, v]) => v !== undefined && ALLOWED.includes(k),
|
||||
([k, v]) => v !== undefined && AI_PROVIDER_SETTINGS_ALLOWED.includes(k),
|
||||
);
|
||||
const patch = entries.length
|
||||
? sql`jsonb_build_object(${sql.join(
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
import { validate } from 'class-validator';
|
||||
import { plainToInstance } from 'class-transformer';
|
||||
import { PROVIDER_SETTINGS_KEYS } from './ai.types';
|
||||
import { AI_PROVIDER_SETTINGS_ALLOWED } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { UpdateAiSettingsDto } from './dto/update-ai-settings.dto';
|
||||
|
||||
/**
|
||||
* Drift guard: the writable provider-settings keys are maintained in two layers
|
||||
* that TypeScript cannot cross-check — PROVIDER_SETTINGS_KEYS (ai.types, used by
|
||||
* the settings service) and AI_PROVIDER_SETTINGS_ALLOWED (the generic workspace
|
||||
* repo's SQL boundary). A key missing from the repo copy silently drops the field
|
||||
* on persist (exactly what happened to chatApiStyle), so this asserts they match.
|
||||
*/
|
||||
describe('provider-settings key allowlist parity', () => {
|
||||
it('the repo SQL allowlist equals PROVIDER_SETTINGS_KEYS', () => {
|
||||
expect([...AI_PROVIDER_SETTINGS_ALLOWED].sort()).toEqual(
|
||||
[...PROVIDER_SETTINGS_KEYS].sort(),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
/** DTO validation for the new chatApiStyle field (@IsIn(CHAT_API_STYLES)). */
|
||||
describe('UpdateAiSettingsDto.chatApiStyle', () => {
|
||||
const errorsFor = async (chatApiStyle: unknown) =>
|
||||
validate(plainToInstance(UpdateAiSettingsDto, { chatApiStyle }));
|
||||
|
||||
it('accepts both valid values', async () => {
|
||||
for (const v of ['openai-compatible', 'openai']) {
|
||||
const errs = await errorsFor(v);
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
||||
}
|
||||
});
|
||||
|
||||
it('rejects an unknown value', async () => {
|
||||
const errs = await errorsFor('definitely-not-a-style');
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeDefined();
|
||||
});
|
||||
|
||||
it('accepts the field being omitted (optional)', async () => {
|
||||
const errs = await validate(plainToInstance(UpdateAiSettingsDto, {}));
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@@ -14,6 +14,8 @@ import {
|
||||
MaskedAiSettings,
|
||||
ResolvedAiConfig,
|
||||
SttApiStyle,
|
||||
ChatApiStyle,
|
||||
PROVIDER_SETTINGS_KEYS,
|
||||
} from './ai.types';
|
||||
|
||||
/**
|
||||
@@ -24,6 +26,7 @@ import {
|
||||
export interface UpdateAiSettingsInput {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
embeddingBaseUrl?: string;
|
||||
@@ -157,6 +160,8 @@ export class AiSettingsService {
|
||||
const config: ResolvedAiConfig = {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
// Cheap model id for the anonymous public-share assistant; reuses the chat
|
||||
// driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel.
|
||||
publicShareChatModel: provider.publicShareChatModel,
|
||||
@@ -238,6 +243,7 @@ export class AiSettingsService {
|
||||
return {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
embeddingModel: provider.embeddingModel,
|
||||
baseUrl: provider.baseUrl,
|
||||
embeddingBaseUrl: provider.embeddingBaseUrl,
|
||||
@@ -275,20 +281,8 @@ export class AiSettingsService {
|
||||
|
||||
// Persist non-secret provider fields (only those present in the partial).
|
||||
const providerPatch: Partial<AiProviderSettings> = {};
|
||||
for (const key of [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
'embeddingBaseUrl',
|
||||
'sttModel',
|
||||
'sttBaseUrl',
|
||||
'sttApiStyle',
|
||||
'sttLanguage',
|
||||
'systemPrompt',
|
||||
'publicShareChatModel',
|
||||
'publicShareAssistantRoleId',
|
||||
] as const) {
|
||||
// Single source of truth for the writable provider keys (see ai.types).
|
||||
for (const key of PROVIDER_SETTINGS_KEYS) {
|
||||
if (nonSecret[key] !== undefined) {
|
||||
(providerPatch as Record<string, unknown>)[key] = nonSecret[key];
|
||||
}
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
import * as http from 'node:http';
|
||||
import {
|
||||
createStreamingFetch,
|
||||
withPreResponseRetry,
|
||||
streamTimeoutMs,
|
||||
streamKeepAliveMs,
|
||||
streamingDispatcherOptions,
|
||||
isRetryableConnectError,
|
||||
} from './ai-streaming-fetch';
|
||||
|
||||
/**
|
||||
@@ -38,15 +41,54 @@ describe('streamTimeoutMs', () => {
|
||||
}
|
||||
});
|
||||
|
||||
it('applies the timeout to BOTH undici stream timeouts', () => {
|
||||
it('applies the silence timeout + keep-alive recycle window to the dispatcher', () => {
|
||||
delete process.env.AI_STREAM_TIMEOUT_MS;
|
||||
delete process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
expect(streamingDispatcherOptions()).toEqual({
|
||||
headersTimeout: 900_000,
|
||||
bodyTimeout: 900_000,
|
||||
keepAliveTimeout: 10_000,
|
||||
keepAliveMaxTimeout: 10_000,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('streamKeepAliveMs', () => {
|
||||
const ORIG = process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
afterEach(() => {
|
||||
if (ORIG === undefined) delete process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
else process.env.AI_STREAM_KEEPALIVE_MS = ORIG;
|
||||
});
|
||||
|
||||
it('defaults to 10s (recycle idle sockets so a NAT/proxy drop cannot poison reuse)', () => {
|
||||
delete process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
expect(streamKeepAliveMs()).toBe(10_000);
|
||||
});
|
||||
|
||||
it('honours a positive override and ignores invalid/non-positive', () => {
|
||||
process.env.AI_STREAM_KEEPALIVE_MS = '4000';
|
||||
expect(streamKeepAliveMs()).toBe(4000);
|
||||
for (const bad of ['0', '-1', 'x', '']) {
|
||||
process.env.AI_STREAM_KEEPALIVE_MS = bad;
|
||||
expect(streamKeepAliveMs()).toBe(10_000);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('isRetryableConnectError', () => {
|
||||
it('matches connection-level codes on the error or its cause', () => {
|
||||
expect(isRetryableConnectError({ cause: { code: 'ECONNRESET' } })).toBe(true);
|
||||
expect(isRetryableConnectError({ cause: { code: 'UND_ERR_SOCKET' } })).toBe(true);
|
||||
expect(isRetryableConnectError({ code: 'ECONNREFUSED' })).toBe(true);
|
||||
});
|
||||
it('does NOT match aborts / unrelated errors', () => {
|
||||
expect(isRetryableConnectError({ name: 'AbortError', cause: { code: 'ABORT_ERR' } })).toBe(false);
|
||||
expect(isRetryableConnectError({ cause: { code: 'UND_ERR_HEADERS_TIMEOUT' } })).toBe(false);
|
||||
expect(isRetryableConnectError(new Error('plain'))).toBe(false);
|
||||
expect(isRetryableConnectError(undefined)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('createStreamingFetch — against a delayed server', () => {
|
||||
const ORIG = process.env.AI_STREAM_TIMEOUT_MS;
|
||||
let server: http.Server;
|
||||
@@ -110,3 +152,84 @@ describe('createStreamingFetch — against a delayed server', () => {
|
||||
if (code) expect(code).toBe('UND_ERR_HEADERS_TIMEOUT');
|
||||
});
|
||||
});
|
||||
|
||||
describe('withPreResponseRetry', () => {
|
||||
// The retry is the OUTERMOST layer (over the dispatcher-bound streaming fetch),
|
||||
// matching ai.service's withPreResponseRetry(instrument(createStreamingFetch())).
|
||||
// PRE_RESPONSE_CONNECT_RETRIES is 2 -> at most 3 total attempts.
|
||||
const MAX_ATTEMPTS = 3;
|
||||
let server: http.Server;
|
||||
let url: string;
|
||||
let requests = 0;
|
||||
// 'first' resets only the first connection; 'all' resets every connection.
|
||||
let resetMode: 'first' | 'all' = 'first';
|
||||
|
||||
const retryingFetch = () => withPreResponseRetry(createStreamingFetch());
|
||||
|
||||
beforeAll(async () => {
|
||||
server = http.createServer((req, res) => {
|
||||
requests += 1;
|
||||
const shouldReset = resetMode === 'all' || requests === 1;
|
||||
if (shouldReset) {
|
||||
// Reset before any response byte (a poisoned/stale keep-alive socket).
|
||||
const sock = req.socket as import('node:net').Socket & {
|
||||
resetAndDestroy?: () => void;
|
||||
};
|
||||
if (typeof sock.resetAndDestroy === 'function') sock.resetAndDestroy();
|
||||
else sock.destroy();
|
||||
return;
|
||||
}
|
||||
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
||||
res.end('ok');
|
||||
});
|
||||
await new Promise<void>((resolve) => server.listen(0, '127.0.0.1', resolve));
|
||||
const addr = server.address() as import('node:net').AddressInfo;
|
||||
url = `http://127.0.0.1:${addr.port}/`;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await new Promise<void>((resolve) => server.close(() => resolve()));
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
requests = 0;
|
||||
resetMode = 'first';
|
||||
});
|
||||
|
||||
it('retries a pre-response reset on a fresh connection and succeeds', async () => {
|
||||
resetMode = 'first';
|
||||
const res = await retryingFetch()(url);
|
||||
expect(res.status).toBe(200);
|
||||
expect(await res.text()).toBe('ok');
|
||||
// first request reset -> retry -> second request served.
|
||||
expect(requests).toBe(2);
|
||||
});
|
||||
|
||||
it('gives up after the retry bound and rethrows the original reset', async () => {
|
||||
resetMode = 'all'; // every attempt resets -> retries exhaust
|
||||
let caught: unknown;
|
||||
try {
|
||||
await retryingFetch()(url);
|
||||
} catch (e) {
|
||||
caught = e;
|
||||
}
|
||||
expect(caught).toBeDefined();
|
||||
// A retryable connection error reached the caller (not swallowed).
|
||||
expect(isRetryableConnectError(caught)).toBe(true);
|
||||
// Bounded: exactly PRE_RESPONSE_CONNECT_RETRIES + 1 attempts hit the server
|
||||
// (pins both the limit and that the final error propagates — guards an
|
||||
// off-by-one or an infinite loop).
|
||||
expect(requests).toBe(MAX_ATTEMPTS);
|
||||
});
|
||||
|
||||
it('does NOT retry an aborted request (no retry storm)', async () => {
|
||||
resetMode = 'all';
|
||||
const ctrl = new AbortController();
|
||||
ctrl.abort();
|
||||
await expect(
|
||||
retryingFetch()(url, { signal: ctrl.signal }),
|
||||
).rejects.toBeDefined();
|
||||
// Pre-aborted: the request never reached the server, so nothing was retried.
|
||||
expect(requests).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -18,41 +18,139 @@ import { Agent } from 'undici';
|
||||
*/
|
||||
const DEFAULT_STREAM_TIMEOUT_MS = 900_000;
|
||||
|
||||
/**
|
||||
* Default keep-alive recycle window (10s). A pooled connection idle longer than
|
||||
* this is CLOSED rather than reused.
|
||||
*
|
||||
* Long agent turns leave gaps of tens of seconds between provider calls (one
|
||||
* call per step; a crawl/search tool runs in between). A NAT / reverse proxy /
|
||||
* conntrack in front of the deployment silently drops an idle connection after
|
||||
* its own timeout; undici, not knowing, then reuses that dead socket and the
|
||||
* next request fails PRE-RESPONSE with `read ECONNRESET` (#175 prod telemetry:
|
||||
* the resets correlate with idleSincePrevCall ~42s, while a direct path to the
|
||||
* provider does NOT reset). Recycling idle sockets well below such a drop window
|
||||
* means a long-gap call opens a fresh connection instead of reusing a stale one.
|
||||
* `keepAliveMaxTimeout` also caps a server-advertised keep-alive so the provider
|
||||
* cannot push the reuse window back up.
|
||||
*/
|
||||
const DEFAULT_STREAM_KEEPALIVE_MS = 10_000;
|
||||
|
||||
/**
|
||||
* How many times to retry a PRE-RESPONSE connection failure (a reset/timeout
|
||||
* before ANY response byte) on a fresh connection. Safe because `fetch()` only
|
||||
* rejects before the Response resolves — a started stream is never replayed.
|
||||
*/
|
||||
const PRE_RESPONSE_CONNECT_RETRIES = 2;
|
||||
|
||||
/** undici cause codes for a connection-level failure that occurred PRE-RESPONSE. */
|
||||
const RETRYABLE_CONNECT_CODES = new Set([
|
||||
'ECONNRESET',
|
||||
'ECONNREFUSED',
|
||||
'EPIPE',
|
||||
'ETIMEDOUT',
|
||||
'UND_ERR_SOCKET',
|
||||
'UND_ERR_CONNECT_TIMEOUT',
|
||||
]);
|
||||
|
||||
function positiveEnv(name: string, fallback: number): number {
|
||||
const raw = Number(process.env[name]);
|
||||
return Number.isFinite(raw) && raw > 0 ? raw : fallback;
|
||||
}
|
||||
|
||||
/**
|
||||
* The configured silence timeout (ms). Override with `AI_STREAM_TIMEOUT_MS`; a
|
||||
* missing/invalid/non-positive value falls back to {@link DEFAULT_STREAM_TIMEOUT_MS}.
|
||||
*/
|
||||
export function streamTimeoutMs(): number {
|
||||
const raw = Number(process.env.AI_STREAM_TIMEOUT_MS);
|
||||
return Number.isFinite(raw) && raw > 0 ? raw : DEFAULT_STREAM_TIMEOUT_MS;
|
||||
return positiveEnv('AI_STREAM_TIMEOUT_MS', DEFAULT_STREAM_TIMEOUT_MS);
|
||||
}
|
||||
|
||||
/** Keep-alive recycle window (ms). Override with `AI_STREAM_KEEPALIVE_MS`. */
|
||||
export function streamKeepAliveMs(): number {
|
||||
return positiveEnv('AI_STREAM_KEEPALIVE_MS', DEFAULT_STREAM_KEEPALIVE_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* undici `Agent` timeout options for streaming AI traffic — both stream timeouts
|
||||
* set to the (generous, finite) silence timeout. Shared by the chat provider
|
||||
* fetch and the external-MCP dispatcher so they behave identically (#175).
|
||||
* undici `Agent` options for streaming AI traffic — the (generous, finite)
|
||||
* silence timeouts plus the keep-alive recycle window. Shared by the chat
|
||||
* provider fetch and the external-MCP dispatcher so they behave identically.
|
||||
*/
|
||||
export function streamingDispatcherOptions(): {
|
||||
headersTimeout: number;
|
||||
bodyTimeout: number;
|
||||
keepAliveTimeout: number;
|
||||
keepAliveMaxTimeout: number;
|
||||
} {
|
||||
const t = streamTimeoutMs();
|
||||
return { headersTimeout: t, bodyTimeout: t };
|
||||
const ka = streamKeepAliveMs();
|
||||
return {
|
||||
headersTimeout: t,
|
||||
bodyTimeout: t,
|
||||
keepAliveTimeout: ka,
|
||||
keepAliveMaxTimeout: ka,
|
||||
};
|
||||
}
|
||||
|
||||
/** True for a connection-level error worth retrying on a fresh connection. */
|
||||
export function isRetryableConnectError(err: unknown): boolean {
|
||||
const e = err as { code?: string; cause?: { code?: string } } | undefined;
|
||||
const code = e?.cause?.code ?? e?.code;
|
||||
return typeof code === 'string' && RETRYABLE_CONNECT_CODES.has(code);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a `fetch` for long-lived streaming AI calls (the agent chat turn) backed
|
||||
* by a dedicated undici dispatcher whose stream timeouts are the generous-but-
|
||||
* finite silence timeout above (#175). A single shared dispatcher is returned
|
||||
* (callers hold it for the service lifetime) so its connection pool is reused.
|
||||
* by a dedicated undici dispatcher (finite silence timeouts + keep-alive
|
||||
* recycling, #175). A single shared dispatcher is returned (callers hold it for
|
||||
* the service lifetime) so its connection pool is reused.
|
||||
*
|
||||
* This is the BASE transport — no retry. The chat path wraps it as
|
||||
* `withPreResponseRetry(createInstrumentedFetch(ctx, createStreamingFetch()))`
|
||||
* so the retry is the OUTERMOST layer and the instrumentation observes EVERY
|
||||
* attempt (a recovered reset is still logged — see withPreResponseRetry).
|
||||
*/
|
||||
export function createStreamingFetch(): typeof fetch {
|
||||
const dispatcher = new Agent(streamingDispatcherOptions());
|
||||
return ((input: Parameters<typeof fetch>[0], init?: RequestInit) =>
|
||||
fetch(input, {
|
||||
...(init ?? {}),
|
||||
// `dispatcher` is an undici-specific init field (not in the DOM RequestInit
|
||||
// type); Node's global fetch reads it. Cast to satisfy the type.
|
||||
// `dispatcher` is an undici-specific init field (not in the DOM
|
||||
// RequestInit type); Node's global fetch reads it. Cast to satisfy it.
|
||||
dispatcher,
|
||||
} as RequestInit & { dispatcher: Agent })) as typeof fetch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap a fetch so a PRE-RESPONSE connection reset (`baseFetch` rejects before the
|
||||
* Response resolves — so nothing has streamed) is retried a few times on a fresh
|
||||
* connection (#175). A poisoned keep-alive socket is destroyed by undici on the
|
||||
* reset, so the retry lands on a new connection. An abort (client disconnect) is
|
||||
* never retried.
|
||||
*
|
||||
* This is the OUTERMOST transport layer by design: composing it as
|
||||
* `withPreResponseRetry(instrumentedFetch)` means every attempt — including the
|
||||
* resets that the retry recovers from — flows through the instrumentation, so the
|
||||
* "PRE-RESPONSE FAILED ... ECONNRESET ... idleSincePrevCall" telemetry stays
|
||||
* visible precisely when the fix is working (and AI_STREAM_KEEPALIVE_MS can be
|
||||
* tuned from real data). A retry INSIDE the transport would hide it.
|
||||
*/
|
||||
export function withPreResponseRetry(baseFetch: typeof fetch): typeof fetch {
|
||||
return (async (input: Parameters<typeof fetch>[0], init?: RequestInit) => {
|
||||
for (let attempt = 0; ; attempt++) {
|
||||
try {
|
||||
return await baseFetch(input, init);
|
||||
} catch (err) {
|
||||
const aborted = init?.signal?.aborted === true;
|
||||
if (
|
||||
aborted ||
|
||||
attempt >= PRE_RESPONSE_CONNECT_RETRIES ||
|
||||
!isRetryableConnectError(err)
|
||||
) {
|
||||
throw err;
|
||||
}
|
||||
// Brief backoff before the fresh-connection retry.
|
||||
await new Promise((resolve) => setTimeout(resolve, 150 * (attempt + 1)));
|
||||
}
|
||||
}
|
||||
}) as typeof fetch;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
// `.provider` alone cannot prove the openai-compatible factory was called with
|
||||
// `includeUsage: true` — a regression dropping it (which zeroes streamed token
|
||||
// usage / reasoning-token metadata) would still pass. So mock the factory and
|
||||
// assert the exact args. jest.mock is module-scoped, hence a dedicated file.
|
||||
|
||||
const mockCompatibleModel = { provider: 'openai-compatible.chat', modelId: 'm' };
|
||||
// jest allows `mock`-prefixed vars inside a jest.mock factory.
|
||||
const mockCreateOpenAICompatible = jest.fn(
|
||||
(_settings: unknown) => () => mockCompatibleModel,
|
||||
);
|
||||
|
||||
jest.mock('@ai-sdk/openai-compatible', () => ({
|
||||
createOpenAICompatible: (settings: unknown) =>
|
||||
mockCreateOpenAICompatible(settings),
|
||||
}));
|
||||
|
||||
import { AiService } from './ai.service';
|
||||
|
||||
describe('AiService.getChatModel openai-compatible factory args', () => {
|
||||
function serviceWith(chatApiStyle?: 'openai-compatible' | 'openai') {
|
||||
const aiSettings = {
|
||||
resolve: jest.fn().mockResolvedValue({
|
||||
driver: 'openai',
|
||||
chatModel: 'glm-5.2',
|
||||
apiKey: 'the-key',
|
||||
baseUrl: 'https://api.z.ai/v4',
|
||||
chatApiStyle,
|
||||
}),
|
||||
};
|
||||
return new AiService(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
aiSettings as any,
|
||||
{ find: jest.fn() } as never,
|
||||
{ decryptSecret: jest.fn() } as never,
|
||||
);
|
||||
}
|
||||
|
||||
beforeEach(() => mockCreateOpenAICompatible.mockClear());
|
||||
|
||||
it('passes includeUsage:true plus baseURL/apiKey/fetch (default style)', async () => {
|
||||
await serviceWith().getChatModel('ws-1'); // unset -> openai-compatible
|
||||
expect(mockCreateOpenAICompatible).toHaveBeenCalledTimes(1);
|
||||
expect(mockCreateOpenAICompatible).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
name: 'openai-compatible',
|
||||
baseURL: 'https://api.z.ai/v4',
|
||||
apiKey: 'the-key',
|
||||
includeUsage: true,
|
||||
fetch: expect.any(Function),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("does NOT use the openai-compatible factory for chatApiStyle 'openai'", async () => {
|
||||
await serviceWith('openai').getChatModel('ws-1');
|
||||
expect(mockCreateOpenAICompatible).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -285,3 +285,64 @@ describe('AiService.getChatModel role model override', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Chat provider selection by the EXPLICIT `chatApiStyle` (NOT inferred from
|
||||
* baseUrl): 'openai-compatible' (default) uses @ai-sdk/openai-compatible, which
|
||||
* maps streamed reasoning_content to reasoning parts; 'openai' uses the official
|
||||
* provider; and openai-compatible without a baseURL safely falls back to the
|
||||
* official provider (it has no default endpoint). Asserted via `.provider`.
|
||||
*/
|
||||
describe('AiService.getChatModel chatApiStyle provider selection', () => {
|
||||
function serviceWith(opts: {
|
||||
baseUrl?: string;
|
||||
chatApiStyle?: 'openai-compatible' | 'openai';
|
||||
}) {
|
||||
const aiSettings = {
|
||||
resolve: jest.fn().mockResolvedValue({
|
||||
driver: 'openai',
|
||||
chatModel: 'glm-5.2',
|
||||
apiKey: 'key',
|
||||
baseUrl: opts.baseUrl,
|
||||
chatApiStyle: opts.chatApiStyle,
|
||||
}),
|
||||
};
|
||||
return new AiService(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
aiSettings as any,
|
||||
{ find: jest.fn() } as never,
|
||||
{ decryptSecret: jest.fn() } as never,
|
||||
);
|
||||
}
|
||||
|
||||
const providerOf = async (svc: AiService) =>
|
||||
(
|
||||
(await svc.getChatModel('ws-1')) as { provider: string }
|
||||
).provider;
|
||||
|
||||
it("'openai-compatible' + baseURL -> openai-compatible provider", async () => {
|
||||
expect(
|
||||
await providerOf(
|
||||
serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai-compatible' }),
|
||||
),
|
||||
).toContain('openai-compatible');
|
||||
});
|
||||
|
||||
it("'openai' + baseURL -> official openai provider", async () => {
|
||||
expect(
|
||||
await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai' })),
|
||||
).toBe('openai.chat');
|
||||
});
|
||||
|
||||
it('unset + baseURL -> defaults to openai-compatible', async () => {
|
||||
expect(
|
||||
await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4' })),
|
||||
).toContain('openai-compatible');
|
||||
});
|
||||
|
||||
it("'openai-compatible' WITHOUT baseURL -> safe fallback to official openai", async () => {
|
||||
expect(
|
||||
await providerOf(serviceWith({ chatApiStyle: 'openai-compatible' })),
|
||||
).toBe('openai.chat');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
type LanguageModel,
|
||||
} from 'ai';
|
||||
import { createOpenAI } from '@ai-sdk/openai';
|
||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
||||
import { createOllama } from 'ai-sdk-ollama';
|
||||
import { AiSettingsService } from './ai-settings.service';
|
||||
@@ -15,7 +16,10 @@ import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured
|
||||
import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception';
|
||||
import { describeProviderError } from './ai-error.util';
|
||||
import { createInstrumentedFetch } from './ai-provider-http';
|
||||
import { createStreamingFetch } from './ai-streaming-fetch';
|
||||
import {
|
||||
createStreamingFetch,
|
||||
withPreResponseRetry,
|
||||
} from './ai-streaming-fetch';
|
||||
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
|
||||
import { SecretBoxService } from '../crypto/secret-box';
|
||||
import { AiDriver } from './ai.types';
|
||||
@@ -45,14 +49,15 @@ export interface ChatModelOverride {
|
||||
export class AiService {
|
||||
private readonly logger = new Logger(AiService.name);
|
||||
|
||||
// Provider HTTP fetch for the chat path: the streaming fetch — which RAISES
|
||||
// undici's 300s headers/body timeouts to a generous-but-finite silence timeout
|
||||
// so a long agent turn is not severed mid-stream (#175) — wrapped with the
|
||||
// provider-HTTP instrumentation so the logs observe that exact transport. Held
|
||||
// for the service lifetime to reuse the streaming dispatcher's connection pool.
|
||||
private readonly aiProviderFetch = createInstrumentedFetch(
|
||||
'AiService:provider-http',
|
||||
createStreamingFetch(),
|
||||
// Provider HTTP fetch for the chat path, layered so each transport concern is
|
||||
// observed (#175). Inside-out: the streaming fetch (finite silence timeouts +
|
||||
// keep-alive recycling) → provider-HTTP instrumentation (logs every attempt) →
|
||||
// pre-response connection-reset retry as the OUTERMOST layer. Retry-outer means
|
||||
// a reset the retry recovers from is still logged with its idle-gap, instead of
|
||||
// collapsing into a clean "OK". Held for the service lifetime to reuse the
|
||||
// streaming dispatcher's connection pool.
|
||||
private readonly aiProviderFetch = withPreResponseRetry(
|
||||
createInstrumentedFetch('AiService:provider-http', createStreamingFetch()),
|
||||
);
|
||||
|
||||
constructor(
|
||||
@@ -95,6 +100,10 @@ export class AiService {
|
||||
|
||||
let apiKey = cfg.apiKey;
|
||||
let baseUrl = cfg.baseUrl;
|
||||
// Chat provider implementation, chosen EXPLICITLY by the admin (not inferred
|
||||
// from baseUrl). Unset → 'openai-compatible' so reasoning is surfaced by
|
||||
// default for this fork's openai+baseUrl setups.
|
||||
const chatApiStyle = cfg.chatApiStyle ?? 'openai-compatible';
|
||||
|
||||
// A driver override that differs from the workspace driver needs that
|
||||
// driver's own creds (the workspace driver's key would be wrong/absent).
|
||||
@@ -145,19 +154,41 @@ export class AiService {
|
||||
}
|
||||
|
||||
switch (driver) {
|
||||
case 'openai':
|
||||
// baseURL (when set) covers openai-compatible endpoints. Use Chat
|
||||
// Completions (/chat/completions) — the portable OpenAI-compatible
|
||||
// endpoint. The default callable createOpenAI(...)(model) targets the
|
||||
// Responses API (/responses), which OpenAI-compatible gateways
|
||||
// (OpenRouter, etc.) reject on multi-turn requests (history with
|
||||
// assistant messages) → 400. The provider fetch is the instrumented
|
||||
// streaming fetch (finite-but-generous stream timeouts, #175).
|
||||
case 'openai': {
|
||||
// The provider implementation is chosen by the admin's `chatApiStyle`
|
||||
// (NOT inferred from baseUrl — a custom URL can front real OpenAI too).
|
||||
// Both branches hit Chat Completions (/chat/completions); the provider
|
||||
// fetch is the instrumented streaming fetch (finite-but-generous stream
|
||||
// timeouts, #175).
|
||||
//
|
||||
// 'openai-compatible' (default) maps the third-party provider's streamed
|
||||
// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, ...) — the
|
||||
// point of #175. It has no default endpoint, so it requires a baseURL;
|
||||
// when there is none (real OpenAI, or a role's cross-driver override that
|
||||
// cleared baseUrl) we fall back to the official provider.
|
||||
if (chatApiStyle === 'openai-compatible' && baseUrl) {
|
||||
return createOpenAICompatible({
|
||||
name: 'openai-compatible',
|
||||
apiKey,
|
||||
baseURL: baseUrl,
|
||||
// Keep streamed token usage (stream_options.include_usage): without
|
||||
// it @ai-sdk/openai-compatible omits usage, zeroing the live token
|
||||
// counter and reasoning-token metadata. The official provider always
|
||||
// sent it, so this preserves parity.
|
||||
includeUsage: true,
|
||||
fetch: this.aiProviderFetch,
|
||||
})(chatModel);
|
||||
}
|
||||
// Official @ai-sdk/openai: real-OpenAI reasoning-model request shaping;
|
||||
// `.chat()` targets Chat Completions (the default callable targets the
|
||||
// Responses API, which openai-compatible gateways 400 on multi-turn
|
||||
// history). In this fork baseUrl is normally set; undefined = real OpenAI.
|
||||
return createOpenAI({
|
||||
apiKey,
|
||||
baseURL: baseUrl,
|
||||
fetch: this.aiProviderFetch,
|
||||
}).chat(chatModel);
|
||||
}
|
||||
case 'gemini':
|
||||
return createGoogleGenerativeAI({ apiKey })(chatModel);
|
||||
case 'ollama':
|
||||
|
||||
@@ -16,6 +16,15 @@ export const AI_DRIVERS: AiDriver[] = ['openai', 'gemini', 'ollama'];
|
||||
export type SttApiStyle = 'multipart' | 'json';
|
||||
export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json'];
|
||||
|
||||
// Chat provider implementation for the `openai` driver. Chosen explicitly by the
|
||||
// admin (NOT inferred from baseUrl — a custom URL can front real OpenAI too).
|
||||
// 'openai-compatible' = @ai-sdk/openai-compatible: maps streamed
|
||||
// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, OpenRouter, ...).
|
||||
// 'openai' = official @ai-sdk/openai: real-OpenAI reasoning-model request shaping
|
||||
// (max_completion_tokens, the 'developer' role), no third-party reasoning map.
|
||||
export type ChatApiStyle = 'openai-compatible' | 'openai';
|
||||
export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai'];
|
||||
|
||||
/**
|
||||
* Non-secret provider settings persisted under `settings.ai.provider`.
|
||||
* The API key is intentionally absent here.
|
||||
@@ -23,6 +32,9 @@ export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json'];
|
||||
export interface AiProviderSettings {
|
||||
driver: AiDriver;
|
||||
chatModel: string;
|
||||
// Chat provider implementation for the `openai` driver. Unset → defaults to
|
||||
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
// Embedding-specific base URL. Falls back to `baseUrl` when empty/unset.
|
||||
@@ -45,6 +57,34 @@ export interface AiProviderSettings {
|
||||
publicShareAssistantRoleId?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* The persisted, non-secret provider setting keys — the SINGLE source of truth
|
||||
* for which fields a settings update may write through to `settings.ai.provider`.
|
||||
* `satisfies readonly (keyof AiProviderSettings)[]` makes the compiler reject a
|
||||
* typo or a key that is not a real provider setting.
|
||||
*
|
||||
* The settings service consumes this directly. The generic workspace repo cannot
|
||||
* import AI types, so it keeps its own copy of the same keys, guarded by a parity
|
||||
* test against this constant (so any future drift fails in CI, not silently in
|
||||
* prod — a missing key there validates fine, passes the service, and is then
|
||||
* dropped at the SQL boundary with no error).
|
||||
*/
|
||||
export const PROVIDER_SETTINGS_KEYS = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
'embeddingBaseUrl',
|
||||
'sttModel',
|
||||
'sttBaseUrl',
|
||||
'sttApiStyle',
|
||||
'sttLanguage',
|
||||
'systemPrompt',
|
||||
'publicShareChatModel',
|
||||
'publicShareAssistantRoleId',
|
||||
] as const satisfies readonly (keyof AiProviderSettings)[];
|
||||
|
||||
/**
|
||||
* Fully resolved provider config, including the decrypted API key for the
|
||||
* stored driver. Returned by `AiSettingsService.resolve`. The keys are held in
|
||||
@@ -76,6 +116,7 @@ export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||
export interface MaskedAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
embeddingBaseUrl?: string;
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import { IsIn, IsOptional, IsString } from 'class-validator';
|
||||
import { AI_DRIVERS, AiDriver, STT_API_STYLES, SttApiStyle } from '../ai.types';
|
||||
import {
|
||||
AI_DRIVERS,
|
||||
AiDriver,
|
||||
CHAT_API_STYLES,
|
||||
ChatApiStyle,
|
||||
STT_API_STYLES,
|
||||
SttApiStyle,
|
||||
} from '../ai.types';
|
||||
|
||||
/**
|
||||
* Admin update payload for the workspace AI provider settings.
|
||||
@@ -18,6 +25,10 @@ export class UpdateAiSettingsDto {
|
||||
@IsString()
|
||||
chatModel?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsIn(CHAT_API_STYLES)
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
embeddingModel?: string;
|
||||
|
||||
Reference in New Issue
Block a user