import {
  ForbiddenException,
  Injectable,
  Logger,
  OnModuleInit,
} from '@nestjs/common';
import { FastifyReply } from 'fastify';
import {
  streamText,
  generateText,
  convertToModelMessages,
  stepCountIs,
  type UIMessage,
  type LanguageModel,
} from 'ai';
import { AiService } from '../../integrations/ai/ai.service';
import { AiSettingsService } from '../../integrations/ai/ai-settings.service';
import { describeProviderError } from '../../integrations/ai/ai-error.util';
import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { PageAccessService } from '../page/page-access/page-access.service';
import {
  User,
  Workspace,
  AiChatMessage,
  AiAgentRole,
} from '@docmost/db/types/entity.types';
import { AiChatToolsService } from './tools/ai-chat-tools.service';
import { McpClientsService } from './external-mcp/mcp-clients.service';
import { buildSystemPrompt } from './ai-chat.prompt';
import { roleModelOverride } from './roles/role-model-config';
import {
  startSseHeartbeat,
  stripStreamingHopByHopHeaders,
} from './sse-resilience';

// Max agent steps per turn. One step = one model generation; a step that calls
// tools is followed by another step carrying the tool results. Raised from 8 so
// multi-search research questions are not cut off mid-investigation.
const MAX_AGENT_STEPS = 20;

// System-prompt addendum injected ONLY on the final step (see prepareAgentStep).
// It forbids further tool calls and tells the model to synthesize the best
// answer it can from what it already gathered, so a tool-heavy turn never ends
// empty.
const FINAL_STEP_INSTRUCTION =
  'You have reached the maximum number of tool-use steps for this turn. ' +
  'Do NOT call any more tools. Using only the information already gathered, ' +
  "write the most complete, useful final answer you can now, in the user's " +
  'language. If the information is incomplete, say so explicitly: summarize ' +
  'what you found, what is still missing, and give your best partial conclusion.';

// Hard cap applied to generated titles (the page-title column length).
const MAX_TITLE_LENGTH = 255;

/**
 * Pure, unit-testable: decide the per-step overrides for the agent loop.
 *
 * `system` is the in-scope system prompt; the final-step instruction is
 * CONCATENATED onto it so the original persona/context is preserved — a bare
 * `system` override would REPLACE the whole system prompt for the step.
 *
 * NOTE: at AI SDK v7 the per-step `system` field is renamed to `instructions`.
 * On v6 (`^6.0.134`) `system` is the correct field — adjust when bumping.
 *
 * @param stepNumber Index of the step about to run; the last allowed step is
 *   `MAX_AGENT_STEPS - 1`.
 * @param system The turn's system prompt.
 * @returns `undefined` for a normal step; on the final allowed step (or any
 *   later one) an override that disables tool calls and appends
 *   `FINAL_STEP_INSTRUCTION` to `system`.
 */
export function prepareAgentStep(
  stepNumber: number,
  system: string,
): { toolChoice: 'none'; system: string } | undefined {
  const isFinalStep = stepNumber >= MAX_AGENT_STEPS - 1;
  if (!isFinalStep) return undefined;

  return {
    toolChoice: 'none',
    system: `${system}\n\n${FINAL_STEP_INSTRUCTION}`,
  };
}

export { MAX_AGENT_STEPS, FINAL_STEP_INSTRUCTION };

/**
 * Pure, unit-testable post-processing for a model-generated title (#199).
 *
 * Trims whitespace, unwraps ONE matching pair of surrounding quotes the model
 * often adds, drops trailing periods, and hard-caps the length to the
 * page-title column.
 *
 * Only a MATCHING pair (`"…"` or `'…'`) is unwrapped. A lone leading or trailing
 * quote is part of the title (`'90s music`, `He said "hi"`) and is kept.
 *
 * @param text Raw title text as produced by the model.
 * @returns The cleaned title, at most 255 UTF-16 code units; may be empty.
 */
export function cleanGeneratedTitle(text: string): string {
  const dropTrailingPeriods = (s: string): string =>
    s.replace(/\.+$/, '').trimEnd();

  // Periods can sit AFTER the closing quote (`"Title".`); remove them first so
  // the quote pair becomes visible to the unwrap below.
  let title = dropTrailingPeriods(text.trim());

  const opener = title.charAt(0);
  const isQuote = opener === '"' || opener === "'";
  if (title.length >= 2 && isQuote && title.endsWith(opener)) {
    title = title.slice(1, -1).trim();
  }

  // Periods can also sit INSIDE the quotes (`"Title."`).
  title = dropTrailingPeriods(title).slice(0, MAX_TITLE_LENGTH);

  // The cut may have split a surrogate pair; never end on a lone high surrogate.
  const lastUnit = title.charCodeAt(title.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    title = title.slice(0, -1);
  }

  // The cut may also have landed right after a space.
  return title.trimEnd();
}
/**
 * Pure, unit-testable (#198): decide whether THIS turn is an interrupt-resume,
 * i.e. it directly follows a user interruption of the previous (still-partial)
 * assistant turn. The client "send now" flag is only a HINT — confirm it against
 * the just-loaded history so a spoofed/stale flag cannot inject the interrupt
 * note onto an ordinary turn.
 *
 * `history` is the model history oldest -> newest, with the just-inserted user
 * row as its tail; the turn before it is `history[len-2]`. We treat the new turn
 * as an interrupt-resume only when the client said so AND the preceding assistant
 * turn really ended unfinished: 'aborted' (onAbort already finalized it), or
 * still 'streaming' (onAbort has not finalized yet — the abort/resend race; the
 * partial output is already in history thanks to the step-granular write path).
 *
 * @param history Persisted turns, oldest first, ending with the new user row.
 * @param clientInterrupted The client-supplied hint; anything but `true` is "no".
 * @returns `true` only when the hint is set and the row before the tail is an
 *   assistant row whose status is 'aborted' or 'streaming'.
 */
export function isInterruptResume(
  history: Array<{ role: string; status?: string | null }>,
  clientInterrupted: boolean | undefined,
): boolean {
  if (clientInterrupted !== true) return false;

  // Undefined when history holds fewer than two rows; `?.` then yields false.
  const prev = history[history.length - 2];
  return (
    prev?.role === 'assistant' &&
    (prev.status === 'aborted' || prev.status === 'streaming')
  );
}

/**
 * Payload accepted from the client `useChat` POST body. We do NOT bind a strict
 * DTO (the global ValidationPipe whitelist would strip the useChat-specific
 * fields), so this is a loose shape parsed straight off `req.body`.
 */
export interface AiChatStreamBody {
  chatId?: string;
  // The agent role selected by the client. Honoured ONLY when creating a new
  // chat (no valid chatId) — it is persisted to ai_chats.role_id and is
  // immutable afterwards. For existing chats the role is read from the chat row,
  // never from this field, so it cannot be swapped per-turn.
  roleId?: string | null;
  // The page the user is currently viewing (client-supplied), or null on a
  // non-page route. Used ONLY as prompt context so the agent knows what "this
  // page" refers to; the page itself is never fetched server-side here. The id
  // is attacker-controllable but harmless: the agent reads/writes via its
  // CASL-enforced page tools, which 403 on a page the user cannot access.
  openPage?: { id?: string; title?: string } | null;
  // Set by the client "send now" action (#198): this turn immediately follows a
  // user interruption of the previous turn. A hint only — the server re-confirms
  // it against persisted history (`isInterruptResume`) before injecting the
  // interrupt note, so a spoofed/stale flag on an ordinary turn is ignored.
  interrupted?: boolean;
  // useChat sends the full UIMessage list; the last one is the new user turn.
  messages?: UIMessage[];
}

/**
 * Optional run-lifecycle hooks (#184 phase 1). When supplied, the turn is wrapped
 * in a first-class server-side RUN: `begin` is called once the chat id is known
 * and returns the run's AbortSignal (decoupled from the HTTP socket — a browser
 * disconnect no longer governs the abort), and the lifecycle callbacks persist
 * the run's progress and terminal status. Absent (the default) => the legacy
 * socket-bound behavior is unchanged.
 */
export interface AiChatRunHooks {
  // Called once the chat id is resolved; returns the run handle whose `signal`
  // drives the agent loop's abort. Returning null disables run tracking (the
  // turn falls back to the passed-in socket signal).
  begin(
    chatId: string,
  ): Promise<{ runId: string; signal: AbortSignal } | null>;
  // Links the seeded assistant message row to the run. May be sync or async.
  onAssistantSeeded?(
    runId: string,
    assistantMessageId: string,
  ): Promise<void> | void;
  // Reports the number of finished steps so far.
  onStep?(runId: string, stepCount: number): void;
  // Records the run's terminal status (and the cause for 'error'). May be sync
  // or async.
  onSettled?(
    runId: string,
    status: 'completed' | 'error' | 'aborted',
    error?: string,
  ): Promise<void> | void;
}

export interface AiChatStreamArgs {
  user: User;
  workspace: Workspace;
  sessionId: string;
  body: AiChatStreamBody;
  res: FastifyReply;
  signal: AbortSignal;
  // Run-lifecycle hooks (#184). When present the turn becomes a detached,
  // durable RUN whose abort is governed by the run (explicit stop), not the
  // socket; when absent the turn stays socket-bound (legacy behavior).
  runHooks?: AiChatRunHooks;
  // Resolved by the controller BEFORE res.hijack(), so an unconfigured provider
  // (AiNotConfiguredException -> 503) surfaces as clean JSON before streaming.
  // For a role with a model override this already carries the override-resolved
  // model (or the controller threw a 503 if the override driver was unconfigured).
  model: LanguageModel;
  // The agent role to apply this turn, pre-resolved by the controller from the
  // chat row (existing chat) or the request body (new chat). null => universal
  // assistant. Carried here so the turn never re-loads it.
  role: AiAgentRole | null;
}
// For a role with a model override this already carries the override-resolved // model (or the controller threw a 503 if the override driver was unconfigured). model: LanguageModel; // The agent role to apply this turn, pre-resolved by the controller from the // chat row (existing chat) or the request body (new chat). null => universal // assistant. Carried here so the turn never re-loads it. role: AiAgentRole | null; } /** * Per-user AI chat orchestration (§6.1/§6.5/§6.7 stage 1). * * Message persistence shape (ai_chat_messages): * - `role` : 'user' | 'assistant' * - `content` : the message's plain text (assistant final text; user text). * The migration column is `text`, so plain text is stored. * - `tool_calls` : jsonb — the assistant's tool steps/calls/results for this * turn (trace; also surfaced in the UI as an action log). * - `metadata` : jsonb — the assistant message's reconstructable UIMessage * `parts` plus finishReason/usage, so multi-turn tool history * can be rebuilt for `convertToModelMessages`. */ @Injectable() export class AiChatService implements OnModuleInit { private readonly logger = new Logger(AiChatService.name); constructor( private readonly ai: AiService, private readonly aiChatRepo: AiChatRepo, private readonly aiChatMessageRepo: AiChatMessageRepo, private readonly aiSettings: AiSettingsService, private readonly tools: AiChatToolsService, private readonly mcpClients: McpClientsService, private readonly aiAgentRoleRepo: AiAgentRoleRepo, private readonly pageRepo: PageRepo, private readonly pageAccess: PageAccessService, ) {} /** * Crash-recovery sweep on server start (#183): any assistant row left in the * 'streaming' state is the relic of a turn whose process died before it * reached a terminal status. Flip those to 'aborted' so history/export show * them settled (with whatever finished steps were already persisted) instead * of perpetually "streaming". Best-effort: a sweep failure is logged but must * never block server startup. 
/**
 * Crash-recovery sweep on server start (#183): any assistant row left in the
 * 'streaming' state is the relic of a turn whose process died before it
 * reached a terminal status. Flip those to 'aborted' so history/export show
 * them settled (with whatever finished steps were already persisted) instead
 * of perpetually "streaming". Best-effort: a sweep failure is logged but must
 * never block server startup.
 */
async onModuleInit(): Promise<void> {
  try {
    const swept = await this.aiChatMessageRepo.sweepStreaming();
    if (swept > 0) {
      this.logger.log(
        `Startup sweep: marked ${swept} dangling 'streaming' assistant ` +
          `message(s) as 'aborted'.`,
      );
    }
  } catch (err) {
    // Swallowed on purpose: startup must proceed even if the sweep fails.
    this.logger.warn(
      `Startup sweep of dangling 'streaming' messages failed: ${
        err instanceof Error ? err.message : 'unknown error'
      }`,
    );
  }
}

/**
 * Resolve the agent role that applies to this stream request, scoped to the
 * workspace and soft-delete aware. For an EXISTING chat the role is read from
 * `ai_chats.role_id` (authoritative — never from the body). For a NEW chat
 * (no valid chatId) the role comes from the request body's `roleId`.
 *
 * @param workspace The workspace the request is scoped to.
 * @param body The loose client body (`chatId` / `roleId` are read).
 * @returns The live, enabled role, or null for the universal assistant or when
 *   the referenced role is missing / disabled / soft-deleted.
 */
async resolveRoleForRequest(
  workspace: Workspace,
  body: AiChatStreamBody,
): Promise<AiAgentRole | null> {
  // Look the chat up only when the client named one. A stale chatId (no row in
  // this workspace) is treated as a new chat.
  const chat = body.chatId
    ? await this.aiChatRepo.findById(body.chatId, workspace.id)
    : undefined;

  // A valid existing chat fixes the role from its own row; otherwise the role
  // comes from the request body.
  const roleId = chat ? chat.roleId : body.roleId;
  if (!roleId) return null;

  // A disabled or soft-deleted role falls back to the universal assistant: it
  // must not apply its persona/model override even to a chat that was bound to
  // it earlier. findLiveEnabled enforces this (live + enabled + workspace
  // scope), server-authoritatively, for both the new-chat (body.roleId) and
  // existing-chat (chat.role_id) paths — the single shared invariant.
  return (
    (await this.aiAgentRoleRepo.findLiveEnabled(roleId, workspace.id)) ?? null
  );
}
/**
 * Resolve the chat language model for the workspace, applying the role's
 * optional model override. Exposed so the controller can resolve it BEFORE
 * res.hijack(): an unconfigured provider (incl. a role pointing at an
 * unconfigured driver) throws AiNotConfiguredException there and returns a
 * clean 503 instead of breaking mid-stream.
 *
 * @param workspaceId Workspace whose AI provider settings apply.
 * @param role Optional role whose model override (if any) is applied.
 */
getChatModel(
  workspaceId: string,
  role?: AiAgentRole | null,
): Promise<LanguageModel> {
  return this.ai.getChatModel(workspaceId, roleModelOverride(role));
}

/**
 * Validate the client-supplied open page and return its AUTHORITATIVE identity
 * ({ id, title }) or null. The client controls BOTH the id and the title in the
 * request body, so neither is trusted: the id must resolve to a real page in
 * THIS workspace that the user may read, and the title is taken from the DB row
 * (never the client) so the model can't be told it is "on Page A" while the id
 * points at page B (#159).
 *
 * Fail-closed — a missing / non-string id, a missing / foreign / inaccessible
 * page, or any fault in the page lookup or the access check returns null, so
 * this never throws.
 *
 * @param openPage The untrusted `openPage` value from the request body.
 * @param workspace The workspace the request is scoped to.
 * @param user The requesting user, checked for view access.
 */
private async resolveOpenPageContext(
  openPage: { id?: string; title?: string } | null | undefined,
  workspace: Workspace,
  user: User,
): Promise<{ id: string; title: string } | null> {
  // The body is parsed without a DTO, so `id` may be any JSON value at runtime
  // despite its declared type; only a non-empty string is a usable candidate.
  const candidatePageId: unknown = openPage?.id;
  if (typeof candidatePageId !== 'string' || candidatePageId === '') {
    return null;
  }

  let page: Awaited<ReturnType<PageRepo['findById']>>;
  try {
    page = await this.pageRepo.findById(candidatePageId);
  } catch (e) {
    // A lookup fault on a client-controlled id must not escape: log it so a
    // real fault is not masked, then fail closed like every other path here.
    this.logger.warn(
      `open page lookup failed: ${
        e instanceof Error ? e.message : 'unknown error'
      }`,
    );
    return null;
  }
  if (!page || page.workspaceId !== workspace.id) return null;

  try {
    await this.pageAccess.validateCanView(page, user);
  } catch (e) {
    // A ForbiddenException is the expected "user cannot read this page" case;
    // log anything else (e.g. a DB error) so a real fault is not masked.
    if (!(e instanceof ForbiddenException)) {
      this.logger.warn(
        `open page access check failed: ${
          e instanceof Error ? e.message : 'unknown error'
        }`,
      );
    }
    return null;
  }

  return { id: page.id, title: page.title ?? '' };
}

async stream({
  user,
  workspace,
  sessionId,
  body,
  res,
  signal,
  model,
  role,
  runHooks,
}: AiChatStreamArgs): Promise {
  // Resolve / create the chat. A new chat is created when no valid chatId is
  // supplied or the supplied one does not belong to this workspace.
let isNewChat = false; let chatId = body.chatId; if (chatId) { const existing = await this.aiChatRepo.findById(chatId, workspace.id); if (!existing) { chatId = undefined; } } // The open page the client sent is attacker-controllable — BOTH its id and // its title. Resolve it ONCE against the DB (workspace-scoped + access- // checked) and use the AUTHORITATIVE identity everywhere below: the system // prompt context, the getCurrentPage tool, and the new-chat history origin. // Previously the client title was echoed verbatim, so a navigation / two-tab // desync (openPage.id -> page B, title -> "Page A") made the model report // "updated Page A" while it edited page B (#159). Null when no page is open // or the page is foreign / inaccessible / missing. const openPageContext = await this.resolveOpenPageContext( body.openPage, workspace, user, ); if (!chatId) { // The history-list origin is the validated open page (see above): // persisting an unvalidated id would leak a title via the chat-list join, // or violate the page_id FK on insert (this runs after res.hijack(), so a // DB error would break the stream). const originPageId: string | null = openPageContext?.id ?? null; const chat = await this.aiChatRepo.insert({ creatorId: user.id, workspaceId: workspace.id, // Bind the chat to the resolved role (if any) at creation time. The role // is immutable afterwards (later turns read it from this column). roleId: role?.id ?? null, // Validated above: a real, readable page in this workspace, else null. pageId: originPageId, }); chatId = chat.id; isNewChat = true; } // Start the durable RUN now that the chat id is known (#184 phase 1). The // returned `runId` + `signal` make the turn a first-class server-side object // whose abort is governed by the run (an explicit user stop), NOT by the HTTP // socket — so a browser disconnect no longer ends the turn. 
With no runHooks // (the default / flag off) the turn stays socket-bound via `signal` and // `runId` is undefined, leaving the legacy path byte-for-byte unchanged. let runId: string | undefined; let effectiveSignal = signal; if (runHooks) { try { const handle = await runHooks.begin(chatId); if (handle) { runId = handle.runId; effectiveSignal = handle.signal; } } catch (err) { // A failed run start must not break the turn — fall back to the socket // signal (legacy behavior) and stream anyway. this.logger.error( `Failed to begin agent run (chat ${chatId}); streaming without run tracking`, err as Error, ); } } // Extract the incoming user turn (the last user message from useChat). const incoming = lastUserMessage(body.messages); const incomingText = uiMessageText(incoming); // Persist the user message before contacting the model. await this.aiChatMessageRepo.insert({ chatId, workspaceId: workspace.id, userId: user.id, role: 'user', content: incomingText, // jsonb column: UIMessage parts are JSON-serializable at runtime but not // structurally `JsonValue`, so cast through unknown. metadata: (incoming?.parts ? { parts: incoming.parts } : null) as never, }); // Rebuild the conversation from persisted history (not the client payload), // so the model always sees the authoritative server-side transcript. Load // the FULL history in chronological order (oldest -> newest, incl. the user // message just inserted above) so NO turns are dropped — there is no // recent-tail window anymore. `findAllByChat` keeps a 5000-row memory-safety // backstop (on overflow it keeps the NEWEST rows and logs a warning); that // is a safety net far above any realistic chat, not a conversational limit. const history = await this.aiChatMessageRepo.findAllByChat( chatId, workspace.id, ); const uiMessages = history.map(rowToUiMessage); // convertToModelMessages is async in ai@6.0.134 (returns Promise). 
const messages = await convertToModelMessages(uiMessages); // Interrupt-resume detection (#198): the client "send now" flag is only a // hint — confirm it against the persisted history (the preceding assistant // turn must really be aborted/streaming) so a spoofed flag cannot inject the // interrupt note onto an ordinary turn. The partial output the model needs is // already in `messages` (the aborted assistant row replays via findRecent). const interrupted = isInterruptResume(history, body.interrupted); // The model is resolved by the controller before hijack (clean 503 path). // Here we only need the admin-configured system prompt. const resolved = await this.aiSettings.resolve(workspace.id); // Build the external MCP toolset FIRST so the system prompt can carry each // connected server's admin-authored guidance (#180). Merge in admin- // configured external MCP tools (web search, etc.; §6.8). A down/slow // external server never crashes the turn — toolsFor skips it and records the // outcome. The returned client handles MUST be closed in the streamText // lifecycle (onFinish/onError/onAbort) — leaking them is a bug. Docmost // tools take precedence on a name clash (external are namespaced, so a clash // is not expected; the spread order makes intent explicit). let external: Awaited> = { tools: {}, clients: [], outcomes: [], instructions: [], }; try { external = await this.mcpClients.toolsFor(workspace.id); } catch (err) { // Building the external toolset must never break the turn; proceed with // Docmost-only tools. Never log URLs/headers — short message only. this.logger.warn( `External MCP toolset unavailable: ${ err instanceof Error ? err.message : 'unknown error' }`, ); } // Close every external client EXACTLY ONCE across the turn's terminal // callbacks (onFinish/onError/onAbort all fire at most once collectively, // but guard anyway). 
DEFINED HERE — before the prompt/toolset are built — so // that if buildSystemPrompt or forUser throws AFTER the external lease was // taken (toolsFor above), the lease is still released. Otherwise its refCount // stays >= 1 forever and the external undici sockets leak until restart // (#180 reorder moved toolsFor ahead of these; #185 review). Close errors are // swallowed so they never break the response. let clientsClosed = false; const closeExternalClients = async (): Promise => { if (clientsClosed) return; clientsClosed = true; await Promise.all( external.clients.map((c) => c.close().catch((closeErr) => { this.logger.warn( `Failed to close external MCP client: ${ closeErr instanceof Error ? closeErr.message : 'unknown error' }`, ); }), ), ); }; // Build the system prompt + Docmost toolset. If either throws after the // external MCP lease was taken above, release the lease before rethrowing so // the leased transports are not leaked (#185 review). let system: string; let docmostTools: Awaited>; try { system = buildSystemPrompt({ workspace, adminPrompt: resolved?.systemPrompt, // The role (pre-resolved by the controller) REPLACES the persona layer; // the safety framework is still appended by buildSystemPrompt. roleInstructions: role?.instructions, // Server-validated open page (authoritative title), not the client value. openedPage: openPageContext, // Guidance only for servers that connected and yielded ≥1 callable tool. mcpInstructions: external.instructions, // History-confirmed interrupt-resume flag (#198): adds the interrupt note // so the model treats the partial answer above as cut off, not finished. interrupted, }); // Pass the resolved chatId so the write tools can mint provenance tokens // (access + collab) carrying { actor:'agent', aiChatId: chatId }, making // agent REST/collab writes attributable and non-spoofable (§6.5/§6.6). 
docmostTools = await this.tools.forUser( user, sessionId, workspace.id, chatId, // Same server-validated open page used by the system prompt above; // exposed to the model via getCurrentPage so page identity (and the // AUTHORITATIVE title) survives prompt mangling / client title spoofing. openPageContext, ); } catch (err) { await closeExternalClients(); throw err; } const tools = { ...external.tools, ...docmostTools }; // Accumulate the turn's streamed output so a provider error / disconnect can // persist the PARTIAL answer the user already saw — the SDK's onError/onAbort // callbacks don't hand us the in-progress text. `capturedSteps` holds finished // steps (tool calls + their text); `inProgressText` holds the text streamed in // the CURRENT, not-yet-finished step, reset whenever a step finishes. const capturedSteps: StepLike[] = []; let inProgressText = ''; // Step-granular durability (#183): create the assistant row UPFRONT in the // 'streaming' state (before any token), then UPDATE it as each step finishes // and finalize it once on the terminal callback. If the process dies // mid-turn the row survives with every finished step already persisted; the // startup sweep (sweepStreaming) later flips a dangling 'streaming' row to // 'aborted'. The DB is now the single source of truth for the turn — the // socket is never required for the write path. A failed upfront insert is // logged and leaves assistantId undefined; the per-step/terminal updates then // no-op (guarded below) so the turn still streams to the user. let assistantId: string | undefined; try { const seed = flushAssistant([], '', 'streaming'); const seeded = await this.aiChatMessageRepo.insert({ chatId, workspaceId: workspace.id, userId: user.id, role: 'assistant', content: seed.content, // jsonb columns: cast through never (same as the user insert above). toolCalls: (seed.toolCalls ?? 
null) as never, metadata: seed.metadata as never, status: seed.status, }); assistantId = seeded?.id; } catch (err) { this.logger.error( `Failed to insert upfront assistant row (chat ${chatId}, workspace ${workspace.id})`, err as Error, ); } // Link the assistant message (the #183 projection) to its run (#184), so a // reconnecting client can resolve the run's output. Best-effort. if (runId && assistantId) { try { await runHooks?.onAssistantSeeded?.(runId, assistantId); } catch (err) { this.logger.warn( `Failed to link assistant row to run ${runId}: ${ err instanceof Error ? err.message : 'unknown error' }`, ); } } // Per-step (non-terminal) update: persist the finished steps the moment a // step ends. Tolerant — a failed update is logged and swallowed so it never // throws into the stream. Keeps status 'streaming'. const updateStreaming = async (): Promise => { if (!assistantId) return; // Cheap short-circuit once the turn is finalized (see `finalized` below). // The AUTHORITATIVE guard is `onlyIfStreaming` on the UPDATE: a late // fire-and-forget step update could still be in flight on another pool // connection when finalize runs, so the SQL `WHERE status='streaming'` // (not this flag) is what prevents it clobbering the terminal row. if (finalized) return; try { await this.aiChatMessageRepo.update( assistantId, workspace.id, flushAssistant(capturedSteps, '', 'streaming'), { onlyIfStreaming: true }, ); } catch (err) { this.logger.warn( `Failed to update streaming assistant row: ${ err instanceof Error ? err.message : 'unknown error' }`, ); } }; // Serialize the per-step updates (#183 review): onStepFinish fires them // without await, so two could otherwise commit out of order on different pool // connections (step N landing after N+1). Chaining each onto the previous // keeps the persisted row monotonic with step order; each link short-circuits // on `finalized`, so a tail of late updates is cheap. 
let stepUpdateChain: Promise = Promise.resolve(); // Terminal finalize: write the completed/error/aborted row exactly once // across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort // callbacks — mirroring the pre-#183 persist-at-most-once guard for the // TERMINAL status (the row may be updated many times with 'streaming' before // this fires once). let finalized = false; const finalizeAssistant = async ( flushed: AssistantFlush, ): Promise => { if (finalized) return; finalized = true; const plan = planFinalizeAssistant(assistantId); try { // Shared dispatch (see applyFinalize): UPDATE the upfront row, or — when // the upfront insert failed (kind 'insert') — INSERT the terminal row as // the only safety against losing the turn entirely. await applyFinalize( this.aiChatMessageRepo, plan, { chatId, workspaceId: workspace.id, userId: user.id }, flushed, ); } catch (err) { this.logger.error( `Failed to finalize assistant message (kind=${plan.kind})`, err as Error, ); } }; // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure // first-chunk latency, the model-silent gap right before a disconnect, and // how many SSE heartbeats were written, so a Safari drop can be classified // (idle-gap vs hard wall-clock cap vs slow first chunk). const streamStartedAt = Date.now(); let firstModelChunkAt: number | undefined; let lastModelChunkAt = streamStartedAt; let heartbeatsSent = 0; // NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous // failure here (or in pipe below) would skip the terminal callbacks, so the // catch releases the leased external clients to avoid a connection leak. let result: ReturnType; try { result = streamText({ model, system, messages, tools, // No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full // page body for the write tools) are emitted as OUTPUT tokens, so a fixed // cap would truncate complex tool calls mid-argument. Let the model use its // natural per-step budget. 
(Cost/credit limits are an account concern, not // something to enforce by silently breaking the agent.) stopWhen: stepCountIs(MAX_AGENT_STEPS), // Forced finalization: reserve the LAST allowed step for a text-only // answer. Without this, a turn that spends all its steps on tool calls // ends with no assistant text (an empty turn). prepareAgentStep forbids // further tool calls and appends a synthesis instruction on that step, // concatenated onto the original `system` so the persona is preserved. prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system), // #184: the RUN's signal (explicit-stop) when a run wraps this turn, else // the socket-bound signal (legacy). A browser disconnect aborts only in // the legacy path. abortSignal: effectiveSignal, onChunk: ({ chunk }) => { // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model // output chunk means the stream is actively emitting bytes; track first // + most-recent activity timestamps. const now = Date.now(); firstModelChunkAt ??= now; lastModelChunkAt = now; // 'text-delta' is the assistant's prose; tool-call args are separate chunk // types — so this mirrors exactly what streams to the client. if (chunk.type === 'text-delta') inProgressText += chunk.text; }, onStepFinish: (step) => { // The finished step's full text is now in `step.text`; fold it in and reset // the in-progress accumulator for the next step. capturedSteps.push(step as StepLike); inProgressText = ''; // Step-granular durability (#183): persist this finished step (its text + // tool calls + tool RESULTS) the moment it ends, so a process death after // this point still recovers the step. Not awaited here (never block the // stream), but SERIALIZED via stepUpdateChain so the writes commit in // step order; updateStreaming is error-tolerant (logs + swallows). stepUpdateChain = stepUpdateChain.then(() => updateStreaming()); // #184: persist the run's progress (finished-step count). 
Fire-and- // forget; the hook swallows its own errors. if (runId) runHooks?.onStep?.(runId, capturedSteps.length); }, onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => { // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success // baseline for Safari comparison. const diagNow = Date.now(); this.logger.log( `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` + `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`, ); // Finalize the assistant row (#183): the upfront 'streaming' row is // UPDATEd to 'completed' with the turn's final text, cumulative usage and // full UIMessage parts. We pass the SDK `steps` (which carry the final // step's text) as the captured steps so metadata.parts matches the // pre-#183 onFinish record exactly; `inProgressText` is '' here (the last // step already finished). Final-step usage (usage.input+output) ≈ the // conversation's CURRENT context size, distinct from totalUsage. // // COLUMN-SEMANTICS NOTE (#183): `content` is built by flushAssistant as // the CONCATENATION of every step's text (stepsText), whereas pre-#183 // it stored only the FINAL step's text. This is a deliberate, harmless // change: the UI and the Markdown export render from `metadata.parts` // (per-step text + tool parts), not from `content`; `content` is the // plain-text projection (full-text search / fallback). A multi-step // turn's `content` therefore now holds all steps' prose, not just the // last block. await finalizeAssistant( flushAssistant(steps as StepLike[], '', 'completed', { finishReason: finishReason as string, usage: totalUsage as StreamUsage, contextTokens: (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || undefined, // Max context window for the chat header badge denominator; // resolved from the admin-configured provider settings (in // closure scope here). Omitted/0 = no limit. 
maxContextTokens: resolved?.chatContextWindow, }), ); // #184: settle the RUN as succeeded (best-effort, after the projection // is finalized above). if (runId) await runHooks?.onSettled?.(runId, 'completed'); // Lifecycle: release the external MCP clients leased for this turn. await closeExternalClients(); // Generate the chat title for a freshly created chat AFTER the stream's // provider call has completed — NOT concurrently with it. The z.ai coding // endpoint stalls one of two concurrent requests to the same plan, which // black-holed the chat stream (~300s headers timeout) when title // generation raced it. Running it here (solo, fire-and-forget) avoids the // race; never block the turn on it, swallow any error. if (isNewChat && incomingText) { void this.generateTitle(chatId, workspace.id, incomingText).catch( (err) => { this.logger.warn( `Title generation failed: ${(err as Error)?.message ?? err}`, ); }, ); } }, onError: async ({ error }) => { // NestJS Logger.error(message, stack?, context?): pass the real message // (with statusCode when present) + the stack string, not the Error // object, so the actual provider cause is clearly logged. Reuse the // shared formatter so provider error formatting stays unified. const e = error as { stack?: string }; const errorText = describeProviderError(error, String(error)); this.logger.error(`AI chat stream error: ${errorText}`, e?.stack); // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of // an error-terminated stream. const diagNow = Date.now(); this.logger.warn( `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` + `firstChunkLatency=${firstModelChunkAt ? 
firstModelChunkAt - streamStartedAt : 'none'}ms ` + `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`, ); // Finalize the PARTIAL answer streamed before the failure (text + any // finished tool steps) WITH the error in metadata, so the turn shows what // the user already saw plus the cause — not just a bare error. Status // 'error' (#183). await finalizeAssistant( flushAssistant(capturedSteps, inProgressText, 'error', { error: errorText, }), ); // #184: settle the RUN as failed, carrying the provider/transport cause. if (runId) await runHooks?.onSettled?.(runId, 'error', errorText); await closeExternalClients(); }, onAbort: async ({ steps }) => { const partialChars = capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) + inProgressText.length; // Unlike onError/onFinish, this terminal path otherwise writes nothing, so // an aborted turn (client disconnect / proxy drop / stop()) would be // invisible in the logs. Log it (warn) so the abort is traceable. this.logger.warn( `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` + `step(s), ${partialChars} chars partial text; persisting partial turn.`, ); // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key // line — classifies the Safari drop. const diagNow = Date.now(); this.logger.warn( `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` + `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` + `steps=${steps.length}`, ); await finalizeAssistant( flushAssistant(capturedSteps, inProgressText, 'aborted'), ); // #184: settle the RUN as aborted (an explicit user stop reached the // run's signal; a disconnect does not abort a run-wrapped turn). 
if (runId) await runHooks?.onSettled?.(runId, 'aborted'); await closeExternalClients(); }, }); // Drain the stream independently of the client socket so the turn always // runs to completion (or to its abort) and the terminal callbacks // (onFinish/onError/onAbort) fire — releasing the per-turn object graph // (history, the per-request toolset closures, captured steps, SDK buffers) // and closing leased MCP clients. WITHOUT this, a client disconnect leaves // the pipe's dead socket as the only reader; backpressure stalls the stream, // the callbacks never run, and every dropped turn stays rooted in memory — // the heap-OOM leak. consumeStream removes that backpressure (AI SDK v6 // "Handling client disconnects"). NOT awaited (fire-and-forget); the stream // errors are already logged by the streamText `onError` callback above, so // swallow here to avoid an unhandledRejection. void result.consumeStream({ onError: () => undefined }); // Stream the UI-message protocol straight to the hijacked Node response. // Without onError the AI SDK masks the cause ('An error occurred.') and the // UI shows a generic failure. Surface the real provider message instead. // AI SDK error messages / 4xx bodies never contain the API key, so this is // safe; we never dump the resolved config/apiKey. // // SSE buffering / proxy note: pipeUIMessageStreamToResponse writes the // headers immediately (res.writeHead) and each chunk incrementally, and the // SDK's default UI_MESSAGE_STREAM_HEADERS already include // `x-accel-buffering: no` (disables nginx response buffering) plus // `content-type: text/event-stream` and `cache-control: no-cache`. We pass // `headers` explicitly anyway so the intent is visible here and survives any // future change to the SDK defaults (prepareHeaders only fills a header when // absent, so this never clobbers the SDK's content-type). 
DEPLOYMENT: the // reverse proxy in front of this server MUST NOT buffer this route, or the // whole response is released at once and nothing streams. nginx honours the // `x-accel-buffering: no` header we send (and additionally set // `proxy_buffering off; proxy_cache off;` for /api/ai-chat/stream); traefik // does not buffer responses by default. // Scrub the SDK's hop-by-hop Connection header before it writes the head (Safari/HTTP2). stripStreamingHopByHopHeaders(res.raw); // Running sum of per-step usage (v6 `finish-step.usage` is per-step). Sent // as the cumulative authoritative usage so the client never jumps DOWN. let cumulativeStepUsage: ChatStreamUsage | undefined; result.pipeUIMessageStreamToResponse(res.raw, { headers: { 'X-Accel-Buffering': 'no' }, // Surface the authoritative chatId on the streamed assistant UI message so // the client adopts the REAL id of the row we created, instead of guessing // the newest chat in its list. `messageMetadata` is invoked by the AI SDK // on the `start`, `finish-step` and `finish` stream parts (ai@6 — note the // `finish-step` trigger relies on it being delivered as its own // message-metadata chunk); we attach `chatId` on the `start` part so it // reaches the client (as message.metadata.chatId) at the very first chunk — // before any second tab can race a newer chat into the list. This fixes the // two-tab "adoption race" (#137). // // `finish-step.usage` is PER-STEP (not cumulative) in v6, and the client // merges each metadata.usage by replacement — so on a multi-step agent turn // (up to MAX_AGENT_STEPS) the naive per-step value would make the live // counter jump DOWN at each boundary. We keep a running sum here and send // the CUMULATIVE usage, which converges to `finish.totalUsage` (#151). 
messageMetadata: ({ part }) => { const p = part as StreamMetadataPart; if (p.type === 'finish-step') { cumulativeStepUsage = accumulateStepUsage( cumulativeStepUsage, normalizeStreamUsage(p.usage), ); } return chatStreamMetadata(p, chatId, cumulativeStepUsage, runId); }, // Stream reasoning (thinking) parts to the client so the live counter can // estimate reasoning tokens from streamed text. v6 default is already // true; set explicitly so the intent survives any future SDK default // change. Providers that don't emit reasoning text still surface the // count via the authoritative `usage.reasoningTokens` on finish-step. sendReasoning: true, onError: (error: unknown) => { // Reuse the shared formatter so provider error formatting stays // unified between the log line and the streamed error message. return describeProviderError(error, 'AI stream error'); }, }); // Force the status line + headers onto the socket NOW (before the model's // first token), so the proxy sees the response start immediately even if the // provider's first chunk is delayed. writeToServerResponse already called // writeHead synchronously above; flushHeaders is a belt-and-braces no-op once // headers are sent, and is guarded for response-likes that lack it. res.raw.flushHeaders?.(); // Heartbeat: keep the SSE stream progressing during silent tool/think gaps (Safari/proxy idle timeout). // DIAGNOSTIC (Safari stream-drop investigation) — temporary: count beats so a disconnect log can show // how many pings were written before Safari dropped. startSseHeartbeat(res.raw, 15_000, () => { heartbeatsSent += 1; }); } catch (err) { // Synchronous failure before/while wiring the stream: the terminal // callbacks will not run, so release the leased external clients here and // re-throw for the controller to surface on the socket. await closeExternalClients(); throw err; } } /** * One-shot page-title generation from a note's content (#199). 
No tools, no * streaming — mirrors generateTitle() but for an arbitrary note body supplied * by the client, and RETURNS the title instead of writing it (the client * applies it via the existing /pages/update route, which enforces edit * permission). The content is truncated to keep the prompt cheap and within * context limits. Throws AiNotConfiguredException (503) if AI is unconfigured. */ async generatePageTitle(workspaceId: string, content: string): Promise { const model = await this.ai.getChatModel(workspaceId); const { text } = await generateText({ model, system: 'You generate a single concise, descriptive title for a note based on ' + 'its content. Reply with the title only — at most 8 words, no quotes, ' + 'no trailing punctuation, written in the same language as the note.', prompt: content.slice(0, 8000), }); return cleanGeneratedTitle(text); } /** * Cheap, non-blocking title generation from the first user message. Uses * generateText (async) and writes the result back onto the chat row. Any * failure is caught by the caller — title is best-effort cosmetic metadata. */ private async generateTitle( chatId: string, workspaceId: string, firstMessage: string, ): Promise { const model = await this.ai.getChatModel(workspaceId); const { text } = await generateText({ model, system: 'Generate a short, descriptive chat title (max 6 words) for the ' + "user's first message. Reply with the title only — no quotes, no " + 'punctuation at the end.', prompt: firstMessage.slice(0, 2000), }); const title = text .trim() .replace(/^["']|["']$/g, '') .slice(0, 120); if (title) { await this.aiChatRepo.update(chatId, { title }, workspaceId); } } } /** Shape of the AI SDK v6 LanguageModelUsage we forward to the client. The SDK * exposes `reasoningTokens` both as a (deprecated) top-level field and under * `outputTokenDetails.reasoningTokens`; we normalize to a single field so the * client gets one stable usage shape regardless of provider/SDK version. 
/**
 * Shape of the AI SDK v6 LanguageModelUsage we forward to the client. The SDK
 * exposes `reasoningTokens` both as a (deprecated) top-level field and under
 * `outputTokenDetails.reasoningTokens`; we normalize to a single field so the
 * client gets one stable usage shape regardless of provider/SDK version.
 */
interface StreamUsage {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
  outputTokenDetails?: { reasoningTokens?: number };
}

/** A streamed part the messageMetadata callback can receive (only the fields we read). */
interface StreamMetadataPart {
  type: string;
  usage?: StreamUsage;
  totalUsage?: StreamUsage;
}

/** Authoritative usage we attach to a streamed assistant message's metadata. */
export interface ChatStreamUsage {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
}

/**
 * Normalize an AI SDK usage object to our flat client-facing shape, resolving
 * reasoning tokens from either the new `outputTokenDetails` or the deprecated
 * top-level field.
 *
 * @param usage SDK usage object, possibly absent.
 * @returns The flat usage, or undefined for a missing usage object.
 */
function normalizeStreamUsage(
  usage: StreamUsage | undefined,
): ChatStreamUsage | undefined {
  if (!usage) return undefined;
  // Prefer the nested (current) field; fall back to the deprecated top-level one.
  const reasoningTokens =
    usage.outputTokenDetails?.reasoningTokens ?? usage.reasoningTokens;
  return {
    inputTokens: usage.inputTokens,
    outputTokens: usage.outputTokens,
    totalTokens: usage.totalTokens,
    reasoningTokens,
  };
}

/**
 * Sum a (normalized) per-step usage into a running cumulative usage. v6's
 * `finish-step.usage` is PER-STEP, so the caller accumulates across steps; the
 * cumulative sum converges to the turn's `totalUsage` (no down-jump on the
 * client). Pure.
 *
 * @param acc Running total so far, if any.
 * @param step The usage of the step that just finished, if any.
 * @returns The other side (by identity) when one side is absent, otherwise a
 *   new field-wise sum; undefined only when both sides are absent.
 */
export function accumulateStepUsage(
  acc: ChatStreamUsage | undefined,
  step: ChatStreamUsage | undefined,
): ChatStreamUsage | undefined {
  if (!acc) return step;
  if (!step) return acc;
  // A field stays undefined only when NEITHER side reported it; otherwise the
  // missing side counts as 0.
  const add = (a?: number, b?: number): number | undefined =>
    a == null && b == null ? undefined : (a ?? 0) + (b ?? 0);
  return {
    inputTokens: add(acc.inputTokens, step.inputTokens),
    outputTokens: add(acc.outputTokens, step.outputTokens),
    totalTokens: add(acc.totalTokens, step.totalTokens),
    reasoningTokens: add(acc.reasoningTokens, step.reasoningTokens),
  };
}

/**
 * Pure metadata builder for the streamed assistant UI message. The AI SDK calls
 * `messageMetadata` on the `start`, `finish-step` and `finish` stream parts; we
 * attach (as `message.metadata`):
 *   - `start`       -> `{ chatId }` so the client adopts the real created chat id
 *                      at the first chunk (see adopt-chat-id.ts / #137).
 *   - `finish-step` -> `{ usage }` the CUMULATIVE authoritative usage so far
 *                      (incl. reasoning tokens) — the caller passes the running
 *                      sum (`cumulativeStepUsage`), since v6 per-step usage is not
 *                      cumulative; the client snaps to exact without jumping down.
 *   - `finish`      -> `{ usage }` from the turn's `totalUsage` (final reconcile).
 * Any other part type contributes no metadata. Pure + unit-testable.
 *
 * @param part The stream part the SDK invoked the callback for.
 * @param chatId Id of the chat row this turn belongs to.
 * @param cumulativeStepUsage Running usage sum maintained by the caller.
 * @param runId Active run id; attached on `start` only when provided.
 * @returns The metadata object for this part, or undefined for none.
 */
export function chatStreamMetadata(
  part: StreamMetadataPart,
  chatId: string,
  cumulativeStepUsage?: ChatStreamUsage,
  // #184: the active run's id, attached alongside `chatId` on the `start` part so
  // the client learns the run it can reconnect to / stop. Omitted when the turn
  // is not run-wrapped (legacy path).
  runId?: string,
): { chatId: string; runId?: string } | { usage: ChatStreamUsage } | undefined {
  if (part.type === 'start') return runId ? { chatId, runId } : { chatId };
  if (part.type === 'finish-step') {
    return cumulativeStepUsage ? { usage: cumulativeStepUsage } : undefined;
  }
  if (part.type === 'finish') {
    const usage = normalizeStreamUsage(part.totalUsage);
    return usage ? { usage } : undefined;
  }
  return undefined;
}

/**
 * The last message with role 'user' from a useChat payload, if any.
 *
 * @param messages The payload's message list; a non-array yields undefined.
 */
function lastUserMessage(
  messages: UIMessage[] | undefined,
): UIMessage | undefined {
  if (!Array.isArray(messages)) return undefined;
  // Scan from the tail so the most recent user message wins.
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i]?.role === 'user') return messages[i];
  }
  return undefined;
}

/** Concatenate the text parts of a UIMessage into a plain string. */
function uiMessageText(message: UIMessage | undefined): string {
  if (!message?.parts) return '';
  return message.parts
    .filter((p): p is { type: 'text'; text: string } => p?.type === 'text')
    .map((p) => p.text)
    .join('');
}

/** Build a single text part array (or empty when there is no text). */
function textPart(text: string): Array<{ type: 'text'; text: string }> {
  return text ? [{ type: 'text', text }] : [];
}

/**
 * Minimal shapes of the AI SDK v6 step objects we read to rebuild UIMessage
 * parts (see ai@6.0.134 `StepResult`: `text`, `toolCalls` -> TypedToolCall,
 * `toolResults` -> TypedToolResult). Typed loosely so this survives provider
 * variation; only the fields we persist are referenced.
 */
type StepLike = {
  text?: string;
  toolCalls?: ReadonlyArray<{
    toolCallId?: string;
    toolName?: string;
    input?: unknown;
  }>;
  toolResults?: ReadonlyArray<{
    toolCallId?: string;
    toolName?: string;
    output?: unknown;
  }>;
};

/**
 * Compaction tunables for persisted tool OUTPUTS. Read tools (getPage,
 * getPageJson, getNode, diffPageVersions, exportPageMarkdown, ...) return whole
 * pages with no size cap. Their outputs are stored in `metadata.parts` and
 * RE-SENT to the provider on every later turn via convertToModelMessages, so an
 * uncompacted large body grows token cost, latency, and DB row size on every
 * turn. We shrink the big payloads while preserving the object's shape and its
 * small scalar fields (id/title/pageId) the client reads to render citations.
 */
// Only outputs whose JSON serialization exceeds this are compacted at all
// (fast path: smaller outputs are returned unchanged, by identity).
const MAX_TOOL_OUTPUT_BYTES = 4000; // compaction threshold, in UTF-8 bytes of JSON
// A string longer than this is truncated to a leading preview.
const TOOL_OUTPUT_STRING_LIMIT = 600;
// Number of leading characters kept from a truncated string.
const TOOL_OUTPUT_STRING_PREVIEW = 500;
// Maximum number of array elements kept; the rest are summarized by a marker.
const TOOL_OUTPUT_ARRAY_LIMIT = 50;
// Beyond this nesting depth a subtree is replaced with a marker, bounding the
// recursion and the size of pathological deeply-nested payloads.
const TOOL_OUTPUT_MAX_DEPTH = 8;

/**
 * Recursively compact a single tool output before it is persisted (and thus
 * re-sent to the provider on later turns). Preserves the value's KIND and its
 * keys/scalars (so the client can still extract id/title/pageId citations from
 * `part.output`); only the large payloads (long strings, long arrays, very deep
 * subtrees) are shrunk. Returns a plain JSON-serializable value.
 *
 * Exported only so the unit test can import the pure helper; exporting it does
 * not change runtime behavior.
 *
 * @param output Raw tool output of any shape.
 * @returns `output` itself (by identity) when it is nullish, not serializable,
 *   or at most MAX_TOOL_OUTPUT_BYTES when serialized; otherwise a compacted copy.
 */
export function compactToolOutput(output: unknown): unknown {
  // Fast path: nothing to do for null/undefined or non-serializable values.
  if (output === null || output === undefined) return output;
  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(output);
  } catch {
    // Non-serializable (e.g. circular): return unchanged, never throw here.
    return output;
  }
  // JSON.stringify returns undefined for values like a bare function/symbol.
  if (serialized === undefined) return output;
  // Below the size threshold: return the original unchanged (by identity).
  if (Buffer.byteLength(serialized, 'utf8') <= MAX_TOOL_OUTPUT_BYTES) {
    return output;
  }
  return compactValue(output, 0);
}

/**
 * Truncate an over-long string to a leading preview plus a marker stating how
 * many characters were dropped. The cut never lands between the two halves of
 * a surrogate pair: a lone high surrogate is ill-formed text that JSON storage
 * may reject.
 */
function truncateToolString(value: string): string {
  if (value.length <= TOOL_OUTPUT_STRING_LIMIT) return value;
  let cut = TOOL_OUTPUT_STRING_PREVIEW;
  const lastKept = value.charCodeAt(cut - 1);
  // 0xD800–0xDBFF is the high-surrogate range; back off one unit so the pair
  // is dropped whole rather than split.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
  return `${value.slice(0, cut)}…[truncated ${value.length - cut} chars]`;
}

/** Rebuild a plain object with every value compacted one level deeper. */
function compactObject(source: object, depth: number): unknown {
  if (depth >= TOOL_OUTPUT_MAX_DEPTH) {
    return { _truncated: true, note: 'nested content omitted for replay' };
  }
  // Preserve keys (keeps id/title/pageId) while shrinking the values.
  const out: Record<string, unknown> = {};
  for (const [k, v] of Object.entries(source)) {
    out[k] = compactValue(v, depth + 1);
  }
  return out;
}

/**
 * Recursive worker for compactToolOutput; see the constants above for limits.
 *
 * @param value Current subtree.
 * @param depth Nesting depth of `value` (0 at the root).
 */
function compactValue(value: unknown, depth: number): unknown {
  if (typeof value === 'string') return truncateToolString(value);
  if (Array.isArray(value)) {
    const kept: unknown[] = value
      .slice(0, TOOL_OUTPUT_ARRAY_LIMIT)
      .map((el) => compactValue(el, depth + 1));
    if (value.length > TOOL_OUTPUT_ARRAY_LIMIT) {
      // Append a marker summarizing the dropped tail so the size is bounded
      // while signalling that the array was longer.
      kept.push({
        _truncated: true,
        omittedItems: value.length - TOOL_OUTPUT_ARRAY_LIMIT,
      });
    }
    return kept;
  }
  if (typeof value === 'object' && value !== null) {
    // Honour `toJSON` the way JSON.stringify does (Date -> ISO string, etc.).
    // Rebuilding such a value from Object.entries would silently turn it into
    // `{}`, so a compacted output would lose data an uncompacted one keeps.
    const toJson = (value as { toJSON?: unknown }).toJSON;
    if (typeof toJson === 'function') {
      const plain: unknown = toJson.call(value, '');
      if (plain === null || typeof plain !== 'object' || Array.isArray(plain)) {
        return compactValue(plain, depth);
      }
      // Like JSON.stringify, do not re-invoke toJSON on its own result.
      return compactObject(plain, depth);
    }
    return compactObject(value, depth);
  }
  // Numbers, booleans, etc.: nothing to shrink.
  return value;
}

/**
 * Rebuild the FULL UIMessage `parts` for an assistant turn from the SDK steps,
 * so multi-turn history replays prior tool-calls/results to the model (not just
 * the final text). Per step we emit the step's text part (if any) followed by a
 * static `tool-${name}` UI part per tool call — `output-available` when the
 * tool returned, or a synthetic `output-error` when it did not (so the call is
 * never persisted unpaired). Both shapes `convertToModelMessages` consumes on
 * the next turn map to a balanced assistant `tool-call` + tool-message
 * `tool-result`; a bare `input-available` would instead replay as an unpaired
 * call and throw MissingToolResultsError. Tools here are statically named, so
 * `tool-${name}` (not `dynamic-tool`) is faithful and `getStaticToolName`
 * recovers the name. Falls back to a single `text` part built from
 * `fallbackText` when the steps carry no text.
 *
 * @param steps Finished SDK steps, oldest first (undefined is treated as none).
 * @param fallbackText Text used only when no step carried text.
 * @returns The rebuilt parts, in call -> result -> answer order.
 */
// Exported only so the unit tests can import these pure helpers; exporting
// them does not change runtime behavior.
export function assistantParts(
  steps: ReadonlyArray<StepLike> | undefined,
  fallbackText: string,
): UIMessage['parts'] {
  const parts: Array<Record<string, unknown>> = [];
  let sawText = false;
  for (const step of steps ?? []) {
    if (step.text) {
      parts.push({ type: 'text', text: step.text });
      sawText = true;
    }
    // Index this step's results by tool call id to pair calls with outputs.
    const resultsById = new Map<string, unknown>();
    for (const r of step.toolResults ?? []) {
      if (r.toolCallId) resultsById.set(r.toolCallId, r.output);
    }
    for (const call of step.toolCalls ?? []) {
      // A call without a name or id cannot be replayed; drop it.
      if (!call.toolName || !call.toolCallId) continue;
      const hasResult = resultsById.has(call.toolCallId);
      if (hasResult) {
        // output-available: the tool returned; the next turn replays its result.
        parts.push({
          type: `tool-${call.toolName}`,
          toolCallId: call.toolCallId,
          state: 'output-available',
          input: call.input,
          output: compactToolOutput(resultsById.get(call.toolCallId)),
        });
      } else {
        // No paired result (e.g. aborted mid-step). Persisting a bare
        // tool-call (input-available) would replay as an unpaired call and
        // throw MissingToolResultsError on the next turn (convertToModelMessages
        // emits no tool-result for it). Emit a SYNTHETIC paired result instead:
        // an output-error round-trips through convertToModelMessages as a
        // balanced tool-call + tool-result, keeping the rebuilt history valid.
        parts.push({
          type: `tool-${call.toolName}`,
          toolCallId: call.toolCallId,
          state: 'output-error',
          input: call.input,
          errorText: 'Tool call did not complete.',
        });
      }
    }
  }
  if (!sawText && fallbackText) {
    // No per-step text (e.g. a single final block): append the final text after
    // any tool parts so the natural call -> result -> answer order is preserved.
    parts.push({ type: 'text', text: fallbackText });
  }
  return parts as UIMessage['parts'];
}

/**
 * Map a persisted message row back to a UIMessage. User messages restore their
 * stored parts when available; assistant messages restore the reconstructable
 * parts from metadata, falling back to a single text part from `content`.
 *
 * @param row Persisted chat message row.
 * @returns A UIMessage-shaped object; any role other than 'assistant' maps to
 *   'user'.
 */
export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
  id: string;
} {
  const role = row.role === 'assistant' ? 'assistant' : 'user';
  const meta = (row.metadata ?? {}) as { parts?: UIMessage['parts'] };
  // Stored parts win when present and non-empty; otherwise project `content`.
  const parts =
    Array.isArray(meta.parts) && meta.parts.length > 0
      ? meta.parts
      : textPart(row.content ?? '');
  return { id: row.id, role, parts: parts as UIMessage['parts'] };
}

/**
 * The persisted-row patch shape produced by {@link flushAssistant}. It is the
 * SAME shape the assistant repo insert/update consume (content + toolCalls +
 * metadata) plus the lifecycle `status` column added in #183.
 */
export interface AssistantFlush {
  content: string;
  toolCalls: unknown;
  metadata: Record<string, unknown>;
  status: 'streaming' | 'completed' | 'error' | 'aborted';
}

/**
 * Pure decision for the terminal finalize (#183): given whether the upfront
 * assistant row exists (`assistantId`), choose whether the terminal payload is
 * written by UPDATEing that row or — when the upfront insert failed and there is
 * no id — by INSERTing a fresh terminal row so the turn is not lost entirely.
 * Returns `{ kind: 'update', id }` or `{ kind: 'insert' }`. Extracted so the
 * fallback-insert branch (the only safety against losing a turn whose upfront
 * insert failed) is unit-testable without seaming streamText.
 */
export function planFinalizeAssistant(
  assistantId: string | undefined,
): { kind: 'update'; id: string } | { kind: 'insert' } {
  return assistantId ? { kind: 'update', id: assistantId } : { kind: 'insert' };
}

/**
 * The repo surface the terminal finalize needs (structural — the real repo and
 * a test mock both satisfy it). Results are ignored by applyFinalize, so the
 * resolved types are left as `unknown`.
 */
export interface FinalizeRepo {
  insert(insertable: Record<string, unknown>): Promise<unknown>;
  update(
    id: string,
    workspaceId: string,
    patch: AssistantFlush,
  ): Promise<unknown>;
}

/**
 * Apply a finalize `plan` to the repo with the terminal `flushed` payload (#183):
 * UPDATE the upfront row, or INSERT a fresh terminal row as the fallback when the
 * upfront insert failed. The SINGLE dispatch shared by the service's
 * finalizeAssistant and its test, so the test exercises the real path instead of
 * a copy (#186 review). Pure of error handling — the caller wraps it.
 *
 * @param repo Repo to write through.
 * @param plan Outcome of planFinalizeAssistant.
 * @param base Ownership columns used only by the insert fallback (workspaceId
 *   also scopes the update).
 * @param flushed Terminal row payload from flushAssistant.
 */
export async function applyFinalize(
  repo: FinalizeRepo,
  plan: { kind: 'update'; id: string } | { kind: 'insert' },
  base: { chatId: string; workspaceId: string; userId: string },
  flushed: AssistantFlush,
): Promise<void> {
  if (plan.kind === 'update') {
    await repo.update(plan.id, base.workspaceId, flushed);
    return;
  }
  await repo.insert({
    chatId: base.chatId,
    workspaceId: base.workspaceId,
    userId: base.userId,
    role: 'assistant',
    content: flushed.content,
    toolCalls: flushed.toolCalls ?? null,
    metadata: flushed.metadata,
    status: flushed.status,
  });
}

/**
 * PURE assistant-row builder (#183 step-granular durability). Given the turn's
 * accumulated steps + the in-progress (not-yet-finished) text + the lifecycle
 * status, it returns the row patch to persist. The SAME path runs for the
 * upfront insert (empty steps, status 'streaming'), every per-step update, and
 * the terminal finalize (completed/error/aborted) — and a future background
 * worker can call it identically, so it must stay a pure function of its inputs
 * (NO `this`, no IO).
 *
 * `metadata.parts` is built by assistantParts over the finished steps, then the
 * in-progress text appended as a trailing text part, so rowToUiMessage /
 * findAllByChat keep replaying the turn unchanged. `metadata.finishReason`,
 * `metadata.error`, `metadata.usage`, `metadata.contextTokens` and
 * `metadata.maxContextTokens` are attached only when provided/relevant, matching
 * the pre-#183 onFinish/onError records.
 *
 * @param capturedSteps Finished steps so far (undefined is treated as none).
 * @param inProgressText Text of the step still streaming, or ''.
 * @param status Lifecycle status to persist.
 * @param extra Optional terminal details copied into `metadata`.
 * @returns The row patch: concatenated text, tool trace, metadata and status.
 */
export function flushAssistant(
  capturedSteps: ReadonlyArray<StepLike> | undefined,
  inProgressText: string,
  status: 'streaming' | 'completed' | 'error' | 'aborted',
  extra?: {
    finishReason?: string;
    usage?: ChatStreamUsage | StreamUsage | undefined;
    contextTokens?: number;
    maxContextTokens?: number;
    error?: string;
  },
): AssistantFlush {
  const finished = capturedSteps ?? [];
  const stepsText = finished.map((s) => s.text ?? '').join('');
  const trailing = inProgressText ?? '';
  // assistantParts emits text parts only for FINISHED steps; append the
  // in-progress step's text (the partial answer cut off by an error/abort, or
  // simply not yet flushed mid-stream) as the last text part so the persisted
  // parts match what streamed to the client.
  const parts = assistantParts(finished, '') as unknown as Array<
    Record<string, unknown>
  >;
  if (trailing) parts.push({ type: 'text', text: trailing });
  const metadata: Record<string, unknown> = {
    parts: parts as unknown as UIMessage['parts'],
  };
  // finishReason: prefer an explicit one; else derive a sensible value from the
  // terminal status (so onError/onAbort records keep their historical reason).
  if (extra?.finishReason) {
    metadata.finishReason = extra.finishReason;
  } else if (status === 'error' || status === 'aborted') {
    metadata.finishReason = status;
  }
  if (extra?.usage !== undefined) {
    metadata.usage =
      normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
  }
  // Truthiness checks: a 0 / empty value means "not provided" for these fields.
  if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
  if (extra?.maxContextTokens) {
    metadata.maxContextTokens = extra.maxContextTokens;
  }
  if (extra?.error) metadata.error = extra.error;
  return {
    content: stepsText + trailing,
    toolCalls: serializeSteps(finished),
    metadata,
    status,
  };
}

/**
 * Reduce SDK step objects to a compact, JSON-serializable trace for the
 * `tool_calls` column. Stores only what the UI action-log and history need —
 * never raw provider payloads or keys.
 *
 * @param steps SDK steps, oldest first.
 * @returns Per step, one `{ toolName, input }` entry per call followed by one
 *   `{ toolName, output }` entry per result (outputs compacted); null when the
 *   steps contain no calls or results.
 */
export function serializeSteps(
  steps: ReadonlyArray<{
    toolCalls?: ReadonlyArray<{ toolName?: string; input?: unknown }>;
    toolResults?: ReadonlyArray<{ toolName?: string; output?: unknown }>;
  }>,
): unknown {
  const calls: Array<{ toolName?: string; input?: unknown; output?: unknown }> =
    [];
  for (const step of steps ?? []) {
    for (const call of step.toolCalls ?? []) {
      calls.push({ toolName: call.toolName, input: call.input });
    }
    for (const r of step.toolResults ?? []) {
      calls.push({ toolName: r.toolName, output: compactToolOutput(r.output) });
    }
  }
  return calls.length > 0 ? calls : null;
}