From 7c308728de1a35fff2cb8949674a9926ae8afda9 Mon Sep 17 00:00:00 2001 From: claude_code Date: Tue, 23 Jun 2026 02:13:54 +0300 Subject: [PATCH] chore(ai-chat): add stream timing logs + env-gated aiFetch bypass (diagnostics) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The streaming chat turn hangs in all browsers while the non-streaming test endpoint works — both use the same model/transport (createOpenAI + aiFetch), so the suspect is the streaming path / custom undici RetryAgent transport. - ai-http.ts: wrap aiFetch with per-request timing logs (start, ms-to-headers on success, elapsed ms + cause on failure). Chat at info, embeddings at debug. Only host+path logged. - ai-chat.controller.ts / ai-chat.service.ts: log turn START, first-chunk latency, FINISHED duration, and elapsed ms on disconnect/error/abort. - ai.service.ts: AI_BYPASS_RESILIENT_FETCH=true makes the CHAT model omit fetch:aiFetch and use the default global fetch — isolates transport vs request-shape. Chat-only; embeddings/STT untouched; reversible via env. - .env.example: document the flag. No timeout/retry change. tsc clean; ai-chat + ai suites pass (292). --- .env.example | 5 ++ .../src/core/ai-chat/ai-chat.controller.ts | 9 +++- .../src/core/ai-chat/ai-chat.service.ts | 20 +++++++- apps/server/src/integrations/ai/ai-http.ts | 47 ++++++++++++++++++- apps/server/src/integrations/ai/ai.service.ts | 31 ++++++++++-- 5 files changed, 103 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index 1948fafe..ae932ee5 100644 --- a/.env.example +++ b/.env.example @@ -128,6 +128,11 @@ MCP_DOCMOST_PASSWORD= # A slow/hung embeddings endpoint fails after this and the batch continues. # AI_EMBEDDING_TIMEOUT_MS=120000 +# Diagnostic: bypass the resilient outbound HTTP layer (custom undici RetryAgent) +# for the CHAT model, using the default global fetch instead. Use only to isolate +# a streaming/transport issue; leave unset in normal operation. +# AI_BYPASS_RESILIENT_FETCH=true + # --- Anonymous public-share AI assistant --- # Opt-in per workspace (AI settings -> "public share assistant"; off by default). # When enabled, anonymous visitors of a published share can ask an AI about that diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.ts b/apps/server/src/core/ai-chat/ai-chat.controller.ts index 0870969e..965e8a84 100644 --- a/apps/server/src/core/ai-chat/ai-chat.controller.ts +++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts @@ -142,6 +142,9 @@ export class AiChatController { const body = (req.body ?? {}) as AiChatStreamBody; + // Diagnostic timing baseline for this turn (see START / terminal logs below). + const startedAt = Date.now(); + // Resolve the agent role for this turn BEFORE hijack: existing chats read it // from ai_chats.role_id (authoritative), a new chat from body.roleId. The // role drives both the persona and the optional model override below. @@ -167,7 +170,7 @@ export class AiChatController { // so log it here before aborting the agent loop. if (!res.raw.writableEnded) { this.logger.warn( - 'AI chat stream: client disconnected before completion; aborting turn', + `AI chat stream: client disconnected before completion after ${Date.now() - startedAt}ms; aborting turn`, ); controller.abort(); } @@ -175,6 +178,10 @@ export class AiChatController { req.raw.once('close', onClose); res.raw.once('finish', () => req.raw.off('close', onClose)); + this.logger.log( + `AI chat stream START chat=${body.chatId ?? 'new'} ua="${req.headers['user-agent'] ?? ''}"`, + ); + // Commit to streaming: hijack so Fastify stops managing the response and // the AI SDK can write the UI-message stream directly to the Node socket. res.hijack(); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 457682b8..c74b9945 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -192,6 +192,7 @@ export class AiChatService { model, role, }: AiChatStreamArgs): Promise { + const turnStartedAt = Date.now(); // Resolve / create the chat. A new chat is created when no valid chatId is // supplied or the supplied one does not belong to this workspace. let isNewChat = false; @@ -380,6 +381,10 @@ export class AiChatService { const capturedSteps: StepLike[] = []; let inProgressText = ''; + // Log only the FIRST streamed chunk so we can see the provider's observed + // time-to-first-token without flooding the log with every delta. + let firstChunkLogged = false; + // NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous // failure here (or in pipe below) would skip the terminal callbacks, so the // catch releases the leased external clients to avoid a connection leak. @@ -404,6 +409,12 @@ export class AiChatService { prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system), abortSignal: signal, onChunk: ({ chunk }) => { + if (!firstChunkLogged) { + firstChunkLogged = true; + this.logger.log( + `AI chat stream first chunk (${chunk.type}) chat=${chatId} after ${Date.now() - turnStartedAt}ms`, + ); + } // 'text-delta' is the assistant's prose; tool-call args are separate chunk // types — so this mirrors exactly what streams to the client. if (chunk.type === 'text-delta') inProgressText += chunk.text; @@ -415,6 +426,9 @@ export class AiChatService { inProgressText = ''; }, onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => { + this.logger.log( + `AI chat stream FINISHED chat=${chatId} in ${Date.now() - turnStartedAt}ms, ${steps.length} step(s)`, + ); await persistAssistant({ text, toolCalls: serializeSteps(steps), @@ -444,6 +458,9 @@ export class AiChatService { const e = error as { stack?: string }; const errorText = describeProviderError(error, String(error)); this.logger.error(`AI chat stream error: ${errorText}`, e?.stack); + this.logger.warn( + `AI chat stream ERROR terminal chat=${chatId} after ${Date.now() - turnStartedAt}ms`, + ); // Persist the PARTIAL answer streamed before the failure (text + any // finished tool steps) WITH the error in metadata, so the turn shows what // the user already saw plus the cause — not just a bare error. @@ -466,7 +483,8 @@ export class AiChatService { // invisible in the logs. Log it (warn) so the abort is traceable. this.logger.warn( `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` + - `step(s), ${partialChars} chars partial text; persisting partial turn.`, + `step(s), ${partialChars} chars partial text; persisting partial turn` + + ` after ${Date.now() - turnStartedAt}ms`, ); await persistAssistant( buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'), diff --git a/apps/server/src/integrations/ai/ai-http.ts b/apps/server/src/integrations/ai/ai-http.ts index 5182393c..b894ae9b 100644 --- a/apps/server/src/integrations/ai/ai-http.ts +++ b/apps/server/src/integrations/ai/ai-http.ts @@ -1,4 +1,5 @@ import { Agent, RetryAgent, type Dispatcher } from 'undici'; +import { Logger } from '@nestjs/common'; /** * Dedicated, resilient outbound HTTP layer for ALL AI provider calls. @@ -83,11 +84,53 @@ const dispatcher: Dispatcher = new RetryAgent(baseAgent, { ], }); +const logger = new Logger('AiHttp'); +let requestSeq = 0; + /** * A `fetch`-compatible function that routes the request through the shared, * resilient AI dispatcher. Injected into AI SDK provider factories via their * `fetch` option. Follows the repo convention (see mcp-clients.service.ts * `guardedFetch`). + * + * Wrapped with timing logs so provider latency is visible: for streaming + * responses `fetch` resolves when RESPONSE HEADERS arrive (the body streams + * after), so "in ms (headers received)" is exactly the provider's + * time-to-first-byte, and a rejection time pinpoints a headers/body timeout. + * Chat/Responses calls log at info; bulk embedding calls log at debug so RAG + * indexing never floods the logs. No secrets are logged — only host + pathname. */ -export const aiFetch: typeof fetch = (input, init) => - fetch(input, { ...init, dispatcher } as RequestInit); +export const aiFetch: typeof fetch = async (input, init) => { + const id = ++requestSeq; + const method = (init?.method ?? 'GET').toUpperCase(); + const rawUrl = + typeof input === 'string' + ? input + : input instanceof URL + ? input.href + : (input as Request).url; + let path = rawUrl; + try { + const u = new URL(rawUrl); + path = u.host + u.pathname; + } catch { + // Non-absolute / unparseable URL: keep the raw string (still no secrets). + } + const isChat = /\/(chat\/completions|responses)\b/.test(path); + const log = (msg: string): void => + isChat ? logger.log(msg) : logger.debug(msg); + const startedAt = performance.now(); + log(`provider request #${id} -> ${method} ${path}`); + try { + const res = await fetch(input, { ...init, dispatcher } as RequestInit); + const ms = Math.round(performance.now() - startedAt); + log(`provider request #${id} <- ${res.status} in ${ms}ms (headers received)`); + return res; + } catch (err) { + const ms = Math.round(performance.now() - startedAt); + logger.warn( + `provider request #${id} x after ${ms}ms: ${(err as Error)?.message ?? String(err)}`, + ); + throw err; + } +}; diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts index 442ebf9b..f5f4dca3 100644 --- a/apps/server/src/integrations/ai/ai.service.ts +++ b/apps/server/src/integrations/ai/ai.service.ts @@ -133,6 +133,19 @@ export class AiService { throw new AiNotConfiguredException(); } + // Diagnostic toggle: when AI_BYPASS_RESILIENT_FETCH=true the chat model + // bypasses the resilient aiFetch (custom undici RetryAgent) and uses the + // default global fetch. Isolates whether the streaming chat hang comes from + // the custom transport vs the request shape. Reversible via env, no rebuild. + const bypassResilientFetch = + process.env.AI_BYPASS_RESILIENT_FETCH === 'true'; + if (bypassResilientFetch) { + this.logger.warn( + 'AI chat: resilient aiFetch BYPASSED for chat model ' + + '(AI_BYPASS_RESILIENT_FETCH=true; using default fetch)', + ); + } + switch (driver) { case 'openai': // baseURL (when set) covers openai-compatible endpoints. Use Chat @@ -141,14 +154,22 @@ export class AiService { // Responses API (/responses), which OpenAI-compatible gateways // (OpenRouter, etc.) reject on multi-turn requests (history with // assistant messages) → 400. - return createOpenAI({ apiKey, baseURL: baseUrl, fetch: aiFetch }).chat( - chatModel, - ); + return createOpenAI({ + apiKey, + baseURL: baseUrl, + ...(bypassResilientFetch ? {} : { fetch: aiFetch }), + }).chat(chatModel); case 'gemini': - return createGoogleGenerativeAI({ apiKey, fetch: aiFetch })(chatModel); + return createGoogleGenerativeAI({ + apiKey, + ...(bypassResilientFetch ? {} : { fetch: aiFetch }), + })(chatModel); case 'ollama': // Ollama needs no API key. - return createOllama({ baseURL: baseUrl, fetch: aiFetch })(chatModel); + return createOllama({ + baseURL: baseUrl, + ...(bypassResilientFetch ? {} : { fetch: aiFetch }), + })(chatModel); default: throw new AiNotConfiguredException(); }