chore(ai-chat): add stream timing logs + env-gated aiFetch bypass (diagnostics)

The streaming chat turn hangs in all browsers while the non-streaming test
endpoint works — both use the same model/transport (createOpenAI + aiFetch),
so the suspect is the streaming path / custom undici RetryAgent transport.

- ai-http.ts: wrap aiFetch with per-request timing logs (start, ms-to-headers
  on success, elapsed ms + cause on failure). Chat at info, embeddings at
  debug. Only host+path logged.
- ai-chat.controller.ts / ai-chat.service.ts: log turn START, first-chunk
  latency, FINISHED duration, and elapsed ms on disconnect/error/abort.
- ai.service.ts: AI_BYPASS_RESILIENT_FETCH=true makes the CHAT model omit
  fetch:aiFetch and use the default global fetch — isolates transport vs
  request-shape. Chat-only; embeddings/STT untouched; reversible via env.
- .env.example: document the flag.

No timeout/retry change. tsc clean; ai-chat + ai suites pass (292).
2026-06-23 02:13:54 +03:00
parent da058bb6a0
commit 7c308728de
5 changed files with 103 additions and 9 deletions
--- a/apps/server/src/integrations/ai/ai-http.ts
+++ b/apps/server/src/integrations/ai/ai-http.ts
@@ -1,4 +1,5 @@
 import { Agent, RetryAgent, type Dispatcher } from 'undici';
+import { Logger } from '@nestjs/common';

 /**
 * Dedicated, resilient outbound HTTP layer for ALL AI provider calls.
@@ -83,11 +84,53 @@ const dispatcher: Dispatcher = new RetryAgent(baseAgent, {
  ],
 });

+const logger = new Logger('AiHttp'); // Nest logger created with the 'AiHttp' context
+let requestSeq = 0; // bumped once per aiFetch call; the id ties a request's log lines together
+
 /**
 * A `fetch`-compatible function that routes the request through the shared,
 * resilient AI dispatcher. Injected into AI SDK provider factories via their
 * `fetch` option. Follows the repo convention (see mcp-clients.service.ts
 * `guardedFetch`).
+ *
+ * Wrapped with timing logs so provider latency is visible: for streaming
+ * responses `fetch` resolves when RESPONSE HEADERS arrive (the body streams
+ * after), so "in <ms>ms (headers received)" is exactly the provider's
+ * time-to-first-byte, and a rejection time pinpoints a headers/body timeout.
+ * Chat/Responses calls log at info; bulk embedding calls log at debug so RAG
+ * indexing never floods the logs. No secrets are logged — only host + pathname.
 */
-export const aiFetch: typeof fetch = (input, init) =>
-  fetch(input, { ...init, dispatcher } as RequestInit);
+export const aiFetch: typeof fetch = async (input, init) => {
+  const id = ++requestSeq;
+  // A Request input carries its own method/url; strings and URLs do not.
+  const req = typeof input === 'string' || input instanceof URL ? undefined : input;
+  const method = (init?.method ?? req?.method ?? 'GET').toUpperCase();
+  const rawUrl = req?.url ?? String(input);
+  // Strip query/fragment so the fallback below never logs a query-string key.
+  let path = rawUrl.replace(/[?#][\s\S]*$/, '');
+  try {
+    const u = new URL(rawUrl);
+    path = u.host + u.pathname;
+  } catch {
+    // Non-absolute / unparseable URL: keep the stripped raw string.
+  }
+  const isChat = /\/(chat\/completions|responses)\b/.test(path);
+  const log = (msg: string): void =>
+    isChat ? logger.log(msg) : logger.debug(msg);
+  const startedAt = performance.now();
+  log(`provider request #${id} -> ${method} ${path}`);
+  try {
+    const res = await fetch(input, { ...init, dispatcher } as RequestInit);
+    const ms = Math.round(performance.now() - startedAt);
+    log(`provider request #${id} <- ${res.status} in ${ms}ms (headers received)`);
+    return res;
+  } catch (err) {
+    const ms = Math.round(performance.now() - startedAt);
+    // fetch rejections often wrap the real reason (timeout, reset) in `cause`.
+    const cause = (err as { cause?: unknown } | null)?.cause;
+    const why = cause == null ? '' : ` (cause: ${cause instanceof Error ? cause.message : String(cause)})`;
+    const msg = (err as Error)?.message ?? String(err);
+    logger.warn(`provider request #${id} x after ${ms}ms: ${msg}${why}`);
+    throw err;
+  }
+};
--- a/apps/server/src/integrations/ai/ai.service.ts
+++ b/apps/server/src/integrations/ai/ai.service.ts
@@ -133,6 +133,19 @@ export class AiService {
      throw new AiNotConfiguredException();
    }

+    // Diagnostic toggle: when AI_BYPASS_RESILIENT_FETCH=true the chat model
+    // bypasses the resilient aiFetch (custom undici RetryAgent) and uses the
+    // default global fetch. Isolates whether the streaming chat hang comes from
+    // the custom transport vs the request shape. Reversible via env, no rebuild.
+    const bypassResilientFetch =
+      process.env.AI_BYPASS_RESILIENT_FETCH === 'true';
+    if (bypassResilientFetch) {
+      this.logger.warn(
+        'AI chat: resilient aiFetch BYPASSED for chat model ' +
+          '(AI_BYPASS_RESILIENT_FETCH=true; using default fetch)',
+      );
+    }
+
    switch (driver) {
      case 'openai':
        // baseURL (when set) covers openai-compatible endpoints. Use Chat
@@ -141,14 +154,22 @@ export class AiService {
        // Responses API (/responses), which OpenAI-compatible gateways
        // (OpenRouter, etc.) reject on multi-turn requests (history with
        // assistant messages) → 400.
-        return createOpenAI({ apiKey, baseURL: baseUrl, fetch: aiFetch }).chat(
-          chatModel,
-        );
+        return createOpenAI({
+          apiKey,
+          baseURL: baseUrl,
+          ...(bypassResilientFetch ? {} : { fetch: aiFetch }),
+        }).chat(chatModel);
      case 'gemini':
-        return createGoogleGenerativeAI({ apiKey, fetch: aiFetch })(chatModel);
+        return createGoogleGenerativeAI({
+          apiKey,
+          ...(bypassResilientFetch ? {} : { fetch: aiFetch }),
+        })(chatModel);
      case 'ollama':
        // Ollama needs no API key.
-        return createOllama({ baseURL: baseUrl, fetch: aiFetch })(chatModel);
+        return createOllama({
+          baseURL: baseUrl,
+          ...(bypassResilientFetch ? {} : { fetch: aiFetch }),
+        })(chatModel);
      default:
        throw new AiNotConfiguredException();
    }