chore(ai-chat): add stream timing logs + env-gated aiFetch bypass (diagnostics)
The streaming chat turn hangs in all browsers while the non-streaming test endpoint works. Both use the same model/transport (createOpenAI + aiFetch), so the suspect is the streaming path or the custom undici RetryAgent transport.

- ai-http.ts: wrap aiFetch with per-request timing logs (start, ms-to-headers on success, elapsed ms + cause on failure). Chat logs at info, embeddings at debug. Only host + path are logged.
- ai-chat.controller.ts / ai-chat.service.ts: log turn START, first-chunk latency, FINISHED duration, and elapsed ms on disconnect/error/abort.
- ai.service.ts: AI_BYPASS_RESILIENT_FETCH=true makes the CHAT model omit fetch:aiFetch and use the default global fetch, which isolates transport problems from request-shape problems. Chat-only; embeddings/STT are untouched; reversible via env.
- .env.example: document the flag.

No timeout/retry change. tsc clean; ai-chat + ai suites pass (292).
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import { Agent, RetryAgent, type Dispatcher } from 'undici';
|
||||
import { Logger } from '@nestjs/common';
|
||||
|
||||
/**
|
||||
* Dedicated, resilient outbound HTTP layer for ALL AI provider calls.
|
||||
@@ -83,11 +84,53 @@ const dispatcher: Dispatcher = new RetryAgent(baseAgent, {
|
||||
],
|
||||
});
|
||||
|
||||
const logger = new Logger('AiHttp');
|
||||
let requestSeq = 0;
|
||||
|
||||
/**
|
||||
* A `fetch`-compatible function that routes the request through the shared,
|
||||
* resilient AI dispatcher. Injected into AI SDK provider factories via their
|
||||
* `fetch` option. Follows the repo convention (see mcp-clients.service.ts
|
||||
* `guardedFetch`).
|
||||
*
|
||||
* Wrapped with timing logs so provider latency is visible: for streaming
|
||||
* responses `fetch` resolves when RESPONSE HEADERS arrive (the body streams
|
||||
* after), so "in <ms>ms (headers received)" is exactly the provider's
|
||||
* time-to-first-byte, and a rejection time pinpoints a headers/body timeout.
|
||||
* Chat/Responses calls log at info; bulk embedding calls log at debug so RAG
|
||||
* indexing never floods the logs. No secrets are logged — only host + pathname.
|
||||
*/
|
||||
export const aiFetch: typeof fetch = (input, init) =>
|
||||
fetch(input, { ...init, dispatcher } as RequestInit);
|
||||
/**
 * `fetch`-compatible wrapper that sends the request through the shared AI
 * `dispatcher` and logs per-request timing.
 *
 * One line is logged when the request starts and one when `fetch` settles:
 * - success: HTTP status + elapsed ms until `fetch` resolved, i.e. until the
 *   response headers arrived (a streamed body may still be in flight);
 * - failure: elapsed ms + the error message followed by its `cause` chain,
 *   after which the original error is rethrown unchanged.
 *
 * Requests whose path matches `/chat/completions` or `/responses` log at info
 * level, everything else at debug. Only host + pathname are logged; the query
 * string and fragment are always dropped.
 *
 * @param input URL string, `URL`, or `Request`, exactly as for global `fetch`.
 * @param init Optional request init; the shared `dispatcher` is merged into it.
 * @returns The `Response` produced by the underlying `fetch`.
 * @throws Whatever the underlying `fetch` rejects with (rethrown as-is).
 */
export const aiFetch: typeof fetch = async (input, init) => {
  const id = ++requestSeq;
  const method = resolveMethod(input, init);
  const path = toLoggablePath(input);
  const isChat = /\/(chat\/completions|responses)\b/.test(path);
  const log = (msg: string): void =>
    isChat ? logger.log(msg) : logger.debug(msg);

  const startedAt = performance.now();
  log(`provider request #${id} -> ${method} ${path}`);
  try {
    const res = await fetch(input, { ...init, dispatcher } as RequestInit);
    const ms = Math.round(performance.now() - startedAt);
    log(`provider request #${id} <- ${res.status} in ${ms}ms (headers received)`);
    return res;
  } catch (err) {
    const ms = Math.round(performance.now() - startedAt);
    // Failures always log at warn, regardless of the chat/embedding split.
    logger.warn(
      `provider request #${id} x after ${ms}ms: ${describeFetchError(err)}`,
    );
    throw err;
  }
};

/**
 * Resolves the HTTP method for logging. `init.method` wins (as it does in
 * `fetch` itself); otherwise a `Request` input supplies its own method, and
 * plain URL inputs default to GET.
 */
function resolveMethod(
  input: Parameters<typeof fetch>[0],
  init: Parameters<typeof fetch>[1],
): string {
  const fromRequest =
    typeof input === 'string' || input instanceof URL
      ? undefined
      : (input as Request).method;
  return (init?.method ?? fromRequest ?? 'GET').toUpperCase();
}

/**
 * Reduces the request target to `host + pathname` for logging. When the URL
 * cannot be parsed (e.g. it is relative), the raw string is kept but cut at
 * the first `?` or `#` so query parameters never reach the logs.
 */
function toLoggablePath(input: Parameters<typeof fetch>[0]): string {
  const rawUrl =
    typeof input === 'string'
      ? input
      : input instanceof URL
        ? input.href
        : (input as Request).url;
  try {
    const parsed = new URL(rawUrl);
    return parsed.host + parsed.pathname;
  } catch {
    return rawUrl.split(/[?#]/, 1)[0] ?? rawUrl;
  }
}

/**
 * Renders an error together with its `cause` chain, e.g.
 * `fetch failed <- Headers Timeout Error [UND_ERR_HEADERS_TIMEOUT]`.
 * A top-level fetch rejection often carries only a generic message, with the
 * actionable reason attached as `cause`, so the chain is what makes the log
 * line useful. The walk is depth-limited and cycle-safe.
 */
function describeFetchError(err: unknown): string {
  const maxDepth = 5;
  const parts: string[] = [];
  const seen = new Set<unknown>();
  let current: unknown = err;
  while (current != null && parts.length < maxDepth && !seen.has(current)) {
    seen.add(current);
    if (typeof current !== 'object') {
      parts.push(String(current));
      break;
    }
    const { message, code, cause } = current as {
      message?: unknown;
      code?: unknown;
      cause?: unknown;
    };
    const text = typeof message === 'string' ? message : String(current);
    parts.push(typeof code === 'string' ? `${text} [${code}]` : text);
    current = cause;
  }
  return parts.length > 0 ? parts.join(' <- ') : String(err);
}
|
||||
|
||||
@@ -133,6 +133,19 @@ export class AiService {
|
||||
throw new AiNotConfiguredException();
|
||||
}
|
||||
|
||||
// Diagnostic toggle: when AI_BYPASS_RESILIENT_FETCH=true the chat model
|
||||
// bypasses the resilient aiFetch (custom undici RetryAgent) and uses the
|
||||
// default global fetch. Isolates whether the streaming chat hang comes from
|
||||
// the custom transport vs the request shape. Reversible via env, no rebuild.
|
||||
const bypassResilientFetch =
|
||||
process.env.AI_BYPASS_RESILIENT_FETCH === 'true';
|
||||
if (bypassResilientFetch) {
|
||||
this.logger.warn(
|
||||
'AI chat: resilient aiFetch BYPASSED for chat model ' +
|
||||
'(AI_BYPASS_RESILIENT_FETCH=true; using default fetch)',
|
||||
);
|
||||
}
|
||||
|
||||
switch (driver) {
|
||||
case 'openai':
|
||||
// baseURL (when set) covers openai-compatible endpoints. Use Chat
|
||||
@@ -141,14 +154,22 @@ export class AiService {
|
||||
// Responses API (/responses), which OpenAI-compatible gateways
|
||||
// (OpenRouter, etc.) reject on multi-turn requests (history with
|
||||
// assistant messages) → 400.
|
||||
return createOpenAI({ apiKey, baseURL: baseUrl, fetch: aiFetch }).chat(
|
||||
chatModel,
|
||||
);
|
||||
return createOpenAI({
|
||||
apiKey,
|
||||
baseURL: baseUrl,
|
||||
...(bypassResilientFetch ? {} : { fetch: aiFetch }),
|
||||
}).chat(chatModel);
|
||||
case 'gemini':
|
||||
return createGoogleGenerativeAI({ apiKey, fetch: aiFetch })(chatModel);
|
||||
return createGoogleGenerativeAI({
|
||||
apiKey,
|
||||
...(bypassResilientFetch ? {} : { fetch: aiFetch }),
|
||||
})(chatModel);
|
||||
case 'ollama':
|
||||
// Ollama needs no API key.
|
||||
return createOllama({ baseURL: baseUrl, fetch: aiFetch })(chatModel);
|
||||
return createOllama({
|
||||
baseURL: baseUrl,
|
||||
...(bypassResilientFetch ? {} : { fetch: aiFetch }),
|
||||
})(chatModel);
|
||||
default:
|
||||
throw new AiNotConfiguredException();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user