revert(ai-http): drop resilient fetch/RetryAgent layer (#140)
The custom undici RetryAgent + aiFetch transport added for issue #140 did not actually heal mid-stream provider drops: undici's retry path is a Range-based download-resume that SSE/chat-completions endpoints cannot satisfy, so a reset after the first byte only swapped ECONNRESET for a "server does not support the range header" error. Its only real effect was reconnecting a poisoned keep-alive socket before the first byte, and PR #141 on top of it turned the 60s headers timeout into deterministic ~61s failures (plus CONTENT_LENGTH_MISMATCH from retrying a POST body after a timeout abort).

The root cause is the z.ai coding endpoint, not our transport. Remove the whole layer and return all AI provider calls to Node's default global fetch.

- delete integrations/ai/ai-http.ts and its spec
- ai.service.ts: drop the aiFetch import, the AI_BYPASS_RESILIENT_FETCH diagnostic toggle, and fetch:aiFetch from every chat/embedding/STT factory; the raw STT call goes back to global fetch
- ai-chat.controller.ts: drop the stream-timing START log + startedAt
- ai-chat.service.ts: drop the first-chunk/FINISHED/ERROR timing logs
- .env.example: drop AI_BYPASS_RESILIENT_FETCH

Reverts: 1af5d34a, 7c308728, b7abb7ea, 35fc58ea, d6cd2754, 6efb8656.

Preserved (not part of the rollback): client-disconnect abort, title generation in onFinish, partial-answer persistence, Safari SSE heartbeat.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -14,7 +14,6 @@ import { AiNotConfiguredException } from './ai-not-configured.exception';
|
||||
import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception';
|
||||
import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception';
|
||||
import { describeProviderError } from './ai-error.util';
|
||||
import { aiFetch } from './ai-http';
|
||||
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
|
||||
import { SecretBoxService } from '../crypto/secret-box';
|
||||
import { AiDriver } from './ai.types';
|
||||
@@ -133,19 +132,6 @@ export class AiService {
|
||||
throw new AiNotConfiguredException();
|
||||
}
|
||||
|
||||
// Diagnostic toggle: when AI_BYPASS_RESILIENT_FETCH=true the chat model
|
||||
// bypasses the resilient aiFetch (custom undici RetryAgent) and uses the
|
||||
// default global fetch. Isolates whether the streaming chat hang comes from
|
||||
// the custom transport vs the request shape. Reversible via env, no rebuild.
|
||||
const bypassResilientFetch =
|
||||
process.env.AI_BYPASS_RESILIENT_FETCH === 'true';
|
||||
if (bypassResilientFetch) {
|
||||
this.logger.warn(
|
||||
'AI chat: resilient aiFetch BYPASSED for chat model ' +
|
||||
'(AI_BYPASS_RESILIENT_FETCH=true; using default fetch)',
|
||||
);
|
||||
}
|
||||
|
||||
switch (driver) {
|
||||
case 'openai':
|
||||
// baseURL (when set) covers openai-compatible endpoints. Use Chat
|
||||
@@ -154,22 +140,12 @@ export class AiService {
|
||||
// Responses API (/responses), which OpenAI-compatible gateways
|
||||
// (OpenRouter, etc.) reject on multi-turn requests (history with
|
||||
// assistant messages) → 400.
|
||||
return createOpenAI({
|
||||
apiKey,
|
||||
baseURL: baseUrl,
|
||||
...(bypassResilientFetch ? {} : { fetch: aiFetch }),
|
||||
}).chat(chatModel);
|
||||
return createOpenAI({ apiKey, baseURL: baseUrl }).chat(chatModel);
|
||||
case 'gemini':
|
||||
return createGoogleGenerativeAI({
|
||||
apiKey,
|
||||
...(bypassResilientFetch ? {} : { fetch: aiFetch }),
|
||||
})(chatModel);
|
||||
return createGoogleGenerativeAI({ apiKey })(chatModel);
|
||||
case 'ollama':
|
||||
// Ollama needs no API key.
|
||||
return createOllama({
|
||||
baseURL: baseUrl,
|
||||
...(bypassResilientFetch ? {} : { fetch: aiFetch }),
|
||||
})(chatModel);
|
||||
return createOllama({ baseURL: baseUrl })(chatModel);
|
||||
default:
|
||||
throw new AiNotConfiguredException();
|
||||
}
|
||||
@@ -204,18 +180,15 @@ export class AiService {
|
||||
return createOpenAI({
|
||||
apiKey: cfg.embeddingApiKey,
|
||||
baseURL: cfg.embeddingBaseUrl,
|
||||
fetch: aiFetch,
|
||||
}).textEmbeddingModel(cfg.embeddingModel);
|
||||
case 'gemini':
|
||||
return createGoogleGenerativeAI({
|
||||
apiKey: cfg.embeddingApiKey,
|
||||
fetch: aiFetch,
|
||||
}).textEmbeddingModel(cfg.embeddingModel);
|
||||
case 'ollama':
|
||||
// Ollama needs no API key (e.g. nomic-embed-text).
|
||||
return createOllama({
|
||||
baseURL: cfg.embeddingBaseUrl,
|
||||
fetch: aiFetch,
|
||||
}).textEmbeddingModel(cfg.embeddingModel);
|
||||
default:
|
||||
throw new AiEmbeddingNotConfiguredException();
|
||||
@@ -262,7 +235,6 @@ export class AiService {
|
||||
const model = createOpenAI({
|
||||
apiKey: cfg.sttApiKey ?? 'unused',
|
||||
baseURL,
|
||||
fetch: aiFetch,
|
||||
}).transcription(cfg.sttModel);
|
||||
const { text } = await transcribe({
|
||||
model,
|
||||
@@ -296,7 +268,7 @@ export class AiService {
|
||||
);
|
||||
}
|
||||
const url = `${baseURL.replace(/\/$/, '')}/audio/transcriptions`;
|
||||
const res = await aiFetch(url, {
|
||||
const res = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
|
||||
Reference in New Issue
Block a user