revert(ai-http): drop resilient fetch/RetryAgent layer (#140)

The custom undici RetryAgent + aiFetch transport added for issue #140 did not actually heal mid-stream provider drops: undici's retry path is a Range-based download-resume that SSE/chat-completions endpoints cannot satisfy, so a reset after the first byte only swapped ECONNRESET for a "server does not support the range header" error. Its only real effect was reconnecting a poisoned keep-alive socket before the first byte, and PR #141 on top of it turned the 60s headers timeout into deterministic ~61s failures (plus CONTENT_LENGTH_MISMATCH from retrying a POST body after a timeout abort).

The root cause is the z.ai coding endpoint, not our transport. Remove the whole layer and return all AI provider calls to Node's default global fetch.

- delete integrations/ai/ai-http.ts and its spec
- ai.service.ts: drop the aiFetch import, the AI_BYPASS_RESILIENT_FETCH diagnostic toggle, and `fetch: aiFetch` from every chat/embedding/STT factory; switch the raw STT call back to global fetch
- ai-chat.controller.ts: drop the stream-timing START log + startedAt
- ai-chat.service.ts: drop the first-chunk/FINISHED/ERROR timing logs
- .env.example: drop AI_BYPASS_RESILIENT_FETCH

Reverts: 1af5d34a, 7c308728, b7abb7ea, 35fc58ea, d6cd2754, 6efb8656.

Preserved (not part of the rollback): client-disconnect abort, title generation in onFinish, partial-answer persistence, Safari SSE heartbeat.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 18:48:33 +03:00
parent 0fabaa5bfb
commit 5161de8ba9
6 changed files with 6 additions and 351 deletions
--- a/apps/server/src/integrations/ai/ai.service.ts
+++ b/apps/server/src/integrations/ai/ai.service.ts
@@ -14,7 +14,6 @@ import { AiNotConfiguredException } from './ai-not-configured.exception';
 import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception';
 import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception';
 import { describeProviderError } from './ai-error.util';
-import { aiFetch } from './ai-http';
 import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
 import { SecretBoxService } from '../crypto/secret-box';
 import { AiDriver } from './ai.types';
@@ -133,19 +132,6 @@ export class AiService {
      throw new AiNotConfiguredException();
    }

-    // Diagnostic toggle: when AI_BYPASS_RESILIENT_FETCH=true the chat model
-    // bypasses the resilient aiFetch (custom undici RetryAgent) and uses the
-    // default global fetch. Isolates whether the streaming chat hang comes from
-    // the custom transport vs the request shape. Reversible via env, no rebuild.
-    const bypassResilientFetch =
-      process.env.AI_BYPASS_RESILIENT_FETCH === 'true';
-    if (bypassResilientFetch) {
-      this.logger.warn(
-        'AI chat: resilient aiFetch BYPASSED for chat model ' +
-          '(AI_BYPASS_RESILIENT_FETCH=true; using default fetch)',
-      );
-    }
-
    switch (driver) {
      case 'openai':
        // baseURL (when set) covers openai-compatible endpoints. Use Chat
@@ -154,22 +140,12 @@ export class AiService {
        // Responses API (/responses), which OpenAI-compatible gateways
        // (OpenRouter, etc.) reject on multi-turn requests (history with
        // assistant messages) → 400.
-        return createOpenAI({
-          apiKey,
-          baseURL: baseUrl,
-          ...(bypassResilientFetch ? {} : { fetch: aiFetch }),
-        }).chat(chatModel);
+        return createOpenAI({ apiKey, baseURL: baseUrl }).chat(chatModel);
      case 'gemini':
-        return createGoogleGenerativeAI({
-          apiKey,
-          ...(bypassResilientFetch ? {} : { fetch: aiFetch }),
-        })(chatModel);
+        return createGoogleGenerativeAI({ apiKey })(chatModel);
      case 'ollama':
        // Ollama needs no API key.
-        return createOllama({
-          baseURL: baseUrl,
-          ...(bypassResilientFetch ? {} : { fetch: aiFetch }),
-        })(chatModel);
+        return createOllama({ baseURL: baseUrl })(chatModel);
      default:
        throw new AiNotConfiguredException();
    }
@@ -204,18 +180,15 @@ export class AiService {
        return createOpenAI({
          apiKey: cfg.embeddingApiKey,
          baseURL: cfg.embeddingBaseUrl,
-          fetch: aiFetch,
        }).textEmbeddingModel(cfg.embeddingModel);
      case 'gemini':
        return createGoogleGenerativeAI({
          apiKey: cfg.embeddingApiKey,
-          fetch: aiFetch,
        }).textEmbeddingModel(cfg.embeddingModel);
      case 'ollama':
        // Ollama needs no API key (e.g. nomic-embed-text).
        return createOllama({
          baseURL: cfg.embeddingBaseUrl,
-          fetch: aiFetch,
        }).textEmbeddingModel(cfg.embeddingModel);
      default:
        throw new AiEmbeddingNotConfiguredException();
@@ -262,7 +235,6 @@ export class AiService {
    const model = createOpenAI({
      apiKey: cfg.sttApiKey ?? 'unused',
      baseURL,
-      fetch: aiFetch,
    }).transcription(cfg.sttModel);
    const { text } = await transcribe({
      model,
@@ -296,7 +268,7 @@ export class AiService {
      );
    }
    const url = `${baseURL.replace(/\/$/, '')}/audio/transcriptions`;
-    const res = await aiFetch(url, {
+    const res = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',