revert(ai-http): drop resilient fetch/RetryAgent layer (#140)

The custom undici RetryAgent + aiFetch transport added for issue #140 did not actually heal mid-stream provider drops: undici's retry path is a Range-based download-resume that SSE/chat-completions endpoints cannot satisfy, so a reset after the first byte only swapped ECONNRESET for a "server does not support the range header" error. Its only real effect was reconnecting a poisoned keep-alive socket before the first byte, and PR #141 on top of it turned the 60s headers timeout into deterministic ~61s failures (plus CONTENT_LENGTH_MISMATCH from retrying a POST body after a timeout abort).

The root cause is the z.ai coding endpoint, not our transport. Remove the whole layer and return all AI provider calls to Node's default global fetch.

- delete integrations/ai/ai-http.ts and its spec
- ai.service.ts: drop the aiFetch import, the AI_BYPASS_RESILIENT_FETCH diagnostic toggle, and fetch:aiFetch from every chat/embedding/STT factory; return the raw STT call to global fetch
- ai-chat.controller.ts: drop the stream-timing START log + startedAt, and the elapsed-ms suffix of the client-disconnect warning
- ai-chat.service.ts: drop the first-chunk/FINISHED/ERROR timing logs, and the elapsed-ms suffix of the abort warning
- .env.example: drop AI_BYPASS_RESILIENT_FETCH

Reverts: 1af5d34a, 7c308728, b7abb7ea, 35fc58ea, d6cd2754, 6efb8656.

Preserved (not part of the rollback): client-disconnect abort, title generation in onFinish, partial-answer persistence, Safari SSE heartbeat.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 18:48:33 +03:00
parent 0fabaa5bfb
commit 5161de8ba9
6 changed files with 6 additions and 351 deletions
--- a/apps/server/src/core/ai-chat/ai-chat.controller.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts
@@ -142,9 +142,6 @@ export class AiChatController {

    const body = (req.body ?? {}) as AiChatStreamBody;

-    // Diagnostic timing baseline for this turn (see START / terminal logs below).
-    const startedAt = Date.now();
-
    // Resolve the agent role for this turn BEFORE hijack: existing chats read it
    // from ai_chats.role_id (authoritative), a new chat from body.roleId. The
    // role drives both the persona and the optional model override below.
@@ -170,7 +167,7 @@ export class AiChatController {
      // so log it here before aborting the agent loop.
      if (!res.raw.writableEnded) {
        this.logger.warn(
-          `AI chat stream: client disconnected before completion after ${Date.now() - startedAt}ms; aborting turn`,
+          'AI chat stream: client disconnected before completion; aborting turn',
        );
        controller.abort();
      }
@@ -178,10 +175,6 @@ export class AiChatController {
    req.raw.once('close', onClose);
    res.raw.once('finish', () => req.raw.off('close', onClose));

-    this.logger.log(
-      `AI chat stream START chat=${body.chatId ?? 'new'} ua="${req.headers['user-agent'] ?? ''}"`,
-    );
-
    // Commit to streaming: hijack so Fastify stops managing the response and
    // the AI SDK can write the UI-message stream directly to the Node socket.
    res.hijack();
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -192,7 +192,6 @@ export class AiChatService {
    model,
    role,
  }: AiChatStreamArgs): Promise<void> {
-    const turnStartedAt = Date.now();
    // Resolve / create the chat. A new chat is created when no valid chatId is
    // supplied or the supplied one does not belong to this workspace.
    let isNewChat = false;
@@ -381,10 +380,6 @@ export class AiChatService {
    const capturedSteps: StepLike[] = [];
    let inProgressText = '';

-    // Log only the FIRST streamed chunk so we can see the provider's observed
-    // time-to-first-token without flooding the log with every delta.
-    let firstChunkLogged = false;
-
    // NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous
    // failure here (or in pipe below) would skip the terminal callbacks, so the
    // catch releases the leased external clients to avoid a connection leak.
@@ -409,12 +404,6 @@ export class AiChatService {
      prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
      abortSignal: signal,
      onChunk: ({ chunk }) => {
-        if (!firstChunkLogged) {
-          firstChunkLogged = true;
-          this.logger.log(
-            `AI chat stream first chunk (${chunk.type}) chat=${chatId} after ${Date.now() - turnStartedAt}ms`,
-          );
-        }
        // 'text-delta' is the assistant's prose; tool-call args are separate chunk
        // types — so this mirrors exactly what streams to the client.
        if (chunk.type === 'text-delta') inProgressText += chunk.text;
@@ -426,9 +415,6 @@ export class AiChatService {
        inProgressText = '';
      },
      onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
-        this.logger.log(
-          `AI chat stream FINISHED chat=${chatId} in ${Date.now() - turnStartedAt}ms, ${steps.length} step(s)`,
-        );
        await persistAssistant({
          text,
          toolCalls: serializeSteps(steps),
@@ -474,9 +460,6 @@ export class AiChatService {
        const e = error as { stack?: string };
        const errorText = describeProviderError(error, String(error));
        this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
-        this.logger.warn(
-          `AI chat stream ERROR terminal chat=${chatId} after ${Date.now() - turnStartedAt}ms`,
-        );
        // Persist the PARTIAL answer streamed before the failure (text + any
        // finished tool steps) WITH the error in metadata, so the turn shows what
        // the user already saw plus the cause — not just a bare error.
@@ -499,8 +482,7 @@ export class AiChatService {
        // invisible in the logs. Log it (warn) so the abort is traceable.
        this.logger.warn(
          `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
-            `step(s), ${partialChars} chars partial text; persisting partial turn` +
-            ` after ${Date.now() - turnStartedAt}ms`,
+            `step(s), ${partialChars} chars partial text; persisting partial turn.`,
        );
        await persistAssistant(
          buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),