From 4cc8df836f1bb88e45bca6252a5f7a0b4bd7176e Mon Sep 17 00:00:00 2001
From: claude_code <claude_code@vvzvlad.xyz>
Date: Wed, 24 Jun 2026 21:24:05 +0300
Subject: [PATCH] chore(ai): passive z.ai provider HTTP telemetry (#175)

Investigate the intermittent (~20-30%) long-turn failure
"Lost connection to the AI provider" = AI_RetryError / read ECONNRESET
on the gitmost->z.ai link (browser-agnostic, mid-turn). Pure
instrumentation, no behavior change:

- ai-http-diagnostics.ts: a passive fetch wrapper injected into the
  OpenAI-compatible (z.ai) client. Per provider HTTP call it logs
  time-to-headers/status on success, and on a pre-response rejection the
  latency, error code/cause, request-body size and idle-gap since the
  previous call. The Response is returned untouched (streaming intact),
  errors rethrown unchanged; no retry/timeout/dispatcher.
- ai.service.ts: wire the instrumented fetch into the openai case only.

Lets us classify the reset as connection-phase vs mid-stream before
choosing a fix, without repeating the reverted RetryAgent (#140).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../integrations/ai/ai-http-diagnostics.ts    | 75 +++++++++++++++++++
 apps/server/src/integrations/ai/ai.service.ts | 17 ++++-
 2 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 apps/server/src/integrations/ai/ai-http-diagnostics.ts
diff --git a/apps/server/src/integrations/ai/ai-http-diagnostics.ts b/apps/server/src/integrations/ai/ai-http-diagnostics.ts
new file mode 100644
index 00000000..eb9beeb2
--- /dev/null
+++ b/apps/server/src/integrations/ai/ai-http-diagnostics.ts
@@ -0,0 +1,75 @@
+import { Logger } from '@nestjs/common';
+
+/**
+ * DIAGNOSTIC (provider ECONNRESET investigation) — temporary.
+ *
+ * A PASSIVE, behavior-neutral wrapper around the global `fetch`, injected into
+ * the OpenAI-compatible provider client (`createOpenAI({ fetch })`, the z.ai
+ * path). Per provider HTTP call it logs: time-to-response-headers + status +
+ * request-body size on success; and on a pre-response rejection the failure
+ * latency + error code/cause + request-body size + the idle gap since the
+ * previous provider call. It NEVER retries, times out, swaps the dispatcher, or
+ * reads/clones the response body — the Response is returned untouched (streaming
+ * unaffected) and any error is rethrown unchanged.
+ *
+ * How to read the result (a long agentic turn makes one provider call per step):
+ *  - a failed turn whose last provider line is "PRE-RESPONSE FAILED ... ECONNRESET"
+ *    => the reset is in the CONNECTION phase of a step's request (the provider
+ *    never replied) — usually a poisoned keep-alive socket or the provider/middle
+ *    box resetting that request (large body / idle gap are the suspects, hence
+ *    reqBytes + idleSincePrevCall below).
+ *  - the last line is "OK ... status=200" and the turn still errors with NO
+ *    "PRE-RESPONSE FAILED" => the cut happened MID-STREAM (after headers), a
+ *    different failure mode.
+ *
+ * The seq/last-call timestamps are per-wrapper closure state, so under concurrent
+ * turns the idle-gap figure is approximate (fine for single-user reproduction).
+ */
+export function createDiagnosticFetch(context: string): typeof fetch {
+  const logger = new Logger(context);
+  let callSeq = 0;
+  let lastCallStartedAt: number | undefined;
+
+  return async (input: Parameters<typeof fetch>[0], init?: Parameters<typeof fetch>[1]): Promise<Response> => {
+    const callId = ++callSeq;
+    const startedAt = Date.now();
+    const idleSincePrev =
+      lastCallStartedAt === undefined ? undefined : startedAt - lastCallStartedAt;
+    lastCallStartedAt = startedAt;
+    // Request body size in UTF-8 wire bytes (`.length` would count UTF-16 units),
+    // to test whether failures correlate with large context on later agent steps.
+    const body = init?.body as unknown;
+    const bodyBytes =
+      typeof body === 'string'
+        ? Buffer.byteLength(body, 'utf8')
+        : body instanceof Uint8Array
+          ? body.byteLength
+          : undefined;
+    try {
+      // Delegate to global fetch; return the Response UNTOUCHED (never read/clone
+      // the body) so the streamed SSE response is unaffected.
+      const res = await fetch(input, init);
+      logger.log(
+        `provider HTTP DIAGNOSTIC: call#${callId} OK ` +
+          `headersAfter=${Date.now() - startedAt}ms status=${res.status} ` +
+          `reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 'n/a'}ms`,
+      );
+      return res;
+    } catch (err) {
+      // fetch() rejected => PRE-RESPONSE failure (no headers/body received yet):
+      // the connection/request phase. Log it and rethrow the SAME error.
+      const e = err as {
+        name?: string;
+        message?: string;
+        cause?: { code?: string; message?: string };
+      };
+      logger.warn(
+        `provider HTTP DIAGNOSTIC: call#${callId} PRE-RESPONSE FAILED ` +
+          `after=${Date.now() - startedAt}ms code=${e?.cause?.code ?? 'none'} ` +
+          `name=${e?.name ?? 'Error'} cause=${e?.cause?.message ?? e?.message ?? 'unknown'} ` +
+          `reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 'n/a'}ms`,
+      );
+      throw err;
+    }
+  };
+}
diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts
index 078de791..4f72d23b 100644
--- a/apps/server/src/integrations/ai/ai.service.ts
+++ b/apps/server/src/integrations/ai/ai.service.ts
@@ -14,6 +14,8 @@ import { AiNotConfiguredException } from './ai-not-configured.exception';
 import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception';
 import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception';
 import { describeProviderError } from './ai-error.util';
+// DIAGNOSTIC (provider ECONNRESET investigation) — temporary.
+import { createDiagnosticFetch } from './ai-http-diagnostics';
 import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
 import { SecretBoxService } from '../crypto/secret-box';
 import { AiDriver } from './ai.types';
@@ -43,6 +45,13 @@ export interface ChatModelOverride {
 export class AiService {
   private readonly logger = new Logger(AiService.name);
 
+  // DIAGNOSTIC (provider ECONNRESET investigation) — temporary: passive
+  // instrumentation of the OpenAI-compatible provider HTTP calls (z.ai).
+  // Logs call timing/outcome only — no behavior change.
+  private readonly aiDiagnosticFetch = createDiagnosticFetch(
+    'AiService:provider-http',
+  );
+
   constructor(
     private readonly aiSettings: AiSettingsService,
     private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
@@ -140,7 +149,13 @@ export class AiService {
         // Responses API (/responses), which OpenAI-compatible gateways
         // (OpenRouter, etc.) reject on multi-turn requests (history with
         // assistant messages) → 400.
-        return createOpenAI({ apiKey, baseURL: baseUrl }).chat(chatModel);
+        // DIAGNOSTIC (provider ECONNRESET investigation) — temporary: pass the
+        // passive instrumented fetch (logging only; no behavior change).
+        return createOpenAI({
+          apiKey,
+          baseURL: baseUrl,
+          fetch: this.aiDiagnosticFetch,
+        }).chat(chatModel);
       case 'gemini':
         return createGoogleGenerativeAI({ apiKey })(chatModel);
       case 'ollama':