From 35fc58eaaa332df31b7e10d56156969a2e4536a5 Mon Sep 17 00:00:00 2001
From: claude code agent 227
Date: Tue, 23 Jun 2026 04:09:10 +0300
Subject: [PATCH 1/2] fix(ai-http): fail fast + retry on provider header stall (#140)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The z.ai GLM coding endpoint intermittently accepts the chat request but
never sends response headers; undici's default 300s headersTimeout then
hung the user for five minutes before failing, and
UND_ERR_HEADERS_TIMEOUT was not in the RetryAgent's retried error set, so
there was no recovery.

headersTimeout only bounds time-to-FIRST-headers (before any body) — it
is NOT the streaming budget, so lowering it does not truncate live SSE
streams. Cap it (env AI_HTTP_HEADERS_TIMEOUT_MS, default 60s) so a header
stall fails fast, and add UND_ERR_HEADERS_TIMEOUT to the retried error
codes so the stalled request is retried on a fresh connection (which
usually responds in seconds). bodyTimeout kept generous (env
AI_HTTP_BODY_TIMEOUT_MS, default 300s) so slow streams with sparse chunks
survive. UND_ERR_BODY_TIMEOUT is deliberately NOT retried (mid-body,
partial SSE already delivered).

Both env overrides are validated: any value that is not a positive
integer (unset, non-numeric, fractional, zero, negative) falls back to
the default, so a mistyped override never reaches the undici Agent.

Co-Authored-By: Claude Opus 4.8
---
 apps/server/src/integrations/ai/ai-http.ts | 40 ++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/apps/server/src/integrations/ai/ai-http.ts b/apps/server/src/integrations/ai/ai-http.ts
index 650c5f60..403b0f7d 100644
--- a/apps/server/src/integrations/ai/ai-http.ts
+++ b/apps/server/src/integrations/ai/ai-http.ts
@@ -7,7 +7,9 @@ import { Logger } from '@nestjs/common';
  * WHY THIS EXISTS
  * ---------------
  * Production logs showed the AI chat stream (and title generation) failing with
- * `read ECONNRESET` after the AI SDK's own retries were exhausted. The provider
+ * `read ECONNRESET` after the AI SDK's own retries were exhausted, and
+ * (z.ai GLM coding endpoint, #140) intermittently stalling without ever sending
+ * response headers until undici's 300s default cut the request with no retry. The provider
  * clients were built with NO custom `fetch`, so all outbound LLM traffic used
  * Node's default global undici agent: default keep-alive pooling and NO
  * transport-level reconnect on connection resets. `read ECONNRESET` is a TCP RST
@@ -41,6 +43,29 @@ import { Logger } from '@nestjs/common';
  * error message for that rarer mid-stream case changes.
  */
 
+// Reads a millisecond timeout override from the environment. Anything that is
+// not a positive integer (unset, non-numeric, fractional, zero, negative) falls
+// back to `fallbackMs`, so a mistyped override can never reach the undici Agent
+// as an invalid timeout or silently disable the timeout with 0.
+function readTimeoutMs(envName: string, fallbackMs: number): number {
+  const parsed = Number(process.env[envName]);
+  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallbackMs;
+}
+
+// `headersTimeout` bounds time-to-FIRST-response-headers (before any body). It
+// is NOT the streaming budget: once headers arrive the SSE body streams freely,
+// unaffected by this value — so it is safe to keep SHORT. Some providers (seen
+// with the z.ai GLM coding endpoint, #140) intermittently accept the request but
+// never send response headers; undici's 300s default then hangs the user for
+// FIVE MINUTES before failing, with no retry. Cap it so a stalled request fails
+// FAST and is retried on a fresh connection (the retry usually lands on a healthy
+// path and responds in seconds). Env-overridable for ops tuning.
+const HEADERS_TIMEOUT_MS = readTimeoutMs('AI_HTTP_HEADERS_TIMEOUT_MS', 60_000);
+// `bodyTimeout` bounds the gap BETWEEN streamed body chunks (not total stream
+// length). Kept generous so a legitimately slow/thinking model with sparse SSE
+// chunks is never killed mid-stream. Env-overridable.
+const BODY_TIMEOUT_MS = readTimeoutMs('AI_HTTP_BODY_TIMEOUT_MS', 300_000);
+
 const baseAgent = new Agent({
   // Cap TCP/TLS connect so a stuck connect fails fast and gets retried instead
   // of hanging indefinitely.
@@ -49,8 +74,11 @@ const baseAgent = new Agent({
   // a stale/half-closed socket can be reused, which is exactly the condition
   // that produces `read ECONNRESET`. Do NOT raise this.
   keepAliveTimeout: 4_000,
-  // Do NOT override headersTimeout/bodyTimeout — keep undici defaults so
-  // long-lived SSE streaming responses are not killed mid-stream.
+  // Short time-to-headers (see HEADERS_TIMEOUT_MS) so a header stall fails fast
+  // and gets retried; generous per-chunk body timeout so real streams survive
+  // (see BODY_TIMEOUT_MS). Lowering headersTimeout does NOT truncate streams.
+  headersTimeout: HEADERS_TIMEOUT_MS,
+  bodyTimeout: BODY_TIMEOUT_MS,
 });
 
 const dispatcher: Dispatcher = new RetryAgent(baseAgent, {
@@ -80,6 +108,12 @@ const dispatcher: Dispatcher = new RetryAgent(baseAgent, {
     'EHOSTDOWN',
     'EHOSTUNREACH',
     'UND_ERR_SOCKET',
+    // Added (NOT in undici's default set): a header timeout fires BEFORE any
+    // response body, so retrying is clean (no partially-consumed stream / Range
+    // problem) — and it is exactly the z.ai stall mode (#140), where a fresh
+    // retry usually succeeds. We deliberately do NOT retry UND_ERR_BODY_TIMEOUT
+    // (mid-body; partial SSE already delivered, not safe to resume).
+    'UND_ERR_HEADERS_TIMEOUT',
     'EPIPE',
   ],
 });
-- 
2.49.1

From d6cd2754698d5712f6593a8a2be5d7d463880c1f Mon Sep 17 00:00:00 2001
From: claude code agent 227
Date: Tue, 23 Jun 2026 04:11:50 +0300
Subject: [PATCH 2/2] test(ai-http): cover header-stall fail-fast + retry (#140)

Extend ai-http.spec with two loopback-server tests: a provider that
stalls without sending headers triggers the (lowered) headersTimeout and
is retried on a fresh connection, recovering; a healthy fast response
passes through in one attempt. No external network calls.

Co-Authored-By: Claude Opus 4.8
---
Review note: both tests issue GET. The RetryAgent `methods` option is not
visible in this series; if it is left at undici's default, POST requests
(presumably what the chat endpoints use) would not be retried. Please
confirm POST is covered, or extend the stall test to use POST.

 .../src/integrations/ai/ai-http.spec.ts | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/apps/server/src/integrations/ai/ai-http.spec.ts b/apps/server/src/integrations/ai/ai-http.spec.ts
index 044c18f9..4fbfab04 100644
--- a/apps/server/src/integrations/ai/ai-http.spec.ts
+++ b/apps/server/src/integrations/ai/ai-http.spec.ts
@@ -1,5 +1,10 @@
+import * as http from 'node:http';
 import { RetryAgent } from 'undici';
 
+// A short header timeout makes the #140 "header stall" deterministic and fast.
+// Must be set BEFORE importing ai-http (the undici agents read it at module load).
+process.env.AI_HTTP_HEADERS_TIMEOUT_MS = '800';
+
 import { aiFetch } from './ai-http';
 
 /**
@@ -45,3 +50,63 @@ describe('ai-http', () => {
     }
   });
 });
+
+/**
+ * #140 regression: a provider that accepts the request but stalls without ever
+ * sending response headers must FAIL FAST (at headersTimeout — set to 800ms
+ * above, not undici's 300s default) and be RETRIED on a fresh connection.
+ * headersTimeout only bounds time-to-headers, so a healthy fast response is
+ * unaffected. Uses a real loopback server; makes no external network calls.
+ */
+describe('aiFetch header-stall resilience (#140)', () => {
+  function makeServer(
+    handler: http.RequestListener,
+  ): Promise<{ url: string; close: () => Promise<void> }> {
+    return new Promise((resolve) => {
+      const server = http.createServer(handler);
+      server.listen(0, '127.0.0.1', () => {
+        const port = (server.address() as { port: number }).port;
+        resolve({
+          url: `http://127.0.0.1:${port}/health`,
+          close: () => new Promise<void>((r) => server.close(() => r())),
+        });
+      });
+    });
+  }
+
+  it('retries a header stall on a fresh connection and recovers', async () => {
+    let attempts = 0;
+    const { url, close } = await makeServer((_req, res) => {
+      attempts++;
+      // First attempt: never send headers -> UND_ERR_HEADERS_TIMEOUT -> retry.
+      if (attempts === 1) return;
+      res.writeHead(200, { 'content-type': 'application/json' });
+      res.end(JSON.stringify({ ok: true, servedOnAttempt: attempts }));
+    });
+    try {
+      const res = await aiFetch(url, { method: 'GET' });
+      expect(res.status).toBe(200);
+      const body = (await res.json()) as { servedOnAttempt: number };
+      expect(attempts).toBeGreaterThanOrEqual(2); // the stalled attempt was retried
+      expect(body.servedOnAttempt).toBeGreaterThanOrEqual(2);
+    } finally {
+      await close();
+    }
+  }, 15000);
+
+  it('passes a healthy fast response straight through (one attempt)', async () => {
+    let attempts = 0;
+    const { url, close } = await makeServer((_req, res) => {
+      attempts++;
+      res.writeHead(200, { 'content-type': 'application/json' });
+      res.end(JSON.stringify({ ok: true }));
+    });
+    try {
+      const res = await aiFetch(url, { method: 'GET' });
+      expect(res.status).toBe(200);
+      expect(attempts).toBe(1);
+    } finally {
+      await close();
+    }
+  }, 15000);
+});
-- 
2.49.1