From e5effa13e130d82f000be58d8d78b11871797236 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Tue, 23 Jun 2026 18:49:04 +0300 Subject: [PATCH] fix(ai-http): generous-finite AI timeouts (120s) instead of disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refines the #144 timeout decision with measured data. A 30-min probe of paced single z.ai requests: 22/22 succeeded, TTFB 1.6–9.9s, zero timeouts/429s, no multi-minute hang. So z.ai answers fast when NOT bursted; the reported "hangs tens of minutes" is the burst path (20-step agent + stacked retries), addressed by the per-host concurrency gate + 429 backoff. Therefore headersTimeout/bodyTimeout default to 120s (env-overridable; 0 to disable) rather than 0/infinite: 120s is ~12× the worst observed paced TTFB, so it tolerates real slow turns but cuts a genuinely-stuck request with a clear error instead of hanging for minutes (curl-style "wait forever" was too loose; #141's 60s was too tight). Sanitizer now falls back to the default on a bad env value; an explicit env 0 still disables. Co-Authored-By: Claude Opus 4.8 --- .../src/integrations/ai/ai-http.spec.ts | 10 ++-- apps/server/src/integrations/ai/ai-http.ts | 50 ++++++++++++------- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/apps/server/src/integrations/ai/ai-http.spec.ts b/apps/server/src/integrations/ai/ai-http.spec.ts index 7e3b7b24..1301ff20 100644 --- a/apps/server/src/integrations/ai/ai-http.spec.ts +++ b/apps/server/src/integrations/ai/ai-http.spec.ts @@ -161,11 +161,11 @@ describe('ai-http', () => { it('aiFetch awaits a slow-first-byte response that a finite headersTimeout would abort (curl parity)', async () => { // The SAME server delays the response headers by 2.5s (z.ai's slow first - // byte). 
A control agent with a finite headersTimeout aborts it; the shipped - // aiFetch (timeouts disabled) must instead deliver the 200 — proving it waits - // like curl rather than killing the stream the way #141's 60s cap would. The - // gap is wide because undici's timeout timer wheel is coarse (~1s); a 2.5s - // delay vs a 1s control timeout makes the abort deterministic, not flaky. + // byte). A control agent with a TIGHT headersTimeout aborts it; the shipped + // aiFetch (generous 120s default) must instead deliver the 200 — proving a + // real slow-first-byte turn is tolerated, not killed the way #141's 60s cap + // did. The gap is wide because undici's timeout timer wheel is coarse (~1s); + // a 2.5s delay vs a 1s control timeout makes the abort deterministic. const srv = await loopback((_req, res) => { setTimeout(() => { res.writeHead(200, { 'content-type': 'text/event-stream' }); diff --git a/apps/server/src/integrations/ai/ai-http.ts b/apps/server/src/integrations/ai/ai-http.ts index d5f99c4b..efd0091c 100644 --- a/apps/server/src/integrations/ai/ai-http.ts +++ b/apps/server/src/integrations/ai/ai-http.ts @@ -60,27 +60,43 @@ import { Logger } from '@nestjs/common'; * `UND_ERR_REQ_CONTENT_LENGTH_MISMATCH` — the exact production error. * (Reproduced in ai-http.spec.ts.) * - * THE FIX: behave like curl. Disable the time-to-first-header / inter-chunk - * timeouts by default (env-overridable) so the transport WAITS for z.ai's slow - * first byte instead of aborting it, and NEVER retry a header/body timeout at the - * transport layer (only genuine connection resets are retried on a fresh socket). + * THE FIX (three parts, all in this module): + * 1. GENEROUS-FINITE timeouts (default 120s), not the 60s of #141 and not + * "infinite". A 30-min probe showed paced single requests always answer in + * <10s; so 120s tolerates real slow turns yet bounds a genuinely-stuck one. + * 2. 
NEVER retry a header/body timeout at the transport layer (only genuine
+ *      connection resets are retried on a fresh socket) — retrying a timed-out
+ *      POST-with-body is what produced #141's CONTENT_LENGTH_MISMATCH.
+ *   3. Serialize per host + back off on 429 (see below): z.ai's coding plan
+ *      throttles bursts (the agent fires up to 20 requests/turn), so we hold a
+ *      single in-flight slot per host and wait out 429s instead of cascading.
  */
 
 // Time-to-FIRST-response-headers (`headersTimeout`) and gap-between-streamed-
-// body-chunks (`bodyTimeout`) bounds, in ms. Default 0 = DISABLED — wait like
-// `curl`. See the #140 root-cause note above: any finite headersTimeout (undici's
-// 300s default, or the 60s of #141) aborts z.ai's slow-first-byte reasoning turn
-// before it answers. Operators who want a finite cap can set the env vars.
+// body-chunks (`bodyTimeout`) bounds, in ms. GENEROUS but FINITE by default
+// (env-overridable; set 0 to disable entirely).
+//
+// Rationale (measured): a PACED single z.ai request responds within ~10s and
+// NEVER hung over a 30-min probe (22/22, max 9.9s). z.ai only stalls under
+// BURSTS — so paired with the per-host concurrency gate below (which removes
+// bursts), a request still pending after 120s is genuinely STUCK, not
+// normal-slow: cut it with a clear error rather than hang for minutes (the
+// reported "hangs for tens of minutes" symptom). 120s is ~12× the observed worst paced
+// TTFB. Contrast: #141's 60s was too tight (aborted real slow turns at ~61s);
+// a curl-style "wait forever" (0) is too loose (a truly stuck request hangs).
 //
 // undici REQUIRES a non-negative integer here and throws at construction time on
 // anything else, so a typo'd env value (e.g. "60s") must NOT reach it — that
-// would crash the whole AI layer at import. Sanitize: unset/invalid/negative → 0.
-const envTimeoutMs = (name: string): number => {
-  const n = Number(process.env[name]);
-  return Number.isInteger(n) && n >= 0 ? n : 0;
+// would crash the whole AI layer at import. Sanitize: unset/blank/invalid/negative
+// → default (`def`, in ms); only an explicit env 0 disables the timeout.
+const envTimeoutMs = (name: string, def: number): number => {
+  const raw = process.env[name];
+  if (raw === undefined || raw.trim() === '') return def; // Number('') is 0: a blank value must not disable
+  const n = Number(raw);
+  return Number.isInteger(n) && n >= 0 ? n : def;
 };
-const HEADERS_TIMEOUT_MS = envTimeoutMs('AI_HTTP_HEADERS_TIMEOUT_MS');
-const BODY_TIMEOUT_MS = envTimeoutMs('AI_HTTP_BODY_TIMEOUT_MS');
+const HEADERS_TIMEOUT_MS = envTimeoutMs('AI_HTTP_HEADERS_TIMEOUT_MS', 120_000);
+const BODY_TIMEOUT_MS = envTimeoutMs('AI_HTTP_BODY_TIMEOUT_MS', 120_000);
 
 const baseAgent = new Agent({
   // Cap TCP/TLS connect so a stuck connect fails fast and gets retried instead
@@ -90,9 +106,9 @@ const baseAgent = new Agent({
   // a stale/half-closed socket can be reused, which is exactly the condition
   // that produces `read ECONNRESET`. Do NOT raise this.
   keepAliveTimeout: 4_000,
-  // 0 = disabled: wait for z.ai's slow first byte / streamed chunks like curl,
-  // instead of aborting the heavy chat stream prematurely (#140). Do NOT lower
-  // these to a finite value without re-reading the root-cause note above.
+  // Generous-but-finite (default 120s; see HEADERS_TIMEOUT_MS above): tolerate
+  // z.ai's slow first byte / sparse reasoning chunks, but cut a genuinely stuck
+  // request so it can't hang for minutes (#140). Do NOT drop back to ~60s.
   headersTimeout: HEADERS_TIMEOUT_MS,
   bodyTimeout: BODY_TIMEOUT_MS,
 });