Merge pull request 'fix(ai-http): fail fast + retry on provider header stall (#140)' (#141) from fix/ai-stream-headers-timeout into develop
Reviewed-on: #141
This commit was merged in pull request #141.
This commit is contained in:
@@ -1,5 +1,10 @@
|
|||||||
|
import * as http from 'node:http';
|
||||||
import { RetryAgent } from 'undici';
|
import { RetryAgent } from 'undici';
|
||||||
|
|
||||||
|
// A short header timeout makes the #140 "header stall" deterministic and fast.
|
||||||
|
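// Must be set BEFORE ai-http is loaded (the undici agents read it at module load).
// NOTE(review): this only holds while the compiled output keeps statements in
// source order (e.g. CommonJS emit). Under native ESM or an import-hoisting
// transform, './ai-http' would be evaluated first and this value ignored —
// confirm against the test runner's transform.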
|
||||||
|
process.env.AI_HTTP_HEADERS_TIMEOUT_MS = '800';
|
||||||
|
|
||||||
import { aiFetch } from './ai-http';
|
import { aiFetch } from './ai-http';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -45,3 +50,63 @@ describe('ai-http', () => {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* #140 regression: a provider that accepts the request but stalls without ever
|
||||||
|
* sending response headers must FAIL FAST (at headersTimeout — set to 800ms
|
||||||
|
* above, not undici's 300s default) and be RETRIED on a fresh connection.
|
||||||
|
* headersTimeout only bounds time-to-headers, so a healthy fast response is
|
||||||
|
* unaffected. Uses a real loopback server; makes no external network calls.
|
||||||
|
*/
|
||||||
|
describe('aiFetch header-stall resilience (#140)', () => {
|
||||||
|
/**
 * Starts a throwaway HTTP server on an ephemeral loopback port.
 *
 * @param handler Request listener invoked for every incoming request. It may
 *   deliberately never respond (to simulate a header stall).
 * @returns The `/health` URL of the server and a `close()` that shuts it down.
 *   `close()` also destroys every socket that is still open, so a stalled
 *   request or an idle keep-alive connection cannot keep it pending.
 *   The promise rejects if the server fails to start listening.
 */
function makeServer(
  handler: http.RequestListener,
): Promise<{ url: string; close: () => Promise<void> }> {
  return new Promise((resolve, reject) => {
    const server = http.createServer(handler);

    // Track live sockets: `server.close()` alone only stops accepting new
    // connections and then waits for the existing ones to end by themselves.
    const openSockets = new Set<{ destroy(): unknown }>();
    server.on('connection', (socket) => {
      openSockets.add(socket);
      socket.once('close', () => openSockets.delete(socket));
    });

    // Surface a failed listen as a rejection instead of an unhandled
    // 'error' event and a promise that never settles.
    server.once('error', reject);

    server.listen(0, '127.0.0.1', () => {
      const address = server.address();
      if (address === null || typeof address === 'string') {
        server.close();
        reject(new Error('loopback test server did not report a TCP port'));
        return;
      }
      resolve({
        url: `http://127.0.0.1:${address.port}/health`,
        close: () =>
          new Promise<void>((done) => {
            server.close(() => done());
            for (const socket of openSockets) socket.destroy();
          }),
      });
    });
  });
}
|
||||||
|
|
||||||
|
// Stall case: the first request is left unanswered so the client hits its
// headers timeout; any later request is answered normally and reports which
// attempt served it.
it('retries a header stall on a fresh connection and recovers', async () => {
  let requestCount = 0;
  const server = await makeServer((_req, reply) => {
    requestCount += 1;
    const isStalledAttempt = requestCount === 1;
    if (!isStalledAttempt) {
      const payload = JSON.stringify({ ok: true, servedOnAttempt: requestCount });
      reply.writeHead(200, { 'content-type': 'application/json' });
      reply.end(payload);
    }
    // else: never send headers -> UND_ERR_HEADERS_TIMEOUT -> retry.
  });

  try {
    const response = await aiFetch(server.url, { method: 'GET' });
    expect(response.status).toBe(200);

    const parsed = (await response.json()) as { servedOnAttempt: number };
    // The stalled attempt must have been followed by at least one retry.
    expect(requestCount).toBeGreaterThanOrEqual(2);
    expect(parsed.servedOnAttempt).toBeGreaterThanOrEqual(2);
  } finally {
    await server.close();
  }
}, 15000);
|
||||||
|
|
||||||
|
// Healthy case: a server that answers immediately must be hit exactly once,
// i.e. the short headers timeout does not cause spurious retries.
it('passes a healthy fast response straight through (one attempt)', async () => {
  let requestCount = 0;
  const server = await makeServer((_req, reply) => {
    requestCount += 1;
    reply.writeHead(200, { 'content-type': 'application/json' });
    reply.end(JSON.stringify({ ok: true }));
  });

  try {
    const response = await aiFetch(server.url, { method: 'GET' });
    expect(response.status).toBe(200);
    expect(requestCount).toBe(1);
  } finally {
    await server.close();
  }
}, 15000);
|
||||||
|
});
|
||||||
|
|||||||
@@ -7,7 +7,9 @@ import { Logger } from '@nestjs/common';
|
|||||||
* WHY THIS EXISTS
|
* WHY THIS EXISTS
|
||||||
* ---------------
|
* ---------------
|
||||||
* Production logs showed the AI chat stream (and title generation) failing with
|
* Production logs showed the AI chat stream (and title generation) failing with
|
||||||
* `read ECONNRESET` after the AI SDK's own retries were exhausted. The provider
|
* `read ECONNRESET` after the AI SDK's own retries were exhausted, and
|
||||||
|
* (z.ai GLM coding endpoint, #140) intermittently stalling without ever sending
|
||||||
|
* response headers until undici's 300s default cut the request with no retry. The provider
|
||||||
* clients were built with NO custom `fetch`, so all outbound LLM traffic used
|
* clients were built with NO custom `fetch`, so all outbound LLM traffic used
|
||||||
* Node's default global undici agent: default keep-alive pooling and NO
|
* Node's default global undici agent: default keep-alive pooling and NO
|
||||||
* transport-level reconnect on connection resets. `read ECONNRESET` is a TCP RST
|
* transport-level reconnect on connection resets. `read ECONNRESET` is a TCP RST
|
||||||
@@ -41,6 +43,21 @@ import { Logger } from '@nestjs/common';
|
|||||||
* error message for that rarer mid-stream case changes.
|
* error message for that rarer mid-stream case changes.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// `headersTimeout` bounds time-to-FIRST-response-headers (before any body). It
|
||||||
|
// is NOT the streaming budget: once headers arrive the SSE body streams freely,
|
||||||
|
// unaffected by this value — so it is safe to keep SHORT. Some providers (seen
|
||||||
|
// with the z.ai GLM coding endpoint, #140) intermittently accept the request but
|
||||||
|
// never send response headers; undici's 300s default then hangs the user for
|
||||||
|
// FIVE MINUTES before failing, with no retry. Cap it so a stalled request fails
|
||||||
|
// FAST and is retried on a fresh connection (the retry usually lands on a healthy
|
||||||
|
// path and responds in seconds). Env-overridable for ops tuning.
|
||||||
|
const HEADERS_TIMEOUT_MS = readTimeoutMs('AI_HTTP_HEADERS_TIMEOUT_MS', 60_000);

// `bodyTimeout` bounds the gap BETWEEN streamed body chunks (not total stream
// length). Kept generous so a legitimately slow/thinking model with sparse SSE
// chunks is never killed mid-stream. Env-overridable.
const BODY_TIMEOUT_MS = readTimeoutMs('AI_HTTP_BODY_TIMEOUT_MS', 300_000);

/**
 * Reads a millisecond timeout override from the environment.
 *
 * Only a positive safe integer is accepted. An unset, empty, zero, negative,
 * fractional, infinite or non-numeric value falls back to `fallbackMs`, so a
 * typo in ops configuration cannot hand the HTTP agent a timeout that its
 * option validation is expected to reject.
 *
 * @param name Environment variable to read.
 * @param fallbackMs Value used when the variable is missing or invalid.
 * @returns The validated override, or `fallbackMs`.
 */
function readTimeoutMs(name: string, fallbackMs: number): number {
  const parsed = Number(process.env[name]);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallbackMs;
}
|
||||||
|
|
||||||
const baseAgent = new Agent({
|
const baseAgent = new Agent({
|
||||||
// Cap TCP/TLS connect so a stuck connect fails fast and gets retried instead
|
// Cap TCP/TLS connect so a stuck connect fails fast and gets retried instead
|
||||||
// of hanging indefinitely.
|
// of hanging indefinitely.
|
||||||
@@ -49,8 +66,11 @@ const baseAgent = new Agent({
|
|||||||
// a stale/half-closed socket can be reused, which is exactly the condition
|
// a stale/half-closed socket can be reused, which is exactly the condition
|
||||||
// that produces `read ECONNRESET`. Do NOT raise this.
|
// that produces `read ECONNRESET`. Do NOT raise this.
|
||||||
keepAliveTimeout: 4_000,
|
keepAliveTimeout: 4_000,
|
||||||
// Do NOT override headersTimeout/bodyTimeout — keep undici defaults so
|
// Short time-to-headers (see HEADERS_TIMEOUT_MS) so a header stall fails fast
|
||||||
// long-lived SSE streaming responses are not killed mid-stream.
|
// and gets retried; generous per-chunk body timeout so real streams survive
|
||||||
|
// (see BODY_TIMEOUT_MS). Lowering headersTimeout does NOT truncate streams.
|
||||||
|
headersTimeout: HEADERS_TIMEOUT_MS,
|
||||||
|
bodyTimeout: BODY_TIMEOUT_MS,
|
||||||
});
|
});
|
||||||
|
|
||||||
const dispatcher: Dispatcher = new RetryAgent(baseAgent, {
|
const dispatcher: Dispatcher = new RetryAgent(baseAgent, {
|
||||||
@@ -80,6 +100,12 @@ const dispatcher: Dispatcher = new RetryAgent(baseAgent, {
|
|||||||
'EHOSTDOWN',
|
'EHOSTDOWN',
|
||||||
'EHOSTUNREACH',
|
'EHOSTUNREACH',
|
||||||
'UND_ERR_SOCKET',
|
'UND_ERR_SOCKET',
|
||||||
|
// Added (NOT in undici's default set): a header timeout fires BEFORE any
|
||||||
|
// response body, so retrying is clean (no partially-consumed stream / Range
|
||||||
|
// problem) — and it is exactly the z.ai stall mode (#140), where a fresh
|
||||||
|
// retry usually succeeds. We deliberately do NOT retry UND_ERR_BODY_TIMEOUT
|
||||||
|
// (mid-body; partial SSE already delivered, not safe to resume).
|
||||||
|
'UND_ERR_HEADERS_TIMEOUT',
|
||||||
'EPIPE',
|
'EPIPE',
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user