revert(ai-http): drop resilient fetch/RetryAgent layer (#140)
The custom undici RetryAgent + aiFetch transport added for issue #140 did not actually heal mid-stream provider drops: undici's retry path is a Range-based download-resume that SSE/chat-completions endpoints cannot satisfy, so a reset after the first byte only swapped ECONNRESET for a "server does not support the range header" error. Its only real effect was reconnecting a poisoned keep-alive socket before the first byte, and PR #141 on top of it turned the 60s headers timeout into deterministic ~61s failures (plus CONTENT_LENGTH_MISMATCH from retrying a POST body after a timeout abort). The root cause is the z.ai coding endpoint, not our transport. Remove the whole layer and return all AI provider calls to Node's default global fetch. - delete integrations/ai/ai-http.ts and its spec - ai.service.ts: drop the aiFetch import, the AI_BYPASS_RESILIENT_FETCH diagnostic toggle, and fetch:aiFetch from every chat/embedding/STT factory; raw STT call back to global fetch - ai-chat.controller.ts: drop the stream-timing START log + startedAt - ai-chat.service.ts: drop the first-chunk/FINISHED/ERROR timing logs - .env.example: drop AI_BYPASS_RESILIENT_FETCH Reverts:1af5d34a,7c308728,b7abb7ea,35fc58ea,d6cd2754,6efb8656. Preserved (not part of the rollback): client-disconnect abort, title generation in onFinish, partial-answer persistence, Safari SSE heartbeat. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -142,9 +142,6 @@ export class AiChatController {
|
||||
|
||||
const body = (req.body ?? {}) as AiChatStreamBody;
|
||||
|
||||
// Diagnostic timing baseline for this turn (see START / terminal logs below).
|
||||
const startedAt = Date.now();
|
||||
|
||||
// Resolve the agent role for this turn BEFORE hijack: existing chats read it
|
||||
// from ai_chats.role_id (authoritative), a new chat from body.roleId. The
|
||||
// role drives both the persona and the optional model override below.
|
||||
@@ -170,7 +167,7 @@ export class AiChatController {
|
||||
// so log it here before aborting the agent loop.
|
||||
if (!res.raw.writableEnded) {
|
||||
this.logger.warn(
|
||||
`AI chat stream: client disconnected before completion after ${Date.now() - startedAt}ms; aborting turn`,
|
||||
'AI chat stream: client disconnected before completion; aborting turn',
|
||||
);
|
||||
controller.abort();
|
||||
}
|
||||
@@ -178,10 +175,6 @@ export class AiChatController {
|
||||
req.raw.once('close', onClose);
|
||||
res.raw.once('finish', () => req.raw.off('close', onClose));
|
||||
|
||||
this.logger.log(
|
||||
`AI chat stream START chat=${body.chatId ?? 'new'} ua="${req.headers['user-agent'] ?? ''}"`,
|
||||
);
|
||||
|
||||
// Commit to streaming: hijack so Fastify stops managing the response and
|
||||
// the AI SDK can write the UI-message stream directly to the Node socket.
|
||||
res.hijack();
|
||||
|
||||
@@ -192,7 +192,6 @@ export class AiChatService {
|
||||
model,
|
||||
role,
|
||||
}: AiChatStreamArgs): Promise<void> {
|
||||
const turnStartedAt = Date.now();
|
||||
// Resolve / create the chat. A new chat is created when no valid chatId is
|
||||
// supplied or the supplied one does not belong to this workspace.
|
||||
let isNewChat = false;
|
||||
@@ -381,10 +380,6 @@ export class AiChatService {
|
||||
const capturedSteps: StepLike[] = [];
|
||||
let inProgressText = '';
|
||||
|
||||
// Log only the FIRST streamed chunk so we can see the provider's observed
|
||||
// time-to-first-token without flooding the log with every delta.
|
||||
let firstChunkLogged = false;
|
||||
|
||||
// NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous
|
||||
// failure here (or in pipe below) would skip the terminal callbacks, so the
|
||||
// catch releases the leased external clients to avoid a connection leak.
|
||||
@@ -409,12 +404,6 @@ export class AiChatService {
|
||||
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
|
||||
abortSignal: signal,
|
||||
onChunk: ({ chunk }) => {
|
||||
if (!firstChunkLogged) {
|
||||
firstChunkLogged = true;
|
||||
this.logger.log(
|
||||
`AI chat stream first chunk (${chunk.type}) chat=${chatId} after ${Date.now() - turnStartedAt}ms`,
|
||||
);
|
||||
}
|
||||
// 'text-delta' is the assistant's prose; tool-call args are separate chunk
|
||||
// types — so this mirrors exactly what streams to the client.
|
||||
if (chunk.type === 'text-delta') inProgressText += chunk.text;
|
||||
@@ -426,9 +415,6 @@ export class AiChatService {
|
||||
inProgressText = '';
|
||||
},
|
||||
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
|
||||
this.logger.log(
|
||||
`AI chat stream FINISHED chat=${chatId} in ${Date.now() - turnStartedAt}ms, ${steps.length} step(s)`,
|
||||
);
|
||||
await persistAssistant({
|
||||
text,
|
||||
toolCalls: serializeSteps(steps),
|
||||
@@ -474,9 +460,6 @@ export class AiChatService {
|
||||
const e = error as { stack?: string };
|
||||
const errorText = describeProviderError(error, String(error));
|
||||
this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
|
||||
this.logger.warn(
|
||||
`AI chat stream ERROR terminal chat=${chatId} after ${Date.now() - turnStartedAt}ms`,
|
||||
);
|
||||
// Persist the PARTIAL answer streamed before the failure (text + any
|
||||
// finished tool steps) WITH the error in metadata, so the turn shows what
|
||||
// the user already saw plus the cause — not just a bare error.
|
||||
@@ -499,8 +482,7 @@ export class AiChatService {
|
||||
// invisible in the logs. Log it (warn) so the abort is traceable.
|
||||
this.logger.warn(
|
||||
`AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
|
||||
`step(s), ${partialChars} chars partial text; persisting partial turn` +
|
||||
` after ${Date.now() - turnStartedAt}ms`,
|
||||
`step(s), ${partialChars} chars partial text; persisting partial turn.`,
|
||||
);
|
||||
await persistAssistant(
|
||||
buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),
|
||||
|
||||
Reference in New Issue
Block a user