revert(ai-http): drop resilient fetch/RetryAgent layer (#140)

The custom undici RetryAgent + aiFetch transport added for issue #140
did not actually heal mid-stream provider drops: undici's retry path is
a Range-based download-resume that SSE/chat-completions endpoints cannot
satisfy, so a reset after the first byte only swapped ECONNRESET for a
"server does not support the range header" error. Its only real effect
was reconnecting a poisoned keep-alive socket before the first byte, and
PR #141 on top of it turned the 60s headers timeout into deterministic
~61s failures (plus CONTENT_LENGTH_MISMATCH from retrying a POST body
after a timeout abort). The root cause is the z.ai coding endpoint, not
our transport.

Remove the whole layer and return all AI provider calls to Node's
default global fetch.

- delete integrations/ai/ai-http.ts and its spec
- ai.service.ts: drop the aiFetch import, the AI_BYPASS_RESILIENT_FETCH
  diagnostic toggle, and fetch:aiFetch from every chat/embedding/STT
  factory; switch the raw STT call back to global fetch
- ai-chat.controller.ts: drop the stream-timing START log + startedAt
- ai-chat.service.ts: drop the first-chunk/FINISHED/ERROR timing logs
- .env.example: drop AI_BYPASS_RESILIENT_FETCH

Reverts: 1af5d34a, 7c308728, b7abb7ea, 35fc58ea, d6cd2754, 6efb8656.
Preserved (not part of the rollback): client-disconnect abort, title
generation in onFinish, partial-answer persistence, Safari SSE heartbeat.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-23 18:48:33 +03:00
parent 0fabaa5bfb
commit 5161de8ba9
6 changed files with 6 additions and 351 deletions

View File

@@ -142,9 +142,6 @@ export class AiChatController {
const body = (req.body ?? {}) as AiChatStreamBody;
// Diagnostic timing baseline for this turn (see START / terminal logs below).
const startedAt = Date.now();
// Resolve the agent role for this turn BEFORE hijack: existing chats read it
// from ai_chats.role_id (authoritative), a new chat from body.roleId. The
// role drives both the persona and the optional model override below.
@@ -170,7 +167,7 @@ export class AiChatController {
// so log it here before aborting the agent loop.
if (!res.raw.writableEnded) {
this.logger.warn(
`AI chat stream: client disconnected before completion after ${Date.now() - startedAt}ms; aborting turn`,
'AI chat stream: client disconnected before completion; aborting turn',
);
controller.abort();
}
@@ -178,10 +175,6 @@ export class AiChatController {
req.raw.once('close', onClose);
res.raw.once('finish', () => req.raw.off('close', onClose));
this.logger.log(
`AI chat stream START chat=${body.chatId ?? 'new'} ua="${req.headers['user-agent'] ?? ''}"`,
);
// Commit to streaming: hijack so Fastify stops managing the response and
// the AI SDK can write the UI-message stream directly to the Node socket.
res.hijack();

View File

@@ -192,7 +192,6 @@ export class AiChatService {
model,
role,
}: AiChatStreamArgs): Promise<void> {
const turnStartedAt = Date.now();
// Resolve / create the chat. A new chat is created when no valid chatId is
// supplied or the supplied one does not belong to this workspace.
let isNewChat = false;
@@ -381,10 +380,6 @@ export class AiChatService {
const capturedSteps: StepLike[] = [];
let inProgressText = '';
// Log only the FIRST streamed chunk so we can see the provider's observed
// time-to-first-token without flooding the log with every delta.
let firstChunkLogged = false;
// NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous
// failure here (or in pipe below) would skip the terminal callbacks, so the
// catch releases the leased external clients to avoid a connection leak.
@@ -409,12 +404,6 @@ export class AiChatService {
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
abortSignal: signal,
onChunk: ({ chunk }) => {
if (!firstChunkLogged) {
firstChunkLogged = true;
this.logger.log(
`AI chat stream first chunk (${chunk.type}) chat=${chatId} after ${Date.now() - turnStartedAt}ms`,
);
}
// 'text-delta' is the assistant's prose; tool-call args are separate chunk
// types — so this mirrors exactly what streams to the client.
if (chunk.type === 'text-delta') inProgressText += chunk.text;
@@ -426,9 +415,6 @@ export class AiChatService {
inProgressText = '';
},
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
this.logger.log(
`AI chat stream FINISHED chat=${chatId} in ${Date.now() - turnStartedAt}ms, ${steps.length} step(s)`,
);
await persistAssistant({
text,
toolCalls: serializeSteps(steps),
@@ -474,9 +460,6 @@ export class AiChatService {
const e = error as { stack?: string };
const errorText = describeProviderError(error, String(error));
this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
this.logger.warn(
`AI chat stream ERROR terminal chat=${chatId} after ${Date.now() - turnStartedAt}ms`,
);
// Persist the PARTIAL answer streamed before the failure (text + any
// finished tool steps) WITH the error in metadata, so the turn shows what
// the user already saw plus the cause — not just a bare error.
@@ -499,8 +482,7 @@ export class AiChatService {
// invisible in the logs. Log it (warn) so the abort is traceable.
this.logger.warn(
`AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
`step(s), ${partialChars} chars partial text; persisting partial turn` +
` after ${Date.now() - turnStartedAt}ms`,
`step(s), ${partialChars} chars partial text; persisting partial turn.`,
);
await persistAssistant(
buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),