chore(ai-chat): add temporary Safari stream-drop diagnostics

Investigate the Safari-only "Lost connection to the AI provider" mid-stream
disconnect (Chrome unaffected). Pure instrumentation, no behavior change:
the 15s heartbeat interval and all stream callbacks are unchanged.

- sse-resilience.ts: startSseHeartbeat() gains an optional onBeat hook fired
  after each successfully written ping (beat counter).
- ai-chat.service.ts: track stream start, first-chunk latency, model-silent
  gap and heartbeat count; log them on finish/error/abort to classify the
  drop (idle-gap vs hard wall-clock cap vs slow first chunk).
- ai-chat.controller.ts: append elapsed-since-request to the disconnect warn.

All blocks tagged "DIAGNOSTIC ... temporary" for easy removal once the Safari
failure mode is identified.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-24 15:14:29 +03:00
parent 6566d2153c
commit 13cac155c1
3 changed files with 59 additions and 2 deletions

View File

@@ -159,6 +159,9 @@ export class AiChatController {
// we also drop it on response `finish` so it never lingers after the stream
// completes normally (the AI SDK pipes the response fire-and-forget, so we
// cannot simply remove it once `stream()` returns).
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: wall-clock at
// which a Safari disconnect is observed, measured from request receipt.
const reqStartedAt = Date.now();
const controller = new AbortController();
const onClose = (): void => {
// A genuine disconnect leaves the response unfinished (unlike a normal
@@ -167,7 +170,8 @@ export class AiChatController {
// so log it here before aborting the agent loop.
if (!res.raw.writableEnded) {
this.logger.warn(
'AI chat stream: client disconnected before completion; aborting turn',
`AI chat stream: client disconnected before completion; aborting turn ` +
`(elapsed=${Date.now() - reqStartedAt}ms since request received)`,
);
controller.abort();
}

View File

@@ -380,6 +380,15 @@ export class AiChatService {
const capturedSteps: StepLike[] = [];
let inProgressText = '';
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure
// first-chunk latency, the model-silent gap right before a disconnect, and
// how many SSE heartbeats were written, so a Safari drop can be classified
// (idle-gap vs hard wall-clock cap vs slow first chunk).
const streamStartedAt = Date.now();
let firstModelChunkAt: number | undefined;
let lastModelChunkAt = streamStartedAt;
let heartbeatsSent = 0;
// NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous
// failure here (or in pipe below) would skip the terminal callbacks, so the
// catch releases the leased external clients to avoid a connection leak.
@@ -404,6 +413,12 @@ export class AiChatService {
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
abortSignal: signal,
onChunk: ({ chunk }) => {
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
// output chunk means the stream is actively emitting bytes; track first
// + most-recent activity timestamps.
const now = Date.now();
firstModelChunkAt ??= now;
lastModelChunkAt = now;
// 'text-delta' is the assistant's prose; tool-call args are separate chunk
// types — so this mirrors exactly what streams to the client.
if (chunk.type === 'text-delta') inProgressText += chunk.text;
@@ -415,6 +430,14 @@ export class AiChatService {
inProgressText = '';
},
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
// baseline for Safari comparison.
const diagNow = Date.now();
this.logger.log(
`AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
`heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
);
await persistAssistant({
text,
toolCalls: serializeSteps(steps),
@@ -464,6 +487,14 @@ export class AiChatService {
const e = error as { stack?: string };
const errorText = describeProviderError(error, String(error));
this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
// an error-terminated stream.
const diagNow = Date.now();
this.logger.warn(
`AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
);
// Persist the PARTIAL answer streamed before the failure (text + any
// finished tool steps) WITH the error in metadata, so the turn shows what
// the user already saw plus the cause — not just a bare error.
@@ -488,6 +519,15 @@ export class AiChatService {
`AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
`step(s), ${partialChars} chars partial text; persisting partial turn.`,
);
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
// line — classifies the Safari drop.
const diagNow = Date.now();
this.logger.warn(
`AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
`steps=${steps.length}`,
);
await persistAssistant(
buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),
);
@@ -566,7 +606,11 @@ export class AiChatService {
// headers are sent, and is guarded for response-likes that lack it.
res.raw.flushHeaders?.();
// Heartbeat: keep the SSE stream progressing during silent tool/think gaps (Safari/proxy idle timeout).
startSseHeartbeat(res.raw);
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: count beats so a disconnect log can show
// how many pings were written before Safari dropped.
startSseHeartbeat(res.raw, 15_000, () => {
heartbeatsSent += 1;
});
} catch (err) {
// Synchronous failure before/while wiring the stream: the terminal
// callbacks will not run, so release the leased external clients here and

View File

@@ -28,15 +28,24 @@ import type { ServerResponse } from 'node:http';
* the response finishes or the socket closes. The interval is unref()'d so it
* never keeps the process alive, and writes are guarded so we never write to an
* already-ended/destroyed socket.
*
* `onBeat` is an OPTIONAL diagnostic hook invoked once after each heartbeat that
* was actually written (only when the write did not throw). It is purely for
* telemetry/counters and never affects the heartbeat behavior.
*/
export function startSseHeartbeat(
res: ServerResponse,
intervalMs = 15_000,
onBeat?: () => void,
): () => void {
const timer = setInterval(() => {
if (res.writableEnded || res.destroyed) return;
try {
res.write(': ping\n\n');
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Notify the
// optional hook only after a successful write, so beat counters reflect
// pings that actually reached the socket.
onBeat?.();
} catch {
// Socket vanished between the guard and the write; nothing to do.
}