From fd66ee6cceee99aefc02062f7b88a3817e1569be Mon Sep 17 00:00:00 2001
From: claude_code <claude_code@vvzvlad.xyz>
Date: Tue, 23 Jun 2026 02:41:14 +0300
Subject: [PATCH] fix(ai-chat): stop title generation racing the chat stream
 (provider stall)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A new-chat turn fired the chat stream (streamText) and title generation
(generateText) concurrently to the same z.ai coding endpoint. That
endpoint stalls one of two concurrent requests on the same plan, so the
chat stream black-holed for ~300s (undici headers timeout) and the turn
hung forever in every browser; the AI SDK then retried 3x. Server logs
showed two concurrent POSTs to /chat/completions per turn — one 200 in
~8s, the other "fetch failed after 301209ms". Bypassing the custom undici
transport did not help, confirming the cause is the concurrency, not the
transport.

Move generateTitle from before the response pipe into onFinish, so it runs
solo AFTER the stream's provider call completes. A first turn that errors or
aborts no longer auto-titles (fallback "Untitled chat" already handles a
null title) — an acceptable trade-off, and the move removes the competing
request that triggered the stall.
---
 .../src/core/ai-chat/ai-chat.service.ts       | 28 +++++++++++--------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts
index c74b9945..e4932cc2 100644
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -449,6 +449,22 @@ export class AiChatService {
         });
         // Lifecycle: release the external MCP clients leased for this turn.
         await closeExternalClients();
+
+        // Generate the chat title for a freshly created chat AFTER the stream's
+        // provider call has completed — NOT concurrently with it. The z.ai coding
+        // endpoint stalls one of two concurrent requests to the same plan, which
+        // black-holed the chat stream (~300s headers timeout) when title
+        // generation raced it. Running it here (solo, fire-and-forget) avoids the
+        // race; never block the turn on it, swallow any error.
+        if (isNewChat && incomingText) {
+          void this.generateTitle(chatId, workspace.id, incomingText).catch(
+            (err) => {
+              this.logger.warn(
+                `Title generation failed: ${(err as Error)?.message ?? err}`,
+              );
+            },
+          );
+        }
       },
       onError: async ({ error }) => {
         // NestJS Logger.error(message, stack?, context?): pass the real message
@@ -493,18 +509,6 @@ export class AiChatService {
       },
       });
 
-      // Fire-and-forget async title generation for a freshly created chat. Never
-      // block the stream on it; swallow any error.
-      if (isNewChat && incomingText) {
-        void this.generateTitle(chatId, workspace.id, incomingText).catch(
-          (err) => {
-            this.logger.warn(
-              `Title generation failed: ${(err as Error)?.message ?? err}`,
-            );
-          },
-        );
-      }
-
       // Stream the UI-message protocol straight to the hijacked Node response.
       // Without onError the AI SDK masks the cause ('An error occurred.') and the
       // UI shows a generic failure. Surface the real provider message instead.