From fe1bbbe8065b2a9554ccf615a5c1808657504751 Mon Sep 17 00:00:00 2001
From: claude code agent 227 <claude_code@vvzvlad.xyz>
Date: Wed, 24 Jun 2026 22:18:15 +0300
Subject: [PATCH] feat(ai-chat): surface reasoning from openai-compatible
 providers (z.ai/GLM) (#175)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The agent's chain-of-thought was never shown: glm-5.2 (and other
openai-compatible providers — DeepSeek, etc.) stream their thinking as
`reasoning_content` deltas, but the official @ai-sdk/openai provider does NOT map
that field (verified: 0 occurrences in the package), so our
`createOpenAI(...).chat()` silently dropped it. The model "thinks" server-side
and only the final answer streamed — which also made the connection look idle
during a long reasoning phase.

Fix: for the `openai` driver with a CUSTOM baseURL (an openai-compatible
third-party endpoint), build the model with @ai-sdk/openai-compatible instead.
It maps the streamed `reasoning_content` to reasoning parts (confirmed live: the
stream now carries reasoning-start/delta/end), which the client already renders,
and it targets Chat Completions (the portable endpoint these gateways accept on
multi-turn history). Real OpenAI (no baseURL) keeps the official provider.

Verified on the test stand against z.ai glm-5.2: reasoning parts now stream; MCP
tool calls (searxng/crawl4ai), the multi-step agent loop, and a normal finish all
still work.

Tests: ai.service.spec asserts the provider switch (custom baseURL ->
openai-compatible; no baseURL -> openai.chat). AI/mcp specs green. server tsc
clean.

Note: complementary to the long-turn timeout fix (#175 / PR
fix/ai-stream-undici-timeout) — they touch the same openai case and compose at
merge.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../src/integrations/ai/ai.service.spec.ts    | 40 +++++++++++++++++++
 apps/server/src/integrations/ai/ai.service.ts | 31 +++++++++-----
 2 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/apps/server/src/integrations/ai/ai.service.spec.ts b/apps/server/src/integrations/ai/ai.service.spec.ts
index ef44a59d..ae9d3770 100644
--- a/apps/server/src/integrations/ai/ai.service.spec.ts
+++ b/apps/server/src/integrations/ai/ai.service.spec.ts
@@ -285,3 +285,43 @@ describe('AiService.getChatModel role model override', () => {
     );
   });
 });
+
+/**
+ * Provider selection for the `openai` driver (reasoning surfacing). A custom
+ * baseURL means an openai-COMPATIBLE third-party endpoint (z.ai/GLM, DeepSeek,
+ * ...): we must use @ai-sdk/openai-compatible, which maps the streamed
+ * `reasoning_content` to reasoning parts (the official @ai-sdk/openai provider
+ * drops it). Real OpenAI (no baseURL) keeps the official provider. We assert via
+ * the built model's `.provider` tag.
+ */
+describe('AiService.getChatModel openai provider selection', () => {
+  function serviceWith(baseUrl: string | undefined) {
+    const aiSettings = {
+      resolve: jest.fn().mockResolvedValue({
+        driver: 'openai',
+        chatModel: 'glm-5.2',
+        apiKey: 'key',
+        baseUrl,
+      }),
+    };
+    return new AiService(
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      aiSettings as any,
+      { find: jest.fn() } as any,
+      { decryptSecret: jest.fn() } as any,
+    );
+  }
+
+  it('uses the openai-compatible provider when a custom baseURL is set', async () => {
+    const model = await serviceWith('https://api.z.ai/api/coding/paas/v4').getChatModel(
+      'ws-1',
+    );
+    // openai-compatible surfaces reasoning_content; tagged "openai-compatible.*".
+    expect((model as { provider: string }).provider).toContain('openai-compatible');
+  });
+
+  it('uses the official openai provider when there is no baseURL (real OpenAI)', async () => {
+    const model = await serviceWith(undefined).getChatModel('ws-1');
+    expect((model as { provider: string }).provider).toBe('openai.chat');
+  });
+});
diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts
index 4f72d23b..46134aed 100644
--- a/apps/server/src/integrations/ai/ai.service.ts
+++ b/apps/server/src/integrations/ai/ai.service.ts
@@ -7,6 +7,7 @@ import {
   type LanguageModel,
 } from 'ai';
 import { createOpenAI } from '@ai-sdk/openai';
+import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
 import { createGoogleGenerativeAI } from '@ai-sdk/google';
 import { createOllama } from 'ai-sdk-ollama';
 import { AiSettingsService } from './ai-settings.service';
@@ -143,17 +144,29 @@ export class AiService {
 
     switch (driver) {
       case 'openai':
-        // baseURL (when set) covers openai-compatible endpoints. Use Chat
-        // Completions (/chat/completions) — the portable OpenAI-compatible
-        // endpoint. The default callable createOpenAI(...)(model) targets the
-        // Responses API (/responses), which OpenAI-compatible gateways
-        // (OpenRouter, etc.) reject on multi-turn requests (history with
-        // assistant messages) → 400.
-        // DIAGNOSTIC (provider ECONNRESET investigation) — temporary: pass the
-        // passive instrumented fetch (logging only; no behavior change).
+        // A custom baseURL means an openai-COMPATIBLE third-party endpoint
+        // (z.ai / GLM, DeepSeek, OpenRouter, ...). Use @ai-sdk/openai-compatible
+        // there: unlike the official @ai-sdk/openai provider, it maps the
+        // provider's streamed `reasoning_content` to reasoning parts, so the
+        // agent's chain-of-thought is surfaced to the UI (and the model is not
+        // silent during a long server-side "thinking" phase). It also targets
+        // Chat Completions (/chat/completions), the portable endpoint that
+        // OpenAI-compatible gateways accept on multi-turn history (the official
+        // provider's default callable targets /responses, which they 400).
+        if (baseUrl) {
+          return createOpenAICompatible({
+            name: 'openai-compatible',
+            apiKey,
+            baseURL: baseUrl,
+            // Passive ECONNRESET telemetry; on the chat path it also carries the
+            // streaming fetch (disabled long-turn timeouts) once #175 lands.
+            fetch: this.aiDiagnosticFetch,
+          })(chatModel);
+        }
+        // Real OpenAI (no custom baseURL): keep the official provider, on Chat
+        // Completions to preserve multi-turn compatibility.
         return createOpenAI({
           apiKey,
-          baseURL: baseUrl,
           fetch: this.aiDiagnosticFetch,
         }).chat(chatModel);
       case 'gemini':