From 59190148db2aa6c4e0b43f35b1c3ab00ecb44d38 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 22:58:15 +0300 Subject: [PATCH] feat(ai-chat): explicit chatApiStyle selector to surface reasoning (#175) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebuilt on develop (after #176) and reworked per review: instead of inferring the provider from baseUrl (`if (baseUrl)`), the admin picks the chat provider EXPLICITLY via a new `chatApiStyle` ('openai-compatible' | 'openai'), mirroring the existing sttApiStyle. A custom baseURL can front real OpenAI too, so the heuristic was fragile. Why reasoning was missing: glm-5.2 (and DeepSeek etc.) stream their thinking as `reasoning_content`, but the official @ai-sdk/openai provider does not map that field. 'openai-compatible' uses @ai-sdk/openai-compatible, which does — so reasoning parts now stream (verified live: reasoning-start/delta/end appear, and disappear when set to 'openai'). - Default (unset) = 'openai-compatible', so existing openai+baseUrl workspaces surface reasoning with no admin action. No DB migration (field lives in the settings.ai.provider JSON blob). - includeUsage: true on the openai-compatible model — without it the provider omits streamed usage, zeroing the live token counter / reasoning-token metadata. The official provider always sent it; this keeps parity. (Confirmed live: usage.totalTokens present.) - openai-compatible has no default endpoint, so with no baseURL (real OpenAI, or a role's cross-driver override that cleared it) it falls back to the official provider. Plumbing: ai.types (ChatApiStyle / CHAT_API_STYLES + AiProviderSettings / MaskedAiSettings), update DTO (@IsIn), ai-settings.service (resolve / getMasked / update allowlist), workspace.repo updateAiProviderSettings ALLOWED (the second, SQL-level allowlist the review missed — without it the field never persisted), ai.service selector. Client: ai-settings-service types + a Protocol + {/* Anonymous public-share assistant: a single master toggle + an optional cheaper model id. Reuses this card's driver/URL/key. */} diff --git a/apps/client/src/features/workspace/services/ai-settings-service.ts b/apps/client/src/features/workspace/services/ai-settings-service.ts index 1814acd5..189589b0 100644 --- a/apps/client/src/features/workspace/services/ai-settings-service.ts +++ b/apps/client/src/features/workspace/services/ai-settings-service.ts @@ -9,6 +9,12 @@ export type AiDriver = "openai" | "gemini" | "ollama"; // - 'json' -> JSON body with base64-encoded audio (OpenRouter) export type SttApiStyle = "multipart" | "json"; +// Chat provider implementation for the `openai` driver (chosen explicitly): +// - 'openai-compatible' -> maps streamed reasoning_content to reasoning parts +// (z.ai/GLM, DeepSeek, OpenRouter, ...). Default. +// - 'openai' -> official provider; real-OpenAI reasoning-model shaping. +export type ChatApiStyle = "openai-compatible" | "openai"; + // Masked AI provider settings returned by the server. // No API key is ever returned; only `hasApiKey` / `hasEmbeddingApiKey` indicate // whether one is stored. `embeddingBaseUrl` is the RAW stored value (empty means @@ -16,6 +22,7 @@ export type SttApiStyle = "multipart" | "json"; export interface IAiSettings { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; // Cheap model id for the anonymous public-share assistant; empty = chatModel. publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = @@ -49,6 +56,7 @@ export interface IAiSettings { export interface IAiSettingsUpdate { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = // built-in locked persona. diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 182a45f2..95e33aa9 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -239,7 +239,7 @@ export class WorkspaceRepo { // is a real jsonb object, never a double-encoded string. The CASE self-heals // workspaces whose settings.ai.provider was previously corrupted into an // array/string. - const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId']; + const ALLOWED = ['driver', 'chatModel', 'chatApiStyle', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId']; const entries = Object.entries(provider).filter( ([k, v]) => v !== undefined && ALLOWED.includes(k), ); diff --git a/apps/server/src/integrations/ai/ai-settings.service.ts b/apps/server/src/integrations/ai/ai-settings.service.ts index e556c0d0..0717c3c4 100644 --- a/apps/server/src/integrations/ai/ai-settings.service.ts +++ b/apps/server/src/integrations/ai/ai-settings.service.ts @@ -14,6 +14,7 @@ import { MaskedAiSettings, ResolvedAiConfig, SttApiStyle, + ChatApiStyle, } from './ai.types'; /** @@ -24,6 +25,7 @@ import { export interface UpdateAiSettingsInput { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; @@ -157,6 +159,8 @@ export class AiSettingsService { const config: ResolvedAiConfig = { driver: provider.driver, chatModel: provider.chatModel, + // Plain passthrough; getChatModel defaults unset to 'openai-compatible'. + chatApiStyle: provider.chatApiStyle, // Cheap model id for the anonymous public-share assistant; reuses the chat // driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel. publicShareChatModel: provider.publicShareChatModel, @@ -238,6 +242,7 @@ export class AiSettingsService { return { driver: provider.driver, chatModel: provider.chatModel, + chatApiStyle: provider.chatApiStyle, embeddingModel: provider.embeddingModel, baseUrl: provider.baseUrl, embeddingBaseUrl: provider.embeddingBaseUrl, @@ -278,6 +283,7 @@ export class AiSettingsService { for (const key of [ 'driver', 'chatModel', + 'chatApiStyle', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', diff --git a/apps/server/src/integrations/ai/ai.service.spec.ts b/apps/server/src/integrations/ai/ai.service.spec.ts index ef44a59d..b3c7f6f0 100644 --- a/apps/server/src/integrations/ai/ai.service.spec.ts +++ b/apps/server/src/integrations/ai/ai.service.spec.ts @@ -285,3 +285,64 @@ describe('AiService.getChatModel role model override', () => { ); }); }); + +/** + * Chat provider selection by the EXPLICIT `chatApiStyle` (NOT inferred from + * baseUrl): 'openai-compatible' (default) uses @ai-sdk/openai-compatible, which + * maps streamed reasoning_content to reasoning parts; 'openai' uses the official + * provider; and openai-compatible without a baseURL safely falls back to the + * official provider (it has no default endpoint). Asserted via `.provider`. + */ +describe('AiService.getChatModel chatApiStyle provider selection', () => { + function serviceWith(opts: { + baseUrl?: string; + chatApiStyle?: 'openai-compatible' | 'openai'; + }) { + const aiSettings = { + resolve: jest.fn().mockResolvedValue({ + driver: 'openai', + chatModel: 'glm-5.2', + apiKey: 'key', + baseUrl: opts.baseUrl, + chatApiStyle: opts.chatApiStyle, + }), + }; + return new AiService( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + aiSettings as any, + { find: jest.fn() } as never, + { decryptSecret: jest.fn() } as never, + ); + } + + const providerOf = async (svc: AiService) => + ( + (await svc.getChatModel('ws-1')) as { provider: string } + ).provider; + + it("'openai-compatible' + baseURL -> openai-compatible provider", async () => { + expect( + await providerOf( + serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai-compatible' }), + ), + ).toContain('openai-compatible'); + }); + + it("'openai' + baseURL -> official openai provider", async () => { + expect( + await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai' })), + ).toBe('openai.chat'); + }); + + it('unset + baseURL -> defaults to openai-compatible', async () => { + expect( + await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4' })), + ).toContain('openai-compatible'); + }); + + it("'openai-compatible' WITHOUT baseURL -> safe fallback to official openai", async () => { + expect( + await providerOf(serviceWith({ chatApiStyle: 'openai-compatible' })), + ).toBe('openai.chat'); + }); +}); diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts index 2a524f2c..18f15b5d 100644 --- a/apps/server/src/integrations/ai/ai.service.ts +++ b/apps/server/src/integrations/ai/ai.service.ts @@ -7,6 +7,7 @@ import { type LanguageModel, } from 'ai'; import { createOpenAI } from '@ai-sdk/openai'; +import { createOpenAICompatible } from '@ai-sdk/openai-compatible'; import { createGoogleGenerativeAI } from '@ai-sdk/google'; import { createOllama } from 'ai-sdk-ollama'; import { AiSettingsService } from './ai-settings.service'; @@ -95,6 +96,10 @@ export class AiService { let apiKey = cfg.apiKey; let baseUrl = cfg.baseUrl; + // Chat provider implementation, chosen EXPLICITLY by the admin (not inferred + // from baseUrl). Unset → 'openai-compatible' so reasoning is surfaced by + // default for this fork's openai+baseUrl setups. + const chatApiStyle = cfg.chatApiStyle ?? 'openai-compatible'; // A driver override that differs from the workspace driver needs that // driver's own creds (the workspace driver's key would be wrong/absent). @@ -145,19 +150,41 @@ export class AiService { } switch (driver) { - case 'openai': - // baseURL (when set) covers openai-compatible endpoints. Use Chat - // Completions (/chat/completions) — the portable OpenAI-compatible - // endpoint. The default callable createOpenAI(...)(model) targets the - // Responses API (/responses), which OpenAI-compatible gateways - // (OpenRouter, etc.) reject on multi-turn requests (history with - // assistant messages) → 400. The provider fetch is the instrumented - // streaming fetch (finite-but-generous stream timeouts, #175). + case 'openai': { + // The provider implementation is chosen by the admin's `chatApiStyle` + // (NOT inferred from baseUrl — a custom URL can front real OpenAI too). + // Both branches hit Chat Completions (/chat/completions); the provider + // fetch is the instrumented streaming fetch (finite-but-generous stream + // timeouts, #175). + // + // 'openai-compatible' (default) maps the third-party provider's streamed + // `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, ...) — the + // point of #175. It has no default endpoint, so it requires a baseURL; + // when there is none (real OpenAI, or a role's cross-driver override that + // cleared baseUrl) we fall back to the official provider. + if (chatApiStyle === 'openai-compatible' && baseUrl) { + return createOpenAICompatible({ + name: 'openai-compatible', + apiKey, + baseURL: baseUrl, + // Keep streamed token usage (stream_options.include_usage): without + // it @ai-sdk/openai-compatible omits usage, zeroing the live token + // counter and reasoning-token metadata. The official provider always + // sent it, so this preserves parity. + includeUsage: true, + fetch: this.aiProviderFetch, + })(chatModel); + } + // Official @ai-sdk/openai: real-OpenAI reasoning-model request shaping; + // `.chat()` targets Chat Completions (the default callable targets the + // Responses API, which openai-compatible gateways 400 on multi-turn + // history). In this fork baseUrl is normally set; undefined = real OpenAI. return createOpenAI({ apiKey, baseURL: baseUrl, fetch: this.aiProviderFetch, }).chat(chatModel); + } case 'gemini': return createGoogleGenerativeAI({ apiKey })(chatModel); case 'ollama': diff --git a/apps/server/src/integrations/ai/ai.types.ts b/apps/server/src/integrations/ai/ai.types.ts index 0a3d925e..5cdb6d1d 100644 --- a/apps/server/src/integrations/ai/ai.types.ts +++ b/apps/server/src/integrations/ai/ai.types.ts @@ -16,6 +16,15 @@ export const AI_DRIVERS: AiDriver[] = ['openai', 'gemini', 'ollama']; export type SttApiStyle = 'multipart' | 'json'; export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json']; +// Chat provider implementation for the `openai` driver. Chosen explicitly by the +// admin (NOT inferred from baseUrl — a custom URL can front real OpenAI too). +// 'openai-compatible' = @ai-sdk/openai-compatible: maps streamed +// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, OpenRouter, ...). +// 'openai' = official @ai-sdk/openai: real-OpenAI reasoning-model request shaping +// (max_completion_tokens, the 'developer' role), no third-party reasoning map. +export type ChatApiStyle = 'openai-compatible' | 'openai'; +export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai']; + /** * Non-secret provider settings persisted under `settings.ai.provider`. * The API key is intentionally absent here. @@ -23,6 +32,9 @@ export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json']; export interface AiProviderSettings { driver: AiDriver; chatModel: string; + // Chat provider implementation for the `openai` driver. Unset → defaults to + // 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle. + chatApiStyle?: ChatApiStyle; embeddingModel?: string; baseUrl?: string; // Embedding-specific base URL. Falls back to `baseUrl` when empty/unset. @@ -76,6 +88,7 @@ export interface ResolvedAiConfig extends Partial { export interface MaskedAiSettings { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; diff --git a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts index 37fe8143..53aa8220 100644 --- a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts +++ b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts @@ -1,5 +1,12 @@ import { IsIn, IsOptional, IsString } from 'class-validator'; -import { AI_DRIVERS, AiDriver, STT_API_STYLES, SttApiStyle } from '../ai.types'; +import { + AI_DRIVERS, + AiDriver, + CHAT_API_STYLES, + ChatApiStyle, + STT_API_STYLES, + SttApiStyle, +} from '../ai.types'; /** * Admin update payload for the workspace AI provider settings. @@ -18,6 +25,10 @@ export class UpdateAiSettingsDto { @IsString() chatModel?: string; + @IsOptional() + @IsIn(CHAT_API_STYLES) + chatApiStyle?: ChatApiStyle; + @IsOptional() @IsString() embeddingModel?: string;