feat(ai-chat): surface reasoning from openai-compatible providers (z.ai/GLM) (#175) #177

Merged
Ghost merged 2 commits from feat/reasoning-openai-compatible into develop 2026-06-24 23:19:15 +03:00
13 changed files with 337 additions and 27 deletions

View File

@@ -25,9 +25,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
flagging dangling references, empty or duplicate definitions, and `[^id]`
markers inside table rows, so an agent can fix its own markup. The page is
still created; the field is omitted when there are no problems. (#166)
- **AI chat "Protocol" setting (`chatApiStyle`).** A new admin choice in AI
settings for the `openai` driver: `openai-compatible` (default) routes chat
through `@ai-sdk/openai-compatible`, which surfaces a provider's streamed
reasoning (`reasoning_content` → reasoning parts) for z.ai/GLM, DeepSeek,
OpenRouter, etc.; `openai` uses the official provider (real-OpenAI
reasoning-model request shaping). Chosen explicitly rather than inferred from
the base URL, since a custom URL can front real OpenAI too. (#175, #177)
### Changed
- **AI chat default provider is now `openai-compatible` (reasoning surfaced).**
For the `openai` driver the chat provider defaults to the openai-compatible
implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the
model's reasoning out of the box. An endpoint that is real OpenAI behind a
custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. (#177)
- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the
same id are ONE footnote — one number, one definition, several back-references
— instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are

View File

@@ -1307,5 +1307,9 @@
"Page tree (child pages, recursive)": "Page tree (child pages, recursive)",
"Render the full nested tree of all descendant pages": "Render the full nested tree of all descendant pages",
"Showing {{count}} subpages_one": "Showing {{count}} subpage",
"Showing {{count}} subpages_other": "Showing {{count}} subpages"
"Showing {{count}} subpages_other": "Showing {{count}} subpages",
"Protocol": "Protocol",
"How chat requests are sent and how reasoning is surfaced": "How chat requests are sent and how reasoning is surfaced",
"OpenAI-compatible (surfaces reasoning)": "OpenAI-compatible (surfaces reasoning)",
"OpenAI (official)": "OpenAI (official)"
}

View File

@@ -1160,5 +1160,9 @@
"Render the full nested tree of all descendant pages": "Показать полное вложенное дерево всех дочерних страниц",
"Showing {{count}} subpages_one": "Показано {{count}} подстраница",
"Showing {{count}} subpages_few": "Показано {{count}} подстраницы",
"Showing {{count}} subpages_many": "Показано {{count}} подстраниц"
"Showing {{count}} subpages_many": "Показано {{count}} подстраниц",
"Protocol": "Протокол",
"How chat requests are sent and how reasoning is surfaced": "Как отправляются запросы чата и как показывается reasoning",
"OpenAI-compatible (surfaces reasoning)": "OpenAI-совместимый (показывает reasoning)",
"OpenAI (official)": "OpenAI (официальный)"
}

View File

@@ -38,6 +38,7 @@ import {
AiTestCapability,
IAiSettingsUpdate,
SttApiStyle,
ChatApiStyle,
} from "@/features/workspace/services/ai-settings-service.ts";
import { useAiRolesQuery } from "@/features/ai-chat/queries/ai-chat-query.ts";
import { IAiRole } from "@/features/ai-chat/types/ai-chat.types.ts";
@@ -82,6 +83,8 @@ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [
// (empty means "leave unchanged" unless explicitly cleared).
const formSchema = z.object({
chatModel: z.string(),
// Chat provider implementation (reasoning surfacing). Default openai-compatible.
chatApiStyle: z.enum(["openai-compatible", "openai"]),
// Cheap model id for the anonymous public-share assistant; empty = use chatModel.
publicShareChatModel: z.string(),
// Agent-role id whose persona the public-share assistant adopts; empty =
@@ -308,6 +311,7 @@ export default function AiProviderSettings() {
validate: zod4Resolver(formSchema),
initialValues: {
chatModel: "",
chatApiStyle: "openai-compatible" as ChatApiStyle,
publicShareChatModel: "",
publicShareAssistantRoleId: "",
embeddingModel: "",
@@ -330,6 +334,7 @@ export default function AiProviderSettings() {
if (!settings) return;
form.setValues({
chatModel: settings.chatModel ?? "",
chatApiStyle: settings.chatApiStyle ?? "openai-compatible",
publicShareChatModel: settings.publicShareChatModel ?? "",
publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "",
embeddingModel: settings.embeddingModel ?? "",
@@ -359,6 +364,7 @@ export default function AiProviderSettings() {
// Everything is OpenAI-compatible.
driver: "openai",
chatModel: values.chatModel,
chatApiStyle: values.chatApiStyle,
// Cheap model id for the anonymous public-share assistant; empty falls
// back to chatModel server-side.
publicShareChatModel: values.publicShareChatModel,
@@ -761,6 +767,24 @@ export default function AiProviderSettings() {
{t("Resolves to {{url}}", { url: chatResolved })}
</Text>
<Select
mt="sm"
label={t("Protocol")}
description={t(
"How chat requests are sent and how reasoning is surfaced",
)}
data={[
{
value: "openai-compatible",
label: t("OpenAI-compatible (surfaces reasoning)"),
},
{ value: "openai", label: t("OpenAI (official)") },
]}
allowDeselect={false}
disabled={isLoading}
{...form.getInputProps("chatApiStyle")}
/>
{/* Anonymous public-share assistant: a single master toggle + an
optional cheaper model id. Reuses this card's driver/URL/key. */}
<Group justify="space-between" align="center" wrap="nowrap" mt="md">

View File

@@ -9,6 +9,12 @@ export type AiDriver = "openai" | "gemini" | "ollama";
// - 'json' -> JSON body with base64-encoded audio (OpenRouter)
export type SttApiStyle = "multipart" | "json";
// Chat provider implementation for the `openai` driver (chosen explicitly):
// - 'openai-compatible' -> maps streamed reasoning_content to reasoning parts
// (z.ai/GLM, DeepSeek, OpenRouter, ...). Default.
// - 'openai' -> official provider; real-OpenAI reasoning-model shaping.
export type ChatApiStyle = "openai-compatible" | "openai";
// Masked AI provider settings returned by the server.
// No API key is ever returned; only `hasApiKey` / `hasEmbeddingApiKey` indicate
// whether one is stored. `embeddingBaseUrl` is the RAW stored value (empty means
@@ -16,6 +22,7 @@ export type SttApiStyle = "multipart" | "json";
export interface IAiSettings {
driver?: AiDriver;
chatModel?: string;
chatApiStyle?: ChatApiStyle;
// Cheap model id for the anonymous public-share assistant; empty = chatModel.
publicShareChatModel?: string;
// Agent-role id whose persona the public-share assistant adopts; empty =
@@ -49,6 +56,7 @@ export interface IAiSettings {
export interface IAiSettingsUpdate {
driver?: AiDriver;
chatModel?: string;
chatApiStyle?: ChatApiStyle;
publicShareChatModel?: string;
// Agent-role id whose persona the public-share assistant adopts; empty =
// built-in locked persona.

View File

@@ -10,6 +10,29 @@ import {
import { ExpressionBuilder, sql } from 'kysely';
import { DB, Workspaces } from '@docmost/db/types/db';
/**
* Writable `settings.ai.provider` keys, enforced at this generic SQL layer. This
* repo cannot import AI-feature types, so this list is its own copy; a parity
* test (ai-provider-settings-keys.spec.ts) asserts it equals
* PROVIDER_SETTINGS_KEYS in ai.types so a future drift fails in CI rather than
* silently dropping a field at this boundary.
*/
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
'driver',
'chatModel',
'chatApiStyle',
'embeddingModel',
'baseUrl',
'embeddingBaseUrl',
'sttModel',
'sttBaseUrl',
'sttApiStyle',
'sttLanguage',
'systemPrompt',
'publicShareChatModel',
'publicShareAssistantRoleId',
];
@Injectable()
export class WorkspaceRepo {
public baseFields: Array<keyof Workspaces> = [
@@ -239,9 +262,8 @@ export class WorkspaceRepo {
// is a real jsonb object, never a double-encoded string. The CASE self-heals
// workspaces whose settings.ai.provider was previously corrupted into an
// array/string.
const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId'];
const entries = Object.entries(provider).filter(
([k, v]) => v !== undefined && ALLOWED.includes(k),
([k, v]) => v !== undefined && AI_PROVIDER_SETTINGS_ALLOWED.includes(k),
);
const patch = entries.length
? sql`jsonb_build_object(${sql.join(

View File

@@ -0,0 +1,43 @@
import { validate } from 'class-validator';
import { plainToInstance } from 'class-transformer';
import { PROVIDER_SETTINGS_KEYS } from './ai.types';
import { AI_PROVIDER_SETTINGS_ALLOWED } from '@docmost/db/repos/workspace/workspace.repo';
import { UpdateAiSettingsDto } from './dto/update-ai-settings.dto';
/**
* Drift guard: the writable provider-settings keys are maintained in two layers
* that TypeScript cannot cross-check — PROVIDER_SETTINGS_KEYS (ai.types, used by
* the settings service) and AI_PROVIDER_SETTINGS_ALLOWED (the generic workspace
* repo's SQL boundary). A key missing from the repo copy silently drops the field
* on persist (exactly what happened to chatApiStyle), so this asserts they match.
*/
describe('provider-settings key allowlist parity', () => {
it('the repo SQL allowlist equals PROVIDER_SETTINGS_KEYS', () => {
expect([...AI_PROVIDER_SETTINGS_ALLOWED].sort()).toEqual(
[...PROVIDER_SETTINGS_KEYS].sort(),
);
});
});
/** DTO validation for the new chatApiStyle field (@IsIn(CHAT_API_STYLES)). */
describe('UpdateAiSettingsDto.chatApiStyle', () => {
const errorsFor = async (chatApiStyle: unknown) =>
validate(plainToInstance(UpdateAiSettingsDto, { chatApiStyle }));
it('accepts both valid values', async () => {
for (const v of ['openai-compatible', 'openai']) {
const errs = await errorsFor(v);
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
}
});
it('rejects an unknown value', async () => {
const errs = await errorsFor('definitely-not-a-style');
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeDefined();
});
it('accepts the field being omitted (optional)', async () => {
const errs = await validate(plainToInstance(UpdateAiSettingsDto, {}));
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
});
});

View File

@@ -14,6 +14,8 @@ import {
MaskedAiSettings,
ResolvedAiConfig,
SttApiStyle,
ChatApiStyle,
PROVIDER_SETTINGS_KEYS,
} from './ai.types';
/**
@@ -24,6 +26,7 @@ import {
export interface UpdateAiSettingsInput {
driver?: AiDriver;
chatModel?: string;
chatApiStyle?: ChatApiStyle;
embeddingModel?: string;
baseUrl?: string;
embeddingBaseUrl?: string;
@@ -157,6 +160,8 @@ export class AiSettingsService {
const config: ResolvedAiConfig = {
driver: provider.driver,
chatModel: provider.chatModel,
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
chatApiStyle: provider.chatApiStyle,
// Cheap model id for the anonymous public-share assistant; reuses the chat
// driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel.
publicShareChatModel: provider.publicShareChatModel,
@@ -238,6 +243,7 @@ export class AiSettingsService {
return {
driver: provider.driver,
chatModel: provider.chatModel,
chatApiStyle: provider.chatApiStyle,
embeddingModel: provider.embeddingModel,
baseUrl: provider.baseUrl,
embeddingBaseUrl: provider.embeddingBaseUrl,
@@ -275,20 +281,8 @@ export class AiSettingsService {
// Persist non-secret provider fields (only those present in the partial).
const providerPatch: Partial<AiProviderSettings> = {};
for (const key of [
'driver',
'chatModel',
'embeddingModel',
'baseUrl',
'embeddingBaseUrl',
'sttModel',
'sttBaseUrl',
'sttApiStyle',
'sttLanguage',
'systemPrompt',
'publicShareChatModel',
'publicShareAssistantRoleId',
] as const) {
// Single source of truth for the writable provider keys (see ai.types).
for (const key of PROVIDER_SETTINGS_KEYS) {
if (nonSecret[key] !== undefined) {
(providerPatch as Record<string, unknown>)[key] = nonSecret[key];
}

View File

@@ -0,0 +1,58 @@
// `.provider` alone cannot prove the openai-compatible factory was called with
// `includeUsage: true` — a regression dropping it (which zeroes streamed token
// usage / reasoning-token metadata) would still pass. So mock the factory and
// assert the exact args. jest.mock is module-scoped, hence a dedicated file.
const mockCompatibleModel = { provider: 'openai-compatible.chat', modelId: 'm' };
// jest allows `mock`-prefixed vars inside a jest.mock factory.
const mockCreateOpenAICompatible = jest.fn(
(_settings: unknown) => () => mockCompatibleModel,
);
jest.mock('@ai-sdk/openai-compatible', () => ({
createOpenAICompatible: (settings: unknown) =>
mockCreateOpenAICompatible(settings),
}));
import { AiService } from './ai.service';
describe('AiService.getChatModel openai-compatible factory args', () => {
function serviceWith(chatApiStyle?: 'openai-compatible' | 'openai') {
const aiSettings = {
resolve: jest.fn().mockResolvedValue({
driver: 'openai',
chatModel: 'glm-5.2',
apiKey: 'the-key',
baseUrl: 'https://api.z.ai/v4',
chatApiStyle,
}),
};
return new AiService(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
aiSettings as any,
{ find: jest.fn() } as never,
{ decryptSecret: jest.fn() } as never,
);
}
beforeEach(() => mockCreateOpenAICompatible.mockClear());
it('passes includeUsage:true plus baseURL/apiKey/fetch (default style)', async () => {
await serviceWith().getChatModel('ws-1'); // unset -> openai-compatible
expect(mockCreateOpenAICompatible).toHaveBeenCalledTimes(1);
expect(mockCreateOpenAICompatible).toHaveBeenCalledWith(
expect.objectContaining({
name: 'openai-compatible',
baseURL: 'https://api.z.ai/v4',
apiKey: 'the-key',
includeUsage: true,
fetch: expect.any(Function),
}),
);
});
it("does NOT use the openai-compatible factory for chatApiStyle 'openai'", async () => {
await serviceWith('openai').getChatModel('ws-1');
expect(mockCreateOpenAICompatible).not.toHaveBeenCalled();
});
});

View File

@@ -285,3 +285,64 @@ describe('AiService.getChatModel role model override', () => {
);
});
});
/**
* Chat provider selection by the EXPLICIT `chatApiStyle` (NOT inferred from
* baseUrl): 'openai-compatible' (default) uses @ai-sdk/openai-compatible, which
* maps streamed reasoning_content to reasoning parts; 'openai' uses the official
* provider; and openai-compatible without a baseURL safely falls back to the
* official provider (it has no default endpoint). Asserted via `.provider`.
*/
describe('AiService.getChatModel chatApiStyle provider selection', () => {
function serviceWith(opts: {
baseUrl?: string;
chatApiStyle?: 'openai-compatible' | 'openai';
}) {
const aiSettings = {
resolve: jest.fn().mockResolvedValue({
driver: 'openai',
chatModel: 'glm-5.2',
apiKey: 'key',
baseUrl: opts.baseUrl,
chatApiStyle: opts.chatApiStyle,
}),
};
return new AiService(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
aiSettings as any,
{ find: jest.fn() } as never,
{ decryptSecret: jest.fn() } as never,
);
}
const providerOf = async (svc: AiService) =>
(
(await svc.getChatModel('ws-1')) as { provider: string }
).provider;
it("'openai-compatible' + baseURL -> openai-compatible provider", async () => {
expect(
await providerOf(
serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai-compatible' }),
),
).toContain('openai-compatible');
});
it("'openai' + baseURL -> official openai provider", async () => {
expect(
await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai' })),
).toBe('openai.chat');
});
it('unset + baseURL -> defaults to openai-compatible', async () => {
expect(
await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4' })),
).toContain('openai-compatible');
});
it("'openai-compatible' WITHOUT baseURL -> safe fallback to official openai", async () => {
expect(
await providerOf(serviceWith({ chatApiStyle: 'openai-compatible' })),
).toBe('openai.chat');
});
});

View File

@@ -7,6 +7,7 @@ import {
type LanguageModel,
} from 'ai';
import { createOpenAI } from '@ai-sdk/openai';
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createOllama } from 'ai-sdk-ollama';
import { AiSettingsService } from './ai-settings.service';
@@ -95,6 +96,10 @@ export class AiService {
let apiKey = cfg.apiKey;
let baseUrl = cfg.baseUrl;
// Chat provider implementation, chosen EXPLICITLY by the admin (not inferred
// from baseUrl). Unset → 'openai-compatible' so reasoning is surfaced by
// default for this fork's openai+baseUrl setups.
const chatApiStyle = cfg.chatApiStyle ?? 'openai-compatible';
// A driver override that differs from the workspace driver needs that
// driver's own creds (the workspace driver's key would be wrong/absent).
@@ -145,19 +150,41 @@ export class AiService {
}
switch (driver) {
case 'openai':
// baseURL (when set) covers openai-compatible endpoints. Use Chat
// Completions (/chat/completions) — the portable OpenAI-compatible
// endpoint. The default callable createOpenAI(...)(model) targets the
// Responses API (/responses), which OpenAI-compatible gateways
// (OpenRouter, etc.) reject on multi-turn requests (history with
// assistant messages) → 400. The provider fetch is the instrumented
// streaming fetch (finite-but-generous stream timeouts, #175).
case 'openai': {
// The provider implementation is chosen by the admin's `chatApiStyle`
// (NOT inferred from baseUrl — a custom URL can front real OpenAI too).
// Both branches hit Chat Completions (/chat/completions); the provider
// fetch is the instrumented streaming fetch (finite-but-generous stream
// timeouts, #175).
//
// 'openai-compatible' (default) maps the third-party provider's streamed
// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, ...) — the
// point of #175. It has no default endpoint, so it requires a baseURL;
// when there is none (real OpenAI, or a role's cross-driver override that
// cleared baseUrl) we fall back to the official provider.
if (chatApiStyle === 'openai-compatible' && baseUrl) {
return createOpenAICompatible({
name: 'openai-compatible',
apiKey,
baseURL: baseUrl,
// Keep streamed token usage (stream_options.include_usage): without
// it @ai-sdk/openai-compatible omits usage, zeroing the live token
// counter and reasoning-token metadata. The official provider always
// sent it, so this preserves parity.
includeUsage: true,
fetch: this.aiProviderFetch,
})(chatModel);
}
// Official @ai-sdk/openai: real-OpenAI reasoning-model request shaping;
// `.chat()` targets Chat Completions (the default callable targets the
// Responses API, which openai-compatible gateways 400 on multi-turn
// history). In this fork baseUrl is normally set; undefined = real OpenAI.
return createOpenAI({
apiKey,
baseURL: baseUrl,
fetch: this.aiProviderFetch,
}).chat(chatModel);
}
case 'gemini':
return createGoogleGenerativeAI({ apiKey })(chatModel);
case 'ollama':

View File

@@ -16,6 +16,15 @@ export const AI_DRIVERS: AiDriver[] = ['openai', 'gemini', 'ollama'];
export type SttApiStyle = 'multipart' | 'json';
export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json'];
// Chat provider implementation for the `openai` driver. Chosen explicitly by the
// admin (NOT inferred from baseUrl — a custom URL can front real OpenAI too).
// 'openai-compatible' = @ai-sdk/openai-compatible: maps streamed
// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, OpenRouter, ...).
// 'openai' = official @ai-sdk/openai: real-OpenAI reasoning-model request shaping
// (max_completion_tokens, the 'developer' role), no third-party reasoning map.
export type ChatApiStyle = 'openai-compatible' | 'openai';
export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai'];
/**
* Non-secret provider settings persisted under `settings.ai.provider`.
* The API key is intentionally absent here.
@@ -23,6 +32,9 @@ export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json'];
export interface AiProviderSettings {
driver: AiDriver;
chatModel: string;
// Chat provider implementation for the `openai` driver. Unset → defaults to
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
chatApiStyle?: ChatApiStyle;
embeddingModel?: string;
baseUrl?: string;
// Embedding-specific base URL. Falls back to `baseUrl` when empty/unset.
@@ -45,6 +57,34 @@ export interface AiProviderSettings {
publicShareAssistantRoleId?: string;
}
/**
* The persisted, non-secret provider setting keys — the SINGLE source of truth
* for which fields a settings update may write through to `settings.ai.provider`.
* `satisfies readonly (keyof AiProviderSettings)[]` makes the compiler reject a
* typo or a key that is not a real provider setting.
*
* The settings service consumes this directly. The generic workspace repo cannot
* import AI types, so it keeps its own copy of the same keys, guarded by a parity
* test against this constant (so any future drift fails in CI, not silently in
* prod — a missing key there validates fine, passes the service, and is then
* dropped at the SQL boundary with no error).
*/
export const PROVIDER_SETTINGS_KEYS = [
'driver',
'chatModel',
'chatApiStyle',
'embeddingModel',
'baseUrl',
'embeddingBaseUrl',
'sttModel',
'sttBaseUrl',
'sttApiStyle',
'sttLanguage',
'systemPrompt',
'publicShareChatModel',
'publicShareAssistantRoleId',
] as const satisfies readonly (keyof AiProviderSettings)[];
/**
* Fully resolved provider config, including the decrypted API key for the
* stored driver. Returned by `AiSettingsService.resolve`. The keys are held in
@@ -76,6 +116,7 @@ export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
export interface MaskedAiSettings {
driver?: AiDriver;
chatModel?: string;
chatApiStyle?: ChatApiStyle;
embeddingModel?: string;
baseUrl?: string;
embeddingBaseUrl?: string;

View File

@@ -1,5 +1,12 @@
import { IsIn, IsOptional, IsString } from 'class-validator';
import { AI_DRIVERS, AiDriver, STT_API_STYLES, SttApiStyle } from '../ai.types';
import {
AI_DRIVERS,
AiDriver,
CHAT_API_STYLES,
ChatApiStyle,
STT_API_STYLES,
SttApiStyle,
} from '../ai.types';
/**
* Admin update payload for the workspace AI provider settings.
@@ -18,6 +25,10 @@ export class UpdateAiSettingsDto {
@IsString()
chatModel?: string;
@IsOptional()
@IsIn(CHAT_API_STYLES)
chatApiStyle?: ChatApiStyle;
@IsOptional()
@IsString()
embeddingModel?: string;