Compare commits
12 Commits
fix/footno
...
fix/ai-str
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c065e26d14 | ||
|
|
b0faa2fe32 | ||
|
|
d1fbcc1bfa | ||
|
|
6edbbab43b | ||
|
|
59190148db | ||
| 80a4b5a1b0 | |||
|
|
da15b55786 | ||
|
|
a14560c7c9 | ||
|
|
4cc8df836f | ||
|
|
04a418e1a6 | ||
|
|
255bc06883 | ||
| 8c06553b49 |
13
.env.example
13
.env.example
@@ -136,6 +136,19 @@ MCP_DOCMOST_PASSWORD=
|
||||
# A slow/hung embeddings endpoint fails after this and the batch continues.
|
||||
# AI_EMBEDDING_TIMEOUT_MS=120000
|
||||
|
||||
# Silence timeout (ms) for streaming chat/agent AI calls AND external-MCP traffic.
|
||||
# Bounds time-to-first-byte and the gap BETWEEN chunks (NOT the total turn length),
|
||||
# so an arbitrarily long turn that keeps streaming is never cut. Finite so a hung
|
||||
# provider is eventually broken instead of leaking forever. Default 900000 (15 min).
|
||||
# AI_STREAM_TIMEOUT_MS=900000
|
||||
|
||||
# Keep-alive recycle window (ms) for streaming chat/agent AI + external-MCP calls.
|
||||
# A pooled connection idle longer than this is closed instead of reused, so a
|
||||
# NAT / egress firewall / reverse proxy that silently drops idle connections
|
||||
# cannot poison a reused socket into a PRE-RESPONSE `read ECONNRESET`. Lower it if
|
||||
# your egress drops idle connections faster than ~10s. Default 10000 (10 s).
|
||||
# AI_STREAM_KEEPALIVE_MS=10000
|
||||
|
||||
# --- Anonymous public-share AI assistant ---
|
||||
# Opt-in per workspace (AI settings -> "public share assistant"; off by default).
|
||||
# When enabled, anonymous visitors of a published share can ask an AI about that
|
||||
|
||||
13
CHANGELOG.md
13
CHANGELOG.md
@@ -25,9 +25,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
flagging dangling references, empty or duplicate definitions, and `[^id]`
|
||||
markers inside table rows, so an agent can fix its own markup. The page is
|
||||
still created; the field is omitted when there are no problems. (#166)
|
||||
- **AI chat "Protocol" setting (`chatApiStyle`).** A new admin choice in AI
|
||||
settings for the `openai` driver: `openai-compatible` (default) routes chat
|
||||
through `@ai-sdk/openai-compatible`, which surfaces a provider's streamed
|
||||
reasoning (`reasoning_content` → reasoning parts) for z.ai/GLM, DeepSeek,
|
||||
OpenRouter, etc.; `openai` uses the official provider (real-OpenAI
|
||||
reasoning-model request shaping). Chosen explicitly rather than inferred from
|
||||
the base URL, since a custom URL can front real OpenAI too. (#175, #177)
|
||||
|
||||
### Changed
|
||||
|
||||
- **AI chat default provider is now `openai-compatible` (reasoning surfaced).**
|
||||
For the `openai` driver the chat provider defaults to the openai-compatible
|
||||
implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the
|
||||
model's reasoning out of the box. An endpoint that is real OpenAI behind a
|
||||
custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. (#177)
|
||||
|
||||
- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the
|
||||
same id are ONE footnote — one number, one definition, several back-references
|
||||
— instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are
|
||||
|
||||
@@ -1307,5 +1307,9 @@
|
||||
"Page tree (child pages, recursive)": "Page tree (child pages, recursive)",
|
||||
"Render the full nested tree of all descendant pages": "Render the full nested tree of all descendant pages",
|
||||
"Showing {{count}} subpages_one": "Showing {{count}} subpage",
|
||||
"Showing {{count}} subpages_other": "Showing {{count}} subpages"
|
||||
"Showing {{count}} subpages_other": "Showing {{count}} subpages",
|
||||
"Protocol": "Protocol",
|
||||
"How chat requests are sent and how reasoning is surfaced": "How chat requests are sent and how reasoning is surfaced",
|
||||
"OpenAI-compatible (surfaces reasoning)": "OpenAI-compatible (surfaces reasoning)",
|
||||
"OpenAI (official)": "OpenAI (official)"
|
||||
}
|
||||
|
||||
@@ -1160,5 +1160,9 @@
|
||||
"Render the full nested tree of all descendant pages": "Показать полное вложенное дерево всех дочерних страниц",
|
||||
"Showing {{count}} subpages_one": "Показано {{count}} подстраница",
|
||||
"Showing {{count}} subpages_few": "Показано {{count}} подстраницы",
|
||||
"Showing {{count}} subpages_many": "Показано {{count}} подстраниц"
|
||||
"Showing {{count}} subpages_many": "Показано {{count}} подстраниц",
|
||||
"Protocol": "Протокол",
|
||||
"How chat requests are sent and how reasoning is surfaced": "Как отправляются запросы чата и как показывается reasoning",
|
||||
"OpenAI-compatible (surfaces reasoning)": "OpenAI-совместимый (показывает reasoning)",
|
||||
"OpenAI (official)": "OpenAI (официальный)"
|
||||
}
|
||||
|
||||
@@ -56,7 +56,13 @@ function buildInitialValues(server?: IAiMcpServer): FormValues {
|
||||
transport: server?.transport ?? "http",
|
||||
url: server?.url ?? "",
|
||||
authHeader: "",
|
||||
toolAllowlist: server?.toolAllowlist ?? [],
|
||||
// Defensive: TagsInput calls `.map`, so a non-array here (e.g. an API that
|
||||
// returns the jsonb column as a JSON string) would crash the whole page. The
|
||||
// server normalizes this now, but guard anyway so a bad shape can never take
|
||||
// the settings UI down.
|
||||
toolAllowlist: Array.isArray(server?.toolAllowlist)
|
||||
? server.toolAllowlist
|
||||
: [],
|
||||
enabled: server?.enabled ?? true,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@ import {
|
||||
AiTestCapability,
|
||||
IAiSettingsUpdate,
|
||||
SttApiStyle,
|
||||
ChatApiStyle,
|
||||
} from "@/features/workspace/services/ai-settings-service.ts";
|
||||
import { useAiRolesQuery } from "@/features/ai-chat/queries/ai-chat-query.ts";
|
||||
import { IAiRole } from "@/features/ai-chat/types/ai-chat.types.ts";
|
||||
@@ -82,6 +83,8 @@ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [
|
||||
// (empty means "leave unchanged" unless explicitly cleared).
|
||||
const formSchema = z.object({
|
||||
chatModel: z.string(),
|
||||
// Chat provider implementation (reasoning surfacing). Default openai-compatible.
|
||||
chatApiStyle: z.enum(["openai-compatible", "openai"]),
|
||||
// Cheap model id for the anonymous public-share assistant; empty = use chatModel.
|
||||
publicShareChatModel: z.string(),
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||
@@ -308,6 +311,7 @@ export default function AiProviderSettings() {
|
||||
validate: zod4Resolver(formSchema),
|
||||
initialValues: {
|
||||
chatModel: "",
|
||||
chatApiStyle: "openai-compatible" as ChatApiStyle,
|
||||
publicShareChatModel: "",
|
||||
publicShareAssistantRoleId: "",
|
||||
embeddingModel: "",
|
||||
@@ -330,6 +334,7 @@ export default function AiProviderSettings() {
|
||||
if (!settings) return;
|
||||
form.setValues({
|
||||
chatModel: settings.chatModel ?? "",
|
||||
chatApiStyle: settings.chatApiStyle ?? "openai-compatible",
|
||||
publicShareChatModel: settings.publicShareChatModel ?? "",
|
||||
publicShareAssistantRoleId: settings.publicShareAssistantRoleId ?? "",
|
||||
embeddingModel: settings.embeddingModel ?? "",
|
||||
@@ -359,6 +364,7 @@ export default function AiProviderSettings() {
|
||||
// Everything is OpenAI-compatible.
|
||||
driver: "openai",
|
||||
chatModel: values.chatModel,
|
||||
chatApiStyle: values.chatApiStyle,
|
||||
// Cheap model id for the anonymous public-share assistant; empty falls
|
||||
// back to chatModel server-side.
|
||||
publicShareChatModel: values.publicShareChatModel,
|
||||
@@ -761,6 +767,24 @@ export default function AiProviderSettings() {
|
||||
{t("Resolves to {{url}}", { url: chatResolved })}
|
||||
</Text>
|
||||
|
||||
<Select
|
||||
mt="sm"
|
||||
label={t("Protocol")}
|
||||
description={t(
|
||||
"How chat requests are sent and how reasoning is surfaced",
|
||||
)}
|
||||
data={[
|
||||
{
|
||||
value: "openai-compatible",
|
||||
label: t("OpenAI-compatible (surfaces reasoning)"),
|
||||
},
|
||||
{ value: "openai", label: t("OpenAI (official)") },
|
||||
]}
|
||||
allowDeselect={false}
|
||||
disabled={isLoading}
|
||||
{...form.getInputProps("chatApiStyle")}
|
||||
/>
|
||||
|
||||
{/* Anonymous public-share assistant: a single master toggle + an
|
||||
optional cheaper model id. Reuses this card's driver/URL/key. */}
|
||||
<Group justify="space-between" align="center" wrap="nowrap" mt="md">
|
||||
|
||||
@@ -9,6 +9,12 @@ export type AiDriver = "openai" | "gemini" | "ollama";
|
||||
// - 'json' -> JSON body with base64-encoded audio (OpenRouter)
|
||||
export type SttApiStyle = "multipart" | "json";
|
||||
|
||||
// Chat provider implementation for the `openai` driver (chosen explicitly):
|
||||
// - 'openai-compatible' -> maps streamed reasoning_content to reasoning parts
|
||||
// (z.ai/GLM, DeepSeek, OpenRouter, ...). Default.
|
||||
// - 'openai' -> official provider; real-OpenAI reasoning-model shaping.
|
||||
export type ChatApiStyle = "openai-compatible" | "openai";
|
||||
|
||||
// Masked AI provider settings returned by the server.
|
||||
// No API key is ever returned; only `hasApiKey` / `hasEmbeddingApiKey` indicate
|
||||
// whether one is stored. `embeddingBaseUrl` is the RAW stored value (empty means
|
||||
@@ -16,6 +22,7 @@ export type SttApiStyle = "multipart" | "json";
|
||||
export interface IAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
// Cheap model id for the anonymous public-share assistant; empty = chatModel.
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||
@@ -49,6 +56,7 @@ export interface IAiSettings {
|
||||
export interface IAiSettingsUpdate {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty =
|
||||
// built-in locked persona.
|
||||
|
||||
@@ -6,6 +6,7 @@ import { createMCPClient } from '@ai-sdk/mcp';
|
||||
import { Agent, type Dispatcher } from 'undici';
|
||||
import { AiMcpServerRepo } from '@docmost/db/repos/ai-chat/ai-mcp-server.repo';
|
||||
import { AiMcpServer } from '@docmost/db/types/entity.types';
|
||||
import { streamingDispatcherOptions } from '../../../integrations/ai/ai-streaming-fetch';
|
||||
import { SecretBoxService } from '../../../integrations/crypto/secret-box';
|
||||
import { isUrlAllowed, isIpAllowed } from './ssrf-guard';
|
||||
|
||||
@@ -400,6 +401,16 @@ export function validateResolvedAddresses(
|
||||
*/
|
||||
function buildPinnedDispatcher(): Agent {
|
||||
return new Agent({
|
||||
// Raise undici's default 300s headers/body timeouts on external MCP traffic
|
||||
// to the same generous-but-finite silence timeout the chat fetch uses (#175).
|
||||
// A long agent turn keeps an SSE transport (e.g. crawl4ai's /mcp/sse) open
|
||||
// across the whole turn; that connection can idle BETWEEN tool calls longer
|
||||
// than 5 min, and undici's bodyTimeout would otherwise sever it mid-task — a
|
||||
// tool-call failure that aborts the streamed turn and shows the user "Lost
|
||||
// connection to the AI provider". A slow single tool call (a crawl) can
|
||||
// likewise exceed headersTimeout. The timeout stays FINITE so a genuinely
|
||||
// hung server is still broken eventually.
|
||||
...streamingDispatcherOptions(),
|
||||
connect: {
|
||||
lookup: (hostname, _options, callback) => {
|
||||
// Always resolve ALL addresses ourselves; do not trust the caller's
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
import { parseToolAllowlist } from './ai-mcp-server.repo';
|
||||
|
||||
/**
|
||||
* The `tool_allowlist` jsonb column historically round-trips as a JSON STRING
|
||||
* (rows written by the old double-encoding `jsonbArray`), so the driver hands
|
||||
* back `'["a","b"]'` instead of an array. `parseToolAllowlist` normalizes both
|
||||
* shapes to the `string[] | null` the entity type promises — fixing the settings
|
||||
* UI crash (TagsInput `.map` on a string) and the tool-allowlist enforcement
|
||||
* (which did `Array.isArray(allow)` and silently allowed ALL tools for a string).
|
||||
*/
|
||||
describe('parseToolAllowlist', () => {
|
||||
it('passes a real string array through unchanged', () => {
|
||||
expect(parseToolAllowlist(['search', 'crawl'])).toEqual(['search', 'crawl']);
|
||||
});
|
||||
|
||||
it('parses a JSON-string array (the double-encoded read) into an array', () => {
|
||||
// This is exactly what the DB returns for an old row: a jsonb string scalar.
|
||||
expect(parseToolAllowlist('["alpha","beta"]')).toEqual(['alpha', 'beta']);
|
||||
});
|
||||
|
||||
it('returns null for null / undefined (unrestricted)', () => {
|
||||
expect(parseToolAllowlist(null)).toBeNull();
|
||||
expect(parseToolAllowlist(undefined)).toBeNull();
|
||||
});
|
||||
|
||||
it('returns [] for an empty array (no items, but a present allowlist)', () => {
|
||||
expect(parseToolAllowlist([])).toEqual([]);
|
||||
});
|
||||
|
||||
it('returns null for a JSON string that is not an array', () => {
|
||||
expect(parseToolAllowlist('"justastring"')).toBeNull();
|
||||
expect(parseToolAllowlist('{"a":1}')).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null for an unparseable string', () => {
|
||||
expect(parseToolAllowlist('not json at all')).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null when elements are not all strings (defensive)', () => {
|
||||
expect(parseToolAllowlist([1, 2, 3] as unknown)).toBeNull();
|
||||
expect(parseToolAllowlist('[1,2,3]')).toBeNull();
|
||||
});
|
||||
|
||||
it('returns null for a non-string, non-array primitive', () => {
|
||||
expect(parseToolAllowlist(42 as unknown)).toBeNull();
|
||||
expect(parseToolAllowlist(true as unknown)).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -21,32 +21,35 @@ export class AiMcpServerRepo {
|
||||
id: string,
|
||||
workspaceId: string,
|
||||
): Promise<AiMcpServer | undefined> {
|
||||
return this.db
|
||||
const row = await this.db
|
||||
.selectFrom('aiMcpServers')
|
||||
.selectAll('aiMcpServers')
|
||||
.where('id', '=', id)
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.executeTakeFirst();
|
||||
return row ? normalizeRow(row) : row;
|
||||
}
|
||||
|
||||
async listByWorkspace(workspaceId: string): Promise<AiMcpServer[]> {
|
||||
return this.db
|
||||
const rows = await this.db
|
||||
.selectFrom('aiMcpServers')
|
||||
.selectAll('aiMcpServers')
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.orderBy('createdAt', 'asc')
|
||||
.execute();
|
||||
return rows.map(normalizeRow);
|
||||
}
|
||||
|
||||
/** Enabled servers only — used by the agent loop to build the toolset. */
|
||||
async listEnabled(workspaceId: string): Promise<AiMcpServer[]> {
|
||||
return this.db
|
||||
const rows = await this.db
|
||||
.selectFrom('aiMcpServers')
|
||||
.selectAll('aiMcpServers')
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.where('enabled', '=', true)
|
||||
.orderBy('createdAt', 'asc')
|
||||
.execute();
|
||||
return rows.map(normalizeRow);
|
||||
}
|
||||
|
||||
async insert(
|
||||
@@ -130,6 +133,14 @@ export class AiMcpServerRepo {
|
||||
* Encode a string[] as a jsonb bind for the `tool_allowlist` column. Passing a
|
||||
* plain JS array to the postgres driver would serialize it as a Postgres array
|
||||
* literal (incompatible with jsonb), so we bind the JSON text and cast it.
|
||||
*
|
||||
* The cast is `::text::jsonb`, NOT `::jsonb`: if the parameter is bound straight
|
||||
* to a jsonb cast, node-postgres infers its type as jsonb and JSON-stringifies
|
||||
* the (already-JSON) string a SECOND time, so the column ends up holding a jsonb
|
||||
* STRING SCALAR (`"[\"a\"]"`) instead of a jsonb ARRAY. Forcing the param through
|
||||
* `::text` first binds it as text (sent verbatim), and `::jsonb` then parses it
|
||||
* into a real array. (`normalizeRow` below repairs rows written the old way.)
|
||||
*
|
||||
* Returns null for null/empty arrays (an empty allowlist means "no restriction"
|
||||
* is not intended — callers pass null to clear; an empty array is normalized to
|
||||
* null here so it never round-trips as `[]`).
|
||||
@@ -139,5 +150,37 @@ function jsonbArray(value: string[] | null | undefined) {
|
||||
return null;
|
||||
}
|
||||
// Typed as string[] so it is assignable to the toolAllowlist column.
|
||||
return sql<string[]>`${JSON.stringify(value)}::jsonb`;
|
||||
return sql<string[]>`${JSON.stringify(value)}::text::jsonb`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the `toolAllowlist` value read from the DB into the `string[] | null`
|
||||
* the entity type promises. The jsonb column historically round-trips as a JSON
|
||||
* STRING (rows written by the old double-encoding `jsonbArray`, see above), so
|
||||
* the driver hands back a string like `'["a","b"]'` rather than an array. Be
|
||||
* tolerant: an already-parsed array passes through; a JSON string is parsed; null
|
||||
* / a non-array / unparseable value becomes null (unrestricted).
|
||||
*/
|
||||
export function parseToolAllowlist(value: unknown): string[] | null {
|
||||
if (value == null) return null;
|
||||
if (Array.isArray(value)) {
|
||||
return value.every((v) => typeof v === 'string') ? (value as string[]) : null;
|
||||
}
|
||||
if (typeof value === 'string') {
|
||||
try {
|
||||
const parsed = JSON.parse(value);
|
||||
return Array.isArray(parsed) &&
|
||||
parsed.every((v) => typeof v === 'string')
|
||||
? (parsed as string[])
|
||||
: null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Normalize a DB row so `toolAllowlist` is always `string[] | null`. */
|
||||
function normalizeRow(row: AiMcpServer): AiMcpServer {
|
||||
return { ...row, toolAllowlist: parseToolAllowlist(row.toolAllowlist) };
|
||||
}
|
||||
|
||||
@@ -10,6 +10,29 @@ import {
|
||||
import { ExpressionBuilder, sql } from 'kysely';
|
||||
import { DB, Workspaces } from '@docmost/db/types/db';
|
||||
|
||||
/**
|
||||
* Writable `settings.ai.provider` keys, enforced at this generic SQL layer. This
|
||||
* repo cannot import AI-feature types, so this list is its own copy; a parity
|
||||
* test (ai-provider-settings-keys.spec.ts) asserts it equals
|
||||
* PROVIDER_SETTINGS_KEYS in ai.types so a future drift fails in CI rather than
|
||||
* silently dropping a field at this boundary.
|
||||
*/
|
||||
export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
'embeddingBaseUrl',
|
||||
'sttModel',
|
||||
'sttBaseUrl',
|
||||
'sttApiStyle',
|
||||
'sttLanguage',
|
||||
'systemPrompt',
|
||||
'publicShareChatModel',
|
||||
'publicShareAssistantRoleId',
|
||||
];
|
||||
|
||||
@Injectable()
|
||||
export class WorkspaceRepo {
|
||||
public baseFields: Array<keyof Workspaces> = [
|
||||
@@ -239,9 +262,8 @@ export class WorkspaceRepo {
|
||||
// is a real jsonb object, never a double-encoded string. The CASE self-heals
|
||||
// workspaces whose settings.ai.provider was previously corrupted into an
|
||||
// array/string.
|
||||
const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId'];
|
||||
const entries = Object.entries(provider).filter(
|
||||
([k, v]) => v !== undefined && ALLOWED.includes(k),
|
||||
([k, v]) => v !== undefined && AI_PROVIDER_SETTINGS_ALLOWED.includes(k),
|
||||
);
|
||||
const patch = entries.length
|
||||
? sql`jsonb_build_object(${sql.join(
|
||||
|
||||
40
apps/server/src/integrations/ai/ai-provider-http.spec.ts
Normal file
40
apps/server/src/integrations/ai/ai-provider-http.spec.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { createInstrumentedFetch } from './ai-provider-http';
|
||||
|
||||
/**
|
||||
* createInstrumentedFetch must be behavior-neutral: it delegates to the supplied
|
||||
* baseFetch with the SAME input/init, returns the Response object untouched (so
|
||||
* the streamed SSE body is never read/cloned), and rethrows the same error. The
|
||||
* baseFetch injection is the seam that carries the streaming fetch (#175) onto
|
||||
* the chat provider, so it is tested directly.
|
||||
*/
|
||||
describe('createInstrumentedFetch', () => {
|
||||
it('delegates to the injected baseFetch with the same input/init', async () => {
|
||||
const fakeResponse = new Response('ok', { status: 200 });
|
||||
const baseFetch = jest.fn().mockResolvedValue(fakeResponse);
|
||||
const instrumented = createInstrumentedFetch('test', baseFetch as never);
|
||||
|
||||
const init = { method: 'POST', body: '{"q":1}' };
|
||||
const res = await instrumented('https://example.com/v1/chat', init);
|
||||
|
||||
expect(baseFetch).toHaveBeenCalledTimes(1);
|
||||
expect(baseFetch).toHaveBeenCalledWith('https://example.com/v1/chat', init);
|
||||
// The Response is returned UNTOUCHED (same reference — never read/cloned).
|
||||
expect(res).toBe(fakeResponse);
|
||||
});
|
||||
|
||||
it('rethrows the base fetch error unchanged (pre-response failure)', async () => {
|
||||
const err = Object.assign(new TypeError('fetch failed'), {
|
||||
cause: { code: 'ECONNRESET' },
|
||||
});
|
||||
const baseFetch = jest.fn().mockRejectedValue(err);
|
||||
const instrumented = createInstrumentedFetch('test', baseFetch as never);
|
||||
|
||||
await expect(instrumented('https://example.com/')).rejects.toBe(err);
|
||||
});
|
||||
|
||||
it('defaults to the global fetch when no baseFetch is given', () => {
|
||||
// Constructing without a baseFetch must not throw — it simply wraps global
|
||||
// fetch (the non-chat default).
|
||||
expect(() => createInstrumentedFetch('test')).not.toThrow();
|
||||
});
|
||||
});
|
||||
87
apps/server/src/integrations/ai/ai-provider-http.ts
Normal file
87
apps/server/src/integrations/ai/ai-provider-http.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
import { Logger } from '@nestjs/common';
|
||||
|
||||
/**
|
||||
* The provider HTTP fetch used by the chat path: a thin, behavior-neutral
|
||||
* instrumentation wrapper around a supplied `fetch`.
|
||||
*
|
||||
* It defaults to the global `fetch`, but the chat provider passes the streaming
|
||||
* fetch (which RAISES undici's 300s stream timeouts to a generous-but-finite
|
||||
* silence timeout so a long agent turn is not severed mid-stream — #175). So this
|
||||
* wrapper observes the EXACT transport a turn uses. It NEVER retries, times out,
|
||||
* swaps the dispatcher, or reads/clones the response body — the Response is
|
||||
* returned untouched (streaming unaffected) and any error is rethrown unchanged.
|
||||
*
|
||||
* Per provider HTTP call it logs: time-to-response-headers + status + request
|
||||
* body size on success; and on a pre-response rejection the failure latency +
|
||||
* error code/cause + request body size + the idle gap since the previous call.
|
||||
* This telemetry is intentional and kept (it diagnoses provider connection
|
||||
* resets / mid-stream cuts), and it is load-bearing: the streaming fetch reaches
|
||||
* the chat provider THROUGH this wrapper, so the two are one construct.
|
||||
*
|
||||
* How to read the result (a long agentic turn makes one provider call per step):
|
||||
* - a failed turn whose last provider line is "PRE-RESPONSE FAILED ... ECONNRESET"
|
||||
* => the reset is in the CONNECTION phase of a step's request (the provider
|
||||
* never replied) — usually a poisoned keep-alive socket or the provider/middle
|
||||
* box resetting that request (large body / idle gap are the suspects, hence
|
||||
* reqBytes + idleSincePrevCall below).
|
||||
* - the last line is "OK status=200" and the turn still errors with NO
|
||||
* "PRE-RESPONSE FAILED" => the cut happened MID-STREAM (after headers), a
|
||||
* different failure mode.
|
||||
*
|
||||
* The seq/last-call timestamps are module-level, so under concurrent turns the
|
||||
* idle-gap figure is approximate (fine for single-user diagnosis).
|
||||
*/
|
||||
export function createInstrumentedFetch(
|
||||
context: string,
|
||||
// The underlying fetch to instrument. Defaults to the global fetch; the chat
|
||||
// provider passes the streaming fetch (raised, finite undici stream timeouts,
|
||||
// #175) so the telemetry observes the SAME transport the long agent turn uses.
|
||||
baseFetch: typeof fetch = fetch,
|
||||
): typeof fetch {
|
||||
const logger = new Logger(context);
|
||||
let callSeq = 0;
|
||||
let lastCallStartedAt: number | undefined;
|
||||
|
||||
return async (input: Parameters<typeof fetch>[0], init?: Parameters<typeof fetch>[1]): Promise<Response> => {
|
||||
const callId = ++callSeq;
|
||||
const startedAt = Date.now();
|
||||
const idleSincePrev =
|
||||
lastCallStartedAt === undefined ? undefined : startedAt - lastCallStartedAt;
|
||||
lastCallStartedAt = startedAt;
|
||||
// Request body size: the chat payload is a JSON string. Used to test whether
|
||||
// failures correlate with the large accumulated context on later agent steps.
|
||||
const body = init?.body as unknown;
|
||||
const bodyBytes =
|
||||
typeof body === 'string'
|
||||
? body.length
|
||||
: body instanceof Uint8Array
|
||||
? body.byteLength
|
||||
: undefined;
|
||||
try {
|
||||
// Delegate to the base fetch; return the Response UNTOUCHED (never read/
|
||||
// clone the body) so the streamed SSE response is unaffected.
|
||||
const res = await baseFetch(input, init);
|
||||
logger.log(
|
||||
`provider HTTP: call#${callId} OK ` +
|
||||
`headersAfter=${Date.now() - startedAt}ms status=${res.status} ` +
|
||||
`reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 'n/a'}ms`,
|
||||
);
|
||||
return res;
|
||||
} catch (err) {
|
||||
// fetch() rejected => PRE-RESPONSE failure (no headers/body received yet):
|
||||
// the connection/request phase. Log it and rethrow the SAME error.
|
||||
const e = err as {
|
||||
name?: string;
|
||||
message?: string;
|
||||
cause?: { code?: string; message?: string };
|
||||
};
|
||||
logger.warn(
|
||||
`provider HTTP: call#${callId} PRE-RESPONSE FAILED ` +
|
||||
`after=${Date.now() - startedAt}ms code=${e?.cause?.code ?? 'none'} ` +
|
||||
`name=${e?.name ?? 'Error'} cause=${e?.cause?.message ?? e?.message ?? 'unknown'} ` +
|
||||
`reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 'n/a'}ms`,
|
||||
);
|
||||
throw err;
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
import { validate } from 'class-validator';
|
||||
import { plainToInstance } from 'class-transformer';
|
||||
import { PROVIDER_SETTINGS_KEYS } from './ai.types';
|
||||
import { AI_PROVIDER_SETTINGS_ALLOWED } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { UpdateAiSettingsDto } from './dto/update-ai-settings.dto';
|
||||
|
||||
/**
|
||||
* Drift guard: the writable provider-settings keys are maintained in two layers
|
||||
* that TypeScript cannot cross-check — PROVIDER_SETTINGS_KEYS (ai.types, used by
|
||||
* the settings service) and AI_PROVIDER_SETTINGS_ALLOWED (the generic workspace
|
||||
* repo's SQL boundary). A key missing from the repo copy silently drops the field
|
||||
* on persist (exactly what happened to chatApiStyle), so this asserts they match.
|
||||
*/
|
||||
describe('provider-settings key allowlist parity', () => {
|
||||
it('the repo SQL allowlist equals PROVIDER_SETTINGS_KEYS', () => {
|
||||
expect([...AI_PROVIDER_SETTINGS_ALLOWED].sort()).toEqual(
|
||||
[...PROVIDER_SETTINGS_KEYS].sort(),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
/** DTO validation for the new chatApiStyle field (@IsIn(CHAT_API_STYLES)). */
|
||||
describe('UpdateAiSettingsDto.chatApiStyle', () => {
|
||||
const errorsFor = async (chatApiStyle: unknown) =>
|
||||
validate(plainToInstance(UpdateAiSettingsDto, { chatApiStyle }));
|
||||
|
||||
it('accepts both valid values', async () => {
|
||||
for (const v of ['openai-compatible', 'openai']) {
|
||||
const errs = await errorsFor(v);
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
||||
}
|
||||
});
|
||||
|
||||
it('rejects an unknown value', async () => {
|
||||
const errs = await errorsFor('definitely-not-a-style');
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeDefined();
|
||||
});
|
||||
|
||||
it('accepts the field being omitted (optional)', async () => {
|
||||
const errs = await validate(plainToInstance(UpdateAiSettingsDto, {}));
|
||||
expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@@ -14,6 +14,8 @@ import {
|
||||
MaskedAiSettings,
|
||||
ResolvedAiConfig,
|
||||
SttApiStyle,
|
||||
ChatApiStyle,
|
||||
PROVIDER_SETTINGS_KEYS,
|
||||
} from './ai.types';
|
||||
|
||||
/**
|
||||
@@ -24,6 +26,7 @@ import {
|
||||
export interface UpdateAiSettingsInput {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
embeddingBaseUrl?: string;
|
||||
@@ -157,6 +160,8 @@ export class AiSettingsService {
|
||||
const config: ResolvedAiConfig = {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
// Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
// Cheap model id for the anonymous public-share assistant; reuses the chat
|
||||
// driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel.
|
||||
publicShareChatModel: provider.publicShareChatModel,
|
||||
@@ -238,6 +243,7 @@ export class AiSettingsService {
|
||||
return {
|
||||
driver: provider.driver,
|
||||
chatModel: provider.chatModel,
|
||||
chatApiStyle: provider.chatApiStyle,
|
||||
embeddingModel: provider.embeddingModel,
|
||||
baseUrl: provider.baseUrl,
|
||||
embeddingBaseUrl: provider.embeddingBaseUrl,
|
||||
@@ -275,20 +281,8 @@ export class AiSettingsService {
|
||||
|
||||
// Persist non-secret provider fields (only those present in the partial).
|
||||
const providerPatch: Partial<AiProviderSettings> = {};
|
||||
for (const key of [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
'embeddingBaseUrl',
|
||||
'sttModel',
|
||||
'sttBaseUrl',
|
||||
'sttApiStyle',
|
||||
'sttLanguage',
|
||||
'systemPrompt',
|
||||
'publicShareChatModel',
|
||||
'publicShareAssistantRoleId',
|
||||
] as const) {
|
||||
// Single source of truth for the writable provider keys (see ai.types).
|
||||
for (const key of PROVIDER_SETTINGS_KEYS) {
|
||||
if (nonSecret[key] !== undefined) {
|
||||
(providerPatch as Record<string, unknown>)[key] = nonSecret[key];
|
||||
}
|
||||
|
||||
235
apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts
Normal file
235
apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
import * as http from 'node:http';
|
||||
import {
|
||||
createStreamingFetch,
|
||||
withPreResponseRetry,
|
||||
streamTimeoutMs,
|
||||
streamKeepAliveMs,
|
||||
streamingDispatcherOptions,
|
||||
isRetryableConnectError,
|
||||
} from './ai-streaming-fetch';
|
||||
|
||||
/**
|
||||
* #175: undici's default 300s headers/body timeouts severed long agent turns.
|
||||
* The streaming fetch raises them to a generous-but-FINITE silence timeout (not
|
||||
* 0 — a true hang must still break). We pin: the configured value + env override,
|
||||
* that both dispatcher timeouts use it, and that a delayed response streams.
|
||||
*/
|
||||
describe('streamTimeoutMs', () => {
|
||||
const ORIG = process.env.AI_STREAM_TIMEOUT_MS;
|
||||
afterEach(() => {
|
||||
if (ORIG === undefined) delete process.env.AI_STREAM_TIMEOUT_MS;
|
||||
else process.env.AI_STREAM_TIMEOUT_MS = ORIG;
|
||||
});
|
||||
|
||||
it('defaults to a generous-but-finite 15 minutes', () => {
|
||||
delete process.env.AI_STREAM_TIMEOUT_MS;
|
||||
expect(streamTimeoutMs()).toBe(900_000);
|
||||
// Finite — NOT disabled (0 would let a hung provider leak forever).
|
||||
expect(streamTimeoutMs()).toBeGreaterThan(0);
|
||||
expect(Number.isFinite(streamTimeoutMs())).toBe(true);
|
||||
});
|
||||
|
||||
it('honours a positive AI_STREAM_TIMEOUT_MS override', () => {
|
||||
process.env.AI_STREAM_TIMEOUT_MS = '120000';
|
||||
expect(streamTimeoutMs()).toBe(120000);
|
||||
});
|
||||
|
||||
it('ignores an invalid / non-positive override (falls back to default)', () => {
|
||||
for (const bad of ['0', '-5', 'abc', '']) {
|
||||
process.env.AI_STREAM_TIMEOUT_MS = bad;
|
||||
expect(streamTimeoutMs()).toBe(900_000);
|
||||
}
|
||||
});
|
||||
|
||||
it('applies the silence timeout + keep-alive recycle window to the dispatcher', () => {
|
||||
delete process.env.AI_STREAM_TIMEOUT_MS;
|
||||
delete process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
expect(streamingDispatcherOptions()).toEqual({
|
||||
headersTimeout: 900_000,
|
||||
bodyTimeout: 900_000,
|
||||
keepAliveTimeout: 10_000,
|
||||
keepAliveMaxTimeout: 10_000,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('streamKeepAliveMs', () => {
|
||||
const ORIG = process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
afterEach(() => {
|
||||
if (ORIG === undefined) delete process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
else process.env.AI_STREAM_KEEPALIVE_MS = ORIG;
|
||||
});
|
||||
|
||||
it('defaults to 10s (recycle idle sockets so a NAT/proxy drop cannot poison reuse)', () => {
|
||||
delete process.env.AI_STREAM_KEEPALIVE_MS;
|
||||
expect(streamKeepAliveMs()).toBe(10_000);
|
||||
});
|
||||
|
||||
it('honours a positive override and ignores invalid/non-positive', () => {
|
||||
process.env.AI_STREAM_KEEPALIVE_MS = '4000';
|
||||
expect(streamKeepAliveMs()).toBe(4000);
|
||||
for (const bad of ['0', '-1', 'x', '']) {
|
||||
process.env.AI_STREAM_KEEPALIVE_MS = bad;
|
||||
expect(streamKeepAliveMs()).toBe(10_000);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('isRetryableConnectError', () => {
|
||||
it('matches connection-level codes on the error or its cause', () => {
|
||||
expect(isRetryableConnectError({ cause: { code: 'ECONNRESET' } })).toBe(true);
|
||||
expect(isRetryableConnectError({ cause: { code: 'UND_ERR_SOCKET' } })).toBe(true);
|
||||
expect(isRetryableConnectError({ code: 'ECONNREFUSED' })).toBe(true);
|
||||
});
|
||||
it('does NOT match aborts / unrelated errors', () => {
|
||||
expect(isRetryableConnectError({ name: 'AbortError', cause: { code: 'ABORT_ERR' } })).toBe(false);
|
||||
expect(isRetryableConnectError({ cause: { code: 'UND_ERR_HEADERS_TIMEOUT' } })).toBe(false);
|
||||
expect(isRetryableConnectError(new Error('plain'))).toBe(false);
|
||||
expect(isRetryableConnectError(undefined)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('createStreamingFetch — against a delayed server', () => {
|
||||
const ORIG = process.env.AI_STREAM_TIMEOUT_MS;
|
||||
let server: http.Server;
|
||||
let url: string;
|
||||
// The server waits before sending ANY byte (a long time-to-first-token). It is
|
||||
// > undici's ~1s timeout-timer granularity so a sub-second configured timeout
|
||||
// fires deterministically in the load-bearing test below.
|
||||
const DELAY = 1500;
|
||||
|
||||
beforeAll(async () => {
|
||||
server = http.createServer((_req, res) => {
|
||||
setTimeout(() => {
|
||||
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
||||
res.end('ok');
|
||||
}, DELAY);
|
||||
});
|
||||
await new Promise<void>((resolve) => server.listen(0, '127.0.0.1', resolve));
|
||||
const addr = server.address() as import('node:net').AddressInfo;
|
||||
url = `http://127.0.0.1:${addr.port}/`;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await new Promise<void>((resolve) => server.close(() => resolve()));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (ORIG === undefined) delete process.env.AI_STREAM_TIMEOUT_MS;
|
||||
else process.env.AI_STREAM_TIMEOUT_MS = ORIG;
|
||||
});
|
||||
|
||||
it('streams the delayed response at the default (generous) timeout', async () => {
|
||||
delete process.env.AI_STREAM_TIMEOUT_MS; // default 15 min >> DELAY
|
||||
const streamingFetch = createStreamingFetch();
|
||||
const res = await streamingFetch(url);
|
||||
expect(res.status).toBe(200);
|
||||
expect(await res.text()).toBe('ok');
|
||||
});
|
||||
|
||||
it('LOAD-BEARING: a sub-DELAY AI_STREAM_TIMEOUT_MS actually severs the response', async () => {
|
||||
// Proves the configured dispatcher is wired into the fetch: with the timeout
|
||||
// set below DELAY the call must reject with undici's headers-timeout. If the
|
||||
// dispatcher were lost (fallback to global fetch's 300s default), the 1.5s
|
||||
// response would slip through and this would NOT throw.
|
||||
process.env.AI_STREAM_TIMEOUT_MS = '500';
|
||||
const streamingFetch = createStreamingFetch();
|
||||
let caught: unknown;
|
||||
const startedAt = Date.now();
|
||||
try {
|
||||
await streamingFetch(url).then((r) => r.text());
|
||||
} catch (e) {
|
||||
caught = e;
|
||||
}
|
||||
// It rejected (a lost dispatcher -> global 300s default would NOT reject on a
|
||||
// 1.5s response) and it did so BEFORE the response would have arrived (DELAY).
|
||||
// Use `.name` (realm-safe) — undici's TypeError fails cross-realm instanceof.
|
||||
expect(caught).toBeDefined();
|
||||
expect((caught as Error)?.name).toBe('TypeError');
|
||||
expect(Date.now() - startedAt).toBeLessThan(DELAY);
|
||||
// When present, the undici cause is the headers timeout.
|
||||
const code = (caught as { cause?: { code?: string } })?.cause?.code;
|
||||
if (code) expect(code).toBe('UND_ERR_HEADERS_TIMEOUT');
|
||||
});
|
||||
});
|
||||
|
||||
describe('withPreResponseRetry', () => {
|
||||
// The retry is the OUTERMOST layer (over the dispatcher-bound streaming fetch),
|
||||
// matching ai.service's withPreResponseRetry(instrument(createStreamingFetch())).
|
||||
// PRE_RESPONSE_CONNECT_RETRIES is 2 -> at most 3 total attempts.
|
||||
const MAX_ATTEMPTS = 3;
|
||||
let server: http.Server;
|
||||
let url: string;
|
||||
let requests = 0;
|
||||
// 'first' resets only the first connection; 'all' resets every connection.
|
||||
let resetMode: 'first' | 'all' = 'first';
|
||||
|
||||
const retryingFetch = () => withPreResponseRetry(createStreamingFetch());
|
||||
|
||||
beforeAll(async () => {
|
||||
server = http.createServer((req, res) => {
|
||||
requests += 1;
|
||||
const shouldReset = resetMode === 'all' || requests === 1;
|
||||
if (shouldReset) {
|
||||
// Reset before any response byte (a poisoned/stale keep-alive socket).
|
||||
const sock = req.socket as import('node:net').Socket & {
|
||||
resetAndDestroy?: () => void;
|
||||
};
|
||||
if (typeof sock.resetAndDestroy === 'function') sock.resetAndDestroy();
|
||||
else sock.destroy();
|
||||
return;
|
||||
}
|
||||
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
||||
res.end('ok');
|
||||
});
|
||||
await new Promise<void>((resolve) => server.listen(0, '127.0.0.1', resolve));
|
||||
const addr = server.address() as import('node:net').AddressInfo;
|
||||
url = `http://127.0.0.1:${addr.port}/`;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await new Promise<void>((resolve) => server.close(() => resolve()));
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
requests = 0;
|
||||
resetMode = 'first';
|
||||
});
|
||||
|
||||
it('retries a pre-response reset on a fresh connection and succeeds', async () => {
|
||||
resetMode = 'first';
|
||||
const res = await retryingFetch()(url);
|
||||
expect(res.status).toBe(200);
|
||||
expect(await res.text()).toBe('ok');
|
||||
// first request reset -> retry -> second request served.
|
||||
expect(requests).toBe(2);
|
||||
});
|
||||
|
||||
it('gives up after the retry bound and rethrows the original reset', async () => {
|
||||
resetMode = 'all'; // every attempt resets -> retries exhaust
|
||||
let caught: unknown;
|
||||
try {
|
||||
await retryingFetch()(url);
|
||||
} catch (e) {
|
||||
caught = e;
|
||||
}
|
||||
expect(caught).toBeDefined();
|
||||
// A retryable connection error reached the caller (not swallowed).
|
||||
expect(isRetryableConnectError(caught)).toBe(true);
|
||||
// Bounded: exactly PRE_RESPONSE_CONNECT_RETRIES + 1 attempts hit the server
|
||||
// (pins both the limit and that the final error propagates — guards an
|
||||
// off-by-one or an infinite loop).
|
||||
expect(requests).toBe(MAX_ATTEMPTS);
|
||||
});
|
||||
|
||||
it('does NOT retry an aborted request (no retry storm)', async () => {
|
||||
resetMode = 'all';
|
||||
const ctrl = new AbortController();
|
||||
ctrl.abort();
|
||||
await expect(
|
||||
retryingFetch()(url, { signal: ctrl.signal }),
|
||||
).rejects.toBeDefined();
|
||||
// Pre-aborted: the request never reached the server, so nothing was retried.
|
||||
expect(requests).toBe(0);
|
||||
});
|
||||
});
|
||||
156
apps/server/src/integrations/ai/ai-streaming-fetch.ts
Normal file
156
apps/server/src/integrations/ai/ai-streaming-fetch.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
import { Agent } from 'undici';
|
||||
|
||||
/**
|
||||
* Default SILENCE timeout for streaming AI calls (15 min). Generous, but FINITE.
|
||||
*
|
||||
* Node's global fetch (undici) defaults headersTimeout and bodyTimeout to
|
||||
* 300_000ms, which severed legitimate long agent turns mid-stream — surfacing as
|
||||
* "Lost connection to the AI provider" (#175): a late step with a huge context
|
||||
* pushes the model's time-to-first-token past 5 min, or a reasoning model pauses
|
||||
* >5 min between chunks. We do NOT disable the timeout (0) — that would let a
|
||||
* genuinely hung provider, with the client still connected, hang forever
|
||||
* (abortSignal only fires on client disconnect). Instead we raise it well above
|
||||
* any realistic gap while keeping it finite so a true hang is eventually broken.
|
||||
*
|
||||
* This bounds SILENCE (time-to-first-byte and the gap BETWEEN chunks), NOT total
|
||||
* turn duration — so an arbitrarily long turn that keeps streaming bytes is never
|
||||
* cut; only a stream that goes quiet for longer than this is treated as a hang.
|
||||
*/
|
||||
const DEFAULT_STREAM_TIMEOUT_MS = 900_000;
|
||||
|
||||
/**
|
||||
* Default keep-alive recycle window (10s). A pooled connection idle longer than
|
||||
* this is CLOSED rather than reused.
|
||||
*
|
||||
* Long agent turns leave gaps of tens of seconds between provider calls (one
|
||||
* call per step; a crawl/search tool runs in between). A NAT / reverse proxy /
|
||||
* conntrack in front of the deployment silently drops an idle connection after
|
||||
* its own timeout; undici, not knowing, then reuses that dead socket and the
|
||||
* next request fails PRE-RESPONSE with `read ECONNRESET` (#175 prod telemetry:
|
||||
* the resets correlate with idleSincePrevCall ~42s, while a direct path to the
|
||||
* provider does NOT reset). Recycling idle sockets well below such a drop window
|
||||
* means a long-gap call opens a fresh connection instead of reusing a stale one.
|
||||
* `keepAliveMaxTimeout` also caps a server-advertised keep-alive so the provider
|
||||
* cannot push the reuse window back up.
|
||||
*/
|
||||
const DEFAULT_STREAM_KEEPALIVE_MS = 10_000;
|
||||
|
||||
/**
|
||||
* How many times to retry a PRE-RESPONSE connection failure (a reset/timeout
|
||||
* before ANY response byte) on a fresh connection. Safe because `fetch()` only
|
||||
* rejects before the Response resolves — a started stream is never replayed.
|
||||
*/
|
||||
const PRE_RESPONSE_CONNECT_RETRIES = 2;
|
||||
|
||||
/** undici cause codes for a connection-level failure that occurred PRE-RESPONSE. */
|
||||
const RETRYABLE_CONNECT_CODES = new Set([
|
||||
'ECONNRESET',
|
||||
'ECONNREFUSED',
|
||||
'EPIPE',
|
||||
'ETIMEDOUT',
|
||||
'UND_ERR_SOCKET',
|
||||
'UND_ERR_CONNECT_TIMEOUT',
|
||||
]);
|
||||
|
||||
function positiveEnv(name: string, fallback: number): number {
|
||||
const raw = Number(process.env[name]);
|
||||
return Number.isFinite(raw) && raw > 0 ? raw : fallback;
|
||||
}
|
||||
|
||||
/**
|
||||
* The configured silence timeout (ms). Override with `AI_STREAM_TIMEOUT_MS`; a
|
||||
* missing/invalid/non-positive value falls back to {@link DEFAULT_STREAM_TIMEOUT_MS}.
|
||||
*/
|
||||
export function streamTimeoutMs(): number {
|
||||
return positiveEnv('AI_STREAM_TIMEOUT_MS', DEFAULT_STREAM_TIMEOUT_MS);
|
||||
}
|
||||
|
||||
/** Keep-alive recycle window (ms). Override with `AI_STREAM_KEEPALIVE_MS`. */
|
||||
export function streamKeepAliveMs(): number {
|
||||
return positiveEnv('AI_STREAM_KEEPALIVE_MS', DEFAULT_STREAM_KEEPALIVE_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* undici `Agent` options for streaming AI traffic — the (generous, finite)
|
||||
* silence timeouts plus the keep-alive recycle window. Shared by the chat
|
||||
* provider fetch and the external-MCP dispatcher so they behave identically.
|
||||
*/
|
||||
export function streamingDispatcherOptions(): {
|
||||
headersTimeout: number;
|
||||
bodyTimeout: number;
|
||||
keepAliveTimeout: number;
|
||||
keepAliveMaxTimeout: number;
|
||||
} {
|
||||
const t = streamTimeoutMs();
|
||||
const ka = streamKeepAliveMs();
|
||||
return {
|
||||
headersTimeout: t,
|
||||
bodyTimeout: t,
|
||||
keepAliveTimeout: ka,
|
||||
keepAliveMaxTimeout: ka,
|
||||
};
|
||||
}
|
||||
|
||||
/** True for a connection-level error worth retrying on a fresh connection. */
|
||||
export function isRetryableConnectError(err: unknown): boolean {
|
||||
const e = err as { code?: string; cause?: { code?: string } } | undefined;
|
||||
const code = e?.cause?.code ?? e?.code;
|
||||
return typeof code === 'string' && RETRYABLE_CONNECT_CODES.has(code);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a `fetch` for long-lived streaming AI calls (the agent chat turn) backed
|
||||
* by a dedicated undici dispatcher (finite silence timeouts + keep-alive
|
||||
* recycling, #175). A single shared dispatcher is returned (callers hold it for
|
||||
* the service lifetime) so its connection pool is reused.
|
||||
*
|
||||
* This is the BASE transport — no retry. The chat path wraps it as
|
||||
* `withPreResponseRetry(createInstrumentedFetch(ctx, createStreamingFetch()))`
|
||||
* so the retry is the OUTERMOST layer and the instrumentation observes EVERY
|
||||
* attempt (a recovered reset is still logged — see withPreResponseRetry).
|
||||
*/
|
||||
export function createStreamingFetch(): typeof fetch {
|
||||
const dispatcher = new Agent(streamingDispatcherOptions());
|
||||
return ((input: Parameters<typeof fetch>[0], init?: RequestInit) =>
|
||||
fetch(input, {
|
||||
...(init ?? {}),
|
||||
// `dispatcher` is an undici-specific init field (not in the DOM
|
||||
// RequestInit type); Node's global fetch reads it. Cast to satisfy it.
|
||||
dispatcher,
|
||||
} as RequestInit & { dispatcher: Agent })) as typeof fetch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap a fetch so a PRE-RESPONSE connection reset (`baseFetch` rejects before the
|
||||
* Response resolves — so nothing has streamed) is retried a few times on a fresh
|
||||
* connection (#175). A poisoned keep-alive socket is destroyed by undici on the
|
||||
* reset, so the retry lands on a new connection. An abort (client disconnect) is
|
||||
* never retried.
|
||||
*
|
||||
* This is the OUTERMOST transport layer by design: composing it as
|
||||
* `withPreResponseRetry(instrumentedFetch)` means every attempt — including the
|
||||
* resets that the retry recovers from — flows through the instrumentation, so the
|
||||
* "PRE-RESPONSE FAILED ... ECONNRESET ... idleSincePrevCall" telemetry stays
|
||||
* visible precisely when the fix is working (and AI_STREAM_KEEPALIVE_MS can be
|
||||
* tuned from real data). A retry INSIDE the transport would hide it.
|
||||
*/
|
||||
export function withPreResponseRetry(baseFetch: typeof fetch): typeof fetch {
|
||||
return (async (input: Parameters<typeof fetch>[0], init?: RequestInit) => {
|
||||
for (let attempt = 0; ; attempt++) {
|
||||
try {
|
||||
return await baseFetch(input, init);
|
||||
} catch (err) {
|
||||
const aborted = init?.signal?.aborted === true;
|
||||
if (
|
||||
aborted ||
|
||||
attempt >= PRE_RESPONSE_CONNECT_RETRIES ||
|
||||
!isRetryableConnectError(err)
|
||||
) {
|
||||
throw err;
|
||||
}
|
||||
// Brief backoff before the fresh-connection retry.
|
||||
await new Promise((resolve) => setTimeout(resolve, 150 * (attempt + 1)));
|
||||
}
|
||||
}
|
||||
}) as typeof fetch;
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
// `.provider` alone cannot prove the openai-compatible factory was called with
|
||||
// `includeUsage: true` — a regression dropping it (which zeroes streamed token
|
||||
// usage / reasoning-token metadata) would still pass. So mock the factory and
|
||||
// assert the exact args. jest.mock is module-scoped, hence a dedicated file.
|
||||
|
||||
const mockCompatibleModel = { provider: 'openai-compatible.chat', modelId: 'm' };
|
||||
// jest allows `mock`-prefixed vars inside a jest.mock factory.
|
||||
const mockCreateOpenAICompatible = jest.fn(
|
||||
(_settings: unknown) => () => mockCompatibleModel,
|
||||
);
|
||||
|
||||
jest.mock('@ai-sdk/openai-compatible', () => ({
|
||||
createOpenAICompatible: (settings: unknown) =>
|
||||
mockCreateOpenAICompatible(settings),
|
||||
}));
|
||||
|
||||
import { AiService } from './ai.service';
|
||||
|
||||
describe('AiService.getChatModel openai-compatible factory args', () => {
|
||||
function serviceWith(chatApiStyle?: 'openai-compatible' | 'openai') {
|
||||
const aiSettings = {
|
||||
resolve: jest.fn().mockResolvedValue({
|
||||
driver: 'openai',
|
||||
chatModel: 'glm-5.2',
|
||||
apiKey: 'the-key',
|
||||
baseUrl: 'https://api.z.ai/v4',
|
||||
chatApiStyle,
|
||||
}),
|
||||
};
|
||||
return new AiService(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
aiSettings as any,
|
||||
{ find: jest.fn() } as never,
|
||||
{ decryptSecret: jest.fn() } as never,
|
||||
);
|
||||
}
|
||||
|
||||
beforeEach(() => mockCreateOpenAICompatible.mockClear());
|
||||
|
||||
it('passes includeUsage:true plus baseURL/apiKey/fetch (default style)', async () => {
|
||||
await serviceWith().getChatModel('ws-1'); // unset -> openai-compatible
|
||||
expect(mockCreateOpenAICompatible).toHaveBeenCalledTimes(1);
|
||||
expect(mockCreateOpenAICompatible).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
name: 'openai-compatible',
|
||||
baseURL: 'https://api.z.ai/v4',
|
||||
apiKey: 'the-key',
|
||||
includeUsage: true,
|
||||
fetch: expect.any(Function),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("does NOT use the openai-compatible factory for chatApiStyle 'openai'", async () => {
|
||||
await serviceWith('openai').getChatModel('ws-1');
|
||||
expect(mockCreateOpenAICompatible).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
@@ -285,3 +285,64 @@ describe('AiService.getChatModel role model override', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Chat provider selection by the EXPLICIT `chatApiStyle` (NOT inferred from
|
||||
* baseUrl): 'openai-compatible' (default) uses @ai-sdk/openai-compatible, which
|
||||
* maps streamed reasoning_content to reasoning parts; 'openai' uses the official
|
||||
* provider; and openai-compatible without a baseURL safely falls back to the
|
||||
* official provider (it has no default endpoint). Asserted via `.provider`.
|
||||
*/
|
||||
describe('AiService.getChatModel chatApiStyle provider selection', () => {
|
||||
function serviceWith(opts: {
|
||||
baseUrl?: string;
|
||||
chatApiStyle?: 'openai-compatible' | 'openai';
|
||||
}) {
|
||||
const aiSettings = {
|
||||
resolve: jest.fn().mockResolvedValue({
|
||||
driver: 'openai',
|
||||
chatModel: 'glm-5.2',
|
||||
apiKey: 'key',
|
||||
baseUrl: opts.baseUrl,
|
||||
chatApiStyle: opts.chatApiStyle,
|
||||
}),
|
||||
};
|
||||
return new AiService(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
aiSettings as any,
|
||||
{ find: jest.fn() } as never,
|
||||
{ decryptSecret: jest.fn() } as never,
|
||||
);
|
||||
}
|
||||
|
||||
const providerOf = async (svc: AiService) =>
|
||||
(
|
||||
(await svc.getChatModel('ws-1')) as { provider: string }
|
||||
).provider;
|
||||
|
||||
it("'openai-compatible' + baseURL -> openai-compatible provider", async () => {
|
||||
expect(
|
||||
await providerOf(
|
||||
serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai-compatible' }),
|
||||
),
|
||||
).toContain('openai-compatible');
|
||||
});
|
||||
|
||||
it("'openai' + baseURL -> official openai provider", async () => {
|
||||
expect(
|
||||
await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai' })),
|
||||
).toBe('openai.chat');
|
||||
});
|
||||
|
||||
it('unset + baseURL -> defaults to openai-compatible', async () => {
|
||||
expect(
|
||||
await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4' })),
|
||||
).toContain('openai-compatible');
|
||||
});
|
||||
|
||||
it("'openai-compatible' WITHOUT baseURL -> safe fallback to official openai", async () => {
|
||||
expect(
|
||||
await providerOf(serviceWith({ chatApiStyle: 'openai-compatible' })),
|
||||
).toBe('openai.chat');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
type LanguageModel,
|
||||
} from 'ai';
|
||||
import { createOpenAI } from '@ai-sdk/openai';
|
||||
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
|
||||
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
||||
import { createOllama } from 'ai-sdk-ollama';
|
||||
import { AiSettingsService } from './ai-settings.service';
|
||||
@@ -14,6 +15,11 @@ import { AiNotConfiguredException } from './ai-not-configured.exception';
|
||||
import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception';
|
||||
import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception';
|
||||
import { describeProviderError } from './ai-error.util';
|
||||
import { createInstrumentedFetch } from './ai-provider-http';
|
||||
import {
|
||||
createStreamingFetch,
|
||||
withPreResponseRetry,
|
||||
} from './ai-streaming-fetch';
|
||||
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
|
||||
import { SecretBoxService } from '../crypto/secret-box';
|
||||
import { AiDriver } from './ai.types';
|
||||
@@ -43,6 +49,17 @@ export interface ChatModelOverride {
|
||||
export class AiService {
|
||||
private readonly logger = new Logger(AiService.name);
|
||||
|
||||
// Provider HTTP fetch for the chat path, layered so each transport concern is
|
||||
// observed (#175). Inside-out: the streaming fetch (finite silence timeouts +
|
||||
// keep-alive recycling) → provider-HTTP instrumentation (logs every attempt) →
|
||||
// pre-response connection-reset retry as the OUTERMOST layer. Retry-outer means
|
||||
// a reset the retry recovers from is still logged with its idle-gap, instead of
|
||||
// collapsing into a clean "OK". Held for the service lifetime to reuse the
|
||||
// streaming dispatcher's connection pool.
|
||||
private readonly aiProviderFetch = withPreResponseRetry(
|
||||
createInstrumentedFetch('AiService:provider-http', createStreamingFetch()),
|
||||
);
|
||||
|
||||
constructor(
|
||||
private readonly aiSettings: AiSettingsService,
|
||||
private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
|
||||
@@ -83,6 +100,10 @@ export class AiService {
|
||||
|
||||
let apiKey = cfg.apiKey;
|
||||
let baseUrl = cfg.baseUrl;
|
||||
// Chat provider implementation, chosen EXPLICITLY by the admin (not inferred
|
||||
// from baseUrl). Unset → 'openai-compatible' so reasoning is surfaced by
|
||||
// default for this fork's openai+baseUrl setups.
|
||||
const chatApiStyle = cfg.chatApiStyle ?? 'openai-compatible';
|
||||
|
||||
// A driver override that differs from the workspace driver needs that
|
||||
// driver's own creds (the workspace driver's key would be wrong/absent).
|
||||
@@ -133,14 +154,41 @@ export class AiService {
|
||||
}
|
||||
|
||||
switch (driver) {
|
||||
case 'openai':
|
||||
// baseURL (when set) covers openai-compatible endpoints. Use Chat
|
||||
// Completions (/chat/completions) — the portable OpenAI-compatible
|
||||
// endpoint. The default callable createOpenAI(...)(model) targets the
|
||||
// Responses API (/responses), which OpenAI-compatible gateways
|
||||
// (OpenRouter, etc.) reject on multi-turn requests (history with
|
||||
// assistant messages) → 400.
|
||||
return createOpenAI({ apiKey, baseURL: baseUrl }).chat(chatModel);
|
||||
case 'openai': {
|
||||
// The provider implementation is chosen by the admin's `chatApiStyle`
|
||||
// (NOT inferred from baseUrl — a custom URL can front real OpenAI too).
|
||||
// Both branches hit Chat Completions (/chat/completions); the provider
|
||||
// fetch is the instrumented streaming fetch (finite-but-generous stream
|
||||
// timeouts, #175).
|
||||
//
|
||||
// 'openai-compatible' (default) maps the third-party provider's streamed
|
||||
// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, ...) — the
|
||||
// point of #175. It has no default endpoint, so it requires a baseURL;
|
||||
// when there is none (real OpenAI, or a role's cross-driver override that
|
||||
// cleared baseUrl) we fall back to the official provider.
|
||||
if (chatApiStyle === 'openai-compatible' && baseUrl) {
|
||||
return createOpenAICompatible({
|
||||
name: 'openai-compatible',
|
||||
apiKey,
|
||||
baseURL: baseUrl,
|
||||
// Keep streamed token usage (stream_options.include_usage): without
|
||||
// it @ai-sdk/openai-compatible omits usage, zeroing the live token
|
||||
// counter and reasoning-token metadata. The official provider always
|
||||
// sent it, so this preserves parity.
|
||||
includeUsage: true,
|
||||
fetch: this.aiProviderFetch,
|
||||
})(chatModel);
|
||||
}
|
||||
// Official @ai-sdk/openai: real-OpenAI reasoning-model request shaping;
|
||||
// `.chat()` targets Chat Completions (the default callable targets the
|
||||
// Responses API, which openai-compatible gateways 400 on multi-turn
|
||||
// history). In this fork baseUrl is normally set; undefined = real OpenAI.
|
||||
return createOpenAI({
|
||||
apiKey,
|
||||
baseURL: baseUrl,
|
||||
fetch: this.aiProviderFetch,
|
||||
}).chat(chatModel);
|
||||
}
|
||||
case 'gemini':
|
||||
return createGoogleGenerativeAI({ apiKey })(chatModel);
|
||||
case 'ollama':
|
||||
|
||||
@@ -16,6 +16,15 @@ export const AI_DRIVERS: AiDriver[] = ['openai', 'gemini', 'ollama'];
|
||||
export type SttApiStyle = 'multipart' | 'json';
|
||||
export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json'];
|
||||
|
||||
// Chat provider implementation for the `openai` driver. Chosen explicitly by the
|
||||
// admin (NOT inferred from baseUrl — a custom URL can front real OpenAI too).
|
||||
// 'openai-compatible' = @ai-sdk/openai-compatible: maps streamed
|
||||
// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, OpenRouter, ...).
|
||||
// 'openai' = official @ai-sdk/openai: real-OpenAI reasoning-model request shaping
|
||||
// (max_completion_tokens, the 'developer' role), no third-party reasoning map.
|
||||
export type ChatApiStyle = 'openai-compatible' | 'openai';
|
||||
export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai'];
|
||||
|
||||
/**
|
||||
* Non-secret provider settings persisted under `settings.ai.provider`.
|
||||
* The API key is intentionally absent here.
|
||||
@@ -23,6 +32,9 @@ export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json'];
|
||||
export interface AiProviderSettings {
|
||||
driver: AiDriver;
|
||||
chatModel: string;
|
||||
// Chat provider implementation for the `openai` driver. Unset → defaults to
|
||||
// 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle.
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
// Embedding-specific base URL. Falls back to `baseUrl` when empty/unset.
|
||||
@@ -45,6 +57,34 @@ export interface AiProviderSettings {
|
||||
publicShareAssistantRoleId?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* The persisted, non-secret provider setting keys — the SINGLE source of truth
|
||||
* for which fields a settings update may write through to `settings.ai.provider`.
|
||||
* `satisfies readonly (keyof AiProviderSettings)[]` makes the compiler reject a
|
||||
* typo or a key that is not a real provider setting.
|
||||
*
|
||||
* The settings service consumes this directly. The generic workspace repo cannot
|
||||
* import AI types, so it keeps its own copy of the same keys, guarded by a parity
|
||||
* test against this constant (so any future drift fails in CI, not silently in
|
||||
* prod — a missing key there validates fine, passes the service, and is then
|
||||
* dropped at the SQL boundary with no error).
|
||||
*/
|
||||
export const PROVIDER_SETTINGS_KEYS = [
|
||||
'driver',
|
||||
'chatModel',
|
||||
'chatApiStyle',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
'embeddingBaseUrl',
|
||||
'sttModel',
|
||||
'sttBaseUrl',
|
||||
'sttApiStyle',
|
||||
'sttLanguage',
|
||||
'systemPrompt',
|
||||
'publicShareChatModel',
|
||||
'publicShareAssistantRoleId',
|
||||
] as const satisfies readonly (keyof AiProviderSettings)[];
|
||||
|
||||
/**
|
||||
* Fully resolved provider config, including the decrypted API key for the
|
||||
* stored driver. Returned by `AiSettingsService.resolve`. The keys are held in
|
||||
@@ -76,6 +116,7 @@ export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||
export interface MaskedAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
embeddingBaseUrl?: string;
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import { IsIn, IsOptional, IsString } from 'class-validator';
|
||||
import { AI_DRIVERS, AiDriver, STT_API_STYLES, SttApiStyle } from '../ai.types';
|
||||
import {
|
||||
AI_DRIVERS,
|
||||
AiDriver,
|
||||
CHAT_API_STYLES,
|
||||
ChatApiStyle,
|
||||
STT_API_STYLES,
|
||||
SttApiStyle,
|
||||
} from '../ai.types';
|
||||
|
||||
/**
|
||||
* Admin update payload for the workspace AI provider settings.
|
||||
@@ -18,6 +25,10 @@ export class UpdateAiSettingsDto {
|
||||
@IsString()
|
||||
chatModel?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsIn(CHAT_API_STYLES)
|
||||
chatApiStyle?: ChatApiStyle;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
embeddingModel?: string;
|
||||
|
||||
Reference in New Issue
Block a user