fix(ai-chat): OpenAI Chat Completions for multi-turn + provider settings, stream UX & errors

Live-stand fixes (OpenRouter / OpenAI-compatible):
- openai provider: use .chat() (Chat Completions) instead of the default callable (Responses API), which gateways reject on multi-turn -> 400.
- updateAiProviderSettings: assemble settings.ai.provider via jsonb_build_object with ::text-cast bound params + jsonb_typeof self-heal (postgres.js was double-encoding it into an array; the ::text cast avoids 'could not determine data type of parameter').
- chat agent: drop the hard maxOutputTokens cap (truncated complex tool calls); keep a tiny cap only on the test-connection ping.
- testConnection + chat stream: surface the real provider error (statusCode+message) to logs and the UI instead of generic masks; never log the API key.
- chat UI: typing indicator, incremental streaming render, tool 'running' status, Stop.

Also bundled (prior uncommitted ai-chat work):
- history 'AI agent' provenance badge; vector RAG (pgvector image + page_embeddings + AI_QUEUE indexer + space-scoped semanticSearch); external MCP servers backend (@ai-sdk/mcp client, SSRF IP-pinning, encrypted headers, admin CRUD/Test); yjs duplicate-instance fix via pnpm patch (single CJS instance server-side).
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
import { ServiceUnavailableException } from '@nestjs/common';
|
||||
|
||||
/**
 * Signals that the workspace has no usable embedding configuration (the
 * driver, the embedding model or the API key is missing).
 *
 * Kept separate from the chat "not configured" exception so that RAG callers
 * (indexer / semanticSearch) can answer 503 or skip work on their own,
 * regardless of whether chat is configured (§6.2/§6.7).
 */
export class AiEmbeddingNotConfiguredException extends ServiceUnavailableException {
  constructor() {
    // Fixed message: no workspace or configuration details are included.
    const message = 'AI embedding model not configured';
    super(message);
  }
}
|
||||
@@ -1,10 +1,16 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { generateText, type LanguageModel } from 'ai';
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import {
|
||||
embedMany,
|
||||
generateText,
|
||||
type EmbeddingModel,
|
||||
type LanguageModel,
|
||||
} from 'ai';
|
||||
import { createOpenAI } from '@ai-sdk/openai';
|
||||
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
||||
import { createOllama } from 'ai-sdk-ollama';
|
||||
import { AiSettingsService } from './ai-settings.service';
|
||||
import { AiNotConfiguredException } from './ai-not-configured.exception';
|
||||
import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception';
|
||||
|
||||
/**
|
||||
* Builds AI SDK language models from per-workspace config and runs cheap
|
||||
@@ -16,6 +22,8 @@ import { AiNotConfiguredException } from './ai-not-configured.exception';
|
||||
*/
|
||||
@Injectable()
|
||||
export class AiService {
|
||||
private readonly logger = new Logger(AiService.name);
|
||||
|
||||
constructor(private readonly aiSettings: AiSettingsService) {}
|
||||
|
||||
/**
|
||||
@@ -34,8 +42,13 @@ export class AiService {
|
||||
|
||||
switch (cfg.driver) {
|
||||
case 'openai':
|
||||
// baseURL (when set) covers openai-compatible endpoints.
|
||||
return createOpenAI({ apiKey: cfg.apiKey, baseURL: cfg.baseUrl })(
|
||||
// baseURL (when set) covers openai-compatible endpoints. Use Chat
|
||||
// Completions (/chat/completions) — the portable OpenAI-compatible
|
||||
// endpoint. The default callable createOpenAI(...)(model) targets the
|
||||
// Responses API (/responses), which OpenAI-compatible gateways
|
||||
// (OpenRouter, etc.) reject on multi-turn requests (history with
|
||||
// assistant messages) → 400.
|
||||
return createOpenAI({ apiKey: cfg.apiKey, baseURL: cfg.baseUrl }).chat(
|
||||
cfg.chatModel,
|
||||
);
|
||||
case 'gemini':
|
||||
@@ -48,34 +61,90 @@ export class AiService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resolves the workspace AI settings and builds the text-embedding model
 * consumed by the RAG indexer / semanticSearch (§6.7 stage D). The model is
 * created on demand for each call; the decrypted key is never logged here.
 *
 * @param workspaceId Workspace whose AI settings are resolved.
 * @returns The embedding model for the configured driver.
 * @throws AiEmbeddingNotConfiguredException (→ 503) when the driver or the
 *   embedding model is missing, when a non-ollama driver has no API key, or
 *   when the driver is not one of openai / gemini / ollama.
 */
async getEmbeddingModel(workspaceId: string): Promise<EmbeddingModel> {
  const settings = await this.aiSettings.resolve(workspaceId);

  // A driver and an embedding model are mandatory for every provider.
  if (!settings?.driver || !settings.embeddingModel) {
    throw new AiEmbeddingNotConfiguredException();
  }
  // Ollama is the only driver allowed to run without an API key.
  if (settings.driver !== 'ollama' && !settings.apiKey) {
    throw new AiEmbeddingNotConfiguredException();
  }

  const modelId = settings.embeddingModel;

  if (settings.driver === 'openai') {
    // A custom baseURL (when set) covers openai-compatible endpoints.
    const openai = createOpenAI({
      apiKey: settings.apiKey,
      baseURL: settings.baseUrl,
    });
    return openai.textEmbeddingModel(modelId);
  }

  if (settings.driver === 'gemini') {
    const google = createGoogleGenerativeAI({ apiKey: settings.apiKey });
    return google.textEmbeddingModel(modelId);
  }

  if (settings.driver === 'ollama') {
    // No API key is passed for Ollama (e.g. nomic-embed-text).
    const ollama = createOllama({ baseURL: settings.baseUrl });
    return ollama.textEmbeddingModel(modelId);
  }

  // Unknown driver value: treat it as "not configured".
  throw new AiEmbeddingNotConfiguredException();
}
|
||||
|
||||
/**
 * Embeds a batch of texts with the workspace embedding model via the AI SDK's
 * embedMany and returns its `embeddings` (documented by the caller contract
 * as one vector per input, in input order). Neither the key nor the texts are
 * logged here.
 *
 * @param workspaceId Workspace whose embedding model is used.
 * @param texts Texts to embed; an empty array short-circuits to `[]`.
 * @returns The embedding vectors produced for `texts`.
 * @throws Whatever getEmbeddingModel or embedMany throws.
 */
async embedTexts(workspaceId: string, texts: string[]): Promise<number[][]> {
  // Nothing to embed: skip settings resolution and the provider call.
  if (!texts.length) {
    return [];
  }

  const embeddingModel = await this.getEmbeddingModel(workspaceId);
  const result = await embedMany({ model: embeddingModel, values: texts });
  return result.embeddings;
}
|
||||
|
||||
/**
 * Cheap connectivity check: builds the workspace chat model and asks it for a
 * tiny reply.
 *
 * - When the provider is not configured (AiNotConfiguredException) a generic
 *   "not configured" message is returned.
 * - Any other failure is logged and its cause is returned to the caller so
 *   that a 402 / wrong model / bad key is diagnosable: `${statusCode}:
 *   ${message}` when the error exposes a numeric `statusCode` (e.g. the AI
 *   SDK's API call error), otherwise just the message.
 *
 * The resolved config (which holds the decrypted key) is never logged or
 * returned from here (§6.4/§8.3).
 *
 * @param workspaceId Workspace whose chat provider is probed.
 * @returns `{ ok: true }` on success, otherwise `{ ok: false, error }` with a
 *   human-readable cause. This method does not throw for provider failures.
 */
async testConnection(
  workspaceId: string,
): Promise<{ ok: true } | { ok: false; error: string }> {
  try {
    const model = await this.getChatModel(workspaceId);
    // maxOutputTokens keeps the probe cheap and avoids providers (e.g.
    // OpenRouter) reserving/charging for the model's full max-token budget,
    // which would 402 on a key with limited credit.
    await generateText({ model, prompt: 'ping', maxOutputTokens: 16 });
    return { ok: true };
  } catch (err: unknown) {
    if (err instanceof AiNotConfiguredException) {
      return { ok: false, error: 'AI provider not configured' };
    }

    // Pass the stack as a string: the Nest logger's second argument is the
    // stack trace, not an Error object. Only message + stack are logged, not
    // the resolved config.
    this.logger.error(
      'AI test connection failed',
      err instanceof Error ? err.stack : String(err),
    );

    // Narrow the unknown value instead of trusting a cast; anything thrown
    // that is not an object simply yields the fallback message.
    const details: { statusCode?: unknown; message?: unknown } =
      typeof err === 'object' && err !== null ? err : {};
    const message =
      typeof details.message === 'string' && details.message.length > 0
        ? details.message
        : 'Unknown error';
    const error =
      typeof details.statusCode === 'number'
        ? `${details.statusCode}: ${message}`
        : message;

    return { ok: false, error };
  }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user