fix(ai-chat): OpenAI Chat Completions for multi-turn + provider settings, stream UX & errors

Live-stand fixes (OpenRouter / OpenAI-compatible):

- openai provider: use .chat() (Chat Completions) instead of the default callable
  (Responses API), which gateways reject on multi-turn -> 400.
- updateAiProviderSettings: assemble settings.ai.provider via jsonb_build_object
  with ::text-cast bound params + jsonb_typeof self-heal (postgres.js was
  double-encoding it into an array; the ::text cast avoids 'could not determine
  data type of parameter').
- chat agent: drop the hard maxOutputTokens cap (truncated complex tool calls);
  keep a tiny cap only on the test-connection ping.
- testConnection + chat stream: surface the real provider error (statusCode+message)
  to logs and the UI instead of generic masks; never log the API key.
- chat UI: typing indicator, incremental streaming render, tool 'running' status, Stop.

Also bundled (prior uncommitted ai-chat work):
- history 'AI agent' provenance badge; vector RAG (pgvector image + page_embeddings
  + AI_QUEUE indexer + space-scoped semanticSearch); external MCP servers backend
  (@ai-sdk/mcp client, SSRF IP-pinning, encrypted headers, admin CRUD/Test);
  yjs duplicate-instance fix via pnpm patch (single CJS instance server-side).
This commit is contained in:
vvzvlad
2026-06-17 04:28:29 +03:00
parent 44b340dc1a
commit a4b7919753
44 changed files with 2633 additions and 122 deletions

View File

@@ -0,0 +1,13 @@
import { ServiceUnavailableException } from '@nestjs/common';
/**
 * Signals that the workspace has no usable embedding configuration (missing
 * driver, embedding model or API key).
 *
 * Kept separate from the chat "not configured" exception so RAG callers
 * (indexer / semanticSearch) can answer 503 or skip on their own, regardless
 * of whether chat is configured (§6.2/§6.7).
 */
export class AiEmbeddingNotConfiguredException extends ServiceUnavailableException {
  constructor() {
    // Fixed, non-sensitive message returned to the caller.
    const message = 'AI embedding model not configured';
    super(message);
  }
}

View File

@@ -1,10 +1,16 @@
import { Injectable } from '@nestjs/common';
import { generateText, type LanguageModel } from 'ai';
import { Injectable, Logger } from '@nestjs/common';
import {
embedMany,
generateText,
type EmbeddingModel,
type LanguageModel,
} from 'ai';
import { createOpenAI } from '@ai-sdk/openai';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createOllama } from 'ai-sdk-ollama';
import { AiSettingsService } from './ai-settings.service';
import { AiNotConfiguredException } from './ai-not-configured.exception';
import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception';
/**
* Builds AI SDK language models from per-workspace config and runs cheap
@@ -16,6 +22,8 @@ import { AiNotConfiguredException } from './ai-not-configured.exception';
*/
@Injectable()
export class AiService {
private readonly logger = new Logger(AiService.name);
constructor(private readonly aiSettings: AiSettingsService) {}
/**
@@ -34,8 +42,13 @@ export class AiService {
switch (cfg.driver) {
case 'openai':
// baseURL (when set) covers openai-compatible endpoints.
return createOpenAI({ apiKey: cfg.apiKey, baseURL: cfg.baseUrl })(
// baseURL (when set) covers openai-compatible endpoints. Use Chat
// Completions (/chat/completions) — the portable OpenAI-compatible
// endpoint. The default callable createOpenAI(...)(model) targets the
// Responses API (/responses), which OpenAI-compatible gateways
// (OpenRouter, etc.) reject on multi-turn requests (history with
// assistant messages) → 400.
return createOpenAI({ apiKey: cfg.apiKey, baseURL: cfg.baseUrl }).chat(
cfg.chatModel,
);
case 'gemini':
@@ -48,34 +61,90 @@ export class AiService {
}
}
/**
 * Builds the text-embedding model for a workspace from its resolved AI
 * settings. Used by the RAG indexer / semanticSearch (§6.7 stage D). The
 * model is constructed on every call from the workspace's own config, and
 * nothing from that config (including the key) is logged here.
 *
 * @param workspaceId Workspace whose AI settings are resolved.
 * @returns The embedding model for the configured driver.
 * @throws AiEmbeddingNotConfiguredException (a 503) when the driver or the
 *   embedding model is missing, when a non-ollama driver has no API key, or
 *   when the driver is not one of openai / gemini / ollama.
 */
async getEmbeddingModel(workspaceId: string): Promise<EmbeddingModel> {
  const settings = await this.aiSettings.resolve(workspaceId);

  // Both the driver and the embedding model name are mandatory.
  if (!settings?.driver || !settings.embeddingModel) {
    throw new AiEmbeddingNotConfiguredException();
  }

  const driver = settings.driver;
  const modelId = settings.embeddingModel;

  // Ollama is the only driver accepted without an API key.
  if (driver !== 'ollama' && !settings.apiKey) {
    throw new AiEmbeddingNotConfiguredException();
  }

  if (driver === 'openai') {
    // baseURL (when set) covers openai-compatible endpoints.
    const openai = createOpenAI({
      apiKey: settings.apiKey,
      baseURL: settings.baseUrl,
    });
    return openai.textEmbeddingModel(modelId);
  }

  if (driver === 'gemini') {
    const google = createGoogleGenerativeAI({ apiKey: settings.apiKey });
    return google.textEmbeddingModel(modelId);
  }

  if (driver === 'ollama') {
    // No API key is passed for Ollama (e.g. nomic-embed-text).
    const ollama = createOllama({ baseURL: settings.baseUrl });
    return ollama.textEmbeddingModel(modelId);
  }

  // Unknown driver value: treat as not configured.
  throw new AiEmbeddingNotConfiguredException();
}
/**
 * Embeds a batch of texts with the workspace's embedding model by delegating
 * to the AI SDK's embedMany. Neither the key nor the texts are logged here.
 *
 * @param workspaceId Workspace whose embedding model is used.
 * @param texts Inputs to embed; an empty array short-circuits to `[]`.
 * @returns One vector per input, in input order.
 * @throws AiEmbeddingNotConfiguredException propagated from
 *   getEmbeddingModel when no usable embedding config exists.
 */
async embedTexts(workspaceId: string, texts: string[]): Promise<number[][]> {
  // Nothing to embed: skip resolving the model entirely.
  if (!texts.length) {
    return [];
  }

  const embeddingModel = await this.getEmbeddingModel(workspaceId);
  const result = await embedMany({ model: embeddingModel, values: texts });
  return result.embeddings;
}
/**
* Cheap connectivity check. Builds the model and asks for a one-word reply.
* Never leaks the provider's raw error body or the key — only a short,
* generic message (§6.4/§8.3).
* On AiNotConfiguredException returns a generic "not configured" message; for
* any other failure surfaces the provider's own cause (e.g. AI SDK
* `AI_APICallError` -> `${statusCode}: ${message}`) so a 402 / wrong model /
* missing key is diagnosable, and logs the full error. The decrypted key is
* never logged or returned — AI SDK error messages/4xx bodies do not contain
* it, and the resolved config (which holds the key) is never dumped (§6.4/§8.3).
*/
// NOTE(review): this span appears to interleave the removed and the added
// version of testConnection (the diff's +/- markers were lost): `model` is
// declared twice, there are two try/catch sequences, and statements follow a
// `return`. Confirm against the repository before editing the code itself.
//
// Visible contract: resolves to `{ ok: true }` when a 'ping' generateText call
// succeeds, otherwise to `{ ok: false, error }` — it returns a result object
// rather than throwing.
async testConnection(
workspaceId: string,
): Promise<{ ok: true } | { ok: false; error: string }> {
let model: LanguageModel;
try {
model = await this.getChatModel(workspaceId);
const model = await this.getChatModel(workspaceId);
// maxOutputTokens keeps the probe cheap and avoids providers (e.g.
// OpenRouter) reserving/charging for the model's full max-token budget,
// which would 402 on a key with limited credit.
await generateText({ model, prompt: 'ping', maxOutputTokens: 16 });
return { ok: true };
} catch (err) {
// A missing/incomplete chat config is reported with a fixed generic message.
if (err instanceof AiNotConfiguredException) {
return { ok: false, error: 'AI provider not configured' };
}
// Defensive: do not surface internal error details.
return { ok: false, error: 'AI provider not configured' };
}
// NOTE(review): the try/catch below looks like the pre-change probe (no
// token cap, generic error message) — presumably removed lines; verify.
try {
await generateText({ model, prompt: 'ping' });
return { ok: true };
} catch {
// Do NOT include the provider's raw error (may echo the request/key).
return {
ok: false,
error: 'Failed to reach the AI provider. Check the settings and key.',
};
// NOTE(review): the lines below reference `err`, which this `catch` does not
// bind — presumably they belong to the post-change `catch (err)` branch.
// Surface the real provider cause so failures are diagnosable, and log the
// full error. AI SDK errors expose statusCode/message (and responseBody);
// none of these carry the key. Do NOT log/return the resolved config.
this.logger.error('AI test connection failed', err as Error);
const e = err as { statusCode?: number; message?: string };
// Message format: "<statusCode>: <message>" when a status code is present,
// else the bare message, else 'Unknown error'.
const msg = e?.statusCode
? `${e.statusCode}: ${e.message}`
: (e?.message ?? 'Unknown error');
return { ok: false, error: msg };
}
}
}

View File

@@ -22,6 +22,17 @@ export interface IPageHistoryJob {
pageId: string;
}
/**
 * Payload placed on AI_QUEUE when page content changes and a RAG reindex is
 * needed (§6.7 stage D / §14[M1]).
 *
 * Producers: the collab persistence extension (on `onStoreDocument`) and the
 * page-delete path; in the delete case the listed pages are the ones whose
 * embeddings must be purged.
 */
export interface IPageContentUpdatedJob {
  /** Workspace that owns the affected pages. */
  workspaceId: string;
  /** Ids of the pages to reindex (or, on delete, to purge embeddings for). */
  pageIds: string[];
}
export interface INotificationCreateJob {
userId: string;
workspaceId: string;