fix(ai-chat): OpenAI Chat Completions for multi-turn + provider settings, stream UX & errors

Live-stand fixes (OpenRouter / OpenAI-compatible):

- openai provider: use .chat() (Chat Completions) instead of the default callable (Responses API), which gateways reject on multi-turn requests -> 400.
- updateAiProviderSettings: assemble settings.ai.provider via jsonb_build_object with ::text-cast bound params + jsonb_typeof self-heal (postgres.js was double-encoding it into an array; the ::text cast avoids 'could not determine data type of parameter').
- chat agent: drop the hard maxOutputTokens cap (it truncated complex tool calls); keep a tiny cap only on the test-connection ping.
- testConnection + chat stream: surface the real provider error (statusCode + message) to logs and the UI instead of generic masks; never log the API key.
- chat UI: typing indicator, incremental streaming render, tool 'running' status, Stop.

Also bundled (prior uncommitted ai-chat work):

- history 'AI agent' provenance badge
- vector RAG (pgvector image + page_embeddings + AI_QUEUE indexer + space-scoped semanticSearch)
- external MCP servers backend (@ai-sdk/mcp client, SSRF IP-pinning, encrypted headers, admin CRUD/Test)
- yjs duplicate-instance fix via pnpm patch (single CJS instance server-side)
2026-06-17 04:28:29 +03:00
parent 44b340dc1a
commit a4b7919753
44 changed files with 2633 additions and 122 deletions
--- a/apps/server/src/integrations/ai/ai-embedding-not-configured.exception.ts
+++ b/apps/server/src/integrations/ai/ai-embedding-not-configured.exception.ts
@@ -0,0 +1,13 @@
+import { ServiceUnavailableException } from '@nestjs/common';
+
+/**
+ * Thrown when no usable embedding config exists for the workspace (missing
+ * driver / embedding model / API key). Distinct from the chat variant so RAG
+ * callers (indexer / semanticSearch) can 503 or skip independently of chat
+ * being configured (§6.2/§6.7).
+ */
+export class AiEmbeddingNotConfiguredException extends ServiceUnavailableException {
+  constructor() {
+    super('AI embedding model not configured');
+  }
+}
--- a/apps/server/src/integrations/ai/ai.service.ts
+++ b/apps/server/src/integrations/ai/ai.service.ts
@@ -1,10 +1,16 @@
-import { Injectable } from '@nestjs/common';
-import { generateText, type LanguageModel } from 'ai';
+import { Injectable, Logger } from '@nestjs/common';
+import {
+  embedMany,
+  generateText,
+  type EmbeddingModel,
+  type LanguageModel,
+} from 'ai';
 import { createOpenAI } from '@ai-sdk/openai';
 import { createGoogleGenerativeAI } from '@ai-sdk/google';
 import { createOllama } from 'ai-sdk-ollama';
 import { AiSettingsService } from './ai-settings.service';
 import { AiNotConfiguredException } from './ai-not-configured.exception';
+import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception';

 /**
 * Builds AI SDK language models from per-workspace config and runs cheap
@@ -16,6 +22,8 @@ import { AiNotConfiguredException } from './ai-not-configured.exception';
 */
@Injectable()
 export class AiService {
+  private readonly logger = new Logger(AiService.name);
+
  constructor(private readonly aiSettings: AiSettingsService) {}

  /**
@@ -34,8 +42,13 @@ export class AiService {

    switch (cfg.driver) {
      case 'openai':
-        // baseURL (when set) covers openai-compatible endpoints.
-        return createOpenAI({ apiKey: cfg.apiKey, baseURL: cfg.baseUrl })(
+        // baseURL (when set) covers openai-compatible endpoints. Use Chat
+        // Completions (/chat/completions) — the portable OpenAI-compatible
+        // endpoint. The default callable createOpenAI(...)(model) targets the
+        // Responses API (/responses), which OpenAI-compatible gateways
+        // (OpenRouter, etc.) reject on multi-turn requests (history with
+        // assistant messages) → 400.
+        return createOpenAI({ apiKey: cfg.apiKey, baseURL: cfg.baseUrl }).chat(
          cfg.chatModel,
        );
      case 'gemini':
@@ -48,34 +61,90 @@ export class AiService {
    }
  }

+  /**
+   * Resolve the workspace config and build the text-embedding model used by the
+   * RAG indexer / semanticSearch (§6.7 stage D). Built PER WORKSPACE on demand,
+   * same as getChatModel; the decrypted key is never logged.
+   *
+   * Throws AiEmbeddingNotConfiguredException (→ 503) when the driver,
+   * embeddingModel or (for non-ollama) the API key is missing, so RAG callers
+   * can 503 or skip independently of chat being configured.
+   */
+  async getEmbeddingModel(workspaceId: string): Promise<EmbeddingModel> {
+    const cfg = await this.aiSettings.resolve(workspaceId);
+    if (
+      !cfg?.driver ||
+      !cfg?.embeddingModel ||
+      (cfg.driver !== 'ollama' && !cfg.apiKey)
+    ) {
+      throw new AiEmbeddingNotConfiguredException();
+    }
+
+    switch (cfg.driver) {
+      case 'openai':
+        // baseURL (when set) covers openai-compatible endpoints.
+        return createOpenAI({
+          apiKey: cfg.apiKey,
+          baseURL: cfg.baseUrl,
+        }).textEmbeddingModel(cfg.embeddingModel);
+      case 'gemini':
+        return createGoogleGenerativeAI({
+          apiKey: cfg.apiKey,
+        }).textEmbeddingModel(cfg.embeddingModel);
+      case 'ollama':
+        // Ollama needs no API key (e.g. nomic-embed-text).
+        return createOllama({ baseURL: cfg.baseUrl }).textEmbeddingModel(
+          cfg.embeddingModel,
+        );
+      default:
+        throw new AiEmbeddingNotConfiguredException();
+    }
+  }
+
+  /**
+   * Embed a batch of texts with the workspace embedding model. Returns one
+   * vector per input, in the same order. Thin wrapper over the AI SDK's
+   * embedMany; never logs the key or the texts.
+   */
+  async embedTexts(workspaceId: string, texts: string[]): Promise<number[][]> {
+    if (texts.length === 0) return [];
+    const model = await this.getEmbeddingModel(workspaceId);
+    const { embeddings } = await embedMany({ model, values: texts });
+    return embeddings;
+  }
+
  /**
   * Cheap connectivity check. Builds the model and asks for a one-word reply.
-   * Never leaks the provider's raw error body or the key — only a short,
-   * generic message (§6.4/§8.3).
+   * On AiNotConfiguredException returns a generic "not configured" message; for
+   * any other failure surfaces the provider's own cause (e.g. AI SDK
+   * `AI_APICallError` -> `${statusCode}: ${message}`) so a 402 / wrong model /
+   * missing key is diagnosable, and logs the full error. The decrypted key is
+   * never logged or returned — AI SDK error messages/4xx bodies do not contain
+   * it, and the resolved config (which holds the key) is never dumped (§6.4/§8.3).
   */
  async testConnection(
    workspaceId: string,
  ): Promise<{ ok: true } | { ok: false; error: string }> {
-    let model: LanguageModel;
    try {
-      model = await this.getChatModel(workspaceId);
+      const model = await this.getChatModel(workspaceId);
+      // maxOutputTokens keeps the probe cheap and avoids providers (e.g.
+      // OpenRouter) reserving/charging for the model's full max-token budget,
+      // which would 402 on a key with limited credit.
+      await generateText({ model, prompt: 'ping', maxOutputTokens: 16 });
+      return { ok: true };
    } catch (err) {
      if (err instanceof AiNotConfiguredException) {
        return { ok: false, error: 'AI provider not configured' };
      }
-      // Defensive: do not surface internal error details.
-      return { ok: false, error: 'AI provider not configured' };
-    }
-
-    try {
-      await generateText({ model, prompt: 'ping' });
-      return { ok: true };
-    } catch {
-      // Do NOT include the provider's raw error (may echo the request/key).
-      return {
-        ok: false,
-        error: 'Failed to reach the AI provider. Check the settings and key.',
-      };
+      // Surface the real provider cause so failures are diagnosable, and log the
+      // full error. AI SDK errors expose statusCode/message (and responseBody);
+      // none of these carry the key. Do NOT log/return the resolved config.
+      this.logger.error('AI test connection failed', err as Error);
+      const e = err as { statusCode?: number; message?: string };
+      const msg = e?.statusCode
+        ? `${e.statusCode}: ${e.message}`
+        : (e?.message ?? 'Unknown error');
+      return { ok: false, error: msg };
    }
  }
 }