feat(ai): separate base URL and API key for chat vs embedding model

Per-workspace AI provider config previously shared a single base URL and a single API key between the chat model and the embedding model. Add a dedicated, optional embedding base URL and API key that fall back to the chat values when empty, preserving backward compatibility.

- db: new migration adds nullable `embedding_api_key_enc` to `ai_provider_credentials`; the chat key stays in `api_key_enc`
- repo: add `upsertEmbeddingKey` / `clearEmbeddingKey` (each on-conflict clause touches only its own column, so the chat and embedding keys never overwrite each other)
- ai-settings.service: store the non-secret `embeddingBaseUrl`; resolve() applies the fallback (embeddingBaseUrl || baseUrl; embedding key || chat key); getMasked() exposes the raw `embeddingBaseUrl` and `hasEmbeddingApiKey`, never the key itself; update() treats the embedding key as write-only
- ai.service: getEmbeddingModel() builds the openai/gemini/ollama provider with the embedding-specific URL/key; the chat path is unchanged
- client: new "Embedding base URL" and "Embedding API key" fields with fallback hints and a clear-key action

Requires running the DB migration on deploy.
2026-06-18 01:33:45 +03:00
parent 334a50f003
commit a7f244053b
10 changed files with 245 additions and 47 deletions
--- a/apps/server/src/integrations/ai/ai.service.ts
+++ b/apps/server/src/integrations/ai/ai.service.ts
@@ -66,34 +66,38 @@ export class AiService {
   * RAG indexer / semanticSearch (§6.7 stage D). Built PER WORKSPACE on demand,
   * same as getChatModel; the decrypted key is never logged.
   *
+   * Uses the embedding-specific endpoint/key (`embeddingBaseUrl` /
+   * `embeddingApiKey`), which fall back to the chat values when unset (resolved
+   * by AiSettingsService.resolve).
+   *
   * Throws AiEmbeddingNotConfiguredException (→ 503) when the driver,
-   * embeddingModel or (for non-ollama) the API key is missing, so RAG callers
-   * can 503 or skip independently of chat being configured.
+   * embeddingModel or (for non-ollama) the embedding API key is missing, so RAG
+   * callers can 503 or skip independently of chat being configured.
   */
  async getEmbeddingModel(workspaceId: string): Promise<EmbeddingModel> {
    const cfg = await this.aiSettings.resolve(workspaceId);
    if (
      !cfg?.driver ||
      !cfg?.embeddingModel ||
-      (cfg.driver !== 'ollama' && !cfg.apiKey)
+      (cfg.driver !== 'ollama' && !cfg.embeddingApiKey)
    ) {
      throw new AiEmbeddingNotConfiguredException();
    }

    switch (cfg.driver) {
      case 'openai':
-        // baseURL (when set) covers openai-compatible endpoints.
+        // embeddingBaseUrl (when set) covers openai-compatible endpoints.
        return createOpenAI({
-          apiKey: cfg.apiKey,
-          baseURL: cfg.baseUrl,
+          apiKey: cfg.embeddingApiKey,
+          baseURL: cfg.embeddingBaseUrl,
        }).textEmbeddingModel(cfg.embeddingModel);
      case 'gemini':
        return createGoogleGenerativeAI({
-          apiKey: cfg.apiKey,
+          apiKey: cfg.embeddingApiKey,
        }).textEmbeddingModel(cfg.embeddingModel);
      case 'ollama':
        // Ollama needs no API key (e.g. nomic-embed-text).
-        return createOllama({ baseURL: cfg.baseUrl }).textEmbeddingModel(
+        return createOllama({ baseURL: cfg.embeddingBaseUrl }).textEmbeddingModel(
          cfg.embeddingModel,
        );
      default: