feat(ai): separate base URL and API key for chat vs embedding model
Per-workspace AI provider config previously shared a single base URL and a single API key between the chat model and the embedding model. Add a dedicated, optional embedding endpoint and token that fall back to the chat values when empty, preserving backward compatibility.

- db: new migration adds nullable `embedding_api_key_enc` to `ai_provider_credentials`; the chat key stays in `api_key_enc`
- repo: add `upsertEmbeddingKey` / `clearEmbeddingKey` (the on-conflict clause touches only its own column, so the chat and embedding keys never overwrite each other)
- ai-settings.service: store the non-secret `embeddingBaseUrl`; resolve() applies the fallback (embeddingBaseUrl || baseUrl; embedding key || chat key); getMasked() exposes the raw `embeddingBaseUrl` + `hasEmbeddingApiKey`, never the key; update() treats the embedding key as write-only
- ai.service: getEmbeddingModel() builds the provider from the embedding-specific values (openai: URL + key; gemini: key; ollama: URL); chat path unchanged
- client: new "Embedding base URL" and "Embedding API key" fields with fallback hints and a clear-key action

Requires running the DB migration on deploy.
This commit is contained in:
@@ -13,16 +13,18 @@ import {
|
||||
|
||||
/**
|
||||
* Shape of the partial update accepted by `update`. Mirrors the validated
|
||||
* controller DTO. `apiKey` is write-only: undefined = leave, '' = clear,
|
||||
* non-empty = encrypt + store (§6.4/§8).
|
||||
* controller DTO. `apiKey` / `embeddingApiKey` are write-only: undefined =
|
||||
* leave, '' = clear, non-empty = encrypt + store (§6.4/§8).
|
||||
*/
|
||||
export interface UpdateAiSettingsInput {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
embeddingBaseUrl?: string;
|
||||
systemPrompt?: string;
|
||||
apiKey?: string;
|
||||
embeddingApiKey?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -71,6 +73,11 @@ export class AiSettingsService {
|
||||
systemPrompt: provider.systemPrompt,
|
||||
};
|
||||
|
||||
// Effective embedding base URL: the embedding-specific value, else the chat
|
||||
// base URL. URL is non-secret and relevant for ollama too, so set it
|
||||
// unconditionally.
|
||||
config.embeddingBaseUrl = provider.embeddingBaseUrl || provider.baseUrl;
|
||||
|
||||
if (provider.driver !== 'ollama') {
|
||||
const creds = await this.aiProviderCredentialsRepo.find(
|
||||
workspaceId,
|
||||
@@ -79,26 +86,34 @@ export class AiSettingsService {
|
||||
if (creds?.apiKeyEnc) {
|
||||
config.apiKey = this.secretBox.decryptSecret(creds.apiKeyEnc);
|
||||
}
|
||||
// Effective embedding key: the embedding-specific key, else the chat key.
|
||||
config.embeddingApiKey = creds?.embeddingApiKeyEnc
|
||||
? this.secretBox.decryptSecret(creds.embeddingApiKeyEnc)
|
||||
: config.apiKey;
|
||||
}
|
||||
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Masked settings safe for admin clients. NEVER includes the key (even
|
||||
* encrypted); only `hasApiKey` for the current driver. Also reports RAG
|
||||
* indexing coverage (`indexedPages`/`totalPages`) for the settings UI.
|
||||
* Masked settings safe for admin clients. NEVER includes any key (even
|
||||
* encrypted); only `hasApiKey` / `hasEmbeddingApiKey` for the current driver.
|
||||
* Returns the RAW stored `embeddingBaseUrl` (empty means "uses chat value");
|
||||
* the fallback is applied only by `resolve`. Also reports RAG indexing
|
||||
* coverage (`indexedPages`/`totalPages`) for the settings UI.
|
||||
*/
|
||||
async getMasked(workspaceId: string): Promise<MaskedAiSettings> {
|
||||
const provider = await this.readProvider(workspaceId);
|
||||
|
||||
let hasApiKey = false;
|
||||
let hasEmbeddingApiKey = false;
|
||||
if (provider.driver) {
|
||||
const creds = await this.aiProviderCredentialsRepo.find(
|
||||
workspaceId,
|
||||
provider.driver,
|
||||
);
|
||||
hasApiKey = !!creds?.apiKeyEnc;
|
||||
hasEmbeddingApiKey = !!creds?.embeddingApiKeyEnc;
|
||||
}
|
||||
|
||||
const [indexedPages, totalPages] = await Promise.all([
|
||||
@@ -111,8 +126,10 @@ export class AiSettingsService {
|
||||
chatModel: provider.chatModel,
|
||||
embeddingModel: provider.embeddingModel,
|
||||
baseUrl: provider.baseUrl,
|
||||
embeddingBaseUrl: provider.embeddingBaseUrl,
|
||||
systemPrompt: provider.systemPrompt,
|
||||
hasApiKey,
|
||||
hasEmbeddingApiKey,
|
||||
indexedPages,
|
||||
totalPages,
|
||||
};
|
||||
@@ -120,19 +137,20 @@ export class AiSettingsService {
|
||||
|
||||
/**
|
||||
* Apply a partial update. Non-secret fields are persisted via
|
||||
* `updateAiProviderSettings`; the API key is handled separately:
|
||||
* - apiKey === undefined → leave existing key untouched
|
||||
* - apiKey === '' → clear the key for the target driver
|
||||
* - apiKey non-empty → encrypt + upsert for the target driver
|
||||
* `updateAiProviderSettings`; the chat / embedding API keys are handled
|
||||
* separately, each write-only:
|
||||
* - key === undefined → leave existing key untouched
|
||||
* - key === '' → clear the key for the target driver
|
||||
* - key non-empty → encrypt + upsert for the target driver
|
||||
*
|
||||
* Target driver for the key = incoming dto.driver, else the stored driver.
|
||||
* If a key is supplied but no driver can be determined → BadRequest.
|
||||
* Target driver for the keys = incoming dto.driver, else the stored driver.
|
||||
* If any key is supplied but no driver can be determined → BadRequest.
|
||||
*/
|
||||
async update(
|
||||
workspaceId: string,
|
||||
dto: UpdateAiSettingsInput,
|
||||
): Promise<MaskedAiSettings> {
|
||||
const { apiKey, ...nonSecret } = dto;
|
||||
const { apiKey, embeddingApiKey, ...nonSecret } = dto;
|
||||
|
||||
// Persist non-secret provider fields (only those present in the partial).
|
||||
const providerPatch: Partial<AiProviderSettings> = {};
|
||||
@@ -141,6 +159,7 @@ export class AiSettingsService {
|
||||
'chatModel',
|
||||
'embeddingModel',
|
||||
'baseUrl',
|
||||
'embeddingBaseUrl',
|
||||
'systemPrompt',
|
||||
] as const) {
|
||||
if (nonSecret[key] !== undefined) {
|
||||
@@ -154,8 +173,9 @@ export class AiSettingsService {
|
||||
);
|
||||
}
|
||||
|
||||
// Key handling (write-only).
|
||||
if (apiKey !== undefined) {
|
||||
// Key handling (write-only). Both keys share the same target driver and the
|
||||
// same "driver required" guard, resolved once.
|
||||
if (apiKey !== undefined || embeddingApiKey !== undefined) {
|
||||
const stored = await this.readProvider(workspaceId);
|
||||
const targetDriver = dto.driver ?? stored.driver;
|
||||
if (!targetDriver) {
|
||||
@@ -164,15 +184,38 @@ export class AiSettingsService {
|
||||
);
|
||||
}
|
||||
|
||||
if (apiKey === '') {
|
||||
await this.aiProviderCredentialsRepo.clearKey(workspaceId, targetDriver);
|
||||
} else {
|
||||
const enc = this.secretBox.encryptSecret(apiKey);
|
||||
await this.aiProviderCredentialsRepo.upsert(
|
||||
workspaceId,
|
||||
targetDriver,
|
||||
enc,
|
||||
);
|
||||
// Chat key.
|
||||
if (apiKey !== undefined) {
|
||||
if (apiKey === '') {
|
||||
await this.aiProviderCredentialsRepo.clearKey(
|
||||
workspaceId,
|
||||
targetDriver,
|
||||
);
|
||||
} else {
|
||||
const enc = this.secretBox.encryptSecret(apiKey);
|
||||
await this.aiProviderCredentialsRepo.upsert(
|
||||
workspaceId,
|
||||
targetDriver,
|
||||
enc,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Embedding key.
|
||||
if (embeddingApiKey !== undefined) {
|
||||
if (embeddingApiKey === '') {
|
||||
await this.aiProviderCredentialsRepo.clearEmbeddingKey(
|
||||
workspaceId,
|
||||
targetDriver,
|
||||
);
|
||||
} else {
|
||||
const enc = this.secretBox.encryptSecret(embeddingApiKey);
|
||||
await this.aiProviderCredentialsRepo.upsertEmbeddingKey(
|
||||
workspaceId,
|
||||
targetDriver,
|
||||
enc,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -66,34 +66,38 @@ export class AiService {
|
||||
* RAG indexer / semanticSearch (§6.7 stage D). Built PER WORKSPACE on demand,
|
||||
* same as getChatModel; the decrypted key is never logged.
|
||||
*
|
||||
* Uses the embedding-specific endpoint/key (`embeddingBaseUrl` /
|
||||
* `embeddingApiKey`), which fall back to the chat values when unset (resolved
|
||||
* by AiSettingsService.resolve).
|
||||
*
|
||||
* Throws AiEmbeddingNotConfiguredException (→ 503) when the driver,
|
||||
* embeddingModel or (for non-ollama) the API key is missing, so RAG callers
|
||||
* can 503 or skip independently of chat being configured.
|
||||
* embeddingModel or (for non-ollama) the embedding API key is missing, so RAG
|
||||
* callers can 503 or skip independently of chat being configured.
|
||||
*/
|
||||
async getEmbeddingModel(workspaceId: string): Promise<EmbeddingModel> {
|
||||
const cfg = await this.aiSettings.resolve(workspaceId);
|
||||
if (
|
||||
!cfg?.driver ||
|
||||
!cfg?.embeddingModel ||
|
||||
(cfg.driver !== 'ollama' && !cfg.apiKey)
|
||||
(cfg.driver !== 'ollama' && !cfg.embeddingApiKey)
|
||||
) {
|
||||
throw new AiEmbeddingNotConfiguredException();
|
||||
}
|
||||
|
||||
switch (cfg.driver) {
|
||||
case 'openai':
|
||||
// baseURL (when set) covers openai-compatible endpoints.
|
||||
// embeddingBaseUrl (when set) covers openai-compatible endpoints.
|
||||
return createOpenAI({
|
||||
apiKey: cfg.apiKey,
|
||||
baseURL: cfg.baseUrl,
|
||||
apiKey: cfg.embeddingApiKey,
|
||||
baseURL: cfg.embeddingBaseUrl,
|
||||
}).textEmbeddingModel(cfg.embeddingModel);
|
||||
case 'gemini':
|
||||
return createGoogleGenerativeAI({
|
||||
apiKey: cfg.apiKey,
|
||||
apiKey: cfg.embeddingApiKey,
|
||||
}).textEmbeddingModel(cfg.embeddingModel);
|
||||
case 'ollama':
|
||||
// Ollama needs no API key (e.g. nomic-embed-text).
|
||||
return createOllama({ baseURL: cfg.baseUrl }).textEmbeddingModel(
|
||||
return createOllama({ baseURL: cfg.embeddingBaseUrl }).textEmbeddingModel(
|
||||
cfg.embeddingModel,
|
||||
);
|
||||
default:
|
||||
|
||||
@@ -19,31 +19,40 @@ export interface AiProviderSettings {
|
||||
chatModel: string;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
// Embedding-specific base URL. Falls back to `baseUrl` when empty/unset.
|
||||
embeddingBaseUrl?: string;
|
||||
systemPrompt?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fully resolved provider config, including the decrypted API key for the
|
||||
* stored driver. Returned by `AiSettingsService.resolve`. The key is held in
|
||||
* memory only while building the provider and is never logged.
|
||||
* stored driver. Returned by `AiSettingsService.resolve`. The keys are held in
|
||||
* memory only while building the provider and are never logged.
|
||||
*
|
||||
* `embeddingBaseUrl` / `embeddingApiKey` are the embedding-specific endpoint and
|
||||
* key, already resolved with the chat-value fallback applied by `resolve`.
|
||||
*/
|
||||
export interface ResolvedAiConfig extends Partial<AiProviderSettings> {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
apiKey?: string;
|
||||
embeddingApiKey?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Masked provider settings safe to return to admin clients. NEVER includes the
|
||||
* API key (not even encrypted); only a `hasApiKey` boolean.
|
||||
* Masked provider settings safe to return to admin clients. NEVER includes any
|
||||
* API key (not even encrypted); only `hasApiKey` / `hasEmbeddingApiKey` booleans.
|
||||
* `embeddingBaseUrl` reflects the RAW stored value (empty means "uses chat value").
|
||||
*/
|
||||
export interface MaskedAiSettings {
|
||||
driver?: AiDriver;
|
||||
chatModel?: string;
|
||||
embeddingModel?: string;
|
||||
baseUrl?: string;
|
||||
embeddingBaseUrl?: string;
|
||||
systemPrompt?: string;
|
||||
hasApiKey: boolean;
|
||||
hasEmbeddingApiKey: boolean;
|
||||
// RAG indexing coverage for the settings UI.
|
||||
indexedPages: number;
|
||||
totalPages: number;
|
||||
|
||||
@@ -4,9 +4,10 @@ import { AI_DRIVERS, AiDriver } from '../ai.types';
|
||||
/**
|
||||
* Admin update payload for the workspace AI provider settings.
|
||||
*
|
||||
* `apiKey` is write-only (§8.2): provided → stored encrypted, '' → cleared,
|
||||
* absent → left untouched. It is NEVER returned by any endpoint. The global
|
||||
* ValidationPipe runs with `whitelist: true`, so unknown fields are stripped.
|
||||
* `apiKey` / `embeddingApiKey` are write-only (§8.2): provided → stored
|
||||
* encrypted, '' → cleared, absent → left untouched. They are NEVER returned by
|
||||
* any endpoint. The global ValidationPipe runs with `whitelist: true`, so
|
||||
* unknown fields are stripped.
|
||||
*/
|
||||
export class UpdateAiSettingsDto {
|
||||
@IsOptional()
|
||||
@@ -25,6 +26,10 @@ export class UpdateAiSettingsDto {
|
||||
@IsString()
|
||||
baseUrl?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
embeddingBaseUrl?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
systemPrompt?: string;
|
||||
@@ -32,4 +37,8 @@ export class UpdateAiSettingsDto {
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
apiKey?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
embeddingApiKey?: string;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user