Files
gitmost/apps/server/src/integrations/ai/ai-settings.service.ts
claude_code 9b61024b95 feat(ai-chat): header badge shows current/max context, max from AI settings (#189)
The floating chat window's header badge flipped meaning — a live per-turn token
counter while streaming, the persisted context size at rest — so it "reset to 1"
on each prompt and conflated two different numbers. Replace it with a stable
"current / max" context badge (e.g. `572 / 200k`). The live "Thinking · N tokens"
inside the chat body stays; only the duplicate live counter is removed from the
header.

Max comes from a new admin setting "Context window (tokens)". The server resolves
it and attaches `maxContextTokens` to the completed assistant turn's metadata
(next to contextTokens), so the badge needs no client-side model resolution and
this survives public shares / per-role models.

Server:
- ai.types: chatContextWindow on AiProviderSettings + PROVIDER_SETTINGS_KEYS +
  ResolvedAiConfig + MaskedAiSettings.
- workspace.repo: chatContextWindow in AI_PROVIDER_SETTINGS_ALLOWED (parity).
- update-ai-settings.dto: @IsInt @Min(0) chatContextWindow.
- ai-settings.service: coerce the ::text-stored value to a positive int in
  resolve()/getMasked().
- ai-chat.service: flushAssistant writes metadata.maxContextTokens (>0); the
  completed turn passes resolved.chatContextWindow.

Client:
- ai-chat.types: maxContextTokens on the message-row metadata.
- ai-chat-window: read maxContextTokens; render "current [/ max]"; drop the
  liveTurnTokens state/branch and the onLiveTurnTokens prop; new tooltip.
- chat-thread: remove the live-turn-token throttle effect and plumbing.
- count-stream-tokens: drop the now-dead liveTurnTokens()/types; keep
  estimateTokens.
- settings: chatContextWindow on IAiSettings(+Update) + a NumberInput in the AI
  provider settings form.

i18n: add the badge/settings keys (en, ru); remove the two now-unused keys.
Tests: flushAssistant maxContextTokens, DTO validation, trim token tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-25 22:39:09 +03:00

390 lines
15 KiB
TypeScript

import { BadRequestException, Injectable } from '@nestjs/common';
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueName, QueueJob } from '../queue/constants';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { SecretBoxService } from '../crypto/secret-box';
import {
AiDriver,
AiProviderSettings,
MaskedAiSettings,
ResolvedAiConfig,
SttApiStyle,
ChatApiStyle,
PROVIDER_SETTINGS_KEYS,
} from './ai.types';
/**
 * Partial update payload accepted by `AiSettingsService.update`; mirrors the
 * validated controller DTO. Every field is optional and an `undefined` field
 * is left untouched by `update`.
 *
 * The `apiKey` / `embeddingApiKey` / `sttApiKey` fields are write-only:
 * undefined = leave, '' = clear, non-empty = encrypt + store (§6.4/§8).
 */
export interface UpdateAiSettingsInput {
  // --- Provider / chat -----------------------------------------------------
  driver?: AiDriver;
  baseUrl?: string;
  apiKey?: string;
  chatModel?: string;
  chatApiStyle?: ChatApiStyle;
  // Max context window in tokens for the chat header badge. 0/empty = no limit.
  chatContextWindow?: number;
  systemPrompt?: string;

  // --- Embeddings ----------------------------------------------------------
  embeddingModel?: string;
  embeddingBaseUrl?: string;
  embeddingApiKey?: string;

  // --- Speech-to-text ------------------------------------------------------
  sttModel?: string;
  sttBaseUrl?: string;
  sttApiStyle?: SttApiStyle;
  // ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty = auto-detect.
  sttLanguage?: string;
  sttApiKey?: string;

  // --- Public-share assistant ----------------------------------------------
  publicShareChatModel?: string;
  publicShareAssistantRoleId?: string;
}
/**
 * Reads/writes the per-workspace AI provider config.
 *
 * Non-secret fields live in `settings.ai.provider`; the API keys (chat,
 * embedding, STT) live encrypted in `ai_provider_credentials` (per driver).
 * Decrypted keys are only ever returned by `resolve` (server-side use) and are
 * NEVER logged or returned to a client (§8).
 */
@Injectable()
export class AiSettingsService {
  constructor(
    private readonly workspaceRepo: WorkspaceRepo,
    private readonly aiAgentRoleRepo: AiAgentRoleRepo,
    private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
    private readonly pageEmbeddingRepo: PageEmbeddingRepo,
    private readonly pageRepo: PageRepo,
    private readonly secretBox: SecretBoxService,
    @InjectQueue(QueueName.AI_QUEUE) private readonly aiQueue: Queue,
  ) {}

  /**
   * Coerce a stored setting to a positive integer, or `undefined`.
   *
   * Provider values are stored as ::text (see workspace.repo.ts), so a numeric
   * setting such as `chatContextWindow` arrives as a string. The value is
   * floored BEFORE the positivity check so a fractional value in (0, 1) maps
   * to `undefined` rather than `0`.
   *
   * @param raw the stored value (string, number, or missing)
   * @returns the floored value when it is finite and > 0, else `undefined`
   */
  private static toPositiveInt(raw: unknown): number | undefined {
    const floored = Math.floor(Number(raw));
    return Number.isFinite(floored) && floored > 0 ? floored : undefined;
  }

  /**
   * Enqueue a full workspace RAG reindex (manual "Reindex now").
   *
   * Uses a stable per-workspace jobId so rapid re-triggers de-duplicate instead
   * of stacking multiple full reindex passes. A prior non-active job with that
   * id is removed first so a lingering completed/failed/waiting entry can never
   * block a fresh reindex (BullMQ ignores add() when the jobId already exists).
   * If a reindex is already running, remove() is a no-op (it leaves a
   * locked/active job in place, returning 0 without throwing), and the add()
   * below then de-duplicates against that still-present jobId — so the running
   * pass is kept and no duplicate is started. The .catch only guards against
   * transport/Redis errors.
   *
   * Also cancels any pending delayed WORKSPACE_DELETE_EMBEDDINGS job (scheduled
   * when AI Search was disabled) so it cannot wipe the embeddings we are about
   * to rebuild. The job no-ops if embeddings are unconfigured.
   *
   * @param workspaceId workspace whose pages should be re-embedded
   */
  async reindex(workspaceId: string): Promise<void> {
    // A reindex means embeddings must persist: drop the delayed purge, if any.
    await this.aiQueue
      .remove(`ai-search-disabled-${workspaceId}`)
      .catch(() => undefined);

    const jobId = `ai-reindex-${workspaceId}`;
    // Clear a prior non-active entry so a stale job can't block this reindex.
    // A locked/active job is left in place (remove() no-ops) and the add() below
    // de-duplicates against it, keeping the in-progress pass.
    await this.aiQueue.remove(jobId).catch(() => undefined);

    await this.aiQueue.add(
      QueueJob.WORKSPACE_CREATE_EMBEDDINGS,
      { workspaceId },
      {
        jobId,
        removeOnComplete: true,
        removeOnFail: true,
      },
    );
  }

  /**
   * Whether the anonymous public-share AI assistant is enabled for a workspace
   * (single master toggle `settings.ai.publicShareAssistant`, default false).
   * Used by the public `/api/shares/ai/stream` guardrail funnel: when off, the
   * route 404s so the feature's existence is not revealed.
   *
   * @returns true only when the stored flag is exactly `true`
   */
  async isPublicShareAssistantEnabled(workspaceId: string): Promise<boolean> {
    const workspace = await this.workspaceRepo.findById(workspaceId);
    const settings = (workspace?.settings ?? {}) as {
      ai?: { publicShareAssistant?: boolean };
    };
    return settings?.ai?.publicShareAssistant === true;
  }

  /**
   * Resolve the display name of the agent role acting as the public-share
   * assistant's identity, so the anonymous widget can label messages with the
   * persona name instead of the generic "AI agent". Returns null when no role
   * is configured, or the referenced role is missing/disabled (built-in persona
   * → the client falls back to "AI agent"). Mirrors the role resolution in
   * PublicShareChatService.resolveShareRole.
   *
   * @returns the trimmed role name, or null when there is none to show
   */
  async resolvePublicShareAssistantName(
    workspaceId: string,
  ): Promise<string | null> {
    const resolved = await this.resolve(workspaceId);
    const roleId = resolved?.publicShareAssistantRoleId;
    if (!roleId) return null;

    const role = await this.aiAgentRoleRepo.findById(roleId, workspaceId);
    if (!role || !role.enabled) return null;

    // A blank / whitespace-only name is treated the same as "no name".
    const name = role.name?.trim();
    return name ? name : null;
  }

  /**
   * Read the stored non-secret provider settings for a workspace.
   *
   * @returns `settings.ai.provider`, or an empty object when the workspace or
   *   any level of that path is missing
   */
  private async readProvider(
    workspaceId: string,
  ): Promise<Partial<AiProviderSettings>> {
    const workspace = await this.workspaceRepo.findById(workspaceId);
    const settings = (workspace?.settings ?? {}) as {
      ai?: { provider?: Partial<AiProviderSettings> };
    };
    return settings?.ai?.provider ?? {};
  }

  /**
   * Resolve the full config including the decrypted API keys for the stored
   * driver. Returns null when no driver is configured. Ollama needs no key, so
   * no credentials are looked up for it. Keys are never logged.
   *
   * @returns the resolved config, or null when no driver is stored
   */
  async resolve(workspaceId: string): Promise<ResolvedAiConfig | null> {
    const provider = await this.readProvider(workspaceId);
    if (!provider.driver) return null;

    const config: ResolvedAiConfig = {
      driver: provider.driver,
      chatModel: provider.chatModel,
      // Max context window for the chat header badge denominator. Stored as
      // text; 0/unset/invalid = no limit (undefined).
      chatContextWindow: AiSettingsService.toPositiveInt(
        provider.chatContextWindow,
      ),
      // Plain passthrough; getChatModel defaults unset to 'openai-compatible'.
      chatApiStyle: provider.chatApiStyle,
      // Cheap model id for the anonymous public-share assistant; reuses the chat
      // driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel.
      publicShareChatModel: provider.publicShareChatModel,
      // Agent-role id whose persona the public-share assistant adopts; empty/unset
      // = built-in locked persona.
      publicShareAssistantRoleId: provider.publicShareAssistantRoleId,
      embeddingModel: provider.embeddingModel,
      sttModel: provider.sttModel,
      // Plain passthrough, no fallback; the transcribe path defaults unset to
      // 'multipart' (current behavior).
      sttApiStyle: provider.sttApiStyle,
      // Plain passthrough; empty/unset = auto-detect at the transcribe path.
      sttLanguage: provider.sttLanguage,
      baseUrl: provider.baseUrl,
      systemPrompt: provider.systemPrompt,
    };

    // Effective embedding base URL: the embedding-specific value, else the chat
    // base URL. URL is non-secret and relevant for ollama too, so set it
    // unconditionally. `||` (not `??`) so an empty string also falls back.
    config.embeddingBaseUrl = provider.embeddingBaseUrl || provider.baseUrl;
    // Effective STT base URL: the STT-specific value, else the chat base URL.
    // Set unconditionally, same rationale as embeddingBaseUrl.
    config.sttBaseUrl = provider.sttBaseUrl || provider.baseUrl;

    if (provider.driver !== 'ollama') {
      const creds = await this.aiProviderCredentialsRepo.find(
        workspaceId,
        provider.driver,
      );
      if (creds?.apiKeyEnc) {
        config.apiKey = this.secretBox.decryptSecret(creds.apiKeyEnc);
      }
      // Effective embedding key: the embedding-specific key, else the chat key.
      config.embeddingApiKey = creds?.embeddingApiKeyEnc
        ? this.secretBox.decryptSecret(creds.embeddingApiKeyEnc)
        : config.apiKey;
      // Effective STT key: the STT-specific key, else the chat key.
      config.sttApiKey = creds?.sttApiKeyEnc
        ? this.secretBox.decryptSecret(creds.sttApiKeyEnc)
        : config.apiKey;
    }

    return config;
  }

  /**
   * Masked settings safe for admin clients. NEVER includes any key (even
   * encrypted); only `hasApiKey` / `hasEmbeddingApiKey` / `hasSttApiKey` for
   * the current driver. Returns the RAW stored `embeddingBaseUrl` /
   * `sttBaseUrl` (empty means "uses chat value"); the fallback is applied only
   * by `resolve`. Also reports RAG indexing coverage
   * (`indexedPages`/`totalPages`) for the settings UI.
   */
  async getMasked(workspaceId: string): Promise<MaskedAiSettings> {
    const provider = await this.readProvider(workspaceId);

    // Stored as ::text; coerce so the client receives a real positive integer
    // (or undefined for 0/unset/invalid).
    const chatContextWindow = AiSettingsService.toPositiveInt(
      provider.chatContextWindow,
    );

    let hasApiKey = false;
    let hasEmbeddingApiKey = false;
    let hasSttApiKey = false;
    if (provider.driver) {
      const creds = await this.aiProviderCredentialsRepo.find(
        workspaceId,
        provider.driver,
      );
      hasApiKey = !!creds?.apiKeyEnc;
      hasEmbeddingApiKey = !!creds?.embeddingApiKeyEnc;
      hasSttApiKey = !!creds?.sttApiKeyEnc;
    }

    // totalPages now counts only pages with embeddable content (non-empty text
    // or already-stored embeddings), so empty/text-less pages don't keep the
    // "Indexed N of M pages" bar below 100% forever.
    const [indexedPages, totalPages] = await Promise.all([
      this.pageEmbeddingRepo.countIndexedPages(workspaceId),
      this.pageRepo.countEmbeddablePages(workspaceId),
    ]);

    return {
      driver: provider.driver,
      chatModel: provider.chatModel,
      chatContextWindow,
      chatApiStyle: provider.chatApiStyle,
      embeddingModel: provider.embeddingModel,
      baseUrl: provider.baseUrl,
      embeddingBaseUrl: provider.embeddingBaseUrl,
      sttModel: provider.sttModel,
      sttBaseUrl: provider.sttBaseUrl,
      sttApiStyle: provider.sttApiStyle,
      sttLanguage: provider.sttLanguage,
      systemPrompt: provider.systemPrompt,
      publicShareChatModel: provider.publicShareChatModel,
      publicShareAssistantRoleId: provider.publicShareAssistantRoleId,
      hasApiKey,
      hasEmbeddingApiKey,
      hasSttApiKey,
      indexedPages,
      totalPages,
    };
  }

  /**
   * Apply a partial update. Non-secret fields are persisted via
   * `updateAiProviderSettings`; the chat / embedding / STT API keys are handled
   * separately, each write-only:
   *   - key === undefined → leave existing key untouched
   *   - key === ''        → clear the key for the target driver
   *   - key non-empty     → encrypt + upsert for the target driver
   *
   * Target driver for the keys = incoming dto.driver, else the stored driver.
   * The driver is validated BEFORE anything is written, so a rejected request
   * does not leave the non-secret fields half-applied.
   *
   * @param workspaceId workspace whose settings are updated
   * @param dto partial update; undefined fields are left as they are
   * @returns the masked settings after the update
   * @throws BadRequestException when any key is supplied but no driver can be
   *   determined
   */
  async update(
    workspaceId: string,
    dto: UpdateAiSettingsInput,
  ): Promise<MaskedAiSettings> {
    const { apiKey, embeddingApiKey, sttApiKey, ...nonSecret } = dto;

    // All three keys share the same target driver and the same "driver
    // required" guard, resolved once and up front. Reading the stored driver
    // before the patch is equivalent to reading it after: the patch only
    // changes `driver` when dto.driver is defined, and then dto.driver wins.
    const writesKey =
      apiKey !== undefined ||
      embeddingApiKey !== undefined ||
      sttApiKey !== undefined;
    const targetDriver = writesKey
      ? (dto.driver ?? (await this.readProvider(workspaceId)).driver)
      : undefined;
    if (writesKey && !targetDriver) {
      throw new BadRequestException(
        'Cannot set the API key without a driver; set the driver first',
      );
    }

    // Persist non-secret provider fields (only those present in the partial).
    const providerPatch: Partial<AiProviderSettings> = {};
    // Single source of truth for the writable provider keys (see ai.types).
    for (const key of PROVIDER_SETTINGS_KEYS) {
      if (nonSecret[key] !== undefined) {
        (providerPatch as Record<string, unknown>)[key] = nonSecret[key];
      }
    }
    if (Object.keys(providerPatch).length > 0) {
      await this.workspaceRepo.updateAiProviderSettings(
        workspaceId,
        providerPatch,
      );
    }

    // Key handling (write-only). `targetDriver` is only set when at least one
    // key was supplied, and is guaranteed non-empty by the guard above.
    if (targetDriver) {
      // Chat key.
      if (apiKey !== undefined) {
        if (apiKey === '') {
          await this.aiProviderCredentialsRepo.clearKey(
            workspaceId,
            targetDriver,
          );
        } else {
          const enc = this.secretBox.encryptSecret(apiKey);
          await this.aiProviderCredentialsRepo.upsert(
            workspaceId,
            targetDriver,
            enc,
          );
        }
      }

      // Embedding key.
      if (embeddingApiKey !== undefined) {
        if (embeddingApiKey === '') {
          await this.aiProviderCredentialsRepo.clearEmbeddingKey(
            workspaceId,
            targetDriver,
          );
        } else {
          const enc = this.secretBox.encryptSecret(embeddingApiKey);
          await this.aiProviderCredentialsRepo.upsertEmbeddingKey(
            workspaceId,
            targetDriver,
            enc,
          );
        }
      }

      // STT key.
      if (sttApiKey !== undefined) {
        if (sttApiKey === '') {
          await this.aiProviderCredentialsRepo.clearSttKey(
            workspaceId,
            targetDriver,
          );
        } else {
          const enc = this.secretBox.encryptSecret(sttApiKey);
          await this.aiProviderCredentialsRepo.upsertSttKey(
            workspaceId,
            targetDriver,
            enc,
          );
        }
      }
    }

    return this.getMasked(workspaceId);
  }
}