feat(ai): server-side voice dictation (STT) with mic in chat and editor
Add push-to-talk voice dictation that transcribes recorded audio on the server via the workspace's OpenAI-compatible AI provider (Whisper / gpt-4o-transcribe / self-hosted whisper), then inserts the text. Backend: - New `stt_api_key_enc` column + migration; STT creds parity with chat/ embeddings (sttModel/sttBaseUrl/sttApiKey, write-only key, fallbacks to chat baseUrl/key). Both provider whitelists updated (service + repo). - AiService.getTranscriptionModel + AiTranscriptionService. - Gated POST /ai-chat/transcribe (dictation flag → 403, JWT + workspace scope + throttle, 25MB cap, MIME whitelist, never logs audio/key). - New `settings.ai.dictation` workspace flag (DTO + service + audit). Frontend: - Wire up the Voice/STT settings card (model/base URL/key) and the Voice-dictation toggle. - New `features/dictation`: useDictation (MediaRecorder state machine), MicButton, transcribe service; integrated into the chat composer and a new editor-toolbar dictation group, both gated by ai.dictation.
This commit is contained in:
@@ -0,0 +1,18 @@
|
||||
import { type Kysely } from 'kysely';
|
||||
|
||||
export async function up(db: Kysely<any>): Promise<void> {
|
||||
// Encrypted, STT-specific provider key. Separate from `api_key_enc`
|
||||
// (the chat key) so the transcription model can use a different token.
|
||||
// When NULL, the STT model falls back to `api_key_enc`.
|
||||
await db.schema
|
||||
.alterTable('ai_provider_credentials')
|
||||
.addColumn('stt_api_key_enc', 'text', (col) => col)
|
||||
.execute();
|
||||
}
|
||||
|
||||
export async function down(db: Kysely<any>): Promise<void> {
|
||||
await db.schema
|
||||
.alterTable('ai_provider_credentials')
|
||||
.dropColumn('stt_api_key_enc')
|
||||
.execute();
|
||||
}
|
||||
@@ -98,4 +98,42 @@ export class AiProviderCredentialsRepo {
|
||||
.where('driver', '=', driver)
|
||||
.execute();
|
||||
}
|
||||
|
||||
// Upsert the STT-specific encrypted key. If no row exists yet this inserts one
|
||||
// with `apiKeyEnc` left null (the column is nullable). On conflict only
|
||||
// `sttApiKeyEnc` / `updatedAt` are touched, so the chat & embedding keys are kept.
|
||||
async upsertSttKey(
|
||||
workspaceId: string,
|
||||
driver: string,
|
||||
sttApiKeyEnc: string,
|
||||
trx?: KyselyTransaction,
|
||||
): Promise<AiProviderCredentials> {
|
||||
const db = dbOrTx(this.db, trx);
|
||||
return db
|
||||
.insertInto('aiProviderCredentials')
|
||||
.values({ workspaceId, driver, sttApiKeyEnc })
|
||||
.onConflict((oc) =>
|
||||
oc.columns(['workspaceId', 'driver']).doUpdateSet({
|
||||
sttApiKeyEnc,
|
||||
updatedAt: new Date(),
|
||||
}),
|
||||
)
|
||||
.returningAll()
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
// Clear only the STT-specific key; the chat & embedding keys are kept.
|
||||
async clearSttKey(
|
||||
workspaceId: string,
|
||||
driver: string,
|
||||
trx?: KyselyTransaction,
|
||||
): Promise<void> {
|
||||
const db = dbOrTx(this.db, trx);
|
||||
await db
|
||||
.updateTable('aiProviderCredentials')
|
||||
.set({ sttApiKeyEnc: null, updatedAt: new Date() })
|
||||
.where('workspaceId', '=', workspaceId)
|
||||
.where('driver', '=', driver)
|
||||
.execute();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,7 +239,7 @@ export class WorkspaceRepo {
|
||||
// is a real jsonb object, never a double-encoded string. The CASE self-heals
|
||||
// workspaces whose settings.ai.provider was previously corrupted into an
|
||||
// array/string.
|
||||
const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'systemPrompt'];
|
||||
const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'systemPrompt'];
|
||||
const entries = Object.entries(provider).filter(
|
||||
([k, v]) => v !== undefined && ALLOWED.includes(k),
|
||||
);
|
||||
|
||||
@@ -14,6 +14,8 @@ export interface AiProviderCredentials {
|
||||
apiKeyEnc: string | null;
|
||||
// Encrypted, embedding-specific provider key. Falls back to apiKeyEnc when null.
|
||||
embeddingApiKeyEnc: string | null;
|
||||
// Encrypted, STT-specific provider key. Falls back to apiKeyEnc when null.
|
||||
sttApiKeyEnc: string | null;
|
||||
createdAt: Generated<Timestamp>;
|
||||
updatedAt: Generated<Timestamp>;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user