feat(ai/stt): add dictation language selection to STT settings
Add a per-workspace `sttLanguage` setting (ISO-639-1 hint; empty = auto-detect) and a searchable language picker in the Voice / STT settings card.

The hint is forwarded to the transcription endpoint only when non-empty, so auto-detect behaves exactly as before:
- multipart path: via the AI SDK `providerOptions.openai.language`
- JSON (OpenRouter) path: via a top-level `language` body field

Threaded through the whole stack: ai.types, update DTO, AiSettingsService (resolve/getMasked/update), the workspace.repo SQL allowlist, the client ai-settings service types, and the provider-settings form.

Adds en-US source keys and ru-RU translations.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -33,6 +33,8 @@ export interface UpdateAiSettingsInput {
|
||||
sttModel?: string;
|
||||
sttBaseUrl?: string;
|
||||
sttApiStyle?: SttApiStyle;
|
||||
// ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty = auto-detect.
|
||||
sttLanguage?: string;
|
||||
sttApiKey?: string;
|
||||
publicShareChatModel?: string;
|
||||
publicShareAssistantRoleId?: string;
|
||||
@@ -166,6 +168,8 @@ export class AiSettingsService {
|
||||
// Plain passthrough, no fallback; the transcribe path defaults unset to
|
||||
// 'multipart' (current behavior).
|
||||
sttApiStyle: provider.sttApiStyle,
|
||||
// Plain passthrough; empty/unset = auto-detect at the transcribe path.
|
||||
sttLanguage: provider.sttLanguage,
|
||||
baseUrl: provider.baseUrl,
|
||||
systemPrompt: provider.systemPrompt,
|
||||
};
|
||||
@@ -240,6 +244,7 @@ export class AiSettingsService {
|
||||
sttModel: provider.sttModel,
|
||||
sttBaseUrl: provider.sttBaseUrl,
|
||||
sttApiStyle: provider.sttApiStyle,
|
||||
sttLanguage: provider.sttLanguage,
|
||||
systemPrompt: provider.systemPrompt,
|
||||
publicShareChatModel: provider.publicShareChatModel,
|
||||
publicShareAssistantRoleId: provider.publicShareAssistantRoleId,
|
||||
@@ -279,6 +284,7 @@ export class AiSettingsService {
|
||||
'sttModel',
|
||||
'sttBaseUrl',
|
||||
'sttApiStyle',
|
||||
'sttLanguage',
|
||||
'systemPrompt',
|
||||
'publicShareChatModel',
|
||||
'publicShareAssistantRoleId',
|
||||
|
||||
@@ -212,12 +212,22 @@ export class AiService {
|
||||
const cfg = await this.aiSettings.resolve(workspaceId);
|
||||
if (!cfg?.sttModel) throw new AiSttNotConfiguredException();
|
||||
const baseURL = cfg.sttBaseUrl || cfg.baseUrl;
|
||||
// Trimmed language hint; empty/unset = auto-detect (never forward an empty
|
||||
// string to the provider, which would override auto-detect).
|
||||
const sttLanguage = cfg.sttLanguage?.trim() || undefined;
|
||||
|
||||
// Explicit, admin-chosen request encoding (no URL guessing). 'json' is the
|
||||
// OpenRouter style (JSON + base64 input_audio); everything else uses the
|
||||
// OpenAI-compatible multipart path via the AI SDK.
|
||||
if (cfg.sttApiStyle === 'json') {
|
||||
return this.transcribeJsonBase64(baseURL, cfg.sttApiKey, cfg.sttModel, audio, format);
|
||||
return this.transcribeJsonBase64(
|
||||
baseURL,
|
||||
cfg.sttApiKey,
|
||||
cfg.sttModel,
|
||||
audio,
|
||||
format,
|
||||
sttLanguage,
|
||||
);
|
||||
}
|
||||
|
||||
// Standard OpenAI-compatible multipart path (AI SDK). apiKey may be unused for
|
||||
@@ -226,14 +236,23 @@ export class AiService {
|
||||
apiKey: cfg.sttApiKey ?? 'unused',
|
||||
baseURL,
|
||||
}).transcription(cfg.sttModel);
|
||||
const { text } = await transcribe({ model, audio });
|
||||
const { text } = await transcribe({
|
||||
model,
|
||||
audio,
|
||||
// Forward the language hint only when set; the OpenAI transcription model
|
||||
// reads it from providerOptions.openai.language.
|
||||
...(sttLanguage
|
||||
? { providerOptions: { openai: { language: sttLanguage } } }
|
||||
: {}),
|
||||
});
|
||||
return text.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* JSON + base64 transcription body (OpenRouter-style). POSTs
|
||||
* { model, input_audio: { data, format } } to {baseURL}/audio/transcriptions
|
||||
* and returns { text }.
|
||||
* and returns { text }. The optional `language` ISO-639-1 hint is included as
|
||||
* a top-level body field only when set (empty/unset = auto-detect).
|
||||
*/
|
||||
private async transcribeJsonBase64(
|
||||
baseURL: string | undefined,
|
||||
@@ -241,6 +260,7 @@ export class AiService {
|
||||
model: string,
|
||||
audio: Uint8Array,
|
||||
format: string,
|
||||
language?: string,
|
||||
): Promise<string> {
|
||||
if (!baseURL) {
|
||||
throw new BadRequestException(
|
||||
@@ -256,6 +276,7 @@ export class AiService {
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
...(language ? { language } : {}),
|
||||
input_audio: {
|
||||
data: Buffer.from(audio).toString('base64'),
|
||||
format,
|
||||
|
||||
@@ -31,6 +31,8 @@ export interface AiProviderSettings {
|
||||
// STT-specific base URL. Falls back to baseUrl when empty/unset.
|
||||
sttBaseUrl?: string;
|
||||
sttApiStyle?: SttApiStyle;
|
||||
// ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty/unset = auto-detect.
|
||||
sttLanguage?: string;
|
||||
systemPrompt?: string;
|
||||
// Cheap chat model id used ONLY by the anonymous public-share assistant. The
|
||||
// driver / baseUrl / apiKey of the main chat provider are reused; this is the
|
||||
@@ -80,6 +82,8 @@ export interface MaskedAiSettings {
|
||||
sttModel?: string;
|
||||
sttBaseUrl?: string;
|
||||
sttApiStyle?: SttApiStyle;
|
||||
// ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty/unset = auto-detect.
|
||||
sttLanguage?: string;
|
||||
systemPrompt?: string;
|
||||
publicShareChatModel?: string;
|
||||
// Agent-role id whose persona the public-share assistant adopts; empty/unset
|
||||
|
||||
@@ -54,6 +54,11 @@ export class UpdateAiSettingsDto {
|
||||
@IsIn(STT_API_STYLES)
|
||||
sttApiStyle?: SttApiStyle;
|
||||
|
||||
// ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty = auto-detect.
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
sttLanguage?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
sttApiKey?: string;
|
||||
|
||||
Reference in New Issue
Block a user