diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json
index fc39a8d9..f1dac8c2 100644
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -1221,6 +1221,9 @@
"How transcription requests are sent to the endpoint": "How transcription requests are sent to the endpoint",
"OpenAI-compatible (multipart/form-data)": "OpenAI-compatible (multipart/form-data)",
"OpenRouter (JSON, base64 audio)": "OpenRouter (JSON, base64 audio)",
+ "Dictation language": "Dictation language",
+ "Auto-detect": "Auto-detect",
+ "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.": "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.",
"Agent role": "Agent role",
"Universal assistant": "Universal assistant",
"Add role": "Add role",
diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json
index 73a68aa4..6bb76aad 100644
--- a/apps/client/public/locales/ru-RU/translation.json
+++ b/apps/client/public/locales/ru-RU/translation.json
@@ -1123,5 +1123,8 @@
"Added {{name}} to favorites": "{{name}} добавлено в избранное",
"Removed {{name}} from favorites": "{{name}} удалено из избранного",
"Page menu for {{name}}": "Меню страницы для {{name}}",
- "Create subpage of {{name}}": "Создать подстраницу для {{name}}"
+ "Create subpage of {{name}}": "Создать подстраницу для {{name}}",
+ "Dictation language": "Язык диктовки",
+ "Auto-detect": "Автоопределение",
+ "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.": "Подсказка языка речи для модели транскрипции. «Автоопределение» оставляет выбор за моделью."
}
diff --git a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
index 4726a0ef..63500797 100644
--- a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
+++ b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
@@ -42,6 +42,40 @@ import { useAiRolesQuery } from "@/features/ai-chat/queries/ai-chat-query.ts";
import { IAiRole } from "@/features/ai-chat/types/ai-chat.types.ts";
import AiMcpServers from "./ai-mcp-servers.tsx";
+ // Curated dictation languages for the STT card; each `value` is an ISO-639-1
+ // code sent verbatim to the transcription model as the language hint. The
+ // empty-value "Auto-detect" entry is prepended in render (it needs translation).
+ const STT_LANGUAGE_OPTIONS: { value: string; label: string }[] = [
+ { value: "en", label: "English" },
+ { value: "ru", label: "Russian — Русский" },
+ { value: "uk", label: "Ukrainian — Українська" },
+ { value: "de", label: "German — Deutsch" },
+ { value: "fr", label: "French — Français" },
+ { value: "es", label: "Spanish — Español" },
+ { value: "it", label: "Italian — Italiano" },
+ { value: "pt", label: "Portuguese — Português" },
+ { value: "nl", label: "Dutch — Nederlands" },
+ { value: "pl", label: "Polish — Polski" },
+ { value: "tr", label: "Turkish — Türkçe" },
+ { value: "cs", label: "Czech — Čeština" },
+ { value: "sv", label: "Swedish — Svenska" },
+ { value: "fi", label: "Finnish — Suomi" },
+ { value: "da", label: "Danish — Dansk" },
+ { value: "no", label: "Norwegian — Norsk" },
+ { value: "ro", label: "Romanian — Română" },
+ { value: "hu", label: "Hungarian — Magyar" },
+ { value: "el", label: "Greek — Ελληνικά" },
+ { value: "he", label: "Hebrew — עברית" },
+ { value: "ar", label: "Arabic — العربية" },
+ { value: "hi", label: "Hindi — हिन्दी" },
+ { value: "id", label: "Indonesian — Bahasa Indonesia" },
+ { value: "vi", label: "Vietnamese — Tiếng Việt" },
+ { value: "th", label: "Thai — ไทย" },
+ { value: "ja", label: "Japanese — 日本語" },
+ { value: "ko", label: "Korean — 한국어" },
+ { value: "zh", label: "Chinese — 中文" },
+ ];
+
// No driver field: every endpoint is OpenAI-compatible, so the form carries only
// the user-editable fields. `apiKey` / `embeddingApiKey` are write-only buffers
// (empty means "leave unchanged" unless explicitly cleared).
@@ -63,6 +97,8 @@ const formSchema = z.object({
sttModel: z.string(),
sttBaseUrl: z.string(),
sttApiStyle: z.enum(["multipart", "json"]),
+ // ISO-639-1 dictation language; empty = auto-detect.
+ sttLanguage: z.string(),
sttApiKey: z.string(),
});
@@ -233,6 +269,7 @@ export default function AiProviderSettings() {
sttModel: "",
sttBaseUrl: "",
sttApiStyle: "multipart" as SttApiStyle,
+ sttLanguage: "",
sttApiKey: "",
},
});
@@ -254,6 +291,7 @@ export default function AiProviderSettings() {
sttModel: settings.sttModel ?? "",
sttBaseUrl: settings.sttBaseUrl ?? "",
sttApiStyle: settings.sttApiStyle ?? "multipart",
+ sttLanguage: settings.sttLanguage ?? "",
sttApiKey: "",
});
form.resetDirty();
@@ -288,6 +326,7 @@ export default function AiProviderSettings() {
sttModel: values.sttModel,
sttBaseUrl: values.sttBaseUrl,
sttApiStyle: values.sttApiStyle,
+ sttLanguage: values.sttLanguage,
};
// Key semantics (never send the stored key back) — see resolveKeyField:
@@ -923,6 +962,22 @@ export default function AiProviderSettings() {
{...form.getInputProps("sttApiStyle")}
/>
+
+
v !== undefined && ALLOWED.includes(k),
);
diff --git a/apps/server/src/integrations/ai/ai-settings.service.ts b/apps/server/src/integrations/ai/ai-settings.service.ts
index 6dafe127..e556c0d0 100644
--- a/apps/server/src/integrations/ai/ai-settings.service.ts
+++ b/apps/server/src/integrations/ai/ai-settings.service.ts
@@ -33,6 +33,8 @@ export interface UpdateAiSettingsInput {
sttModel?: string;
sttBaseUrl?: string;
sttApiStyle?: SttApiStyle;
+ // ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty = auto-detect.
+ sttLanguage?: string;
sttApiKey?: string;
publicShareChatModel?: string;
publicShareAssistantRoleId?: string;
@@ -166,6 +168,8 @@ export class AiSettingsService {
// Plain passthrough, no fallback; the transcribe path defaults unset to
// 'multipart' (current behavior).
sttApiStyle: provider.sttApiStyle,
+ // Plain passthrough; empty/unset = auto-detect at the transcribe path.
+ sttLanguage: provider.sttLanguage,
baseUrl: provider.baseUrl,
systemPrompt: provider.systemPrompt,
};
@@ -240,6 +244,7 @@ export class AiSettingsService {
sttModel: provider.sttModel,
sttBaseUrl: provider.sttBaseUrl,
sttApiStyle: provider.sttApiStyle,
+ sttLanguage: provider.sttLanguage,
systemPrompt: provider.systemPrompt,
publicShareChatModel: provider.publicShareChatModel,
publicShareAssistantRoleId: provider.publicShareAssistantRoleId,
@@ -279,6 +284,7 @@ export class AiSettingsService {
'sttModel',
'sttBaseUrl',
'sttApiStyle',
+ 'sttLanguage',
'systemPrompt',
'publicShareChatModel',
'publicShareAssistantRoleId',
diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts
index f6593f4c..858118f0 100644
--- a/apps/server/src/integrations/ai/ai.service.ts
+++ b/apps/server/src/integrations/ai/ai.service.ts
@@ -212,12 +212,22 @@ export class AiService {
const cfg = await this.aiSettings.resolve(workspaceId);
if (!cfg?.sttModel) throw new AiSttNotConfiguredException();
const baseURL = cfg.sttBaseUrl || cfg.baseUrl;
+ // Trimmed language hint; empty/unset = auto-detect (never forward an empty
+ // string to the provider, which would override auto-detect).
+ const sttLanguage = cfg.sttLanguage?.trim() || undefined;
// Explicit, admin-chosen request encoding (no URL guessing). 'json' is the
// OpenRouter style (JSON + base64 input_audio); everything else uses the
// OpenAI-compatible multipart path via the AI SDK.
if (cfg.sttApiStyle === 'json') {
- return this.transcribeJsonBase64(baseURL, cfg.sttApiKey, cfg.sttModel, audio, format);
+ return this.transcribeJsonBase64(
+ baseURL,
+ cfg.sttApiKey,
+ cfg.sttModel,
+ audio,
+ format,
+ sttLanguage,
+ );
}
// Standard OpenAI-compatible multipart path (AI SDK). apiKey may be unused for
@@ -226,14 +236,23 @@ export class AiService {
apiKey: cfg.sttApiKey ?? 'unused',
baseURL,
}).transcription(cfg.sttModel);
- const { text } = await transcribe({ model, audio });
+ const { text } = await transcribe({
+ model,
+ audio,
+ // Forward the language hint only when set; the OpenAI transcription model
+ // reads it from providerOptions.openai.language.
+ ...(sttLanguage
+ ? { providerOptions: { openai: { language: sttLanguage } } }
+ : {}),
+ });
return text.trim();
}
/**
* JSON + base64 transcription body (OpenRouter-style). POSTs
* { model, input_audio: { data, format } } to {baseURL}/audio/transcriptions
- * and returns { text }.
+ * and returns { text }. The optional `language` ISO-639-1 hint is included as
+ * a top-level body field only when set (empty/unset = auto-detect).
*/
private async transcribeJsonBase64(
baseURL: string | undefined,
@@ -241,6 +260,7 @@ export class AiService {
model: string,
audio: Uint8Array,
format: string,
+ language?: string,
): Promise {
if (!baseURL) {
throw new BadRequestException(
@@ -256,6 +276,7 @@ export class AiService {
},
body: JSON.stringify({
model,
+ ...(language ? { language } : {}),
input_audio: {
data: Buffer.from(audio).toString('base64'),
format,
diff --git a/apps/server/src/integrations/ai/ai.types.ts b/apps/server/src/integrations/ai/ai.types.ts
index d6a70892..0a3d925e 100644
--- a/apps/server/src/integrations/ai/ai.types.ts
+++ b/apps/server/src/integrations/ai/ai.types.ts
@@ -31,6 +31,8 @@ export interface AiProviderSettings {
// STT-specific base URL. Falls back to baseUrl when empty/unset.
sttBaseUrl?: string;
sttApiStyle?: SttApiStyle;
+ // ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty/unset = auto-detect.
+ sttLanguage?: string;
systemPrompt?: string;
// Cheap chat model id used ONLY by the anonymous public-share assistant. The
// driver / baseUrl / apiKey of the main chat provider are reused; this is the
@@ -80,6 +82,8 @@ export interface MaskedAiSettings {
sttModel?: string;
sttBaseUrl?: string;
sttApiStyle?: SttApiStyle;
+ // ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty/unset = auto-detect.
+ sttLanguage?: string;
systemPrompt?: string;
publicShareChatModel?: string;
// Agent-role id whose persona the public-share assistant adopts; empty/unset
diff --git a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts
index 891304ce..37fe8143 100644
--- a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts
+++ b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts
@@ -54,6 +54,11 @@ export class UpdateAiSettingsDto {
@IsIn(STT_API_STYLES)
sttApiStyle?: SttApiStyle;
+ // ISO-639-1 dictation language hint (e.g. 'en', 'ru'). Empty = auto-detect.
+ @IsOptional()
+ @IsString()
+ sttLanguage?: string;
+
@IsOptional()
@IsString()
sttApiKey?: string;