From 44a1b5b003f5eef51bb610018caecfe029e9dabe Mon Sep 17 00:00:00 2001
From: claude_code <claude_code@vvzvlad.xyz>
Date: Mon, 22 Jun 2026 23:59:35 +0300
Subject: [PATCH] feat(dictation): gate streaming dictation behind a workspace
 toggle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Streaming (silence-cut) dictation was hardcoded on. Put it behind a per-workspace
flag settings.ai.dictationStreaming, default off, with batch dictation as the
default and fallback. Mirrors the existing settings.ai.dictation flag end to end:

- server: aiDictationStreaming on UpdateWorkspaceDto + workspace.service writes
  settings.ai.dictationStreaming via updateAiSettings (jsonb merge keeps siblings)
- client: IWorkspaceAiSettings.dictationStreaming, an optimistic "Streaming
  dictation" sub-toggle under "Voice dictation" (disabled when dictation is off)
- gate the MicButton streaming prop in the editor toolbar and chat composer on
  the flag instead of a literal true

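When the flag is absent/false both call sites pass streaming=false, so the VAD
model/wasm are never fetched and dictation runs on the stable batch path.
Because streaming was previously hardcoded on, existing workspaces fall back to
batch dictation until the toggle is enabled. Reuses the existing STT model and
/ai-chat/transcribe — no new provider/model/endpoint fields.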

Removes the backlog entry now that it is implemented.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../public/locales/en-US/translation.json     |   2 +
 .../ai-chat/components/chat-input.tsx         |   6 +-
 .../fixed-toolbar/groups/dictation-group.tsx  |   9 +-
 .../components/ai-provider-settings.tsx       |  60 ++++++++++
 .../workspace/types/workspace.types.ts        |   2 +
 .../workspace/dto/update-workspace.dto.ts     |   4 +
 .../workspace/services/workspace.service.ts   |  15 +++
 .../streaming-dictation-feature-toggle.md     | 106 ------------------
 8 files changed, 96 insertions(+), 108 deletions(-)
 delete mode 100644 docs/backlog/streaming-dictation-feature-toggle.md
diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json
index fdbb681d..acceeb48 100644
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -1201,6 +1201,8 @@
   "Semantic search": "Semantic search",
   "Voice / STT": "Voice / STT",
   "Voice dictation": "Voice dictation",
+  "Streaming dictation": "Streaming dictation",
+  "Transcribe as you speak, cutting on pauses": "Transcribe as you speak, cutting on pauses",
   "Voice dictation is not available yet.": "Voice dictation is not available yet.",
   "Test endpoint": "Test endpoint",
   "Save endpoints": "Save endpoints",
diff --git a/apps/client/src/features/ai-chat/components/chat-input.tsx b/apps/client/src/features/ai-chat/components/chat-input.tsx
index 58661cb6..1a3a1bde 100644
--- a/apps/client/src/features/ai-chat/components/chat-input.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-input.tsx
@@ -35,6 +35,10 @@ export default function ChatInput({
   const [value, setValue] = useAtom(aiChatDraftAtom);
   const workspace = useAtomValue(workspaceAtom);
   const isDictationEnabled = workspace?.settings?.ai?.dictation === true;
+  // Streaming (silence-cut) dictation is opt-in per workspace; absent/false
+  // keeps the stable batch path.
+  const streamingDictation =
+    workspace?.settings?.ai?.dictationStreaming === true;
 
   const submit = (): void => {
     const text = value.trim();
@@ -71,7 +75,7 @@ export default function ChatInput({
       {isDictationEnabled && (
         <MicButton
           size="lg"
-          streaming
+          streaming={streamingDictation}
           disabled={isStreaming || disabled}
           onText={(text) => setValue((v) => (v ? `${v} ${text}` : text))}
         />
diff --git a/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx b/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx
index f9ee2198..e8921816 100644
--- a/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx
+++ b/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx
@@ -1,5 +1,7 @@
 import { FC, useRef } from "react";
 import type { Editor } from "@tiptap/react";
+import { useAtomValue } from "jotai";
+import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
 import { MicButton } from "@/features/dictation/components/mic-button";
 
 interface Props {
@@ -9,6 +11,11 @@ interface Props {
 }
 
 export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
+  // Streaming (silence-cut) dictation is opt-in per workspace; absent/false
+  // keeps the stable batch path.
+  const workspace = useAtomValue(workspaceAtom);
+  const streamingDictation =
+    workspace?.settings?.ai?.dictationStreaming === true;
   // Caret snapshot taken when dictation starts (where the first segment lands).
   const rangeRef = useRef<{ from: number; to: number } | null>(null);
   // Running insertion point: after each inserted segment we remember the caret
@@ -70,7 +77,7 @@ export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
   return (
     <MicButton
       size="md"
-      streaming
+      streaming={streamingDictation}
       onStart={handleStart}
       onText={handleText}
       disabled={!editor.isEditable}
diff --git a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
index 7c7764c8..f57348a1 100644
--- a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
+++ b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
@@ -267,6 +267,8 @@ export default function AiProviderSettings() {
   const [dictationEnabled, setDictationEnabled] = useState<boolean>(
     workspace?.settings?.ai?.dictation ?? false,
   );
+  const [streamingDictationEnabled, setStreamingDictationEnabled] =
+    useState<boolean>(workspace?.settings?.ai?.dictationStreaming ?? false);
   const [publicShareAssistantEnabled, setPublicShareAssistantEnabled] =
     useState<boolean>(
       workspace?.settings?.ai?.publicShareAssistant ?? false,
@@ -274,6 +276,8 @@ export default function AiProviderSettings() {
   const [chatToggleLoading, setChatToggleLoading] = useState(false);
   const [searchToggleLoading, setSearchToggleLoading] = useState(false);
   const [dictationToggleLoading, setDictationToggleLoading] = useState(false);
+  const [streamingDictationToggleLoading, setStreamingDictationToggleLoading] =
+    useState(false);
   const [
     publicShareAssistantToggleLoading,
     setPublicShareAssistantToggleLoading,
@@ -512,6 +516,35 @@ export default function AiProviderSettings() {
     }
   }
 
+  // Optimistic toggle for the streaming (silence-cut) dictation sub-mode
+  // (settings.ai.dictationStreaming). Only meaningful when dictation is on.
+  async function handleToggleStreamingDictation(value: boolean) {
+    // Remember the current switch state so a failed save can be rolled back.
+    const rollbackTo = streamingDictationEnabled;
+    setStreamingDictationToggleLoading(true);
+    // Optimistic: flip the switch before the request resolves.
+    setStreamingDictationEnabled(value);
+    try {
+      const saved = await updateWorkspace({ aiDictationStreaming: value });
+      // Store the workspace with the flag forced to the submitted value.
+      const ai = { ...saved.settings?.ai, dictationStreaming: value };
+      setWorkspace({ ...saved, settings: { ...saved.settings, ai } });
+      notifications.show({ message: t("Updated successfully") });
+    } catch (err) {
+      // Undo the optimistic flip; prefer the server-provided error text.
+      setStreamingDictationEnabled(rollbackTo);
+      type ApiError = { response?: { data?: { message?: string } } };
+      const serverMessage = (err as ApiError)?.response?.data?.message;
+      notifications.show({
+        message: serverMessage ?? t("Failed to update data"),
+        color: "red",
+      });
+    } finally {
+      // Clear the loading flag whether the save succeeded or failed.
+      setStreamingDictationToggleLoading(false);
+    }
+  }
+
   // Optimistic toggle for the anonymous public-share AI assistant
   // (settings.ai.publicShareAssistant). When off, the public endpoint 404s.
   async function handleTogglePublicShareAssistant(value: boolean) {
@@ -952,6 +985,33 @@ export default function AiProviderSettings() {
           )}
         </Text>
 
+        {/* Streaming dictation is a sub-mode of voice dictation: it cuts on
+            pauses and transcribes each segment as you speak. Disabled unless
+            dictation itself is on. */}
+        <Group justify="space-between" align="center" wrap="nowrap">
+          <Stack gap={0}>
+            <Text fw={600} size="sm">
+              {t("Streaming dictation")}
+            </Text>
+            <Text size="xs" c="dimmed">
+              {t("Transcribe as you speak, cutting on pauses")}
+            </Text>
+          </Stack>
+          <Switch
+            label={t("Streaming dictation")}
+            labelPosition="left"
+            checked={streamingDictationEnabled}
+            disabled={
+              !dictationEnabled ||
+              dictationToggleLoading ||
+              streamingDictationToggleLoading
+            }
+            onChange={(e) =>
+              handleToggleStreamingDictation(e.currentTarget.checked)
+            }
+          />
+        </Group>
+
         <Group grow align="flex-start">
           <TextInput
             label={t("Model")}
diff --git a/apps/client/src/features/workspace/types/workspace.types.ts b/apps/client/src/features/workspace/types/workspace.types.ts
index 14eb0a91..0dcdd5a3 100644
--- a/apps/client/src/features/workspace/types/workspace.types.ts
+++ b/apps/client/src/features/workspace/types/workspace.types.ts
@@ -25,6 +25,7 @@ export interface IWorkspace {
   mcpEnabled?: boolean;
   aiChat?: boolean;
   aiDictation?: boolean;
+  aiDictationStreaming?: boolean;
   aiPublicShareAssistant?: boolean;
   trashRetentionDays?: number;
   restrictApiToAdmins?: boolean;
@@ -62,6 +63,7 @@ export interface IWorkspaceAiSettings {
   mcp?: boolean;
   chat?: boolean;
   dictation?: boolean;
+  dictationStreaming?: boolean;
   publicShareAssistant?: boolean;
 }
 
diff --git a/apps/server/src/core/workspace/dto/update-workspace.dto.ts b/apps/server/src/core/workspace/dto/update-workspace.dto.ts
index 404593d6..8d206b86 100644
--- a/apps/server/src/core/workspace/dto/update-workspace.dto.ts
+++ b/apps/server/src/core/workspace/dto/update-workspace.dto.ts
@@ -55,6 +55,10 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) {
   @IsBoolean()
   aiDictation: boolean;
 
+  @IsOptional()
+  @IsBoolean()
+  aiDictationStreaming: boolean;
+
   // Workspace master toggle that enables/disables the HTML embed block type.
   // Persisted at settings.htmlEmbed. ABSENT/false => OFF (default). The block
   // itself renders in a sandboxed iframe, so this is a feature switch, not a
diff --git a/apps/server/src/core/workspace/services/workspace.service.ts b/apps/server/src/core/workspace/services/workspace.service.ts
index bb564e79..504ce33d 100644
--- a/apps/server/src/core/workspace/services/workspace.service.ts
+++ b/apps/server/src/core/workspace/services/workspace.service.ts
@@ -511,6 +511,20 @@ export class WorkspaceService {
         );
       }
 
+      if (typeof updateWorkspaceDto.aiDictationStreaming !== 'undefined') {
+        const prev = settingsBefore?.ai?.dictationStreaming ?? false;
+        if (prev !== updateWorkspaceDto.aiDictationStreaming) {
+          before.aiDictationStreaming = prev;
+          after.aiDictationStreaming = updateWorkspaceDto.aiDictationStreaming;
+        }
+        await this.workspaceRepo.updateAiSettings(
+          workspaceId,
+          'dictationStreaming',
+          updateWorkspaceDto.aiDictationStreaming,
+          trx,
+        );
+      }
+
       if (typeof updateWorkspaceDto.htmlEmbed !== 'undefined') {
         const prev = settingsBefore?.htmlEmbed ?? false;
         if (prev !== updateWorkspaceDto.htmlEmbed) {
@@ -564,6 +578,7 @@ export class WorkspaceService {
       delete updateWorkspaceDto.allowMemberTemplates;
       delete updateWorkspaceDto.aiChat;
       delete updateWorkspaceDto.aiDictation;
+      delete updateWorkspaceDto.aiDictationStreaming;
       delete updateWorkspaceDto.htmlEmbed;
       delete updateWorkspaceDto.trackerHead;
       delete updateWorkspaceDto.aiPublicShareAssistant;
diff --git a/docs/backlog/streaming-dictation-feature-toggle.md b/docs/backlog/streaming-dictation-feature-toggle.md
deleted file mode 100644
index 402c41f5..00000000
--- a/docs/backlog/streaming-dictation-feature-toggle.md
+++ /dev/null
@@ -1,106 +0,0 @@
-# Стриминговая (по тишине) диктовка под фиче-тогглом, по умолчанию ВЫКЛ
-
-Статус: **открыто.**
-
-## Контекст
-
-Стриминговая диктовка (нарезка по тишине через Silero VAD,
-`@ricky0123/vad-web`) уже в `develop` и сейчас **жёстко включена**: `MicButton`
-получает проп `streaming` литералом `true` в двух местах — редактор
-([dictation-group.tsx](../../apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx))
-и чат
-([chat-input.tsx](../../apps/client/src/features/ai-chat/components/chat-input.tsx)).
-Фича экспериментальная:
-
-- тяжёлые ассеты (ONNX-модель + ORT-wasm, 13–26 МБ, грузятся в браузер при
-  первом использовании);
-- задержка инициализации модели на первом клике (компиляция wasm + подъём
-  inference-сессии — повторяется на каждую загрузку страницы);
-- много мелких запросов на `/ai-chat/transcribe` (по одному на сегмент речи)
-  вместо одного на запись.
-
-Её нужно сделать **opt-in на воркспейс, по умолчанию выключенной**, с обычной
-батч-диктовкой как дефолтом и фолбэком.
-
-## Цель
-
-Спрятать стриминговый путь за булевым флагом воркспейса
-`settings.ai.dictationStreaming` (default `false`). Выкл → текущая стабильная
-батч-диктовка. Вкл → стриминговая.
-
-**Минимализм (явно):** один булев флаг, переиспользуем существующую STT-модель
-и эндпоинт `/ai-chat/transcribe`, **без новых полей провайдера / модели /
-эндпоинта / секретов** — осознанное требование после претензий к realtime-PR
-(#118) за лишние поля настроек.
-
-## Дизайн
-
-### Сервер
-
-- В типе AI-настроек
-  ([integrations/ai/ai.types.ts](../../apps/server/src/integrations/ai/ai.types.ts))
-  и в
-  [dto/update-ai-settings.dto.ts](../../apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts)
-  добавить `dictationStreaming?: boolean` рядом с уже существующим флагом
-  `dictation`. Проверить, валидируется ли апдейт настроек по whitelist
-  (`ai-settings.service.ts`) — если да, внести ключ; иначе passthrough.
-- Это **только клиентский поведенческий флаг**: эндпоинт транскрипции и
-  STT-модель не меняются (стриминг переиспользует `/ai-chat/transcribe`).
-  Флаг просто отдаётся в составе `settings.ai`, который клиент уже читает.
-
-### Клиент
-
-- Тип
-  [features/workspace/types/workspace.types.ts](../../apps/client/src/features/workspace/types/workspace.types.ts)
-  (`settings.ai`, рядом с `dictation?: boolean`): добавить
-  `dictationStreaming?: boolean`.
-- UI
-  [ai-provider-settings.tsx](../../apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx):
-  добавить Switch «Streaming dictation (cut on pauses)» **внутри/рядом** с
-  тумблером «Voice dictation» — активен только когда `dictation` включена (это
-  под-режим диктовки). Оптимистичный апдейт по образцу `dictation`
-  (см. `handleDictationToggle` и запись `ai: { ...ai, dictation: value }`),
-  пишет `settings.ai.dictationStreaming`. Default off. Новый i18n-ключ.
-- Гейтинг: в `dictation-group.tsx` и `chat-input.tsx` заменить жёсткий
-  `streaming` (литерал `true`) на `streaming={settings.ai.dictationStreaming === true}`.
-  Проп `streaming` у `MicButton` уже выбирает хук (`useStreamingDictation` vs
-  `useDictation`) — там менять ничего не нужно.
-
-## Критерии приёмки
-
-- Свежий воркспейс (флага нет) → mic-кнопка использует **батч**-диктовку;
-  ассеты VAD (ONNX/wasm) **не грузятся** (ленивый `import()` в
-  `useStreamingDictation.start()` срабатывает только при `streaming` и клике,
-  которого при выкл не будет — оба хука инертны до `start()`).
-- Тоггл вкл → стриминговая диктовка работает и в редакторе, и в чате.
-- Тоггл выкл → возврат к батчу; стриминговые ассеты не подгружаются.
-- Нет новых полей модели / эндпоинта / секрета — переиспользуется
-  диктовочная STT-модель и `/ai-chat/transcribe`.
-- Флаг персистится на воркспейс и гейтится как прочие `settings.ai.*`.
-
-## Затрагиваемые файлы (указатели)
-
-- **Сервер:** `integrations/ai/ai.types.ts`,
-  `integrations/ai/dto/update-ai-settings.dto.ts`,
-  `integrations/ai/ai-settings.service.ts` (если есть нормализация/whitelist).
-- **Клиент:** `features/workspace/types/workspace.types.ts`,
-  `features/workspace/components/settings/components/ai-provider-settings.tsx`
-  (Switch + i18n), `features/editor/components/fixed-toolbar/groups/dictation-group.tsx`,
-  `features/ai-chat/components/chat-input.tsx`.
-
-## Заметки / краевые случаи
-
-- Батч-диктовка остаётся дефолтом и фолбэком (в т.ч. если стриминговая
-  инициализация падает).
-- Подтвердить, что выкл-состояние не тянет ни одного VAD-байта: `MicButton`
-  хоть и вызывает оба хука безусловно (правило хуков), оба инертны до
-  `start()`, поэтому при `streaming=false` модель/wasm не запрашиваются.
-- **Не** добавлять отдельные модель/эндпоинт под стриминг — переиспользовать
-  диктовочные (явное требование после realtime-PR).
-
-## Вне scope
-
-- Preload / мгновенный старт и латентность инициализации модели — отдельный
-  follow-up.
-- Realtime-websocket путь (PR #118, [streaming-dictation-plan.md](../streaming-dictation-plan.md))
-  — не мержится.