feat(dictation): gate streaming dictation behind a workspace toggle

Streaming (silence-cut) dictation was hardcoded on. Put it behind a per-workspace flag settings.ai.dictationStreaming, default off, with batch dictation as the default and fallback.

Mirrors the existing settings.ai.dictation flag end to end:
- server: aiDictationStreaming on UpdateWorkspaceDto + workspace.service writes settings.ai.dictationStreaming via updateAiSettings (jsonb merge keeps siblings)
- client: IWorkspaceAiSettings.dictationStreaming, an optimistic "Streaming dictation" sub-toggle under "Voice dictation" (disabled when dictation is off)
- gate the MicButton streaming prop in the editor toolbar and chat composer on the flag instead of a literal true

When the flag is absent/false both call sites pass streaming=false, so the VAD model/wasm are never fetched and behavior is unchanged. Reuses the existing STT model and /ai-chat/transcribe — no new provider/model/endpoint fields.

Removes the backlog entry now that it is implemented.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-22 23:59:35 +03:00
parent ee25d52965
commit 44a1b5b003
8 changed files with 96 additions and 108 deletions
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -1201,6 +1201,8 @@
  "Semantic search": "Semantic search",
  "Voice / STT": "Voice / STT",
  "Voice dictation": "Voice dictation",
+  "Streaming dictation": "Streaming dictation",
+  "Transcribe as you speak, cutting on pauses": "Transcribe as you speak, cutting on pauses",
  "Voice dictation is not available yet.": "Voice dictation is not available yet.",
  "Test endpoint": "Test endpoint",
  "Save endpoints": "Save endpoints",
--- a/apps/client/src/features/ai-chat/components/chat-input.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-input.tsx
@@ -35,6 +35,10 @@ export default function ChatInput({
  const [value, setValue] = useAtom(aiChatDraftAtom);
  const workspace = useAtomValue(workspaceAtom);
  const isDictationEnabled = workspace?.settings?.ai?.dictation === true;
+  // Streaming (silence-cut) dictation is opt-in per workspace; absent/false
+  // keeps the stable batch path.
+  const streamingDictation =
+    workspace?.settings?.ai?.dictationStreaming === true;

  const submit = (): void => {
    const text = value.trim();
@@ -71,7 +75,7 @@ export default function ChatInput({
      {isDictationEnabled && (
        <MicButton
          size="lg"
-          streaming
+          streaming={streamingDictation}
          disabled={isStreaming || disabled}
          onText={(text) => setValue((v) => (v ? `${v} ${text}` : text))}
        />
--- a/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx
+++ b/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx
@@ -1,5 +1,7 @@
 import { FC, useRef } from "react";
 import type { Editor } from "@tiptap/react";
+import { useAtomValue } from "jotai";
+import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
 import { MicButton } from "@/features/dictation/components/mic-button";

 interface Props {
@@ -9,6 +11,11 @@ interface Props {
 }

 export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
+  // Streaming (silence-cut) dictation is opt-in per workspace; absent/false
+  // keeps the stable batch path.
+  const workspace = useAtomValue(workspaceAtom);
+  const streamingDictation =
+    workspace?.settings?.ai?.dictationStreaming === true;
  // Caret snapshot taken when dictation starts (where the first segment lands).
  const rangeRef = useRef<{ from: number; to: number } | null>(null);
  // Running insertion point: after each inserted segment we remember the caret
@@ -70,7 +77,7 @@ export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
  return (
    <MicButton
      size="md"
-      streaming
+      streaming={streamingDictation}
      onStart={handleStart}
      onText={handleText}
      disabled={!editor.isEditable}
--- a/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
+++ b/apps/client/src/features/workspace/components/settings/components/ai-provider-settings.tsx
@@ -267,6 +267,8 @@ export default function AiProviderSettings() {
  const [dictationEnabled, setDictationEnabled] = useState<boolean>(
    workspace?.settings?.ai?.dictation ?? false,
  );
+  const [streamingDictationEnabled, setStreamingDictationEnabled] =
+    useState<boolean>(workspace?.settings?.ai?.dictationStreaming ?? false);
  const [publicShareAssistantEnabled, setPublicShareAssistantEnabled] =
    useState<boolean>(
      workspace?.settings?.ai?.publicShareAssistant ?? false,
@@ -274,6 +276,8 @@ export default function AiProviderSettings() {
  const [chatToggleLoading, setChatToggleLoading] = useState(false);
  const [searchToggleLoading, setSearchToggleLoading] = useState(false);
  const [dictationToggleLoading, setDictationToggleLoading] = useState(false);
+  const [streamingDictationToggleLoading, setStreamingDictationToggleLoading] =
+    useState(false);
  const [
    publicShareAssistantToggleLoading,
    setPublicShareAssistantToggleLoading,
@@ -512,6 +516,35 @@ export default function AiProviderSettings() {
    }
  }

+  // Optimistic toggle for the streaming (silence-cut) dictation sub-mode
+  // (settings.ai.dictationStreaming). Only meaningful when dictation is on.
+  async function handleToggleStreamingDictation(value: boolean) {
+    setStreamingDictationToggleLoading(true);
+    const previous = streamingDictationEnabled; // snapshot used to roll back in catch
+    setStreamingDictationEnabled(value); // flip the switch before the request resolves
+    try {
+      const updated = await updateWorkspace({ aiDictationStreaming: value });
+      setWorkspace({
+        ...updated,
+        settings: {
+          ...updated.settings,
+          ai: { ...updated.settings?.ai, dictationStreaming: value }, // overlay the requested value on the returned ai settings
+        },
+      });
+      notifications.show({ message: t("Updated successfully") });
+    } catch (err) { // request failed: undo the optimistic change and report it
+      setStreamingDictationEnabled(previous);
+      const message = (err as { response?: { data?: { message?: string } } })
+        ?.response?.data?.message; // undefined when the error carries no response message
+      notifications.show({
+        message: message ?? t("Failed to update data"),
+        color: "red",
+      });
+    } finally {
+      setStreamingDictationToggleLoading(false); // clear the loading flag on success and failure alike
+    }
+  }
+
  // Optimistic toggle for the anonymous public-share AI assistant
  // (settings.ai.publicShareAssistant). When off, the public endpoint 404s.
  async function handleTogglePublicShareAssistant(value: boolean) {
@@ -952,6 +985,33 @@ export default function AiProviderSettings() {
          )}
        </Text>

+        {/* Streaming dictation is a sub-mode of voice dictation: it cuts on
+            pauses and transcribes each segment as you speak. Disabled unless
+            dictation itself is on. */}
+        <Group justify="space-between" align="center" wrap="nowrap">
+          <Stack gap={0}>
+            <Text fw={600} size="sm">
+              {t("Streaming dictation")}
+            </Text>
+            <Text size="xs" c="dimmed">
+              {t("Transcribe as you speak, cutting on pauses")}
+            </Text>
+          </Stack>
+          <Switch
+            label={t("Streaming dictation")}
+            labelPosition="left"
+            checked={streamingDictationEnabled}
+            disabled={
+              !dictationEnabled ||
+              dictationToggleLoading ||
+              streamingDictationToggleLoading
+            }
+            onChange={(e) =>
+              handleToggleStreamingDictation(e.currentTarget.checked)
+            }
+          />
+        </Group>
+
        <Group grow align="flex-start">
          <TextInput
            label={t("Model")}
--- a/apps/client/src/features/workspace/types/workspace.types.ts
+++ b/apps/client/src/features/workspace/types/workspace.types.ts
@@ -25,6 +25,7 @@ export interface IWorkspace {
  mcpEnabled?: boolean;
  aiChat?: boolean;
  aiDictation?: boolean;
+  aiDictationStreaming?: boolean;
  aiPublicShareAssistant?: boolean;
  trashRetentionDays?: number;
  restrictApiToAdmins?: boolean;
@@ -62,6 +63,7 @@ export interface IWorkspaceAiSettings {
  mcp?: boolean;
  chat?: boolean;
  dictation?: boolean;
+  dictationStreaming?: boolean;
  publicShareAssistant?: boolean;
 }

--- a/apps/server/src/core/workspace/dto/update-workspace.dto.ts
+++ b/apps/server/src/core/workspace/dto/update-workspace.dto.ts
@@ -55,6 +55,10 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) {
  @IsBoolean()
  aiDictation: boolean;

+  @IsOptional()
+  @IsBoolean()
+  aiDictationStreaming: boolean;
+
  // Workspace master toggle that enables/disables the HTML embed block type.
  // Persisted at settings.htmlEmbed. ABSENT/false => OFF (default). The block
  // itself renders in a sandboxed iframe, so this is a feature switch, not a
--- a/apps/server/src/core/workspace/services/workspace.service.ts
+++ b/apps/server/src/core/workspace/services/workspace.service.ts
@@ -511,6 +511,20 @@ export class WorkspaceService {
        );
      }

+      if (typeof updateWorkspaceDto.aiDictationStreaming !== 'undefined') {
+        const prev = settingsBefore?.ai?.dictationStreaming ?? false;
+        if (prev !== updateWorkspaceDto.aiDictationStreaming) {
+          before.aiDictationStreaming = prev;
+          after.aiDictationStreaming = updateWorkspaceDto.aiDictationStreaming;
+        }
+        await this.workspaceRepo.updateAiSettings(
+          workspaceId,
+          'dictationStreaming',
+          updateWorkspaceDto.aiDictationStreaming,
+          trx,
+        );
+      }
+
      if (typeof updateWorkspaceDto.htmlEmbed !== 'undefined') {
        const prev = settingsBefore?.htmlEmbed ?? false;
        if (prev !== updateWorkspaceDto.htmlEmbed) {
@@ -564,6 +578,7 @@ export class WorkspaceService {
      delete updateWorkspaceDto.allowMemberTemplates;
      delete updateWorkspaceDto.aiChat;
      delete updateWorkspaceDto.aiDictation;
+      delete updateWorkspaceDto.aiDictationStreaming;
      delete updateWorkspaceDto.htmlEmbed;
      delete updateWorkspaceDto.trackerHead;
      delete updateWorkspaceDto.aiPublicShareAssistant;