Files
gitmost/apps/client/src/features/dictation/components/mic-button.tsx
vvzvlad 874bdd021c feat(ai): server-side voice dictation (STT) with mic in chat and editor
Add push-to-talk voice dictation that transcribes recorded audio on the
server via the workspace's OpenAI-compatible AI provider (Whisper /
gpt-4o-transcribe / self-hosted whisper), then inserts the text.

Backend:
- New `stt_api_key_enc` column + migration; STT creds parity with chat/
  embeddings (sttModel/sttBaseUrl/sttApiKey, write-only key, fallbacks to
  chat baseUrl/key). Both provider whitelists updated (service + repo).
- AiService.getTranscriptionModel + AiTranscriptionService.
- Gated POST /ai-chat/transcribe (dictation flag → 403, JWT + workspace
  scope + throttle, 25MB cap, MIME whitelist, never logs audio/key).
- New `settings.ai.dictation` workspace flag (DTO + service + audit).

Frontend:
- Wire up the Voice/STT settings card (model/base URL/key) and the
  Voice-dictation toggle.
- New `features/dictation`: useDictation (MediaRecorder state machine),
  MicButton, transcribe service; integrated into the chat composer and a
  new editor-toolbar dictation group, both gated by ai.dictation.
2026-06-18 18:45:33 +03:00

77 lines
2.0 KiB
TypeScript

import { FC } from "react";
import { ActionIcon, Loader, Tooltip } from "@mantine/core";
import { IconMicrophone, IconPlayerStopFilled } from "@tabler/icons-react";
import { useTranslation } from "react-i18next";
import { useDictation } from "@/features/dictation/hooks/use-dictation";
/** Props accepted by the `MicButton` dictation toggle. */
interface MicButtonProps {
// Callback taking a text string; MicButton passes it straight through to
// `useDictation`. Presumably invoked with the transcribed text — confirm
// against the hook.
onText: (text: string) => void;
// Optional callback, also passed straight through to `useDictation`.
// Presumably fired when a recording begins — confirm against the hook.
onStart?: () => void;
// Disables the "Start dictation" button. MicButton does not apply it to the
// stop-recording button.
disabled?: boolean;
// Mantine ActionIcon size token; "lg" matches the chat composer, "md" the
// editor toolbar. MicButton defaults this to "lg".
size?: "md" | "lg";
}
/**
 * Self-contained dictation toggle driven by the `useDictation` hook.
 *
 * The rendered control depends on the hook's `status`:
 * - `"recording"`: a red stop button; clicking it calls `stop`.
 * - `"transcribing"`: a disabled button showing a spinner, so a new capture
 *   cannot be started while the current one is still being processed.
 * - any other status (including `"error"`): the microphone button; clicking
 *   it calls `start`. A failed attempt therefore leaves the button usable so
 *   the user can retry.
 *
 * @param onText - Forwarded unchanged to `useDictation`.
 * @param onStart - Optional; forwarded unchanged to `useDictation`.
 * @param disabled - Disables only the "Start dictation" button; an active
 *   recording can always be stopped.
 * @param size - Mantine ActionIcon size token (defaults to `"lg"`). The inner
 *   icon is 18px for `"lg"` and 16px otherwise.
 */
export const MicButton: FC<MicButtonProps> = ({
  onText,
  onStart,
  disabled,
  size = "lg",
}) => {
  const { t } = useTranslation();
  const { status, start, stop } = useDictation({ onText, onStart });
  const iconSize = size === "lg" ? 18 : 16;

  if (status === "recording") {
    return (
      <Tooltip label={t("Stop recording")} withArrow>
        <ActionIcon
          size={size}
          color="red"
          variant="light"
          onClick={stop}
          aria-label={t("Stop recording")}
        >
          <IconPlayerStopFilled size={iconSize} />
        </ActionIcon>
      </Tooltip>
    );
  }

  // Only an in-flight transcription locks the button. The "error" status must
  // NOT land here: it would show a misleading "Transcribing…" spinner and keep
  // the control disabled, leaving no way to retry.
  if (status === "transcribing") {
    return (
      <Tooltip label={t("Transcribing…")} withArrow>
        <ActionIcon
          size={size}
          variant="subtle"
          disabled
          aria-label={t("Transcribing…")}
        >
          <Loader size="xs" />
        </ActionIcon>
      </Tooltip>
    );
  }

  // Idle (or recovered-from-error) state: offer to start a new capture.
  return (
    <Tooltip label={t("Start dictation")} withArrow>
      <ActionIcon
        size={size}
        variant="subtle"
        // `start` returns a promise; `void` marks it as fire-and-forget.
        onClick={() => void start()}
        disabled={disabled}
        aria-label={t("Start dictation")}
      >
        <IconMicrophone size={iconSize} />
      </ActionIcon>
    </Tooltip>
  );
};