Files
gitmost/apps/client/src/features/ai-chat/components/chat-input.tsx
vvzvlad 874bdd021c feat(ai): server-side voice dictation (STT) with mic in chat and editor
Add push-to-talk voice dictation that transcribes recorded audio on the
server via the workspace's OpenAI-compatible AI provider (Whisper /
gpt-4o-transcribe / self-hosted whisper), then inserts the text.

Backend:
- New `stt_api_key_enc` column + migration; STT creds parity with chat/
  embeddings (sttModel/sttBaseUrl/sttApiKey, write-only key, fallbacks to
  chat baseUrl/key). Both provider whitelists updated (service + repo).
- AiService.getTranscriptionModel + AiTranscriptionService.
- Gated POST /ai-chat/transcribe (dictation flag → 403, JWT + workspace
  scope + throttle, 25MB cap, MIME whitelist, never logs audio/key).
- New `settings.ai.dictation` workspace flag (DTO + service + audit).

Frontend:
- Wire up the Voice/STT settings card (model/base URL/key) and the
  Voice-dictation toggle.
- New `features/dictation`: useDictation (MediaRecorder state machine),
  MicButton, transcribe service; integrated into the chat composer and a
  new editor-toolbar dictation group, both gated by ai.dictation.
2026-06-18 18:45:33 +03:00

99 lines
3.0 KiB
TypeScript

import { KeyboardEvent } from "react";
import { ActionIcon, Group, Textarea, Tooltip } from "@mantine/core";
import { IconPlayerStopFilled, IconSend } from "@tabler/icons-react";
import { useTranslation } from "react-i18next";
import { useAtom, useAtomValue } from "jotai";
import { aiChatDraftAtom } from "@/features/ai-chat/atoms/ai-chat-atom.ts";
import { workspaceAtom } from "@/features/user/atoms/current-user-atom";
import { MicButton } from "@/features/dictation/components/mic-button";
/** Props accepted by the chat message composer. */
interface ChatInputProps {
  /** Invoked with the trimmed draft text when the user submits a message. */
  onSend: (text: string) => void;
  /** Invoked when the user clicks the Stop button shown during streaming. */
  onStop: () => void;
  /**
   * Whether a response is currently streaming. While true, the Stop button
   * replaces the Send button and submitting is blocked.
   */
  isStreaming: boolean;
  /** Disables the textarea, the mic button and the Send button. */
  disabled?: boolean;
}
/**
 * Message composer. Enter sends, Shift+Enter inserts a newline. While the agent
 * is streaming, the send button becomes a Stop button (calls `onStop`); the
 * textarea stays usable so the user can draft the next turn.
 *
 * The draft text is held in `aiChatDraftAtom`. When the workspace setting
 * `settings.ai.dictation` is exactly `true`, a mic button is rendered whose
 * transcribed text is appended to the draft.
 *
 * @param onSend - Called with the trimmed draft when a message is submitted.
 * @param onStop - Called when the Stop button is clicked during streaming.
 * @param isStreaming - Swaps Send for Stop and blocks submitting while true.
 * @param disabled - Disables the textarea, the mic button and the Send button.
 */
export default function ChatInput({
  onSend,
  onStop,
  isStreaming,
  disabled,
}: ChatInputProps) {
  const { t } = useTranslation();
  const [value, setValue] = useAtom(aiChatDraftAtom);
  const workspace = useAtomValue(workspaceAtom);
  const isDictationEnabled = workspace?.settings?.ai?.dictation === true;

  // Submit the trimmed draft and clear it. No-op for an empty draft, while a
  // response is streaming, or when the composer is disabled.
  const send = (): void => {
    const text = value.trim();
    if (!text || isStreaming || disabled) return;
    onSend(text);
    setValue("");
  };

  const handleKeyDown = (e: KeyboardEvent<HTMLTextAreaElement>): void => {
    if (e.key !== "Enter" || e.shiftKey) return;
    // An Enter that confirms an IME composition (e.g. Japanese/Chinese/Korean
    // input) must not submit the message. `isComposing` covers most browsers;
    // keyCode 229 covers Safari, which reports the confirming keydown after
    // the composition has already ended.
    if (e.nativeEvent.isComposing || e.keyCode === 229) return;
    e.preventDefault();
    send();
  };

  // Append dictated text to the current draft, separated by a single space
  // when the draft is non-empty.
  const appendDictation = (text: string): void => {
    setValue((v) => (v ? `${v} ${text}` : text));
  };

  return (
    <Group gap="xs" align="flex-end" wrap="nowrap">
      <Textarea
        style={{ flex: 1 }}
        placeholder={t("Ask the AI agent…")}
        value={value}
        onChange={(e) => setValue(e.currentTarget.value)}
        onKeyDown={handleKeyDown}
        autosize
        minRows={1}
        maxRows={6}
        disabled={disabled}
        // Focus the composer whenever this input mounts. ChatThread is remounted
        // via `key` on every chat appearance (window open, "New chat", chat
        // switch), so a fresh chat lands with the cursor ready in the field.
        autoFocus
      />
      {/* Dictation is offered only when enabled for the workspace, and is
          unavailable while streaming or when the composer is disabled. */}
      {isDictationEnabled && (
        <MicButton
          size="lg"
          disabled={isStreaming || disabled}
          onText={appendDictation}
        />
      )}
      {isStreaming ? (
        <Tooltip label={t("Stop")} withArrow>
          <ActionIcon
            size="lg"
            color="red"
            variant="light"
            onClick={onStop}
            aria-label={t("Stop")}
          >
            <IconPlayerStopFilled size={18} />
          </ActionIcon>
        </Tooltip>
      ) : (
        <Tooltip label={t("Send")} withArrow>
          <ActionIcon
            size="lg"
            variant="filled"
            onClick={send}
            disabled={disabled || value.trim().length === 0}
            aria-label={t("Send")}
          >
            <IconSend size={18} />
          </ActionIcon>
        </Tooltip>
      )}
    </Group>
  );
}