feat(ai): server-side voice dictation (STT) with mic in chat and editor
Add push-to-talk voice dictation that transcribes recorded audio on the server via the workspace's OpenAI-compatible AI provider (Whisper / gpt-4o-transcribe / self-hosted whisper), then inserts the text. Backend: - New `stt_api_key_enc` column + migration; STT creds parity with chat/embeddings (sttModel/sttBaseUrl/sttApiKey, write-only key, fallbacks to chat baseUrl/key). Both provider whitelists updated (service + repo). - AiService.getTranscriptionModel + AiTranscriptionService. - Gated POST /ai-chat/transcribe (dictation flag → 403, JWT + workspace scope + throttle, 25MB cap, MIME whitelist, never logs audio/key). - New `settings.ai.dictation` workspace flag (DTO + service + audit). Frontend: - Wire up the Voice/STT settings card (model/base URL/key) and the Voice-dictation toggle. - New `features/dictation`: useDictation (MediaRecorder state machine), MicButton, transcribe service; integrated into the chat composer and a new editor-toolbar dictation group, both gated by ai.dictation.
This commit is contained in:
@@ -13,6 +13,7 @@ import { QuickInsertsGroup } from "./groups/quick-inserts-group";
|
||||
import { MoreInsertsGroup } from "./groups/more-inserts-group";
|
||||
import { HistoryGroup } from "./groups/history-group";
|
||||
import { AskAiGroup } from "./groups/ask-ai-group";
|
||||
import { DictationGroup } from "./groups/dictation-group";
|
||||
import { workspaceAtom } from "@/features/user/atoms/current-user-atom";
|
||||
import classes from "./fixed-toolbar.module.css";
|
||||
|
||||
@@ -30,6 +31,7 @@ export const FixedToolbar: FC<FixedToolbarProps> = ({
|
||||
const state = useToolbarState(editor);
|
||||
const workspace = useAtomValue(workspaceAtom);
|
||||
const isGenerativeAiEnabled = workspace?.settings?.ai?.generative === true;
|
||||
const isDictationEnabled = workspace?.settings?.ai?.dictation === true;
|
||||
|
||||
if (!editor || !state) return null;
|
||||
|
||||
@@ -65,6 +67,12 @@ export const FixedToolbar: FC<FixedToolbarProps> = ({
|
||||
<MoreInsertsGroup editor={editor} templateMode={templateMode} />
|
||||
<div className={classes.divider} />
|
||||
<HistoryGroup editor={editor} state={state} />
|
||||
{isDictationEnabled && (
|
||||
<>
|
||||
<div className={classes.divider} />
|
||||
<DictationGroup editor={editor} />
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className={classes.spacer} aria-hidden />
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
import { FC, useRef } from "react";
|
||||
import type { Editor } from "@tiptap/react";
|
||||
import { MicButton } from "@/features/dictation/components/mic-button";
|
||||
|
||||
/** Props accepted by the editor-toolbar dictation group. */
interface Props {
  /** Tiptap editor instance that receives the dictated text. */
  editor: Editor;
}
|
||||
|
||||
/**
 * Editor-toolbar dictation control.
 *
 * Renders a `MicButton` and wires two callbacks into it:
 * - `onStart` snapshots the editor's current selection range;
 * - `onText` inserts the transcribed text at that snapshotted range (or at the
 *   current caret when no snapshot exists or the snapshot is no longer valid).
 *
 * The button is disabled while the editor is not editable.
 *
 * NOTE(review): presumably `MicButton` calls `onStart` when recording begins
 * and `onText` once transcription resolves — confirm against the component.
 */
export const DictationGroup: FC<Props> = ({ editor }) => {
  // Selection range captured by handleStart; null when nothing is pending.
  const rangeRef = useRef<{ from: number; to: number } | null>(null);

  /** Snapshot the selection so the text lands where the user started. */
  const handleStart = () => {
    const { from, to } = editor.state.selection;
    rangeRef.current = { from, to };
  };

  /** Insert `content` at whatever the editor's current caret/selection is. */
  const insertAtCaret = (content: string) => {
    editor.chain().focus().insertContent(content).run();
  };

  /**
   * Insert a finished transcript into the editor.
   *
   * @param text - Transcribed text; blank transcripts are ignored.
   */
  const handleText = (text: string) => {
    // The editor may be gone by the time async transcription returns; bail out
    // instead of operating on a destroyed instance.
    if (!editor || editor.isDestroyed) return;

    // Consume the snapshot up front so it is never reused by a later call.
    const snapshot = rangeRef.current;
    rangeRef.current = null;

    // A blank transcript would otherwise insert a lone space and, for a
    // non-empty snapshotted selection, replace the selected content with it.
    if (text.trim().length === 0) return;

    // A trailing space keeps consecutive dictations from running together.
    const content = `${text} `;

    // The document may have shrunk during transcription (e.g. a collaborative
    // edit), so clamp the snapshot into the current bounds before inserting.
    const docSize = editor.state.doc.content.size;
    const clamp = (p: number) => Math.max(0, Math.min(p, docSize));

    try {
      if (snapshot) {
        editor
          .chain()
          .focus()
          .insertContentAt(
            { from: clamp(snapshot.from), to: clamp(snapshot.to) },
            content,
          )
          .run();
      } else {
        insertAtCaret(content);
      }
    } catch {
      // The snapshot drifted out of range; fall back to the current caret.
      try {
        insertAtCaret(content);
      } catch {
        // Deliberate best-effort: the editor may have been destroyed in the
        // meantime, and a dead editor must not surface an uncaught error.
      }
    }
  };

  return (
    <MicButton
      size="md"
      onStart={handleStart}
      onText={handleText}
      disabled={!editor.isEditable}
    />
  );
};
|
||||
Reference in New Issue
Block a user