feat(dictation): reason model — speaking tooltip on a disabled mic + shared error resolver (#309)
The dictation mic could be grey/disabled while silently showing "Start
dictation", and Mantine's native `disabled` set pointer-events:none so the
Tooltip never fired at all — the UI knew the cause but told the user nothing.
Runtime error strings were also duplicated verbatim across the two dictation
hooks.
- New dictation-status.ts: the single source of truth. A DictationUnavailableReason
enum (connecting/offline/read-only/unsupported/busy) + a DictationErrorCode enum,
pure classifiers (classifyGetUserMediaError / classifyTranscriptionError) and
resolvers (resolveUnavailableLabel / dictationErrorMessage). All user-facing
dictation strings are formed here; the verbatim server message still wins for
transcription errors.
- page-editor publishes dictationAvailabilityAtom { isEditable, reason } computed
at the source (editable/edit-mode/showStatic/collab status): connecting vs
offline (stuck) vs read-only. DictationGroup forwards the reason to MicButton.
- MicButton is reason-aware: a disabled mic shows the cause-specific tooltip. The
disabled-hover silence is fixed by marking disabled the Mantine way
(data-disabled/aria-disabled + click guard) instead of the native attribute, so
the Tooltip fires — applied to both the idle (reason) and error (errorMessage)
states.
- Both hooks route every error through the shared resolver (deleting the
duplicated transcriptionErrorMessage), and expose errorMessage for the tooltip.
Wording is byte-identical to each hook's original (incl. the batch hook's
DOMException name prefix and the verbatim server message).
- i18n: 3 new reason keys in en-US + ru-RU, and the previously-missing ru-RU
dictation error translations.
Tests: dictation-status.test.ts (all classifier/resolver branches, incl. server
message passthrough) + mic-button.test.tsx (disabled mic shows the reason text,
uses data-disabled not native disabled — fails against the pre-fix code).
vitest: 5 files / 32 passed.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1274,6 +1274,9 @@
|
||||
"Voice dictation is not configured": "Voice dictation is not configured",
|
||||
"Microphone is unavailable or already in use": "Microphone is unavailable or already in use",
|
||||
"Audio recording is not available in this browser/context": "Audio recording is not available in this browser/context",
|
||||
"Dictation becomes available once the page finishes connecting": "Dictation becomes available once the page finishes connecting",
|
||||
"No connection to the collaboration server — dictation unavailable": "No connection to the collaboration server — dictation unavailable",
|
||||
"This page is read-only": "This page is read-only",
|
||||
"Request format": "Request format",
|
||||
"How transcription requests are sent to the endpoint": "How transcription requests are sent to the endpoint",
|
||||
"OpenAI-compatible (multipart/form-data)": "OpenAI-compatible (multipart/form-data)",
|
||||
|
||||
@@ -393,6 +393,16 @@
|
||||
"No speech detected": "Речь не распознана",
|
||||
"Transcription failed": "Не удалось распознать речь",
|
||||
"Voice dictation is not configured": "Голосовой ввод не настроен",
|
||||
"Start dictation": "Начать диктовку",
|
||||
"Stop recording": "Остановить запись",
|
||||
"Microphone access denied": "Доступ к микрофону запрещён",
|
||||
"No microphone found": "Микрофон не найден",
|
||||
"Microphone is unavailable or already in use": "Микрофон недоступен или уже используется",
|
||||
"Could not start recording": "Не удалось начать запись",
|
||||
"Audio recording is not available in this browser/context": "Запись аудио недоступна в этом браузере/контексте",
|
||||
"Dictation becomes available once the page finishes connecting": "Диктовка станет доступна после подключения к документу",
|
||||
"No connection to the collaboration server — dictation unavailable": "Нет связи с сервером совместного редактирования — диктовка недоступна",
|
||||
"This page is read-only": "Страница открыта только для чтения",
|
||||
"Embed PDF": "Встроить PDF",
|
||||
"Upload and embed a PDF file.": "Загрузите и встроите PDF-файл.",
|
||||
"Embed as PDF": "Встроить как PDF",
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { MantineProvider } from "@mantine/core";
|
||||
|
||||
// A disabled mic must explain WHY it is unavailable rather than silently saying
|
||||
// "Start dictation". This renders MicButton in its idle+disabled state with a
|
||||
// forwarded reason and asserts the accessible label resolves to that reason's
|
||||
// text via the shared resolver (dictation-status.resolveUnavailableLabel).
|
||||
|
||||
// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
|
||||
|
||||
// Identity translator: every i18n key resolves to itself, so assertions can
// compare against the exact key text the component selected.
vi.mock("react-i18next", () => {
  const t = (key: string) => key;
  return { useTranslation: () => ({ t }) };
});

// A single inert controller in the "idle" state. Both dictation hooks are
// mocked to hand it back, which keeps MicButton on its idle render branch.
const noopAsync = async () => {};
const idleCtl = {
  status: "idle" as const,
  start: vi.fn(noopAsync),
  stop: vi.fn(),
  cancel: vi.fn(),
  audioLevel: 0,
  errorMessage: null,
};

vi.mock("@/features/dictation/hooks/use-dictation", () => {
  return { useDictation: () => idleCtl };
});

vi.mock("@/features/dictation/hooks/use-streaming-dictation", () => {
  return { useStreamingDictation: () => idleCtl };
});
|
||||
|
||||
import { MicButton } from "./mic-button";
|
||||
|
||||
/**
 * Mounts a MicButton with the given props inside a MantineProvider, which the
 * Mantine components need in order to render.
 */
function renderButton(props: React.ComponentProps<typeof MicButton>) {
  const tree = (
    <MantineProvider>
      <MicButton {...props} />
    </MantineProvider>
  );
  render(tree);
}
|
||||
|
||||
describe("MicButton — disabled reason label", () => {
  // jsdom has no MediaRecorder / mediaDevices, so isDictationSupported() would
  // report "unsupported" and mask the forwarded reason. Stub both so the button
  // is considered supported and the availability reason is what surfaces.
  //
  // Whatever was there before is captured as a property descriptor and put
  // back afterwards, so neither stub leaks into tests that run later in the
  // same environment (the original teardown left navigator.mediaDevices
  // overridden and removed MediaRecorder even if it had existed).
  const recorderHost = globalThis as unknown as { MediaRecorder?: unknown };
  let recorderBefore: PropertyDescriptor | undefined;
  let mediaDevicesBefore: PropertyDescriptor | undefined;

  beforeEach(() => {
    recorderBefore = Object.getOwnPropertyDescriptor(
      globalThis,
      "MediaRecorder",
    );
    mediaDevicesBefore = Object.getOwnPropertyDescriptor(
      navigator,
      "mediaDevices",
    );

    recorderHost.MediaRecorder = class {};
    Object.defineProperty(navigator, "mediaDevices", {
      configurable: true,
      value: { getUserMedia: vi.fn() },
    });
  });

  afterEach(() => {
    if (recorderBefore) {
      Object.defineProperty(globalThis, "MediaRecorder", recorderBefore);
    } else {
      delete recorderHost.MediaRecorder;
    }

    if (mediaDevicesBefore) {
      Object.defineProperty(navigator, "mediaDevices", mediaDevicesBefore);
    } else {
      // The stub was an own, configurable property; deleting it re-exposes
      // whatever the prototype chain provides (nothing, in jsdom).
      delete (navigator as unknown as { mediaDevices?: unknown }).mediaDevices;
    }
  });

  it("shows the cause-specific reason instead of 'Start dictation' when disabled with a reason", () => {
    renderButton({ onText: () => {}, disabled: true, unavailableReason: "offline" });
    const expected =
      "No connection to the collaboration server — dictation unavailable";
    // The reason surfaces as the accessible label (and the tooltip text).
    const button = screen.getByRole("button", { name: expected });
    expect(button).toBeDefined();
    // It is marked disabled the Mantine way (data-disabled), NOT the native
    // `disabled` attribute — otherwise pointer-events:none would kill the tooltip.
    expect(button.getAttribute("data-disabled")).toBe("true");
    expect(button.hasAttribute("disabled")).toBe(false);
    // And it no longer silently reads "Start dictation".
    expect(screen.queryByRole("button", { name: "Start dictation" })).toBeNull();
  });

  it("reads 'Start dictation' when enabled with no reason", () => {
    renderButton({ onText: () => {} });
    expect(
      screen.getByRole("button", { name: "Start dictation" }),
    ).toBeDefined();
  });
});
|
||||
@@ -4,6 +4,11 @@ import { IconMicrophone, IconPlayerStopFilled } from "@tabler/icons-react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useDictation } from "@/features/dictation/hooks/use-dictation";
|
||||
import { useStreamingDictation } from "@/features/dictation/hooks/use-streaming-dictation";
|
||||
import {
|
||||
isDictationSupported,
|
||||
resolveUnavailableLabel,
|
||||
type DictationUnavailableReason,
|
||||
} from "@/features/dictation/dictation-status";
|
||||
import classes from "./mic-button.module.css";
|
||||
|
||||
interface MicButtonProps {
|
||||
@@ -21,6 +26,9 @@ interface MicButtonProps {
|
||||
// When true, use the streaming (Silero-VAD) dictation controller, which emits
|
||||
// text progressively as the user pauses; otherwise use the batch controller.
|
||||
streaming?: boolean;
|
||||
// When the mic is disabled for an availability reason, this is the cause the
|
||||
// idle tooltip explains (e.g. pre-sync "connecting", "offline", "read-only").
|
||||
unavailableReason?: DictationUnavailableReason;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -37,6 +45,7 @@ export const MicButton: FC<MicButtonProps> = ({
|
||||
color,
|
||||
iconSize,
|
||||
streaming = false,
|
||||
unavailableReason,
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
// Call BOTH hooks unconditionally to respect the rules of hooks: which one is
|
||||
@@ -46,7 +55,7 @@ export const MicButton: FC<MicButtonProps> = ({
|
||||
const batchCtl = useDictation({ onText, onStart });
|
||||
const streamingCtl = useStreamingDictation({ onText, onStart });
|
||||
const ctl = streaming ? streamingCtl : batchCtl;
|
||||
const { status, start, stop, audioLevel } = ctl;
|
||||
const { status, start, stop, audioLevel, errorMessage } = ctl;
|
||||
const resolvedIconSize = iconSize ?? (size === "lg" ? 18 : 16);
|
||||
|
||||
if (status === "recording") {
|
||||
@@ -82,15 +91,28 @@ export const MicButton: FC<MicButtonProps> = ({
|
||||
) {
|
||||
// "loading" (streaming hook fetching the VAD model on first use) shows the
|
||||
// same spinner+disabled state so the first click is visibly acknowledged and
|
||||
// a confusing second click can't fire while the model loads.
|
||||
const label = status === "loading" ? t("Preparing…") : t("Transcribing…");
|
||||
// a confusing second click can't fire while the model loads. The error case
|
||||
// explains the failure via the hook's resolved errorMessage instead of the
|
||||
// transient "Transcribing…" label.
|
||||
const label =
|
||||
status === "error"
|
||||
? (errorMessage ?? t("Transcription failed"))
|
||||
: status === "loading"
|
||||
? t("Preparing…")
|
||||
: t("Transcribing…");
|
||||
return (
|
||||
<Tooltip label={label} withArrow>
|
||||
<ActionIcon
|
||||
size={size}
|
||||
variant="subtle"
|
||||
color={color}
|
||||
disabled
|
||||
// Mark disabled the Mantine way (data-disabled/aria-disabled) rather
|
||||
// than the native `disabled` attribute: native `disabled` sets
|
||||
// `pointer-events:none`, which suppresses hover so the Tooltip never
|
||||
// fires. This is a status display with no click action to guard, so
|
||||
// keeping it hoverable simply lets the error reason be read on hover.
|
||||
data-disabled
|
||||
aria-disabled
|
||||
aria-label={label}
|
||||
>
|
||||
<Loader size="xs" />
|
||||
@@ -99,15 +121,38 @@ export const MicButton: FC<MicButtonProps> = ({
|
||||
);
|
||||
}
|
||||
|
||||
// Idle branch. A grey/disabled mic must explain WHY it can't record. An
|
||||
// unsupported browser/context is detected here; otherwise the parent forwards
|
||||
// a cause-specific reason. We must NOT pass the native `disabled` prop: Mantine
|
||||
// renders `<button disabled>` with `pointer-events:none`, which suppresses
|
||||
// hover so the Tooltip never fires. Instead mark it disabled the Mantine way
|
||||
// (data-disabled/aria-disabled) — keeping it hoverable and in the a11y tree —
|
||||
// and guard the click ourselves.
|
||||
const unsupported = !isDictationSupported();
|
||||
const isDisabled = disabled || unsupported;
|
||||
const reason: DictationUnavailableReason | undefined = unsupported
|
||||
? "unsupported"
|
||||
: unavailableReason;
|
||||
const idleLabel =
|
||||
isDisabled && reason
|
||||
? resolveUnavailableLabel(reason, t)
|
||||
: t("Start dictation");
|
||||
return (
|
||||
<Tooltip label={t("Start dictation")} withArrow>
|
||||
<Tooltip label={idleLabel} withArrow>
|
||||
<ActionIcon
|
||||
size={size}
|
||||
variant="subtle"
|
||||
color={color}
|
||||
onClick={() => void start()}
|
||||
disabled={disabled}
|
||||
aria-label={t("Start dictation")}
|
||||
onClick={(e) => {
|
||||
if (isDisabled) {
|
||||
e.preventDefault();
|
||||
return;
|
||||
}
|
||||
void start();
|
||||
}}
|
||||
data-disabled={isDisabled || undefined}
|
||||
aria-disabled={isDisabled}
|
||||
aria-label={idleLabel}
|
||||
>
|
||||
<IconMicrophone size={resolvedIconSize} />
|
||||
</ActionIcon>
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
classifyGetUserMediaError,
|
||||
classifyTranscriptionError,
|
||||
dictationErrorMessage,
|
||||
resolveUnavailableLabel,
|
||||
isDictationSupported,
|
||||
} from "./dictation-status";
|
||||
|
||||
// Unit tests for the shared dictation-status resolvers (dictation-status.ts).
|
||||
// Both dictation hooks and the mic button form their user-facing strings here,
|
||||
// so a regression in the classification or message mapping would silently swap
|
||||
// what a user reads when the mic is grey or a recording fails. A fake `t`
|
||||
// returns its key verbatim so we assert the exact i18n key each branch selects.
|
||||
/** Fake translator: returns the i18n key unchanged. */
function t(key: string): string {
  return key;
}
|
||||
|
||||
describe("classifyGetUserMediaError", () => {
  // Asserts that an error carrying each of the given names classifies to `code`.
  const expectAll = (names: string[], code: string) => {
    for (const name of names) {
      expect(classifyGetUserMediaError({ name })).toBe(code);
    }
  };

  it("maps NotAllowedError / SecurityError to mic-denied", () => {
    expectAll(["NotAllowedError", "SecurityError"], "mic-denied");
  });

  it("maps NotFoundError / OverconstrainedError to no-mic", () => {
    expectAll(["NotFoundError", "OverconstrainedError"], "no-mic");
  });

  it("maps NotReadableError / AbortError to mic-in-use", () => {
    expectAll(["NotReadableError", "AbortError"], "mic-in-use");
  });

  it("maps anything else / undefined to unknown", () => {
    expectAll(["WeirdError"], "unknown");
    // No error at all, and an error object without a name.
    expect(classifyGetUserMediaError(undefined)).toBe("unknown");
    expect(classifyGetUserMediaError({})).toBe("unknown");
  });
});
|
||||
|
||||
describe("classifyTranscriptionError", () => {
  // The result expected whenever nothing more specific can be said.
  const genericFailure = { code: "transcription-failed" };

  it("returns the verbatim server message when present", () => {
    const serverMessage = "provider 404";
    const result = classifyTranscriptionError({
      response: { status: 500, data: { message: serverMessage } },
    });
    expect(result).toEqual({ code: "transcription-failed", serverMessage });
  });

  it("maps 503 / 403 (no server message) to stt-not-configured", () => {
    for (const status of [503, 403]) {
      expect(classifyTranscriptionError({ response: { status } })).toEqual({
        code: "stt-not-configured",
      });
    }
  });

  it("falls back to transcription-failed with no server message otherwise", () => {
    const inputs: unknown[] = [
      { response: { status: 500 } },
      new Error("network"),
      // Blank server message is ignored (does not win as verbatim text).
      { response: { data: { message: " " } } },
    ];
    for (const input of inputs) {
      expect(classifyTranscriptionError(input)).toEqual(genericFailure);
    }
  });
});
|
||||
|
||||
describe("dictationErrorMessage", () => {
  // The code type is not exported to this file by name; derive it from the
  // function under test.
  type Code = Parameters<typeof dictationErrorMessage>[0];

  it("maps each code to the expected i18n key", () => {
    const couldNotStart = "Could not start recording";
    const expectedKeyByCode: Array<[Code, string]> = [
      ["mic-denied", "Microphone access denied"],
      ["no-mic", "No microphone found"],
      ["mic-in-use", "Microphone is unavailable or already in use"],
      [
        "no-media-devices",
        "Audio recording is not available in this browser/context",
      ],
      ["stt-not-configured", "Voice dictation is not configured"],
      ["transcription-failed", "Transcription failed"],
      ["recorder-failed", couldNotStart],
      ["vad-init-failed", couldNotStart],
      ["unknown", couldNotStart],
    ];
    for (const [code, key] of expectedKeyByCode) {
      expect(dictationErrorMessage(code, t)).toBe(key);
    }
  });

  it("returns the server message verbatim for transcription-failed (not the t key)", () => {
    const text = dictationErrorMessage("transcription-failed", t, {
      serverMessage: "quota exceeded",
    });
    expect(text).toBe("quota exceeded");
  });

  it("appends the detail to recorder-failed / unknown", () => {
    const fromRecorder = dictationErrorMessage("recorder-failed", t, {
      detail: "boom",
    });
    expect(fromRecorder).toBe("Could not start recording: boom");

    const fromUnknown = dictationErrorMessage("unknown", t, { detail: "nope" });
    expect(fromUnknown).toBe("Could not start recording: nope");
  });

  it("appends the detail to transcription-failed when there is no server message", () => {
    const text = dictationErrorMessage("transcription-failed", t, {
      detail: "timeout",
    });
    expect(text).toBe("Transcription failed: timeout");
  });
});
|
||||
|
||||
describe("resolveUnavailableLabel", () => {
  // Derive the reason type from the function under test.
  type Reason = Parameters<typeof resolveUnavailableLabel>[0];

  it("maps each reason to its expected i18n key", () => {
    const expectedKeyByReason: Array<[Reason, string]> = [
      [
        "connecting",
        "Dictation becomes available once the page finishes connecting",
      ],
      [
        "offline",
        "No connection to the collaboration server — dictation unavailable",
      ],
      ["read-only", "This page is read-only"],
      [
        "unsupported",
        "Audio recording is not available in this browser/context",
      ],
      ["busy", "Transcribing…"],
    ];
    for (const [reason, key] of expectedKeyByReason) {
      expect(resolveUnavailableLabel(reason, t)).toBe(key);
    }
  });
});
|
||||
|
||||
describe("isDictationSupported", () => {
  it("returns a boolean", () => {
    expect(typeof isDictationSupported()).toBe("boolean");
  });

  // The type-only check above cannot fail, so it would not catch a regression
  // in the detection itself. This case toggles each prerequisite and pins the
  // actual answer.
  it("requires both MediaRecorder and navigator.mediaDevices.getUserMedia", () => {
    // Without a navigator global there is nothing to stub; the function must
    // simply report "unsupported".
    if (typeof navigator === "undefined") {
      expect(isDictationSupported()).toBe(false);
      return;
    }

    const scope = globalThis as unknown as { MediaRecorder?: unknown };
    const recorderBefore = Object.getOwnPropertyDescriptor(
      globalThis,
      "MediaRecorder",
    );
    const devicesBefore = Object.getOwnPropertyDescriptor(
      navigator,
      "mediaDevices",
    );
    const stubDevices = (value: unknown) =>
      Object.defineProperty(navigator, "mediaDevices", {
        configurable: true,
        value,
      });

    try {
      // getUserMedia present, MediaRecorder absent -> unsupported.
      stubDevices({ getUserMedia: () => undefined });
      delete scope.MediaRecorder;
      expect(isDictationSupported()).toBe(false);

      // Both present -> supported.
      scope.MediaRecorder = class {};
      expect(isDictationSupported()).toBe(true);

      // MediaRecorder present, getUserMedia absent -> unsupported.
      stubDevices({});
      expect(isDictationSupported()).toBe(false);
    } finally {
      // Put the environment back exactly as it was found.
      if (recorderBefore) {
        Object.defineProperty(globalThis, "MediaRecorder", recorderBefore);
      } else {
        delete scope.MediaRecorder;
      }
      if (devicesBefore) {
        Object.defineProperty(navigator, "mediaDevices", devicesBefore);
      } else {
        delete (navigator as unknown as { mediaDevices?: unknown })
          .mediaDevices;
      }
    }
  });
});
|
||||
@@ -0,0 +1,113 @@
|
||||
// Single source of truth for "why dictation is unavailable" and "why it failed".
|
||||
// Both dictation hooks and the mic button pull their user-facing strings from
|
||||
// the resolvers here so the wording lives in exactly one place.
|
||||
|
||||
/**
 * Cause for which the dictation mic cannot be used right now. Every member has
 * a matching tooltip string in `resolveUnavailableLabel`.
 */
export type DictationUnavailableReason =
  "connecting" | "offline" | "read-only" | "unsupported" | "busy";
|
||||
|
||||
/**
 * Machine-readable category of a dictation failure. `dictationErrorMessage`
 * converts a code into the text shown to the user.
 */
export type DictationErrorCode =
  | "no-media-devices" | "mic-denied" | "no-mic" | "mic-in-use"
  | "recorder-failed" | "vad-init-failed"
  | "stt-not-configured" | "transcription-failed"
  | "unknown";
|
||||
|
||||
/**
 * Reports whether the current browser/context exposes everything needed to
 * record audio: a `MediaRecorder` global, a `navigator` global, and
 * `navigator.mediaDevices.getUserMedia`.
 *
 * @returns `true` only when all three are present.
 */
export function isDictationSupported(): boolean {
  // `typeof` guards keep this safe where the globals are not declared at all.
  if (typeof MediaRecorder === "undefined") return false;
  if (typeof navigator === "undefined") return false;
  return Boolean(navigator.mediaDevices?.getUserMedia);
}
|
||||
|
||||
/**
 * Classifies a rejected `getUserMedia` / VAD start by the error's `.name`.
 *
 * @param err The rejection value; only its optional `name` property is read.
 * @returns `"mic-denied"` for NotAllowedError/SecurityError, `"no-mic"` for
 *   NotFoundError/OverconstrainedError, `"mic-in-use"` for
 *   NotReadableError/AbortError, and `"unknown"` for everything else
 *   (including a missing error or a missing name).
 */
export function classifyGetUserMediaError(err: unknown): DictationErrorCode {
  const errorName = (err as { name?: string })?.name;
  switch (errorName) {
    case "NotAllowedError":
    case "SecurityError":
      return "mic-denied";
    case "NotFoundError":
    case "OverconstrainedError":
      return "no-mic";
    case "NotReadableError":
    case "AbortError":
      return "mic-in-use";
    default:
      return "unknown";
  }
}
|
||||
|
||||
/**
 * Classifies a failed transcription request.
 *
 * Reads `err.response.status` and `err.response.data.message` when they exist.
 * A non-blank string message from the server always wins and is returned
 * verbatim; otherwise HTTP 503/403 mean speech-to-text is not configured, and
 * anything else is a generic transcription failure.
 *
 * The response body is untyped at runtime, so `message` is only trusted when
 * it really is a string. Any other shape (for example an array of validation
 * messages, an object, or a number) is ignored instead of being passed to
 * `.trim()`, which would throw inside the caller's error handler.
 *
 * @param err The rejection value from the transcription call.
 * @returns The error code, plus `serverMessage` only when the server supplied
 *   a non-blank string.
 */
export function classifyTranscriptionError(err: unknown): {
  code: DictationErrorCode;
  serverMessage?: string;
} {
  const resp = (
    err as { response?: { status?: number; data?: { message?: unknown } } }
  )?.response;
  const rawMessage: unknown = resp?.data?.message;
  if (typeof rawMessage === "string" && rawMessage.trim().length > 0)
    return { code: "transcription-failed", serverMessage: rawMessage };
  if (resp?.status === 503 || resp?.status === 403)
    return { code: "stt-not-configured" };
  return { code: "transcription-failed" };
}
|
||||
|
||||
/** Minimal translator signature: takes an i18n key, returns display text. */
type TFn = (key: string) => string;

/**
 * Turns an error code into the text shown to the user. This is the one place
 * runtime dictation error strings are formed.
 *
 * @param code The classified failure.
 * @param t Translator used for every fixed string.
 * @param extra Optional context. A non-blank `serverMessage` is returned
 *   verbatim for `"transcription-failed"`. A non-empty `detail` is appended
 *   after `": "` to the "Transcription failed" and "Could not start recording"
 *   strings; the other codes ignore it.
 * @returns The user-facing message.
 */
export function dictationErrorMessage(
  code: DictationErrorCode,
  t: TFn,
  extra?: { serverMessage?: string; detail?: string },
): string {
  const suffix = extra?.detail ? `: ${extra.detail}` : "";

  if (code === "mic-denied") return t("Microphone access denied");
  if (code === "no-mic") return t("No microphone found");
  if (code === "mic-in-use")
    return t("Microphone is unavailable or already in use");
  if (code === "no-media-devices")
    return t("Audio recording is not available in this browser/context");
  if (code === "stt-not-configured")
    return t("Voice dictation is not configured");

  if (code === "transcription-failed") {
    // The server's own explanation, when it gave one, beats the generic text.
    const fromServer = extra?.serverMessage;
    if (fromServer && fromServer.trim().length > 0) return fromServer;
    return `${t("Transcription failed")}${suffix}`;
  }

  // "recorder-failed", "vad-init-failed", "unknown", and any unrecognised code.
  return `${t("Could not start recording")}${suffix}`;
}
|
||||
|
||||
/**
 * Maps an unavailability reason to its tooltip text. This is the one place
 * these strings are formed.
 *
 * @param r Why the mic is unavailable.
 * @param t Translator applied to the selected i18n key.
 * @returns The translated tooltip text. `"busy"` and any unrecognised value
 *   resolve to "Transcribing…".
 */
export function resolveUnavailableLabel(
  r: DictationUnavailableReason,
  t: TFn,
): string {
  if (r === "connecting")
    return t("Dictation becomes available once the page finishes connecting");
  if (r === "offline")
    return t(
      "No connection to the collaboration server — dictation unavailable",
    );
  if (r === "read-only") return t("This page is read-only");
  if (r === "unsupported")
    return t("Audio recording is not available in this browser/context");
  // "busy" and the defensive fallback share one label.
  return t("Transcribing…");
}
|
||||
@@ -2,6 +2,11 @@ import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { notifications } from "@mantine/notifications";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { transcribeAudio } from "@/features/dictation/services/dictation-service";
|
||||
import {
|
||||
classifyGetUserMediaError,
|
||||
classifyTranscriptionError,
|
||||
dictationErrorMessage,
|
||||
} from "@/features/dictation/dictation-status";
|
||||
|
||||
// "loading" is set only by the streaming hook while it lazily loads the VAD
|
||||
// model on first use; the batch hook never sets it. It exists so the streaming
|
||||
@@ -26,6 +31,8 @@ interface UseDictationResult {
|
||||
cancel: () => void;
|
||||
// Smoothed live microphone level in the 0..1 range while recording (0 when idle).
|
||||
audioLevel: number;
|
||||
// The last error shown to the user (null until one occurs / on a new start).
|
||||
errorMessage: string | null;
|
||||
}
|
||||
|
||||
// Candidate container/codec combinations in preference order. The first one the
|
||||
@@ -67,6 +74,8 @@ export function useDictation(
|
||||
const { t } = useTranslation();
|
||||
const [status, setStatus] = useState<DictationStatus>("idle");
|
||||
const [audioLevel, setAudioLevel] = useState(0);
|
||||
// Last error message shown to the user; the mic button reads it for its tooltip.
|
||||
const [errorMessage, setErrorMessage] = useState<string | null>(null);
|
||||
|
||||
// Keep the latest callbacks in a ref so the recorder's onstop closure always
|
||||
// calls the current handlers without re-creating the recorder.
|
||||
@@ -194,15 +203,16 @@ export function useDictation(
|
||||
if (startingRef.current || recorderRef.current || streamRef.current) return;
|
||||
if (status !== "idle") return;
|
||||
startingRef.current = true;
|
||||
// Clear any stale error from a previous attempt.
|
||||
setErrorMessage(null);
|
||||
|
||||
if (!navigator.mediaDevices?.getUserMedia) {
|
||||
const reason =
|
||||
"navigator.mediaDevices.getUserMedia is unavailable in this context";
|
||||
console.error("[dictation] " + reason);
|
||||
notifications.show({
|
||||
color: "red",
|
||||
message: t("Audio recording is not available in this browser/context"),
|
||||
});
|
||||
const message = dictationErrorMessage("no-media-devices", t);
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
setStatus("idle");
|
||||
startingRef.current = false;
|
||||
return;
|
||||
@@ -215,19 +225,16 @@ export function useDictation(
|
||||
// Always log the full error for diagnosis (name, message, stack).
|
||||
console.error("[dictation] getUserMedia failed", err);
|
||||
const name = (err as { name?: string })?.name;
|
||||
const detail = (err as { message?: string })?.message ?? String(err);
|
||||
let message: string;
|
||||
if (name === "NotAllowedError" || name === "SecurityError") {
|
||||
message = t("Microphone access denied");
|
||||
} else if (name === "NotFoundError" || name === "OverconstrainedError") {
|
||||
message = t("No microphone found");
|
||||
} else if (name === "NotReadableError" || name === "AbortError") {
|
||||
message = t("Microphone is unavailable or already in use");
|
||||
} else {
|
||||
// Unknown failure: show the real reason instead of a generic string.
|
||||
message = `${t("Could not start recording")}: ${name ? `${name}: ` : ""}${detail}`;
|
||||
}
|
||||
const rawDetail = (err as { message?: string })?.message ?? String(err);
|
||||
// Prefix the DOMException name (e.g. "TypeError: …") so the generic
|
||||
// resolver branch reproduces this hook's original "Could not start
|
||||
// recording: <name>: <detail>" text. Each caller owns its own detail; the
|
||||
// streaming hook intentionally does not add the name.
|
||||
const detail = `${name ? `${name}: ` : ""}${rawDetail}`;
|
||||
const code = classifyGetUserMediaError(err);
|
||||
const message = dictationErrorMessage(code, t, { detail });
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
setStatus("idle");
|
||||
startingRef.current = false;
|
||||
return;
|
||||
@@ -249,10 +256,10 @@ export function useDictation(
|
||||
// The stream was acquired but the recorder failed to construct; stop the
|
||||
// tracks so the MediaStream does not leak before bailing out.
|
||||
stopTracks();
|
||||
notifications.show({
|
||||
color: "red",
|
||||
message: `${t("Could not start recording")}: ${(err as { message?: string })?.message ?? String(err)}`,
|
||||
});
|
||||
const detail = (err as { message?: string })?.message ?? String(err);
|
||||
const message = dictationErrorMessage("recorder-failed", t, { detail });
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
setStatus("idle");
|
||||
startingRef.current = false;
|
||||
return;
|
||||
@@ -293,21 +300,14 @@ export function useDictation(
|
||||
.catch((err: unknown) => {
|
||||
// Log the full error for diagnosis (status + body + stack).
|
||||
console.error("[dictation] transcription failed", err);
|
||||
const resp = (
|
||||
err as { response?: { status?: number; data?: { message?: string } } }
|
||||
)?.response;
|
||||
const serverMsg = resp?.data?.message;
|
||||
let message: string;
|
||||
if (serverMsg && serverMsg.trim().length > 0) {
|
||||
// The server already explains the cause (e.g. provider 404, bad
|
||||
// format, STT not configured) — show it verbatim.
|
||||
message = serverMsg;
|
||||
} else if (resp?.status === 503 || resp?.status === 403) {
|
||||
message = t("Voice dictation is not configured");
|
||||
} else {
|
||||
message = `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`;
|
||||
}
|
||||
const { code, serverMessage } = classifyTranscriptionError(err);
|
||||
const detail = (err as { message?: string })?.message ?? String(err);
|
||||
const message = dictationErrorMessage(code, t, {
|
||||
serverMessage,
|
||||
detail,
|
||||
});
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
setStatus("error");
|
||||
if (errorTimerRef.current !== null) {
|
||||
clearTimeout(errorTimerRef.current);
|
||||
@@ -332,10 +332,10 @@ export function useDictation(
|
||||
stopTracks();
|
||||
recorderRef.current = null;
|
||||
startingRef.current = false;
|
||||
notifications.show({
|
||||
color: "red",
|
||||
message: `${t("Could not start recording")}: ${(err as { message?: string })?.message ?? String(err)}`,
|
||||
});
|
||||
const detail = (err as { message?: string })?.message ?? String(err);
|
||||
const message = dictationErrorMessage("recorder-failed", t, { detail });
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
setStatus("idle");
|
||||
return;
|
||||
}
|
||||
@@ -405,5 +405,5 @@ export function useDictation(
|
||||
};
|
||||
}, [clearTimer, stopTracks, stopMeter]);
|
||||
|
||||
return { status, start, stop, cancel, audioLevel };
|
||||
return { status, start, stop, cancel, audioLevel, errorMessage };
|
||||
}
|
||||
|
||||
@@ -4,6 +4,11 @@ import { useTranslation } from "react-i18next";
|
||||
import { transcribeAudio } from "@/features/dictation/services/dictation-service";
|
||||
import { encodeWavPcm16 } from "@/features/dictation/utils/encode-wav";
|
||||
import type { DictationStatus } from "@/features/dictation/hooks/use-dictation";
|
||||
import {
|
||||
classifyGetUserMediaError,
|
||||
classifyTranscriptionError,
|
||||
dictationErrorMessage,
|
||||
} from "@/features/dictation/dictation-status";
|
||||
|
||||
// Lazily-imported MicVAD type. The runtime import happens inside start() so the
|
||||
// heavy onnxruntime-web / Silero model is code-split out of the main bundle and
|
||||
@@ -27,6 +32,8 @@ interface UseStreamingDictationResult {
|
||||
cancel: () => void;
|
||||
// Smoothed live speech level in the 0..1 range while recording (0 when idle).
|
||||
audioLevel: number;
|
||||
// The last error shown to the user (null until one occurs / on a new start).
|
||||
errorMessage: string | null;
|
||||
}
|
||||
|
||||
// Sample rate of the audio MicVAD hands to onSpeechEnd (Silero VAD runs at 16k).
|
||||
@@ -60,6 +67,8 @@ export function useStreamingDictation(
|
||||
const { t } = useTranslation();
|
||||
const [status, setStatus] = useState<DictationStatus>("idle");
|
||||
const [audioLevel, setAudioLevel] = useState(0);
|
||||
// Last error message shown to the user; the mic button reads it for its tooltip.
|
||||
const [errorMessage, setErrorMessage] = useState<string | null>(null);
|
||||
|
||||
// Keep the latest callbacks in a ref so async VAD/HTTP closures always call the
|
||||
// current handlers without re-creating the VAD.
|
||||
@@ -158,26 +167,6 @@ export function useStreamingDictation(
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Map a transcription error to a user-facing message, mirroring the batch hook.
//
// Resolution order:
//   1. a non-blank, string server message -> returned verbatim;
//   2. HTTP 503 / 403                     -> "Voice dictation is not configured";
//   3. anything else                      -> "Transcription failed: <detail>",
//      where <detail> is err.message or, failing that, String(err).
const transcriptionErrorMessage = useCallback(
  (err: unknown): string => {
    const resp = (
      err as { response?: { status?: number; data?: { message?: unknown } } }
    )?.response;
    // The response body is untrusted runtime data: `message` is not guaranteed
    // to be a string (a server may send e.g. an array of messages). Check the
    // type before calling string methods so this error path can never throw.
    const serverMsg = resp?.data?.message;
    if (typeof serverMsg === "string" && serverMsg.trim().length > 0) {
      // The server already explains the cause (e.g. provider 404, bad format,
      // STT not configured) — show it verbatim.
      return serverMsg;
    }
    if (resp?.status === 503 || resp?.status === 403) {
      return t("Voice dictation is not configured");
    }
    return `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`;
  },
  [t],
);
|
||||
|
||||
// Handle one ended speech segment: encode to WAV and transcribe. Results are
|
||||
// buffered by seq and flushed in order. A single failed segment does NOT kill
|
||||
// the session: log + one notification, then advance past that seq so later
|
||||
@@ -204,10 +193,14 @@ export function useStreamingDictation(
|
||||
if (epoch !== epochRef.current) return;
|
||||
// Log the full error for diagnosis (status + body + stack).
|
||||
console.error("[dictation] segment transcription failed", err);
|
||||
notifications.show({
|
||||
color: "red",
|
||||
message: transcriptionErrorMessage(err),
|
||||
const { code, serverMessage } = classifyTranscriptionError(err);
|
||||
const detail = (err as { message?: string })?.message ?? String(err);
|
||||
const message = dictationErrorMessage(code, t, {
|
||||
serverMessage,
|
||||
detail,
|
||||
});
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
// Skip this seq so later segments can still flush in order.
|
||||
if (nextEmitSeqRef.current === seq) {
|
||||
nextEmitSeqRef.current += 1;
|
||||
@@ -226,7 +219,7 @@ export function useStreamingDictation(
|
||||
}
|
||||
});
|
||||
},
|
||||
[drainResults, transcriptionErrorMessage],
|
||||
[drainResults, t],
|
||||
);
|
||||
|
||||
const start = useCallback(async (): Promise<void> => {
|
||||
@@ -236,6 +229,8 @@ export function useStreamingDictation(
|
||||
if (startingRef.current || vadRef.current || activeRef.current) return;
|
||||
if (status !== "idle") return;
|
||||
startingRef.current = true;
|
||||
// Clear any stale error from a previous attempt.
|
||||
setErrorMessage(null);
|
||||
|
||||
// Notify the caller right when dictation begins (before any async work) so the
|
||||
// editor can snapshot the caret position.
|
||||
@@ -354,10 +349,9 @@ export function useStreamingDictation(
|
||||
// actually runs.)
|
||||
console.error("[dictation] VAD init failed", err);
|
||||
const detail = (err as { message?: string })?.message ?? String(err);
|
||||
notifications.show({
|
||||
color: "red",
|
||||
message: `${t("Could not start recording")}: ${detail}`,
|
||||
});
|
||||
const message = dictationErrorMessage("vad-init-failed", t, { detail });
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
// Defensive: if MicVAD.new partially succeeded before throwing, make sure we
|
||||
// don't leak it.
|
||||
destroyVad();
|
||||
@@ -379,19 +373,11 @@ export function useStreamingDictation(
|
||||
} catch (err) {
|
||||
// Always log the full error for diagnosis (name, message, stack).
|
||||
console.error("[dictation] VAD.start failed", err);
|
||||
const name = (err as { name?: string })?.name;
|
||||
const detail = (err as { message?: string })?.message ?? String(err);
|
||||
let message: string;
|
||||
if (name === "NotAllowedError" || name === "SecurityError") {
|
||||
message = t("Microphone access denied");
|
||||
} else if (name === "NotFoundError" || name === "OverconstrainedError") {
|
||||
message = t("No microphone found");
|
||||
} else if (name === "NotReadableError" || name === "AbortError") {
|
||||
message = t("Microphone is unavailable or already in use");
|
||||
} else {
|
||||
message = `${t("Could not start recording")}: ${detail}`;
|
||||
}
|
||||
const code = classifyGetUserMediaError(err);
|
||||
const message = dictationErrorMessage(code, t, { detail });
|
||||
notifications.show({ color: "red", message });
|
||||
setErrorMessage(message);
|
||||
activeRef.current = false;
|
||||
destroyVad();
|
||||
setStatus("idle");
|
||||
@@ -470,5 +456,5 @@ export function useStreamingDictation(
|
||||
};
|
||||
}, [clearTimer, destroyVad]);
|
||||
|
||||
return { status, start, stop, cancel, audioLevel };
|
||||
return { status, start, stop, cancel, audioLevel, errorMessage };
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { atom } from "jotai";
|
||||
import { Editor } from "@tiptap/core";
|
||||
import { PageEditMode } from "@/features/user/types/user.types.ts";
|
||||
import type { DictationUnavailableReason } from "@/features/dictation/dictation-status";
|
||||
|
||||
export const pageEditorAtom = atom<Editor | null>(null);
|
||||
|
||||
@@ -15,3 +16,15 @@ export const showLinkMenuAtom = atom(false);
|
||||
// Current page's edit mode — initialized from the user's saved preference on
|
||||
// first load, can be toggled locally without persisting to the server.
|
||||
export const currentPageEditModeAtom = atom<PageEditMode>(PageEditMode.Edit);
|
||||
|
||||
/**
 * Dictation availability: whether the mic can start and, when it cannot, why.
 * Published by the page editor; consumed by DictationGroup -> MicButton.
 */
export type DictationAvailability = {
  /** Whether the dictation mic can start. */
  isEditable: boolean;
  /**
   * Cause-specific reason the mic button surfaces as a tooltip when the mic
   * cannot start; null when there is no reason to report.
   */
  reason: DictationUnavailableReason | null;
};

// Value held by the atom until something publishes a real availability:
// not editable, and no reason to show.
const initialDictationAvailability: DictationAvailability = {
  isEditable: false,
  reason: null,
};

export const dictationAvailabilityAtom = atom<DictationAvailability>(
  initialDictationAvailability,
);
|
||||
|
||||
+6
-10
@@ -1,7 +1,8 @@
|
||||
import { FC, useRef } from "react";
|
||||
import { Editor, useEditorState } from "@tiptap/react";
|
||||
import { Editor } from "@tiptap/react";
|
||||
import { useAtomValue } from "jotai";
|
||||
import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
|
||||
import { dictationAvailabilityAtom } from "@/features/editor/atoms/editor-atoms.ts";
|
||||
import { MicButton } from "@/features/dictation/components/mic-button";
|
||||
|
||||
interface Props {
|
||||
@@ -16,20 +17,14 @@ export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
|
||||
const workspace = useAtomValue(workspaceAtom);
|
||||
const streamingDictation =
|
||||
workspace?.settings?.ai?.dictationStreaming === true;
|
||||
// Cause-specific reason the mic is unavailable (published by the page editor).
|
||||
const dictationAvailability = useAtomValue(dictationAvailabilityAtom);
|
||||
// Caret snapshot taken when dictation starts (where the first segment lands).
|
||||
const rangeRef = useRef<{ from: number; to: number } | null>(null);
|
||||
// Running insertion point: after each inserted segment we remember the caret
|
||||
// end so the NEXT segment appends right after it, contiguously, regardless of
|
||||
// where the user's caret currently is. Null until the first segment lands.
|
||||
const insertPosRef = useRef<number | null>(null);
|
||||
// editor.isEditable is a mutable, non-reactive field — read it via
|
||||
// useEditorState so the mic re-enables when the body flips to editable after
|
||||
// collab sync (otherwise it stays stuck disabled). Mirrors the body's own
|
||||
// reactive read.
|
||||
const isEditable = useEditorState({
|
||||
editor,
|
||||
selector: (ctx) => ctx.editor?.isEditable ?? false,
|
||||
});
|
||||
|
||||
const handleStart = () => {
|
||||
const { from, to } = editor.state.selection;
|
||||
@@ -88,7 +83,8 @@ export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
|
||||
streaming={streamingDictation}
|
||||
onStart={handleStart}
|
||||
onText={handleText}
|
||||
disabled={!isEditable}
|
||||
disabled={!editor.isEditable}
|
||||
unavailableReason={dictationAvailability.reason ?? undefined}
|
||||
color={color}
|
||||
iconSize={iconSize}
|
||||
/>
|
||||
|
||||
@@ -27,14 +27,16 @@ import {
|
||||
collabExtensions,
|
||||
mainExtensions,
|
||||
} from "@/features/editor/extensions/extensions";
|
||||
import { useAtom, useAtomValue } from "jotai";
|
||||
import { useAtom, useAtomValue, useSetAtom } from "jotai";
|
||||
import useCollaborationUrl from "@/features/editor/hooks/use-collaboration-url";
|
||||
import { currentUserAtom } from "@/features/user/atoms/current-user-atom";
|
||||
import {
|
||||
currentPageEditModeAtom,
|
||||
dictationAvailabilityAtom,
|
||||
pageEditorAtom,
|
||||
yjsConnectionStatusAtom,
|
||||
} from "@/features/editor/atoms/editor-atoms";
|
||||
import type { DictationUnavailableReason } from "@/features/dictation/dictation-status";
|
||||
import { asideStateAtom } from "@/components/layouts/global/hooks/atoms/sidebar-atom";
|
||||
import {
|
||||
activeCommentIdAtom,
|
||||
@@ -139,6 +141,7 @@ export default function PageEditor({
|
||||
const { pageSlug } = useParams();
|
||||
const slugId = extractPageSlugId(pageSlug);
|
||||
const currentPageEditMode = useAtomValue(currentPageEditModeAtom);
|
||||
const setDictationAvailability = useSetAtom(dictationAvailabilityAtom);
|
||||
const canScroll = useCallback(
|
||||
() => Boolean(isComponentMounted.current && editorRef.current),
|
||||
[isComponentMounted],
|
||||
@@ -488,6 +491,34 @@ export default function PageEditor({
|
||||
);
|
||||
}, [currentPageEditMode, editor, editable, showStatic]);
|
||||
|
||||
// Keep dictationAvailabilityAtom up to date: publish whether dictation can
// start and, when it cannot, which cause the mic button should surface. The
// inputs are the same signals that drive body editability, so the published
// reason follows the current state.
useEffect(() => {
  // Allowed to edit AND currently in edit mode.
  const mayEdit = editable && currentPageEditMode === PageEditMode.Edit;
  // Intended to mirror editor.isEditable: additionally requires that the
  // static (not-yet-synced) view is not being shown.
  const isEditable = mayEdit && !showStatic;

  let reason: DictationUnavailableReason | null;
  if (isEditable) {
    // Dictation can start — nothing to explain.
    reason = null;
  } else if (mayEdit && showStatic) {
    // Permitted to edit and in edit mode, but the collab doc hasn't synced
    // yet: a disconnected socket reads as "offline", anything else as
    // still "connecting".
    const disconnected = yjsConnectionStatus === WebSocketStatus.Disconnected;
    reason = disconnected ? "offline" : "connecting";
  } else {
    // No edit permission or not in edit mode.
    reason = "read-only";
  }

  setDictationAvailability({ isEditable, reason });
}, [
  editable,
  currentPageEditMode,
  showStatic,
  yjsConnectionStatus,
  setDictationAvailability,
]);
|
||||
|
||||
useEffect(() => {
|
||||
if (
|
||||
!hasConnectedOnceRef.current &&
|
||||
|
||||
Reference in New Issue
Block a user