feat(dictation): reason model — speaking tooltip on a disabled mic + shared error resolver (#309)

The dictation mic could be grey/disabled while silently showing "Start dictation", and Mantine's native `disabled` set pointer-events:none so the Tooltip never fired at all — the UI knew the cause but told the user nothing. Runtime error strings were also duplicated verbatim across the two dictation hooks.

- New dictation-status.ts: the single source of truth. A DictationUnavailableReason string-literal union (connecting/offline/read-only/unsupported/busy) + a DictationErrorCode string-literal union, pure classifiers (classifyGetUserMediaError / classifyTranscriptionError) and resolvers (resolveUnavailableLabel / dictationErrorMessage). All user-facing dictation strings are formed here; the verbatim server message still wins for transcription errors.
- page-editor publishes dictationAvailabilityAtom { isEditable, reason } computed at the source (editable/edit-mode/showStatic/collab status): connecting vs offline (stuck) vs read-only. DictationGroup forwards the reason to MicButton.
- MicButton is reason-aware: a disabled mic shows the cause-specific tooltip. The disabled-hover silence is fixed by marking disabled the Mantine way (data-disabled/aria-disabled + click guard) instead of the native attribute, so the Tooltip fires — applied to both the idle (reason) and error (errorMessage) states.
- Both hooks route every error through the shared resolver (deleting the duplicated transcriptionErrorMessage), and expose errorMessage for the tooltip. Wording is byte-identical to each hook's original (incl. the batch hook's DOMException name prefix and the verbatim server message).
- i18n: 3 new reason keys in en-US + ru-RU, and the previously-missing ru-RU dictation error translations.

Tests: dictation-status.test.ts (all classifier/resolver branches, incl. server message passthrough) + mic-button.test.tsx (disabled mic shows the reason text, uses data-disabled not native disabled — fails against the pre-fix code). vitest: 5 files / 32 passed.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-03 18:15:09 +03:00
parent b861266ff8
commit a86e5f409f
11 changed files with 532 additions and 98 deletions
@@ -1274,6 +1274,9 @@
  "Voice dictation is not configured": "Voice dictation is not configured",
  "Microphone is unavailable or already in use": "Microphone is unavailable or already in use",
  "Audio recording is not available in this browser/context": "Audio recording is not available in this browser/context",
+  "Dictation becomes available once the page finishes connecting": "Dictation becomes available once the page finishes connecting",
+  "No connection to the collaboration server — dictation unavailable": "No connection to the collaboration server — dictation unavailable",
+  "This page is read-only": "This page is read-only",
  "Request format": "Request format",
  "How transcription requests are sent to the endpoint": "How transcription requests are sent to the endpoint",
  "OpenAI-compatible (multipart/form-data)": "OpenAI-compatible (multipart/form-data)",
@@ -393,6 +393,16 @@
  "No speech detected": "Речь не распознана",
  "Transcription failed": "Не удалось распознать речь",
  "Voice dictation is not configured": "Голосовой ввод не настроен",
+  "Start dictation": "Начать диктовку",
+  "Stop recording": "Остановить запись",
+  "Microphone access denied": "Доступ к микрофону запрещён",
+  "No microphone found": "Микрофон не найден",
+  "Microphone is unavailable or already in use": "Микрофон недоступен или уже используется",
+  "Could not start recording": "Не удалось начать запись",
+  "Audio recording is not available in this browser/context": "Запись аудио недоступна в этом браузере/контексте",
+  "Dictation becomes available once the page finishes connecting": "Диктовка станет доступна после подключения к документу",
+  "No connection to the collaboration server — dictation unavailable": "Нет связи с сервером совместного редактирования — диктовка недоступна",
+  "This page is read-only": "Страница открыта только для чтения",
  "Embed PDF": "Встроить PDF",
  "Upload and embed a PDF file.": "Загрузите и встроите PDF-файл.",
  "Embed as PDF": "Встроить как PDF",
@@ -0,0 +1,80 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen } from "@testing-library/react";
+import { MantineProvider } from "@mantine/core";
+
+// A disabled mic must explain WHY it is unavailable rather than silently saying
+// "Start dictation". This renders MicButton in its idle+disabled state with a
+// forwarded reason and asserts the accessible label resolves to that reason's
+// text via the shared resolver (dictation-status.resolveUnavailableLabel).
+
+// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
+
+// Replace react-i18next with an identity translator: t(key) returns the key
+// itself, so assertions can compare against the exact English key strings.
+vi.mock("react-i18next", () => ({
+  useTranslation: () => ({ t: (s: string) => s }),
+}));
+
+// A controller frozen in the "idle" state: no-op start/stop/cancel, zero audio
+// level and no error message. Both dictation hooks are mocked to return this
+// same object, so MicButton renders its idle branch regardless of which
+// controller it selects.
+const idleCtl = {
+  status: "idle" as const,
+  start: vi.fn(async () => {}),
+  stop: vi.fn(),
+  cancel: vi.fn(),
+  audioLevel: 0,
+  errorMessage: null,
+};
+// Batch (non-streaming) dictation hook -> inert idle controller.
+vi.mock("@/features/dictation/hooks/use-dictation", () => ({
+  useDictation: () => idleCtl,
+}));
+// Streaming dictation hook -> the same inert idle controller.
+vi.mock("@/features/dictation/hooks/use-streaming-dictation", () => ({
+  useStreamingDictation: () => idleCtl,
+}));
+
+import { MicButton } from "./mic-button";
+
+// Test helper: mounts a MicButton with the given props, wrapped in a
+// MantineProvider.
+function renderButton(props: React.ComponentProps<typeof MicButton>) {
+  const tree = (
+    <MantineProvider>
+      <MicButton {...props} />
+    </MantineProvider>
+  );
+  render(tree);
+}
+
+describe("MicButton — disabled reason label", () => {
+  // jsdom has no MediaRecorder / mediaDevices, so isDictationSupported() would
+  // report "unsupported" and mask the forwarded reason. Stub both so the button
+  // is considered supported and the availability reason is what surfaces.
+  //
+  // Both stubs are fully undone after each test: MediaRecorder through
+  // vi.stubGlobal/vi.unstubAllGlobals (which restores any pre-existing value
+  // instead of deleting it), and navigator.mediaDevices by restoring the
+  // property descriptor captured before the stub was installed.
+  let savedMediaDevices: PropertyDescriptor | undefined;
+
+  beforeEach(() => {
+    vi.stubGlobal("MediaRecorder", class {});
+    savedMediaDevices = Object.getOwnPropertyDescriptor(
+      navigator,
+      "mediaDevices",
+    );
+    Object.defineProperty(navigator, "mediaDevices", {
+      configurable: true,
+      value: { getUserMedia: vi.fn() },
+    });
+  });
+  afterEach(() => {
+    vi.unstubAllGlobals();
+    if (savedMediaDevices) {
+      Object.defineProperty(navigator, "mediaDevices", savedMediaDevices);
+    } else {
+      // No own property existed before the stub: removing ours re-exposes
+      // whatever the environment's prototype provides (if anything).
+      Reflect.deleteProperty(navigator, "mediaDevices");
+    }
+  });
+
+  it("shows the cause-specific reason instead of 'Start dictation' when disabled with a reason", () => {
+    renderButton({ onText: () => {}, disabled: true, unavailableReason: "offline" });
+    const expected =
+      "No connection to the collaboration server — dictation unavailable";
+    // The reason surfaces as the accessible label (and the tooltip text).
+    const button = screen.getByRole("button", { name: expected });
+    expect(button).toBeDefined();
+    // It is marked disabled the Mantine way (data-disabled), NOT the native
+    // `disabled` attribute — otherwise pointer-events:none would kill the tooltip.
+    expect(button.getAttribute("data-disabled")).toBe("true");
+    expect(button.hasAttribute("disabled")).toBe(false);
+    // And it no longer silently reads "Start dictation".
+    expect(screen.queryByRole("button", { name: "Start dictation" })).toBeNull();
+  });
+
+  it("reads 'Start dictation' when enabled with no reason", () => {
+    renderButton({ onText: () => {} });
+    expect(
+      screen.getByRole("button", { name: "Start dictation" }),
+    ).toBeDefined();
+  });
+});
@@ -4,6 +4,11 @@ import { IconMicrophone, IconPlayerStopFilled } from "@tabler/icons-react";
 import { useTranslation } from "react-i18next";
 import { useDictation } from "@/features/dictation/hooks/use-dictation";
 import { useStreamingDictation } from "@/features/dictation/hooks/use-streaming-dictation";
+import {
+  isDictationSupported,
+  resolveUnavailableLabel,
+  type DictationUnavailableReason,
+} from "@/features/dictation/dictation-status";
 import classes from "./mic-button.module.css";

 interface MicButtonProps {
@@ -21,6 +26,9 @@ interface MicButtonProps {
  // When true, use the streaming (Silero-VAD) dictation controller, which emits
  // text progressively as the user pauses; otherwise use the batch controller.
  streaming?: boolean;
+  // When the mic is disabled for an availability reason, this is the cause the
+  // idle tooltip explains (e.g. pre-sync "connecting", "offline", "read-only").
+  unavailableReason?: DictationUnavailableReason;
 }

 /**
@@ -37,6 +45,7 @@ export const MicButton: FC<MicButtonProps> = ({
  color,
  iconSize,
  streaming = false,
+  unavailableReason,
 }) => {
  const { t } = useTranslation();
  // Call BOTH hooks unconditionally to respect the rules of hooks: which one is
@@ -46,7 +55,7 @@ export const MicButton: FC<MicButtonProps> = ({
  const batchCtl = useDictation({ onText, onStart });
  const streamingCtl = useStreamingDictation({ onText, onStart });
  const ctl = streaming ? streamingCtl : batchCtl;
-  const { status, start, stop, audioLevel } = ctl;
+  const { status, start, stop, audioLevel, errorMessage } = ctl;
  const resolvedIconSize = iconSize ?? (size === "lg" ? 18 : 16);

  if (status === "recording") {
@@ -82,15 +91,28 @@ export const MicButton: FC<MicButtonProps> = ({
  ) {
    // "loading" (streaming hook fetching the VAD model on first use) shows the
    // same spinner+disabled state so the first click is visibly acknowledged and
-    // a confusing second click can't fire while the model loads.
-    const label = status === "loading" ? t("Preparing…") : t("Transcribing…");
+    // a confusing second click can't fire while the model loads. The error case
+    // explains the failure via the hook's resolved errorMessage instead of the
+    // transient "Transcribing…" label.
+    const label =
+      status === "error"
+        ? (errorMessage ?? t("Transcription failed"))
+        : status === "loading"
+          ? t("Preparing…")
+          : t("Transcribing…");
    return (
      <Tooltip label={label} withArrow>
        <ActionIcon
          size={size}
          variant="subtle"
          color={color}
-          disabled
+          // Mark disabled the Mantine way (data-disabled/aria-disabled) rather
+          // than the native `disabled` attribute: native `disabled` sets
+          // `pointer-events:none`, which suppresses hover so the Tooltip never
+          // fires. This is a status display with no click action to guard, so
+          // keeping it hoverable simply lets the error reason be read on hover.
+          data-disabled
+          aria-disabled
          aria-label={label}
        >
          <Loader size="xs" />
@@ -99,15 +121,38 @@ export const MicButton: FC<MicButtonProps> = ({
    );
  }

+  // Idle branch. A grey/disabled mic must explain WHY it can't record. An
+  // unsupported browser/context is detected here; otherwise the parent forwards
+  // a cause-specific reason. We must NOT pass the native `disabled` prop: Mantine
+  // renders `<button disabled>` with `pointer-events:none`, which suppresses
+  // hover so the Tooltip never fires. Instead mark it disabled the Mantine way
+  // (data-disabled/aria-disabled) — keeping it hoverable and in the a11y tree —
+  // and guard the click ourselves.
+  const unsupported = !isDictationSupported();
+  const isDisabled = disabled || unsupported;
+  const reason: DictationUnavailableReason | undefined = unsupported
+    ? "unsupported"
+    : unavailableReason;
+  const idleLabel =
+    isDisabled && reason
+      ? resolveUnavailableLabel(reason, t)
+      : t("Start dictation");
  return (
-    <Tooltip label={t("Start dictation")} withArrow>
+    <Tooltip label={idleLabel} withArrow>
      <ActionIcon
        size={size}
        variant="subtle"
        color={color}
-        onClick={() => void start()}
-        disabled={disabled}
-        aria-label={t("Start dictation")}
+        onClick={(e) => {
+          if (isDisabled) {
+            e.preventDefault();
+            return;
+          }
+          void start();
+        }}
+        data-disabled={isDisabled || undefined}
+        aria-disabled={isDisabled}
+        aria-label={idleLabel}
      >
        <IconMicrophone size={resolvedIconSize} />
      </ActionIcon>
@@ -0,0 +1,157 @@
+import { describe, it, expect } from "vitest";
+import {
+  classifyGetUserMediaError,
+  classifyTranscriptionError,
+  dictationErrorMessage,
+  resolveUnavailableLabel,
+  isDictationSupported,
+} from "./dictation-status";
+
+// Unit tests for the shared dictation-status resolvers (dictation-status.ts).
+// Both dictation hooks and the mic button form their user-facing strings here,
+// so a regression in the classification or message mapping would silently swap
+// what a user reads when the mic is grey or a recording fails. A fake `t`
+// returns its key verbatim so we assert the exact i18n key each branch selects.
+const t = (k: string) => k;
+
+describe("classifyGetUserMediaError", () => {
+  // Asserts that an error carrying each of `names` classifies to `expected`.
+  const expectNames = (names: string[], expected: string) => {
+    for (const name of names) {
+      expect(classifyGetUserMediaError({ name })).toBe(expected);
+    }
+  };
+
+  it("maps NotAllowedError / SecurityError to mic-denied", () => {
+    expectNames(["NotAllowedError", "SecurityError"], "mic-denied");
+  });
+
+  it("maps NotFoundError / OverconstrainedError to no-mic", () => {
+    expectNames(["NotFoundError", "OverconstrainedError"], "no-mic");
+  });
+
+  it("maps NotReadableError / AbortError to mic-in-use", () => {
+    expectNames(["NotReadableError", "AbortError"], "mic-in-use");
+  });
+
+  it("maps anything else / undefined to unknown", () => {
+    expectNames(["WeirdError"], "unknown");
+    // Inputs without a usable `name` at all.
+    for (const nameless of [undefined, {}]) {
+      expect(classifyGetUserMediaError(nameless)).toBe("unknown");
+    }
+  });
+});
+
+describe("classifyTranscriptionError", () => {
+  // Result shapes shared by the cases below.
+  const notConfigured = { code: "stt-not-configured" };
+  const genericFailure = { code: "transcription-failed" };
+
+  it("returns the verbatim server message when present", () => {
+    const withServerMessage = {
+      response: { status: 500, data: { message: "provider 404" } },
+    };
+    expect(classifyTranscriptionError(withServerMessage)).toEqual({
+      code: "transcription-failed",
+      serverMessage: "provider 404",
+    });
+  });
+
+  it("maps 503 / 403 (no server message) to stt-not-configured", () => {
+    for (const status of [503, 403]) {
+      expect(classifyTranscriptionError({ response: { status } })).toEqual(
+        notConfigured,
+      );
+    }
+  });
+
+  it("falls back to transcription-failed with no server message otherwise", () => {
+    const inputs: unknown[] = [
+      { response: { status: 500 } },
+      new Error("network"),
+      // Blank server message is ignored (does not win as verbatim text).
+      { response: { data: { message: "   " } } },
+    ];
+    for (const input of inputs) {
+      expect(classifyTranscriptionError(input)).toEqual(genericFailure);
+    }
+  });
+});
+
+describe("dictationErrorMessage", () => {
+  type Code = Parameters<typeof dictationErrorMessage>[0];
+
+  it("maps each code to the expected i18n key", () => {
+    // code -> i18n key the resolver must select when no extra is supplied.
+    const expectedKeys: Array<[Code, string]> = [
+      ["mic-denied", "Microphone access denied"],
+      ["no-mic", "No microphone found"],
+      ["mic-in-use", "Microphone is unavailable or already in use"],
+      [
+        "no-media-devices",
+        "Audio recording is not available in this browser/context",
+      ],
+      ["stt-not-configured", "Voice dictation is not configured"],
+      ["transcription-failed", "Transcription failed"],
+      ["recorder-failed", "Could not start recording"],
+      ["vad-init-failed", "Could not start recording"],
+      ["unknown", "Could not start recording"],
+    ];
+    for (const [code, key] of expectedKeys) {
+      expect(dictationErrorMessage(code, t)).toBe(key);
+    }
+  });
+
+  it("returns the server message verbatim for transcription-failed (not the t key)", () => {
+    const text = dictationErrorMessage("transcription-failed", t, {
+      serverMessage: "quota exceeded",
+    });
+    expect(text).toBe("quota exceeded");
+  });
+
+  it("appends the detail to recorder-failed / unknown", () => {
+    const fromRecorder = dictationErrorMessage("recorder-failed", t, {
+      detail: "boom",
+    });
+    expect(fromRecorder).toBe("Could not start recording: boom");
+
+    const fromUnknown = dictationErrorMessage("unknown", t, { detail: "nope" });
+    expect(fromUnknown).toBe("Could not start recording: nope");
+  });
+
+  it("appends the detail to transcription-failed when there is no server message", () => {
+    const text = dictationErrorMessage("transcription-failed", t, {
+      detail: "timeout",
+    });
+    expect(text).toBe("Transcription failed: timeout");
+  });
+});
+
+describe("resolveUnavailableLabel", () => {
+  type Reason = Parameters<typeof resolveUnavailableLabel>[0];
+
+  it("maps each reason to its expected i18n key", () => {
+    // reason -> i18n key of the tooltip shown on the disabled mic.
+    const expectedKeys: Array<[Reason, string]> = [
+      [
+        "connecting",
+        "Dictation becomes available once the page finishes connecting",
+      ],
+      [
+        "offline",
+        "No connection to the collaboration server — dictation unavailable",
+      ],
+      ["read-only", "This page is read-only"],
+      [
+        "unsupported",
+        "Audio recording is not available in this browser/context",
+      ],
+      ["busy", "Transcribing…"],
+    ];
+    for (const [reason, key] of expectedKeys) {
+      expect(resolveUnavailableLabel(reason, t)).toBe(key);
+    }
+  });
+});
+
+describe("isDictationSupported", () => {
+  // Runs `check` with the given globals temporarily installed on globalThis,
+  // then restores the previous property descriptors (or removes the property
+  // when none existed), so no stub leaks into other tests.
+  const withGlobals = (
+    overrides: Record<string, unknown>,
+    check: () => void,
+  ): void => {
+    const saved = Object.keys(overrides).map(
+      (key) => [key, Object.getOwnPropertyDescriptor(globalThis, key)] as const,
+    );
+    try {
+      for (const [key, value] of Object.entries(overrides)) {
+        Object.defineProperty(globalThis, key, {
+          configurable: true,
+          writable: true,
+          value,
+        });
+      }
+      check();
+    } finally {
+      for (const [key, descriptor] of saved) {
+        if (descriptor) Object.defineProperty(globalThis, key, descriptor);
+        else Reflect.deleteProperty(globalThis, key);
+      }
+    }
+  };
+
+  const fakeRecorder = class {};
+  const navigatorWithMic = { mediaDevices: { getUserMedia: () => {} } };
+
+  it("returns a boolean", () => {
+    expect(typeof isDictationSupported()).toBe("boolean");
+  });
+
+  it("is true when MediaRecorder and getUserMedia are both present", () => {
+    withGlobals(
+      { MediaRecorder: fakeRecorder, navigator: navigatorWithMic },
+      () => {
+        expect(isDictationSupported()).toBe(true);
+      },
+    );
+  });
+
+  it("is false when MediaRecorder is missing", () => {
+    withGlobals(
+      { MediaRecorder: undefined, navigator: navigatorWithMic },
+      () => {
+        expect(isDictationSupported()).toBe(false);
+      },
+    );
+  });
+
+  it("is false when navigator.mediaDevices.getUserMedia is missing", () => {
+    withGlobals({ MediaRecorder: fakeRecorder, navigator: {} }, () => {
+      expect(isDictationSupported()).toBe(false);
+    });
+  });
+});
@@ -0,0 +1,113 @@
+// Single source of truth for "why dictation is unavailable" and "why it failed".
+// Both dictation hooks and the mic button pull their user-facing strings from
+// the resolvers here so the wording lives in exactly one place.
+
+// Why the mic cannot be used right now. A string-literal union (no runtime
+// enum object). resolveUnavailableLabel() maps each member to one tooltip key:
+//   connecting  -> "Dictation becomes available once the page finishes connecting"
+//   offline     -> "No connection to the collaboration server — dictation unavailable"
+//   read-only   -> "This page is read-only"
+//   unsupported -> "Audio recording is not available in this browser/context"
+//   busy        -> "Transcribing…"
+export type DictationUnavailableReason =
+  | "connecting"
+  | "offline"
+  | "read-only"
+  | "unsupported"
+  | "busy";
+
+// Machine-readable cause of a runtime dictation failure; dictationErrorMessage()
+// turns a code into the text shown to the user.
+//   mic-denied / no-mic / mic-in-use  -> returned by classifyGetUserMediaError()
+//   stt-not-configured / transcription-failed
+//                                     -> returned by classifyTranscriptionError()
+//   no-media-devices                  -> "Audio recording is not available in
+//                                        this browser/context"
+//   recorder-failed / vad-init-failed / unknown
+//                                     -> "Could not start recording" (+ detail)
+export type DictationErrorCode =
+  | "no-media-devices"
+  | "mic-denied"
+  | "no-mic"
+  | "mic-in-use"
+  | "recorder-failed"
+  | "vad-init-failed"
+  | "stt-not-configured"
+  | "transcription-failed"
+  | "unknown";
+
+/**
+ * Reports whether the current browser/context exposes what is needed to
+ * record audio: a global `MediaRecorder`, a global `navigator`, and
+ * `navigator.mediaDevices.getUserMedia`.
+ *
+ * @returns `true` only when all three are present.
+ */
+export function isDictationSupported(): boolean {
+  // `typeof` keeps these checks safe when the globals are not declared at all.
+  if (typeof MediaRecorder === "undefined") return false;
+  if (typeof navigator === "undefined") return false;
+  return Boolean(navigator.mediaDevices?.getUserMedia);
+}
+
+/**
+ * Classifies a rejected getUserMedia / VAD.start call by the error's `name`
+ * (the DOMException name).
+ *
+ * @param err The caught value; only its optional `name` property is read.
+ * @returns "mic-denied", "no-mic" or "mic-in-use" for the recognised names,
+ *          and "unknown" for everything else, including a missing error or a
+ *          missing name.
+ */
+export function classifyGetUserMediaError(err: unknown): DictationErrorCode {
+  const errorName = (err as { name?: string })?.name;
+  switch (errorName) {
+    case "NotAllowedError":
+    case "SecurityError":
+      return "mic-denied";
+    case "NotFoundError":
+    case "OverconstrainedError":
+      return "no-mic";
+    case "NotReadableError":
+    case "AbortError":
+      return "mic-in-use";
+    default:
+      return "unknown";
+  }
+}
+
+/**
+ * Classifies a failed transcription request.
+ *
+ * Reads `err.response.status` and `err.response.data.message`; any of them may
+ * be absent. Precedence:
+ *   1. a non-blank string `message` -> "transcription-failed", with that
+ *      message returned verbatim (untrimmed) as `serverMessage`;
+ *   2. status 503 or 403            -> "stt-not-configured";
+ *   3. anything else                -> "transcription-failed".
+ *
+ * `message` comes from the server and is only trusted when it is actually a
+ * string. A non-string payload (for example an array of validation messages)
+ * is ignored instead of being passed to `.trim()`, which would throw inside
+ * the caller's error handler and skip its notification/state reset.
+ *
+ * @param err The caught value from the transcription call.
+ * @returns The error code, plus `serverMessage` only in case 1.
+ */
+export function classifyTranscriptionError(err: unknown): {
+  code: DictationErrorCode;
+  serverMessage?: string;
+} {
+  const resp = (
+    err as { response?: { status?: number; data?: { message?: unknown } } }
+  )?.response;
+  const serverMessage = resp?.data?.message;
+  if (typeof serverMessage === "string" && serverMessage.trim().length > 0)
+    return { code: "transcription-failed", serverMessage };
+  if (resp?.status === 503 || resp?.status === 403)
+    return { code: "stt-not-configured" };
+  return { code: "transcription-failed" };
+}
+
+// Minimal translator shape the resolvers below rely on: i18n key in, text out.
+type TFn = (key: string) => string;
+
+/**
+ * Resolves an error code to the text shown to the user — the single place
+ * where runtime dictation error strings are assembled.
+ *
+ * @param code  The classified failure.
+ * @param t     Translator (i18n key -> text).
+ * @param extra Optional context. `serverMessage`, when non-blank, is returned
+ *              verbatim for "transcription-failed". `detail`, when non-empty,
+ *              is appended as ": <detail>" to the generic "Transcription
+ *              failed" and "Could not start recording" texts; the other codes
+ *              ignore it.
+ * @returns The user-facing message.
+ */
+export function dictationErrorMessage(
+  code: DictationErrorCode,
+  t: TFn,
+  extra?: { serverMessage?: string; detail?: string },
+): string {
+  // An absent or empty detail contributes nothing.
+  const suffix = extra?.detail ? `: ${extra.detail}` : "";
+
+  // Codes with a fixed, self-explanatory message.
+  if (code === "mic-denied") return t("Microphone access denied");
+  if (code === "no-mic") return t("No microphone found");
+  if (code === "mic-in-use")
+    return t("Microphone is unavailable or already in use");
+  if (code === "no-media-devices")
+    return t("Audio recording is not available in this browser/context");
+  if (code === "stt-not-configured")
+    return t("Voice dictation is not configured");
+
+  if (code === "transcription-failed") {
+    // The server's own explanation wins over the generic text.
+    const fromServer = extra?.serverMessage;
+    if (fromServer && fromServer.trim().length > 0) return fromServer;
+    return `${t("Transcription failed")}${suffix}`;
+  }
+
+  // recorder-failed, vad-init-failed, unknown, and any unrecognised code.
+  return `${t("Could not start recording")}${suffix}`;
+}
+
+/**
+ * Resolves an unavailability reason to the tooltip text for a disabled mic —
+ * the single place where these strings are formed.
+ *
+ * @param r The reason dictation cannot start.
+ * @param t Translator (i18n key -> text).
+ * @returns The translated tooltip text; "busy" and any unrecognised reason
+ *          resolve to "Transcribing…".
+ */
+export function resolveUnavailableLabel(
+  r: DictationUnavailableReason,
+  t: TFn,
+): string {
+  if (r === "connecting")
+    return t("Dictation becomes available once the page finishes connecting");
+  if (r === "offline")
+    return t(
+      "No connection to the collaboration server — dictation unavailable",
+    );
+  if (r === "read-only") return t("This page is read-only");
+  if (r === "unsupported")
+    return t("Audio recording is not available in this browser/context");
+  // "busy" plus the fallback for anything unexpected.
+  return t("Transcribing…");
+}
@@ -2,6 +2,11 @@ import { useCallback, useEffect, useRef, useState } from "react";
 import { notifications } from "@mantine/notifications";
 import { useTranslation } from "react-i18next";
 import { transcribeAudio } from "@/features/dictation/services/dictation-service";
+import {
+  classifyGetUserMediaError,
+  classifyTranscriptionError,
+  dictationErrorMessage,
+} from "@/features/dictation/dictation-status";

 // "loading" is set only by the streaming hook while it lazily loads the VAD
 // model on first use; the batch hook never sets it. It exists so the streaming
@@ -26,6 +31,8 @@ interface UseDictationResult {
  cancel: () => void;
  // Smoothed live microphone level in the 0..1 range while recording (0 when idle).
  audioLevel: number;
+  // The last error shown to the user (null until one occurs / on a new start).
+  errorMessage: string | null;
 }

 // Candidate container/codec combinations in preference order. The first one the
@@ -67,6 +74,8 @@ export function useDictation(
  const { t } = useTranslation();
  const [status, setStatus] = useState<DictationStatus>("idle");
  const [audioLevel, setAudioLevel] = useState(0);
+  // Last error message shown to the user; the mic button reads it for its tooltip.
+  const [errorMessage, setErrorMessage] = useState<string | null>(null);

  // Keep the latest callbacks in a ref so the recorder's onstop closure always
  // calls the current handlers without re-creating the recorder.
@@ -194,15 +203,16 @@ export function useDictation(
    if (startingRef.current || recorderRef.current || streamRef.current) return;
    if (status !== "idle") return;
    startingRef.current = true;
+    // Clear any stale error from a previous attempt.
+    setErrorMessage(null);

    if (!navigator.mediaDevices?.getUserMedia) {
      const reason =
        "navigator.mediaDevices.getUserMedia is unavailable in this context";
      console.error("[dictation] " + reason);
-      notifications.show({
-        color: "red",
-        message: t("Audio recording is not available in this browser/context"),
-      });
+      const message = dictationErrorMessage("no-media-devices", t);
+      notifications.show({ color: "red", message });
+      setErrorMessage(message);
      setStatus("idle");
      startingRef.current = false;
      return;
@@ -215,19 +225,16 @@ export function useDictation(
      // Always log the full error for diagnosis (name, message, stack).
      console.error("[dictation] getUserMedia failed", err);
      const name = (err as { name?: string })?.name;
-      const detail = (err as { message?: string })?.message ?? String(err);
-      let message: string;
-      if (name === "NotAllowedError" || name === "SecurityError") {
-        message = t("Microphone access denied");
-      } else if (name === "NotFoundError" || name === "OverconstrainedError") {
-        message = t("No microphone found");
-      } else if (name === "NotReadableError" || name === "AbortError") {
-        message = t("Microphone is unavailable or already in use");
-      } else {
-        // Unknown failure: show the real reason instead of a generic string.
-        message = `${t("Could not start recording")}: ${name ? `${name}: ` : ""}${detail}`;
-      }
+      const rawDetail = (err as { message?: string })?.message ?? String(err);
+      // Prefix the DOMException name (e.g. "TypeError: …") so the generic
+      // resolver branch reproduces this hook's original "Could not start
+      // recording: <name>: <detail>" text. Each caller owns its own detail; the
+      // streaming hook intentionally does not add the name.
+      const detail = `${name ? `${name}: ` : ""}${rawDetail}`;
+      const code = classifyGetUserMediaError(err);
+      const message = dictationErrorMessage(code, t, { detail });
      notifications.show({ color: "red", message });
+      setErrorMessage(message);
      setStatus("idle");
      startingRef.current = false;
      return;
@@ -249,10 +256,10 @@ export function useDictation(
      // The stream was acquired but the recorder failed to construct; stop the
      // tracks so the MediaStream does not leak before bailing out.
      stopTracks();
-      notifications.show({
-        color: "red",
-        message: `${t("Could not start recording")}: ${(err as { message?: string })?.message ?? String(err)}`,
-      });
+      const detail = (err as { message?: string })?.message ?? String(err);
+      const message = dictationErrorMessage("recorder-failed", t, { detail });
+      notifications.show({ color: "red", message });
+      setErrorMessage(message);
      setStatus("idle");
      startingRef.current = false;
      return;
@@ -293,21 +300,14 @@ export function useDictation(
        .catch((err: unknown) => {
          // Log the full error for diagnosis (status + body + stack).
          console.error("[dictation] transcription failed", err);
-          const resp = (
-            err as { response?: { status?: number; data?: { message?: string } } }
-          )?.response;
-          const serverMsg = resp?.data?.message;
-          let message: string;
-          if (serverMsg && serverMsg.trim().length > 0) {
-            // The server already explains the cause (e.g. provider 404, bad
-            // format, STT not configured) — show it verbatim.
-            message = serverMsg;
-          } else if (resp?.status === 503 || resp?.status === 403) {
-            message = t("Voice dictation is not configured");
-          } else {
-            message = `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`;
-          }
+          const { code, serverMessage } = classifyTranscriptionError(err);
+          const detail = (err as { message?: string })?.message ?? String(err);
+          const message = dictationErrorMessage(code, t, {
+            serverMessage,
+            detail,
+          });
          notifications.show({ color: "red", message });
+          setErrorMessage(message);
          setStatus("error");
          if (errorTimerRef.current !== null) {
            clearTimeout(errorTimerRef.current);
@@ -332,10 +332,10 @@ export function useDictation(
      stopTracks();
      recorderRef.current = null;
      startingRef.current = false;
-      notifications.show({
-        color: "red",
-        message: `${t("Could not start recording")}: ${(err as { message?: string })?.message ?? String(err)}`,
-      });
+      const detail = (err as { message?: string })?.message ?? String(err);
+      const message = dictationErrorMessage("recorder-failed", t, { detail });
+      notifications.show({ color: "red", message });
+      setErrorMessage(message);
      setStatus("idle");
      return;
    }
@@ -405,5 +405,5 @@ export function useDictation(
    };
  }, [clearTimer, stopTracks, stopMeter]);

-  return { status, start, stop, cancel, audioLevel };
+  return { status, start, stop, cancel, audioLevel, errorMessage };
 }
@@ -4,6 +4,11 @@ import { useTranslation } from "react-i18next";
 import { transcribeAudio } from "@/features/dictation/services/dictation-service";
 import { encodeWavPcm16 } from "@/features/dictation/utils/encode-wav";
 import type { DictationStatus } from "@/features/dictation/hooks/use-dictation";
+import {
+  classifyGetUserMediaError,
+  classifyTranscriptionError,
+  dictationErrorMessage,
+} from "@/features/dictation/dictation-status";

 // Lazily-imported MicVAD type. The runtime import happens inside start() so the
 // heavy onnxruntime-web / Silero model is code-split out of the main bundle and
@@ -27,6 +32,8 @@ interface UseStreamingDictationResult {
  cancel: () => void;
  // Smoothed live speech level in the 0..1 range while recording (0 when idle).
  audioLevel: number;
+  // The last error shown to the user (null until one occurs / on a new start).
+  errorMessage: string | null;
 }

 // Sample rate of the audio MicVAD hands to onSpeechEnd (Silero VAD runs at 16k).
@@ -60,6 +67,8 @@ export function useStreamingDictation(
  const { t } = useTranslation();
  const [status, setStatus] = useState<DictationStatus>("idle");
  const [audioLevel, setAudioLevel] = useState(0);
+  // Last error message shown to the user; the mic button reads it for its tooltip.
+  const [errorMessage, setErrorMessage] = useState<string | null>(null);

  // Keep the latest callbacks in a ref so async VAD/HTTP closures always call the
  // current handlers without re-creating the VAD.
@@ -158,26 +167,6 @@ export function useStreamingDictation(
    }
  }, []);

-  // Map a transcription error to a user-facing message, mirroring the batch hook.
-  const transcriptionErrorMessage = useCallback(
-    (err: unknown): string => {
-      const resp = (
-        err as { response?: { status?: number; data?: { message?: string } } }
-      )?.response;
-      const serverMsg = resp?.data?.message;
-      if (serverMsg && serverMsg.trim().length > 0) {
-        // The server already explains the cause (e.g. provider 404, bad format,
-        // STT not configured) — show it verbatim.
-        return serverMsg;
-      }
-      if (resp?.status === 503 || resp?.status === 403) {
-        return t("Voice dictation is not configured");
-      }
-      return `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`;
-    },
-    [t],
-  );
-
  // Handle one ended speech segment: encode to WAV and transcribe. Results are
  // buffered by seq and flushed in order. A single failed segment does NOT kill
  // the session: log + one notification, then advance past that seq so later
@@ -204,10 +193,14 @@ export function useStreamingDictation(
          if (epoch !== epochRef.current) return;
          // Log the full error for diagnosis (status + body + stack).
          console.error("[dictation] segment transcription failed", err);
-          notifications.show({
-            color: "red",
-            message: transcriptionErrorMessage(err),
+          const { code, serverMessage } = classifyTranscriptionError(err);
+          const detail = (err as { message?: string })?.message ?? String(err);
+          const message = dictationErrorMessage(code, t, {
+            serverMessage,
+            detail,
          });
+          notifications.show({ color: "red", message });
+          setErrorMessage(message);
          // Skip this seq so later segments can still flush in order.
          if (nextEmitSeqRef.current === seq) {
            nextEmitSeqRef.current += 1;
@@ -226,7 +219,7 @@ export function useStreamingDictation(
          }
        });
    },
-    [drainResults, transcriptionErrorMessage],
+    [drainResults, t],
  );

  const start = useCallback(async (): Promise<void> => {
@@ -236,6 +229,8 @@ export function useStreamingDictation(
    if (startingRef.current || vadRef.current || activeRef.current) return;
    if (status !== "idle") return;
    startingRef.current = true;
+    // Clear any stale error from a previous attempt.
+    setErrorMessage(null);

    // Notify the caller right when dictation begins (before any async work) so the
    // editor can snapshot the caret position.
@@ -354,10 +349,9 @@ export function useStreamingDictation(
      // actually runs.)
      console.error("[dictation] VAD init failed", err);
      const detail = (err as { message?: string })?.message ?? String(err);
-      notifications.show({
-        color: "red",
-        message: `${t("Could not start recording")}: ${detail}`,
-      });
+      const message = dictationErrorMessage("vad-init-failed", t, { detail });
+      notifications.show({ color: "red", message });
+      setErrorMessage(message);
      // Defensive: if MicVAD.new partially succeeded before throwing, make sure we
      // don't leak it.
      destroyVad();
@@ -379,19 +373,11 @@ export function useStreamingDictation(
    } catch (err) {
      // Always log the full error for diagnosis (name, message, stack).
      console.error("[dictation] VAD.start failed", err);
-      const name = (err as { name?: string })?.name;
      const detail = (err as { message?: string })?.message ?? String(err);
-      let message: string;
-      if (name === "NotAllowedError" || name === "SecurityError") {
-        message = t("Microphone access denied");
-      } else if (name === "NotFoundError" || name === "OverconstrainedError") {
-        message = t("No microphone found");
-      } else if (name === "NotReadableError" || name === "AbortError") {
-        message = t("Microphone is unavailable or already in use");
-      } else {
-        message = `${t("Could not start recording")}: ${detail}`;
-      }
+      const code = classifyGetUserMediaError(err);
+      const message = dictationErrorMessage(code, t, { detail });
      notifications.show({ color: "red", message });
+      setErrorMessage(message);
      activeRef.current = false;
      destroyVad();
      setStatus("idle");
@@ -470,5 +456,5 @@ export function useStreamingDictation(
    };
  }, [clearTimer, destroyVad]);

-  return { status, start, stop, cancel, audioLevel };
+  return { status, start, stop, cancel, audioLevel, errorMessage };
 }
@@ -1,6 +1,7 @@
 import { atom } from "jotai";
 import { Editor } from "@tiptap/core";
 import { PageEditMode } from "@/features/user/types/user.types.ts";
+import type { DictationUnavailableReason } from "@/features/dictation/dictation-status";

 export const pageEditorAtom = atom<Editor | null>(null);

@@ -15,3 +16,15 @@ export const showLinkMenuAtom = atom(false);
 // Current page's edit mode — initialized from the user's saved preference on
 // first load, can be toggled locally without persisting to the server.
 export const currentPageEditModeAtom = atom<PageEditMode>(PageEditMode.Edit);
+
+// Whether the dictation mic can start, and (when it can't) the cause-specific
+// reason the mic button surfaces as a tooltip. Published by the page editor,
+// consumed by DictationGroup -> MicButton.
+export type DictationAvailability = {
+  isEditable: boolean; // true when dictation may start
+  reason: DictationUnavailableReason | null; // why it can't start; null when no cause applies
+};
+export const dictationAvailabilityAtom = atom<DictationAvailability>({
+  isEditable: false, // initial value: unavailable until a publisher writes the atom
+  reason: null, // initial value: no cause to show yet
+});
@@ -1,7 +1,8 @@
 import { FC, useRef } from "react";
-import { Editor, useEditorState } from "@tiptap/react";
+import { Editor } from "@tiptap/react";
 import { useAtomValue } from "jotai";
 import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
+import { dictationAvailabilityAtom } from "@/features/editor/atoms/editor-atoms.ts";
 import { MicButton } from "@/features/dictation/components/mic-button";

 interface Props {
@@ -16,20 +17,14 @@ export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
  const workspace = useAtomValue(workspaceAtom);
  const streamingDictation =
    workspace?.settings?.ai?.dictationStreaming === true;
+  // Cause-specific reason the mic is unavailable (published by the page editor).
+  const dictationAvailability = useAtomValue(dictationAvailabilityAtom);
  // Caret snapshot taken when dictation starts (where the first segment lands).
  const rangeRef = useRef<{ from: number; to: number } | null>(null);
  // Running insertion point: after each inserted segment we remember the caret
  // end so the NEXT segment appends right after it, contiguously, regardless of
  // where the user's caret currently is. Null until the first segment lands.
  const insertPosRef = useRef<number | null>(null);
-  // editor.isEditable is a mutable, non-reactive field — read it via
-  // useEditorState so the mic re-enables when the body flips to editable after
-  // collab sync (otherwise it stays stuck disabled). Mirrors the body's own
-  // reactive read.
-  const isEditable = useEditorState({
-    editor,
-    selector: (ctx) => ctx.editor?.isEditable ?? false,
-  });

  const handleStart = () => {
    const { from, to } = editor.state.selection;
@@ -88,7 +83,8 @@ export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
      streaming={streamingDictation}
      onStart={handleStart}
      onText={handleText}
-      disabled={!isEditable}
+      disabled={!editor.isEditable}
+      unavailableReason={dictationAvailability.reason ?? undefined}
      color={color}
      iconSize={iconSize}
    />
@@ -27,14 +27,16 @@ import {
  collabExtensions,
  mainExtensions,
 } from "@/features/editor/extensions/extensions";
-import { useAtom, useAtomValue } from "jotai";
+import { useAtom, useAtomValue, useSetAtom } from "jotai";
 import useCollaborationUrl from "@/features/editor/hooks/use-collaboration-url";
 import { currentUserAtom } from "@/features/user/atoms/current-user-atom";
 import {
  currentPageEditModeAtom,
+  dictationAvailabilityAtom,
  pageEditorAtom,
  yjsConnectionStatusAtom,
 } from "@/features/editor/atoms/editor-atoms";
+import type { DictationUnavailableReason } from "@/features/dictation/dictation-status";
 import { asideStateAtom } from "@/components/layouts/global/hooks/atoms/sidebar-atom";
 import {
  activeCommentIdAtom,
@@ -139,6 +141,7 @@ export default function PageEditor({
  const { pageSlug } = useParams();
  const slugId = extractPageSlugId(pageSlug);
  const currentPageEditMode = useAtomValue(currentPageEditModeAtom);
+  const setDictationAvailability = useSetAtom(dictationAvailabilityAtom);
  const canScroll = useCallback(
    () => Boolean(isComponentMounted.current && editorRef.current),
    [isComponentMounted],
@@ -488,6 +491,34 @@ export default function PageEditor({
    );
  }, [currentPageEditMode, editor, editable, showStatic]);

+  // Keep dictationAvailabilityAtom in step with the inputs that decide body
+  // editability, so the mic button's tooltip always reflects the current cause
+  // (or absence of one) for dictation being unavailable.
+  useEffect(() => {
+    const canEditInEditMode =
+      editable && currentPageEditMode === PageEditMode.Edit;
+    // Mirrors editor.isEditable.
+    const isEditable = canEditInEditMode && !showStatic;
+    const resolveReason = (): DictationUnavailableReason | null => {
+      if (isEditable) return null;
+      // No edit permission or not in edit mode.
+      if (!canEditInEditMode) return "read-only";
+      // Permitted to edit and in edit mode, but the collab doc hasn't synced
+      // yet: report a disconnected socket as offline, anything else as connecting.
+      return yjsConnectionStatus === WebSocketStatus.Disconnected
+        ? "offline"
+        : "connecting";
+    };
+
+    setDictationAvailability({ isEditable, reason: resolveReason() });
+  }, [
+    editable,
+    currentPageEditMode,
+    showStatic,
+    yjsConnectionStatus,
+    setDictationAvailability,
+  ]);
+
  useEffect(() => {
    if (
      !hasConnectedOnceRef.current &&