diff --git a/apps/client/src/features/dictation/components/mic-button.tsx b/apps/client/src/features/dictation/components/mic-button.tsx
index 8c0974ae..70ead74e 100644
--- a/apps/client/src/features/dictation/components/mic-button.tsx
+++ b/apps/client/src/features/dictation/components/mic-button.tsx
@@ -75,15 +75,23 @@ export const MicButton: FC<MicButtonProps> = ({
     );
   }
 
-  if (status === "transcribing" || status === "error") {
+  if (
+    status === "loading" ||
+    status === "transcribing" ||
+    status === "error"
+  ) {
+    // "loading" (streaming hook fetching the VAD model on first use) shows the
+    // same spinner+disabled state so the first click is visibly acknowledged and
+    // a confusing second click can't fire while the model loads.
+    const label = status === "loading" ? t("Preparing…") : t("Transcribing…");
     return (
-      <Tooltip label={t("Transcribing…")} withArrow>
+      <Tooltip label={label} withArrow>
         <ActionIcon
           size={size}
           variant="subtle"
           color={color}
           disabled
-          aria-label={t("Transcribing…")}
+          aria-label={label}
         >
           <Loader size="xs" />
         </ActionIcon>
diff --git a/apps/client/src/features/dictation/hooks/use-dictation.ts b/apps/client/src/features/dictation/hooks/use-dictation.ts
index 0d32402f..4d8c451d 100644
--- a/apps/client/src/features/dictation/hooks/use-dictation.ts
+++ b/apps/client/src/features/dictation/hooks/use-dictation.ts
@@ -3,7 +3,15 @@ import { notifications } from "@mantine/notifications";
 import { useTranslation } from "react-i18next";
 import { transcribeAudio } from "@/features/dictation/services/dictation-service";
 
-export type DictationStatus = "idle" | "recording" | "transcribing" | "error";
+// "loading" is set only by the streaming hook while it lazily loads the VAD
+// model on first use; the batch hook never sets it. It exists so the streaming
+// hook and the mic button can show immediate feedback during that load.
+export type DictationStatus =
+  | "idle"
+  | "recording"
+  | "transcribing"
+  | "error"
+  | "loading";
 
 interface UseDictationOptions {
   onText: (text: string) => void;
diff --git a/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts b/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
index 8128df91..b086747c 100644
--- a/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
+++ b/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
@@ -67,6 +67,9 @@ export function useStreamingDictation(
   optionsRef.current = options;
 
   const vadRef = useRef<MicVADInstance | null>(null);
+  // AudioContext we create+resume inside the click gesture and inject into
+  // MicVAD (see start()). We own it; MicVAD does not close an injected context.
+  const audioContextRef = useRef<AudioContext | null>(null);
   const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
   const canceledRef = useRef(false);
   const startingRef = useRef(false);
@@ -250,6 +253,26 @@ export function useStreamingDictation(
     inFlightRef.current = 0;
     resetLevel();
 
+    // Create and resume the AudioContext NOW, inside the click gesture, before
+    // the (first-time-slow) model load below. A context first touched outside a
+    // user gesture stays "suspended" and the VAD audio worklet never runs — that
+    // is exactly why the first click did nothing and only the second (model
+    // already cached, so MicVAD.new was fast enough to create the context inside
+    // the gesture) started recording. We own this context and inject it into
+    // MicVAD (which then will NOT close it); it is reused across start/stop and
+    // closed only on unmount.
+    const AudioCtor =
+      window.AudioContext ||
+      (window as unknown as { webkitAudioContext?: typeof AudioContext })
+        .webkitAudioContext;
+    if (AudioCtor && !audioContextRef.current) {
+      audioContextRef.current = new AudioCtor();
+    }
+    // Resume within the gesture; swallow rejection (e.g. context already closed).
+    void audioContextRef.current?.resume().catch(() => {});
+    // Show immediate feedback while the model loads (mic button shows a spinner).
+    setStatus("loading");
+
     let vad: MicVADInstance;
     try {
       // Lazy import so the heavy onnx model/worklet are only fetched on first use
@@ -265,6 +288,12 @@ export function useStreamingDictation(
         // mic is opened only by the explicit vad.start() below, where the real
         // getUserMedia errors are caught and mapped.
         startOnLoad: false,
+        // Inject the AudioContext we created+resumed inside the click gesture so
+        // the VAD worklet runs on a "running" context. When provided, the library
+        // uses it and does NOT take ownership/close it.
+        ...(audioContextRef.current
+          ? { audioContext: audioContextRef.current }
+          : {}),
         // Only pass asset paths when defined; otherwise the library uses its
         // bundled CDN defaults.
         ...(VAD_BASE_ASSET_PATH !== undefined
@@ -430,6 +459,14 @@ export function useStreamingDictation(
       activeRef.current = false;
       canceledRef.current = true;
       destroyVad();
+      // Close the AudioContext we own (MicVAD never closes an injected one).
+      if (
+        audioContextRef.current &&
+        audioContextRef.current.state !== "closed"
+      ) {
+        void audioContextRef.current.close().catch(() => {});
+      }
+      audioContextRef.current = null;
     };
   }, [clearTimer, destroyVad]);