feat(dictation): streaming STT via silence cut (Silero VAD)
Add a lightweight "streaming" dictation mode as a simpler alternative to the realtime-websocket path: detect speech with Silero VAD (@ricky0123/vad-web), cut each segment on a pause and POST it to the existing /ai-chat/transcribe endpoint, so text appears progressively. No server changes. - new useStreamingDictation hook (same API as useDictation), lazy-loads VAD, in-order seq emission, session-epoch guard against stop->start races - new encodeWavPcm16 util (Float32 -> mono PCM16 WAV, accepted by the server) - MicButton gains a `streaming` prop; enabled in the editor toolbar and chat - VAD tuning: redemptionMs 640 / preSpeechPadMs 320 / minSpeechMs 96 - batch dictation kept as the fallback (streaming=false) - deps: @ricky0123/vad-web@0.0.30, onnxruntime-web@1.27.0 Note: VAD assets load from the library CDN by default; for self-hosted/offline set VAD_BASE_ASSET_PATH/VAD_ONNX_WASM_BASE_PATH and copy assets to public/vad/. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,7 @@
|
|||||||
"@mantine/modals": "8.3.18",
|
"@mantine/modals": "8.3.18",
|
||||||
"@mantine/notifications": "8.3.18",
|
"@mantine/notifications": "8.3.18",
|
||||||
"@mantine/spotlight": "8.3.18",
|
"@mantine/spotlight": "8.3.18",
|
||||||
|
"@ricky0123/vad-web": "^0.0.30",
|
||||||
"@slidoapp/emoji-mart": "5.8.7",
|
"@slidoapp/emoji-mart": "5.8.7",
|
||||||
"@slidoapp/emoji-mart-data": "1.2.4",
|
"@slidoapp/emoji-mart-data": "1.2.4",
|
||||||
"@slidoapp/emoji-mart-react": "1.1.5",
|
"@slidoapp/emoji-mart-react": "1.1.5",
|
||||||
@@ -53,6 +54,7 @@
|
|||||||
"mantine-form-zod-resolver": "1.3.0",
|
"mantine-form-zod-resolver": "1.3.0",
|
||||||
"mermaid": "11.15.0",
|
"mermaid": "11.15.0",
|
||||||
"mitt": "3.0.1",
|
"mitt": "3.0.1",
|
||||||
|
"onnxruntime-web": "^1.27.0",
|
||||||
"posthog-js": "1.372.2",
|
"posthog-js": "1.372.2",
|
||||||
"react": "18.3.1",
|
"react": "18.3.1",
|
||||||
"react-clear-modal": "^2.0.18",
|
"react-clear-modal": "^2.0.18",
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ export default function ChatInput({
|
|||||||
{isDictationEnabled && (
|
{isDictationEnabled && (
|
||||||
<MicButton
|
<MicButton
|
||||||
size="lg"
|
size="lg"
|
||||||
|
streaming
|
||||||
disabled={isStreaming || disabled}
|
disabled={isStreaming || disabled}
|
||||||
onText={(text) => setValue((v) => (v ? `${v} ${text}` : text))}
|
onText={(text) => setValue((v) => (v ? `${v} ${text}` : text))}
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ import { ActionIcon, Loader, Tooltip } from "@mantine/core";
|
|||||||
import { IconMicrophone, IconPlayerStopFilled } from "@tabler/icons-react";
|
import { IconMicrophone, IconPlayerStopFilled } from "@tabler/icons-react";
|
||||||
import { useTranslation } from "react-i18next";
|
import { useTranslation } from "react-i18next";
|
||||||
import { useDictation } from "@/features/dictation/hooks/use-dictation";
|
import { useDictation } from "@/features/dictation/hooks/use-dictation";
|
||||||
|
import { useStreamingDictation } from "@/features/dictation/hooks/use-streaming-dictation";
|
||||||
import classes from "./mic-button.module.css";
|
import classes from "./mic-button.module.css";
|
||||||
|
|
||||||
interface MicButtonProps {
|
interface MicButtonProps {
|
||||||
@@ -17,6 +18,9 @@ interface MicButtonProps {
|
|||||||
color?: string;
|
color?: string;
|
||||||
// Optional explicit glyph size override; defaults to the size-token value.
|
// Optional explicit glyph size override; defaults to the size-token value.
|
||||||
iconSize?: number;
|
iconSize?: number;
|
||||||
|
// When true, use the streaming (Silero-VAD) dictation controller, which emits
|
||||||
|
// text progressively as the user pauses; otherwise use the batch controller.
|
||||||
|
streaming?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -32,9 +36,17 @@ export const MicButton: FC<MicButtonProps> = ({
|
|||||||
size = "lg",
|
size = "lg",
|
||||||
color,
|
color,
|
||||||
iconSize,
|
iconSize,
|
||||||
|
streaming = false,
|
||||||
}) => {
|
}) => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const { status, start, stop, audioLevel } = useDictation({ onText, onStart });
|
// Call BOTH hooks unconditionally to respect the rules of hooks: which one is
|
||||||
|
// active is a render-time choice, but both must be invoked every render. This
|
||||||
|
// is safe because both controllers are inert until start() is called — neither
|
||||||
|
// opens the mic on mount — so the unused one costs nothing.
|
||||||
|
const batchCtl = useDictation({ onText, onStart });
|
||||||
|
const streamingCtl = useStreamingDictation({ onText, onStart });
|
||||||
|
const ctl = streaming ? streamingCtl : batchCtl;
|
||||||
|
const { status, start, stop, audioLevel } = ctl;
|
||||||
const resolvedIconSize = iconSize ?? (size === "lg" ? 18 : 16);
|
const resolvedIconSize = iconSize ?? (size === "lg" ? 18 : 16);
|
||||||
|
|
||||||
if (status === "recording") {
|
if (status === "recording") {
|
||||||
|
|||||||
@@ -0,0 +1,429 @@
|
|||||||
|
import { useCallback, useEffect, useRef, useState } from "react";
|
||||||
|
import { notifications } from "@mantine/notifications";
|
||||||
|
import { useTranslation } from "react-i18next";
|
||||||
|
import { transcribeAudio } from "@/features/dictation/services/dictation-service";
|
||||||
|
import { encodeWavPcm16 } from "@/features/dictation/utils/encode-wav";
|
||||||
|
import type { DictationStatus } from "@/features/dictation/hooks/use-dictation";
|
||||||
|
|
||||||
|
// Sample rate, in Hz, of the audio MicVAD passes to onSpeechEnd (Silero VAD
// runs at 16k).
const VAD_SAMPLE_RATE = 16000;

// Where the VAD worklet/model and the onnxruntime WASM binaries are loaded from.
// In this prototype both stay undefined, which lets the library fetch its
// bundled assets from its default CDN and avoids fragile rolldown asset-copy
// config. For a self-hosted / offline / privacy build, copy the vad-web `dist`
// worklet, the `*.onnx` model and the onnxruntime-web `*.wasm` files into
// `apps/client/public/vad/` and point both constants at that local path
// (e.g. "/vad/").
const VAD_BASE_ASSET_PATH: string | undefined = undefined;
const VAD_ONNX_WASM_BASE_PATH: string | undefined = undefined;

// Structural type for the lazily-imported MicVAD instance. Only the type lives
// here: the runtime import is done inside start(), so the heavy
// onnxruntime-web / Silero model is code-split out of the main bundle and is
// fetched only once the user actually begins dictation.
interface MicVADInstance {
  start(): Promise<void>;
  pause(): Promise<void>;
  destroy(): Promise<void>;
}

// Options accepted by useStreamingDictation.
interface UseStreamingDictationOptions {
  onText: (text: string) => void;
  onStart?: () => void;
  maxDurationMs?: number;
}

// Controller object returned by useStreamingDictation.
interface UseStreamingDictationResult {
  status: DictationStatus;
  // Smoothed live speech level in the 0..1 range while recording (0 when idle).
  audioLevel: number;
  start: () => Promise<void>;
  stop: () => void;
  cancel: () => void;
}
|
||||||
|
|
||||||
|
/**
 * Streaming variant of useDictation. Detects speech with a real (Silero) VAD and,
 * each time the speaker pauses, cuts that speech segment and POSTs it to the same
 * batch transcription endpoint, so text appears progressively as the user speaks.
 *
 * Returns the SAME shape as useDictation ({ status, start, stop, cancel,
 * audioLevel }) so MicButton can use either interchangeably. Refs hold the live
 * VAD instance / counters / timer so component re-renders never lose them.
 *
 * Teardown contract: stop(), cancel() and unmount all destroy the VAD. Because
 * start() is asynchronous, it re-checks after each await whether the session was
 * torn down in the meantime and, if so, disposes the freshly created VAD instead
 * of installing it — otherwise the microphone could be opened after the
 * component had already unmounted.
 *
 * @param options.onText        Called with each trimmed, non-empty transcription,
 *                              in the order the segments were spoken.
 * @param options.onStart       Called synchronously when start() accepts a new
 *                              session, before any async work.
 * @param options.maxDurationMs Auto-stop cap for one session (default 120000).
 * @returns The dictation controller: status, start, stop, cancel, audioLevel.
 */
export function useStreamingDictation(
  options: UseStreamingDictationOptions,
): UseStreamingDictationResult {
  const { t } = useTranslation();
  const [status, setStatus] = useState<DictationStatus>("idle");
  const [audioLevel, setAudioLevel] = useState(0);

  // Keep the latest callbacks in a ref so async VAD/HTTP closures always call the
  // current handlers without re-creating the VAD.
  const optionsRef = useRef(options);
  optionsRef.current = options;

  const vadRef = useRef<MicVADInstance | null>(null);
  const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // Set by cancel(), unmount, and stop()-while-starting; cleared by start(). Also
  // read by start() after its awaits to detect a session that was torn down
  // while it was still initialising.
  const canceledRef = useRef(false);
  const startingRef = useRef(false);
  // True while a recording session is active (VAD listening). Used to ignore late
  // VAD callbacks that fire after stop()/cancel().
  const activeRef = useRef(false);

  // In-order emission: each segment gets a monotonically increasing seq when its
  // speech ends; completed transcriptions are buffered by seq and flushed in
  // order so out-of-order HTTP responses can't scramble the text.
  const nextSeqRef = useRef(0);
  const nextEmitSeqRef = useRef(0);
  const resultsRef = useRef<Map<number, string>>(new Map());
  // Number of transcription requests still in flight.
  const inFlightRef = useRef(0);
  // Session epoch: bumped when a NEW session starts (start) or everything is
  // hard-discarded (cancel). Each in-flight request captures the epoch at send
  // time; if the epoch has since changed, the request is stale and its
  // then/catch/finally are skipped so old text can't leak into a new session and
  // the in-flight counter can't be driven negative across sessions.
  const epochRef = useRef(0);

  // Exponentially smoothed speech level, and the last value pushed to React state.
  const smoothedLevelRef = useRef(0);
  const emittedLevelRef = useRef(0);

  const clearTimer = useCallback(() => {
    if (timerRef.current !== null) {
      clearTimeout(timerRef.current);
      timerRef.current = null;
    }
  }, []);

  // Reset the level meter back to zero (refs + React state).
  const resetLevel = useCallback(() => {
    smoothedLevelRef.current = 0;
    emittedLevelRef.current = 0;
    setAudioLevel(0);
  }, []);

  // Destroy one specific VAD instance. Never throws: destroy() returns a
  // promise, so a .catch is attached for rejections, and the surrounding
  // try/catch covers a synchronous throw. Either failure is only logged.
  const disposeVad = useCallback((vad: MicVADInstance) => {
    try {
      void vad
        .destroy()
        .catch((err: unknown) =>
          console.warn("[dictation] VAD teardown failed", err),
        );
    } catch (err) {
      // Cleanup must never throw; just log for diagnosis.
      console.warn("[dictation] VAD teardown failed", err);
    }
  }, []);

  // Destroy the live VAD instance held in vadRef, if any. The ref is cleared
  // first, so calling this repeatedly or on any exit path is harmless.
  const destroyVad = useCallback(() => {
    const vad = vadRef.current;
    vadRef.current = null;
    if (vad) disposeVad(vad);
  }, [disposeVad]);

  // Decide the status once recording has ended: stay "transcribing" while
  // requests are in flight, otherwise return to "idle".
  const settleAfterStop = useCallback(() => {
    if (inFlightRef.current > 0) {
      setStatus("transcribing");
    } else {
      setStatus("idle");
    }
  }, []);

  // Drain the in-order result buffer: while the next expected seq is ready, trim
  // it, emit it if non-empty, and advance. Called after every resolved request.
  const drainResults = useCallback(() => {
    const results = resultsRef.current;
    for (;;) {
      const text = results.get(nextEmitSeqRef.current);
      if (text === undefined) break;
      results.delete(nextEmitSeqRef.current);
      nextEmitSeqRef.current += 1;
      const trimmed = text.trim();
      // Whisper often returns a leading space; emit the trimmed value.
      if (trimmed.length > 0) optionsRef.current.onText(trimmed);
    }
  }, []);

  // Map a transcription error to a user-facing message, mirroring the batch hook.
  const transcriptionErrorMessage = useCallback(
    (err: unknown): string => {
      const resp = (
        err as { response?: { status?: number; data?: { message?: string } } }
      )?.response;
      const serverMsg = resp?.data?.message;
      if (serverMsg && serverMsg.trim().length > 0) {
        // The server already explains the cause (e.g. provider 404, bad format,
        // STT not configured) — show it verbatim.
        return serverMsg;
      }
      if (resp?.status === 503 || resp?.status === 403) {
        return t("Voice dictation is not configured");
      }
      return `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`;
    },
    [t],
  );

  // Handle one ended speech segment: encode to WAV and transcribe. Results are
  // buffered by seq and flushed in order. A single failed segment does NOT kill
  // the session: log + one notification, then advance past that seq so later
  // segments still flush.
  const handleSegment = useCallback(
    (audio: Float32Array) => {
      // Encode BEFORE touching any counters: if encoding throws, no seq has been
      // allocated and the in-flight counter is untouched, so the session cannot
      // end up with a permanent gap in the emission order or a counter that
      // never returns to zero.
      const wavBlob = encodeWavPcm16(audio, VAD_SAMPLE_RATE);

      const seq = nextSeqRef.current;
      nextSeqRef.current += 1;
      inFlightRef.current += 1;
      // Capture the epoch for this request synchronously at send time.
      const epoch = epochRef.current;

      void transcribeAudio(wavBlob, "speech.wav")
        .then((text) => {
          // Stale request from a previous session: drop it without touching any
          // current-session state.
          if (epoch !== epochRef.current) return;
          // Defend against a non-string server value before drainResults trims.
          resultsRef.current.set(seq, typeof text === "string" ? text : "");
          drainResults();
        })
        .catch((err: unknown) => {
          if (epoch !== epochRef.current) return;
          // Log the full error for diagnosis (status + body + stack).
          console.error("[dictation] segment transcription failed", err);
          notifications.show({
            color: "red",
            message: transcriptionErrorMessage(err),
          });
          // Skip this seq so later segments can still flush in order: an empty
          // placeholder is consumed by drainResults without being emitted. When
          // the seq was already consumed (the error came from the .then handler
          // after the emit pointer advanced), do not re-insert it — that entry
          // could never be drained.
          if (seq >= nextEmitSeqRef.current) {
            resultsRef.current.set(seq, "");
          }
          drainResults();
        })
        .finally(() => {
          if (epoch !== epochRef.current) return;
          inFlightRef.current -= 1;
          // If recording already stopped, flip to idle once everything drained.
          if (!activeRef.current && inFlightRef.current === 0) {
            setStatus("idle");
          }
        });
    },
    [drainResults, transcriptionErrorMessage],
  );

  const start = useCallback(async (): Promise<void> => {
    // Synchronous live guard: status is stale between renders, so also block on
    // refs to prevent a double-click from creating two VAD instances (the first
    // would leak its mic stream).
    if (startingRef.current || vadRef.current || activeRef.current) return;
    if (status !== "idle") return;
    startingRef.current = true;

    // Notify the caller right when dictation begins (before any async work) so the
    // editor can snapshot the caret position.
    optionsRef.current.onStart?.();

    // Reset per-session in-order emission state. Bump the epoch so any request
    // still in flight from a previous (stopped) session becomes stale and its
    // then/catch/finally are skipped — it can neither emit old text into this
    // new session nor decrement this session's freshly-zeroed in-flight counter.
    epochRef.current += 1;
    canceledRef.current = false;
    nextSeqRef.current = 0;
    nextEmitSeqRef.current = 0;
    resultsRef.current = new Map();
    inFlightRef.current = 0;
    resetLevel();

    let vad: MicVADInstance;
    try {
      // Lazy import so the heavy onnx model/worklet are only fetched on first use
      // and code-split out of the main bundle.
      const { MicVAD } = await import("@ricky0123/vad-web");

      vad = await MicVAD.new({
        // Silero v5 model (smaller/faster than the legacy model).
        model: "v5",
        // vad-web 0.0.30 defaults startOnLoad:true, which opens the mic (calls
        // getUserMedia) inside new() and leaves the later vad.start() a no-op —
        // making its mic-permission error handling dead code. Force it off so the
        // mic is opened only by the explicit vad.start() below, where the real
        // getUserMedia errors are caught and mapped.
        startOnLoad: false,
        // Only pass asset paths when defined; otherwise the library uses its
        // bundled CDN defaults.
        ...(VAD_BASE_ASSET_PATH !== undefined
          ? { baseAssetPath: VAD_BASE_ASSET_PATH }
          : {}),
        ...(VAD_ONNX_WASM_BASE_PATH !== undefined
          ? { onnxWASMBasePath: VAD_ONNX_WASM_BASE_PATH }
          : {}),
        // --- VAD tuning (all tunable) ---
        // Probability over which a frame counts as speech.
        positiveSpeechThreshold: 0.5,
        // Probability under which a frame counts as non-speech (~0.15 below the
        // positive threshold, per Silero guidance).
        negativeSpeechThreshold: 0.35,
        // Silence to wait through before ending a segment (the "don't cut
        // immediately" delay) — ~0.6s. NOTE: vad-web 0.0.30 takes this in ms, not
        // frames (one Silero frame is ~32ms at 16k).
        redemptionMs: 640,
        // Audio kept before speech start (left padding so the first word isn't
        // clipped) — ~0.3s.
        preSpeechPadMs: 320,
        // Ignore sub-100ms blips like clicks.
        minSpeechMs: 96,
        onFrameProcessed: (probabilities: { isSpeech: number }) => {
          // Drive the level meter from the speech probability. Light exponential
          // smoothing + a throttle so React state isn't updated every frame; this
          // powers the existing button halo. Reuses the VAD's own frame
          // probabilities — no second AudioContext/AnalyserNode.
          if (!activeRef.current) return;
          const level = Math.min(1, Math.max(0, probabilities.isSpeech));
          smoothedLevelRef.current = smoothedLevelRef.current * 0.8 + level * 0.2;
          if (Math.abs(smoothedLevelRef.current - emittedLevelRef.current) > 0.01) {
            emittedLevelRef.current = smoothedLevelRef.current;
            setAudioLevel(smoothedLevelRef.current);
          }
        },
        onSpeechStart: () => {
          // No-op: the segment is only handled once it ends.
        },
        onSpeechEnd: (audio: Float32Array) => {
          // A pause was detected — cut this segment and transcribe it. Ignore late
          // callbacks that fire after stop()/cancel().
          if (!activeRef.current || canceledRef.current) return;
          handleSegment(audio);
        },
      });
    } catch (err) {
      // With startOnLoad:false, new() loads the model/worklet/wasm but does NOT
      // open the mic, so a throw here is an asset/init failure (model fetch,
      // worklet, onnxruntime wasm), not a mic-permission error. Map it as a
      // generic "could not start" with the underlying detail. (The mic-permission
      // name checks are kept in the vad.start() catch below, where getUserMedia
      // actually runs.)
      console.error("[dictation] VAD init failed", err);
      const detail = (err as { message?: string })?.message ?? String(err);
      notifications.show({
        color: "red",
        message: `${t("Could not start recording")}: ${detail}`,
      });
      // Defensive: if MicVAD.new partially succeeded before throwing, make sure we
      // don't leak it.
      destroyVad();
      setStatus("idle");
      startingRef.current = false;
      return;
    }

    // The session may have been torn down (cancel(), stop() or unmount) while the
    // model was loading. Those paths found no VAD to destroy at the time, so
    // dispose the one that was just created instead of installing it and opening
    // the microphone behind the caller's back.
    if (canceledRef.current) {
      disposeVad(vad);
      startingRef.current = false;
      return;
    }

    vadRef.current = vad;
    // Accept frames once start() resolves; the VAD callbacks already guard on
    // activeRef, so setting it before start() is safe.
    activeRef.current = true;

    try {
      // With startOnLoad:false this is where getUserMedia actually runs, so map
      // mic-permission errors here the same way the batch hook does; otherwise
      // fall back to a generic "could not start" message.
      await vad.start();
    } catch (err) {
      // Always log the full error for diagnosis (name, message, stack).
      console.error("[dictation] VAD.start failed", err);
      const name = (err as { name?: string })?.name;
      const detail = (err as { message?: string })?.message ?? String(err);
      let message: string;
      if (name === "NotAllowedError" || name === "SecurityError") {
        message = t("Microphone access denied");
      } else if (name === "NotFoundError" || name === "OverconstrainedError") {
        message = t("No microphone found");
      } else if (name === "NotReadableError" || name === "AbortError") {
        message = t("Microphone is unavailable or already in use");
      } else {
        message = `${t("Could not start recording")}: ${detail}`;
      }
      notifications.show({ color: "red", message });
      activeRef.current = false;
      destroyVad();
      setStatus("idle");
      startingRef.current = false;
      return;
    }

    // Same teardown check after the second await: if stop()/cancel()/unmount ran
    // while the mic was being opened, vadRef no longer points at this instance.
    // Dispose it again in case vad.start() acquired the stream after the earlier
    // destroy (a failing second destroy is only logged), and do NOT flip the UI
    // to "recording" or arm the max-duration timer.
    if (canceledRef.current || !activeRef.current || vadRef.current !== vad) {
      if (vadRef.current === vad) {
        vadRef.current = null;
        activeRef.current = false;
      }
      disposeVad(vad);
      startingRef.current = false;
      return;
    }

    setStatus("recording");
    // Recording has truly begun; release the synchronous start guard.
    startingRef.current = false;

    // Optional overall safety cap: auto-stop after maxDurationMs like the batch
    // hook does.
    const maxDurationMs = optionsRef.current.maxDurationMs ?? 120000;
    timerRef.current = setTimeout(() => {
      if (activeRef.current) stopRef.current();
    }, maxDurationMs);
  }, [status, t, resetLevel, destroyVad, disposeVad, handleSegment]);

  const stop = useCallback((): void => {
    clearTimer();
    if (!activeRef.current && !vadRef.current) {
      // No VAD exists yet. If start() is still initialising, flag the session so
      // its continuation disposes the VAD instead of starting to record after the
      // caller asked to stop.
      if (startingRef.current) canceledRef.current = true;
      // Nothing is running; make sure the UI is idle.
      setStatus("idle");
      return;
    }
    // Mark inactive first so late onSpeechEnd/onFrameProcessed callbacks are
    // ignored. Any speech segment that has NOT yet ended (user clicks Stop
    // mid-utterance) is dropped — acceptable for v1; users normally pause before
    // stopping.
    activeRef.current = false;
    destroyVad();
    resetLevel();
    settleAfterStop();
  }, [clearTimer, destroyVad, resetLevel, settleAfterStop]);

  // Keep stop() reachable from the maxDuration timer closure (which is created
  // before stop is defined) without re-creating the VAD.
  const stopRef = useRef(stop);
  stopRef.current = stop;

  const cancel = useCallback((): void => {
    clearTimer();
    canceledRef.current = true;
    activeRef.current = false;
    // Hard discard: bump the epoch so any in-flight request becomes stale and is
    // ignored the moment it resolves (no emit, no counter touch).
    epochRef.current += 1;
    // Drop pending results / queue; in-flight requests will resolve into a now-
    // empty buffer and be ignored.
    resultsRef.current = new Map();
    nextSeqRef.current = 0;
    nextEmitSeqRef.current = 0;
    inFlightRef.current = 0;
    destroyVad();
    resetLevel();
    setStatus("idle");
  }, [clearTimer, destroyVad, resetLevel]);

  // Clean up on unmount: clear the timer and destroy the VAD. Setting
  // canceledRef also makes a start() that is still awaiting dispose its VAD
  // rather than open the mic after unmount. Teardown never throws because
  // destroyVad only logs failures.
  useEffect(() => {
    return () => {
      clearTimer();
      activeRef.current = false;
      canceledRef.current = true;
      destroyVad();
    };
  }, [clearTimer, destroyVad]);

  return { status, start, stop, cancel, audioLevel };
}
|
||||||
32
apps/client/src/features/dictation/utils/encode-wav.ts
Normal file
32
apps/client/src/features/dictation/utils/encode-wav.ts
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
// Serialize mono Float32 PCM samples into a 16-bit PCM WAV blob (audio/wav):
// a 44-byte RIFF/WAVE header followed by little-endian signed 16-bit samples.
// The server STT endpoint whitelists audio/wav, so the blob is sent as-is.
//
// Each sample is clamped to [-1, 1] and scaled asymmetrically (negative values
// by 0x8000, non-negative by 0x7fff) so both ends of the int16 range are
// reachable without overflow; the fractional part is dropped when the value is
// stored as an int16.
export function encodeWavPcm16(samples: Float32Array, sampleRate = 16000): Blob {
  const HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2;
  const frameBytes = BYTES_PER_SAMPLE; // one channel per frame (mono)
  const payloadBytes = samples.length * BYTES_PER_SAMPLE;

  const bytes = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const wav = new DataView(bytes);

  // Write a four-character chunk tag, one byte per character.
  const putTag = (at: number, tag: string): void => {
    [...tag].forEach((ch, i) => wav.setUint8(at + i, ch.charCodeAt(0)));
  };

  // RIFF container header.
  putTag(0, "RIFF");
  wav.setUint32(4, 36 + payloadBytes, true);
  putTag(8, "WAVE");

  // "fmt " chunk describing the PCM stream.
  putTag(12, "fmt ");
  wav.setUint32(16, 16, true); // fmt chunk size for PCM
  wav.setUint16(20, 1, true); // audio format: PCM
  wav.setUint16(22, 1, true); // channel count: mono
  wav.setUint32(24, sampleRate, true);
  wav.setUint32(28, sampleRate * frameBytes, true); // byte rate
  wav.setUint16(32, frameBytes, true); // block align
  wav.setUint16(34, 16, true); // bits per sample

  // "data" chunk: the samples themselves.
  putTag(36, "data");
  wav.setUint32(40, payloadBytes, true);
  samples.forEach((sample, index) => {
    const bounded = Math.min(1, Math.max(-1, sample));
    const scale = bounded < 0 ? 0x8000 : 0x7fff;
    wav.setInt16(HEADER_BYTES + index * BYTES_PER_SAMPLE, bounded * scale, true);
  });

  return new Blob([bytes], { type: "audio/wav" });
}
|
||||||
@@ -9,42 +9,57 @@ interface Props {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
|
export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
|
||||||
|
// Caret snapshot taken when dictation starts (where the first segment lands).
|
||||||
const rangeRef = useRef<{ from: number; to: number } | null>(null);
|
const rangeRef = useRef<{ from: number; to: number } | null>(null);
|
||||||
|
// Running insertion point: after each inserted segment we remember the caret
|
||||||
|
// end so the NEXT segment appends right after it, contiguously, regardless of
|
||||||
|
// where the user's caret currently is. Null until the first segment lands.
|
||||||
|
const insertPosRef = useRef<number | null>(null);
|
||||||
|
|
||||||
const handleStart = () => {
|
const handleStart = () => {
|
||||||
const { from, to } = editor.state.selection;
|
const { from, to } = editor.state.selection;
|
||||||
rangeRef.current = { from, to };
|
rangeRef.current = { from, to };
|
||||||
|
// New session: forget any insertion point from a previous dictation so the
|
||||||
|
// first segment uses the fresh snapshot above.
|
||||||
|
insertPosRef.current = null;
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleText = (text: string) => {
|
const handleText = (text: string) => {
|
||||||
// The editor may be gone by the time async transcription returns; bail out
|
// The editor may be gone by the time async transcription returns; bail out
|
||||||
// instead of operating on a destroyed instance.
|
// instead of operating on a destroyed instance.
|
||||||
if (!editor || editor.isDestroyed) return;
|
if (!editor || editor.isDestroyed) return;
|
||||||
const snapshot = rangeRef.current;
|
|
||||||
rangeRef.current = null;
|
|
||||||
// The document may have shrunk during transcription (e.g. a collaborative
|
// The document may have shrunk during transcription (e.g. a collaborative
|
||||||
// edit), so clamp the snapshot into the current bounds before inserting.
|
// edit), so clamp any position into the current bounds before inserting.
|
||||||
const docSize = editor.state.doc.content.size;
|
const docSize = editor.state.doc.content.size;
|
||||||
const clamp = (p: number) => Math.max(0, Math.min(p, docSize));
|
const clamp = (p: number) => Math.max(0, Math.min(p, docSize));
|
||||||
|
// First segment lands at the snapshotted caret range; subsequent segments
|
||||||
|
// land at a zero-length range at the running insertion point so they stay
|
||||||
|
// contiguous even if the user clicked elsewhere mid-dictation.
|
||||||
|
const snapshot = rangeRef.current;
|
||||||
|
const range =
|
||||||
|
insertPosRef.current !== null
|
||||||
|
? { from: clamp(insertPosRef.current), to: clamp(insertPosRef.current) }
|
||||||
|
: snapshot
|
||||||
|
? { from: clamp(snapshot.from), to: clamp(snapshot.to) }
|
||||||
|
: null;
|
||||||
try {
|
try {
|
||||||
if (snapshot) {
|
if (range) {
|
||||||
// Insert at the snapshotted caret; a trailing space keeps words
|
// Insert at the resolved range; a trailing space keeps words separated
|
||||||
// separated (the hook already trims the transcribed text).
|
// (the hook already trims the transcribed text).
|
||||||
editor
|
editor.chain().focus().insertContentAt(range, `${text} `).run();
|
||||||
.chain()
|
|
||||||
.focus()
|
|
||||||
.insertContentAt(
|
|
||||||
{ from: clamp(snapshot.from), to: clamp(snapshot.to) },
|
|
||||||
`${text} `,
|
|
||||||
)
|
|
||||||
.run();
|
|
||||||
} else {
|
} else {
|
||||||
|
// No snapshot and no running point (shouldn't happen normally) — fall
|
||||||
|
// back to the current caret.
|
||||||
editor.chain().focus().insertContent(`${text} `).run();
|
editor.chain().focus().insertContent(`${text} `).run();
|
||||||
}
|
}
|
||||||
|
// Remember where the inserted text ends so the next segment appends right
|
||||||
|
// after it, independent of later user caret moves.
|
||||||
|
insertPosRef.current = editor.state.selection.to;
|
||||||
} catch {
|
} catch {
|
||||||
// The snapshot drifted out of range; fall back to the current caret.
|
// The range drifted out of bounds; fall back to the current caret.
|
||||||
try {
|
try {
|
||||||
editor.chain().focus().insertContent(`${text} `).run();
|
editor.chain().focus().insertContent(`${text} `).run();
|
||||||
|
insertPosRef.current = editor.state.selection.to;
|
||||||
} catch {
|
} catch {
|
||||||
// The editor may have been destroyed; ignore so a dead editor can't
|
// The editor may have been destroyed; ignore so a dead editor can't
|
||||||
// surface an uncaught error.
|
// surface an uncaught error.
|
||||||
@@ -55,6 +70,7 @@ export const DictationGroup: FC<Props> = ({ editor, color, iconSize }) => {
|
|||||||
return (
|
return (
|
||||||
<MicButton
|
<MicButton
|
||||||
size="md"
|
size="md"
|
||||||
|
streaming
|
||||||
onStart={handleStart}
|
onStart={handleStart}
|
||||||
onText={handleText}
|
onText={handleText}
|
||||||
disabled={!editor.isEditable}
|
disabled={!editor.isEditable}
|
||||||
|
|||||||
47
pnpm-lock.yaml
generated
47
pnpm-lock.yaml
generated
@@ -299,6 +299,9 @@ importers:
|
|||||||
'@mantine/spotlight':
|
'@mantine/spotlight':
|
||||||
specifier: 8.3.18
|
specifier: 8.3.18
|
||||||
version: 8.3.18(@mantine/core@8.3.18(@mantine/hooks@8.3.18(react@18.3.1))(@types/react@18.3.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(@mantine/hooks@8.3.18(react@18.3.1))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
|
version: 8.3.18(@mantine/core@8.3.18(@mantine/hooks@8.3.18(react@18.3.1))(@types/react@18.3.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(@mantine/hooks@8.3.18(react@18.3.1))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
|
||||||
|
'@ricky0123/vad-web':
|
||||||
|
specifier: ^0.0.30
|
||||||
|
version: 0.0.30
|
||||||
'@slidoapp/emoji-mart':
|
'@slidoapp/emoji-mart':
|
||||||
specifier: 5.8.7
|
specifier: 5.8.7
|
||||||
version: 5.8.7
|
version: 5.8.7
|
||||||
@@ -374,6 +377,9 @@ importers:
|
|||||||
mitt:
|
mitt:
|
||||||
specifier: 3.0.1
|
specifier: 3.0.1
|
||||||
version: 3.0.1
|
version: 3.0.1
|
||||||
|
onnxruntime-web:
|
||||||
|
specifier: ^1.27.0
|
||||||
|
version: 1.27.0
|
||||||
posthog-js:
|
posthog-js:
|
||||||
specifier: 1.372.2
|
specifier: 1.372.2
|
||||||
version: 1.372.2
|
version: 1.372.2
|
||||||
@@ -4205,6 +4211,9 @@ packages:
|
|||||||
'@remirror/core-constants@3.0.0':
|
'@remirror/core-constants@3.0.0':
|
||||||
resolution: {integrity: sha512-42aWfPrimMfDKDi4YegyS7x+/0tlzaqwPQCULLanv3DMIlu96KTJR0fM5isWX2UViOqlGnX6YFgqWepcX+XMNg==}
|
resolution: {integrity: sha512-42aWfPrimMfDKDi4YegyS7x+/0tlzaqwPQCULLanv3DMIlu96KTJR0fM5isWX2UViOqlGnX6YFgqWepcX+XMNg==}
|
||||||
|
|
||||||
|
'@ricky0123/vad-web@0.0.30':
|
||||||
|
resolution: {integrity: sha512-cJyYrh4YeeUBJcbR9Bic/bFDyB9qBkAepvpuWM3vLxnAi7bC3VHzf51UeNdT+OtY4D7MLAgV8iJMc4z41ZnaWg==}
|
||||||
|
|
||||||
'@rolldown/binding-android-arm64@1.0.0-rc.12':
|
'@rolldown/binding-android-arm64@1.0.0-rc.12':
|
||||||
resolution: {integrity: sha512-pv1y2Fv0JybcykuiiD3qBOBdz6RteYojRFY1d+b95WVuzx211CRh+ytI/+9iVyWQ6koTh5dawe4S/yRfOFjgaA==}
|
resolution: {integrity: sha512-pv1y2Fv0JybcykuiiD3qBOBdz6RteYojRFY1d+b95WVuzx211CRh+ytI/+9iVyWQ6koTh5dawe4S/yRfOFjgaA==}
|
||||||
engines: {node: ^20.19.0 || >=22.12.0}
|
engines: {node: ^20.19.0 || >=22.12.0}
|
||||||
@@ -5253,6 +5262,7 @@ packages:
|
|||||||
|
|
||||||
'@ungap/structured-clone@1.3.0':
|
'@ungap/structured-clone@1.3.0':
|
||||||
resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==}
|
resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==}
|
||||||
|
deprecated: Potential CWE-502 - Update to 1.3.1 or higher
|
||||||
|
|
||||||
'@unrs/resolver-binding-android-arm-eabi@1.11.1':
|
'@unrs/resolver-binding-android-arm-eabi@1.11.1':
|
||||||
resolution: {integrity: sha512-ppLRUgHVaGRWUx0R0Ut06Mjo9gBaBkg3v/8AxusGLhsIotbBLuRk51rAzqLC8gq6NyyAojEXglNjzf6R948DNw==}
|
resolution: {integrity: sha512-ppLRUgHVaGRWUx0R0Ut06Mjo9gBaBkg3v/8AxusGLhsIotbBLuRk51rAzqLC8gq6NyyAojEXglNjzf6R948DNw==}
|
||||||
@@ -7026,6 +7036,9 @@ packages:
|
|||||||
resolution: {integrity: sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==}
|
resolution: {integrity: sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==}
|
||||||
hasBin: true
|
hasBin: true
|
||||||
|
|
||||||
|
flatbuffers@25.9.23:
|
||||||
|
resolution: {integrity: sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==}
|
||||||
|
|
||||||
flatted@3.4.2:
|
flatted@3.4.2:
|
||||||
resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==}
|
resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==}
|
||||||
|
|
||||||
@@ -7188,6 +7201,9 @@ packages:
|
|||||||
graceful-fs@4.2.11:
|
graceful-fs@4.2.11:
|
||||||
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
|
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
|
||||||
|
|
||||||
|
guid-typescript@1.0.9:
|
||||||
|
resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==}
|
||||||
|
|
||||||
hachure-fill@0.5.2:
|
hachure-fill@0.5.2:
|
||||||
resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==}
|
resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==}
|
||||||
|
|
||||||
@@ -8623,6 +8639,12 @@ packages:
|
|||||||
resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
|
resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
|
||||||
engines: {node: '>=6'}
|
engines: {node: '>=6'}
|
||||||
|
|
||||||
|
onnxruntime-common@1.27.0:
|
||||||
|
resolution: {integrity: sha512-3KxL5wIVqa8Ex08jxSzncm9CMgw8CjOFyOQ7SxvG9o0cVLlhTNKXyIQuTbtX4tGPJEf73OER2xrjt4HJSBL4ow==}
|
||||||
|
|
||||||
|
onnxruntime-web@1.27.0:
|
||||||
|
resolution: {integrity: sha512-ogDLsqIozHZwifPuN37OproAo0byX6t43/bP8GzeZWBWD6MOGExswFAx3up4NS/vvWBOg2u2PXomDt3rMmdQSg==}
|
||||||
|
|
||||||
open@8.4.2:
|
open@8.4.2:
|
||||||
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
|
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
|
||||||
engines: {node: '>=12'}
|
engines: {node: '>=12'}
|
||||||
@@ -8912,6 +8934,9 @@ packages:
|
|||||||
pkg-types@1.3.1:
|
pkg-types@1.3.1:
|
||||||
resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==}
|
resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==}
|
||||||
|
|
||||||
|
platform@1.3.6:
|
||||||
|
resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==}
|
||||||
|
|
||||||
pluralize@8.0.0:
|
pluralize@8.0.0:
|
||||||
resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==}
|
resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==}
|
||||||
engines: {node: '>=4'}
|
engines: {node: '>=4'}
|
||||||
@@ -9645,6 +9670,7 @@ packages:
|
|||||||
|
|
||||||
sliced@1.0.1:
|
sliced@1.0.1:
|
||||||
resolution: {integrity: sha512-VZBmZP8WU3sMOZm1bdgTadsQbcscK0UM8oKxKVBs4XAhUo2Xxzm/OFMGBkPusxw9xL3Uy8LrzEqGqJhclsr0yA==}
|
resolution: {integrity: sha512-VZBmZP8WU3sMOZm1bdgTadsQbcscK0UM8oKxKVBs4XAhUo2Xxzm/OFMGBkPusxw9xL3Uy8LrzEqGqJhclsr0yA==}
|
||||||
|
deprecated: Unsupported
|
||||||
|
|
||||||
socket.io-adapter@2.5.4:
|
socket.io-adapter@2.5.4:
|
||||||
resolution: {integrity: sha512-wDNHGXGewWAjQPt3pyeYBtpWSq9cLE5UW1ZUPL/2eGK9jtse/FpXib7epSTsz0Q0m+6sg6Y4KtcFTlah1bdOVg==}
|
resolution: {integrity: sha512-wDNHGXGewWAjQPt3pyeYBtpWSq9cLE5UW1ZUPL/2eGK9jtse/FpXib7epSTsz0Q0m+6sg6Y4KtcFTlah1bdOVg==}
|
||||||
@@ -14568,6 +14594,10 @@ snapshots:
|
|||||||
|
|
||||||
'@remirror/core-constants@3.0.0': {}
|
'@remirror/core-constants@3.0.0': {}
|
||||||
|
|
||||||
|
'@ricky0123/vad-web@0.0.30':
|
||||||
|
dependencies:
|
||||||
|
onnxruntime-web: 1.27.0
|
||||||
|
|
||||||
'@rolldown/binding-android-arm64@1.0.0-rc.12':
|
'@rolldown/binding-android-arm64@1.0.0-rc.12':
|
||||||
optional: true
|
optional: true
|
||||||
|
|
||||||
@@ -17812,6 +17842,8 @@ snapshots:
|
|||||||
|
|
||||||
flat@5.0.2: {}
|
flat@5.0.2: {}
|
||||||
|
|
||||||
|
flatbuffers@25.9.23: {}
|
||||||
|
|
||||||
flatted@3.4.2: {}
|
flatted@3.4.2: {}
|
||||||
|
|
||||||
follow-redirects@1.16.0: {}
|
follow-redirects@1.16.0: {}
|
||||||
@@ -17970,6 +18002,8 @@ snapshots:
|
|||||||
|
|
||||||
graceful-fs@4.2.11: {}
|
graceful-fs@4.2.11: {}
|
||||||
|
|
||||||
|
guid-typescript@1.0.9: {}
|
||||||
|
|
||||||
hachure-fill@0.5.2: {}
|
hachure-fill@0.5.2: {}
|
||||||
|
|
||||||
handlebars@4.7.9:
|
handlebars@4.7.9:
|
||||||
@@ -19587,6 +19621,17 @@ snapshots:
|
|||||||
dependencies:
|
dependencies:
|
||||||
mimic-fn: 2.1.0
|
mimic-fn: 2.1.0
|
||||||
|
|
||||||
|
onnxruntime-common@1.27.0: {}
|
||||||
|
|
||||||
|
onnxruntime-web@1.27.0:
|
||||||
|
dependencies:
|
||||||
|
flatbuffers: 25.9.23
|
||||||
|
guid-typescript: 1.0.9
|
||||||
|
long: 5.3.2
|
||||||
|
onnxruntime-common: 1.27.0
|
||||||
|
platform: 1.3.6
|
||||||
|
protobufjs: 7.5.8
|
||||||
|
|
||||||
open@8.4.2:
|
open@8.4.2:
|
||||||
dependencies:
|
dependencies:
|
||||||
define-lazy-prop: 2.0.0
|
define-lazy-prop: 2.0.0
|
||||||
@@ -19911,6 +19956,8 @@ snapshots:
|
|||||||
mlly: 1.8.0
|
mlly: 1.8.0
|
||||||
pathe: 2.0.3
|
pathe: 2.0.3
|
||||||
|
|
||||||
|
platform@1.3.6: {}
|
||||||
|
|
||||||
pluralize@8.0.0: {}
|
pluralize@8.0.0: {}
|
||||||
|
|
||||||
png-chunk-text@1.0.0: {}
|
png-chunk-text@1.0.0: {}
|
||||||
|
|||||||
Reference in New Issue
Block a user