From 7093f184b211a15747a9ed1cb5bba1809b0ed224 Mon Sep 17 00:00:00 2001
From: claude_code
Date: Mon, 22 Jun 2026 17:19:11 +0300
Subject: [PATCH] fix(dictation): self-host Silero VAD / onnxruntime-web assets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Streaming dictation failed at runtime with "no available backend found /
'text/html' is not a valid JavaScript MIME type": @ricky0123/vad-web 0.0.30
defaults baseAssetPath/onnxWASMBasePath to "./" (relative to the page URL),
so the worklet, Silero model and ORT wasm/mjs were requested against the SPA
catch-all and came back as index.html.

Serve them from a fixed /vad/ instead:
- scripts/copy-vad-assets.mjs copies the 4 runtime assets (vad worklet,
  silero_vad_v5.onnx, ort-wasm-simd-threaded.jsep.{mjs,wasm}) from node_modules
  into apps/client/public/vad/ (gitignored — the ORT wasm is ~26 MB)
- client dev/build scripts run the copy first so the assets are always present
- useStreamingDictation points both path constants at "/vad/"

Verified: dev server serves all four under /vad/ with HTTP 200 and correct
Content-Type (js/wasm, never text/html); tsc clean. Prod (Docker) build runs
the copy step, so dist/vad/* ships in the image.

Co-Authored-By: Claude Opus 4.8
---
Reviewer note (text here is ignored by git-am): copy-vad-assets.mjs was amended
in review to resolve onnxruntime-web through vad-web's dependency tree, so the
blob id on its "index" line below no longer matches the new file content.

 .gitignore                                    |  3 +
 apps/client/package.json                      |  4 +-
 apps/client/scripts/copy-vad-assets.mjs       | 70 +++++++++++++++++++
 .../hooks/use-streaming-dictation.ts          | 19 ++---
 4 files changed, 86 insertions(+), 10 deletions(-)
 create mode 100644 apps/client/scripts/copy-vad-assets.mjs

diff --git a/.gitignore b/.gitignore
index e814fb29..16a16b59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,3 +45,6 @@ lerna-debug.log*
 
 # TypeScript incremental build artifacts
 *.tsbuildinfo
+
+# Self-hosted VAD / onnxruntime-web assets (copied from node_modules at dev/build time)
+apps/client/public/vad/
diff --git a/apps/client/package.json b/apps/client/package.json
index 3ccdea68..a652dd77 100644
--- a/apps/client/package.json
+++ b/apps/client/package.json
@@ -3,8 +3,8 @@
   "private": true,
   "version": "0.93.0",
   "scripts": {
-    "dev": "vite",
-    "build": "tsc && vite build",
+    "dev": "node scripts/copy-vad-assets.mjs && vite",
+    "build": "node scripts/copy-vad-assets.mjs && tsc && vite build",
     "lint": "eslint .",
     "preview": "vite preview",
     "format": "prettier --write \"src/**/*.tsx\" \"src/**/*.ts\"",
diff --git a/apps/client/scripts/copy-vad-assets.mjs b/apps/client/scripts/copy-vad-assets.mjs
new file mode 100644
index 00000000..4e0cccd0
--- /dev/null
+++ b/apps/client/scripts/copy-vad-assets.mjs
@@ -0,0 +1,70 @@
+// Self-host the @ricky0123/vad-web + onnxruntime-web runtime assets under
+// apps/client/public/vad/.
+//
+// WHY THIS EXISTS:
+// Both vad-web and onnxruntime-web resolve their assets by URL *at runtime* (the
+// VAD audio worklet + Silero model, and ORT's wasm/mjs backend). In vad-web
+// 0.0.30 the default baseAssetPath / onnxWASMBasePath is "./" — i.e. relative to
+// the current page URL — NOT a CDN. In this SPA that "./" request hits the
+// client-side catch-all route and gets served index.html (text/html), so the
+// onnxruntime ESM/wasm backend fails to initialize ("'text/html' is not a valid
+// JavaScript MIME type"). We fix that by copying the four needed files into
+// public/vad/ and pointing both path constants at the fixed absolute "/vad/".
+//
+// These copies are NOT committed (the ORT wasm is ~26 MB); this script runs
+// before `dev` and `build` (see package.json) to repopulate them from
+// node_modules. It is idempotent: it (re)creates the dir and overwrites.
+
+import { createRequire } from "node:module";
+import { fileURLToPath } from "node:url";
+import path from "node:path";
+import fs from "node:fs";
+
+const require = createRequire(import.meta.url);
+const here = path.dirname(fileURLToPath(import.meta.url));
+const outDir = path.join(here, "..", "public", "vad");
+
+// vad-web exposes ./package.json, so derive its dist dir from there.
+const vadPkgJson = require.resolve("@ricky0123/vad-web/package.json");
+const vadDist = path.join(path.dirname(vadPkgJson), "dist");
+
+// Resolve onnxruntime-web starting from vad-web's own directory, i.e. the way
+// imports inside vad-web are resolved. The ORT wasm/mjs we serve should come
+// from the same onnxruntime-web copy as the JS that loads them; if the package
+// manager nests a different version under vad-web, resolving from this script's
+// location would silently copy the other one.
+const vadRequire = createRequire(vadPkgJson);
+
+// onnxruntime-web's "exports" map does NOT expose ./package.json, so resolving
+// it would throw ERR_PACKAGE_PATH_NOT_EXPORTED. It DOES export the exact asset
+// subpaths we need, so resolve those files directly. If vad-web's dependency
+// tree cannot resolve one, fall back to this app's own dependency tree.
+function resolveOrtAsset(fileName) {
+  const request = `onnxruntime-web/${fileName}`;
+  try {
+    return vadRequire.resolve(request);
+  } catch {
+    return require.resolve(request);
+  }
+}
+
+const ortMjs = resolveOrtAsset("ort-wasm-simd-threaded.jsep.mjs");
+const ortWasm = resolveOrtAsset("ort-wasm-simd-threaded.jsep.wasm");
+
+// [absolute source path, output filename]
+const files = [
+  [path.join(vadDist, "vad.worklet.bundle.min.js"), "vad.worklet.bundle.min.js"],
+  [path.join(vadDist, "silero_vad_v5.onnx"), "silero_vad_v5.onnx"],
+  [ortMjs, "ort-wasm-simd-threaded.jsep.mjs"],
+  [ortWasm, "ort-wasm-simd-threaded.jsep.wasm"],
+];
+
+fs.mkdirSync(outDir, { recursive: true });
+for (const [src, name] of files) {
+  if (!fs.existsSync(src)) {
+    console.error(`[copy-vad-assets] missing source: ${src}`);
+    process.exit(1);
+  }
+  fs.copyFileSync(src, path.join(outDir, name));
+  console.log(`[copy-vad-assets] ${name}`);
+}
diff --git a/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts b/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
index 658e2e55..b8bae935 100644
--- a/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
+++ b/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts
@@ -32,14 +32,17 @@ interface UseStreamingDictationResult {
 // Sample rate of the audio MicVAD hands to onSpeechEnd (Silero VAD runs at 16k).
 const VAD_SAMPLE_RATE = 16000;
 
-// Asset paths for the VAD worklet and the onnxruntime WASM binaries. For this
-// prototype they are left undefined so the library loads its bundled assets from
-// its default CDN — this avoids fragile rolldown asset-copy config. For a
-// self-hosted / offline / privacy build, copy the vad-web `dist` worklet + the
-// `*.onnx` model and the onnxruntime-web `*.wasm` files into
-// `apps/client/public/vad/` and set these to that local path (e.g. "/vad/").
-const VAD_BASE_ASSET_PATH: string | undefined = undefined;
-const VAD_ONNX_WASM_BASE_PATH: string | undefined = undefined;
+// Asset paths for the VAD worklet/Silero model and the onnxruntime-web WASM
+// binaries. vad-web 0.0.30's default asset path is "./" (relative to the current
+// page URL), NOT a CDN — in this SPA that request hits the client-side catch-all
+// route and returns index.html (text/html), so the onnxruntime ESM/wasm backend
+// fails to initialize. We instead self-host the four needed files (the vad-web
+// worklet + `silero_vad_v5.onnx` model and the onnxruntime-web `*.jsep.mjs`/
+// `*.jsep.wasm`) under `apps/client/public/vad/` — populated by
+// `scripts/copy-vad-assets.mjs`, which runs before `dev`/`build` — and point both
+// paths at the fixed absolute "/vad/".
+const VAD_BASE_ASSET_PATH: string | undefined = "/vad/";
+const VAD_ONNX_WASM_BASE_PATH: string | undefined = "/vad/";
 
 /**
  * Streaming variant of useDictation. Detects speech with a real (Silero) VAD and,