feat(dictation): streaming STT via silence cut (Silero VAD)

Add a lightweight "streaming" dictation mode as a simpler alternative to the
realtime-websocket path: detect speech with Silero VAD (@ricky0123/vad-web),
cut each segment on a pause and POST it to the existing /ai-chat/transcribe
endpoint, so text appears progressively. No server changes.

- new useStreamingDictation hook (same API as useDictation), lazy-loads VAD,
  in-order seq emission, session-epoch guard against stop->start races
- new encodeWavPcm16 util (Float32 -> mono PCM16 WAV, accepted by the server)
- MicButton gains a `streaming` prop; enabled in the editor toolbar and chat
- VAD tuning: redemptionMs 640 / preSpeechPadMs 320 / minSpeechMs 96
- batch dictation kept as the fallback (streaming=false)
- deps: @ricky0123/vad-web@0.0.30, onnxruntime-web@1.27.0

Note: VAD assets load from the library CDN by default; for self-hosted/offline
set VAD_BASE_ASSET_PATH/VAD_ONNX_WASM_BASE_PATH and copy assets to public/vad/.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-22 16:52:05 +03:00
parent 7ce1a24f82
commit 4f0da42d88
7 changed files with 555 additions and 16 deletions

47
pnpm-lock.yaml generated
View File

@@ -299,6 +299,9 @@ importers:
'@mantine/spotlight':
specifier: 8.3.18
version: 8.3.18(@mantine/core@8.3.18(@mantine/hooks@8.3.18(react@18.3.1))(@types/react@18.3.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(@mantine/hooks@8.3.18(react@18.3.1))(react-dom@18.3.1(react@18.3.1))(react@18.3.1)
'@ricky0123/vad-web':
specifier: ^0.0.30
version: 0.0.30
'@slidoapp/emoji-mart':
specifier: 5.8.7
version: 5.8.7
@@ -374,6 +377,9 @@ importers:
mitt:
specifier: 3.0.1
version: 3.0.1
onnxruntime-web:
specifier: ^1.27.0
version: 1.27.0
posthog-js:
specifier: 1.372.2
version: 1.372.2
@@ -4205,6 +4211,9 @@ packages:
'@remirror/core-constants@3.0.0':
resolution: {integrity: sha512-42aWfPrimMfDKDi4YegyS7x+/0tlzaqwPQCULLanv3DMIlu96KTJR0fM5isWX2UViOqlGnX6YFgqWepcX+XMNg==}
'@ricky0123/vad-web@0.0.30':
resolution: {integrity: sha512-cJyYrh4YeeUBJcbR9Bic/bFDyB9qBkAepvpuWM3vLxnAi7bC3VHzf51UeNdT+OtY4D7MLAgV8iJMc4z41ZnaWg==}
'@rolldown/binding-android-arm64@1.0.0-rc.12':
resolution: {integrity: sha512-pv1y2Fv0JybcykuiiD3qBOBdz6RteYojRFY1d+b95WVuzx211CRh+ytI/+9iVyWQ6koTh5dawe4S/yRfOFjgaA==}
engines: {node: ^20.19.0 || >=22.12.0}
@@ -5253,6 +5262,7 @@ packages:
'@ungap/structured-clone@1.3.0':
resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==}
deprecated: Potential CWE-502 - Update to 1.3.1 or higher
'@unrs/resolver-binding-android-arm-eabi@1.11.1':
resolution: {integrity: sha512-ppLRUgHVaGRWUx0R0Ut06Mjo9gBaBkg3v/8AxusGLhsIotbBLuRk51rAzqLC8gq6NyyAojEXglNjzf6R948DNw==}
@@ -7026,6 +7036,9 @@ packages:
resolution: {integrity: sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==}
hasBin: true
flatbuffers@25.9.23:
resolution: {integrity: sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==}
flatted@3.4.2:
resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==}
@@ -7188,6 +7201,9 @@ packages:
graceful-fs@4.2.11:
resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
guid-typescript@1.0.9:
resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==}
hachure-fill@0.5.2:
resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==}
@@ -8623,6 +8639,12 @@ packages:
resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
engines: {node: '>=6'}
onnxruntime-common@1.27.0:
resolution: {integrity: sha512-3KxL5wIVqa8Ex08jxSzncm9CMgw8CjOFyOQ7SxvG9o0cVLlhTNKXyIQuTbtX4tGPJEf73OER2xrjt4HJSBL4ow==}
onnxruntime-web@1.27.0:
resolution: {integrity: sha512-ogDLsqIozHZwifPuN37OproAo0byX6t43/bP8GzeZWBWD6MOGExswFAx3up4NS/vvWBOg2u2PXomDt3rMmdQSg==}
open@8.4.2:
resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==}
engines: {node: '>=12'}
@@ -8912,6 +8934,9 @@ packages:
pkg-types@1.3.1:
resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==}
platform@1.3.6:
resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==}
pluralize@8.0.0:
resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==}
engines: {node: '>=4'}
@@ -9645,6 +9670,7 @@ packages:
sliced@1.0.1:
resolution: {integrity: sha512-VZBmZP8WU3sMOZm1bdgTadsQbcscK0UM8oKxKVBs4XAhUo2Xxzm/OFMGBkPusxw9xL3Uy8LrzEqGqJhclsr0yA==}
deprecated: Unsupported
socket.io-adapter@2.5.4:
resolution: {integrity: sha512-wDNHGXGewWAjQPt3pyeYBtpWSq9cLE5UW1ZUPL/2eGK9jtse/FpXib7epSTsz0Q0m+6sg6Y4KtcFTlah1bdOVg==}
@@ -14568,6 +14594,10 @@ snapshots:
'@remirror/core-constants@3.0.0': {}
'@ricky0123/vad-web@0.0.30':
dependencies:
onnxruntime-web: 1.27.0
'@rolldown/binding-android-arm64@1.0.0-rc.12':
optional: true
@@ -17812,6 +17842,8 @@ snapshots:
flat@5.0.2: {}
flatbuffers@25.9.23: {}
flatted@3.4.2: {}
follow-redirects@1.16.0: {}
@@ -17970,6 +18002,8 @@ snapshots:
graceful-fs@4.2.11: {}
guid-typescript@1.0.9: {}
hachure-fill@0.5.2: {}
handlebars@4.7.9:
@@ -19587,6 +19621,17 @@ snapshots:
dependencies:
mimic-fn: 2.1.0
onnxruntime-common@1.27.0: {}
onnxruntime-web@1.27.0:
dependencies:
flatbuffers: 25.9.23
guid-typescript: 1.0.9
long: 5.3.2
onnxruntime-common: 1.27.0
platform: 1.3.6
protobufjs: 7.5.8
open@8.4.2:
dependencies:
define-lazy-prop: 2.0.0
@@ -19911,6 +19956,8 @@ snapshots:
mlly: 1.8.0
pathe: 2.0.3
platform@1.3.6: {}
pluralize@8.0.0: {}
png-chunk-text@1.0.0: {}