Implements all reviewer comments (code-review, red-team, and test-strategy audit), accepting the recommended variants. Server — realtime service (ai-realtime.service.ts): - SSRF: pin the validated IP via a WebSocket `lookup` hook that re-checks every resolved address with isIpAllowed (mirrors external-mcp buildPinnedDispatcher), closing the TOCTOU/DNS-rebinding window; fix the misleading comment. - no-silent-loss: on Stop, drain the in-flight segment (bounded 2.5s) and deliver the final via onFinal before closing instead of dropping the tail. - fail-closed deriveRealtimeUrl: a non-empty unparseable base now THROWS (no silent api.openai.com fallback that would leak a self-hosted key); http:// and ws:// bases are rejected (plaintext key). Path normalization preserved. - parseUpstreamEvent keys the accumulator by item_id+content_index so GA segments don't concatenate. - inject a wsFactory seam for testing; also fix a latent bug — `import WebSocket from 'ws'` resolved to undefined at runtime (no esModuleInterop) -> import=require. - unref idle/max/drain timers. Server — realtime gateway (ai-realtime.gateway.ts, session-limits.ts): - reject revoked/disabled users and inactive sessions (mirror jwt.strategy: findById+isUserDisabled + findActiveById) with NO counter increment. - CSWSH: Origin allowlist (matching APP_URL, or no Origin for native clients) before auth, no increment. - extract SessionCounters (delete-at-zero, never negative) + pure canConnect (both caps >= checked before any increment); document the per-process/in-memory cap caveat (single-replica only). Client: - dictation-group: realtime final now inserts at the captured rangeRef SNAPSHOT (not the live caret) and guards editor.isEditable; single-space separator. - use-realtime-dictation/realtime-dictation-client: stop-during-acquisition tears down the mic (no leak / button reset); reconnect re-emits start (double-start guarded); interim ghost cleared on teardown; io() options de-duplicated.
- pcm16-worklet: flush the partial sub-frame tail on stop; one-pole anti-aliasing low-pass before 48k->24k. - extract shared mic-capture (acquireMicStream/mapGetUserMediaError, used by batch + realtime), pure DSP (pcm16-dsp.ts), and the session reducer/baseLanguageSubtag; extract applyInterimMeta/clampRange/resolveUrl/appendFinalToDraft. Tests + infra: +~150 server tests (deriveRealtimeUrl, parseUpstreamEvent branches, openSession/lifecycle/timers/testConnection via fake ws, gateway auth/caps/no-leak, realtime-test admin contract, AiSettings update/resolve, DTO boolean, SSRF deny) and +~140 client tests (DSP property/edge, resampler continuity, framing, reducer, mic-capture, RealtimeDictationClient/MicButton, ProseMirror interim regression + history guards, appendFinalToDraft, resolveKeyField, route contract). Added @vitest/coverage-v8. CHANGELOG [Unreleased] entry incl. the single-replica caveat. Review: APPROVE WITH SUGGESTIONS (no critical/regression); applied the drain-timer unref. Server tsc clean + 358 tests; client tsc clean + 201 tests; vite build ok. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
109 lines
3.5 KiB
TypeScript
109 lines
3.5 KiB
TypeScript
import { Extension } from "@tiptap/core";
|
|
import type { Editor } from "@tiptap/core";
|
|
import { Plugin, PluginKey } from "@tiptap/pm/state";
|
|
import { Decoration, DecorationSet } from "@tiptap/pm/view";
|
|
|
|
// Plugin key shared by the extension and the imperative helpers below so they
// dispatch/read the same plugin state. The type parameter makes
// `dictationInterimKey.getState(...)` return a typed DictationInterimState.
const dictationInterimKey = new PluginKey<DictationInterimState>(
  "dictationInterim",
);
|
|
|
|
/** State held by the dictation-interim plugin (and carried as its meta payload). */
interface DictationInterimState {
  // The current interim (partial) transcript. Empty string means "no ghost".
  text: string;
}
|
|
|
|
/**
 * Pure interim-state reducer (extracted for unit testing): a meta-only update
 * replaces the interim text; any other transaction passes the previous state
 * through unchanged. The decoration follows the caret on its own because it is
 * recomputed against the live selection on every render — so non-meta edits do
 * not need to touch this state.
 *
 * @param meta - Payload read from the transaction for this plugin's key, or
 *   `undefined` when the transaction carries no interim update.
 * @param prev - The plugin state before the transaction.
 * @returns A fresh state object holding `meta.text` when `meta` is present
 *   (an empty string is a valid value and clears the ghost); otherwise `prev`
 *   itself, by reference.
 */
export function applyInterimMeta(
  meta: DictationInterimState | undefined,
  prev: DictationInterimState,
): DictationInterimState {
  if (meta) {
    // Copy only the known field so the stored state never aliases the object
    // the caller dispatched and never picks up stray properties from it.
    return { text: meta.text };
  }
  return prev;
}
|
|
|
|
/**
 * B2 editor decoration: shows the realtime interim (partial) transcript as a
 * ghost widget at the caret. The interim is held ONLY in plugin meta state and
 * rendered as a widget Decoration — it is NEVER written into the document, so
 * it produces no Yjs update and no history entry. Only final segments are
 * committed (by the dictation-group / chat consumers).
 */
export const DictationInterim = Extension.create({
  name: "dictationInterim",

  addProseMirrorPlugins() {
    return [
      new Plugin<DictationInterimState>({
        key: dictationInterimKey,
        state: {
          // Start with no ghost text.
          init: (): DictationInterimState => ({ text: "" }),
          // Only transactions carrying this plugin's meta change the state;
          // everything else keeps the previous value (see applyInterimMeta).
          apply: (tr, value): DictationInterimState => {
            const meta = tr.getMeta(dictationInterimKey) as
              | DictationInterimState
              | undefined;
            return applyInterimMeta(meta, value);
          },
        },
        props: {
          decorations(state) {
            const text = dictationInterimKey.getState(state)?.text ?? "";
            if (!text) {
              // No interim text: render no decorations at all.
              return null;
            }

            // Render the interim as an inline ghost at the caret. Inline styles
            // keep this self-contained — no global CSS is required.
            const widget = Decoration.widget(
              state.selection.head,
              () => {
                const span = document.createElement("span");
                span.textContent = text;
                span.setAttribute("contenteditable", "false");
                span.style.opacity = "0.5";
                span.style.fontStyle = "italic";
                return span;
              },
              {
                side: 1,
                ignoreSelection: true,
                // The render callback is a fresh closure on every call, so
                // without a key ProseMirror cannot tell that two successive
                // widgets are equivalent and redraws the ghost on every state
                // update. Widgets with the same text are interchangeable, so
                // the text itself is a safe comparison key.
                key: `dictationInterim:${text}`,
              },
            );

            return DecorationSet.create(state.doc, [widget]);
          },
        },
      }),
    ];
  },
});
|
|
|
|
/**
 * Set the interim ghost text via a META-ONLY transaction — no doc steps, so it
 * generates no Yjs update and no history entry.
 *
 * @param editor - The editor whose view receives the transaction.
 * @param text - The interim transcript to show; an empty string removes the
 *   ghost (the plugin renders no decoration for empty text).
 */
export function setDictationInterim(editor: Editor, text: string): void {
  editor.view.dispatch(
    editor.state.tr.setMeta(dictationInterimKey, { text }),
  );
}
|
|
|
|
/**
 * Clear the interim ghost text via a META-ONLY transaction (same no-op-on-doc
 * guarantee as setDictationInterim).
 *
 * @param editor - The editor whose ghost text should be removed.
 */
export function clearDictationInterim(editor: Editor): void {
  // Clearing is just "set to the empty string"; delegating keeps both helpers
  // on a single dispatch path.
  setDictationInterim(editor, "");
}
|
|
|
|
// Also expose the extension as the module's default export (it is available
// as the named export `DictationInterim` as well).
export default DictationInterim;
|