Implements all reviewer comments (code-review, red-team, and test-strategy audit), accepting the recommended variants. Server — realtime service (ai-realtime.service.ts): - SSRF: pin the validated IP via a WebSocket `lookup` hook that re-checks every resolved address with isIpAllowed (mirrors external-mcp buildPinnedDispatcher), closing the TOCTOU/DNS-rebinding window; fix the misleading comment. - no-silent-loss: on Stop, drain the in-flight segment (bounded 2.5s) and deliver the final via onFinal before closing instead of dropping the tail. - fail-closed deriveRealtimeUrl: a non-empty unparseable base now THROWS (no silent api.openai.com fallback that would leak a self-hosted key); http://ws:// bases rejected (plaintext key). Path normalization preserved. - parseUpstreamEvent keys the accumulator by item_id+content_index so GA segments don't concatenate. - inject a wsFactory seam for testing; also fix a latent bug — `import WebSocket from 'ws'` resolved to undefined at runtime (no esModuleInterop) -> import=require. - unref idle/max/drain timers. Server — realtime gateway (ai-realtime.gateway.ts, session-limits.ts): - reject revoked/disabled users and inactive sessions (mirror jwt.strategy: findById+isUserDisabled + findActiveById) with NO counter increment. - CSWSH: Origin allowlist (matching APP_URL, or no Origin for native clients) before auth, no increment. - extract SessionCounters (delete-at-zero, never negative) + pure canConnect (both caps >= checked before any increment); document the per-process/in-memory cap caveat (single-replica only). Client: - dictation-group: realtime final now inserts at the captured rangeRef SNAPSHOT (not the live caret) and guards editor.isEditable; single-space separator. - use-realtime-dictation/realtime-dictation-client: stop-during-acquisition tears down the mic (no leak / button reset); reconnect re-emits start (double-start guarded); interim ghost cleared on teardown; io() options de-duplicated. 
- pcm16-worklet: flush the partial sub-frame tail on stop; one-pole anti-aliasing low-pass before 48k->24k. - extract shared mic-capture (acquireMicStream/mapGetUserMediaError, used by batch + realtime), pure DSP (pcm16-dsp.ts), and the session reducer/baseLanguageSubtag; extract applyInterimMeta/clampRange/resolveUrl/appendFinalToDraft. Tests + infra: +~150 server tests (deriveRealtimeUrl, parseUpstreamEvent branches, openSession/lifecycle/timers/testConnection via fake ws, gateway auth/caps/no-leak, realtime-test admin contract, AiSettings update/resolve, DTO boolean, SSRF deny) and +~140 client tests (DSP property/edge, resampler continuity, framing, reducer, mic-capture, RealtimeDictationClient/MicButton, ProseMirror interim regression + history guards, appendFinalToDraft, resolveKeyField, route contract). Added @vitest/coverage-v8. CHANGELOG [Unreleased] entry incl. the single-replica caveat. Review: APPROVE WITH SUGGESTIONS (no critical/regression); applied the drain-timer unref. Server tsc clean + 358 tests; client tsc clean + 201 tests; vite build ok. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
55 lines
2.3 KiB
TypeScript
55 lines
2.3 KiB
TypeScript
import 'reflect-metadata';
|
|
import { plainToInstance } from 'class-transformer';
|
|
import { validate } from 'class-validator';
|
|
import { UpdateAiSettingsDto } from './update-ai-settings.dto';
|
|
import { isUrlAllowed } from '../../../core/ai-chat/external-mcp/ssrf-guard';
|
|
|
|
// SSRF contract for sttRealtimeBaseUrl.
//
// The DTO intentionally validates sttRealtimeBaseUrl with @IsString() ONLY (no
// @IsUrl): an admin may legitimately point at an internal-looking host that DNS
// resolves to a public address, and over-strict URL validation would reject
// valid setups. The real defense is the CONNECT-TIME SSRF guard (isUrlAllowed on
// the http-equivalent of the wss URL), which blocks link-local/loopback/private
// targets. The tests below pin both halves of that contract: the DTO accepts any
// string, and the connect-time guard rejects the dangerous targets.
|
|
|
|
/**
 * Runs class-validator against a plain payload after converting it into an
 * UpdateAiSettingsDto instance.
 *
 * @param payload - Plain object standing in for the request body.
 * @returns Whatever `validate` resolves with for the converted instance.
 */
async function validateDto(payload: Record<string, unknown>) {
  // plainToInstance's result is passed as `object`, which is what validate() accepts.
  return validate(plainToInstance(UpdateAiSettingsDto, payload) as object);
}
|
|
|
|
describe('UpdateAiSettingsDto.sttRealtimeBaseUrl is @IsString only (no @IsUrl)', () => {
  // Property name whose validation error (if any) each case inspects.
  const FIELD = 'sttRealtimeBaseUrl';

  // Validates the payload and picks out the error entry for FIELD, or
  // undefined when that property produced no validation error.
  const fieldErrorFor = async (payload: Record<string, unknown>) => {
    const allErrors = await validateDto(payload);
    return allErrors.find(({ property }) => property === FIELD);
  };

  it('accepts a metadata-service URL at the DTO layer (string, not URL-validated)', async () => {
    const fieldErr = await fieldErrorFor({
      sttRealtimeBaseUrl: 'http://169.254.169.254/v1',
    });

    // The DTO layer must not reject this value: blocking is deferred to the
    // connect-time guard.
    expect(fieldErr).toBeUndefined();
  });

  it('rejects a non-string sttRealtimeBaseUrl with an isString error', async () => {
    const fieldErr = await fieldErrorFor({ sttRealtimeBaseUrl: 123 });

    // A missing error (or one without constraints) yields an empty list, so
    // the assertion below fails instead of throwing on undefined.
    const violated = fieldErr?.constraints
      ? Object.keys(fieldErr.constraints)
      : [];
    expect(violated).toContain('isString');
  });
});
|
|
|
|
describe('connect-time SSRF guard blocks the metadata service', () => {
  // Each entry registers one test: the URL handed to isUrlAllowed must come
  // back with ok === false.
  //
  // The realtime path derives a wss URL then checks isUrlAllowed on the
  // http(s)-equivalent. For http://169.254.169.254 the equivalent is itself.
  const deniedTargets = [
    {
      title:
        'isUrlAllowed denies the http-equivalent of the cloud metadata endpoint',
      url: 'http://169.254.169.254/v1',
    },
    {
      title: 'isUrlAllowed denies loopback',
      url: 'http://127.0.0.1/v1',
    },
  ];

  for (const { title, url } of deniedTargets) {
    it(title, async () => {
      const verdict = await isUrlAllowed(url);
      expect(verdict.ok).toBe(false);
    });
  }
});
|