diff --git a/.env.example b/.env.example index b04078e3..11e9fc13 100644 --- a/.env.example +++ b/.env.example @@ -112,7 +112,12 @@ MCP_DOCMOST_PASSWORD= # # Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent, # keyed by the server-resolved workspace id) bounds the owner's bill even if the -# per-IP limit is fully evaded. It is a COST backstop, not an access control, -# and FAILS OPEN if Redis is unavailable. Override the hourly cap below +# per-IP limit is fully evaded. It is a COST backstop, not an access control, and +# FAILS CLOSED if Redis is unavailable (an optional assistant briefly going +# offline is safer than an unbounded bill). Override the hourly cap below # (default: 300 calls per workspace per rolling hour). # SHARE_AI_WORKSPACE_MAX_PER_HOUR=300 +# +# Per-request output-token ceiling for the anonymous assistant (default: 512). +# Worst-case output per accepted call = agent steps (5) × this value. +# SHARE_AI_MAX_OUTPUT_TOKENS=512 diff --git a/apps/server/src/core/ai-chat/public-share-chat.service.ts b/apps/server/src/core/ai-chat/public-share-chat.service.ts index d385c4f0..380d2fd7 100644 --- a/apps/server/src/core/ai-chat/public-share-chat.service.ts +++ b/apps/server/src/core/ai-chat/public-share-chat.service.ts @@ -63,6 +63,22 @@ export interface PublicShareChatStreamArgs { export const MAX_SHARE_MESSAGES = 30; export const MAX_SHARE_MESSAGE_CHARS = 8000; +/** + * Per-request output-token ceiling for the anonymous assistant. `streamText` + * runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted + * request is bounded by (steps × this). The per-workspace cap bounds the COUNT + * of calls; this bounds the SIZE of each, so a single anonymous call cannot run + * up the provider bill even if the per-IP throttle is evaded. Env-overridable + * seam; a non-positive or unparseable value falls back to the default. + */ +export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512; +export function resolveShareAiMaxOutputTokens(): number { + const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS); + return Number.isFinite(raw) && raw > 0 + ? Math.floor(raw) + : SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT; +} + /** * Keep ONLY genuine conversation turns from the client-held transcript. The * payload is fully attacker-controlled; a forged `system` turn could try to @@ -204,6 +220,9 @@ export class PublicShareChatService { tools, // Bound the agent loop for anonymous callers. stopWhen: stepCountIs(5), + // Cap per-request output so one anonymous call cannot run up the provider + // bill even if the per-IP throttle is evaded; worst case = steps × this. + maxOutputTokens: resolveShareAiMaxOutputTokens(), abortSignal: signal, onError: ({ error }) => { const e = error as { diff --git a/apps/server/src/core/ai-chat/public-share-chat.spec.ts b/apps/server/src/core/ai-chat/public-share-chat.spec.ts index 2be6a5f4..3ac99f66 100644 --- a/apps/server/src/core/ai-chat/public-share-chat.spec.ts +++ b/apps/server/src/core/ai-chat/public-share-chat.spec.ts @@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt'; import { PublicShareChatService, filterShareTranscript, + resolveShareAiMaxOutputTokens, + SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT, } from './public-share-chat.service'; import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service'; import { @@ -396,6 +398,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () => }); }); +describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => { + const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS'; + const original = process.env[ENV]; + + afterEach(() => { + if (original === undefined) delete process.env[ENV]; + else process.env[ENV] = original; + }); + + it('falls back to the default when unset', () => { + delete process.env[ENV]; + expect(resolveShareAiMaxOutputTokens()).toBe( + SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT, + ); + expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512); + }); + + it('uses (and floors) a valid positive value from the env', () => { + process.env[ENV] = '1024.9'; + expect(resolveShareAiMaxOutputTokens()).toBe(1024); + }); + + it('falls back to the default for zero, a negative, or a non-numeric value', () => { + process.env[ENV] = '0'; + expect(resolveShareAiMaxOutputTokens()).toBe( + SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT, + ); + process.env[ENV] = '-5'; + expect(resolveShareAiMaxOutputTokens()).toBe( + SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT, + ); + process.env[ENV] = 'not-a-number'; + expect(resolveShareAiMaxOutputTokens()).toBe( + SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT, + ); + }); +}); + describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => { it('allows up to the cap within a window, then 429s (returns false)', async () => { const limiter = makeLimiter(3, 60_000, () => 1_000); @@ -477,11 +517,12 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace expect(await limiter.tryConsume('ws-1')).toBe(true); }); - it('FAILS OPEN (returns true) when the Redis eval rejects', async () => { - // The per-workspace cap is a COST backstop, not an access boundary: the - // funnel access gates and the per-IP throttle still apply. A transient - // Redis failure must therefore ADMIT the call (true) rather than 500/429, - // so a Redis blip cannot take the public-share assistant fully offline. + it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => { + // The per-workspace cap is the COST backstop for an OPTIONAL anonymous + // assistant. If Redis is unavailable we cannot prove the workspace is under + // its cap, so we DENY (controller 429s) rather than admit an unmetered, + // billable call — a brief Redis blip disabling the assistant is safer than + // an unbounded provider bill. const failingRedis = { eval: () => Promise.reject(new Error('redis down')), } as unknown as import('ioredis').Redis; @@ -495,7 +536,7 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace const errSpy = jest .spyOn(Logger.prototype, 'error') .mockImplementation(() => undefined); - expect(await limiter.tryConsume('ws-1')).toBe(true); + expect(await limiter.tryConsume('ws-1')).toBe(false); expect(errSpy).toHaveBeenCalled(); // the failure MUST be logged, not swallowed errSpy.mockRestore(); }); diff --git a/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts b/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts index bf14d7d6..bcc40c5a 100644 --- a/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts +++ b/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts @@ -99,9 +99,11 @@ export class PublicShareWorkspaceLimiter { /** * Account one call for `key`. Returns true if it is within the cap (allowed), * false if the cap over the trailing window is exceeded (caller must 429). - * On a Redis failure we FAIL OPEN (return true): the cap is a cost backstop, - * not an auth boundary, and the access funnel + per-IP throttle still apply — - * we never want a transient Redis blip to take the assistant fully offline. + * On a Redis failure we FAIL CLOSED (return false): this cap is the COST + * backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we + * cannot prove the workspace is under its cap and therefore DENY rather than + * admit an unmetered, billable anonymous call. A transient Redis blip briefly + * disabling the assistant is preferable to an unbounded provider bill. */ async tryConsume(key: string): Promise { const t = this.now(); @@ -120,15 +122,16 @@ export class PublicShareWorkspaceLimiter { ); return admitted === 1; } catch (err) { - // Fail OPEN: this per-workspace cap is a COST backstop, not an access - // control — the funnel access gates and the per-IP throttle still apply. - // A transient Redis failure must not take the public-share assistant - // fully offline, so we admit the call rather than 500 the request. + // FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is + // under its cap, so we DENY (the controller 429s) rather than admit an + // unmetered, billable anonymous call. The assistant is optional, so a + // transient Redis blip briefly disabling it is the safer failure mode than + // an unbounded provider bill. this.logger.error( - `share-ai workspace limiter Redis failure for key "${key}"; failing open`, + `share-ai workspace limiter Redis failure for key "${key}"; failing closed`, err as Error, ); - return true; + return false; } } }