feat(share-ai): cap per-request output tokens and fail closed on Redis loss
Harden the anonymous public-share AI assistant against token-cost abuse before exposing it to the internet: - Add an env-tunable per-request output ceiling (maxOutputTokens) to the public-share streamText call so one anonymous request cannot run up the provider bill even if the per-IP throttle is evaded. New resolveShareAiMaxOutputTokens() / SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT (env SHARE_AI_MAX_OUTPUT_TOKENS, default 512), mirroring resolveShareAiWorkspaceMax(). - Flip the per-workspace cost limiter to FAIL CLOSED on Redis failure (was fail-open): if Redis is unavailable we cannot prove the workspace is under its cap, so deny rather than admit an unmetered, billable call. - Update the limiter spec (fail-open -> fail-closed) and add resolver tests; document both knobs in .env.example. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -63,6 +63,22 @@ export interface PublicShareChatStreamArgs {
|
||||
export const MAX_SHARE_MESSAGES = 30;
|
||||
export const MAX_SHARE_MESSAGE_CHARS = 8000;
|
||||
|
||||
/**
|
||||
* Per-request output-token ceiling for the anonymous assistant. `streamText`
|
||||
* runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
|
||||
* request is bounded by (steps × this). The per-workspace cap bounds the COUNT
|
||||
* of calls; this bounds the SIZE of each, so a single anonymous call cannot run
|
||||
* up the provider bill even if the per-IP throttle is evaded. Env-overridable
|
||||
* seam; a non-positive or unparseable value falls back to the default.
|
||||
*/
|
||||
export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
|
||||
export function resolveShareAiMaxOutputTokens(): number {
|
||||
const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
|
||||
return Number.isFinite(raw) && raw > 0
|
||||
? Math.floor(raw)
|
||||
: SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Keep ONLY genuine conversation turns from the client-held transcript. The
|
||||
* payload is fully attacker-controlled; a forged `system` turn could try to
|
||||
@@ -204,6 +220,9 @@ export class PublicShareChatService {
|
||||
tools,
|
||||
// Bound the agent loop for anonymous callers.
|
||||
stopWhen: stepCountIs(5),
|
||||
// Cap per-request output so one anonymous call cannot run up the provider
|
||||
// bill even if the per-IP throttle is evaded; worst case = steps × this.
|
||||
maxOutputTokens: resolveShareAiMaxOutputTokens(),
|
||||
abortSignal: signal,
|
||||
onError: ({ error }) => {
|
||||
const e = error as {
|
||||
|
||||
@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
|
||||
import {
|
||||
PublicShareChatService,
|
||||
filterShareTranscript,
|
||||
resolveShareAiMaxOutputTokens,
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
} from './public-share-chat.service';
|
||||
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
|
||||
import {
|
||||
@@ -396,6 +398,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
|
||||
});
|
||||
});
|
||||
|
||||
describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
|
||||
const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
|
||||
const original = process.env[ENV];
|
||||
|
||||
afterEach(() => {
|
||||
if (original === undefined) delete process.env[ENV];
|
||||
else process.env[ENV] = original;
|
||||
});
|
||||
|
||||
it('falls back to the default when unset', () => {
|
||||
delete process.env[ENV];
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
|
||||
});
|
||||
|
||||
it('uses (and floors) a valid positive value from the env', () => {
|
||||
process.env[ENV] = '1024.9';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(1024);
|
||||
});
|
||||
|
||||
it('falls back to the default for zero, a negative, or a non-numeric value', () => {
|
||||
process.env[ENV] = '0';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
process.env[ENV] = '-5';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
process.env[ENV] = 'not-a-number';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
|
||||
it('allows up to the cap within a window, then 429s (returns false)', async () => {
|
||||
const limiter = makeLimiter(3, 60_000, () => 1_000);
|
||||
@@ -477,11 +517,12 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
|
||||
expect(await limiter.tryConsume('ws-1')).toBe(true);
|
||||
});
|
||||
|
||||
it('FAILS OPEN (returns true) when the Redis eval rejects', async () => {
|
||||
// The per-workspace cap is a COST backstop, not an access boundary: the
|
||||
// funnel access gates and the per-IP throttle still apply. A transient
|
||||
// Redis failure must therefore ADMIT the call (true) rather than 500/429,
|
||||
// so a Redis blip cannot take the public-share assistant fully offline.
|
||||
it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
|
||||
// The per-workspace cap is the COST backstop for an OPTIONAL anonymous
|
||||
// assistant. If Redis is unavailable we cannot prove the workspace is under
|
||||
// its cap, so we DENY (controller 429s) rather than admit an unmetered,
|
||||
// billable call — a brief Redis blip disabling the assistant is safer than
|
||||
// an unbounded provider bill.
|
||||
const failingRedis = {
|
||||
eval: () => Promise.reject(new Error('redis down')),
|
||||
} as unknown as import('ioredis').Redis;
|
||||
@@ -495,7 +536,7 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
|
||||
const errSpy = jest
|
||||
.spyOn(Logger.prototype, 'error')
|
||||
.mockImplementation(() => undefined);
|
||||
expect(await limiter.tryConsume('ws-1')).toBe(true);
|
||||
expect(await limiter.tryConsume('ws-1')).toBe(false);
|
||||
expect(errSpy).toHaveBeenCalled(); // the failure MUST be logged, not swallowed
|
||||
errSpy.mockRestore();
|
||||
});
|
||||
|
||||
@@ -99,9 +99,11 @@ export class PublicShareWorkspaceLimiter {
|
||||
/**
|
||||
* Account one call for `key`. Returns true if it is within the cap (allowed),
|
||||
* false if the cap over the trailing window is exceeded (caller must 429).
|
||||
* On a Redis failure we FAIL OPEN (return true): the cap is a cost backstop,
|
||||
* not an auth boundary, and the access funnel + per-IP throttle still apply —
|
||||
* we never want a transient Redis blip to take the assistant fully offline.
|
||||
* On a Redis failure we FAIL CLOSED (return false): this cap is the COST
|
||||
* backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
|
||||
* cannot prove the workspace is under its cap and therefore DENY rather than
|
||||
* admit an unmetered, billable anonymous call. A transient Redis blip briefly
|
||||
* disabling the assistant is preferable to an unbounded provider bill.
|
||||
*/
|
||||
async tryConsume(key: string): Promise<boolean> {
|
||||
const t = this.now();
|
||||
@@ -120,15 +122,16 @@ export class PublicShareWorkspaceLimiter {
|
||||
);
|
||||
return admitted === 1;
|
||||
} catch (err) {
|
||||
// Fail OPEN: this per-workspace cap is a COST backstop, not an access
|
||||
// control — the funnel access gates and the per-IP throttle still apply.
|
||||
// A transient Redis failure must not take the public-share assistant
|
||||
// fully offline, so we admit the call rather than 500 the request.
|
||||
// FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
|
||||
// under its cap, so we DENY (the controller 429s) rather than admit an
|
||||
// unmetered, billable anonymous call. The assistant is optional, so a
|
||||
// transient Redis blip briefly disabling it is the safer failure mode than
|
||||
// an unbounded provider bill.
|
||||
this.logger.error(
|
||||
`share-ai workspace limiter Redis failure for key "${key}"; failing open`,
|
||||
`share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
|
||||
err as Error,
|
||||
);
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user