feat(share-ai): cap per-request output tokens and fail closed on Redis loss
Harden the anonymous public-share AI assistant against token-cost abuse before exposing it to the internet: - Add an env-tunable per-request output ceiling (maxOutputTokens) to the public-share streamText call so one anonymous request cannot run up the provider bill even if the per-IP throttle is evaded. New resolveShareAiMaxOutputTokens() / SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT (env SHARE_AI_MAX_OUTPUT_TOKENS, default 512), mirroring resolveShareAiWorkspaceMax(). - Flip the per-workspace cost limiter to FAIL CLOSED on Redis failure (was fail-open): if Redis is unavailable we cannot prove the workspace is under its cap, so deny rather than admit an unmetered, billable call. - Update the limiter spec (fail-open -> fail-closed) and add resolver tests; document both knobs in .env.example. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -112,7 +112,12 @@ MCP_DOCMOST_PASSWORD=
|
|||||||
#
|
#
|
||||||
# Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent,
|
# Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent,
|
||||||
# keyed by the server-resolved workspace id) bounds the owner's bill even if the
|
# keyed by the server-resolved workspace id) bounds the owner's bill even if the
|
||||||
# per-IP limit is fully evaded. It is a COST backstop, not an access control,
|
# per-IP limit is fully evaded. It is a COST backstop, not an access control, and
|
||||||
# and FAILS OPEN if Redis is unavailable. Override the hourly cap below
|
# FAILS CLOSED if Redis is unavailable (an optional assistant briefly going
|
||||||
|
# offline is safer than an unbounded bill). Override the hourly cap below
|
||||||
# (default: 300 calls per workspace per rolling hour).
|
# (default: 300 calls per workspace per rolling hour).
|
||||||
# SHARE_AI_WORKSPACE_MAX_PER_HOUR=300
|
# SHARE_AI_WORKSPACE_MAX_PER_HOUR=300
|
||||||
|
#
|
||||||
|
# Per-request output-token ceiling for the anonymous assistant (default: 512).
|
||||||
|
# Worst-case output per accepted call = agent steps (5) × this value.
|
||||||
|
# SHARE_AI_MAX_OUTPUT_TOKENS=512
|
||||||
|
|||||||
@@ -63,6 +63,22 @@ export interface PublicShareChatStreamArgs {
|
|||||||
export const MAX_SHARE_MESSAGES = 30;
|
export const MAX_SHARE_MESSAGES = 30;
|
||||||
export const MAX_SHARE_MESSAGE_CHARS = 8000;
|
export const MAX_SHARE_MESSAGE_CHARS = 8000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Per-request output-token ceiling for the anonymous assistant. `streamText`
|
||||||
|
* runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
|
||||||
|
* request is bounded by (steps × this). The per-workspace cap bounds the COUNT
|
||||||
|
* of calls; this bounds the SIZE of each, so a single anonymous call cannot run
|
||||||
|
* up the provider bill even if the per-IP throttle is evaded. Env-overridable
|
||||||
|
* seam; a non-positive or unparseable value falls back to the default.
|
||||||
|
*/
|
||||||
|
export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
|
||||||
|
export function resolveShareAiMaxOutputTokens(): number {
|
||||||
|
const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
|
||||||
|
return Number.isFinite(raw) && raw > 0
|
||||||
|
? Math.floor(raw)
|
||||||
|
: SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Keep ONLY genuine conversation turns from the client-held transcript. The
|
* Keep ONLY genuine conversation turns from the client-held transcript. The
|
||||||
* payload is fully attacker-controlled; a forged `system` turn could try to
|
* payload is fully attacker-controlled; a forged `system` turn could try to
|
||||||
@@ -204,6 +220,9 @@ export class PublicShareChatService {
|
|||||||
tools,
|
tools,
|
||||||
// Bound the agent loop for anonymous callers.
|
// Bound the agent loop for anonymous callers.
|
||||||
stopWhen: stepCountIs(5),
|
stopWhen: stepCountIs(5),
|
||||||
|
// Cap per-request output so one anonymous call cannot run up the provider
|
||||||
|
// bill even if the per-IP throttle is evaded; worst case = steps × this.
|
||||||
|
maxOutputTokens: resolveShareAiMaxOutputTokens(),
|
||||||
abortSignal: signal,
|
abortSignal: signal,
|
||||||
onError: ({ error }) => {
|
onError: ({ error }) => {
|
||||||
const e = error as {
|
const e = error as {
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
|
|||||||
import {
|
import {
|
||||||
PublicShareChatService,
|
PublicShareChatService,
|
||||||
filterShareTranscript,
|
filterShareTranscript,
|
||||||
|
resolveShareAiMaxOutputTokens,
|
||||||
|
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||||
} from './public-share-chat.service';
|
} from './public-share-chat.service';
|
||||||
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
|
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
|
||||||
import {
|
import {
|
||||||
@@ -396,6 +398,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
|
||||||
|
const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
|
||||||
|
const original = process.env[ENV];
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
if (original === undefined) delete process.env[ENV];
|
||||||
|
else process.env[ENV] = original;
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to the default when unset', () => {
|
||||||
|
delete process.env[ENV];
|
||||||
|
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||||
|
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||||
|
);
|
||||||
|
expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses (and floors) a valid positive value from the env', () => {
|
||||||
|
process.env[ENV] = '1024.9';
|
||||||
|
expect(resolveShareAiMaxOutputTokens()).toBe(1024);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to the default for zero, a negative, or a non-numeric value', () => {
|
||||||
|
process.env[ENV] = '0';
|
||||||
|
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||||
|
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||||
|
);
|
||||||
|
process.env[ENV] = '-5';
|
||||||
|
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||||
|
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||||
|
);
|
||||||
|
process.env[ENV] = 'not-a-number';
|
||||||
|
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||||
|
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
|
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
|
||||||
it('allows up to the cap within a window, then 429s (returns false)', async () => {
|
it('allows up to the cap within a window, then 429s (returns false)', async () => {
|
||||||
const limiter = makeLimiter(3, 60_000, () => 1_000);
|
const limiter = makeLimiter(3, 60_000, () => 1_000);
|
||||||
@@ -477,11 +517,12 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
|
|||||||
expect(await limiter.tryConsume('ws-1')).toBe(true);
|
expect(await limiter.tryConsume('ws-1')).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('FAILS OPEN (returns true) when the Redis eval rejects', async () => {
|
it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
|
||||||
// The per-workspace cap is a COST backstop, not an access boundary: the
|
// The per-workspace cap is the COST backstop for an OPTIONAL anonymous
|
||||||
// funnel access gates and the per-IP throttle still apply. A transient
|
// assistant. If Redis is unavailable we cannot prove the workspace is under
|
||||||
// Redis failure must therefore ADMIT the call (true) rather than 500/429,
|
// its cap, so we DENY (controller 429s) rather than admit an unmetered,
|
||||||
// so a Redis blip cannot take the public-share assistant fully offline.
|
// billable call — a brief Redis blip disabling the assistant is safer than
|
||||||
|
// an unbounded provider bill.
|
||||||
const failingRedis = {
|
const failingRedis = {
|
||||||
eval: () => Promise.reject(new Error('redis down')),
|
eval: () => Promise.reject(new Error('redis down')),
|
||||||
} as unknown as import('ioredis').Redis;
|
} as unknown as import('ioredis').Redis;
|
||||||
@@ -495,7 +536,7 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
|
|||||||
const errSpy = jest
|
const errSpy = jest
|
||||||
.spyOn(Logger.prototype, 'error')
|
.spyOn(Logger.prototype, 'error')
|
||||||
.mockImplementation(() => undefined);
|
.mockImplementation(() => undefined);
|
||||||
expect(await limiter.tryConsume('ws-1')).toBe(true);
|
expect(await limiter.tryConsume('ws-1')).toBe(false);
|
||||||
expect(errSpy).toHaveBeenCalled(); // the failure MUST be logged, not swallowed
|
expect(errSpy).toHaveBeenCalled(); // the failure MUST be logged, not swallowed
|
||||||
errSpy.mockRestore();
|
errSpy.mockRestore();
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -99,9 +99,11 @@ export class PublicShareWorkspaceLimiter {
|
|||||||
/**
|
/**
|
||||||
* Account one call for `key`. Returns true if it is within the cap (allowed),
|
* Account one call for `key`. Returns true if it is within the cap (allowed),
|
||||||
* false if the cap over the trailing window is exceeded (caller must 429).
|
* false if the cap over the trailing window is exceeded (caller must 429).
|
||||||
* On a Redis failure we FAIL OPEN (return true): the cap is a cost backstop,
|
* On a Redis failure we FAIL CLOSED (return false): this cap is the COST
|
||||||
* not an auth boundary, and the access funnel + per-IP throttle still apply —
|
* backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
|
||||||
* we never want a transient Redis blip to take the assistant fully offline.
|
* cannot prove the workspace is under its cap and therefore DENY rather than
|
||||||
|
* admit an unmetered, billable anonymous call. A transient Redis blip briefly
|
||||||
|
* disabling the assistant is preferable to an unbounded provider bill.
|
||||||
*/
|
*/
|
||||||
async tryConsume(key: string): Promise<boolean> {
|
async tryConsume(key: string): Promise<boolean> {
|
||||||
const t = this.now();
|
const t = this.now();
|
||||||
@@ -120,15 +122,16 @@ export class PublicShareWorkspaceLimiter {
|
|||||||
);
|
);
|
||||||
return admitted === 1;
|
return admitted === 1;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// Fail OPEN: this per-workspace cap is a COST backstop, not an access
|
// FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
|
||||||
// control — the funnel access gates and the per-IP throttle still apply.
|
// under its cap, so we DENY (the controller 429s) rather than admit an
|
||||||
// A transient Redis failure must not take the public-share assistant
|
// unmetered, billable anonymous call. The assistant is optional, so a
|
||||||
// fully offline, so we admit the call rather than 500 the request.
|
// transient Redis blip briefly disabling it is the safer failure mode than
|
||||||
|
// an unbounded provider bill.
|
||||||
this.logger.error(
|
this.logger.error(
|
||||||
`share-ai workspace limiter Redis failure for key "${key}"; failing open`,
|
`share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
|
||||||
err as Error,
|
err as Error,
|
||||||
);
|
);
|
||||||
return true;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user