Merge branch 'feat/share-ai-cost-guards' into develop

This commit is contained in:
claude_code
2026-06-21 02:21:04 +03:00
4 changed files with 85 additions and 17 deletions

View File

@@ -127,7 +127,12 @@ MCP_DOCMOST_PASSWORD=
#
# Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent,
# keyed by the server-resolved workspace id) bounds the owner's bill even if the
# per-IP limit is fully evaded. It is a COST backstop, not an access control,
# and FAILS OPEN if Redis is unavailable. Override the hourly cap below
# per-IP limit is fully evaded. It is a COST backstop, not an access control, and
# FAILS CLOSED if Redis is unavailable (an optional assistant briefly going
# offline is safer than an unbounded bill). Override the hourly cap below
# (default: 300 calls per workspace per rolling hour).
# SHARE_AI_WORKSPACE_MAX_PER_HOUR=300
#
# Per-request output-token ceiling for the anonymous assistant (default: 512).
# Worst-case output per accepted call = agent steps (5) × this value.
# SHARE_AI_MAX_OUTPUT_TOKENS=512

View File

@@ -63,6 +63,22 @@ export interface PublicShareChatStreamArgs {
export const MAX_SHARE_MESSAGES = 30;
export const MAX_SHARE_MESSAGE_CHARS = 8000;
/**
* Per-request output-token ceiling for the anonymous assistant. `streamText`
* runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
* request is bounded by (steps × this). The per-workspace cap bounds the COUNT
* of calls; this bounds the SIZE of each, so a single anonymous call cannot run
* up the provider bill even if the per-IP throttle is evaded. Env-overridable
* seam; a non-positive or unparseable value falls back to the default.
*/
export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
export function resolveShareAiMaxOutputTokens(): number {
const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
return Number.isFinite(raw) && raw > 0
? Math.floor(raw)
: SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
}
/**
* Keep ONLY genuine conversation turns from the client-held transcript. The
* payload is fully attacker-controlled; a forged `system` turn could try to
@@ -204,6 +220,9 @@ export class PublicShareChatService {
tools,
// Bound the agent loop for anonymous callers.
stopWhen: stepCountIs(5),
// Cap per-request output so one anonymous call cannot run up the provider
// bill even if the per-IP throttle is evaded; worst case = steps × this.
maxOutputTokens: resolveShareAiMaxOutputTokens(),
abortSignal: signal,
onError: ({ error }) => {
const e = error as {

View File

@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
import {
PublicShareChatService,
filterShareTranscript,
resolveShareAiMaxOutputTokens,
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
} from './public-share-chat.service';
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
import {
@@ -396,6 +398,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
});
});
describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
const original = process.env[ENV];
afterEach(() => {
if (original === undefined) delete process.env[ENV];
else process.env[ENV] = original;
});
it('falls back to the default when unset', () => {
delete process.env[ENV];
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
});
it('uses (and floors) a valid positive value from the env', () => {
process.env[ENV] = '1024.9';
expect(resolveShareAiMaxOutputTokens()).toBe(1024);
});
it('falls back to the default for zero, a negative, or a non-numeric value', () => {
process.env[ENV] = '0';
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
process.env[ENV] = '-5';
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
process.env[ENV] = 'not-a-number';
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
});
});
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
it('allows up to the cap within a window, then 429s (returns false)', async () => {
const limiter = makeLimiter(3, 60_000, () => 1_000);
@@ -477,11 +517,12 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
expect(await limiter.tryConsume('ws-1')).toBe(true);
});
it('FAILS OPEN (returns true) when the Redis eval rejects', async () => {
// The per-workspace cap is a COST backstop, not an access boundary: the
// funnel access gates and the per-IP throttle still apply. A transient
// Redis failure must therefore ADMIT the call (true) rather than 500/429,
// so a Redis blip cannot take the public-share assistant fully offline.
it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
// The per-workspace cap is the COST backstop for an OPTIONAL anonymous
// assistant. If Redis is unavailable we cannot prove the workspace is under
// its cap, so we DENY (controller 429s) rather than admit an unmetered,
// billable call — a brief Redis blip disabling the assistant is safer than
// an unbounded provider bill.
const failingRedis = {
eval: () => Promise.reject(new Error('redis down')),
} as unknown as import('ioredis').Redis;
@@ -495,7 +536,7 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
const errSpy = jest
.spyOn(Logger.prototype, 'error')
.mockImplementation(() => undefined);
expect(await limiter.tryConsume('ws-1')).toBe(true);
expect(await limiter.tryConsume('ws-1')).toBe(false);
expect(errSpy).toHaveBeenCalled(); // the failure MUST be logged, not swallowed
errSpy.mockRestore();
});

View File

@@ -99,9 +99,11 @@ export class PublicShareWorkspaceLimiter {
/**
* Account one call for `key`. Returns true if it is within the cap (allowed),
* false if the cap over the trailing window is exceeded (caller must 429).
* On a Redis failure we FAIL OPEN (return true): the cap is a cost backstop,
* not an auth boundary, and the access funnel + per-IP throttle still apply —
* we never want a transient Redis blip to take the assistant fully offline.
* On a Redis failure we FAIL CLOSED (return false): this cap is the COST
* backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
* cannot prove the workspace is under its cap and therefore DENY rather than
* admit an unmetered, billable anonymous call. A transient Redis blip briefly
* disabling the assistant is preferable to an unbounded provider bill.
*/
async tryConsume(key: string): Promise<boolean> {
const t = this.now();
@@ -120,15 +122,16 @@ export class PublicShareWorkspaceLimiter {
);
return admitted === 1;
} catch (err) {
// Fail OPEN: this per-workspace cap is a COST backstop, not an access
// control — the funnel access gates and the per-IP throttle still apply.
// A transient Redis failure must not take the public-share assistant
// fully offline, so we admit the call rather than 500 the request.
// FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
// under its cap, so we DENY (the controller 429s) rather than admit an
// unmetered, billable anonymous call. The assistant is optional, so a
// transient Redis blip briefly disabling it is the safer failure mode than
// an unbounded provider bill.
this.logger.error(
`share-ai workspace limiter Redis failure for key "${key}"; failing open`,
`share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
err as Error,
);
return true;
return false;
}
}
}