feat(share-ai): cap per-request output tokens and fail closed on Redis loss

Harden the anonymous public-share AI assistant against token-cost abuse before exposing it to the internet: - Add an env-tunable per-request output ceiling (maxOutputTokens) to the public-share streamText call so one anonymous request cannot run up the provider bill even if the per-IP throttle is evaded. New resolveShareAiMaxOutputTokens() / SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT (env SHARE_AI_MAX_OUTPUT_TOKENS, default 512), mirroring resolveShareAiWorkspaceMax(). - Flip the per-workspace cost limiter to FAIL CLOSED on Redis failure (was fail-open): if Redis is unavailable we cannot prove the workspace is under its cap, so deny rather than admit an unmetered, billable call. - Update the limiter spec (fail-open -> fail-closed) and add resolver tests; document both knobs in .env.example. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-21 02:13:04 +03:00
parent 987a4fd32e
commit 262a0707d9
4 changed files with 85 additions and 17 deletions
--- a/.env.example
+++ b/.env.example
@@ -112,7 +112,12 @@ MCP_DOCMOST_PASSWORD=
 #
 # Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent,
 # keyed by the server-resolved workspace id) bounds the owner's bill even if the
-# per-IP limit is fully evaded. It is a COST backstop, not an access control,
+# per-IP limit is fully evaded. It is a COST backstop, not an access control, and
-# and FAILS OPEN if Redis is unavailable. Override the hourly cap below
+# FAILS CLOSED if Redis is unavailable (an optional assistant briefly going
 # offline is safer than an unbounded bill). Override the hourly cap below
 # (default: 300 calls per workspace per rolling hour).
 # SHARE_AI_WORKSPACE_MAX_PER_HOUR=300
 #
 # Per-request output-token ceiling for the anonymous assistant (default: 512).
 # Worst-case output per accepted call = agent steps (5) × this value.
 # SHARE_AI_MAX_OUTPUT_TOKENS=512
--- a/apps/server/src/core/ai-chat/public-share-chat.service.ts
+++ b/apps/server/src/core/ai-chat/public-share-chat.service.ts
@@ -63,6 +63,22 @@ export interface PublicShareChatStreamArgs {
 export const MAX_SHARE_MESSAGES = 30;
 export const MAX_SHARE_MESSAGE_CHARS = 8000;
 /**
 * Per-request output-token ceiling for the anonymous assistant. `streamText`
 * runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
 * request is bounded by (steps × this). The per-workspace cap bounds the COUNT
 * of calls; this bounds the SIZE of each, so a single anonymous call cannot run
 * up the provider bill even if the per-IP throttle is evaded. Env-overridable
 * seam; a non-positive or unparseable value falls back to the default.
 */
 export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
 export function resolveShareAiMaxOutputTokens(): number {
  const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
  return Number.isFinite(raw) && raw > 0
    ? Math.floor(raw)
    : SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
 }
 /**
 * Keep ONLY genuine conversation turns from the client-held transcript. The
 * payload is fully attacker-controlled; a forged `system` turn could try to
@@ -204,6 +220,9 @@ export class PublicShareChatService {
        tools,
        // Bound the agent loop for anonymous callers.
        stopWhen: stepCountIs(5),
        // Cap per-request output so one anonymous call cannot run up the provider
        // bill even if the per-IP throttle is evaded; worst case = steps × this.
        maxOutputTokens: resolveShareAiMaxOutputTokens(),
        abortSignal: signal,
        onError: ({ error }) => {
          const e = error as {
--- a/apps/server/src/core/ai-chat/public-share-chat.spec.ts
+++ b/apps/server/src/core/ai-chat/public-share-chat.spec.ts
@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
 import {
  PublicShareChatService,
  filterShareTranscript,
  resolveShareAiMaxOutputTokens,
  SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
 } from './public-share-chat.service';
 import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
 import {
@@ -396,6 +398,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
  });
 });
 describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
  const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
  const original = process.env[ENV];
  afterEach(() => {
    if (original === undefined) delete process.env[ENV];
    else process.env[ENV] = original;
  });
  it('falls back to the default when unset', () => {
    delete process.env[ENV];
    expect(resolveShareAiMaxOutputTokens()).toBe(
      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
    );
    expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
  });
  it('uses (and floors) a valid positive value from the env', () => {
    process.env[ENV] = '1024.9';
    expect(resolveShareAiMaxOutputTokens()).toBe(1024);
  });
  it('falls back to the default for zero, a negative, or a non-numeric value', () => {
    process.env[ENV] = '0';
    expect(resolveShareAiMaxOutputTokens()).toBe(
      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
    );
    process.env[ENV] = '-5';
    expect(resolveShareAiMaxOutputTokens()).toBe(
      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
    );
    process.env[ENV] = 'not-a-number';
    expect(resolveShareAiMaxOutputTokens()).toBe(
      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
    );
  });
 });
 describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
  it('allows up to the cap within a window, then 429s (returns false)', async () => {
    const limiter = makeLimiter(3, 60_000, () => 1_000);
@@ -477,11 +517,12 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
    expect(await limiter.tryConsume('ws-1')).toBe(true);
  });
-  it('FAILS OPEN (returns true) when the Redis eval rejects', async () => {
+  it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
-    // The per-workspace cap is a COST backstop, not an access boundary: the
+    // The per-workspace cap is the COST backstop for an OPTIONAL anonymous
-    // funnel access gates and the per-IP throttle still apply. A transient
+    // assistant. If Redis is unavailable we cannot prove the workspace is under
-    // Redis failure must therefore ADMIT the call (true) rather than 500/429,
+    // its cap, so we DENY (controller 429s) rather than admit an unmetered,
-    // so a Redis blip cannot take the public-share assistant fully offline.
+    // billable call — a brief Redis blip disabling the assistant is safer than
    // an unbounded provider bill.
    const failingRedis = {
      eval: () => Promise.reject(new Error('redis down')),
    } as unknown as import('ioredis').Redis;
@@ -495,7 +536,7 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
    const errSpy = jest
      .spyOn(Logger.prototype, 'error')
      .mockImplementation(() => undefined);
-    expect(await limiter.tryConsume('ws-1')).toBe(true);
+    expect(await limiter.tryConsume('ws-1')).toBe(false);
    expect(errSpy).toHaveBeenCalled(); // the failure MUST be logged, not swallowed
    errSpy.mockRestore();
  });
--- a/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
+++ b/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
@@ -99,9 +99,11 @@ export class PublicShareWorkspaceLimiter {
  /**
   * Account one call for `key`. Returns true if it is within the cap (allowed),
   * false if the cap over the trailing window is exceeded (caller must 429).
-   * On a Redis failure we FAIL OPEN (return true): the cap is a cost backstop,
+   * On a Redis failure we FAIL CLOSED (return false): this cap is the COST
-   * not an auth boundary, and the access funnel + per-IP throttle still apply —
+   * backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
-   * we never want a transient Redis blip to take the assistant fully offline.
+   * cannot prove the workspace is under its cap and therefore DENY rather than
   * admit an unmetered, billable anonymous call. A transient Redis blip briefly
   * disabling the assistant is preferable to an unbounded provider bill.
   */
  async tryConsume(key: string): Promise<boolean> {
    const t = this.now();
@@ -120,15 +122,16 @@ export class PublicShareWorkspaceLimiter {
      );
      return admitted === 1;
    } catch (err) {
-      // Fail OPEN: this per-workspace cap is a COST backstop, not an access
+      // FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
-      // control — the funnel access gates and the per-IP throttle still apply.
+      // under its cap, so we DENY (the controller 429s) rather than admit an
-      // A transient Redis failure must not take the public-share assistant
+      // unmetered, billable anonymous call. The assistant is optional, so a
-      // fully offline, so we admit the call rather than 500 the request.
+      // transient Redis blip briefly disabling it is the safer failure mode than
      // an unbounded provider bill.
      this.logger.error(
-        `share-ai workspace limiter Redis failure for key "${key}"; failing open`,
+        `share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
        err as Error,
      );
-      return true;
+      return false;
    }
  }
 }