Merge branch 'feat/share-ai-cost-guards' into develop

2026-06-21 02:21:04 +03:00
parent 70c26f356a 262a0707d9
commit 64818cf9df
4 changed files with 85 additions and 17 deletions
--- a/.env.example
+++ b/.env.example
@@ -127,7 +127,12 @@ MCP_DOCMOST_PASSWORD=
 #
 # Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent,
 # keyed by the server-resolved workspace id) bounds the owner's bill even if the
-# per-IP limit is fully evaded. It is a COST backstop, not an access control,
-# and FAILS OPEN if Redis is unavailable. Override the hourly cap below
+# per-IP limit is fully evaded. It is a COST backstop, not an access control, and
+# FAILS CLOSED if Redis is unavailable (an optional assistant briefly going
+# offline is safer than an unbounded bill). Override the hourly cap below
 # (default: 300 calls per workspace per rolling hour).
 # SHARE_AI_WORKSPACE_MAX_PER_HOUR=300
+#
+# Per-request output-token ceiling for the anonymous assistant (default: 512).
+# Worst-case output per accepted call = agent steps (5) × this value.
+# SHARE_AI_MAX_OUTPUT_TOKENS=512
--- a/apps/server/src/core/ai-chat/public-share-chat.service.ts
+++ b/apps/server/src/core/ai-chat/public-share-chat.service.ts
@@ -63,6 +63,22 @@ export interface PublicShareChatStreamArgs {
 export const MAX_SHARE_MESSAGES = 30;
 export const MAX_SHARE_MESSAGE_CHARS = 8000;

+/**
+ * Per-request output-token ceiling for the anonymous assistant. `streamText`
+ * runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
+ * request is bounded by (steps × this). The per-workspace cap bounds the COUNT
+ * of calls; this bounds the SIZE of each, so a single anonymous call cannot run
+ * up the provider bill even if the per-IP throttle is evaded. Env-overridable
+ * seam; a non-positive or unparseable value falls back to the default.
+ */
+export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
+export function resolveShareAiMaxOutputTokens(): number {
+  const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
+  return Number.isFinite(raw) && raw > 0
+    ? Math.floor(raw)
+    : SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
+}
+
 /**
 * Keep ONLY genuine conversation turns from the client-held transcript. The
 * payload is fully attacker-controlled; a forged `system` turn could try to
@@ -204,6 +220,9 @@ export class PublicShareChatService {
        tools,
        // Bound the agent loop for anonymous callers.
        stopWhen: stepCountIs(5),
+        // Cap per-request output so one anonymous call cannot run up the provider
+        // bill even if the per-IP throttle is evaded; worst case = steps × this.
+        maxOutputTokens: resolveShareAiMaxOutputTokens(),
        abortSignal: signal,
        onError: ({ error }) => {
          const e = error as {
--- a/apps/server/src/core/ai-chat/public-share-chat.spec.ts
+++ b/apps/server/src/core/ai-chat/public-share-chat.spec.ts
@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
 import {
  PublicShareChatService,
  filterShareTranscript,
+  resolveShareAiMaxOutputTokens,
+  SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
 } from './public-share-chat.service';
 import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
 import {
@@ -396,6 +398,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
  });
 });

+describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
+  const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
+  const original = process.env[ENV];
+
+  afterEach(() => {
+    if (original === undefined) delete process.env[ENV];
+    else process.env[ENV] = original;
+  });
+
+  it('falls back to the default when unset', () => {
+    delete process.env[ENV];
+    expect(resolveShareAiMaxOutputTokens()).toBe(
+      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+    );
+    expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
+  });
+
+  it('uses (and floors) a valid positive value from the env', () => {
+    process.env[ENV] = '1024.9';
+    expect(resolveShareAiMaxOutputTokens()).toBe(1024);
+  });
+
+  it('falls back to the default for zero, a negative, or a non-numeric value', () => {
+    process.env[ENV] = '0';
+    expect(resolveShareAiMaxOutputTokens()).toBe(
+      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+    );
+    process.env[ENV] = '-5';
+    expect(resolveShareAiMaxOutputTokens()).toBe(
+      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+    );
+    process.env[ENV] = 'not-a-number';
+    expect(resolveShareAiMaxOutputTokens()).toBe(
+      SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+    );
+  });
+});
+
 describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
  it('allows up to the cap within a window, then 429s (returns false)', async () => {
    const limiter = makeLimiter(3, 60_000, () => 1_000);
@@ -477,11 +517,12 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
    expect(await limiter.tryConsume('ws-1')).toBe(true);
  });

-  it('FAILS OPEN (returns true) when the Redis eval rejects', async () => {
-    // The per-workspace cap is a COST backstop, not an access boundary: the
-    // funnel access gates and the per-IP throttle still apply. A transient
-    // Redis failure must therefore ADMIT the call (true) rather than 500/429,
-    // so a Redis blip cannot take the public-share assistant fully offline.
+  it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
+    // The per-workspace cap is the COST backstop for an OPTIONAL anonymous
+    // assistant. If Redis is unavailable we cannot prove the workspace is under
+    // its cap, so we DENY (controller 429s) rather than admit an unmetered,
+    // billable call — a brief Redis blip disabling the assistant is safer than
+    // an unbounded provider bill.
    const failingRedis = {
      eval: () => Promise.reject(new Error('redis down')),
    } as unknown as import('ioredis').Redis;
@@ -495,7 +536,7 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
    const errSpy = jest
      .spyOn(Logger.prototype, 'error')
      .mockImplementation(() => undefined);
-    expect(await limiter.tryConsume('ws-1')).toBe(true);
+    expect(await limiter.tryConsume('ws-1')).toBe(false);
    expect(errSpy).toHaveBeenCalled(); // the failure MUST be logged, not swallowed
    errSpy.mockRestore();
  });
--- a/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
+++ b/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
@@ -99,9 +99,11 @@ export class PublicShareWorkspaceLimiter {
  /**
   * Account one call for `key`. Returns true if it is within the cap (allowed),
   * false if the cap over the trailing window is exceeded (caller must 429).
-   * On a Redis failure we FAIL OPEN (return true): the cap is a cost backstop,
-   * not an auth boundary, and the access funnel + per-IP throttle still apply —
-   * we never want a transient Redis blip to take the assistant fully offline.
+   * On a Redis failure we FAIL CLOSED (return false): this cap is the COST
+   * backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
+   * cannot prove the workspace is under its cap and therefore DENY rather than
+   * admit an unmetered, billable anonymous call. A transient Redis blip briefly
+   * disabling the assistant is preferable to an unbounded provider bill.
   */
  async tryConsume(key: string): Promise<boolean> {
    const t = this.now();
@@ -120,15 +122,16 @@ export class PublicShareWorkspaceLimiter {
      );
      return admitted === 1;
    } catch (err) {
-      // Fail OPEN: this per-workspace cap is a COST backstop, not an access
-      // control — the funnel access gates and the per-IP throttle still apply.
-      // A transient Redis failure must not take the public-share assistant
-      // fully offline, so we admit the call rather than 500 the request.
+      // FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
+      // under its cap, so we DENY (the controller 429s) rather than admit an
+      // unmetered, billable anonymous call. The assistant is optional, so a
+      // transient Redis blip briefly disabling it is the safer failure mode than
+      // an unbounded provider bill.
      this.logger.error(
-        `share-ai workspace limiter Redis failure for key "${key}"; failing open`,
+        `share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
        err as Error,
      );
-      return true;
+      return false;
    }
  }
 }