feat(ai-chat): surface the real cause in the error banner

The AI chat error banner always showed a generic "Something went wrong" with no reason. The server already forwards the provider cause into the stream (e.g. "Cannot connect to API: read ECONNRESET"), but the client hid it behind a static heading. - describeChatError now returns { title, detail }: a short heading naming the cause category plus a one-line explanation. - Add classifyProviderError: maps connection reset, timeout, rate limit, context-window overflow, quota and auth failures to clear categories; the 403/503 gating responses are preserved; unknown errors fall back to the verbatim provider text. - Match HTTP status codes only as the leading token and textual signatures only against the message head (before "| response body:"), so a number or phrase in the response-body snippet never mislabels the cause. - Use the new {title, detail} in all three banners: chat-thread, share-ai-widget and the persisted-error banner in message-item. - Cover the classifier with 20 unit tests (categories + regressions).
2026-06-21 18:54:43 +03:00
parent 0cfc3c8f89
commit 0bbf94c154
5 changed files with 310 additions and 48 deletions
--- a/apps/client/src/features/ai-chat/components/chat-thread.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx
@@ -175,6 +175,11 @@ export default function ChatThread({

  const isStreaming = status === "submitted" || status === "streaming";

+  // Classify the turn error into a heading + detail so the banner names the cause
+  // (connection reset, timeout, rate limit, context overflow, quota, ...) instead
+  // of a generic "Something went wrong".
+  const errorView = error ? describeChatError(error.message ?? "", t) : null;
+
  // Clicking a role card both binds the role to THIS new chat and immediately
  // starts the conversation. roleIdRef is set synchronously here because the
  // parent's selectedRoleId state update would only reach roleIdRef on the next
@@ -198,15 +203,15 @@ export default function ChatThread({
        assistantName={assistantName}
      />

-      {error && (
+      {errorView && (
        <Alert
          variant="light"
          color="red"
          icon={<IconAlertTriangle size={16} />}
          mb="xs"
-          title={t("Something went wrong")}
+          title={errorView.title}
        >
-          {describeChatError(error.message ?? "", t)}
+          {errorView.detail}
        </Alert>
      )}

--- a/apps/client/src/features/ai-chat/components/message-item.tsx
+++ b/apps/client/src/features/ai-chat/components/message-item.tsx
@@ -114,14 +114,18 @@ export default function MessageItem({
      {(() => {
        const errorText = (message.metadata as { error?: string } | undefined)?.error;
        if (!errorText) return null;
+        // Same classified-error banner as the live chat: a heading naming the
+        // cause plus a one-line detail.
+        const errorView = describeChatError(errorText, t);
        return (
          <Alert
            variant="light"
            color="red"
            icon={<IconAlertTriangle size={16} />}
            mt={4}
+            title={errorView.title}
          >
-            {describeChatError(errorText, t)}
+            {errorView.detail}
          </Alert>
        );
      })()}
--- a/apps/client/src/features/ai-chat/utils/error-message.test.ts
+++ b/apps/client/src/features/ai-chat/utils/error-message.test.ts
@@ -6,48 +6,163 @@ import { describeChatError } from "./error-message";
 const t = (key: string) => key;

 describe("describeChatError", () => {
-  it('surfaces a provider "402: ..." stream error verbatim', () => {
-    expect(describeChatError("402: Insufficient credits", t)).toBe(
-      "402: Insufficient credits",
-    );
-  });
-
-  it('does NOT misclassify a body that merely contains "403" (no "statusCode":403)', () => {
-    // A provider message mentioning the number 403 must be surfaced verbatim,
-    // never folded into the "AI chat is disabled" gating message.
-    const msg = "429: rate limited after 403 attempts";
-    expect(describeChatError(msg, t)).toBe(msg);
-  });
-
-  it('maps a {"statusCode":403} body to the disabled message', () => {
+  it('maps a {"statusCode":403} body to the disabled heading', () => {
    const body = '{"statusCode":403,"message":"Forbidden"}';
-    expect(describeChatError(body, t)).toBe(
-      "AI chat is disabled for this workspace.",
-    );
+    expect(describeChatError(body, t)).toEqual({
+      title: "AI chat is disabled",
+      detail: "AI chat is disabled for this workspace.",
+    });
  });

-  it('maps a {"statusCode":503} body to the not-configured message', () => {
+  it('maps a {"statusCode":503} body to the not-configured heading', () => {
    const body = '{"statusCode":503,"message":"Service Unavailable"}';
-    expect(describeChatError(body, t)).toBe(
-      "The AI provider is not configured. Ask an administrator to set it up.",
+    expect(describeChatError(body, t)).toEqual({
+      title: "AI provider not configured",
+      detail:
+        "The AI provider is not configured. Ask an administrator to set it up.",
+    });
+  });
+
+  it("classifies a dropped connection (ECONNRESET) as a lost-connection error", () => {
+    expect(
+      describeChatError("Cannot connect to API: read ECONNRESET", t).title,
+    ).toBe("Lost connection to the AI provider");
+  });
+
+  it('classifies "fetch failed" as a lost-connection error', () => {
+    expect(describeChatError("fetch failed", t).title).toBe(
+      "Lost connection to the AI provider",
    );
  });

-  it('falls back to the generic message for "An error occurred."', () => {
-    expect(describeChatError("An error occurred.", t)).toBe(
-      "The AI agent could not respond. Please try again.",
+  it("classifies ETIMEDOUT as a timeout", () => {
+    expect(describeChatError("ETIMEDOUT", t).title).toBe(
+      "The AI provider timed out",
    );
  });

-  it('falls back to the generic message for "Internal server error"', () => {
-    expect(describeChatError("Internal server error", t)).toBe(
-      "The AI agent could not respond. Please try again.",
+  it('classifies "504: Gateway Timeout" as a timeout', () => {
+    expect(describeChatError("504: Gateway Timeout", t).title).toBe(
+      "The AI provider timed out",
    );
  });

-  it("falls back to the generic message for empty input", () => {
-    expect(describeChatError("", t)).toBe(
-      "The AI agent could not respond. Please try again.",
+  it('classifies "429: Too Many Requests" as rate limited', () => {
+    expect(describeChatError("429: Too Many Requests", t).title).toBe(
+      "Rate limited by the AI provider",
+    );
+  });
+
+  it('does NOT misclassify a body that merely contains "403" as disabled', () => {
+    // Regression intent: a provider message mentioning the number 403 must never
+    // be folded into the "AI chat is disabled" gating heading. Here the 429
+    // signature wins (checked before any bare-403 logic exists), so it maps to
+    // the rate-limit category instead.
+    const view = describeChatError("429: rate limited after 403 attempts", t);
+    expect(view.title).toBe("Rate limited by the AI provider");
+    expect(view.title).not.toBe("AI chat is disabled");
+  });
+
+  it("classifies a context-window overflow as too-large", () => {
+    expect(
+      describeChatError(
+        "This model's maximum context length is 128000 tokens",
+        t,
+      ).title,
+    ).toBe("The conversation is too large");
+  });
+
+  it('classifies "402: Insufficient credits" as quota exceeded', () => {
+    expect(describeChatError("402: Insufficient credits", t).title).toBe(
+      "AI provider quota exceeded",
+    );
+  });
+
+  it('classifies "401: Unauthorized" as an auth failure', () => {
+    expect(describeChatError("401: Unauthorized", t).title).toBe(
+      "AI provider authentication failed",
+    );
+  });
+
+  it("falls back to the generic heading + detail for empty input", () => {
+    expect(describeChatError("", t)).toEqual({
+      title: "Something went wrong",
+      detail: "The AI agent could not respond. Please try again.",
+    });
+  });
+
+  it('falls back to the generic heading + detail for "An error occurred."', () => {
+    expect(describeChatError("An error occurred.", t)).toEqual({
+      title: "Something went wrong",
+      detail: "The AI agent could not respond. Please try again.",
+    });
+  });
+
+  it('falls back to the generic heading + detail for "Internal server error"', () => {
+    expect(describeChatError("Internal server error", t)).toEqual({
+      title: "Something went wrong",
+      detail: "The AI agent could not respond. Please try again.",
+    });
+  });
+
+  it("surfaces an unknown-but-informative provider detail verbatim under the generic heading", () => {
+    expect(describeChatError("418: I'm a teapot", t)).toEqual({
+      title: "Something went wrong",
+      detail: "418: I'm a teapot",
+    });
+  });
+
+  it("does NOT treat a number inside the response body as a leading status code (no auth)", () => {
+    // The real status (500) leads the string; the "401" lives in the snippet and
+    // must not trigger the auth category. The verbatim provider text is surfaced.
+    const body =
+      "500: Server error | response body: model gpt-4o-401-preview not found";
+    expect(describeChatError(body, t)).toEqual({
+      title: "Something went wrong",
+      detail: body,
+    });
+  });
+
+  it("does NOT treat a passing mention of billing as a quota error", () => {
+    // "billing" is no longer a quota signature; the verbatim text is surfaced.
+    const body = "502: Bad Gateway | response body: see our billing page";
+    expect(describeChatError(body, t)).toEqual({
+      title: "Something went wrong",
+      detail: body,
+    });
+  });
+
+  it('still rate-limits "429: rate limited after 403 attempts" and never disables', () => {
+    const view = describeChatError("429: rate limited after 403 attempts", t);
+    expect(view.title).toBe("Rate limited by the AI provider");
+    expect(view.title).not.toBe("AI chat is disabled");
+  });
+
+  it('does NOT treat "rate limit" inside the response body as a rate-limit error', () => {
+    // The textual rate-limit phrase lives only in the response-body snippet, and
+    // the leading 500 is not a classified numeric code, so it must not leak into
+    // the rate-limit category. (The detail itself falls back to the generic line
+    // here because the leading message contains "Internal Server Error", which
+    // providerDetail suppresses — the title is what this case pins.)
+    const body =
+      "500: Internal Server Error | response body: rate limit info: see our docs";
+    expect(describeChatError(body, t).title).toBe("Something went wrong");
+    expect(describeChatError(body, t).title).not.toBe(
+      "Rate limited by the AI provider",
+    );
+  });
+
+  it('does NOT treat ETIMEDOUT inside the response body as a timeout', () => {
+    // The 503 leads the string but is not a classified numeric code, and the
+    // ETIMEDOUT signature appears only in the body, so it must not leak into the
+    // timeout category; the verbatim text is surfaced under the generic heading.
+    const body = "503: x | response body: ETIMEDOUT appears in this log line";
+    expect(describeChatError(body, t)).toEqual({
+      title: "Something went wrong",
+      detail: body,
+    });
+    expect(describeChatError(body, t).title).not.toBe(
+      "The AI provider timed out",
    );
  });
 });
--- a/apps/client/src/features/ai-chat/utils/error-message.ts
+++ b/apps/client/src/features/ai-chat/utils/error-message.ts
@@ -1,24 +1,158 @@
 /**
- * Turn an AI chat error message into a friendly inline string. Used for BOTH the
- * live `useChat().error` (its `.message`) and a persisted assistant error stored
- * in `metadata.error`. Our own gating responses arrive as a raw NestJS JSON error
- * body carrying a numeric "statusCode" field (matched precisely, not by bare
- * substring, so a provider message that merely contains "403"/"503"/"disabled" is
- * never misclassified). Everything else — provider stream failures forwarded as
- * "<status>: <message>" (402 credits, 429 rate limit, ...) — is surfaced verbatim.
+ * A classified AI chat error: a short bold heading naming the cause category and
+ * a one-line human-readable detail / next step. Both strings are already passed
+ * through `t`, so callers render them directly.
+ */
+export interface ChatErrorView {
+  title: string;
+  detail: string;
+}
+
+/**
+ * Turn an AI chat error message into a friendly heading + detail. Used for BOTH
+ * the live `useChat().error` (its `.message`) and a persisted assistant error in
+ * `metadata.error`. Our own gating responses arrive as a raw NestJS JSON error
+ * body carrying a numeric "statusCode" (matched precisely, not by bare substring,
+ * so a provider message that merely contains "403"/"503" is never misclassified).
+ * Known provider/network failures (connection reset, timeout, rate limit, context
+ * overflow, quota, auth) are mapped to a clear category; anything else falls back
+ * to the raw provider detail (or a generic line) under the original heading.
 */
 export function describeChatError(
  message: string,
  t: (key: string) => string,
-): string {
+): ChatErrorView {
  const msg = message ?? "";
+
  if (/"statusCode"\s*:\s*403\b/.test(msg)) {
-    return t("AI chat is disabled for this workspace.");
+    return {
+      title: t("AI chat is disabled"),
+      detail: t("AI chat is disabled for this workspace."),
+    };
  }
  if (/"statusCode"\s*:\s*503\b/.test(msg)) {
-    return t("The AI provider is not configured. Ask an administrator to set it up.");
+    return {
+      title: t("AI provider not configured"),
+      detail: t(
+        "The AI provider is not configured. Ask an administrator to set it up.",
+      ),
+    };
  }
-  return providerDetail(msg) ?? t("The AI agent could not respond. Please try again.");
+
+  const category = classifyProviderError(msg);
+  if (category) {
+    return { title: t(category.title), detail: t(category.detail) };
+  }
+
+  // Unknown error: surface the raw provider detail when it is informative,
+  // otherwise a generic line. The heading stays the original generic one.
+  return {
+    title: t("Something went wrong"),
+    detail:
+      providerDetail(msg) ??
+      t("The AI agent could not respond. Please try again."),
+  };
+}
+
+interface ErrorCategory {
+  /** English key for the bold heading. */
+  title: string;
+  /** English key for the one-line explanation. */
+  detail: string;
+}
+
+/**
+ * Map a provider/network error string to a friendly category. Order matters: the
+ * most specific signatures are tested first. Returns null when nothing matches,
+ * so the caller can fall back to the raw provider text. The English keys returned
+ * here are passed through `t` by the caller.
+ *
+ * The server formats provider errors as "<statusCode>: <message> | response body:
+ * <snippet>" (see server-side describeProviderError), so the HTTP status is always
+ * the LEADING token. We match a numeric code only when it leads the string, so a
+ * number inside the response-body snippet never triggers a category; textual
+ * signatures are matched only against the leading message (before the response
+ * body), so a phrase inside the snippet never triggers a category either.
+ */
+function classifyProviderError(msg: string): ErrorCategory | null {
+  const code = /^\s*(\d{3})\b/.exec(msg)?.[1] ?? "";
+  // The server appends "| response body: <snippet>" to provider errors; match
+  // textual signatures only against the leading provider message so a phrase
+  // inside the response-body snippet never triggers a wrong category. The numeric
+  // status code is read from the start of the full string above.
+  const head = msg.split(/\|\s*response body:/i)[0];
+
+  // Connection dropped / provider unreachable. ECONNRESET is the production case:
+  // the LLM socket was reset mid-stream. "terminated" is scoped to a connection/
+  // stream context so it does not match benign "... was terminated" messages.
+  if (
+    /ECONNRESET|ECONNREFUSED|ENOTFOUND|EAI_AGAIN|EPIPE|socket hang up|cannot connect|fetch failed|network error|connection (?:error|closed|reset|terminated)|stream terminated/i.test(
+      head,
+    )
+  ) {
+    return {
+      title: "Lost connection to the AI provider",
+      detail:
+        "The connection to the AI provider dropped before the answer finished. Please try again.",
+    };
+  }
+  // Timeout.
+  if (
+    code === "504" ||
+    code === "408" ||
+    /ETIMEDOUT|timed[\s-]?out|\btimeout\b/i.test(head)
+  ) {
+    return {
+      title: "The AI provider timed out",
+      detail: "The AI provider took too long to respond. Please try again.",
+    };
+  }
+  // Rate limited.
+  if (code === "429" || /rate[\s-]?limit|too many requests/i.test(head)) {
+    return {
+      title: "Rate limited by the AI provider",
+      detail:
+        "The AI provider is rate-limiting requests. Wait a moment and try again.",
+    };
+  }
+  // Context window / token budget exceeded.
+  if (
+    code === "413" ||
+    /context[\s_-]?(?:length|window)|maximum context|context_length_exceeded|too many tokens|maximum[^.]*tokens|reduce the length/i.test(
+      head,
+    )
+  ) {
+    return {
+      title: "The conversation is too large",
+      detail:
+        "The document and search results exceeded the model's context window. Start a new chat or narrow the request.",
+    };
+  }
+  // Out of credits / quota / payment required.
+  if (
+    code === "402" ||
+    /payment required|insufficient (?:credits|quota|funds|balance)|out of credits|quota (?:exceeded|exhausted)/i.test(
+      head,
+    )
+  ) {
+    return {
+      title: "AI provider quota exceeded",
+      detail:
+        "The AI provider rejected the request because of credits or quota. Check the provider account.",
+    };
+  }
+  // Authentication / bad API key.
+  if (
+    code === "401" ||
+    /\bunauthorized\b|invalid api key|user not found|\bauthentication\b/i.test(head)
+  ) {
+    return {
+      title: "AI provider authentication failed",
+      detail:
+        "The AI provider rejected the credentials. Ask an administrator to check the API key.",
+    };
+  }
+  return null;
 }

 /**
--- a/apps/client/src/features/share/components/share-ai-widget.tsx
+++ b/apps/client/src/features/share/components/share-ai-widget.tsx
@@ -88,6 +88,10 @@ export default function ShareAiWidget({

  const isStreaming = status === "submitted" || status === "streaming";

+  // Same classified-error banner as the internal chat: name the cause instead of a
+  // generic heading.
+  const errorView = error ? describeChatError(error.message ?? "", t) : null;
+
  const handleSend = () => {
    const text = input.trim();
    if (!text || isStreaming) return;
@@ -173,18 +177,18 @@ export default function ShareAiWidget({
          />
        </Box>

-        {error && (
+        {errorView && (
          <Alert
            variant="light"
            color="red"
            icon={<IconAlertTriangle size={16} />}
            mx="sm"
            mb="xs"
-            title={t("Something went wrong")}
+            title={errorView.title}
          >
-            {/* Surface the real cause (provider/gating message) instead of a
+            {/* Surface the real cause (provider/gating category) instead of a
                generic line — same helper the internal chat uses. */}
-            {describeChatError(error.message ?? "", t)}
+            {errorView.detail}
          </Alert>
        )}