fix(ai-chat): parse reasoning markdown only while expanded to stop the thinking-stream hang (#302)

The reasoning block memoized its markdown render on [trimmed] alone, so as the reasoning text streamed in it re-parsed the whole, ever-growing text (marked + DOMPurify) on every throttled delta (~20Hz) — an O(n^2) CPU storm that pinned the main thread and froze the chat during a long "thinking" phase. Worse, the block is collapsed by default, so all that parsing was for a hidden body the user never sees (the html is only shown inside <Collapse in={open}>).

Gate the parse on `open`: while collapsed, the block shows the cheap raw-text fallback and does no markdown parsing; expanding parses the current text once (an instant user click), and further streaming while open is the normal per-delta append render, like the answer.

Test: assert renderChatMarkdown is not called while collapsed and is called once on expand.

Closes #302

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-03 05:01:37 +03:00
parent e648771ab8
commit 2d30ad1fa2
2 changed files with 35 additions and 9 deletions
@@ -1,7 +1,14 @@
 import { describe, it, expect, vi } from "vitest";
-import { render, screen } from "@testing-library/react";
+import { render, screen, fireEvent } from "@testing-library/react";
 import { MantineProvider } from "@mantine/core";

+// Spy on the markdown renderer so we can assert it is NOT called while the block
+// is collapsed (the #302 fix) and IS called once on expand. The count/fallback
+// tests don't depend on real markdown, so a light stub is safe.
+vi.mock("@/features/ai-chat/utils/markdown.ts", () => ({
+  renderChatMarkdown: vi.fn((md: string) => `<p>${md}</p>`),
+}));
+
 // Stub react-i18next so `t` returns the key with `{{count}}` interpolated. This
 // keeps the assertions on the component's OWN count logic (authoritative vs
 // estimate) rather than on translation, and mirrors the t-mock pattern used by
@@ -17,6 +24,7 @@ vi.mock("react-i18next", () => ({

 import ReasoningBlock from "./reasoning-block";
 import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
+import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";

 // matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.

@@ -62,4 +70,18 @@ describe("ReasoningBlock", () => {
    // either way the text is present in the document.
    expect(screen.getByText(/reasoning/)).toBeDefined();
  });
+
+  it("does not parse the reasoning markdown while collapsed; parses on expand (#302)", () => {
+    const renderSpy = vi.mocked(renderChatMarkdown);
+    renderSpy.mockClear();
+    renderBlock({ text: "**bold** reasoning", tokens: 5 });
+    // Collapsed is the default. The expensive markdown parse (marked + DOMPurify)
+    // must NOT run for the hidden body — that O(n^2) re-parse on every streamed
+    // delta is exactly what froze the chat (#302). The collapsed body shows the
+    // cheap raw-text fallback instead.
+    expect(renderSpy).not.toHaveBeenCalled();
+    // Expanding parses the current text exactly once (a user-initiated click).
+    fireEvent.click(screen.getByRole("button"));
+    expect(renderSpy).toHaveBeenCalledTimes(1);
+  });
 });
@@ -34,15 +34,19 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
  // Authoritative count wins; otherwise estimate live from the streamed text.
  const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
  const trimmed = text.trim();
-  // Memoize the markdown render so toggling `open` (or a parent re-render caused
-  // by an unrelated streamed delta) does not re-parse the reasoning text; it
-  // recomputes only when the reasoning text itself changes (while it streams in).
-  // collapseBlankLines collapses the blank-line gaps the model emits between every
-  // list item / paragraph so the reasoning renders compactly (tight lists, joined
-  // paragraphs) — ONLY here, not in the normal answer.
+  // Parse the reasoning markdown ONLY while the block is expanded. Collapsed is the
+  // default and the common case during a long "thinking" stream: reasoning text
+  // streams in and grows with every throttled delta (~20Hz), so a `[trimmed]`-only
+  // memo re-parses the whole, ever-growing text (marked + DOMPurify) on every delta
+  // — an O(n²) storm that pins the main thread and freezes the chat, all for a block
+  // the user isn't even looking at (the html is only shown inside <Collapse in={open}>
+  // below). Gating on `open` skips that hidden parsing entirely; expanding parses the
+  // current text once (an instant, user-initiated click), and further streaming while
+  // open is the normal per-delta append render, like the answer.
  const html = useMemo(
-    () => (trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : ""),
-    [trimmed],
+    () =>
+      open && trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : "",
+    [open, trimmed],
  );

  return (