Compare commits

...

1 Commits

Author SHA1 Message Date
agent_coder 2d30ad1fa2 fix(ai-chat): parse reasoning markdown only while expanded to stop the thinking-stream hang (#302)
The reasoning block memoized its markdown render on [trimmed] alone, so as the
reasoning text streamed in it re-parsed the whole, ever-growing text (marked +
DOMPurify) on every throttled delta (~20Hz) — an O(n^2) CPU storm that pinned the
main thread and froze the chat during a long "thinking" phase. Worse, the block is
collapsed by default, so all that parsing was for a hidden body the user never sees
(html is only shown inside <Collapse in={open}>).

Gate the parse on `open`: while collapsed, the block shows the cheap raw-text fallback
and does no markdown parsing; expanding parses the current text once (a one-off cost
triggered by a user click), and while the block stays open each streamed delta
re-parses and re-renders the text as before, the same per-delta render the answer gets.

Test: assert renderChatMarkdown is not called while collapsed and is called once on
expand.

closes #302

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-03 05:01:37 +03:00
2 changed files with 35 additions and 9 deletions
@@ -1,7 +1,14 @@
import { describe, it, expect, vi } from "vitest"; import { describe, it, expect, vi } from "vitest";
import { render, screen } from "@testing-library/react"; import { render, screen, fireEvent } from "@testing-library/react";
import { MantineProvider } from "@mantine/core"; import { MantineProvider } from "@mantine/core";
// Spy on the markdown renderer so we can assert it is NOT called while the block
// is collapsed (the #302 fix) and IS called once on expand. The count/fallback
// tests don't depend on real markdown, so a light stub is safe.
vi.mock("@/features/ai-chat/utils/markdown.ts", () => {
  // Cheap stand-in for the real renderer: wraps the input in a paragraph tag and,
  // being a vi.fn, records every call so tests can count invocations.
  const stub = vi.fn((md: string) => `<p>${md}</p>`);
  return { renderChatMarkdown: stub };
});
// Stub react-i18next so `t` returns the key with `{{count}}` interpolated. This // Stub react-i18next so `t` returns the key with `{{count}}` interpolated. This
// keeps the assertions on the component's OWN count logic (authoritative vs // keeps the assertions on the component's OWN count logic (authoritative vs
// estimate) rather than on translation, and mirrors the t-mock pattern used by // estimate) rather than on translation, and mirrors the t-mock pattern used by
@@ -17,6 +24,7 @@ vi.mock("react-i18next", () => ({
import ReasoningBlock from "./reasoning-block"; import ReasoningBlock from "./reasoning-block";
import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. // matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
@@ -62,4 +70,18 @@ describe("ReasoningBlock", () => {
// either way the text is present in the document. // either way the text is present in the document.
expect(screen.getByText(/reasoning/)).toBeDefined(); expect(screen.getByText(/reasoning/)).toBeDefined();
}); });
it("does not parse the reasoning markdown while collapsed; parses on expand (#302)", () => {
  // Reset the mocked renderer's call history so only this test's renders count.
  const markdownSpy = vi.mocked(renderChatMarkdown);
  markdownSpy.mockClear();

  renderBlock({ text: "**bold** reasoning", tokens: 5 });

  // The block is collapsed by default. Rendering it in that state must not reach
  // the markdown renderer at all: re-parsing the hidden body on every streamed
  // delta is the freeze reported in #302. The collapsed body relies on the cheap
  // raw-text fallback instead.
  expect(markdownSpy).not.toHaveBeenCalled();

  // A user click on the block's button expands it, which parses the current
  // text exactly once.
  const toggle = screen.getByRole("button");
  fireEvent.click(toggle);
  expect(markdownSpy).toHaveBeenCalledTimes(1);
});
}); });
@@ -34,15 +34,19 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
// Authoritative count wins; otherwise estimate live from the streamed text. // Authoritative count wins; otherwise estimate live from the streamed text.
const count = tokens && tokens > 0 ? tokens : estimateTokens(text); const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
const trimmed = text.trim(); const trimmed = text.trim();
// Memoize the markdown render so toggling `open` (or a parent re-render caused // Parse the reasoning markdown ONLY while the block is expanded. Collapsed is the
// by an unrelated streamed delta) does not re-parse the reasoning text; it // default and the common case during a long "thinking" stream: reasoning text
// recomputes only when the reasoning text itself changes (while it streams in). // streams in and grows with every throttled delta (~20Hz), so a `[trimmed]`-only
// collapseBlankLines collapses the blank-line gaps the model emits between every // memo re-parses the whole, ever-growing text (marked + DOMPurify) on every delta
// list item / paragraph so the reasoning renders compactly (tight lists, joined // — an O(n²) storm that pins the main thread and freezes the chat, all for a block
// paragraphs) — ONLY here, not in the normal answer. // the user isn't even looking at (the html is only shown inside <Collapse in={open}>
// below). Gating on `open` skips that hidden parsing entirely; expanding parses the
// current text once (a one-off cost on a user-initiated click), and while the block
// stays open each streamed delta re-parses the text as before, like the answer.
const html = useMemo( const html = useMemo(
() => (trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : ""), () =>
[trimmed], open && trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : "",
[open, trimmed],
); );
return ( return (