fix(ai-chat): parse reasoning markdown only while expanded to stop the thinking-stream hang (#302)
The reasoning block memoized its markdown render on [trimmed] alone, so as the
reasoning text streamed in it re-parsed the whole, ever-growing text (marked +
DOMPurify) on every throttled delta (~20Hz) — an O(n^2) CPU storm that pinned the
main thread and froze the chat during a long "thinking" phase. Worse, the block is
collapsed by default, so all that parsing was for a hidden body the user never sees
(html is only shown inside <Collapse in={open}>).
Gate the parse on `open`: while collapsed, the block shows the cheap raw-text fallback
and does no markdown parsing; expanding parses the current text once (on a user-initiated
click), and streaming that continues while the block is open re-renders per delta as usual, like the answer.
Test: assert renderChatMarkdown is not called while collapsed and is called once on
expand.
closes #302
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,14 @@
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
import { render, screen } from "@testing-library/react";
|
||||
import { render, screen, fireEvent } from "@testing-library/react";
|
||||
import { MantineProvider } from "@mantine/core";
|
||||
|
||||
// Spy on the markdown renderer so we can assert it is NOT called while the block
|
||||
// is collapsed (the #302 fix) and IS called once on expand. The count/fallback
|
||||
// tests don't depend on real markdown, so a light stub is safe.
|
||||
vi.mock("@/features/ai-chat/utils/markdown.ts", () => ({
|
||||
renderChatMarkdown: vi.fn((md: string) => `<p>${md}</p>`),
|
||||
}));
|
||||
|
||||
// Stub react-i18next so `t` returns the key with `{{count}}` interpolated. This
|
||||
// keeps the assertions on the component's OWN count logic (authoritative vs
|
||||
// estimate) rather than on translation, and mirrors the t-mock pattern used by
|
||||
@@ -17,6 +24,7 @@ vi.mock("react-i18next", () => ({
|
||||
|
||||
import ReasoningBlock from "./reasoning-block";
|
||||
import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
|
||||
import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
|
||||
|
||||
// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
|
||||
|
||||
@@ -62,4 +70,18 @@ describe("ReasoningBlock", () => {
|
||||
// either way the text is present in the document.
|
||||
expect(screen.getByText(/reasoning/)).toBeDefined();
|
||||
});
|
||||
|
||||
it("does not parse the reasoning markdown while collapsed; parses on expand (#302)", () => {
|
||||
const renderSpy = vi.mocked(renderChatMarkdown);
|
||||
renderSpy.mockClear();
|
||||
renderBlock({ text: "**bold** reasoning", tokens: 5 });
|
||||
// Collapsed is the default. The expensive markdown parse (marked + DOMPurify)
|
||||
// must NOT run for the hidden body — that O(n^2) re-parse on every streamed
|
||||
// delta is exactly what froze the chat (#302). The collapsed body shows the
|
||||
// cheap raw-text fallback instead.
|
||||
expect(renderSpy).not.toHaveBeenCalled();
|
||||
// Expanding parses the current text exactly once (a user-initiated click).
|
||||
fireEvent.click(screen.getByRole("button"));
|
||||
expect(renderSpy).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -34,15 +34,19 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
|
||||
// Authoritative count wins; otherwise estimate live from the streamed text.
|
||||
const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
|
||||
const trimmed = text.trim();
|
||||
// Memoize the markdown render so toggling `open` (or a parent re-render caused
|
||||
// by an unrelated streamed delta) does not re-parse the reasoning text; it
|
||||
// recomputes only when the reasoning text itself changes (while it streams in).
|
||||
// collapseBlankLines collapses the blank-line gaps the model emits between every
|
||||
// list item / paragraph so the reasoning renders compactly (tight lists, joined
|
||||
// paragraphs) — ONLY here, not in the normal answer.
|
||||
// Parse the reasoning markdown ONLY while the block is expanded. Collapsed is the
|
||||
// default and the common case during a long "thinking" stream: reasoning text
|
||||
// streams in and grows with every throttled delta (~20Hz), so a `[trimmed]`-only
|
||||
// memo re-parses the whole, ever-growing text (marked + DOMPurify) on every delta
|
||||
// — an O(n²) storm that pins the main thread and freezes the chat, all for a block
|
||||
// the user isn't even looking at (the html is only shown inside <Collapse in={open}>
|
||||
// below). Gating on `open` skips that hidden parsing entirely; expanding parses the
|
||||
// current text once (an instant, user-initiated click), and further streaming while
|
||||
// open is the normal per-delta append render, like the answer.
|
||||
const html = useMemo(
|
||||
() => (trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : ""),
|
||||
[trimmed],
|
||||
() =>
|
||||
open && trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : "",
|
||||
[open, trimmed],
|
||||
);
|
||||
|
||||
return (
|
||||
|
||||
Reference in New Issue
Block a user