diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx
index 3898136e..914b6c8d 100644
--- a/apps/client/src/features/ai-chat/components/chat-thread.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx
@@ -36,6 +36,14 @@ import {
} from "@/features/ai-chat/utils/queue-helpers.ts";
import classes from "@/features/ai-chat/components/ai-chat.module.css";
+// Throttle how often the streamed `messages` state triggers a re-render. Without
+// it, useChat updates state on EVERY token, so the whole transcript's markdown
+// (marked + DOMPurify) is re-parsed per token — on a long agent run that grows
+// into a quadratic CPU storm that pins the main thread and freezes the UI.
+// ~50ms (20 Hz) keeps streaming visually smooth while decoupling re-render cost
+// from the token rate.
+const STREAM_THROTTLE_MS = 50;
+
/** The page the user is currently viewing, sent as chat context. */
export interface OpenPageContext {
id: string;
@@ -253,6 +261,8 @@ export default function ChatThread({
id: chatStoreId,
messages: initialMessages,
transport,
+ // See STREAM_THROTTLE_MS — bounds re-render/markdown-reparse frequency.
+ experimental_throttle: STREAM_THROTTLE_MS,
// `onFinish` (ai@6 useChat) fires from a `finally` on EVERY terminal outcome
// — success, user Stop/abort (`isAbort`), network drop (`isDisconnect`), and
// stream error (`isError`). Keep calling `onTurnFinished()` on all of them
diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx
index 6436b4d6..0eabbd87 100644
--- a/apps/client/src/features/ai-chat/components/message-item.tsx
+++ b/apps/client/src/features/ai-chat/components/message-item.tsx
@@ -1,3 +1,4 @@
+import { memo } from "react";
import { Box, Text } from "@mantine/core";
import { useTranslation } from "react-i18next";
import type { UIMessage } from "@ai-sdk/react";
@@ -34,6 +35,39 @@ interface MessageItemProps {
assistantName?: string;
}
+/**
+ * One assistant text part rendered as sanitized markdown, wrapped in
+ * `React.memo` so a part whose props are unchanged is NOT re-parsed on every
+ * streamed delta: during a turn only the actively-growing tail part gets a new
+ * `text`, so every earlier part skips the expensive marked + DOMPurify pass.
+ * Both props (`text`, `neutralizeInternalLinks`) are primitives, so memo's
+ * default shallow compare is sufficient (the `text` string is compared by value).
+ */
+const MarkdownPart = memo(function MarkdownPart({
+ text,
+ neutralizeInternalLinks,
+}: {
+ text: string;
+ neutralizeInternalLinks: boolean;
+}) {
+ const html = renderChatMarkdown(text, { neutralizeInternalLinks });
+ if (html) {
+ return (

+ );
+ }
+ // Fallback when `renderChatMarkdown` returned nothing (could not render synchronously): raw text.
+ return (

+ {text}

+ );
+});
+
/**
* Render a single UIMessage by iterating its `parts`:
* - `text` parts -> sanitized markdown.
@@ -41,12 +75,13 @@ interface MessageItemProps {
* Other part kinds (reasoning, sources, files, step-start) are ignored for v1.
* User messages render their text as a right-aligned plain bubble.
*
- * This component is intentionally NOT memoized: `useChat` replaces the streaming
- * assistant message with a freshly cloned object on every streamed delta, so the
- * `message` prop identity (and its `parts`) changes each tick. Re-rendering the
- * text parts on each delta is what makes the answer stream in progressively.
+ * This component is memoized (see `arePropsEqual` at the bottom) on a cheap
+ * per-message content signature: the streaming TAIL message's signature changes
+ * on each delta so it still re-renders and streams in, while finalized rows are
+ * skipped. Each text part's markdown is itself memoized via `MarkdownPart`, so a
+ * long turn no longer re-parses the whole transcript on every token.
*/
-export default function MessageItem({
+function MessageItem({
message,
showCitations = true,
neutralizeInternalLinks = false,
@@ -109,24 +144,12 @@ export default function MessageItem({
// starts with an empty text part before the first token arrives); the
// typing indicator covers that gap until real content streams in.
if (!part.text.trim()) return null;
- const html = renderChatMarkdown(part.text, {
- neutralizeInternalLinks,
- });
- if (html) {
- return (
-
- );
- }
- // Fallback when markdown could not render synchronously: raw text.
return (
-
- {part.text}
-
+
);
}
@@ -177,3 +200,65 @@ export default function MessageItem({
);
}
+
+/** Cheap content signature for one message, compared by `arePropsEqual` to
+ * decide whether a row must re-render. Assumes streaming is APPEND-ONLY (text
+ * only grows, parts are only appended, a part's `state` only moves forward), so
+ * per part it records just [type, text length, state, error/output presence],
+ * plus the message metadata (error/finishReason/usage.reasoningTokens). Caveat:
+ * a same-length text rewrite or a replaced `output` value goes undetected. */
+function messageSignature(message: UIMessage): string {
+ const parts = message.parts
+ .map((p) => {
+ // Structural view of the part: every field but `type` is optional; absent ones fall back to 0 / "".
+ const any = p as {
+ type: string;
+ text?: string;
+ state?: string;
+ errorText?: string;
+ output?: unknown;
+ };
+ return [
+ any.type,
+ any.text?.length ?? 0,
+ any.state ?? "",
+ any.errorText ? 1 : 0,
+ any.output !== undefined ? 1 : 0,
+ ].join(":");
+ })
+ .join("|");
+ const meta = message.metadata as
+ | { error?: string; finishReason?: string; usage?: { reasoningTokens?: number } }
+ | undefined;
+ // `usage.reasoningTokens` is neither append-only nor part-bound: the authoritative
+ // turn total arrives on the final `finish-step` AFTER the reasoning text length and
+ // state are frozen. Leaving it out would keep the signature unchanged at that point
+ // and skip the re-render, so the "Thinking · N tokens" header (reasoningTokensForPart)
+ // would keep showing the live estimate instead of snapping to the exact figure.
+ return `${message.id}#${message.role}#${parts}#${meta?.error ?? ""}#${
+ meta?.finishReason ?? ""
+ }#${meta?.usage?.reasoningTokens ?? ""}`;
+}
+
+/** `React.memo` comparator for `MessageItem`: returning true skips the re-render.
+ * The streaming TAIL message gets a fresh object whose signature changes each
+ * delta, so it still re-renders and streams in; every FINALIZED message is
+ * skipped, turning a per-token whole-transcript re-render into a tail-only one. */
+function arePropsEqual(
+ prev: MessageItemProps,
+ next: MessageItemProps,
+): boolean {
+ // NOTE(review): only these three props and `message` are compared — confirm MessageItemProps has no others.
+ if (
+ prev.showCitations !== next.showCitations ||
+ prev.neutralizeInternalLinks !== next.neutralizeInternalLinks ||
+ prev.assistantName !== next.assistantName
+ ) {
+ return false;
+ }
+ // Fast path: same message object (finalized rows keep identity across deltas) — no signatures built.
+ if (prev.message === next.message) return true;
+ return messageSignature(prev.message) === messageSignature(next.message);
+}
+
+export default memo(MessageItem, arePropsEqual);
diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.tsx
index 43e88a69..49b6b5de 100644
--- a/apps/client/src/features/ai-chat/components/reasoning-block.tsx
+++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx
@@ -1,4 +1,4 @@
-import { useState } from "react";
+import { memo, useMemo, useState } from "react";
import { Box, Collapse, Group, Text, UnstyledButton } from "@mantine/core";
import { IconChevronDown } from "@tabler/icons-react";
import { useTranslation } from "react-i18next";
@@ -26,14 +26,20 @@ interface ReasoningBlockProps {
* Providers that don't stream reasoning TEXT still render this block from the
* authoritative count alone (header only, empty body) so the cost is visible.
*/
-export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
+function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
const { t } = useTranslation();
const [open, setOpen] = useState(false);
// Authoritative count wins; otherwise estimate live from the streamed text.
const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
const trimmed = text.trim();
- const html = trimmed ? renderChatMarkdown(trimmed, {}) : "";
+ // Memoize the markdown render so toggling `open` (or a parent re-render caused
+ // by an unrelated streamed delta) does not re-parse the reasoning text; it
+ // recomputes only when the reasoning text itself changes (while it streams in).
+ const html = useMemo(
+ () => (trimmed ? renderChatMarkdown(trimmed, {}) : ""),
+ [trimmed],
+ );
return (
@@ -81,3 +87,8 @@ export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
);
}
+
+// Memoized: a parent re-render is skipped unless `text`/`tokens` change (primitive
+// props, default shallow compare), so streaming of OTHER content does not re-run
+// this component; its own `open` state toggle still re-renders it as usual.
+export default memo(ReasoningBlock);