diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx
index 3898136e..914b6c8d 100644
--- a/apps/client/src/features/ai-chat/components/chat-thread.tsx
+++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx
@@ -36,6 +36,14 @@ import {
 } from "@/features/ai-chat/utils/queue-helpers.ts";
 import classes from "@/features/ai-chat/components/ai-chat.module.css";
 
+// Minimum interval (ms) between re-renders driven by the streamed `messages`
+// state; handed to useChat as `experimental_throttle`. Without it, useChat
+// updates state on EVERY token, so the whole transcript's markdown (marked +
+// DOMPurify) is re-parsed per token — on a long agent run that grows into a
+// quadratic CPU storm that pins the main thread and freezes the UI. ~50ms (20 Hz)
+// keeps streaming visually smooth while decoupling re-render cost from token rate.
+const STREAM_THROTTLE_MS = 50;
+
 /** The page the user is currently viewing, sent as chat context. */
 export interface OpenPageContext {
   id: string;
@@ -253,6 +261,8 @@ export default function ChatThread({
     id: chatStoreId,
     messages: initialMessages,
     transport,
+    // See STREAM_THROTTLE_MS — bounds re-render/markdown-reparse frequency.
+    experimental_throttle: STREAM_THROTTLE_MS,
     // `onFinish` (ai@6 useChat) fires from a `finally` on EVERY terminal outcome
     // — success, user Stop/abort (`isAbort`), network drop (`isDisconnect`), and
     // stream error (`isError`). Keep calling `onTurnFinished()` on all of them
diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx
index 6436b4d6..0eabbd87 100644
--- a/apps/client/src/features/ai-chat/components/message-item.tsx
+++ b/apps/client/src/features/ai-chat/components/message-item.tsx
@@ -1,3 +1,4 @@
+import { memo } from "react";
 import { Box, Text } from "@mantine/core";
 import { useTranslation } from "react-i18next";
 import type { UIMessage } from "@ai-sdk/react";
@@ -34,6 +35,39 @@ interface MessageItemProps {
   assistantName?: string;
 }
 
+/**
+ * Renders a single assistant text part as sanitized markdown.
+ *
+ * Wrapped in `memo` with the default shallow comparison: both props are
+ * primitives, so `text` is compared by value. While a turn streams, only the
+ * growing tail part receives a new `text`; earlier parts keep equal props and
+ * therefore skip the expensive marked + DOMPurify pass.
+ */
+const MarkdownPart = memo(function MarkdownPart(props: {
+  text: string;
+  neutralizeInternalLinks: boolean;
+}) {
+  const { text, neutralizeInternalLinks } = props;
+  const rendered = renderChatMarkdown(text, { neutralizeInternalLinks });
+
+  // Markdown could not be rendered synchronously: show the raw text instead.
+  if (!rendered) {
+    return (
+      <Text className={classes.markdown} style={{ whiteSpace: "pre-wrap" }}>
+        {text}
+      </Text>
+    );
+  }
+
+  return (
+    <div
+      className={classes.markdown}
+      // Sanitized by renderChatMarkdown (DOMPurify) before insertion.
+      dangerouslySetInnerHTML={{ __html: rendered }}
+    />
+  );
+});
+
 /**
  * Render a single UIMessage by iterating its `parts`:
  *  - `text` parts -> sanitized markdown.
@@ -41,12 +75,13 @@ interface MessageItemProps {
  * Other part kinds (reasoning, sources, files, step-start) are ignored for v1.
  * User messages render their text as a right-aligned plain bubble.
  *
- * This component is intentionally NOT memoized: `useChat` replaces the streaming
- * assistant message with a freshly cloned object on every streamed delta, so the
- * `message` prop identity (and its `parts`) changes each tick. Re-rendering the
- * text parts on each delta is what makes the answer stream in progressively.
+ * This component is memoized (see `arePropsEqual` at the bottom) on a cheap
+ * per-message content signature: the streaming TAIL message's signature changes
+ * on each delta so it still re-renders and streams in, while finalized rows are
+ * skipped. Each text part's markdown is itself memoized via `MarkdownPart`, so a
+ * long turn no longer re-parses the whole transcript on every token.
  */
-export default function MessageItem({
+function MessageItem({
   message,
   showCitations = true,
   neutralizeInternalLinks = false,
@@ -109,24 +144,12 @@ export default function MessageItem({
           // starts with an empty text part before the first token arrives); the
           // typing indicator covers that gap until real content streams in.
           if (!part.text.trim()) return null;
-          const html = renderChatMarkdown(part.text, {
-            neutralizeInternalLinks,
-          });
-          if (html) {
-            return (
-              <div
-                key={index}
-                className={classes.markdown}
-                // Sanitized by renderChatMarkdown (DOMPurify) before insertion.
-                dangerouslySetInnerHTML={{ __html: html }}
-              />
-            );
-          }
-          // Fallback when markdown could not render synchronously: raw text.
           return (
-            <Text key={index} className={classes.markdown} style={{ whiteSpace: "pre-wrap" }}>
-              {part.text}
-            </Text>
+            <MarkdownPart
+              key={index}
+              text={part.text}
+              neutralizeInternalLinks={neutralizeInternalLinks}
+            />
           );
         }
 
@@ -177,3 +200,65 @@ export default function MessageItem({
     </Box>
   );
 }
+
+/** Cheap string fingerprint of one message, used to decide whether its row has
+ *  to re-render. Each part contributes `type:textLength:state:hasError:hasOutput`;
+ *  the message adds its id, role and the metadata fields error / finishReason /
+ *  usage.reasoningTokens. Text is tracked by LENGTH only — this assumes streaming
+ *  is append-only (text parts only grow, parts are only appended), so full
+ *  strings never have to be compared on each delta. */
+function messageSignature(message: UIMessage): string {
+  type PartLike = {
+    type: string;
+    text?: string;
+    state?: string;
+    errorText?: string;
+    output?: unknown;
+  };
+  const partKeys: string[] = [];
+  for (const raw of message.parts) {
+    const part = raw as PartLike;
+    const fields = [
+      part.type,
+      part.text?.length ?? 0,
+      part.state ?? "",
+      part.errorText ? 1 : 0,
+      part.output !== undefined ? 1 : 0,
+    ];
+    partKeys.push(fields.join(":"));
+  }
+  // `usage.reasoningTokens` is included because the authoritative turn total
+  // arrives on the final `finish-step`, AFTER the reasoning text length and state
+  // are frozen; without it the "Thinking · N tokens" header would keep its estimate.
+  const meta = message.metadata as
+    | { error?: string; finishReason?: string; usage?: { reasoningTokens?: number } }
+    | undefined;
+  const error = meta?.error ?? "";
+  const finishReason = meta?.finishReason ?? "";
+  const reasoningTokens = meta?.usage?.reasoningTokens ?? "";
+  const head = `${message.id}#${message.role}#${partKeys.join("|")}`;
+  return `${head}#${error}#${finishReason}#${reasoningTokens}`;
+}
+
+/** `memo` comparator for MessageItem: returns true (skip the re-render) when the
+ *  display options are unchanged and the message is either the same object or
+ *  has an equal content signature. The streaming TAIL message gets a new object
+ *  with a new signature per delta and re-renders; finalized rows are skipped. */
+function arePropsEqual(
+  prev: MessageItemProps,
+  next: MessageItemProps,
+): boolean {
+  const sameOptions =
+    prev.showCitations === next.showCitations &&
+    prev.neutralizeInternalLinks === next.neutralizeInternalLinks &&
+    prev.assistantName === next.assistantName;
+  if (!sameOptions) return false;
+  // Identical object (finalized rows keep their identity across deltas): no
+  // need to build signatures at all.
+  const { message: before } = prev;
+  const { message: after } = next;
+  if (before === after) return true;
+  return messageSignature(before) === messageSignature(after);
+}
+
+export default memo(MessageItem, arePropsEqual);
diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.tsx
index 43e88a69..49b6b5de 100644
--- a/apps/client/src/features/ai-chat/components/reasoning-block.tsx
+++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx
@@ -1,4 +1,4 @@
-import { useState } from "react";
+import { memo, useMemo, useState } from "react";
 import { Box, Collapse, Group, Text, UnstyledButton } from "@mantine/core";
 import { IconChevronDown } from "@tabler/icons-react";
 import { useTranslation } from "react-i18next";
@@ -26,14 +26,20 @@ interface ReasoningBlockProps {
  * Providers that don't stream reasoning TEXT still render this block from the
  * authoritative count alone (header only, empty body) so the cost is visible.
  */
-export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
+function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
   const { t } = useTranslation();
   const [open, setOpen] = useState(false);
 
   // Authoritative count wins; otherwise estimate live from the streamed text.
   const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
   const trimmed = text.trim();
-  const html = trimmed ? renderChatMarkdown(trimmed, {}) : "";
+  // Memoize the markdown render so toggling `open` (or a parent re-render caused
+  // by an unrelated streamed delta) does not re-parse the reasoning text; it
+  // recomputes only when the reasoning text itself changes (while it streams in).
+  const html = useMemo(
+    () => (trimmed ? renderChatMarkdown(trimmed, {}) : ""),
+    [trimmed],
+  );
 
   return (
     <Box className={classes.reasoningBlock} mb={6}>
@@ -81,3 +87,8 @@ export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
     </Box>
   );
 }
+
+// Memoized: parent-driven re-renders are skipped unless `text`/`tokens` change
+// (primitive props, default shallow compare), so streaming of OTHER content does
+// not re-run the markdown parse here. Toggling `open` still re-renders as usual.
+export default memo(ReasoningBlock);