diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index 3898136e..914b6c8d 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -36,6 +36,14 @@ import { } from "@/features/ai-chat/utils/queue-helpers.ts"; import classes from "@/features/ai-chat/components/ai-chat.module.css"; +// Throttle how often the streamed `messages` state triggers a re-render. Without +// it, useChat updates state on EVERY token, so the whole transcript's markdown +// (marked + DOMPurify) is re-parsed per token — on a long agent run that grows +// into a quadratic CPU storm that pins the main thread and freezes the UI. +// ~50ms (20 Hz) keeps streaming visually smooth while decoupling re-render cost +// from the token rate. +const STREAM_THROTTLE_MS = 50; + /** The page the user is currently viewing, sent as chat context. */ export interface OpenPageContext { id: string; @@ -253,6 +261,8 @@ export default function ChatThread({ id: chatStoreId, messages: initialMessages, transport, + // See STREAM_THROTTLE_MS — bounds re-render/markdown-reparse frequency. + experimental_throttle: STREAM_THROTTLE_MS, // `onFinish` (ai@6 useChat) fires from a `finally` on EVERY terminal outcome // — success, user Stop/abort (`isAbort`), network drop (`isDisconnect`), and // stream error (`isError`). Keep calling `onTurnFinished()` on all of them diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx index 6436b4d6..0eabbd87 100644 --- a/apps/client/src/features/ai-chat/components/message-item.tsx +++ b/apps/client/src/features/ai-chat/components/message-item.tsx @@ -1,3 +1,4 @@ +import { memo } from "react"; import { Box, Text } from "@mantine/core"; import { useTranslation } from "react-i18next"; import type { UIMessage } from "@ai-sdk/react"; @@ -34,6 +35,39 @@ interface MessageItemProps { assistantName?: string; } +/** + * One assistant text part rendered as sanitized markdown. Memoized on its inputs + * so a finalized text part is NOT re-parsed on every streamed delta: during a + * turn only the actively-growing tail part changes its `text`, so every earlier + * part hits the memo and skips the expensive marked + DOMPurify pass. Props are + * primitives, so React.memo's default shallow compare is exactly right (the + * `text` string is compared by value). + */ +const MarkdownPart = memo(function MarkdownPart({ + text, + neutralizeInternalLinks, +}: { + text: string; + neutralizeInternalLinks: boolean; +}) { + const html = renderChatMarkdown(text, { neutralizeInternalLinks }); + if (html) { + return ( +
+ ); + } + // Fallback when markdown could not render synchronously: raw text. + return ( + + {text} + + ); +}); + /** * Render a single UIMessage by iterating its `parts`: * - `text` parts -> sanitized markdown. @@ -41,12 +75,13 @@ interface MessageItemProps { * Other part kinds (reasoning, sources, files, step-start) are ignored for v1. * User messages render their text as a right-aligned plain bubble. * - * This component is intentionally NOT memoized: `useChat` replaces the streaming - * assistant message with a freshly cloned object on every streamed delta, so the - * `message` prop identity (and its `parts`) changes each tick. Re-rendering the - * text parts on each delta is what makes the answer stream in progressively. + * This component is memoized (see `arePropsEqual` at the bottom) on a cheap + * per-message content signature: the streaming TAIL message's signature changes + * on each delta so it still re-renders and streams in, while finalized rows are + * skipped. Each text part's markdown is itself memoized via `MarkdownPart`, so a + * long turn no longer re-parses the whole transcript on every token. */ -export default function MessageItem({ +function MessageItem({ message, showCitations = true, neutralizeInternalLinks = false, @@ -109,24 +144,12 @@ export default function MessageItem({ // starts with an empty text part before the first token arrives); the // typing indicator covers that gap until real content streams in. if (!part.text.trim()) return null; - const html = renderChatMarkdown(part.text, { - neutralizeInternalLinks, - }); - if (html) { - return ( -
- ); - } - // Fallback when markdown could not render synchronously: raw text. return ( - - {part.text} - + ); } @@ -177,3 +200,65 @@ export default function MessageItem({ ); } + +/** Cheap content signature for one message: changes iff something VISIBLE in the + * row changed. Streaming is APPEND-ONLY (text parts only grow, parts are only + * appended, a tool/text part flips state once), so a per-part [type, text + * length, state, error/output presence] tuple + the persisted metadata + * (error/finishReason) is a sufficient change signal without comparing full + * strings on every delta. */ +function messageSignature(message: UIMessage): string { + const parts = message.parts + .map((p) => { + const any = p as { + type: string; + text?: string; + state?: string; + errorText?: string; + output?: unknown; + }; + return [ + any.type, + any.text?.length ?? 0, + any.state ?? "", + any.errorText ? 1 : 0, + any.output !== undefined ? 1 : 0, + ].join(":"); + }) + .join("|"); + const meta = message.metadata as + | { error?: string; finishReason?: string; usage?: { reasoningTokens?: number } } + | undefined; + // `usage.reasoningTokens` is neither append-only nor part-bound: the authoritative + // turn total arrives on the final `finish-step` AFTER the reasoning text length and + // state are already frozen. Without it in the signature the row's signature would be + // unchanged at that point and the re-render skipped, so the "Thinking · N tokens" + // header (reasoningTokensForPart) would keep the live estimate instead of snapping + // to the exact figure. + return `${message.id}#${message.role}#${parts}#${meta?.error ?? ""}#${ + meta?.finishReason ?? "" + }#${meta?.usage?.reasoningTokens ?? ""}`; +} + +/** Skip re-rendering a message whose visible content is unchanged. The streaming + * TAIL message gets a fresh object whose signature changes each delta, so it + * still re-renders and streams in; every FINALIZED message is skipped, turning a + * per-token whole-transcript re-render into a tail-only one. */ +function arePropsEqual( + prev: MessageItemProps, + next: MessageItemProps, +): boolean { + if ( + prev.showCitations !== next.showCitations || + prev.neutralizeInternalLinks !== next.neutralizeInternalLinks || + prev.assistantName !== next.assistantName + ) { + return false; + } + // Fast path: identical message object (finalized rows keep their identity + // across deltas) — skip without building signatures. + if (prev.message === next.message) return true; + return messageSignature(prev.message) === messageSignature(next.message); +} + +export default memo(MessageItem, arePropsEqual); diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.tsx index 43e88a69..49b6b5de 100644 --- a/apps/client/src/features/ai-chat/components/reasoning-block.tsx +++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx @@ -1,4 +1,4 @@ -import { useState } from "react"; +import { memo, useMemo, useState } from "react"; import { Box, Collapse, Group, Text, UnstyledButton } from "@mantine/core"; import { IconChevronDown } from "@tabler/icons-react"; import { useTranslation } from "react-i18next"; @@ -26,14 +26,20 @@ interface ReasoningBlockProps { * Providers that don't stream reasoning TEXT still render this block from the * authoritative count alone (header only, empty body) so the cost is visible. */ -export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { +function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { const { t } = useTranslation(); const [open, setOpen] = useState(false); // Authoritative count wins; otherwise estimate live from the streamed text. const count = tokens && tokens > 0 ? tokens : estimateTokens(text); const trimmed = text.trim(); - const html = trimmed ? renderChatMarkdown(trimmed, {}) : ""; + // Memoize the markdown render so toggling `open` (or a parent re-render caused + // by an unrelated streamed delta) does not re-parse the reasoning text; it + // recomputes only when the reasoning text itself changes (while it streams in). + const html = useMemo( + () => (trimmed ? renderChatMarkdown(trimmed, {}) : ""), + [trimmed], + ); return ( @@ -81,3 +87,8 @@ export default function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { ); } + +// Memoized: re-renders only when `text`/`tokens` change (primitive props, default +// shallow compare), so a parent re-render during streaming of OTHER content does +// not re-run the markdown parse for an already-finalized reasoning block. +export default memo(ReasoningBlock);