diff --git a/.gitignore b/.gitignore
index cf440100..4eb9e6fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,8 @@ lerna-debug.log*
.nx/cache
.claude/worktrees/
.claude/tmp/
+# Local Chrome performance traces recorded by the AI-chat perf harness
+.claude/perf-traces/
# TypeScript incremental build artifacts
*.tsbuildinfo
diff --git a/apps/client/perf/ai-chat-perf-main.tsx b/apps/client/perf/ai-chat-perf-main.tsx
new file mode 100644
index 00000000..0c75f68c
--- /dev/null
+++ b/apps/client/perf/ai-chat-perf-main.tsx
@@ -0,0 +1,50 @@
+/**
+ * DEV-ONLY entry for the AI chat perf harness (served by the vite dev server at
+ * /perf/ai-chat-perf.html; never part of the production build, which uses the
+ * single default index.html entry).
+ *
+ * Mounts the minimal provider stack the real ChatThread needs (Mantine, router
+ * for tool-card Links, react-query, i18n) and patches `window.fetch` BEFORE
+ * React mounts so ChatThread's DefaultChatTransport requests to
+ * /api/ai-chat/stream are answered by the synthetic SSE generator.
+ */
+
+import "@mantine/core/styles.css";
+
+import ReactDOM from "react-dom/client";
+import { MantineProvider } from "@mantine/core";
+import { MemoryRouter } from "react-router-dom";
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import { mantineCssResolver, theme } from "../src/theme.ts";
+// i18n side-effect init (http-backend). Translations load from /locales in dev;
+// missing keys fall back to the key text, which is fine for the harness.
+import "../src/i18n.ts";
+import { installAiChatStreamFetchPatch } from "./synthetic-turn.ts";
+import PerfHarness from "./harness.tsx";
+
+// MUST run before React mounts: ChatThread creates its transport with the
+// global fetch, so the patch has to be in place before the first send.
+installAiChatStreamFetchPatch();
+
+// Harness-wide react-query defaults: never refetch on mount or window focus,
+// never retry, and treat results as fresh for five minutes.
+const queryClient = new QueryClient({
+  defaultOptions: {
+    queries: {
+      refetchOnMount: false,
+      refetchOnWindowFocus: false,
+      retry: false,
+      staleTime: 5 * 60 * 1000,
+    },
+  },
+});
+
+// Resolve the mount point with a real runtime check instead of a type
+// assertion: a missing #root now fails here with a clear message rather than
+// handing `null` to ReactDOM.createRoot.
+const container = document.getElementById("root");
+if (container === null) {
+  throw new Error("[perf] mount element #root not found in the harness page");
+}
+
+ReactDOM.createRoot(container).render(
+
+
+
+
+
+
+ ,
+);
diff --git a/apps/client/perf/ai-chat-perf.html b/apps/client/perf/ai-chat-perf.html
new file mode 100644
index 00000000..5509160b
--- /dev/null
+++ b/apps/client/perf/ai-chat-perf.html
@@ -0,0 +1,12 @@
+
+
+
+
+
+ AI chat perf harness
+
+
+
+
+
+
diff --git a/apps/client/perf/harness.tsx b/apps/client/perf/harness.tsx
new file mode 100644
index 00000000..32af237d
--- /dev/null
+++ b/apps/client/perf/harness.tsx
@@ -0,0 +1,390 @@
+/**
+ * DEV-ONLY perf harness UI for the AI chat feature.
+ *
+ * Left panel: controls + live stats. Right side: a bordered box (~real chat
+ * window size) hosting the REAL ChatThread component.
+ *
+ * Scenario A "Open existing chat": mount ChatThread seeded with a large
+ * persisted transcript and measure click -> post-mount-paint time.
+ * Scenario B "Live agent stream": mount an empty chat and auto-send a message;
+ * the fetch patch (see synthetic-turn.ts) answers with a synthetic SSE stream
+ * through the real useChat pipeline.
+ */
+
+import { useEffect, useMemo, useRef, useState } from "react";
+import type { CSSProperties, MutableRefObject } from "react";
+import ChatThread from "../src/features/ai-chat/components/chat-thread.tsx";
+import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
+import {
+ PRESETS,
+ buildPersistedRows,
+ buildTurnScript,
+ setLiveStreamSettings,
+ type PresetKey,
+} from "./synthetic-turn.ts";
+
+const AUTO_SEND_TEXT = "Run the synthetic perf turn";
+const AUTO_SEND_TIMEOUT_MS = 1000;
+/** Stats display refresh period — 2x/s so the display itself stays cheap. */
+const STATS_FLUSH_MS = 500;
+
+// ---------------------------------------------------------------------------
+// Shared mutable stats (written from callbacks, flushed to state at 2 Hz)
+// ---------------------------------------------------------------------------
+
+/** Mutable counters shared between the harness callbacks and the stats panel. */
+interface PerfStats {
+  /** Number of long-task entries observed. */
+  longtaskCount: number;
+  /** Sum of the observed long-task durations, in ms. */
+  longtaskTotalMs: number;
+  /** Longest single observed long task, in ms. */
+  longtaskMaxMs: number;
+  /** Frames counted in the trailing one-second window. */
+  fps: number;
+  /** Cumulative SSE chunks emitted by the synthetic stream. */
+  sseChunks: number;
+  /** Cumulative visible characters emitted by the synthetic stream. */
+  sseChars: number;
+  /** Scenario A mount time in ms; `null` until measured. */
+  mountAMs: number | null;
+  /** Lifecycle of the scenario B stream. */
+  streamState: "idle" | "streaming" | "done" | "aborted";
+}
+
+/** Create a fresh stats record: counters zeroed, no mount timing, stream idle. */
+function emptyStats(): PerfStats {
+  const zeroed: PerfStats = {
+    streamState: "idle",
+    mountAMs: null,
+    sseChars: 0,
+    sseChunks: 0,
+    fps: 0,
+    longtaskMaxMs: 0,
+    longtaskTotalMs: 0,
+    longtaskCount: 0,
+  };
+  return zeroed;
+}
+
+/**
+ * Self-contained stats panel: owns the longtask observer, the FPS meter and the
+ * 2 Hz flush interval. Isolated in its OWN component so its periodic setState
+ * re-renders only this panel — NOT the ChatThread under measurement.
+ *
+ * `stats` is a shared mutable ref. The effects below write the long-task and
+ * FPS fields straight into `stats.current`; none of those writes re-render
+ * anything. The display only changes when the interval (or `resetLongtasks`)
+ * copies the ref into the local `snapshot` state.
+ *
+ * `resetLongtasks` zeroes only the three long-task fields and refreshes the
+ * snapshot immediately; FPS, SSE and mount fields are left untouched.
+ *
+ * NOTE(review): the observer is registered with `buffered: true`, so long
+ * tasks recorded before this panel mounted are presumably counted as well —
+ * confirm that is intended.
+ * NOTE(review): the markup returned below was damaged in this copy of the
+ * patch (element tags are missing); it is reproduced untouched.
+ */
+function StatsPanel({ stats }: { stats: MutableRefObject }) {
+ const [snapshot, setSnapshot] = useState(() => ({ ...stats.current }));
+
+ // Long tasks (main-thread blocks > 50ms).
+ useEffect(() => {
+ let observer: PerformanceObserver | null = null;
+ try {
+ observer = new PerformanceObserver((list) => {
+ for (const entry of list.getEntries()) {
+ stats.current.longtaskCount += 1;
+ stats.current.longtaskTotalMs += entry.duration;
+ stats.current.longtaskMaxMs = Math.max(stats.current.longtaskMaxMs, entry.duration);
+ }
+ });
+ observer.observe({ type: "longtask", buffered: true });
+ } catch {
+ // longtask entries unsupported in this browser — panel shows zeros.
+ }
+ return () => observer?.disconnect();
+ }, [stats]);
+
+ // FPS: frames rendered within the trailing 1s window.
+ useEffect(() => {
+ let raf = 0;
+ const frames: number[] = [];
+ const loop = (now: number) => {
+ frames.push(now);
+ while (frames.length > 0 && frames[0] <= now - 1000) frames.shift(); // evict timestamps older than 1s
+ stats.current.fps = frames.length;
+ raf = requestAnimationFrame(loop);
+ };
+ raf = requestAnimationFrame(loop);
+ return () => cancelAnimationFrame(raf);
+ }, [stats]);
+
+ // Flush the mutable stats into the display at most 2x/s.
+ useEffect(() => {
+ const id = window.setInterval(() => setSnapshot({ ...stats.current }), STATS_FLUSH_MS);
+ return () => window.clearInterval(id);
+ }, [stats]);
+
+ const resetLongtasks = () => {
+ stats.current.longtaskCount = 0;
+ stats.current.longtaskTotalMs = 0;
+ stats.current.longtaskMaxMs = 0;
+ setSnapshot({ ...stats.current });
+ };
+
+ const row: CSSProperties = { display: "flex", justifyContent: "space-between", gap: 8 };
+ return (
+
+
Stats
+
FPS (1s){snapshot.fps}
+
Long tasks{snapshot.longtaskCount}
+
Long total{snapshot.longtaskTotalMs.toFixed(0)} ms
+
Long max{snapshot.longtaskMaxMs.toFixed(0)} ms
+
SSE chunks{snapshot.sseChunks}
+
SSE chars{snapshot.sseChars.toLocaleString()}
+
Stream{snapshot.streamState}
+
+ Mount A
+ {snapshot.mountAMs === null ? "—" : `${snapshot.mountAMs.toFixed(0)} ms`}
+
+
+
+ );
+}
+
+// ---------------------------------------------------------------------------
+// Auto-send (scenario B): drive the REAL composer in the mounted DOM
+// ---------------------------------------------------------------------------
+
+/**
+ * Drive the real composer inside `host`: fill the textarea, then press Send.
+ *
+ * The textarea is filled through the native `value` setter followed by a
+ * bubbling `input` event (React 18 controlled-input pattern), and the enabled
+ * "Send" button is clicked on a later frame. Both lookups are retried on
+ * requestAnimationFrame until the elements exist (ChatThread mounts
+ * asynchronously) or until AUTO_SEND_TIMEOUT_MS has elapsed since the call.
+ *
+ * Failures are reported with `console.error` and never thrown.
+ *
+ * @param host Element that contains the mounted ChatThread.
+ * @param text Message text to type into the composer.
+ */
+function autoSend(host: HTMLElement, text: string): void {
+  const deadline = performance.now() + AUTO_SEND_TIMEOUT_MS;
+  const canRetry = (): boolean => performance.now() < deadline;
+
+  const tryClick = (): void => {
+    // Typed lookup: a plain `Element` has neither `disabled` nor `click()`.
+    const button = host.querySelector<HTMLButtonElement>('button[aria-label="Send"]');
+    if (button && !button.disabled) {
+      button.click();
+      return;
+    }
+    if (canRetry()) requestAnimationFrame(tryClick);
+    else console.error("[perf] auto-send: Send button never became clickable");
+  };
+
+  const trySetValue = (): void => {
+    const textarea = host.querySelector<HTMLTextAreaElement>("textarea");
+    if (!textarea) {
+      if (canRetry()) requestAnimationFrame(trySetValue);
+      else console.error("[perf] auto-send: textarea not found");
+      return;
+    }
+    const setter = Object.getOwnPropertyDescriptor(
+      window.HTMLTextAreaElement.prototype,
+      "value",
+    )?.set;
+    if (!setter) {
+      // Without the native setter the value cannot be written, so the Send
+      // button would never enable; report the real cause instead of waiting
+      // for the click retry to time out.
+      console.error("[perf] auto-send: native textarea value setter unavailable");
+      return;
+    }
+    setter.call(textarea, text);
+    textarea.dispatchEvent(new Event("input", { bubbles: true }));
+    // Click on a later frame so React commits the controlled value (which
+    // enables the Send button) before we press it.
+    requestAnimationFrame(tryClick);
+  };
+
+  requestAnimationFrame(trySetValue);
+}
+
+// ---------------------------------------------------------------------------
+// Harness
+// ---------------------------------------------------------------------------
+
+interface MountState {
+ mode: "A" | "B";
+ key: number;
+ chatId: string | null;
+ rows: IAiChatMessageRow[];
+}
+
+const noop = (): void => {};
+
+/**
+ * Root component of the perf harness: holds the controls' state, the shared
+ * stats ref, and the currently mounted scenario.
+ *
+ * - `handleMountA` builds a persisted fixture for the selected preset, records
+ *   the start mark/time and mounts scenario A; the effect keyed on `mounted`
+ *   then records the elapsed time two animation frames after the commit.
+ * - `handleStartB` resets the SSE stats, arms the synthetic stream via
+ *   `setLiveStreamSettings`, mounts an empty chat and calls `autoSend`.
+ * - `handleUnmount` clears the mounted scenario.
+ *
+ * Each mount bumps `keyCounterRef` and stores the new value in the mount
+ * state's `key`.
+ *
+ * NOTE(review): the markup returned below was damaged in this copy of the
+ * patch (element tags are missing); it is reproduced untouched.
+ */
+export default function PerfHarness() {
+ const [preset, setPreset] = useState("20k");
+ const [intervalMs, setIntervalMs] = useState(15);
+ const [mounted, setMounted] = useState(null);
+ const [fixtureInfo, setFixtureInfo] = useState(null);
+
+ const statsRef = useRef(emptyStats());
+ const hostRef = useRef(null);
+ const keyCounterRef = useRef(0);
+ const mountStartRef = useRef(0);
+ const pendingMountMeasureRef = useRef(false);
+
+ // The scripted live turn for the current preset (reused across B runs; the
+ // script is immutable data, so rebuilding per run is unnecessary).
+ const liveScript = useMemo(() => buildTurnScript(PRESETS[preset], "live"), [preset]);
+
+ const openPage = useMemo(() => ({ id: "page-1", title: "Perf test page" }), []);
+
+ // Scenario A: mount ChatThread seeded with a large persisted transcript.
+ const handleMountA = () => {
+ const fixture = buildPersistedRows(PRESETS[preset]);
+ setFixtureInfo(
+ `Persisted fixture: ${fixture.rows.length} rows, ` +
+ `${fixture.totalChars.toLocaleString()} chars ≈ ${fixture.approxTokens.toLocaleString()} tokens`,
+ );
+ statsRef.current.mountAMs = null;
+ // Mark AFTER fixture generation: we measure mount cost, not generation cost
+ // (production receives its rows from the network).
+ performance.mark("perf:mountA:start");
+ mountStartRef.current = performance.now();
+ pendingMountMeasureRef.current = true;
+ keyCounterRef.current += 1;
+ setMounted({ mode: "A", key: keyCounterRef.current, chatId: "perf-chat", rows: fixture.rows });
+ };
+
+ // Measure scenario A: effect runs after the mount commit; double rAF lands
+ // after the first paint of the mounted transcript. The pending flag is
+ // cleared first, so only the commit that follows handleMountA is measured.
+ useEffect(() => {
+ if (!pendingMountMeasureRef.current) return;
+ pendingMountMeasureRef.current = false;
+ requestAnimationFrame(() => {
+ requestAnimationFrame(() => {
+ statsRef.current.mountAMs = performance.now() - mountStartRef.current;
+ performance.mark("perf:mountA:end");
+ try {
+ performance.measure("perf:mountA", "perf:mountA:start", "perf:mountA:end");
+ } catch {
+ // Marks cleared mid-run — ignore.
+ }
+ });
+ });
+ }, [mounted]);
+
+ // Scenario B: mount an empty chat, arm the synthetic stream, auto-send.
+ // NOTE(review): the callbacks below write into the shared statsRef with no
+ // notion of which run they belong to. If an earlier synthetic stream is still
+ // pumping when B is started again, its onProgress/onDone/onAbort would
+ // presumably overwrite the new run's stats — confirm whether ChatThread
+ // aborts its request on unmount.
+ const handleStartB = () => {
+ statsRef.current.sseChunks = 0;
+ statsRef.current.sseChars = 0;
+ statsRef.current.streamState = "streaming";
+ setLiveStreamSettings({
+ script: liveScript,
+ chunkIntervalMs: intervalMs,
+ onProgress: (chunks, chars) => {
+ statsRef.current.sseChunks = chunks;
+ statsRef.current.sseChars = chars;
+ },
+ onDone: () => {
+ statsRef.current.streamState = "done";
+ performance.mark("perf:streamB:end");
+ try {
+ performance.measure("perf:streamB", "perf:streamB:start", "perf:streamB:end");
+ } catch {
+ // Start mark missing (e.g. marks cleared) — ignore.
+ }
+ },
+ onAbort: () => {
+ statsRef.current.streamState = "aborted";
+ },
+ });
+ performance.mark("perf:streamB:start");
+ keyCounterRef.current += 1;
+ setMounted({ mode: "B", key: keyCounterRef.current, chatId: null, rows: [] });
+ if (hostRef.current) autoSend(hostRef.current, AUTO_SEND_TEXT);
+ };
+
+ const handleUnmount = () => setMounted(null);
+
+ const label: CSSProperties = { display: "block", fontSize: 12, margin: "10px 0 2px" };
+ const button: CSSProperties = { display: "block", width: "100%", margin: "6px 0", padding: "6px 8px" };
+
+ return (
+
+
+ {/* Right: the real ChatThread inside a real-window-sized box */}
+
+
+ {mounted ? (
+
+ ) : (
+
+ ChatThread unmounted. Use the controls on the left.
+
+ )}
+
+
+
+ );
+}
diff --git a/apps/client/perf/synthetic-turn.ts b/apps/client/perf/synthetic-turn.ts
new file mode 100644
index 00000000..439a5ab0
--- /dev/null
+++ b/apps/client/perf/synthetic-turn.ts
@@ -0,0 +1,517 @@
+/**
+ * DEV-ONLY synthetic agent-turn generator for the AI chat perf harness.
+ *
+ * Produces one scripted agent turn (reasoning + tool calls + markdown answer)
+ * from a size config, and materializes it two ways:
+ * - as an AI SDK v6 UI-message SSE stream (scenario B "live agent stream"),
+ * served by a `window.fetch` patch that intercepts `/api/ai-chat/stream`;
+ * - as persisted `IAiChatMessageRow[]` history (scenario A "open existing chat").
+ *
+ * Wire format verified against the installed ai@6.0.207 `uiMessageChunkSchema`
+ * (strict objects — only the exact field names below are accepted).
+ */
+
+import type { UIMessage } from "@ai-sdk/react";
+import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
+
+// ---------------------------------------------------------------------------
+// Config / presets
+// ---------------------------------------------------------------------------
+
+/** 1 token ~= 4 chars — the approximation used throughout this module. */
+const CHARS_PER_TOKEN = 4;
+
+/** Size knobs for one synthetic agent turn. */
+export interface TurnConfig {
+  /** Number of agent steps; each step = one reasoning block + one tool call. */
+  steps: number;
+  /** Approximate reasoning tokens generated per step. */
+  reasoningTokensPerStep: number;
+  /** Size of each tool call's output `content` filler, in bytes (ASCII). */
+  toolOutputBytes: number;
+  /** Approximate size of the final markdown answer, in tokens. */
+  answerTokens: number;
+}
+
+/** Names of the built-in size presets. */
+export type PresetKey = "5k" | "20k" | "50k";
+
+/**
+ * Built-in turn sizes keyed by preset name. The explicit
+ * `Record<PresetKey, TurnConfig>` annotation makes the compiler reject a
+ * missing or misspelled preset and lets `PRESETS[key]` type-check for any
+ * `PresetKey`.
+ */
+export const PRESETS: Record<PresetKey, TurnConfig> = {
+  "5k": {
+    steps: 3,
+    reasoningTokensPerStep: 500,
+    toolOutputBytes: 10_000,
+    answerTokens: 600,
+  },
+  "20k": {
+    steps: 6,
+    reasoningTokensPerStep: 2500,
+    toolOutputBytes: 20_000,
+    answerTokens: 1500,
+  },
+  "50k": {
+    steps: 10,
+    reasoningTokensPerStep: 4000,
+    toolOutputBytes: 40_000,
+    answerTokens: 3000,
+  },
+};
+
+// ---------------------------------------------------------------------------
+// Text generators
+// ---------------------------------------------------------------------------
+
+/** Mixed Russian/English prose sentences cycled to build reasoning text. */
+const REASONING_SENTENCES = [
+ "Пользователь просит проанализировать документ и выделить ключевые тезисы по каждому разделу.",
+ "First I need to inspect the current page content to understand its overall structure.",
+ "Судя по оглавлению, раздел с техническими требованиями находится ближе к концу документа.",
+ "The table in section three contains the migration matrix that I should cross-check against the summary.",
+ "Проверю, нет ли противоречий между описанием API и приведёнными в тексте примерами вызовов.",
+ "Let me compare the numbers from the executive summary with the raw data in the appendix.",
+ "Похоже, автор использует термины «воркспейс» и workspace взаимозаменяемо — это стоит нормализовать.",
+ "I should keep the page ids from the tool output so the final answer can cite the source pages.",
+ "Осталось свести найденные несоответствия в одну таблицу и предложить порядок исправлений.",
+ "The remaining sections look consistent, so I can move on to drafting the structured answer.",
+];
+
+/**
+ * Produce roughly `targetChars` characters of prose by cycling through
+ * REASONING_SENTENCES. Sentences are separated by a space, except that a
+ * newline is used instead once about 200 characters have accumulated on the
+ * current line. Trailing whitespace is trimmed from the result.
+ */
+function makeProse(targetChars: number): string {
+  let prose = "";
+  let budgetUsed = 0;
+  let lineRun = 0;
+  for (let cursor = 0; budgetUsed < targetChars; cursor += 1) {
+    const sentence = REASONING_SENTENCES[cursor % REASONING_SENTENCES.length];
+    // Each sentence costs its own length plus the one separator after it.
+    const consumed = sentence.length + 1;
+    budgetUsed += consumed;
+    lineRun += consumed;
+    const wrap = lineRun >= 200;
+    prose += sentence + (wrap ? "\n" : " ");
+    if (wrap) lineRun = 0;
+  }
+  return prose.trimEnd();
+}
+
+/**
+ * One markdown section (~700 chars): heading, prose, bullets, GFM table, code.
+ *
+ * `n` is interpolated into the heading, the page counts (`n * 12`, `n + 4`,
+ * `n + 2`, `n * 3`), the filter predicate and the generated function name, so
+ * sections built from different `n` differ from one another. The pieces are
+ * joined with single newlines; empty strings in the list produce the blank
+ * lines between markdown blocks.
+ */
+function markdownSection(n: number): string {
+ return [
+ `## Section ${n}: migration analysis`,
+ ``,
+ `The workspace contains **${n * 12} pages** that still reference the legacy API. ` +
+ `Most of them live under [Perf test page](/p/page-1) and need the new transport. ` +
+ `Ниже приведена сводка по разделу с оценкой трудозатрат и основных рисков.`,
+ ``,
+ `- Update the fetch layer to the v6 transport`,
+ `- Перенести таблицы соответствия идентификаторов`,
+ `- Verify citation links after the move`,
+ `- Проверить отображение длинных ответов в узкой панели`,
+ ``,
+ `| Область | Страниц | Статус | Риск |`,
+ `| --- | --- | --- | --- |`,
+ `| API reference | ${n + 4} | migrated | low |`,
+ `| Onboarding | ${n + 2} | in progress | medium |`,
+ `| Release notes | ${n * 3} | pending | high |`,
+ ``,
+ "```ts",
+ `export function migrateSection${n}(rows: Row[]): Row[] {`,
+ ` return rows`,
+ ` .filter((row) => row.section === ${n})`,
+ ` .map((row) => ({ ...row, migrated: true }));`,
+ `}`,
+ "```",
+ ].join("\n");
+}
+
+/**
+ * Build a markdown answer of roughly `targetChars` characters by appending
+ * numbered sections (1, 2, 3, …) until the running size reaches the target.
+ * Sections are separated by a blank line.
+ */
+function makeMarkdownAnswer(targetChars: number): string {
+  const collected: string[] = [];
+  for (let sectionNo = 1, used = 0; used < targetChars; sectionNo += 1) {
+    const body = markdownSection(sectionNo);
+    collected.push(body);
+    // +2 accounts for the "\n\n" joiner that follows each section.
+    used += body.length + 2;
+  }
+  return collected.join("\n\n");
+}
+
+/**
+ * Plain ASCII filler of exactly `bytes` characters for tool outputs.
+ *
+ * A fixed sentence is repeated and the result cut to length. A non-positive,
+ * NaN or infinite `bytes` yields an empty string; without that guard a
+ * negative or infinite count would make `String.prototype.repeat` throw a
+ * RangeError.
+ */
+function makeFiller(bytes: number): string {
+  if (!Number.isFinite(bytes) || bytes <= 0) return "";
+  const unit = "Perf filler content for the synthetic getPage tool output. ";
+  return unit.repeat(Math.ceil(bytes / unit.length)).slice(0, bytes);
+}
+
+// ---------------------------------------------------------------------------
+// Turn script
+// ---------------------------------------------------------------------------
+
+export interface TurnToolCall {
+ toolCallId: string;
+ toolName: "getPage";
+ input: { pageId: string };
+ output: { id: string; title: string; content: string };
+}
+
+export interface TurnStep {
+ reasoningText: string;
+ tool: TurnToolCall;
+}
+
+export interface TurnScript {
+ steps: TurnStep[];
+ answerText: string;
+ /** Approximate reasoning tokens for the whole turn (chars / 4). */
+ reasoningTokens: number;
+ /** Approximate context size after this turn, in tokens. */
+ contextTokens: number;
+ maxContextTokens: number;
+ /** Actual generated visible chars: reasoning + tool outputs + answer. */
+ totalChars: number;
+ /** totalChars / 4, rounded. */
+ approxTokens: number;
+}
+
+/**
+ * Assemble the scripted agent turn described by `config`.
+ *
+ * Every step pairs a generated reasoning text with one `getPage` tool call
+ * whose output carries filler content; the turn ends with a generated markdown
+ * answer. Tool call ids have the form `<idPrefix>-call-<n>` (n starting at 1),
+ * so scripts built with different prefixes never share an id. The returned
+ * totals are computed from the actual generated lengths.
+ */
+export function buildTurnScript(config: TurnConfig, idPrefix = "live"): TurnScript {
+  const toTokens = (chars: number): number => Math.round(chars / CHARS_PER_TOKEN);
+
+  const steps: TurnStep[] = [];
+  const tally = { reasoning: 0, tool: 0 };
+  let stepNo = 0;
+  while (stepNo < config.steps) {
+    stepNo += 1;
+    const reasoningText = makeProse(config.reasoningTokensPerStep * CHARS_PER_TOKEN);
+    const content = makeFiller(config.toolOutputBytes);
+    tally.reasoning += reasoningText.length;
+    tally.tool += content.length;
+    const tool: TurnToolCall = {
+      toolCallId: `${idPrefix}-call-${stepNo}`,
+      toolName: "getPage",
+      input: { pageId: "page-1" },
+      output: { id: "page-1", title: "Perf test page", content },
+    };
+    steps.push({ reasoningText, tool });
+  }
+
+  const answerText = makeMarkdownAnswer(config.answerTokens * CHARS_PER_TOKEN);
+  const totalChars = tally.reasoning + tally.tool + answerText.length;
+  return {
+    steps,
+    answerText,
+    reasoningTokens: toTokens(tally.reasoning),
+    contextTokens: toTokens(totalChars),
+    maxContextTokens: 200_000,
+    totalChars,
+    approxTokens: toTokens(totalChars),
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Scenario A: persisted rows
+// ---------------------------------------------------------------------------
+
+/** Number of user+assistant pairs the preset is split across for history. */
+const HISTORY_TURNS = 3;
+
+const USER_PROMPTS = [
+ "Проанализируй документ и выдели ключевые тезисы по каждому разделу.",
+ "Now cross-check the migration matrix against the summary and list every mismatch.",
+ "Собери финальный план миграции с оценкой рисков по каждой области.",
+];
+
+/**
+ * Convert a finished turn script into persisted message parts: for each step
+ * a completed reasoning part followed by its tool part with output available,
+ * then one completed text part holding the answer.
+ */
+function scriptToPersistedParts(script: TurnScript): UIMessage["parts"] {
+  const stepParts: unknown[] = script.steps.flatMap(({ reasoningText, tool }) => [
+    { type: "reasoning", text: reasoningText, state: "done" },
+    {
+      type: `tool-${tool.toolName}`,
+      toolCallId: tool.toolCallId,
+      state: "output-available",
+      input: tool.input,
+      output: tool.output,
+    },
+  ]);
+  const answerPart = { type: "text", text: script.answerText, state: "done" };
+  return [...stepParts, answerPart] as UIMessage["parts"];
+}
+
+export interface PersistedFixture {
+ rows: IAiChatMessageRow[];
+ totalChars: number;
+ approxTokens: number;
+}
+
+/**
+ * Turn a preset into a finished transcript of HISTORY_TURNS turns, each made
+ * of one user row and one assistant row.
+ *
+ * The preset's steps are spread as evenly as possible (earlier turns take the
+ * remainder, every turn gets at least one step) and the answer budget is
+ * divided by the turn count (at least 50 tokens per turn). Timestamps start
+ * HISTORY_TURNS minutes before now: turns are one minute apart and each
+ * assistant row follows its user row by 30 seconds. The accounting is
+ * approximate; the totals actually generated are returned for display.
+ */
+export function buildPersistedRows(config: TurnConfig): PersistedFixture {
+  const MINUTE_MS = 60_000;
+  const firstTurnAt = Date.now() - HISTORY_TURNS * MINUTE_MS;
+  const baseSteps = Math.floor(config.steps / HISTORY_TURNS);
+  const extraSteps = config.steps % HISTORY_TURNS;
+  const answerTokens = Math.max(50, Math.round(config.answerTokens / HISTORY_TURNS));
+  const isoAt = (ms: number): string => new Date(ms).toISOString();
+
+  const rows: IAiChatMessageRow[] = [];
+  let totalChars = 0;
+
+  for (let turn = 0; turn < HISTORY_TURNS; turn += 1) {
+    const ordinal = turn + 1;
+    const turnStart = firstTurnAt + turn * MINUTE_MS;
+    const share = baseSteps + (turn < extraSteps ? 1 : 0);
+    const turnConfig: TurnConfig = {
+      steps: Math.max(1, share),
+      reasoningTokensPerStep: config.reasoningTokensPerStep,
+      toolOutputBytes: config.toolOutputBytes,
+      answerTokens,
+    };
+    const script = buildTurnScript(turnConfig, `hist-${ordinal}`);
+    totalChars += script.totalChars;
+
+    rows.push({
+      id: `perf-row-u${ordinal}`,
+      role: "user",
+      content: USER_PROMPTS[turn % USER_PROMPTS.length],
+      metadata: null,
+      createdAt: isoAt(turnStart),
+    });
+    rows.push({
+      id: `perf-row-a${ordinal}`,
+      role: "assistant",
+      content: script.answerText,
+      metadata: {
+        parts: scriptToPersistedParts(script),
+        usage: { reasoningTokens: script.reasoningTokens },
+        contextTokens: script.contextTokens,
+        maxContextTokens: script.maxContextTokens,
+        finishReason: "stop",
+      },
+      createdAt: isoAt(turnStart + 30_000),
+    });
+  }
+
+  const approxTokens = Math.round(totalChars / CHARS_PER_TOKEN);
+  return { rows, totalChars, approxTokens };
+}
+
+// ---------------------------------------------------------------------------
+// Scenario B: SSE stream
+// ---------------------------------------------------------------------------
+
+/** Streaming delta size in chars (reasoning/answer text is split into these). */
+const DELTA_CHARS = 200;
+
+/**
+ * Split `text` into consecutive slices of at most `size` UTF-16 code units.
+ * An empty `text` yields an empty array.
+ *
+ * @param text Text to split.
+ * @param size Maximum slice length; defaults to DELTA_CHARS.
+ * @returns The slices in order; concatenated they reproduce `text`.
+ * @throws RangeError when `size` is not at least 1. A zero or negative size
+ *   would never advance the loop, and NaN would silently drop the text.
+ */
+function splitDeltas(text: string, size = DELTA_CHARS): string[] {
+  // Written as a negated comparison so NaN is rejected too.
+  if (!(size >= 1)) {
+    throw new RangeError(`splitDeltas: size must be >= 1, got ${size}`);
+  }
+  const deltas: string[] = [];
+  for (let i = 0; i < text.length; i += size) {
+    deltas.push(text.slice(i, i + size));
+  }
+  return deltas;
+}
+
+/** One pre-serialized SSE frame plus its visible-char contribution for stats. */
+interface SseFrame {
+  /** Complete wire text of the event: `data: <json>` followed by a blank line. */
+  data: string;
+  /** Visible characters this frame contributes to the progress counter. */
+  chars: number;
+}
+
+/**
+ * Serialize one stream chunk as an SSE `data:` event.
+ *
+ * @param chunk JSON-serializable chunk object; typed as a string-keyed record
+ *   so any chunk shape is accepted while non-object values are rejected.
+ * @param chars Visible characters carried by the chunk (0 for control chunks).
+ */
+function frame(chunk: Record<string, unknown>, chars = 0): SseFrame {
+  return { data: `data: ${JSON.stringify(chunk)}\n\n`, chars };
+}
+
+/**
+ * Render a scripted turn as the ordered list of AI SDK v6 UI-message SSE
+ * frames. The closing `data: [DONE]` terminator is NOT included; the pump
+ * appends it.
+ *
+ * Order: `start`; then per step `start-step`, the reasoning
+ * start/deltas/end, the tool input start/available, the tool output, and
+ * `finish-step`; then a final step that streams the answer text; then
+ * `finish` with the usage metadata. Only reasoning deltas, tool outputs and
+ * text deltas carry a visible-char count.
+ */
+function buildSseFrames(script: TurnScript, messageId: string, chatId: string): SseFrame[] {
+  const out: SseFrame[] = [];
+  const emit = (...args: Parameters<typeof frame>): void => {
+    out.push(frame(...args));
+  };
+
+  emit({ type: "start", messageId, messageMetadata: { chatId } });
+
+  for (const [stepIndex, step] of script.steps.entries()) {
+    const reasoningId = `${messageId}-r${stepIndex + 1}`;
+    emit({ type: "start-step" });
+    emit({ type: "reasoning-start", id: reasoningId });
+    splitDeltas(step.reasoningText).forEach((delta) => {
+      emit({ type: "reasoning-delta", id: reasoningId, delta }, delta.length);
+    });
+    emit({ type: "reasoning-end", id: reasoningId });
+
+    const call = step.tool;
+    emit({ type: "tool-input-start", toolCallId: call.toolCallId, toolName: call.toolName });
+    emit({
+      type: "tool-input-available",
+      toolCallId: call.toolCallId,
+      toolName: call.toolName,
+      input: call.input,
+    });
+    // The tool result arrives as ONE chunk, like the real server sends it.
+    emit(
+      { type: "tool-output-available", toolCallId: call.toolCallId, output: call.output },
+      call.output.content.length,
+    );
+    emit({ type: "finish-step" });
+  }
+
+  // Final step: the markdown answer.
+  const textId = `${messageId}-answer`;
+  emit({ type: "start-step" });
+  emit({ type: "text-start", id: textId });
+  splitDeltas(script.answerText).forEach((delta) => {
+    emit({ type: "text-delta", id: textId, delta }, delta.length);
+  });
+  emit({ type: "text-end", id: textId });
+  emit({ type: "finish-step" });
+
+  emit({
+    type: "finish",
+    messageMetadata: {
+      usage: { reasoningTokens: script.reasoningTokens },
+      contextTokens: script.contextTokens,
+      maxContextTokens: script.maxContextTokens,
+      finishReason: "stop",
+    },
+  });
+  return out;
+}
+
+export interface LiveStreamSettings {
+ script: TurnScript;
+ /** Delay between SSE chunks (one chunk per tick). */
+ chunkIntervalMs: number;
+ /** Progress callback: cumulative emitted chunk count and visible chars. */
+ onProgress?: (chunks: number, chars: number) => void;
+ /** Fired once after the `[DONE]` terminator is enqueued. */
+ onDone?: () => void;
+ /** Fired if the client aborted the stream (Stop button). */
+ onAbort?: () => void;
+}
+
+/**
+ * Build a synthetic SSE Response streaming the scripted turn, one chunk every
+ * `chunkIntervalMs`. Honors the fetch `AbortSignal` so the real Stop button works.
+ *
+ * The stream ends in exactly one terminal state, and exactly one of
+ * `onDone` / `onAbort` is reported for it:
+ * - done: every frame plus the `data: [DONE]` terminator was enqueued
+ *   (`onDone`);
+ * - aborted: the signal fired (or was already aborted), or the reader
+ *   cancelled the body (`onAbort`).
+ * Once a terminal state is reached the timer is cleared and the abort listener
+ * is removed, so a late abort on an already finished request can no longer
+ * flip the reported state from done to aborted.
+ *
+ * @param settings Script, pacing and progress callbacks for this response.
+ * @param signal Abort signal of the intercepted request, if any.
+ * @returns A 200 `text/event-stream` Response backed by the synthetic stream.
+ */
+export function buildSseResponse(
+  settings: LiveStreamSettings,
+  signal?: AbortSignal | null,
+): Response {
+  const messageId = `m-live-${Date.now()}`;
+  const frames = buildSseFrames(settings.script, messageId, "perf-chat");
+  const encoder = new TextEncoder();
+  const abortSignal = signal ?? null;
+  let index = 0;
+  let emittedChars = 0;
+  let timer: number | undefined;
+  let settled = false;
+  let detachAbortListener: (() => void) | undefined;
+
+  // Enter the terminal state once: stop the pump and drop the abort listener.
+  // Returns false when the stream had already settled.
+  const settle = (): boolean => {
+    if (settled) return false;
+    settled = true;
+    if (timer !== undefined) {
+      clearTimeout(timer);
+      timer = undefined;
+    }
+    detachAbortListener?.();
+    detachAbortListener = undefined;
+    return true;
+  };
+
+  const stream = new ReadableStream<Uint8Array>({
+    start(controller) {
+      const abort = (): void => {
+        if (!settle()) return;
+        try {
+          controller.close();
+        } catch {
+          // Reader already cancelled — nothing left to close.
+        }
+        settings.onAbort?.();
+      };
+
+      const pump = (): void => {
+        timer = undefined;
+        if (settled) return;
+        if (abortSignal?.aborted) {
+          abort();
+          return;
+        }
+        if (index >= frames.length) {
+          settle();
+          try {
+            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+            controller.close();
+          } catch {
+            // Cancelled mid-flight: the terminator was never delivered, so the
+            // turn must not be reported as done.
+            return;
+          }
+          settings.onDone?.();
+          return;
+        }
+        const next = frames[index];
+        index += 1;
+        try {
+          controller.enqueue(encoder.encode(next.data));
+        } catch {
+          settle();
+          return;
+        }
+        emittedChars += next.chars;
+        settings.onProgress?.(index, emittedChars);
+        timer = window.setTimeout(pump, settings.chunkIntervalMs);
+      };
+
+      if (abortSignal) {
+        // An already-aborted signal never dispatches "abort" again, so it has
+        // to be handled here rather than by the listener.
+        if (abortSignal.aborted) {
+          abort();
+          return;
+        }
+        abortSignal.addEventListener("abort", abort, { once: true });
+        detachAbortListener = () => abortSignal.removeEventListener("abort", abort);
+      }
+      timer = window.setTimeout(pump, settings.chunkIntervalMs);
+    },
+    cancel() {
+      // The consumer gave up on the body: stop pumping and report the abort so
+      // observers are not left believing the stream is still running.
+      if (settle()) settings.onAbort?.();
+    },
+  });
+
+  return new Response(stream, {
+    status: 200,
+    headers: {
+      "content-type": "text/event-stream",
+      "cache-control": "no-cache",
+      "x-vercel-ai-ui-message-stream": "v1",
+    },
+  });
+}
+
+// ---------------------------------------------------------------------------
+// window.fetch patch
+// ---------------------------------------------------------------------------
+
+let currentLiveSettings: LiveStreamSettings | null = null;
+
+/**
+ * Arm `/api/ai-chat/stream` requests with a scripted turn.
+ *
+ * The settings are stored in module state and replace whatever was set
+ * before. They are not cleared after a request is served, so every later
+ * intercepted request reuses them until this function is called again.
+ */
+export function setLiveStreamSettings(settings: LiveStreamSettings): void {
+ currentLiveSettings = settings;
+}
+
+/**
+ * Patch `window.fetch` BEFORE React mounts: requests to `/api/ai-chat/stream`
+ * get the synthetic SSE Response; everything else passes through untouched.
+ *
+ * A request is intercepted when its URL contains `/api/ai-chat/stream`. If no
+ * live settings are armed, an intercepted request resolves with a 500 plain
+ * text response instead of reaching the network. The abort signal is taken
+ * from `init.signal` and, failing that, from the `Request` object itself, so
+ * callers that pass a pre-built `Request` can still stop the stream.
+ */
+export function installAiChatStreamFetchPatch(): void {
+  const originalFetch = window.fetch.bind(window);
+  window.fetch = (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
+    const url =
+      typeof input === "string"
+        ? input
+        : input instanceof URL
+          ? input.href
+          : input.url;
+    if (!url.includes("/api/ai-chat/stream")) {
+      return originalFetch(input, init);
+    }
+    const settings = currentLiveSettings;
+    if (!settings) {
+      return Promise.resolve(
+        new Response("perf harness: no live stream configured", { status: 500 }),
+      );
+    }
+    // `init.signal` wins when given, as with a real fetch; otherwise fall back
+    // to the signal carried by a Request input.
+    const signal = init?.signal ?? (input instanceof Request ? input.signal : null);
+    return Promise.resolve(buildSseResponse(settings, signal));
+  };
+}
diff --git a/apps/client/src/features/ai-chat/components/ai-chat.module.css b/apps/client/src/features/ai-chat/components/ai-chat.module.css
index cd788cdd..7b99178c 100644
--- a/apps/client/src/features/ai-chat/components/ai-chat.module.css
+++ b/apps/client/src/features/ai-chat/components/ai-chat.module.css
@@ -164,8 +164,8 @@
/* NOTE: `white-space: pre-wrap` is intentionally NOT set here. On the
rendered markdown
it would turn the newlines between block tags
(\n
, \n) into visible blank lines/indents on top of the
- margins. The plain-text fallback that needs pre-wrap sets it
- inline itself (see reasoning-block.tsx). */
+ margins. The streaming plain-text path that needs pre-wrap sets it
+ per chunk instead, in PlainChunk (see streaming-plain-text.tsx). */
}
.reasoningText p {
diff --git a/apps/client/src/features/ai-chat/components/message-item.test.ts b/apps/client/src/features/ai-chat/components/message-item.test.ts
index b5b6d96a..b8d9474e 100644
--- a/apps/client/src/features/ai-chat/components/message-item.test.ts
+++ b/apps/client/src/features/ai-chat/components/message-item.test.ts
@@ -65,6 +65,25 @@ describe("arePropsEqual", () => {
expect(arePropsEqual(props(m), props(m))).toBe(true);
});
+ // REGRESSION (stranded reasoning part): a reasoning part is left at
+ // `state:"streaming"` forever when the turn ends without `reasoning-end`
+ // (manual Stop during thinking). The signature is EQUAL across that turn-end
+ // flip (nothing in the message changed), so the comparator must ALSO compare
+ // `turnStreaming` — otherwise the memo swallows the flip and ReasoningBlock
+ // never switches from chunked plain text to its one-time markdown parse.
+ it("returns false when turnStreaming differs despite an equal signature", () => {
+ const m = msg([
+ { type: "reasoning", text: "thinking", state: "streaming" },
+ { type: "text", text: "answer" },
+ ]);
+ expect(
+ arePropsEqual(
+ props(m, { turnStreaming: true }),
+ props(m, { turnStreaming: false }),
+ ),
+ ).toBe(false);
+ });
+
it("returns true for the same content in a different message object", () => {
const a = msg([{ type: "text", text: "answer" }]);
const b = msg([{ type: "text", text: "answer" }]);
diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx
index 46c25af2..4e645d8a 100644
--- a/apps/client/src/features/ai-chat/components/message-item.tsx
+++ b/apps/client/src/features/ai-chat/components/message-item.tsx
@@ -52,6 +52,20 @@ interface MessageItemProps {
* absent; the public share passes the configured identity (agent role) name.
*/
assistantName?: string;
+ /**
+ * Whether the WHOLE turn is still streaming (MessageList's `isStreaming`).
+ * A reasoning part may be left `state: "streaming"` forever when the turn
+ * ends without a `reasoning-end` chunk (manual Stop during the thinking
+ * phase, or a provider that never emits it) — the AI SDK finalizes reasoning
+ * state ONLY on `reasoning-end`, not on `finish-step`/`finish`. So part-level
+ * state alone cannot prove liveness; the reasoning part is treated as live
+ * only while the whole turn is still streaming. Defaults to false.
+ *
+ * The parent passes it as "turn is live AND this is the tail row", so a
+ * stranded part in an EARLIER row never re-activates when a later turn
+ * streams.
+ */
+ turnStreaming?: boolean;
}
/**
@@ -105,6 +119,7 @@ function MessageItem({
showCitations = true,
neutralizeInternalLinks = false,
assistantName,
+ turnStreaming = false,
}: MessageItemProps) {
// `signature` is intentionally not read in the body — it exists solely as the
// memo key (see arePropsEqual). The render reads `message` directly.
@@ -155,8 +170,23 @@ function MessageItem({
const text = (part as { text?: string }).text ?? "";
if (!text.trim() && !(reasoningTokens && reasoningTokens > 0))
return null;
+ // Absent state (persisted rows) and "done" both mean finalized.
+ // `messageSignature` already includes each part's `state`, so the
+ // streaming→done flip changes the row signature and re-renders this
+ // row — which is what lets ReasoningBlock switch from chunked plain
+ // text to its one-time markdown parse (see reasoning-block.tsx).
+ // ALSO require the turn to be live: a part stranded at
+ // `state:"streaming"` after the turn ended (no `reasoning-end` — see
+ // the `turnStreaming` prop doc) must still finalize and parse.
+ const streaming =
+ turnStreaming && (part as { state?: string }).state === "streaming";
return (
-
+
);
}
@@ -245,7 +275,11 @@ export function arePropsEqual(
prev.signature === next.signature &&
prev.showCitations === next.showCitations &&
prev.neutralizeInternalLinks === next.neutralizeInternalLinks &&
- prev.assistantName === next.assistantName
+ prev.assistantName === next.assistantName &&
+ // The flag is tail-gated by the parent, so the turn-end flip re-renders only
+ // the tail row, once — that converts a stranded `state:"streaming"` reasoning
+ // part to its one-time markdown parse (see the `turnStreaming` prop doc).
+ prev.turnStreaming === next.turnStreaming
);
}
diff --git a/apps/client/src/features/ai-chat/components/message-list.test.tsx b/apps/client/src/features/ai-chat/components/message-list.test.tsx
index b19470a0..20987a4a 100644
--- a/apps/client/src/features/ai-chat/components/message-list.test.tsx
+++ b/apps/client/src/features/ai-chat/components/message-list.test.tsx
@@ -1,5 +1,5 @@
import { describe, expect, it, vi } from "vitest";
-import { render } from "@testing-library/react";
+import { fireEvent, render } from "@testing-library/react";
import { MantineProvider } from "@mantine/core";
import type { UIMessage } from "@ai-sdk/react";
@@ -50,8 +50,9 @@ vi.stubGlobal(
// One assistant message wrapping the given `parts`. Reused across renders in the
// regression test to model how the AI SDK hands back the SAME message object.
-const msg = (parts: UIMessage["parts"]): UIMessage =>
- ({ id: "m1", role: "assistant", parts }) as UIMessage;
+// Pass an explicit `id` when a test renders several rows at once.
+const msg = (parts: UIMessage["parts"], id = "m1"): UIMessage =>
+ ({ id, role: "assistant", parts }) as UIMessage;
describe("MessageList", () => {
it("wires the real MessageItem and supplies a valid signature end-to-end", () => {
@@ -116,4 +117,102 @@ describe("MessageList", () => {
renderChatMarkdownSpy.mock.calls.some((c) => c[0] === "streamed answer"),
).toBe(true);
});
+
+ // REGRESSION (stranded reasoning part): the AI SDK sets a reasoning part's
+ // state to "done" ONLY on the `reasoning-end` chunk — `finish-step`/`finish`
+ // do NOT finalize it. A manual Stop during the thinking phase (or a provider
+ // that never emits `reasoning-end`) therefore leaves the part at
+ // `state:"streaming"` forever. MessageItem must derive ReasoningBlock's
+ // `streaming` from part state AND turn liveness (MessageList's `isStreaming`,
+ // forwarded as `turnStreaming`): while the turn streams the expanded block
+ // shows chunked plain text (no parse); once the turn ends — even though the
+ // part is still `state:"streaming"` — the block finalizes and does its
+ // one-time markdown parse. Note the message signature does NOT change across
+ // that flip, so this also exercises the `turnStreaming` memo comparison in
+ // arePropsEqual (without it the row would never re-render).
+ it("finalizes a reasoning part stranded at state:'streaming' when the turn ends", () => {
+ renderChatMarkdownSpy.mockClear();
+ const reasoningText = "**bold** thinking";
+ // Reasoning part stranded mid-stream + a non-empty answer part (a
+ // reasoning-only message renders nothing — see message-content.ts).
+ const message = msg([
+ { type: "reasoning", text: reasoningText, state: "streaming" },
+ { type: "text", text: "partial answer" },
+ ]);
+ const parsesOfReasoning = () =>
+ renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText)
+ .length;
+
+ const { rerender, getByRole, queryByText } = render(
+
+
+ ,
+ );
+ // Expand the reasoning block (its toggle is the only button in the list).
+ fireEvent.click(getByRole("button"));
+ // Turn live + part streaming -> ReasoningBlock received streaming=true:
+ // the body is chunked plain text (raw markdown syntax), NOT parsed.
+ expect(queryByText(/bold/)).not.toBeNull();
+ expect(parsesOfReasoning()).toBe(0);
+
+ // The turn ends WITHOUT `reasoning-end`: the part object is untouched
+ // (still state:"streaming"), only the turn-level flag flips.
+ rerender(
+
+
+ ,
+ );
+ // ReasoningBlock now received streaming=false and did its one-time parse.
+ expect(parsesOfReasoning()).toBe(1);
+ });
+
+ // REGRESSION (turn-global liveness leaking into earlier rows): `isStreaming`
+ // is turn-global, so forwarding it to EVERY row would re-mark a reasoning
+ // part stranded at `state:"streaming"` in a PREVIOUS message (see the test
+ // above) as live again whenever a LATER turn streams — an expanded stranded
+ // block would flip markdown -> raw plain text -> markdown across turn
+ // boundaries, re-parsing each time. MessageList must gate `turnStreaming`
+ // to the TAIL row only.
+ it("keeps a stranded reasoning part in an earlier message finalized while a later turn streams", () => {
+ renderChatMarkdownSpy.mockClear();
+ const reasoningText = "**bold** thinking";
+ // First (earlier) assistant message: its turn was stopped during the
+ // thinking phase, leaving the reasoning part at state:"streaming".
+ const first = msg(
+ [
+ { type: "reasoning", text: reasoningText, state: "streaming" },
+ { type: "text", text: "first answer" },
+ ],
+ "m1",
+ );
+ // Second assistant message: the LATER turn, currently streaming.
+ const second = msg([{ type: "text", text: "second answer" }], "m2");
+ const parsesOfReasoning = () =>
+ renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText)
+ .length;
+
+ const { rerender, getByRole, queryByText } = render(
+
+
+ ,
+ );
+ // Expand the first row's reasoning block (the only toggle in the list —
+ // the second message has no reasoning or tool parts).
+ fireEvent.click(getByRole("button"));
+ // The turn is live but the first row is NOT the tail: its ReasoningBlock
+ // received streaming=false, so the stranded part stays finalized and does
+ // its one-time markdown parse instead of dropping to chunked plain text.
+ expect(queryByText(/bold/)).not.toBeNull();
+ expect(parsesOfReasoning()).toBe(1);
+
+ // A later-turn delta re-renders the list; the earlier block must neither
+ // flip back to streaming nor re-parse.
+ (second.parts[0] as { text: string }).text = "second answer grows";
+ rerender(
+
+
+ ,
+ );
+ expect(parsesOfReasoning()).toBe(1);
+ });
});
diff --git a/apps/client/src/features/ai-chat/components/message-list.tsx b/apps/client/src/features/ai-chat/components/message-list.tsx
index 2cb2183c..25435aa5 100644
--- a/apps/client/src/features/ai-chat/components/message-list.tsx
+++ b/apps/client/src/features/ai-chat/components/message-list.tsx
@@ -196,7 +196,7 @@ export default function MessageList({
return (
- {messages.map((message) => (
+ {messages.map((message, index) => (
// `signature` is snapshotted HERE (parent render) into an immutable
// string and handed to MessageItem as its memo key. It must NOT be
// recomputed inside MessageItem's arePropsEqual: the AI SDK mutates the
@@ -210,6 +210,13 @@ export default function MessageList({
showCitations={showCitations}
neutralizeInternalLinks={neutralizeInternalLinks}
assistantName={assistantName}
+ // Turn-level liveness, gated to the TAIL row: only the tail message
+ // can belong to the in-flight turn, so a reasoning part stranded at
+ // `state:"streaming"` in an EARLIER message (its turn ended without
+ // `reasoning-end`) stays finalized and doesn't flip back to plain
+ // text (and re-parse) whenever a later turn streams — see
+ // message-item.tsx.
+ turnStreaming={isStreaming && index === messages.length - 1}
/>
))}
{typing && (
diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx
index ca3443fc..5754821d 100644
--- a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx
+++ b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx
@@ -28,7 +28,11 @@ import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts.
-function renderBlock(props: { text: string; tokens?: number }) {
+function renderBlock(props: {
+ text: string;
+ tokens?: number;
+ streaming?: boolean;
+}) {
return render(
@@ -84,4 +88,54 @@ describe("ReasoningBlock", () => {
fireEvent.click(screen.getByRole("button"));
expect(renderSpy).toHaveBeenCalledTimes(1);
});
+
+ it("does not parse while expanded and STREAMING; shows chunked plain text", () => {
+ const renderSpy = vi.mocked(renderChatMarkdown);
+ renderSpy.mockClear();
+ renderBlock({
+ text: "первый абзац размышлений\n\nвторой абзац растёт",
+ tokens: 5,
+ streaming: true,
+ });
+ fireEvent.click(screen.getByRole("button"));
+ // Expanded + still streaming: NO markdown parse and NO innerHTML swaps per
+ // delta — the body is chunked plain text (only the tail chunk updates).
+ // This is the O(n²) hole #302 left open (Safari whole-tab freeze).
+ expect(renderSpy).not.toHaveBeenCalled();
+ // Both paragraph chunks' raw text is present in the body.
+ expect(screen.getByText(/первый абзац размышлений/)).toBeDefined();
+ expect(screen.getByText(/второй абзац растёт/)).toBeDefined();
+ });
+
+ it("parses exactly once when streaming flips to done while expanded", () => {
+ const renderSpy = vi.mocked(renderChatMarkdown);
+ renderSpy.mockClear();
+ const { rerender } = renderBlock({
+ text: "**bold** reasoning",
+ tokens: 5,
+ streaming: true,
+ });
+ fireEvent.click(screen.getByRole("button"));
+ expect(renderSpy).not.toHaveBeenCalled();
+
+ // Finalization: the part's state flips streaming→done, the parent
+ // re-renders the row (the flip changes the message signature), and the
+ // block does its ONE markdown parse of the now-stable text.
+ rerender(
+
+
+ ,
+ );
+ expect(renderSpy).toHaveBeenCalledTimes(1);
+ // The parsed html branch rendered (the mock wraps the input in a wrapper element).
+ expect(screen.getByText(/reasoning/)).toBeDefined();
+
+ // Further re-renders with unchanged props do not re-parse.
+ rerender(
+
+
+ ,
+ );
+ expect(renderSpy).toHaveBeenCalledTimes(1);
+ });
});
diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.tsx
index 25cc7459..8156730d 100644
--- a/apps/client/src/features/ai-chat/components/reasoning-block.tsx
+++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx
@@ -5,6 +5,7 @@ import { useTranslation } from "react-i18next";
import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts";
import { collapseBlankLines } from "@/features/ai-chat/utils/collapse-blank-lines.ts";
import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts";
+import { StreamingPlainText } from "@/features/ai-chat/components/streaming-plain-text.tsx";
import classes from "@/features/ai-chat/components/ai-chat.module.css";
interface ReasoningBlockProps {
@@ -15,6 +16,10 @@ interface ReasoningBlockProps {
* step/turn has finished. When absent (or 0) the count is estimated from the
* text length so it ticks live as the reasoning streams in. */
tokens?: number;
+ /** True only while the reasoning is live: part `state === "streaming"` AND
+ * the turn still running. False means finalized: persisted history, `state ===
+ * "done"`, or stranded at turn end. Gates the markdown parse (see memo below). */
+ streaming?: boolean;
}
/**
@@ -27,26 +32,30 @@ interface ReasoningBlockProps {
* Providers that don't stream reasoning TEXT still render this block from the
* authoritative count alone (header only, empty body) so the cost is visible.
*/
-function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
+function ReasoningBlock({ text, tokens, streaming = false }: ReasoningBlockProps) {
const { t } = useTranslation();
const [open, setOpen] = useState(false);
// Authoritative count wins; otherwise estimate live from the streamed text.
const count = tokens && tokens > 0 ? tokens : estimateTokens(text);
const trimmed = text.trim();
- // Parse the reasoning markdown ONLY while the block is expanded. Collapsed is the
- // default and the common case during a long "thinking" stream: reasoning text
- // streams in and grows with every throttled delta (~20Hz), so a `[trimmed]`-only
- // memo re-parses the whole, ever-growing text (marked + DOMPurify) on every delta
- // — an O(n²) storm that pins the main thread and freezes the chat, all for a block
- // the user isn't even looking at (the html is only shown inside
- // below). Gating on `open` skips that hidden parsing entirely; expanding parses the
- // current text once (an instant, user-initiated click), and further streaming while
- // open is the normal per-delta append render, like the answer.
+ // Markdown parse invariant (per throttled ~20Hz stream delta the text GROWS):
+ // 1. Collapsed -> never parse (#302): the html is only shown inside the
+ // expanded body, so parsing for a hidden body would be an O(n²)
+ // marked + DOMPurify storm.
+ // 2. Expanded + STREAMING -> no parse and no innerHTML swaps either: the body
+ // renders as chunked plain text (StreamingPlainText) with a memoized
+ // stable prefix, so each delta updates only the tail chunk's text node.
+ // This closes the O(n²) hole #302 left open ("expanded while streaming")
+ // that froze the whole tab in Safari when watching the thinking stream.
+ // 3. Finalized + expanded -> exactly one parse: `trimmed` and `streaming`
+ // are stable after the part is done, so this memo runs once per expand.
const html = useMemo(
() =>
- open && trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : "",
- [open, trimmed],
+ open && trimmed && !streaming
+ ? renderChatMarkdown(collapseBlankLines(trimmed), {})
+ : "",
+ [open, trimmed, streaming],
);
return (
@@ -83,12 +92,12 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
dangerouslySetInnerHTML={{ __html: html }}
/>
) : (
-
- {trimmed}
-
+ // Still streaming (or markdown yielded nothing): chunked plain text.
+ // The wrapper carries the reasoningText styling; each chunk sets its
+ // own pre-wrap inline (NOT on this div — see ai-chat.module.css).
+
+
+
)}
)}
@@ -96,7 +105,7 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) {
);
}
-// Memoized: re-renders only when `text`/`tokens` change (primitive props, default
-// shallow compare), so a parent re-render during streaming of OTHER content does
-// not re-run the markdown parse for an already-finalized reasoning block.
+// Memoized: re-renders only when `text`/`tokens`/`streaming` change (primitive
+// props, default shallow compare), so a parent re-render during streaming of OTHER
+// content does not re-run the markdown parse for an already-finalized reasoning block.
export default memo(ReasoningBlock);
diff --git a/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx
new file mode 100644
index 00000000..3f6876bf
--- /dev/null
+++ b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx
@@ -0,0 +1,146 @@
+import { describe, it, expect } from "vitest";
+import { render } from "@testing-library/react";
+
+import {
+ splitPlainChunks,
+ StreamingPlainText,
+} from "./streaming-plain-text";
+
+describe("splitPlainChunks", () => {
+ // THE load-bearing property (see the invariant comment in the module): under
+ // append-only growth, every chunk except the LAST must be byte-identical
+ // between successive calls, so the memoized chunk components never re-render
+ // for the stable prefix and each stream delta touches only the tail chunk.
+ it("keeps all non-last chunks byte-identical across append-only growth", () => {
+ // A simulated reasoning stream covering: appends inside the last paragraph,
+ // appends that ADD new blank lines, growth of a trailing newline run, and a
+ // trailing separator later followed by text.
+ const steps = [
+ "Пер",
+ "Первый абзац",
+ "Первый абзац\n",
+ "Первый абзац\n\n",
+ "Первый абзац\n\n\n",
+ "Первый абзац\n\n\nВторой",
+ "Первый абзац\n\n\nВторой абзац растёт",
+ "Первый абзац\n\n\nВторой абзац растёт\n\nТретий",
+ "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\n",
+ "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\nЧетвёртый",
+ ];
+ let prev: string[] = [];
+ for (const text of steps) {
+ const next = splitPlainChunks(text);
+ // Lossless: chunks always reassemble into the exact input.
+ expect(next.join("")).toBe(text);
+ // Chunk count never shrinks (boundaries never disappear).
+ expect(next.length).toBeGreaterThanOrEqual(prev.length);
+ // Every previously-FINAL chunk (all but prev's last) is unchanged.
+ for (let i = 0; i < prev.length - 1; i++) {
+ expect(next[i]).toBe(prev[i]);
+ }
+ prev = next;
+ }
+ // Guard against a vacuous pass: the final split must be multi-chunk.
+ expect(prev.length).toBeGreaterThanOrEqual(4);
+ });
+
+ it("attaches the blank-line separator run to the preceding chunk", () => {
+ expect(splitPlainChunks("a\n\nb")).toEqual(["a\n\n", "b"]);
+ // A longer run is ONE separator, not several boundaries.
+ expect(splitPlainChunks("a\n\n\n\nb")).toEqual(["a\n\n\n\n", "b"]);
+ expect(splitPlainChunks("a\n\nb\n\n\nc")).toEqual(["a\n\n", "b\n\n\n", "c"]);
+ });
+
+ it("single newlines are not boundaries", () => {
+ expect(splitPlainChunks("a\nb\nc")).toEqual(["a\nb\nc"]);
+ });
+
+ // INTENTIONAL: CRLF blank lines are NOT boundaries (the regex is `\n{2,}`
+ // only). Supporting `(?:\r?\n){2,}` would break the stable-prefix invariant:
+ // a lone trailing `\r` is not a boundary, but a later-appended `\n` would
+ // merge with it into a new separator unit and retroactively create a boundary
+ // INSIDE previously-emitted text, moving old chunk edges. So CRLF input stays
+ // in one (still lossless) chunk — only granularity is coarser; LLM output is
+ // `\n` in practice. See the doc comment on splitPlainChunks.
+ it("keeps CRLF blank lines inside one chunk", () => {
+ expect(splitPlainChunks("a\r\n\r\nb")).toEqual(["a\r\n\r\nb"]);
+ // Mixed input: only pure-`\n` runs split.
+ expect(splitPlainChunks("a\r\n\r\nb\n\nc")).toEqual(["a\r\n\r\nb\n\n", "c"]);
+ });
+
+ it("never emits empty phantom chunks (multi-blank-line / trailing newlines)", () => {
+ expect(splitPlainChunks("")).toEqual([]);
+ // A trailing newline run stays inside the last chunk (it may still grow).
+ expect(splitPlainChunks("a\n")).toEqual(["a\n"]);
+ expect(splitPlainChunks("a\n\n")).toEqual(["a\n\n"]);
+ expect(splitPlainChunks("a\n\nb\n\n")).toEqual(["a\n\n", "b\n\n"]);
+ // Degenerate all-newlines input is a single deterministic chunk.
+ expect(splitPlainChunks("\n\n\n")).toEqual(["\n\n\n"]);
+ for (const text of ["a\n\n\nb\n\n", "x\n\n\n\n\ny\n\nz\n"]) {
+ for (const chunk of splitPlainChunks(text)) {
+ expect(chunk.length).toBeGreaterThan(0);
+ }
+ }
+ });
+});
+
+describe("StreamingPlainText", () => {
+  it("renders one block per chunk, stripping trailing separator newlines at display time", () => {
+    const text = "первый абзац\n\nвторой абзац\n\n\nтретий";
+    const { container } = render(<StreamingPlainText text={text} />);
+    const blocks = Array.from(container.querySelectorAll("div"));
+    // One block element per chunk.
+    expect(blocks.length).toBe(splitPlainChunks(text).length);
+    // DISPLAY-ONLY strip: each rendered block drops its chunk's trailing
+    // separator newlines — rendering them inside a pre-wrap block would add an
+    // empty line ON TOP of the block break (a doubled gap). The RAW chunks
+    // keep their separators (losslessness is asserted on splitPlainChunks
+    // above); multi-blank-line runs collapse to one uniform gap, consistent
+    // with collapseBlankLines on the finalized markdown path.
+    expect(blocks.map((b) => b.textContent)).toEqual([
+      "первый абзац",
+      "второй абзац",
+      "третий",
+    ]);
+    // The uniform paragraph gap comes from the block margin instead (matches
+    // the `.reasoningText p { margin: 0 0 4px }` rhythm of the markdown path).
+    for (const block of blocks) {
+      expect((block as HTMLElement).style.marginBottom).toBe("4px");
+    }
+  });
+
+  it("keeps interior newlines intact — only the trailing run is stripped", () => {
+    const text = "строка один\nстрока два\n\nхвост";
+    const { container } = render(<StreamingPlainText text={text} />);
+    const blocks = Array.from(container.querySelectorAll("div"));
+    expect(blocks.map((b) => b.textContent)).toEqual([
+      "строка один\nстрока два",
+      "хвост",
+    ]);
+  });
+
+  // SECURITY INVARIANT — the load-bearing property of the streaming path: the
+  // reasoning text is raw, untrusted model output rendered WITHOUT a sanitizer
+  // (no marked/DOMPurify, no innerHTML). PlainChunk emits it as a React text
+  // node, which escapes it, so HTML in the model output is inert. This test
+  // pins that the path is a TEXT sink, not an HTML sink: a future change to
+  // `dangerouslySetInnerHTML` (reintroducing XSS) MUST fail here.
+  //
+  // The existing tests assert via textContent, which strips tags and so cannot
+  // distinguish an escaped literal from injected DOM. This one asserts on the
+  // parsed DOM directly: if the markup were injected as HTML, the `<img>`/`<b>`
+  // would become real elements and querySelector would find them.
+  it("renders HTML-like reasoning as an escaped literal, never as injected DOM", () => {
+    const text = "<img src=x onerror=alert(1)>\n\n<b>hi</b>";
+    const { container } = render(<StreamingPlainText text={text} />);
+    // No DOM elements were created from the payload — it was NOT parsed as HTML.
+    expect(container.querySelector("img")).toBeNull();
+    expect(container.querySelector("b")).toBeNull();
+    // The raw markup survived verbatim as text (proving it is escaped, not
+    // interpreted). textContent alone can't prove this, but combined with the
+    // querySelector assertions above it does: the literals are present AND no
+    // elements exist.
+    expect(container.textContent).toContain("<b>hi</b>");
+    expect(container.textContent).toContain("<img src=x onerror=alert(1)>");
+  });
+});
diff --git a/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx b/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx
new file mode 100644
index 00000000..bc72d790
--- /dev/null
+++ b/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx
@@ -0,0 +1,90 @@
+import { memo, useMemo } from "react";
+
+/**
+ * Split plain text into chunks at blank-line (paragraph) boundaries, keeping
+ * each separator run attached to the END of the preceding chunk, so the chunks
+ * always reassemble byte-for-byte into the input.
+ *
+ * A boundary is the end of a maximal `\n{2,}` run that is followed by at least
+ * one more character. A newline run that is a SUFFIX of the text is NOT a
+ * boundary yet: under append-only growth it may still gain more newlines, and
+ * cutting there would move the boundary on the next call.
+ *
+ * CRITICAL INVARIANT (load-bearing for StreamingPlainText's memoization): for
+ * APPEND-ONLY growth of `text`, every chunk except the LAST is byte-identical
+ * between successive calls — previously-emitted boundaries never move. Proof
+ * sketch: appending never modifies existing characters, so (a) an existing
+ * boundary's newline run and its following character are untouched and the
+ * boundary persists at the same offset; (b) no NEW boundary can appear strictly
+ * inside the old text, because a `\n{2,}` run followed by a character entirely
+ * within the old text would already have been a boundary. New boundaries can
+ * only materialize at or after the old text's end, i.e. inside the last chunk.
+ *
+ * CRLF is deliberately NOT a boundary: supporting `(?:\r?\n){2,}` would BREAK
+ * the invariant above — a lone trailing `\r` is not a boundary, but a later-
+ * appended `\n` would merge with it into a new separator unit and retroactively
+ * create a boundary INSIDE previously-emitted text, moving old chunk edges.
+ * With `\n`-only runs, appended characters can never extend a run that is
+ * already followed by a non-`\n` character, so old boundaries are immutable.
+ * CRLF blank lines therefore intentionally stay inside one chunk: correctness/
+ * losslessness are unaffected, only chunk granularity for CRLF input (LLM
+ * output is `\n` in practice).
+ */
+export function splitPlainChunks(text: string): string[] {
+ const chunks: string[] = [];
+ let start = 0;
+ for (const match of text.matchAll(/\n{2,}/g)) {
+ const end = match.index + match[0].length;
+ // Suffix run: not a stable boundary yet (see the invariant above).
+ if (end >= text.length) break;
+ chunks.push(text.slice(start, end));
+ start = end;
+ }
+ if (start < text.length) chunks.push(text.slice(start));
+ return chunks;
+}
+
+/**
+ * One immutable chunk. Memoized on its string prop: during streaming only the
+ * TAIL chunk's text changes (see the splitPlainChunks invariant), so React
+ * skips every stable chunk and the per-delta DOM work is a single text-node
+ * update. `pre-wrap` is set per chunk (like the old raw-text fallback did), NOT
+ * on the surrounding markdown-styled container — see the note in
+ * ai-chat.module.css. Font/size/color are inherited from that container.
+ *
+ * DISPLAY-ONLY newline strip: the raw chunk keeps its trailing `\n{2,}`
+ * separator run attached (the splitPlainChunks invariant, load-bearing for the
+ * memo), but rendering those newlines inside a pre-wrap block would add an
+ * empty line ON TOP of the block break — a doubled gap. So the RENDERED string
+ * drops trailing newlines and the paragraph gap comes from `marginBottom: 4`
+ * instead, matching the `.reasoningText p { margin: 0 0 4px }` rhythm of the
+ * finalized markdown. Multi-blank-line runs thus collapse to one uniform gap,
+ * consistent with `collapseBlankLines` on the markdown path. The last chunk
+ * usually has no trailing newlines (strip is a no-op); its margin is harmless.
+ */
+const PlainChunk = memo(function PlainChunk({ text }: { text: string }) {
+  return (
+    <div style={{ whiteSpace: "pre-wrap", marginBottom: 4 }}>
+      {text.replace(/\n+$/, "")}
+    </div>
+  );
+});
+
+/**
+ * Renders still-streaming plain text as a list of paragraph chunks where only
+ * the tail chunk changes per delta. No markdown, no sanitizer, no innerHTML —
+ * this is the cheap streaming-time stand-in for the one-time markdown parse
+ * that happens after the part is finalized (see reasoning-block.tsx).
+ */
+export function StreamingPlainText({ text }: { text: string }) {
+  const chunks = useMemo(() => splitPlainChunks(text), [text]);
+  return (
+    <>
+      {chunks.map((chunk, index) => (
+        // Index keys are stable here: chunks are append-only (the invariant),
+        // so an index never gets a different chunk's content mid-stream.
+        <PlainChunk key={index} text={chunk} />
+      ))}
+    </>
+  );
+}