test(ai-chat): add dev-only perf harness for the chat stream pipeline

Mounts the real ChatThread against a synthetic AI SDK v6 UI-message SSE stream (multi-step reasoning + getPage tool calls + markdown answer; 5k/20k/50k-token presets, 15/5 ms chunk cadence) with long-task, FPS and mount-time instrumentation. Two scenarios: mount a persisted transcript (open-chat cost) and stream a live turn through the real useChat pipeline via a window.fetch patch scoped to /api/ai-chat/stream. Served only by the vite dev server at /perf/ai-chat-perf.html; the production build keeps its single index.html entry, so none of this ships. Also ignore local trace dumps under .claude/perf-traces/. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 03:51:22 +03:00
parent 351860ba4b
commit d4d05c8e8b
5 changed files with 971 additions and 0 deletions
@@ -43,6 +43,8 @@ lerna-debug.log*
 .nx/cache
 .claude/worktrees/
 .claude/tmp/
+# Local Chrome performance traces recorded by the AI-chat perf harness
+.claude/perf-traces/

 # TypeScript incremental build artifacts
 *.tsbuildinfo
@@ -0,0 +1,50 @@
+/**
+ * DEV-ONLY entry for the AI chat perf harness (served by the vite dev server at
+ * /perf/ai-chat-perf.html; never part of the production build, which uses the
+ * single default index.html entry).
+ *
+ * Mounts the minimal provider stack the real ChatThread needs (Mantine, router
+ * for tool-card Links, react-query, i18n) and patches `window.fetch` BEFORE
+ * React mounts so ChatThread's DefaultChatTransport requests to
+ * /api/ai-chat/stream are answered by the synthetic SSE generator.
+ */
+
+import "@mantine/core/styles.css";
+
+import ReactDOM from "react-dom/client";
+import { MantineProvider } from "@mantine/core";
+import { MemoryRouter } from "react-router-dom";
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
+import { mantineCssResolver, theme } from "../src/theme.ts";
+// i18n side-effect init (http-backend). Translations load from /locales in dev;
+// missing keys fall back to the key text, which is fine for the harness.
+import "../src/i18n.ts";
+import { installAiChatStreamFetchPatch } from "./synthetic-turn.ts";
+import PerfHarness from "./harness.tsx";
+
+// MUST run before React mounts: ChatThread creates its transport with the
+// global fetch, so the patch has to be in place before the first send.
+installAiChatStreamFetchPatch();
+
+const queryClient = new QueryClient({
+  defaultOptions: {
+    queries: {
+      refetchOnMount: false,
+      refetchOnWindowFocus: false,
+      retry: false,
+      staleTime: 5 * 60 * 1000,
+    },
+  },
+});
+
+const container = document.getElementById("root") as HTMLElement;
+
+ReactDOM.createRoot(container).render(
+  <MemoryRouter>
+    <MantineProvider theme={theme} cssVariablesResolver={mantineCssResolver}>
+      <QueryClientProvider client={queryClient}>
+        <PerfHarness />
+      </QueryClientProvider>
+    </MantineProvider>
+  </MemoryRouter>,
+);
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>AI chat perf harness</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="./ai-chat-perf-main.tsx"></script>
+  </body>
+</html>
@@ -0,0 +1,390 @@
+/**
+ * DEV-ONLY perf harness UI for the AI chat feature.
+ *
+ * Left panel: controls + live stats. Right side: a bordered box (~real chat
+ * window size) hosting the REAL ChatThread component.
+ *
+ * Scenario A "Open existing chat": mount ChatThread seeded with a large
+ * persisted transcript and measure click -> post-mount-paint time.
+ * Scenario B "Live agent stream": mount an empty chat and auto-send a message;
+ * the fetch patch (see synthetic-turn.ts) answers with a synthetic SSE stream
+ * through the real useChat pipeline.
+ */
+
+import { useEffect, useMemo, useRef, useState } from "react";
+import type { CSSProperties, MutableRefObject } from "react";
+import ChatThread from "../src/features/ai-chat/components/chat-thread.tsx";
+import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
+import {
+  PRESETS,
+  buildPersistedRows,
+  buildTurnScript,
+  setLiveStreamSettings,
+  type PresetKey,
+} from "./synthetic-turn.ts";
+
+const AUTO_SEND_TEXT = "Run the synthetic perf turn";
+const AUTO_SEND_TIMEOUT_MS = 1000;
+/** Stats display refresh period — 2x/s so the display itself stays cheap. */
+const STATS_FLUSH_MS = 500;
+
+// ---------------------------------------------------------------------------
+// Shared mutable stats (written from callbacks, flushed to state at 2 Hz)
+// ---------------------------------------------------------------------------
+
+interface PerfStats {
+  longtaskCount: number;
+  longtaskTotalMs: number;
+  longtaskMaxMs: number;
+  fps: number;
+  sseChunks: number;
+  sseChars: number;
+  mountAMs: number | null;
+  streamState: "idle" | "streaming" | "done" | "aborted";
+}
+
+function emptyStats(): PerfStats {
+  return {
+    longtaskCount: 0,
+    longtaskTotalMs: 0,
+    longtaskMaxMs: 0,
+    fps: 0,
+    sseChunks: 0,
+    sseChars: 0,
+    mountAMs: null,
+    streamState: "idle",
+  };
+}
+
+/**
+ * Self-contained stats panel: owns the longtask observer, the FPS meter and the
+ * 2 Hz flush interval. Isolated in its OWN component so its periodic setState
+ * re-renders only this panel — NOT the ChatThread under measurement.
+ */
+function StatsPanel({ stats }: { stats: MutableRefObject<PerfStats> }) {
+  const [snapshot, setSnapshot] = useState<PerfStats>(() => ({ ...stats.current }));
+
+  // Long tasks (main-thread blocks > 50ms).
+  useEffect(() => {
+    let observer: PerformanceObserver | null = null;
+    try {
+      observer = new PerformanceObserver((list) => {
+        for (const entry of list.getEntries()) {
+          stats.current.longtaskCount += 1;
+          stats.current.longtaskTotalMs += entry.duration;
+          stats.current.longtaskMaxMs = Math.max(stats.current.longtaskMaxMs, entry.duration);
+        }
+      });
+      observer.observe({ type: "longtask", buffered: true });
+    } catch {
+      // longtask entries unsupported in this browser — panel shows zeros.
+    }
+    return () => observer?.disconnect();
+  }, [stats]);
+
+  // FPS: frames rendered within the trailing 1s window.
+  useEffect(() => {
+    let raf = 0;
+    const frames: number[] = [];
+    const loop = (now: number) => {
+      frames.push(now);
+      while (frames.length > 0 && frames[0] <= now - 1000) frames.shift();
+      stats.current.fps = frames.length;
+      raf = requestAnimationFrame(loop);
+    };
+    raf = requestAnimationFrame(loop);
+    return () => cancelAnimationFrame(raf);
+  }, [stats]);
+
+  // Flush the mutable stats into the display at most 2x/s.
+  useEffect(() => {
+    const id = window.setInterval(() => setSnapshot({ ...stats.current }), STATS_FLUSH_MS);
+    return () => window.clearInterval(id);
+  }, [stats]);
+
+  const resetLongtasks = () => {
+    stats.current.longtaskCount = 0;
+    stats.current.longtaskTotalMs = 0;
+    stats.current.longtaskMaxMs = 0;
+    setSnapshot({ ...stats.current });
+  };
+
+  const row: CSSProperties = { display: "flex", justifyContent: "space-between", gap: 8 };
+  return (
+    <div style={{ fontFamily: "monospace", fontSize: 12, lineHeight: 1.7 }}>
+      <div style={{ fontWeight: 700, marginBottom: 4 }}>Stats</div>
+      <div style={row}><span>FPS (1s)</span><span>{snapshot.fps}</span></div>
+      <div style={row}><span>Long tasks</span><span>{snapshot.longtaskCount}</span></div>
+      <div style={row}><span>Long total</span><span>{snapshot.longtaskTotalMs.toFixed(0)} ms</span></div>
+      <div style={row}><span>Long max</span><span>{snapshot.longtaskMaxMs.toFixed(0)} ms</span></div>
+      <div style={row}><span>SSE chunks</span><span>{snapshot.sseChunks}</span></div>
+      <div style={row}><span>SSE chars</span><span>{snapshot.sseChars.toLocaleString()}</span></div>
+      <div style={row}><span>Stream</span><span>{snapshot.streamState}</span></div>
+      <div style={row}>
+        <span>Mount A</span>
+        <span>{snapshot.mountAMs === null ? "—" : `${snapshot.mountAMs.toFixed(0)} ms`}</span>
+      </div>
+      <button type="button" onClick={resetLongtasks} style={{ marginTop: 6 }}>
+        Reset long tasks
+      </button>
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Auto-send (scenario B): drive the REAL composer in the mounted DOM
+// ---------------------------------------------------------------------------
+
+/**
+ * Fill the composer textarea via the native value setter + an `input` event
+ * (React 18 controlled-input pattern), then click the enabled "Send" button.
+ * Retried on rAF until the elements exist (ChatThread mounts asynchronously).
+ */
+function autoSend(host: HTMLElement, text: string): void {
+  const deadline = performance.now() + AUTO_SEND_TIMEOUT_MS;
+
+  const tryClick = () => {
+    const button = host.querySelector<HTMLButtonElement>('button[aria-label="Send"]');
+    if (button && !button.disabled) {
+      button.click();
+      return;
+    }
+    if (performance.now() < deadline) requestAnimationFrame(tryClick);
+    else console.error("[perf] auto-send: Send button never became clickable");
+  };
+
+  const trySetValue = () => {
+    const textarea = host.querySelector("textarea");
+    if (!textarea) {
+      if (performance.now() < deadline) requestAnimationFrame(trySetValue);
+      else console.error("[perf] auto-send: textarea not found");
+      return;
+    }
+    const setter = Object.getOwnPropertyDescriptor(
+      window.HTMLTextAreaElement.prototype,
+      "value",
+    )?.set;
+    setter?.call(textarea, text);
+    textarea.dispatchEvent(new Event("input", { bubbles: true }));
+    // Click on a later frame so React commits the controlled value (which
+    // enables the Send button) before we press it.
+    requestAnimationFrame(tryClick);
+  };
+
+  requestAnimationFrame(trySetValue);
+}
+
+// ---------------------------------------------------------------------------
+// Harness
+// ---------------------------------------------------------------------------
+
+interface MountState {
+  mode: "A" | "B";
+  key: number;
+  chatId: string | null;
+  rows: IAiChatMessageRow[];
+}
+
+const noop = (): void => {};
+
+export default function PerfHarness() {
+  const [preset, setPreset] = useState<PresetKey>("20k");
+  const [intervalMs, setIntervalMs] = useState<number>(15);
+  const [mounted, setMounted] = useState<MountState | null>(null);
+  const [fixtureInfo, setFixtureInfo] = useState<string | null>(null);
+
+  const statsRef = useRef<PerfStats>(emptyStats());
+  const hostRef = useRef<HTMLDivElement>(null);
+  const keyCounterRef = useRef(0);
+  const mountStartRef = useRef(0);
+  const pendingMountMeasureRef = useRef(false);
+
+  // The scripted live turn for the current preset (reused across B runs; the
+  // script is immutable data, so rebuilding per run is unnecessary).
+  const liveScript = useMemo(() => buildTurnScript(PRESETS[preset], "live"), [preset]);
+
+  const openPage = useMemo(() => ({ id: "page-1", title: "Perf test page" }), []);
+
+  // Scenario A: mount ChatThread seeded with a large persisted transcript.
+  const handleMountA = () => {
+    const fixture = buildPersistedRows(PRESETS[preset]);
+    setFixtureInfo(
+      `Persisted fixture: ${fixture.rows.length} rows, ` +
+        `${fixture.totalChars.toLocaleString()} chars ≈ ${fixture.approxTokens.toLocaleString()} tokens`,
+    );
+    statsRef.current.mountAMs = null;
+    // Mark AFTER fixture generation: we measure mount cost, not generation cost
+    // (production receives its rows from the network).
+    performance.mark("perf:mountA:start");
+    mountStartRef.current = performance.now();
+    pendingMountMeasureRef.current = true;
+    keyCounterRef.current += 1;
+    setMounted({ mode: "A", key: keyCounterRef.current, chatId: "perf-chat", rows: fixture.rows });
+  };
+
+  // Measure scenario A: effect runs after the mount commit; double rAF lands
+  // after the first paint of the mounted transcript.
+  useEffect(() => {
+    if (!pendingMountMeasureRef.current) return;
+    pendingMountMeasureRef.current = false;
+    requestAnimationFrame(() => {
+      requestAnimationFrame(() => {
+        statsRef.current.mountAMs = performance.now() - mountStartRef.current;
+        performance.mark("perf:mountA:end");
+        try {
+          performance.measure("perf:mountA", "perf:mountA:start", "perf:mountA:end");
+        } catch {
+          // Marks cleared mid-run — ignore.
+        }
+      });
+    });
+  }, [mounted]);
+
+  // Scenario B: mount an empty chat, arm the synthetic stream, auto-send.
+  const handleStartB = () => {
+    statsRef.current.sseChunks = 0;
+    statsRef.current.sseChars = 0;
+    statsRef.current.streamState = "streaming";
+    setLiveStreamSettings({
+      script: liveScript,
+      chunkIntervalMs: intervalMs,
+      onProgress: (chunks, chars) => {
+        statsRef.current.sseChunks = chunks;
+        statsRef.current.sseChars = chars;
+      },
+      onDone: () => {
+        statsRef.current.streamState = "done";
+        performance.mark("perf:streamB:end");
+        try {
+          performance.measure("perf:streamB", "perf:streamB:start", "perf:streamB:end");
+        } catch {
+          // Start mark missing (e.g. marks cleared) — ignore.
+        }
+      },
+      onAbort: () => {
+        statsRef.current.streamState = "aborted";
+      },
+    });
+    performance.mark("perf:streamB:start");
+    keyCounterRef.current += 1;
+    setMounted({ mode: "B", key: keyCounterRef.current, chatId: null, rows: [] });
+    if (hostRef.current) autoSend(hostRef.current, AUTO_SEND_TEXT);
+  };
+
+  const handleUnmount = () => setMounted(null);
+
+  const label: CSSProperties = { display: "block", fontSize: 12, margin: "10px 0 2px" };
+  const button: CSSProperties = { display: "block", width: "100%", margin: "6px 0", padding: "6px 8px" };
+
+  return (
+    <div style={{ display: "flex", height: "100vh", fontFamily: "system-ui, sans-serif" }}>
+      {/* Left: controls + stats */}
+      <div
+        style={{
+          width: 260,
+          flex: "0 0 260px",
+          padding: 12,
+          borderRight: "1px solid #ccc",
+          overflowY: "auto",
+          boxSizing: "border-box",
+        }}
+      >
+        <div style={{ fontWeight: 700, marginBottom: 4 }}>AI chat perf harness</div>
+
+        <label style={label}>Preset</label>
+        <select
+          value={preset}
+          onChange={(e) => setPreset(e.target.value as PresetKey)}
+          style={{ width: "100%" }}
+        >
+          <option value="5k">5k tokens</option>
+          <option value="20k">20k tokens</option>
+          <option value="50k">50k tokens</option>
+        </select>
+
+        <label style={label}>Chunk interval (scenario B)</label>
+        <select
+          value={intervalMs}
+          onChange={(e) => setIntervalMs(Number(e.target.value))}
+          style={{ width: "100%" }}
+        >
+          <option value={15}>15 ms (normal)</option>
+          <option value={5}>5 ms (stress)</option>
+        </select>
+
+        <div style={{ marginTop: 12 }}>
+          <button type="button" style={button} onClick={handleMountA}>
+            Mount persisted chat (A)
+          </button>
+          <button type="button" style={button} onClick={handleStartB}>
+            Start live stream (B)
+          </button>
+          <button type="button" style={button} onClick={handleUnmount} disabled={!mounted}>
+            Unmount
+          </button>
+        </div>
+
+        <div style={{ fontSize: 11, color: "#555", margin: "8px 0" }}>
+          <div>
+            Live turn: {liveScript.totalChars.toLocaleString()} chars ≈{" "}
+            {liveScript.approxTokens.toLocaleString()} tokens
+          </div>
+          {fixtureInfo && <div>{fixtureInfo}</div>}
+          {mounted && (
+            <div>
+              Mounted: scenario {mounted.mode} (key {mounted.key})
+            </div>
+          )}
+        </div>
+
+        <hr style={{ border: "none", borderTop: "1px solid #ddd" }} />
+        <StatsPanel stats={statsRef} />
+      </div>
+
+      {/* Right: the real ChatThread inside a real-window-sized box */}
+      <div
+        style={{
+          flex: 1,
+          display: "flex",
+          alignItems: "center",
+          justifyContent: "center",
+          background: "#f4f4f5",
+        }}
+      >
+        <div
+          ref={hostRef}
+          style={{
+            width: 540,
+            height: 680,
+            border: "1px solid #bbb",
+            borderRadius: 8,
+            background: "#fff",
+            padding: 8,
+            boxSizing: "border-box",
+            overflow: "hidden",
+          }}
+        >
+          {mounted ? (
+            <ChatThread
+              key={mounted.key}
+              chatId={mounted.chatId}
+              threadKey={`perf-${mounted.key}`}
+              initialRows={mounted.rows}
+              openPage={openPage}
+              roleId={null}
+              roles={[]}
+              onRolePicked={noop}
+              assistantName="Perf agent"
+              onTurnFinished={noop}
+              onServerChatId={noop}
+            />
+          ) : (
+            <div style={{ color: "#888", fontSize: 13, padding: 16 }}>
+              ChatThread unmounted. Use the controls on the left.
+            </div>
+          )}
+        </div>
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,517 @@
+/**
+ * DEV-ONLY synthetic agent-turn generator for the AI chat perf harness.
+ *
+ * Produces one scripted agent turn (reasoning + tool calls + markdown answer)
+ * from a size config, and materializes it two ways:
+ *  - as an AI SDK v6 UI-message SSE stream (scenario B "live agent stream"),
+ *    served by a `window.fetch` patch that intercepts `/api/ai-chat/stream`;
+ *  - as persisted `IAiChatMessageRow[]` history (scenario A "open existing chat").
+ *
+ * Wire format verified against the installed ai@6.0.207 `uiMessageChunkSchema`
+ * (strict objects — only the exact field names below are accepted).
+ */
+
+import type { UIMessage } from "@ai-sdk/react";
+import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
+
+// ---------------------------------------------------------------------------
+// Config / presets
+// ---------------------------------------------------------------------------
+
+/** 1 token ~= 4 chars — the approximation used throughout this module. */
+const CHARS_PER_TOKEN = 4;
+
+export interface TurnConfig {
+  /** Number of agent steps; each step = one reasoning block + one tool call. */
+  steps: number;
+  /** Approximate reasoning tokens generated per step. */
+  reasoningTokensPerStep: number;
+  /** Size of each tool call's output `content` filler, in bytes (ASCII). */
+  toolOutputBytes: number;
+  /** Approximate size of the final markdown answer, in tokens. */
+  answerTokens: number;
+}
+
+export type PresetKey = "5k" | "20k" | "50k";
+
+export const PRESETS: Record<PresetKey, TurnConfig> = {
+  "5k": {
+    steps: 3,
+    reasoningTokensPerStep: 500,
+    toolOutputBytes: 10_000,
+    answerTokens: 600,
+  },
+  "20k": {
+    steps: 6,
+    reasoningTokensPerStep: 2500,
+    toolOutputBytes: 20_000,
+    answerTokens: 1500,
+  },
+  "50k": {
+    steps: 10,
+    reasoningTokensPerStep: 4000,
+    toolOutputBytes: 40_000,
+    answerTokens: 3000,
+  },
+};
+
+// ---------------------------------------------------------------------------
+// Text generators
+// ---------------------------------------------------------------------------
+
+/** Mixed Russian/English prose sentences cycled to build reasoning text. */
+const REASONING_SENTENCES = [
+  "Пользователь просит проанализировать документ и выделить ключевые тезисы по каждому разделу.",
+  "First I need to inspect the current page content to understand its overall structure.",
+  "Судя по оглавлению, раздел с техническими требованиями находится ближе к концу документа.",
+  "The table in section three contains the migration matrix that I should cross-check against the summary.",
+  "Проверю, нет ли противоречий между описанием API и приведёнными в тексте примерами вызовов.",
+  "Let me compare the numbers from the executive summary with the raw data in the appendix.",
+  "Похоже, автор использует термины «воркспейс» и workspace взаимозаменяемо — это стоит нормализовать.",
+  "I should keep the page ids from the tool output so the final answer can cite the source pages.",
+  "Осталось свести найденные несоответствия в одну таблицу и предложить порядок исправлений.",
+  "The remaining sections look consistent, so I can move on to drafting the structured answer.",
+];
+
+/**
+ * Build realistic prose of ~`targetChars` characters, inserting a newline
+ * roughly every 200 characters (mirrors how reasoning text tends to wrap).
+ */
+function makeProse(targetChars: number): string {
+  const pieces: string[] = [];
+  let length = 0;
+  let sinceNewline = 0;
+  let i = 0;
+  while (length < targetChars) {
+    const sentence = REASONING_SENTENCES[i % REASONING_SENTENCES.length];
+    i += 1;
+    pieces.push(sentence);
+    length += sentence.length + 1;
+    sinceNewline += sentence.length + 1;
+    if (sinceNewline >= 200) {
+      pieces.push("\n");
+      sinceNewline = 0;
+    } else {
+      pieces.push(" ");
+    }
+  }
+  return pieces.join("").trimEnd();
+}
+
+/** One markdown section (~700 chars): heading, prose, bullets, GFM table, code. */
+function markdownSection(n: number): string {
+  return [
+    `## Section ${n}: migration analysis`,
+    ``,
+    `The workspace contains **${n * 12} pages** that still reference the legacy API. ` +
+      `Most of them live under [Perf test page](/p/page-1) and need the new transport. ` +
+      `Ниже приведена сводка по разделу с оценкой трудозатрат и основных рисков.`,
+    ``,
+    `- Update the fetch layer to the v6 transport`,
+    `- Перенести таблицы соответствия идентификаторов`,
+    `- Verify citation links after the move`,
+    `- Проверить отображение длинных ответов в узкой панели`,
+    ``,
+    `| Область | Страниц | Статус | Риск |`,
+    `| --- | --- | --- | --- |`,
+    `| API reference | ${n + 4} | migrated | low |`,
+    `| Onboarding | ${n + 2} | in progress | medium |`,
+    `| Release notes | ${n * 3} | pending | high |`,
+    ``,
+    "```ts",
+    `export function migrateSection${n}(rows: Row[]): Row[] {`,
+    `  return rows`,
+    `    .filter((row) => row.section === ${n})`,
+    `    .map((row) => ({ ...row, migrated: true }));`,
+    `}`,
+    "```",
+  ].join("\n");
+}
+
+/** Realistic markdown answer of ~`targetChars` chars (sections repeated to size). */
+function makeMarkdownAnswer(targetChars: number): string {
+  const sections: string[] = [];
+  let length = 0;
+  let n = 1;
+  while (length < targetChars) {
+    const section = markdownSection(n);
+    sections.push(section);
+    length += section.length + 2;
+    n += 1;
+  }
+  return sections.join("\n\n");
+}
+
+/** Plain ASCII filler of exactly `bytes` characters for tool outputs. */
+function makeFiller(bytes: number): string {
+  const unit = "Perf filler content for the synthetic getPage tool output. ";
+  return unit.repeat(Math.ceil(bytes / unit.length)).slice(0, bytes);
+}
+
+// ---------------------------------------------------------------------------
+// Turn script
+// ---------------------------------------------------------------------------
+
+export interface TurnToolCall {
+  toolCallId: string;
+  toolName: "getPage";
+  input: { pageId: string };
+  output: { id: string; title: string; content: string };
+}
+
+export interface TurnStep {
+  reasoningText: string;
+  tool: TurnToolCall;
+}
+
+export interface TurnScript {
+  steps: TurnStep[];
+  answerText: string;
+  /** Approximate reasoning tokens for the whole turn (chars / 4). */
+  reasoningTokens: number;
+  /** Approximate context size after this turn, in tokens. */
+  contextTokens: number;
+  maxContextTokens: number;
+  /** Actual generated visible chars: reasoning + tool outputs + answer. */
+  totalChars: number;
+  /** totalChars / 4, rounded. */
+  approxTokens: number;
+}
+
+/**
+ * Build the scripted agent turn for a config. `idPrefix` keeps tool call ids
+ * unique when several scripts coexist (e.g. 3 persisted turns in one chat).
+ */
+export function buildTurnScript(config: TurnConfig, idPrefix = "live"): TurnScript {
+  const steps: TurnStep[] = [];
+  let reasoningChars = 0;
+  let toolChars = 0;
+  for (let i = 0; i < config.steps; i++) {
+    const reasoningText = makeProse(config.reasoningTokensPerStep * CHARS_PER_TOKEN);
+    const content = makeFiller(config.toolOutputBytes);
+    reasoningChars += reasoningText.length;
+    toolChars += content.length;
+    steps.push({
+      reasoningText,
+      tool: {
+        toolCallId: `${idPrefix}-call-${i + 1}`,
+        toolName: "getPage",
+        input: { pageId: "page-1" },
+        output: { id: "page-1", title: "Perf test page", content },
+      },
+    });
+  }
+  const answerText = makeMarkdownAnswer(config.answerTokens * CHARS_PER_TOKEN);
+  const totalChars = reasoningChars + toolChars + answerText.length;
+  return {
+    steps,
+    answerText,
+    reasoningTokens: Math.round(reasoningChars / CHARS_PER_TOKEN),
+    contextTokens: Math.round(totalChars / CHARS_PER_TOKEN),
+    maxContextTokens: 200_000,
+    totalChars,
+    approxTokens: Math.round(totalChars / CHARS_PER_TOKEN),
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Scenario A: persisted rows
+// ---------------------------------------------------------------------------
+
+/** Number of user+assistant pairs the preset is split across for history. */
+const HISTORY_TURNS = 3;
+
+const USER_PROMPTS = [
+  "Проанализируй документ и выдели ключевые тезисы по каждому разделу.",
+  "Now cross-check the migration matrix against the summary and list every mismatch.",
+  "Собери финальный план миграции с оценкой рисков по каждой области.",
+];
+
+/** Persisted UIMessage parts for one finished assistant turn. */
+function scriptToPersistedParts(script: TurnScript): UIMessage["parts"] {
+  const parts: unknown[] = [];
+  for (const step of script.steps) {
+    parts.push({ type: "reasoning", text: step.reasoningText, state: "done" });
+    parts.push({
+      type: `tool-${step.tool.toolName}`,
+      toolCallId: step.tool.toolCallId,
+      state: "output-available",
+      input: step.tool.input,
+      output: step.tool.output,
+    });
+  }
+  parts.push({ type: "text", text: script.answerText, state: "done" });
+  return parts as UIMessage["parts"];
+}
+
+export interface PersistedFixture {
+  rows: IAiChatMessageRow[];
+  totalChars: number;
+  approxTokens: number;
+}
+
+/**
+ * Materialize the preset as a finished 3-turn transcript: user row + assistant
+ * row per turn, with the preset's steps/answer split across the assistant turns.
+ * Approximate accounting — the actual totals are reported back for display.
+ */
+export function buildPersistedRows(config: TurnConfig): PersistedFixture {
+  const rows: IAiChatMessageRow[] = [];
+  const baseTime = Date.now() - HISTORY_TURNS * 60_000;
+  let totalChars = 0;
+
+  for (let t = 0; t < HISTORY_TURNS; t++) {
+    // Distribute steps as evenly as possible (earlier turns get the remainder).
+    const stepsForTurn =
+      Math.floor(config.steps / HISTORY_TURNS) +
+      (t < config.steps % HISTORY_TURNS ? 1 : 0);
+    const turnConfig: TurnConfig = {
+      steps: Math.max(1, stepsForTurn),
+      reasoningTokensPerStep: config.reasoningTokensPerStep,
+      toolOutputBytes: config.toolOutputBytes,
+      answerTokens: Math.max(50, Math.round(config.answerTokens / HISTORY_TURNS)),
+    };
+    const script = buildTurnScript(turnConfig, `hist-${t + 1}`);
+    totalChars += script.totalChars;
+
+    const userText = USER_PROMPTS[t % USER_PROMPTS.length];
+    rows.push({
+      id: `perf-row-u${t + 1}`,
+      role: "user",
+      content: userText,
+      metadata: null,
+      createdAt: new Date(baseTime + t * 60_000).toISOString(),
+    });
+    rows.push({
+      id: `perf-row-a${t + 1}`,
+      role: "assistant",
+      content: script.answerText,
+      metadata: {
+        parts: scriptToPersistedParts(script),
+        usage: { reasoningTokens: script.reasoningTokens },
+        contextTokens: script.contextTokens,
+        maxContextTokens: script.maxContextTokens,
+        finishReason: "stop",
+      },
+      createdAt: new Date(baseTime + t * 60_000 + 30_000).toISOString(),
+    });
+  }
+
+  return {
+    rows,
+    totalChars,
+    approxTokens: Math.round(totalChars / CHARS_PER_TOKEN),
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Scenario B: SSE stream
+// ---------------------------------------------------------------------------
+
+/** Streaming delta size in chars (reasoning/answer text is split into these). */
+const DELTA_CHARS = 200;
+
+function splitDeltas(text: string, size = DELTA_CHARS): string[] {
+  const deltas: string[] = [];
+  for (let i = 0; i < text.length; i += size) {
+    deltas.push(text.slice(i, i + size));
+  }
+  return deltas;
+}
+
+/** One pre-serialized SSE frame plus its visible-char contribution for stats. */
+interface SseFrame {
+  data: string;
+  chars: number;
+}
+
+function frame(chunk: Record<string, unknown>, chars = 0): SseFrame {
+  return { data: `data: ${JSON.stringify(chunk)}\n\n`, chars };
+}
+
+/**
+ * Serialize the whole scripted turn into AI SDK v6 UI-message SSE frames
+ * (excluding the final `data: [DONE]` terminator, appended by the pump).
+ */
+function buildSseFrames(script: TurnScript, messageId: string, chatId: string): SseFrame[] {
+  const frames: SseFrame[] = [];
+  frames.push(frame({ type: "start", messageId, messageMetadata: { chatId } }));
+
+  script.steps.forEach((step, i) => {
+    frames.push(frame({ type: "start-step" }));
+    const reasoningId = `${messageId}-r${i + 1}`;
+    frames.push(frame({ type: "reasoning-start", id: reasoningId }));
+    for (const delta of splitDeltas(step.reasoningText)) {
+      frames.push(frame({ type: "reasoning-delta", id: reasoningId, delta }, delta.length));
+    }
+    frames.push(frame({ type: "reasoning-end", id: reasoningId }));
+
+    const { toolCallId, toolName, input, output } = step.tool;
+    frames.push(frame({ type: "tool-input-start", toolCallId, toolName }));
+    frames.push(frame({ type: "tool-input-available", toolCallId, toolName, input }));
+    // The tool result arrives as ONE chunk, like the real server sends it.
+    frames.push(frame({ type: "tool-output-available", toolCallId, output }, output.content.length));
+    frames.push(frame({ type: "finish-step" }));
+  });
+
+  // Final step: the markdown answer.
+  frames.push(frame({ type: "start-step" }));
+  const textId = `${messageId}-answer`;
+  frames.push(frame({ type: "text-start", id: textId }));
+  for (const delta of splitDeltas(script.answerText)) {
+    frames.push(frame({ type: "text-delta", id: textId, delta }, delta.length));
+  }
+  frames.push(frame({ type: "text-end", id: textId }));
+  frames.push(frame({ type: "finish-step" }));
+
+  frames.push(
+    frame({
+      type: "finish",
+      messageMetadata: {
+        usage: { reasoningTokens: script.reasoningTokens },
+        contextTokens: script.contextTokens,
+        maxContextTokens: script.maxContextTokens,
+        finishReason: "stop",
+      },
+    }),
+  );
+  return frames;
+}
+
+export interface LiveStreamSettings {
+  script: TurnScript;
+  /** Delay between SSE chunks (one chunk per tick). */
+  chunkIntervalMs: number;
+  /** Progress callback: cumulative emitted chunk count and visible chars. */
+  onProgress?: (chunks: number, chars: number) => void;
+  /** Fired once after the `[DONE]` terminator is enqueued. */
+  onDone?: () => void;
+  /** Fired if the client aborted the stream (Stop button). */
+  onAbort?: () => void;
+}
+
+/**
+ * Build a synthetic SSE Response streaming the scripted turn, one chunk every
+ * `chunkIntervalMs`. Honors the fetch `AbortSignal` so the real Stop button works.
+ */
+export function buildSseResponse(
+  settings: LiveStreamSettings,
+  signal?: AbortSignal | null,
+): Response {
+  const messageId = `m-live-${Date.now()}`;
+  const frames = buildSseFrames(settings.script, messageId, "perf-chat");
+  const encoder = new TextEncoder();
+  let index = 0;
+  let emittedChars = 0;
+  let timer: number | undefined;
+
+  const stream = new ReadableStream<Uint8Array>({
+    start(controller) {
+      const stopPump = () => {
+        if (timer !== undefined) {
+          clearTimeout(timer);
+          timer = undefined;
+        }
+      };
+      const pump = () => {
+        timer = undefined;
+        if (signal?.aborted) {
+          stopPump();
+          try {
+            controller.close();
+          } catch {
+            // Already closed/cancelled — nothing to do.
+          }
+          return;
+        }
+        if (index >= frames.length) {
+          try {
+            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
+            controller.close();
+          } catch {
+            // Cancelled mid-flight.
+          }
+          settings.onDone?.();
+          return;
+        }
+        const next = frames[index];
+        index += 1;
+        try {
+          controller.enqueue(encoder.encode(next.data));
+        } catch {
+          stopPump();
+          return;
+        }
+        emittedChars += next.chars;
+        settings.onProgress?.(index, emittedChars);
+        timer = window.setTimeout(pump, settings.chunkIntervalMs);
+      };
+      signal?.addEventListener(
+        "abort",
+        () => {
+          stopPump();
+          try {
+            controller.close();
+          } catch {
+            // Reader already cancelled.
+          }
+          settings.onAbort?.();
+        },
+        { once: true },
+      );
+      timer = window.setTimeout(pump, settings.chunkIntervalMs);
+    },
+    cancel() {
+      if (timer !== undefined) {
+        clearTimeout(timer);
+        timer = undefined;
+      }
+    },
+  });
+
+  return new Response(stream, {
+    status: 200,
+    headers: {
+      "content-type": "text/event-stream",
+      "cache-control": "no-cache",
+      "x-vercel-ai-ui-message-stream": "v1",
+    },
+  });
+}
+
+// ---------------------------------------------------------------------------
+// window.fetch patch
+// ---------------------------------------------------------------------------
+
+let currentLiveSettings: LiveStreamSettings | null = null;
+
+/** Arm the next `/api/ai-chat/stream` request with a scripted turn. */
+export function setLiveStreamSettings(settings: LiveStreamSettings): void {
+  currentLiveSettings = settings;
+}
+
+/**
+ * Patch `window.fetch` BEFORE React mounts: requests to `/api/ai-chat/stream`
+ * get the synthetic SSE Response; everything else passes through untouched.
+ */
+export function installAiChatStreamFetchPatch(): void {
+  const originalFetch = window.fetch.bind(window);
+  window.fetch = (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
+    const url =
+      typeof input === "string"
+        ? input
+        : input instanceof URL
+          ? input.href
+          : input.url;
+    if (url.includes("/api/ai-chat/stream")) {
+      const settings = currentLiveSettings;
+      if (!settings) {
+        return Promise.resolve(
+          new Response("perf harness: no live stream configured", { status: 500 }),
+        );
+      }
+      return Promise.resolve(buildSseResponse(settings, init?.signal ?? null));
+    }
+    return originalFetch(input, init);
+  };
+}