diff --git a/.gitignore b/.gitignore index cf440100..4eb9e6fd 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,8 @@ lerna-debug.log* .nx/cache .claude/worktrees/ .claude/tmp/ +# Local Chrome performance traces recorded by the AI-chat perf harness +.claude/perf-traces/ # TypeScript incremental build artifacts *.tsbuildinfo diff --git a/apps/client/perf/ai-chat-perf-main.tsx b/apps/client/perf/ai-chat-perf-main.tsx new file mode 100644 index 00000000..0c75f68c --- /dev/null +++ b/apps/client/perf/ai-chat-perf-main.tsx @@ -0,0 +1,50 @@ +/** + * DEV-ONLY entry for the AI chat perf harness (served by the vite dev server at + * /perf/ai-chat-perf.html; never part of the production build, which uses the + * single default index.html entry). + * + * Mounts the minimal provider stack the real ChatThread needs (Mantine, router + * for tool-card Links, react-query, i18n) and patches `window.fetch` BEFORE + * React mounts so ChatThread's DefaultChatTransport requests to + * /api/ai-chat/stream are answered by the synthetic SSE generator. + */ + +import "@mantine/core/styles.css"; + +import ReactDOM from "react-dom/client"; +import { MantineProvider } from "@mantine/core"; +import { MemoryRouter } from "react-router-dom"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { mantineCssResolver, theme } from "../src/theme.ts"; +// i18n side-effect init (http-backend). Translations load from /locales in dev; +// missing keys fall back to the key text, which is fine for the harness. +import "../src/i18n.ts"; +import { installAiChatStreamFetchPatch } from "./synthetic-turn.ts"; +import PerfHarness from "./harness.tsx"; + +// MUST run before React mounts: ChatThread creates its transport with the +// global fetch, so the patch has to be in place before the first send. +installAiChatStreamFetchPatch(); + +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + refetchOnMount: false, + refetchOnWindowFocus: false, + retry: false, + staleTime: 5 * 60 * 1000, + }, + }, +}); + +const container = document.getElementById("root") as HTMLElement; + +ReactDOM.createRoot(container).render( + + + + + + + , +); diff --git a/apps/client/perf/ai-chat-perf.html b/apps/client/perf/ai-chat-perf.html new file mode 100644 index 00000000..5509160b --- /dev/null +++ b/apps/client/perf/ai-chat-perf.html @@ -0,0 +1,12 @@ + + + + + + AI chat perf harness + + +
+ + + diff --git a/apps/client/perf/harness.tsx b/apps/client/perf/harness.tsx new file mode 100644 index 00000000..32af237d --- /dev/null +++ b/apps/client/perf/harness.tsx @@ -0,0 +1,390 @@ +/** + * DEV-ONLY perf harness UI for the AI chat feature. + * + * Left panel: controls + live stats. Right side: a bordered box (~real chat + * window size) hosting the REAL ChatThread component. + * + * Scenario A "Open existing chat": mount ChatThread seeded with a large + * persisted transcript and measure click -> post-mount-paint time. + * Scenario B "Live agent stream": mount an empty chat and auto-send a message; + * the fetch patch (see synthetic-turn.ts) answers with a synthetic SSE stream + * through the real useChat pipeline. + */ + +import { useEffect, useMemo, useRef, useState } from "react"; +import type { CSSProperties, MutableRefObject } from "react"; +import ChatThread from "../src/features/ai-chat/components/chat-thread.tsx"; +import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts"; +import { + PRESETS, + buildPersistedRows, + buildTurnScript, + setLiveStreamSettings, + type PresetKey, +} from "./synthetic-turn.ts"; + +const AUTO_SEND_TEXT = "Run the synthetic perf turn"; +const AUTO_SEND_TIMEOUT_MS = 1000; +/** Stats display refresh period — 2x/s so the display itself stays cheap. */ +const STATS_FLUSH_MS = 500; + +// --------------------------------------------------------------------------- +// Shared mutable stats (written from callbacks, flushed to state at 2 Hz) +// --------------------------------------------------------------------------- + +interface PerfStats { + longtaskCount: number; + longtaskTotalMs: number; + longtaskMaxMs: number; + fps: number; + sseChunks: number; + sseChars: number; + mountAMs: number | null; + streamState: "idle" | "streaming" | "done" | "aborted"; +} + +function emptyStats(): PerfStats { + return { + longtaskCount: 0, + longtaskTotalMs: 0, + longtaskMaxMs: 0, + fps: 0, + sseChunks: 0, + sseChars: 0, + mountAMs: null, + streamState: "idle", + }; +} + +/** + * Self-contained stats panel: owns the longtask observer, the FPS meter and the + * 2 Hz flush interval. Isolated in its OWN component so its periodic setState + * re-renders only this panel — NOT the ChatThread under measurement. + */ +function StatsPanel({ stats }: { stats: MutableRefObject }) { + const [snapshot, setSnapshot] = useState(() => ({ ...stats.current })); + + // Long tasks (main-thread blocks > 50ms). + useEffect(() => { + let observer: PerformanceObserver | null = null; + try { + observer = new PerformanceObserver((list) => { + for (const entry of list.getEntries()) { + stats.current.longtaskCount += 1; + stats.current.longtaskTotalMs += entry.duration; + stats.current.longtaskMaxMs = Math.max(stats.current.longtaskMaxMs, entry.duration); + } + }); + observer.observe({ type: "longtask", buffered: true }); + } catch { + // longtask entries unsupported in this browser — panel shows zeros. + } + return () => observer?.disconnect(); + }, [stats]); + + // FPS: frames rendered within the trailing 1s window. + useEffect(() => { + let raf = 0; + const frames: number[] = []; + const loop = (now: number) => { + frames.push(now); + while (frames.length > 0 && frames[0] <= now - 1000) frames.shift(); + stats.current.fps = frames.length; + raf = requestAnimationFrame(loop); + }; + raf = requestAnimationFrame(loop); + return () => cancelAnimationFrame(raf); + }, [stats]); + + // Flush the mutable stats into the display at most 2x/s. + useEffect(() => { + const id = window.setInterval(() => setSnapshot({ ...stats.current }), STATS_FLUSH_MS); + return () => window.clearInterval(id); + }, [stats]); + + const resetLongtasks = () => { + stats.current.longtaskCount = 0; + stats.current.longtaskTotalMs = 0; + stats.current.longtaskMaxMs = 0; + setSnapshot({ ...stats.current }); + }; + + const row: CSSProperties = { display: "flex", justifyContent: "space-between", gap: 8 }; + return ( +
+
Stats
+
FPS (1s){snapshot.fps}
+
Long tasks{snapshot.longtaskCount}
+
Long total{snapshot.longtaskTotalMs.toFixed(0)} ms
+
Long max{snapshot.longtaskMaxMs.toFixed(0)} ms
+
SSE chunks{snapshot.sseChunks}
+
SSE chars{snapshot.sseChars.toLocaleString()}
+
Stream{snapshot.streamState}
+
+ Mount A + {snapshot.mountAMs === null ? "—" : `${snapshot.mountAMs.toFixed(0)} ms`} +
+ +
+ ); +} + +// --------------------------------------------------------------------------- +// Auto-send (scenario B): drive the REAL composer in the mounted DOM +// --------------------------------------------------------------------------- + +/** + * Fill the composer textarea via the native value setter + an `input` event + * (React 18 controlled-input pattern), then click the enabled "Send" button. + * Retried on rAF until the elements exist (ChatThread mounts asynchronously). + */ +function autoSend(host: HTMLElement, text: string): void { + const deadline = performance.now() + AUTO_SEND_TIMEOUT_MS; + + const tryClick = () => { + const button = host.querySelector('button[aria-label="Send"]'); + if (button && !button.disabled) { + button.click(); + return; + } + if (performance.now() < deadline) requestAnimationFrame(tryClick); + else console.error("[perf] auto-send: Send button never became clickable"); + }; + + const trySetValue = () => { + const textarea = host.querySelector("textarea"); + if (!textarea) { + if (performance.now() < deadline) requestAnimationFrame(trySetValue); + else console.error("[perf] auto-send: textarea not found"); + return; + } + const setter = Object.getOwnPropertyDescriptor( + window.HTMLTextAreaElement.prototype, + "value", + )?.set; + setter?.call(textarea, text); + textarea.dispatchEvent(new Event("input", { bubbles: true })); + // Click on a later frame so React commits the controlled value (which + // enables the Send button) before we press it. + requestAnimationFrame(tryClick); + }; + + requestAnimationFrame(trySetValue); +} + +// --------------------------------------------------------------------------- +// Harness +// --------------------------------------------------------------------------- + +interface MountState { + mode: "A" | "B"; + key: number; + chatId: string | null; + rows: IAiChatMessageRow[]; +} + +const noop = (): void => {}; + +export default function PerfHarness() { + const [preset, setPreset] = useState("20k"); + const [intervalMs, setIntervalMs] = useState(15); + const [mounted, setMounted] = useState(null); + const [fixtureInfo, setFixtureInfo] = useState(null); + + const statsRef = useRef(emptyStats()); + const hostRef = useRef(null); + const keyCounterRef = useRef(0); + const mountStartRef = useRef(0); + const pendingMountMeasureRef = useRef(false); + + // The scripted live turn for the current preset (reused across B runs; the + // script is immutable data, so rebuilding per run is unnecessary). + const liveScript = useMemo(() => buildTurnScript(PRESETS[preset], "live"), [preset]); + + const openPage = useMemo(() => ({ id: "page-1", title: "Perf test page" }), []); + + // Scenario A: mount ChatThread seeded with a large persisted transcript. + const handleMountA = () => { + const fixture = buildPersistedRows(PRESETS[preset]); + setFixtureInfo( + `Persisted fixture: ${fixture.rows.length} rows, ` + + `${fixture.totalChars.toLocaleString()} chars ≈ ${fixture.approxTokens.toLocaleString()} tokens`, + ); + statsRef.current.mountAMs = null; + // Mark AFTER fixture generation: we measure mount cost, not generation cost + // (production receives its rows from the network). + performance.mark("perf:mountA:start"); + mountStartRef.current = performance.now(); + pendingMountMeasureRef.current = true; + keyCounterRef.current += 1; + setMounted({ mode: "A", key: keyCounterRef.current, chatId: "perf-chat", rows: fixture.rows }); + }; + + // Measure scenario A: effect runs after the mount commit; double rAF lands + // after the first paint of the mounted transcript. + useEffect(() => { + if (!pendingMountMeasureRef.current) return; + pendingMountMeasureRef.current = false; + requestAnimationFrame(() => { + requestAnimationFrame(() => { + statsRef.current.mountAMs = performance.now() - mountStartRef.current; + performance.mark("perf:mountA:end"); + try { + performance.measure("perf:mountA", "perf:mountA:start", "perf:mountA:end"); + } catch { + // Marks cleared mid-run — ignore. + } + }); + }); + }, [mounted]); + + // Scenario B: mount an empty chat, arm the synthetic stream, auto-send. + const handleStartB = () => { + statsRef.current.sseChunks = 0; + statsRef.current.sseChars = 0; + statsRef.current.streamState = "streaming"; + setLiveStreamSettings({ + script: liveScript, + chunkIntervalMs: intervalMs, + onProgress: (chunks, chars) => { + statsRef.current.sseChunks = chunks; + statsRef.current.sseChars = chars; + }, + onDone: () => { + statsRef.current.streamState = "done"; + performance.mark("perf:streamB:end"); + try { + performance.measure("perf:streamB", "perf:streamB:start", "perf:streamB:end"); + } catch { + // Start mark missing (e.g. marks cleared) — ignore. + } + }, + onAbort: () => { + statsRef.current.streamState = "aborted"; + }, + }); + performance.mark("perf:streamB:start"); + keyCounterRef.current += 1; + setMounted({ mode: "B", key: keyCounterRef.current, chatId: null, rows: [] }); + if (hostRef.current) autoSend(hostRef.current, AUTO_SEND_TEXT); + }; + + const handleUnmount = () => setMounted(null); + + const label: CSSProperties = { display: "block", fontSize: 12, margin: "10px 0 2px" }; + const button: CSSProperties = { display: "block", width: "100%", margin: "6px 0", padding: "6px 8px" }; + + return ( +
+ {/* Left: controls + stats */} +
+
AI chat perf harness
+ + + + + + + +
+ + + +
+ +
+
+ Live turn: {liveScript.totalChars.toLocaleString()} chars ≈{" "} + {liveScript.approxTokens.toLocaleString()} tokens +
+ {fixtureInfo &&
{fixtureInfo}
} + {mounted && ( +
+ Mounted: scenario {mounted.mode} (key {mounted.key}) +
+ )} +
+ +
+ +
+ + {/* Right: the real ChatThread inside a real-window-sized box */} +
+
+ {mounted ? ( + + ) : ( +
+ ChatThread unmounted. Use the controls on the left. +
+ )} +
+
+
+ ); +} diff --git a/apps/client/perf/synthetic-turn.ts b/apps/client/perf/synthetic-turn.ts new file mode 100644 index 00000000..439a5ab0 --- /dev/null +++ b/apps/client/perf/synthetic-turn.ts @@ -0,0 +1,517 @@ +/** + * DEV-ONLY synthetic agent-turn generator for the AI chat perf harness. + * + * Produces one scripted agent turn (reasoning + tool calls + markdown answer) + * from a size config, and materializes it two ways: + * - as an AI SDK v6 UI-message SSE stream (scenario B "live agent stream"), + * served by a `window.fetch` patch that intercepts `/api/ai-chat/stream`; + * - as persisted `IAiChatMessageRow[]` history (scenario A "open existing chat"). + * + * Wire format verified against the installed ai@6.0.207 `uiMessageChunkSchema` + * (strict objects — only the exact field names below are accepted). + */ + +import type { UIMessage } from "@ai-sdk/react"; +import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts"; + +// --------------------------------------------------------------------------- +// Config / presets +// --------------------------------------------------------------------------- + +/** 1 token ~= 4 chars — the approximation used throughout this module. */ +const CHARS_PER_TOKEN = 4; + +export interface TurnConfig { + /** Number of agent steps; each step = one reasoning block + one tool call. */ + steps: number; + /** Approximate reasoning tokens generated per step. */ + reasoningTokensPerStep: number; + /** Size of each tool call's output `content` filler, in bytes (ASCII). */ + toolOutputBytes: number; + /** Approximate size of the final markdown answer, in tokens. */ + answerTokens: number; +} + +export type PresetKey = "5k" | "20k" | "50k"; + +export const PRESETS: Record = { + "5k": { + steps: 3, + reasoningTokensPerStep: 500, + toolOutputBytes: 10_000, + answerTokens: 600, + }, + "20k": { + steps: 6, + reasoningTokensPerStep: 2500, + toolOutputBytes: 20_000, + answerTokens: 1500, + }, + "50k": { + steps: 10, + reasoningTokensPerStep: 4000, + toolOutputBytes: 40_000, + answerTokens: 3000, + }, +}; + +// --------------------------------------------------------------------------- +// Text generators +// --------------------------------------------------------------------------- + +/** Mixed Russian/English prose sentences cycled to build reasoning text. */ +const REASONING_SENTENCES = [ + "Пользователь просит проанализировать документ и выделить ключевые тезисы по каждому разделу.", + "First I need to inspect the current page content to understand its overall structure.", + "Судя по оглавлению, раздел с техническими требованиями находится ближе к концу документа.", + "The table in section three contains the migration matrix that I should cross-check against the summary.", + "Проверю, нет ли противоречий между описанием API и приведёнными в тексте примерами вызовов.", + "Let me compare the numbers from the executive summary with the raw data in the appendix.", + "Похоже, автор использует термины «воркспейс» и workspace взаимозаменяемо — это стоит нормализовать.", + "I should keep the page ids from the tool output so the final answer can cite the source pages.", + "Осталось свести найденные несоответствия в одну таблицу и предложить порядок исправлений.", + "The remaining sections look consistent, so I can move on to drafting the structured answer.", +]; + +/** + * Build realistic prose of ~`targetChars` characters, inserting a newline + * roughly every 200 characters (mirrors how reasoning text tends to wrap). + */ +function makeProse(targetChars: number): string { + const pieces: string[] = []; + let length = 0; + let sinceNewline = 0; + let i = 0; + while (length < targetChars) { + const sentence = REASONING_SENTENCES[i % REASONING_SENTENCES.length]; + i += 1; + pieces.push(sentence); + length += sentence.length + 1; + sinceNewline += sentence.length + 1; + if (sinceNewline >= 200) { + pieces.push("\n"); + sinceNewline = 0; + } else { + pieces.push(" "); + } + } + return pieces.join("").trimEnd(); +} + +/** One markdown section (~700 chars): heading, prose, bullets, GFM table, code. */ +function markdownSection(n: number): string { + return [ + `## Section ${n}: migration analysis`, + ``, + `The workspace contains **${n * 12} pages** that still reference the legacy API. ` + + `Most of them live under [Perf test page](/p/page-1) and need the new transport. ` + + `Ниже приведена сводка по разделу с оценкой трудозатрат и основных рисков.`, + ``, + `- Update the fetch layer to the v6 transport`, + `- Перенести таблицы соответствия идентификаторов`, + `- Verify citation links after the move`, + `- Проверить отображение длинных ответов в узкой панели`, + ``, + `| Область | Страниц | Статус | Риск |`, + `| --- | --- | --- | --- |`, + `| API reference | ${n + 4} | migrated | low |`, + `| Onboarding | ${n + 2} | in progress | medium |`, + `| Release notes | ${n * 3} | pending | high |`, + ``, + "```ts", + `export function migrateSection${n}(rows: Row[]): Row[] {`, + ` return rows`, + ` .filter((row) => row.section === ${n})`, + ` .map((row) => ({ ...row, migrated: true }));`, + `}`, + "```", + ].join("\n"); +} + +/** Realistic markdown answer of ~`targetChars` chars (sections repeated to size). */ +function makeMarkdownAnswer(targetChars: number): string { + const sections: string[] = []; + let length = 0; + let n = 1; + while (length < targetChars) { + const section = markdownSection(n); + sections.push(section); + length += section.length + 2; + n += 1; + } + return sections.join("\n\n"); +} + +/** Plain ASCII filler of exactly `bytes` characters for tool outputs. */ +function makeFiller(bytes: number): string { + const unit = "Perf filler content for the synthetic getPage tool output. "; + return unit.repeat(Math.ceil(bytes / unit.length)).slice(0, bytes); +} + +// --------------------------------------------------------------------------- +// Turn script +// --------------------------------------------------------------------------- + +export interface TurnToolCall { + toolCallId: string; + toolName: "getPage"; + input: { pageId: string }; + output: { id: string; title: string; content: string }; +} + +export interface TurnStep { + reasoningText: string; + tool: TurnToolCall; +} + +export interface TurnScript { + steps: TurnStep[]; + answerText: string; + /** Approximate reasoning tokens for the whole turn (chars / 4). */ + reasoningTokens: number; + /** Approximate context size after this turn, in tokens. */ + contextTokens: number; + maxContextTokens: number; + /** Actual generated visible chars: reasoning + tool outputs + answer. */ + totalChars: number; + /** totalChars / 4, rounded. */ + approxTokens: number; +} + +/** + * Build the scripted agent turn for a config. `idPrefix` keeps tool call ids + * unique when several scripts coexist (e.g. 3 persisted turns in one chat). + */ +export function buildTurnScript(config: TurnConfig, idPrefix = "live"): TurnScript { + const steps: TurnStep[] = []; + let reasoningChars = 0; + let toolChars = 0; + for (let i = 0; i < config.steps; i++) { + const reasoningText = makeProse(config.reasoningTokensPerStep * CHARS_PER_TOKEN); + const content = makeFiller(config.toolOutputBytes); + reasoningChars += reasoningText.length; + toolChars += content.length; + steps.push({ + reasoningText, + tool: { + toolCallId: `${idPrefix}-call-${i + 1}`, + toolName: "getPage", + input: { pageId: "page-1" }, + output: { id: "page-1", title: "Perf test page", content }, + }, + }); + } + const answerText = makeMarkdownAnswer(config.answerTokens * CHARS_PER_TOKEN); + const totalChars = reasoningChars + toolChars + answerText.length; + return { + steps, + answerText, + reasoningTokens: Math.round(reasoningChars / CHARS_PER_TOKEN), + contextTokens: Math.round(totalChars / CHARS_PER_TOKEN), + maxContextTokens: 200_000, + totalChars, + approxTokens: Math.round(totalChars / CHARS_PER_TOKEN), + }; +} + +// --------------------------------------------------------------------------- +// Scenario A: persisted rows +// --------------------------------------------------------------------------- + +/** Number of user+assistant pairs the preset is split across for history. */ +const HISTORY_TURNS = 3; + +const USER_PROMPTS = [ + "Проанализируй документ и выдели ключевые тезисы по каждому разделу.", + "Now cross-check the migration matrix against the summary and list every mismatch.", + "Собери финальный план миграции с оценкой рисков по каждой области.", +]; + +/** Persisted UIMessage parts for one finished assistant turn. */ +function scriptToPersistedParts(script: TurnScript): UIMessage["parts"] { + const parts: unknown[] = []; + for (const step of script.steps) { + parts.push({ type: "reasoning", text: step.reasoningText, state: "done" }); + parts.push({ + type: `tool-${step.tool.toolName}`, + toolCallId: step.tool.toolCallId, + state: "output-available", + input: step.tool.input, + output: step.tool.output, + }); + } + parts.push({ type: "text", text: script.answerText, state: "done" }); + return parts as UIMessage["parts"]; +} + +export interface PersistedFixture { + rows: IAiChatMessageRow[]; + totalChars: number; + approxTokens: number; +} + +/** + * Materialize the preset as a finished 3-turn transcript: user row + assistant + * row per turn, with the preset's steps/answer split across the assistant turns. + * Approximate accounting — the actual totals are reported back for display. + */ +export function buildPersistedRows(config: TurnConfig): PersistedFixture { + const rows: IAiChatMessageRow[] = []; + const baseTime = Date.now() - HISTORY_TURNS * 60_000; + let totalChars = 0; + + for (let t = 0; t < HISTORY_TURNS; t++) { + // Distribute steps as evenly as possible (earlier turns get the remainder). + const stepsForTurn = + Math.floor(config.steps / HISTORY_TURNS) + + (t < config.steps % HISTORY_TURNS ? 1 : 0); + const turnConfig: TurnConfig = { + steps: Math.max(1, stepsForTurn), + reasoningTokensPerStep: config.reasoningTokensPerStep, + toolOutputBytes: config.toolOutputBytes, + answerTokens: Math.max(50, Math.round(config.answerTokens / HISTORY_TURNS)), + }; + const script = buildTurnScript(turnConfig, `hist-${t + 1}`); + totalChars += script.totalChars; + + const userText = USER_PROMPTS[t % USER_PROMPTS.length]; + rows.push({ + id: `perf-row-u${t + 1}`, + role: "user", + content: userText, + metadata: null, + createdAt: new Date(baseTime + t * 60_000).toISOString(), + }); + rows.push({ + id: `perf-row-a${t + 1}`, + role: "assistant", + content: script.answerText, + metadata: { + parts: scriptToPersistedParts(script), + usage: { reasoningTokens: script.reasoningTokens }, + contextTokens: script.contextTokens, + maxContextTokens: script.maxContextTokens, + finishReason: "stop", + }, + createdAt: new Date(baseTime + t * 60_000 + 30_000).toISOString(), + }); + } + + return { + rows, + totalChars, + approxTokens: Math.round(totalChars / CHARS_PER_TOKEN), + }; +} + +// --------------------------------------------------------------------------- +// Scenario B: SSE stream +// --------------------------------------------------------------------------- + +/** Streaming delta size in chars (reasoning/answer text is split into these). */ +const DELTA_CHARS = 200; + +function splitDeltas(text: string, size = DELTA_CHARS): string[] { + const deltas: string[] = []; + for (let i = 0; i < text.length; i += size) { + deltas.push(text.slice(i, i + size)); + } + return deltas; +} + +/** One pre-serialized SSE frame plus its visible-char contribution for stats. */ +interface SseFrame { + data: string; + chars: number; +} + +function frame(chunk: Record, chars = 0): SseFrame { + return { data: `data: ${JSON.stringify(chunk)}\n\n`, chars }; +} + +/** + * Serialize the whole scripted turn into AI SDK v6 UI-message SSE frames + * (excluding the final `data: [DONE]` terminator, appended by the pump). + */ +function buildSseFrames(script: TurnScript, messageId: string, chatId: string): SseFrame[] { + const frames: SseFrame[] = []; + frames.push(frame({ type: "start", messageId, messageMetadata: { chatId } })); + + script.steps.forEach((step, i) => { + frames.push(frame({ type: "start-step" })); + const reasoningId = `${messageId}-r${i + 1}`; + frames.push(frame({ type: "reasoning-start", id: reasoningId })); + for (const delta of splitDeltas(step.reasoningText)) { + frames.push(frame({ type: "reasoning-delta", id: reasoningId, delta }, delta.length)); + } + frames.push(frame({ type: "reasoning-end", id: reasoningId })); + + const { toolCallId, toolName, input, output } = step.tool; + frames.push(frame({ type: "tool-input-start", toolCallId, toolName })); + frames.push(frame({ type: "tool-input-available", toolCallId, toolName, input })); + // The tool result arrives as ONE chunk, like the real server sends it. + frames.push(frame({ type: "tool-output-available", toolCallId, output }, output.content.length)); + frames.push(frame({ type: "finish-step" })); + }); + + // Final step: the markdown answer. + frames.push(frame({ type: "start-step" })); + const textId = `${messageId}-answer`; + frames.push(frame({ type: "text-start", id: textId })); + for (const delta of splitDeltas(script.answerText)) { + frames.push(frame({ type: "text-delta", id: textId, delta }, delta.length)); + } + frames.push(frame({ type: "text-end", id: textId })); + frames.push(frame({ type: "finish-step" })); + + frames.push( + frame({ + type: "finish", + messageMetadata: { + usage: { reasoningTokens: script.reasoningTokens }, + contextTokens: script.contextTokens, + maxContextTokens: script.maxContextTokens, + finishReason: "stop", + }, + }), + ); + return frames; +} + +export interface LiveStreamSettings { + script: TurnScript; + /** Delay between SSE chunks (one chunk per tick). */ + chunkIntervalMs: number; + /** Progress callback: cumulative emitted chunk count and visible chars. */ + onProgress?: (chunks: number, chars: number) => void; + /** Fired once after the `[DONE]` terminator is enqueued. */ + onDone?: () => void; + /** Fired if the client aborted the stream (Stop button). */ + onAbort?: () => void; +} + +/** + * Build a synthetic SSE Response streaming the scripted turn, one chunk every + * `chunkIntervalMs`. Honors the fetch `AbortSignal` so the real Stop button works. + */ +export function buildSseResponse( + settings: LiveStreamSettings, + signal?: AbortSignal | null, +): Response { + const messageId = `m-live-${Date.now()}`; + const frames = buildSseFrames(settings.script, messageId, "perf-chat"); + const encoder = new TextEncoder(); + let index = 0; + let emittedChars = 0; + let timer: number | undefined; + + const stream = new ReadableStream({ + start(controller) { + const stopPump = () => { + if (timer !== undefined) { + clearTimeout(timer); + timer = undefined; + } + }; + const pump = () => { + timer = undefined; + if (signal?.aborted) { + stopPump(); + try { + controller.close(); + } catch { + // Already closed/cancelled — nothing to do. + } + return; + } + if (index >= frames.length) { + try { + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + } catch { + // Cancelled mid-flight. + } + settings.onDone?.(); + return; + } + const next = frames[index]; + index += 1; + try { + controller.enqueue(encoder.encode(next.data)); + } catch { + stopPump(); + return; + } + emittedChars += next.chars; + settings.onProgress?.(index, emittedChars); + timer = window.setTimeout(pump, settings.chunkIntervalMs); + }; + signal?.addEventListener( + "abort", + () => { + stopPump(); + try { + controller.close(); + } catch { + // Reader already cancelled. + } + settings.onAbort?.(); + }, + { once: true }, + ); + timer = window.setTimeout(pump, settings.chunkIntervalMs); + }, + cancel() { + if (timer !== undefined) { + clearTimeout(timer); + timer = undefined; + } + }, + }); + + return new Response(stream, { + status: 200, + headers: { + "content-type": "text/event-stream", + "cache-control": "no-cache", + "x-vercel-ai-ui-message-stream": "v1", + }, + }); +} + +// --------------------------------------------------------------------------- +// window.fetch patch +// --------------------------------------------------------------------------- + +let currentLiveSettings: LiveStreamSettings | null = null; + +/** Arm the next `/api/ai-chat/stream` request with a scripted turn. */ +export function setLiveStreamSettings(settings: LiveStreamSettings): void { + currentLiveSettings = settings; +} + +/** + * Patch `window.fetch` BEFORE React mounts: requests to `/api/ai-chat/stream` + * get the synthetic SSE Response; everything else passes through untouched. + */ +export function installAiChatStreamFetchPatch(): void { + const originalFetch = window.fetch.bind(window); + window.fetch = (input: RequestInfo | URL, init?: RequestInit): Promise => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? input.href + : input.url; + if (url.includes("/api/ai-chat/stream")) { + const settings = currentLiveSettings; + if (!settings) { + return Promise.resolve( + new Response("perf harness: no live stream configured", { status: 500 }), + ); + } + return Promise.resolve(buildSseResponse(settings, init?.signal ?? null)); + } + return originalFetch(input, init); + }; +}