test(ai-chat): add dev-only perf harness for the chat stream pipeline
Mounts the real ChatThread against a synthetic AI SDK v6 UI-message SSE stream (multi-step reasoning + getPage tool calls + markdown answer; 5k/20k/50k-token presets, 15/5 ms chunk cadence) with long-task, FPS and mount-time instrumentation.

Two scenarios: mount a persisted transcript (open-chat cost) and stream a live turn through the real useChat pipeline via a window.fetch patch scoped to /api/ai-chat/stream.

Served only by the vite dev server at /perf/ai-chat-perf.html; the production build keeps its single index.html entry, so none of this ships.

Also ignores local trace dumps under .claude/perf-traces/.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -43,6 +43,8 @@ lerna-debug.log*
|
||||
.nx/cache
|
||||
.claude/worktrees/
|
||||
.claude/tmp/
|
||||
# Local Chrome performance traces recorded by the AI-chat perf harness
|
||||
.claude/perf-traces/
|
||||
|
||||
# TypeScript incremental build artifacts
|
||||
*.tsbuildinfo
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* DEV-ONLY entry for the AI chat perf harness (served by the vite dev server at
|
||||
* /perf/ai-chat-perf.html; never part of the production build, which uses the
|
||||
* single default index.html entry).
|
||||
*
|
||||
* Mounts the minimal provider stack the real ChatThread needs (Mantine, router
|
||||
* for tool-card Links, react-query, i18n) and patches `window.fetch` BEFORE
|
||||
* React mounts so ChatThread's DefaultChatTransport requests to
|
||||
* /api/ai-chat/stream are answered by the synthetic SSE generator.
|
||||
*/
|
||||
|
||||
import "@mantine/core/styles.css";
|
||||
|
||||
import ReactDOM from "react-dom/client";
|
||||
import { MantineProvider } from "@mantine/core";
|
||||
import { MemoryRouter } from "react-router-dom";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { mantineCssResolver, theme } from "../src/theme.ts";
|
||||
// i18n side-effect init (http-backend). Translations load from /locales in dev;
|
||||
// missing keys fall back to the key text, which is fine for the harness.
|
||||
import "../src/i18n.ts";
|
||||
import { installAiChatStreamFetchPatch } from "./synthetic-turn.ts";
|
||||
import PerfHarness from "./harness.tsx";
|
||||
|
||||
// MUST run before React mounts: ChatThread creates its transport with the
// global fetch, so the patch has to be in place before the first send.
installAiChatStreamFetchPatch();

// Harness-wide react-query defaults: no retries, no refetch on mount or window
// focus, and a 5-minute stale time.
const queryClient = new QueryClient({
  defaultOptions: {
    queries: {
      refetchOnMount: false,
      refetchOnWindowFocus: false,
      retry: false,
      staleTime: 5 * 60 * 1000,
    },
  },
});

// Guard the lookup instead of asserting it to HTMLElement: if the host page
// lacks the mount node, fail here with a message that names the problem rather
// than handing null to createRoot.
const container = document.getElementById("root");
if (!container) {
  throw new Error('[perf] mount node "#root" not found in the harness page');
}

ReactDOM.createRoot(container).render(
  <MemoryRouter>
    <MantineProvider theme={theme} cssVariablesResolver={mantineCssResolver}>
      <QueryClientProvider client={queryClient}>
        <PerfHarness />
      </QueryClientProvider>
    </MantineProvider>
  </MemoryRouter>,
);
|
||||
@@ -0,0 +1,12 @@
|
||||
<!doctype html>
<!-- DEV-ONLY host page for the AI chat perf harness: provides the #root mount
     node and loads ./ai-chat-perf-main.tsx as an ES module. -->
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>AI chat perf harness</title>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="./ai-chat-perf-main.tsx"></script>
  </body>
</html>
|
||||
@@ -0,0 +1,390 @@
|
||||
/**
|
||||
* DEV-ONLY perf harness UI for the AI chat feature.
|
||||
*
|
||||
* Left panel: controls + live stats. Right side: a bordered box (~real chat
|
||||
* window size) hosting the REAL ChatThread component.
|
||||
*
|
||||
* Scenario A "Open existing chat": mount ChatThread seeded with a large
|
||||
* persisted transcript and measure click -> post-mount-paint time.
|
||||
* Scenario B "Live agent stream": mount an empty chat and auto-send a message;
|
||||
* the fetch patch (see synthetic-turn.ts) answers with a synthetic SSE stream
|
||||
* through the real useChat pipeline.
|
||||
*/
|
||||
|
||||
import { useEffect, useMemo, useRef, useState } from "react";
|
||||
import type { CSSProperties, MutableRefObject } from "react";
|
||||
import ChatThread from "../src/features/ai-chat/components/chat-thread.tsx";
|
||||
import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
|
||||
import {
|
||||
PRESETS,
|
||||
buildPersistedRows,
|
||||
buildTurnScript,
|
||||
setLiveStreamSettings,
|
||||
type PresetKey,
|
||||
} from "./synthetic-turn.ts";
|
||||
|
||||
/** Message text the scenario B auto-send types into the composer. */
const AUTO_SEND_TEXT = "Run the synthetic perf turn";

/** How long (ms) auto-send keeps retrying before it logs an error and stops. */
const AUTO_SEND_TIMEOUT_MS = 1_000;

/** Stats display refresh period (ms) — 2 flushes per second, so the display itself stays cheap. */
const STATS_FLUSH_MS = 500;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shared mutable stats (written from callbacks, flushed to state at 2 Hz)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Mutable counters written by the harness callbacks and read by the stats panel. */
interface PerfStats {
  /** Number of long-task entries seen so far. */
  longtaskCount: number;
  /** Sum of the durations of those entries, in ms. */
  longtaskTotalMs: number;
  /** Longest single entry duration, in ms. */
  longtaskMaxMs: number;
  /** Animation frames counted within the trailing one-second window. */
  fps: number;
  /** Chunk count reported by the live stream's progress callback. */
  sseChunks: number;
  /** Character count reported by the live stream's progress callback. */
  sseChars: number;
  /** Last scenario A mount time in ms; null until a measurement lands. */
  mountAMs: number | null;
  /** Lifecycle of the scenario B stream. */
  streamState: "idle" | "streaming" | "done" | "aborted";
}

/** Values every fresh stats object starts from. */
const INITIAL_STATS: Readonly<PerfStats> = {
  longtaskCount: 0,
  longtaskTotalMs: 0,
  longtaskMaxMs: 0,
  fps: 0,
  sseChunks: 0,
  sseChars: 0,
  mountAMs: null,
  streamState: "idle",
};

/** Returns a new, independently mutable stats object with every counter zeroed. */
function emptyStats(): PerfStats {
  return { ...INITIAL_STATS };
}
|
||||
|
||||
/**
 * Stats read-out. It owns three effects — the long-task observer, the FPS
 * counter and the periodic flush — and keeps the flushed snapshot in its own
 * local state, so the timer-driven setState re-renders this panel only and not
 * the ChatThread being measured.
 *
 * @param stats Ref to the shared mutable counters; the panel both writes to it
 *   (long tasks, FPS) and copies it into the displayed snapshot.
 */
function StatsPanel({ stats }: { stats: MutableRefObject<PerfStats> }) {
  const [snapshot, setSnapshot] = useState<PerfStats>(() => ({ ...stats.current }));

  // Long tasks (main-thread blocks > 50ms).
  useEffect(() => {
    let observer: PerformanceObserver | null = null;
    try {
      observer = new PerformanceObserver((list) => {
        list.getEntries().forEach(({ duration }) => {
          const counters = stats.current;
          counters.longtaskCount += 1;
          counters.longtaskTotalMs += duration;
          counters.longtaskMaxMs = Math.max(counters.longtaskMaxMs, duration);
        });
      });
      observer.observe({ type: "longtask", buffered: true });
    } catch {
      // longtask entries unsupported in this browser — panel shows zeros.
    }
    return () => observer?.disconnect();
  }, [stats]);

  // FPS: number of frame timestamps that fall within the trailing 1s window.
  useEffect(() => {
    const frameTimes: number[] = [];
    let handle = 0;
    function onFrame(now: number): void {
      frameTimes.push(now);
      const cutoff = now - 1000;
      while (frameTimes.length > 0 && frameTimes[0] <= cutoff) frameTimes.shift();
      stats.current.fps = frameTimes.length;
      handle = requestAnimationFrame(onFrame);
    }
    handle = requestAnimationFrame(onFrame);
    return () => cancelAnimationFrame(handle);
  }, [stats]);

  // Copy the mutable counters into the displayed snapshot every STATS_FLUSH_MS.
  useEffect(() => {
    const flush = (): void => setSnapshot({ ...stats.current });
    const timer = window.setInterval(flush, STATS_FLUSH_MS);
    return () => window.clearInterval(timer);
  }, [stats]);

  // Zero the long-task counters and show the result immediately instead of
  // waiting for the next flush.
  function resetLongtasks(): void {
    const counters = stats.current;
    counters.longtaskCount = 0;
    counters.longtaskTotalMs = 0;
    counters.longtaskMaxMs = 0;
    setSnapshot({ ...counters });
  }

  const {
    fps,
    longtaskCount,
    longtaskTotalMs,
    longtaskMaxMs,
    sseChunks,
    sseChars,
    streamState,
    mountAMs,
  } = snapshot;
  const row: CSSProperties = { display: "flex", justifyContent: "space-between", gap: 8 };

  return (
    <div style={{ fontFamily: "monospace", fontSize: 12, lineHeight: 1.7 }}>
      <div style={{ fontWeight: 700, marginBottom: 4 }}>Stats</div>
      <div style={row}>
        <span>FPS (1s)</span>
        <span>{fps}</span>
      </div>
      <div style={row}>
        <span>Long tasks</span>
        <span>{longtaskCount}</span>
      </div>
      <div style={row}>
        <span>Long total</span>
        <span>{longtaskTotalMs.toFixed(0)} ms</span>
      </div>
      <div style={row}>
        <span>Long max</span>
        <span>{longtaskMaxMs.toFixed(0)} ms</span>
      </div>
      <div style={row}>
        <span>SSE chunks</span>
        <span>{sseChunks}</span>
      </div>
      <div style={row}>
        <span>SSE chars</span>
        <span>{sseChars.toLocaleString()}</span>
      </div>
      <div style={row}>
        <span>Stream</span>
        <span>{streamState}</span>
      </div>
      <div style={row}>
        <span>Mount A</span>
        <span>{mountAMs === null ? "—" : `${mountAMs.toFixed(0)} ms`}</span>
      </div>
      <button type="button" onClick={resetLongtasks} style={{ marginTop: 6 }}>
        Reset long tasks
      </button>
    </div>
  );
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Auto-send (scenario B): drive the REAL composer in the mounted DOM
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Types `text` into the composer and presses Send, working on the mounted DOM
 * under `host`.
 *
 * The textarea is filled through the native `value` setter followed by a
 * bubbling `input` event (React 18 controlled-input pattern). Both stages —
 * finding the textarea, then finding an enabled "Send" button — are retried
 * once per animation frame (ChatThread mounts asynchronously) until one shared
 * deadline, AUTO_SEND_TIMEOUT_MS after the call; a stage that runs out of time
 * logs an error and stops.
 *
 * @param host Element whose subtree contains the composer.
 * @param text Message text to send.
 */
function autoSend(host: HTMLElement, text: string): void {
  const deadline = performance.now() + AUTO_SEND_TIMEOUT_MS;

  // Runs `attempt` on the next frame and on every following frame until it
  // returns true; after the deadline, logs `failureMessage` and gives up.
  const pollEachFrame = (attempt: () => boolean, failureMessage: string): void => {
    const step = (): void => {
      if (attempt()) return;
      if (performance.now() < deadline) requestAnimationFrame(step);
      else console.error(failureMessage);
    };
    requestAnimationFrame(step);
  };

  const pressSend = (): boolean => {
    const button = host.querySelector<HTMLButtonElement>('button[aria-label="Send"]');
    if (!button || button.disabled) return false;
    button.click();
    return true;
  };

  const fillComposer = (): boolean => {
    const textarea = host.querySelector("textarea");
    if (!textarea) return false;

    const nativeSetter = Object.getOwnPropertyDescriptor(
      window.HTMLTextAreaElement.prototype,
      "value",
    )?.set;
    nativeSetter?.call(textarea, text);
    textarea.dispatchEvent(new Event("input", { bubbles: true }));
    // Press on a later frame so React commits the controlled value (which
    // enables the Send button) before the click.
    pollEachFrame(pressSend, "[perf] auto-send: Send button never became clickable");
    return true;
  };

  pollEachFrame(fillComposer, "[perf] auto-send: textarea not found");
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Harness
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Describes the ChatThread currently rendered in the host box. */
interface MountState {
  /** Scenario that produced this mount. */
  mode: "A" | "B";
  /** Run counter; used as the React key and in the thread key of the mounted ChatThread. */
  key: number;
  /** Chat id handed to ChatThread (null for the empty scenario B chat). */
  chatId: string | null;
  /** Rows handed to ChatThread as its initial transcript. */
  rows: IAiChatMessageRow[];
}

/** Shared do-nothing callback for the ChatThread handlers the harness ignores. */
function noop(): void {}
|
||||
|
||||
/**
 * Harness root: a control/stats column on the left and, on the right, a
 * fixed-size box that hosts the real ChatThread.
 *
 * - Scenario A mounts ChatThread with a generated persisted transcript and
 *   records the time from the click to two animation frames after the mount
 *   commit (`mountAMs`, plus a `perf:mountA` performance measure).
 * - Scenario B mounts an empty chat, arms the synthetic live stream via
 *   `setLiveStreamSettings`, and drives the composer through `autoSend`.
 *
 * Counters are kept in a ref rather than state so that updating them does not
 * re-render this component.
 */
export default function PerfHarness() {
  const [preset, setPreset] = useState<PresetKey>("20k");
  const [intervalMs, setIntervalMs] = useState<number>(15);
  const [mounted, setMounted] = useState<MountState | null>(null);
  const [fixtureInfo, setFixtureInfo] = useState<string | null>(null);

  const statsRef = useRef<PerfStats>(emptyStats());
  const hostRef = useRef<HTMLDivElement>(null);
  const keyCounterRef = useRef(0);
  const mountStartRef = useRef(0);
  const pendingMountMeasureRef = useRef(false);
  // Key of the ChatThread the controls most recently asked for (null after
  // Unmount). The deferred scenario A measurement compares against it so a
  // run that was replaced or unmounted before it painted records nothing.
  const requestedKeyRef = useRef<number | null>(null);

  // The scripted live turn for the current preset (reused across B runs; the
  // script is immutable data, so rebuilding per run is unnecessary).
  const liveScript = useMemo(() => buildTurnScript(PRESETS[preset], "live"), [preset]);

  // Memoized so ChatThread receives the same object on every harness render.
  const openPage = useMemo(() => ({ id: "page-1", title: "Perf test page" }), []);

  // Scenario A: mount ChatThread seeded with a large persisted transcript.
  const handleMountA = () => {
    const fixture = buildPersistedRows(PRESETS[preset]);
    setFixtureInfo(
      `Persisted fixture: ${fixture.rows.length} rows, ` +
        `${fixture.totalChars.toLocaleString()} chars ≈ ${fixture.approxTokens.toLocaleString()} tokens`,
    );
    statsRef.current.mountAMs = null;
    // Mark AFTER fixture generation: we measure mount cost, not generation cost
    // (production receives its rows from the network).
    performance.mark("perf:mountA:start");
    mountStartRef.current = performance.now();
    pendingMountMeasureRef.current = true;
    keyCounterRef.current += 1;
    requestedKeyRef.current = keyCounterRef.current;
    setMounted({ mode: "A", key: keyCounterRef.current, chatId: "perf-chat", rows: fixture.rows });
  };

  // Measure scenario A: effect runs after the mount commit; double rAF lands
  // after the first paint of the mounted transcript.
  useEffect(() => {
    if (!pendingMountMeasureRef.current) return;
    pendingMountMeasureRef.current = false;
    const measuredKey = mounted?.key ?? null;
    requestAnimationFrame(() => {
      requestAnimationFrame(() => {
        // The thread was unmounted or replaced by another run before these
        // frames fired: the elapsed time no longer describes a visible
        // scenario A mount, so record nothing.
        if (measuredKey === null || requestedKeyRef.current !== measuredKey) return;
        statsRef.current.mountAMs = performance.now() - mountStartRef.current;
        performance.mark("perf:mountA:end");
        try {
          performance.measure("perf:mountA", "perf:mountA:start", "perf:mountA:end");
        } catch {
          // Marks cleared mid-run — ignore.
        }
      });
    });
  }, [mounted]);

  // Scenario B: mount an empty chat, arm the synthetic stream, auto-send.
  const handleStartB = () => {
    statsRef.current.sseChunks = 0;
    statsRef.current.sseChars = 0;
    statsRef.current.streamState = "streaming";
    setLiveStreamSettings({
      script: liveScript,
      chunkIntervalMs: intervalMs,
      onProgress: (chunks, chars) => {
        statsRef.current.sseChunks = chunks;
        statsRef.current.sseChars = chars;
      },
      onDone: () => {
        statsRef.current.streamState = "done";
        performance.mark("perf:streamB:end");
        try {
          performance.measure("perf:streamB", "perf:streamB:start", "perf:streamB:end");
        } catch {
          // Start mark missing (e.g. marks cleared) — ignore.
        }
      },
      onAbort: () => {
        statsRef.current.streamState = "aborted";
      },
    });
    performance.mark("perf:streamB:start");
    keyCounterRef.current += 1;
    requestedKeyRef.current = keyCounterRef.current;
    setMounted({ mode: "B", key: keyCounterRef.current, chatId: null, rows: [] });
    if (hostRef.current) autoSend(hostRef.current, AUTO_SEND_TEXT);
  };

  const handleUnmount = () => {
    requestedKeyRef.current = null;
    setMounted(null);
  };

  const label: CSSProperties = { display: "block", fontSize: 12, margin: "10px 0 2px" };
  const button: CSSProperties = { display: "block", width: "100%", margin: "6px 0", padding: "6px 8px" };

  return (
    <div style={{ display: "flex", height: "100vh", fontFamily: "system-ui, sans-serif" }}>
      {/* Left: controls + stats */}
      <div
        style={{
          width: 260,
          flex: "0 0 260px",
          padding: 12,
          borderRight: "1px solid #ccc",
          overflowY: "auto",
          boxSizing: "border-box",
        }}
      >
        <div style={{ fontWeight: 700, marginBottom: 4 }}>AI chat perf harness</div>

        {/* htmlFor/id tie each label to its select for assistive tech and label-based queries. */}
        <label style={label} htmlFor="perf-preset">
          Preset
        </label>
        <select
          id="perf-preset"
          value={preset}
          onChange={(e) => setPreset(e.target.value as PresetKey)}
          style={{ width: "100%" }}
        >
          <option value="5k">5k tokens</option>
          <option value="20k">20k tokens</option>
          <option value="50k">50k tokens</option>
        </select>

        <label style={label} htmlFor="perf-chunk-interval">
          Chunk interval (scenario B)
        </label>
        <select
          id="perf-chunk-interval"
          value={intervalMs}
          onChange={(e) => setIntervalMs(Number(e.target.value))}
          style={{ width: "100%" }}
        >
          <option value={15}>15 ms (normal)</option>
          <option value={5}>5 ms (stress)</option>
        </select>

        <div style={{ marginTop: 12 }}>
          <button type="button" style={button} onClick={handleMountA}>
            Mount persisted chat (A)
          </button>
          <button type="button" style={button} onClick={handleStartB}>
            Start live stream (B)
          </button>
          <button type="button" style={button} onClick={handleUnmount} disabled={!mounted}>
            Unmount
          </button>
        </div>

        <div style={{ fontSize: 11, color: "#555", margin: "8px 0" }}>
          <div>
            Live turn: {liveScript.totalChars.toLocaleString()} chars ≈{" "}
            {liveScript.approxTokens.toLocaleString()} tokens
          </div>
          {fixtureInfo && <div>{fixtureInfo}</div>}
          {mounted && (
            <div>
              Mounted: scenario {mounted.mode} (key {mounted.key})
            </div>
          )}
        </div>

        <hr style={{ border: "none", borderTop: "1px solid #ddd" }} />
        <StatsPanel stats={statsRef} />
      </div>

      {/* Right: the real ChatThread inside a real-window-sized box */}
      <div
        style={{
          flex: 1,
          display: "flex",
          alignItems: "center",
          justifyContent: "center",
          background: "#f4f4f5",
        }}
      >
        <div
          ref={hostRef}
          style={{
            width: 540,
            height: 680,
            border: "1px solid #bbb",
            borderRadius: 8,
            background: "#fff",
            padding: 8,
            boxSizing: "border-box",
            overflow: "hidden",
          }}
        >
          {mounted ? (
            <ChatThread
              key={mounted.key}
              chatId={mounted.chatId}
              threadKey={`perf-${mounted.key}`}
              initialRows={mounted.rows}
              openPage={openPage}
              roleId={null}
              roles={[]}
              onRolePicked={noop}
              assistantName="Perf agent"
              onTurnFinished={noop}
              onServerChatId={noop}
            />
          ) : (
            <div style={{ color: "#888", fontSize: 13, padding: 16 }}>
              ChatThread unmounted. Use the controls on the left.
            </div>
          )}
        </div>
      </div>
    </div>
  );
}
|
||||
@@ -0,0 +1,517 @@
|
||||
/**
|
||||
* DEV-ONLY synthetic agent-turn generator for the AI chat perf harness.
|
||||
*
|
||||
* Produces one scripted agent turn (reasoning + tool calls + markdown answer)
|
||||
* from a size config, and materializes it two ways:
|
||||
* - as an AI SDK v6 UI-message SSE stream (scenario B "live agent stream"),
|
||||
* served by a `window.fetch` patch that intercepts `/api/ai-chat/stream`;
|
||||
* - as persisted `IAiChatMessageRow[]` history (scenario A "open existing chat").
|
||||
*
|
||||
* Wire format verified against the installed ai@6.0.207 `uiMessageChunkSchema`
|
||||
* (strict objects — only the exact field names below are accepted).
|
||||
*/
|
||||
|
||||
import type { UIMessage } from "@ai-sdk/react";
|
||||
import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Config / presets
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Characters per token (1 token ~= 4 chars) — the approximation used throughout this module. */
const CHARS_PER_TOKEN = 4;

/** Size knobs for one scripted agent turn. */
export interface TurnConfig {
  /** How many agent steps to script; every step is one reasoning block followed by one tool call. */
  steps: number;
  /** Approximate number of reasoning tokens generated for each step. */
  reasoningTokensPerStep: number;
  /** Length of the filler placed in each tool call's output `content`, in bytes (ASCII). */
  toolOutputBytes: number;
  /** Approximate length of the final markdown answer, in tokens. */
  answerTokens: number;
}

/** Names of the built-in size presets. */
export type PresetKey = "5k" | "20k" | "50k";

/** Built-in size presets, looked up by preset name. */
export const PRESETS: Record<PresetKey, TurnConfig> = {
  "5k": { steps: 3, reasoningTokensPerStep: 500, toolOutputBytes: 10_000, answerTokens: 600 },
  "20k": { steps: 6, reasoningTokensPerStep: 2500, toolOutputBytes: 20_000, answerTokens: 1500 },
  "50k": { steps: 10, reasoningTokensPerStep: 4000, toolOutputBytes: 40_000, answerTokens: 3000 },
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Text generators
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Mixed Russian/English prose sentences cycled to build reasoning text. */
|
||||
const REASONING_SENTENCES = [
|
||||
"Пользователь просит проанализировать документ и выделить ключевые тезисы по каждому разделу.",
|
||||
"First I need to inspect the current page content to understand its overall structure.",
|
||||
"Судя по оглавлению, раздел с техническими требованиями находится ближе к концу документа.",
|
||||
"The table in section three contains the migration matrix that I should cross-check against the summary.",
|
||||
"Проверю, нет ли противоречий между описанием API и приведёнными в тексте примерами вызовов.",
|
||||
"Let me compare the numbers from the executive summary with the raw data in the appendix.",
|
||||
"Похоже, автор использует термины «воркспейс» и workspace взаимозаменяемо — это стоит нормализовать.",
|
||||
"I should keep the page ids from the tool output so the final answer can cite the source pages.",
|
||||
"Осталось свести найденные несоответствия в одну таблицу и предложить порядок исправлений.",
|
||||
"The remaining sections look consistent, so I can move on to drafting the structured answer.",
|
||||
];
|
||||
|
||||
/**
 * Generates prose of roughly `targetChars` characters by cycling through
 * REASONING_SENTENCES. Sentences are separated by a space, except that a
 * newline is used instead once about 200 characters have accumulated since the
 * previous newline (mirrors how reasoning text tends to wrap).
 *
 * Whole sentences are appended until the running count reaches the target, so
 * the result can exceed `targetChars` by up to one sentence. Trailing
 * whitespace is trimmed.
 */
function makeProse(targetChars: number): string {
  let prose = "";
  let counted = 0;
  let lineChars = 0;
  for (let index = 0; counted < targetChars; index += 1) {
    const sentence = REASONING_SENTENCES[index % REASONING_SENTENCES.length];
    // Each sentence is counted together with the separator that follows it.
    const cost = sentence.length + 1;
    counted += cost;
    lineChars += cost;
    const wrap = lineChars >= 200;
    prose += sentence + (wrap ? "\n" : " ");
    if (wrap) lineChars = 0;
  }
  return prose.trimEnd();
}
|
||||
|
||||
/**
 * Renders markdown section number `n`: a heading, a prose paragraph, a bullet
 * list, a GFM table and a fenced `ts` code block, separated by blank lines.
 * `n` is interpolated into the heading, the page counts and the code sample.
 */
function markdownSection(n: number): string {
  const heading = `## Section ${n}: migration analysis`;

  const paragraph = [
    `The workspace contains **${n * 12} pages** that still reference the legacy API. `,
    `Most of them live under [Perf test page](/p/page-1) and need the new transport. `,
    `Ниже приведена сводка по разделу с оценкой трудозатрат и основных рисков.`,
  ].join("");

  const bullets = [
    `- Update the fetch layer to the v6 transport`,
    `- Перенести таблицы соответствия идентификаторов`,
    `- Verify citation links after the move`,
    `- Проверить отображение длинных ответов в узкой панели`,
  ];

  const table = [
    `| Область | Страниц | Статус | Риск |`,
    `| --- | --- | --- | --- |`,
    `| API reference | ${n + 4} | migrated | low |`,
    `| Onboarding | ${n + 2} | in progress | medium |`,
    `| Release notes | ${n * 3} | pending | high |`,
  ];

  const code = [
    "```ts",
    `export function migrateSection${n}(rows: Row[]): Row[] {`,
    ` return rows`,
    ` .filter((row) => row.section === ${n})`,
    ` .map((row) => ({ ...row, migrated: true }));`,
    `}`,
    "```",
  ];

  // Blocks are separated by exactly one blank line.
  return [heading, paragraph, bullets.join("\n"), table.join("\n"), code.join("\n")].join("\n\n");
}
|
||||
|
||||
/**
 * Builds a markdown answer of roughly `targetChars` characters by appending
 * sections 1, 2, 3, … (each separated from the previous one by a blank line)
 * until the running length reaches the target. Whole sections only, so the
 * result can overshoot by up to one section; a target of 0 or less yields "".
 */
function makeMarkdownAnswer(targetChars: number): string {
  let answer = "";
  let counted = 0;
  for (let n = 1; counted < targetChars; n += 1) {
    const section = markdownSection(n);
    answer += n === 1 ? section : `\n\n${section}`;
    // Every section is counted with a two-character separator.
    counted += section.length + 2;
  }
  return answer;
}
|
||||
|
||||
/**
 * Plain ASCII filler for tool outputs.
 *
 * @param bytes Requested length in characters. Fractional values are rounded
 *   down; negative values are treated as 0.
 * @returns The filler sentence repeated and cut to exactly that many characters.
 */
function makeFiller(bytes: number): string {
  const unit = "Perf filler content for the synthetic getPage tool output. ";
  // A negative request would make the repeat count negative, and
  // String.prototype.repeat throws RangeError for that — clamp to an empty result.
  const length = Math.max(0, Math.floor(bytes));
  return unit.repeat(Math.ceil(length / unit.length)).slice(0, length);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Turn script
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** One scripted tool invocation together with its result. */
export interface TurnToolCall {
  /** Identifier that ties the call's input and output together. */
  toolCallId: string;
  /** The only tool the script uses. */
  toolName: "getPage";
  input: { pageId: string };
  /** Tool result; `content` carries the filler text. */
  output: { id: string; title: string; content: string };
}

/** One agent step: a reasoning block and the tool call that goes with it. */
export interface TurnStep {
  reasoningText: string;
  tool: TurnToolCall;
}

/** A complete scripted agent turn plus its size accounting. */
export interface TurnScript {
  steps: TurnStep[];
  /** The final markdown answer. */
  answerText: string;
  /** Approximate reasoning tokens for the whole turn (reasoning chars / 4). */
  reasoningTokens: number;
  /** Approximate context size after this turn, in tokens. */
  contextTokens: number;
  maxContextTokens: number;
  /** Actual generated visible characters: reasoning + tool outputs + answer. */
  totalChars: number;
  /** `totalChars / 4`, rounded. */
  approxTokens: number;
}
|
||||
|
||||
/**
 * Builds the scripted agent turn described by `config`.
 *
 * Each step gets generated reasoning prose and one `getPage` tool call whose
 * output content is filler of `config.toolOutputBytes` characters; the turn
 * ends with a generated markdown answer. Token figures are character counts
 * divided by CHARS_PER_TOKEN and rounded.
 *
 * @param config Size knobs for the turn.
 * @param idPrefix Prefix for tool call ids (`<prefix>-call-<n>`, numbered from
 *   1); keeps ids unique when several scripts coexist (e.g. 3 persisted turns
 *   in one chat).
 */
export function buildTurnScript(config: TurnConfig, idPrefix = "live"): TurnScript {
  const reasoningTarget = config.reasoningTokensPerStep * CHARS_PER_TOKEN;
  const steps: TurnStep[] = [];
  let reasoningChars = 0;
  let toolChars = 0;

  for (let i = 0; i < config.steps; i++) {
    const reasoningText = makeProse(reasoningTarget);
    const content = makeFiller(config.toolOutputBytes);
    const tool: TurnToolCall = {
      toolCallId: `${idPrefix}-call-${i + 1}`,
      toolName: "getPage",
      input: { pageId: "page-1" },
      output: { id: "page-1", title: "Perf test page", content },
    };
    steps.push({ reasoningText, tool });
    reasoningChars += reasoningText.length;
    toolChars += content.length;
  }

  const answerText = makeMarkdownAnswer(config.answerTokens * CHARS_PER_TOKEN);
  const totalChars = reasoningChars + toolChars + answerText.length;
  // Context size and the overall estimate are the same figure.
  const totalTokens = Math.round(totalChars / CHARS_PER_TOKEN);

  return {
    steps,
    answerText,
    reasoningTokens: Math.round(reasoningChars / CHARS_PER_TOKEN),
    contextTokens: totalTokens,
    maxContextTokens: 200_000,
    totalChars,
    approxTokens: totalTokens,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scenario A: persisted rows
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Number of user+assistant pairs the preset is split across for history. */
|
||||
const HISTORY_TURNS = 3;
|
||||
|
||||
const USER_PROMPTS = [
|
||||
"Проанализируй документ и выдели ключевые тезисы по каждому разделу.",
|
||||
"Now cross-check the migration matrix against the summary and list every mismatch.",
|
||||
"Собери финальный план миграции с оценкой рисков по каждой области.",
|
||||
];
|
||||
|
||||
/**
 * Converts a finished scripted turn into persisted UIMessage parts: for every
 * step a completed reasoning part followed by a `tool-<toolName>` part in the
 * "output-available" state, then one completed text part holding the answer.
 */
function scriptToPersistedParts(script: TurnScript): UIMessage["parts"] {
  const stepParts: unknown[] = script.steps.flatMap(({ reasoningText, tool }) => [
    { type: "reasoning", text: reasoningText, state: "done" },
    {
      type: `tool-${tool.toolName}`,
      toolCallId: tool.toolCallId,
      state: "output-available",
      input: tool.input,
      output: tool.output,
    },
  ]);
  const answerPart = { type: "text", text: script.answerText, state: "done" };
  // The parts are assembled untyped and asserted to the SDK's part type once, here.
  return [...stepParts, answerPart] as UIMessage["parts"];
}
|
||||
|
||||
/** A generated transcript together with its actual size. */
export interface PersistedFixture {
  rows: IAiChatMessageRow[];
  /** Sum of the `totalChars` of every generated turn script. */
  totalChars: number;
  /** `totalChars` divided by CHARS_PER_TOKEN, rounded. */
  approxTokens: number;
}

/**
 * Materializes the preset as a finished transcript of HISTORY_TURNS turns, one
 * user row plus one assistant row per turn.
 *
 * The preset's steps are spread across the turns as evenly as possible
 * (earlier turns take the remainder, and every turn gets at least one step);
 * each turn's answer is about `answerTokens / HISTORY_TURNS` tokens, with a
 * floor of 50. Row timestamps are one minute apart per turn, starting
 * HISTORY_TURNS minutes before now, with the assistant row 30 s after its user
 * row. The accounting is approximate — the actual totals are returned for
 * display.
 */
export function buildPersistedRows(config: TurnConfig): PersistedFixture {
  const MINUTE_MS = 60_000;
  const baseTime = Date.now() - HISTORY_TURNS * MINUTE_MS;
  const isoAt = (offsetMs: number): string => new Date(baseTime + offsetMs).toISOString();

  const evenShare = Math.floor(config.steps / HISTORY_TURNS);
  const leftover = config.steps % HISTORY_TURNS;
  const answerTokens = Math.max(50, Math.round(config.answerTokens / HISTORY_TURNS));

  const rows: IAiChatMessageRow[] = [];
  let totalChars = 0;

  for (let t = 0; t < HISTORY_TURNS; t++) {
    const turnNo = t + 1;
    const turnConfig: TurnConfig = {
      steps: Math.max(1, evenShare + (t < leftover ? 1 : 0)),
      reasoningTokensPerStep: config.reasoningTokensPerStep,
      toolOutputBytes: config.toolOutputBytes,
      answerTokens,
    };
    const script = buildTurnScript(turnConfig, `hist-${turnNo}`);
    totalChars += script.totalChars;

    const turnOffset = t * MINUTE_MS;
    rows.push(
      {
        id: `perf-row-u${turnNo}`,
        role: "user",
        content: USER_PROMPTS[t % USER_PROMPTS.length],
        metadata: null,
        createdAt: isoAt(turnOffset),
      },
      {
        id: `perf-row-a${turnNo}`,
        role: "assistant",
        content: script.answerText,
        metadata: {
          parts: scriptToPersistedParts(script),
          usage: { reasoningTokens: script.reasoningTokens },
          contextTokens: script.contextTokens,
          maxContextTokens: script.maxContextTokens,
          finishReason: "stop",
        },
        createdAt: isoAt(turnOffset + 30_000),
      },
    );
  }

  return { rows, totalChars, approxTokens: Math.round(totalChars / CHARS_PER_TOKEN) };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Scenario B: SSE stream
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Streaming delta size in chars (reasoning/answer text is split into these). */
const DELTA_CHARS = 200;

/**
 * Splits `text` into consecutive slices of at most `size` characters (UTF-16
 * code units). Concatenating the result gives back `text`; an empty `text`
 * yields an empty array.
 *
 * @param text Text to split.
 * @param size Maximum slice length; fractional values are rounded down.
 * @throws RangeError when `size` is NaN or less than 1 — a zero or negative
 *   step would never advance the index, so the loop would never terminate.
 */
function splitDeltas(text: string, size = DELTA_CHARS): string[] {
  if (!(size >= 1)) {
    throw new RangeError(`splitDeltas: size must be >= 1, got ${size}`);
  }
  const step = Math.floor(size);
  const deltas: string[] = [];
  for (let i = 0; i < text.length; i += step) {
    deltas.push(text.slice(i, i + step));
  }
  return deltas;
}
|
||||
|
||||
/** A pre-serialized SSE frame and the number of visible characters it contributes to the stats. */
interface SseFrame {
  /** Complete frame text: `data: <json>` followed by a blank line. */
  data: string;
  /** Visible characters carried by this frame (0 for control frames). */
  chars: number;
}

/**
 * Serializes one chunk object as an SSE `data:` frame.
 *
 * @param chunk Chunk payload; written as JSON.
 * @param chars Visible-character contribution recorded alongside the frame.
 */
function frame(chunk: Record<string, unknown>, chars = 0): SseFrame {
  const payload = JSON.stringify(chunk);
  return { data: "data: " + payload + "\n\n", chars };
}
|
||||
|
||||
/**
 * Turns the whole scripted turn into the ordered list of AI SDK v6 UI-message
 * SSE frames. The final `data: [DONE]` terminator is not included; the pump
 * appends it.
 *
 * Frame order: `start`; then per step `start-step`, the reasoning
 * start/deltas/end, the tool input start/available, the tool output and
 * `finish-step`; then one more step carrying the answer text start/deltas/end;
 * then `finish` with the usage metadata.
 *
 * @param script The scripted turn to serialize.
 * @param messageId Message id; also the prefix for the reasoning and text part ids.
 * @param chatId Chat id sent in the `start` frame's message metadata.
 */
function buildSseFrames(script: TurnScript, messageId: string, chatId: string): SseFrame[] {
  const frames: SseFrame[] = [];
  const emit = (chunk: Record<string, unknown>, chars?: number): void => {
    frames.push(frame(chunk, chars));
  };

  emit({ type: "start", messageId, messageMetadata: { chatId } });

  for (const [index, step] of script.steps.entries()) {
    const reasoningId = `${messageId}-r${index + 1}`;
    const { toolCallId, toolName, input, output } = step.tool;

    emit({ type: "start-step" });
    emit({ type: "reasoning-start", id: reasoningId });
    for (const delta of splitDeltas(step.reasoningText)) {
      emit({ type: "reasoning-delta", id: reasoningId, delta }, delta.length);
    }
    emit({ type: "reasoning-end", id: reasoningId });

    emit({ type: "tool-input-start", toolCallId, toolName });
    emit({ type: "tool-input-available", toolCallId, toolName, input });
    // The tool result arrives as ONE chunk, like the real server sends it.
    emit({ type: "tool-output-available", toolCallId, output }, output.content.length);
    emit({ type: "finish-step" });
  }

  // Final step: the markdown answer.
  const textId = `${messageId}-answer`;
  emit({ type: "start-step" });
  emit({ type: "text-start", id: textId });
  for (const delta of splitDeltas(script.answerText)) {
    emit({ type: "text-delta", id: textId, delta }, delta.length);
  }
  emit({ type: "text-end", id: textId });
  emit({ type: "finish-step" });

  emit({
    type: "finish",
    messageMetadata: {
      usage: { reasoningTokens: script.reasoningTokens },
      contextTokens: script.contextTokens,
      maxContextTokens: script.maxContextTokens,
      finishReason: "stop",
    },
  });

  return frames;
}
|
||||
|
||||
/** Configuration for one synthetic live stream. */
export interface LiveStreamSettings {
  /** The scripted turn to stream. */
  script: TurnScript;

  /** Delay in milliseconds between SSE chunks (one chunk per tick). */
  chunkIntervalMs: number;

  /**
   * Progress callback, invoked after each emitted chunk with the cumulative
   * chunk count and the cumulative number of visible characters.
   */
  onProgress?: (chunks: number, chars: number) => void;

  /** Fired once after the `[DONE]` terminator is enqueued. */
  onDone?: () => void;

  /** Fired if the client aborted the stream (Stop button). */
  onAbort?: () => void;
}
|
||||
|
||||
/**
 * Build a synthetic SSE Response streaming the scripted turn, one chunk every
 * `chunkIntervalMs`. Honors the fetch `AbortSignal` so the real Stop button works.
 *
 * Lifecycle guarantees:
 * - `onProgress` runs after every successfully enqueued frame.
 * - Exactly one of `onDone` / `onAbort` is reported per stream: once the
 *   `[DONE]` terminator has been sent the abort listener is removed, so a late
 *   abort can no longer report `onAbort` after `onDone`.
 * - A signal that is ALREADY aborted when the request is made still reports
 *   `onAbort`. The `abort` event does not fire retroactively, so the pump's
 *   own `signal.aborted` check routes through the same abort handler.
 *
 * @param settings - Script, chunk cadence and lifecycle callbacks.
 * @param signal - Optional abort signal taken from the fetch call.
 * @returns A 200 `text/event-stream` Response backed by the timed stream.
 */
export function buildSseResponse(
  settings: LiveStreamSettings,
  signal?: AbortSignal | null,
): Response {
  const messageId = `m-live-${Date.now()}`;
  const frames = buildSseFrames(settings.script, messageId, "perf-chat");
  const encoder = new TextEncoder();
  let index = 0;
  let emittedChars = 0;
  let timer: number | undefined;
  // True once the stream ended through completion or abort; keeps the
  // lifecycle callbacks from firing more than once.
  let finished = false;

  const stopPump = (): void => {
    if (timer !== undefined) {
      clearTimeout(timer);
      timer = undefined;
    }
  };

  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      const closeQuietly = (): void => {
        try {
          controller.close();
        } catch {
          // Already closed/cancelled — nothing to do.
        }
      };

      // Single abort path, shared by the `abort` event and the pump's check.
      const handleAbort = (): void => {
        if (finished) return;
        finished = true;
        stopPump();
        signal?.removeEventListener("abort", handleAbort);
        closeQuietly();
        settings.onAbort?.();
      };

      const pump = (): void => {
        timer = undefined;
        if (finished) return;
        if (signal?.aborted) {
          // Covers a signal that was aborted before the listener was attached.
          handleAbort();
          return;
        }
        if (index >= frames.length) {
          finished = true;
          // Completed normally: a later abort must not be reported any more.
          signal?.removeEventListener("abort", handleAbort);
          try {
            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
            controller.close();
          } catch {
            // Cancelled mid-flight.
          }
          settings.onDone?.();
          return;
        }
        const next = frames[index];
        index += 1;
        try {
          controller.enqueue(encoder.encode(next.data));
        } catch {
          // The consumer went away; stop emitting.
          stopPump();
          return;
        }
        emittedChars += next.chars;
        settings.onProgress?.(index, emittedChars);
        // onProgress may have aborted the stream synchronously; do not re-arm.
        if (!finished) {
          timer = window.setTimeout(pump, settings.chunkIntervalMs);
        }
      };

      signal?.addEventListener("abort", handleAbort, { once: true });
      timer = window.setTimeout(pump, settings.chunkIntervalMs);
    },
    cancel() {
      // Consumer cancelled the body: just stop emitting chunks.
      stopPump();
    },
  });

  return new Response(stream, {
    status: 200,
    headers: {
      "content-type": "text/event-stream",
      "cache-control": "no-cache",
      "x-vercel-ai-ui-message-stream": "v1",
    },
  });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// window.fetch patch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Settings the fetch patch uses to answer `/api/ai-chat/stream` requests.
 * `null` until `setLiveStreamSettings` has been called.
 */
let currentLiveSettings: LiveStreamSettings | null = null;

/**
 * Arm `/api/ai-chat/stream` requests with a scripted turn. The settings stay
 * in effect until this function is called again with different ones.
 *
 * @param settings - Script, chunk cadence and callbacks for the stream.
 */
export function setLiveStreamSettings(settings: LiveStreamSettings): void {
  currentLiveSettings = settings;
}
|
||||
|
||||
/**
 * Patch `window.fetch` BEFORE React mounts: requests to `/api/ai-chat/stream`
 * get the synthetic SSE Response; everything else passes through untouched.
 *
 * The URL is matched by substring on the string, `URL` or `Request` input.
 * When no live settings have been armed, a stream request is answered with a
 * plain 500 so the misconfiguration is visible instead of hitting the network.
 *
 * The abort signal is taken from `init.signal` when the caller supplied one,
 * otherwise from the `Request` object itself, so Stop works for both
 * `fetch(url, { signal })` and `fetch(new Request(url, { signal }))`.
 */
export function installAiChatStreamFetchPatch(): void {
  const originalFetch = window.fetch.bind(window);
  window.fetch = (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
    let url: string;
    let requestSignal: AbortSignal | null = null;
    if (typeof input === "string") {
      url = input;
    } else if (input instanceof URL) {
      url = input.href;
    } else {
      // A Request carries both its URL and its own abort signal.
      url = input.url;
      requestSignal = input.signal;
    }

    if (!url.includes("/api/ai-chat/stream")) {
      return originalFetch(input, init);
    }

    const settings = currentLiveSettings;
    if (!settings) {
      return Promise.resolve(
        new Response("perf harness: no live stream configured", { status: 500 }),
      );
    }

    // An explicitly provided init.signal takes precedence over the Request's.
    const signal = init?.signal !== undefined ? init.signal : requestSignal;
    return Promise.resolve(buildSseResponse(settings, signal));
  };
}
|
||||
Reference in New Issue
Block a user