diff --git a/.gitignore b/.gitignore index cf440100..4eb9e6fd 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,8 @@ lerna-debug.log* .nx/cache .claude/worktrees/ .claude/tmp/ +# Local Chrome performance traces recorded by the AI-chat perf harness +.claude/perf-traces/ # TypeScript incremental build artifacts *.tsbuildinfo diff --git a/apps/client/perf/ai-chat-perf-main.tsx b/apps/client/perf/ai-chat-perf-main.tsx new file mode 100644 index 00000000..0c75f68c --- /dev/null +++ b/apps/client/perf/ai-chat-perf-main.tsx @@ -0,0 +1,50 @@ +/** + * DEV-ONLY entry for the AI chat perf harness (served by the vite dev server at + * /perf/ai-chat-perf.html; never part of the production build, which uses the + * single default index.html entry). + * + * Mounts the minimal provider stack the real ChatThread needs (Mantine, router + * for tool-card Links, react-query, i18n) and patches `window.fetch` BEFORE + * React mounts so ChatThread's DefaultChatTransport requests to + * /api/ai-chat/stream are answered by the synthetic SSE generator. + */ + +import "@mantine/core/styles.css"; + +import ReactDOM from "react-dom/client"; +import { MantineProvider } from "@mantine/core"; +import { MemoryRouter } from "react-router-dom"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { mantineCssResolver, theme } from "../src/theme.ts"; +// i18n side-effect init (http-backend). Translations load from /locales in dev; +// missing keys fall back to the key text, which is fine for the harness. +import "../src/i18n.ts"; +import { installAiChatStreamFetchPatch } from "./synthetic-turn.ts"; +import PerfHarness from "./harness.tsx"; + +// MUST run before React mounts: ChatThread creates its transport with the +// global fetch, so the patch has to be in place before the first send. 
+installAiChatStreamFetchPatch(); + +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + refetchOnMount: false, + refetchOnWindowFocus: false, + retry: false, + staleTime: 5 * 60 * 1000, + }, + }, +}); + +const container = document.getElementById("root") as HTMLElement; + +ReactDOM.createRoot(container).render( + + + + + + + , +); diff --git a/apps/client/perf/ai-chat-perf.html b/apps/client/perf/ai-chat-perf.html new file mode 100644 index 00000000..5509160b --- /dev/null +++ b/apps/client/perf/ai-chat-perf.html @@ -0,0 +1,12 @@ + + + + + + AI chat perf harness + + +
+ + + diff --git a/apps/client/perf/harness.tsx b/apps/client/perf/harness.tsx new file mode 100644 index 00000000..32af237d --- /dev/null +++ b/apps/client/perf/harness.tsx @@ -0,0 +1,390 @@ +/** + * DEV-ONLY perf harness UI for the AI chat feature. + * + * Left panel: controls + live stats. Right side: a bordered box (~real chat + * window size) hosting the REAL ChatThread component. + * + * Scenario A "Open existing chat": mount ChatThread seeded with a large + * persisted transcript and measure click -> post-mount-paint time. + * Scenario B "Live agent stream": mount an empty chat and auto-send a message; + * the fetch patch (see synthetic-turn.ts) answers with a synthetic SSE stream + * through the real useChat pipeline. + */ + +import { useEffect, useMemo, useRef, useState } from "react"; +import type { CSSProperties, MutableRefObject } from "react"; +import ChatThread from "../src/features/ai-chat/components/chat-thread.tsx"; +import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts"; +import { + PRESETS, + buildPersistedRows, + buildTurnScript, + setLiveStreamSettings, + type PresetKey, +} from "./synthetic-turn.ts"; + +const AUTO_SEND_TEXT = "Run the synthetic perf turn"; +const AUTO_SEND_TIMEOUT_MS = 1000; +/** Stats display refresh period — 2x/s so the display itself stays cheap. 
*/ +const STATS_FLUSH_MS = 500; + +// --------------------------------------------------------------------------- +// Shared mutable stats (written from callbacks, flushed to state at 2 Hz) +// --------------------------------------------------------------------------- + +interface PerfStats { + longtaskCount: number; + longtaskTotalMs: number; + longtaskMaxMs: number; + fps: number; + sseChunks: number; + sseChars: number; + mountAMs: number | null; + streamState: "idle" | "streaming" | "done" | "aborted"; +} + +function emptyStats(): PerfStats { + return { + longtaskCount: 0, + longtaskTotalMs: 0, + longtaskMaxMs: 0, + fps: 0, + sseChunks: 0, + sseChars: 0, + mountAMs: null, + streamState: "idle", + }; +} + +/** + * Self-contained stats panel: owns the longtask observer, the FPS meter and the + * 2 Hz flush interval. Isolated in its OWN component so its periodic setState + * re-renders only this panel — NOT the ChatThread under measurement. + */ +function StatsPanel({ stats }: { stats: MutableRefObject }) { + const [snapshot, setSnapshot] = useState(() => ({ ...stats.current })); + + // Long tasks (main-thread blocks > 50ms). + useEffect(() => { + let observer: PerformanceObserver | null = null; + try { + observer = new PerformanceObserver((list) => { + for (const entry of list.getEntries()) { + stats.current.longtaskCount += 1; + stats.current.longtaskTotalMs += entry.duration; + stats.current.longtaskMaxMs = Math.max(stats.current.longtaskMaxMs, entry.duration); + } + }); + observer.observe({ type: "longtask", buffered: true }); + } catch { + // longtask entries unsupported in this browser — panel shows zeros. + } + return () => observer?.disconnect(); + }, [stats]); + + // FPS: frames rendered within the trailing 1s window. 
+ useEffect(() => { + let raf = 0; + const frames: number[] = []; + const loop = (now: number) => { + frames.push(now); + while (frames.length > 0 && frames[0] <= now - 1000) frames.shift(); + stats.current.fps = frames.length; + raf = requestAnimationFrame(loop); + }; + raf = requestAnimationFrame(loop); + return () => cancelAnimationFrame(raf); + }, [stats]); + + // Flush the mutable stats into the display at most 2x/s. + useEffect(() => { + const id = window.setInterval(() => setSnapshot({ ...stats.current }), STATS_FLUSH_MS); + return () => window.clearInterval(id); + }, [stats]); + + const resetLongtasks = () => { + stats.current.longtaskCount = 0; + stats.current.longtaskTotalMs = 0; + stats.current.longtaskMaxMs = 0; + setSnapshot({ ...stats.current }); + }; + + const row: CSSProperties = { display: "flex", justifyContent: "space-between", gap: 8 }; + return ( +
+
Stats
+
FPS (1s){snapshot.fps}
+
Long tasks{snapshot.longtaskCount}
+
Long total{snapshot.longtaskTotalMs.toFixed(0)} ms
+
Long max{snapshot.longtaskMaxMs.toFixed(0)} ms
+
SSE chunks{snapshot.sseChunks}
+
SSE chars{snapshot.sseChars.toLocaleString()}
+
Stream{snapshot.streamState}
+
+ Mount A + {snapshot.mountAMs === null ? "—" : `${snapshot.mountAMs.toFixed(0)} ms`} +
+ +
+ ); +} + +// --------------------------------------------------------------------------- +// Auto-send (scenario B): drive the REAL composer in the mounted DOM +// --------------------------------------------------------------------------- + +/** + * Fill the composer textarea via the native value setter + an `input` event + * (React 18 controlled-input pattern), then click the enabled "Send" button. + * Retried on rAF until the elements exist (ChatThread mounts asynchronously). + */ +function autoSend(host: HTMLElement, text: string): void { + const deadline = performance.now() + AUTO_SEND_TIMEOUT_MS; + + const tryClick = () => { + const button = host.querySelector('button[aria-label="Send"]'); + if (button && !button.disabled) { + button.click(); + return; + } + if (performance.now() < deadline) requestAnimationFrame(tryClick); + else console.error("[perf] auto-send: Send button never became clickable"); + }; + + const trySetValue = () => { + const textarea = host.querySelector("textarea"); + if (!textarea) { + if (performance.now() < deadline) requestAnimationFrame(trySetValue); + else console.error("[perf] auto-send: textarea not found"); + return; + } + const setter = Object.getOwnPropertyDescriptor( + window.HTMLTextAreaElement.prototype, + "value", + )?.set; + setter?.call(textarea, text); + textarea.dispatchEvent(new Event("input", { bubbles: true })); + // Click on a later frame so React commits the controlled value (which + // enables the Send button) before we press it. 
+ requestAnimationFrame(tryClick); + }; + + requestAnimationFrame(trySetValue); +} + +// --------------------------------------------------------------------------- +// Harness +// --------------------------------------------------------------------------- + +interface MountState { + mode: "A" | "B"; + key: number; + chatId: string | null; + rows: IAiChatMessageRow[]; +} + +const noop = (): void => {}; + +export default function PerfHarness() { + const [preset, setPreset] = useState("20k"); + const [intervalMs, setIntervalMs] = useState(15); + const [mounted, setMounted] = useState(null); + const [fixtureInfo, setFixtureInfo] = useState(null); + + const statsRef = useRef(emptyStats()); + const hostRef = useRef(null); + const keyCounterRef = useRef(0); + const mountStartRef = useRef(0); + const pendingMountMeasureRef = useRef(false); + + // The scripted live turn for the current preset (reused across B runs; the + // script is immutable data, so rebuilding per run is unnecessary). + const liveScript = useMemo(() => buildTurnScript(PRESETS[preset], "live"), [preset]); + + const openPage = useMemo(() => ({ id: "page-1", title: "Perf test page" }), []); + + // Scenario A: mount ChatThread seeded with a large persisted transcript. + const handleMountA = () => { + const fixture = buildPersistedRows(PRESETS[preset]); + setFixtureInfo( + `Persisted fixture: ${fixture.rows.length} rows, ` + + `${fixture.totalChars.toLocaleString()} chars ≈ ${fixture.approxTokens.toLocaleString()} tokens`, + ); + statsRef.current.mountAMs = null; + // Mark AFTER fixture generation: we measure mount cost, not generation cost + // (production receives its rows from the network). 
+ performance.mark("perf:mountA:start"); + mountStartRef.current = performance.now(); + pendingMountMeasureRef.current = true; + keyCounterRef.current += 1; + setMounted({ mode: "A", key: keyCounterRef.current, chatId: "perf-chat", rows: fixture.rows }); + }; + + // Measure scenario A: effect runs after the mount commit; double rAF lands + // after the first paint of the mounted transcript. + useEffect(() => { + if (!pendingMountMeasureRef.current) return; + pendingMountMeasureRef.current = false; + requestAnimationFrame(() => { + requestAnimationFrame(() => { + statsRef.current.mountAMs = performance.now() - mountStartRef.current; + performance.mark("perf:mountA:end"); + try { + performance.measure("perf:mountA", "perf:mountA:start", "perf:mountA:end"); + } catch { + // Marks cleared mid-run — ignore. + } + }); + }); + }, [mounted]); + + // Scenario B: mount an empty chat, arm the synthetic stream, auto-send. + const handleStartB = () => { + statsRef.current.sseChunks = 0; + statsRef.current.sseChars = 0; + statsRef.current.streamState = "streaming"; + setLiveStreamSettings({ + script: liveScript, + chunkIntervalMs: intervalMs, + onProgress: (chunks, chars) => { + statsRef.current.sseChunks = chunks; + statsRef.current.sseChars = chars; + }, + onDone: () => { + statsRef.current.streamState = "done"; + performance.mark("perf:streamB:end"); + try { + performance.measure("perf:streamB", "perf:streamB:start", "perf:streamB:end"); + } catch { + // Start mark missing (e.g. marks cleared) — ignore. 
+ } + }, + onAbort: () => { + statsRef.current.streamState = "aborted"; + }, + }); + performance.mark("perf:streamB:start"); + keyCounterRef.current += 1; + setMounted({ mode: "B", key: keyCounterRef.current, chatId: null, rows: [] }); + if (hostRef.current) autoSend(hostRef.current, AUTO_SEND_TEXT); + }; + + const handleUnmount = () => setMounted(null); + + const label: CSSProperties = { display: "block", fontSize: 12, margin: "10px 0 2px" }; + const button: CSSProperties = { display: "block", width: "100%", margin: "6px 0", padding: "6px 8px" }; + + return ( +
+ {/* Left: controls + stats */} +
+
AI chat perf harness
+ + + + + + + +
+ + + +
+ +
+
+ Live turn: {liveScript.totalChars.toLocaleString()} chars ≈{" "} + {liveScript.approxTokens.toLocaleString()} tokens +
+ {fixtureInfo &&
{fixtureInfo}
} + {mounted && ( +
+ Mounted: scenario {mounted.mode} (key {mounted.key}) +
+ )} +
+ +
+ +
+ + {/* Right: the real ChatThread inside a real-window-sized box */} +
+
+ {mounted ? ( + + ) : ( +
+ ChatThread unmounted. Use the controls on the left. +
+ )} +
+
+
+ ); +} diff --git a/apps/client/perf/synthetic-turn.ts b/apps/client/perf/synthetic-turn.ts new file mode 100644 index 00000000..439a5ab0 --- /dev/null +++ b/apps/client/perf/synthetic-turn.ts @@ -0,0 +1,517 @@ +/** + * DEV-ONLY synthetic agent-turn generator for the AI chat perf harness. + * + * Produces one scripted agent turn (reasoning + tool calls + markdown answer) + * from a size config, and materializes it two ways: + * - as an AI SDK v6 UI-message SSE stream (scenario B "live agent stream"), + * served by a `window.fetch` patch that intercepts `/api/ai-chat/stream`; + * - as persisted `IAiChatMessageRow[]` history (scenario A "open existing chat"). + * + * Wire format verified against the installed ai@6.0.207 `uiMessageChunkSchema` + * (strict objects — only the exact field names below are accepted). + */ + +import type { UIMessage } from "@ai-sdk/react"; +import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts"; + +// --------------------------------------------------------------------------- +// Config / presets +// --------------------------------------------------------------------------- + +/** 1 token ~= 4 chars — the approximation used throughout this module. */ +const CHARS_PER_TOKEN = 4; + +export interface TurnConfig { + /** Number of agent steps; each step = one reasoning block + one tool call. */ + steps: number; + /** Approximate reasoning tokens generated per step. */ + reasoningTokensPerStep: number; + /** Size of each tool call's output `content` filler, in bytes (ASCII). */ + toolOutputBytes: number; + /** Approximate size of the final markdown answer, in tokens. 
*/ + answerTokens: number; +} + +export type PresetKey = "5k" | "20k" | "50k"; + +export const PRESETS: Record = { + "5k": { + steps: 3, + reasoningTokensPerStep: 500, + toolOutputBytes: 10_000, + answerTokens: 600, + }, + "20k": { + steps: 6, + reasoningTokensPerStep: 2500, + toolOutputBytes: 20_000, + answerTokens: 1500, + }, + "50k": { + steps: 10, + reasoningTokensPerStep: 4000, + toolOutputBytes: 40_000, + answerTokens: 3000, + }, +}; + +// --------------------------------------------------------------------------- +// Text generators +// --------------------------------------------------------------------------- + +/** Mixed Russian/English prose sentences cycled to build reasoning text. */ +const REASONING_SENTENCES = [ + "Пользователь просит проанализировать документ и выделить ключевые тезисы по каждому разделу.", + "First I need to inspect the current page content to understand its overall structure.", + "Судя по оглавлению, раздел с техническими требованиями находится ближе к концу документа.", + "The table in section three contains the migration matrix that I should cross-check against the summary.", + "Проверю, нет ли противоречий между описанием API и приведёнными в тексте примерами вызовов.", + "Let me compare the numbers from the executive summary with the raw data in the appendix.", + "Похоже, автор использует термины «воркспейс» и workspace взаимозаменяемо — это стоит нормализовать.", + "I should keep the page ids from the tool output so the final answer can cite the source pages.", + "Осталось свести найденные несоответствия в одну таблицу и предложить порядок исправлений.", + "The remaining sections look consistent, so I can move on to drafting the structured answer.", +]; + +/** + * Build realistic prose of ~`targetChars` characters, inserting a newline + * roughly every 200 characters (mirrors how reasoning text tends to wrap). 
/**
 * Produce approximately `targetChars` characters of reasoning-style prose by
 * cycling through REASONING_SENTENCES in order.
 *
 * Every sentence is followed by one separator character: a space, or a newline
 * once about 200 characters have accumulated since the previous newline.
 * Generation stops as soon as the running length reaches `targetChars`, so the
 * result can overshoot by up to one sentence; trailing whitespace is trimmed.
 *
 * @param targetChars Desired approximate length; values <= 0 yield "".
 * @returns The generated prose.
 */
function makeProse(targetChars: number): string {
  const WRAP_AFTER = 200;
  let prose = "";
  let lineRun = 0;
  // `prose.length` doubles as the running total: each pass appends exactly
  // one sentence plus one separator character.
  for (let cursor = 0; prose.length < targetChars; cursor += 1) {
    const sentence = REASONING_SENTENCES[cursor % REASONING_SENTENCES.length];
    lineRun += sentence.length + 1;
    if (lineRun >= WRAP_AFTER) {
      prose += `${sentence}\n`;
      lineRun = 0;
    } else {
      prose += `${sentence} `;
    }
  }
  return prose.trimEnd();
}

/**
 * Render one synthetic markdown section: a heading, a mixed-language
 * paragraph, a bullet list, a GFM table and a fenced TypeScript sample. The
 * five groups are separated by single blank lines.
 *
 * @param n Section number; it is interpolated into the heading, the page
 *   counts, the table cells and the sample function name.
 * @returns The section as a markdown string.
 */
function markdownSection(n: number): string {
  const heading = `## Section ${n}: migration analysis`;

  const summary = [
    `The workspace contains **${n * 12} pages** that still reference the legacy API. `,
    `Most of them live under [Perf test page](/p/page-1) and need the new transport. `,
    `Ниже приведена сводка по разделу с оценкой трудозатрат и основных рисков.`,
  ].join("");

  const bullets = [
    `- Update the fetch layer to the v6 transport`,
    `- Перенести таблицы соответствия идентификаторов`,
    `- Verify citation links after the move`,
    `- Проверить отображение длинных ответов в узкой панели`,
  ].join("\n");

  const table = [
    `| Область | Страниц | Статус | Риск |`,
    `| --- | --- | --- | --- |`,
    `| API reference | ${n + 4} | migrated | low |`,
    `| Onboarding | ${n + 2} | in progress | medium |`,
    `| Release notes | ${n * 3} | pending | high |`,
  ].join("\n");

  const fence = "```";
  const sample = [
    `${fence}ts`,
    `export function migrateSection${n}(rows: Row[]): Row[] {`,
    ` return rows`,
    ` .filter((row) => row.section === ${n})`,
    ` .map((row) => ({ ...row, migrated: true }));`,
    `}`,
    fence,
  ].join("\n");

  // Joining the groups with a blank line matches a flat line list that has an
  // empty entry between groups.
  return [heading, summary, bullets, table, sample].join("\n\n");
}
/**
 * Build a markdown answer of roughly `targetChars` characters by appending
 * numbered sections (1, 2, 3, …) until the running size reaches the target.
 * Sections are joined with a blank line; the separator is counted toward the
 * running size, so the result can overshoot by up to one section.
 *
 * @param targetChars Desired approximate length; values <= 0 yield "".
 * @returns The concatenated markdown.
 */
function makeMarkdownAnswer(targetChars: number): string {
  const SEPARATOR = "\n\n";
  const sections: string[] = [];
  let budgeted = 0;
  for (let n = 1; budgeted < targetChars; n += 1) {
    const section = markdownSection(n);
    sections.push(section);
    budgeted += section.length + SEPARATOR.length;
  }
  return sections.join(SEPARATOR);
}

/**
 * Plain ASCII filler of exactly `bytes` characters for tool outputs.
 *
 * @param bytes Requested length. Fractional values are floored. Zero, negative
 *   and NaN sizes return "" — previously a sufficiently negative size
 *   surfaced as an opaque `RangeError` from `String.prototype.repeat`.
 * @returns The repeated filler sentence, cut to the requested length.
 */
function makeFiller(bytes: number): string {
  const unit = "Perf filler content for the synthetic getPage tool output. ";
  // `!(bytes > 0)` also catches NaN, which every ordered comparison rejects.
  if (!(bytes > 0)) return "";
  const size = Math.floor(bytes);
  return unit.repeat(Math.ceil(size / unit.length)).slice(0, size);
}

// ---------------------------------------------------------------------------
// Turn script
// ---------------------------------------------------------------------------

/** One scripted tool invocation: the call id, its input and its full output. */
export interface TurnToolCall {
  toolCallId: string;
  toolName: "getPage";
  input: { pageId: string };
  output: { id: string; title: string; content: string };
}

/** One agent step: a reasoning block followed by a single tool call. */
export interface TurnStep {
  reasoningText: string;
  tool: TurnToolCall;
}

/** A complete scripted agent turn plus its size accounting. */
export interface TurnScript {
  steps: TurnStep[];
  answerText: string;
  /** Approximate reasoning tokens for the whole turn (chars / 4). */
  reasoningTokens: number;
  /** Approximate context size after this turn, in tokens. */
  contextTokens: number;
  maxContextTokens: number;
  /** Actual generated visible chars: reasoning + tool outputs + answer. */
  totalChars: number;
  /** totalChars / 4, rounded. */
  approxTokens: number;
}
/**
 * Build the scripted agent turn for a size config.
 *
 * Every step carries the same reasoning prose and the same tool-output filler
 * (both depend only on `config`), so they are generated once and shared by all
 * steps instead of being rebuilt per step. Each step still gets its own
 * `input`/`output` objects and a unique tool call id.
 *
 * @param config Size configuration for the turn.
 * @param idPrefix Prefix for tool call ids; keeps ids unique when several
 *   scripts coexist (e.g. several persisted turns in one chat).
 * @returns The steps, the answer text and the character/token accounting.
 */
export function buildTurnScript(config: TurnConfig, idPrefix = "live"): TurnScript {
  // Skip generation entirely when there are no steps, as a zero-iteration
  // loop would.
  const hasSteps = config.steps > 0;
  const reasoningText = hasSteps
    ? makeProse(config.reasoningTokensPerStep * CHARS_PER_TOKEN)
    : "";
  const content = hasSteps ? makeFiller(config.toolOutputBytes) : "";

  const steps: TurnStep[] = [];
  for (let i = 0; i < config.steps; i++) {
    steps.push({
      reasoningText,
      tool: {
        toolCallId: `${idPrefix}-call-${i + 1}`,
        toolName: "getPage",
        input: { pageId: "page-1" },
        output: { id: "page-1", title: "Perf test page", content },
      },
    });
  }

  const reasoningChars = steps.length * reasoningText.length;
  const toolChars = steps.length * content.length;
  const answerText = makeMarkdownAnswer(config.answerTokens * CHARS_PER_TOKEN);
  const totalChars = reasoningChars + toolChars + answerText.length;
  // Context size and the displayed approximation are the same figure.
  const approxTokens = Math.round(totalChars / CHARS_PER_TOKEN);
  return {
    steps,
    answerText,
    reasoningTokens: Math.round(reasoningChars / CHARS_PER_TOKEN),
    contextTokens: approxTokens,
    maxContextTokens: 200_000,
    totalChars,
    approxTokens,
  };
}

// ---------------------------------------------------------------------------
// Scenario A: persisted rows
// ---------------------------------------------------------------------------

/** Number of user+assistant pairs the preset is split across for history. */
const HISTORY_TURNS = 3;

/** User prompts for the persisted history, cycled one per turn. */
const USER_PROMPTS = [
  "Проанализируй документ и выдели ключевые тезисы по каждому разделу.",
  "Now cross-check the migration matrix against the summary and list every mismatch.",
  "Собери финальный план миграции с оценкой рисков по каждой области.",
];
/**
 * Persisted UIMessage parts for one finished assistant turn: for every step a
 * finalized reasoning part followed by its tool part with the output
 * available, then a single finalized text part holding the answer.
 */
function scriptToPersistedParts(script: TurnScript): UIMessage["parts"] {
  const parts: unknown[] = script.steps.flatMap(({ reasoningText, tool }) => [
    { type: "reasoning", text: reasoningText, state: "done" },
    {
      type: `tool-${tool.toolName}`,
      toolCallId: tool.toolCallId,
      state: "output-available",
      input: tool.input,
      output: tool.output,
    },
  ]);
  parts.push({ type: "text", text: script.answerText, state: "done" });
  // The literals are built untyped and asserted once at the boundary.
  return parts as UIMessage["parts"];
}

/** Persisted transcript rows plus the actual generated size, for display. */
export interface PersistedFixture {
  rows: IAiChatMessageRow[];
  totalChars: number;
  approxTokens: number;
}

/**
 * Materialize the preset as a finished transcript of HISTORY_TURNS turns: one
 * user row and one assistant row per turn, with the preset's steps and answer
 * split across the assistant turns.
 *
 * The accounting is approximate: every turn gets at least one step and at
 * least 50 answer tokens, and user prompt text is not counted. The totals
 * actually generated are reported back for display.
 *
 * @param config Size configuration to spread over the history.
 * @returns The rows (oldest first) and the generated character/token totals.
 */
export function buildPersistedRows(config: TurnConfig): PersistedFixture {
  const MINUTE_MS = 60_000;
  const baseTime = Date.now() - HISTORY_TURNS * MINUTE_MS;
  // Distribute steps as evenly as possible (earlier turns get the remainder).
  const evenShare = Math.floor(config.steps / HISTORY_TURNS);
  const leftover = config.steps % HISTORY_TURNS;
  const answerTokens = Math.max(50, Math.round(config.answerTokens / HISTORY_TURNS));

  const rows: IAiChatMessageRow[] = [];
  let totalChars = 0;
  for (let turn = 0; turn < HISTORY_TURNS; turn += 1) {
    const ordinal = turn + 1;
    const turnStart = baseTime + turn * MINUTE_MS;
    const script = buildTurnScript(
      {
        steps: Math.max(1, evenShare + (turn < leftover ? 1 : 0)),
        reasoningTokensPerStep: config.reasoningTokensPerStep,
        toolOutputBytes: config.toolOutputBytes,
        answerTokens,
      },
      `hist-${ordinal}`,
    );
    totalChars += script.totalChars;

    rows.push(
      {
        id: `perf-row-u${ordinal}`,
        role: "user",
        content: USER_PROMPTS[turn % USER_PROMPTS.length],
        metadata: null,
        createdAt: new Date(turnStart).toISOString(),
      },
      {
        id: `perf-row-a${ordinal}`,
        role: "assistant",
        content: script.answerText,
        metadata: {
          parts: scriptToPersistedParts(script),
          usage: { reasoningTokens: script.reasoningTokens },
          contextTokens: script.contextTokens,
          maxContextTokens: script.maxContextTokens,
          finishReason: "stop",
        },
        // The assistant reply is stamped 30 s after its user prompt.
        createdAt: new Date(turnStart + 30_000).toISOString(),
      },
    );
  }

  return {
    rows,
    totalChars,
    approxTokens: Math.round(totalChars / CHARS_PER_TOKEN),
  };
}

// ---------------------------------------------------------------------------
// Scenario B: SSE stream
// ---------------------------------------------------------------------------

/** Streaming delta size in chars (reasoning/answer text is split into these). */
const DELTA_CHARS = 200;

/**
 * Split `text` into consecutive slices of at most `size` UTF-16 code units.
 *
 * @param text Text to split; "" yields an empty array.
 * @param size Slice length, at least 1. Fractional values are floored so the
 *   slices never overlap.
 * @returns The slices in order; concatenating them reproduces `text`.
 * @throws RangeError If `size` is below 1 or NaN — such a size previously made
 *   the loop run forever (the cursor never advanced).
 */
function splitDeltas(text: string, size = DELTA_CHARS): string[] {
  if (!(size >= 1)) {
    throw new RangeError(`splitDeltas: size must be >= 1, got ${size}`);
  }
  const step = Math.floor(size);
  const deltas: string[] = [];
  for (let i = 0; i < text.length; i += step) {
    deltas.push(text.slice(i, i + step));
  }
  return deltas;
}
/** One pre-serialized SSE frame plus its visible-char contribution for stats. */
interface SseFrame {
  data: string;
  chars: number;
}

/**
 * Serialize one UI-message chunk as an SSE `data:` frame.
 *
 * @param chunk The chunk object; it is JSON-encoded as the frame payload.
 * @param chars Visible characters this frame contributes to the stats.
 */
function frame(chunk: Record<string, unknown>, chars = 0): SseFrame {
  return { data: `data: ${JSON.stringify(chunk)}\n\n`, chars };
}

/**
 * Serialize the whole scripted turn into AI SDK v6 UI-message SSE frames
 * (excluding the final `data: [DONE]` terminator, appended by the pump).
 *
 * Order: `start`; for each step a reasoning block streamed as deltas followed
 * by one tool call whose output arrives as a single frame; a final step
 * streaming the markdown answer; `finish` carrying the usage metadata.
 */
function buildSseFrames(script: TurnScript, messageId: string, chatId: string): SseFrame[] {
  const out: SseFrame[] = [frame({ type: "start", messageId, messageMetadata: { chatId } })];

  let stepNo = 0;
  for (const { reasoningText, tool } of script.steps) {
    stepNo += 1;
    const reasoningId = `${messageId}-r${stepNo}`;
    const { toolCallId, toolName, input, output } = tool;
    out.push(
      frame({ type: "start-step" }),
      frame({ type: "reasoning-start", id: reasoningId }),
      ...splitDeltas(reasoningText).map((delta: string) =>
        frame({ type: "reasoning-delta", id: reasoningId, delta }, delta.length),
      ),
      frame({ type: "reasoning-end", id: reasoningId }),
      frame({ type: "tool-input-start", toolCallId, toolName }),
      frame({ type: "tool-input-available", toolCallId, toolName, input }),
      // The tool result arrives as ONE chunk, like the real server sends it.
      frame({ type: "tool-output-available", toolCallId, output }, output.content.length),
      frame({ type: "finish-step" }),
    );
  }

  // Final step: the markdown answer.
  const textId = `${messageId}-answer`;
  out.push(
    frame({ type: "start-step" }),
    frame({ type: "text-start", id: textId }),
    ...splitDeltas(script.answerText).map((delta: string) =>
      frame({ type: "text-delta", id: textId, delta }, delta.length),
    ),
    frame({ type: "text-end", id: textId }),
    frame({ type: "finish-step" }),
    frame({
      type: "finish",
      messageMetadata: {
        usage: { reasoningTokens: script.reasoningTokens },
        contextTokens: script.contextTokens,
        maxContextTokens: script.maxContextTokens,
        finishReason: "stop",
      },
    }),
  );
  return out;
}

export interface LiveStreamSettings {
  script: TurnScript;
  /** Delay between SSE chunks (one chunk per tick). */
  chunkIntervalMs: number;
  /** Progress callback: cumulative emitted chunk count and visible chars. */
  onProgress?: (chunks: number, chars: number) => void;
  /** Fired once after the `[DONE]` terminator is enqueued. */
  onDone?: () => void;
  /** Fired if the client aborted the stream (Stop button). */
  onAbort?: () => void;
}

/**
 * Build a synthetic SSE Response streaming the scripted turn, one chunk every
 * `chunkIntervalMs`. Honors the fetch `AbortSignal` so the real Stop button works.
 *
 * The stream ends in exactly one terminal state, after which no callback fires:
 * - done: every frame plus `data: [DONE]` was enqueued, then `onDone` runs;
 * - aborted: `signal` fired (or was already aborted on entry), the stream is
 *   closed and `onAbort` runs;
 * - cancelled: the reader cancelled the body; the pump just stops.
 *
 * The abort listener is removed on any terminal state, so a late abort (for
 * example on unmount after the turn finished) cannot report a completed
 * stream as aborted.
 *
 * @param settings Script, pacing and stat callbacks for this stream.
 * @param signal Optional abort signal of the intercepted request.
 * @returns A 200 `text/event-stream` Response backed by the timed pump.
 */
export function buildSseResponse(
  settings: LiveStreamSettings,
  signal?: AbortSignal | null,
): Response {
  const messageId = `m-live-${Date.now()}`;
  const frames = buildSseFrames(settings.script, messageId, "perf-chat");
  const encoder = new TextEncoder();
  let index = 0;
  let emittedChars = 0;
  // Global timers: identical to window.setTimeout in the browser, and they
  // keep the generator usable outside a DOM.
  let timer: ReturnType<typeof setTimeout> | undefined;
  let settled = false;
  let onSignalAbort: (() => void) | undefined;

  const stopPump = (): void => {
    if (timer !== undefined) {
      clearTimeout(timer);
      timer = undefined;
    }
  };

  // Enter a terminal state: stop ticking and detach from the signal.
  const settle = (): void => {
    settled = true;
    stopPump();
    if (onSignalAbort) {
      signal?.removeEventListener("abort", onSignalAbort);
      onSignalAbort = undefined;
    }
  };

  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      const abortStream = (): void => {
        if (settled) return;
        settle();
        try {
          controller.close();
        } catch {
          // Reader already cancelled or stream already closed.
        }
        settings.onAbort?.();
      };

      const pump = (): void => {
        timer = undefined;
        if (settled) return;
        if (signal?.aborted) {
          abortStream();
          return;
        }
        if (index >= frames.length) {
          settle();
          try {
            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
            controller.close();
          } catch {
            // Cancelled mid-flight: the terminator never went out, so the
            // stream is not reported as done.
            return;
          }
          settings.onDone?.();
          return;
        }
        const next = frames[index];
        index += 1;
        try {
          controller.enqueue(encoder.encode(next.data));
        } catch {
          settle();
          return;
        }
        emittedChars += next.chars;
        settings.onProgress?.(index, emittedChars);
        // The progress callback may have aborted the stream synchronously.
        if (!settled) timer = setTimeout(pump, settings.chunkIntervalMs);
      };

      // An already-aborted signal never dispatches "abort", so handle it here.
      if (signal?.aborted) {
        abortStream();
        return;
      }
      if (signal) {
        onSignalAbort = abortStream;
        signal.addEventListener("abort", onSignalAbort, { once: true });
      }
      timer = setTimeout(pump, settings.chunkIntervalMs);
    },
    cancel() {
      settle();
    },
  });

  return new Response(stream, {
    status: 200,
    headers: {
      "content-type": "text/event-stream",
      "cache-control": "no-cache",
      "x-vercel-ai-ui-message-stream": "v1",
    },
  });
}

// ---------------------------------------------------------------------------
// window.fetch patch
// ---------------------------------------------------------------------------

let currentLiveSettings: LiveStreamSettings | null = null;

/** Arm the next `/api/ai-chat/stream` request with a scripted turn. */
export function setLiveStreamSettings(settings: LiveStreamSettings): void {
  currentLiveSettings = settings;
}

/**
 * Patch `window.fetch` BEFORE React mounts: requests to `/api/ai-chat/stream`
 * get the synthetic SSE Response; everything else passes through untouched.
 *
 * If no stream has been armed via `setLiveStreamSettings`, the intercepted
 * request resolves with a 500 text response. The abort signal is taken from
 * `init.signal`, falling back to the signal carried by a `Request` input.
 */
export function installAiChatStreamFetchPatch(): void {
  const originalFetch = window.fetch.bind(window);
  window.fetch = (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
    const request =
      typeof input === "string" || input instanceof URL ? null : input;
    const url =
      typeof input === "string" ? input : input instanceof URL ? input.href : input.url;

    if (!url.includes("/api/ai-chat/stream")) {
      return originalFetch(input, init);
    }

    const settings = currentLiveSettings;
    if (!settings) {
      return Promise.resolve(
        new Response("perf harness: no live stream configured", { status: 500 }),
      );
    }
    const signal = init?.signal ?? request?.signal ?? null;
    return Promise.resolve(buildSseResponse(settings, signal));
  };
}
// diff --git a/apps/client/src/features/ai-chat/components/ai-chat.module.css b/apps/client/src/features/ai-chat/components/ai-chat.module.css index cd788cdd..7b99178c 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat.module.css +++ b/apps/client/src/features/ai-chat/components/ai-chat.module.css @@ -164,8 +164,8 @@ /* NOTE: `white-space: pre-wrap` is intentionally NOT set here. On the rendered markdown
it would turn the newlines between block tags (\n
  • ,

    \n
      ) into visible blank lines/indents on top of the - margins. The plain-text fallback that needs pre-wrap sets it - inline itself (see reasoning-block.tsx). */ + margins. The streaming plain-text path that needs pre-wrap sets it + per chunk instead, in PlainChunk (see streaming-plain-text.tsx). */ } .reasoningText p { diff --git a/apps/client/src/features/ai-chat/components/message-item.test.ts b/apps/client/src/features/ai-chat/components/message-item.test.ts index b5b6d96a..b8d9474e 100644 --- a/apps/client/src/features/ai-chat/components/message-item.test.ts +++ b/apps/client/src/features/ai-chat/components/message-item.test.ts @@ -65,6 +65,25 @@ describe("arePropsEqual", () => { expect(arePropsEqual(props(m), props(m))).toBe(true); }); + // REGRESSION (stranded reasoning part): a reasoning part is left at + // `state:"streaming"` forever when the turn ends without `reasoning-end` + // (manual Stop during thinking). The signature is EQUAL across that turn-end + // flip (nothing in the message changed), so the comparator must ALSO compare + // `turnStreaming` — otherwise the memo swallows the flip and ReasoningBlock + // never switches from chunked plain text to its one-time markdown parse. 
+ it("returns false when turnStreaming differs despite an equal signature", () => { + const m = msg([ + { type: "reasoning", text: "thinking", state: "streaming" }, + { type: "text", text: "answer" }, + ]); + expect( + arePropsEqual( + props(m, { turnStreaming: true }), + props(m, { turnStreaming: false }), + ), + ).toBe(false); + }); + it("returns true for the same content in a different message object", () => { const a = msg([{ type: "text", text: "answer" }]); const b = msg([{ type: "text", text: "answer" }]); diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx index 46c25af2..4e645d8a 100644 --- a/apps/client/src/features/ai-chat/components/message-item.tsx +++ b/apps/client/src/features/ai-chat/components/message-item.tsx @@ -52,6 +52,20 @@ interface MessageItemProps { * absent; the public share passes the configured identity (agent role) name. */ assistantName?: string; + /** + * Whether the WHOLE turn is still streaming (MessageList's `isStreaming`). + * A reasoning part may be left `state: "streaming"` forever when the turn + * ends without a `reasoning-end` chunk (manual Stop during the thinking + * phase, or a provider that never emits it) — the AI SDK finalizes reasoning + * state ONLY on `reasoning-end`, not on `finish-step`/`finish`. So part-level + * state alone cannot prove liveness; the reasoning part is treated as live + * only while the whole turn is still streaming. Defaults to false. + * + * The parent passes it as "turn is live AND this is the tail row", so a + * stranded part in an EARLIER row never re-activates when a later turn + * streams. + */ + turnStreaming?: boolean; } /** @@ -105,6 +119,7 @@ function MessageItem({ showCitations = true, neutralizeInternalLinks = false, assistantName, + turnStreaming = false, }: MessageItemProps) { // `signature` is intentionally not read in the body — it exists solely as the // memo key (see arePropsEqual). 
The render reads `message` directly. @@ -155,8 +170,23 @@ function MessageItem({ const text = (part as { text?: string }).text ?? ""; if (!text.trim() && !(reasoningTokens && reasoningTokens > 0)) return null; + // Absent state (persisted rows) and "done" both mean finalized. + // `messageSignature` already includes each part's `state`, so the + // streaming→done flip changes the row signature and re-renders this + // row — which is what lets ReasoningBlock switch from chunked plain + // text to its one-time markdown parse (see reasoning-block.tsx). + // ALSO require the turn to be live: a part stranded at + // `state:"streaming"` after the turn ended (no `reasoning-end` — see + // the `turnStreaming` prop doc) must still finalize and parse. + const streaming = + turnStreaming && (part as { state?: string }).state === "streaming"; return ( - + ); } @@ -245,7 +275,11 @@ export function arePropsEqual( prev.signature === next.signature && prev.showCitations === next.showCitations && prev.neutralizeInternalLinks === next.neutralizeInternalLinks && - prev.assistantName === next.assistantName + prev.assistantName === next.assistantName && + // At turn end only the tail row's flag flips (see the `turnStreaming` prop + // doc), so this costs one cheap, terminal re-render — the one that converts + // a stranded `state:"streaming"` reasoning part to its one-time markdown parse.
+ prev.turnStreaming === next.turnStreaming ); } diff --git a/apps/client/src/features/ai-chat/components/message-list.test.tsx b/apps/client/src/features/ai-chat/components/message-list.test.tsx index b19470a0..20987a4a 100644 --- a/apps/client/src/features/ai-chat/components/message-list.test.tsx +++ b/apps/client/src/features/ai-chat/components/message-list.test.tsx @@ -1,5 +1,5 @@ import { describe, expect, it, vi } from "vitest"; -import { render } from "@testing-library/react"; +import { fireEvent, render } from "@testing-library/react"; import { MantineProvider } from "@mantine/core"; import type { UIMessage } from "@ai-sdk/react"; @@ -50,8 +50,9 @@ vi.stubGlobal( // One assistant message wrapping the given `parts`. Reused across renders in the // regression test to model how the AI SDK hands back the SAME message object. -const msg = (parts: UIMessage["parts"]): UIMessage => - ({ id: "m1", role: "assistant", parts }) as UIMessage; +// Pass an explicit `id` when a test renders several rows at once. +const msg = (parts: UIMessage["parts"], id = "m1"): UIMessage => + ({ id, role: "assistant", parts }) as UIMessage; describe("MessageList", () => { it("wires the real MessageItem and supplies a valid signature end-to-end", () => { @@ -116,4 +117,102 @@ describe("MessageList", () => { renderChatMarkdownSpy.mock.calls.some((c) => c[0] === "streamed answer"), ).toBe(true); }); + + // REGRESSION (stranded reasoning part): the AI SDK sets a reasoning part's + // state to "done" ONLY on the `reasoning-end` chunk — `finish-step`/`finish` + // do NOT finalize it. A manual Stop during the thinking phase (or a provider + // that never emits `reasoning-end`) therefore leaves the part at + // `state:"streaming"` forever. 
MessageItem must derive ReasoningBlock's + // `streaming` from part state AND turn liveness (MessageList's `isStreaming`, + // forwarded as `turnStreaming`): while the turn streams the expanded block + // shows chunked plain text (no parse); once the turn ends — even though the + // part is still `state:"streaming"` — the block finalizes and does its + // one-time markdown parse. Note the message signature does NOT change across + // that flip, so this also exercises the `turnStreaming` memo comparison in + // arePropsEqual (without it the row would never re-render). + it("finalizes a reasoning part stranded at state:'streaming' when the turn ends", () => { + renderChatMarkdownSpy.mockClear(); + const reasoningText = "**bold** thinking"; + // Reasoning part stranded mid-stream + a non-empty answer part (a + // reasoning-only message renders nothing — see message-content.ts). + const message = msg([ + { type: "reasoning", text: reasoningText, state: "streaming" }, + { type: "text", text: "partial answer" }, + ]); + const parsesOfReasoning = () => + renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText) + .length; + + const { rerender, getByRole, queryByText } = render( + + + , + ); + // Expand the reasoning block (its toggle is the only button in the list). + fireEvent.click(getByRole("button")); + // Turn live + part streaming -> ReasoningBlock received streaming=true: + // the body is chunked plain text (raw markdown syntax), NOT parsed. + expect(queryByText(/bold/)).not.toBeNull(); + expect(parsesOfReasoning()).toBe(0); + + // The turn ends WITHOUT `reasoning-end`: the part object is untouched + // (still state:"streaming"), only the turn-level flag flips. + rerender( + + + , + ); + // ReasoningBlock now received streaming=false and did its one-time parse. 
+ expect(parsesOfReasoning()).toBe(1); + }); + + // REGRESSION (turn-global liveness leaking into earlier rows): `isStreaming` + // is turn-global, so forwarding it to EVERY row would re-mark a reasoning + // part stranded at `state:"streaming"` in a PREVIOUS message (see the test + // above) as live again whenever a LATER turn streams — an expanded stranded + // block would flip markdown -> raw plain text -> markdown across turn + // boundaries, re-parsing each time. MessageList must gate `turnStreaming` + // to the TAIL row only. + it("keeps a stranded reasoning part in an earlier message finalized while a later turn streams", () => { + renderChatMarkdownSpy.mockClear(); + const reasoningText = "**bold** thinking"; + // First (earlier) assistant message: its turn was stopped during the + // thinking phase, leaving the reasoning part at state:"streaming". + const first = msg( + [ + { type: "reasoning", text: reasoningText, state: "streaming" }, + { type: "text", text: "first answer" }, + ], + "m1", + ); + // Second assistant message: the LATER turn, currently streaming. + const second = msg([{ type: "text", text: "second answer" }], "m2"); + const parsesOfReasoning = () => + renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText) + .length; + + const { rerender, getByRole, queryByText } = render( + + + , + ); + // Expand the first row's reasoning block (the only toggle in the list — + // the second message has no reasoning or tool parts). + fireEvent.click(getByRole("button")); + // The turn is live but the first row is NOT the tail: its ReasoningBlock + // received streaming=false, so the stranded part stays finalized and does + // its one-time markdown parse instead of dropping to chunked plain text. + expect(queryByText(/bold/)).not.toBeNull(); + expect(parsesOfReasoning()).toBe(1); + + // A later-turn delta re-renders the list; the earlier block must neither + // flip back to streaming nor re-parse. 
+ (second.parts[0] as { text: string }).text = "second answer grows"; + rerender( + + + , + ); + expect(parsesOfReasoning()).toBe(1); + }); }); diff --git a/apps/client/src/features/ai-chat/components/message-list.tsx b/apps/client/src/features/ai-chat/components/message-list.tsx index 2cb2183c..25435aa5 100644 --- a/apps/client/src/features/ai-chat/components/message-list.tsx +++ b/apps/client/src/features/ai-chat/components/message-list.tsx @@ -196,7 +196,7 @@ export default function MessageList({ return ( - {messages.map((message) => ( + {messages.map((message, index) => ( // `signature` is snapshotted HERE (parent render) into an immutable // string and handed to MessageItem as its memo key. It must NOT be // recomputed inside MessageItem's arePropsEqual: the AI SDK mutates the @@ -210,6 +210,13 @@ export default function MessageList({ showCitations={showCitations} neutralizeInternalLinks={neutralizeInternalLinks} assistantName={assistantName} + // Turn-level liveness, gated to the TAIL row: only the tail message + // can belong to the in-flight turn, so a reasoning part stranded at + // `state:"streaming"` in an EARLIER message (its turn ended without + // `reasoning-end`) stays finalized and doesn't flip back to plain + // text (and re-parse) whenever a later turn streams — see + // message-item.tsx. + turnStreaming={isStreaming && index === messages.length - 1} /> ))} {typing && ( diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx index ca3443fc..5754821d 100644 --- a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx +++ b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx @@ -28,7 +28,11 @@ import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts"; // matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. 
-function renderBlock(props: { text: string; tokens?: number }) { +function renderBlock(props: { + text: string; + tokens?: number; + streaming?: boolean; +}) { return render( @@ -84,4 +88,54 @@ describe("ReasoningBlock", () => { fireEvent.click(screen.getByRole("button")); expect(renderSpy).toHaveBeenCalledTimes(1); }); + + it("does not parse while expanded and STREAMING; shows chunked plain text", () => { + const renderSpy = vi.mocked(renderChatMarkdown); + renderSpy.mockClear(); + renderBlock({ + text: "первый абзац размышлений\n\nвторой абзац растёт", + tokens: 5, + streaming: true, + }); + fireEvent.click(screen.getByRole("button")); + // Expanded + still streaming: NO markdown parse and NO innerHTML swaps per + // delta — the body is chunked plain text (only the tail chunk updates). + // This is the O(n²) hole #302 left open (Safari whole-tab freeze). + expect(renderSpy).not.toHaveBeenCalled(); + // Both paragraph chunks' raw text is present in the body. + expect(screen.getByText(/первый абзац размышлений/)).toBeDefined(); + expect(screen.getByText(/второй абзац растёт/)).toBeDefined(); + }); + + it("parses exactly once when streaming flips to done while expanded", () => { + const renderSpy = vi.mocked(renderChatMarkdown); + renderSpy.mockClear(); + const { rerender } = renderBlock({ + text: "**bold** reasoning", + tokens: 5, + streaming: true, + }); + fireEvent.click(screen.getByRole("button")); + expect(renderSpy).not.toHaveBeenCalled(); + + // Finalization: the part's state flips streaming→done, the parent + // re-renders the row (the flip changes the message signature), and the + // block does its ONE markdown parse of the now-stable text. + rerender( + + + , + ); + expect(renderSpy).toHaveBeenCalledTimes(1); + // The parsed html branch rendered (the mock wraps the input in

      ). + expect(screen.getByText(/reasoning/)).toBeDefined(); + + // Further re-renders with unchanged props do not re-parse. + rerender( + + + , + ); + expect(renderSpy).toHaveBeenCalledTimes(1); + }); }); diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.tsx index 25cc7459..8156730d 100644 --- a/apps/client/src/features/ai-chat/components/reasoning-block.tsx +++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx @@ -5,6 +5,7 @@ import { useTranslation } from "react-i18next"; import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import { collapseBlankLines } from "@/features/ai-chat/utils/collapse-blank-lines.ts"; import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts"; +import { StreamingPlainText } from "@/features/ai-chat/components/streaming-plain-text.tsx"; import classes from "@/features/ai-chat/components/ai-chat.module.css"; interface ReasoningBlockProps { @@ -15,6 +16,10 @@ interface ReasoningBlockProps { * step/turn has finished. When absent (or 0) the count is estimated from the * text length so it ticks live as the reasoning streams in. */ tokens?: number; + /** True while the reasoning part is still streaming (part `state === + * "streaming"`). False means finalized: persisted history or `state === + * "done"`. Gates the markdown parse — see the invariant on the memo below. */ + streaming?: boolean; } /** @@ -27,26 +32,30 @@ interface ReasoningBlockProps { * Providers that don't stream reasoning TEXT still render this block from the * authoritative count alone (header only, empty body) so the cost is visible. 
*/ -function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { +function ReasoningBlock({ text, tokens, streaming = false }: ReasoningBlockProps) { const { t } = useTranslation(); const [open, setOpen] = useState(false); // Authoritative count wins; otherwise estimate live from the streamed text. const count = tokens && tokens > 0 ? tokens : estimateTokens(text); const trimmed = text.trim(); - // Parse the reasoning markdown ONLY while the block is expanded. Collapsed is the - // default and the common case during a long "thinking" stream: reasoning text - // streams in and grows with every throttled delta (~20Hz), so a `[trimmed]`-only - // memo re-parses the whole, ever-growing text (marked + DOMPurify) on every delta - // — an O(n²) storm that pins the main thread and freezes the chat, all for a block - // the user isn't even looking at (the html is only shown inside - // below). Gating on `open` skips that hidden parsing entirely; expanding parses the - // current text once (an instant, user-initiated click), and further streaming while - // open is the normal per-delta append render, like the answer. + // Markdown parse invariant (per throttled ~20Hz stream delta the text GROWS): + // 1. Collapsed -> never parse (#302): the html is only shown inside + // , so parsing for a hidden body would be an O(n²) + // marked + DOMPurify storm. + // 2. Expanded + STREAMING -> no parse and no innerHTML swaps either: the body + // renders as chunked plain text (StreamingPlainText) with a memoized + // stable prefix, so each delta updates only the tail chunk's text node. + // This closes the O(n²) hole #302 left open ("expanded while streaming") + // that froze the whole tab in Safari when watching the thinking stream. + // 3. Finalized + expanded -> exactly one parse: `trimmed` and `streaming` + // are stable after the part is done, so this memo runs once per expand. const html = useMemo( () => - open && trimmed ? 
renderChatMarkdown(collapseBlankLines(trimmed), {}) : "", - [open, trimmed], + open && trimmed && !streaming + ? renderChatMarkdown(collapseBlankLines(trimmed), {}) + : "", + [open, trimmed, streaming], ); return ( @@ -83,12 +92,12 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { dangerouslySetInnerHTML={{ __html: html }} /> ) : ( - - {trimmed} - + // Still streaming (or markdown yielded nothing): chunked plain text. + // The wrapper carries the reasoningText styling; each chunk sets its + // own pre-wrap inline (NOT on this div — see ai-chat.module.css). +
      + +
      )}
      )} @@ -96,7 +105,7 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { ); } -// Memoized: re-renders only when `text`/`tokens` change (primitive props, default -// shallow compare), so a parent re-render during streaming of OTHER content does -// not re-run the markdown parse for an already-finalized reasoning block. +// Memoized: re-renders only when `text`/`tokens`/`streaming` change (primitive +// props, default shallow compare), so a parent re-render during streaming of OTHER +// content does not re-run the markdown parse for an already-finalized reasoning block. export default memo(ReasoningBlock); diff --git a/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx new file mode 100644 index 00000000..3f6876bf --- /dev/null +++ b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx @@ -0,0 +1,146 @@ +import { describe, it, expect } from "vitest"; +import { render } from "@testing-library/react"; + +import { + splitPlainChunks, + StreamingPlainText, +} from "./streaming-plain-text"; + +describe("splitPlainChunks", () => { + // THE load-bearing property (see the invariant comment in the module): under + // append-only growth, every chunk except the LAST must be byte-identical + // between successive calls, so the memoized chunk components never re-render + // for the stable prefix and each stream delta touches only the tail chunk. + it("keeps all non-last chunks byte-identical across append-only growth", () => { + // A simulated reasoning stream covering: appends inside the last paragraph, + // appends that ADD new blank lines, growth of a trailing newline run, and a + // trailing separator later followed by text. 
+ const steps = [ + "Пер", + "Первый абзац", + "Первый абзац\n", + "Первый абзац\n\n", + "Первый абзац\n\n\n", + "Первый абзац\n\n\nВторой", + "Первый абзац\n\n\nВторой абзац растёт", + "Первый абзац\n\n\nВторой абзац растёт\n\nТретий", + "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\n", + "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\nЧетвёртый", + ]; + let prev: string[] = []; + for (const text of steps) { + const next = splitPlainChunks(text); + // Lossless: chunks always reassemble into the exact input. + expect(next.join("")).toBe(text); + // Chunk count never shrinks (boundaries never disappear). + expect(next.length).toBeGreaterThanOrEqual(prev.length); + // Every previously-FINAL chunk (all but prev's last) is unchanged. + for (let i = 0; i < prev.length - 1; i++) { + expect(next[i]).toBe(prev[i]); + } + prev = next; + } + // Guard against a vacuous pass: the final split must be multi-chunk. + expect(prev.length).toBeGreaterThanOrEqual(4); + }); + + it("attaches the blank-line separator run to the preceding chunk", () => { + expect(splitPlainChunks("a\n\nb")).toEqual(["a\n\n", "b"]); + // A longer run is ONE separator, not several boundaries. + expect(splitPlainChunks("a\n\n\n\nb")).toEqual(["a\n\n\n\n", "b"]); + expect(splitPlainChunks("a\n\nb\n\n\nc")).toEqual(["a\n\n", "b\n\n\n", "c"]); + }); + + it("single newlines are not boundaries", () => { + expect(splitPlainChunks("a\nb\nc")).toEqual(["a\nb\nc"]); + }); + + // INTENTIONAL: CRLF blank lines are NOT boundaries (the regex is `\n{2,}` + // only). Supporting `(?:\r?\n){2,}` would break the stable-prefix invariant: + // a lone trailing `\r` is not a boundary, but a later-appended `\n` would + // merge with it into a new separator unit and retroactively create a boundary + // INSIDE previously-emitted text, moving old chunk edges. So CRLF input stays + // in one (still lossless) chunk — only granularity is coarser; LLM output is + // `\n` in practice. 
See the doc comment on splitPlainChunks. + it("keeps CRLF blank lines inside one chunk", () => { + expect(splitPlainChunks("a\r\n\r\nb")).toEqual(["a\r\n\r\nb"]); + // Mixed input: only pure-`\n` runs split. + expect(splitPlainChunks("a\r\n\r\nb\n\nc")).toEqual(["a\r\n\r\nb\n\n", "c"]); + }); + + it("never emits empty phantom chunks (multi-blank-line / trailing newlines)", () => { + expect(splitPlainChunks("")).toEqual([]); + // A trailing newline run stays inside the last chunk (it may still grow). + expect(splitPlainChunks("a\n")).toEqual(["a\n"]); + expect(splitPlainChunks("a\n\n")).toEqual(["a\n\n"]); + expect(splitPlainChunks("a\n\nb\n\n")).toEqual(["a\n\n", "b\n\n"]); + // Degenerate all-newlines input is a single deterministic chunk. + expect(splitPlainChunks("\n\n\n")).toEqual(["\n\n\n"]); + for (const text of ["a\n\n\nb\n\n", "x\n\n\n\n\ny\n\nz\n"]) { + for (const chunk of splitPlainChunks(text)) { + expect(chunk.length).toBeGreaterThan(0); + } + } + }); +}); + +describe("StreamingPlainText", () => { + it("renders one block per chunk, stripping trailing separator newlines at display time", () => { + const text = "первый абзац\n\nвторой абзац\n\n\nтретий"; + const { container } = render(); + const blocks = Array.from(container.querySelectorAll("div")); + // One block element per chunk. + expect(blocks.length).toBe(splitPlainChunks(text).length); + // DISPLAY-ONLY strip: each rendered block drops its chunk's trailing + // separator newlines — rendering them inside a pre-wrap block would add an + // empty line ON TOP of the block break (a doubled gap). The RAW chunks + // keep their separators (losslessness is asserted on splitPlainChunks + // above); multi-blank-line runs collapse to one uniform gap, consistent + // with collapseBlankLines on the finalized markdown path. 
+ expect(blocks.map((b) => b.textContent)).toEqual([ + "первый абзац", + "второй абзац", + "третий", + ]); + // The uniform paragraph gap comes from the block margin instead (matches + // the `.reasoningText p { margin: 0 0 4px }` rhythm of the markdown path). + for (const block of blocks) { + expect((block as HTMLElement).style.marginBottom).toBe("4px"); + } + }); + + it("keeps interior newlines intact — only the trailing run is stripped", () => { + const text = "строка один\nстрока два\n\nхвост"; + const { container } = render(); + const blocks = Array.from(container.querySelectorAll("div")); + expect(blocks.map((b) => b.textContent)).toEqual([ + "строка один\nстрока два", + "хвост", + ]); + }); + + // SECURITY INVARIANT — the load-bearing property of the streaming path: the + // reasoning text is raw, untrusted model output rendered WITHOUT a sanitizer + // (no marked/DOMPurify, no innerHTML). PlainChunk emits it as a React text + // node, which escapes it, so HTML in the model output is inert. This test + // pins that the path is a TEXT sink, not an HTML sink: a future change to + // `dangerouslySetInnerHTML` (reintroducing XSS) MUST fail here. + // + // The existing tests assert via textContent, which strips tags and so cannot + // distinguish an escaped literal from injected DOM. This one asserts on the + // parsed DOM directly: if the markup were injected as HTML, the / + // would become real elements and querySelector would find them. + it("renders HTML-like reasoning as an escaped literal, never as injected DOM", () => { + const text = "\n\nhi"; + const { container } = render(); + // No DOM elements were created from the payload — it was NOT parsed as HTML. + expect(container.querySelector("img")).toBeNull(); + expect(container.querySelector("b")).toBeNull(); + // The raw markup survived verbatim as text (proving it is escaped, not + // interpreted). 
textContent alone can't prove this, but combined with the + // querySelector assertions above it does: the literals are present AND no + // elements exist. + expect(container.textContent).toContain("hi"); + expect(container.textContent).toContain(""); + }); +}); diff --git a/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx b/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx new file mode 100644 index 00000000..bc72d790 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx @@ -0,0 +1,90 @@ +import { memo, useMemo } from "react"; + +/** + * Split plain text into chunks at blank-line (paragraph) boundaries, keeping + * each separator run attached to the END of the preceding chunk, so the chunks + * always reassemble byte-for-byte into the input. + * + * A boundary is the end of a maximal `\n{2,}` run that is followed by at least + * one more character. A newline run that is a SUFFIX of the text is NOT a + * boundary yet: under append-only growth it may still gain more newlines, and + * cutting there would move the boundary on the next call. + * + * CRITICAL INVARIANT (load-bearing for StreamingPlainText's memoization): for + * APPEND-ONLY growth of `text`, every chunk except the LAST is byte-identical + * between successive calls — previously-emitted boundaries never move. Proof + * sketch: appending never modifies existing characters, so (a) an existing + * boundary's newline run and its following character are untouched and the + * boundary persists at the same offset; (b) no NEW boundary can appear strictly + * inside the old text, because a `\n{2,}` run followed by a character entirely + * within the old text would already have been a boundary. New boundaries can + * only materialize at or after the old text's end, i.e. inside the last chunk. 
+ * + * CRLF is deliberately NOT a boundary: supporting `(?:\r?\n){2,}` would BREAK + * the invariant above — a lone trailing `\r` is not a boundary, but a later- + * appended `\n` would merge with it into a new separator unit and retroactively + * create a boundary INSIDE previously-emitted text, moving old chunk edges. + * With `\n`-only runs, appended characters can never extend a run that is + * already followed by a non-`\n` character, so old boundaries are immutable. + * CRLF blank lines therefore intentionally stay inside one chunk: correctness/ + * losslessness are unaffected, only chunk granularity for CRLF input (LLM + * output is `\n` in practice). + */ +export function splitPlainChunks(text: string): string[] { + const chunks: string[] = []; + let start = 0; + for (const match of text.matchAll(/\n{2,}/g)) { + const end = match.index + match[0].length; + // Suffix run: not a stable boundary yet (see the invariant above). + if (end >= text.length) break; + chunks.push(text.slice(start, end)); + start = end; + } + if (start < text.length) chunks.push(text.slice(start)); + return chunks; +} + +/** + * One immutable chunk. Memoized on its string prop: during streaming only the + * TAIL chunk's text changes (see the splitPlainChunks invariant), so React + * skips every stable chunk and the per-delta DOM work is a single text-node + * update. `pre-wrap` is set per chunk (like the old raw-text fallback did), NOT + * on the surrounding markdown-styled container — see the note in + * ai-chat.module.css. Font/size/color are inherited from that container. + * + * DISPLAY-ONLY newline strip: the raw chunk keeps its trailing `\n{2,}` + * separator run attached (the splitPlainChunks invariant, load-bearing for the + * memo), but rendering those newlines inside a pre-wrap block would add an + * empty line ON TOP of the block break — a doubled gap. 
So the RENDERED string + * drops trailing newlines and the paragraph gap comes from `marginBottom: 4` + * instead, matching the `.reasoningText p { margin: 0 0 4px }` rhythm of the + * finalized markdown. Multi-blank-line runs thus collapse to one uniform gap, + * consistent with `collapseBlankLines` on the markdown path. The last chunk + * usually has no trailing newlines (strip is a no-op); its margin is harmless. + */ +const PlainChunk = memo(function PlainChunk({ text }: { text: string }) { + return ( +
      + {text.replace(/\n+$/, "")} +
      + ); +}); + +/** + * Renders still-streaming plain text as a list of paragraph chunks where only + * the tail chunk changes per delta. No markdown, no sanitizer, no innerHTML — + * this is the cheap streaming-time stand-in for the one-time markdown parse + * that happens after the part is finalized (see reasoning-block.tsx). + */ +export function StreamingPlainText({ text }: { text: string }) { + const chunks = useMemo(() => splitPlainChunks(text), [text]); + return ( + <> + {chunks.map((chunk, index) => ( + // Index keys are stable here: chunks are append-only (the invariant), + // so an index never gets a different chunk's content mid-stream. + + ))} + + ); +}