From 351860ba4b32d198e1b1447f315027bc030199f6 Mon Sep 17 00:00:00 2001 From: agent_vscode Date: Sat, 4 Jul 2026 03:50:48 +0300 Subject: [PATCH 1/3] perf(ai-chat): stop per-delta markdown re-parse in expanded streaming reasoning (#302 follow-up) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The expanded "Thinking" block re-ran marked+DOMPurify and re-set dangerouslySetInnerHTML with the whole growing reasoning text on every throttled stream delta (~20 Hz) — the O(n²) hole #302 deliberately left open ("expanded while streaming"). In Safari this saturates the main thread and freezes the entire tab during long agent runs, including while the window is minimized (the JS storm keeps running) and on re-expanding it mid-turn (one huge layout burst). - streaming-plain-text.tsx (new): chunked plain-text renderer; chunks split at blank-line boundaries with an append-only stable-prefix invariant, so per delta only the tail chunk's text node updates — no marked, no DOMPurify, no innerHTML swaps. - reasoning-block.tsx: parse markdown only when expanded AND finalized (one-time); while streaming, render chunked plain text; collapsed stays parse-free (#302 unchanged). - message-item.tsx / message-list.tsx: reasoning liveness = part state:"streaming" AND the turn is live AND the row is the tail — a part stranded at state:"streaming" (manual Stop during thinking, or a provider that never emits reasoning-end) finalizes at turn end and never re-activates when later turns stream. Verified with the Chrome perf harness: per-delta marked/DOMPurify work is gone from the hot path; collapsed streaming stays at 0 long tasks up to 143k tokens even at 4x CPU throttle; finalized expanded blocks still render parsed markdown. 245 client tests green. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai-chat/components/message-item.test.ts | 19 +++ .../ai-chat/components/message-item.tsx | 38 +++++- .../ai-chat/components/message-list.test.tsx | 105 ++++++++++++++- .../ai-chat/components/message-list.tsx | 9 +- .../components/reasoning-block.test.tsx | 56 +++++++- .../ai-chat/components/reasoning-block.tsx | 51 +++++--- .../components/streaming-plain-text.test.tsx | 121 ++++++++++++++++++ .../components/streaming-plain-text.tsx | 90 +++++++++++++ 8 files changed, 461 insertions(+), 28 deletions(-) create mode 100644 apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx create mode 100644 apps/client/src/features/ai-chat/components/streaming-plain-text.tsx diff --git a/apps/client/src/features/ai-chat/components/message-item.test.ts b/apps/client/src/features/ai-chat/components/message-item.test.ts index b5b6d96a..b8d9474e 100644 --- a/apps/client/src/features/ai-chat/components/message-item.test.ts +++ b/apps/client/src/features/ai-chat/components/message-item.test.ts @@ -65,6 +65,25 @@ describe("arePropsEqual", () => { expect(arePropsEqual(props(m), props(m))).toBe(true); }); + // REGRESSION (stranded reasoning part): a reasoning part is left at + // `state:"streaming"` forever when the turn ends without `reasoning-end` + // (manual Stop during thinking). The signature is EQUAL across that turn-end + // flip (nothing in the message changed), so the comparator must ALSO compare + // `turnStreaming` — otherwise the memo swallows the flip and ReasoningBlock + // never switches from chunked plain text to its one-time markdown parse. 
+ it("returns false when turnStreaming differs despite an equal signature", () => { + const m = msg([ + { type: "reasoning", text: "thinking", state: "streaming" }, + { type: "text", text: "answer" }, + ]); + expect( + arePropsEqual( + props(m, { turnStreaming: true }), + props(m, { turnStreaming: false }), + ), + ).toBe(false); + }); + it("returns true for the same content in a different message object", () => { const a = msg([{ type: "text", text: "answer" }]); const b = msg([{ type: "text", text: "answer" }]); diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx b/apps/client/src/features/ai-chat/components/message-item.tsx index 46c25af2..4e645d8a 100644 --- a/apps/client/src/features/ai-chat/components/message-item.tsx +++ b/apps/client/src/features/ai-chat/components/message-item.tsx @@ -52,6 +52,20 @@ interface MessageItemProps { * absent; the public share passes the configured identity (agent role) name. */ assistantName?: string; + /** + * Whether the WHOLE turn is still streaming (MessageList's `isStreaming`). + * A reasoning part may be left `state: "streaming"` forever when the turn + * ends without a `reasoning-end` chunk (manual Stop during the thinking + * phase, or a provider that never emits it) — the AI SDK finalizes reasoning + * state ONLY on `reasoning-end`, not on `finish-step`/`finish`. So part-level + * state alone cannot prove liveness; the reasoning part is treated as live + * only while the whole turn is still streaming. Defaults to false. + * + * The parent passes it as "turn is live AND this is the tail row", so a + * stranded part in an EARLIER row never re-activates when a later turn + * streams. + */ + turnStreaming?: boolean; } /** @@ -105,6 +119,7 @@ function MessageItem({ showCitations = true, neutralizeInternalLinks = false, assistantName, + turnStreaming = false, }: MessageItemProps) { // `signature` is intentionally not read in the body — it exists solely as the // memo key (see arePropsEqual). 
The render reads `message` directly. @@ -155,8 +170,23 @@ function MessageItem({ const text = (part as { text?: string }).text ?? ""; if (!text.trim() && !(reasoningTokens && reasoningTokens > 0)) return null; + // Absent state (persisted rows) and "done" both mean finalized. + // `messageSignature` already includes each part's `state`, so the + // streaming→done flip changes the row signature and re-renders this + // row — which is what lets ReasoningBlock switch from chunked plain + // text to its one-time markdown parse (see reasoning-block.tsx). + // ALSO require the turn to be live: a part stranded at + // `state:"streaming"` after the turn ended (no `reasoning-end` — see + // the `turnStreaming` prop doc) must still finalize and parse. + const streaming = + turnStreaming && (part as { state?: string }).state === "streaming"; return ( - + ); } @@ -245,7 +275,11 @@ export function arePropsEqual( prev.signature === next.signature && prev.showCitations === next.showCitations && prev.neutralizeInternalLinks === next.neutralizeInternalLinks && - prev.assistantName === next.assistantName + prev.assistantName === next.assistantName && + // The turn-end flip re-renders every row once (cheap, terminal event) — + // that is what converts a stranded `state:"streaming"` reasoning part to + // its one-time markdown parse (see the `turnStreaming` prop doc). 
+ prev.turnStreaming === next.turnStreaming ); } diff --git a/apps/client/src/features/ai-chat/components/message-list.test.tsx b/apps/client/src/features/ai-chat/components/message-list.test.tsx index b19470a0..20987a4a 100644 --- a/apps/client/src/features/ai-chat/components/message-list.test.tsx +++ b/apps/client/src/features/ai-chat/components/message-list.test.tsx @@ -1,5 +1,5 @@ import { describe, expect, it, vi } from "vitest"; -import { render } from "@testing-library/react"; +import { fireEvent, render } from "@testing-library/react"; import { MantineProvider } from "@mantine/core"; import type { UIMessage } from "@ai-sdk/react"; @@ -50,8 +50,9 @@ vi.stubGlobal( // One assistant message wrapping the given `parts`. Reused across renders in the // regression test to model how the AI SDK hands back the SAME message object. -const msg = (parts: UIMessage["parts"]): UIMessage => - ({ id: "m1", role: "assistant", parts }) as UIMessage; +// Pass an explicit `id` when a test renders several rows at once. +const msg = (parts: UIMessage["parts"], id = "m1"): UIMessage => + ({ id, role: "assistant", parts }) as UIMessage; describe("MessageList", () => { it("wires the real MessageItem and supplies a valid signature end-to-end", () => { @@ -116,4 +117,102 @@ describe("MessageList", () => { renderChatMarkdownSpy.mock.calls.some((c) => c[0] === "streamed answer"), ).toBe(true); }); + + // REGRESSION (stranded reasoning part): the AI SDK sets a reasoning part's + // state to "done" ONLY on the `reasoning-end` chunk — `finish-step`/`finish` + // do NOT finalize it. A manual Stop during the thinking phase (or a provider + // that never emits `reasoning-end`) therefore leaves the part at + // `state:"streaming"` forever. 
MessageItem must derive ReasoningBlock's + // `streaming` from part state AND turn liveness (MessageList's `isStreaming`, + // forwarded as `turnStreaming`): while the turn streams the expanded block + // shows chunked plain text (no parse); once the turn ends — even though the + // part is still `state:"streaming"` — the block finalizes and does its + // one-time markdown parse. Note the message signature does NOT change across + // that flip, so this also exercises the `turnStreaming` memo comparison in + // arePropsEqual (without it the row would never re-render). + it("finalizes a reasoning part stranded at state:'streaming' when the turn ends", () => { + renderChatMarkdownSpy.mockClear(); + const reasoningText = "**bold** thinking"; + // Reasoning part stranded mid-stream + a non-empty answer part (a + // reasoning-only message renders nothing — see message-content.ts). + const message = msg([ + { type: "reasoning", text: reasoningText, state: "streaming" }, + { type: "text", text: "partial answer" }, + ]); + const parsesOfReasoning = () => + renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText) + .length; + + const { rerender, getByRole, queryByText } = render( + + + , + ); + // Expand the reasoning block (its toggle is the only button in the list). + fireEvent.click(getByRole("button")); + // Turn live + part streaming -> ReasoningBlock received streaming=true: + // the body is chunked plain text (raw markdown syntax), NOT parsed. + expect(queryByText(/bold/)).not.toBeNull(); + expect(parsesOfReasoning()).toBe(0); + + // The turn ends WITHOUT `reasoning-end`: the part object is untouched + // (still state:"streaming"), only the turn-level flag flips. + rerender( + + + , + ); + // ReasoningBlock now received streaming=false and did its one-time parse. 
+ expect(parsesOfReasoning()).toBe(1); + }); + + // REGRESSION (turn-global liveness leaking into earlier rows): `isStreaming` + // is turn-global, so forwarding it to EVERY row would re-mark a reasoning + // part stranded at `state:"streaming"` in a PREVIOUS message (see the test + // above) as live again whenever a LATER turn streams — an expanded stranded + // block would flip markdown -> raw plain text -> markdown across turn + // boundaries, re-parsing each time. MessageList must gate `turnStreaming` + // to the TAIL row only. + it("keeps a stranded reasoning part in an earlier message finalized while a later turn streams", () => { + renderChatMarkdownSpy.mockClear(); + const reasoningText = "**bold** thinking"; + // First (earlier) assistant message: its turn was stopped during the + // thinking phase, leaving the reasoning part at state:"streaming". + const first = msg( + [ + { type: "reasoning", text: reasoningText, state: "streaming" }, + { type: "text", text: "first answer" }, + ], + "m1", + ); + // Second assistant message: the LATER turn, currently streaming. + const second = msg([{ type: "text", text: "second answer" }], "m2"); + const parsesOfReasoning = () => + renderChatMarkdownSpy.mock.calls.filter((c) => c[0] === reasoningText) + .length; + + const { rerender, getByRole, queryByText } = render( + + + , + ); + // Expand the first row's reasoning block (the only toggle in the list — + // the second message has no reasoning or tool parts). + fireEvent.click(getByRole("button")); + // The turn is live but the first row is NOT the tail: its ReasoningBlock + // received streaming=false, so the stranded part stays finalized and does + // its one-time markdown parse instead of dropping to chunked plain text. + expect(queryByText(/bold/)).not.toBeNull(); + expect(parsesOfReasoning()).toBe(1); + + // A later-turn delta re-renders the list; the earlier block must neither + // flip back to streaming nor re-parse. 
+ (second.parts[0] as { text: string }).text = "second answer grows"; + rerender( + + + , + ); + expect(parsesOfReasoning()).toBe(1); + }); }); diff --git a/apps/client/src/features/ai-chat/components/message-list.tsx b/apps/client/src/features/ai-chat/components/message-list.tsx index 2cb2183c..25435aa5 100644 --- a/apps/client/src/features/ai-chat/components/message-list.tsx +++ b/apps/client/src/features/ai-chat/components/message-list.tsx @@ -196,7 +196,7 @@ export default function MessageList({ return ( - {messages.map((message) => ( + {messages.map((message, index) => ( // `signature` is snapshotted HERE (parent render) into an immutable // string and handed to MessageItem as its memo key. It must NOT be // recomputed inside MessageItem's arePropsEqual: the AI SDK mutates the @@ -210,6 +210,13 @@ export default function MessageList({ showCitations={showCitations} neutralizeInternalLinks={neutralizeInternalLinks} assistantName={assistantName} + // Turn-level liveness, gated to the TAIL row: only the tail message + // can belong to the in-flight turn, so a reasoning part stranded at + // `state:"streaming"` in an EARLIER message (its turn ended without + // `reasoning-end`) stays finalized and doesn't flip back to plain + // text (and re-parse) whenever a later turn streams — see + // message-item.tsx. + turnStreaming={isStreaming && index === messages.length - 1} /> ))} {typing && ( diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx index ca3443fc..5754821d 100644 --- a/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx +++ b/apps/client/src/features/ai-chat/components/reasoning-block.test.tsx @@ -28,7 +28,11 @@ import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts"; // matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. 
-function renderBlock(props: { text: string; tokens?: number }) { +function renderBlock(props: { + text: string; + tokens?: number; + streaming?: boolean; +}) { return render( @@ -84,4 +88,54 @@ describe("ReasoningBlock", () => { fireEvent.click(screen.getByRole("button")); expect(renderSpy).toHaveBeenCalledTimes(1); }); + + it("does not parse while expanded and STREAMING; shows chunked plain text", () => { + const renderSpy = vi.mocked(renderChatMarkdown); + renderSpy.mockClear(); + renderBlock({ + text: "первый абзац размышлений\n\nвторой абзац растёт", + tokens: 5, + streaming: true, + }); + fireEvent.click(screen.getByRole("button")); + // Expanded + still streaming: NO markdown parse and NO innerHTML swaps per + // delta — the body is chunked plain text (only the tail chunk updates). + // This is the O(n²) hole #302 left open (Safari whole-tab freeze). + expect(renderSpy).not.toHaveBeenCalled(); + // Both paragraph chunks' raw text is present in the body. + expect(screen.getByText(/первый абзац размышлений/)).toBeDefined(); + expect(screen.getByText(/второй абзац растёт/)).toBeDefined(); + }); + + it("parses exactly once when streaming flips to done while expanded", () => { + const renderSpy = vi.mocked(renderChatMarkdown); + renderSpy.mockClear(); + const { rerender } = renderBlock({ + text: "**bold** reasoning", + tokens: 5, + streaming: true, + }); + fireEvent.click(screen.getByRole("button")); + expect(renderSpy).not.toHaveBeenCalled(); + + // Finalization: the part's state flips streaming→done, the parent + // re-renders the row (the flip changes the message signature), and the + // block does its ONE markdown parse of the now-stable text. + rerender( + + + , + ); + expect(renderSpy).toHaveBeenCalledTimes(1); + // The parsed html branch rendered (the mock wraps the input in

). + expect(screen.getByText(/reasoning/)).toBeDefined(); + + // Further re-renders with unchanged props do not re-parse. + rerender( + + + , + ); + expect(renderSpy).toHaveBeenCalledTimes(1); + }); }); diff --git a/apps/client/src/features/ai-chat/components/reasoning-block.tsx b/apps/client/src/features/ai-chat/components/reasoning-block.tsx index 25cc7459..8156730d 100644 --- a/apps/client/src/features/ai-chat/components/reasoning-block.tsx +++ b/apps/client/src/features/ai-chat/components/reasoning-block.tsx @@ -5,6 +5,7 @@ import { useTranslation } from "react-i18next"; import { estimateTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import { collapseBlankLines } from "@/features/ai-chat/utils/collapse-blank-lines.ts"; import { renderChatMarkdown } from "@/features/ai-chat/utils/markdown.ts"; +import { StreamingPlainText } from "@/features/ai-chat/components/streaming-plain-text.tsx"; import classes from "@/features/ai-chat/components/ai-chat.module.css"; interface ReasoningBlockProps { @@ -15,6 +16,10 @@ interface ReasoningBlockProps { * step/turn has finished. When absent (or 0) the count is estimated from the * text length so it ticks live as the reasoning streams in. */ tokens?: number; + /** True while the reasoning part is still streaming (part `state === + * "streaming"`). False means finalized: persisted history or `state === + * "done"`. Gates the markdown parse — see the invariant on the memo below. */ + streaming?: boolean; } /** @@ -27,26 +32,30 @@ interface ReasoningBlockProps { * Providers that don't stream reasoning TEXT still render this block from the * authoritative count alone (header only, empty body) so the cost is visible. */ -function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { +function ReasoningBlock({ text, tokens, streaming = false }: ReasoningBlockProps) { const { t } = useTranslation(); const [open, setOpen] = useState(false); // Authoritative count wins; otherwise estimate live from the streamed text. 
const count = tokens && tokens > 0 ? tokens : estimateTokens(text); const trimmed = text.trim(); - // Parse the reasoning markdown ONLY while the block is expanded. Collapsed is the - // default and the common case during a long "thinking" stream: reasoning text - // streams in and grows with every throttled delta (~20Hz), so a `[trimmed]`-only - // memo re-parses the whole, ever-growing text (marked + DOMPurify) on every delta - // — an O(n²) storm that pins the main thread and freezes the chat, all for a block - // the user isn't even looking at (the html is only shown inside - // below). Gating on `open` skips that hidden parsing entirely; expanding parses the - // current text once (an instant, user-initiated click), and further streaming while - // open is the normal per-delta append render, like the answer. + // Markdown parse invariant (per throttled ~20Hz stream delta the text GROWS): + // 1. Collapsed -> never parse (#302): the html is only shown inside + // , so parsing for a hidden body would be an O(n²) + // marked + DOMPurify storm. + // 2. Expanded + STREAMING -> no parse and no innerHTML swaps either: the body + // renders as chunked plain text (StreamingPlainText) with a memoized + // stable prefix, so each delta updates only the tail chunk's text node. + // This closes the O(n²) hole #302 left open ("expanded while streaming") + // that froze the whole tab in Safari when watching the thinking stream. + // 3. Finalized + expanded -> exactly one parse: `trimmed` and `streaming` + // are stable after the part is done, so this memo runs once per expand. const html = useMemo( () => - open && trimmed ? renderChatMarkdown(collapseBlankLines(trimmed), {}) : "", - [open, trimmed], + open && trimmed && !streaming + ? 
renderChatMarkdown(collapseBlankLines(trimmed), {}) + : "", + [open, trimmed, streaming], ); return ( @@ -83,12 +92,12 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { dangerouslySetInnerHTML={{ __html: html }} /> ) : ( - - {trimmed} - + // Still streaming (or markdown yielded nothing): chunked plain text. + // The wrapper carries the reasoningText styling; each chunk sets its + // own pre-wrap inline (NOT on this div — see ai-chat.module.css). +
+ +
)}
)} @@ -96,7 +105,7 @@ function ReasoningBlock({ text, tokens }: ReasoningBlockProps) { ); } -// Memoized: re-renders only when `text`/`tokens` change (primitive props, default -// shallow compare), so a parent re-render during streaming of OTHER content does -// not re-run the markdown parse for an already-finalized reasoning block. +// Memoized: re-renders only when `text`/`tokens`/`streaming` change (primitive +// props, default shallow compare), so a parent re-render during streaming of OTHER +// content does not re-run the markdown parse for an already-finalized reasoning block. export default memo(ReasoningBlock); diff --git a/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx new file mode 100644 index 00000000..499c6ee7 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx @@ -0,0 +1,121 @@ +import { describe, it, expect } from "vitest"; +import { render } from "@testing-library/react"; + +import { + splitPlainChunks, + StreamingPlainText, +} from "./streaming-plain-text"; + +describe("splitPlainChunks", () => { + // THE load-bearing property (see the invariant comment in the module): under + // append-only growth, every chunk except the LAST must be byte-identical + // between successive calls, so the memoized chunk components never re-render + // for the stable prefix and each stream delta touches only the tail chunk. + it("keeps all non-last chunks byte-identical across append-only growth", () => { + // A simulated reasoning stream covering: appends inside the last paragraph, + // appends that ADD new blank lines, growth of a trailing newline run, and a + // trailing separator later followed by text. 
+ const steps = [ + "Пер", + "Первый абзац", + "Первый абзац\n", + "Первый абзац\n\n", + "Первый абзац\n\n\n", + "Первый абзац\n\n\nВторой", + "Первый абзац\n\n\nВторой абзац растёт", + "Первый абзац\n\n\nВторой абзац растёт\n\nТретий", + "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\n", + "Первый абзац\n\n\nВторой абзац растёт\n\nТретий абзац\n\nЧетвёртый", + ]; + let prev: string[] = []; + for (const text of steps) { + const next = splitPlainChunks(text); + // Lossless: chunks always reassemble into the exact input. + expect(next.join("")).toBe(text); + // Chunk count never shrinks (boundaries never disappear). + expect(next.length).toBeGreaterThanOrEqual(prev.length); + // Every previously-FINAL chunk (all but prev's last) is unchanged. + for (let i = 0; i < prev.length - 1; i++) { + expect(next[i]).toBe(prev[i]); + } + prev = next; + } + // Guard against a vacuous pass: the final split must be multi-chunk. + expect(prev.length).toBeGreaterThanOrEqual(4); + }); + + it("attaches the blank-line separator run to the preceding chunk", () => { + expect(splitPlainChunks("a\n\nb")).toEqual(["a\n\n", "b"]); + // A longer run is ONE separator, not several boundaries. + expect(splitPlainChunks("a\n\n\n\nb")).toEqual(["a\n\n\n\n", "b"]); + expect(splitPlainChunks("a\n\nb\n\n\nc")).toEqual(["a\n\n", "b\n\n\n", "c"]); + }); + + it("single newlines are not boundaries", () => { + expect(splitPlainChunks("a\nb\nc")).toEqual(["a\nb\nc"]); + }); + + // INTENTIONAL: CRLF blank lines are NOT boundaries (the regex is `\n{2,}` + // only). Supporting `(?:\r?\n){2,}` would break the stable-prefix invariant: + // a lone trailing `\r` is not a boundary, but a later-appended `\n` would + // merge with it into a new separator unit and retroactively create a boundary + // INSIDE previously-emitted text, moving old chunk edges. So CRLF input stays + // in one (still lossless) chunk — only granularity is coarser; LLM output is + // `\n` in practice. 
See the doc comment on splitPlainChunks. + it("keeps CRLF blank lines inside one chunk", () => { + expect(splitPlainChunks("a\r\n\r\nb")).toEqual(["a\r\n\r\nb"]); + // Mixed input: only pure-`\n` runs split. + expect(splitPlainChunks("a\r\n\r\nb\n\nc")).toEqual(["a\r\n\r\nb\n\n", "c"]); + }); + + it("never emits empty phantom chunks (multi-blank-line / trailing newlines)", () => { + expect(splitPlainChunks("")).toEqual([]); + // A trailing newline run stays inside the last chunk (it may still grow). + expect(splitPlainChunks("a\n")).toEqual(["a\n"]); + expect(splitPlainChunks("a\n\n")).toEqual(["a\n\n"]); + expect(splitPlainChunks("a\n\nb\n\n")).toEqual(["a\n\n", "b\n\n"]); + // Degenerate all-newlines input is a single deterministic chunk. + expect(splitPlainChunks("\n\n\n")).toEqual(["\n\n\n"]); + for (const text of ["a\n\n\nb\n\n", "x\n\n\n\n\ny\n\nz\n"]) { + for (const chunk of splitPlainChunks(text)) { + expect(chunk.length).toBeGreaterThan(0); + } + } + }); +}); + +describe("StreamingPlainText", () => { + it("renders one block per chunk, stripping trailing separator newlines at display time", () => { + const text = "первый абзац\n\nвторой абзац\n\n\nтретий"; + const { container } = render(); + const blocks = Array.from(container.querySelectorAll("div")); + // One block element per chunk. + expect(blocks.length).toBe(splitPlainChunks(text).length); + // DISPLAY-ONLY strip: each rendered block drops its chunk's trailing + // separator newlines — rendering them inside a pre-wrap block would add an + // empty line ON TOP of the block break (a doubled gap). The RAW chunks + // keep their separators (losslessness is asserted on splitPlainChunks + // above); multi-blank-line runs collapse to one uniform gap, consistent + // with collapseBlankLines on the finalized markdown path. 
+ expect(blocks.map((b) => b.textContent)).toEqual([ + "первый абзац", + "второй абзац", + "третий", + ]); + // The uniform paragraph gap comes from the block margin instead (matches + // the `.reasoningText p { margin: 0 0 4px }` rhythm of the markdown path). + for (const block of blocks) { + expect((block as HTMLElement).style.marginBottom).toBe("4px"); + } + }); + + it("keeps interior newlines intact — only the trailing run is stripped", () => { + const text = "строка один\nстрока два\n\nхвост"; + const { container } = render(); + const blocks = Array.from(container.querySelectorAll("div")); + expect(blocks.map((b) => b.textContent)).toEqual([ + "строка один\nстрока два", + "хвост", + ]); + }); +}); diff --git a/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx b/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx new file mode 100644 index 00000000..bc72d790 --- /dev/null +++ b/apps/client/src/features/ai-chat/components/streaming-plain-text.tsx @@ -0,0 +1,90 @@ +import { memo, useMemo } from "react"; + +/** + * Split plain text into chunks at blank-line (paragraph) boundaries, keeping + * each separator run attached to the END of the preceding chunk, so the chunks + * always reassemble byte-for-byte into the input. + * + * A boundary is the end of a maximal `\n{2,}` run that is followed by at least + * one more character. A newline run that is a SUFFIX of the text is NOT a + * boundary yet: under append-only growth it may still gain more newlines, and + * cutting there would move the boundary on the next call. + * + * CRITICAL INVARIANT (load-bearing for StreamingPlainText's memoization): for + * APPEND-ONLY growth of `text`, every chunk except the LAST is byte-identical + * between successive calls — previously-emitted boundaries never move. 
Proof + * sketch: appending never modifies existing characters, so (a) an existing + * boundary's newline run and its following character are untouched and the + * boundary persists at the same offset; (b) no NEW boundary can appear strictly + * inside the old text, because a `\n{2,}` run followed by a character entirely + * within the old text would already have been a boundary. New boundaries can + * only materialize at or after the old text's end, i.e. inside the last chunk. + * + * CRLF is deliberately NOT a boundary: supporting `(?:\r?\n){2,}` would BREAK + * the invariant above — a lone trailing `\r` is not a boundary, but a later- + * appended `\n` would merge with it into a new separator unit and retroactively + * create a boundary INSIDE previously-emitted text, moving old chunk edges. + * With `\n`-only runs, appended characters can never extend a run that is + * already followed by a non-`\n` character, so old boundaries are immutable. + * CRLF blank lines therefore intentionally stay inside one chunk: correctness/ + * losslessness are unaffected; only chunk granularity is coarser for CRLF + * input (LLM output is `\n` in practice). + */ +export function splitPlainChunks(text: string): string[] { + const chunks: string[] = []; + let start = 0; + for (const match of text.matchAll(/\n{2,}/g)) { + const end = match.index + match[0].length; + // Suffix run: not a stable boundary yet (see the invariant above). + if (end >= text.length) break; + chunks.push(text.slice(start, end)); + start = end; + } + if (start < text.length) chunks.push(text.slice(start)); + return chunks; +} + +/** + * One immutable chunk. Memoized on its string prop: during streaming only the + * TAIL chunk's text changes (see the splitPlainChunks invariant), so React + * skips every stable chunk and the per-delta DOM work is a single text-node + * update. 
`pre-wrap` is set per chunk (like the old raw-text fallback did), NOT + * on the surrounding markdown-styled container — see the note in + * ai-chat.module.css. Font/size/color are inherited from that container. + * + * DISPLAY-ONLY newline strip: the raw chunk keeps its trailing `\n{2,}` + * separator run attached (the splitPlainChunks invariant, load-bearing for the + * memo), but rendering those newlines inside a pre-wrap block would add an + * empty line ON TOP of the block break — a doubled gap. So the RENDERED string + * drops trailing newlines and the paragraph gap comes from `marginBottom: 4` + * instead, matching the `.reasoningText p { margin: 0 0 4px }` rhythm of the + * finalized markdown. Multi-blank-line runs thus collapse to one uniform gap, + * consistent with `collapseBlankLines` on the markdown path. The last chunk + * usually has no trailing newlines (strip is a no-op); its margin is harmless. + */ +const PlainChunk = memo(function PlainChunk({ text }: { text: string }) { + return ( +
+ {text.replace(/\n+$/, "")} +
+ ); +}); + +/** + * Renders still-streaming plain text as a list of paragraph chunks where only + * the tail chunk changes per delta. No markdown, no sanitizer, no innerHTML — + * this is the cheap streaming-time stand-in for the one-time markdown parse + * that happens after the part is finalized (see reasoning-block.tsx). + */ +export function StreamingPlainText({ text }: { text: string }) { + const chunks = useMemo(() => splitPlainChunks(text), [text]); + return ( + <> + {chunks.map((chunk, index) => ( + // Index keys are stable here: chunks are append-only (the invariant), + // so an index never gets a different chunk's content mid-stream. + + ))} + + ); +} From d4d05c8e8b0f5e05c2a5b5840c570543e4c4e7f3 Mon Sep 17 00:00:00 2001 From: agent_vscode Date: Sat, 4 Jul 2026 03:51:22 +0300 Subject: [PATCH 2/3] test(ai-chat): add dev-only perf harness for the chat stream pipeline Mounts the real ChatThread against a synthetic AI SDK v6 UI-message SSE stream (multi-step reasoning + getPage tool calls + markdown answer; 5k/20k/50k-token presets, 15/5 ms chunk cadence) with long-task, FPS and mount-time instrumentation. Two scenarios: mount a persisted transcript (open-chat cost) and stream a live turn through the real useChat pipeline via a window.fetch patch scoped to /api/ai-chat/stream. Served only by the vite dev server at /perf/ai-chat-perf.html; the production build keeps its single index.html entry, so none of this ships. Also ignore local trace dumps under .claude/perf-traces/. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .gitignore | 2 + apps/client/perf/ai-chat-perf-main.tsx | 50 +++ apps/client/perf/ai-chat-perf.html | 12 + apps/client/perf/harness.tsx | 390 +++++++++++++++++++ apps/client/perf/synthetic-turn.ts | 517 +++++++++++++++++++++++++ 5 files changed, 971 insertions(+) create mode 100644 apps/client/perf/ai-chat-perf-main.tsx create mode 100644 apps/client/perf/ai-chat-perf.html create mode 100644 apps/client/perf/harness.tsx create mode 100644 apps/client/perf/synthetic-turn.ts diff --git a/.gitignore b/.gitignore index cf440100..4eb9e6fd 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,8 @@ lerna-debug.log* .nx/cache .claude/worktrees/ .claude/tmp/ +# Local Chrome performance traces recorded by the AI-chat perf harness +.claude/perf-traces/ # TypeScript incremental build artifacts *.tsbuildinfo diff --git a/apps/client/perf/ai-chat-perf-main.tsx b/apps/client/perf/ai-chat-perf-main.tsx new file mode 100644 index 00000000..0c75f68c --- /dev/null +++ b/apps/client/perf/ai-chat-perf-main.tsx @@ -0,0 +1,50 @@ +/** + * DEV-ONLY entry for the AI chat perf harness (served by the vite dev server at + * /perf/ai-chat-perf.html; never part of the production build, which uses the + * single default index.html entry). + * + * Mounts the minimal provider stack the real ChatThread needs (Mantine, router + * for tool-card Links, react-query, i18n) and patches `window.fetch` BEFORE + * React mounts so ChatThread's DefaultChatTransport requests to + * /api/ai-chat/stream are answered by the synthetic SSE generator. + */ + +import "@mantine/core/styles.css"; + +import ReactDOM from "react-dom/client"; +import { MantineProvider } from "@mantine/core"; +import { MemoryRouter } from "react-router-dom"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { mantineCssResolver, theme } from "../src/theme.ts"; +// i18n side-effect init (http-backend). 
Translations load from /locales in dev; +// missing keys fall back to the key text, which is fine for the harness. +import "../src/i18n.ts"; +import { installAiChatStreamFetchPatch } from "./synthetic-turn.ts"; +import PerfHarness from "./harness.tsx"; + +// MUST run before React mounts: ChatThread creates its transport with the +// global fetch, so the patch has to be in place before the first send. +installAiChatStreamFetchPatch(); + +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + refetchOnMount: false, + refetchOnWindowFocus: false, + retry: false, + staleTime: 5 * 60 * 1000, + }, + }, +}); + +const container = document.getElementById("root") as HTMLElement; + +ReactDOM.createRoot(container).render( + + + + + + + , +); diff --git a/apps/client/perf/ai-chat-perf.html b/apps/client/perf/ai-chat-perf.html new file mode 100644 index 00000000..5509160b --- /dev/null +++ b/apps/client/perf/ai-chat-perf.html @@ -0,0 +1,12 @@ + + + + + + AI chat perf harness + + +
+ + + diff --git a/apps/client/perf/harness.tsx b/apps/client/perf/harness.tsx new file mode 100644 index 00000000..32af237d --- /dev/null +++ b/apps/client/perf/harness.tsx @@ -0,0 +1,390 @@ +/** + * DEV-ONLY perf harness UI for the AI chat feature. + * + * Left panel: controls + live stats. Right side: a bordered box (~real chat + * window size) hosting the REAL ChatThread component. + * + * Scenario A "Open existing chat": mount ChatThread seeded with a large + * persisted transcript and measure click -> post-mount-paint time. + * Scenario B "Live agent stream": mount an empty chat and auto-send a message; + * the fetch patch (see synthetic-turn.ts) answers with a synthetic SSE stream + * through the real useChat pipeline. + */ + +import { useEffect, useMemo, useRef, useState } from "react"; +import type { CSSProperties, MutableRefObject } from "react"; +import ChatThread from "../src/features/ai-chat/components/chat-thread.tsx"; +import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts"; +import { + PRESETS, + buildPersistedRows, + buildTurnScript, + setLiveStreamSettings, + type PresetKey, +} from "./synthetic-turn.ts"; + +const AUTO_SEND_TEXT = "Run the synthetic perf turn"; +const AUTO_SEND_TIMEOUT_MS = 1000; +/** Stats display refresh period — 2x/s so the display itself stays cheap. 
*/ +const STATS_FLUSH_MS = 500; + +// --------------------------------------------------------------------------- +// Shared mutable stats (written from callbacks, flushed to state at 2 Hz) +// --------------------------------------------------------------------------- + +interface PerfStats { + longtaskCount: number; + longtaskTotalMs: number; + longtaskMaxMs: number; + fps: number; + sseChunks: number; + sseChars: number; + mountAMs: number | null; + streamState: "idle" | "streaming" | "done" | "aborted"; +} + +function emptyStats(): PerfStats { + return { + longtaskCount: 0, + longtaskTotalMs: 0, + longtaskMaxMs: 0, + fps: 0, + sseChunks: 0, + sseChars: 0, + mountAMs: null, + streamState: "idle", + }; +} + +/** + * Self-contained stats panel: owns the longtask observer, the FPS meter and the + * 2 Hz flush interval. Isolated in its OWN component so its periodic setState + * re-renders only this panel — NOT the ChatThread under measurement. + */ +function StatsPanel({ stats }: { stats: MutableRefObject }) { + const [snapshot, setSnapshot] = useState(() => ({ ...stats.current })); + + // Long tasks (main-thread blocks > 50ms). + useEffect(() => { + let observer: PerformanceObserver | null = null; + try { + observer = new PerformanceObserver((list) => { + for (const entry of list.getEntries()) { + stats.current.longtaskCount += 1; + stats.current.longtaskTotalMs += entry.duration; + stats.current.longtaskMaxMs = Math.max(stats.current.longtaskMaxMs, entry.duration); + } + }); + observer.observe({ type: "longtask", buffered: true }); + } catch { + // longtask entries unsupported in this browser — panel shows zeros. + } + return () => observer?.disconnect(); + }, [stats]); + + // FPS: frames rendered within the trailing 1s window. 
+ useEffect(() => { + let raf = 0; + const frames: number[] = []; + const loop = (now: number) => { + frames.push(now); + while (frames.length > 0 && frames[0] <= now - 1000) frames.shift(); + stats.current.fps = frames.length; + raf = requestAnimationFrame(loop); + }; + raf = requestAnimationFrame(loop); + return () => cancelAnimationFrame(raf); + }, [stats]); + + // Flush the mutable stats into the display at most 2x/s. + useEffect(() => { + const id = window.setInterval(() => setSnapshot({ ...stats.current }), STATS_FLUSH_MS); + return () => window.clearInterval(id); + }, [stats]); + + const resetLongtasks = () => { + stats.current.longtaskCount = 0; + stats.current.longtaskTotalMs = 0; + stats.current.longtaskMaxMs = 0; + setSnapshot({ ...stats.current }); + }; + + const row: CSSProperties = { display: "flex", justifyContent: "space-between", gap: 8 }; + return ( +
+
Stats
+
FPS (1s){snapshot.fps}
+
Long tasks{snapshot.longtaskCount}
+
Long total{snapshot.longtaskTotalMs.toFixed(0)} ms
+
Long max{snapshot.longtaskMaxMs.toFixed(0)} ms
+
SSE chunks{snapshot.sseChunks}
+
SSE chars{snapshot.sseChars.toLocaleString()}
+
Stream{snapshot.streamState}
+
+ Mount A + {snapshot.mountAMs === null ? "—" : `${snapshot.mountAMs.toFixed(0)} ms`} +
+ +
+ ); +} + +// --------------------------------------------------------------------------- +// Auto-send (scenario B): drive the REAL composer in the mounted DOM +// --------------------------------------------------------------------------- + +/** + * Fill the composer textarea via the native value setter + an `input` event + * (React 18 controlled-input pattern), then click the enabled "Send" button. + * Retried on rAF until the elements exist (ChatThread mounts asynchronously). + */ +function autoSend(host: HTMLElement, text: string): void { + const deadline = performance.now() + AUTO_SEND_TIMEOUT_MS; + + const tryClick = () => { + const button = host.querySelector('button[aria-label="Send"]'); + if (button && !button.disabled) { + button.click(); + return; + } + if (performance.now() < deadline) requestAnimationFrame(tryClick); + else console.error("[perf] auto-send: Send button never became clickable"); + }; + + const trySetValue = () => { + const textarea = host.querySelector("textarea"); + if (!textarea) { + if (performance.now() < deadline) requestAnimationFrame(trySetValue); + else console.error("[perf] auto-send: textarea not found"); + return; + } + const setter = Object.getOwnPropertyDescriptor( + window.HTMLTextAreaElement.prototype, + "value", + )?.set; + setter?.call(textarea, text); + textarea.dispatchEvent(new Event("input", { bubbles: true })); + // Click on a later frame so React commits the controlled value (which + // enables the Send button) before we press it. 
+ requestAnimationFrame(tryClick); + }; + + requestAnimationFrame(trySetValue); +} + +// --------------------------------------------------------------------------- +// Harness +// --------------------------------------------------------------------------- + +interface MountState { + mode: "A" | "B"; + key: number; + chatId: string | null; + rows: IAiChatMessageRow[]; +} + +const noop = (): void => {}; + +export default function PerfHarness() { + const [preset, setPreset] = useState("20k"); + const [intervalMs, setIntervalMs] = useState(15); + const [mounted, setMounted] = useState(null); + const [fixtureInfo, setFixtureInfo] = useState(null); + + const statsRef = useRef(emptyStats()); + const hostRef = useRef(null); + const keyCounterRef = useRef(0); + const mountStartRef = useRef(0); + const pendingMountMeasureRef = useRef(false); + + // The scripted live turn for the current preset (reused across B runs; the + // script is immutable data, so rebuilding per run is unnecessary). + const liveScript = useMemo(() => buildTurnScript(PRESETS[preset], "live"), [preset]); + + const openPage = useMemo(() => ({ id: "page-1", title: "Perf test page" }), []); + + // Scenario A: mount ChatThread seeded with a large persisted transcript. + const handleMountA = () => { + const fixture = buildPersistedRows(PRESETS[preset]); + setFixtureInfo( + `Persisted fixture: ${fixture.rows.length} rows, ` + + `${fixture.totalChars.toLocaleString()} chars ≈ ${fixture.approxTokens.toLocaleString()} tokens`, + ); + statsRef.current.mountAMs = null; + // Mark AFTER fixture generation: we measure mount cost, not generation cost + // (production receives its rows from the network). 
+ performance.mark("perf:mountA:start"); + mountStartRef.current = performance.now(); + pendingMountMeasureRef.current = true; + keyCounterRef.current += 1; + setMounted({ mode: "A", key: keyCounterRef.current, chatId: "perf-chat", rows: fixture.rows }); + }; + + // Measure scenario A: effect runs after the mount commit; double rAF lands + // after the first paint of the mounted transcript. + useEffect(() => { + if (!pendingMountMeasureRef.current) return; + pendingMountMeasureRef.current = false; + requestAnimationFrame(() => { + requestAnimationFrame(() => { + statsRef.current.mountAMs = performance.now() - mountStartRef.current; + performance.mark("perf:mountA:end"); + try { + performance.measure("perf:mountA", "perf:mountA:start", "perf:mountA:end"); + } catch { + // Marks cleared mid-run — ignore. + } + }); + }); + }, [mounted]); + + // Scenario B: mount an empty chat, arm the synthetic stream, auto-send. + const handleStartB = () => { + statsRef.current.sseChunks = 0; + statsRef.current.sseChars = 0; + statsRef.current.streamState = "streaming"; + setLiveStreamSettings({ + script: liveScript, + chunkIntervalMs: intervalMs, + onProgress: (chunks, chars) => { + statsRef.current.sseChunks = chunks; + statsRef.current.sseChars = chars; + }, + onDone: () => { + statsRef.current.streamState = "done"; + performance.mark("perf:streamB:end"); + try { + performance.measure("perf:streamB", "perf:streamB:start", "perf:streamB:end"); + } catch { + // Start mark missing (e.g. marks cleared) — ignore. 
+ } + }, + onAbort: () => { + statsRef.current.streamState = "aborted"; + }, + }); + performance.mark("perf:streamB:start"); + keyCounterRef.current += 1; + setMounted({ mode: "B", key: keyCounterRef.current, chatId: null, rows: [] }); + if (hostRef.current) autoSend(hostRef.current, AUTO_SEND_TEXT); + }; + + const handleUnmount = () => setMounted(null); + + const label: CSSProperties = { display: "block", fontSize: 12, margin: "10px 0 2px" }; + const button: CSSProperties = { display: "block", width: "100%", margin: "6px 0", padding: "6px 8px" }; + + return ( +
+ {/* Left: controls + stats */} +
+
AI chat perf harness
+ + + + + + + +
+ + + +
+ +
+
+ Live turn: {liveScript.totalChars.toLocaleString()} chars ≈{" "} + {liveScript.approxTokens.toLocaleString()} tokens +
+ {fixtureInfo &&
{fixtureInfo}
} + {mounted && ( +
+ Mounted: scenario {mounted.mode} (key {mounted.key}) +
+ )} +
+ +
+ +
+ + {/* Right: the real ChatThread inside a real-window-sized box */} +
+
+ {mounted ? ( + + ) : ( +
+ ChatThread unmounted. Use the controls on the left. +
+ )} +
+
+
+ ); +} diff --git a/apps/client/perf/synthetic-turn.ts b/apps/client/perf/synthetic-turn.ts new file mode 100644 index 00000000..439a5ab0 --- /dev/null +++ b/apps/client/perf/synthetic-turn.ts @@ -0,0 +1,517 @@ +/** + * DEV-ONLY synthetic agent-turn generator for the AI chat perf harness. + * + * Produces one scripted agent turn (reasoning + tool calls + markdown answer) + * from a size config, and materializes it two ways: + * - as an AI SDK v6 UI-message SSE stream (scenario B "live agent stream"), + * served by a `window.fetch` patch that intercepts `/api/ai-chat/stream`; + * - as persisted `IAiChatMessageRow[]` history (scenario A "open existing chat"). + * + * Wire format verified against the installed ai@6.0.207 `uiMessageChunkSchema` + * (strict objects — only the exact field names below are accepted). + */ + +import type { UIMessage } from "@ai-sdk/react"; +import type { IAiChatMessageRow } from "../src/features/ai-chat/types/ai-chat.types.ts"; + +// --------------------------------------------------------------------------- +// Config / presets +// --------------------------------------------------------------------------- + +/** 1 token ~= 4 chars — the approximation used throughout this module. */ +const CHARS_PER_TOKEN = 4; + +export interface TurnConfig { + /** Number of agent steps; each step = one reasoning block + one tool call. */ + steps: number; + /** Approximate reasoning tokens generated per step. */ + reasoningTokensPerStep: number; + /** Size of each tool call's output `content` filler, in bytes (ASCII). */ + toolOutputBytes: number; + /** Approximate size of the final markdown answer, in tokens. 
*/ + answerTokens: number; +} + +export type PresetKey = "5k" | "20k" | "50k"; + +export const PRESETS: Record = { + "5k": { + steps: 3, + reasoningTokensPerStep: 500, + toolOutputBytes: 10_000, + answerTokens: 600, + }, + "20k": { + steps: 6, + reasoningTokensPerStep: 2500, + toolOutputBytes: 20_000, + answerTokens: 1500, + }, + "50k": { + steps: 10, + reasoningTokensPerStep: 4000, + toolOutputBytes: 40_000, + answerTokens: 3000, + }, +}; + +// --------------------------------------------------------------------------- +// Text generators +// --------------------------------------------------------------------------- + +/** Mixed Russian/English prose sentences cycled to build reasoning text. */ +const REASONING_SENTENCES = [ + "Пользователь просит проанализировать документ и выделить ключевые тезисы по каждому разделу.", + "First I need to inspect the current page content to understand its overall structure.", + "Судя по оглавлению, раздел с техническими требованиями находится ближе к концу документа.", + "The table in section three contains the migration matrix that I should cross-check against the summary.", + "Проверю, нет ли противоречий между описанием API и приведёнными в тексте примерами вызовов.", + "Let me compare the numbers from the executive summary with the raw data in the appendix.", + "Похоже, автор использует термины «воркспейс» и workspace взаимозаменяемо — это стоит нормализовать.", + "I should keep the page ids from the tool output so the final answer can cite the source pages.", + "Осталось свести найденные несоответствия в одну таблицу и предложить порядок исправлений.", + "The remaining sections look consistent, so I can move on to drafting the structured answer.", +]; + +/** + * Build realistic prose of ~`targetChars` characters, inserting a newline + * roughly every 200 characters (mirrors how reasoning text tends to wrap). 
+ */ +function makeProse(targetChars: number): string { + const pieces: string[] = []; + let length = 0; + let sinceNewline = 0; + let i = 0; + while (length < targetChars) { + const sentence = REASONING_SENTENCES[i % REASONING_SENTENCES.length]; + i += 1; + pieces.push(sentence); + length += sentence.length + 1; + sinceNewline += sentence.length + 1; + if (sinceNewline >= 200) { + pieces.push("\n"); + sinceNewline = 0; + } else { + pieces.push(" "); + } + } + return pieces.join("").trimEnd(); +} + +/** One markdown section (~700 chars): heading, prose, bullets, GFM table, code. */ +function markdownSection(n: number): string { + return [ + `## Section ${n}: migration analysis`, + ``, + `The workspace contains **${n * 12} pages** that still reference the legacy API. ` + + `Most of them live under [Perf test page](/p/page-1) and need the new transport. ` + + `Ниже приведена сводка по разделу с оценкой трудозатрат и основных рисков.`, + ``, + `- Update the fetch layer to the v6 transport`, + `- Перенести таблицы соответствия идентификаторов`, + `- Verify citation links after the move`, + `- Проверить отображение длинных ответов в узкой панели`, + ``, + `| Область | Страниц | Статус | Риск |`, + `| --- | --- | --- | --- |`, + `| API reference | ${n + 4} | migrated | low |`, + `| Onboarding | ${n + 2} | in progress | medium |`, + `| Release notes | ${n * 3} | pending | high |`, + ``, + "```ts", + `export function migrateSection${n}(rows: Row[]): Row[] {`, + ` return rows`, + ` .filter((row) => row.section === ${n})`, + ` .map((row) => ({ ...row, migrated: true }));`, + `}`, + "```", + ].join("\n"); +} + +/** Realistic markdown answer of ~`targetChars` chars (sections repeated to size). 
*/ +function makeMarkdownAnswer(targetChars: number): string { + const sections: string[] = []; + let length = 0; + let n = 1; + while (length < targetChars) { + const section = markdownSection(n); + sections.push(section); + length += section.length + 2; + n += 1; + } + return sections.join("\n\n"); +} + +/** Plain ASCII filler of exactly `bytes` characters for tool outputs. */ +function makeFiller(bytes: number): string { + const unit = "Perf filler content for the synthetic getPage tool output. "; + return unit.repeat(Math.ceil(bytes / unit.length)).slice(0, bytes); +} + +// --------------------------------------------------------------------------- +// Turn script +// --------------------------------------------------------------------------- + +export interface TurnToolCall { + toolCallId: string; + toolName: "getPage"; + input: { pageId: string }; + output: { id: string; title: string; content: string }; +} + +export interface TurnStep { + reasoningText: string; + tool: TurnToolCall; +} + +export interface TurnScript { + steps: TurnStep[]; + answerText: string; + /** Approximate reasoning tokens for the whole turn (chars / 4). */ + reasoningTokens: number; + /** Approximate context size after this turn, in tokens. */ + contextTokens: number; + maxContextTokens: number; + /** Actual generated visible chars: reasoning + tool outputs + answer. */ + totalChars: number; + /** totalChars / 4, rounded. */ + approxTokens: number; +} + +/** + * Build the scripted agent turn for a config. `idPrefix` keeps tool call ids + * unique when several scripts coexist (e.g. 3 persisted turns in one chat). 
+ */ +export function buildTurnScript(config: TurnConfig, idPrefix = "live"): TurnScript { + const steps: TurnStep[] = []; + let reasoningChars = 0; + let toolChars = 0; + for (let i = 0; i < config.steps; i++) { + const reasoningText = makeProse(config.reasoningTokensPerStep * CHARS_PER_TOKEN); + const content = makeFiller(config.toolOutputBytes); + reasoningChars += reasoningText.length; + toolChars += content.length; + steps.push({ + reasoningText, + tool: { + toolCallId: `${idPrefix}-call-${i + 1}`, + toolName: "getPage", + input: { pageId: "page-1" }, + output: { id: "page-1", title: "Perf test page", content }, + }, + }); + } + const answerText = makeMarkdownAnswer(config.answerTokens * CHARS_PER_TOKEN); + const totalChars = reasoningChars + toolChars + answerText.length; + return { + steps, + answerText, + reasoningTokens: Math.round(reasoningChars / CHARS_PER_TOKEN), + contextTokens: Math.round(totalChars / CHARS_PER_TOKEN), + maxContextTokens: 200_000, + totalChars, + approxTokens: Math.round(totalChars / CHARS_PER_TOKEN), + }; +} + +// --------------------------------------------------------------------------- +// Scenario A: persisted rows +// --------------------------------------------------------------------------- + +/** Number of user+assistant pairs the preset is split across for history. */ +const HISTORY_TURNS = 3; + +const USER_PROMPTS = [ + "Проанализируй документ и выдели ключевые тезисы по каждому разделу.", + "Now cross-check the migration matrix against the summary and list every mismatch.", + "Собери финальный план миграции с оценкой рисков по каждой области.", +]; + +/** Persisted UIMessage parts for one finished assistant turn. 
*/ +function scriptToPersistedParts(script: TurnScript): UIMessage["parts"] { + const parts: unknown[] = []; + for (const step of script.steps) { + parts.push({ type: "reasoning", text: step.reasoningText, state: "done" }); + parts.push({ + type: `tool-${step.tool.toolName}`, + toolCallId: step.tool.toolCallId, + state: "output-available", + input: step.tool.input, + output: step.tool.output, + }); + } + parts.push({ type: "text", text: script.answerText, state: "done" }); + return parts as UIMessage["parts"]; +} + +export interface PersistedFixture { + rows: IAiChatMessageRow[]; + totalChars: number; + approxTokens: number; +} + +/** + * Materialize the preset as a finished 3-turn transcript: user row + assistant + * row per turn, with the preset's steps/answer split across the assistant turns. + * Approximate accounting — the actual totals are reported back for display. + */ +export function buildPersistedRows(config: TurnConfig): PersistedFixture { + const rows: IAiChatMessageRow[] = []; + const baseTime = Date.now() - HISTORY_TURNS * 60_000; + let totalChars = 0; + + for (let t = 0; t < HISTORY_TURNS; t++) { + // Distribute steps as evenly as possible (earlier turns get the remainder). + const stepsForTurn = + Math.floor(config.steps / HISTORY_TURNS) + + (t < config.steps % HISTORY_TURNS ? 
1 : 0); + const turnConfig: TurnConfig = { + steps: Math.max(1, stepsForTurn), + reasoningTokensPerStep: config.reasoningTokensPerStep, + toolOutputBytes: config.toolOutputBytes, + answerTokens: Math.max(50, Math.round(config.answerTokens / HISTORY_TURNS)), + }; + const script = buildTurnScript(turnConfig, `hist-${t + 1}`); + totalChars += script.totalChars; + + const userText = USER_PROMPTS[t % USER_PROMPTS.length]; + rows.push({ + id: `perf-row-u${t + 1}`, + role: "user", + content: userText, + metadata: null, + createdAt: new Date(baseTime + t * 60_000).toISOString(), + }); + rows.push({ + id: `perf-row-a${t + 1}`, + role: "assistant", + content: script.answerText, + metadata: { + parts: scriptToPersistedParts(script), + usage: { reasoningTokens: script.reasoningTokens }, + contextTokens: script.contextTokens, + maxContextTokens: script.maxContextTokens, + finishReason: "stop", + }, + createdAt: new Date(baseTime + t * 60_000 + 30_000).toISOString(), + }); + } + + return { + rows, + totalChars, + approxTokens: Math.round(totalChars / CHARS_PER_TOKEN), + }; +} + +// --------------------------------------------------------------------------- +// Scenario B: SSE stream +// --------------------------------------------------------------------------- + +/** Streaming delta size in chars (reasoning/answer text is split into these). */ +const DELTA_CHARS = 200; + +function splitDeltas(text: string, size = DELTA_CHARS): string[] { + const deltas: string[] = []; + for (let i = 0; i < text.length; i += size) { + deltas.push(text.slice(i, i + size)); + } + return deltas; +} + +/** One pre-serialized SSE frame plus its visible-char contribution for stats. 
*/ +interface SseFrame { + data: string; + chars: number; +} + +function frame(chunk: Record, chars = 0): SseFrame { + return { data: `data: ${JSON.stringify(chunk)}\n\n`, chars }; +} + +/** + * Serialize the whole scripted turn into AI SDK v6 UI-message SSE frames + * (excluding the final `data: [DONE]` terminator, appended by the pump). + */ +function buildSseFrames(script: TurnScript, messageId: string, chatId: string): SseFrame[] { + const frames: SseFrame[] = []; + frames.push(frame({ type: "start", messageId, messageMetadata: { chatId } })); + + script.steps.forEach((step, i) => { + frames.push(frame({ type: "start-step" })); + const reasoningId = `${messageId}-r${i + 1}`; + frames.push(frame({ type: "reasoning-start", id: reasoningId })); + for (const delta of splitDeltas(step.reasoningText)) { + frames.push(frame({ type: "reasoning-delta", id: reasoningId, delta }, delta.length)); + } + frames.push(frame({ type: "reasoning-end", id: reasoningId })); + + const { toolCallId, toolName, input, output } = step.tool; + frames.push(frame({ type: "tool-input-start", toolCallId, toolName })); + frames.push(frame({ type: "tool-input-available", toolCallId, toolName, input })); + // The tool result arrives as ONE chunk, like the real server sends it. + frames.push(frame({ type: "tool-output-available", toolCallId, output }, output.content.length)); + frames.push(frame({ type: "finish-step" })); + }); + + // Final step: the markdown answer. 
+ frames.push(frame({ type: "start-step" })); + const textId = `${messageId}-answer`; + frames.push(frame({ type: "text-start", id: textId })); + for (const delta of splitDeltas(script.answerText)) { + frames.push(frame({ type: "text-delta", id: textId, delta }, delta.length)); + } + frames.push(frame({ type: "text-end", id: textId })); + frames.push(frame({ type: "finish-step" })); + + frames.push( + frame({ + type: "finish", + messageMetadata: { + usage: { reasoningTokens: script.reasoningTokens }, + contextTokens: script.contextTokens, + maxContextTokens: script.maxContextTokens, + finishReason: "stop", + }, + }), + ); + return frames; +} + +export interface LiveStreamSettings { + script: TurnScript; + /** Delay between SSE chunks (one chunk per tick). */ + chunkIntervalMs: number; + /** Progress callback: cumulative emitted chunk count and visible chars. */ + onProgress?: (chunks: number, chars: number) => void; + /** Fired once after the `[DONE]` terminator is enqueued. */ + onDone?: () => void; + /** Fired if the client aborted the stream (Stop button). */ + onAbort?: () => void; +} + +/** + * Build a synthetic SSE Response streaming the scripted turn, one chunk every + * `chunkIntervalMs`. Honors the fetch `AbortSignal` so the real Stop button works. + */ +export function buildSseResponse( + settings: LiveStreamSettings, + signal?: AbortSignal | null, +): Response { + const messageId = `m-live-${Date.now()}`; + const frames = buildSseFrames(settings.script, messageId, "perf-chat"); + const encoder = new TextEncoder(); + let index = 0; + let emittedChars = 0; + let timer: number | undefined; + + const stream = new ReadableStream({ + start(controller) { + const stopPump = () => { + if (timer !== undefined) { + clearTimeout(timer); + timer = undefined; + } + }; + const pump = () => { + timer = undefined; + if (signal?.aborted) { + stopPump(); + try { + controller.close(); + } catch { + // Already closed/cancelled — nothing to do. 
+ } + return; + } + if (index >= frames.length) { + try { + controller.enqueue(encoder.encode("data: [DONE]\n\n")); + controller.close(); + } catch { + // Cancelled mid-flight. + } + settings.onDone?.(); + return; + } + const next = frames[index]; + index += 1; + try { + controller.enqueue(encoder.encode(next.data)); + } catch { + stopPump(); + return; + } + emittedChars += next.chars; + settings.onProgress?.(index, emittedChars); + timer = window.setTimeout(pump, settings.chunkIntervalMs); + }; + signal?.addEventListener( + "abort", + () => { + stopPump(); + try { + controller.close(); + } catch { + // Reader already cancelled. + } + settings.onAbort?.(); + }, + { once: true }, + ); + timer = window.setTimeout(pump, settings.chunkIntervalMs); + }, + cancel() { + if (timer !== undefined) { + clearTimeout(timer); + timer = undefined; + } + }, + }); + + return new Response(stream, { + status: 200, + headers: { + "content-type": "text/event-stream", + "cache-control": "no-cache", + "x-vercel-ai-ui-message-stream": "v1", + }, + }); +} + +// --------------------------------------------------------------------------- +// window.fetch patch +// --------------------------------------------------------------------------- + +let currentLiveSettings: LiveStreamSettings | null = null; + +/** Arm the next `/api/ai-chat/stream` request with a scripted turn. */ +export function setLiveStreamSettings(settings: LiveStreamSettings): void { + currentLiveSettings = settings; +} + +/** + * Patch `window.fetch` BEFORE React mounts: requests to `/api/ai-chat/stream` + * get the synthetic SSE Response; everything else passes through untouched. + */ +export function installAiChatStreamFetchPatch(): void { + const originalFetch = window.fetch.bind(window); + window.fetch = (input: RequestInfo | URL, init?: RequestInit): Promise => { + const url = + typeof input === "string" + ? input + : input instanceof URL + ? 
input.href + : input.url; + if (url.includes("/api/ai-chat/stream")) { + const settings = currentLiveSettings; + if (!settings) { + return Promise.resolve( + new Response("perf harness: no live stream configured", { status: 500 }), + ); + } + return Promise.resolve(buildSseResponse(settings, init?.signal ?? null)); + } + return originalFetch(input, init); + }; +} From b1ede483194642a518f330dbfe32f8e21b820728 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 04:25:53 +0300 Subject: [PATCH 3/3] test(ai-chat): pin the streaming plain-text text-sink invariant + fix stale CSS ref (#323 F1/F2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F1: StreamingPlainText/PlainChunk render untrusted model reasoning as a React text node (escaped), NOT via innerHTML — the load-bearing security property. The existing tests asserted via textContent, which strips tags, so they couldn't tell an escaped literal from injected DOM: a future switch to dangerouslySetInnerHTML would reintroduce XSS with zero failing tests. Add a test feeding an + payload and asserting querySelector("img"/"b") is null AND the raw markup survives in textContent — non-vacuous (fails if the string were parsed as HTML). F2: the .reasoningText CSS note still described the removed pre-wrap fallback and pointed at reasoning-block.tsx (both stale), while PlainChunk's JSDoc points back to this note — a broken mutual reference. Update the note to point at PlainChunk / streaming-plain-text.tsx, where pre-wrap is now applied. No production rendering logic changed. vitest: 8 passed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai-chat/components/ai-chat.module.css | 4 +-- .../components/streaming-plain-text.test.tsx | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/apps/client/src/features/ai-chat/components/ai-chat.module.css b/apps/client/src/features/ai-chat/components/ai-chat.module.css index cd788cdd..7b99178c 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat.module.css +++ b/apps/client/src/features/ai-chat/components/ai-chat.module.css @@ -164,8 +164,8 @@ /* NOTE: `white-space: pre-wrap` is intentionally NOT set here. On the rendered markdown
it would turn the newlines between block tags (e.g. </li>\n<li>, <ul>\n<li>
      ) into visible blank lines/indents on top of the - margins. The plain-text fallback that needs pre-wrap sets it - inline itself (see reasoning-block.tsx). */ + margins. The streaming plain-text path that needs pre-wrap sets it + per chunk instead, in PlainChunk (see streaming-plain-text.tsx). */ } .reasoningText p { diff --git a/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx index 499c6ee7..3f6876bf 100644 --- a/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx +++ b/apps/client/src/features/ai-chat/components/streaming-plain-text.test.tsx @@ -118,4 +118,29 @@ describe("StreamingPlainText", () => { "хвост", ]); }); + + // SECURITY INVARIANT — the load-bearing property of the streaming path: the + // reasoning text is raw, untrusted model output rendered WITHOUT a sanitizer + // (no marked/DOMPurify, no innerHTML). PlainChunk emits it as a React text + // node, which escapes it, so HTML in the model output is inert. This test + // pins that the path is a TEXT sink, not an HTML sink: a future change to + // `dangerouslySetInnerHTML` (reintroducing XSS) MUST fail here. + // + // The existing tests assert via textContent, which strips tags and so cannot + // distinguish an escaped literal from injected DOM. This one asserts on the + // parsed DOM directly: if the markup were injected as HTML, the <img>/<b> + // would become real elements and querySelector would find them. + it("renders HTML-like reasoning as an escaped literal, never as injected DOM", () => { + const text = "\n\nhi"; + const { container } = render(); + // No DOM elements were created from the payload — it was NOT parsed as HTML. + expect(container.querySelector("img")).toBeNull(); + expect(container.querySelector("b")).toBeNull(); + // The raw markup survived verbatim as text (proving it is escaped, not + // interpreted). 
textContent alone can't prove this, but combined with the + // querySelector assertions above it does: the literals are present AND no + // elements exist. + expect(container.textContent).toContain("hi"); + expect(container.textContent).toContain(""); + }); });