From 41dfeeb77a86b43fa5de33dcc75edd4437aceb7a Mon Sep 17 00:00:00 2001
From: vvzvlad
Date: Wed, 17 Jun 2026 23:44:51 +0300
Subject: [PATCH] perf(ai-chat): compact large tool outputs before persisting them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Read tools (getPage, getPageJson, getNode, diffPageVersions,
exportPageMarkdown) return whole pages with no size cap. Their outputs
were stored verbatim in metadata.parts and the tool_calls column, and
metadata.parts is replayed to the provider on every later turn via
convertToModelMessages. After reading a couple of large pages the prompt
grew by full page bodies each turn — rising token cost, latency and DB
row size.

Add compactToolOutput(): a pure, recursive, size-bounded compactor used
in assistantParts() and serializeSteps(). It preserves the value's kind
and small scalar fields (id/title/pageId, which the client reads to build
citations on reload) while truncating long strings, capping long arrays
with a marker, and collapsing subtrees past a depth limit. Small outputs
are returned unchanged by identity. Tool inputs are left intact so
replayed tool_use arguments keep their object shape.

Compaction runs only at persistence time (onFinish/onAbort), so the live
stream and the current turn's multi-step reasoning still see full bodies.

Add unit tests for compactToolOutput.
--- .../src/core/ai-chat/ai-chat.service.spec.ts | 68 +++++++++++++ .../src/core/ai-chat/ai-chat.service.ts | 95 ++++++++++++++++++- 2 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 apps/server/src/core/ai-chat/ai-chat.service.spec.ts diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts new file mode 100644 index 00000000..f1f3461a --- /dev/null +++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts @@ -0,0 +1,68 @@ +import { compactToolOutput } from './ai-chat.service'; + +/** + * Unit tests for compactToolOutput: the pure helper that shrinks LARGE tool + * outputs before they are persisted (and re-sent to the provider on later + * turns). The contract is: small outputs pass through unchanged (by identity); + * large outputs keep their shape and small scalar fields (id/title/pageId — the + * client reads these to render citations) while big payloads are truncated. + */ +describe('compactToolOutput', () => { + it('returns a small object unchanged (by identity)', () => { + const small = { id: 'p1', title: 'Hello', trashed: true }; + expect(compactToolOutput(small)).toBe(small); + }); + + it('truncates a large getPage-shaped markdown body but keeps the title', () => { + const big = 'x'.repeat(20000); + const result = compactToolOutput({ title: 'T', markdown: big }) as { + title: string; + markdown: string; + }; + // Shallow scalar field is preserved (citations depend on it). + expect(result.title).toBe('T'); + // The big payload is shrunk far below the original size. + expect(result.markdown.length).toBeLessThan(20000); + expect(result.markdown).toContain('[truncated'); + }); + + it('caps a long array and appends a single truncation marker', () => { + // 200 small objects, each padded so the total serialized size > 4000 bytes. 
+ const long = Array.from({ length: 200 }, (_, i) => ({ + id: 'n' + i, + pad: 'y'.repeat(40), + })); + const result = compactToolOutput(long) as Array>; + // 50 kept + 1 marker. + expect(result).toHaveLength(51); + const marker = result[result.length - 1]; + expect(marker._truncated).toBe(true); + expect(marker.omittedItems).toBe(150); + }); + + it('passes through null, undefined and primitives unchanged', () => { + expect(compactToolOutput(null)).toBeNull(); + expect(compactToolOutput(undefined)).toBeUndefined(); + expect(compactToolOutput(42)).toBe(42); + }); + + it('replaces a subtree beyond the depth cap with a marker', () => { + // Build a deeply nested object (> TOOL_OUTPUT_MAX_DEPTH levels) with a big + // string at the bottom so the total serialized size exceeds the threshold. + let nested: Record = { leaf: 'z'.repeat(8000) }; + for (let i = 0; i < 20; i++) { + nested = { child: nested }; + } + const result = compactToolOutput(nested); + expect(JSON.stringify(result)).toContain('nested content omitted'); + }); + + it('produces a much smaller JSON than the original for a large input', () => { + const big = 'x'.repeat(20000); + const original = { title: 'T', markdown: big }; + const result = compactToolOutput(original); + const originalBytes = Buffer.byteLength(JSON.stringify(original), 'utf8'); + const compactedBytes = Buffer.byteLength(JSON.stringify(result), 'utf8'); + expect(compactedBytes).toBeLessThan(originalBytes / 10); + }); +}); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 4094357d..20105169 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -427,6 +427,97 @@ type StepLike = { }>; }; +/** + * Compaction tunables for persisted tool OUTPUTS. Read tools (getPage, + * getPageJson, getNode, diffPageVersions, exportPageMarkdown, ...) return whole + * pages with no size cap. 
Their outputs are stored in `metadata.parts` and + * RE-SENT to the provider on every later turn via convertToModelMessages, so an + * uncompacted large body grows token cost, latency, and DB row size on every + * turn. We shrink the big payloads while preserving the object's shape and its + * small scalar fields (id/title/pageId) the client reads to render citations. + */ +// Only outputs whose JSON serialization exceeds this are compacted at all +// (fast path: smaller outputs are returned unchanged, by identity). +const MAX_TOOL_OUTPUT_BYTES = 4000; +// A string longer than this is truncated to a leading preview. +const TOOL_OUTPUT_STRING_LIMIT = 600; +// Number of leading characters kept from a truncated string. +const TOOL_OUTPUT_STRING_PREVIEW = 500; +// Maximum number of array elements kept; the rest are summarized by a marker. +const TOOL_OUTPUT_ARRAY_LIMIT = 50; +// Beyond this nesting depth a subtree is replaced with a marker, bounding the +// recursion and the size of pathological deeply-nested payloads. +const TOOL_OUTPUT_MAX_DEPTH = 8; + +/** + * Recursively compact a single tool output before it is persisted (and thus + * re-sent to the provider on later turns). Preserves the value's KIND and its + * keys/scalars (so the client can still extract id/title/pageId citations from + * `part.output`); only the large payloads (long strings, long arrays, very deep + * subtrees) are shrunk. Returns a plain JSON-serializable value. + * + * Exported only so the unit test can import the pure helper; exporting it does + * not change runtime behavior. + */ +export function compactToolOutput(output: unknown): unknown { + // Fast path: nothing to do for null/undefined or non-serializable values. + if (output === null || output === undefined) return output; + let serialized: string | undefined; + try { + serialized = JSON.stringify(output); + } catch { + // Non-serializable (e.g. circular): return unchanged, never throw here. 
+ return output; + } + // JSON.stringify returns undefined for values like a bare function/symbol. + if (serialized === undefined) return output; + // Below the size threshold: return the original unchanged (by identity). + if (Buffer.byteLength(serialized, 'utf8') <= MAX_TOOL_OUTPUT_BYTES) { + return output; + } + return compactValue(output, 0); +} + +/** Recursive worker for compactToolOutput; see the constants above for limits. */ +function compactValue(value: unknown, depth: number): unknown { + if (typeof value === 'string') { + if (value.length > TOOL_OUTPUT_STRING_LIMIT) { + return `${value.slice(0, TOOL_OUTPUT_STRING_PREVIEW)}…[truncated ${ + value.length - TOOL_OUTPUT_STRING_PREVIEW + } chars]`; + } + return value; + } + if (Array.isArray(value)) { + const kept = value + .slice(0, TOOL_OUTPUT_ARRAY_LIMIT) + .map((el) => compactValue(el, depth + 1)); + if (value.length > TOOL_OUTPUT_ARRAY_LIMIT) { + // Append a marker summarizing the dropped tail so the size is bounded + // while signalling that the array was longer. + kept.push({ + _truncated: true, + omittedItems: value.length - TOOL_OUTPUT_ARRAY_LIMIT, + }); + } + return kept; + } + if (typeof value === 'object' && value !== null) { + if (depth >= TOOL_OUTPUT_MAX_DEPTH) { + return { _truncated: true, note: 'nested content omitted for replay' }; + } + // Rebuild the object preserving keys (keeps id/title/pageId), compacting + // each value one level deeper. + const out: Record = {}; + for (const [k, v] of Object.entries(value)) { + out[k] = compactValue(v, depth + 1); + } + return out; + } + // Numbers, booleans, etc.: nothing to shrink. 
+ return value; +} + /** * Rebuild the FULL UIMessage `parts` for an assistant turn from the SDK steps, * so multi-turn history replays prior tool-calls/results to the model (not just @@ -467,7 +558,7 @@ function assistantParts( toolCallId: call.toolCallId, state: 'output-available', input: call.input, - output: resultsById.get(call.toolCallId), + output: compactToolOutput(resultsById.get(call.toolCallId)), }); } else { // No paired result (e.g. aborted mid-step). Persisting a bare @@ -529,7 +620,7 @@ function serializeSteps( calls.push({ toolName: call.toolName, input: call.input }); } for (const r of step.toolResults ?? []) { - calls.push({ toolName: r.toolName, output: r.output }); + calls.push({ toolName: r.toolName, output: compactToolOutput(r.output) }); } } return calls.length > 0 ? calls : null;