perf(ai-chat): compact large tool outputs before persisting them

Read tools (getPage, getPageJson, getNode, diffPageVersions,
exportPageMarkdown) return whole pages with no size cap. Their outputs
were stored verbatim in metadata.parts and the tool_calls column, and
metadata.parts is replayed to the provider on every later turn via
convertToModelMessages. After reading a couple of large pages the prompt
grew by full page bodies each turn — rising token cost, latency and DB
row size.

Add compactToolOutput(): a pure, recursive, size-bounded compactor used
in assistantParts() and serializeSteps(). It preserves the value's kind
and small scalar fields (id/title/pageId, which the client reads to build
citations on reload) while truncating long strings, capping long arrays
with a marker, and collapsing subtrees past a depth limit. Small outputs
are returned unchanged by identity. Tool inputs are left intact so
replayed tool_use arguments keep their object shape.

Compaction runs only at persistence time (onFinish/onAbort), so the live
stream and the current turn's multi-step reasoning still see full bodies.

Add unit tests for compactToolOutput.
This commit is contained in:
vvzvlad
2026-06-17 23:44:51 +03:00
parent 4379163c21
commit 41dfeeb77a
2 changed files with 161 additions and 2 deletions

View File

@@ -0,0 +1,68 @@
import { compactToolOutput } from './ai-chat.service';
/**
 * Jest spec for compactToolOutput, the pure helper that shrinks oversized tool
 * outputs before they are persisted.
 *
 * Contract exercised below:
 *  - small outputs come back as the very same reference (identity);
 *  - large outputs keep their shape and small scalar fields such as `title`;
 *  - long strings are truncated, long arrays are capped with one marker
 *    element, and very deep subtrees are replaced by a marker.
 */
describe('compactToolOutput', () => {
  it('returns a small object unchanged (by identity)', () => {
    const tiny = { id: 'p1', title: 'Hello', trashed: true };
    const returned = compactToolOutput(tiny);
    expect(returned).toBe(tiny);
  });

  it('truncates a large getPage-shaped markdown body but keeps the title', () => {
    const body = 'x'.repeat(20000);
    const page = { title: 'T', markdown: body };
    const compacted = compactToolOutput(page) as {
      title: string;
      markdown: string;
    };
    // The short scalar next to the payload must survive untouched.
    expect(compacted.title).toBe('T');
    // The payload itself must be shorter than the input and carry the marker.
    expect(compacted.markdown.length).toBeLessThan(20000);
    expect(compacted.markdown).toContain('[truncated');
  });

  it('caps a long array and appends a single truncation marker', () => {
    // 200 padded entries so the serialized array is well over the threshold.
    const items: Array<{ id: string; pad: string }> = [];
    for (let index = 0; index < 200; index += 1) {
      items.push({ id: 'n' + index, pad: 'y'.repeat(40) });
    }
    const compacted = compactToolOutput(items) as Array<
      Record<string, unknown>
    >;
    // 50 leading entries are kept, followed by exactly one marker element.
    expect(compacted).toHaveLength(51);
    const tail = compacted[compacted.length - 1];
    expect(tail._truncated).toBe(true);
    expect(tail.omittedItems).toBe(150);
  });

  it('passes through null, undefined and primitives unchanged', () => {
    expect(compactToolOutput(null)).toBeNull();
    expect(compactToolOutput(undefined)).toBeUndefined();
    expect(compactToolOutput(42)).toBe(42);
  });

  it('replaces a subtree beyond the depth cap with a marker', () => {
    // Wrap a large leaf in 20 levels of `{ child: ... }` so the chain is far
    // deeper than the cap and the payload is large enough to be compacted.
    let chain: Record<string, unknown> = { leaf: 'z'.repeat(8000) };
    let wraps = 0;
    while (wraps < 20) {
      chain = { child: chain };
      wraps += 1;
    }
    const serialized = JSON.stringify(compactToolOutput(chain));
    expect(serialized).toContain('nested content omitted');
  });

  it('produces a much smaller JSON than the original for a large input', () => {
    const source = { title: 'T', markdown: 'x'.repeat(20000) };
    const sizeOf = (value: unknown): number =>
      Buffer.byteLength(JSON.stringify(value), 'utf8');
    const before = sizeOf(source);
    const after = sizeOf(compactToolOutput(source));
    expect(after).toBeLessThan(before / 10);
  });
});

View File

@@ -427,6 +427,97 @@ type StepLike = {
}>;
};
/**
 * Compaction tunables for persisted tool OUTPUTS. Read tools (getPage,
 * getPageJson, getNode, diffPageVersions, exportPageMarkdown, ...) return whole
 * pages with no size cap. Their outputs are stored in `metadata.parts` and
 * RE-SENT to the provider on every later turn via convertToModelMessages, so an
 * uncompacted large body grows token cost, latency, and DB row size on every
 * turn. We shrink the big payloads while preserving the object's shape and its
 * small scalar fields (id/title/pageId) the client reads to render citations.
 */
// Only outputs whose JSON serialization exceeds this many UTF-8 bytes are
// compacted at all (fast path: smaller outputs are returned by identity).
const MAX_TOOL_OUTPUT_BYTES = 4000;
// A string longer than this (in UTF-16 code units) is truncated to a preview.
const TOOL_OUTPUT_STRING_LIMIT = 600;
// Number of leading UTF-16 code units kept from a truncated string.
const TOOL_OUTPUT_STRING_PREVIEW = 500;
// Maximum number of array elements kept; the rest are summarized by a marker.
const TOOL_OUTPUT_ARRAY_LIMIT = 50;
// At or beyond this nesting depth a container (object OR array) is replaced by
// a marker, bounding both the recursion and the size of deeply nested payloads.
const TOOL_OUTPUT_MAX_DEPTH = 8;

/**
 * Compact a single tool output before it is persisted (and thus re-sent to the
 * provider on later turns).
 *
 * Outputs whose JSON serialization is at most MAX_TOOL_OUTPUT_BYTES are
 * returned unchanged, by identity. Larger outputs are rebuilt: keys and short
 * scalars are kept (so the client can still read id/title/pageId from
 * `part.output`), long strings are cut to a preview, long arrays are capped
 * with a trailing marker element, and containers at the depth cap are replaced
 * by a marker.
 *
 * Exported only so the unit test can import the pure helper; exporting it does
 * not change runtime behavior.
 *
 * @param output - Raw tool result of any shape.
 * @returns `output` itself when it is small, null/undefined, or cannot be
 *   serialized; otherwise a freshly built compacted copy.
 */
export function compactToolOutput(output: unknown): unknown {
  if (output === null || output === undefined) return output;

  let serialized: string | undefined;
  try {
    serialized = JSON.stringify(output);
  } catch {
    // Not serializable (e.g. circular reference or a BigInt): hand the value
    // back untouched rather than throwing from the persistence path.
    return output;
  }
  // JSON.stringify yields undefined for a bare function or symbol.
  if (serialized === undefined) return output;

  // Small enough: keep the caller's reference.
  if (Buffer.byteLength(serialized, 'utf8') <= MAX_TOOL_OUTPUT_BYTES) {
    return output;
  }
  return compactValue(output, 0);
}

/**
 * Cut an over-long string to its leading preview plus a `…[truncated N chars]`
 * suffix; strings within TOOL_OUTPUT_STRING_LIMIT are returned as-is.
 */
function truncateString(text: string): string {
  if (text.length <= TOOL_OUTPUT_STRING_LIMIT) return text;
  let cut = TOOL_OUTPUT_STRING_PREVIEW;
  // Slicing counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (emoji, some CJK). Back off by one unit so the
  // preview never ends in a lone high surrogate, which is not valid Unicode
  // and may be rejected or mangled by strict JSON/UTF-8 consumers downstream.
  const lastKept = text.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
  return `${text.slice(0, cut)}…[truncated ${text.length - cut} chars]`;
}

/**
 * Mirror JSON.stringify's `toJSON` hook: an object exposing `toJSON` (e.g. a
 * Date) is replaced by what that method returns. Without this, rebuilding such
 * an object from its own enumerable keys would turn a Date into `{}`.
 * The hook is applied once per value, as JSON.stringify does.
 */
function resolveToJson(value: unknown): unknown {
  if (typeof value !== 'object' || value === null) return value;
  const hook = (value as { toJSON?: unknown }).toJSON;
  if (typeof hook !== 'function') return value;
  return hook.call(value, '') as unknown;
}

/**
 * Recursive worker for compactToolOutput.
 *
 * @param value - Sub-value to compact.
 * @param depth - Nesting depth of `value`; the root is 0 and each container
 *   level adds 1.
 * @returns The compacted sub-value; scalars other than long strings are
 *   returned unchanged.
 */
function compactValue(value: unknown, depth: number): unknown {
  const resolved = resolveToJson(value);

  if (typeof resolved === 'string') return truncateString(resolved);

  const atDepthCap = depth >= TOOL_OUTPUT_MAX_DEPTH;

  if (Array.isArray(resolved)) {
    // At the depth cap no elements are kept, so nested arrays cannot bypass
    // the cap; the result stays an array and reuses the usual marker shape.
    const limit = atDepthCap ? 0 : TOOL_OUTPUT_ARRAY_LIMIT;
    const kept: unknown[] = resolved
      .slice(0, limit)
      .map((element: unknown) => compactValue(element, depth + 1));
    if (resolved.length > limit) {
      // Single trailing marker recording how many elements were dropped.
      kept.push({ _truncated: true, omittedItems: resolved.length - limit });
    }
    return kept;
  }

  if (typeof resolved === 'object' && resolved !== null) {
    if (atDepthCap) {
      return { _truncated: true, note: 'nested content omitted for replay' };
    }
    // Rebuild with the same keys (keeps id/title/pageId), compacting each
    // value one level deeper. Object.fromEntries defines own properties, so an
    // own "__proto__" key (possible after JSON.parse) is preserved as data
    // instead of silently reassigning the new object's prototype.
    return Object.fromEntries(
      Object.entries(resolved).map(([key, child]): [string, unknown] => [
        key,
        compactValue(child, depth + 1),
      ]),
    );
  }

  // Numbers, booleans, etc.: nothing to shrink.
  return resolved;
}
/**
* Rebuild the FULL UIMessage `parts` for an assistant turn from the SDK steps,
* so multi-turn history replays prior tool-calls/results to the model (not just
@@ -467,7 +558,7 @@ function assistantParts(
toolCallId: call.toolCallId,
state: 'output-available',
input: call.input,
output: resultsById.get(call.toolCallId),
output: compactToolOutput(resultsById.get(call.toolCallId)),
});
} else {
// No paired result (e.g. aborted mid-step). Persisting a bare
@@ -529,7 +620,7 @@ function serializeSteps(
calls.push({ toolName: call.toolName, input: call.input });
}
for (const r of step.toolResults ?? []) {
calls.push({ toolName: r.toolName, output: r.output });
calls.push({ toolName: r.toolName, output: compactToolOutput(r.output) });
}
}
return calls.length > 0 ? calls : null;