From 41dfeeb77a86b43fa5de33dcc75edd4437aceb7a Mon Sep 17 00:00:00 2001
From: vvzvlad <git@vvzvlad.xyz>
Date: Wed, 17 Jun 2026 23:44:51 +0300
Subject: [PATCH] perf(ai-chat): compact large tool outputs before persisting
 them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Read tools (getPage, getPageJson, getNode, diffPageVersions,
exportPageMarkdown) return whole pages with no size cap. Their outputs
were stored verbatim in metadata.parts and the tool_calls column, and
metadata.parts is replayed to the provider on every later turn via
convertToModelMessages. After reading a couple of large pages the prompt
grew by full page bodies each turn — rising token cost, latency and DB
row size.

Add compactToolOutput(): a pure, recursive, size-bounded compactor used
in assistantParts() and serializeSteps(). It preserves the value's kind
and small scalar fields (id/title/pageId, which the client reads to build
citations on reload) while truncating long strings, capping long arrays
with a marker, and collapsing subtrees past a depth limit. Small outputs
are returned unchanged by identity. Tool inputs are left intact so
replayed tool_use arguments keep their object shape.

Compaction runs only at persistence time (onFinish/onAbort), so the live
stream and the current turn's multi-step reasoning still see full bodies.

Add unit tests for compactToolOutput.
---
 .../src/core/ai-chat/ai-chat.service.spec.ts  | 68 +++++++++++++
 .../src/core/ai-chat/ai-chat.service.ts       | 95 ++++++++++++++++++-
 2 files changed, 161 insertions(+), 2 deletions(-)
 create mode 100644 apps/server/src/core/ai-chat/ai-chat.service.spec.ts

diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
new file mode 100644
index 00000000..f1f3461a
--- /dev/null
+++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
@@ -0,0 +1,68 @@
+import { compactToolOutput } from './ai-chat.service';
+
+/**
+ * Unit tests for compactToolOutput: the pure helper that shrinks LARGE tool
+ * outputs before they are persisted (and re-sent to the provider on later
+ * turns). The contract is: small outputs pass through unchanged (by identity);
+ * large outputs keep their shape and small scalar fields (id/title/pageId — the
+ * client reads these to render citations) while big payloads are truncated.
+ */
+describe('compactToolOutput', () => {
+  it('returns a small object unchanged (by identity)', () => {
+    const small = { id: 'p1', title: 'Hello', trashed: true };
+    expect(compactToolOutput(small)).toBe(small);
+  });
+
+  it('truncates a large getPage-shaped markdown body but keeps the title', () => {
+    const big = 'x'.repeat(20000);
+    const result = compactToolOutput({ title: 'T', markdown: big }) as {
+      title: string;
+      markdown: string;
+    };
+    // The short scalar field survives compaction (citations depend on it).
+    expect(result.title).toBe('T');
+    // The big payload is shortened and carries the truncation note.
+    expect(result.markdown.length).toBeLessThan(20000);
+    expect(result.markdown).toContain('[truncated');
+  });
+
+  it('caps a long array and appends a single truncation marker', () => {
+    // 200 small objects, padded so the serialized array exceeds 4000 bytes.
+    const long = Array.from({ length: 200 }, (_, i) => ({
+      id: 'n' + i,
+      pad: 'y'.repeat(40),
+    }));
+    const result = compactToolOutput(long) as Array<Record<string, unknown>>;
+    // First 50 elements kept + 1 trailing marker object.
+    expect(result).toHaveLength(51);
+    const marker = result[result.length - 1];
+    expect(marker._truncated).toBe(true);
+    expect(marker.omittedItems).toBe(150);
+  });
+
+  it('passes through null, undefined and primitives unchanged', () => {
+    expect(compactToolOutput(null)).toBeNull();
+    expect(compactToolOutput(undefined)).toBeUndefined();
+    expect(compactToolOutput(42)).toBe(42);
+  });
+
+  it('replaces a subtree beyond the depth cap with a marker', () => {
+    // Wrap a big string in 20 levels of objects (deeper than the depth cap)
+    // so the serialized size exceeds the threshold and the cap is reached.
+    let nested: Record<string, unknown> = { leaf: 'z'.repeat(8000) };
+    for (let i = 0; i < 20; i++) {
+      nested = { child: nested };
+    }
+    const result = compactToolOutput(nested);
+    expect(JSON.stringify(result)).toContain('nested content omitted');
+  });
+
+  it('produces a much smaller JSON than the original for a large input', () => {
+    const big = 'x'.repeat(20000);
+    const original = { title: 'T', markdown: big };
+    const result = compactToolOutput(original);
+    const originalBytes = Buffer.byteLength(JSON.stringify(original), 'utf8');
+    const compactedBytes = Buffer.byteLength(JSON.stringify(result), 'utf8');
+    expect(compactedBytes).toBeLessThan(originalBytes / 10);
+  });
+});
diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts
index 4094357d..20105169 100644
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -427,6 +427,97 @@ type StepLike = {
   }>;
 };
 
+/**
+ * Compaction tunables for persisted tool OUTPUTS. Read tools (getPage,
+ * getPageJson, getNode, diffPageVersions, exportPageMarkdown, ...) return whole
+ * pages with no size cap. Their outputs are stored in `metadata.parts` and
+ * RE-SENT to the provider on every later turn via convertToModelMessages, so an
+ * uncompacted large body grows token cost, latency, and DB row size on every
+ * turn. We shrink the big payloads while preserving the object's shape and its
+ * small scalar fields (id/title/pageId) the client reads to render citations.
+ */
+// Threshold in UTF-8 bytes of the JSON serialization: outputs at or below it
+// are returned unchanged, by identity (fast path); larger ones are compacted.
+const MAX_TOOL_OUTPUT_BYTES = 4000;
+// A string longer than this many UTF-16 code units is cut to a leading preview.
+const TOOL_OUTPUT_STRING_LIMIT = 600;
+// Upper bound on the leading UTF-16 code units kept from a truncated string.
+const TOOL_OUTPUT_STRING_PREVIEW = 500;
+// Maximum number of array elements kept; the rest are summarized by a marker.
+const TOOL_OUTPUT_ARRAY_LIMIT = 50;
+// An object at or beyond this nesting depth is replaced with a marker. Arrays
+// are not collapsed by depth; they are only capped in length.
+const TOOL_OUTPUT_MAX_DEPTH = 8;
+
+/**
+ * Compact a single tool output before it is persisted (and thus re-sent to
+ * the provider on later turns). null, undefined, values JSON.stringify cannot
+ * serialize, and outputs whose JSON is at most MAX_TOOL_OUTPUT_BYTES are
+ * returned unchanged, by identity. Larger outputs are compacted from their
+ * parsed JSON form, so values with toJSON (e.g. Date) keep their serialized
+ * shape instead of being rebuilt as an empty object.
+ *
+ * Exported only so the unit test can import the pure helper.
+ */
+export function compactToolOutput(output: unknown): unknown {
+  if (output === null || output === undefined) return output;
+  let serialized: string | undefined;
+  try {
+    serialized = JSON.stringify(output);
+  } catch {
+    // Non-serializable (e.g. circular): return unchanged, never throw here.
+    return output;
+  }
+  // JSON.stringify returns undefined for values like a bare function/symbol.
+  if (serialized === undefined) return output;
+  // At or below the size threshold: return the original (by identity).
+  if (Buffer.byteLength(serialized, 'utf8') <= MAX_TOOL_OUTPUT_BYTES) {
+    return output;
+  }
+  // Walk the plain JSON form so toJSON values (e.g. Date) are not flattened.
+  return compactValue(JSON.parse(serialized) as unknown, 0);
+}
+
+/**
+ * Recursive worker for compactToolOutput. Long strings become a leading
+ * preview plus a "[truncated N chars]" note, long arrays keep their first
+ * TOOL_OUTPUT_ARRAY_LIMIT elements plus one marker object, and objects at or
+ * beyond TOOL_OUTPUT_MAX_DEPTH are replaced by a marker. Anything else is
+ * returned as-is. `depth` is the nesting level of `value` (0 at the root).
+ */
+function compactValue(value: unknown, depth: number): unknown {
+  if (typeof value === 'string') {
+    if (value.length <= TOOL_OUTPUT_STRING_LIMIT) return value;
+    // Never cut inside a surrogate pair: a lone surrogate is not valid
+    // Unicode and can be rejected or mangled when stored as JSON/UTF-8.
+    let end = TOOL_OUTPUT_STRING_PREVIEW;
+    const last = value.charCodeAt(end - 1);
+    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
+    return `${value.slice(0, end)}…[truncated ${value.length - end} chars]`;
+  }
+  if (Array.isArray(value)) {
+    const kept = value
+      .slice(0, TOOL_OUTPUT_ARRAY_LIMIT)
+      .map((el) => compactValue(el, depth + 1));
+    if (value.length > TOOL_OUTPUT_ARRAY_LIMIT) {
+      // Marker for the dropped tail, signalling the array was longer.
+      const omittedItems = value.length - TOOL_OUTPUT_ARRAY_LIMIT;
+      kept.push({ _truncated: true, omittedItems });
+    }
+    return kept;
+  }
+  // Numbers, booleans, null, etc.: nothing to shrink.
+  if (typeof value !== 'object' || value === null) return value;
+  if (depth >= TOOL_OUTPUT_MAX_DEPTH) {
+    return { _truncated: true, note: 'nested content omitted for replay' };
+  }
+  // Rebuild key by key (keeps id/title/pageId). Object.fromEntries defines
+  // own properties, so a key literally named __proto__ is not lost.
+  return Object.fromEntries(
+    Object.entries(value).map(([k, v]) => [k, compactValue(v, depth + 1)]),
+  );
+}
+
 /**
  * Rebuild the FULL UIMessage `parts` for an assistant turn from the SDK steps,
  * so multi-turn history replays prior tool-calls/results to the model (not just
@@ -467,7 +558,7 @@ function assistantParts(
           toolCallId: call.toolCallId,
           state: 'output-available',
           input: call.input,
-          output: resultsById.get(call.toolCallId),
+          output: compactToolOutput(resultsById.get(call.toolCallId)),
         });
       } else {
         // No paired result (e.g. aborted mid-step). Persisting a bare
@@ -529,7 +620,7 @@ function serializeSteps(
       calls.push({ toolName: call.toolName, input: call.input });
     }
     for (const r of step.toolResults ?? []) {
-      calls.push({ toolName: r.toolName, output: r.output });
+      calls.push({ toolName: r.toolName, output: compactToolOutput(r.output) });
     }
   }
   return calls.length > 0 ? calls : null;