From 3ca4bcf80bc082376cbce71969f4f0499ac33b5b Mon Sep 17 00:00:00 2001
From: "glm5.2 agent 180"
Date: Sat, 20 Jun 2026 04:53:26 +0300
Subject: [PATCH] feat(ai-chat): raise step limit to 20 and force a final answer

The agent's per-turn step ceiling was 8. On research-heavy questions the
model spent all 8 steps on tool calls and the turn ended with empty text —
the user saw no answer and had to nudge with '?'.

Two fixes:
- Raise stopWhen from stepCountIs(8) to stepCountIs(MAX_AGENT_STEPS = 20).
- On the LAST allowed step (stepNumber 19), forbid further tool calls
  (toolChoice: 'none') and append a synthesis instruction to the system
  prompt, so the model is forced to write the best answer it can from what
  it already gathered.

prepareStep returns undefined for every earlier step, so natural early
termination and the maxOutputTokens comment are untouched. The base system
prompt is CONCATENATED with the instruction (a bare override would lose the
persona/context). The logic is extracted into a pure, exported
prepareAgentStep(stepNumber, system) helper for unit testing.

NOTE for future bumps: in AI SDK v7 the per-step 'system' field is renamed
to 'instructions'; on v6 (`^6.0.134`) 'system' is correct.
---
 .../src/core/ai-chat/ai-chat.service.spec.ts  | 42 +++++++++++++++-
 .../src/core/ai-chat/ai-chat.service.ts       | 48 ++++++++++++++++++-
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
index f1f3461a..c9c9857b 100644
--- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
@@ -1,4 +1,4 @@
-import { compactToolOutput } from './ai-chat.service';
+import { compactToolOutput, prepareAgentStep } from './ai-chat.service';
 
 /**
  * Unit tests for compactToolOutput: the pure helper that shrinks LARGE tool
@@ -66,3 +66,43 @@ describe('compactToolOutput', () => {
     expect(compactedBytes).toBeLessThan(originalBytes / 10);
   });
 });
+
+/**
+ * Unit tests for prepareAgentStep: the pure helper that decides per-step
+ * overrides for the agent loop. Normal steps return undefined (default SDK
+ * behaviour); the FINAL allowed step forces a text-only synthesis
+ * (`toolChoice: 'none'`) and concatenates a synthesis instruction onto the base
+ * system prompt. MAX_AGENT_STEPS is 20, so the boundary is step 19 (0-indexed).
+ */
+describe('prepareAgentStep', () => {
+  it('returns undefined for early steps (default behaviour)', () => {
+    expect(prepareAgentStep(0, 'base')).toBeUndefined();
+    expect(prepareAgentStep(1, 'base')).toBeUndefined();
+    expect(prepareAgentStep(5, 'base')).toBeUndefined();
+    expect(prepareAgentStep(10, 'base')).toBeUndefined();
+    expect(prepareAgentStep(18, 'base')).toBeUndefined();
+  });
+
+  it('forces a text-only synthesis on the final step', () => {
+    const result = prepareAgentStep(19, 'base');
+    expect(result).toBeDefined();
+    expect(result?.toolChoice).toBe('none');
+  });
+
+  it('preserves the base system prompt and adds the synthesis instruction', () => {
+    const result = prepareAgentStep(19, 'base');
+    expect(result).toBeDefined();
+    // The original system prompt must survive at the start (concatenation, not
+    // replacement — a bare override would lose the whole persona/context).
+    expect(result?.system.startsWith('base')).toBe(true);
+    // The synthesis instruction is appended, including its key directive.
+    expect(result?.system).toContain('Do NOT call any more tools');
+  });
+
+  it('pins the off-by-one boundary (18 is not the last of 20, 19 is)', () => {
+    expect(prepareAgentStep(18, 'base')).toBeUndefined();
+    const atBoundary = prepareAgentStep(19, 'base');
+    expect(atBoundary).toBeDefined();
+    expect(atBoundary?.toolChoice).toBe('none');
+  });
+});
diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts
index 3119c3c4..fae91f25 100644
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -17,6 +17,44 @@ import { AiChatToolsService } from './tools/ai-chat-tools.service';
 import { McpClientsService } from './external-mcp/mcp-clients.service';
 import { buildSystemPrompt } from './ai-chat.prompt';
 
+// Max agent steps per turn. One step = one model generation; a step that calls
+// tools is followed by another step carrying the tool results. Raised from 8 so
+// multi-search research questions are not cut off mid-investigation.
+const MAX_AGENT_STEPS = 20;
+
+// System-prompt addendum injected ONLY on the final step (see prepareAgentStep).
+// It forbids further tool calls and tells the model to synthesize the best
+// answer it can from what it already gathered, so a tool-heavy turn never ends
+// empty.
+const FINAL_STEP_INSTRUCTION =
+  'You have reached the maximum number of tool-use steps for this turn. ' +
+  'Do NOT call any more tools. Using only the information already gathered, ' +
+  "write the most complete, useful final answer you can now, in the user's " +
+  'language. If the information is incomplete, say so explicitly: summarize ' +
+  'what you found, what is still missing, and give your best partial conclusion.';
+
+/**
+ * Per-step override for the agent loop. Returns `undefined` for normal steps
+ * (default SDK behaviour), and on the FINAL allowed step forces a text-only
+ * synthesis by setting `toolChoice: 'none'` and concatenating the synthesis
+ * instruction onto the base system prompt. Exported for unit testing.
+ *
+ * NOTE: at AI SDK v7 the per-step `system` field is renamed to `instructions`.
+ * On v6 (`^6.0.134`) `system` is the correct field — adjust when bumping.
+ */
+export function prepareAgentStep(
+  stepNumber: number,
+  system: string,
+): { toolChoice: 'none'; system: string } | undefined {
+  if (stepNumber >= MAX_AGENT_STEPS - 1) {
+    return {
+      toolChoice: 'none',
+      system: `${system}\n\n${FINAL_STEP_INSTRUCTION}`,
+    };
+  }
+  return undefined;
+}
+
 /**
  * Payload accepted from the client `useChat` POST body. We do NOT bind a strict
  * DTO (the global ValidationPipe whitelist would strip the useChat-specific
@@ -244,7 +282,15 @@ export class AiChatService {
       // cap would truncate complex tool calls mid-argument. Let the model use its
       // natural per-step budget. (Cost/credit limits are an account concern, not
       // something to enforce by silently breaking the agent.)
-      stopWhen: stepCountIs(8),
+      stopWhen: stepCountIs(MAX_AGENT_STEPS),
+      // Forced finalization: reserve the LAST allowed step for a text-only
+      // answer. Without this, a turn that spends all its steps on tool calls
+      // ends with no assistant text (an empty turn). On the final step we
+      // forbid further tool calls and append a synthesis instruction; the base
+      // system prompt is concatenated (a bare system override would REPLACE the
+      // whole persona/context, which we must not lose). Earlier steps return
+      // undefined -> default behaviour.
+      prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
       abortSignal: signal,
       onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
         await persistAssistant({