diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts index f1f3461a..c9c9857b 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts @@ -1,4 +1,4 @@ -import { compactToolOutput } from './ai-chat.service'; +import { compactToolOutput, prepareAgentStep } from './ai-chat.service'; /** * Unit tests for compactToolOutput: the pure helper that shrinks LARGE tool @@ -66,3 +66,43 @@ describe('compactToolOutput', () => { expect(compactedBytes).toBeLessThan(originalBytes / 10); }); }); + +/** + * Unit tests for prepareAgentStep: the pure helper that decides per-step + * overrides for the agent loop. Normal steps return undefined (default SDK + * behaviour); the FINAL allowed step forces a text-only synthesis + * (`toolChoice: 'none'`) and concatenates a synthesis instruction onto the base + * system prompt. MAX_AGENT_STEPS is 20, so the boundary is step 19 (0-indexed). + */ +describe('prepareAgentStep', () => { + it('returns undefined for early steps (default behaviour)', () => { + expect(prepareAgentStep(0, 'base')).toBeUndefined(); + expect(prepareAgentStep(1, 'base')).toBeUndefined(); + expect(prepareAgentStep(5, 'base')).toBeUndefined(); + expect(prepareAgentStep(10, 'base')).toBeUndefined(); + expect(prepareAgentStep(18, 'base')).toBeUndefined(); + }); + + it('forces a text-only synthesis on the final step', () => { + const result = prepareAgentStep(19, 'base'); + expect(result).toBeDefined(); + expect(result?.toolChoice).toBe('none'); + }); + + it('preserves the base system prompt and adds the synthesis instruction', () => { + const result = prepareAgentStep(19, 'base'); + expect(result).toBeDefined(); + // The original system prompt must survive at the start (concatenation, not + // replacement — a bare override would lose the whole persona/context). 
+ expect(result?.system.startsWith('base')).toBe(true); + // The synthesis instruction is appended, including its key directive. + expect(result?.system).toContain('Do NOT call any more tools'); + }); + + it('pins the off-by-one boundary (18 is not the last of 20, 19 is)', () => { + expect(prepareAgentStep(18, 'base')).toBeUndefined(); + const atBoundary = prepareAgentStep(19, 'base'); + expect(atBoundary).toBeDefined(); + expect(atBoundary?.toolChoice).toBe('none'); + }); +}); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 3119c3c4..fae91f25 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -17,6 +17,44 @@ import { AiChatToolsService } from './tools/ai-chat-tools.service'; import { McpClientsService } from './external-mcp/mcp-clients.service'; import { buildSystemPrompt } from './ai-chat.prompt'; +// Max agent steps per turn. One step = one model generation; a step that calls +// tools is followed by another step carrying the tool results. Raised from 8 so +// multi-search research questions are not cut off mid-investigation. +const MAX_AGENT_STEPS = 20; + +// System-prompt addendum injected ONLY on the final step (see prepareAgentStep). +// It forbids further tool calls and tells the model to synthesize the best +// answer it can from what it already gathered, so a tool-heavy turn never ends +// empty. +const FINAL_STEP_INSTRUCTION = + 'You have reached the maximum number of tool-use steps for this turn. ' + + 'Do NOT call any more tools. Using only the information already gathered, ' + + "write the most complete, useful final answer you can now, in the user's " + + 'language. If the information is incomplete, say so explicitly: summarize ' + + 'what you found, what is still missing, and give your best partial conclusion.'; + +/** + * Per-step override for the agent loop. 
Returns `undefined` for normal steps + * (default SDK behaviour), and on the FINAL allowed step forces a text-only + * synthesis by setting `toolChoice: 'none'` and concatenating the synthesis + * instruction onto the base system prompt. Exported for unit testing. + * + * NOTE: at AI SDK v7 the per-step `system` field is renamed to `instructions`. + * On v6 (`^6.0.134`) `system` is the correct field — adjust when bumping. + */ +export function prepareAgentStep( + stepNumber: number, + system: string, +): { toolChoice: 'none'; system: string } | undefined { + if (stepNumber >= MAX_AGENT_STEPS - 1) { + return { + toolChoice: 'none', + system: `${system}\n\n${FINAL_STEP_INSTRUCTION}`, + }; + } + return undefined; +} + /** * Payload accepted from the client `useChat` POST body. We do NOT bind a strict * DTO (the global ValidationPipe whitelist would strip the useChat-specific @@ -244,7 +282,15 @@ export class AiChatService { // cap would truncate complex tool calls mid-argument. Let the model use its // natural per-step budget. (Cost/credit limits are an account concern, not // something to enforce by silently breaking the agent.) - stopWhen: stepCountIs(8), + stopWhen: stepCountIs(MAX_AGENT_STEPS), + // Forced finalization: reserve the LAST allowed step for a text-only + // answer. Without this, a turn that spends all its steps on tool calls + // ends with no assistant text (an empty turn). On the final step we + // forbid further tool calls and append a synthesis instruction; the base + // system prompt is concatenated (a bare system override would REPLACE the + // whole persona/context, which we must not lose). Earlier steps return + // undefined -> default behaviour. + prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system), abortSignal: signal, onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => { await persistAssistant({