Files
gitmost/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
claude code agent 227 ed3b65c36b Merge remote-tracking branch 'gitea/develop' into batch/issues-2026-06-25
# Conflicts:
#	apps/server/src/core/ai-chat/ai-chat.service.spec.ts
#	apps/server/src/core/ai-chat/ai-chat.service.ts
2026-06-25 12:48:47 +03:00

634 lines
22 KiB
TypeScript

import { ForbiddenException } from '@nestjs/common';
import {
AiChatService,
compactToolOutput,
assistantParts,
serializeSteps,
rowToUiMessage,
prepareAgentStep,
flushAssistant,
chatStreamMetadata,
accumulateStepUsage,
MAX_AGENT_STEPS,
FINAL_STEP_INSTRUCTION,
} from './ai-chat.service';
import type { AiChatMessage, Workspace } from '@docmost/db/types/entity.types';
import { buildSystemPrompt } from './ai-chat.prompt';
import type { McpClientsService } from './external-mcp/mcp-clients.service';
/**
 * Specs for compactToolOutput, the pure helper that shrinks LARGE tool outputs
 * before they are persisted (and re-sent to the provider on later turns).
 *
 * Contract pinned below:
 *  - small outputs pass through untouched, returned as the same reference;
 *  - large outputs keep their shape and their small scalar fields
 *    (id/title/pageId — the client reads these to render citations), while
 *    big payloads are truncated.
 */
describe('compactToolOutput', () => {
  // UTF-8 size of a value once serialized to JSON.
  const jsonByteLength = (value: unknown): number =>
    Buffer.byteLength(JSON.stringify(value), 'utf8');

  it('returns a small object unchanged (by identity)', () => {
    const tiny = { id: 'p1', title: 'Hello', trashed: true };

    const compacted = compactToolOutput(tiny);

    expect(compacted).toBe(tiny);
  });

  it('truncates a large getPage-shaped markdown body but keeps the title', () => {
    const page = { title: 'T', markdown: 'x'.repeat(20000) };

    const compacted = compactToolOutput(page) as {
      title: string;
      markdown: string;
    };

    // The shallow scalar survives, because citations are rendered from it.
    expect(compacted.title).toBe('T');
    // The heavy body ends up shorter than it started and carries a marker.
    expect(compacted.markdown.length).toBeLessThan(20000);
    expect(compacted.markdown).toContain('[truncated');
  });

  it('caps a long array and appends a single truncation marker', () => {
    // 200 small padded entries, so the serialized total is > 4000 bytes.
    const padding = 'y'.repeat(40);
    const entries: Array<{ id: string; pad: string }> = [];
    for (let index = 0; index < 200; index += 1) {
      entries.push({ id: `n${index}`, pad: padding });
    }

    const compacted = compactToolOutput(entries) as Array<
      Record<string, unknown>
    >;

    // 50 kept + 1 marker.
    expect(compacted).toHaveLength(51);
    const [trailer] = compacted.slice(-1);
    expect(trailer._truncated).toBe(true);
    expect(trailer.omittedItems).toBe(150);
  });

  it('passes through null, undefined and primitives unchanged', () => {
    const fromNull = compactToolOutput(null);
    expect(fromNull).toBeNull();

    const fromUndefined = compactToolOutput(undefined);
    expect(fromUndefined).toBeUndefined();

    const fromNumber = compactToolOutput(42);
    expect(fromNumber).toBe(42);
  });

  it('replaces a subtree beyond the depth cap with a marker', () => {
    // Wrap a big leaf string in 20 `child` layers (> TOOL_OUTPUT_MAX_DEPTH)
    // so the payload is both too deep and over the size threshold.
    const leaf: Record<string, unknown> = { leaf: 'z'.repeat(8000) };
    const deep = Array.from({ length: 20 }).reduce<Record<string, unknown>>(
      (inner) => ({ child: inner }),
      leaf,
    );

    const serialized = JSON.stringify(compactToolOutput(deep));

    expect(serialized).toContain('nested content omitted');
  });

  it('produces a much smaller JSON than the original for a large input', () => {
    const source = { title: 'T', markdown: 'x'.repeat(20000) };

    const compacted = compactToolOutput(source);

    const sizeBefore = jsonByteLength(source);
    const sizeAfter = jsonByteLength(compacted);
    expect(sizeAfter).toBeLessThan(sizeBefore / 10);
  });
});
/**
 * Specs for assistantParts, the pure function that rebuilds the persisted
 * UIMessage parts for a turn; its output determines whether the conversation
 * replays correctly on the next turn.
 *
 * The crux: a tool-call WITHOUT a paired result must turn into a synthetic
 * `output-error` part, so that convertToModelMessages never throws
 * MissingToolResultsError. That case MUST fail against the pre-fix logic,
 * which persisted a bare input-available call.
 */
describe('assistantParts', () => {
  type AnyPart = Record<string, unknown>;

  // First part whose `type` equals the given tag, if any.
  const partOfType = (parts: AnyPart[], tag: string): AnyPart | undefined =>
    parts.find((candidate) => candidate.type === tag);

  // True for every part whose `type` is a string starting with `tool-`.
  const isToolPart = (candidate: AnyPart): boolean => {
    const kind = candidate.type;
    return typeof kind === 'string' && kind.startsWith('tool-');
  };

  it('emits output-available for a tool-call WITH a paired result', () => {
    const pairedSteps = [
      {
        text: '',
        toolCalls: [
          { toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
        ],
        toolResults: [
          { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
        ],
      },
    ];

    const rebuilt = assistantParts(pairedSteps, '') as AnyPart[];

    const getPagePart = partOfType(rebuilt, 'tool-getPage');
    expect(getPagePart).toBeDefined();
    expect(getPagePart!.state).toBe('output-available');
    expect(getPagePart!.output).toEqual({ title: 'T' });
  });

  it('emits a synthetic output-error for an UNPAIRED tool-call (crux)', () => {
    const unpairedSteps = [
      {
        text: '',
        toolCalls: [
          { toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } },
        ],
        toolResults: [],
      },
    ];

    const rebuilt = assistantParts(unpairedSteps, '') as AnyPart[];

    const insertPart = partOfType(rebuilt, 'tool-insertNode');
    expect(insertPart).toBeDefined();
    // An unpaired call has to surface as output-error (NOT input-available);
    // that keeps the rebuilt history balanced for convertToModelMessages on
    // the following turn.
    expect(insertPart!.state).toBe('output-error');
    expect(insertPart!.errorText).toBeTruthy();
    expect(insertPart).not.toHaveProperty('output');
  });

  it('skips malformed tool-calls (missing toolName or toolCallId)', () => {
    const malformedSteps = [
      {
        text: '',
        toolCalls: [
          { toolCallId: 'c1', input: {} }, // toolName is absent
          { toolName: 'getPage', input: {} }, // toolCallId is absent
        ],
        toolResults: [],
      },
    ];

    const rebuilt = assistantParts(malformedSteps, '') as AnyPart[];

    expect(rebuilt.filter(isToolPart)).toHaveLength(0);
  });

  it('uses per-step text when present', () => {
    const textOnlySteps = [{ text: 'hello', toolCalls: [], toolResults: [] }];

    const rebuilt = assistantParts(textOnlySteps, 'fallback-ignored') as AnyPart[];

    expect(rebuilt).toEqual([{ type: 'text', text: 'hello' }]);
  });

  it('falls back to a single text part when no step text', () => {
    const rebuilt = assistantParts([], 'final answer') as AnyPart[];

    expect(rebuilt).toEqual([{ type: 'text', text: 'final answer' }]);
  });
});
/**
 * Specs for serializeSteps: an empty step list serializes to null, and a step
 * with calls and results is flattened into one trace array (calls first, then
 * results, as asserted below).
 */
describe('serializeSteps', () => {
  it('returns null when there are no calls or results', () => {
    const serialized = serializeSteps([]);

    expect(serialized).toBeNull();
  });

  it('flattens calls and results into a compact trace', () => {
    const singleStep = {
      toolCalls: [{ toolName: 'getPage', input: { id: 'p1' } }],
      toolResults: [{ toolName: 'getPage', output: { title: 'T' } }],
    };

    const flattened = serializeSteps([singleStep]) as Array<
      Record<string, unknown>
    >;

    expect(flattened).toHaveLength(2);
    const [callEntry, resultEntry] = flattened;
    expect(callEntry).toEqual({ toolName: 'getPage', input: { id: 'p1' } });
    expect(resultEntry).toEqual({ toolName: 'getPage', output: { title: 'T' } });
  });
});
/**
 * Specs for rowToUiMessage: the role is carried over from the row, and the
 * parts come from `metadata.parts` when present, otherwise from a single text
 * part built out of `content`.
 */
describe('rowToUiMessage', () => {
  it('prefers metadata.parts over content', () => {
    const richParts = [{ type: 'text', text: 'rich part' }];
    const storedRow = {
      id: 'm1',
      role: 'assistant',
      content: 'plain text',
      metadata: { parts: richParts },
    } as unknown as AiChatMessage;

    const message = rowToUiMessage(storedRow);

    expect(message.role).toBe('assistant');
    expect(message.parts).toEqual([{ type: 'text', text: 'rich part' }]);
  });

  it('falls back to a single text part from content when no metadata.parts', () => {
    const storedRow = {
      id: 'm2',
      role: 'user',
      content: 'hi there',
      metadata: null,
    } as unknown as AiChatMessage;

    const message = rowToUiMessage(storedRow);

    expect(message.role).toBe('user');
    expect(message.parts).toEqual([{ type: 'text', text: 'hi there' }]);
  });
});
/**
 * Specs for prepareAgentStep, the pure helper that picks per-step overrides
 * for the agent loop.
 *
 * Early steps yield undefined (default behavior). The final allowed step
 * (stepNumber === MAX_AGENT_STEPS - 1) forces a text-only synthesis answer
 * (toolChoice 'none') and appends FINAL_STEP_INSTRUCTION onto the original
 * system prompt instead of replacing it.
 */
describe('prepareAgentStep', () => {
  const SYSTEM_PROMPT = 'SYS';
  const FINAL_STEP = MAX_AGENT_STEPS - 1;

  it('returns undefined for the first step', () => {
    const override = prepareAgentStep(0, SYSTEM_PROMPT);

    expect(override).toBeUndefined();
  });

  it('returns undefined for a non-final step (just before the last)', () => {
    const override = prepareAgentStep(FINAL_STEP - 1, SYSTEM_PROMPT);

    expect(override).toBeUndefined();
  });

  it('forces a text-only synthesis on the final allowed step', () => {
    const override = prepareAgentStep(FINAL_STEP, SYSTEM_PROMPT);

    expect(override).toBeDefined();
    expect(override?.toolChoice).toBe('none');
    // The original persona stays in place as the prefix.
    expect(override?.system.startsWith('SYS')).toBe(true);
    // The synthesis instruction is tacked on after it.
    expect(override?.system).toContain(FINAL_STEP_INSTRUCTION);
  });
});
/**
 * flushAssistant (#183): the PURE row builder behind the step-granular durable
 * write path. It runs identically for the upfront insert (empty steps,
 * 'streaming'), every per-step update, and the terminal finalize — so a future
 * background worker can call the same function. These tests pin the four status
 * shapes and the `metadata.parts` shape that rowToUiMessage/findRecent depend on
 * (per-step text + tool parts via assistantParts, in-progress text appended).
 *
 * Every lookup of a tool part is asserted to be defined before its fields are
 * read, so a missing part fails as an assertion rather than as a TypeError.
 */
describe('flushAssistant', () => {
  type AnyPart = Record<string, unknown>;

  // One finished step: some text plus a getPage call paired with its result.
  const toolStep = {
    text: 'looked it up',
    toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
    toolResults: [
      { toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
    ],
  };

  it('upfront seed: empty streaming row (no content, no toolCalls, empty parts)', () => {
    const f = flushAssistant([], '', 'streaming');
    expect(f.status).toBe('streaming');
    expect(f.content).toBe('');
    expect(f.toolCalls).toBeNull();
    expect(f.metadata.parts).toEqual([]);
    // No finishReason while streaming (it is not a terminal state).
    expect('finishReason' in f.metadata).toBe(false);
  });

  it('streaming update folds in finished steps but keeps status streaming', () => {
    const f = flushAssistant([toolStep], '', 'streaming');
    expect(f.status).toBe('streaming');
    expect(f.content).toBe('looked it up');
    const parts = f.metadata.parts as AnyPart[];
    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
    const toolPart = parts.find((p) => p.type === 'tool-getPage');
    // Assert presence first so a missing part is reported as such.
    expect(toolPart).toBeDefined();
    expect(toolPart?.state).toBe('output-available');
    expect(f.toolCalls).not.toBeNull();
  });

  it('completed: attaches finishReason + normalized usage + contextTokens', () => {
    const f = flushAssistant([toolStep], '', 'completed', {
      finishReason: 'stop',
      usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
      contextTokens: 15,
    });
    expect(f.status).toBe('completed');
    expect(f.metadata.finishReason).toBe('stop');
    expect(f.metadata.usage).toEqual({
      inputTokens: 10,
      outputTokens: 5,
      totalTokens: 15,
      reasoningTokens: undefined,
    });
    expect(f.metadata.contextTokens).toBe(15);
  });

  it('error: records the error and a derived finishReason', () => {
    const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' });
    expect(f.status).toBe('error');
    expect(f.content).toBe('partial answer');
    expect(f.metadata.error).toBe('boom');
    // Derives finishReason from the terminal status when none is supplied.
    expect(f.metadata.finishReason).toBe('error');
    expect(f.metadata.parts).toEqual([
      { type: 'text', text: 'partial answer' },
    ]);
  });

  it('aborted: in-progress text appended last, no error key', () => {
    const f = flushAssistant([toolStep], ' and then', 'aborted');
    expect(f.status).toBe('aborted');
    expect(f.metadata.finishReason).toBe('aborted');
    expect('error' in f.metadata).toBe(false);
    expect(f.content).toBe('looked it up and then');
    const parts = f.metadata.parts as AnyPart[];
    expect(parts[parts.length - 1]).toEqual({
      type: 'text',
      text: ' and then',
    });
  });

  it('combines a finished tool step with trailing in-progress text (error path)', () => {
    // The error path captures the PARTIAL answer the user already saw: each
    // finished step's text + tool parts, then the in-progress step's text last.
    const flushed = flushAssistant([toolStep], ' and then', 'error', {
      error: 'boom',
    });
    const parts = flushed.metadata.parts as AnyPart[];
    expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
    const toolPart = parts.find((p) => p.type === 'tool-getPage');
    // Assert presence first so a missing part is reported as such.
    expect(toolPart).toBeDefined();
    expect(toolPart?.state).toBe('output-available');
    // In-progress text appended LAST so the parts match the stream order.
    expect(parts[parts.length - 1]).toEqual({
      type: 'text',
      text: ' and then',
    });
    expect(flushed.content).toBe('looked it up and then');
    expect(flushed.toolCalls).not.toBeNull();
    expect(flushed.metadata.error).toBe('boom');
  });
});
/**
 * Specs for chatStreamMetadata, which attaches metadata to the streamed
 * assistant UI message depending on the part type:
 *  - `start` carries `chatId`, so the client adopts the real created chat id
 *    at the first chunk (see #137);
 *  - `finish-step` and `finish` carry AUTHORITATIVE usage (incl. reasoning
 *    tokens), so the client's live token counter snaps to exact at each
 *    step/turn boundary.
 */
describe('chatStreamMetadata', () => {
  const CHAT_ID = 'chat-1';

  it('returns { chatId } for the start part', () => {
    const metadata = chatStreamMetadata({ type: 'start' }, CHAT_ID);

    expect(metadata).toEqual({ chatId: 'chat-1' });
  });

  it('returns the CUMULATIVE step usage passed in for the finish-step part', () => {
    // In v6 the finish-step usage is per-step; the caller keeps the running
    // sum and hands it in, and this function only wraps it.
    const runningTotal = {
      inputTokens: 500,
      outputTokens: 220,
      totalTokens: 720,
      reasoningTokens: 30,
    };

    const metadata = chatStreamMetadata(
      { type: 'finish-step', usage: { outputTokens: 100 } },
      CHAT_ID,
      runningTotal,
    );

    expect(metadata).toEqual({
      usage: {
        inputTokens: 500,
        outputTokens: 220,
        totalTokens: 720,
        reasoningTokens: 30,
      },
    });
  });

  it('returns turn usage for the finish part (reasoning from deprecated top-level field)', () => {
    const metadata = chatStreamMetadata(
      {
        type: 'finish',
        totalUsage: {
          inputTokens: 1000,
          outputTokens: 250,
          totalTokens: 1250,
          reasoningTokens: 50,
        },
      },
      CHAT_ID,
    );

    const expectedUsage = {
      inputTokens: 1000,
      outputTokens: 250,
      totalTokens: 1250,
      reasoningTokens: 50,
    };
    expect(metadata).toEqual({ usage: expectedUsage });
  });

  it('prefers outputTokenDetails.reasoningTokens over the deprecated field (finish)', () => {
    const metadata = chatStreamMetadata(
      {
        type: 'finish',
        totalUsage: {
          outputTokens: 100,
          reasoningTokens: 5,
          outputTokenDetails: { reasoningTokens: 30 },
        },
      },
      CHAT_ID,
    );

    const expectedUsage = {
      inputTokens: undefined,
      outputTokens: 100,
      totalTokens: undefined,
      reasoningTokens: 30,
    };
    expect(metadata).toEqual({ usage: expectedUsage });
  });

  it('returns undefined for a finish-step with no accumulated usage', () => {
    const metadata = chatStreamMetadata({ type: 'finish-step' }, CHAT_ID);

    expect(metadata).toBeUndefined();
  });

  it('returns undefined for an unrelated part (e.g. text-delta)', () => {
    const metadata = chatStreamMetadata({ type: 'text-delta' }, CHAT_ID);

    expect(metadata).toBeUndefined();
  });
});
/**
 * Specs for accumulateStepUsage, which folds per-step usage into a running
 * cumulative total so that the client never sees the live counter jump DOWN
 * during a multi-step agent turn (#151).
 */
describe('accumulateStepUsage', () => {
  it('sums every field across two steps', () => {
    const firstStep = {
      inputTokens: 500,
      outputTokens: 100,
      totalTokens: 600,
      reasoningTokens: 30,
    };
    const secondStep = {
      inputTokens: 520,
      outputTokens: 80,
      totalTokens: 600,
      reasoningTokens: 10,
    };

    const total = accumulateStepUsage(firstStep, secondStep);

    expect(total).toEqual({
      inputTokens: 1020,
      outputTokens: 180,
      totalTokens: 1200,
      reasoningTokens: 40,
    });
  });

  it('returns the step as-is when there is no accumulator yet', () => {
    const total = accumulateStepUsage(undefined, { outputTokens: 10 });

    expect(total).toEqual({ outputTokens: 10 });
  });

  it('returns the accumulator unchanged when the step usage is absent', () => {
    const running = { outputTokens: 10 };

    const total = accumulateStepUsage(running, undefined);

    // Same reference back, not merely an equal copy.
    expect(total).toBe(running);
  });

  it('returns undefined when both sides are absent', () => {
    const total = accumulateStepUsage(undefined, undefined);

    expect(total).toBeUndefined();
  });

  it('keeps a field undefined only when neither side has it', () => {
    const total = accumulateStepUsage({ outputTokens: 5 }, { outputTokens: 7 });

    expect(total).toEqual({
      inputTokens: undefined,
      outputTokens: 12,
      totalTokens: undefined,
      reasoningTokens: undefined,
    });
  });
});
/**
 * Contract test for the #180 wiring in AiChatService.handle: the external MCP
 * toolset has to be built BEFORE the system prompt, and its per-server guidance
 * passed into buildSystemPrompt({ mcpInstructions }).
 *
 * The full streaming handle() is not unit-testable, so these cases replay the
 * exact prompt-build call the service makes for a connected-server toolset and
 * check that the guidance shows up. The toolsFor->buildSystemPrompt ordering is
 * also enforced at compile time (the prompt input now consumes
 * external.instructions).
 */
describe('AiChatService system prompt wiring (#180)', () => {
  // Only the `instructions` slice of what mcpClients.toolsFor resolves to.
  type ExternalInstructions = Pick<
    Awaited<ReturnType<McpClientsService['toolsFor']>>,
    'instructions'
  >;

  const workspace = { name: 'Acme' } as unknown as Workspace;

  it('includes the external MCP server instructions in the built system prompt', () => {
    const connected: ExternalInstructions = {
      instructions: [
        {
          serverName: 'Tavily',
          toolPrefix: 'tavily',
          instructions: 'Prefer tavily_search for current events.',
        },
      ],
    };

    // The same call the service issues once the external toolset exists.
    const prompt = buildSystemPrompt({
      workspace,
      adminPrompt: 'persona',
      mcpInstructions: connected.instructions,
    });

    const expectedFragments = [
      '<mcp_tooling',
      'Tavily',
      'tavily_*',
      'Prefer tavily_search for current events.',
    ];
    for (const fragment of expectedFragments) {
      expect(prompt).toContain(fragment);
    }
  });

  it('renders no MCP block when there are no external servers (empty instructions)', () => {
    const prompt = buildSystemPrompt({
      workspace,
      adminPrompt: 'persona',
      mcpInstructions: [],
    });

    expect(prompt).not.toContain('<mcp_tooling');
  });
});
/**
 * Specs for resolveOpenPageContext. The open page sent by the client is
 * attacker-controllable (id AND title), so the service has to validate the id
 * against the DB and take the title from the DB row — never echo the client
 * title (#159, AI edits the wrong page).
 *
 * The service is built with Object.create so the real method runs without the
 * service's full dependency graph (the constructor only assigns fields).
 */
describe('AiChatService.resolveOpenPageContext (#159 current-page validation)', () => {
  type StoredPage = { id: string; workspaceId: string; title: string | null };
  type StubOptions = {
    page?: StoredPage | null;
    canView?: boolean | 'throw-other';
  };
  type ResolvedPage = { id: string; title: string } | null;

  const workspace = { id: 'ws-1' } as Workspace;
  const actor = { id: 'u-1' } as any;

  // Bare service instance with only the collaborators the method touches
  // stubbed: a no-op logger, a page repo returning `page`, and an access check
  // driven by `canView`.
  const makeService = ({ page, canView }: StubOptions): AiChatService => {
    const service = Object.create(AiChatService.prototype) as AiChatService;
    const internals = service as any;

    internals.logger = { warn: () => {} };
    internals.pageRepo = {
      findById: async () => page ?? undefined,
    };
    internals.pageAccess = {
      validateCanView: async () => {
        switch (canView) {
          case 'throw-other':
            throw new Error('db down');
          case false:
            throw new ForbiddenException();
          default:
            return true;
        }
      },
    };

    return service;
  };

  // Invokes the method under test for the fixed workspace and user.
  function resolve(service: AiChatService, openPage: any): Promise<ResolvedPage> {
    return (service as any).resolveOpenPageContext(
      openPage,
      workspace,
      actor,
    ) as Promise<ResolvedPage>;
  }

  it('returns null when no page is open (no id)', async () => {
    const service = makeService({});
    const idlessInputs: any[] = [null, {}, { title: 'spoofed' }];

    for (const openPage of idlessInputs) {
      expect(await resolve(service, openPage)).toBeNull();
    }
  });

  it('returns null when the page does not exist', async () => {
    const service = makeService({ page: null });

    const resolved = await resolve(service, { id: 'p-x' });

    expect(resolved).toBeNull();
  });

  it('returns null for a page in a DIFFERENT workspace (tenant isolation)', async () => {
    const foreignPage = { id: 'p-1', workspaceId: 'ws-OTHER', title: 'Secret' };
    const service = makeService({ page: foreignPage });

    const resolved = await resolve(service, { id: 'p-1' });

    expect(resolved).toBeNull();
  });

  it('returns null when the user may not view the page (Forbidden)', async () => {
    const restrictedPage = { id: 'p-1', workspaceId: 'ws-1', title: 'Restricted' };
    const service = makeService({ page: restrictedPage, canView: false });

    const resolved = await resolve(service, { id: 'p-1' });

    expect(resolved).toBeNull();
  });

  it('returns null (fail-closed) on a non-Forbidden access-check fault', async () => {
    const anyPage = { id: 'p-1', workspaceId: 'ws-1', title: 'X' };
    const service = makeService({ page: anyPage, canView: 'throw-other' });

    const resolved = await resolve(service, { id: 'p-1' });

    expect(resolved).toBeNull();
  });

  it('uses the AUTHORITATIVE DB title, IGNORING the client-supplied title', async () => {
    const realPage = { id: 'p-1', workspaceId: 'ws-1', title: 'Real Title B' };
    const service = makeService({ page: realPage, canView: true });

    // The client says it is on "Page A", yet the id it sent belongs to page B.
    const resolved = await resolve(service, { id: 'p-1', title: 'Page A' });

    expect(resolved).toEqual({ id: 'p-1', title: 'Real Title B' });
  });

  it('coerces a null DB title to an empty string', async () => {
    const untitledPage = { id: 'p-1', workspaceId: 'ws-1', title: null };
    const service = makeService({ page: untitledPage, canView: true });

    const resolved = await resolve(service, { id: 'p-1' });

    expect(resolved).toEqual({ id: 'p-1', title: '' });
  });
});