Files
gitmost/apps/server/src/core/ai-chat/ai-chat.service.spec.ts
T
agent_coder 6e681a9c66 fix(#274): escape page_changed injection surface, drop dead content_hash (review F1-F5)
F1: escape the collaborative page title before interpolating into
    <page_changed page="..."> (and the pre-existing openedPage attr) — strip
    <>" and collapse whitespace, so a crafted title can't break out of the
    attribute into the system prompt (cross-user injection).
F2: neutralize <page_changed>/</page_changed> occurrences inside the diff body
    so a crafted line can't close the block early.
F3: remove the dead content_hash column (written every turn, never read) —
    migration, repo, service hashing + crypto import, db.d.ts, spec asserts.
F4: test the best-effort catch branches (detectPageChange / snapshotOpenPage
    swallow errors and don't break the turn).
F5: soften the overstated 'diff cannot smuggle instructions' comment to
    defense-in-depth framing referencing the F1/F2 mitigations + safety sandwich.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-02 05:43:46 +03:00

975 lines
34 KiB
TypeScript

import { ForbiddenException } from '@nestjs/common';
import {
AiChatService,
compactToolOutput,
assistantParts,
serializeSteps,
rowToUiMessage,
prepareAgentStep,
flushAssistant,
chatStreamMetadata,
accumulateStepUsage,
isInterruptResume,
sameInstant,
MAX_AGENT_STEPS,
FINAL_STEP_INSTRUCTION,
} from './ai-chat.service';
import type { AiChatMessage, Workspace } from '@docmost/db/types/entity.types';
import { buildSystemPrompt } from './ai-chat.prompt';
import type { McpClientsService } from './external-mcp/mcp-clients.service';
/**
* Unit tests for compactToolOutput: the pure helper that shrinks LARGE tool
* outputs before they are persisted (and re-sent to the provider on later
* turns). The contract is: small outputs pass through unchanged (by identity);
* large outputs keep their shape and small scalar fields (id/title/pageId — the
* client reads these to render citations) while big payloads are truncated.
*/
describe('compactToolOutput', () => {
it('returns a small object unchanged (by identity)', () => {
const small = { id: 'p1', title: 'Hello', trashed: true };
expect(compactToolOutput(small)).toBe(small);
});
it('truncates a large getPage-shaped markdown body but keeps the title', () => {
const big = 'x'.repeat(20000);
const result = compactToolOutput({ title: 'T', markdown: big }) as {
title: string;
markdown: string;
};
// Shallow scalar field is preserved (citations depend on it).
expect(result.title).toBe('T');
// The big payload is shrunk far below the original size.
expect(result.markdown.length).toBeLessThan(20000);
expect(result.markdown).toContain('[truncated');
});
it('caps a long array and appends a single truncation marker', () => {
// 200 small objects, each padded so the total serialized size > 4000 bytes.
const long = Array.from({ length: 200 }, (_, i) => ({
id: 'n' + i,
pad: 'y'.repeat(40),
}));
const result = compactToolOutput(long) as Array<Record<string, unknown>>;
// 50 kept + 1 marker.
expect(result).toHaveLength(51);
const marker = result[result.length - 1];
expect(marker._truncated).toBe(true);
expect(marker.omittedItems).toBe(150);
});
it('passes through null, undefined and primitives unchanged', () => {
expect(compactToolOutput(null)).toBeNull();
expect(compactToolOutput(undefined)).toBeUndefined();
expect(compactToolOutput(42)).toBe(42);
});
it('replaces a subtree beyond the depth cap with a marker', () => {
// Build a deeply nested object (> TOOL_OUTPUT_MAX_DEPTH levels) with a big
// string at the bottom so the total serialized size exceeds the threshold.
let nested: Record<string, unknown> = { leaf: 'z'.repeat(8000) };
for (let i = 0; i < 20; i++) {
nested = { child: nested };
}
const result = compactToolOutput(nested);
expect(JSON.stringify(result)).toContain('nested content omitted');
});
it('produces a much smaller JSON than the original for a large input', () => {
const big = 'x'.repeat(20000);
const original = { title: 'T', markdown: big };
const result = compactToolOutput(original);
const originalBytes = Buffer.byteLength(JSON.stringify(original), 'utf8');
const compactedBytes = Buffer.byteLength(JSON.stringify(result), 'utf8');
expect(compactedBytes).toBeLessThan(originalBytes / 10);
});
});
/**
* Tests for assistantParts: the pure function that rebuilds the persisted
* UIMessage parts for a turn. Its output decides whether the conversation
* replays correctly on the next turn. The crux: a tool-call WITHOUT a paired
* result must become a synthetic `output-error` part, so convertToModelMessages
* never throws MissingToolResultsError. This test MUST fail on pre-fix logic
* that persisted a bare input-available call.
*/
describe('assistantParts', () => {
type AnyPart = Record<string, unknown>;
it('emits output-available for a tool-call WITH a paired result', () => {
const steps = [
{
text: '',
toolCalls: [
{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
],
toolResults: [
{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
],
},
];
const parts = assistantParts(steps, '') as AnyPart[];
const toolPart = parts.find((p) => p.type === 'tool-getPage');
expect(toolPart).toBeDefined();
expect(toolPart!.state).toBe('output-available');
expect(toolPart!.output).toEqual({ title: 'T' });
});
it('emits a synthetic output-error for an UNPAIRED tool-call (crux)', () => {
const steps = [
{
text: '',
toolCalls: [
{ toolCallId: 'c9', toolName: 'insertNode', input: { node: {} } },
],
toolResults: [],
},
];
const parts = assistantParts(steps, '') as AnyPart[];
const toolPart = parts.find((p) => p.type === 'tool-insertNode');
expect(toolPart).toBeDefined();
// The unpaired call MUST become output-error (NOT input-available), so the
// rebuilt history is balanced for convertToModelMessages on the next turn.
expect(toolPart!.state).toBe('output-error');
expect(toolPart!.errorText).toBeTruthy();
expect(toolPart).not.toHaveProperty('output');
});
it('skips malformed tool-calls (missing toolName or toolCallId)', () => {
const steps = [
{
text: '',
toolCalls: [
{ toolCallId: 'c1', input: {} }, // no toolName
{ toolName: 'getPage', input: {} }, // no toolCallId
],
toolResults: [],
},
];
const parts = assistantParts(steps, '') as AnyPart[];
const toolParts = parts.filter(
(p) =>
typeof p.type === 'string' && (p.type as string).startsWith('tool-'),
);
expect(toolParts).toHaveLength(0);
});
it('uses per-step text when present', () => {
const steps = [{ text: 'hello', toolCalls: [], toolResults: [] }];
const parts = assistantParts(steps, 'fallback-ignored') as AnyPart[];
expect(parts).toEqual([{ type: 'text', text: 'hello' }]);
});
it('falls back to a single text part when no step text', () => {
const parts = assistantParts([], 'final answer') as AnyPart[];
expect(parts).toEqual([{ type: 'text', text: 'final answer' }]);
});
});
describe('serializeSteps', () => {
it('returns null when there are no calls or results', () => {
expect(serializeSteps([])).toBeNull();
});
it('flattens calls and results into a compact trace', () => {
const trace = serializeSteps([
{
toolCalls: [{ toolName: 'getPage', input: { id: 'p1' } }],
toolResults: [{ toolName: 'getPage', output: { title: 'T' } }],
},
]) as Array<Record<string, unknown>>;
expect(trace).toHaveLength(2);
expect(trace[0]).toEqual({ toolName: 'getPage', input: { id: 'p1' } });
expect(trace[1]).toEqual({ toolName: 'getPage', output: { title: 'T' } });
});
});
describe('rowToUiMessage', () => {
it('prefers metadata.parts over content', () => {
const row = {
id: 'm1',
role: 'assistant',
content: 'plain text',
metadata: { parts: [{ type: 'text', text: 'rich part' }] },
} as unknown as AiChatMessage;
const ui = rowToUiMessage(row);
expect(ui.role).toBe('assistant');
expect(ui.parts).toEqual([{ type: 'text', text: 'rich part' }]);
});
it('falls back to a single text part from content when no metadata.parts', () => {
const row = {
id: 'm2',
role: 'user',
content: 'hi there',
metadata: null,
} as unknown as AiChatMessage;
const ui = rowToUiMessage(row);
expect(ui.role).toBe('user');
expect(ui.parts).toEqual([{ type: 'text', text: 'hi there' }]);
});
});
/**
* Unit tests for prepareAgentStep: the pure helper that decides per-step
* overrides for the agent loop. Early steps return undefined (default
* behavior); the final allowed step (stepNumber === MAX_AGENT_STEPS - 1) forces
* a text-only synthesis answer (toolChoice 'none') with the FINAL_STEP_INSTRUCTION
* appended onto — not replacing — the original system prompt.
*/
describe('prepareAgentStep', () => {
it('returns undefined for the first step', () => {
expect(prepareAgentStep(0, 'SYS')).toBeUndefined();
});
it('returns undefined for a non-final step (just before the last)', () => {
expect(prepareAgentStep(MAX_AGENT_STEPS - 2, 'SYS')).toBeUndefined();
});
it('forces a text-only synthesis on the final allowed step', () => {
const result = prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS');
expect(result).toBeDefined();
expect(result?.toolChoice).toBe('none');
// The original persona is preserved (prefix), not replaced.
expect(result?.system.startsWith('SYS')).toBe(true);
// The synthesis instruction is appended.
expect(result?.system).toContain(FINAL_STEP_INSTRUCTION);
});
});
/**
* flushAssistant (#183): the PURE row builder behind the step-granular durable
* write path. It runs identically for the upfront insert (empty steps,
* 'streaming'), every per-step update, and the terminal finalize — so a future
* background worker can call the same function. These tests pin the four status
* shapes and the `metadata.parts` shape that rowToUiMessage/findAllByChat depend on
* (per-step text + tool parts via assistantParts, in-progress text appended).
*/
describe('flushAssistant', () => {
type AnyPart = Record<string, unknown>;
const toolStep = {
text: 'looked it up',
toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
toolResults: [
{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
],
};
it('upfront seed: empty streaming row (no content, no toolCalls, empty parts)', () => {
const f = flushAssistant([], '', 'streaming');
expect(f.status).toBe('streaming');
expect(f.content).toBe('');
expect(f.toolCalls).toBeNull();
expect(f.metadata.parts).toEqual([]);
// No finishReason while streaming (it is not a terminal state).
expect('finishReason' in f.metadata).toBe(false);
});
it('streaming update folds in finished steps but keeps status streaming', () => {
const f = flushAssistant([toolStep], '', 'streaming');
expect(f.status).toBe('streaming');
expect(f.content).toBe('looked it up');
const parts = f.metadata.parts as AnyPart[];
expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
const toolPart = parts.find((p) => p.type === 'tool-getPage');
expect(toolPart!.state).toBe('output-available');
expect(f.toolCalls).not.toBeNull();
});
it('completed: attaches finishReason + normalized usage + contextTokens + maxContextTokens', () => {
const f = flushAssistant([toolStep], '', 'completed', {
finishReason: 'stop',
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
contextTokens: 15,
maxContextTokens: 200000,
});
expect(f.status).toBe('completed');
expect(f.metadata.finishReason).toBe('stop');
expect(f.metadata.usage).toEqual({
inputTokens: 10,
outputTokens: 5,
totalTokens: 15,
reasoningTokens: undefined,
});
expect(f.metadata.contextTokens).toBe(15);
expect(f.metadata.maxContextTokens).toBe(200000);
});
it('completed: omits maxContextTokens when unset or 0', () => {
// No maxContextTokens in the extra (admin set no context window).
const f = flushAssistant([toolStep], '', 'completed', {
finishReason: 'stop',
contextTokens: 15,
});
expect('maxContextTokens' in f.metadata).toBe(false);
// Explicit 0 is treated the same as unset (no limit -> key omitted).
const f0 = flushAssistant([toolStep], '', 'completed', {
finishReason: 'stop',
contextTokens: 15,
maxContextTokens: 0,
});
expect('maxContextTokens' in f0.metadata).toBe(false);
});
it('error: records the error and a derived finishReason', () => {
const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' });
expect(f.status).toBe('error');
expect(f.content).toBe('partial answer');
expect(f.metadata.error).toBe('boom');
// Derives finishReason from the terminal status when none is supplied.
expect(f.metadata.finishReason).toBe('error');
expect(f.metadata.parts).toEqual([
{ type: 'text', text: 'partial answer' },
]);
});
it('aborted: in-progress text appended last, no error key', () => {
const f = flushAssistant([toolStep], ' and then', 'aborted');
expect(f.status).toBe('aborted');
expect(f.metadata.finishReason).toBe('aborted');
expect('error' in f.metadata).toBe(false);
expect(f.content).toBe('looked it up and then');
const parts = f.metadata.parts as AnyPart[];
expect(parts[parts.length - 1]).toEqual({
type: 'text',
text: ' and then',
});
});
it('combines a finished tool step with trailing in-progress text (error path)', () => {
// The error path captures the PARTIAL answer the user already saw: each
// finished step's text + tool parts, then the in-progress step's text last.
const flushed = flushAssistant([toolStep], ' and then', 'error', {
error: 'boom',
});
const parts = flushed.metadata.parts as AnyPart[];
expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
const toolPart = parts.find((p) => p.type === 'tool-getPage');
expect(toolPart!.state).toBe('output-available');
// In-progress text appended LAST so the parts match the stream order.
expect(parts[parts.length - 1]).toEqual({
type: 'text',
text: ' and then',
});
expect(flushed.content).toBe('looked it up and then');
expect(flushed.toolCalls).not.toBeNull();
expect(flushed.metadata.error).toBe('boom');
});
});
/**
* chatStreamMetadata: attach metadata to the streamed assistant UI message per
* part type — `chatId` on `start` (so the client adopts the real created chat id
* at the first chunk — see #137), and AUTHORITATIVE usage (incl. reasoning
* tokens) on `finish-step` and `finish` so the client's live token counter snaps
* to exact at each step/turn boundary.
*/
describe('chatStreamMetadata', () => {
it('returns { chatId } for the start part', () => {
expect(chatStreamMetadata({ type: 'start' }, 'chat-1')).toEqual({
chatId: 'chat-1',
});
});
it('returns the CUMULATIVE step usage passed in for the finish-step part', () => {
// finish-step usage is per-step in v6; the caller accumulates and passes the
// running sum, which this just wraps.
expect(
chatStreamMetadata(
{ type: 'finish-step', usage: { outputTokens: 100 } },
'chat-1',
{
inputTokens: 500,
outputTokens: 220,
totalTokens: 720,
reasoningTokens: 30,
},
),
).toEqual({
usage: {
inputTokens: 500,
outputTokens: 220,
totalTokens: 720,
reasoningTokens: 30,
},
});
});
it('returns turn usage for the finish part (reasoning from deprecated top-level field)', () => {
expect(
chatStreamMetadata(
{
type: 'finish',
totalUsage: {
inputTokens: 1000,
outputTokens: 250,
totalTokens: 1250,
reasoningTokens: 50,
},
},
'chat-1',
),
).toEqual({
usage: {
inputTokens: 1000,
outputTokens: 250,
totalTokens: 1250,
reasoningTokens: 50,
},
});
});
it('prefers outputTokenDetails.reasoningTokens over the deprecated field (finish)', () => {
expect(
chatStreamMetadata(
{
type: 'finish',
totalUsage: {
outputTokens: 100,
reasoningTokens: 5,
outputTokenDetails: { reasoningTokens: 30 },
},
},
'chat-1',
),
).toEqual({
usage: {
inputTokens: undefined,
outputTokens: 100,
totalTokens: undefined,
reasoningTokens: 30,
},
});
});
it('returns undefined for a finish-step with no accumulated usage', () => {
expect(
chatStreamMetadata({ type: 'finish-step' }, 'chat-1'),
).toBeUndefined();
});
it('returns undefined for an unrelated part (e.g. text-delta)', () => {
expect(
chatStreamMetadata({ type: 'text-delta' }, 'chat-1'),
).toBeUndefined();
});
});
/**
* accumulateStepUsage: sums per-step usage into a running cumulative total so the
* client never sees the live counter jump DOWN on a multi-step agent turn (#151).
*/
describe('accumulateStepUsage', () => {
it('sums every field across two steps', () => {
expect(
accumulateStepUsage(
{
inputTokens: 500,
outputTokens: 100,
totalTokens: 600,
reasoningTokens: 30,
},
{
inputTokens: 520,
outputTokens: 80,
totalTokens: 600,
reasoningTokens: 10,
},
),
).toEqual({
inputTokens: 1020,
outputTokens: 180,
totalTokens: 1200,
reasoningTokens: 40,
});
});
it('returns the step as-is when there is no accumulator yet', () => {
expect(accumulateStepUsage(undefined, { outputTokens: 10 })).toEqual({
outputTokens: 10,
});
});
it('returns the accumulator unchanged when the step usage is absent', () => {
const acc = { outputTokens: 10 };
expect(accumulateStepUsage(acc, undefined)).toBe(acc);
});
it('returns undefined when both sides are absent', () => {
expect(accumulateStepUsage(undefined, undefined)).toBeUndefined();
});
it('keeps a field undefined only when neither side has it', () => {
expect(
accumulateStepUsage({ outputTokens: 5 }, { outputTokens: 7 }),
).toEqual({
inputTokens: undefined,
outputTokens: 12,
totalTokens: undefined,
reasoningTokens: undefined,
});
});
});
/**
* Contract test for the #180 wiring in AiChatService.handle: the external MCP
* toolset must be built BEFORE the system prompt, and its per-server guidance
* threaded into buildSystemPrompt({ mcpInstructions }). The full streaming
* handle() is not unit-testable, so this reproduces the exact prompt-build call
* the service makes with a connected-server toolset and asserts the guidance is
* present. The toolsFor->buildSystemPrompt ordering is additionally enforced at
* compile time (the prompt input now consumes external.instructions).
*/
describe('AiChatService system prompt wiring (#180)', () => {
const workspace = { name: 'Acme' } as unknown as Workspace;
it('includes the external MCP server instructions in the built system prompt', () => {
// Shape returned by mcpClients.toolsFor (only `instructions` matters here).
const external: Pick<
Awaited<ReturnType<McpClientsService['toolsFor']>>,
'instructions'
> = {
instructions: [
{
serverName: 'Tavily',
toolPrefix: 'tavily',
instructions: 'Prefer tavily_search for current events.',
},
],
};
// Exactly the call the service makes after building the external toolset.
const system = buildSystemPrompt({
workspace,
adminPrompt: 'persona',
mcpInstructions: external.instructions,
});
expect(system).toContain('<mcp_tooling');
expect(system).toContain('Tavily');
expect(system).toContain('tavily_*');
expect(system).toContain('Prefer tavily_search for current events.');
});
it('renders no MCP block when there are no external servers (empty instructions)', () => {
const system = buildSystemPrompt({
workspace,
adminPrompt: 'persona',
mcpInstructions: [],
});
expect(system).not.toContain('<mcp_tooling');
});
});
/**
* resolveOpenPageContext: the open page the client sends is attacker-controllable
* (id AND title), so the service must validate the id against the DB and take the
* title from the DB row — never echo the client title (#159, AI edits the wrong
* page). Built with Object.create so the test exercises the real method without
* the service's full dependency graph (the constructor only assigns fields).
*/
describe('AiChatService.resolveOpenPageContext (#159 current-page validation)', () => {
const ws = { id: 'ws-1' } as Workspace;
const user = { id: 'u-1' } as any;
function makeService(opts: {
page?: {
id: string;
workspaceId: string;
title: string | null;
updatedAt?: Date;
} | null;
canView?: boolean | 'throw-other';
}) {
const svc = Object.create(AiChatService.prototype) as AiChatService;
(svc as any).logger = { warn: () => {} };
(svc as any).pageRepo = {
findById: async () => opts.page ?? undefined,
};
(svc as any).pageAccess = {
validateCanView: async () => {
if (opts.canView === 'throw-other') throw new Error('db down');
if (opts.canView === false) throw new ForbiddenException();
return true;
},
};
return svc;
}
const call = (svc: AiChatService, openPage: any) =>
(svc as any).resolveOpenPageContext(openPage, ws, user) as Promise<{
id: string;
title: string;
updatedAt: Date;
} | null>;
it('returns null when no page is open (no id)', async () => {
const svc = makeService({});
expect(await call(svc, null)).toBeNull();
expect(await call(svc, {})).toBeNull();
expect(await call(svc, { title: 'spoofed' })).toBeNull();
});
it('returns null when the page does not exist', async () => {
const svc = makeService({ page: null });
expect(await call(svc, { id: 'p-x' })).toBeNull();
});
it('returns null for a page in a DIFFERENT workspace (tenant isolation)', async () => {
const svc = makeService({
page: { id: 'p-1', workspaceId: 'ws-OTHER', title: 'Secret' },
});
expect(await call(svc, { id: 'p-1' })).toBeNull();
});
it('returns null when the user may not view the page (Forbidden)', async () => {
const svc = makeService({
page: { id: 'p-1', workspaceId: 'ws-1', title: 'Restricted' },
canView: false,
});
expect(await call(svc, { id: 'p-1' })).toBeNull();
});
it('returns null (fail-closed) on a non-Forbidden access-check fault', async () => {
const svc = makeService({
page: { id: 'p-1', workspaceId: 'ws-1', title: 'X' },
canView: 'throw-other',
});
expect(await call(svc, { id: 'p-1' })).toBeNull();
});
it('uses the AUTHORITATIVE DB title + updatedAt, IGNORING the client-supplied title', async () => {
const updatedAt = new Date('2026-07-02T10:00:00Z');
const svc = makeService({
page: { id: 'p-1', workspaceId: 'ws-1', title: 'Real Title B', updatedAt },
canView: true,
});
// The client claims it is on "Page A" but the id points at page B.
const result = await call(svc, { id: 'p-1', title: 'Page A' });
// updatedAt (#274 page-change fast path) is carried through from the DB row.
expect(result).toEqual({ id: 'p-1', title: 'Real Title B', updatedAt });
});
it('coerces a null DB title to an empty string', async () => {
const updatedAt = new Date('2026-07-02T10:00:00Z');
const svc = makeService({
page: { id: 'p-1', workspaceId: 'ws-1', title: null, updatedAt },
canView: true,
});
expect(await call(svc, { id: 'p-1' })).toEqual({
id: 'p-1',
title: '',
updatedAt,
});
});
});
/**
* sameInstant (#274 page-change fast path): equal instants => the open page is
* untouched since the snapshot, so detection can skip the render + diff. A
* missing/invalid timestamp must fall through (return false) so a bad value never
* causes a false "nothing changed" skip that would lose a human edit.
*/
describe('sameInstant', () => {
it('true for identical instants (Date and equivalent string)', () => {
const d = new Date('2026-07-02T10:00:00Z');
expect(sameInstant(d, new Date(d.getTime()))).toBe(true);
expect(sameInstant(d, '2026-07-02T10:00:00.000Z')).toBe(true);
});
it('false for different instants', () => {
expect(
sameInstant(
new Date('2026-07-02T10:00:00Z'),
new Date('2026-07-02T10:00:01Z'),
),
).toBe(false);
});
it('false when either side is null/undefined/invalid', () => {
const d = new Date('2026-07-02T10:00:00Z');
expect(sameInstant(null, d)).toBe(false);
expect(sameInstant(d, undefined)).toBe(false);
expect(sameInstant(d, 'not-a-date')).toBe(false);
});
});
/**
* Page-change lifecycle (#274): detectPageChange (turn start) + snapshotOpenPage
* (turn end) exercised with in-memory fakes (Object.create — no Nest graph, no
* DB). Covers detection happy path / no-change / first-turn-seed-only / fast
* path, the snapshot seed + deleted-page skip, and — the key regression — the
* abort/error branch: after an aborted turn where the AGENT edited the page, the
* snapshot must advance so the next turn does NOT mis-report the agent's own edit
* as a user edit.
*/
describe('AiChatService page-change lifecycle (#274)', () => {
const workspace = { id: 'ws-1' } as Workspace;
const user = { id: 'u-1' } as any;
const sessionId = 'sess-1';
const T0 = new Date('2026-07-02T10:00:00Z');
const T1 = new Date('2026-07-02T10:05:00Z');
function makeService(opts: {
snapshot?: { contentMd: string; pageUpdatedAt: Date };
exportMd?: string;
// pageRepo.findById result used by snapshotOpenPage. `null` models a deleted
// page; omitted defaults to a same-workspace page at T1.
page?: { workspaceId: string; updatedAt: Date } | null;
}) {
const store = new Map<string, any>();
if (opts.snapshot) {
store.set('c1|p1', {
chatId: 'c1',
pageId: 'p1',
workspaceId: 'ws-1',
...opts.snapshot,
});
}
// Mutable so a test can reconfigure between the abort-snapshot phase and the
// next-turn detect phase.
const state = {
exportMd: opts.exportMd ?? '',
page:
opts.page === undefined
? { workspaceId: 'ws-1', updatedAt: T1 }
: opts.page,
};
const exportCalls: string[] = [];
const svc = Object.create(AiChatService.prototype) as AiChatService;
(svc as any).logger = { warn: () => {}, error: () => {} };
(svc as any).aiChatPageSnapshotRepo = {
findByChatPage: async (chatId: string, pageId: string) =>
store.get(`${chatId}|${pageId}`),
upsert: async (v: any) => {
store.set(`${v.chatId}|${v.pageId}`, { ...v });
return v;
},
};
(svc as any).tools = {
exportPageMarkdown: async (
_u: unknown,
_s: unknown,
_ws: unknown,
_c: unknown,
pageId: string,
) => {
exportCalls.push(pageId);
return state.exportMd;
},
};
(svc as any).pageRepo = { findById: async () => state.page };
return { svc, store, state, exportCalls };
}
const detect = (
svc: AiChatService,
openPage: { id: string; title: string; updatedAt: Date } | null,
) =>
(svc as any).detectPageChange(
'c1',
openPage,
workspace,
user,
sessionId,
) as Promise<{ title: string; diff: string } | null>;
const snapshot = (svc: AiChatService) =>
(svc as any).snapshotOpenPage(
'c1',
'p1',
workspace,
user,
sessionId,
) as Promise<void>;
it('detect: no note when the page is not open', async () => {
const { svc } = makeService({});
expect(await detect(svc, null)).toBeNull();
});
it('detect: first turn (no snapshot) seeds only, no note', async () => {
const { svc, exportCalls } = makeService({});
const res = await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T0 });
expect(res).toBeNull();
// No snapshot => no render/diff at all.
expect(exportCalls).toHaveLength(0);
});
it('detect: fast path skips render+diff when updatedAt is unchanged', async () => {
const { svc, exportCalls } = makeService({
snapshot: { contentMd: 'S0', pageUpdatedAt: T0 },
});
const res = await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T0 });
expect(res).toBeNull();
expect(exportCalls).toHaveLength(0);
});
it('detect: user edit between turns yields a titled note + diff', async () => {
const { svc } = makeService({
snapshot: { contentMd: '# Title\n\nold body', pageUpdatedAt: T0 },
exportMd: '# Title\n\nnew body',
});
const res = await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T1 });
expect(res).not.toBeNull();
expect(res!.title).toBe('Doc');
expect(res!.diff).toContain('-old body');
expect(res!.diff).toContain('+new body');
});
it('detect: no note when content is unchanged despite a bumped updatedAt', async () => {
const { svc } = makeService({
snapshot: { contentMd: 'same content', pageUpdatedAt: T0 },
exportMd: 'same content',
});
expect(
await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T1 }),
).toBeNull();
});
it('snapshot: seeds the current Markdown + page updatedAt', async () => {
const { svc, store } = makeService({
exportMd: 'Sa',
page: { workspaceId: 'ws-1', updatedAt: T1 },
});
await snapshot(svc);
const row = store.get('c1|p1');
expect(row.contentMd).toBe('Sa');
expect(row.pageUpdatedAt).toBe(T1);
});
it('snapshot: skips the write when the page was deleted during the turn', async () => {
const { svc, store } = makeService({ exportMd: 'X', page: null });
await snapshot(svc);
expect(store.get('c1|p1')).toBeUndefined();
});
it('detect: swallows a best-effort fault (export throws) and returns null', async () => {
// Snapshot present + a bumped updatedAt, so detection gets past the fast path
// and calls exportPageMarkdown — which throws. The catch must downgrade to
// "no note" (null) so the turn is never broken (#274 F4).
const { svc } = makeService({
snapshot: { contentMd: 'S0', pageUpdatedAt: T0 },
});
(svc as any).tools.exportPageMarkdown = async () => {
throw new Error('export failed');
};
expect(
await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T1 }),
).toBeNull();
});
it('detect: swallows a repo fault (findByChatPage throws) and returns null', async () => {
const { svc } = makeService({
snapshot: { contentMd: 'S0', pageUpdatedAt: T0 },
});
(svc as any).aiChatPageSnapshotRepo.findByChatPage = async () => {
throw new Error('db down');
};
expect(
await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T1 }),
).toBeNull();
});
it('snapshot: swallows a best-effort fault (upsert throws) and does not throw', async () => {
const { svc } = makeService({
exportMd: 'Sa',
page: { workspaceId: 'ws-1', updatedAt: T1 },
});
(svc as any).aiChatPageSnapshotRepo.upsert = async () => {
throw new Error('write failed');
};
await expect(snapshot(svc)).resolves.toBeUndefined();
});
it('abort branch: advancing the snapshot after an agent edit prevents a false note next turn', async () => {
// Previous turn ended with the page at S0 @ T0.
const { svc, store, state } = makeService({
snapshot: { contentMd: 'S0 body', pageUpdatedAt: T0 },
});
// This turn the AGENT edited the page (committed to the DB) to "Sa body",
// bumping updatedAt to T1, and then the turn ABORTED. The abort path runs the
// same snapshot, which must advance the snapshot to what the agent left.
state.exportMd = 'Sa body';
state.page = { workspaceId: 'ws-1', updatedAt: T1 };
await snapshot(svc);
expect(store.get('c1|p1').contentMd).toBe('Sa body');
expect(store.get('c1|p1').pageUpdatedAt).toBe(T1);
// Next turn: nobody edited further; the page is still Sa @ T1. The agent's OWN
// edit must NOT surface as a "user edited the page" note.
const res = await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T1 });
expect(res).toBeNull();
});
it('abort branch: WITHOUT advancing the snapshot, the agent edit would wrongly surface (proves the fix)', async () => {
// Same setup but the snapshot is NOT advanced (the pre-fix behaviour where
// only onFinish snapshotted). The agent's committed edit then looks like a
// between-turns user edit — exactly the bug FIX 1 removes.
const { svc } = makeService({
snapshot: { contentMd: 'S0 body', pageUpdatedAt: T0 },
exportMd: 'Sa body',
});
const res = await detect(svc, { id: 'p1', title: 'Doc', updatedAt: T1 });
expect(res).not.toBeNull();
expect(res!.diff).toContain('+Sa body');
});
});
/**
* isInterruptResume (#198): the pure guard that decides whether the interrupt
* note is injected for a turn. The client "send now" flag is only a hint; it is
* honoured ONLY when the preceding assistant turn (history[len-2], since the new
* user row is the tail) really ended unfinished ('aborted', or still 'streaming'
* during the abort/resend race). A spoofed flag on an ordinary turn is ignored.
*/
describe('isInterruptResume', () => {
// history tail is the just-inserted user row; [len-2] is the previous turn.
const withPrev = (
prev: { role: string; status?: string | null } | null,
): Array<{ role: string; status?: string | null }> =>
prev
? [prev, { role: 'user', status: null }]
: [{ role: 'user', status: null }];
it('false when the client flag is not set', () => {
expect(
isInterruptResume(withPrev({ role: 'assistant', status: 'aborted' }), undefined),
).toBe(false);
expect(
isInterruptResume(withPrev({ role: 'assistant', status: 'aborted' }), false),
).toBe(false);
});
it('true when flagged AND the previous assistant turn is aborted', () => {
expect(
isInterruptResume(withPrev({ role: 'assistant', status: 'aborted' }), true),
).toBe(true);
});
it('true when flagged AND the previous assistant turn is still streaming (race)', () => {
expect(
isInterruptResume(withPrev({ role: 'assistant', status: 'streaming' }), true),
).toBe(true);
});
it('false when flagged but the previous assistant turn completed normally', () => {
expect(
isInterruptResume(withPrev({ role: 'assistant', status: 'completed' }), true),
).toBe(false);
});
it('false when flagged but the previous turn is not an assistant turn', () => {
expect(
isInterruptResume(withPrev({ role: 'user', status: 'aborted' }), true),
).toBe(false);
});
it('false when there is no preceding turn (only the new user row)', () => {
expect(isInterruptResume(withPrev(null), true)).toBe(false);
});
});