feat(ai-chat): deferred tool loading (tiers + loadTools meta-tool) (#332)
The in-app AI agent shipped all ~41 tool schemas on every model step. This adds a two-tier catalog: core tools (frequent or one-line) stay always-active; the rest are advertised as a compact catalog and their full schema is fetched on demand via the loadTools meta-tool, wired through ai@6 prepareStep's per-step activeTools. - tools/tool-tiers.ts: CORE_TOOL_KEYS, INLINE_TOOL_TIERS, applyLoadTools, catalog builders (+ tool-tiers.spec.ts, 13 cases). - ai-chat.service.ts prepareAgentStep: returns activeTools = [...CORE_TOOL_KEYS, loadTools, ...activatedTools]; per-turn activated Set. - ai-chat.prompt.ts: buildToolCatalogBlock renders the deferred catalog. - mcp/tool-specs.ts: tier + catalogLine metadata (external snake_case /mcp transport unchanged). - EnvironmentService.isAiChatDeferredToolsEnabled(): AI_CHAT_DEFERRED_TOOLS, default ON per issue intent (kill-switch =false restores old behavior). Gate: server ai-chat 631/631, tool-tiers 13/13, mcp 472/472, tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
committed by
agent_coder
parent
4369bbc53d
commit
e431b33bb1
@@ -1,4 +1,8 @@
|
||||
import { buildSystemPrompt, buildMcpToolingBlock } from './ai-chat.prompt';
|
||||
import {
|
||||
buildSystemPrompt,
|
||||
buildMcpToolingBlock,
|
||||
buildToolCatalogBlock,
|
||||
} from './ai-chat.prompt';
|
||||
import { Workspace } from '@docmost/db/types/entity.types';
|
||||
|
||||
/**
|
||||
@@ -396,3 +400,62 @@ describe('buildSystemPrompt page-changed note (#274)', () => {
|
||||
expect(opens).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * #332 deferred tool loading, prompt side: exercises the `<tool_catalog>` block
 * builder on its own (disabled toggle, empty catalog, and the rendered block).
 */
describe('buildToolCatalogBlock (#332)', () => {
  // Two deferred entries shared by every case in this suite.
  const deferredEntries = [
    { name: 'createPage', catalogLine: 'createPage — create a new page.' },
    { name: 'transformPage', catalogLine: 'transformPage — run a JS transform.' },
  ];

  it('renders nothing when the feature is disabled', () => {
    const rendered = buildToolCatalogBlock(deferredEntries, false);
    expect(rendered).toBe('');
  });

  it('renders nothing when the catalog is empty', () => {
    // Both an empty list and a missing list must produce no block.
    for (const emptyCatalog of [[], undefined]) {
      expect(buildToolCatalogBlock(emptyCatalog, true)).toBe('');
    }
  });

  it('renders the verbatim header + each deferred catalogLine when enabled', () => {
    const rendered = buildToolCatalogBlock(deferredEntries, true);
    // Opening tag, header sentences, one bullet per entry, closing tag.
    const expectedFragments = [
      '<tool_catalog note="deferred tools;',
      'NEVER tell the user you lack a capability',
      'Deferred tools (name — purpose):',
      '- createPage — create a new page.',
      '- transformPage — run a JS transform.',
      '</tool_catalog>',
    ];
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment);
    }
  });
});
|
||||
|
||||
describe('buildSystemPrompt <tool_catalog> gating (#332)', () => {
  const workspace = { name: 'Acme' } as unknown as Workspace;
  const createPageLine = 'createPage — create a new page.';
  const catalog = [{ name: 'createPage', catalogLine: createPageLine }];

  // Build a prompt for the fixture workspace with the toggle in the given state.
  const promptWithToggle = (deferredToolsEnabled: boolean): string =>
    buildSystemPrompt({
      workspace,
      deferredToolsEnabled,
      toolCatalog: catalog,
    });

  it('omits the catalog when the toggle is off (unchanged behavior)', () => {
    const prompt = promptWithToggle(false);
    expect(prompt).not.toContain('<tool_catalog');
    expect(prompt).not.toContain(createPageLine);
  });

  it('includes the catalog (deferred lines only) when enabled', () => {
    const prompt = promptWithToggle(true);
    expect(prompt).toContain('<tool_catalog');
    expect(prompt).toContain(createPageLine);
    // Core tools never reach the catalog: the caller only passes deferred lines.
    expect(prompt).not.toContain('searchPages —');
  });
});
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { Workspace } from '@docmost/db/types/entity.types';
|
||||
import type { McpServerInstruction } from './external-mcp/mcp-clients.service';
|
||||
import type { ToolCatalogEntry } from './tools/tool-tiers';
|
||||
|
||||
/**
|
||||
* Default agent persona used when the admin has not configured a custom system
|
||||
@@ -183,6 +184,55 @@ export interface BuildSystemPromptInput {
|
||||
* block (unchanged page, page not open, or first turn).
|
||||
*/
|
||||
pageChanged?: { title: string; diff: string } | null;
|
||||
/**
|
||||
* Deferred-tool loading toggle (#332). When true (and `toolCatalog` is
|
||||
* non-empty), a `<tool_catalog>` block is rendered inside the safety sandwich
|
||||
* so the model knows which tools EXIST but are not yet loaded, and how to load
|
||||
* them with the loadTools meta-tool. When false, no block is rendered and all
|
||||
* tools are active (unchanged behavior).
|
||||
*/
|
||||
deferredToolsEnabled?: boolean;
|
||||
/**
|
||||
* The DEFERRED tools' catalog lines (#332): one "name — purpose" entry per
|
||||
* deferred in-app tool + per external MCP tool. Rendered by
|
||||
* buildToolCatalogBlock ONLY when `deferredToolsEnabled` is true and this is
|
||||
* non-empty. CORE tools are never here (they are always active).
|
||||
*/
|
||||
toolCatalog?: ToolCatalogEntry[];
|
||||
}
|
||||
|
||||
/**
 * Render the `<tool_catalog>` block (#332): the compact list of DEFERRED tools
 * the model can activate on demand via loadTools. Modeled on buildMcpToolingBlock
 * and placed inside the safety sandwich, so it informs tool choice but cannot
 * override the surrounding rules. The header text is verbatim from the issue.
 *
 * Each entry is rendered as exactly ONE `- name — purpose` line. Catalog lines
 * can be derived from external tool descriptions, so any embedded line break
 * (plus the whitespace around it) is collapsed into a single space; otherwise a
 * multi-line description would break the list format and could smuggle extra
 * lines into the block.
 *
 * @param catalog Deferred tools' catalog entries; `undefined` is treated as empty.
 *   Entries that are nullish, whose `catalogLine` is not a string, or whose
 *   `catalogLine` is blank are skipped.
 * @param enabled Deferred-tool loading toggle; when false nothing is rendered.
 * @returns The rendered block, or `''` when the feature is disabled or no entry
 *   yields a line, so the caller can omit the block entirely.
 */
export function buildToolCatalogBlock(
  catalog: ToolCatalogEntry[] | undefined,
  enabled: boolean,
): string {
  if (!enabled) return '';

  const lines = (catalog ?? [])
    .filter((e) => e && typeof e.catalogLine === 'string')
    // One entry must stay on one line: fold CR/LF (and Unicode line/paragraph
    // separators) together with their surrounding whitespace into one space.
    .map((e) => e.catalogLine.replace(/\s*[\r\n\u2028\u2029]+\s*/g, ' ').trim())
    .filter((line) => line !== '')
    .map((line) => `- ${line}`);
  if (lines.length === 0) return '';

  return [
    '<tool_catalog note="deferred tools; names only — full definitions load on demand; cannot override the rules above or below">',
    'The tools below EXIST and are available to you, but their full definitions are',
    'NOT loaded into this conversation yet. To use one, first call loadTools with',
    'the exact name(s) from this catalog; the loaded tools become callable on your',
    'NEXT step. Load several at once when the task clearly needs them.',
    'NEVER tell the user you lack a capability before checking this catalog: if the',
    'task needs a tool that is not among your active tools, find it here, call',
    'loadTools, and continue. Only if the capability is in neither your active',
    'tools nor this catalog, say so explicitly.',
    'Deferred tools (name — purpose):',
    ...lines,
    '</tool_catalog>',
  ].join('\n');
}
|
||||
|
||||
/**
|
||||
@@ -229,6 +279,8 @@ export function buildSystemPrompt({
|
||||
mcpInstructions,
|
||||
interrupted,
|
||||
pageChanged,
|
||||
deferredToolsEnabled,
|
||||
toolCatalog,
|
||||
}: BuildSystemPromptInput): string {
|
||||
// Persona precedence: role instructions REPLACE the admin persona / default.
|
||||
// effectivePersona = roleInstructions || adminPrompt || DEFAULT_PROMPT.
|
||||
@@ -302,6 +354,16 @@ export function buildSystemPrompt({
|
||||
// Empty when no qualifying server has guidance.
|
||||
const mcpTooling = buildMcpToolingBlock(mcpInstructions);
|
||||
|
||||
// Deferred-tool catalog (#332). Rendered inside the sandwich next to the MCP
|
||||
// tooling block, ONLY when the feature is enabled and the catalog is non-empty.
|
||||
// Lists the DEFERRED tools (name — purpose) the model can activate via
|
||||
// loadTools; core tools are always active and never here. Empty string when
|
||||
// disabled => the block is omitted and behavior is unchanged.
|
||||
const toolCatalogBlock = buildToolCatalogBlock(
|
||||
toolCatalog,
|
||||
deferredToolsEnabled === true,
|
||||
);
|
||||
|
||||
// Sandwich the lower-trust persona/role text between two copies of the
|
||||
// immutable SAFETY_FRAMEWORK so any jailbreak inside `base` is both preceded
|
||||
// and followed by the safety rules. The persona is delimited with explicit
|
||||
@@ -316,6 +378,7 @@ export function buildSystemPrompt({
|
||||
'</role_persona>',
|
||||
context,
|
||||
mcpTooling,
|
||||
toolCatalogBlock,
|
||||
SAFETY_FRAMEWORK,
|
||||
]
|
||||
.filter((part) => part !== '')
|
||||
|
||||
@@ -53,6 +53,7 @@ describe('AiChatService.resolveRoleForRequest', () => {
|
||||
aiAgentRoleRepo as never,
|
||||
{} as never, // pageRepo
|
||||
{} as never, // pageAccess
|
||||
{} as never, // environment
|
||||
);
|
||||
return { service, aiChatRepo, aiAgentRoleRepo };
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ describe('AiChatService.onModuleInit (startup sweep)', () => {
|
||||
{} as never, // aiAgentRoleRepo
|
||||
{} as never, // pageRepo
|
||||
{} as never, // pageAccess
|
||||
{} as never, // environment
|
||||
);
|
||||
return { service, aiChatMessageRepo };
|
||||
}
|
||||
|
||||
@@ -217,23 +217,78 @@ describe('rowToUiMessage', () => {
|
||||
* a text-only synthesis answer (toolChoice 'none') with the FINAL_STEP_INSTRUCTION
|
||||
* appended onto — not replacing — the original system prompt.
|
||||
*/
|
||||
// prepareAgentStep returns a union; these helpers cast a result to the variant
// a given test expects so its fields can be read directly.
const asLockdown = (r: ReturnType<typeof prepareAgentStep>) =>
  r as { toolChoice: 'none'; system: string };
const asActive = (r: ReturnType<typeof prepareAgentStep>) =>
  r as { activeTools: string[] };

describe('prepareAgentStep', () => {
  const finalStep = MAX_AGENT_STEPS - 1;

  // --- toggle OFF (default): unchanged behavior ---
  it('returns undefined for the first step (toggle off)', () => {
    const result = prepareAgentStep(0, 'SYS');
    expect(result).toBeUndefined();
  });

  it('returns undefined for a non-final step (toggle off)', () => {
    const result = prepareAgentStep(MAX_AGENT_STEPS - 2, 'SYS');
    expect(result).toBeUndefined();
  });

  it('forces a text-only synthesis on the final allowed step (toggle off)', () => {
    const result = asLockdown(prepareAgentStep(finalStep, 'SYS'));
    expect(result).toBeDefined();
    expect(result.toolChoice).toBe('none');
    // The original system prompt stays as the prefix (it is not replaced)…
    expect(result.system.startsWith('SYS')).toBe(true);
    // …and the synthesis instruction is appended to it.
    expect(result.system).toContain(FINAL_STEP_INSTRUCTION);
  });

  it('does NOT narrow activeTools when the toggle is off', () => {
    const activated = new Set(['createPage']);
    expect(prepareAgentStep(0, 'SYS', activated, false)).toBeUndefined();
  });

  // --- toggle ON (#332): deferred tool visibility ---
  it('a non-final step exposes CORE + loadTools + activatedTools', () => {
    const { activeTools } = asActive(
      prepareAgentStep(0, 'SYS', new Set<string>(), true),
    );
    // Core tools (searchInPage is core per #330) and the loadTools meta-tool.
    for (const visible of ['searchPages', 'searchInPage', 'editPageText', 'loadTools']) {
      expect(activeTools).toContain(visible);
    }
    // A deferred tool is not active until it has been loaded.
    for (const hidden of ['createPage', 'transformPage']) {
      expect(activeTools).not.toContain(hidden);
    }
  });

  it('adding a name to activatedTools makes it appear on the next step', () => {
    const activated = new Set<string>();
    const before = asActive(prepareAgentStep(1, 'SYS', activated, true));
    // Not loaded yet, so createPage is not active.
    expect(before.activeTools).not.toContain('createPage');

    // loadTools grows the SAME set…
    activated.add('createPage');

    // …so the following step sees the new name.
    const after = asActive(prepareAgentStep(2, 'SYS', activated, true));
    expect(after.activeTools).toContain('createPage');
    expect(after.activeTools).toContain('loadTools');
  });

  it('accepts an array for activatedTools too', () => {
    const { activeTools } = asActive(
      prepareAgentStep(0, 'SYS', ['transformPage'], true),
    );
    expect(activeTools).toContain('transformPage');
    expect(activeTools).toContain('loadTools');
  });

  it('final-step lockdown WINS even when the toggle is on', () => {
    const result = asLockdown(
      prepareAgentStep(finalStep, 'SYS', new Set(['createPage']), true),
    );
    // Lockdown shape (toolChoice none + synthesis), not the activeTools shape.
    expect(result.toolChoice).toBe('none');
    expect(result.system).toContain(FINAL_STEP_INSTRUCTION);
    const maybeActive = result as unknown as { activeTools?: string[] };
    expect(maybeActive.activeTools).toBeUndefined();
  });
});
|
||||
|
||||
|
||||
@@ -30,7 +30,15 @@ import {
|
||||
} from '@docmost/db/types/entity.types';
|
||||
import { AiChatToolsService } from './tools/ai-chat-tools.service';
|
||||
import { McpClientsService } from './external-mcp/mcp-clients.service';
|
||||
import { EnvironmentService } from '../../integrations/environment/environment.service';
|
||||
import { buildSystemPrompt } from './ai-chat.prompt';
|
||||
import {
|
||||
CORE_TOOL_KEYS,
|
||||
CORE_TOOL_SET,
|
||||
LOAD_TOOLS_NAME,
|
||||
makeLoadToolsTool,
|
||||
buildExternalToolCatalog,
|
||||
} from './tools/tool-tiers';
|
||||
import { computePageChange } from './page-change/page-change.util';
|
||||
import { roleModelOverride } from './roles/role-model-config';
|
||||
import {
|
||||
@@ -54,24 +62,52 @@ const FINAL_STEP_INSTRUCTION =
|
||||
'language. If the information is incomplete, say so explicitly: summarize ' +
|
||||
'what you found, what is still missing, and give your best partial conclusion.';
|
||||
|
||||
// Pure and unit-testable: computes the per-step overrides for the agent loop.
// It does two things, in this order of precedence:
//   1. Final-step lockdown (always applied): once `stepNumber` reaches the last
//      allowed step (MAX_AGENT_STEPS - 1), force a text-only synthesis answer
//      (toolChoice 'none' + FINAL_STEP_INSTRUCTION). This wins over (2).
//   2. Deferred tool visibility (#332): when `deferredEnabled` is true and this
//      is not the final step, narrow `activeTools` to the CORE tools, the
//      loadTools meta-tool, and every name currently in `activatedTools`.
//      Anything else stays in the <tool_catalog> until the model loads it.
// With `deferredEnabled` false the result is undefined on normal steps (all
// tools active) and the lockdown on the final step.
//
// `system` is the in-scope system prompt. The instruction is CONCATENATED onto
// it because a bare per-step `system` override would REPLACE the whole system
// prompt for that step and lose the persona/context.
//
// `activatedTools` is per-turn mutable state owned by the streaming loop (a
// closure Set that loadTools grows). It is handed in as an argument — not read
// from module state, not persisted — so this function depends only on its
// arguments. A Set or a plain array are both accepted.
//
// NOTE: at AI SDK v7 the per-step `system` field is renamed to `instructions`.
// On v6 (`^6.0.134`) `system` is the correct field — adjust when bumping.
export function prepareAgentStep(
  stepNumber: number,
  system: string,
  activatedTools: ReadonlySet<string> | readonly string[] = [],
  deferredEnabled = false,
):
  | { toolChoice: 'none'; system: string }
  | { activeTools: string[] }
  | undefined {
  // (1) The lockdown is checked first so it applies whatever the toggle says.
  const isFinalStep = stepNumber >= MAX_AGENT_STEPS - 1;
  if (isFinalStep) {
    const lockedSystem = `${system}\n\n${FINAL_STEP_INSTRUCTION}`;
    return { toolChoice: 'none', system: lockedSystem };
  }

  // Toggle off: no override, every tool stays active.
  if (!deferredEnabled) {
    return undefined;
  }

  // (2) Sets and arrays are both iterable, so one spread covers either input.
  return {
    activeTools: [...CORE_TOOL_KEYS, LOAD_TOOLS_NAME, ...activatedTools],
  };
}
|
||||
|
||||
@@ -206,6 +242,9 @@ export class AiChatService implements OnModuleInit {
|
||||
private readonly aiAgentRoleRepo: AiAgentRoleRepo,
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly pageAccess: PageAccessService,
|
||||
// Reads the AI_CHAT_DEFERRED_TOOLS toggle (#332). Injected last so existing
|
||||
// positional constructor callers (tests) only append one stub.
|
||||
private readonly environment: EnvironmentService,
|
||||
) {}
|
||||
|
||||
/**
|
||||
@@ -625,9 +664,25 @@ export class AiChatService implements OnModuleInit {
|
||||
// Build the system prompt + Docmost toolset. If either throws after the
|
||||
// external MCP lease was taken above, release the lease before rethrowing so
|
||||
// the leased transports are not leaked (#185 review).
|
||||
// Deferred tool loading toggle (#332). When ON, the model sees a compact
|
||||
// <tool_catalog> and only CORE tools + loadTools are active each step; other
|
||||
// tools (fat/rare in-app tools + ALL external MCP tools) load on demand. When
|
||||
// OFF, every tool is active and nothing below changes.
|
||||
const deferredEnabled = this.environment.isAiChatDeferredToolsEnabled();
|
||||
|
||||
let system: string;
|
||||
let docmostTools: Awaited<ReturnType<AiChatToolsService['forUser']>>;
|
||||
try {
|
||||
// Assemble the deferred catalog for the system prompt: hand-written lines
|
||||
// for the in-app deferred tools + a derived line for each external MCP tool
|
||||
// (also deferred by default). Only built when the feature is enabled.
|
||||
const toolCatalog = deferredEnabled
|
||||
? [
|
||||
...(await this.tools.getInAppDeferredCatalog()),
|
||||
...buildExternalToolCatalog(external.tools),
|
||||
]
|
||||
: [];
|
||||
|
||||
system = buildSystemPrompt({
|
||||
workspace,
|
||||
adminPrompt: resolved?.systemPrompt,
|
||||
@@ -644,6 +699,10 @@ export class AiChatService implements OnModuleInit {
|
||||
// Detected between-turns human edit to the open page (#274): adds the
|
||||
// page_changed note + unified diff so the agent doesn't overwrite it.
|
||||
pageChanged,
|
||||
// Deferred tool loading (#332): renders the <tool_catalog> block (only
|
||||
// when enabled + non-empty) so the model can activate deferred tools.
|
||||
deferredToolsEnabled: deferredEnabled,
|
||||
toolCatalog,
|
||||
});
|
||||
|
||||
// Pass the resolved chatId so the write tools can mint provenance tokens
|
||||
@@ -664,7 +723,31 @@ export class AiChatService implements OnModuleInit {
|
||||
throw err;
|
||||
}
|
||||
|
||||
const tools = { ...external.tools, ...docmostTools };
|
||||
// Base toolset: external MCP tools + Docmost in-app tools (Docmost wins on a
|
||||
// name clash — external are namespaced, so no clash is expected).
|
||||
const baseTools = { ...external.tools, ...docmostTools };
|
||||
|
||||
// Deferred tool loading state (#332), scoped to THIS streaming loop:
|
||||
// - `activatedTools` is per-TURN mutable state — a fresh closure Set created
|
||||
// per streamText call, NOT module-global and NOT persisted, so a new turn
|
||||
// starts cold. loadTools.execute adds to it; prepareAgentStep reads it to
|
||||
// widen `activeTools` on the NEXT step.
|
||||
// - `validDeferredNames` = every tool that is NOT core (the in-app deferred
|
||||
// tools + ALL external MCP tools), computed from the ACTUAL toolset so an
|
||||
// external tool is loadable by its namespaced name. loadTools rejects any
|
||||
// name outside this set.
|
||||
const activatedTools = new Set<string>();
|
||||
const validDeferredNames = new Set<string>(
|
||||
Object.keys(baseTools).filter((k) => !CORE_TOOL_SET.has(k)),
|
||||
);
|
||||
// Add the loadTools meta-tool ONLY when the feature is enabled; when off the
|
||||
// toolset and behavior are exactly as before.
|
||||
const tools = deferredEnabled
|
||||
? {
|
||||
...baseTools,
|
||||
[LOAD_TOOLS_NAME]: makeLoadToolsTool(activatedTools, validDeferredNames),
|
||||
}
|
||||
: baseTools;
|
||||
|
||||
// Accumulate the turn's streamed output so a provider error / disconnect can
|
||||
// persist the PARTIAL answer the user already saw — the SDK's onError/onAbort
|
||||
@@ -799,7 +882,8 @@ export class AiChatService implements OnModuleInit {
|
||||
// ends with no assistant text (an empty turn). prepareAgentStep forbids
|
||||
// further tool calls and appends a synthesis instruction on that step,
|
||||
// concatenated onto the original `system` so the persona is preserved.
|
||||
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
|
||||
prepareStep: ({ stepNumber }) =>
|
||||
prepareAgentStep(stepNumber, system, activatedTools, deferredEnabled),
|
||||
abortSignal: signal,
|
||||
onChunk: ({ chunk }) => {
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
|
||||
|
||||
@@ -17,6 +17,10 @@ import { resolveCurrentPageResult } from './current-page.util';
|
||||
import { parseNodeArg } from './parse-node-arg';
|
||||
import { modelFriendlyInput } from './model-friendly-input';
|
||||
import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
|
||||
import {
|
||||
buildInAppDeferredCatalog,
|
||||
type ToolCatalogEntry,
|
||||
} from './tool-tiers';
|
||||
|
||||
/**
|
||||
* Per-user, per-request adapter that exposes Docmost READ operations to the
|
||||
@@ -123,6 +127,18 @@ export class AiChatToolsService {
|
||||
return client.exportPageMarkdown(pageId);
|
||||
}
|
||||
|
||||
/**
 * Produce the IN-APP part of the deferred `<tool_catalog>` (#332).
 *
 * Awaits the @docmost/mcp loader for its shared tool specs and passes them to
 * buildInAppDeferredCatalog, which returns one "name — purpose" entry per
 * DEFERRED in-app tool. Core tools are always active and are not listed.
 * External MCP tools are not covered here; the caller catalogues them
 * separately.
 *
 * @returns The catalog entries for the deferred in-app tools.
 */
async getInAppDeferredCatalog(): Promise<ToolCatalogEntry[]> {
  const docmostMcp = await loadDocmostMcp();
  return buildInAppDeferredCatalog(docmostMcp.sharedToolSpecs);
}
|
||||
|
||||
async forUser(
|
||||
user: User,
|
||||
sessionId: string,
|
||||
|
||||
@@ -241,6 +241,11 @@ export interface SharedToolSpec {
|
||||
mcpName: string;
|
||||
inAppKey: string;
|
||||
description: string;
|
||||
// Deferred-tool metadata (#332). Optional in this mirror so an older/stale
|
||||
// @docmost/mcp build (pre-#332) still type-checks; the in-app catalog builder
|
||||
// reads them defensively. The external /mcp server ignores both fields.
|
||||
tier?: 'core' | 'deferred';
|
||||
catalogLine?: string;
|
||||
// Loose `z` on purpose: the registry is zod-agnostic so the server can pass
|
||||
// its own zod (v4) and the MCP package its own (v3) into the same builder.
|
||||
buildShape?: (z: any) => Record<string, unknown>;
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
import {
|
||||
CORE_TOOL_KEYS,
|
||||
CORE_TOOL_SET,
|
||||
LOAD_TOOLS_NAME,
|
||||
LOAD_TOOLS_DESCRIPTION,
|
||||
INLINE_TOOL_TIERS,
|
||||
buildInAppDeferredCatalog,
|
||||
buildExternalToolCatalog,
|
||||
shortenForCatalog,
|
||||
applyLoadTools,
|
||||
} from './tool-tiers';
|
||||
// The real shared registry, imported from source (same approach as the
|
||||
// SHARED_TOOL_SPECS contract spec) so the tier metadata is checked against
|
||||
// exactly what @docmost/mcp ships.
|
||||
import { SHARED_TOOL_SPECS } from '../../../../../../packages/mcp/src/tool-specs';
|
||||
|
||||
/**
 * #332 deferred tool loading: specs for the tier metadata, the catalog
 * assembly, and the loadTools meta-tool. These are pure units — no Nest graph
 * and no @docmost/mcp build, because the registry is imported from TS source.
 */

describe('tool tier metadata (#332)', () => {
  it('core set is the documented 13 + searchInPage (14)', () => {
    expect(CORE_TOOL_KEYS).toHaveLength(14);
    // searchInPage (#330) was promoted to core.
    const hasSearchInPage = CORE_TOOL_SET.has('searchInPage');
    expect(hasSearchInPage).toBe(true);
    // loadTools is the meta-tool; it is not one of the ordinary core keys.
    const hasLoadTools = CORE_TOOL_SET.has(LOAD_TOOLS_NAME);
    expect(hasLoadTools).toBe(false);
  });

  it('SHARED_TOOL_SPECS tier agrees with CORE_TOOL_SET for every shared tool', () => {
    Object.entries(SHARED_TOOL_SPECS).forEach(([toolKey, toolSpec]) => {
      const coreAccordingToSpec = toolSpec.tier === 'core';
      const coreAccordingToList = CORE_TOOL_SET.has(toolKey);
      expect(coreAccordingToSpec).toBe(coreAccordingToList);
      // A non-empty catalogLine is required on every spec, core ones included.
      expect(typeof toolSpec.catalogLine).toBe('string');
      expect(toolSpec.catalogLine.trim().length).toBeGreaterThan(0);
    });
  });

  it('every INLINE tool tier agrees with CORE_TOOL_SET and has a catalogLine', () => {
    Object.entries(INLINE_TOOL_TIERS).forEach(([toolKey, tierMeta]) => {
      expect(tierMeta.tier === 'core').toBe(CORE_TOOL_SET.has(toolKey));
      expect(tierMeta.catalogLine.trim().length).toBeGreaterThan(0);
    });
  });
});
|
||||
|
||||
describe('buildInAppDeferredCatalog (#332)', () => {
  // Built once from the real shared registry and reused by every case.
  const deferredCatalog = buildInAppDeferredCatalog(SHARED_TOOL_SPECS as never);
  const listedNames = deferredCatalog.map(({ name }) => name);

  it('includes deferred tools from BOTH the inline map and the shared registry', () => {
    const expectedDeferred = [
      'transformPage', // inline deferred
      'getPageJson', // shared deferred
      'patchNode', // shared deferred
      'createPage', // inline deferred
    ];
    for (const toolName of expectedDeferred) {
      expect(listedNames).toContain(toolName);
    }
  });

  it('NEVER lists a core tool', () => {
    CORE_TOOL_KEYS.forEach((coreKey) => {
      expect(listedNames).not.toContain(coreKey);
    });
    // Explicit spot-checks of tools that are core in each source.
    const coreSpotChecks = [
      'searchInPage', // shared core
      'searchPages', // inline core
      'editPageText', // shared core
    ];
    for (const toolName of coreSpotChecks) {
      expect(listedNames).not.toContain(toolName);
    }
  });

  it('lists all 28 deferred tools (16 inline + 12 shared)', () => {
    expect(deferredCatalog).toHaveLength(28);
    // Every entry must read as a "name — purpose" line.
    deferredCatalog.forEach(({ catalogLine }) => {
      expect(catalogLine).toMatch(/ — /);
    });
  });
});
|
||||
|
||||
describe('buildExternalToolCatalog + shortenForCatalog (#332)', () => {
  it('derives a short "name — purpose" line from each external tool description', () => {
    // One tool with a two-sentence description, one with an empty description.
    const externalTools = {
      tavily_search: { description: 'Search the web for fresh results. More detail here.' },
      tavily_extract: { description: '' },
    };
    const expectedCatalog = [
      { name: 'tavily_search', catalogLine: 'tavily_search — Search the web for fresh results.' },
      { name: 'tavily_extract', catalogLine: 'tavily_extract — external tool' },
    ];
    expect(buildExternalToolCatalog(externalTools)).toEqual(expectedCatalog);
  });

  it('caps a very long description', () => {
    const shortened = shortenForCatalog('x'.repeat(500));
    expect(shortened.length).toBeLessThanOrEqual(140);
    expect(shortened.endsWith('…')).toBe(true);
  });
});
|
||||
|
||||
describe('applyLoadTools (#332)', () => {
  // The names loadTools is allowed to activate in these cases.
  const validNames = ['createPage', 'transformPage', 'tavily_search'];
  const valid = new Set(validNames);

  it('adds valid names to the activated set and returns { loaded }', () => {
    const activated = new Set<string>();
    const requested = ['createPage', 'tavily_search'];

    const result = applyLoadTools(requested, activated, valid);

    expect(result).toEqual({ loaded: ['createPage', 'tavily_search'] });
    expect(activated.has('createPage')).toBe(true);
    expect(activated.has('tavily_search')).toBe(true);
  });

  it('rejects an unknown name with an error listing the valid deferred names', () => {
    const activated = new Set<string>();
    const loadUnknown = () => applyLoadTools(['nope'], activated, valid);

    expect(loadUnknown).toThrow(/unknown tool name/i);
    try {
      loadUnknown();
    } catch (e) {
      const { message } = e as Error;
      // The message names every valid deferred tool.
      for (const validName of validNames) {
        expect(message).toContain(validName);
      }
    }
    // A rejected call must not activate anything.
    expect(activated.size).toBe(0);
  });

  it('tolerates a non-array / empty input (loads nothing)', () => {
    const activated = new Set<string>();
    const nothingLoaded = { loaded: [] };
    expect(applyLoadTools(undefined, activated, valid)).toEqual(nothingLoaded);
    expect(applyLoadTools([], activated, valid)).toEqual(nothingLoaded);
    expect(activated.size).toBe(0);
  });

  it('loadTools description is the verbatim issue text', () => {
    const requiredPhrases = ['only ACTIVATES them', 'callable on your NEXT step'];
    for (const phrase of requiredPhrases) {
      expect(LOAD_TOOLS_DESCRIPTION).toContain(phrase);
    }
  });
});
|
||||
|
||||
describe('editorial "Corrector" scenario is fully served by CORE (#332)', () => {
  it('read + comment + edit + search need no loadTools', () => {
    // The Corrector role reads a page, searches inside it, edits its text and
    // leaves inline comments. All of those tools must be core, so the role
    // never has to load a deferred tool.
    const correctorTools = [
      'getCurrentPage',
      'getPage',
      'searchPages',
      'searchInPage',
      'editPageText',
      'createComment',
      'listComments',
      'getComment',
      'resolveComment',
    ];
    correctorTools.forEach((toolName) => {
      expect(CORE_TOOL_SET.has(toolName)).toBe(true);
    });
  });
});
|
||||
@@ -0,0 +1,309 @@
|
||||
import { tool, type Tool } from 'ai';
|
||||
import { z } from 'zod';
|
||||
import type { SharedToolSpec } from './docmost-client.loader';
|
||||
|
||||
/**
|
||||
* Deferred tool loading for the in-app AI chat (#332).
|
||||
*
|
||||
* The agent otherwise sends ALL ~41 tool definitions with EVERY model call, at every
|
||||
* step, bloating context. Instead we split the in-app tools into two tiers:
|
||||
*
|
||||
* - CORE (hot, always active): frequent OR tiny tools whose full schema is
|
||||
* always visible, plus the `loadTools` meta-tool. Deferring a one-line tool is
|
||||
* pure loss, so tiny tools stay core even if rare.
|
||||
* - DEFERRED (loaded on demand): the fat/rare tools + ALL external MCP tools by
|
||||
* default. The model sees only a compact <tool_catalog> (name — purpose) and
|
||||
* calls `loadTools(names)` to ACTIVATE a tool's full schema for the NEXT step
|
||||
* (one extra round-trip on first use).
|
||||
*
|
||||
* This module is the single source of truth for the IN-APP tiering:
|
||||
* - CORE_TOOL_KEYS / CORE_TOOL_SET — the authoritative core list (used by
|
||||
* prepareAgentStep to build per-step `activeTools`).
|
||||
* - INLINE_TOOL_TIERS — tier + catalogLine for the per-layer INLINE tools (the
|
||||
* ones NOT in @docmost/mcp's SHARED_TOOL_SPECS, which carry their own).
|
||||
* - buildInAppDeferredCatalog / buildExternalToolCatalog — assemble the
|
||||
* <tool_catalog> deferred lines.
|
||||
* - applyLoadTools / makeLoadToolsTool — the loadTools meta-tool.
|
||||
*
|
||||
* The tier/catalogLine fields on SHARED_TOOL_SPECS are IN-APP metadata only; the
|
||||
* external /mcp server ignores them and exposes every tool normally.
|
||||
*/
|
||||
|
||||
/**
 * One entry of the deferred-tool catalog rendered into the <tool_catalog>
 * block: a tool's name paired with its one-line "name — purpose" text.
 */
export interface ToolCatalogEntry {
  /** Exact tool name the model must pass to loadTools to activate the tool. */
  name: string;
  /**
   * The "name — purpose" line shown in the catalog. Hand-written for in-app
   * tools; derived from the tool's own description for external tools.
   */
  catalogLine: string;
}
|
||||
|
||||
/**
 * Keys of the CORE (always-active) in-app tools — 14 entries: the frequent or
 * tiny tools from the issue's original tier list plus `searchInPage` (#330),
 * which is frequent for the editorial roles this feature targets.
 *
 * The `loadTools` meta-tool is also always active, but it is not an ordinary
 * tool key and is therefore not listed here (it is added to activeTools
 * separately).
 */
export const CORE_TOOL_KEYS = [
  // Discovery / navigation.
  'searchPages',
  'listPages',
  'listSpaces',
  'getWorkspace',

  // Reading pages and blocks.
  'getCurrentPage',
  'getPage',
  'getOutline',
  'getNode',

  // Comments.
  'createComment',
  'getComment',
  'listComments',
  'resolveComment',

  // Text editing.
  'editPageText',

  // #330 search_in_page — used constantly in editorial sweeps, so it is core
  // even though it is not part of the issue's tier list.
  'searchInPage',
] as const;

/** Set view of CORE_TOOL_KEYS for constant-time "is this tool core?" checks. */
export const CORE_TOOL_SET: ReadonlySet<string> = new Set<string>(CORE_TOOL_KEYS);

/** Name of the meta-tool; active alongside the core tools when the feature is enabled. */
export const LOAD_TOOLS_NAME = 'loadTools';
|
||||
|
||||
/**
 * Model-facing description of the loadTools meta-tool, taken VERBATIM from
 * issue #332. It tells the model that the catalog names exist, that calling
 * loadTools merely ACTIVATES them (they become callable on the next step), and
 * that several names should be loaded in a single call.
 *
 * Stored as one entry per line and joined with newlines.
 */
export const LOAD_TOOLS_DESCRIPTION = [
  'loadTools — Load the full definitions of deferred tools from the <tool_catalog>',
  'block in your instructions. Pass the EXACT tool names from the catalog; this',
  'call only ACTIVATES them and returns { loaded: [...] } — the tools become',
  'callable on your NEXT step. Load several names in one call when the task clearly',
  'needs them. Unknown names are rejected with the list of valid ones.',
].join('\n');
|
||||
|
||||
/**
 * Tier and catalog line for every INLINE ai-chat tool, i.e. the tools defined
 * per-layer in ai-chat-tools.service.ts that are NOT part of @docmost/mcp's
 * SHARED_TOOL_SPECS (the shared registry carries its own tier/catalogLine).
 * The two sources together describe every in-app tool.
 *
 * Core tools get a catalogLine as well, purely for uniformity: only DEFERRED
 * tools are ever rendered into the catalog.
 */
export const INLINE_TOOL_TIERS: Record<
  string,
  { tier: 'core' | 'deferred'; catalogLine: string }
> = (() => {
  type TierMeta = { tier: 'core' | 'deferred'; catalogLine: string };

  // Small constructors so each entry below reads as "<tier>(<catalog line>)".
  const core = (catalogLine: string): TierMeta => ({ tier: 'core', catalogLine });
  const deferred = (catalogLine: string): TierMeta => ({
    tier: 'deferred',
    catalogLine,
  });

  return {
    // --- core inline ---
    searchPages: core(
      'searchPages — hybrid semantic + keyword search across the wiki.',
    ),
    getCurrentPage: core(
      'getCurrentPage — the page the user is currently viewing.',
    ),
    getPage: core('getPage — fetch a page as Markdown by its id.'),
    listPages: core(
      "listPages — list recent pages, or a space's full page tree.",
    ),
    listComments: core(
      'listComments — list all comments on a page (including resolved).',
    ),
    getComment: core('getComment — fetch a single comment by id.'),
    createComment: core(
      'createComment — add an inline comment (optionally with a suggested edit).',
    ),
    resolveComment: core(
      'resolveComment — resolve or reopen a comment thread.',
    ),

    // --- deferred inline ---
    createPage: deferred(
      'createPage — create a new page with a Markdown body in a space.',
    ),
    updatePageContent: deferred(
      "updatePageContent — replace a page's body (and optionally title) with new Markdown.",
    ),
    renamePage: deferred(
      "renamePage — change a page's title only (body untouched).",
    ),
    movePage: deferred(
      'movePage — move a page under a new parent or to the space root.',
    ),
    deletePage: deferred(
      'deletePage — move a page to trash (soft delete, reversible).',
    ),
    listSidebarPages: deferred(
      "listSidebarPages — list a space's root pages or a page's direct children.",
    ),
    getTable: deferred(
      'getTable — read a table as a matrix of cell texts and cell ids.',
    ),
    checkNewComments: deferred(
      'checkNewComments — find comments in a space created after a timestamp.',
    ),
    getPageHistory: deferred(
      'getPageHistory — fetch one page-history version with its ProseMirror content.',
    ),
    exportPageMarkdown: deferred(
      'exportPageMarkdown — export a page to self-contained Markdown (body + comments).',
    ),
    updatePageJson: deferred(
      "updatePageJson — overwrite a page's body with a full ProseMirror document.",
    ),
    tableInsertRow: deferred(
      'tableInsertRow — insert a row of plain-text cells into a table.',
    ),
    tableDeleteRow: deferred(
      'tableDeleteRow — delete a table row at a 0-based index.',
    ),
    tableUpdateCell: deferred(
      'tableUpdateCell — set the text of a table cell at [row, col].',
    ),
    sharePage: deferred(
      'sharePage — make a page publicly accessible and return its URL.',
    ),
    transformPage: deferred(
      "transformPage — run a sandboxed JS transform over a page's document.",
    ),
  };
})();
|
||||
|
||||
/**
 * Assemble the <tool_catalog> entries for the IN-APP tools.
 *
 * Two metadata sources are merged, in this order:
 *   1. INLINE_TOOL_TIERS — the per-layer inline tools with hand-written lines;
 *   2. `sharedToolSpecs` — the shared registry, each spec supplying its own
 *      catalogLine.
 * Only tools whose tier is 'deferred' are emitted; core tools never appear in
 * the catalog. A shared spec with an empty catalogLine is skipped.
 *
 * Pure: the shared specs are passed in by the caller, which keeps this
 * function unit-testable.
 *
 * @param sharedToolSpecs Shared tool specs keyed by tool name.
 * @returns Catalog entries: inline deferred tools first, then shared ones.
 */
export function buildInAppDeferredCatalog(
  sharedToolSpecs: Record<string, SharedToolSpec>,
): ToolCatalogEntry[] {
  const inlineDeferred = Object.entries(INLINE_TOOL_TIERS)
    .filter(([, meta]) => meta.tier === 'deferred')
    .map(([name, meta]): ToolCatalogEntry => ({
      name,
      catalogLine: meta.catalogLine,
    }));

  const sharedDeferred = Object.entries(sharedToolSpecs)
    .filter(([, spec]) => spec.tier === 'deferred' && Boolean(spec.catalogLine))
    .map(([name, spec]): ToolCatalogEntry => ({
      name,
      catalogLine: spec.catalogLine,
    }));

  return [...inlineDeferred, ...sharedDeferred];
}
|
||||
|
||||
/**
 * Reduce an external tool's (untrusted) description to a short catalog purpose.
 *
 * External MCP tools have no hand-written catalogLine, so one is derived:
 * whitespace runs are collapsed to single spaces, the first sentence is used
 * when it fits within `max`, and anything longer is hard-capped with a trailing
 * ellipsis.
 *
 * @param description Raw tool description; may be empty or multi-line.
 * @param max Maximum length of the result in UTF-16 code units, ellipsis
 *   included. Values below 1 leave no room for text, so an over-long input
 *   collapses to just the ellipsis.
 * @returns The shortened purpose, or 'external tool' when the description is
 *   blank.
 */
export function shortenForCatalog(description: string, max = 140): string {
  const flat = description.replace(/\s+/g, ' ').trim();
  if (!flat) return 'external tool';

  // Prefer the first sentence if it is reasonably short; otherwise fall back to
  // the whole (flattened) text and let the cap below truncate it.
  const firstSentence = flat.split(/(?<=[.!?])\s/)[0];
  const base =
    firstSentence.length > 0 && firstSentence.length <= max
      ? firstSentence
      : flat;
  if (base.length <= max) return base;

  // Reserve one code unit for the ellipsis. Clamp at 0 so a tiny/negative `max`
  // cannot turn into a negative slice index (which would keep almost the whole
  // string instead of capping it).
  let keep = Math.max(0, max - 1);

  // Never cut between the two halves of a surrogate pair: if the last kept code
  // unit is a high surrogate, drop it so no lone surrogate is emitted.
  if (keep > 0) {
    const lastKept = base.charCodeAt(keep - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) keep -= 1;
  }

  return `${base.slice(0, keep).trimEnd()}…`;
}
|
||||
|
||||
/**
 * Produce the catalog entries for the EXTERNAL MCP tools, all of which are
 * deferred by default (#332).
 *
 * Each entry's name is the (namespaced) key of the tool in `externalTools`.
 * No hand-written line exists for these tools, so the purpose is derived from
 * the tool's own description via shortenForCatalog; a missing tool or
 * description is treated as an empty description. Pure.
 *
 * @param externalTools External tools keyed by their tool name.
 * @returns One "name — purpose" entry per key, in key order.
 */
export function buildExternalToolCatalog(
  externalTools: Record<string, { description?: string } | undefined>,
): ToolCatalogEntry[] {
  const catalog: ToolCatalogEntry[] = [];
  for (const [name, externalTool] of Object.entries(externalTools)) {
    const purpose = shortenForCatalog(externalTool?.description ?? '');
    catalog.push({ name, catalogLine: `${name} — ${purpose}` });
  }
  return catalog;
}
|
||||
|
||||
/**
 * Pure core of the loadTools meta-tool.
 *
 * Validates the requested names against this turn's valid deferred names and,
 * when every name is valid, adds them to the caller's mutable `activatedTools`
 * set so they become callable on the next step.
 *
 * Input handling:
 * - a non-array `names` is treated as an empty request (nothing is loaded);
 * - non-string array items are ignored;
 * - repeated names are collapsed, keeping first-seen order, so `loaded` (and
 *   the error message) lists each name once.
 *
 * The call is all-or-nothing: if any requested name is unknown, nothing is
 * activated.
 *
 * @param names Raw `names` argument supplied by the model.
 * @param activatedTools Per-turn set of activated tool names; mutated in place.
 * @param validDeferredNames Names that may be loaded during this turn.
 * @returns `{ loaded }` — the distinct names activated by this call.
 * @throws Error when one or more names are not valid deferred tools; the
 *   message lists the offending names and every valid name (sorted), so the
 *   model can retry with a correct name.
 */
export function applyLoadTools(
  names: unknown,
  activatedTools: Set<string>,
  validDeferredNames: ReadonlySet<string>,
): { loaded: string[] } {
  // Keep only string items and collapse duplicates (a Set preserves insertion
  // order, so the first occurrence of each name wins).
  const requested: string[] = Array.isArray(names)
    ? [...new Set(names.filter((n): n is string => typeof n === 'string'))]
    : [];

  const unknown = requested.filter((n) => !validDeferredNames.has(n));
  if (unknown.length > 0) {
    const valid = [...validDeferredNames].sort().join(', ');
    throw new Error(
      `loadTools: unknown tool name(s): ${unknown.join(', ')}. ` +
        `Valid deferred tools are: ${valid || '(none)'}.`,
    );
  }

  // Only reached when every requested name is valid.
  for (const n of requested) activatedTools.add(n);
  return { loaded: requested };
}
|
||||
|
||||
/**
 * Create the loadTools AI-SDK tool for ONE turn.
 *
 * The returned tool closes over this turn's mutable state and must therefore
 * be created per streamText call, never shared at module level:
 * - `activatedTools` grows whenever the tool executes and is read by
 *   prepareAgentStep on the following step;
 * - `validDeferredNames` holds every non-core tool in this turn's toolset,
 *   external MCP tools included.
 *
 * @param activatedTools Per-turn set that receives the activated tool names.
 * @param validDeferredNames Names the model is allowed to load this turn.
 * @returns The loadTools tool; executing it delegates to applyLoadTools.
 */
export function makeLoadToolsTool(
  activatedTools: Set<string>,
  validDeferredNames: ReadonlySet<string>,
): Tool {
  // Input: a single `names` array holding the catalog names to activate.
  const inputSchema = z.object({
    names: z
      .array(z.string())
      .describe(
        'EXACT deferred tool names from the <tool_catalog> to activate for your next step.',
      ),
  });

  return tool({
    description: LOAD_TOOLS_DESCRIPTION,
    inputSchema,
    execute: async (input) =>
      applyLoadTools(input.names, activatedTools, validDeferredNames),
  });
}
|
||||
@@ -261,6 +261,21 @@ export class EnvironmentService {
|
||||
return disable === 'true';
|
||||
}
|
||||
|
||||
/**
 * Whether deferred tool loading is enabled for the in-app AI chat (#332).
 *
 * When enabled, the agent sees a compact <tool_catalog> and only the CORE
 * tools plus the loadTools meta-tool are active on each step; deferred tools
 * (the fat/rare ones and all external MCP tools) are loaded on demand.
 *
 * Controlled by AI_CHAT_DEFERRED_TOOLS and ENABLED by default — the issue
 * treats deferred loading as the new behavior. Set
 * AI_CHAT_DEFERRED_TOOLS=false to restore the old "all tools always active"
 * behavior.
 *
 * The value is trimmed and compared case-insensitively. A blank value (for
 * example `AI_CHAT_DEFERRED_TOOLS=` left empty in an env file) counts as "not
 * configured" and keeps the default, instead of silently turning the feature
 * off. Any other value than `true` disables the feature.
 *
 * @returns true when deferred tool loading should be used.
 */
isAiChatDeferredToolsEnabled(): boolean {
  const raw = this.configService.get<string>('AI_CHAT_DEFERRED_TOOLS', 'true');
  // String() tolerates a non-string config value; trim() tolerates padding.
  const normalized = String(raw ?? '')
    .trim()
    .toLowerCase();
  // Blank means "unset" → fall back to the documented default (enabled).
  return normalized === '' || normalized === 'true';
}
|
||||
|
||||
/** Returns the configured POSTHOG_HOST value, read from the config service. */
getPostHogHost(): string {
  const postHogHost = this.configService.get<string>('POSTHOG_HOST');
  return postHogHost;
}
|
||||
|
||||
@@ -146,6 +146,9 @@ describe('AiChatService.stream [integration]', () => {
|
||||
{} as any, // aiAgentRoleRepo (role is pre-resolved + passed in)
|
||||
{} as any, // pageRepo (only used when body.openPage is set)
|
||||
{} as any, // pageAccess (idem)
|
||||
// environment (#332): keep deferred tool loading OFF for this lifecycle
|
||||
// harness so the toolset/behavior is exactly as before.
|
||||
{ isAiChatDeferredToolsEnabled: () => false } as any,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,22 @@ export interface SharedToolSpec {
|
||||
inAppKey: string;
|
||||
/** Single canonical model-facing description used by both layers. */
|
||||
description: string;
|
||||
/**
|
||||
* Deferred-tool tier for the IN-APP agent (#332). 'core' tools are always
|
||||
* active; 'deferred' tools are hidden behind the <tool_catalog> and loaded on
|
||||
* demand via the loadTools meta-tool. This is an IN-APP concern only: the
|
||||
* standalone /mcp server ignores this field and registers every tool normally
|
||||
* (registerShared in index.ts reads mcpName/description/buildShape only).
|
||||
*/
|
||||
tier: 'core' | 'deferred';
|
||||
/**
|
||||
* Hand-written one-liner "name — purpose" shown in the in-app agent's
|
||||
* <tool_catalog> for a DEFERRED tool (#332). Deliberately NOT derived from the
|
||||
* description's first sentence — a concise, accurate purpose line. Present on
|
||||
* every spec (core tools too) for uniformity; only deferred ones are rendered.
|
||||
* Inert for the external /mcp server.
|
||||
*/
|
||||
catalogLine: string;
|
||||
/**
|
||||
* Builds the tool's input schema as a plain object of zod fields (a
|
||||
* ZodRawShape). Called with the consumer's own zod namespace. Omitted for
|
||||
@@ -47,6 +63,8 @@ export const SHARED_TOOL_SPECS = {
|
||||
mcpName: 'get_workspace',
|
||||
inAppKey: 'getWorkspace',
|
||||
description: 'Fetch metadata about the current workspace (name, settings).',
|
||||
tier: 'core',
|
||||
catalogLine: 'getWorkspace — fetch current workspace metadata (name, settings).',
|
||||
},
|
||||
|
||||
listSpaces: {
|
||||
@@ -55,6 +73,8 @@ export const SHARED_TOOL_SPECS = {
|
||||
description:
|
||||
'List the spaces the current user can access. Returns the array of ' +
|
||||
'spaces (id, name, slug, ...).',
|
||||
tier: 'core',
|
||||
catalogLine: 'listSpaces — list the spaces the user can access (id, name, slug).',
|
||||
},
|
||||
|
||||
listShares: {
|
||||
@@ -62,6 +82,8 @@ export const SHARED_TOOL_SPECS = {
|
||||
inAppKey: 'listShares',
|
||||
description:
|
||||
'List all public shares in the workspace with page titles and public URLs.',
|
||||
tier: 'deferred',
|
||||
catalogLine: 'listShares — list all public shares in the workspace with their URLs.',
|
||||
},
|
||||
|
||||
// --- single-pageId read tools ---
|
||||
@@ -74,6 +96,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'includes block ids, callouts, tables, link/image attributes) plus the ' +
|
||||
'slugId used in URLs. Use the block ids it returns to make precise ' +
|
||||
'structural edits or surgical text edits without resending the page.',
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
"getPageJson — get a page's raw ProseMirror JSON (lossless, with block ids).",
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
}),
|
||||
@@ -88,6 +113,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'count) WITHOUT the full document body. Use it to locate sections/tables ' +
|
||||
'and grab block ids cheaply before fetching, patching or inserting ' +
|
||||
'individual blocks.',
|
||||
tier: 'core',
|
||||
catalogLine:
|
||||
"getOutline — compact outline of a page's top-level blocks with their ids.",
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
}),
|
||||
@@ -104,6 +132,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'outline or page-JSON view (works for headings/paragraphs/callouts/images), OR ' +
|
||||
'`#<index>` to fetch a top-level block by its outline index — use the ' +
|
||||
'`#<index>` form for tables/rows/cells, which carry no id.',
|
||||
tier: 'core',
|
||||
catalogLine:
|
||||
"getNode — fetch one block's ProseMirror subtree by block id or #index.",
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
nodeId: z.string().min(1),
|
||||
@@ -137,6 +168,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'caseSensitive:true to match case. Ideal for systematic ' +
|
||||
'editorial sweeps (unquoted "ё", straight quotes, "т.е.", stray units). An ' +
|
||||
'invalid regex or an empty query returns a clear error to fix.',
|
||||
tier: 'core',
|
||||
catalogLine:
|
||||
'searchInPage — find every occurrence of a string/regex inside one page, with locations.',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1).describe('ID of the page to search'),
|
||||
query: z
|
||||
@@ -172,6 +206,8 @@ export const SHARED_TOOL_SPECS = {
|
||||
description:
|
||||
'Remove a single block by its attrs.id (from the page outline or ' +
|
||||
'page-JSON view) WITHOUT resending the whole document.',
|
||||
tier: 'deferred',
|
||||
catalogLine: 'deleteNode — remove a single content block by its block id.',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
nodeId: z.string().min(1),
|
||||
@@ -203,6 +239,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'JSON object or a JSON string (both accepted). Cheaper and safer than ' +
|
||||
'replacing the whole document for one-block structural edits. Reversible: ' +
|
||||
'the previous version is kept in page history.',
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
'patchNode — replace one block with a new ProseMirror node, keeping its id.',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1).describe('ID of the page containing the block'),
|
||||
nodeId: z
|
||||
@@ -245,6 +284,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
|
||||
'{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
|
||||
'JSON object or a JSON string (both accepted). Reversible via page history.',
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
'insertNode — insert a block before/after an anchor, or append at the end.',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
node: z
|
||||
@@ -278,6 +320,8 @@ export const SHARED_TOOL_SPECS = {
|
||||
mcpName: 'unshare_page',
|
||||
inAppKey: 'unsharePage',
|
||||
description: 'Remove the public share of a page (revokes the public URL).',
|
||||
tier: 'deferred',
|
||||
catalogLine: "unsharePage — revoke a page's public share (removes the public URL).",
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1).describe('ID of the page to unshare'),
|
||||
}),
|
||||
@@ -295,6 +339,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
"`from`/`to` each accept a historyId, or null/'current' for the page's " +
|
||||
'current content (defaults: from=current, to=current — pass a historyId ' +
|
||||
'from the page-history list to compare against the live page).',
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
'diffPageVersions — diff two page versions and return the change set + summary.',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
from: z
|
||||
@@ -315,6 +362,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
"List a page's saved versions (Docmost auto-snapshots on every save), " +
|
||||
'newest first, cursor-paginated. Returns { items, nextCursor }; each ' +
|
||||
"item's id is the historyId to pass to the page diff or restore tools.",
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
"listPageHistory — list a page's saved versions (newest first, paginated).",
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
cursor: z
|
||||
@@ -332,6 +382,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'as the page\'s current content (Docmost has no restore endpoint, so ' +
|
||||
'this creates a NEW history snapshot — the restore is itself revertible). ' +
|
||||
'Get the historyId from the page-history list.',
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
'restorePageVersion — restore a page to a saved history version (revertible).',
|
||||
buildShape: (z) => ({
|
||||
historyId: z.string().min(1),
|
||||
}),
|
||||
@@ -349,6 +402,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'thread records are NOT created/updated/deleted on the server by this ' +
|
||||
'tool — only the page body + inline comment marks are written; manage ' +
|
||||
'comment threads via the comment tools/UI.',
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
"importPageMarkdown — replace a page's content from exported Docmost Markdown.",
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
markdown: z.string().min(1),
|
||||
@@ -365,6 +421,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'entirely server-side — the document is NOT sent through the model. The ' +
|
||||
'target keeps its own title and slug; only its body is replaced. Ideal ' +
|
||||
"for 'make page A's content equal to B' or 'replace A with B but keep A's URL'.",
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
"copyPageContent — replace one page's body with a copy of another page's body.",
|
||||
buildShape: (z) => ({
|
||||
sourcePageId: z.string().min(1).describe('Page to copy content FROM'),
|
||||
targetPageId: z
|
||||
@@ -402,6 +461,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'page JSON and use a structural node patch/update to set its marks. ' +
|
||||
'Examples: edits:[{find:"teh",replace:"the"}]; edits:[{find:"Hello ' +
|
||||
'world",replace:"Hello there"}] (crosses a bold boundary).',
|
||||
tier: 'core',
|
||||
catalogLine:
|
||||
"editPageText — surgical find/replace of plain text in a page, preserving ids/marks.",
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().describe('ID of the page to edit'),
|
||||
edits: z
|
||||
@@ -440,6 +502,9 @@ export const SHARED_TOOL_SPECS = {
|
||||
'server instance that created it: in a multi-replica deployment without ' +
|
||||
'sticky sessions a blob stored on one instance is not retrievable via the ' +
|
||||
'sandbox URL on another (it 404s like an expired one).',
|
||||
tier: 'deferred',
|
||||
catalogLine:
|
||||
'stashPage — serialize a whole page to a short anonymous URL without loading its body.',
|
||||
buildShape: (z) => ({
|
||||
pageId: z.string().min(1),
|
||||
}),
|
||||
|
||||
Reference in New Issue
Block a user