2026-07-04 20:47:46 +03:00
14 changed files with 1117 additions and 14 deletions
@@ -202,6 +202,13 @@ MCP_DOCMOST_PASSWORD=
 # Default 900000 (15 min).
 # AI_MCP_CALL_TIMEOUT_MS=900000

+# Deferred tool loading for the in-app AI chat (#332). Default ON: the agent sees
+# a compact <tool_catalog> and only CORE tools + a loadTools meta-tool are active
+# each step; deferred tools (the fat/rare ones + all external MCP tools) load on
+# demand. Set AI_CHAT_DEFERRED_TOOLS=false to restore the old "all tools always
+# active" behavior.
+# AI_CHAT_DEFERRED_TOOLS=true
+
 # --- Anonymous public-share AI assistant ---
 # Opt-in per workspace (AI settings -> "public share assistant"; off by default).
 # When enabled, anonymous visitors of a published share can ask an AI about that
@@ -1,4 +1,8 @@
-import { buildSystemPrompt, buildMcpToolingBlock } from './ai-chat.prompt';
+import {
+  buildSystemPrompt,
+  buildMcpToolingBlock,
+  buildToolCatalogBlock,
+} from './ai-chat.prompt';
 import { Workspace } from '@docmost/db/types/entity.types';

 /**
@@ -396,3 +400,62 @@ describe('buildSystemPrompt page-changed note (#274)', () => {
    expect(opens).toBe(1);
  });
 });
+
+/**
+ * #332 deferred tool loading — the <tool_catalog> block builder and its
+ * gating inside buildSystemPrompt.
+ */
+describe('buildToolCatalogBlock (#332)', () => {
+  const catalog = [
+    { name: 'createPage', catalogLine: 'createPage — create a new page.' },
+    { name: 'transformPage', catalogLine: 'transformPage — run a JS transform.' },
+  ];
+
+  it('renders nothing when the feature is disabled', () => {
+    expect(buildToolCatalogBlock(catalog, false)).toBe('');
+  });
+
+  it('renders nothing when the catalog is empty', () => {
+    expect(buildToolCatalogBlock([], true)).toBe('');
+    expect(buildToolCatalogBlock(undefined, true)).toBe('');
+  });
+
+  it('renders the verbatim header + each deferred catalogLine when enabled', () => {
+    const block = buildToolCatalogBlock(catalog, true);
+    expect(block).toContain('<tool_catalog note="deferred tools;');
+    expect(block).toContain('NEVER tell the user you lack a capability');
+    expect(block).toContain('Deferred tools (name — purpose):');
+    expect(block).toContain('- createPage — create a new page.');
+    expect(block).toContain('- transformPage — run a JS transform.');
+    expect(block).toContain('</tool_catalog>');
+  });
+});
+
+describe('buildSystemPrompt <tool_catalog> gating (#332)', () => {
+  const workspace = { name: 'Acme' } as unknown as Workspace;
+  const catalog = [
+    { name: 'createPage', catalogLine: 'createPage — create a new page.' },
+  ];
+
+  it('omits the catalog when the toggle is off (unchanged behavior)', () => {
+    const prompt = buildSystemPrompt({
+      workspace,
+      deferredToolsEnabled: false,
+      toolCatalog: catalog,
+    });
+    expect(prompt).not.toContain('<tool_catalog');
+    expect(prompt).not.toContain('createPage — create a new page.');
+  });
+
+  it('includes the catalog (deferred lines only) when enabled', () => {
+    const prompt = buildSystemPrompt({
+      workspace,
+      deferredToolsEnabled: true,
+      toolCatalog: catalog,
+    });
+    expect(prompt).toContain('<tool_catalog');
+    expect(prompt).toContain('createPage — create a new page.');
+    // A core tool line is never in the catalog (the caller passes deferred only).
+    expect(prompt).not.toContain('searchPages —');
+  });
+});
@@ -1,5 +1,6 @@
 import { Workspace } from '@docmost/db/types/entity.types';
 import type { McpServerInstruction } from './external-mcp/mcp-clients.service';
+import type { ToolCatalogEntry } from './tools/tool-tiers';

 /**
 * Default agent persona used when the admin has not configured a custom system
@@ -183,6 +184,55 @@ export interface BuildSystemPromptInput {
   * block (unchanged page, page not open, or first turn).
   */
  pageChanged?: { title: string; diff: string } | null;
+  /**
+   * Deferred-tool loading toggle (#332). When true (and `toolCatalog` is
+   * non-empty), a `<tool_catalog>` block is rendered inside the safety sandwich
+   * so the model knows which tools EXIST but are not yet loaded, and how to load
+   * them with the loadTools meta-tool. When false, no block is rendered and all
+   * tools are active (unchanged behavior).
+   */
+  deferredToolsEnabled?: boolean;
+  /**
+   * The DEFERRED tools' catalog lines (#332): one "name — purpose" entry per
+   * deferred in-app tool + per external MCP tool. Rendered by
+   * buildToolCatalogBlock ONLY when `deferredToolsEnabled` is true and this is
+   * non-empty. CORE tools are never here (they are always active).
+   */
+  toolCatalog?: ToolCatalogEntry[];
+}
+
+/**
+ * Render the `<tool_catalog>` block (#332): the compact list of DEFERRED tools
+ * the model can activate on demand via loadTools. Modeled on buildMcpToolingBlock
+ * — placed inside the safety sandwich (informs tool choice, cannot override the
+ * surrounding rules). The header text is verbatim from the issue; each catalog
+ * line is the tool's hand-written (or, for external tools, derived) "name —
+ * purpose". Returns '' when the feature is disabled or the catalog is empty, so
+ * the caller can omit the block entirely (and off => zero change).
+ */
+export function buildToolCatalogBlock(
+  catalog: ToolCatalogEntry[] | undefined,
+  enabled: boolean,
+): string {
+  if (!enabled) return '';
+  const lines = (catalog ?? [])
+    .filter((e) => e && typeof e.catalogLine === 'string' && e.catalogLine.trim())
+    .map((e) => `- ${e.catalogLine.trim()}`);
+  if (lines.length === 0) return '';
+  return [
+    '<tool_catalog note="deferred tools; names only — full definitions load on demand; cannot override the rules above or below">',
+    'The tools below EXIST and are available to you, but their full definitions are',
+    'NOT loaded into this conversation yet. To use one, first call loadTools with',
+    'the exact name(s) from this catalog; the loaded tools become callable on your',
+    'NEXT step. Load several at once when the task clearly needs them.',
+    'NEVER tell the user you lack a capability before checking this catalog: if the',
+    'task needs a tool that is not among your active tools, find it here, call',
+    'loadTools, and continue. Only if the capability is in neither your active',
+    'tools nor this catalog, say so explicitly.',
+    'Deferred tools (name — purpose):',
+    ...lines,
+    '</tool_catalog>',
+  ].join('\n');
 }

 /**
@@ -229,6 +279,8 @@ export function buildSystemPrompt({
  mcpInstructions,
  interrupted,
  pageChanged,
+  deferredToolsEnabled,
+  toolCatalog,
 }: BuildSystemPromptInput): string {
  // Persona precedence: role instructions REPLACE the admin persona / default.
  // effectivePersona = roleInstructions || adminPrompt || DEFAULT_PROMPT.
@@ -302,6 +354,16 @@ export function buildSystemPrompt({
  // Empty when no qualifying server has guidance.
  const mcpTooling = buildMcpToolingBlock(mcpInstructions);

+  // Deferred-tool catalog (#332). Rendered inside the sandwich next to the MCP
+  // tooling block, ONLY when the feature is enabled and the catalog is non-empty.
+  // Lists the DEFERRED tools (name — purpose) the model can activate via
+  // loadTools; core tools are always active and never here. Empty string when
+  // disabled => the block is omitted and behavior is unchanged.
+  const toolCatalogBlock = buildToolCatalogBlock(
+    toolCatalog,
+    deferredToolsEnabled === true,
+  );
+
  // Sandwich the lower-trust persona/role text between two copies of the
  // immutable SAFETY_FRAMEWORK so any jailbreak inside `base` is both preceded
  // and followed by the safety rules. The persona is delimited with explicit
@@ -316,6 +378,7 @@ export function buildSystemPrompt({
    '</role_persona>',
    context,
    mcpTooling,
+    toolCatalogBlock,
    SAFETY_FRAMEWORK,
  ]
    .filter((part) => part !== '')
@@ -53,6 +53,7 @@ describe('AiChatService.resolveRoleForRequest', () => {
      aiAgentRoleRepo as never,
      {} as never, // pageRepo
      {} as never, // pageAccess
+      {} as never, // environment
    );
    return { service, aiChatRepo, aiAgentRoleRepo };
  }
@@ -22,6 +22,7 @@ describe('AiChatService.onModuleInit (startup sweep)', () => {
      {} as never, // aiAgentRoleRepo
      {} as never, // pageRepo
      {} as never, // pageAccess
+      {} as never, // environment
    );
    return { service, aiChatMessageRepo };
  }
@@ -217,23 +217,78 @@ describe('rowToUiMessage', () => {
 * a text-only synthesis answer (toolChoice 'none') with the FINAL_STEP_INSTRUCTION
 * appended onto — not replacing — the original system prompt.
 */
+// Narrowing helpers for the prepareAgentStep union return type.
+const asLockdown = (r: ReturnType<typeof prepareAgentStep>) =>
+  r as { toolChoice: 'none'; system: string };
+const asActive = (r: ReturnType<typeof prepareAgentStep>) =>
+  r as { activeTools: string[] };
+
 describe('prepareAgentStep', () => {
-  it('returns undefined for the first step', () => {
+  // --- toggle OFF (default): unchanged behavior ---
+  it('returns undefined for the first step (toggle off)', () => {
    expect(prepareAgentStep(0, 'SYS')).toBeUndefined();
  });

-  it('returns undefined for a non-final step (just before the last)', () => {
+  it('returns undefined for a non-final step (toggle off)', () => {
    expect(prepareAgentStep(MAX_AGENT_STEPS - 2, 'SYS')).toBeUndefined();
  });

-  it('forces a text-only synthesis on the final allowed step', () => {
-    const result = prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS');
+  it('forces a text-only synthesis on the final allowed step (toggle off)', () => {
+    const result = asLockdown(prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS'));
    expect(result).toBeDefined();
-    expect(result?.toolChoice).toBe('none');
+    expect(result.toolChoice).toBe('none');
    // The original persona is preserved (prefix), not replaced.
-    expect(result?.system.startsWith('SYS')).toBe(true);
+    expect(result.system.startsWith('SYS')).toBe(true);
    // The synthesis instruction is appended.
-    expect(result?.system).toContain(FINAL_STEP_INSTRUCTION);
+    expect(result.system).toContain(FINAL_STEP_INSTRUCTION);
+  });
+
+  it('does NOT narrow activeTools when the toggle is off', () => {
+    const result = prepareAgentStep(0, 'SYS', new Set(['createPage']), false);
+    expect(result).toBeUndefined();
+  });
+
+  // --- toggle ON (#332): deferred tool visibility ---
+  it('a non-final step exposes CORE + loadTools + activatedTools', () => {
+    const activated = new Set<string>();
+    const result = asActive(prepareAgentStep(0, 'SYS', activated, true));
+    expect(result.activeTools).toContain('searchPages'); // core
+    expect(result.activeTools).toContain('searchInPage'); // #330, core
+    expect(result.activeTools).toContain('editPageText'); // core
+    expect(result.activeTools).toContain('loadTools'); // meta-tool
+    // No deferred tool is active before it is loaded.
+    expect(result.activeTools).not.toContain('createPage');
+    expect(result.activeTools).not.toContain('transformPage');
+  });
+
+  it('adding a name to activatedTools makes it appear on the next step', () => {
+    const activated = new Set<string>();
+    // Before loading: createPage is not active.
+    expect(
+      asActive(prepareAgentStep(1, 'SYS', activated, true)).activeTools,
+    ).not.toContain('createPage');
+    // loadTools grows the SAME set…
+    activated.add('createPage');
+    // …so the next step sees it.
+    const next = asActive(prepareAgentStep(2, 'SYS', activated, true));
+    expect(next.activeTools).toContain('createPage');
+    expect(next.activeTools).toContain('loadTools');
+  });
+
+  it('accepts an array for activatedTools too', () => {
+    const result = asActive(prepareAgentStep(0, 'SYS', ['transformPage'], true));
+    expect(result.activeTools).toContain('transformPage');
+    expect(result.activeTools).toContain('loadTools');
+  });
+
+  it('final-step lockdown WINS even when the toggle is on', () => {
+    const result = asLockdown(
+      prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS', new Set(['createPage']), true),
+    );
+    // The lockdown shape (toolChoice none + synthesis) — not the activeTools shape.
+    expect(result.toolChoice).toBe('none');
+    expect(result.system).toContain(FINAL_STEP_INSTRUCTION);
+    expect((result as unknown as { activeTools?: string[] }).activeTools).toBeUndefined();
  });
 });

@@ -30,7 +30,15 @@ import {
 } from '@docmost/db/types/entity.types';
 import { AiChatToolsService } from './tools/ai-chat-tools.service';
 import { McpClientsService } from './external-mcp/mcp-clients.service';
+import { EnvironmentService } from '../../integrations/environment/environment.service';
 import { buildSystemPrompt } from './ai-chat.prompt';
+import {
+  CORE_TOOL_KEYS,
+  CORE_TOOL_SET,
+  LOAD_TOOLS_NAME,
+  makeLoadToolsTool,
+  buildExternalToolCatalog,
+} from './tools/tool-tiers';
 import { computePageChange } from './page-change/page-change.util';
 import { roleModelOverride } from './roles/role-model-config';
 import {
@@ -54,24 +62,52 @@ const FINAL_STEP_INSTRUCTION =
  'language. If the information is incomplete, say so explicitly: summarize ' +
  'what you found, what is still missing, and give your best partial conclusion.';

-// Pure, unit-testable: decide per-step overrides. Returns undefined for normal
-// steps; on the final allowed step forces a text-only synthesis answer.
+// Pure, unit-testable: decide per-step overrides. Two responsibilities:
+//   1. Final-step lockdown (always): on the final allowed step force a text-only
+//      synthesis answer (toolChoice 'none' + FINAL_STEP_INSTRUCTION). This WINS —
+//      it takes precedence over the deferred-tool narrowing below.
+//   2. Deferred tool visibility (#332): when `deferredEnabled` and NOT the final
+//      step, expose only the CORE tools + loadTools + whatever loadTools has
+//      activated so far this turn (`activatedTools`), via `activeTools`. Deferred
+//      tools stay in the <tool_catalog> until the model loads them.
+// When `deferredEnabled` is false the behavior is unchanged: undefined on normal
+// steps (all tools active), lockdown on the final step.
+//
 // `system` is the in-scope system prompt; we CONCATENATE so the original
 // persona/context is preserved — a bare `system` override would REPLACE the
-// whole system prompt for the step.
+// whole system prompt for the step. `activatedTools` is PER-TURN mutable state
+// owned by the streaming loop (a closure Set grown by loadTools); it is passed
+// in (not module-global, not persisted) so this stays a pure function of its
+// arguments.
 //
 // NOTE: at AI SDK v7 the per-step `system` field is renamed to `instructions`.
 // On v6 (`^6.0.134`) `system` is the correct field — adjust when bumping.
 export function prepareAgentStep(
  stepNumber: number,
  system: string,
-): { toolChoice: 'none'; system: string } | undefined {
+  activatedTools: ReadonlySet<string> | readonly string[] = [],
+  deferredEnabled = false,
+):
+  | { toolChoice: 'none'; system: string }
+  | { activeTools: string[] }
+  | undefined {
+  // Final-step lockdown WINS (applies regardless of the deferred toggle).
  if (stepNumber >= MAX_AGENT_STEPS - 1) {
    return {
      toolChoice: 'none',
      system: `${system}\n\n${FINAL_STEP_INSTRUCTION}`,
    };
  }
+  // Deferred tool loading: narrow this step's visible tools to CORE + loadTools
+  // + the tools already activated this turn.
+  if (deferredEnabled) {
+    const activated = Array.isArray(activatedTools)
+      ? activatedTools
+      : [...activatedTools];
+    return {
+      activeTools: [...CORE_TOOL_KEYS, LOAD_TOOLS_NAME, ...activated],
+    };
+  }
  return undefined;
 }

@@ -206,6 +242,9 @@ export class AiChatService implements OnModuleInit {
    private readonly aiAgentRoleRepo: AiAgentRoleRepo,
    private readonly pageRepo: PageRepo,
    private readonly pageAccess: PageAccessService,
+    // Reads the AI_CHAT_DEFERRED_TOOLS toggle (#332). Injected last so existing
+    // positional constructor callers (tests) only append one stub.
+    private readonly environment: EnvironmentService,
  ) {}

  /**
@@ -625,9 +664,25 @@ export class AiChatService implements OnModuleInit {
    // Build the system prompt + Docmost toolset. If either throws after the
    // external MCP lease was taken above, release the lease before rethrowing so
    // the leased transports are not leaked (#185 review).
+    // Deferred tool loading toggle (#332). When ON, the model sees a compact
+    // <tool_catalog> and only CORE tools + loadTools are active each step; other
+    // tools (fat/rare in-app tools + ALL external MCP tools) load on demand. When
+    // OFF, every tool is active and nothing below changes.
+    const deferredEnabled = this.environment.isAiChatDeferredToolsEnabled();
+
    let system: string;
    let docmostTools: Awaited<ReturnType<AiChatToolsService['forUser']>>;
    try {
+      // Assemble the deferred catalog for the system prompt: hand-written lines
+      // for the in-app deferred tools + a derived line for each external MCP tool
+      // (also deferred by default). Only built when the feature is enabled.
+      const toolCatalog = deferredEnabled
+        ? [
+            ...(await this.tools.getInAppDeferredCatalog()),
+            ...buildExternalToolCatalog(external.tools),
+          ]
+        : [];
+
      system = buildSystemPrompt({
        workspace,
        adminPrompt: resolved?.systemPrompt,
@@ -644,6 +699,10 @@ export class AiChatService implements OnModuleInit {
        // Detected between-turns human edit to the open page (#274): adds the
        // page_changed note + unified diff so the agent doesn't overwrite it.
        pageChanged,
+        // Deferred tool loading (#332): renders the <tool_catalog> block (only
+        // when enabled + non-empty) so the model can activate deferred tools.
+        deferredToolsEnabled: deferredEnabled,
+        toolCatalog,
      });

      // Pass the resolved chatId so the write tools can mint provenance tokens
@@ -664,7 +723,31 @@ export class AiChatService implements OnModuleInit {
      throw err;
    }

-    const tools = { ...external.tools, ...docmostTools };
+    // Base toolset: external MCP tools + Docmost in-app tools (Docmost wins on a
+    // name clash — external are namespaced, so no clash is expected).
+    const baseTools = { ...external.tools, ...docmostTools };
+
+    // Deferred tool loading state (#332), scoped to THIS streaming loop:
+    //  - `activatedTools` is per-TURN mutable state — a fresh closure Set created
+    //    per streamText call, NOT module-global and NOT persisted, so a new turn
+    //    starts cold. loadTools.execute adds to it; prepareAgentStep reads it to
+    //    widen `activeTools` on the NEXT step.
+    //  - `validDeferredNames` = every tool that is NOT core (the in-app deferred
+    //    tools + ALL external MCP tools), computed from the ACTUAL toolset so an
+    //    external tool is loadable by its namespaced name. loadTools rejects any
+    //    name outside this set.
+    const activatedTools = new Set<string>();
+    const validDeferredNames = new Set<string>(
+      Object.keys(baseTools).filter((k) => !CORE_TOOL_SET.has(k)),
+    );
+    // Add the loadTools meta-tool ONLY when the feature is enabled; when off the
+    // toolset and behavior are exactly as before.
+    const tools = deferredEnabled
+      ? {
+          ...baseTools,
+          [LOAD_TOOLS_NAME]: makeLoadToolsTool(activatedTools, validDeferredNames),
+        }
+      : baseTools;

    // Accumulate the turn's streamed output so a provider error / disconnect can
    // persist the PARTIAL answer the user already saw — the SDK's onError/onAbort
@@ -799,7 +882,8 @@ export class AiChatService implements OnModuleInit {
        // ends with no assistant text (an empty turn). prepareAgentStep forbids
        // further tool calls and appends a synthesis instruction on that step,
        // concatenated onto the original `system` so the persona is preserved.
-        prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
+        prepareStep: ({ stepNumber }) =>
+          prepareAgentStep(stepNumber, system, activatedTools, deferredEnabled),
        abortSignal: signal,
        onChunk: ({ chunk }) => {
          // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
@@ -17,6 +17,10 @@ import { resolveCurrentPageResult } from './current-page.util';
 import { parseNodeArg } from './parse-node-arg';
 import { modelFriendlyInput } from './model-friendly-input';
 import { SandboxStore } from '../../../integrations/sandbox/sandbox.store';
+import {
+  buildInAppDeferredCatalog,
+  type ToolCatalogEntry,
+} from './tool-tiers';

 /**
 * Per-user, per-request adapter that exposes Docmost READ operations to the
@@ -123,6 +127,18 @@ export class AiChatToolsService {
    return client.exportPageMarkdown(pageId);
  }

+  /**
+   * Build the IN-APP deferred <tool_catalog> entries (#332): one "name — purpose"
+   * line per DEFERRED tool, merging the per-layer INLINE_TOOL_TIERS with the
+   * shared registry's own catalogLine. Loads @docmost/mcp for the shared specs
+   * (memoized). Core tools are always active and are NOT listed here. External
+   * MCP tools are catalogued separately by the caller (they are runtime-scoped).
+   */
+  async getInAppDeferredCatalog(): Promise<ToolCatalogEntry[]> {
+    const { sharedToolSpecs } = await loadDocmostMcp();
+    return buildInAppDeferredCatalog(sharedToolSpecs);
+  }
+
  async forUser(
    user: User,
    sessionId: string,
@@ -241,6 +241,11 @@ export interface SharedToolSpec {
  mcpName: string;
  inAppKey: string;
  description: string;
+  // Deferred-tool metadata (#332). Optional in this mirror so an older/stale
+  // @docmost/mcp build (pre-#332) still type-checks; the in-app catalog builder
+  // reads them defensively. The external /mcp server ignores both fields.
+  tier?: 'core' | 'deferred';
+  catalogLine?: string;
  // Loose `z` on purpose: the registry is zod-agnostic so the server can pass
  // its own zod (v4) and the MCP package its own (v3) into the same builder.
  buildShape?: (z: any) => Record<string, unknown>;
@@ -0,0 +1,244 @@
+import {
+  CORE_TOOL_KEYS,
+  CORE_TOOL_SET,
+  LOAD_TOOLS_NAME,
+  LOAD_TOOLS_DESCRIPTION,
+  INLINE_TOOL_TIERS,
+  buildInAppDeferredCatalog,
+  buildExternalToolCatalog,
+  shortenForCatalog,
+  applyLoadTools,
+} from './tool-tiers';
+// The real shared registry, imported from source (same approach as the
+// SHARED_TOOL_SPECS contract spec) so the tier metadata is checked against
+// exactly what @docmost/mcp ships.
+import { SHARED_TOOL_SPECS } from '../../../../../../packages/mcp/src/tool-specs';
+// For the live-toolset partition test (F3): the REAL adapter, so the catalog is
+// checked against the tools AiChatToolsService.forUser() actually builds — not a
+// static list that could drift from it.
+import { AiChatToolsService } from './ai-chat-tools.service';
+import * as loader from './docmost-client.loader';
+import type { DocmostClientLike } from './docmost-client.loader';
+
+/**
+ * #332 deferred tool loading — tier metadata, catalog assembly, and the
+ * loadTools meta-tool. Pure units; no Nest graph, no @docmost/mcp build (the
+ * registry is imported from TS source).
+ */
+
+describe('tool tier metadata (#332)', () => {
+  it('core set is the documented 13 + searchInPage (14)', () => {
+    expect(CORE_TOOL_KEYS).toHaveLength(14);
+    expect(CORE_TOOL_SET.has('searchInPage')).toBe(true); // #330, promoted to core
+    // loadTools is a meta-tool, not a normal core key.
+    expect(CORE_TOOL_SET.has(LOAD_TOOLS_NAME)).toBe(false);
+  });
+
+  it('SHARED_TOOL_SPECS tier agrees with CORE_TOOL_SET for every shared tool', () => {
+    for (const [key, spec] of Object.entries(SHARED_TOOL_SPECS)) {
+      const isCoreByTier = spec.tier === 'core';
+      const isCoreByList = CORE_TOOL_SET.has(key);
+      expect(isCoreByTier).toBe(isCoreByList);
+      // Every spec carries a non-empty catalogLine (core tools too).
+      expect(typeof spec.catalogLine).toBe('string');
+      expect(spec.catalogLine.trim().length).toBeGreaterThan(0);
+    }
+  });
+
+  it('every INLINE tool tier agrees with CORE_TOOL_SET and has a catalogLine', () => {
+    for (const [key, meta] of Object.entries(INLINE_TOOL_TIERS)) {
+      expect(meta.tier === 'core').toBe(CORE_TOOL_SET.has(key));
+      expect(meta.catalogLine.trim().length).toBeGreaterThan(0);
+    }
+  });
+});
+
+describe('buildInAppDeferredCatalog (#332)', () => {
+  const catalog = buildInAppDeferredCatalog(SHARED_TOOL_SPECS as never);
+  const names = catalog.map((e) => e.name);
+
+  it('includes deferred tools from BOTH the inline map and the shared registry', () => {
+    expect(names).toContain('transformPage'); // inline deferred
+    expect(names).toContain('getPageJson'); // shared deferred
+    expect(names).toContain('patchNode'); // shared deferred
+    expect(names).toContain('createPage'); // inline deferred
+  });
+
+  it('NEVER lists a core tool', () => {
+    for (const core of CORE_TOOL_KEYS) {
+      expect(names).not.toContain(core);
+    }
+    // spot-check a couple that are core in each source.
+    expect(names).not.toContain('searchInPage'); // shared core
+    expect(names).not.toContain('searchPages'); // inline core
+    expect(names).not.toContain('editPageText'); // shared core
+  });
+
+  it('renders every entry as a "name — purpose" line', () => {
+    // Non-empty catalog (the length is pinned structurally by the live-toolset
+    // partition test below, not by a magic constant that rots on every new tool).
+    expect(catalog.length).toBeGreaterThan(0);
+    for (const entry of catalog) {
+      expect(entry.catalogLine).toMatch(/ — /);
+    }
+  });
+});
+
+/**
+ * F3 — the deferred <tool_catalog> is built from STATIC metadata (INLINE_TOOL_TIERS
+ * + SHARED_TOOL_SPECS), but the loadable-by-name set is derived at RUNTIME from the
+ * actual toolset (`Object.keys(baseTools)` in ai-chat.service.ts). Those two must
+ * agree or a tool becomes loadable-but-invisible (agent thinks it doesn't exist) or
+ * catalogued-but-phantom. INLINE_TOOL_TIERS is a plain hand-maintained Record with
+ * no compile-time link to the tools AiChatToolsService.forUser() builds, so nothing
+ * else catches that drift. This test uses forUser()'s LIVE keys as the source of
+ * truth (mirroring ai-chat-tools.service.spec.ts's loader mock) and asserts a
+ * two-way partition against buildInAppDeferredCatalog — replacing the old magic
+ * toHaveLength(28), so a tool added to forUser() without a catalog line (or a
+ * catalog line without a real tool) fails the suite instead of silently vanishing.
+ */
+describe('deferred catalog ↔ live forUser() toolset partition (#332, F3)', () => {
+  let toolKeys: string[];
+  const catalogNames = buildInAppDeferredCatalog(SHARED_TOOL_SPECS as never).map(
+    (e) => e.name,
+  );
+
+  beforeAll(async () => {
+    // Intercept the ESM loader so forUser() builds against the TS-source shared
+    // specs (no @docmost/mcp build) and never touches the network.
+    jest.spyOn(loader, 'loadDocmostMcp').mockResolvedValue({
+      DocmostClient: function () {
+        return {} as DocmostClientLike;
+      } as unknown as loader.DocmostClientCtor,
+      sharedToolSpecs: SHARED_TOOL_SPECS as Record<string, loader.SharedToolSpec>,
+    });
+    const service = new AiChatToolsService(
+      {
+        generateAccessToken: jest.fn().mockResolvedValue('access-token'),
+        generateCollabToken: jest.fn().mockResolvedValue('collab-token'),
+      } as never,
+      {} as never, // aiService — not exercised while merely BUILDING the tools
+      {} as never, // pageEmbeddingRepo
+      {} as never, // spaceMemberRepo
+      {} as never, // pagePermissionRepo
+      // sandboxStore: forUser() eagerly calls asSink() to wire the stash tool.
+      {
+        asSink: () => ({ put: jest.fn(), has: jest.fn(), evict: jest.fn() }),
+      } as never,
+    );
+    const tools = await service.forUser(
+      { id: 'user-1', email: 'u@example.com', workspaceId: 'ws-1' } as never,
+      'session-1',
+      'ws-1',
+      'chat-1',
+    );
+    toolKeys = Object.keys(tools);
+  });
+
+  afterAll(() => {
+    jest.restoreAllMocks();
+  });
+
+  it('exposes a non-trivial toolset (sanity: the mock actually built tools)', () => {
+    expect(toolKeys.length).toBeGreaterThan(20);
+  });
+
+  it('every non-core live tool is present in the catalog (no capability silently hidden)', () => {
+    // forUser() does not itself add loadTools (ai-chat.service does), but guard
+    // anyway. Every remaining non-core key MUST have a catalog line.
+    const catalogSet = new Set(catalogNames);
+    const missing = toolKeys.filter(
+      (k) => !CORE_TOOL_SET.has(k) && k !== LOAD_TOOLS_NAME && !catalogSet.has(k),
+    );
+    expect(missing).toEqual([]);
+  });
+
+  it('every catalog entry corresponds to a real, non-core live tool (no phantom)', () => {
+    const liveSet = new Set(toolKeys);
+    const phantom = catalogNames.filter(
+      (n) => !liveSet.has(n) || CORE_TOOL_SET.has(n),
+    );
+    expect(phantom).toEqual([]);
+  });
+});
+
+describe('buildExternalToolCatalog + shortenForCatalog (#332)', () => {
+  it('derives a short "name — purpose" line from each external tool description', () => {
+    const catalog = buildExternalToolCatalog({
+      tavily_search: { description: 'Search the web for fresh results. More detail here.' },
+      tavily_extract: { description: '' },
+    });
+    expect(catalog).toEqual([
+      { name: 'tavily_search', catalogLine: 'tavily_search — Search the web for fresh results.' },
+      { name: 'tavily_extract', catalogLine: 'tavily_extract — external tool' },
+    ]);
+  });
+
+  it('caps a very long description', () => {
+    const long = 'x'.repeat(500);
+    expect(shortenForCatalog(long).length).toBeLessThanOrEqual(140);
+    expect(shortenForCatalog(long).endsWith('…')).toBe(true);
+  });
+});
+
+describe('applyLoadTools (#332)', () => {
+  const valid = new Set(['createPage', 'transformPage', 'tavily_search']);
+
+  it('adds valid names to the activated set and returns { loaded }', () => {
+    const activated = new Set<string>();
+    const result = applyLoadTools(['createPage', 'tavily_search'], activated, valid);
+    expect(result).toEqual({ loaded: ['createPage', 'tavily_search'] });
+    expect(activated.has('createPage')).toBe(true);
+    expect(activated.has('tavily_search')).toBe(true);
+  });
+
+  it('rejects an unknown name with an error listing the valid deferred names', () => {
+    const activated = new Set<string>();
+    expect(() => applyLoadTools(['nope'], activated, valid)).toThrow(/unknown tool name/i);
+    try {
+      applyLoadTools(['nope'], activated, valid);
+    } catch (e) {
+      const msg = (e as Error).message;
+      // Lists every valid name (sorted).
+      expect(msg).toContain('createPage');
+      expect(msg).toContain('transformPage');
+      expect(msg).toContain('tavily_search');
+    }
+    // Nothing is activated on a rejected call.
+    expect(activated.size).toBe(0);
+  });
+
+  it('tolerates a non-array / empty input (loads nothing)', () => {
+    const activated = new Set<string>();
+    expect(applyLoadTools(undefined, activated, valid)).toEqual({ loaded: [] });
+    expect(applyLoadTools([], activated, valid)).toEqual({ loaded: [] });
+    expect(activated.size).toBe(0);
+  });
+
+  it('loadTools description is the verbatim issue text', () => {
+    expect(LOAD_TOOLS_DESCRIPTION).toContain('only ACTIVATES them');
+    expect(LOAD_TOOLS_DESCRIPTION).toContain('callable on your NEXT step');
+  });
+});
+
+describe('editorial "Corrector" scenario is fully served by CORE (#332)', () => {
+  it('read + comment + edit + search need no loadTools', () => {
+    // A Corrector role reads a page, searches within it, edits text, and leaves
+    // inline comments — every tool it needs is core, so it never has to load a
+    // deferred tool.
+    const needed = [
+      'getCurrentPage',
+      'getPage',
+      'searchPages',
+      'searchInPage',
+      'editPageText',
+      'createComment',
+      'listComments',
+      'getComment',
+      'resolveComment',
+    ];
+    for (const t of needed) {
+      expect(CORE_TOOL_SET.has(t)).toBe(true);
+    }
+  });
+});
@@ -0,0 +1,309 @@
+import { tool, type Tool } from 'ai';
+import { z } from 'zod';
+import type { SharedToolSpec } from './docmost-client.loader';
+
+/**
+ * Deferred tool loading for the in-app AI chat (#332).
+ *
+ * The agent otherwise sends ALL ~41 tool definitions on EVERY model call every
+ * step, bloating context. Instead we split the in-app tools into two tiers:
+ *
+ *  - CORE (hot, always active): frequent OR tiny tools whose full schema is
+ *    always visible, plus the `loadTools` meta-tool. Deferring a one-line tool is
+ *    pure loss, so tiny tools stay core even if rare.
+ *  - DEFERRED (loaded on demand): the fat/rare tools + ALL external MCP tools by
+ *    default. The model sees only a compact <tool_catalog> (name — purpose) and
+ *    calls `loadTools(names)` to ACTIVATE a tool's full schema for the NEXT step
+ *    (one extra round-trip on first use).
+ *
+ * This module is the single source of truth for the IN-APP tiering:
+ *  - CORE_TOOL_KEYS / CORE_TOOL_SET — the authoritative core list (used by
+ *    prepareAgentStep to build per-step `activeTools`).
+ *  - INLINE_TOOL_TIERS — tier + catalogLine for the per-layer INLINE tools (the
+ *    ones NOT in @docmost/mcp's SHARED_TOOL_SPECS, which carry their own).
+ *  - buildInAppDeferredCatalog / buildExternalToolCatalog — assemble the
+ *    <tool_catalog> deferred lines.
+ *  - applyLoadTools / makeLoadToolsTool — the loadTools meta-tool.
+ *
+ * The tier/catalogLine fields on SHARED_TOOL_SPECS are IN-APP metadata only; the
+ * external /mcp server ignores them and exposes every tool normally.
+ */
+
+/** A single rendered <tool_catalog> line: the tool name + its "name — purpose". */
+export interface ToolCatalogEntry {
+  /** Exact tool name the model must pass to loadTools. */
+  name: string;
+  /** Hand-written (in-app) or derived (external) "name — purpose" line. */
+  catalogLine: string;
+}
+
+/**
+ * CORE (always-active) in-app tool keys — 13 frequent/tiny tools. `searchInPage`
+ * (#330) is added to core on top of the issue's original tier list: it is
+ * frequent for the editorial roles this feature targets. `loadTools` is active
+ * too but is not a normal tool key (it is added to activeTools separately).
+ */
+export const CORE_TOOL_KEYS = [
+  'searchPages',
+  'listPages',
+  'listSpaces',
+  'getWorkspace',
+  'getCurrentPage',
+  'getPage',
+  'getOutline',
+  'getNode',
+  'createComment',
+  'getComment',
+  'listComments',
+  'resolveComment',
+  'editPageText',
+  // #330 search_in_page — frequent for editorial sweeps; core despite predating
+  // the issue's tier list.
+  'searchInPage',
+] as const;
+
+/** O(1) membership test for the core tier. */
+export const CORE_TOOL_SET: ReadonlySet<string> = new Set(CORE_TOOL_KEYS);
+
+/** The meta-tool name (always active alongside the core tools when enabled). */
+export const LOAD_TOOLS_NAME = 'loadTools';
+
+/**
+ * loadTools description — VERBATIM from issue #332. Tells the model that the
+ * catalog names EXIST, that loadTools only ACTIVATES them (callable next step),
+ * and to load several at once.
+ */
+export const LOAD_TOOLS_DESCRIPTION =
+  'loadTools — Load the full definitions of deferred tools from the <tool_catalog>\n' +
+  'block in your instructions. Pass the EXACT tool names from the catalog; this\n' +
+  'call only ACTIVATES them and returns { loaded: [...] } — the tools become\n' +
+  'callable on your NEXT step. Load several names in one call when the task clearly\n' +
+  'needs them. Unknown names are rejected with the list of valid ones.';
+
+/**
+ * Tier + catalogLine for the INLINE ai-chat tools — those defined per-layer in
+ * ai-chat-tools.service.ts and NOT present in @docmost/mcp's SHARED_TOOL_SPECS
+ * (which carries its own tier/catalogLine). Together with the shared registry
+ * this describes every in-app tool. catalogLine is present for core tools too
+ * (uniformity), but only DEFERRED tools are rendered into the catalog.
+ */
+export const INLINE_TOOL_TIERS: Record<
+  string,
+  { tier: 'core' | 'deferred'; catalogLine: string }
+> = {
+  // --- core inline ---
+  searchPages: {
+    tier: 'core',
+    catalogLine: 'searchPages — hybrid semantic + keyword search across the wiki.',
+  },
+  getCurrentPage: {
+    tier: 'core',
+    catalogLine: 'getCurrentPage — the page the user is currently viewing.',
+  },
+  getPage: {
+    tier: 'core',
+    catalogLine: 'getPage — fetch a page as Markdown by its id.',
+  },
+  listPages: {
+    tier: 'core',
+    catalogLine: "listPages — list recent pages, or a space's full page tree.",
+  },
+  listComments: {
+    tier: 'core',
+    catalogLine: 'listComments — list all comments on a page (including resolved).',
+  },
+  getComment: {
+    tier: 'core',
+    catalogLine: 'getComment — fetch a single comment by id.',
+  },
+  createComment: {
+    tier: 'core',
+    catalogLine:
+      'createComment — add an inline comment (optionally with a suggested edit).',
+  },
+  resolveComment: {
+    tier: 'core',
+    catalogLine: 'resolveComment — resolve or reopen a comment thread.',
+  },
+
+  // --- deferred inline ---
+  createPage: {
+    tier: 'deferred',
+    catalogLine: 'createPage — create a new page with a Markdown body in a space.',
+  },
+  updatePageContent: {
+    tier: 'deferred',
+    catalogLine:
+      "updatePageContent — replace a page's body (and optionally title) with new Markdown.",
+  },
+  renamePage: {
+    tier: 'deferred',
+    catalogLine: "renamePage — change a page's title only (body untouched).",
+  },
+  movePage: {
+    tier: 'deferred',
+    catalogLine: 'movePage — move a page under a new parent or to the space root.',
+  },
+  deletePage: {
+    tier: 'deferred',
+    catalogLine: 'deletePage — move a page to trash (soft delete, reversible).',
+  },
+  listSidebarPages: {
+    tier: 'deferred',
+    catalogLine:
+      "listSidebarPages — list a space's root pages or a page's direct children.",
+  },
+  getTable: {
+    tier: 'deferred',
+    catalogLine: 'getTable — read a table as a matrix of cell texts and cell ids.',
+  },
+  checkNewComments: {
+    tier: 'deferred',
+    catalogLine:
+      'checkNewComments — find comments in a space created after a timestamp.',
+  },
+  getPageHistory: {
+    tier: 'deferred',
+    catalogLine:
+      'getPageHistory — fetch one page-history version with its ProseMirror content.',
+  },
+  exportPageMarkdown: {
+    tier: 'deferred',
+    catalogLine:
+      'exportPageMarkdown — export a page to self-contained Markdown (body + comments).',
+  },
+  updatePageJson: {
+    tier: 'deferred',
+    catalogLine:
+      "updatePageJson — overwrite a page's body with a full ProseMirror document.",
+  },
+  tableInsertRow: {
+    tier: 'deferred',
+    catalogLine: 'tableInsertRow — insert a row of plain-text cells into a table.',
+  },
+  tableDeleteRow: {
+    tier: 'deferred',
+    catalogLine: 'tableDeleteRow — delete a table row at a 0-based index.',
+  },
+  tableUpdateCell: {
+    tier: 'deferred',
+    catalogLine: 'tableUpdateCell — set the text of a table cell at [row, col].',
+  },
+  sharePage: {
+    tier: 'deferred',
+    catalogLine: 'sharePage — make a page publicly accessible and return its URL.',
+  },
+  transformPage: {
+    tier: 'deferred',
+    catalogLine: "transformPage — run a sandboxed JS transform over a page's document.",
+  },
+};
+
+/**
+ * Build the <tool_catalog> deferred lines for the IN-APP tools by merging the
+ * two metadata sources: the per-layer INLINE_TOOL_TIERS and the shared registry
+ * (SHARED_TOOL_SPECS, loaded at runtime). Only DEFERRED tools are included; core
+ * tools are always active and never appear in the catalog. Pure — the caller
+ * passes the loaded specs so this stays unit-testable.
+ */
+export function buildInAppDeferredCatalog(
+  sharedToolSpecs: Record<string, SharedToolSpec>,
+): ToolCatalogEntry[] {
+  const entries: ToolCatalogEntry[] = [];
+  // Inline deferred tools (hand-written lines).
+  for (const [name, meta] of Object.entries(INLINE_TOOL_TIERS)) {
+    if (meta.tier === 'deferred') {
+      entries.push({ name, catalogLine: meta.catalogLine });
+    }
+  }
+  // Shared deferred tools (line comes from the registry's own catalogLine).
+  for (const [name, spec] of Object.entries(sharedToolSpecs)) {
+    if (spec.tier === 'deferred' && spec.catalogLine) {
+      entries.push({ name, catalogLine: spec.catalogLine });
+    }
+  }
+  return entries;
+}
+
+/**
+ * Cap an external tool's (untrusted) description into a short catalog purpose.
+ * External MCP tools have no hand-written catalogLine, so we derive one from the
+ * first sentence of the description, hard-capped. Whitespace is collapsed.
+ */
+export function shortenForCatalog(description: string, max = 140): string {
+  const flat = description.replace(/\s+/g, ' ').trim();
+  if (!flat) return 'external tool';
+  // Prefer the first sentence if it is reasonably short.
+  const firstSentence = flat.split(/(?<=[.!?])\s/)[0];
+  const base =
+    firstSentence.length > 0 && firstSentence.length <= max
+      ? firstSentence
+      : flat;
+  return base.length > max ? `${base.slice(0, max - 1).trimEnd()}…` : base;
+}
+
+/**
+ * Build catalog lines for the EXTERNAL MCP tools (all deferred by default,
+ * #332). Their names are the namespaced tool keys; the purpose is derived from
+ * each tool's own description (no hand-written line exists). Pure.
+ */
+export function buildExternalToolCatalog(
+  externalTools: Record<string, { description?: string } | undefined>,
+): ToolCatalogEntry[] {
+  return Object.entries(externalTools).map(([name, t]) => ({
+    name,
+    catalogLine: `${name} — ${shortenForCatalog(t?.description ?? '')}`,
+  }));
+}
+
+/**
+ * Pure core of the loadTools meta-tool. Validates the requested names against
+ * the per-turn set of valid deferred names, ADDS the valid ones to the caller's
+ * mutable `activatedTools` set (so they become callable next step), and returns
+ * `{ loaded }`. An unknown name throws a clear error listing the valid deferred
+ * names — surfaced to the model as a tool error so it can retry.
+ */
+export function applyLoadTools(
+  names: unknown,
+  activatedTools: Set<string>,
+  validDeferredNames: ReadonlySet<string>,
+): { loaded: string[] } {
+  const requested = Array.isArray(names)
+    ? names.filter((n): n is string => typeof n === 'string')
+    : [];
+  const unknown = requested.filter((n) => !validDeferredNames.has(n));
+  if (unknown.length > 0) {
+    const valid = [...validDeferredNames].sort().join(', ');
+    throw new Error(
+      `loadTools: unknown tool name(s): ${unknown.join(', ')}. ` +
+        `Valid deferred tools are: ${valid || '(none)'}.`,
+    );
+  }
+  for (const n of requested) activatedTools.add(n);
+  return { loaded: requested };
+}
+
+/**
+ * Build the loadTools AI-SDK tool bound to THIS turn's mutable state: the
+ * `activatedTools` set (grown by execute, read by prepareAgentStep next step)
+ * and the `validDeferredNames` set (every non-core tool in this turn's toolset,
+ * incl. external MCP). Created per streamText call — never module-global.
+ */
+export function makeLoadToolsTool(
+  activatedTools: Set<string>,
+  validDeferredNames: ReadonlySet<string>,
+): Tool {
+  return tool({
+    description: LOAD_TOOLS_DESCRIPTION,
+    inputSchema: z.object({
+      names: z
+        .array(z.string())
+        .describe(
+          'EXACT deferred tool names from the <tool_catalog> to activate for ' +
+            'your next step.',
+        ),
+    }),
+    execute: async ({ names }) =>
+      applyLoadTools(names, activatedTools, validDeferredNames),
+  });
+}
@@ -261,6 +261,21 @@ export class EnvironmentService {
    return disable === 'true';
  }

+  /**
+   * Deferred tool loading for the in-app AI chat (#332). When enabled, the agent
+   * sees a compact <tool_catalog> and only CORE tools + the loadTools meta-tool
+   * are active each step; deferred tools (the fat/rare ones + all external MCP
+   * tools) load on demand. Defaults to ENABLED — the issue treats deferred
+   * loading as the new behavior; set AI_CHAT_DEFERRED_TOOLS=false to restore the
+   * old "all tools always active" behavior.
+   */
+  isAiChatDeferredToolsEnabled(): boolean {
+    const enabled = this.configService
+      .get<string>('AI_CHAT_DEFERRED_TOOLS', 'true')
+      .toLowerCase();
+    return enabled === 'true';
+  }
+
  getPostHogHost(): string {
    return this.configService.get<string>('POSTHOG_HOST');
  }
@@ -1,5 +1,7 @@
 import * as http from 'node:http';
 import { Kysely } from 'kysely';
+import { tool } from 'ai';
+import { z } from 'zod';
 import { MockLanguageModelV3, convertArrayToReadableStream } from 'ai/test';
 import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
 import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
@@ -146,6 +148,9 @@ describe('AiChatService.stream [integration]', () => {
      {} as any, // aiAgentRoleRepo (role is pre-resolved + passed in)
      {} as any, // pageRepo (only used when body.openPage is set)
      {} as any, // pageAccess (idem)
+      // environment (#332): keep deferred tool loading OFF for this lifecycle
+      // harness so the toolset/behavior is exactly as before.
+      { isAiChatDeferredToolsEnabled: () => false } as any,
    );
  }

@@ -315,4 +320,174 @@ describe('AiChatService.stream [integration]', () => {
      true,
    );
  });
+
+  /**
+   * #332 deferred tool loading, the ON path. The riskiest property is that the
+   * per-turn `activatedTools` Set is created FRESH inside each stream() call, so a
+   * tool a previous turn activated via loadTools is NOT still active when the next
+   * turn starts — the new turn begins "cold" (CORE + loadTools only). The unit
+   * tests only exercise pure prepareAgentStep with hand-fed Sets; this pins the
+   * real wiring end-to-end (loadTools.execute -> activatedTools -> prepareStep ->
+   * per-step activeTools) against the real streamText loop, and proves there is no
+   * cross-turn leak. We drive a MockLanguageModelV3 whose step 1 calls
+   * loadTools(['createPage']) and assert, via the model's recorded per-step
+   * CallOptions.tools (the AI SDK filters the provider tool list by activeTools),
+   * that the deferred tool becomes active on the SAME turn's next step but NOT on a
+   * fresh turn's first step.
+   */
+  describe('deferred tool loading ON — per-turn activation, no leak (#332)', () => {
+    // A stub deferred (non-core) tool the agent can activate. Its execute is never
+    // called — the model only needs to SEE it become active — but it must be a
+    // valid AI-SDK tool so the SDK includes it in a step's tool list once active.
+    const createPageStub = tool({
+      description: 'create a new page',
+      inputSchema: z.object({ title: z.string() }),
+      execute: async () => ({ id: 'p-stub' }),
+    });
+
+    // A CORE tool in the toolset, so a cold step shows CORE tools ARE active while
+    // the deferred createPage is not. `searchPages` is in CORE_TOOL_SET.
+    const searchPagesStub = tool({
+      description: 'search the wiki',
+      inputSchema: z.object({ query: z.string() }),
+      execute: async () => [],
+    });
+
+    // Same lifecycle harness as buildService() above, but with deferred loading ON
+    // and a toolset that exposes exactly one deferred tool (createPage) so it is
+    // catalogued + loadable-by-name. Kept separate so the OFF scenarios are
+    // untouched.
+    function buildDeferredService(): AiChatService {
+      return new AiChatService(
+        { getChatModel: async () => null } as any,
+        aiChatRepo,
+        msgRepo,
+        {} as any,
+        { resolve: async () => null } as any,
+        {
+          forUser: async () => ({
+            searchPages: searchPagesStub,
+            createPage: createPageStub,
+          }),
+          getInAppDeferredCatalog: async () => [
+            { name: 'createPage', catalogLine: 'createPage — create a new page.' },
+          ],
+        } as any,
+        mcpClients as any,
+        {} as any,
+        {} as any,
+        {} as any,
+        // #332: deferred tool loading ON — the property under test.
+        { isAiChatDeferredToolsEnabled: () => true } as any,
+      );
+    }
+
+    // Drive ONE stream() turn against `model` and wait for the assistant row to
+    // settle (mirrors runStream, but builds the deferred-ON service).
+    async function runDeferredTurn(
+      model: MockLanguageModelV3,
+      chatId: string,
+      body: any,
+    ): Promise<void> {
+      closeCalls = 0;
+      const service = buildDeferredService();
+      const { res, cleanup } = await makeRealResponse();
+      try {
+        await service.stream({
+          user: { id: userId, workspaceId } as any,
+          workspace: { id: workspaceId, name: 'WS' } as any,
+          sessionId: 'sess-1',
+          body,
+          res: { raw: res } as any,
+          signal: new AbortController().signal,
+          model: model as any,
+          role: null,
+        } as any);
+        await waitFor(async () => {
+          const rows = await msgRepo.findAllByChat(chatId, workspaceId);
+          return rows.some(
+            (r) =>
+              r.role === 'assistant' &&
+              ['completed', 'error', 'aborted'].includes(r.status as string),
+          );
+        });
+        await waitFor(() => closeCalls > 0, { timeoutMs: 5_000 });
+      } finally {
+        await cleanup();
+      }
+    }
+
+    // Tool names the provider actually received for a recorded step (activeTools
+    // filters this list, so it reflects what was active that step).
+    const toolNames = (call: any): string[] =>
+      ((call?.tools ?? []) as any[]).map((t) => t?.name).filter(Boolean);
+
+    // A model that, on step 1, calls loadTools(['createPage']); on step 2, answers.
+    function loadThenAnswerModel(): MockLanguageModelV3 {
+      let step = 0;
+      return new MockLanguageModelV3({
+        doStream: async () => {
+          const n = step++;
+          if (n === 0) {
+            return {
+              stream: convertArrayToReadableStream([
+                { type: 'stream-start', warnings: [] },
+                {
+                  type: 'tool-call',
+                  toolCallId: 'lt1',
+                  toolName: 'loadTools',
+                  input: JSON.stringify({ names: ['createPage'] }),
+                },
+                {
+                  type: 'finish',
+                  finishReason: 'tool-calls',
+                  usage: { inputTokens: 5, outputTokens: 3, totalTokens: 8 },
+                },
+              ] as any),
+            };
+          }
+          return { stream: successStream() };
+        },
+      } as any);
+    }
+
+    it('activates a deferred tool for the SAME turn, and a NEW turn starts cold (no leak)', async () => {
+      const chatId = (await createChat(db, { workspaceId, creatorId: userId })).id;
+
+      // --- Turn 1: loadTools(createPage) on step 1, then answer on step 2. ---
+      const model1 = loadThenAnswerModel();
+      await runDeferredTurn(model1, chatId, {
+        chatId,
+        messages: [userUiMessage('Make me a page')],
+      });
+
+      // The turn ran at least two steps (the load round-trip + the answer).
+      expect(model1.doStreamCalls.length).toBeGreaterThanOrEqual(2);
+      const step1Tools = toolNames(model1.doStreamCalls[0]);
+      const step2Tools = toolNames(model1.doStreamCalls[1]);
+
+      // Step 1 starts cold: CORE tools + the loadTools meta-tool are active, but
+      // the deferred createPage is NOT yet.
+      expect(step1Tools).toContain('loadTools');
+      expect(step1Tools).toContain('searchPages'); // a CORE tool, always active
+      expect(step1Tools).not.toContain('createPage');
+      // Step 2 of the SAME turn sees the just-activated deferred tool.
+      expect(step2Tools).toContain('createPage');
+
+      // --- Turn 2 on the SAME chat: must start cold again. ---
+      const model2 = new MockLanguageModelV3({
+        doStream: async () => ({ stream: successStream() }),
+      } as any);
+      await runDeferredTurn(model2, chatId, {
+        chatId,
+        messages: [userUiMessage('And another thing')],
+      });
+
+      const nextTurnFirstStep = toolNames(model2.doStreamCalls[0]);
+      expect(nextTurnFirstStep).toContain('loadTools');
+      // The activated set is per-turn: the prior turn's createPage did NOT leak,
+      // so the fresh turn's first step sees it deferred again.
+      expect(nextTurnFirstStep).not.toContain('createPage');
+    });
+  });
 });
@@ -31,6 +31,22 @@ export interface SharedToolSpec {
  inAppKey: string;
  /** Single canonical model-facing description used by both layers. */
  description: string;
+  /**
+   * Deferred-tool tier for the IN-APP agent (#332). 'core' tools are always
+   * active; 'deferred' tools are hidden behind the <tool_catalog> and loaded on
+   * demand via the loadTools meta-tool. This is an IN-APP concern only: the
+   * standalone /mcp server ignores this field and registers every tool normally
+   * (registerShared in index.ts reads mcpName/description/buildShape only).
+   */
+  tier: 'core' | 'deferred';
+  /**
+   * Hand-written one-liner "name — purpose" shown in the in-app agent's
+   * <tool_catalog> for a DEFERRED tool (#332). Deliberately NOT derived from the
+   * description's first sentence — a concise, accurate purpose line. Present on
+   * every spec (core tools too) for uniformity; only deferred ones are rendered.
+   * Inert for the external /mcp server.
+   */
+  catalogLine: string;
  /**
   * Builds the tool's input schema as a plain object of zod fields (a
   * ZodRawShape). Called with the consumer's own zod namespace. Omitted for
@@ -47,6 +63,8 @@ export const SHARED_TOOL_SPECS = {
    mcpName: 'get_workspace',
    inAppKey: 'getWorkspace',
    description: 'Fetch metadata about the current workspace (name, settings).',
+    tier: 'core',
+    catalogLine: 'getWorkspace — fetch current workspace metadata (name, settings).',
  },

  listSpaces: {
@@ -55,6 +73,8 @@ export const SHARED_TOOL_SPECS = {
    description:
      'List the spaces the current user can access. Returns the array of ' +
      'spaces (id, name, slug, ...).',
+    tier: 'core',
+    catalogLine: 'listSpaces — list the spaces the user can access (id, name, slug).',
  },

  listShares: {
@@ -62,6 +82,8 @@ export const SHARED_TOOL_SPECS = {
    inAppKey: 'listShares',
    description:
      'List all public shares in the workspace with page titles and public URLs.',
+    tier: 'deferred',
+    catalogLine: 'listShares — list all public shares in the workspace with their URLs.',
  },

  // --- single-pageId read tools ---
@@ -74,6 +96,9 @@ export const SHARED_TOOL_SPECS = {
      'includes block ids, callouts, tables, link/image attributes) plus the ' +
      'slugId used in URLs. Use the block ids it returns to make precise ' +
      'structural edits or surgical text edits without resending the page.',
+    tier: 'deferred',
+    catalogLine:
+      "getPageJson — get a page's raw ProseMirror JSON (lossless, with block ids).",
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),
@@ -88,6 +113,9 @@ export const SHARED_TOOL_SPECS = {
      'count) WITHOUT the full document body. Use it to locate sections/tables ' +
      'and grab block ids cheaply before fetching, patching or inserting ' +
      'individual blocks.',
+    tier: 'core',
+    catalogLine:
+      "getOutline — compact outline of a page's top-level blocks with their ids.",
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),
@@ -104,6 +132,9 @@ export const SHARED_TOOL_SPECS = {
      'outline or page-JSON view (works for headings/paragraphs/callouts/images), OR ' +
      '`#<index>` to fetch a top-level block by its outline index — use the ' +
      '`#<index>` form for tables/rows/cells, which carry no id.',
+    tier: 'core',
+    catalogLine:
+      "getNode — fetch one block's ProseMirror subtree by block id or #index.",
    buildShape: (z) => ({
      pageId: z.string().min(1),
      nodeId: z.string().min(1),
@@ -137,6 +168,9 @@ export const SHARED_TOOL_SPECS = {
      'caseSensitive:true to match case. Ideal for systematic ' +
      'editorial sweeps (unquoted "ё", straight quotes, "т.е.", stray units). An ' +
      'invalid regex or an empty query returns a clear error to fix.',
+    tier: 'core',
+    catalogLine:
+      'searchInPage — find every occurrence of a string/regex inside one page, with locations.',
    buildShape: (z) => ({
      pageId: z.string().min(1).describe('ID of the page to search'),
      query: z
@@ -172,6 +206,8 @@ export const SHARED_TOOL_SPECS = {
    description:
      'Remove a single block by its attrs.id (from the page outline or ' +
      'page-JSON view) WITHOUT resending the whole document.',
+    tier: 'deferred',
+    catalogLine: 'deleteNode — remove a single content block by its block id.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      nodeId: z.string().min(1),
@@ -203,6 +239,9 @@ export const SHARED_TOOL_SPECS = {
      'JSON object or a JSON string (both accepted). Cheaper and safer than ' +
      'replacing the whole document for one-block structural edits. Reversible: ' +
      'the previous version is kept in page history.',
+    tier: 'deferred',
+    catalogLine:
+      'patchNode — replace one block with a new ProseMirror node, keeping its id.',
    buildShape: (z) => ({
      pageId: z.string().min(1).describe('ID of the page containing the block'),
      nodeId: z
@@ -245,6 +284,9 @@ export const SHARED_TOOL_SPECS = {
      '[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
      '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
      'JSON object or a JSON string (both accepted). Reversible via page history.',
+    tier: 'deferred',
+    catalogLine:
+      'insertNode — insert a block before/after an anchor, or append at the end.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      node: z
@@ -278,6 +320,8 @@ export const SHARED_TOOL_SPECS = {
    mcpName: 'unshare_page',
    inAppKey: 'unsharePage',
    description: 'Remove the public share of a page (revokes the public URL).',
+    tier: 'deferred',
+    catalogLine: "unsharePage — revoke a page's public share (removes the public URL).",
    buildShape: (z) => ({
      pageId: z.string().min(1).describe('ID of the page to unshare'),
    }),
@@ -295,6 +339,9 @@ export const SHARED_TOOL_SPECS = {
      "`from`/`to` each accept a historyId, or null/'current' for the page's " +
      'current content (defaults: from=current, to=current — pass a historyId ' +
      'from the page-history list to compare against the live page).',
+    tier: 'deferred',
+    catalogLine:
+      'diffPageVersions — diff two page versions and return the change set + summary.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      from: z
@@ -315,6 +362,9 @@ export const SHARED_TOOL_SPECS = {
      "List a page's saved versions (Docmost auto-snapshots on every save), " +
      'newest first, cursor-paginated. Returns { items, nextCursor }; each ' +
      "item's id is the historyId to pass to the page diff or restore tools.",
+    tier: 'deferred',
+    catalogLine:
+      "listPageHistory — list a page's saved versions (newest first, paginated).",
    buildShape: (z) => ({
      pageId: z.string().min(1),
      cursor: z
@@ -332,6 +382,9 @@ export const SHARED_TOOL_SPECS = {
      'as the page\'s current content (Docmost has no restore endpoint, so ' +
      'this creates a NEW history snapshot — the restore is itself revertible). ' +
      'Get the historyId from the page-history list.',
+    tier: 'deferred',
+    catalogLine:
+      'restorePageVersion — restore a page to a saved history version (revertible).',
    buildShape: (z) => ({
      historyId: z.string().min(1),
    }),
@@ -349,6 +402,9 @@ export const SHARED_TOOL_SPECS = {
      'thread records are NOT created/updated/deleted on the server by this ' +
      'tool — only the page body + inline comment marks are written; manage ' +
      'comment threads via the comment tools/UI.',
+    tier: 'deferred',
+    catalogLine:
+      "importPageMarkdown — replace a page's content from exported Docmost Markdown.",
    buildShape: (z) => ({
      pageId: z.string().min(1),
      markdown: z.string().min(1),
@@ -365,6 +421,9 @@ export const SHARED_TOOL_SPECS = {
      'entirely server-side — the document is NOT sent through the model. The ' +
      'target keeps its own title and slug; only its body is replaced. Ideal ' +
      "for 'make page A's content equal to B' or 'replace A with B but keep A's URL'.",
+    tier: 'deferred',
+    catalogLine:
+      "copyPageContent — replace one page's body with a copy of another page's body.",
    buildShape: (z) => ({
      sourcePageId: z.string().min(1).describe('Page to copy content FROM'),
      targetPageId: z
@@ -402,6 +461,9 @@ export const SHARED_TOOL_SPECS = {
      'page JSON and use a structural node patch/update to set its marks. ' +
      'Examples: edits:[{find:"teh",replace:"the"}]; edits:[{find:"Hello ' +
      'world",replace:"Hello there"}] (crosses a bold boundary).',
+    tier: 'core',
+    catalogLine:
+      "editPageText — surgical find/replace of plain text in a page, preserving ids/marks.",
    buildShape: (z) => ({
      pageId: z.string().describe('ID of the page to edit'),
      edits: z
@@ -440,6 +502,9 @@ export const SHARED_TOOL_SPECS = {
      'server instance that created it: in a multi-replica deployment without ' +
      'sticky sessions a blob stored on one instance is not retrievable via the ' +
      'sandbox URL on another (it 404s like an expired one).',
+    tier: 'deferred',
+    catalogLine:
+      'stashPage — serialize a whole page to a short anonymous URL without loading its body.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),