feat(mcp): add hierarchical tree mode to list_pages

list_pages gains an opt-in `tree` parameter on both surfaces (the
@docmost/mcp server tool and the AI-chat agent tool), which share the
same DocmostClient.listPages. Default behavior (recent-by-updatedAt flat
list) is unchanged.

- client.ts: listPages(spaceId?, limit=50, tree=false); when tree is
  true it requires spaceId (throws a specific error otherwise), walks the
  sidebar tree via the existing bounded/cycle-safe enumerateSpacePages,
  and returns a nested tree; limit is ignored in tree mode.
- lib/tree.ts: new pure buildPageTree() — lean nodes { id, slugId, title,
  children? }, children sorted by position (code-unit order), orphans
  promoted to roots, cycle-safe.
- index.ts + ai-chat-tools.service.ts: expose `tree` in the tool schemas
  and descriptions; docmost-client.loader.ts: mirror the new signature.
- tests: add packages/mcp/test/unit/tree.test.mjs (nesting, ordering,
  lean shape, orphan promotion, cycle/self-reference safety).
- rebuild @docmost/mcp (build/ is tracked and loaded at runtime).
This commit is contained in:
vvzvlad
2026-06-18 20:30:00 +03:00
parent 8178d21c00
commit 1e7a306f96
9 changed files with 407 additions and 18 deletions

View File

@@ -417,7 +417,8 @@ export class AiChatToolsService {
listPages: tool({
description:
'List the most recent pages, optionally scoped to a single space. ' +
'Returns a bounded list (default 50, max 100).',
'Returns a bounded list (default 50, max 100). Pass tree:true (with ' +
"spaceId) to instead get the space's full page hierarchy as a nested tree.",
inputSchema: z.object({
spaceId: z
.string()
@@ -430,9 +431,15 @@ export class AiChatToolsService {
.max(100)
.optional()
.describe('Maximum number of pages (1-100).'),
tree: z
.boolean()
.optional()
.describe(
'When true, return the full page hierarchy of the given space as a nested tree (children arrays) instead of the recent-pages flat list. Requires spaceId; ignores limit.',
),
}),
execute: async ({ spaceId, limit }) =>
await client.listPages(spaceId, limit),
execute: async ({ spaceId, limit, tree }) =>
await client.listPages(spaceId, limit, tree),
}),
listSidebarPages: tool({

View File

@@ -18,7 +18,11 @@ export interface DocmostClientLike {
): Promise<{ data: Record<string, unknown>; success: boolean }>;
getWorkspace(): Promise<{ data: Record<string, unknown>; success: boolean }>;
getSpaces(): Promise<unknown[]>;
listPages(spaceId?: string, limit?: number): Promise<unknown[]>;
listPages(
spaceId?: string,
limit?: number,
tree?: boolean,
): Promise<unknown[]>;
listSidebarPages(spaceId: string, pageId?: string): Promise<unknown[]>;
getOutline(pageId: string): Promise<Record<string, unknown>>;
getPageJson(pageId: string): Promise<Record<string, unknown>>;

View File

@@ -9,6 +9,7 @@ import WebSocket from "ws";
import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js";
import { docmostExtensions } from "./lib/docmost-schema.js";
import { buildPageTree } from "./lib/tree.js";
import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js";
import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js";
import { withPageLock } from "./lib/page-lock.js";
@@ -440,12 +441,29 @@ export class DocmostClient {
return spaces.map((space) => filterSpace(space));
}
/**
* List most recent pages (bounded). Fetching the whole space can exceed
* MCP response/time limits on large instances, so a single bounded page
* of results is returned (default 50, max 100).
* List pages in one of two modes.
*
* Default (`tree` false): most recent pages by updatedAt (descending),
* bounded. Fetching the whole space can exceed MCP response/time limits on
* large instances, so a single bounded page of results is returned (default
* 50, max 100) via the `/pages/recent` feed.
*
* Tree (`tree` true): the space's FULL page hierarchy as a nested tree (a node
* with children carries a `children` array; leaf nodes omit the key). This mode
* REQUIRES `spaceId` (a page tree is scoped to one space) and IGNORES `limit` —
* the whole hierarchy is returned.
* It walks the sidebar tree via `enumerateSpacePages`, which performs N
* sidebar requests and is bounded by that method's 10000-node cap (and skips
* soft-deleted pages server-side).
*/
async listPages(spaceId, limit = 50) {
async listPages(spaceId, limit = 50, tree = false) {
await this.ensureAuthenticated();
if (tree) {
if (!spaceId) {
throw new Error("list_pages: tree mode requires a spaceId (a page tree is scoped to one space). Pass spaceId, or omit tree to get the recent-pages list.");
}
const nodes = await this.enumerateSpacePages(spaceId);
return buildPageTree(nodes);
}
const clampedLimit = Math.max(1, Math.min(100, limit));
const payload = { limit: clampedLimit, page: 1 };
if (spaceId)

View File

@@ -63,7 +63,8 @@ export function createDocmostMcpServer(config) {
server.registerTool("list_pages", {
description: "List most recent pages in a space ordered by updatedAt (descending). " +
"Returns a bounded list (default 50, max 100) — use search for lookups " +
"in large spaces.",
"in large spaces. Pass tree:true (with spaceId) to instead get the " +
"space's full page hierarchy as a nested tree.",
inputSchema: {
spaceId: z.string().optional(),
limit: z
@@ -73,9 +74,13 @@ export function createDocmostMcpServer(config) {
.max(100)
.optional()
.describe("Max pages to return (default 50, max 100)"),
tree: z
.boolean()
.optional()
.describe("When true, return the space's full page hierarchy as a nested tree (each node has a children array) instead of the recent-by-updatedAt flat list. Requires spaceId; ignores limit."),
},
}, async ({ spaceId, limit }) => {
const result = await docmostClient.listPages(spaceId, limit ?? 50);
}, async ({ spaceId, limit, tree }) => {
const result = await docmostClient.listPages(spaceId, limit ?? 50, tree ?? false);
return jsonContent(result);
});
// Tool: get_page

View File

@@ -0,0 +1,89 @@
/**
 * Pure tree-builder: turn a flat array of sidebar-style page nodes (as produced
 * by `enumerateSpacePages`) into a nested tree.
 *
 * Input: a flat array of nodes. Each node is expected to carry at least
 *   { id, slugId, title, position, parentPageId } (extra fields are ignored).
 * Entries that are falsy, not objects, or have a falsy `id` are skipped.
 *
 * Output: an array of ROOT nodes, each shaped as
 *   { id, slugId, title, children? }
 * where `children` is the array of child nodes (same shape, recursively). The
 * `children` key is OMITTED entirely when a node has no children, to keep the
 * payload lean (parentPageId/position/hasChildren are intentionally dropped).
 *
 * Duplicate ids: every id appears AT MOST ONCE in the output. When the input
 * contains several records with the same id, the last record wins for all of
 * its fields (slugId, title, position, parentPageId); the node keeps the
 * input-order slot of its first occurrence for tie-breaking.
 *
 * Linking rule: a node is attached as a child of `parentPageId` only when that
 * parent id is actually present in the input. Otherwise (null / undefined
 * `parentPageId`, or a parent missing from the input) the node is promoted to
 * a ROOT, so an orphan surfaces at the top level rather than disappearing.
 *
 * Cycles: nodes whose parent chain loops (including a node parented to itself)
 * all have a present parent, so none of them becomes a root. They are linked
 * only to each other and are therefore absent from the returned tree, which
 * keeps the result finite and JSON-serializable.
 *
 * Ordering rule: the roots array and every `children` array are sorted
 * ascending by the node's `position` using plain `<` / `>` (code-unit order,
 * NOT localeCompare), matching the byte-order collation the original author
 * notes the server uses for the ASCII fractional-index keys (e.g. "a0", "a1").
 * Nodes with a null/undefined `position` sort last; ties keep input order.
 *
 * Pure: no I/O, no network, deterministic.
 *
 * @param nodes flat array of page records
 * @returns array of root nodes with nested `children`
 */
export function buildPageTree(nodes) {
    // Pass 1: collapse the input to one record per id (last record wins).
    // Position and parent are kept in side maps so they stay off the output.
    const outputById = new Map();
    const positionById = new Map();
    const parentIdById = new Map();
    for (const node of nodes) {
        if (!node || typeof node !== "object" || !node.id)
            continue;
        outputById.set(node.id, {
            id: node.id,
            slugId: node.slugId,
            title: node.title,
        });
        positionById.set(node.id, node.position);
        parentIdById.set(node.id, node.parentPageId);
    }
    // Comparator on the position string: code-unit order, missing last.
    const byPosition = (left, right) => {
        const a = positionById.get(left.id);
        const b = positionById.get(right.id);
        if (a == null)
            return b == null ? 0 : 1;
        if (b == null)
            return -1;
        if (a < b)
            return -1;
        if (a > b)
            return 1;
        return 0;
    };
    // Pass 2: link each UNIQUE id exactly once. Iterating the de-duplicated map
    // (not the raw input) is what prevents a repeated id from being emitted
    // several times. Map iteration follows first-insertion order, so for input
    // without duplicates this visits nodes in the original input order.
    const roots = [];
    for (const [id, output] of outputById) {
        const parentId = parentIdById.get(id);
        const parent = parentId ? outputById.get(parentId) : undefined;
        if (!parent) {
            // No parent, or parent not present in the input -> root.
            roots.push(output);
        }
        else if (parent.children) {
            parent.children.push(output);
        }
        else {
            // Created lazily so leaves never get a `children` key.
            parent.children = [output];
        }
    }
    // Pass 3: order every sibling list, then the roots.
    for (const output of outputById.values()) {
        if (output.children)
            output.children.sort(byPosition);
    }
    roots.sort(byPosition);
    return roots;
}

View File

@@ -23,6 +23,7 @@ import {
MutationResult,
} from "./lib/collaboration.js";
import { docmostExtensions } from "./lib/docmost-schema.js";
import { buildPageTree } from "./lib/tree.js";
import {
serializeDocmostMarkdown,
parseDocmostMarkdown,
@@ -581,12 +582,37 @@ export class DocmostClient {
}
/**
* List most recent pages (bounded). Fetching the whole space can exceed
* MCP response/time limits on large instances, so a single bounded page
* of results is returned (default 50, max 100).
* List pages in one of two modes.
*
* Default (`tree` false): most recent pages by updatedAt (descending),
* bounded. Fetching the whole space can exceed MCP response/time limits on
* large instances, so a single bounded page of results is returned (default
* 50, max 100) via the `/pages/recent` feed.
*
* Tree (`tree` true): the space's FULL page hierarchy as a nested tree (a node
* with children carries a `children` array; leaf nodes omit the key). This mode
* REQUIRES `spaceId` (a page tree is scoped to one space) and IGNORES `limit` —
* the whole hierarchy is returned.
* It walks the sidebar tree via `enumerateSpacePages`, which performs N
* sidebar requests and is bounded by that method's 10000-node cap (and skips
* soft-deleted pages server-side).
*/
async listPages(spaceId?: string, limit: number = 50) {
async listPages(
spaceId?: string,
limit: number = 50,
tree: boolean = false,
) {
await this.ensureAuthenticated();
if (tree) {
if (!spaceId) {
throw new Error(
"list_pages: tree mode requires a spaceId (a page tree is scoped to one space). Pass spaceId, or omit tree to get the recent-pages list.",
);
}
const nodes = await this.enumerateSpacePages(spaceId);
return buildPageTree(nodes);
}
const clampedLimit = Math.max(1, Math.min(100, limit));
const payload: Record<string, any> = { limit: clampedLimit, page: 1 };
if (spaceId) payload.spaceId = spaceId;

View File

@@ -92,7 +92,8 @@ server.registerTool(
description:
"List most recent pages in a space ordered by updatedAt (descending). " +
"Returns a bounded list (default 50, max 100) — use search for lookups " +
"in large spaces.",
"in large spaces. Pass tree:true (with spaceId) to instead get the " +
"space's full page hierarchy as a nested tree.",
inputSchema: {
spaceId: z.string().optional(),
limit: z
@@ -102,10 +103,16 @@ server.registerTool(
.max(100)
.optional()
.describe("Max pages to return (default 50, max 100)"),
tree: z
.boolean()
.optional()
.describe(
"When true, return the space's full page hierarchy as a nested tree (each node has a children array) instead of the recent-by-updatedAt flat list. Requires spaceId; ignores limit.",
),
},
},
async ({ spaceId, limit }) => {
const result = await docmostClient.listPages(spaceId, limit ?? 50);
async ({ spaceId, limit, tree }) => {
const result = await docmostClient.listPages(spaceId, limit ?? 50, tree ?? false);
return jsonContent(result);
},
);

View File

@@ -0,0 +1,94 @@
/**
 * Pure tree-builder: turn a flat array of sidebar-style page nodes (as produced
 * by `enumerateSpacePages`) into a nested tree.
 *
 * Input: a flat array of nodes. Each node is expected to carry at least
 *   { id, slugId, title, position, parentPageId } (extra fields are ignored).
 * Entries that are falsy, not objects, or have a falsy `id` are skipped.
 *
 * Output: an array of ROOT nodes, each shaped as
 *   { id, slugId, title, children? }
 * where `children` is the array of child nodes (same shape, recursively). The
 * `children` key is OMITTED entirely when a node has no children, to keep the
 * payload lean (parentPageId/position/hasChildren are intentionally dropped).
 *
 * Duplicate ids: every id appears AT MOST ONCE in the output. When the input
 * contains several records with the same id, the last record wins for all of
 * its fields (slugId, title, position, parentPageId); the node keeps the
 * input-order slot of its first occurrence for tie-breaking.
 *
 * Linking rule: a node is attached as a child of `parentPageId` only when that
 * parent id is actually present in the input. Otherwise (null / undefined
 * `parentPageId`, or a parent missing from the input) the node is promoted to
 * a ROOT, so an orphan surfaces at the top level rather than disappearing.
 *
 * Cycles: nodes whose parent chain loops (including a node parented to itself)
 * all have a present parent, so none of them becomes a root. They are linked
 * only to each other and are therefore absent from the returned tree, which
 * keeps the result finite and JSON-serializable.
 *
 * Ordering rule: the roots array and every `children` array are sorted
 * ascending by the node's `position` using plain `<` / `>` (code-unit order,
 * NOT localeCompare), matching the byte-order collation the original author
 * notes the server uses for the ASCII fractional-index keys (e.g. "a0", "a1").
 * Nodes with a null/undefined `position` sort last; ties keep input order.
 *
 * Pure: no I/O, no network, deterministic.
 *
 * @param nodes flat array of page records
 * @returns array of root nodes with nested `children`
 */
export function buildPageTree(nodes: any[]): any[] {
  type OutputNode = {
    id: string;
    slugId: any;
    title: any;
    children?: OutputNode[];
  };

  // Pass 1: collapse the input to one record per id (last record wins).
  // Position and parent are kept in side maps so they stay off the output.
  const outputById = new Map<string, OutputNode>();
  const positionById = new Map<string, string | null | undefined>();
  const parentIdById = new Map<string, string | null | undefined>();
  for (const node of nodes) {
    if (!node || typeof node !== "object" || !node.id) continue;
    outputById.set(node.id, {
      id: node.id,
      slugId: node.slugId,
      title: node.title,
    });
    positionById.set(node.id, node.position);
    parentIdById.set(node.id, node.parentPageId);
  }

  // Comparator on the position string: code-unit order, missing last.
  const byPosition = (left: OutputNode, right: OutputNode): number => {
    const a = positionById.get(left.id);
    const b = positionById.get(right.id);
    if (a == null) return b == null ? 0 : 1;
    if (b == null) return -1;
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  };

  // Pass 2: link each UNIQUE id exactly once. Iterating the de-duplicated map
  // (not the raw input) is what prevents a repeated id from being emitted
  // several times. Map iteration follows first-insertion order, so for input
  // without duplicates this visits nodes in the original input order.
  const roots: OutputNode[] = [];
  for (const [id, output] of outputById) {
    const parentId = parentIdById.get(id);
    const parent = parentId ? outputById.get(parentId) : undefined;
    if (!parent) {
      // No parent, or parent not present in the input -> root.
      roots.push(output);
    } else if (parent.children) {
      parent.children.push(output);
    } else {
      // Created lazily so leaves never get a `children` key.
      parent.children = [output];
    }
  }

  // Pass 3: order every sibling list, then the roots.
  for (const output of outputById.values()) {
    if (output.children) output.children.sort(byPosition);
  }
  roots.sort(byPosition);
  return roots;
}

View File

@@ -0,0 +1,139 @@
import { test } from "node:test";
import assert from "node:assert/strict";
import { buildPageTree } from "../../build/lib/tree.js";
// --- Fixture helpers -------------------------------------------------------

/**
 * Build one sidebar-style input record. `slugId` is derived as `s-<id>`.
 * The `parentPageId` key is only added when a parent is supplied, so root
 * fixtures have no such key at all.
 */
const page = (id, title, position, parentPageId) => {
  const record = { id, slugId: `s-${id}`, title, position };
  if (parentPageId !== undefined) {
    record.parentPageId = parentPageId;
  }
  return record;
};

/** Project a list of output nodes down to their ids. */
const idsOf = (list) => list.map((entry) => entry.id);

// --- Tests -----------------------------------------------------------------

test("buildPageTree nests two children under their parent", () => {
  const input = [
    page("root", "Root", "a0"),
    page("c1", "Child 1", "a0", "root"),
    page("c2", "Child 2", "a1", "root"),
  ];
  const tree = buildPageTree(input);

  assert.equal(tree.length, 1);
  const [top] = tree;
  assert.equal(top.id, "root");
  assert.equal(top.children.length, 2);
  assert.deepEqual(idsOf(top.children), ["c1", "c2"]);
});

test("buildPageTree sorts children and roots ascending by position", () => {
  // Both the roots and the children are deliberately supplied out of order.
  const shuffledRoots = [page("r2", "R2", "a2"), page("r1", "R1", "a1")];
  const shuffledChildren = [
    page("c2", "C2", "b1", "r1"),
    page("c1", "C1", "b0", "r1"),
  ];
  const tree = buildPageTree([...shuffledRoots, ...shuffledChildren]);

  assert.deepEqual(idsOf(tree), ["r1", "r2"]);
  assert.deepEqual(idsOf(tree[0].children), ["c1", "c2"]);
});

test("buildPageTree omits the children key for leaf nodes", () => {
  const tree = buildPageTree([page("leaf", "Leaf", "a0")]);

  assert.equal(tree.length, 1);
  assert.equal("children" in tree[0], false);
});

test("buildPageTree promotes an orphan (missing parent) to a root", () => {
  // The parent id points at a page that is NOT part of the input.
  const tree = buildPageTree([
    page("orphan", "Orphan", "a0", "does-not-exist"),
  ]);

  assert.equal(tree.length, 1);
  const [only] = tree;
  assert.equal(only.id, "orphan");
  assert.equal("children" in only, false);
});

test("buildPageTree is cycle-safe (two-node cycle does not recurse or appear in output)", () => {
  // A and B name each other as parent. Each parent is present, so neither is
  // promoted to a root; the cyclic pair is unreachable from the returned
  // roots, leaving a finite, JSON-serializable result.
  const tree = buildPageTree([
    page("A", "A", "a0", "B"),
    page("B", "B", "a1", "A"),
  ]);

  assert.deepEqual(tree, []);
  // Serializing must not throw even though the cycle exists internally.
  assert.doesNotThrow(() => JSON.stringify(tree));
});

test("buildPageTree is self-reference-safe (node parented to itself is dropped, no crash)", () => {
  const tree = buildPageTree([
    page("root", "Root", "a0"),
    // Parent is the node itself, which is present -> it is never a root.
    page("self", "Self", "a0", "self"),
  ]);

  assert.deepEqual(idsOf(tree), ["root"]);
  assert.doesNotThrow(() => JSON.stringify(tree));
});

test("buildPageTree output shape is lean (drops position/parentPageId/hasChildren)", () => {
  const [node] = buildPageTree([
    {
      ...page("p1", "P1", "a0", null),
      hasChildren: false,
      spaceId: "space-1",
    },
  ]);

  assert.deepEqual(node, { id: "p1", slugId: "s-p1", title: "P1" });
  for (const dropped of ["position", "parentPageId", "hasChildren", "spaceId"]) {
    assert.equal(dropped in node, false);
  }
});