diff --git a/.env.example b/.env.example index 6c5756fa..99e47021 100644 --- a/.env.example +++ b/.env.example @@ -68,3 +68,12 @@ DEBUG_DB=false # Log http requests LOG_HTTP=false + +# MCP server (community): service account the embedded MCP uses to talk to this Docmost instance +MCP_DOCMOST_EMAIL= +MCP_DOCMOST_PASSWORD= +# MCP_DOCMOST_API_URL=http://127.0.0.1:3000/api +# Optional bearer token to protect the /mcp endpoint. If unset, /mcp relies on +# the workspace MCP toggle and network isolation (do not expose the port publicly). +# MCP_TOKEN= +# MCP_SESSION_IDLE_MS=1800000 diff --git a/Dockerfile b/Dockerfile index 2e608066..41edfa9c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,8 @@ COPY --from=builder /app/apps/server/package.json /app/apps/server/package.json # Copy packages COPY --from=builder /app/packages/editor-ext/dist /app/packages/editor-ext/dist COPY --from=builder /app/packages/editor-ext/package.json /app/packages/editor-ext/package.json +COPY --from=builder /app/packages/mcp/build /app/packages/mcp/build +COPY --from=builder /app/packages/mcp/package.json /app/packages/mcp/package.json # Copy root package files COPY --from=builder /app/package.json /app/package.json diff --git a/apps/client/src/features/workspace/components/settings/components/mcp-settings.tsx b/apps/client/src/features/workspace/components/settings/components/mcp-settings.tsx new file mode 100644 index 00000000..bc5f0016 --- /dev/null +++ b/apps/client/src/features/workspace/components/settings/components/mcp-settings.tsx @@ -0,0 +1,98 @@ +import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts"; +import { useAtom } from "jotai"; +import { useState } from "react"; +import { updateWorkspace } from "@/features/workspace/services/workspace-service.ts"; +import { Switch, TextInput, Stack, ActionIcon, Tooltip } from "@mantine/core"; +import { notifications } from "@mantine/notifications"; +import { IconCopy, IconCheck } from "@tabler/icons-react"; +import 
useUserRole from "@/hooks/use-user-role.tsx"; +import { useTranslation } from "react-i18next"; +import { getAppUrl } from "@/lib/config.ts"; +import { CopyButton } from "@/components/common/copy-button.tsx"; + +export default function McpSettings() { + const { t } = useTranslation(); + const [workspace, setWorkspace] = useAtom(workspaceAtom); + const { isAdmin } = useUserRole(); + + const [checked, setChecked] = useState( + workspace?.settings?.ai?.mcp ?? false, + ); + const [isLoading, setIsLoading] = useState(false); + + const mcpUrl = `${getAppUrl()}/mcp`; + + async function handleToggle(value: boolean) { + setIsLoading(true); + const previous = checked; + setChecked(value); // optimistic update + try { + const updated = await updateWorkspace({ mcpEnabled: value }); + // Force settings.ai.mcp to the new value so the atom is consistent + // even if the response shape omits it. + setWorkspace({ + ...updated, + settings: { + ...updated.settings, + ai: { ...updated.settings?.ai, mcp: value }, + }, + }); + notifications.show({ message: t("Updated successfully") }); + } catch (err) { + console.log(err); + setChecked(previous); // revert on failure + notifications.show({ + message: t("Failed to update data"), + color: "red", + }); + } finally { + setIsLoading(false); + } + } + + return ( + + handleToggle(event.currentTarget.checked)} + /> + + {checked && ( + + {({ copied, copy }) => ( + + + {copied ? 
( + + ) : ( + + )} + + + )} + + } + /> + )} + + ); +} diff --git a/apps/client/src/pages/settings/workspace/workspace-settings.tsx b/apps/client/src/pages/settings/workspace/workspace-settings.tsx index bb759a9b..6df9be74 100644 --- a/apps/client/src/pages/settings/workspace/workspace-settings.tsx +++ b/apps/client/src/pages/settings/workspace/workspace-settings.tsx @@ -1,9 +1,11 @@ import SettingsTitle from "@/components/settings/settings-title.tsx"; import WorkspaceNameForm from "@/features/workspace/components/settings/components/workspace-name-form"; import WorkspaceIcon from "@/features/workspace/components/settings/components/workspace-icon.tsx"; +import McpSettings from "@/features/workspace/components/settings/components/mcp-settings.tsx"; import { useTranslation } from "react-i18next"; import { getAppName } from "@/lib/config.ts"; import { Helmet } from "react-helmet-async"; +import { Divider } from "@mantine/core"; export default function WorkspaceSettings() { const { t } = useTranslation(); @@ -15,6 +17,11 @@ export default function WorkspaceSettings() { + + + + + ); } diff --git a/apps/server/package.json b/apps/server/package.json index dabfe8ec..8e4e565f 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -38,6 +38,7 @@ "@aws-sdk/s3-request-presigner": "3.1050.0", "@azure/storage-blob": "12.31.0", "@clickhouse/client": "^1.18.2", + "@docmost/mcp": "workspace:*", "@docmost/pdf-inspector": "1.9.6", "@fastify/cookie": "^11.0.2", "@fastify/multipart": "^10.0.0", diff --git a/apps/server/src/app.module.ts b/apps/server/src/app.module.ts index b8cfc587..6418ba3e 100644 --- a/apps/server/src/app.module.ts +++ b/apps/server/src/app.module.ts @@ -27,6 +27,7 @@ import { LoggerModule } from './common/logger/logger.module'; import { ClsModule } from 'nestjs-cls'; import { NoopAuditModule } from './integrations/audit/audit.module'; import { ThrottleModule } from './integrations/throttle/throttle.module'; +import { McpModule } from 
'./integrations/mcp/mcp.module'; const enterpriseModules = []; try { @@ -85,6 +86,7 @@ try { SecurityModule, TelemetryModule, ThrottleModule, + McpModule, ...enterpriseModules, ], controllers: [AppController], diff --git a/apps/server/src/core/workspace/services/workspace.service.ts b/apps/server/src/core/workspace/services/workspace.service.ts index f3ab78e6..377e8215 100644 --- a/apps/server/src/core/workspace/services/workspace.service.ts +++ b/apps/server/src/core/workspace/services/workspace.service.ts @@ -345,13 +345,10 @@ export class WorkspaceService { throw new NotFoundException('Workspace not found'); } - if (typeof updateWorkspaceDto.mcpEnabled !== 'undefined') { - if (!this.licenseCheckService.hasFeature(ws.licenseKey, 'mcp', ws.plan)) { - throw new ForbiddenException( - 'This feature requires a valid license', - ); - } - } + // MCP is a community feature in this fork: the server itself serves the + // Model Context Protocol over HTTP at /mcp, so toggling it no longer + // requires an enterprise license. The toggle still persists via + // updateAiSettings(workspaceId, 'mcp', ...) below. if (typeof updateWorkspaceDto.isScimEnabled !== 'undefined') { if (!this.licenseCheckService.hasFeature(ws.licenseKey, Feature.SCIM, ws.plan)) { diff --git a/apps/server/src/integrations/mcp/mcp.controller.ts b/apps/server/src/integrations/mcp/mcp.controller.ts new file mode 100644 index 00000000..bdea7170 --- /dev/null +++ b/apps/server/src/integrations/mcp/mcp.controller.ts @@ -0,0 +1,39 @@ +import { Controller, Delete, Get, Post, Req, Res } from '@nestjs/common'; +import { FastifyReply, FastifyRequest } from 'fastify'; +import { McpService } from './mcp.service'; +import { SkipTransform } from '../../common/decorators/skip-transform.decorator'; + +// The global prefix in main.ts excludes 'mcp', so these handlers map to /mcp +// (not /api/mcp). 
The MCP Streamable-HTTP transport uses POST for JSON-RPC +// requests, GET for the SSE stream, and DELETE to terminate a session. +@Controller() +export class McpController { + constructor(private readonly mcpService: McpService) {} + + @SkipTransform() + @Post('mcp') + async post( + @Req() req: FastifyRequest, + @Res() res: FastifyReply, + ): Promise { + await this.mcpService.handle(req, res); + } + + @SkipTransform() + @Get('mcp') + async get( + @Req() req: FastifyRequest, + @Res() res: FastifyReply, + ): Promise { + await this.mcpService.handle(req, res); + } + + @SkipTransform() + @Delete('mcp') + async delete( + @Req() req: FastifyRequest, + @Res() res: FastifyReply, + ): Promise { + await this.mcpService.handle(req, res); + } +} diff --git a/apps/server/src/integrations/mcp/mcp.module.ts b/apps/server/src/integrations/mcp/mcp.module.ts new file mode 100644 index 00000000..5f927d60 --- /dev/null +++ b/apps/server/src/integrations/mcp/mcp.module.ts @@ -0,0 +1,16 @@ +import { Module } from '@nestjs/common'; +import { McpController } from './mcp.controller'; +import { McpService } from './mcp.service'; +import { DatabaseModule } from '@docmost/db/database.module'; +import { EnvironmentModule } from '../environment/environment.module'; + +// Community MCP feature: the server itself serves the Model Context Protocol +// over HTTP at /mcp. DatabaseModule (global) provides WorkspaceRepo and +// EnvironmentModule (global) provides EnvironmentService; both are imported +// explicitly for clarity. 
+@Module({ + imports: [DatabaseModule, EnvironmentModule], + controllers: [McpController], + providers: [McpService], +}) +export class McpModule {} diff --git a/apps/server/src/integrations/mcp/mcp.service.ts b/apps/server/src/integrations/mcp/mcp.service.ts new file mode 100644 index 00000000..be67e228 --- /dev/null +++ b/apps/server/src/integrations/mcp/mcp.service.ts @@ -0,0 +1,172 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { pathToFileURL } from 'node:url'; +import { FastifyReply, FastifyRequest } from 'fastify'; +import { EnvironmentService } from '../environment/environment.service'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; + +// Minimal shape of the embedded MCP HTTP handler exported by @docmost/mcp/http. +interface McpHttpHandler { + handleRequest( + req: unknown, + res: unknown, + parsedBody?: unknown, + ): Promise; +} + +interface McpHttpModule { + createMcpHttpHandler(config: { + apiUrl: string; + email: string; + password: string; + }): McpHttpHandler; +} + +// TS with module:commonjs downlevels a literal import() to require(), which +// cannot load the ESM-only @docmost/mcp package. Indirect through Function so +// the real dynamic import() survives compilation and can load ESM from +// CommonJS at runtime. +const esmImport = new Function( + 'specifier', + 'return import(specifier)', +) as (specifier: string) => Promise; + +@Injectable() +export class McpService { + private readonly logger = new Logger(McpService.name); + private handler: McpHttpHandler | null = null; + private handlerPromise: Promise | null = null; + private warnedMissingCreds = false; + + constructor( + private readonly environmentService: EnvironmentService, + private readonly workspaceRepo: WorkspaceRepo, + ) {} + + // Service account the embedded MCP uses to talk back to this Docmost + // instance over loopback REST + the collaboration WebSocket. 
+ private getEmail(): string | undefined { + return process.env.MCP_DOCMOST_EMAIL; + } + + private getPassword(): string | undefined { + return process.env.MCP_DOCMOST_PASSWORD; + } + + private getApiUrl(): string { + return ( + process.env.MCP_DOCMOST_API_URL || + `http://127.0.0.1:${process.env.PORT || 3000}/api` + ); + } + + private credsConfigured(): boolean { + return Boolean(this.getEmail() && this.getPassword()); + } + + // MCP is a community feature gated by the workspace `ai.mcp` setting (the + // same toggle the settings UI writes). Docmost self-host is single-workspace, + // so we read the first/default workspace and treat settings.ai.mcp === true + // as enabled. + private async isEnabled(): Promise { + try { + const workspace = await this.workspaceRepo.findFirst(); + const settings = (workspace?.settings ?? {}) as { + ai?: { mcp?: boolean }; + }; + return settings?.ai?.mcp === true; + } catch (err) { + this.logger.error('Failed to read workspace MCP setting', err as Error); + return false; + } + } + + // Lazily create the HTTP handler exactly once. The import is indirected so + // the ESM-only @docmost/mcp package can be loaded from this CommonJS module. + private async getHandler(): Promise { + if (this.handler) { + return this.handler; + } + if (!this.handlerPromise) { + this.handlerPromise = (async () => { + // Resolve the package's HTTP entry to an absolute path, then import it as a + // file:// URL. require.resolve honours the package "exports" map without + // executing the module, avoiding bare-specifier resolution-base fragility. 
+ const httpEntry = require.resolve('@docmost/mcp/http'); + const mod = (await esmImport( + pathToFileURL(httpEntry).href, + )) as McpHttpModule; + const handler = mod.createMcpHttpHandler({ + apiUrl: this.getApiUrl(), + email: this.getEmail()!, + password: this.getPassword()!, + }); + this.handler = handler; + return handler; + })().catch((err) => { + // Do not cache a rejected import — allow the next request to retry. + this.handlerPromise = null; + throw err; + }); + } + return this.handlerPromise; + } + + async handle(req: FastifyRequest, res: FastifyReply): Promise { + // Optional static bearer-token guard. When MCP_TOKEN is set, the request + // must carry a matching `Authorization: Bearer <token>` header. When unset, + // /mcp relies on the workspace toggle and network isolation (no auth). + const token = process.env.MCP_TOKEN; + if (token) { + const authHeader = req.headers['authorization']; + if (authHeader !== `Bearer ${token}`) { + res.status(401).send({ error: 'Unauthorized' }); + return; + } + } + + if (!(await this.isEnabled())) { + res.status(403).send({ error: 'MCP is disabled for this workspace' }); + return; + } + + if (!this.credsConfigured()) { + if (!this.warnedMissingCreds) { + this.warnedMissingCreds = true; + this.logger.warn( + 'MCP is enabled but not configured: set MCP_DOCMOST_EMAIL and MCP_DOCMOST_PASSWORD.', + ); + } + res.status(503).send({ + error: + 'MCP is not configured (set MCP_DOCMOST_EMAIL / MCP_DOCMOST_PASSWORD)', + }); + return; + } + + // Hand the raw Node req/res to the MCP transport. hijack() tells Fastify + // to stop managing this response so the transport can write to it directly.
+ res.hijack(); + + try { + const handler = await this.getHandler(); + await handler.handleRequest( + req.raw as unknown, + res.raw as unknown, + (req as unknown as { body?: unknown }).body, + ); + } catch (err) { + this.logger.error('MCP request handling failed', err as Error); + if (!res.raw.headersSent) { + res.raw.statusCode = 500; + res.raw.setHeader('Content-Type', 'application/json'); + res.raw.end( + JSON.stringify({ + jsonrpc: '2.0', + error: { code: -32603, message: 'Internal server error' }, + id: null, + }), + ); + } + } + } +} diff --git a/docs/ai-agent-chat-plan.md b/docs/ai-agent-chat-plan.md new file mode 100644 index 00000000..0ad50cd2 --- /dev/null +++ b/docs/ai-agent-chat-plan.md @@ -0,0 +1,420 @@ +# Чат с AI-агентом в gitmost + +> Статус: проектный документ, готов к реализации. +> Контекст: gitmost — форк Docmost; весь фронтенд EE-кода вырезан (community-сборка), +> но в бэкенде остался каркас AI-функций. Цель — собрать «чат с агентом» (как в +> EE-версии Docmost), но чистой реализацией поверх существующего каркаса. + +Документ фиксирует все принятые решения, целевую архитектуру и пошаговый план с +привязкой к конкретным файлам. По нему можно сразу начинать кодить по этапам A…D. + +Все комментарии в коде — на английском. Сниппеты ниже иллюстративные (targeted +edits, не полные замены файлов). + +--- + +## 1. TL;DR + +1. **Это не «с нуля», а достройка поверх готового каркаса.** Уже есть: схема БД + чата (`ai_chats`, `ai_chat_messages` с колонкой `tool_calls`), весь AI-стек в + зависимостях (Vercel **AI SDK v6** `ai`, `@ai-sdk/openai`, `@ai-sdk/google`, + `@ai-sdk/openai-compatible`, `ai-sdk-ollama`, `@langchain/*`), собственный + **MCP-тулсет** (`packages/mcp`) и серверный эндпоинт `/mcp`. +2. **Агент — полноценный (чтение + запись).** Пишет без подтверждения; защита от + необратимого — за счёт того, что агенту доступны **только обратимые** операции + (история версий + корзина), а перманентное удаление не экспонируется. +3. 
**Права: две независимые оси.** Агент ходит в Docmost **под JWT текущего юзера** + (права enforce'ятся самим Docmost через CASL), а к LLM — под системным конфигом + воркспейса. Ключ LLM никогда не попадает в браузер. +4. **Конфиг провайдера/модели/ключа — только из admin-UI/БД. Env-фолбэка нет.** + API-ключ шифруется (AES-256-GCM на `APP_SECRET`), хранится вне `settings`/ + `baseFields`, **write-only**, не возвращается ни одним эндпоинтом. +5. **Правки агента видно в истории** через аддитивный маркер (`last_updated_source` + = `agent` + ссылка на чат), без создания бот-пользователя. +6. **Поиск — оба механизма:** полнотекстовый (сразу, инфраструктура есть) и + векторный RAG (отдельная стадия D; нужна миграция pgvector + индексатор). + +--- + +## 2. Принятые решения (decision log) + +| # | Решение | Обоснование | +|---|---------|-------------| +| D1 | Агент умеет **читать и писать** страницы | запрошено явно | +| D2 | Запись **без подтверждения** | есть история версий + корзина; UX-трения не нужно | +| D3 | Guardrail «ничего необратимо»: агенту **не** экспонируется `permanentlyDelete`/`forceDelete`; удаление = только мягкое (корзина) | единственная необратимая операция в API | +| D4 | Поиск — **оба**: полнотекст сейчас, вектор RAG позже | баланс «быстрый старт / качество» | +| D5 | **Чистая реализация** в форке, не порт EE | форк специально вычищен от EE-кода и лицензии | +| D6 | Агент → Docmost под **JWT юзера** (per-request), а не сервис-аккаунт | пер-юзерные права «бесплатно» через CASL; нет privilege escalation | +| D7 | Маркер «правка агентом» — **аддитивный флаг**, не отдельный бот-юзер | бот сломал бы модель прав, засорил бы контрибьюторов/уведомления | +| D8 | Конфиг провайдера/модели/ключа — **только UI/БД**, env-фолбэка нет | единый источник правды, предсказуемость | +| D9 | API-ключ — **зашифрован** (AES-256-GCM на `APP_SECRET`), вне `settings`/`baseFields`, write-only | защита и от member-read (через API), и от утечки дампа БД | +| D10 | Тулсет агента 
**расширяется внешними MCP-серверами** (Tavily для веб-поиска и др.), настраиваемыми админом в UI | агенту нужен доступ в интернет/гугл; gitmost выступает MCP-**клиентом** к внешним серверам | +| D11 | **Системное сообщение (system prompt) настраивается** админом в UI | возможность задать роль/тон/правила агента под конкретную инсталляцию | + +--- + +## 3. Текущее состояние (что уже есть / чего нет) + +### 3.1. Уже есть в репозитории +- **Схема чата** — миграция `apps/server/src/database/migrations/20260409T132415-ai-chat.ts`: + - `ai_chats(id, workspace_id, creator_id, title, timestamps, deleted_at)`; + - `ai_chat_messages(id, chat_id, workspace_id, user_id, role, content, tool_calls jsonb, metadata jsonb, tsv, timestamps)` — обрати внимание на `tool_calls`: схема изначально под **агента с инструментами**; + - колонка `attachments.ai_chat_id`. + - Типы уже заведены в Kysely: `apps/server/src/database/types/db.d.ts` (`AiChats`, `AiChatMessages`, и `aiChatId` на attachments). +- **Тип эмбеддингов** — `apps/server/src/database/types/embeddings.types.ts` (`PageEmbeddings`), подключён в `db.interface.ts`. **Но таблицы и pgvector ещё нет** (только тип). +- **AI-стек в зависимостях** — `apps/server/package.json`: `ai` (v6), `@ai-sdk/openai`, `@ai-sdk/google`, `@ai-sdk/openai-compatible`, `ai-sdk-ollama`, `@langchain/core`, `@langchain/textsplitters`. Ставить ничего не нужно. +- **MCP-тулсет** — `packages/mcp/` (MIT): полноценный набор инструментов (поиск/чтение/создание/правка страниц, node-ops, markdown/prosemirror-конвертация, collab-правки через Hocuspocus). Серверный эндпоинт `/mcp` — `apps/server/src/integrations/mcp/` (`mcp.service.ts`, `mcp.controller.ts`, `mcp.module.ts`). +- **Тумблеры AI в настройках воркспейса** — `settings.ai = { generative, chat, search, mcp }`, апдейт через `WorkspaceRepo.updateAiSettings` (`apps/server/src/database/repos/workspace/workspace.repo.ts`). 
+- **Очередь `AI_QUEUE`** и хук реиндекса: `onStoreDocument` уже кидает `aiQueue PAGE_CONTENT_UPDATED` (`apps/server/src/collaboration/extensions/persistence.extension.ts`). +- **`TokenService`** — выпуск JWT любого типа: `generateAccessToken`, `generateCollabToken`, `generateApiKey` (`JwtType.API_KEY`) — `apps/server/src/core/auth/services/token.service.ts`. +- **`APP_SECRET`** в env — `EnvironmentService.getAppSecret()`. +- История версий: `page_history` (+ `contributorIds`), `PageHistoryService`, сохранение через `HistoryProcessor.saveHistory`. + +### 3.2. Чего нет (надо сделать) +- Серверный слой чата: репозитории + сервис (агентный цикл, стриминг, персист) + контроллер. +- Драйвер LLM (сборка провайдера AI SDK из конфига воркспейса). +- Хранение и шифрование API-ключа + CRUD/Test настроек провайдера. +- Адаптер MCP-тулсета под JWT юзера (внутренний путь, отдельный от `/mcp`). +- Маркер «правка агентом» (колонки + протяжка через collab). +- Пайплайн эмбеддингов/индексации + pgvector (стадия D). +- Весь фронтенд: панель чата + настройки провайдера + бейдж в истории. + +--- + +## 4. 
Целевая архитектура + +``` +Клиент (React/Mantine) Сервер (NestJS/Fastify) +───────────────────── ────────────────────────────── +features/ai-chat/ core/ai-chat/ (новый модуль) + AiChatPanel ──SSE stream──────────▶ ai-chat.controller (CRUD + /stream) + useChat (@ai-sdk/react) ai-chat.service (agent loop) + ToolCallCard (лог действий) │ streamText({ model, tools, stopWhen }) + Citations (ссылки на страницы) ├─▶ integrations/ai (driver per workspace) + │ └─ AI key из ai_provider_credentials (decrypt) +settings/ai/ (admin) ├─▶ ai-chat/tools/ (MCP toolset под JWT юзера) + ProviderForm + Test connection │ └─ create*/update*/search* → loopback REST/WS as user + └─▶ repos: ai_chats / ai_chat_messages +``` + +### Две оси авторизации (ключевой принцип) +| Канал | Кто авторизует | Чем | +|-------|----------------|-----| +| Агент → **LLM** | деплой (система) | API-ключ воркспейса из таблицы `ai_provider_credentials` (расшифрованный на сервере) | +| Агент → **Docmost** | конкретный **юзер** | его JWT (per-request, выписан `TokenService`) | +| Агент → **внешние MCP** (Tavily/веб-поиск и др.) | **админ** воркспейса | per-server креды (зашифрованы, как и LLM-ключ) | + +«Кто платит за модель» = воркспейс/деплой; «что агенту можно в вики» = права юзера. +Браузер видит только `/ai-chat/stream` (под сессией юзера); LLM-ключ остаётся на сервере. + +--- + +## 5. Модель данных и миграции + +### 5.1. Чат — уже есть +`ai_chats` / `ai_chat_messages` (см. §3.1). Нужны только **репозитории** +(`apps/server/src/database/repos/ai-chat/`): `ai-chat.repo.ts`, +`ai-chat-message.repo.ts`. Типы готовы. + +### 5.2. Новая миграция: маркер «правка агентом» +```ts +// pages: provenance of the current state (mirrors lastUpdatedById semantics) +pages.last_updated_source varchar default 'user' // 'user' | 'agent' +pages.last_updated_ai_chat_id uuid null // FK -> ai_chats(id) +// page_history: provenance snapshot, copied from pages at save time +page_history.last_updated_source varchar +page_history.ai_chat_id uuid null +``` + +### 5.3.
Новая миграция: хранение ключа провайдера +```ts +// dedicated table, NEVER selected into workspace baseFields / API responses +ai_provider_credentials( + id uuid pk, + workspace_id uuid not null references workspaces(id) on delete cascade, + driver varchar not null, // 'openai' | 'gemini' | 'ollama' + api_key_enc text, // AES-256-GCM: base64(iv | authTag | ciphertext) + created_at, updated_at +) +// unique (workspace_id, driver) — позволяет хранить ключи разных провайдеров +``` +Несекретное (driver, chatModel, embeddingModel, baseUrl, dimension, **systemPrompt**) +— в `settings.ai.provider` (видно member'ам, не утечка). + +### 5.4. Новая миграция: внешние MCP-серверы +```ts +// per-workspace external MCP servers the agent may use (Tavily, etc.) +ai_mcp_servers( + id uuid pk, + workspace_id uuid not null references workspaces(id) on delete cascade, + name varchar not null, // display name, e.g. 'Tavily' + transport varchar not null, // 'http' | 'sse' + url text not null, // remote MCP endpoint + headers_enc text, // AES-256-GCM: encrypted JSON of auth headers + tool_allowlist jsonb null, // optional: restrict which remote tools to expose + enabled boolean not null default true, + created_at, updated_at +) +``` + +### 5.5. Стадия D: pgvector + эмбеддинги (отдельной миграцией) +```sql +CREATE EXTENSION IF NOT EXISTS vector; +-- таблица page_embeddings под существующий тип PageEmbeddings, +-- колонка embedding vector(<dimension>), ANN-индекс (hnsw/ivfflat) +``` + +--- + +## 6. Бэкенд + +### 6.1. Модуль `core/ai-chat/` +- `ai-chat.controller.ts`: + - REST: `GET /ai-chat` (список диалогов), `GET /ai-chat/:id/messages`, `POST /ai-chat/:id` (rename), `DELETE /ai-chat/:id`. + - **`POST /ai-chat/stream`** — стриминг ответа. Под Fastify: `res.hijack()` (паттерн уже применён в `mcp.service.ts`) + `result.toUIMessageStreamResponse()` из AI SDK; отмена LLM-стрима по разрыву соединения (`abortSignal`). + - Гейт: `JwtAuthGuard` + проверка `settings.ai.chat`.
Нет конфига провайдера → 503 «AI provider not configured». +- `ai-chat.service.ts` — агентный цикл: +```ts +// per-request agent loop, bound to the current user. +const result = streamText({ + model: await this.ai.getChatModel(workspaceId), // provider from workspace settings + system: buildSystemPrompt(workspace, openedPageCtx), + messages, // rebuilt from ai_chat_messages + tools: this.tools.forUser(user, session), // read+write, scoped by user's JWT + stopWhen: stepCountIs(8), // cap the agent loop (safety) + abortSignal, + onFinish: persistAssistantMessage, // content + tool_calls (jsonb) +}); +``` + - Создание чата при отсутствии `chatId`; генерация заголовка асинхронно дешёвой моделью. + - Обрезка/суммаризация длинной истории (контекст-окно). + - Сохранение частичного ответа при abort/ошибке. + +### 6.2. Драйвер LLM `integrations/ai/` +```ts +// ai.service.ts — config comes solely from workspace settings (NO env fallback). +async getChatModel(workspaceId: string) { + const cfg = await this.aiSettings.resolve(workspaceId); // settings.ai.provider + decrypted key + if (!cfg?.driver || !cfg?.chatModel || (cfg.driver !== 'ollama' && !cfg.apiKey)) { + throw new AiNotConfiguredException(); // controller -> 503 + } + switch (cfg.driver) { + case 'openai': return createOpenAI({ apiKey: cfg.apiKey, baseURL: cfg.baseUrl })(cfg.chatModel); + case 'gemini': return createGoogleGenerativeAI({ apiKey: cfg.apiKey })(cfg.chatModel); + case 'ollama': return createOllama({ baseURL: cfg.baseUrl })(cfg.chatModel); // no key + } +} +``` +Провайдер строится **динамически на воркспейс** (нельзя кешировать один глобальный +клиент). Расшифрованный ключ — в памяти с инвалидацией при сохранении настроек, +либо расшифровка на запрос (дёшево). Ключ не логируется. + +> Env-переменные `AI_*` больше не используются. Геттеры `getAiDriver/getAiChatModel/ +> getOpenAiApiKey/...` в `environment.service.ts` — удалить, если ничем больше не +> заняты, чтобы не было второго источника правды. 
`MCP_*` и `APP_SECRET` остаются. + +### 6.3. Шифрование `integrations/crypto/secret-box.ts` +```ts +// AES-256-GCM; key derived from APP_SECRET. Server-side only. +const key = scryptSync(env.getAppSecret(), 'ai-provider', 32); +encryptSecret(plain: string): string // -> base64(iv | authTag | ciphertext) +decryptSecret(blob: string): string // used only when building the provider +``` +Ротация `APP_SECRET` ломает расшифровку старых шифртекстов — документировать (надо +ввести ключ заново), и при ошибке расшифровки отдавать понятное «введите ключ +заново», а не падать. + +### 6.4. Настройки провайдера (admin-only) +- `GET /workspace/ai-settings` → `{ driver, chatModel, embeddingModel, baseUrl, hasApiKey }` — **ключ замаскирован**. +- `PATCH /workspace/ai-settings` → `{ driver?, chatModel?, baseUrl?, apiKey? }`: + - `apiKey` отсутствует → не трогаем; пустая строка → очистить; значение → зашифровать и сохранить. +- `POST /workspace/ai-settings/test` → дешёвый вызов провайдера (`generateText`/ping) → `{ ok } | { error }`; тело ошибки провайдера наружу не отдаём (только статус/короткое сообщение). +- Доступ — admin-ability воркспейса (как `POST /workspace/update`, который проверяет `WorkspaceCaslAction.Manage / WorkspaceCaslSubject.Settings`). + +### 6.5. Адаптер инструментов `ai-chat/tools/` (под JWT юзера) +- Оборачиваем логику `packages/mcp` в `tool()` AI SDK. **Внутренний путь — отдельный от кешированного `/mcp`-handler'а** (тот одно-идентичностный, под сервис-аккаунтом). +- Аутентификация — токеном текущего юзера: +```ts +// packages/mcp DocmostMcpConfig becomes a union: credentials OR a token getter. 
+type DocmostMcpConfig = { apiUrl: string } & ( + | { email: string; password: string } // external/service: performLogin + | { getToken: () => Promise<string> } // internal: carry the user's JWT +); +// ai-chat.service: seed the toolset with the CURRENT user's token +const getToken = async () => this.tokenService.generateAccessToken(user, session.id); +``` + Сейчас `DocmostClient` принимает только `email/password` и зовёт `performLogin` + (`packages/mcp/src/lib/auth-utils.ts`). Нужно добавить токен-вариант: `login()` + при наличии `getToken` ставит Bearer из него и **не** логинится; на 401 — заново + зовёт `getToken()` (кредов для перелогина нет). +- Набор инструментов: **read** (`searchPages`, `getPage`) + **write** (`createPage`, + `updatePage`, `movePage`, `deletePage` — только мягкое). **Не экспонировать** + `permanentlyDelete`/`forceDelete` (D3). Удаление комментариев — по решению, мягко + или не давать. +- Права — каждый tool-вызов идёт под JWT юзера через loopback REST/WS → Docmost CASL + проверяет всё сам. Дополнительного слоя авторизации в адаптере не нужно. + +### 6.6. Маркер «правка агентом» — протяжка +- **Носитель** — claim в collab-токене (подписан, поэтому доверенный). Расширить + `TokenService.generateCollabToken(user, workspaceId, provenance?: { actor: 'agent'; aiChatId })`. +- `apps/server/src/collaboration/extensions/authentication.extension.ts` (`onAuthenticate`, + `verifyJwt(token, JwtType.COLLAB)`) → положить в контекст: `context.actor`, `context.aiChatId`. +- `apps/server/src/collaboration/extensions/persistence.extension.ts` (`onStoreDocument`): +```ts +await this.pageRepo.updatePage({ + content: tiptapJson, textContent, ydoc: ydocState, + lastUpdatedById: context.user.id, // human stays the responsible author + lastUpdatedSource: context.actor ?? 'user', // additive provenance marker + lastUpdatedAiChatId: context.aiChatId ??
null, + contributorIds, +}, pageId, trx); +// also add `source` to broadcastStateless('page.updated') so live viewers see it +``` +- `PageHistoryRepo.saveHistory` (`apps/server/src/database/repos/page/page-history.repo.ts`): + копировать `lastUpdatedSource`/`aiChatId` со страницы (как уже делается для + `lastUpdatedById`). История-джоба коалесцируется по `jobId: page.id` и перечитывает + страницу — поэтому маркер удобнее хранить на `pages`, а не в payload джобы. +- REST-путь (`page.service.ts` rename/move): инструменты передают `source: 'agent'` + + `aiChatId`, сервис проставляет те же поля. +- Audit: действия агента писать в `AuditEvent` с `source: 'agent'` + `aiChatId` (без значения ключа LLM). +- Тонкость: если правка человека и агента схлопнутся в один снапшот, `last_updated_source` + отразит последнего писавшего — для «видно, что агент приложил руку» достаточно; + поблочная атрибуция — отдельная задача, не для v1. + +### 6.7. Ретрив +- **Стадия 1 (сразу):** инструмент `searchPages` поверх существующего полнотекстового + поиска (Postgres `tsvector`). Инфраструктура есть. +- **Стадия D:** индексатор в `AI_QUEUE` (чанкинг `@langchain/textsplitters` → эмбеддинги + по конфигу воркспейса → `page_embeddings`), инструмент `semanticSearch` (embed запроса + + pgvector similarity). Реиндекс по `PAGE_CONTENT_UPDATED` (хук уже есть). Правки + агента реиндексируются автоматически. + +--- + +## 7. Фронтенд + +### 7.1. Фича `apps/client/src/features/ai-chat/` (шаблон — `features/comment/`) +- Правая панель/aside: `AiChatPanel`, `ConversationList`, `MessageList`, `MessageItem` + (markdown + карточки tool-calls как лог действий + цитаты-ссылки на страницы), `ChatInput`. +- Стриминг — хук `useChat` из `@ai-sdk/react`, направленный на `/ai-chat/stream`; + он ведёт состояние сообщений. Подтверждения write-операций **нет** (D2) — tool-calls + рисуются как лог выполненного. +- Точка входа — кнопка в шапке/aside; строки в i18n (i18next). + +### 7.2. 
Настройки провайдера (admin) +Раздел «AI / Модели» в настройках воркспейса: +- дропдаун провайдера → динамические поля (OpenAI: key + опц. Base URL + chat model; + Gemini: key + model; Ollama: Base URL + model, без ключа); поле эмбеддинг-модели; +- поле ключа: при наличии — плейсхолдер «•••• задан», ввод заменяет, пусто = не менять; +- кнопка **Test connection**; сохранение. + +### 7.3. Бейдж в истории версий +На версиях с `last_updated_source = 'agent'` — бейдж «AI-агент» рядом с аватаром +человека, тултип «Изменено AI-агентом от имени {имя}», ссылка на чат по `ai_chat_id`. +Бейдж добавляется, автор не заменяется. + +--- + +## 8. Безопасность (чеклист — читать до старта) +1. API-ключ **только зашифрованным** (AES-256-GCM на `APP_SECRET`), вне `settings`/`baseFields`; в ответах — маска/`hasApiKey`. +2. Ключ — **write-only**: PATCH принимает, GET никогда не возвращает (даже зашифрованным). +3. Расшифровка/использование — только на сервере; ключ не уходит в браузер, не пишется в логи/audit/тела ошибок (в т.ч. в ответ Test connection). +4. Доступ к настройкам провайдера — под admin-ability воркспейса. +5. Агент → Docmost строго под **JWT юзера**; внутренний путь не переиспользует сервис-аккаунтовый `/mcp`-handler. Никакого обхода CASL. +6. Агенту экспонируются **только обратимые** инструменты (D3): нет перманентного удаления. +7. Лимит шагов агентного цикла (`stopWhen`), таймауты; rate-limit запросов чата на юзера через `integrations/throttle`. +8. Все запросы скоупятся по `workspace_id`. +9. Внимание к `/workspace/info`: он отдаёт `settings` **любому участнику** (только `JwtAuthGuard`, без admin-гейта) — поэтому секрет туда класть нельзя. + +--- + +## 9. План реализации по этапам + +### Этап A — бэкенд-ядро (без записи, без RAG) +1. Репозитории `ai_chats`/`ai_chat_messages`. +2. Миграция + хранилище ключа (`ai_provider_credentials`) + `secret-box` (шифрование). +3. `integrations/ai` драйвер (конфиг только из настроек воркспейса). +4. 
Настройки провайдера: GET (маска) / PATCH (write-only ключ) / Test connection, admin-only. +5. Модуль `core/ai-chat` (CRUD диалогов + `POST /ai-chat/stream` через SSE). +6. Агентный цикл с **read**-инструментами + `searchPages` (полнотекст). +7. Гейт `settings.ai.chat`, 503 при отсутствии конфига. +- → `review`-субагент → верификация. + +### Этап B — запись + маркер агента +1. Токен-вариант в `packages/mcp` (`getToken`) + адаптер инструментов под JWT юзера. +2. **Write**-инструменты (только обратимые), под CASL. +3. Миграция маркера (`pages`/`page_history`), claim в collab-токене, протяжка через + `authentication.extension` / `persistence.extension` / `saveHistory`. +4. Audit-события действий агента. +- → `review` → верификация. + +### Этап C — фронтенд +1. Панель чата на `useChat` (список диалогов, стрим, tool-calls как лог, цитаты). +2. Раздел настроек «AI / Модели» (провайдер, ключ, модель, Test connection). +3. Бейдж «AI-агент» в истории версий. i18n. Точка входа. +- → `review` → верификация. + +### Этап D — векторный RAG +1. Миграция pgvector + `page_embeddings` (+ pgvector в Docker/CI образе Postgres). +2. Индексатор в `AI_QUEUE` (чанкинг + эмбеддинги), реиндекс по `PAGE_CONTENT_UPDATED`. +3. Инструмент `semanticSearch`. Конфиг эмбеддинг-модели — в настройках провайдера. +- → `review` → верификация. + +Каждый этап делегируется coder-агенту с детальным брифом, затем обязательный +`review`-субагент и верификация ведущим. + +--- + +## 10. Зависимости (npm) +Всё уже в `apps/server/package.json`: `ai` (v6), `@ai-sdk/openai`, +`@ai-sdk/google`, `@ai-sdk/openai-compatible`, `ai-sdk-ollama`, `@langchain/core`, +`@langchain/textsplitters`. На фронт — `@ai-sdk/react` (проверить наличие; при +отсутствии добавить). Доп. инфраструктура для стадии D: pgvector в образе Postgres. + +> Перед кодом подтянуть актуальную доку AI SDK v6 (`streamText` + `tools` + `stopWhen`, +> `toUIMessageStreamResponse`, `useChat`) через context7 — в v6 API заметно отличается +> от v4/v5. 
+ +--- + +## 11. Подводные камни +- **AI SDK v6 ≠ v4/v5** — сверять API по докам, не по памяти. +- **Стриминг под Fastify** — `res.hijack()`, отмена LLM-стрима по разрыву, персист частичного ответа. +- **Per-workspace провайдер** — не кешировать один глобальный клиент; не логировать ключ. +- **Токен юзера и время жизни** — выписывать на сообщение; для длинных turn'ов — `getToken()`-рефреш. +- **Коалесцинг истории** — маркер хранить на `pages`, не в payload джобы. +- **Ротация `APP_SECRET`** — старые ключи перестают расшифровываться (внятная ошибка, не падение). +- **pgvector в окружении** — образ Postgres должен иметь расширение `vector` (docker-compose/CI). +- **`/workspace/info` отдаёт `settings` любому member'у** — секрет туда нельзя. + +--- + +## 12. Открытые вопросы (зафиксировать до/во время реализации) +- Выбор модели: v1 — одна модель на воркспейс (из настроек). Пер-чатовый пикер из + allowlist — возможное расширение (поле модели в `ai_chats`/`metadata` + дропдаун). +- Удаление комментариев агентом — давать мягко или не давать вовсе. +- Хранить ключи нескольких провайдеров одновременно (таблица `ai_provider_credentials` + с `unique(workspace_id, driver)`) или один активный — влияет только на UX переключения. +- Лимиты стоимости (потолок токенов на диалог) — нужно ли в v1. + +--- + +## 13. 
Чеклист реализации +- [ ] A1 репозитории чата +- [ ] A2 миграция + `ai_provider_credentials` + `secret-box` +- [ ] A3 драйвер `integrations/ai` (конфиг только из БД) +- [ ] A4 настройки провайдера: GET (маска) / PATCH (write-only) / Test, admin-only +- [ ] A5 модуль `core/ai-chat` (CRUD + SSE-стрим) +- [ ] A6 агентный цикл + read-инструменты + полнотекстовый `searchPages` +- [ ] A7 гейт `settings.ai.chat` + 503 +- [ ] B1 токен-вариант `packages/mcp` + адаптер под JWT юзера +- [ ] B2 write-инструменты (только обратимые) +- [ ] B3 маркер агента (миграция + collab-протяжка + `saveHistory`) +- [ ] B4 audit-события агента +- [ ] C1 панель чата (`useChat`) +- [ ] C2 настройки провайдера в UI +- [ ] C3 бейдж в истории версий + i18n +- [ ] D1 миграция pgvector + `page_embeddings` +- [ ] D2 индексатор + реиндекс по событиям +- [ ] D3 инструмент `semanticSearch` diff --git a/packages/mcp/README.md b/packages/mcp/README.md new file mode 100644 index 00000000..fc61a2b2 --- /dev/null +++ b/packages/mcp/README.md @@ -0,0 +1,357 @@ +# Docmost MCP Server + +**English** · [Русский](README.ru.md) + +A Model Context Protocol (MCP) server for [Docmost](https://docmost.com/) that lets +AI agents **read, search, write, restructure, review, version, comment on, illustrate +and publish** documentation — safely, against a live instance, without an enterprise +license. + +> **Written by an agent, for agents.** A human edits a document with their eyes and hands: +> they read it, click into the editor, and retype. An agent works differently — it is far +> better at *writing a small function that fixes the text* than at re-reading and +> re-emitting a whole document. So this server is built around the way a model actually +> wants to edit: address a block by id, run a find/replace, or hand it a +> `(doc, ctx) => doc` transform and let it *program* the change. `docmost_transform` is +> that interface. 
Other Docmost MCPs are human-shaped — they expose "open the page" and +> "replace the page"; this one exposes the editing primitives a model is good at. + +It exposes **38 tools** built around three ideas that the other Docmost MCPs do not +combine: + +1. **Surgical, token-cheap edits.** Address a single block by id and patch it, or run + a find/replace, instead of round-tripping a whole ~100 KB document through the model. +2. **Safe live writes.** Every mutation goes through Docmost's real-time collaboration + layer (the same WebSocket the web editor uses), serialized per page, so it never + clobbers a concurrent human edit and is confirmed persisted before the tool returns. +3. **A real safety net.** Version history, a Docmost-equivalent diff, a one-call + restore, and a dry-run preview for scripted rewrites — so an agent can edit + boldly and you can always see and undo what it did. + +--- + +## Why this server (vs. the alternatives) + +There are several Docmost MCPs. Here is a capability-by-capability comparison. +"Official" is Docmost's built-in MCP; the others are the community projects on GitHub. 
+ +| Capability | **This server** | Official (built-in) | MrMartiniMo/docmost-mcp | cyborgx0x/mcp-docmost | aleksvin8888 / isak-landin | +| --- | :---: | :---: | :---: | :---: | :---: | +| **Enterprise license required** | **No** | **Yes** | No | No | No | +| Authentication | email + password, **auto re-auth** | API key | email + password | cookie `authToken` (copy from DevTools) | Docmost API / **direct PostgreSQL** | +| Read page as Markdown | ✅ | ✅ | ✅ | ✅ | ✅ (read-only) | +| **Lossless Markdown round-trip** (export / import, keeps comment anchors) | ✅ | — | — | — | — | +| Read **lossless ProseMirror JSON** (with block ids) | ✅ | — | — | — | — | +| **Compact page outline** (cheap block-id lookup) | ✅ | — | — | — | — | +| **Fetch a single block** (by id or index) | ✅ | — | — | — | — | +| Create / move / delete pages | ✅ | ✅ | ✅ | ✅ | — | +| **Per-block edits** (patch/insert/delete by id) | ✅ | — | — | — | — | +| **Surgical find/replace** (structure-preserving) | ✅ | — | — | — | — | +| **Scripted JS transform** (sandboxed, dry-run diff) | ✅ | — | — | — | — | +| **Structured table editing** (row / cell CRUD) | ✅ | — | — | — | — | +| Page **version history** | ✅ | — | — | ✅ | — | +| **Diff two versions** | ✅ | — | — | — | — | +| **Restore a version** (revertible) | ✅ | — | — | — | — | +| **Comments** (CRUD + inline anchoring) | ✅ | — | — | ✅ | — | +| **Poll for new comments** since a timestamp | ✅ | — | — | — | — | +| **Images** (insert / replace) | ✅ | — | — | — | — | +| **Public share links** (create / revoke / list) | ✅ | — | — | — | — | +| Export to HTML / PDF | — | — | — | ✅ | — | +| **Safe real-time-collab writes** (no clobber, confirmed) | ✅ | n/a | ✅ | — | n/a (read-only) | + +### What that means in practice + +- **No enterprise tax.** Docmost's official MCP is an enterprise feature: it needs an + active enterprise license. 
This server is MIT and + talks to *any* self-hosted Docmost over the standard API + collaboration socket, with + nothing but an account email and password. + +- **Token-efficient editing.** Most Docmost MCPs (and the official one) only offer + "replace the whole page" writes — the agent must download the entire document, mutate + it, and upload it back, paying for the full document **twice** on every tiny fix. + This server lets the agent change exactly one block (`patch_node` / `insert_node` / + `delete_node`), do a structure-preserving find/replace (`edit_page_text`), or copy a + whole page server-side (`copy_page_content`) — **without the document ever passing + through the model**. + +- **Writes that don't fight the editor.** Naive REST writes race with whatever a human + is typing and can silently overwrite their edits, or fail against Docmost's debounced + save. This server applies every change through the live collaboration document + (Hocuspocus/Yjs), reading and writing **synchronously inside one sync tick** so no + concurrent edit can interleave, serializing writes **per page** with a mutex, and + **waiting for the server to acknowledge persistence** before returning. If the socket + drops mid-write, the tool errors instead of falsely reporting success. + +- **Agent-native editing model.** Human-facing servers expose "open the page" and "replace + the page", because that mirrors how a person works. A model edits better by *programming* + the change — addressing blocks by id, running a find/replace, or supplying a + `(doc, ctx) => doc` transform (`docmost_transform`, with a dry-run diff before it + commits). This server is shaped around that, which is why it has editing primitives the + others simply don't. + +- **An editing safety net the others lack.** `list_page_history` → `diff_page_versions` + → `restore_page_version` give an agent (and you) a full view-and-undo loop. 
The diff + uses the *same* `recreateTransform → ChangeSet → simplifyChanges` pipeline Docmost's + own history viewer uses, so what you see matches the product. + +- **Convenience over cookie-scraping.** Some community servers authenticate by making + you copy a session cookie out of your browser's DevTools (it expires), or by reaching + **directly into the PostgreSQL database**. This server logs in with credentials and + **transparently re-authenticates on + a 401/403** (with in-flight de-duplication), so long-running agents don't die when a + token expires. It also respects Docmost's own access control, because it goes through + the API and the collaboration server like a normal user. + +--- + +## Tools + +All 38 tools, grouped by what you'd reach for them. + +### Exploration & retrieval + +- **`get_workspace`** — Information about the current Docmost workspace. +- **`list_spaces`** — All spaces in the workspace. +- **`list_pages`** — Recent pages in a space, ordered by `updatedAt` desc (default 50, + max 100). Use `search` for lookups in large spaces. +- **`search`** — Full-text search across pages and content (bounded by `limit`, max 100). +- **`get_page`** — A page's content as clean **Markdown** (convenient, but a *lossy* + view — block ids and exact table/callout structure are approximated). +- **`get_page_json`** — A page's **lossless ProseMirror/TipTap JSON**, including every + block's `attrs.id` and the `slugId` used in URLs. This is what the per-block editing + tools consume. +- **`get_outline`** — A compact outline of a page's top-level blocks (`{index, type, id, + level, firstText}`; tables add row/column counts and their header-cell texts, lists add + item counts) **without** the document body. The cheap way to locate a section or table + and grab its block id before + `get_node` / `patch_node` / `insert_node`. +- **`get_node`** — Fetch a single block's full ProseMirror subtree (lossless) without + pulling the whole page. 
Address it by a block id (from `get_outline` / `get_page_json`), + or by `#<index>` for a top-level block — use the `#<index>` form for tables/rows/cells, + which carry no id. + +### Page lifecycle + +- **`create_page`** — Create a page from Markdown and place it in the hierarchy (optional + `parentPageId`) in one call. Uses Docmost's import API for clean Markdown→ProseMirror. +- **`rename_page`** — Change a page's title only, without touching or resending content. +- **`move_page`** — Re-parent a page (nest it, or move to root); supports fractional-index + positioning. Returns only on a *positively confirmed* success. +- **`delete_page`** — Delete a single page. +- **`copy_page_content`** — Replace one page's body with a copy of another's, **entirely + server-side** — the document never passes through the model. The target keeps its own + title and slug (so its URL is preserved). + +### Editing + +- **`edit_page_text`** — Surgical find/replace inside a page's text. Preserves **all** + structure: block ids, marks, links, callouts, tables. The preferred tool for fixing + wording, typos, numbers and names. +- **`patch_node`** — Replace a single block addressed by its `attrs.id` (from + `get_page_json`), without resending the document. +- **`insert_node`** — Insert a block before/after another (by `attrs.id` or anchor text), + or append at the end. +- **`delete_node`** — Remove a single block by its `attrs.id`. +- **`update_page_json`** — Replace a page's entire content with a ProseMirror document + (bulk rewrites, or when nodes lack ids). `content` is optional — omit it to update only + the title. Keeps the block ids you pass in, so heading anchors and history stay stable. +- **`docmost_transform`** — The agent-native editing interface: instead of retyping a + document, the agent **writes a function that fixes it**. Edit a page by running an + arbitrary **`(doc, ctx) => doc` JavaScript transform** against its *live* ProseMirror + document. 
Runs **sandboxed** + (no `require`/`process`/`fs`/network, 5 s timeout). **Dry-run by default**: returns a + diff preview without writing; set `dryRun:false` to apply atomically. `ctx` exposes the + page's comments and a toolbox of helpers (`walk`, `getList`, `blockText`, + `insertMarkerAfter`, `setCalloutRange`, `commentsToFootnotes`, …) for multi-step, + coordinated rewrites such as renumbering, or turning inline comments into numbered + footnotes. + +### Tables + +- **`table_get`** — Read a table as a matrix: `{rows, cols, cells (text[][]), cellIds}` + (a paragraph id per cell, or `null`). Address the table by `#<index>` (from + `get_outline`) or any block id inside it. Use `cellIds` with `patch_node` for + rich-formatted cell edits. +- **`table_insert_row`** — Insert a row of plain-text cells, padded to the table's column + count (passing more cells than columns is an error). `index` is the 0-based insert + position (0 inserts before the header); omit it to append at the end. +- **`table_delete_row`** — Delete the row at a 0-based `index`. Refuses to delete a table's + only row; deleting row 0 promotes the next row to header. +- **`table_update_cell`** — Set the plain-text content of cell `[row, col]` (0-based). For + rich formatting, `patch_node` the cell's paragraph id from `table_get`. + +### Markdown round-trip + +- **`export_page_markdown`** — Export a page to a single self-contained, **lossless + Docmost-flavoured Markdown** file: a meta header, the body with inline comment anchors + and diagrams, and a trailing comments-thread block. Built for a download → edit body → + `import_page_markdown` round-trip that preserves everything, including comment highlights. +- **`import_page_markdown`** — Replace a page's content from a Docmost-flavoured Markdown + file produced by `export_page_markdown`, restoring comment-highlight anchors and diagrams + from their inline HTML. 
(Comment *threads* in the file are not re-created on the server — + only the page body and inline comment marks are written; manage threads via the comment + tools/UI.) + +### Images + +- **`insert_image`** — Upload a local image and insert it in one step: append it, drop it + in place of a text placeholder (`replaceText`), or put it after a given block + (`afterText`). Preserves all other block ids. +- **`replace_image`** — Swap an existing image. Uploads the new file as a **fresh + attachment** (clean URL that renders and busts browser caches), then re-points every + node referencing the old attachment (recursively, including callouts/tables) via the + live document, preserving comments, alignment and alt text. (In-place overwrite is + deliberately avoided — some Docmost versions corrupt the attachment on overwrite.) + +### Comments + +- **`create_comment`** — Add a page comment, optionally **anchored inline** to an exact + span of text (the first occurrence is wrapped in a comment mark). +- **`list_comments`** — List a page's comments (content returned as Markdown). +- **`update_comment`** — Edit an existing comment. +- **`delete_comment`** — Delete a comment. +- **`check_new_comments`** — Find comments created after a given ISO-8601 timestamp across + a space, optionally scoped to a page subtree — ideal for an agent that watches a doc for + feedback. + +### Versioning & history + +- **`list_page_history`** — A page's saved versions (Docmost auto-snapshots on save), + newest first, cursor-paginated. Each item's id is the `historyId`. +- **`diff_page_versions`** — Diff two versions (or a version against the live page). + Returns inserted/deleted text, integrity counts (images, links, tables, callouts, + footnote markers), and a human-readable Markdown summary — computed with the same + pipeline Docmost's own history viewer uses. +- **`restore_page_version`** — Write a saved version back as the current content. 
Docmost + has no restore endpoint, so this creates a **new** snapshot — the restore is itself + revertible. + +### Sharing + +- **`share_page`** — Make a page publicly accessible (idempotent) and return its public + URL (`/share/<key>/p/<slug>`); optional search-engine indexing. +- **`unshare_page`** — Revoke a page's public share. +- **`list_shares`** — All public shares in the workspace, with titles and public URLs. + +--- + +## Choosing the right editing tool + +This same guidance is also delivered at runtime via the MCP server `instructions` field, +so capable clients steer the model automatically. + +- **Text fixes** (wording, typos, numbers): `edit_page_text`. +- **One block** (paragraph/heading/callout/table cell): `patch_node` / `insert_node` / + `delete_node`, addressing the node by its `attrs.id` from `get_page_json`. +- **Images**: `insert_image` / `replace_image`. +- **A new page**: `create_page`. +- **Bulk rewrite, or nodes without ids**: `update_page_json`. +- **Multi-step / scripted rewrite** (renumbering, footnotes, coordinated edits): + `docmost_transform` — preview with `dryRun`, then apply. +- **Copy a whole page's content from another page** (server-side): `copy_page_content`. +- **Rename a page** (title only): `rename_page`. +- **Reads**: `get_page` (Markdown) / `get_page_json` (lossless ProseMirror with ids). +- **Review changes**: `list_page_history` → `diff_page_versions` → `restore_page_version`. +- **Comments**: `create_comment` (with optional inline anchoring) / `list_comments` / + `update_comment` / `delete_comment` / `check_new_comments`. +- **Navigate a page cheaply** (find a section/table, grab a block id): `get_outline` → + `get_node`. +- **Tables** (add/remove a row, set a cell): `table_get` / `table_insert_row` / + `table_delete_row` / `table_update_cell`. +- **Round-trip a page as Markdown** (download, edit, re-upload losslessly with comments): + `export_page_markdown` / `import_page_markdown`. 
+ +--- + +## How it works (technical details) + +- **Safe real-time-collaboration writes.** Content mutations are applied through Docmost's + collaboration WebSocket (Hocuspocus + Yjs). The server connects, waits for the initial + sync so its local doc mirrors the authoritative server doc (including edits not yet in + the debounced REST snapshot), then **reads → transforms → writes synchronously** in one + tick so no remote update can interleave, and **waits for persistence acknowledgement** + before returning. +- **Per-page write serialization.** A per-`pageId` async mutex ensures two MCP writes to + the same page never overlap; different pages never block each other. +- **Transparent re-authentication.** Login uses email/password; expired tokens are + refreshed automatically on the first 401/403 (covering JSON, multipart upload, and the + collaboration-token path), with in-flight login de-duplication so a burst of calls + triggers a single re-login. +- **Lossless and lossy reads.** `get_page_json` returns the exact ProseMirror tree with + block ids; `get_page` returns clean Markdown for convenience. +- **Full Docmost schema.** Markdown↔ProseMirror conversion supports callouts (including + nested), task lists (bullet *and* numbered checklists), tables, math blocks, embeds, + highlights, sub/superscript and more, with defensive caps against pathological input. +- **Structured tables & lossless Markdown round-trip.** Tables can be edited as a matrix + (read, insert/delete rows, set cells by `[row,col]`) without resending the document, and + a page can be exported to and re-imported from a self-contained Docmost-flavoured + Markdown file that preserves inline comment anchors and diagrams. +- **Token-optimized responses.** API responses are filtered down to the fields agents + actually need, and large collections (spaces, pages, comments, history) are paginated. 
+- **Hardened runtime.** Global handlers keep a stray socket error from tearing down the + stdio server; `move_page` requires a positively confirmed success; the diff engine + falls back to a coarse block diff rather than hard-failing on a pathological document. + +--- + +## Installation + +```bash +npm install +npm run build +``` + +## Configuration + +The server requires three environment variables: + +- `DOCMOST_API_URL` — full URL to your Docmost API (e.g. `https://docs.example.com/api`). +- `DOCMOST_EMAIL` — account email for authentication. +- `DOCMOST_PASSWORD` — account password. + +## Usage with Claude Desktop / a generic MCP client + +Add the server to your MCP configuration (e.g. `claude_desktop_config.json`): + +```json +{ + "mcpServers": { + "docmost-local": { + "command": "node", + "args": ["./build/index.js"], + "env": { + "DOCMOST_API_URL": "http://localhost:3000/api", + "DOCMOST_EMAIL": "test@docmost.com", + "DOCMOST_PASSWORD": "test" + } + } + } +} +``` + +## Development + +```bash +# Watch mode +npm run watch + +# Build +npm run build + +# Tests (unit + mock; the live end-to-end suite needs a running Docmost) +npm test +npm run test:e2e +``` + +## Lineage & acknowledgements + +This project began as a fork of [MrMartiniMo/docmost-mcp](https://github.com/MrMartiniMo/docmost-mcp) +(by Moritz Krause) and extends it substantially — adding per-block node editing, +surgical text edits, the sandboxed `docmost_transform`, version history / diff / restore, +comments, image insert/replace, public sharing, server-side page copy, dual +JSON/Markdown reads, transparent re-authentication and significant hardening. The comment +tools were ported from upstream PR #3 by Max Nikitin. Thanks to both. 
+ +## License + +MIT diff --git a/packages/mcp/README.ru.md b/packages/mcp/README.ru.md new file mode 100644 index 00000000..ebf02f3d --- /dev/null +++ b/packages/mcp/README.ru.md @@ -0,0 +1,371 @@ +# Docmost MCP Server + +[English](README.md) · **Русский** + +Сервер Model Context Protocol (MCP) для [Docmost](https://docmost.com/), который +позволяет ИИ-агентам **читать, искать, писать, реструктурировать, рецензировать, вести +версии, комментировать, иллюстрировать и публиковать** документацию — безопасно, на живом +инстансе и без enterprise-лицензии. + +> **Написан агентом для агентов.** Человек правит документ глазами и руками: читает, +> заходит в редактор, перепечатывает. Агент работает иначе — ему гораздо проще *написать +> небольшую функцию, которая чинит текст*, чем перечитывать и заново выдавать весь +> документ. Поэтому сервер построен вокруг того, как модели на самом деле удобно +> редактировать: адресовать блок по id, сделать find/replace или передать трансформ +> `(doc, ctx) => doc` и позволить модели *запрограммировать* правку. `docmost_transform` — +> это и есть такой интерфейс. Другие Docmost-MCP «заточены под человека» — они дают +> «открыть страницу» и «заменить страницу»; этот даёт примитивы редактирования, в которых +> модель сильна. + +Сервер предоставляет **38 инструментов**, построенных вокруг трёх идей, которые другие +Docmost-MCP не сочетают: + +1. **Точечные, экономичные по токенам правки.** Адресуйте отдельный блок по id и патчите + его или делайте find/replace вместо того, чтобы гонять весь документ ~100 КБ через + модель. +2. **Безопасная запись на живой документ.** Каждая мутация проходит через слой + коллаборации реального времени (тот же WebSocket, что использует веб-редактор), + сериализуется по странице, поэтому никогда не затирает параллельную правку человека и + подтверждается как сохранённая до возврата из инструмента. +3. 
**Настоящая страховка.** История версий, дифф, эквивалентный Docmost, восстановление + одним вызовом и предпросмотр (dry-run) для скриптовых правок — чтобы агент мог + редактировать смело, а вы всегда могли увидеть и откатить сделанное. + +--- + +## Почему именно этот сервер (в сравнении с альтернативами) + +Существует несколько Docmost-MCP. Ниже — сравнение по возможностям. +«Официальный» — встроенный MCP Docmost; остальные — community-проекты на GitHub. + +| Возможность | **Этот сервер** | Официальный (встроенный) | MrMartiniMo/docmost-mcp | cyborgx0x/mcp-docmost | aleksvin8888 / isak-landin | +| --- | :---: | :---: | :---: | :---: | :---: | +| **Нужна enterprise-лицензия** | **Нет** | **Да** | Нет | Нет | Нет | +| Аутентификация | email + пароль, **авто-переавторизация** | API-ключ | email + пароль | cookie `authToken` (копировать из DevTools) | API Docmost / **напрямую PostgreSQL** | +| Чтение страницы как Markdown | ✅ | ✅ | ✅ | ✅ | ✅ (только чтение) | +| **Lossless Markdown round-trip** (экспорт/импорт, сохраняет якоря комментариев) | ✅ | — | — | — | — | +| Чтение **lossless ProseMirror JSON** (с id блоков) | ✅ | — | — | — | — | +| **Компактная структура страницы** (дешёвый поиск id блока) | ✅ | — | — | — | — | +| **Получение одного блока** (по id или индексу) | ✅ | — | — | — | — | +| Создание / перемещение / удаление страниц | ✅ | ✅ | ✅ | ✅ | — | +| **Поблочные правки** (patch/insert/delete по id) | ✅ | — | — | — | — | +| **Хирургический find/replace** (с сохранением структуры) | ✅ | — | — | — | — | +| **Скриптовый JS-трансформ** (песочница, dry-run дифф) | ✅ | — | — | — | — | +| **Структурное редактирование таблиц** (CRUD строк/ячеек) | ✅ | — | — | — | — | +| **История версий** страницы | ✅ | — | — | ✅ | — | +| **Дифф двух версий** | ✅ | — | — | — | — | +| **Восстановление версии** (обратимое) | ✅ | — | — | — | — | +| **Комментарии** (CRUD + inline-привязка) | ✅ | — | — | ✅ | — | +| **Поллинг новых комментариев** с момента времени | ✅ | — | — | — | — | +| 
**Изображения** (вставка / замена) | ✅ | — | — | — | — | +| **Публичные ссылки** (создать / отозвать / список) | ✅ | — | — | — | — | +| Экспорт в HTML / PDF | — | — | — | ✅ | — | +| **Безопасная запись через real-time-collab** (без затирания, с подтверждением) | ✅ | n/a | ✅ | — | n/a (только чтение) | + +### Что это даёт на практике + +- **Никакого enterprise-налога.** Официальный MCP Docmost — enterprise-функция: нужна + активная enterprise-лицензия. Этот сервер — MIT и работает с *любым* self-hosted Docmost + через стандартный API + сокет коллаборации, имея лишь email и пароль аккаунта. + +- **Экономия токенов при редактировании.** Большинство Docmost-MCP (и официальный) + предлагают только запись «заменить всю страницу» — агент вынужден скачать весь документ, + изменить и загрузить обратно, оплачивая весь документ **дважды** на каждой мелкой + правке. Этот сервер позволяет агенту изменить ровно один блок (`patch_node` / + `insert_node` / `delete_node`), сделать find/replace с сохранением структуры + (`edit_page_text`) или скопировать страницу на стороне сервера (`copy_page_content`) — + **причём документ ни разу не проходит через модель**. + +- **Записи, которые не воюют с редактором.** Наивная запись через REST конфликтует с тем, + что в этот момент печатает человек, и может молча затереть его правки или упасть на + дебаунс-сохранении Docmost. Этот сервер применяет каждое изменение через живой документ + коллаборации (Hocuspocus/Yjs), читая и записывая **синхронно в пределах одного тика + синхронизации**, чтобы никакая параллельная правка не вклинилась, сериализует записи + **по странице** мьютексом и **ждёт подтверждения сохранения от сервера** до возврата. + Если сокет отвалился посреди записи, инструмент возвращает ошибку, а не ложный успех. + +- **Агентоориентированная модель редактирования.** Серверы «под человека» дают «открыть + страницу» и «заменить страницу», потому что это отражает то, как работает человек. 
Модель + редактирует лучше, *программируя* правку — адресуя блоки по id, делая find/replace или + передавая трансформ `(doc, ctx) => doc` (`docmost_transform`, с dry-run диффом перед + коммитом). Этот сервер построен вокруг этого — поэтому у него есть примитивы + редактирования, которых у остальных просто нет. + +- **Страховка при редактировании, которой нет у других.** `list_page_history` → + `diff_page_versions` → `restore_page_version` дают агенту (и вам) полный цикл «посмотреть + и откатить». Дифф использует *тот же* конвейер `recreateTransform → ChangeSet → + simplifyChanges`, что и встроенный просмотр истории Docmost, так что результат совпадает + с продуктом. + +- **Удобство вместо выковыривания cookie.** Некоторые community-серверы аутентифицируются, + заставляя вас копировать сессионный cookie из DevTools браузера (он истекает), либо лезут + **напрямую в базу PostgreSQL**. Этот сервер логинится по учётным данным и **прозрачно + переавторизуется на 401/403** (с дедупликацией + параллельных логинов), поэтому долгоживущие агенты не падают, когда токен истёк. Он также + соблюдает контроль доступа Docmost, потому что ходит через API и сервер коллаборации как + обычный пользователь. + +--- + +## Инструменты + +Все 38 инструментов, сгруппированы по задачам, для которых вы их возьмёте. + +### Чтение и поиск + +- **`get_workspace`** — Информация о текущем воркспейсе Docmost. +- **`list_spaces`** — Все пространства воркспейса. +- **`list_pages`** — Недавние страницы пространства, по убыванию `updatedAt` (по умолчанию + 50, максимум 100). Для поиска в больших пространствах используйте `search`. +- **`search`** — Полнотекстовый поиск по страницам и контенту (ограничен `limit`, максимум + 100). +- **`get_page`** — Контент страницы как чистый **Markdown** (удобно, но это + *lossy*-представление — id блоков и точная структура таблиц/коллаутов аппроксимируются). 
+- **`get_page_json`** — **Lossless ProseMirror/TipTap JSON** страницы, включая `attrs.id` + каждого блока и `slugId`, используемый в URL. Именно его потребляют инструменты + поблочного редактирования. +- **`get_outline`** — Компактная структура страницы из блоков верхнего уровня (`{index, + type, id, level, firstText}`; для таблиц добавляются число строк/столбцов и тексты ячеек + заголовка, для списков — число пунктов) **без** тела документа. Дешёвый способ найти раздел или таблицу и получить + id блока перед `get_node` / `patch_node` / `insert_node`. +- **`get_node`** — Получить полное ProseMirror-поддерево одного блока (lossless), не + вытягивая всю страницу. Адресуйте его по id блока (из `get_outline` / `get_page_json`) + или формой `#<index>` для блока верхнего уровня — используйте `#<index>` для + таблиц/строк/ячеек, у которых нет id. + +### Жизненный цикл страниц + +- **`create_page`** — Создать страницу из Markdown и поместить в иерархию (опционально + `parentPageId`) одним вызовом. Использует import API Docmost для чистой конвертации + Markdown→ProseMirror. +- **`rename_page`** — Изменить только заголовок страницы, не трогая и не пересылая контент. +- **`move_page`** — Сменить родителя страницы (вложить или вынести в корень); поддерживает + позиционирование по fractional-index. Возвращает успех только при *положительно + подтверждённом* результате. +- **`delete_page`** — Удалить одну страницу. +- **`copy_page_content`** — Заменить тело одной страницы копией тела другой, **полностью на + стороне сервера** — документ не проходит через модель. У целевой страницы сохраняются + собственные заголовок и slug (URL не меняется). + +### Редактирование + +- **`edit_page_text`** — Хирургический find/replace внутри текста страницы. Сохраняет + **всю** структуру: id блоков, marks, ссылки, коллауты, таблицы. Предпочтительный + инструмент для правки формулировок, опечаток, чисел и имён. 
+- **`patch_node`** — Заменить один блок, адресованный по `attrs.id` (из `get_page_json`), + без пересылки документа. +- **`insert_node`** — Вставить блок до/после другого (по `attrs.id` или по якорному тексту) + либо добавить в конец. +- **`delete_node`** — Удалить один блок по его `attrs.id`. +- **`update_page_json`** — Заменить весь контент страницы документом ProseMirror (массовые + перезаписи или когда у узлов нет id). `content` опционален — опустите его, чтобы изменить + только заголовок. Сохраняет переданные id блоков, поэтому якоря заголовков и история + остаются стабильными. +- **`docmost_transform`** — Агентоориентированный интерфейс редактирования: вместо + перепечатывания документа агент **пишет функцию, которая его чинит**. Редактирует + страницу, запуская произвольный **JS-трансформ `(doc, ctx) => doc`** на её *живом* + документе ProseMirror. Работает в **песочнице** (без `require`/`process`/`fs`/сети, + таймаут 5 с). **По умолчанию dry-run**: возвращает предпросмотр диффа без записи; + установите `dryRun:false`, чтобы применить атомарно. `ctx` даёт доступ к комментариям + страницы и набору хелперов (`walk`, `getList`, `blockText`, `insertMarkerAfter`, + `setCalloutRange`, `commentsToFootnotes`, …) для многошаговых согласованных перезаписей — + например перенумерации или превращения inline-комментариев в нумерованные сноски. + +### Таблицы + +- **`table_get`** — Прочитать таблицу как матрицу: `{rows, cols, cells (text[][]), + cellIds}` (id абзаца на ячейку или `null`). Адресуйте таблицу через `#<index>` (из + `get_outline`) или любой id блока внутри неё. Используйте `cellIds` вместе с `patch_node` + для правок ячеек с форматированием. +- **`table_insert_row`** — Вставить строку из текстовых ячеек, дополненную до числа + столбцов таблицы (передать ячеек больше числа столбцов — ошибка). `index` — 0-based + позиция вставки (0 вставляет перед заголовком); опустите, чтобы добавить в конец. +- **`table_delete_row`** — Удалить строку по 0-based `index`. 
Отказывается удалять + единственную строку таблицы; удаление строки 0 делает заголовком следующую строку. +- **`table_update_cell`** — Задать текстовое содержимое ячейки `[row, col]` (0-based). Для + форматирования используйте `patch_node` по id абзаца ячейки из `table_get`. + +### Markdown: экспорт и импорт + +- **`export_page_markdown`** — Экспортировать страницу в один самодостаточный, **lossless + Markdown в диалекте Docmost**: мета-заголовок, тело с inline-якорями комментариев и + диаграммами и завершающий блок тредов комментариев. Рассчитан на цикл «скачать → + отредактировать тело → `import_page_markdown`», сохраняющий всё, включая выделения + комментариев. +- **`import_page_markdown`** — Заменить контент страницы из Markdown-файла в диалекте + Docmost, созданного `export_page_markdown`, восстанавливая якоря-выделения комментариев и + диаграммы из их inline-HTML. (Треды комментариев из файла не пересоздаются на сервере — + записываются только тело страницы и inline-марки комментариев; тредами управляйте через + инструменты/UI комментариев.) + +### Изображения + +- **`insert_image`** — Загрузить локальное изображение и вставить за один шаг: добавить в + конец, поставить вместо текстового плейсхолдера (`replaceText`) или после заданного блока + (`afterText`). Сохраняет id всех остальных блоков. +- **`replace_image`** — Заменить существующее изображение. Загружает новый файл как **новое + вложение** (чистый URL, который рендерится и сбрасывает кэш браузера), затем + перенаправляет все узлы, ссылавшиеся на старое вложение (рекурсивно, включая + коллауты/таблицы), через живой документ, сохраняя комментарии, выравнивание и alt-текст. + (Перезапись «по месту» намеренно не используется — некоторые версии Docmost портят + вложение при перезаписи.) + +### Комментарии + +- **`create_comment`** — Добавить комментарий к странице, опционально **привязав inline** к + точному фрагменту текста (первое вхождение оборачивается comment-маркой). 
+- **`list_comments`** — Список комментариев страницы (контент возвращается как Markdown). +- **`update_comment`** — Изменить существующий комментарий. +- **`delete_comment`** — Удалить комментарий. +- **`check_new_comments`** — Найти комментарии, созданные после заданной метки времени + ISO-8601, по пространству, опционально в рамках поддерева страниц — идеально для агента, + который следит за обратной связью в документе. + +### Версии и история + +- **`list_page_history`** — Сохранённые версии страницы (Docmost авто-снапшотит при каждом + сохранении), новые сверху, курсорная пагинация. id каждого элемента — это `historyId`. +- **`diff_page_versions`** — Дифф двух версий (или версии против живой страницы). + Возвращает вставленный/удалённый текст, счётчики целостности (изображения, ссылки, + таблицы, коллауты, маркеры сносок) и человекочитаемую Markdown-сводку — посчитано тем же + конвейером, что использует встроенный просмотр истории Docmost. +- **`restore_page_version`** — Записать сохранённую версию обратно как текущий контент. У + Docmost нет эндпоинта восстановления, поэтому создаётся **новый** снапшот — само + восстановление тоже обратимо. + +### Публикация + +- **`share_page`** — Сделать страницу публично доступной (идемпотентно) и вернуть её + публичный URL (`/share//p/`); опционально индексирование поисковиками. +- **`unshare_page`** — Отозвать публичный доступ к странице. +- **`list_shares`** — Все публичные ссылки воркспейса с заголовками и публичными URL. + +--- + +## Как выбрать инструмент редактирования + +Та же подсказка отдаётся в рантайме через поле `instructions` MCP-сервера, так что +подходящие клиенты направляют модель автоматически. + +- **Правки текста** (формулировки, опечатки, числа): `edit_page_text`. +- **Один блок** (абзац/заголовок/коллаут/ячейка таблицы): `patch_node` / `insert_node` / + `delete_node`, адресуя узел по его `attrs.id` из `get_page_json`. +- **Изображения**: `insert_image` / `replace_image`. 
+- **Новая страница**: `create_page`. +- **Массовая перезапись или узлы без id**: `update_page_json`. +- **Многошаговая / скриптовая перезапись** (перенумерация, сноски, согласованные правки): + `docmost_transform` — предпросмотр через `dryRun`, затем применение. +- **Скопировать контент целой страницы из другой** (на стороне сервера): + `copy_page_content`. +- **Переименовать страницу** (только заголовок): `rename_page`. +- **Чтение**: `get_page` (Markdown) / `get_page_json` (lossless ProseMirror с id). +- **Просмотр изменений**: `list_page_history` → `diff_page_versions` → + `restore_page_version`. +- **Комментарии**: `create_comment` (с опциональной inline-привязкой) / `list_comments` / + `update_comment` / `delete_comment` / `check_new_comments`. +- **Дешёвая навигация по странице** (найти раздел/таблицу, получить id блока): `get_outline` + → `get_node`. +- **Таблицы** (добавить/удалить строку, задать ячейку): `table_get` / `table_insert_row` / + `table_delete_row` / `table_update_cell`. +- **Round-trip страницы через Markdown** (скачать, отредактировать, залить обратно без + потерь, с комментариями): `export_page_markdown` / `import_page_markdown`. + +--- + +## Как это устроено (технические детали) + +- **Безопасная запись через коллаборацию реального времени.** Мутации контента применяются + через WebSocket коллаборации Docmost (Hocuspocus + Yjs). Сервер подключается, ждёт + первичной синхронизации, чтобы локальный документ отражал авторитетный серверный (включая + правки, которых ещё нет в дебаунс-снапшоте REST), затем **читает → трансформирует → + пишет синхронно** в одном тике, чтобы никакое удалённое обновление не вклинилось, и + **ждёт подтверждения сохранения** до возврата. +- **Сериализация записи по странице.** Асинхронный мьютекс по `pageId` гарантирует, что + две записи MCP в одну страницу никогда не пересекаются; разные страницы друг друга не + блокируют. 
+- **Прозрачная переавторизация.** Логин по email/паролю; истёкшие токены обновляются + автоматически на первом 401/403 (покрывая JSON, multipart-загрузку и путь токена + коллаборации), с дедупликацией параллельных логинов, так что пачка вызовов вызывает один + повторный логин. +- **Lossless- и lossy-чтение.** `get_page_json` возвращает точное дерево ProseMirror с id + блоков; `get_page` возвращает чистый Markdown для удобства. +- **Полная схема Docmost.** Конвертация Markdown↔ProseMirror поддерживает коллауты + (включая вложенные), списки задач (маркированные *и* нумерованные чек-листы), таблицы, + блоки формул, эмбеды, выделение, под/надстрочный текст и прочее, с защитными лимитами + против патологического ввода. +- **Структурные таблицы и lossless Markdown round-trip.** Таблицы можно редактировать как + матрицу (чтение, вставка/удаление строк, задание ячеек по `[row, col]`) без пересылки + документа, а страницу — экспортировать и заново импортировать как самодостаточный + Markdown-файл в диалекте Docmost, сохраняющий inline-якоря комментариев и диаграммы. +- **Ответы, оптимизированные по токенам.** Ответы API урезаются до полей, действительно + нужных агентам, а большие коллекции (пространства, страницы, комментарии, история) + пагинируются. +- **Закалённый рантайм.** Глобальные обработчики не дают случайной ошибке сокета уронить + stdio-сервер; `move_page` требует положительно подтверждённого успеха; движок диффа + откатывается к грубому поблочному диффу, а не падает на патологическом документе. + +--- + +## Установка + +```bash +npm install +npm run build +``` + +## Конфигурация + +Серверу нужны три переменные окружения: + +- `DOCMOST_API_URL` — полный URL к API вашего Docmost (например, + `https://docs.example.com/api`). +- `DOCMOST_EMAIL` — email аккаунта для аутентификации. +- `DOCMOST_PASSWORD` — пароль аккаунта. 
+ +## Использование с Claude Desktop / произвольным MCP-клиентом + +Добавьте сервер в конфигурацию MCP (например, `claude_desktop_config.json`): + +```json +{ + "mcpServers": { + "docmost-local": { + "command": "node", + "args": ["./build/index.js"], + "env": { + "DOCMOST_API_URL": "http://localhost:3000/api", + "DOCMOST_EMAIL": "test@docmost.com", + "DOCMOST_PASSWORD": "test" + } + } + } +} +``` + +## Разработка + +```bash +# Режим наблюдения +npm run watch + +# Сборка +npm run build + +# Тесты (unit + mock; live end-to-end набор требует запущенного Docmost) +npm test +npm run test:e2e +``` + +## Происхождение и благодарности + +Проект начинался как форк +[MrMartiniMo/docmost-mcp](https://github.com/MrMartiniMo/docmost-mcp) (автор Moritz Krause) +и существенно его расширяет — добавлены поблочное редактирование узлов, хирургические +правки текста, песочница `docmost_transform`, история версий / дифф / восстановление, +комментарии, вставка/замена изображений, публичные ссылки, серверное копирование страниц, +двойное чтение JSON/Markdown, прозрачная переавторизация и значительное упрочнение. +Инструменты комментариев портированы из upstream PR #3 от Max Nikitin. Спасибо обоим. + +## Лицензия + +MIT diff --git a/packages/mcp/TEST-PLAN.md b/packages/mcp/TEST-PLAN.md new file mode 100644 index 00000000..24fdd733 --- /dev/null +++ b/packages/mcp/TEST-PLAN.md @@ -0,0 +1,89 @@ +# Docmost MCP — Test Plan (editing & image tools) + +Manual/E2E test plan for every content-mutating tool, with special focus on +images and image replacement. Executed against a live Docmost instance +(`docs.vvzvlad.xyz`) and verified visually in Chrome (public share + authenticated +editor). + +## How to run the automated part + +``` +DOCMOST_API_URL=https://<host>/api \ +DOCMOST_EMAIL=<email> \ +DOCMOST_PASSWORD=<password> \ +node test-e2e.mjs +``` + +`test-e2e.mjs` creates a throwaway page, exercises every code path (including the +image upload/insert/replace cycle) and deletes the page afterwards. 
Collab writes +are debounced server-side, so the script waits ~16 s before reading back via REST. + +## Test matrix + +| # | Tool / path | What is checked | Expected | +|---|-------------|-----------------|----------| +| 1 | `create_page` | title with spaces, slugId returned | page created, title intact | +| 2 | `update_page` (markdown) | headings, **bold**/*italic*/~~strike~~/`code`/link, nested bullet + ordered lists, blockquote, code block, `:::callout:::`, table | all structures survive re-import | +| 3 | `get_page_json` | lossless ProseMirror, block ids, callout/table nodes | present (note: reads the **debounced** REST snapshot — recent collab writes may lag a few seconds) | +| 4 | `edit_page_text` | surgical replace; block ids + marks preserved; ambiguous match rejected; missing match reported | edits applied, ids stable, errors correct | +| 5 | `update_page_json` | full lossless write; custom block ids preserved; existing content (text edits, images, callout, table) not lost | round-trips intact | +| 6 | `upload_image` | uploads attachment, returns node | src is a **clean** `/api/files/<attachmentId>/<fileName>` URL, served `200 image/*` | +| 7 | `insert_image` (append / `replaceText` / `afterText`) | three placements | image lands in the right place, all other block ids preserved | +| 8 | **`replace_image`** | swap an existing figure for new bytes; comments/align/alt preserved; **the new URL must actually serve the image** | new image renders (`200`), old node repointed | + +## Image-specific assertions (the recurring bug area) + +For every uploaded/inserted/replaced image, assert at the HTTP level that the +`src` actually serves bytes — this is what catches "broken image" regressions: + +* `GET <src>` → `200`, `Content-Type: image/*`, body starts with the image magic + (`89 50 4E 47` for PNG, etc.). +* `src` does **not** contain a `?v=` query (see "Known pitfalls"). 
+* After `replace_image`: the returned `newAttachmentId` **differs** from the old + one (replacement uses a fresh attachment → fresh URL), and `GET <src>` → `200`. +* The old image node on the page is repointed to the new attachmentId. + +## Browser verification (Chrome) + +Open the page (public `/share/<shareId>/p/<pageSlug>` URL, or the authenticated editor) +and check each `<img>`: + +```js +[...document.querySelectorAll('.ProseMirror img')].map(im => ({ + src: im.getAttribute('src'), + loaded: im.naturalWidth > 0, // 0 ⇒ broken +})); +``` + +`loaded === true` (naturalWidth > 0) means the image really rendered; `0` means a +broken/empty figure. + +## Known pitfalls (root-caused during testing) + +1. **In-place attachment overwrite corrupts the file (HTTP 500).** + Uploading with an existing `attachmentId` (`POST /files/upload` + `attachmentId`) + overwrites the bytes in place. On this Docmost the attachment then returns + **500 for every URL** (clean, `?v=`, any filename) → broken image. Therefore + `replace_image` must upload a **new** attachment and repoint the nodes; the new + id yields a new URL that both renders and busts the browser cache. The old + attachment is left as an unreferenced orphan: Docmost exposes **no HTTP API to + delete a single content attachment** (verified against the attachment + controller/service and by probing ~20 route variants live — all 404; an + attachment unlinked from a page stays reachable with no auto-GC). Attachments + are removed only by cascade (page/space/user deletion). This matches Docmost's + own editor, which also orphans attachments on image removal/replacement. + +2. **`?v=` cache-buster is unnecessary and was a red herring.** + The file endpoint serves `…/file.png?v=` exactly like the clean URL + (`200 image/*`) — verified at the HTTP layer, on the public share, and in the + authenticated editor. The broken images people saw came from pitfall #1, not + from `?v=`. 
Image `src` is kept clean (`/api/files//`); cache-busting + on replace is achieved by the new attachment id. + +3. **REST snapshot lag.** `get_page_json` reads the debounced DB snapshot, so a + write made moments earlier may not be visible yet. Wait (~16 s) before reading + back, and never feed a possibly-stale snapshot straight into `update_page_json`. + +4. **Callout type narrowing (minor, open).** A `:::warning` callout is imported as + `type: "info"` — the markdown→callout conversion does not carry non-`info` + types through. Cosmetic; tracked separately. diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js new file mode 100644 index 00000000..cf9e5f1a --- /dev/null +++ b/packages/mcp/build/client.js @@ -0,0 +1,2159 @@ +import FormData from "form-data"; +import axios from "axios"; +import { readFileSync, statSync } from "fs"; +import { basename, extname } from "path"; +import { filterWorkspace, filterSpace, filterPage, filterComment, filterSearchResult, } from "./lib/filters.js"; +import { HocuspocusProvider } from "@hocuspocus/provider"; +import { TiptapTransformer } from "@hocuspocus/transformer"; +import * as Y from "yjs"; +import WebSocket from "ws"; +import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js"; +import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js"; +import { docmostExtensions } from "./lib/docmost-schema.js"; +import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js"; +import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js"; +import { withPageLock } from "./lib/page-lock.js"; +import { applyTextEdits } from "./lib/json-edit.js"; +import { getCollabToken, performLogin } from "./lib/auth-utils.js"; +import { diffDocs } from "./lib/diff.js"; 
+import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, } from "./lib/transforms.js"; +import vm from "node:vm"; +export class DocmostClient { + client; + token = null; + apiUrl; + email; + password; + // In-flight login dedup: when the token expires, the 401 interceptor, + // ensureAuthenticated, getCollabTokenWithReauth and the two multipart retries + // can all call login() at once. Memoizing a single promise collapses that + // thundering herd into ONE /auth/login request that everyone awaits. + loginPromise = null; + constructor(baseURL, email, password) { + this.apiUrl = baseURL; + this.email = email; + this.password = password; + this.client = axios.create({ + baseURL, + // Default request timeout so a hung connection cannot wedge a per-page + // lock or block the server indefinitely. Multipart uploads override this + // with a longer per-request timeout. + timeout: 30000, + headers: { + "Content-Type": "application/json", + }, + }); + // Re-authenticate transparently on a 401/403 once: the JWT authToken can + // expire while the server is long-running, after which every cached-token + // request would otherwise fail until a manual restart. On such a response, + // clear the stale token, perform a fresh login, and replay the original + // request exactly once (guarded by config._retry to avoid infinite loops; + // the login request itself is never retried). + this.client.interceptors.response.use((response) => response, async (error) => { + const config = error.config; + const status = error.response?.status; + const isAuthError = status === 401 || status === 403; + const isLoginRequest = typeof config?.url === "string" && config.url.includes("/auth/login"); + if (config && isAuthError && !config._retry && !isLoginRequest) { + config._retry = true; + // Drop the stale token + Authorization header before re-login. 
+ this.token = null; + delete this.client.defaults.headers.common["Authorization"]; + try { + await this.login(); + } + catch (loginError) { + // Re-login failed: surface the original error to the caller. + return Promise.reject(error); + } + // Re-issue the original request with the freshly minted Bearer token. + // Read it from the default header that login() just set, not from + // this.token, to avoid a theoretical "Bearer null" if this.token was + // cleared between login() resolving and this point. + config.headers = config.headers || {}; + config.headers["Authorization"] = + this.client.defaults.headers.common["Authorization"]; + return this.client.request(config); + } + return Promise.reject(error); + }); + } + /** Application base URL (API URL without the /api suffix). */ + get appUrl() { + return this.apiUrl.replace(/\/api\/?$/, ""); + } + async login() { + // Reuse an in-flight login if one is already running so concurrent callers + // share a single /auth/login request instead of each issuing their own. + if (!this.loginPromise) { + this.loginPromise = performLogin(this.apiUrl, this.email, this.password) + .then((token) => { + this.token = token; + this.client.defaults.headers.common["Authorization"] = + `Bearer ${token}`; + }) + .finally(() => { + this.loginPromise = null; + }); + } + return this.loginPromise; + } + async ensureAuthenticated() { + if (!this.token) { + await this.login(); + } + } + /** + * Fetch a collaboration token, transparently re-authenticating once on a + * 401/403. getCollabToken() uses bare axios internally, so it is NOT covered + * by this.client's response interceptor; this helper replicates that + * behaviour for collab-token requests: ensure a token, try once, and on an + * expired-token auth error perform a fresh login and retry exactly once. 
+ */ + async getCollabTokenWithReauth() { + await this.ensureAuthenticated(); + try { + return await getCollabToken(this.apiUrl, this.token); + } + catch (e) { + // getCollabToken wraps the AxiosError in a plain Error but attaches the + // HTTP status as `.status`, so detect an auth failure via either the raw + // AxiosError shape OR the attached status. + const axiosStatus = axios.isAxiosError(e) ? e.response?.status : undefined; + const attachedStatus = e?.status; + const isAuthError = axiosStatus === 401 || + axiosStatus === 403 || + attachedStatus === 401 || + attachedStatus === 403; + if (isAuthError) { + await this.login(); + return await getCollabToken(this.apiUrl, this.token); + } + throw e; + } + } + /** + * Connect to the collaboration websocket, read the live doc, apply + * `transform`, write the result, and wait for the server to persist it — + * WITHOUT acquiring the per-page lock. + * + * This mirrors collaboration.mutatePageContent EXCEPT that it does not call + * withPageLock. It exists solely so replaceImage can hold ONE withPageLock + * across its scan -> upload -> write sequence: the per-page mutex is NOT + * reentrant, so calling the normal (self-locking) mutatePageContent inside an + * outer withPageLock for the same pageId would deadlock. The caller MUST hold + * the page lock for the whole operation; this helper assumes that invariant. + * + * `transform` receives the live ProseMirror doc and returns the NEW full doc + * to write, or `null` to abort with no write. Errors thrown by `transform` + * propagate to the caller. + */ + mutateLiveContentUnlocked(pageId, collabToken, transform) { + const CONNECT_TIMEOUT_MS = 25000; + const PERSIST_TIMEOUT_MS = 20000; + const ydoc = new Y.Doc(); + const wsUrl = buildCollabWsUrl(this.apiUrl); + return new Promise((resolve, reject) => { + let provider; + let applied = false; // onSynced may fire again on reconnect — apply once. 
+ let settled = false; + let connectionLost = false; + let connectTimer; + let persistTimer; + let unsyncedHandler; + let lastWrittenDoc; + const cleanup = () => { + if (connectTimer) + clearTimeout(connectTimer); + if (persistTimer) + clearTimeout(persistTimer); + if (provider) { + if (unsyncedHandler) { + try { + provider.off("unsyncedChanges", unsyncedHandler); + } + catch (err) { } + } + try { + provider.destroy(); + } + catch (err) { } + } + }; + const finish = (err, value) => { + if (settled) + return; + settled = true; + cleanup(); + if (err) + reject(err); + else + resolve(value); + }; + connectTimer = setTimeout(() => { + finish(new Error("Connection timeout to collaboration server")); + }, CONNECT_TIMEOUT_MS); + const waitForPersistence = () => { + if (settled) + return; + if (!provider) { + finish(new Error("collab provider gone before persistence")); + return; + } + if (provider.unsyncedChanges === 0) { + finish(null, lastWrittenDoc); + return; + } + persistTimer = setTimeout(() => { + finish(new Error("Timeout waiting for collaboration server to persist the update")); + }, PERSIST_TIMEOUT_MS); + unsyncedHandler = (data) => { + if (data.number === 0 && !connectionLost) { + finish(null, lastWrittenDoc); + } + }; + provider.on("unsyncedChanges", unsyncedHandler); + }; + provider = new HocuspocusProvider({ + url: wsUrl, + name: `page.${pageId}`, + document: ydoc, + token: collabToken, + // @ts-ignore - Required for Node.js environment + WebSocketPolyfill: WebSocket, + onDisconnect: () => { + connectionLost = true; + finish(new Error("Collaboration connection closed before the update was persisted/synced")); + }, + onClose: () => { + connectionLost = true; + finish(new Error("Collaboration connection closed before the update was persisted/synced")); + }, + onSynced: () => { + if (applied || settled) + return; + applied = true; + // CRITICAL: keep everything between reading and writing the live doc + // synchronous (no await) so no remote update can 
interleave. + let newDoc; + try { + let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); + if (!liveDoc || + typeof liveDoc !== "object" || + !Array.isArray(liveDoc.content)) { + liveDoc = { type: "doc", content: [] }; + } + newDoc = transform(liveDoc); + if (newDoc == null) { + // Transform aborted — write nothing, return the live doc. + lastWrittenDoc = liveDoc; + finish(null, liveDoc); + return; + } + const tempDoc = TiptapTransformer.toYdoc(newDoc, "default", docmostExtensions); + const fragment = ydoc.getXmlFragment("default"); + ydoc.transact(() => { + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc)); + }); + } + catch (e) { + finish(e instanceof Error ? e : new Error(String(e))); + return; + } + lastWrittenDoc = newDoc; + waitForPersistence(); + }, + onAuthenticationFailed: () => { + finish(new Error("Authentication failed for collaboration connection")); + }, + }); + }); + } + /** + * Generic pagination handler for Docmost API endpoints + */ + async paginateAll(endpoint, basePayload = {}, limit = 100) { + await this.ensureAuthenticated(); + const clampedLimit = Math.max(1, Math.min(100, limit)); + // Hard ceiling on the number of pages to fetch: guards against a server + // that returns a perpetually-true hasNextPage (which would otherwise loop + // forever and accumulate duplicates). 
+ const MAX_PAGES = 50; + let page = 1; + let allItems = []; + let hasNextPage = true; + while (hasNextPage && page <= MAX_PAGES) { + const response = await this.client.post(endpoint, { + ...basePayload, + limit: clampedLimit, + page, + }); + const data = response.data; + const items = data.data?.items || data.items || []; + const meta = data.data?.meta || data.meta; + allItems = allItems.concat(items); + // Stop if the page is empty or shorter than the requested size: a full + // page worth of items is the only situation where another page can exist, + // so this defends against a stuck hasNextPage flag in addition to it. + if (items.length === 0 || items.length < clampedLimit) { + break; + } + hasNextPage = meta?.hasNextPage || false; + page++; + } + // If the loop stopped because it hit the MAX_PAGES ceiling while the server + // still reported more results (hasNextPage true and the last page was + // full), the result set is truncated — warn so the caller is not silently + // handed an incomplete list. + if (hasNextPage && page > MAX_PAGES) { + console.warn(`paginateAll: results from "${endpoint}" truncated at the ${MAX_PAGES}-page cap; more pages exist on the server`); + } + return allItems; + } + async getWorkspace() { + await this.ensureAuthenticated(); + const response = await this.client.post("/workspace/info", {}); + return { + data: filterWorkspace(response.data?.data ?? response.data), + success: response.data.success, + }; + } + async getSpaces() { + const spaces = await this.paginateAll("/spaces", {}); + return spaces.map((space) => filterSpace(space)); + } + /** + * List most recent pages (bounded). Fetching the whole space can exceed + * MCP response/time limits on large instances, so a single bounded page + * of results is returned (default 50, max 100). 
+ */ + async listPages(spaceId, limit = 50) { + await this.ensureAuthenticated(); + const clampedLimit = Math.max(1, Math.min(100, limit)); + const payload = { limit: clampedLimit, page: 1 }; + if (spaceId) + payload.spaceId = spaceId; + const response = await this.client.post("/pages/recent", payload); + const data = response.data; + const items = data.data?.items || data.items || []; + return items.map((page) => filterPage(page)); + } + /** + * List sidebar pages for a space. With no pageId the request returns the + * space ROOT pages; with a pageId it returns the direct CHILDREN of that + * page. pageId is therefore optional and is only included in the POST body + * when provided (an empty/undefined pageId would otherwise change the + * semantics on the server). + */ + async listSidebarPages(spaceId, pageId) { + await this.ensureAuthenticated(); + // Paginate: the endpoint returns server-paged children, so posting only + // { page: 1 } silently dropped every child beyond the first page. Loop on + // meta.hasNextPage (with a MAX_PAGES ceiling like paginateAll, guarding + // against a stuck hasNextPage flag) and accumulate all children. + const MAX_PAGES = 50; + let page = 1; + let allItems = []; + let hasNextPage = true; + while (hasNextPage && page <= MAX_PAGES) { + // Only send pageId when scoping to a page's children; omit it for roots. + const payload = { spaceId, page }; + if (pageId) + payload.pageId = pageId; + const response = await this.client.post("/pages/sidebar-pages", payload); + const data = response.data?.data ?? response.data; + const items = data?.items || []; + allItems = allItems.concat(items); + hasNextPage = data?.meta?.hasNextPage || false; + page++; + } + return allItems; + } + /** + * Enumerate EVERY page in a space (or in a subtree, when rootPageId is given) + * by walking the sidebar-pages tree. + * + * Starting set: the children of rootPageId when provided, otherwise the + * space root pages. 
+     * From there it does an iterative breadth-first walk: each
+     * node is collected, and when node.hasChildren is true its direct children
+     * are fetched via listSidebarPages(spaceId, node.id) and enqueued.
+     *
+     * This replaces the old "/pages/recent" enumeration, which is a bounded
+     * recent-activity feed (~5000 cap) and therefore misses comments on older
+     * pages that were never recently touched.
+     *
+     * Safeguards: a `visited` Set of page ids prevents re-processing a node
+     * (cycles / duplicate references), and a hard node cap bounds pathological
+     * trees so the walk always terminates. A failure while fetching one node's
+     * children skips that branch only (logged when DEBUG is set).
+     */
+    async enumerateSpacePages(spaceId, rootPageId) {
+        const MAX_NODES = 10000;
+        const result = [];
+        const visited = new Set();
+        // Seed the queue with the starting level (subtree children or roots).
+        const queue = await this.listSidebarPages(spaceId, rootPageId);
+        // Walk the queue with a moving head index rather than queue.shift():
+        // shift() re-indexes the whole array on every call, which turns a walk of
+        // up to MAX_NODES nodes quadratic. Visiting order is unchanged (FIFO).
+        for (let head = 0; head < queue.length && result.length < MAX_NODES; head++) {
+            const node = queue[head];
+            if (!node || typeof node !== "object" || !node.id)
+                continue;
+            // Skip already-seen ids to guard against cycles / duplicate references.
+            if (visited.has(node.id))
+                continue;
+            visited.add(node.id);
+            result.push(node);
+            if (node.hasChildren) {
+                try {
+                    const children = await this.listSidebarPages(spaceId, node.id);
+                    for (const child of children)
+                        queue.push(child);
+                }
+                catch (e) {
+                    // A failure fetching one node's children must not abort the whole
+                    // walk: skip this branch and keep enumerating the rest. Leave a
+                    // trace under DEBUG so an incomplete enumeration can be diagnosed.
+                    if (process.env.DEBUG)
+                        console.error(`enumerateSpacePages: failed to list children of ${node.id}`, e);
+                }
+            }
+        }
+        return result;
+    }
+    /** Raw page info including the ProseMirror JSON content and slugId. */
+    async getPageRaw(pageId) {
+        await this.ensureAuthenticated();
+        const response = await this.client.post("/pages/info", { pageId });
+        return response.data?.data ?? response.data;
+    }
+    /**
+     * Fetch a page as markdown together with its direct subpages.
+     *
+     * The stored ProseMirror content is converted with
+     * convertProseMirrorToMarkdown (empty string when the page has no content).
+     * The first "{{SUBPAGES}}" placeholder, if present, is replaced by a
+     * generated "### Subpages" list, or removed when there are no subpages.
+     * A subpage fetch failure is logged and treated as "no subpages".
+     * Returns `{ data, success: true }` with `data` built by filterPage.
+     */
+    async getPage(pageId) {
+        await this.ensureAuthenticated();
+        const resultData = await this.getPageRaw(pageId);
+        let content = resultData.content
+            ? convertProseMirrorToMarkdown(resultData.content)
+            : "";
+        // Always fetch subpages to provide context to the agent
+        let subpages = [];
+        try {
+            subpages = await this.listSidebarPages(resultData.spaceId, pageId);
+        }
+        catch (e) {
+            console.warn("Failed to fetch subpages:", e);
+        }
+        // Resolve subpages if the placeholder exists
+        if (content && content.includes("{{SUBPAGES}}")) {
+            let replacement = "";
+            if (subpages && subpages.length > 0) {
+                const list = subpages
+                    .map((p) => `- [${p.title}](page:${p.id})`)
+                    .join("\n");
+                replacement = `### Subpages\n${list}`;
+            }
+            // Use a replacer FUNCTION: with a string replacement, String.replace
+            // interprets "$&", "$$", "$`" etc. as special patterns, so a subpage
+            // title containing "$" would corrupt the generated list.
+            content = content.replace("{{SUBPAGES}}", () => replacement);
+        }
+        return {
+            data: filterPage(resultData, content, subpages),
+            success: true,
+        };
+    }
+    /** Page info + raw ProseMirror JSON content (lossless representation). */
+    async getPageJson(pageId) {
+        const data = await this.getPageRaw(pageId);
+        return {
+            id: data.id,
+            slugId: data.slugId,
+            title: data.title,
+            parentPageId: data.parentPageId,
+            spaceId: data.spaceId,
+            updatedAt: data.updatedAt,
+            // A page without stored content is reported as an empty document.
+            content: data.content || { type: "doc", content: [] },
+        };
+    }
+    /**
+     * Compact outline of a page's top-level blocks (no full document body).
+     * Cheap way to locate sections/tables and grab block ids before drilling in
+     * with get_node / patch_node / insert_node.
+     */
+    async getOutline(pageId) {
+        await this.ensureAuthenticated();
+        const data = await this.getPageRaw(pageId);
+        return {
+            pageId,
+            slugId: data.slugId,
+            title: data.title,
+            outline: buildOutline(data.content ?? { type: "doc", content: [] }),
+        };
+    }
+    /**
+     * Fetch a single node's full ProseMirror subtree (lossless) by reference:
+     * a block id (headings/paragraphs/callouts/images), or `#` to select
+     * a top-level block by its outline index (the only way to reach tables/rows/
+     * cells, which carry no id).
+     */
+    async getNode(pageId, nodeId) {
+        await this.ensureAuthenticated();
+        const page = await this.getPageRaw(pageId);
+        // A page with no stored content is searched as an empty document.
+        const docJson = page.content ?? { type: "doc", content: [] };
+        const match = getNodeByRef(docJson, nodeId);
+        if (match) {
+            const { path, type, node } = match;
+            return { pageId, ref: nodeId, path, type, node };
+        }
+        throw new Error(`get_node: no node found for "${nodeId}" on page ${pageId} (use a block id from get_outline, or "#" for a top-level block such as a table)`);
+    }
+    /**
+     * Return a table's contents in matrix form.
+     *
+     * `tableRef` selects the table: either the `#` reference reported by
+     * get_outline or the block id of a node located inside the table. The
+     * result carries the row/column counts, the table's path, the cell texts,
+     * and a `cellIds` matrix alongside `cells` holding each cell's first
+     * paragraph id (or null), which a caller can hand to patch_node for
+     * rich-formatted edits. An unresolvable reference is an error.
+     */
+    async getTable(pageId, tableRef) {
+        await this.ensureAuthenticated();
+        const page = await this.getPageRaw(pageId);
+        const docJson = page.content ?? { type: "doc", content: [] };
+        const table = readTable(docJson, tableRef);
+        if (table) {
+            const { rows, cols, path, cells, cellIds } = table;
+            return { pageId, table: tableRef, rows, cols, path, cells, cellIds };
+        }
+        throw new Error(`table_get: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`);
+    }
+    /**
+     * Add a row of plain-text cells to a table, editing the LIVE collab
+     * document.
+     *
+     * `tableRef` is `#` or the id of a block inside the table. `cells` gets
+     * padded up to the table's column count (supplying more cells than columns
+     * throws). `index` is the 0-based position for the new row; leaving it out,
+     * or passing an out-of-range value, appends. An unresolvable table
+     * reference is an error.
+     */
+    async tableInsertRow(pageId, tableRef, cells, index) {
+        await this.ensureAuthenticated();
+        const collabToken = await this.getCollabTokenWithReauth();
+        // The flag lives outside the transform and is cleared at the top of every
+        // run, so a retried collab transform starts from a clean state.
+        let inserted = false;
+        const transform = (liveDoc) => {
+            inserted = false;
+            const outcome = insertTableRow(liveDoc, tableRef, cells, index);
+            inserted = outcome.inserted;
+            // Returning null (table not found) skips the write entirely.
+            return inserted ? outcome.doc : null;
+        };
+        await mutatePageContent(pageId, collabToken, this.apiUrl, transform);
+        if (inserted) {
+            return { success: true, table: tableRef, inserted: true };
+        }
+        throw new Error(`table_insert_row: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`);
+    }
+    /**
+     * Remove one row (0-based `index`) from a table, editing the LIVE collab
+     * document.
+     *
+     * `tableRef` is `#` or the id of a block inside the table. Out-of-range and
+     * last-row errors raised by the helper are passed through unchanged; an
+     * unresolvable table reference is reported by this method.
+     */
+    async tableDeleteRow(pageId, tableRef, index) {
+        await this.ensureAuthenticated();
+        const collabToken = await this.getCollabTokenWithReauth();
+        // Cleared at the top of every transform run so a retry recomputes it.
+        let deleted = false;
+        const transform = (liveDoc) => {
+            deleted = false;
+            const outcome = deleteTableRow(liveDoc, tableRef, index);
+            deleted = outcome.deleted;
+            // Returning null (table not found) skips the write entirely.
+            return deleted ? outcome.doc : null;
+        };
+        await mutatePageContent(pageId, collabToken, this.apiUrl, transform);
+        if (deleted) {
+            return { success: true, table: tableRef, deleted: true };
+        }
+        throw new Error(`table_delete_row: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`);
+    }
+    /**
+     * Set the plain-text content of cell `[row, col]` (0-based) in a table on the
+     * LIVE collab document, replacing the cell's content with a single text
+     * paragraph (the cell's first-paragraph id is preserved). `tableRef` is
+     * `#` or a block id inside the target table. The helper's out-of-range
+     * error propagates; a missing table throws here.
+     */
+    async tableUpdateCell(pageId, tableRef, row, col, text) {
+        await this.ensureAuthenticated();
+        const collabToken = await this.getCollabTokenWithReauth();
+        // Tracked outside the transform and reset on every run so a collab retry
+        // recomputes it cleanly.
+        let updated = false;
+        await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
+            updated = false;
+            const { doc: nd, updated: upd } = updateTableCell(liveDoc, tableRef, row, col, text);
+            updated = upd;
+            if (!updated)
+                return null; // table not found -> skip the write entirely
+            return nd;
+        });
+        if (!updated) {
+            throw new Error(`table_update_cell: no table found for "${tableRef}" on page ${pageId} (use "#" from get_outline, or a block id inside the table)`);
+        }
+        return { success: true, table: tableRef, row, col };
+    }
+    /**
+     * Create a new page with title and content.
+     * Uses the /pages/import workaround (the only endpoint accepting content),
+     * then moves the page and restores the exact title: the import endpoint
+     * derives the title from the FILENAME and replaces spaces with
+     * underscores, so we explicitly re-set it via /pages/update afterwards.
+     *
+     * Throws when parentPageId is given but cannot be fetched, when the import
+     * request fails (after one re-login retry on 401/403), or when the import
+     * response carries no page id. Returns the created page via getPage.
+     */
+    async createPage(title, content, spaceId, parentPageId) {
+        await this.ensureAuthenticated();
+        if (parentPageId) {
+            // Existence check only, so the raw fetch is enough. The full getPage
+            // also converts the parent to markdown and lists its subpages; a
+            // failure in that extra work used to be reported as "not found".
+            try {
+                await this.getPageRaw(parentPageId);
+            }
+            catch (e) {
+                throw new Error(`Parent page with ID ${parentPageId} not found.`);
+            }
+        }
+        // 1. Create content via Import (using multipart/form-data).
+        // Build a FRESH FormData per send attempt: a FormData body is a single-use
+        // stream consumed on the first send, so it cannot be replayed by
+        // this.client's response interceptor (replay fails with 'socket hang up').
+        // Multipart re-auth is therefore done here with bare axios and an explicit
+        // one-shot 401/403 retry that rebuilds the body.
+        const fileContent = Buffer.from(content, "utf-8");
+        const buildForm = () => {
+            const form = new FormData();
+            form.append("spaceId", spaceId);
+            form.append("file", fileContent, {
+                filename: `${title || "import"}.md`,
+                contentType: "text/markdown",
+            });
+            return form;
+        };
+        const importUrl = `${this.apiUrl}/pages/import`;
+        // One send attempt. buildForm() is called ONCE per attempt and the same
+        // instance supplies both getHeaders() and the body, so the Content-Type
+        // boundary matches the body.
+        // The Authorization header is read from this.client's defaults at send
+        // time (set by login(), only ever deleted — never set to null) instead of
+        // building `Bearer ${this.token}`: a concurrent JSON 401 can null
+        // this.token mid-flight, which would otherwise produce a literal
+        // "Bearer null". Reading it per attempt also picks up the fresh token
+        // after the re-login below.
+        const sendImport = () => {
+            const form = buildForm();
+            return axios.post(importUrl, form, {
+                headers: {
+                    ...form.getHeaders(),
+                    Authorization: this.client.defaults.headers.common["Authorization"],
+                },
+                timeout: 60000,
+            });
+        };
+        let response;
+        try {
+            response = await sendImport();
+        }
+        catch (error) {
+            // On an expired-token auth error, re-login and retry exactly once with a
+            // freshly-rebuilt FormData (the previous one was already consumed).
+            const status = axios.isAxiosError(error)
+                ? error.response?.status
+                : undefined;
+            if (status !== 401 && status !== 403) {
+                throw error;
+            }
+            await this.login();
+            response = await sendImport();
+        }
+        const created = response.data?.data ?? response.data;
+        const newPageId = created?.id;
+        // Fail with a clear message instead of a TypeError (or a follow-up request
+        // with pageId undefined) when the response shape is unexpected.
+        if (!newPageId) {
+            throw new Error("create_page: import response did not include the new page id");
+        }
+        // 2. Move to parent if needed
+        if (parentPageId) {
+            await this.movePage(newPageId, parentPageId);
+        }
+        // 3. Restore the exact title (import mangles spaces into underscores)
+        if (title) {
+            await this.client.post("/pages/update", { pageId: newPageId, title });
+        }
+        return this.getPage(newPageId);
+    }
+    /**
+     * Update a page's content from markdown and optionally its title.
+     * NOTE: full re-import — block ids regenerate. For surgical changes
+     * use editPageText / updatePageJson instead.
+     *
+     * The title (when given) is updated first, then the content is written
+     * through the realtime collaboration path. Throws
+     * "Failed to update page content: ..." when that write fails.
+     */
+    async updatePage(pageId, content, title) {
+        await this.ensureAuthenticated();
+        if (title) {
+            await this.client.post("/pages/update", { pageId, title });
+        }
+        let collabToken = "";
+        try {
+            collabToken = await this.getCollabTokenWithReauth();
+            await updatePageContentRealtime(pageId, content, collabToken, this.apiUrl);
+        }
+        catch (error) {
+            // Verbose diagnostics (incl. anything that could expose a token prefix)
+            // are gated behind DEBUG; the thrown Error below carries no token data.
+            if (process.env.DEBUG) {
+                console.error("Failed to update page content via realtime collaboration:", error);
+                const tokenPreview = collabToken
+                    ? collabToken.substring(0, 15) + "..."
+                    : "null";
+                console.error(`Collab token preview: ${tokenPreview}`);
+            }
+            // Anything can be thrown in JS; avoid reporting "undefined" when the
+            // thrown value is not an Error.
+            const reason = error instanceof Error ? error.message : String(error);
+            throw new Error(`Failed to update page content: ${reason}`);
+        }
+        return {
+            success: true,
+            modified: true,
+            message: "Page updated successfully.",
+            pageId: pageId,
+        };
+    }
+    /**
+     * Validate a URL string against a scheme allowlist for a given context.
+     *
+     * The markdown link path enforces safe schemes via TipTap, but the raw
+     * JSON path (updatePageJson) bypasses that — so this is the sanitization
+     * choke point for ProseMirror JSON written directly by the caller.
+     *
+     * - "link": reject javascript:, vbscript:, data: (any scheme that can
+     *   execute or smuggle script when the href is clicked).
+     * - "src": allow only http(s):, mailto:, /api/files paths, or a
+     *   scheme-less relative/absolute path; reject
+     *   javascript:/vbscript:/data:/file:.
+     */
+    isSafeUrl(url, context) {
+        if (typeof url !== "string")
+            return false;
+        const trimmed = url.trim();
+        if (trimmed === "")
+            return true; // empty href/src is harmless
+        // Extract a leading "scheme:" if present. A scheme must start with a
+        // letter and contain only letters/digits/+/-/. before the colon. Strip
+        // whitespace and ASCII control chars first so a tab/newline embedded in
+        // the scheme cannot smuggle a dangerous scheme past the check.
+        const cleaned = trimmed.replace(/[\s\x00-\x1f]+/g, "");
+        const schemeMatch = /^([a-zA-Z][a-zA-Z0-9+.-]*):/.exec(cleaned);
+        const scheme = schemeMatch ? schemeMatch[1].toLowerCase() : null;
+        // No scheme: a relative/anchor link or a relative/absolute path
+        // (incl. /api/files) — fine in both contexts.
+        if (scheme === null)
+            return true;
+        if (context === "link") {
+            // Links use a denylist; data: is blocked too (can carry script payloads).
+            return (scheme !== "javascript" &&
+                scheme !== "vbscript" &&
+                scheme !== "data");
+        }
+        // context === "src": strict allowlist. javascript:/vbscript:/data:/file:
+        // are rejected simply by not being on it, so no separate denylist is needed.
+        return scheme === "http" || scheme === "https" || scheme === "mailto";
+    }
+    /**
+     * Recursively walk a ProseMirror doc and reject any unsafe URL on a link
+     * mark href or on a media node's src/url. Media nodes covered: image,
+     * attachment, video, embed (rendered as an iframe), youtube, drawio,
+     * excalidraw, audio and pdf. An absent (null/undefined) href, src or url is
+     * skipped: there is no URL to validate. Throws a clear error on the first
+     * violation. A max-depth guard turns an over-deep document into a clean
+     * error instead of a RangeError stack overflow.
+     */
+    validateDocUrls(node, depth = 0) {
+        const MAX_DEPTH = 200;
+        if (depth > MAX_DEPTH) {
+            throw new Error(`document nesting exceeds the maximum depth of ${MAX_DEPTH}`);
+        }
+        if (!node || typeof node !== "object")
+            return;
+        // Link marks on text nodes: validate the href.
+        if (Array.isArray(node.marks)) {
+            for (const mark of node.marks) {
+                if (mark && mark.type === "link" && mark.attrs) {
+                    // Same `!= null` guard as the media branch below: a link mark with
+                    // no href carries no URL, so it must not be reported as an "unsafe"
+                    // one. A present but non-string href is still rejected by isSafeUrl.
+                    const href = mark.attrs.href;
+                    if (href != null && !this.isSafeUrl(href, "link")) {
+                        throw new Error(`unsafe link href rejected: "${href}"`);
+                    }
+                }
+            }
+        }
+        // Media nodes: validate src/url against the stricter src allowlist.
+        // embed renders as an iframe (highest risk); the other listed types
+        // likewise carry a src/url attribute, so they all get the same scheme
+        // check as image/attachment/video.
+        if (node.type === "image" ||
+            node.type === "attachment" ||
+            node.type === "video" ||
+            node.type === "embed" ||
+            node.type === "youtube" ||
+            node.type === "drawio" ||
+            node.type === "excalidraw" ||
+            node.type === "audio" ||
+            node.type === "pdf") {
+            const attrs = node.attrs || {};
+            for (const key of ["src", "url"]) {
+                if (attrs[key] != null && !this.isSafeUrl(attrs[key], "src")) {
+                    throw new Error(`unsafe ${node.type} ${key} rejected: "${attrs[key]}"`);
+                }
+            }
+        }
+        if (Array.isArray(node.content)) {
+            for (const child of node.content) {
+                this.validateDocUrls(child, depth + 1);
+            }
+        }
+    }
+    /**
+     * Recursively validate the STRUCTURE of a ProseMirror node (reuses the
+     * recursion shape of validateDocUrls). Every node must be an object with a
+     * string `type`; when present, `content` must be an array, `marks` must be
+     * an array of objects each with a string `type`, and a text node's `text`
+     * must be a string. Throws a clear "invalid ProseMirror document" error on
+     * the first violation. A max-depth guard turns an over-deep document into a
+     * clean error instead of a RangeError stack overflow.
+     */
+    validateDocStructure(node, depth = 0) {
+        const MAX_DEPTH = 200;
+        if (depth > MAX_DEPTH) {
+            throw new Error(`invalid ProseMirror document: nesting exceeds the maximum depth of ${MAX_DEPTH}`);
+        }
+        if (!node || typeof node !== "object" || typeof node.type !== "string") {
+            throw new Error("invalid ProseMirror document: every node must be an object with a string `type`");
+        }
+        // A text node MUST carry a string `text`. The check is deliberately not
+        // gated on the property being present: a text node with no `text` at all
+        // is exactly the malformed case that has to be rejected.
+        if (node.type === "text" && typeof node.text !== "string") {
+            throw new Error("invalid ProseMirror document: a text node must have a string `text`");
+        }
+        if (node.marks !== undefined) {
+            if (!Array.isArray(node.marks)) {
+                throw new Error("invalid ProseMirror document: `marks` must be an array");
+            }
+            for (const mark of node.marks) {
+                if (!mark || typeof mark !== "object" || typeof mark.type !== "string") {
+                    throw new Error("invalid ProseMirror document: every mark must be an object with a string `type`");
+                }
+            }
+        }
+        if (node.content !== undefined) {
+            if (!Array.isArray(node.content)) {
+                throw new Error("invalid ProseMirror document: `content` must be an array when present");
+            }
+            for (const child of node.content) {
+                this.validateDocStructure(child, depth + 1);
+            }
+        }
+    }
+    /**
+     * Replace page content with a raw ProseMirror JSON document (lossless) and/or
+     * update its title. Both `doc` and `title` are optional, but at least one must
+     * be supplied:
+     *  - `doc` provided -> validate + full-overwrite the body (and update the
+     *    title too when `title` is also given).
+     *  - `doc` omitted, `title` given -> title-only update; the body is NOT
+     *    touched/resent (no collab write happens).
+     *  - neither given -> throws (nothing to update).
+     */
+    async updatePageJson(pageId, doc, title) {
+        await this.ensureAuthenticated();
+        // Title-only / no-op handling: when no document is supplied, do NOT write
+        // the body. Update the title if one was given; otherwise there is nothing
+        // to do, so fail loudly rather than silently no-op.
+        if (doc == null) {
+            if (!title) {
+                throw new Error("update_page_json: nothing to update (provide content and/or title)");
+            }
+            await this.client.post("/pages/update", { pageId, title });
+            return {
+                success: true,
+                modified: true,
+                message: "Page title updated (content left unchanged).",
+                pageId,
+            };
+        }
+        // Validate the document shape before a full overwrite: a malformed doc
+        // would otherwise silently corrupt the page (full-overwrite is the
+        // documented behaviour; no optimistic-concurrency is applied here).
+        if (typeof doc !== "object" ||
+            doc.type !== "doc" ||
+            !Array.isArray(doc.content)) {
+            throw new Error('content must be a ProseMirror document ({"type":"doc","content":[...]}) ' +
+                "where content is an array of nodes each having a string `type`");
+        }
+        // Recurse the WHOLE document so a malformed nested node (e.g. a node with a
+        // non-string type, a non-array content/marks, or a text node missing its
+        // string text) is rejected up front rather than silently corrupting the
+        // page on overwrite.
+        this.validateDocStructure(doc);
+        // Sanitize URLs before writing. This closes the JSON-path bypass: unlike
+        // the markdown link path (which TipTap sanitizes), raw JSON could otherwise
+        // inject javascript:/data: link hrefs or media srcs straight into the doc.
+        this.validateDocUrls(doc);
+        if (title) {
+            await this.client.post("/pages/update", { pageId, title });
+        }
+        const collabToken = await this.getCollabTokenWithReauth();
+        await replacePageContent(pageId, doc, collabToken, this.apiUrl);
+        return {
+            success: true,
+            modified: true,
+            message: "Page content replaced from ProseMirror JSON.",
+            pageId,
+        };
+    }
+    /**
+     * Export a page to a single self-contained Docmost-flavoured markdown file:
+     * meta block + body (with inline comment anchors + diagrams) + comment
+     * threads. Lossless round-trip target; see importPageMarkdown for the inverse.
+     */
+    async exportPageMarkdown(pageId) {
+        await this.ensureAuthenticated();
+        const page = await this.getPageRaw(pageId);
+        let body = "";
+        if (page.content) {
+            body = convertProseMirrorToMarkdown(page.content);
+        }
+        // Comments are best-effort: if they cannot be fetched the export still
+        // goes ahead with an empty comments block so the body is never lost. The
+        // failure is only reported when DEBUG is set.
+        let threads = [];
+        try {
+            threads = await this.listComments(pageId);
+        }
+        catch (err) {
+            if (process.env.DEBUG) {
+                console.error("export: listComments failed", err);
+            }
+        }
+        return serializeDocmostMarkdown({
+            version: 1,
+            pageId: page.id,
+            slugId: page.slugId,
+            title: page.title,
+            spaceId: page.spaceId,
+            parentPageId: page.parentPageId ?? null,
+        }, body, threads);
+    }
+    /**
+     * Load a self-contained Docmost markdown file into an existing page.
+     *
+     * The file is split into its meta block, body and comments block; the body
+     * is converted to ProseMirror and written over the page content. Comment
+     * THREAD records are not created on the server — only the comment marks
+     * produced by the body conversion end up in the document.
+     *
+     * Returns `{ success, pageId, anchoredCommentCount, commentsInFile }`, plus
+     * a `warning` string when the file's meta names a different page id.
+     */
+    async importPageMarkdown(pageId, fullMarkdown) {
+        await this.ensureAuthenticated();
+        const parsed = parseDocmostMarkdown(fullMarkdown);
+        const doc = await markdownToProseMirror(parsed.body);
+        const collabToken = await this.getCollabTokenWithReauth();
+        await replacePageContent(pageId, doc, collabToken, this.apiUrl);
+        // Gather the distinct comment ids carried by comment marks in the document
+        // that was just written. An id that only occurs as plain text (e.g. inside
+        // a fenced code block) never becomes a mark, so it is not counted.
+        const anchoredIds = new Set();
+        const visit = (node) => {
+            if (!node || typeof node !== "object") {
+                return;
+            }
+            const marks = Array.isArray(node.marks) ? node.marks : [];
+            for (const mark of marks) {
+                if (mark && mark.type === "comment" && mark.attrs?.commentId) {
+                    anchoredIds.add(mark.attrs.commentId);
+                }
+            }
+            const children = Array.isArray(node.content) ? node.content : [];
+            for (const child of children) {
+                visit(child);
+            }
+        };
+        visit(doc);
+        const fileComments = parsed.comments;
+        const result = {
+            success: true,
+            pageId,
+            anchoredCommentCount: anchoredIds.size,
+            commentsInFile: Array.isArray(fileComments) ? fileComments.length : 0,
+        };
+        // Non-fatal: flag a file that was exported from some OTHER page.
+        const exportedFrom = parsed.meta?.pageId;
+        if (exportedFrom && exportedFrom !== pageId) {
+            result.warning = `File was exported from page ${exportedFrom} but is being imported into ${pageId}.`;
+        }
+        return result;
+    }
+    /**
+     * Change only a page's title. A single /pages/update request carrying the
+     * page id and the new title is sent; no content is read or resent.
+     */
+    async renamePage(pageId, title) {
+        await this.ensureAuthenticated();
+        const payload = { pageId, title };
+        await this.client.post("/pages/update", payload);
+        return { success: true, pageId, title };
+    }
+    /**
+     * Copy the WHOLE content of one page onto another, entirely server-side: the
+     * source's ProseMirror document is read and written verbatim onto the target
+     * via the live collab path, so the document never passes through the model.
+     *
+     * Only the target's BODY is replaced — its title and slug live on the page
+     * record (not in the content), so they are untouched. The source page is not
+     * modified at all.
+     */
+    async copyPageContent(sourcePageId, targetPageId) {
+        await this.ensureAuthenticated();
+        // A self-copy would be a no-op overwrite; reject it explicitly so a caller
+        // mistake surfaces as a clear error rather than a silent round-trip.
+        if (sourcePageId === targetPageId) {
+            throw new Error("copy_page_content: sourcePageId and targetPageId are the same page (no-op copy)");
+        }
+        const source = await this.getPageRaw(sourcePageId);
+        const content = source?.content;
+        if (!content ||
+            typeof content !== "object" ||
+            content.type !== "doc" ||
+            !Array.isArray(content.content)) {
+            throw new Error(`copy_page_content: source page ${sourcePageId} has no usable ProseMirror content to copy`);
+        }
+        // Defense-in-depth: run the same URL-scheme sanitizer the JSON write path
+        // uses, so copying never lands a javascript:/data: href/src on the target
+        // (parity with updatePageJson; harmless for already-stored source content).
+        this.validateDocUrls(content);
+        const collabToken = await this.getCollabTokenWithReauth();
+        await replacePageContent(targetPageId, content, collabToken, this.apiUrl);
+        return {
+            success: true,
+            sourcePageId,
+            targetPageId,
+            copiedNodes: content.content.length,
+        };
+    }
+    /**
+     * Surgical text edits: find/replace inside text nodes of the live
+     * document. Preserves all block ids, marks, callouts and tables.
+     */
+    async editPageText(pageId, edits) {
+        await this.ensureAuthenticated();
+        const collabToken = await this.getCollabTokenWithReauth();
+        // Apply the edits against the LIVE synced document, not the debounced REST
+        // snapshot, so concurrent human edits/comments are preserved. applyTextEdits
+        // throws descriptive errors on zero/multiple matches — let them propagate.
+        let results;
+        await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
+            const r = applyTextEdits(liveDoc, edits);
+            results = r.results;
+            return r.doc;
+        });
+        return {
+            success: true,
+            pageId,
+            edits: results,
+            message: "Text edits applied (node ids and formatting preserved).",
+        };
+    }
+    /**
+     * Replace EVERY node whose attrs.id === nodeId (recursively, including nodes
+     * nested in callouts/tables) with the supplied node. Operates on the LIVE
+     * collab document so comments and concurrent edits are preserved.
+     *
+     * The replacement node's block id is preserved: if node.attrs is missing it
+     * is created, and if node.attrs.id is missing it is set to nodeId so the
+     * replacement keeps the same id it replaced. Throws if no node matches, or
+     * if the replacement (at any depth) carries an unsafe link href or media
+     * src/url.
+     */
+    async patchNode(pageId, nodeId, node) {
+        await this.ensureAuthenticated();
+        if (!node || typeof node !== "object" || typeof node.type !== "string") {
+            throw new Error("patch_node: `node` must be an object with a string `type`");
+        }
+        // Preserve the block id WITHOUT mutating the caller's object: build a local
+        // copy whose attrs.id === nodeId (so the swapped-in node keeps the id of the
+        // node it replaces).
+        const target = {
+            ...node,
+            attrs: {
+                ...(node.attrs && typeof node.attrs === "object" ? node.attrs : {}),
+            },
+        };
+        if (target.attrs.id == null) {
+            target.attrs.id = nodeId;
+        }
+        // The node is raw caller-supplied ProseMirror JSON, so it must pass the
+        // same URL-scheme sanitizer as updatePageJson; otherwise patch_node is a
+        // way around it for javascript:/data: hrefs and media srcs. Checked before
+        // any collab work so an unsafe node never reaches the live document.
+        this.validateDocUrls(target);
+        const collabToken = await this.getCollabTokenWithReauth();
+        // Track the replacement count in an outer var, reset per-transform, so a
+        // collab retry recomputes it cleanly (mirrors replaceImage's pattern).
+        let replaced = 0;
+        await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
+            replaced = 0;
+            const { doc: nd, replaced: r } = replaceNodeById(liveDoc, nodeId, target);
+            replaced = r;
+            if (replaced === 0)
+                return null; // no match -> skip the write entirely
+            return nd;
+        });
+        if (replaced === 0) {
+            throw new Error(`patch_node: no node with id "${nodeId}" found on page ${pageId}`);
+        }
+        return { success: true, replaced, nodeId };
+    }
+    /**
+     * Insert a node relative to an anchor (or append it at the top level).
+     * Operates on the LIVE collab document so comments and concurrent edits are
+     * preserved.
+     *
+     * opts.position:
+     *  - "append": push the node at the end of the top-level content.
+     *  - "before"/"after": insert the node as a sibling of the anchor, just
+     *    before/after it. Exactly one of anchorNodeId / anchorText must be given;
+     *    anchorNodeId locates a node anywhere by attrs.id, anchorText matches the
+     *    first top-level block whose plain text includes it.
+     *
+     * Throws if the anchor cannot be found, or if the node (at any depth)
+     * carries an unsafe link href or media src/url.
+     */
+    async insertNode(pageId, node, opts) {
+        await this.ensureAuthenticated();
+        if (!node || typeof node !== "object" || typeof node.type !== "string") {
+            throw new Error("insert_node: `node` must be an object with a string `type`");
+        }
+        if (!opts ||
+            (opts.position !== "before" &&
+                opts.position !== "after" &&
+                opts.position !== "append")) {
+            throw new Error('insert_node: `position` must be one of "before", "after", "append"');
+        }
+        if (opts.position === "before" || opts.position === "after") {
+            // before/after require EXACTLY ONE anchor (an id or a text fragment).
+            const hasId = typeof opts.anchorNodeId === "string" && opts.anchorNodeId.length > 0;
+            const hasText = typeof opts.anchorText === "string" && opts.anchorText.length > 0;
+            if (hasId === hasText) {
+                throw new Error(`insert_node: position "${opts.position}" requires exactly one of anchorNodeId or anchorText`);
+            }
+        }
+        // The node is raw caller-supplied ProseMirror JSON: run the same
+        // URL-scheme sanitizer as updatePageJson so insert_node cannot be used to
+        // smuggle javascript:/data: hrefs or media srcs into the document.
+        this.validateDocUrls(node);
+        const collabToken = await this.getCollabTokenWithReauth();
+        // Track insertion in an outer var, reset per-transform, so a collab retry
+        // recomputes it cleanly (mirrors replaceImage's pattern).
+        let inserted = false;
+        await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
+            inserted = false;
+            const { doc: nd, inserted: ins } = insertNodeRelative(liveDoc, node, opts);
+            inserted = ins;
+            if (!inserted)
+                return null; // anchor not found -> skip the write entirely
+            return nd;
+        });
+        if (!inserted) {
+            const anchorDesc = opts.anchorNodeId
+                ? `anchorNodeId "${opts.anchorNodeId}"`
+                : `anchorText "${opts.anchorText}"`;
+            throw new Error(`insert_node: anchor not found (${anchorDesc}) on page ${pageId}`);
+        }
+        return { success: true, inserted: true, position: opts.position };
+    }
+    /**
+     * Remove EVERY node whose attrs.id === nodeId (recursively, including nodes
+     * nested in callouts/tables) from its parent content array. Operates on the
+     * LIVE collab document so comments and concurrent edits are preserved.
+     * Throws if no node matches.
+     */
+    async deleteNode(pageId, nodeId) {
+        await this.ensureAuthenticated();
+        const collabToken = await this.getCollabTokenWithReauth();
+        // Track the deletion count in an outer var, reset per-transform, so a
+        // collab retry recomputes it cleanly (mirrors replaceImage's pattern).
+        let deleted = 0;
+        await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
+            deleted = 0;
+            const { doc: nd, deleted: d } = deleteNodeById(liveDoc, nodeId);
+            deleted = d;
+            if (deleted === 0)
+                return null; // no match -> skip the write entirely
+            return nd;
+        });
+        if (deleted === 0) {
+            throw new Error(`delete_node: no node with id "${nodeId}" found on page ${pageId}`);
+        }
+        return { success: true, deleted, nodeId };
+    }
+    /** Build the public share URL for a page. */
+    shareUrl(shareKey, slugId) {
+        return `${this.appUrl}/share/${shareKey}/p/${slugId}`;
+    }
+    /** Share a page publicly (idempotent) and return the public URL. */
+    async sharePage(pageId, searchIndexing = true) {
+        await this.ensureAuthenticated();
+        const response = await this.client.post("/shares/create", {
+            pageId,
+            includeSubPages: false,
+            searchIndexing,
+        });
+        const share = response.data?.data ?? response.data;
+        // Fall back to a page lookup when the share response omits the slug.
+        const slugId = share.page?.slugId || (await this.getPageRaw(pageId)).slugId;
+        return {
+            shareId: share.id,
+            key: share.key,
+            pageId: share.pageId,
+            publicUrl: this.shareUrl(share.key, slugId),
+            searchIndexing: share.searchIndexing,
+        };
+    }
+    /** List all public shares in the workspace with their URLs. */
+    async listShares() {
+        const shares = await this.paginateAll("/shares", {});
+        return shares.map((s) => ({
+            shareId: s.id,
+            key: s.key,
+            pageId: s.pageId,
+            pageTitle: s.page?.title,
+            // No URL can be built without the page slug.
+            publicUrl: s.page?.slugId ? this.shareUrl(s.key, s.page.slugId) : null,
+            searchIndexing: s.searchIndexing,
+            createdAt: s.createdAt,
+        }));
+    }
+    /** Remove the public share of a page.
*/ + async unsharePage(pageId) { + await this.ensureAuthenticated(); + const shares = await this.listShares(); + const share = shares.find((s) => s.pageId === pageId); + if (!share) { + throw new Error(`Page ${pageId} is not shared.`); + } + await this.client.post("/shares/delete", { shareId: share.shareId }); + return { success: true, removedShareId: share.shareId, pageId }; + } + async search(query, spaceId, limit) { + await this.ensureAuthenticated(); + const payload = { query, spaceId }; + // Clamp an optional caller-supplied limit into a sane 1..100 range before + // forwarding it to the server; omit it entirely when not provided so the + // server applies its own default. + if (limit !== undefined) { + payload.limit = Math.max(1, Math.min(100, limit)); + } + const response = await this.client.post("/search", payload); + // Normalize both response shapes: bare array and paginated { items: [...] } + const data = response.data?.data; + const items = Array.isArray(data) ? data : data?.items || []; + const filteredItems = items.map((item) => filterSearchResult(item)); + return { + items: filteredItems, + success: response.data?.success || false, + }; + } + async movePage(pageId, parentPageId, position) { + await this.ensureAuthenticated(); + // Docmost requires position >= 5 chars. + const validPosition = position || "a00000"; + return this.client + .post("/pages/move", { + pageId, + parentPageId, + position: validPosition, + }) + .then((res) => res.data); + } + async deletePage(pageId) { + await this.ensureAuthenticated(); + return this.client + .post("/pages/delete", { pageId }) + .then((res) => res.data); + } + // --- Comment methods (ported from upstream PR #3 by Max Nikitin) --- + /** + * Normalize a comment's `content` into a ProseMirror doc object before + * markdown conversion. createComment/updateComment send content as a + * JSON.stringify(...) STRING, and the server stores it as-is, so on read it + * comes back as a string. 
convertProseMirrorToMarkdown returns "" for a + * string, so parse it first (guarded — fall back to the raw value on any + * parse failure so a non-JSON legacy value is still handled gracefully). + */ + parseCommentContent(content) { + if (typeof content !== "string") + return content; + try { + return JSON.parse(content); + } + catch { + return content; + } + } + /** List all comments on a page (cursor-paginated), content as markdown. */ + async listComments(pageId) { + await this.ensureAuthenticated(); + let allComments = []; + let cursor = null; + do { + const payload = { pageId, limit: 100 }; + if (cursor) + payload.cursor = cursor; + const response = await this.client.post("/comments", payload); + const data = response.data.data || response.data; + const items = data.items || []; + allComments = allComments.concat(items); + cursor = data.meta?.nextCursor || null; + } while (cursor); + return allComments.map((comment) => { + const markdown = comment.content + ? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content)) + : ""; + return filterComment(comment, markdown); + }); + } + async getComment(commentId) { + await this.ensureAuthenticated(); + const response = await this.client.post("/comments/info", { commentId }); + const comment = response.data.data || response.data; + const markdown = comment.content + ? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content)) + : ""; + return { + data: filterComment(comment, markdown), + success: true, + }; + } + /** Create a page-level or inline comment; content is markdown. 
*/ + async createComment(pageId, content, type = "page", selection, parentCommentId) { + await this.ensureAuthenticated(); + // Convert through the full Docmost schema (consistent with page paths) + const jsonContent = await markdownToProseMirror(content); + const payload = { + pageId, + content: JSON.stringify(jsonContent), + type, + }; + if (selection) + payload.selection = selection; + if (parentCommentId) + payload.parentCommentId = parentCommentId; + const response = await this.client.post("/comments/create", payload); + const comment = response.data.data || response.data; + const markdown = comment.content + ? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content)) + : content; + const result = { + data: filterComment(comment, markdown), + success: true, + }; + // Anchor the comment in the document. The /comments/create API records the + // comment + its `selection` text, but it does NOT insert the comment MARK + // into the page content, so without this the inline comment has no + // highlight/anchor and is not clickable. Only top-level inline comments are + // anchored: replies (parentCommentId set) inherit their parent's anchor, + // and page-type comments have no text range. + if (type === "inline" && selection && !parentCommentId && comment?.id) { + const newCommentId = comment.id; + let anchored = false; + try { + const collabToken = await this.getCollabTokenWithReauth(); + await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const doc = liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + // Find the FIRST text node containing the selection text, then + // split it into before / marked / after, copying the node's + // existing marks onto all three parts and adding the comment mark + // only to the middle part. Returns true once a match is wrapped. 
+ const wrapInFirstMatch = (nodes, depth) => { + const MAX_DEPTH = 200; + if (depth > MAX_DEPTH || !Array.isArray(nodes)) + return false; + for (let i = 0; i < nodes.length; i++) { + const n = nodes[i]; + if (!n || typeof n !== "object") + continue; + if (n.type === "text" && + typeof n.text === "string" && + n.text.includes(selection)) { + const idx = n.text.indexOf(selection); + const before = n.text.slice(0, idx); + const middleText = selection; + const after = n.text.slice(idx + selection.length); + const baseMarks = Array.isArray(n.marks) ? n.marks : []; + // Drop any pre-existing comment mark from the marks applied to + // the middle fragment so it ends up with exactly one comment + // mark (the new one) rather than two. Other fragments and the + // base marks list are left untouched. + const middleBaseMarks = baseMarks.filter((m) => !(m && m.type === "comment")); + const commentMark = { + type: "comment", + // The comment mark schema declares both commentId and + // resolved; include resolved:false for completeness. + attrs: { commentId: newCommentId, resolved: false }, + }; + const parts = []; + if (before.length > 0) { + parts.push({ ...n, text: before, marks: [...baseMarks] }); + } + parts.push({ + ...n, + text: middleText, + marks: [...middleBaseMarks, commentMark], + }); + if (after.length > 0) { + parts.push({ ...n, text: after, marks: [...baseMarks] }); + } + nodes.splice(i, 1, ...parts); + return true; + } + if (Array.isArray(n.content)) { + if (wrapInFirstMatch(n.content, depth + 1)) + return true; + } + } + return false; + }; + if (Array.isArray(doc.content) && wrapInFirstMatch(doc.content, 0)) { + anchored = true; + return doc; + } + // Selection text not found: do NOT fail (the comment already + // exists). Abort the write so nothing changes. + return null; + }); + } + catch (e) { + // The comment record already exists; an anchoring failure must not turn + // a successful create into an error. Report anchored:false instead. 
+ if (process.env.DEBUG) { + console.error("Failed to anchor inline comment mark:", e); + } + anchored = false; + } + result.anchored = anchored; + } + return result; + } + async updateComment(commentId, content) { + await this.ensureAuthenticated(); + const jsonContent = await markdownToProseMirror(content); + await this.client.post("/comments/update", { + commentId, + content: JSON.stringify(jsonContent), + }); + return { + success: true, + commentId, + message: "Comment updated successfully.", + }; + } + async deleteComment(commentId) { + await this.ensureAuthenticated(); + return this.client + .post("/comments/delete", { commentId }) + .then((res) => res.data); + } + /** + * Check for new comments across pages in a space (optionally scoped to a + * subtree): pages updated after `since` are scanned and their comments + * filtered by createdAt > since. + */ + async checkNewComments(spaceId, since, parentPageId) { + await this.ensureAuthenticated(); + const sinceDate = new Date(since); + // Reject an unparseable `since`: comparing against an Invalid Date silently + // yields zero new comments (every `>` against NaN is false), which would + // mask a malformed input as "nothing new" instead of erroring. + if (Number.isNaN(sinceDate.getTime())) { + throw new Error(`checkNewComments: invalid "since" date "${since}"; expected an ISO-8601 timestamp`); + } + // 1. Enumerate the FULL set of pages in scope by walking the sidebar-pages + // tree (a complete page index), NOT the bounded "/pages/recent" feed which + // caps at ~5000 recent items and silently misses comments on older pages. + // + // Subtree scope: when parentPageId is given, the scope is that page ITSELF + // plus every descendant (enumerateSpacePages walks its children). Otherwise + // the scope is the whole space (all roots and their descendants). 
+ // + // NOTE: do NOT pre-filter by page.updatedAt — creating a comment does not + // bump it (verified on a live server), so such a filter silently misses + // comments on pages that were not otherwise edited. The complete tree walk + // already restricts the scope correctly, so no recent-feed allow-list is + // needed any more. + let pagesInScope; + if (parentPageId) { + const subtree = await this.enumerateSpacePages(spaceId, parentPageId); + // Include the parent page node itself alongside its descendants. Fetch it + // so its title/id are available even though it is not returned by its own + // children listing. + let parentNode = { id: parentPageId }; + try { + parentNode = await this.getPageRaw(parentPageId); + } + catch (e) { + // Fall back to a minimal node if the parent can't be fetched; its + // comments are still attempted below (the fetch there is non-fatal). + } + pagesInScope = [parentNode, ...subtree]; + } + else { + pagesInScope = await this.enumerateSpacePages(spaceId); + } + // 2. Fetch comments for each page, keep ones created after since + const results = []; + for (const page of pagesInScope) { + try { + const comments = await this.listComments(page.id); + const newComments = comments.filter((c) => new Date(c.createdAt) > sinceDate); + if (newComments.length > 0) { + results.push({ + pageId: page.id, + pageTitle: page.title, + comments: newComments, + }); + } + } + catch (e) { + // Skip pages with errors (e.g. deleted between calls) + } + } + const totalNewComments = results.reduce((sum, r) => sum + r.comments.length, 0); + // enumerateSpacePages caps traversal at 10000 nodes; flag when that cap was + // hit so the caller knows the scan may be incomplete (some pages skipped). + const truncated = pagesInScope.length >= 10000; + return { + since, + scope: parentPageId ? 
`subtree of ${parentPageId}` : `space ${spaceId}`, + checkedPages: pagesInScope.length, + pagesWithNewComments: results.length, + totalNewComments, + truncated, + comments: results, + }; + } + // --- Image upload / embedding --- + /** Map a file extension to a supported image MIME type (throws otherwise). */ + imageMimeFromPath(filePath) { + const ext = extname(filePath).toLowerCase(); + const map = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".svg": "image/svg+xml", + }; + const mime = map[ext]; + if (!mime) { + throw new Error(`unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`); + } + return mime; + } + /** Build a Docmost ProseMirror image node from an uploaded attachment. */ + buildImageNode(att, align, alt) { + // Clean file URL, matching Docmost's native behaviour. No cache-busting + // query: the server serves the bare URL correctly, and replacement creates + // a new attachment id (a new URL) which busts caches naturally. + const src = `/api/files/${att.id}/${att.fileName}`; + const node = { + type: "image", + attrs: { + src, + attachmentId: att.id, + // Default to null when the server omits fileSize so the attr is never + // undefined (undefined would be dropped on serialization / break the + // ProseMirror image schema which expects size present). + size: att.fileSize ?? null, + align: align || "center", + width: null, + }, + }; + if (alt) + node.attrs.alt = alt; + return node; + } + /** + * Upload a local image file as an attachment of a page and return the + * attachment metadata plus a ready-to-insert ProseMirror image node. + */ + async uploadImage(pageId, filePath) { + await this.ensureAuthenticated(); + // HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at + // the server host's local filesystem, so it must be validated BEFORE any + // bytes are read. 
Without these guards a caller could (a) read an arbitrary + // file via path traversal, (b) follow a symlink to a sensitive target, or + // (c) exhaust memory by reading a huge file. Order matters: validate the + // extension, then stat (regular-file + size cap), and only then read. + // (a) Extension allowlist first — cheap, and rejects non-images up front. + const mime = this.imageMimeFromPath(filePath); + // (b) Stat the path: it must be a regular file (rejects directories, FIFOs, + // devices, sockets) and stay under the size cap. statSync follows symlinks, + // so a symlink is only accepted when its TARGET is a regular file within + // the cap — the intended behaviour for a local image path. + const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB + let stat; + try { + stat = statSync(filePath); + } + catch (e) { + throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`); + } + if (!stat.isFile()) { + throw new Error(`Not a regular file: "${filePath}"`); + } + if (stat.size > MAX_IMAGE_BYTES) { + throw new Error(`Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`); + } + // (c) Only now read the bytes. + let fileBuffer; + try { + fileBuffer = readFileSync(filePath); + } + catch (e) { + throw new Error(`Cannot read image file at "${filePath}": ${e.message}`); + } + // Build a FRESH FormData for every send attempt. A FormData body is a + // single-use stream that is CONSUMED on the first send, so it cannot be + // replayed by this.client's response interceptor (replaying a consumed + // stream fails with 'socket hang up'). Multipart re-auth is therefore done + // here with bare axios and an explicit one-shot 401/403 retry that rebuilds + // the body. Field order matters: text fields must precede the file part so + // the server reads them; the server always generates a fresh attachment id. 
+ const buildForm = () => { + const form = new FormData(); + form.append("pageId", pageId); + form.append("file", fileBuffer, { + filename: basename(filePath), + contentType: mime, + }); + return form; + }; + const url = `${this.apiUrl}/files/upload`; + let response; + try { + // Call buildForm() ONCE per attempt and reuse the instance for both + // getHeaders() and the body so the Content-Type boundary matches the body. + const form = buildForm(); + // Read the Authorization header from this.client's defaults (set by + // login(), only ever deleted — never set to null) instead of building + // `Bearer ${this.token}`: a concurrent JSON 401 can null this.token + // mid-flight, which would otherwise produce a literal "Bearer null". + // ensureAuthenticated() above guarantees login() ran, so the default + // header exists here. A 60s timeout keeps a hung upload from wedging the + // per-page lock (replaceImage holds withPageLock across this call). + response = await axios.post(url, form, { + headers: { + ...form.getHeaders(), + Authorization: this.client.defaults.headers.common["Authorization"], + }, + timeout: 60000, + }); + } + catch (error) { + // On an expired-token auth error, re-login and retry exactly once with a + // freshly-rebuilt FormData (the previous one was already consumed). + if (axios.isAxiosError(error) && + (error.response?.status === 401 || error.response?.status === 403)) { + await this.login(); + const form2 = buildForm(); + response = await axios.post(url, form2, { + headers: { + ...form2.getHeaders(), + Authorization: this.client.defaults.headers.common["Authorization"], + }, + timeout: 60000, + }); + } + else if (axios.isAxiosError(error)) { + // Keep the thrown message free of the raw response body (it may echo + // request data or server internals); surface only status/statusText. + // The full body is logged under DEBUG for diagnostics. 
+ if (process.env.DEBUG) { + console.error("Image upload failed; response body:", JSON.stringify(error.response?.data)); + } + throw new Error(`Image upload failed: ${error.response?.status} ${error.response?.statusText}`); + } + else { + throw error; + } + } + // The attachment may arrive bare or wrapped in a { data } envelope. + const att = response.data?.data ?? response.data; + if (!att?.id || !att?.fileName) { + throw new Error("Unexpected /files/upload response: " + JSON.stringify(response.data)); + } + // Some Docmost versions omit fileSize from the upload response. Fall back + // to the local stat size (the bytes we just uploaded) so callers never get + // an undefined size. + const localSize = stat.size; + const resolvedSize = att.fileSize ?? localSize; + return { + attachmentId: att.id, + fileName: att.fileName, + fileSize: resolvedSize, + src: `/api/files/${att.id}/${att.fileName}`, + imageNode: this.buildImageNode({ ...att, fileSize: resolvedSize }), + }; + } + /** + * Upload a local image and insert it into a page in one step. + * By default the image is appended at the end. With replaceText, the first + * top-level block whose text contains the string is replaced; with afterText, + * the image is inserted right after the first matching block. All other + * block ids are preserved (only one top-level block is added or swapped). + */ + async insertImage(pageId, filePath, opts = {}) { + const up = await this.uploadImage(pageId, filePath); + // Reuse the node from uploadImage (clean /api/files// src), then + // apply align/alt onto a shallow attrs copy. + const node = { ...up.imageNode, attrs: { ...up.imageNode.attrs } }; + if (opts.align) + node.attrs.align = opts.align; + if (opts.alt) + node.attrs.alt = opts.alt; + const collabToken = await this.getCollabTokenWithReauth(); + // Recursively collect the plain text of a top-level block. 
+ const blockText = (n) => { + let out = ""; + if (n.type === "text") + out += n.text || ""; + for (const child of n.content || []) + out += blockText(child); + return out; + }; + // Insert into the LIVE synced document, not the debounced REST snapshot, so + // concurrent edits/comments/images are preserved and parallel insert_image + // calls (serialized by the per-page lock) each see the previous insertion. + let placement; + await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const doc = liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + if (!Array.isArray(doc.content)) + doc.content = []; + if (opts.replaceText) { + // Ambiguity guard (mirrors editPageText): count matching top-level + // blocks first, so a non-unique fragment cannot silently replace the + // wrong block (e.g. text that also appears inside a callout/table). + const matches = doc.content.filter((b) => blockText(b).includes(opts.replaceText)); + if (matches.length === 0) { + throw new Error(`replaceText not found: "${opts.replaceText}"`); + } + if (matches.length > 1) { + throw new Error(`replaceText "${opts.replaceText}" matches ${matches.length} blocks; use a longer unique fragment`); + } + const idx = doc.content.findIndex((b) => blockText(b).includes(opts.replaceText)); + // Data-loss guard: replaceText swaps the WHOLE top-level block, so if + // the fragment only appears nested inside a container (table, callout, + // list, blockquote) the entire structure would be destroyed. Refuse + // when the matched block is a container rather than a leaf + // paragraph/heading and point the caller at a safer tool. 
+ const CONTAINER_TYPES = new Set([ + "table", + "callout", + "bulletList", + "orderedList", + "taskList", + "blockquote", + ]); + const matchedBlock = doc.content[idx]; + if (matchedBlock && CONTAINER_TYPES.has(matchedBlock.type)) { + throw new Error(`replaceText matched a ${matchedBlock.type} container block; replacing it would destroy the whole structure. ` + + `Use afterText to insert near it, or update_page_json for surgical edits.`); + } + doc.content.splice(idx, 1, node); + placement = "replaced"; + } + else if (opts.afterText) { + // Ambiguity guard (mirrors editPageText): refuse a non-unique fragment. + const matches = doc.content.filter((b) => blockText(b).includes(opts.afterText)); + if (matches.length === 0) { + throw new Error(`afterText not found: "${opts.afterText}"`); + } + if (matches.length > 1) { + throw new Error(`afterText "${opts.afterText}" matches ${matches.length} blocks; use a longer unique fragment`); + } + const idx = doc.content.findIndex((b) => blockText(b).includes(opts.afterText)); + doc.content.splice(idx + 1, 0, node); + placement = "after"; + } + else { + doc.content.push(node); + placement = "appended"; + } + return doc; + }); + return { + success: true, + pageId, + attachmentId: up.attachmentId, + src: up.src, + placement, + }; + } + /** + * Replace an existing image in a page with a new file. Uploads the new file as + * a brand-new attachment, which yields a fresh clean URL that both renders + * correctly and busts browser caches (the URL changed). Finds every image node + * whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested + * in callouts/tables) and repoints its src/attachmentId/size, preserving + * comments, alignment and alt. Operates on the live collab document so comments + * and concurrent edits are preserved. Throws if no matching image is found. 
+ * + * The OLD attachment is left in place as an unreferenced orphan: Docmost + * exposes NO HTTP API to delete a single content attachment (verified against + * the attachment controller/service and by probing the live API — deletion + * happens only by cascade when the page, space or user is removed). This is the + * same outcome as Docmost's own editor when an image is removed/replaced. + * In-place byte overwrite is deliberately NOT used because some Docmost + * versions corrupt the attachment (HTTP 500) when its bytes are overwritten. + */ + async replaceImage(pageId, oldAttachmentId, filePath, opts = {}) { + const collabToken = await this.getCollabTokenWithReauth(); + // Hold ONE per-page lock for the WHOLE operation (scan -> upload -> write). + // Previously the scan and the write were two separate mutatePageContent + // calls, each acquiring + releasing the lock, with the upload happening in + // the UNLOCKED gap between them. A concurrent op could interleave there: it + // could remove the target image so the write pass matches nothing, leaving + // the freshly-uploaded attachment as an un-deletable orphan (Docmost has no + // API to delete a single content attachment). Acquiring the lock once and + // using the non-locking collab helper inside (the per-page mutex is NOT + // reentrant, so the self-locking mutatePageContent would deadlock here) + // closes that TOCTOU window. uploadImage hits /files/upload over plain HTTP + // and does not touch the page lock, so it is safe to call while held. + return withPageLock(pageId, async () => { + // STEP 1: read-only live check. Scan the live document for any image node + // matching oldAttachmentId BEFORE uploading anything, so a wrong/stale id + // throws without ever creating an orphan attachment. 
+ let matchFound = false; + const scan = (nodes) => { + for (const node of nodes) { + if (!node) + continue; + if (node.type === "image" && + node.attrs && + node.attrs.attachmentId === oldAttachmentId) { + matchFound = true; + } + if (Array.isArray(node.content)) + scan(node.content); + } + }; + await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => { + matchFound = false; // reset per-transform (collab may retry the read). + const doc = liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + if (Array.isArray(doc.content)) + scan(doc.content); + return null; // read-only: never write on the check pass. + }); + if (!matchFound) { + throw new Error(`replace_image: no image with attachmentId "${oldAttachmentId}" found on page ${pageId}`); + } + // STEP 2: a match exists — upload the new file as a FRESH attachment (new + // id, new clean URL) and repoint every matching node in a second pass. + // Still inside the SAME lock, so no other op can have changed the page + // since the scan. + const up = await this.uploadImage(pageId, filePath); + let replaced = 0; + // Swap the source of one image node, preserving align/alt/title/geometry. + const repoint = (node) => { + node.attrs = { + ...node.attrs, + src: up.src, + attachmentId: up.attachmentId, + // Default to null when fileSize is unknown so the attr is never + // undefined. + size: up.fileSize ?? null, + }; + if (opts.align) + node.attrs.align = opts.align; + if (opts.alt !== undefined) + node.attrs.alt = opts.alt; + replaced++; + }; + // Recursively repoint every image node (incl. ones nested in callouts/tables). 
+ const walk = (nodes) => { + for (const node of nodes) { + if (!node) + continue; + if (node.type === "image" && + node.attrs && + node.attrs.attachmentId === oldAttachmentId) { + repoint(node); + } + if (Array.isArray(node.content)) + walk(node.content); + } + }; + await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => { + // Reset per-transform so collab retries recompute cleanly (no double-count). + replaced = 0; + const doc = liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + if (!Array.isArray(doc.content)) + doc.content = []; + walk(doc.content); + if (replaced === 0) + return null; // no match -> skip the write entirely + return doc; + }); + if (replaced === 0) { + // The pass-1 SCAN found the target (matchFound was true) and we already + // uploaded the new attachment, but pass-2 matched nothing — a concurrent + // editor must have removed the node between the two passes. Do NOT throw + // here (that would leak the just-uploaded attachment AND report failure); + // instead report success with the upload flagged as an unreferenced + // orphan so the caller knows. (The early throw above still covers the + // case where pass-1 finds nothing, before any upload happens.) + return { + success: true, + replaced: 0, + pageId, + oldAttachmentId, + newAttachmentId: up.attachmentId, + src: up.src, + orphanedAttachmentId: up.attachmentId, + warning: "target image was removed concurrently; uploaded attachment is unreferenced", + }; + } + return { + success: true, + pageId, + replaced, + oldAttachmentId, + newAttachmentId: up.attachmentId, + src: up.src, + }; + }); + } + // --- Page history / diff / transform --- + /** + * List the saved versions (history snapshots) of a page, newest first. + * Docmost auto-snapshots on every save. Returns one cursor-paginated page of + * results: `{ items, nextCursor }`. The history record's id field is `id`. 
+ */ + async listPageHistory(pageId, cursor) { + await this.ensureAuthenticated(); + const payload = { pageId }; + if (cursor) + payload.cursor = cursor; + const response = await this.client.post("/pages/history", payload); + const data = response.data?.data ?? response.data; + return { + items: data?.items ?? [], + nextCursor: data?.meta?.nextCursor ?? null, + }; + } + /** + * Fetch a single page-history version including its lossless ProseMirror + * `content`. The version also carries pageId/title/createdAt. + */ + async getPageHistory(historyId) { + await this.ensureAuthenticated(); + const response = await this.client.post("/pages/history/info", { + historyId, + }); + return response.data?.data ?? response.data; + } + /** + * "Restore" a version: Docmost has NO restore endpoint, so we take the + * version's `content` and write it as the page's current content via the live + * collab path (which itself creates a new history snapshot). Returns the + * affected pageId and the source historyId. + */ + async restorePageVersion(historyId) { + await this.ensureAuthenticated(); + const version = await this.getPageHistory(historyId); + if (!version || + !version.pageId || + !version.content || + typeof version.content !== "object") { + throw new Error(`restore_page_version: history ${historyId} has no usable content`); + } + // Defense-in-depth: sanitize URLs in the restored content (parity with the + // JSON write path) before writing it back. + this.validateDocUrls(version.content); + const collabToken = await this.getCollabTokenWithReauth(); + await mutatePageContent(version.pageId, collabToken, this.apiUrl, () => version.content); + return { pageId: version.pageId, restoredFrom: historyId }; + } + /** + * Diff two versions of a page and return a Docmost-equivalent change set. + * `from`/`to` each resolve to a ProseMirror doc: + * - null / undefined / "current" -> the page's CURRENT content; + * - any other string -> that historyId's content. 
+ * Returns the diff plus the resolved version metadata for each side. + */ + async diffPageVersions(pageId, from, to) { + await this.ensureAuthenticated(); + const isCurrent = (v) => v == null || v === "" || v === "current"; + const resolveSide = async (v) => { + if (isCurrent(v)) { + const raw = await this.getPageRaw(pageId); + return { + doc: raw.content || { type: "doc", content: [] }, + meta: { + kind: "current", + pageId, + title: raw.title, + updatedAt: raw.updatedAt, + }, + }; + } + const version = await this.getPageHistory(v); + return { + doc: version.content || { type: "doc", content: [] }, + meta: { + kind: "history", + historyId: version.id, + pageId: version.pageId, + title: version.title, + createdAt: version.createdAt, + }, + }; + }; + const fromSide = await resolveSide(from); + const toSide = await resolveSide(to); + const diff = diffDocs(fromSide.doc, toSide.doc); + return { from: fromSide.meta, to: toSide.meta, diff }; + } + /** + * Edit a page by running an arbitrary user-supplied JS transform against the + * live document, with a diff preview + page-history safety net. + * + * The transform string is evaluated as `(doc, ctx) => doc` inside a node:vm + * sandbox: it gets ONLY `{ doc, ctx, structuredClone, console }` as globals, + * a 5s timeout, and NO access to require/process/fs/network. It must return a + * `{ type: "doc" }` node, which is validated structurally before any write. + * + * `ctx` exposes: + * - comments: the page's comments (fetched before the live read); + * - log: an array the transform can push diagnostics to (via console.log); + * - consume(id): mark a comment id as consumed (for deleteComments); + * - helpers: the transforms.ts primitives + commentsToFootnotes. + * + * Footnote convention used by the helpers: footnote markers are plain "[N]" + * text in the body, and the notes are an orderedList under a heading whose + * text is "Примечания переводчика". 
+ * + * dryRun (default true): read the page's current content, run the transform, + * and return `{ pushed:false, diff, log }` WITHOUT opening the collab socket. + * Otherwise the transform runs atomically inside mutatePageContent, optionally + * deletes consumed comments, and returns the new historyId + diff + log. + */ + async transformPage(pageId, transformJs, opts = {}) { + const dryRun = opts.dryRun ?? true; + const deleteComments = opts.deleteComments ?? false; + await this.ensureAuthenticated(); + const comments = await this.listComments(pageId); + // ctx handed to the sandbox. consume() records ids; helpers are the pure + // transform primitives. log is captured from console.log inside the sandbox. + const ctx = { + comments, + log: [], + consumed: new Set(), + consume(id) { + this.consumed.add(id); + }, + helpers: { + blockText, + walk, + getList, + insertMarkerAfter, + setCalloutRange, + noteItem, + mdToInlineNodes, + commentsToFootnotes, + }, + }; + // Captured oldDoc / newDoc for the diff (set inside runTransform). + let oldDoc; + let newDoc; + // SYNCHRONOUS transform runner — safe to call inside mutatePageContent's + // onSynced (no await between the live read and the write). + const runTransform = (liveDoc) => { + oldDoc = structuredClone(liveDoc); + const sandbox = { + doc: structuredClone(liveDoc), + ctx, + structuredClone, + console: { + log: (...a) => ctx.log.push(a.map((x) => String(x)).join(" ")), + }, + }; + // Wrap the provided string in parentheses so both an expression-arrow + // (`(doc, ctx) => {...}`) and a parenthesized function work. Run it in a + // fresh context with no require/process/module so the transform cannot + // touch fs/network/process. 5s wall-clock timeout. + let fn; + try { + fn = vm.runInNewContext("(" + transformJs + ")", sandbox, { + timeout: 5000, + }); + } + catch (e) { + throw new Error(`transform did not compile: ${e?.message ?? 
e}`); + } + if (typeof fn !== "function") { + throw new Error("transform must evaluate to a function (doc, ctx) => doc"); + } + const result = vm.runInNewContext("f(d, c)", { f: fn, d: sandbox.doc, c: ctx }, { timeout: 5000 }); + if (!result || + typeof result !== "object" || + result.type !== "doc" || + !Array.isArray(result.content)) { + throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })'); + } + // Validate the returned doc before it can be written. + this.validateDocStructure(result); + this.validateDocUrls(result); + newDoc = result; + return result; + }; + if (dryRun) { + // Preview only: run against the current REST snapshot, never open the + // socket. oldDoc/newDoc are captured by runTransform. + const raw = await this.getPageRaw(pageId); + const current = raw.content || { type: "doc", content: [] }; + runTransform(current); + // Exercise the same Yjs encoder the apply path uses, so the preview + // fails with the SAME descriptive error when the doc is not encodable + // instead of returning a misleadingly-green diff. + assertYjsEncodable(newDoc); + return { + pushed: false, + diff: diffDocs(oldDoc, newDoc), + log: ctx.log, + }; + } + // Apply atomically against the live doc. + const collabToken = await this.getCollabTokenWithReauth(); + await mutatePageContent(pageId, collabToken, this.apiUrl, runTransform); + // Optionally delete consumed comments (best-effort; a delete failure must + // not undo the successful write). + const deletedComments = []; + if (deleteComments) { + for (const id of ctx.consumed) { + try { + await this.deleteComment(id); + deletedComments.push(id); + } + catch (e) { + if (process.env.DEBUG) { + console.error(`transform: failed to delete comment ${id}:`, e); + } + } + } + } + // Fetch the newest historyId (Docmost snapshots on the write above). + let historyId = null; + try { + const hist = await this.listPageHistory(pageId); + historyId = hist.items?.[0]?.id ?? 
null; + } + catch (e) { + if (process.env.DEBUG) { + console.error("transform: failed to fetch history id:", e); + } + } + return { + pushed: true, + historyId, + diff: diffDocs(oldDoc, newDoc), + deletedComments, + log: ctx.log, + }; + } +} diff --git a/packages/mcp/build/http.js b/packages/mcp/build/http.js new file mode 100644 index 00000000..f22cc694 --- /dev/null +++ b/packages/mcp/build/http.js @@ -0,0 +1,92 @@ +import { randomUUID } from "node:crypto"; +import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; +import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js"; +import { createDocmostMcpServer } from "./index.js"; +/** + * Build a stateful Streamable-HTTP handler for the Docmost MCP server. The + * embedding host (the gitmost NestJS server) bridges its raw Node req/res into + * `handleRequest`. One McpServer + transport is created per MCP session and + * kept alive between requests, keyed by the `mcp-session-id` header. + */ +export function createMcpHttpHandler(config) { + // One transport (and one McpServer) per MCP session, keyed by session id. + const transports = {}; + // Last activity timestamp per session id, used for idle eviction. + const lastSeen = {}; + // Idle session TTL (ms): a session with no activity for this long is evicted. + // Defaults to 30 min; overridable via MCP_SESSION_IDLE_MS. + const idleTtlMs = (() => { + const parsed = parseInt(process.env.MCP_SESSION_IDLE_MS ?? "", 10); + return Number.isFinite(parsed) && parsed > 0 ? parsed : 30 * 60 * 1000; + })(); + // Periodically close transports idle longer than the TTL. transport.close() + // triggers its onclose, which removes it from `transports`; we also drop the + // lastSeen entry. unref() so this timer never keeps the process alive. + const sweepIntervalMs = 5 * 60 * 1000; + const sweepTimer = setInterval(() => { + const now = Date.now(); + for (const sid of Object.keys(transports)) { + if (now - (lastSeen[sid] ?? 
0) > idleTtlMs) { + void transports[sid].close(); + delete lastSeen[sid]; + } + } + }, sweepIntervalMs); + sweepTimer.unref(); + async function handleRequest(req, res, parsedBody) { + const sessionId = req.headers["mcp-session-id"]; + const method = (req.method || "GET").toUpperCase(); + let transport = sessionId ? transports[sessionId] : undefined; + if (method === "POST" && !transport) { + // A new session may only be created by an initialize request without a + // session id. + if (sessionId || !isInitializeRequest(parsedBody)) { + res.statusCode = 400; + res.setHeader("Content-Type", "application/json"); + res.end(JSON.stringify({ + jsonrpc: "2.0", + error: { + code: -32000, + message: "Bad Request: no valid session ID provided", + }, + id: null, + })); + return; + } + transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: () => randomUUID(), + onsessioninitialized: (sid) => { + transports[sid] = transport; + lastSeen[sid] = Date.now(); + }, + }); + transport.onclose = () => { + const sid = transport.sessionId; + if (sid && transports[sid]) + delete transports[sid]; + }; + const server = createDocmostMcpServer(config); + await server.connect(transport); + await transport.handleRequest(req, res, parsedBody); + return; + } + if (!transport) { + res.statusCode = 400; + res.setHeader("Content-Type", "application/json"); + res.end(JSON.stringify({ + jsonrpc: "2.0", + error: { + code: -32000, + message: "Bad Request: no valid session ID provided", + }, + id: null, + })); + return; + } + // Routing to an existing transport: refresh its idle timestamp. 
+ if (sessionId) + lastSeen[sessionId] = Date.now(); + await transport.handleRequest(req, res, parsedBody); + } + return { handleRequest }; +} diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js new file mode 100644 index 00000000..c05df0f4 --- /dev/null +++ b/packages/mcp/build/index.js @@ -0,0 +1,777 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { z } from "zod"; +import { readFileSync } from "fs"; +import { fileURLToPath } from "url"; +import { dirname, join } from "path"; +import { DocmostClient } from "./client.js"; +// Read version from package.json +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); +const packageJson = JSON.parse(readFileSync(join(__dirname, "../package.json"), "utf-8")); +const VERSION = packageJson.version; +// --- Modern McpServer Implementation --- +// Editing guide surfaced to MCP clients in the initialize result so they can +// pick the right tool by intent and avoid resending whole documents. +const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent: fix wording/typos/numbers (text inside blocks) -> edit_page_text (no node id needed). Change ONE block (paragraph/heading/callout/table cell/etc.) structurally -> patch_node (address by attrs.id from get_page_json). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Images -> insert_image (place a local image file) / replace_image (swap an existing image file). New page -> create_page (Markdown). Bulk/structural rewrite or nodes without an id -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Rename a page (title only) -> rename_page. 
Read -> get_page (Markdown, lossy) or get_page_json (lossless ProseMirror with block ids). Comments -> create_comment (an inline comment anchors to its selection text), list_comments, update_comment, delete_comment, check_new_comments. Tip: read block ids via get_page_json, then use patch_node/insert_node/delete_node so you never resend the full document. " + + "Complex/scripted rewrite (multiple coordinated edits, footnotes, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes. " + + "Review what changed -> diff_page_versions (compare a historyId to current, or two history versions). See a page's saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). " + + "Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown."; +// Helper to format JSON responses +const jsonContent = (data) => ({ + content: [{ type: "text", text: JSON.stringify(data, null, 2) }], +}); +/** + * Create a fully configured Docmost MCP server. Side-effect-free: it does not + * read environment variables and does not connect any transport — the caller + * decides how to expose it (stdio or HTTP). The client talks to Docmost over + * REST + the collaboration WebSocket using the provided service-account + * credentials and auto-re-authenticates. 
+ */ +export function createDocmostMcpServer(config) { + const docmostClient = new DocmostClient(config.apiUrl, config.email, config.password); + const server = new McpServer({ + name: "docmost-mcp", + version: VERSION, + }, { instructions: SERVER_INSTRUCTIONS }); + // Tool: get_workspace + server.registerTool("get_workspace", { + description: "Get the current Docmost workspace", + }, async () => { + const workspace = await docmostClient.getWorkspace(); + return jsonContent(workspace); + }); + // Tool: list_spaces + server.registerTool("list_spaces", { + description: "List all available spaces in Docmost", + }, async () => { + const spaces = await docmostClient.getSpaces(); + return jsonContent(spaces); + }); + // Tool: list_pages + server.registerTool("list_pages", { + description: "List most recent pages in a space ordered by updatedAt (descending). " + + "Returns a bounded list (default 50, max 100) — use search for lookups " + + "in large spaces.", + inputSchema: { + spaceId: z.string().optional(), + limit: z + .number() + .int() + .min(1) + .max(100) + .optional() + .describe("Max pages to return (default 50, max 100)"), + }, + }, async ({ spaceId, limit }) => { + const result = await docmostClient.listPages(spaceId, limit ?? 50); + return jsonContent(result); + }); + // Tool: get_page + server.registerTool("get_page", { + description: "Get page details with content converted to Markdown. The conversion is " + + "LOSSY (block ids, exact table/callout structure are approximated); for a " + + "lossless representation use get_page_json.", + inputSchema: { + pageId: z.string().min(1), + }, + }, async ({ pageId }) => { + const page = await docmostClient.getPage(pageId); + return jsonContent(page); + }); + // Tool: get_page_json + server.registerTool("get_page_json", { + description: "Get page details with the raw ProseMirror JSON content (lossless: " + + "includes block ids, callouts, tables, link/image attributes) plus the " + + "slugId used in URLs. 
Use together with update_page_json for precise " + + "structural edits, or edit_page_text for simple text fixes.", + inputSchema: { + pageId: z.string().min(1), + }, + }, async ({ pageId }) => { + const page = await docmostClient.getPageJson(pageId); + return jsonContent(page); + }); + // Tool: get_outline + server.registerTool("get_outline", { + description: "Return a COMPACT outline of a page's top-level blocks ({index, type, " + + "id, level, firstText}; tables add rows/cols/header; lists add item " + + "count) WITHOUT the full document body. Use it to locate sections/tables " + + "and grab block ids cheaply before get_node / patch_node / insert_node.", + inputSchema: { + pageId: z.string().min(1), + }, + }, async ({ pageId }) => { + const result = await docmostClient.getOutline(pageId); + return jsonContent(result); + }); + // Tool: get_node + server.registerTool("get_node", { + description: "Fetch a single node's full ProseMirror subtree (lossless) without " + + "pulling the whole document. `nodeId` is a block id from get_outline/" + + "get_page_json (works for headings/paragraphs/callouts/images), OR " + + "`#` to fetch a top-level block by its outline index — use the " + + "`#` form for tables/rows/cells, which carry no id.", + inputSchema: { + pageId: z.string().min(1), + nodeId: z.string().min(1), + }, + }, async ({ pageId, nodeId }) => { + const result = await docmostClient.getNode(pageId, nodeId); + return jsonContent(result); + }); + // Tool: table_get + server.registerTool("table_get", { + description: "Read a table as a matrix. Returns {rows, cols, cells (text[][]), " + + "cellIds (paragraph id per cell, or null)}. `table` = `#` from " + + "get_outline, or any block id inside the table. Use cellIds with " + + "patch_node for rich-formatted cell edits. 
`cols` is the FIRST row's " + + "width; ragged tables may vary per row, so use the per-row length of " + + "`cells` for each row.", + inputSchema: { + pageId: z.string().min(1), + table: z.string().min(1), + }, + }, async ({ pageId, table }) => { + const result = await docmostClient.getTable(pageId, table); + return jsonContent(result); + }); + // Tool: table_insert_row + server.registerTool("table_insert_row", { + description: "Insert a row of plain-text cells into a table. `table` = `#` or " + + "a block id inside it. `cells` = text per column (padded to the table's " + + "column count; error if more cells than columns). `index` = 0-based " + + "insert position (0 inserts before the header); omit to append at the end.", + inputSchema: { + pageId: z.string().min(1), + table: z.string().min(1), + cells: z.array(z.string()), + index: z.number().int().optional(), + }, + }, async ({ pageId, table, cells, index }) => { + const result = await docmostClient.tableInsertRow(pageId, table, cells, index); + return jsonContent(result); + }); + // Tool: table_delete_row + server.registerTool("table_delete_row", { + description: "Delete the row at 0-based `index` from a table (`table` = `#` or " + + "a block id inside it). Refuses to delete the table's only row. An " + + "out-of-range `index` throws. Deleting `index` 0 removes the header row, " + + "and the next row becomes the new header.", + inputSchema: { + pageId: z.string().min(1), + table: z.string().min(1), + index: z.number().int(), + }, + }, async ({ pageId, table, index }) => { + const result = await docmostClient.tableDeleteRow(pageId, table, index); + return jsonContent(result); + }); + // Tool: table_update_cell + server.registerTool("table_update_cell", { + description: "Set the plain-text content of cell [row,col] (0-based) in a table " + + "(`table` = `#` or a block id inside it). 
Replaces the cell's " + + "content with a single text paragraph; for rich formatting use patch_node " + + "on the cell's paragraph id from table_get.", + inputSchema: { + pageId: z.string().min(1), + table: z.string().min(1), + row: z.number().int(), + col: z.number().int(), + text: z.string(), + }, + }, async ({ pageId, table, row, col, text }) => { + const result = await docmostClient.tableUpdateCell(pageId, table, row, col, text); + return jsonContent(result); + }); + // Tool: create_page + server.registerTool("create_page", { + description: "Create a new page with content (automatically moves it to the correct hierarchy).", + inputSchema: { + title: z.string().min(1).describe("Title of the page"), + content: z.string().min(1).describe("Markdown content"), + spaceId: z.string().min(1), + parentPageId: z + .string() + .optional() + .describe("Optional parent page ID to nest under"), + }, + }, async ({ title, content, spaceId, parentPageId }) => { + const result = await docmostClient.createPage(title, content, spaceId, parentPageId); + return jsonContent(result); + }); + // Tool: update_page_json + server.registerTool("update_page_json", { + description: "Replace a page's content with a raw ProseMirror JSON document " + + "(lossless write: preserves the block ids, callouts, tables and " + + "attributes you pass in). Typical flow: get_page_json -> modify the " + + "JSON -> update_page_json. Keep existing node ids intact so heading " + + "anchors and history stay stable. `content` is OPTIONAL: omit it to " + + "update only the title (though prefer rename_page for a title-only " + + "change). Supplying neither content nor title is an error.", + inputSchema: { + pageId: z.string().min(1).describe("ID of the page to update"), + content: z + .any() + .optional() + .describe('ProseMirror document: {"type":"doc","content":[...]}. 
Omit to rename only.'), + title: z.string().optional().describe("Optional new title"), + }, + }, async ({ pageId, content, title }) => { + // Only parse/validate the document when it was actually supplied; when it + // is omitted, pass it straight through so the client performs a title-only + // (or no-op) update. + let doc; + if (content === undefined || content === null) { + doc = undefined; + } + else if (typeof content === "string") { + try { + doc = JSON.parse(content); + } + catch { + throw new Error("content was a string but not valid JSON"); + } + } + else { + doc = content; + } + const result = await docmostClient.updatePageJson(pageId, doc, title); + return jsonContent(result); + }); + // Tool: export_page_markdown + server.registerTool("export_page_markdown", { + description: "Export a page to a single self-contained, lossless Docmost-flavoured " + + "Markdown file (custom extensions): YAML-free meta header, body with " + + "inline comment anchors and diagrams, and a trailing comments-thread " + + "block. Designed for a download -> edit body -> import_page_markdown " + + "round-trip that preserves everything, including comment highlights. " + + "Comment THREADS are preserved in the file but are not re-pushed to the " + + "server on import.", + inputSchema: { + pageId: z.string().min(1), + }, + }, async ({ pageId }) => { + const md = await docmostClient.exportPageMarkdown(pageId); + return { content: [{ type: "text", text: md }] }; + }); + // Tool: import_page_markdown + server.registerTool("import_page_markdown", { + description: "Replace a page's content from a self-contained Docmost-flavoured " + + "Markdown file produced by export_page_markdown. Restores comment " + + "highlight anchors and diagrams from their inline HTML. 
NOTE: comment " + + "thread records are NOT created/updated/deleted on the server by this " + + "tool — only the page body + inline comment marks are written; manage " + + "comment threads via the comment tools/UI.", + inputSchema: { + pageId: z.string().min(1), + markdown: z.string().min(1), + }, + }, async ({ pageId, markdown }) => { + const res = await docmostClient.importPageMarkdown(pageId, markdown); + return jsonContent(res); + }); + // Tool: copy_page_content + server.registerTool("copy_page_content", { + description: "Replace targetPageId's content with a copy of sourcePageId's content, " + + "entirely server-side — the document is NOT sent through the model. The " + + "target keeps its own title and slug; only its body is replaced. Ideal " + + "for 'make page A's content equal to B' or 'replace A with B but keep A's URL'.", + inputSchema: { + sourcePageId: z.string().min(1).describe("Page to copy content FROM"), + targetPageId: z + .string() + .min(1) + .describe("Page whose content is REPLACED (title/slug kept)"), + }, + }, async ({ sourcePageId, targetPageId }) => { + const result = await docmostClient.copyPageContent(sourcePageId, targetPageId); + return jsonContent(result); + }); + // Tool: rename_page + server.registerTool("rename_page", { + description: "Rename a page (change its title only) without touching or resending " + + "its content.", + inputSchema: { + pageId: z.string().min(1).describe("ID of the page to rename"), + title: z.string().min(1).describe("New title"), + }, + }, async ({ pageId, title }) => { + const result = await docmostClient.renamePage(pageId, title); + return jsonContent(result); + }); + // Tool: edit_page_text + server.registerTool("edit_page_text", { + description: "Surgical find/replace inside a page's text. Preserves ALL structure: " + + "block ids, marks, links, callouts, tables. Each `find` must match " + + "exactly once (or set replaceAll). 
A match must lie inside one " + + "formatting run; if the target text crosses bold/link boundaries the " + + "tool reports it — use a shorter fragment or update_page_json then. " + + "This is the preferred tool for fixing wording, typos, numbers, names.", + inputSchema: { + pageId: z.string().describe("ID of the page to edit"), + edits: z + .array(z.object({ + find: z.string().describe("Exact text to find"), + replace: z.string().describe("Replacement text (may be empty)"), + replaceAll: z + .boolean() + .optional() + .describe("Replace every occurrence (default: must match once)"), + })) + .min(1) + .describe("List of find/replace operations, applied in order"), + }, + }, async ({ pageId, edits }) => { + const result = await docmostClient.editPageText(pageId, edits); + return jsonContent(result); + }); + // Tool: patch_node + server.registerTool("patch_node", { + description: "Replaces a single block identified by its attrs.id WITHOUT resending the " + + "whole document. Get the block id from get_page_json, then pass a " + + "ProseMirror node to put in its place. Cheaper and safer than " + + "update_page_json for one-block structural edits.", + inputSchema: { + pageId: z.string().min(1), + nodeId: z.string().min(1), + node: z + .any() + .describe("ProseMirror node JSON to put in place of the node with this id"), + }, + }, async ({ pageId, nodeId, node }) => { + let parsedNode; + if (typeof node === "string") { + try { + parsedNode = JSON.parse(node); + } + catch { + throw new Error("node was a string but not valid JSON"); + } + } + else { + parsedNode = node; + } + const result = await docmostClient.patchNode(pageId, nodeId, parsedNode); + return jsonContent(result); + }); + // Tool: insert_node + server.registerTool("insert_node", { + description: "Insert a block before/after another block (by attrs.id or anchor text) " + + "or append at the end. Get anchor block ids from get_page_json. Avoids " + + "resending the whole document. 
Can also insert table structure: to add a " + + "tableRow, pass a tableRow node with position before/after and anchor " + + "INSIDE the target table — anchorNodeId of any block/cell in it, or " + + "anchorText matching the table; to add a tableCell/tableHeader, use " + + "anchorNodeId of a block inside the target row (anchorText only resolves " + + "top-level blocks, so it cannot target a row). Note: append is top-level " + + "only and rejects structural table nodes.", + inputSchema: { + pageId: z.string().min(1), + node: z.any(), + position: z.enum(["before", "after", "append"]), + anchorNodeId: z.string().optional(), + anchorText: z.string().optional(), + }, + }, async ({ pageId, node, position, anchorNodeId, anchorText }) => { + let parsedNode; + if (typeof node === "string") { + try { + parsedNode = JSON.parse(node); + } + catch { + throw new Error("node was a string but not valid JSON"); + } + } + else { + parsedNode = node; + } + const result = await docmostClient.insertNode(pageId, parsedNode, { + position, + anchorNodeId, + anchorText, + }); + return jsonContent(result); + }); + // Tool: delete_node + server.registerTool("delete_node", { + description: "Remove a single block by its attrs.id (from get_page_json) WITHOUT " + + "resending the whole document.", + inputSchema: { + pageId: z.string().min(1), + nodeId: z.string().min(1), + }, + }, async ({ pageId, nodeId }) => { + const result = await docmostClient.deleteNode(pageId, nodeId); + return jsonContent(result); + }); + // Tool: insert_image + server.registerTool("insert_image", { + description: "Upload a local image and insert it into a page in one step. By default " + + "appends the image at the end of the page. With replaceText, replaces the " + + "first top-level block whose text contains that string (handy for " + + 'swapping a text placeholder like "[image: foo.png]" for the real image). ' + + "With afterText, inserts the image right after the first block containing " + + "that string. 
Preserves all other block ids.", + inputSchema: { + pageId: z.string().min(1), + filePath: z + .string() + .min(1) + .describe("Absolute local path to the image file"), + align: z.enum(["left", "center", "right"]).optional(), + alt: z.string().optional(), + replaceText: z + .string() + .optional() + .describe("Replace the first top-level block whose text contains this string with the image"), + afterText: z + .string() + .optional() + .describe("Insert the image right after the first top-level block whose text contains this string"), + }, + }, async ({ pageId, filePath, align, alt, replaceText, afterText }) => { + const result = await docmostClient.insertImage(pageId, filePath, { + align, + alt, + replaceText, + afterText, + }); + return jsonContent(result); + }); + // Tool: replace_image + server.registerTool("replace_image", { + description: "Replace an existing image on a page: uploads the new file as a NEW " + + "attachment (fresh clean URL that renders and busts browser caches), then " + + "repoints every image node referencing the old attachmentId (recursively, " + + "incl. callouts/tables) via the live document, preserving comments, " + + "alignment and alt. The old attachment is left as an unreferenced orphan " + + "(Docmost has no API to delete a single attachment; it is removed only when " + + "the page/space is deleted). 
In-place byte overwrite is avoided because some " + + "Docmost versions corrupt the attachment (HTTP 500) on overwrite.", + inputSchema: { + pageId: z.string().min(1), + attachmentId: z + .string() + .min(1) + .describe("attachmentId of the image currently in the page to replace"), + filePath: z + .string() + .min(1) + .describe("Absolute local path to the new image file"), + align: z.enum(["left", "center", "right"]).optional(), + alt: z.string().optional(), + }, + }, async ({ pageId, attachmentId, filePath, align, alt }) => { + const result = await docmostClient.replaceImage(pageId, attachmentId, filePath, { + align, + alt, + }); + return jsonContent(result); + }); + // Tool: share_page + server.registerTool("share_page", { + description: "Make a page publicly accessible (idempotent) and return its public " + + "URL. The URL format is /share//p/.", + inputSchema: { + pageId: z.string().min(1).describe("ID of the page to share"), + searchIndexing: z + .boolean() + .optional() + .describe("Allow search engines to index the page (default true)"), + }, + }, async ({ pageId, searchIndexing }) => { + const result = await docmostClient.sharePage(pageId, searchIndexing ?? true); + return jsonContent(result); + }); + // Tool: unshare_page + server.registerTool("unshare_page", { + description: "Remove the public share of a page (revokes the public URL).", + inputSchema: { + pageId: z.string().min(1).describe("ID of the page to unshare"), + }, + }, async ({ pageId }) => { + const result = await docmostClient.unsharePage(pageId); + return jsonContent(result); + }); + // Tool: list_shares + server.registerTool("list_shares", { + description: "List all public shares in the workspace with page titles and public URLs.", + }, async () => { + const result = await docmostClient.listShares(); + return jsonContent(result); + }); + // Tool: move_page + server.registerTool("move_page", { + description: "Move a page to a new parent (nesting) or root. 
Essential for organizing pages created via 'create_page'.", + inputSchema: { + pageId: z.string().min(1), + parentPageId: z + .string() + .nullable() + .optional() + .describe("Target parent page ID. Pass 'null' or empty string to move to root."), + position: z + .string() + .min(5) + .optional() + .describe("fractional-index position key; min 5 chars; omit to append at the end."), + }, + }, async ({ pageId, parentPageId, position }) => { + const finalParentId = parentPageId === "" || parentPageId === "null" ? null : parentPageId; + // Cheap cycle guard: a page cannot be moved directly under itself. + // (Deeper descendant-cycle detection is intentionally out of scope.) + if (finalParentId !== null && finalParentId === pageId) { + throw new Error("cannot move a page under itself"); + } + const result = await docmostClient.movePage(pageId, finalParentId || null, position); + // Require POSITIVE confirmation: the live /pages/move success shape is + // exactly { success: true, status: 200 }. An empty body, a 204, or any odd + // shape lacking success === true must NOT be reported as a successful move, + // so we surface the raw API result instead of declaring success. 
+ if (!(result && typeof result === "object" && result.success === true)) { + throw new Error(`Failed to move page ${pageId}: ${JSON.stringify(result)}`); + } + return jsonContent({ + message: `Successfully moved page ${pageId} to parent ${finalParentId || "root"}`, + result, + }); + }); + // Tool: delete_page + server.registerTool("delete_page", { + description: "Delete a single page by ID.", + inputSchema: { + pageId: z.string().min(1), + }, + }, async ({ pageId }) => { + await docmostClient.deletePage(pageId); + return { + content: [ + { type: "text", text: `Successfully deleted page ${pageId}` }, + ], + }; + }); + // --- Comment tools (ported from upstream PR #3 by Max Nikitin) --- + // Tool: list_comments + server.registerTool("list_comments", { + description: "List all comments on a page (paginated). Content is returned as Markdown.", + inputSchema: { + pageId: z.string().describe("ID of the page"), + }, + }, async ({ pageId }) => { + const comments = await docmostClient.listComments(pageId); + return jsonContent(comments); + }); + // Tool: create_comment + server.registerTool("create_comment", { + description: "Create a new comment on a page. Content is provided as Markdown and " + + "automatically converted to the required format.", + inputSchema: { + pageId: z.string().describe("ID of the page to comment on"), + content: z.string().min(1).describe("Comment content in Markdown format"), + type: z + .enum(["page", "inline"]) + .optional() + .describe("Comment type: 'page' for general page comment (default), 'inline' for text selection comment"), + selection: z + .string() + // Enforce the documented 250-char cap to match the description above. + .max(250) + .optional() + .describe("For an inline comment, the EXACT text in the page to anchor/highlight the comment on (the first occurrence of this text is wrapped in a comment mark). Max 250 chars. 
Required when type is 'inline'."), + parentCommentId: z + .string() + .optional() + .describe("Parent comment ID to create a reply (max 2 nesting levels)"), + }, + }, async ({ pageId, content, type, selection, parentCommentId }) => { + const result = await docmostClient.createComment(pageId, content, type || "page", selection, parentCommentId); + return jsonContent(result); + }); + // Tool: update_comment + server.registerTool("update_comment", { + description: "Update an existing comment's content. Only the comment creator can " + + "update it. Content is provided as Markdown.", + inputSchema: { + commentId: z.string().min(1).describe("ID of the comment to update"), + content: z + .string() + .min(1) + .describe("New comment content in Markdown format"), + }, + }, async ({ commentId, content }) => { + const result = await docmostClient.updateComment(commentId, content); + return jsonContent(result); + }); + // Tool: delete_comment + server.registerTool("delete_comment", { + description: "Delete a comment. Only the comment creator or space admin can delete it.", + inputSchema: { + commentId: z.string().min(1).describe("ID of the comment to delete"), + }, + }, async ({ commentId }) => { + await docmostClient.deleteComment(commentId); + return { + content: [ + { + type: "text", + text: `Successfully deleted comment ${commentId}`, + }, + ], + }; + }); + // Tool: check_new_comments + server.registerTool("check_new_comments", { + description: "Check for new comments across pages in a space since a given timestamp. " + + "Optionally scope to a page subtree (folder). Returns only comments " + + "created after the specified time.", + inputSchema: { + spaceId: z.string().describe("Space ID to check for new comments"), + since: z + .string() + .min(1) + .describe("ISO 8601 timestamp — only return comments created after this time (e.g. 
'2026-03-10T00:00:00Z')"), + parentPageId: z + .string() + .optional() + .describe("Optional root page ID to scope the check to a subtree (folder). " + + "Only pages under this parent will be checked."), + }, + }, async ({ spaceId, since, parentPageId }) => { + // Reject an unparseable timestamp up front: otherwise the comparison + // against NaN silently treats every comment as "not new" and the tool + // returns zero results without signalling the bad input. + if (Number.isNaN(Date.parse(since))) { + throw new Error(`Invalid 'since' timestamp: ${JSON.stringify(since)} — expected an ISO 8601 date (e.g. '2026-03-10T00:00:00Z')`); + } + const result = await docmostClient.checkNewComments(spaceId, since, parentPageId); + return jsonContent(result); + }); + // Tool: search + server.registerTool("search", { + description: "Search for pages and content. Results are bounded by `limit` " + + "(default applied by the client, max 100).", + inputSchema: { + query: z.string().min(1).describe("Search query"), + limit: z + .number() + .int() + .min(1) + .max(100) + .optional() + .describe("Max results to return (max 100)"), + }, + }, async ({ query, limit }) => { + // The tool exposes no spaceId filter, so pass undefined for the client's + // optional spaceId parameter and forward limit into its correct slot. + const result = await docmostClient.search(query, undefined, limit); + return jsonContent(result); + }); + // Tool: docmost_transform + server.registerTool("docmost_transform", { + description: "Edit a page by running an arbitrary JS transform `(doc, ctx) => doc` " + + "against its LIVE ProseMirror document, with a diff preview and page " + + "history as the safety net. By default dryRun=true: returns a diff " + + "preview WITHOUT writing. Set dryRun=false to apply (atomic, won't " + + "clobber concurrent edits). `doc` is the lossless ProseMirror document " + + "({type:'doc',content:[...]}); return a new doc of the same shape. 
" + + "`ctx` gives you: comments (the page's comments, each {id, content " + + "(markdown), selection, type}); log (array; console.log pushes to it); " + + "consume(id) (mark a comment id as consumed — those are deleted when " + + "deleteComments=true after a successful apply); and helpers: " + + "blockText(node) (plain text), walk(node, fn) (depth-first over all " + + "nodes incl. callouts/tables/lists), getList(doc, predicate) (find a " + + "node even without attrs.id), insertMarkerAfter(doc, anchor, marker, " + + "{beforeBlock}) (insert a plain unmarked text run after anchor, " + + "mark-safe), setCalloutRange(doc, n) (sync a [1]…[K] callout range to " + + "[1]…[n]), noteItem(inlineNodes) (wrap inline nodes in a listItem with a " + + "fresh id), mdToInlineNodes(markdown) (comment markdown -> inline nodes), " + + "and commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " + + "comments into numbered footnotes). Footnote convention: markers are " + + "plain '[N]' text in the body; the notes are an orderedList under a " + + "heading whose text is 'Примечания переводчика'. The transform runs " + + "sandboxed (no require/process/fs/network, 5s timeout) and must return a " + + "{type:'doc'} node.", + inputSchema: { + pageId: z.string().min(1), + transformJs: z + .string() + .min(1) + .describe("A JS function `(doc, ctx) => doc` (expression-arrow or " + + "parenthesized function). 
It receives a clone of the live doc and " + + "ctx (comments, log, consume(id), helpers: blockText/walk/getList/" + + "insertMarkerAfter/setCalloutRange/noteItem/mdToInlineNodes/" + + "commentsToFootnotes) and must return a {type:'doc'} node."), + dryRun: z + .boolean() + .optional() + .default(true) + .describe("Preview only (no write) when true (default)."), + deleteComments: z + .boolean() + .optional() + .default(false) + .describe("After a successful apply, delete every comment id passed to " + + "ctx.consume(id)."), + }, + }, async ({ pageId, transformJs, dryRun, deleteComments }) => { + const result = await docmostClient.transformPage(pageId, transformJs, { + dryRun, + deleteComments, + }); + return jsonContent(result); + }); + // Tool: diff_page_versions + server.registerTool("diff_page_versions", { + description: "Diff two versions of a page and return a Docmost-equivalent change set " + + "(inserted/deleted text, integrity counts for images/links/tables/" + + "callouts/footnote markers, and a human-readable markdown summary). " + + "`from`/`to` each accept a historyId, or null/'current' for the page's " + + "current content (defaults: from=current, to=current — pass a historyId " + + "from list_page_history to compare against the live page).", + inputSchema: { + pageId: z.string().min(1), + from: z + .string() + .optional() + .describe("historyId, or 'current'/omit for current content"), + to: z + .string() + .optional() + .describe("historyId, or 'current'/omit for current content"), + }, + }, async ({ pageId, from, to }) => { + const result = await docmostClient.diffPageVersions(pageId, from, to); + return jsonContent(result); + }); + // Tool: list_page_history + server.registerTool("list_page_history", { + description: "List a page's saved versions (Docmost auto-snapshots on every save), " + + "newest first, cursor-paginated. 
Returns { items, nextCursor }; each " + + "item's id is the historyId to pass to diff_page_versions or " + + "restore_page_version.", + inputSchema: { + pageId: z.string().min(1), + cursor: z + .string() + .optional() + .describe("Pagination cursor from a previous nextCursor"), + }, + }, async ({ pageId, cursor }) => { + const result = await docmostClient.listPageHistory(pageId, cursor); + return jsonContent(result); + }); + // Tool: restore_page_version + server.registerTool("restore_page_version", { + description: "Restore a page to a saved version: writes that version's content back " + + "as the page's current content (Docmost has no restore endpoint, so " + + "this creates a NEW history snapshot — the restore is itself revertible). " + + "Get the historyId from list_page_history.", + inputSchema: { + historyId: z.string().min(1), + }, + }, async ({ historyId }) => { + const result = await docmostClient.restorePageVersion(historyId); + return jsonContent(result); + }); + return server; +} diff --git a/packages/mcp/build/lib/auth-utils.js b/packages/mcp/build/lib/auth-utils.js new file mode 100644 index 00000000..cc61481c --- /dev/null +++ b/packages/mcp/build/lib/auth-utils.js @@ -0,0 +1,74 @@ +import axios from "axios"; +export async function getCollabToken(baseUrl, apiToken) { + try { + const response = await axios.post(`${baseUrl}/auth/collab-token`, {}, { + headers: { + Authorization: `Bearer ${apiToken}`, + "Content-Type": "application/json", + }, + }); + // console.error('Collab Token Response:', response.data); + // Response is wrapped in { data: { token: ... } } + return response.data.data?.token || response.data.token; + } + catch (error) { + if (axios.isAxiosError(error)) { + // Attach the HTTP status to the plain Error so callers (e.g. + // getCollabTokenWithReauth) can still detect a 401/403 after the + // original AxiosError has been wrapped away. + // Avoid leaking the full server response body by default; include only + // status + statusText. 
Append the body only when DEBUG is set. + let message = `Failed to get collab token: ${error.response?.status} ${error.response?.statusText}`; + if (process.env.DEBUG) { + message += ` - ${JSON.stringify(error.response?.data)}`; + } + const err = new Error(message); + err.status = error.response?.status; + throw err; + } + throw error; + } +} +export async function performLogin(baseUrl, email, password) { + try { + const response = await axios.post(`${baseUrl}/auth/login`, { + email, + password, + }); + // Extract token from Set-Cookie header + const cookies = response.headers["set-cookie"]; + if (!cookies) { + throw new Error("No Set-Cookie header found in login response"); + } + // Match the cookie name exactly to avoid matching a future + // authTokenRefresh cookie (startsWith would catch it). + const authCookie = cookies.find((c) => { + const kv = c.split(";")[0]; + return kv.slice(0, kv.indexOf("=")) === "authToken"; + }); + if (!authCookie) { + throw new Error("No authToken cookie found in login response"); + } + // Take everything after the FIRST "=" up to the first ";". + // Splitting on "=" would truncate base64 values containing "=" padding. + const kv = authCookie.split(";")[0]; + const token = kv.slice(kv.indexOf("=") + 1); + return token; + } + catch (error) { + // Avoid leaking the full server response body by default; log only the + // HTTP status. Log the verbose body only when DEBUG is set. 
+ if (axios.isAxiosError(error)) { + if (process.env.DEBUG) { + console.error("Login failed:", error.response?.data); + } + else { + console.error("Login failed:", error.response?.status); + } + } + else { + console.error("Login failed:", error.message); + } + throw error; + } +} diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js new file mode 100644 index 00000000..6c7386e1 --- /dev/null +++ b/packages/mcp/build/lib/collaboration.js @@ -0,0 +1,553 @@ +import { HocuspocusProvider } from "@hocuspocus/provider"; +import { TiptapTransformer } from "@hocuspocus/transformer"; +import * as Y from "yjs"; +import WebSocket from "ws"; +import { marked } from "marked"; +import { generateJSON } from "@tiptap/html"; +import { JSDOM } from "jsdom"; +import { docmostExtensions } from "./docmost-schema.js"; +import { withPageLock } from "./page-lock.js"; +import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; +// Setup DOM environment for Tiptap HTML parsing in Node.js +const dom = new JSDOM(""); +global.window = dom.window; +global.document = dom.window.document; +// @ts-ignore +global.Element = dom.window.Element; +// @ts-ignore +global.WebSocket = WebSocket; +// Navigator is read-only in newer Node versions and already exists +// global.navigator = dom.window.navigator; +/** + * Hard ceiling above which we skip callout preprocessing entirely. The linear + * scanner below has no quadratic blow-up, but we still cap input defensively so + * a pathological multi-megabyte payload cannot tie up the event loop; in that + * case the markdown is passed through verbatim (callouts are simply not + * detected) rather than risking a slow scan. + */ +const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB +/** Matches an opening callout fence: `:::type` (type captured, lower-cased). */ +const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/; +/** Matches a bare closing callout fence: `:::`. 
*/ +const CALLOUT_CLOSE_RE = /^:::\s*$/; +/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */ +const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +/** + * Pre-process Docmost-flavoured markdown: convert `:::type ... :::` + * callout blocks (the syntax our markdown export produces) into HTML + * divs that the callout extension parses. The inner content is rendered + * through marked as regular markdown. + * + * Implemented as a single linear pass over the lines (no quadratic regex + * rescan). It: + * - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a + * `:::` line that lives inside a code fence as a callout delimiter, so a + * callout body that itself contains a fenced code block with a `:::` line is + * no longer corrupted; + * - matches an opening `:::type` line with the next CLOSING `:::` at the SAME + * nesting level, supporting NESTED callouts via a depth counter (an inner + * `:::type` opens a deeper level and consumes a matching `:::`); + * - emits the same `
` output + * (inner rendered through marked) as the previous regex implementation. + */ +async function preprocessCallouts(markdown) { + // Defensive cap: skip preprocessing for pathologically large inputs. + if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) { + return markdown; + } + // Recursively transform a slice of lines, converting top-level callouts in + // that slice into
blocks and rendering their inner content (which may + // itself contain nested callouts) through this same function. + const transform = async (lines) => { + const out = []; + let inCodeFence = false; + let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it + let i = 0; + while (i < lines.length) { + const line = lines[i]; + // Inside a code fence, only its matching closing fence is significant; + // everything else (including `:::` lines) is copied through verbatim. + if (inCodeFence) { + out.push(line); + const fence = line.match(CODE_FENCE_RE); + if (fence && fence[2].startsWith(codeFenceMarker[0]) && + fence[2].length >= codeFenceMarker.length) { + inCodeFence = false; + codeFenceMarker = ""; + } + i++; + continue; + } + // A code fence opening outside any callout body: enter code-fence mode. + const fenceOpen = line.match(CODE_FENCE_RE); + if (fenceOpen) { + inCodeFence = true; + codeFenceMarker = fenceOpen[2]; + out.push(line); + i++; + continue; + } + // An opening callout fence: scan forward (with code-fence and nested + // callout awareness) for its matching closing `:::` at the same level. 
+ const open = line.match(CALLOUT_OPEN_RE); + if (open) { + const type = open[1].toLowerCase(); + const bodyLines = []; + let depth = 1; + let innerInCodeFence = false; + let innerCodeFenceMarker = ""; + let j = i + 1; + for (; j < lines.length; j++) { + const bl = lines[j]; + if (innerInCodeFence) { + const f = bl.match(CODE_FENCE_RE); + if (f && f[2].startsWith(innerCodeFenceMarker[0]) && + f[2].length >= innerCodeFenceMarker.length) { + innerInCodeFence = false; + innerCodeFenceMarker = ""; + } + bodyLines.push(bl); + continue; + } + const innerFence = bl.match(CODE_FENCE_RE); + if (innerFence) { + innerInCodeFence = true; + innerCodeFenceMarker = innerFence[2]; + bodyLines.push(bl); + continue; + } + if (CALLOUT_OPEN_RE.test(bl)) { + depth++; + bodyLines.push(bl); + continue; + } + if (CALLOUT_CLOSE_RE.test(bl)) { + depth--; + if (depth === 0) + break; // matching close for THIS callout + bodyLines.push(bl); + continue; + } + bodyLines.push(bl); + } + if (j < lines.length) { + // Found the matching closing fence: render the body (recursively, so + // nested callouts are handled) and emit the callout div. + const inner = await transform(bodyLines); + const renderedInner = await marked.parse(inner); + out.push(`\n
${renderedInner}
\n`); + i = j + 1; // skip past the closing `:::` + continue; + } + // No matching close (unterminated callout): treat the opener as a + // literal line and continue, preserving the original text. + out.push(line); + i++; + continue; + } + out.push(line); + i++; + } + return out.join("\n"); + }; + return transform(markdown.split("\n")); +} +/** + * Bridge marked's checkbox lists to TipTap task lists. + * + * marked renders GitHub task list items (`- [x] done`) as a plain + * `
<ul><li><input type="checkbox"> text</li></ul>` WITHOUT the + * markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists + * into the shape those extensions expect: + * TaskList parseHTML matches `ul[data-type="taskList"]`, + * TaskItem matches `li[data-type="taskItem"]`, + * the checked state is read from `data-checked === "true"`. + * + * A list is only converted when it has at least one `<li>` and EVERY direct + * `<li>` contains a checkbox input. Both `<ul>` and `<ol>` are considered: a + * numbered checklist (`1. [x] a`, which marked renders as an `<ol>` of checkbox + * `<li>`s) would otherwise lose its task state. TipTap task lists are unordered, + * so a matching `<ol>` is emitted as `data-type="taskList"` exactly like a + * `<ul>`. Mixed or ordinary lists (including ordinary `<ol>` lists) are left + * untouched so they keep rendering as bullet/numbered lists. The marked `<p>` + * wrapper is kept inside the `<li>
              1. ` because TaskItem content allows paragraphs. + */ +function bridgeTaskLists(html) { + // Cheap early-out: if the markup contains no checkbox input at all there is + // nothing to bridge, so skip the expensive JSDOM parse entirely. This is the + // common case (most pages have no task lists). + if (!/type=["']?checkbox/i.test(html)) { + return html; + } + // Defensive cap (consistent with preprocessCallouts): skip the bridge for + // pathologically large inputs rather than running a second expensive JSDOM + // parse on a multi-megabyte payload. The markup is passed through verbatim. + if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) { + return html; + } + const dom = new JSDOM(html); + const document = dom.window.document; + // Collect the checkbox(es) that belong to THIS
<li> directly: either direct + // child elements or ones inside the <li>'s direct <p> + // child (the shape marked emits: `<li><p><input type="checkbox"> text</p></li>`). + // Checkboxes nested deeper (e.g. inside a child <ul>/<ol>) are excluded so a + // bullet <li> that merely contains a nested task sublist is not misdetected. + // Raw inline HTML can put more than one checkbox in a single <li>
                  2. ; we gather + // ALL of them so none survive into the converted item. + const directCheckboxes = (li) => { + const found = []; + for (const child of Array.from(li.children)) { + if (child.tagName === "INPUT" && + child.getAttribute("type") === "checkbox") { + found.push(child); + continue; + } + if (child.tagName === "P") { + for (const inp of Array.from(child.querySelectorAll(":scope > input[type='checkbox']"))) { + found.push(inp); + } + } + } + return found; + }; + // Both
<ul> and <ol> are candidates: an <ol> whose every direct <li>
                        1. carries + // its own checkbox is a numbered checklist that must also become a taskList. + const lists = Array.from(document.querySelectorAll("ul, ol")); + for (const list of lists) { + // Only consider DIRECT child
                        2. elements; nested lists are handled by + // their own iteration of the outer loop. + const items = Array.from(list.children).filter((child) => child.tagName === "LI"); + if (items.length === 0) + continue; + const itemCheckboxes = items.map((li) => directCheckboxes(li)); + // Convert only when every direct
                        3. carries at least one OWN checkbox. + if (!itemCheckboxes.every((boxes) => boxes.length > 0)) + continue; + // A numbered checklist arrives as an
<ol>. We must NOT leave the tag as + // <ol> while tagging it data-type="taskList": generateJSON would then match + // BOTH the orderedList rule (tag ol) and the taskList rule (data-type), + // emitting a phantom empty orderedList beside the real taskList. So rename a + // qualifying <ol> to a <ul> — move its <li> children over and replace it — + // leaving only the taskList rule to match. Already-<ul>
                                    lists are unchanged. + let target = list; + if (list.tagName === "OL") { + const ul = document.createElement("ul"); + // Carry over existing attributes (e.g. class) so nothing is silently lost. + for (const attr of Array.from(list.attributes)) { + ul.setAttribute(attr.name, attr.value); + } + // Move every child node (including the
<li>s we collected) into the <ul>
                                      . + while (list.firstChild) { + ul.appendChild(list.firstChild); + } + list.replaceWith(ul); + target = ul; + } + target.setAttribute("data-type", "taskList"); + items.forEach((li, index) => { + const boxes = itemCheckboxes[index]; + // The first checkbox determines the checked state (matches the previous + // single-checkbox behaviour); any extras only need removing. + const input = boxes[0] ?? null; + li.setAttribute("data-type", "taskItem"); + const checked = input != null && + (input.hasAttribute("checked") || input.checked); + li.setAttribute("data-checked", checked ? "true" : "false"); + // Remove ALL direct checkbox inputs so none survive into the content + // (a raw-inline-HTML
                                    • may carry more than one). + for (const box of boxes) { + box.remove(); + } + }); + } + return document.body.innerHTML; +} +/** Convert markdown to a ProseMirror doc using the full Docmost schema. */ +export async function markdownToProseMirror(markdownContent) { + const withCallouts = await preprocessCallouts(markdownContent); + const html = await marked.parse(withCallouts); + const bridged = bridgeTaskLists(html); + return generateJSON(bridged, docmostExtensions); +} +/** + * Build the collaboration WebSocket URL from an API base URL: + * switch http(s)->ws(s), strip a trailing /api, mount on /collab. + * Shared by the live read and the mutate path so both target the same socket. + */ +export function buildCollabWsUrl(baseUrl) { + let wsUrl = baseUrl.replace(/^http/, "ws"); + try { + const urlObj = new URL(wsUrl); + if (urlObj.pathname.endsWith("/api") || urlObj.pathname.endsWith("/api/")) { + urlObj.pathname = urlObj.pathname.replace(/\/api\/?$/, ""); + } + urlObj.pathname = urlObj.pathname.replace(/\/$/, "") + "/collab"; + // Drop any query/hash from the base URL so it is not carried into the + // collaboration ws URL. + urlObj.search = ""; + urlObj.hash = ""; + wsUrl = urlObj.toString(); + } + catch (e) { + // Fallback if URL parsing fails + if (!wsUrl.endsWith("/collab")) { + wsUrl = wsUrl.replace(/\/$/, "") + "/collab"; + } + } + return wsUrl; +} +/** + * Encode a ProseMirror doc to a Yjs document, sanitizing it first and turning + * the opaque yjs "Unexpected content type" failure into a descriptive error. + * + * `sanitizeForYjs` strips `undefined` node/mark attributes (the common cause of + * the failure); if `toYdoc` still throws, `findUnstorableAttr` is used to point + * at the offending attribute path. 
+ */ +export function buildYDoc(doc) { + const safe = sanitizeForYjs(doc); + try { + return TiptapTransformer.toYdoc(safe, "default", docmostExtensions); + } + catch (e) { + const bad = findUnstorableAttr(safe); + throw new Error(`Failed to encode document to Yjs (toYdoc): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`); + } +} +/** + * Validate that a doc is Yjs-encodable by building (and discarding) a Y.Doc. + * Throws the same descriptive error as the apply path when it is not. Used by + * the dry-run preview so it fails identically to apply. + */ +export function assertYjsEncodable(doc) { + buildYDoc(doc); +} +/** Time we wait for the initial handshake/sync before giving up. */ +const CONNECT_TIMEOUT_MS = 25000; +/** Time we wait for the server to acknowledge our write before giving up. */ +const PERSIST_TIMEOUT_MS = 20000; +/** + * Safely mutate the live content of a page over the collaboration websocket. + * + * This is the single safe write path for every MCP content mutation. It: + * 1. serializes per-page writes through withPageLock (no two MCP writes on + * the same page overlap); + * 2. connects to Hocuspocus and waits for the initial sync so the local ydoc + * mirrors the authoritative server doc — INCLUDING edits/comments/images + * that are not yet in the debounced REST snapshot; + * 3. inside onSynced, SYNCHRONOUSLY reads the live doc, runs `transform`, and + * writes the result back — with no `await` between read and write so no + * remote update can interleave and clobber concurrent human edits; + * 4. waits for the server to acknowledge the write (unsyncedChanges -> 0) + * before resolving, so the next operation observes our change. + * + * `transform` receives the live ProseMirror doc and returns the NEW full + * ProseMirror doc to write, or `null` to abort with no write (a no-op). 
If + * `transform` throws, the error is propagated to the caller (not swallowed). + * + * Returns the doc that was written, or the live doc when the transform aborted. + */ +export async function mutatePageContent(pageId, collabToken, baseUrl, transform) { + return withPageLock(pageId, () => { + if (process.env.DEBUG) { + console.error(`Starting realtime content mutate for page ${pageId}`); + // Token prefix is sensitive; only log it under DEBUG. + console.error(`Token prefix: ${collabToken ? collabToken.substring(0, 5) : "NONE"}...`); + } + const ydoc = new Y.Doc(); + const wsUrl = buildCollabWsUrl(baseUrl); + if (process.env.DEBUG) + console.error(`Connecting to WebSocket: ${wsUrl}`); + return new Promise((resolve, reject) => { + let provider; + let applied = false; // onSynced may fire again on reconnect — apply once. + let settled = false; + // Set true on disconnect/close so a reconnect-driven unsyncedChanges->0 + // cannot be mistaken for a successful persist of our write. + let connectionLost = false; + let connectTimer; + let persistTimer; + let unsyncedHandler; + const cleanup = () => { + if (connectTimer) + clearTimeout(connectTimer); + if (persistTimer) + clearTimeout(persistTimer); + if (provider) { + if (unsyncedHandler) { + try { + provider.off("unsyncedChanges", unsyncedHandler); + } + catch (err) { } + } + try { + provider.destroy(); + } + catch (err) { } + } + }; + const finish = (err, value) => { + if (settled) + return; + settled = true; + cleanup(); + if (err) + reject(err); + else + resolve(value); + }; + connectTimer = setTimeout(() => { + finish(new Error("Connection timeout to collaboration server")); + }, CONNECT_TIMEOUT_MS); + // Resolve once the server has acknowledged our update. The provider + // increments unsyncedChanges when our local update is sent and + // decrements it when the server replies with a SyncStatus(applied=true); + // reaching 0 means the authoritative in-memory ydoc on the server now + // contains our write. 
+ const waitForPersistence = () => { + if (settled) + return; + // A missing provider is a failure, not a success: without it the write + // can never have been acknowledged. Only an actual unsyncedChanges===0 + // on a live provider counts as persisted. + if (!provider) { + finish(new Error("collab provider gone before persistence")); + return; + } + if (provider.unsyncedChanges === 0) { + finish(null, lastWrittenDoc); + return; + } + persistTimer = setTimeout(() => { + finish(new Error("Timeout waiting for collaboration server to persist the update")); + }, PERSIST_TIMEOUT_MS); + unsyncedHandler = (data) => { + // Only treat unsyncedChanges->0 as success when the connection is + // still up. A transient disconnect + reconnect handshake can drive + // the counter back to 0 without our write being re-transmitted; in + // that case let the disconnect/close error win instead. + if (data.number === 0 && !connectionLost) { + finish(null, lastWrittenDoc); + } + }; + provider.on("unsyncedChanges", unsyncedHandler); + }; + let lastWrittenDoc; + provider = new HocuspocusProvider({ + url: wsUrl, + name: `page.${pageId}`, + document: ydoc, + token: collabToken, + // @ts-ignore - Required for Node.js environment + WebSocketPolyfill: WebSocket, + onConnect: () => { + if (process.env.DEBUG) + console.error("WS Connect"); + }, + // An unexpected disconnect/close while we are still waiting (during the + // connect-wait before onSynced, or during the persistence wait after the + // write) means the update will never be acknowledged — surface it now + // instead of hanging until the connect/persist timeout fires. `finish` + // is idempotent via the `settled` flag, so the onClose that our own + // cleanup()->provider.destroy() triggers (after settled=true is set) is + // a harmless no-op and cannot cause a double-resolve. 
+ onDisconnect: () => { + if (process.env.DEBUG) + console.error("WS Disconnect"); + // Mark BEFORE finish so the unsyncedChanges handler (if it races) + // sees the connection as lost and won't report a false success. + connectionLost = true; + finish(new Error("Collaboration connection closed before the update was persisted/synced")); + }, + onClose: () => { + if (process.env.DEBUG) + console.error("WS Close"); + // Mark BEFORE finish so the unsyncedChanges handler (if it races) + // sees the connection as lost and won't report a false success. + connectionLost = true; + finish(new Error("Collaboration connection closed before the update was persisted/synced")); + }, + onSynced: () => { + if (applied || settled) + return; + applied = true; + if (process.env.DEBUG) + console.error("Connected and synced!"); + // CRITICAL: everything between reading the live doc and writing it + // back must stay synchronous (no await). While the JS event loop is + // not yielded, no incoming remote update can interleave, so any + // already-synced concurrent edits are preserved in liveDoc. + let newDoc; + try { + let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); + if (!liveDoc || + typeof liveDoc !== "object" || + !Array.isArray(liveDoc.content)) { + liveDoc = { type: "doc", content: [] }; + } + newDoc = transform(liveDoc); + if (newDoc == null) { + // Transform aborted — write nothing, return the live doc. + lastWrittenDoc = liveDoc; + finish(null, liveDoc); + return; + } + const tempDoc = buildYDoc(newDoc); + // Fetch the fragment immediately before the transact that mutates + // it, rather than reusing a handle grabbed across the transform. + const fragment = ydoc.getXmlFragment("default"); + ydoc.transact(() => { + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc)); + }); + } + catch (e) { + // Includes errors thrown by transform (e.g. 
"afterText not found", + // "text not found"): propagate them verbatim to the caller. + finish(e instanceof Error ? e : new Error(String(e))); + return; + } + lastWrittenDoc = newDoc; + if (process.env.DEBUG) + console.error("Content written, waiting for server to persist..."); + waitForPersistence(); + }, + onAuthenticationFailed: () => { + finish(new Error("Authentication failed for collaboration connection")); + }, + }); + }); + }); +} +/** + * Replace the live content of a page over the collaboration websocket. + * Accepts a ready ProseMirror JSON document; the caller controls whether + * it was produced from markdown (ids regenerate) or edited in place + * (existing block ids preserved). + * + * This is an intentional full replace (used by update_page / update_page_json), + * but now runs under the per-page lock and waits for server persistence via + * mutatePageContent. + */ +export async function replacePageContent(pageId, prosemirrorDoc, collabToken, baseUrl) { + // Fail fast on a bad document instead of deferring the failure into the + // collaboration write (where TiptapTransformer.toYdoc(undefined) used to + // throw). The transform must return a valid ProseMirror doc. + if (prosemirrorDoc == null || + typeof prosemirrorDoc !== "object" || + prosemirrorDoc.type !== "doc") { + throw new Error("replacePageContent: invalid ProseMirror document"); + } + await mutatePageContent(pageId, collabToken, baseUrl, () => prosemirrorDoc); +} +/** + * Markdown update path (kept for backwards compatibility). + * NOTE: this re-imports the whole document — block ids are regenerated. + * Tables and :::callout::: blocks survive thanks to the full schema. 
+ */ +export async function updatePageContentRealtime(pageId, markdownContent, collabToken, baseUrl) { + const tiptapJson = await markdownToProseMirror(markdownContent); + await mutatePageContent(pageId, collabToken, baseUrl, () => tiptapJson); +} diff --git a/packages/mcp/build/lib/diff.js b/packages/mcp/build/lib/diff.js new file mode 100644 index 00000000..5205aff1 --- /dev/null +++ b/packages/mcp/build/lib/diff.js @@ -0,0 +1,273 @@ +/** + * Headless, Docmost-equivalent document diff. + * + * Docmost's history editor computes a change set with the exact pipeline below + * (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as + * editor decorations. This module runs the SAME computation but serializes the + * result to text + integrity counts instead of decorations, so a diff can be + * previewed without a browser. + * + * recreateTransform here comes from @fellow/prosemirror-recreate-transform, the + * maintained published fork of the MIT prosemirror-recreate-steps source that + * Docmost vendors in @docmost/editor-ext; it exposes the identical + * recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff }) + * signature. + * + * If recreateTransform / the changeset throws on a pathological document pair, + * we fall back to a coarse block-level text diff so the tool never hard-fails. + */ +import { getSchema } from "@tiptap/core"; +import { Node } from "@tiptap/pm/model"; +import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset"; +import { recreateTransform } from "@fellow/prosemirror-recreate-transform"; +import { docmostExtensions } from "./docmost-schema.js"; +/** Build the schema once; it is pure and reused across calls. */ +const schema = getSchema(docmostExtensions); +/** Recursively concatenate the plain text of a JSON node. 
*/ +function plainText(node) { + if (!node || typeof node !== "object") + return ""; + let out = ""; + if (typeof node.text === "string") + out += node.text; + if (Array.isArray(node.content)) { + for (const child of node.content) + out += plainText(child); + } + return out; +} +/** Count nodes in a JSON doc that satisfy `pred` (recursive). */ +function countNodes(doc, pred) { + let n = 0; + const visit = (node) => { + if (!node || typeof node !== "object") + return; + if (pred(node)) + n++; + if (Array.isArray(node.content)) + for (const c of node.content) + visit(c); + }; + visit(doc); + return n; +} +/** + * Count UNIQUE links in a JSON doc by their `href`. A single link can be split + * across several adjacent text runs (e.g. a "link+bold" run followed by a "link" + * run); counting link-bearing runs would over-count it. Walking the tree and + * collecting hrefs into a Set keys each distinct link once. Link marks with a + * missing/empty href are bucketed under a single "" key so a malformed link is + * still counted as one. + */ +function countUniqueLinks(doc) { + const hrefs = new Set(); + const visit = (node) => { + if (!node || typeof node !== "object") + return; + if (node.type === "text" && Array.isArray(node.marks)) { + for (const m of node.marks) { + if (m && m.type === "link") { + const href = m.attrs && typeof m.attrs.href === "string" ? m.attrs.href : ""; + hrefs.add(href); + } + } + } + if (Array.isArray(node.content)) + for (const c of node.content) + visit(c); + }; + visit(doc); + return hrefs.size; +} +/** + * Parse the ordered list of integers from `[N]` footnote markers found in the + * BODY only (every top-level block before the first "Примечания..." notes + * heading; if no such heading, the whole doc). Returned in reading order. + */ +function footnoteMarkers(doc, notesHeading) { + const top = Array.isArray(doc?.content) ? 
doc.content : []; + const notesIdx = top.findIndex((n) => n && + n.type === "heading" && + plainText(n).trim() === notesHeading); + const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top; + const markers = []; + const re = /\[(\d+)\]/g; + for (const block of bodyBlocks) { + const text = plainText(block); + let m; + re.lastIndex = 0; + while ((m = re.exec(text)) !== null) { + markers.push(Number(m[1])); + } + } + return markers; +} +/** Compute the [old,new] integrity tuples for two JSON docs. */ +function computeIntegrity(oldDoc, newDoc, notesHeading) { + const images = [ + countNodes(oldDoc, (n) => n.type === "image"), + countNodes(newDoc, (n) => n.type === "image"), + ]; + const links = [ + countUniqueLinks(oldDoc), + countUniqueLinks(newDoc), + ]; + const tables = [ + countNodes(oldDoc, (n) => n.type === "table"), + countNodes(newDoc, (n) => n.type === "table"), + ]; + const callouts = [ + countNodes(oldDoc, (n) => n.type === "callout"), + countNodes(newDoc, (n) => n.type === "callout"), + ]; + const fns = [ + footnoteMarkers(oldDoc, notesHeading), + footnoteMarkers(newDoc, notesHeading), + ]; + return { images, links, tables, callouts, footnoteMarkers: fns }; +} +/** + * Resolve the lead text of the top-level block in a ProseMirror Node that + * contains the given document position. Returns "" when out of range. + */ +function blockContextAt(node, pos) { + try { + const clamped = Math.max(0, Math.min(pos, node.content.size)); + const $pos = node.resolve(clamped); + // depth 1 is the top-level block in a doc node. + const block = $pos.depth >= 1 ? $pos.node(1) : $pos.node(0); + const text = block.textContent || ""; + return text.length > 80 ? text.slice(0, 77) + "..." : text; + } + catch { + return ""; + } +} +/** Truncate a string for the markdown summary. */ +function truncate(s, n = 120) { + return s.length > n ? s.slice(0, n - 3) + "..." : s; +} +/** + * Coarse fallback: a block-by-block plain-text diff. 
/**
 * Coarse fallback: a block-by-block plain-text diff. Used only when the precise
 * changeset pipeline throws, so the tool degrades gracefully instead of failing.
 *
 * Top-level blocks are compared by their plain text as SETS: a block whose text
 * does not occur anywhere in the other document is reported, and blocks with
 * empty or whitespace-only text are ignored. All deletions are listed first (in
 * old-document order), then all insertions (in new-document order).
 *
 * @param oldDoc the earlier JSON doc.
 * @param newDoc the later JSON doc.
 * @returns a list of `{ op: "delete" | "insert", block, text }` entries, where
 *          `block` is `text` truncated to 80 characters.
 */
function coarseDiff(oldDoc, newDoc) {
    const oldBlocks = Array.isArray(oldDoc?.content) ? oldDoc.content : [];
    const newBlocks = Array.isArray(newDoc?.content) ? newDoc.content : [];
    const oldTexts = oldBlocks.map(plainText);
    const newTexts = newBlocks.map(plainText);
    const oldSet = new Set(oldTexts);
    const newSet = new Set(newTexts);
    const changes = [];
    for (const t of oldTexts) {
        if (!newSet.has(t) && t.trim() !== "") {
            changes.push({ op: "delete", block: truncate(t, 80), text: t });
        }
    }
    for (const t of newTexts) {
        if (!oldSet.has(t) && t.trim() !== "") {
            changes.push({ op: "insert", block: truncate(t, 80), text: t });
        }
    }
    return changes;
}
/**
 * Build the human-readable unified-ish markdown summary.
 *
 * @param result   `{ summary, integrity, changes }` as assembled by diffDocs.
 * @param fellBack true when the coarse fallback produced `changes`; adds a note.
 * @returns the summary as a single string with "\n" line separators.
 */
function renderMarkdown(result, fellBack) {
    const lines = [];
    const { summary, integrity, changes } = result;
    lines.push(`# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`);
    if (fellBack) {
        lines.push("");
        lines.push("> note: precise diff failed; coarse block-level diff shown.");
    }
    lines.push("");
    lines.push("## Integrity (old -> new)");
    lines.push(`- images: ${integrity.images[0]} -> ${integrity.images[1]}`);
    lines.push(`- links: ${integrity.links[0]} -> ${integrity.links[1]}`);
    lines.push(`- tables: ${integrity.tables[0]} -> ${integrity.tables[1]}`);
    lines.push(`- callouts: ${integrity.callouts[0]} -> ${integrity.callouts[1]}`);
    lines.push(`- footnoteMarkers: [${integrity.footnoteMarkers[0].join(", ")}] -> [${integrity.footnoteMarkers[1].join(", ")}]`);
    lines.push("");
    lines.push("## Changes");
    if (changes.length === 0) {
        lines.push("(no textual changes)");
    }
    else {
        for (const c of changes) {
            // Anything that is not an insert is rendered as a deletion.
            const sign = c.op === "insert" ? "+" : "-";
            const ctx = c.block ? ` @ ${truncate(c.block, 60)}` : "";
            lines.push(`${sign} ${truncate(c.text)}${ctx}`);
        }
    }
    return lines.join("\n");
}
/**
 * Diff two ProseMirror JSON documents the way Docmost's history editor does and
 * serialize the result to text + integrity counts.
 *
 * The precise path runs recreateTransform -> ChangeSet.addSteps ->
 * simplifyChanges. If any step of it throws, the result is rebuilt from scratch
 * with coarseDiff, so the function does not throw on a pathological pair.
 *
 * `summary.inserted` / `summary.deleted` are character counts of the reported
 * texts. `summary.blocksChanged` counts distinct (operation, block lead text)
 * pairs, so a block that has both a deletion and an insertion contributes two.
 *
 * @param oldDocJson the earlier document
 * @param newDocJson the later document
 * @param notesHeading heading delimiting body from notes for footnote counting
 * @returns `{ summary, integrity, changes, markdown }`
 */
export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
    const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);
    let changes = [];
    let inserted = 0;
    let deleted = 0;
    let fellBack = false;
    const changedBlocks = new Set();
    try {
        const oldNode = Node.fromJSON(schema, oldDocJson);
        const newNode = Node.fromJSON(schema, newDocJson);
        const tr = recreateTransform(oldNode, newNode, {
            complexSteps: false,
            wordDiffs: true,
            simplifyDiff: true,
        });
        const changeSet = ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
        const simplified = simplifyChanges(changeSet.changes, newNode);
        for (const change of simplified) {
            // Deleted text lives in the OLD doc coordinate range [fromA, toA).
            if (change.toA > change.fromA) {
                const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
                if (text.length > 0) {
                    deleted += text.length;
                    const block = blockContextAt(oldNode, change.fromA);
                    changes.push({ op: "delete", block, text });
                    if (block)
                        changedBlocks.add("d:" + block);
                }
            }
            // Inserted text lives in the NEW doc coordinate range [fromB, toB).
            if (change.toB > change.fromB) {
                const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
                if (text.length > 0) {
                    inserted += text.length;
                    const block = blockContextAt(newNode, change.fromB);
                    changes.push({ op: "insert", block, text });
                    if (block)
                        changedBlocks.add("i:" + block);
                }
            }
        }
    }
    catch (err) {
        // Pathological pair: degrade to a coarse block-level diff so we never throw.
        if (process.env.DEBUG)
            console.error("diffDocs: precise diff failed, using coarse block-level diff:", err);
        fellBack = true;
        // The precise loop may have failed part-way through. Drop whatever it
        // accumulated so the totals describe the coarse diff only.
        inserted = 0;
        deleted = 0;
        changedBlocks.clear();
        changes = coarseDiff(oldDocJson, newDocJson);
        for (const c of changes) {
            if (c.op === "insert")
                inserted += c.text.length;
            else
                deleted += c.text.length;
            if (c.block)
                changedBlocks.add(c.op[0] + ":" + c.block);
        }
    }
    const partial = {
        summary: { inserted, deleted, blocksChanged: changedBlocks.size },
        integrity,
        changes,
    };
    return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
// ---- patch boundary, kept verbatim from the source diff ----
// diff --git a/packages/mcp/build/lib/docmost-schema.js b/packages/mcp/build/lib/docmost-schema.js
// new file mode 100644
// index 00000000..97cdcafd
// --- /dev/null
// +++ b/packages/mcp/build/lib/docmost-schema.js
// @@ -0,0 +1,999 @@
/**
 * Full TipTap extension set matching the real Docmost document schema.
 *
 * The default StarterKit-only schema silently destroys Docmost-specific
 * nodes (callout, table) and drops attributes it does not know about
 * (node ids, image sizing, link targets). Every code path that converts
 * to or from ProseMirror JSON must use THIS set, otherwise a round-trip
 * loses content.
 */
+ */ +import StarterKit from "@tiptap/starter-kit"; +import Image from "@tiptap/extension-image"; +import TaskList from "@tiptap/extension-task-list"; +import TaskItem from "@tiptap/extension-task-item"; +import Highlight from "@tiptap/extension-highlight"; +import Subscript from "@tiptap/extension-subscript"; +import Superscript from "@tiptap/extension-superscript"; +import { Node, Extension, Mark } from "@tiptap/core"; +// Inlined from @tiptap/core's getStyleProperty (added after 3.20.x) so this +// package can stay on the same @tiptap/core version as the editor and avoid a +// duplicate-tiptap version split in the monorepo. Reads a single declaration +// from an element's inline `style` attribute, last-wins, case-insensitive. +function getStyleProperty(element, propertyName) { + const styleAttr = element.getAttribute("style"); + if (!styleAttr) { + return null; + } + const decls = styleAttr.split(";").map((decl) => decl.trim()).filter(Boolean); + const target = propertyName.toLowerCase(); + for (let i = decls.length - 1; i >= 0; i -= 1) { + const decl = decls[i]; + const colonIndex = decl.indexOf(":"); + if (colonIndex === -1) { + continue; + } + const prop = decl.slice(0, colonIndex).trim().toLowerCase(); + if (prop === target) { + return decl.slice(colonIndex + 1).trim(); + } + } + return null; +} +/** Allowed Docmost callout types; anything else falls back to "info". */ +const CALLOUT_TYPES = ["info", "warning", "danger", "success"]; +export const clampCalloutType = (value) => value && CALLOUT_TYPES.includes(value.toLowerCase()) + ? value.toLowerCase() + : "info"; +/** + * Allowlist guard for CSS color values imported from HTML. + * + * Docmost interpolates stored mark colors straight into an inline style + * attribute (e.g. style="background-color: ${color}" / "color: ${color}"). + * An unsanitized value such as `red; --x: url(...)` or `red">