diff --git a/.dockerignore b/.dockerignore index 2e4e2d0..e684bc6 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,8 @@ .git node_modules/ build/ +packages/*/dist/ +packages/*/node_modules/ .env data/ test/ diff --git a/AGENTS.md b/AGENTS.md index d468640..c1198fb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,6 +14,13 @@ and the phased plan before adding engine logic. ## Project structure +The project is now an **npm-workspaces monorepo**. `packages/docmost-client` is +the extracted `DocmostClient` + `lib/` — a verbatim 1:1 copy of `docmost-mcp/src/` +with the sync-specific methods appended under a clear banner (changes are +backported into `docmost-mcp` manually). The **ROOT remains the engine app** +(`src/`, `test/`, `build/`, `data/`) and depends on `docmost-client`. `npm run +build` builds the lib first, then compiles the app to `build/`. + - `src/` — application code. - `src/settings.ts` — the single config entry point (zod schema keyed by the real ENV var names; `parseSettings` is pure, `loadSettings` reads `.env`). diff --git a/Dockerfile b/Dockerfile index 86d7526..5c62b5c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,15 +2,18 @@ FROM node:22-slim WORKDIR /app -# Dependencies first (better layer caching): copy manifests, install from lock. +# Dependencies first (better layer caching): copy the root manifest, the lock, +# and the workspace package manifest so `npm ci` can link the workspace. COPY package.json package-lock.json ./ +COPY packages/docmost-client/package.json packages/docmost-client/package.json RUN npm ci # Runtime state directory (mounted as a volume in production). RUN mkdir -p data -# Source + TS config, then compile to build/. -COPY tsconfig.json ./ +# Source + TS config, then build the workspace lib and compile the app to build/. 
+COPY tsconfig.json tsconfig.base.json ./ +COPY packages/ packages/ COPY src/ src/ RUN npm run build diff --git a/Makefile b/Makefile index 1154f66..b52ab73 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ .DEFAULT_GOAL := help -.PHONY: help install env build test run dev clean +.PHONY: help install env build test run dev roundtrip pull clean help: ## Show this help @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) \ @@ -27,5 +27,11 @@ run: build ## Build and run the app dev: install ## Run in watch mode (tsx) npm run dev -clean: ## Remove build artifacts and node_modules - rm -rf build node_modules +roundtrip: build ## Run the offline round-trip idempotency harness (SPEC §11) + npm run roundtrip + +pull: build ## Mirror the configured Docmost space into the vault (read-only) + npm run pull + +clean: ## Remove build artifacts and node_modules (incl. the workspace lib) + rm -rf build node_modules packages/*/dist diff --git a/README.md b/README.md index 80c18de..f03d19c 100644 --- a/README.md +++ b/README.md @@ -2,52 +2,92 @@ Bidirectional sync between Docmost articles and a local Markdown git vault — the git repository is the state store. For the full design and the phased -implementation plan, see [`SPEC.md`](./SPEC.md). +implementation plan, see [`SPEC.md`](./SPEC.md) (the authoritative spec). -> **Status: scaffold only — the sync engine is not implemented yet.** -> `src/index.ts` validates configuration and exits. The engine described in -> `SPEC.md` is out of scope for this scaffold. +> **Status: Increment 1 — monorepo scaffold + read-only `pull` + Phase-0 +> round-trip harness.** Continuous two-way sync is not implemented yet; see the +> phased plan in `SPEC.md`. -It reuses the sibling project **docmost-mcp** as a library (DocmostClient, -ProseMirror ↔ Markdown converter, collab-write). 
+It reuses the sibling project **docmost-mcp** as a library: the `DocmostClient` +REST client and the lossless ProseMirror ↔ Markdown converter are extracted into +this monorepo (so changes can be backported file-by-file). + +## Layout + +This is an npm-workspaces monorepo: + +- **`packages/docmost-client`** (`docmost-client`) — the Docmost REST client and + its `lib/` (converter, markdown-document, collaboration, …). Its source layout + mirrors `docmost-mcp/src/` 1:1 so diffs can be backported by copying files. + Sync-specific REST methods are added under clearly marked `docmost-sync + additions` banners. +- **the repo ROOT** — the sync engine app (`src/`, `test/`, `build/`, `data/`). + It depends on `docmost-client` and holds the config (`src/settings.ts`), + filename sanitization (`src/sanitize.ts`), the Phase-0 round-trip idempotency + harness (`src/roundtrip.ts`), and the read-only `pull` (`src/pull.ts`). + +## Install & build + +Requires Node >= 20. + +```sh +npm install # links the workspace packages +npm run build # builds docmost-client, then compiles the app into build/ +``` + +`docmost-client` must build before the app (the app consumes its built output); +the root `build` script builds the lib first, then runs `tsc`. ## Configuration -All config comes from ENV / `.env` (see [`.env.example`](./.env.example)), read -through the single settings layer in `src/settings.ts`. A missing required -variable fails at startup with a clear message that names it. +Copy [`.env.example`](./.env.example) to `.env` and fill in real values. The +config is read through [`src/settings.ts`](./src/settings.ts). -| Variable | Required | Default | Meaning | -| ------------------ | :------: | ------------ | -------------------------------------------------------------- | -| `DOCMOST_API_URL` | yes | — | Base URL of our Docmost instance (used for `/auth/login`). | -| `DOCMOST_EMAIL` | yes | — | Docmost login email. | -| `DOCMOST_PASSWORD` | yes | — | Docmost login password. 
| -| `DOCMOST_SPACE_ID` | yes | — | The Docmost space to mirror. | -| `VAULT_PATH` | no | `data/vault` | Local git vault path (kept under `data/` for the volume). | -| `GIT_REMOTE` | no | _(unset)_ | Optional git remote the vault pushes to; empty = local-only. | -| `POLL_INTERVAL_MS` | no | `15000` | How often to poll Docmost for changes (ms). | -| `DEBOUNCE_MS` | no | `2000` | Debounce window for local file changes (ms). | -| `LOG_LEVEL` | no | `info` | One of `debug`, `info`, `warn`, `error`. | +| Variable | Required | Meaning | +| ------------------- | :------: | -------------------------------------------------------- | +| `DOCMOST_API_URL` | yes | Base URL of our Docmost instance. | +| `DOCMOST_EMAIL` | yes | Docmost service-user login email. | +| `DOCMOST_PASSWORD` | yes | Docmost service-user login password. | +| `DOCMOST_SPACE_ID` | yes | Which Docmost space to mirror. | +| `VAULT_PATH` | no | Local vault directory (default `data/vault`). | +| `GIT_REMOTE` | no | Optional git remote the vault pushes to. | +| `POLL_INTERVAL_MS` | no | Poll interval in ms (default `15000`). | +| `DEBOUNCE_MS` | no | Debounce window in ms (default `2000`). | +| `LOG_LEVEL` | no | `debug` \| `info` \| `warn` \| `error` (default `info`). | -Credentials and the address of our own Docmost instance have NO default — they -go ONLY into `.env`, never into code or inline command-line env vars. +**Real secrets go in `.env`, which is git-ignored — never commit them.** The +git remote grants access to the whole vault, so protect it no less than Docmost +itself (SPEC §12). -## Quick start +## Running + +### Round-trip idempotency harness (Phase 0, SPEC §11) + +Verifies that `export → import → export` is byte-stable. 
Runs offline against a +fixture (the default for CI) — **no Docmost credentials needed**: ```sh -make install # install dependencies (npm ci) -make env # create .env from .env.example, then fill it in -make test # run the test suite (vitest) -make run # build and run -make dev # run in watch mode (tsx) +npm run build +node build/roundtrip.js --fixture test/fixtures/sample-doc.json ``` -`make` (or `make help`) lists all targets. +Or against a live page (needs `.env`): -## Deploy +```sh +node build/roundtrip.js --page <pageId> +``` -Production runs a prebuilt image from `ghcr.io` (no build on prod): -`docker-compose.yml` pulls `ghcr.io/vvzvlad/docmost-sync:latest`, mounts a -volume at `/app/data`, and [watchtower](https://containrrr.dev/watchtower/) -auto-updates the container when a new image is published. CI (GitHub Actions) -builds and pushes the image; the `build` job runs only after `test` passes. +Exit code is 0 when the markdown is byte-stable, 1 on a markdown divergence +(CI-able). A document-level divergence after stripping block ids is a known +SPEC §11 finding and does not fail the run. + +### Pull (Docmost → filesystem mirror, SPEC §6) + +Read-only mirror: walks the configured space's page tree and writes one `.md` +per page under `<VAULT_PATH>/<…ancestors>/<title>.md`. **Requires a `.env` with +real Docmost credentials** — it makes live REST calls and does not touch Docmost +state (read-only this increment): + +```sh +npm run pull +``` diff --git a/SPEC.md b/SPEC.md index 1f54bc7..b66c33f 100644 --- a/SPEC.md +++ b/SPEC.md @@ -29,8 +29,8 @@ Реализация — **monorepo (npm workspaces)**: `packages/docmost-client` (выносной `DocmostClient` + `lib/*`, лейаут 1:1 с `docmost-mcp/src/` — sync-методы дописываем -сюда, изменения бэкпортятся в `docmost-mcp` вручную) и `packages/sync` (движок -синхронизации). +сюда, изменения бэкпортятся в `docmost-mcp` вручную), а движок синхронизации — +приложение в корне репозитория (`src/`, по конвенциям `AGENTS.md`).
**Важно:** MCP-инструменты — это тонкая обёртка над HTTP API Docmost. Синк-движок ходит в REST Docmost **напрямую** и волен использовать любые эндпойнты, которых @@ -475,7 +475,11 @@ append|prepend|replace, format: json|markdown|html }`) перезаписыва ### Подводные камни 1. Пагинация **курсорная** (`cursor` / `beforeCursor` / `limit` ≤ 100) в теле - JSON — не `page` / `offset`. + JSON — не `page` / `offset`. `listRecentSince` (changes-since) идёт по + `cursor`/`nextCursor` согласно этому, с обрывом по `updatedAt ≤ T_last`. + Примечание: общий `paginateAll` переиспользуемого клиента и `sidebar-pages` + исторически ходят через `page`/`limit` (сервер принимает) — на них опираются + `listTrash` / `listAllSpacePages`. 2. Корзина и `recent` могут быть пер-спейс → перечисляем спейсы. 3. `content` отдают только `/info` и `/trash`; `/recent` — без тела. 4. Запись тела — collab-путь, не `/update` (см. выше). diff --git a/package-lock.json b/package-lock.json index 6b47904..4d62c0a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,11 @@ "name": "docmost-sync", "version": "0.1.0", "license": "MIT", + "workspaces": [ + "packages/*" + ], "dependencies": { + "docmost-client": "*", "dotenv": "17.4.2", "zod": "3.25.76" }, @@ -22,6 +26,158 @@ "node": ">=20" } }, + "node_modules/@acemir/cssom": { + "version": "0.9.31", + "license": "MIT" + }, + "node_modules/@asamuzakjp/css-color": { + "version": "4.1.2", + "license": "MIT", + "dependencies": { + "@csstools/css-calc": "^3.0.0", + "@csstools/css-color-parser": "^4.0.1", + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0", + "lru-cache": "^11.2.5" + } + }, + "node_modules/@asamuzakjp/dom-selector": { + "version": "6.8.1", + "license": "MIT", + "dependencies": { + "@asamuzakjp/nwsapi": "^2.3.9", + "bidi-js": "^1.0.3", + "css-tree": "^3.1.0", + "is-potential-custom-element-name": "^1.0.1", + "lru-cache": "^11.2.6" + } + }, + "node_modules/@asamuzakjp/nwsapi": { + "version": "2.3.9", + 
"license": "MIT" + }, + "node_modules/@csstools/color-helpers": { + "version": "6.0.2", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "engines": { + "node": ">=20.19.0" + } + }, + "node_modules/@csstools/css-calc": { + "version": "3.2.1", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-color-parser": { + "version": "4.1.7", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "dependencies": { + "@csstools/color-helpers": "^6.0.2", + "@csstools/css-calc": "^3.2.1" + }, + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-parser-algorithms": "^4.0.0", + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-parser-algorithms": { + "version": "4.0.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + }, + "peerDependencies": { + "@csstools/css-tokenizer": "^4.0.0" + } + }, + "node_modules/@csstools/css-syntax-patches-for-csstree": { + "version": "1.1.5", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT-0", + "peerDependencies": { + "css-tree": 
"^3.2.1" + }, + "peerDependenciesMeta": { + "css-tree": { + "optional": true + } + } + }, + "node_modules/@csstools/css-tokenizer": { + "version": "4.0.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/csstools" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/csstools" + } + ], + "license": "MIT", + "engines": { + "node": ">=20.19.0" + } + }, "node_modules/@esbuild/aix-ppc64": { "version": "0.28.1", "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.28.1.tgz", @@ -92,8 +248,6 @@ }, "node_modules/@esbuild/darwin-arm64": { "version": "0.28.1", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.28.1.tgz", - "integrity": "sha512-TZbWkQY7kvTAXbXUT7uVACR5cMHsDiSz9z7ZKAX/RTq/WJEk3QyRr0wZpNhBDX+/0CtdqUIJlOiodQcta6tY3Q==", "cpu": [ "arm64" ], @@ -464,13 +618,77 @@ "node": ">=18" } }, + "node_modules/@exodus/bytes": { + "version": "1.15.1", + "license": "MIT", + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "@noble/hashes": "^1.8.0 || ^2.0.0" + }, + "peerDependenciesMeta": { + "@noble/hashes": { + "optional": true + } + } + }, + "node_modules/@fellow/prosemirror-recreate-transform": { + "version": "1.2.3", + "license": "Apache-2.0", + "dependencies": { + "diff": "^5.1.0", + "prosemirror-model": "^1.18.1", + "prosemirror-transform": "^1.7.0", + "rfc6902": "^5.0.1" + }, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/@hocuspocus/common": { + "version": "3.4.4", + "license": "MIT", + "dependencies": { + "lib0": "^0.2.87" + } + }, + "node_modules/@hocuspocus/provider": { + "version": "3.4.4", + "license": "MIT", + "dependencies": { + "@hocuspocus/common": "^3.4.4", + "@lifeomic/attempt": "^3.0.2", + "lib0": "^0.2.87", + "ws": "^8.17.1" + }, + "peerDependencies": { + "y-protocols": "^1.0.6", + "yjs": "^13.6.8" + } + }, + "node_modules/@hocuspocus/transformer": { + "version": "3.4.4", + "license": "MIT", + 
"dependencies": { + "@tiptap/starter-kit": "^3.0.1" + }, + "peerDependencies": { + "@tiptap/core": "^3.0.1", + "@tiptap/pm": "^3.0.1", + "y-prosemirror": "^1.2.1", + "yjs": "^13.6.8" + } + }, "node_modules/@jridgewell/sourcemap-codec": { "version": "1.5.5", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", - "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", "dev": true, "license": "MIT" }, + "node_modules/@lifeomic/attempt": { + "version": "3.1.0", + "license": "MIT" + }, "node_modules/@rollup/rollup-android-arm-eabi": { "version": "4.62.0", "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.62.0.tgz", @@ -501,8 +719,6 @@ }, "node_modules/@rollup/rollup-darwin-arm64": { "version": "4.62.0", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.62.0.tgz", - "integrity": "sha512-BqCoMoIbn0keKys+dEAdBa70EtOwV1bEsQCUgU9FdiZmmMge/Zk7LlkYGqbrdHR+Frnt0E1FOanly+rlwvvQzw==", "cpu": [ "arm64" ], @@ -821,10 +1037,407 @@ "win32" ] }, + "node_modules/@tiptap/core": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/extension-blockquote": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-bold": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-bullet-list": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extension-list": "3.26.1" + } + }, + "node_modules/@tiptap/extension-code": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-code-block": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/extension-document": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-dropcursor": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extensions": "3.26.1" + } + }, + "node_modules/@tiptap/extension-gapcursor": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extensions": "3.26.1" + } + }, + "node_modules/@tiptap/extension-hard-break": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-heading": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-highlight": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-horizontal-rule": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/extension-image": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-italic": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-link": { + "version": "3.26.1", + "license": "MIT", + "dependencies": { + "linkifyjs": "^4.3.3" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/extension-list": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/extension-list-item": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extension-list": "3.26.1" + } + }, + "node_modules/@tiptap/extension-list-keymap": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extension-list": "3.26.1" + } + }, + "node_modules/@tiptap/extension-ordered-list": { + "version": 
"3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extension-list": "3.26.1" + } + }, + "node_modules/@tiptap/extension-paragraph": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-strike": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-subscript": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/extension-superscript": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/extension-task-item": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extension-list": "3.26.1" + } + }, + "node_modules/@tiptap/extension-task-list": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/extension-list": "3.26.1" + } + }, + "node_modules/@tiptap/extension-text": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extension-underline": { + 
"version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1" + } + }, + "node_modules/@tiptap/extensions": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1" + } + }, + "node_modules/@tiptap/html": { + "version": "3.26.1", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + }, + "peerDependencies": { + "@tiptap/core": "3.26.1", + "@tiptap/pm": "3.26.1", + "happy-dom": "^20.8.9" + } + }, + "node_modules/@tiptap/pm": { + "version": "3.26.1", + "license": "MIT", + "dependencies": { + "prosemirror-changeset": "^2.3.0", + "prosemirror-commands": "^1.6.2", + "prosemirror-dropcursor": "^1.8.1", + "prosemirror-gapcursor": "^1.3.2", + "prosemirror-history": "^1.4.1", + "prosemirror-inputrules": "^1.4.0", + "prosemirror-keymap": "^1.2.3", + "prosemirror-model": "^1.25.7", + "prosemirror-schema-list": "^1.5.0", + "prosemirror-state": "^1.4.4", + "prosemirror-tables": "^1.8.0", + "prosemirror-transform": "^1.12.0", + "prosemirror-view": "^1.41.8" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + } + }, + "node_modules/@tiptap/starter-kit": { + "version": "3.26.1", + "license": "MIT", + "dependencies": { + "@tiptap/core": "^3.26.1", + "@tiptap/extension-blockquote": "^3.26.1", + "@tiptap/extension-bold": "^3.26.1", + "@tiptap/extension-bullet-list": "^3.26.1", + "@tiptap/extension-code": "^3.26.1", + "@tiptap/extension-code-block": "^3.26.1", + "@tiptap/extension-document": "^3.26.1", + "@tiptap/extension-dropcursor": "^3.26.1", + "@tiptap/extension-gapcursor": "^3.26.1", + "@tiptap/extension-hard-break": "^3.26.1", + "@tiptap/extension-heading": "^3.26.1", + "@tiptap/extension-horizontal-rule": "^3.26.1", 
+ "@tiptap/extension-italic": "^3.26.1", + "@tiptap/extension-link": "^3.26.1", + "@tiptap/extension-list": "^3.26.1", + "@tiptap/extension-list-item": "^3.26.1", + "@tiptap/extension-list-keymap": "^3.26.1", + "@tiptap/extension-ordered-list": "^3.26.1", + "@tiptap/extension-paragraph": "^3.26.1", + "@tiptap/extension-strike": "^3.26.1", + "@tiptap/extension-text": "^3.26.1", + "@tiptap/extension-underline": "^3.26.1", + "@tiptap/extensions": "^3.26.1", + "@tiptap/pm": "^3.26.1" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/ueberdosis" + } + }, "node_modules/@types/chai": { "version": "5.2.3", - "resolved": "https://registry.npmjs.org/@types/chai/-/chai-5.2.3.tgz", - "integrity": "sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==", "dev": true, "license": "MIT", "dependencies": { @@ -834,32 +1447,50 @@ }, "node_modules/@types/deep-eql": { "version": "4.0.2", - "resolved": "https://registry.npmjs.org/@types/deep-eql/-/deep-eql-4.0.2.tgz", - "integrity": "sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==", "dev": true, "license": "MIT" }, "node_modules/@types/estree": { "version": "1.0.9", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.9.tgz", - "integrity": "sha512-GhdPgy1el4/ImP05X05Uw4cw2/M93BCUmnEvWZNStlCzEKME4Fkk+YpoA5OiHNQmoS7Cafb8Xa3Pya8m1Qrzeg==", "dev": true, "license": "MIT" }, + "node_modules/@types/jsdom": { + "version": "27.0.0", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "@types/tough-cookie": "*", + "parse5": "^7.0.0" + } + }, "node_modules/@types/node": { "version": "22.19.21", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.21.tgz", - "integrity": "sha512-VMeFBSCKQKmm2swI2kW51SFusDqekC6q9trBCvJ/JliDchFSuoYYKN7yVNjPthP1HKZcx3U1gI/wTcEBjEFKTA==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~6.21.0" } }, + 
"node_modules/@types/tough-cookie": { + "version": "4.0.5", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/whatwg-mimetype": { + "version": "3.0.2", + "license": "MIT", + "peer": true + }, + "node_modules/@types/ws": { + "version": "8.18.1", + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@vitest/expect": { "version": "3.2.6", - "resolved": "https://registry.npmjs.org/@vitest/expect/-/expect-3.2.6.tgz", - "integrity": "sha512-1+7q9BtaKzEmO+fmNT3kYvoNn5Y71XWAx2Q5HRim4tTVRQVRv4uJFAQ5FbK0OPUeNP/WmVCpxYxoJdvuHVjzBQ==", "dev": true, "license": "MIT", "dependencies": { @@ -875,8 +1506,6 @@ }, "node_modules/@vitest/mocker": { "version": "3.2.6", - "resolved": "https://registry.npmjs.org/@vitest/mocker/-/mocker-3.2.6.tgz", - "integrity": "sha512-EZOrpDbkKotFAP7wPAQV1UIyoGOk4oX7ynWhBhLB7v+meMHbQhU16oPpIYGTTe4oFlhpryGpgpcZP/sin3hYuw==", "dev": true, "license": "MIT", "dependencies": { @@ -902,8 +1531,6 @@ }, "node_modules/@vitest/pretty-format": { "version": "3.2.6", - "resolved": "https://registry.npmjs.org/@vitest/pretty-format/-/pretty-format-3.2.6.tgz", - "integrity": "sha512-lb7XXXzmm2h2ASzFnRvQpDo6onT1NmMJA3tkGTWiBFtRJ9lxGY3d3mm/Apt36gej2bkkOVLL/yTOtufDaFa/jA==", "dev": true, "license": "MIT", "dependencies": { @@ -915,8 +1542,6 @@ }, "node_modules/@vitest/runner": { "version": "3.2.6", - "resolved": "https://registry.npmjs.org/@vitest/runner/-/runner-3.2.6.tgz", - "integrity": "sha512-HYcoSj1w5tcgUnzoF0HcyaAQjpA1gj9ftUJ7iSJSuipc02jW9gKkigwZbjFldAfYHA1fa8UZVRftdMY5msWM9Q==", "dev": true, "license": "MIT", "dependencies": { @@ -930,8 +1555,6 @@ }, "node_modules/@vitest/snapshot": { "version": "3.2.6", - "resolved": "https://registry.npmjs.org/@vitest/snapshot/-/snapshot-3.2.6.tgz", - "integrity": "sha512-H+ZjNTWGpObenh0YnlBctAPnJSI20P81PL8BPzWpx54YXLLTm8hEsWawtcYLMrwvpK48hGxLLbCS+1KRXhsKhw==", "dev": true, "license": "MIT", "dependencies": { @@ -945,8 +1568,6 @@ }, "node_modules/@vitest/spy": { "version": "3.2.6", - 
"resolved": "https://registry.npmjs.org/@vitest/spy/-/spy-3.2.6.tgz", - "integrity": "sha512-oq6BbH68WzcWmwtBrU9nqLeaXTR4XwJF7FSLkKEZo4i6eoXcrxjcwSuTvWBIRUTC6VC72nXYunzqgZA+IKdtxg==", "dev": true, "license": "MIT", "dependencies": { @@ -958,8 +1579,6 @@ }, "node_modules/@vitest/utils": { "version": "3.2.6", - "resolved": "https://registry.npmjs.org/@vitest/utils/-/utils-3.2.6.tgz", - "integrity": "sha512-lI23nIs4bnT3T8NIoh+vFaz5s2/DdP0Jgt2jxwgWljvwn82cLJtyi/If+fjFyoLMGIOz0U/fKvWE0d4jsNQEfg==", "dev": true, "license": "MIT", "dependencies": { @@ -971,30 +1590,77 @@ "url": "https://opencollective.com/vitest" } }, + "node_modules/agent-base": { + "version": "6.0.2", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, "node_modules/assertion-error": { "version": "2.0.1", - "resolved": "https://registry.npmjs.org/assertion-error/-/assertion-error-2.0.1.tgz", - "integrity": "sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==", "dev": true, "license": "MIT", "engines": { "node": ">=12" } }, + "node_modules/asynckit": { + "version": "0.4.0", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.18.0", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.16.0", + "form-data": "^4.0.5", + "https-proxy-agent": "^5.0.1", + "proxy-from-env": "^2.1.0" + } + }, + "node_modules/bidi-js": { + "version": "1.0.3", + "license": "MIT", + "dependencies": { + "require-from-string": "^2.0.2" + } + }, + "node_modules/buffer-image-size": { + "version": "0.6.4", + "license": "MIT", + "peer": true, + "dependencies": { + "@types/node": "*" + }, + "engines": { + "node": ">=4.0" + } + }, "node_modules/cac": { "version": "6.7.14", - "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", - "integrity": "sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==", "dev": true, "license": "MIT", "engines": { "node": ">=8" } }, + 
"node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/chai": { "version": "5.3.3", - "resolved": "https://registry.npmjs.org/chai/-/chai-5.3.3.tgz", - "integrity": "sha512-4zNhdJD/iOjSH0A05ea+Ke6MU5mmpQcbQsSOkgdaUMJ9zTlDTD/GYlwohmIE2u0gaxHYiVHEn1Fw9mZ/ktJWgw==", "dev": true, "license": "MIT", "dependencies": { @@ -1010,19 +1676,66 @@ }, "node_modules/check-error": { "version": "2.1.3", - "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.3.tgz", - "integrity": "sha512-PAJdDJusoxnwm1VwW07VWwUN1sl7smmC3OKggvndJFadxxDRyFJBX/ggnu/KE4kQAB7a3Dp8f/YXC1FlUprWmA==", "dev": true, "license": "MIT", "engines": { "node": ">= 16" } }, + "node_modules/combined-stream": { + "version": "1.0.8", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/css-tree": { + "version": "3.2.1", + "license": "MIT", + "dependencies": { + "mdn-data": "2.27.1", + "source-map-js": "^1.2.1" + }, + "engines": { + "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0" + } + }, + "node_modules/cssstyle": { + "version": "5.3.7", + "license": "MIT", + "dependencies": { + "@asamuzakjp/css-color": "^4.1.1", + "@csstools/css-syntax-patches-for-csstree": "^1.0.21", + "css-tree": "^3.1.0", + "lru-cache": "^11.2.4" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/data-urls": { + "version": "6.0.1", + "license": "MIT", + "dependencies": { + "whatwg-mimetype": "^5.0.0", + "whatwg-url": "^15.1.0" + }, + "engines": { + "node": ">=20" + } + }, + "node_modules/data-urls/node_modules/whatwg-mimetype": { + "version": "5.0.0", + "license": "MIT", + "engines": { + "node": ">=20" + } + }, "node_modules/debug": { "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": 
"sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "dev": true, "license": "MIT", "dependencies": { "ms": "^2.1.3" @@ -1036,20 +1749,38 @@ } } }, + "node_modules/decimal.js": { + "version": "10.6.0", + "license": "MIT" + }, "node_modules/deep-eql": { "version": "5.0.2", - "resolved": "https://registry.npmjs.org/deep-eql/-/deep-eql-5.0.2.tgz", - "integrity": "sha512-h5k/5U50IJJFpzfL6nO9jaaumfjO/f2NjK/oYB2Djzm4p9L+3T9qWpZqZ2hAbLPuuYq9wrU08WQyBTL5GbPk5Q==", "dev": true, "license": "MIT", "engines": { "node": ">=6" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/diff": { + "version": "5.2.2", + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/docmost-client": { + "resolved": "packages/docmost-client", + "link": true + }, "node_modules/dotenv": { "version": "17.4.2", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz", - "integrity": "sha512-nI4U3TottKAcAD9LLud4Cb7b2QztQMUEfHbvhTH09bqXTxnSie8WnjPALV/WMCrJZ6UV/qHJ6L03OqO3LcdYZw==", "license": "BSD-2-Clause", "engines": { "node": ">=12" @@ -1058,17 +1789,73 @@ "url": "https://dotenvx.com" } }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/entities": { + "version": "7.0.1", + "license": "BSD-2-Clause", + "peer": true, + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/es-module-lexer": { "version": "1.7.0", - "resolved": 
"https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.7.0.tgz", - "integrity": "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==", "dev": true, "license": "MIT" }, + "node_modules/es-object-atoms": { + "version": "1.1.2", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/esbuild": { "version": "0.28.1", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.28.1.tgz", - "integrity": "sha512-HrJrvZv5ayxBzPfwphOoNzkzOIIlifzk0KJrGK2c8R4+LKpMtpYLQeUdjnwjWv/LZlkH2laZk+4w78pi99D4Vw==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ -1109,8 +1896,6 @@ }, "node_modules/estree-walker": { "version": "3.0.3", - "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-3.0.3.tgz", - "integrity": "sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==", "dev": true, "license": "MIT", "dependencies": { @@ -1119,8 +1904,6 @@ }, "node_modules/expect-type": { "version": "1.3.0", - "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz", - "integrity": "sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==", "dev": true, "license": "Apache-2.0", "engines": { @@ -1129,8 +1912,6 @@ }, "node_modules/fdir": { "version": "6.5.0", - "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", - "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "dev": true, "license": "MIT", "engines": { @@ -1145,12 +1926,41 @@ } } }, + "node_modules/follow-redirects": { + "version": "1.16.0", + "funding": [ + { + "type": 
"individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.6", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.4", + "mime-types": "^2.1.35" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/fsevents": { "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", "dev": true, - "hasInstallScript": true, "license": "MIT", "optional": true, "os": [ @@ -1160,41 +1970,331 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, + "node_modules/function-bind": { + "version": "1.1.2", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/happy-dom": { + "version": "20.10.4", + "license": "MIT", + "peer": true, + "dependencies": { + 
"@types/node": ">=20.0.0", + "@types/whatwg-mimetype": "^3.0.2", + "@types/ws": "^8.18.1", + "buffer-image-size": "^0.6.4", + "entities": "^7.0.1", + "whatwg-mimetype": "^3.0.0", + "ws": "^8.21.0" + }, + "engines": { + "node": ">=20.0.0" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.4", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/html-encoding-sniffer": { + "version": "6.0.0", + "license": "MIT", + "dependencies": { + "@exodus/bytes": "^1.6.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + } + }, + "node_modules/http-proxy-agent": { + "version": "7.0.2", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/http-proxy-agent/node_modules/agent-base": { + "version": "7.1.4", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/is-potential-custom-element-name": { + "version": "1.0.1", + "license": "MIT" + }, + "node_modules/isomorphic.js": { + "version": "0.2.5", + "license": "MIT", + "funding": { + "type": "GitHub Sponsors ❤", + "url": "https://github.com/sponsors/dmonad" + } + }, "node_modules/js-tokens": { "version": "9.0.1", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-9.0.1.tgz", - "integrity": 
"sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==", "dev": true, "license": "MIT" }, + "node_modules/jsdom": { + "version": "27.4.0", + "license": "MIT", + "dependencies": { + "@acemir/cssom": "^0.9.28", + "@asamuzakjp/dom-selector": "^6.7.6", + "@exodus/bytes": "^1.6.0", + "cssstyle": "^5.3.4", + "data-urls": "^6.0.0", + "decimal.js": "^10.6.0", + "html-encoding-sniffer": "^6.0.0", + "http-proxy-agent": "^7.0.2", + "https-proxy-agent": "^7.0.6", + "is-potential-custom-element-name": "^1.0.1", + "parse5": "^8.0.0", + "saxes": "^6.0.0", + "symbol-tree": "^3.2.4", + "tough-cookie": "^6.0.0", + "w3c-xmlserializer": "^5.0.0", + "webidl-conversions": "^8.0.0", + "whatwg-mimetype": "^4.0.0", + "whatwg-url": "^15.1.0", + "ws": "^8.18.3", + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": "^20.19.0 || ^22.12.0 || >=24.0.0" + }, + "peerDependencies": { + "canvas": "^3.0.0" + }, + "peerDependenciesMeta": { + "canvas": { + "optional": true + } + } + }, + "node_modules/jsdom/node_modules/agent-base": { + "version": "7.1.4", + "license": "MIT", + "engines": { + "node": ">= 14" + } + }, + "node_modules/jsdom/node_modules/entities": { + "version": "8.0.0", + "license": "BSD-2-Clause", + "engines": { + "node": ">=20.19.0" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/jsdom/node_modules/https-proxy-agent": { + "version": "7.0.6", + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/jsdom/node_modules/parse5": { + "version": "8.0.1", + "license": "MIT", + "dependencies": { + "entities": "^8.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/jsdom/node_modules/whatwg-mimetype": { + "version": "4.0.0", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/lib0": { + "version": "0.2.117", + "license": "MIT", + 
"dependencies": { + "isomorphic.js": "^0.2.4" + }, + "bin": { + "0ecdsa-generate-keypair": "bin/0ecdsa-generate-keypair.js", + "0gentesthtml": "bin/gentesthtml.js", + "0serve": "bin/0serve.js" + }, + "engines": { + "node": ">=16" + }, + "funding": { + "type": "GitHub Sponsors ❤", + "url": "https://github.com/sponsors/dmonad" + } + }, + "node_modules/linkifyjs": { + "version": "4.3.3", + "license": "MIT" + }, "node_modules/loupe": { "version": "3.2.1", - "resolved": "https://registry.npmjs.org/loupe/-/loupe-3.2.1.tgz", - "integrity": "sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==", "dev": true, "license": "MIT" }, + "node_modules/lru-cache": { + "version": "11.5.1", + "license": "BlueOak-1.0.0", + "engines": { + "node": "20 || >=22" + } + }, "node_modules/magic-string": { "version": "0.30.21", - "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", - "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", "dev": true, "license": "MIT", "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, + "node_modules/marked": { + "version": "17.0.6", + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 20" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mdn-data": { + "version": "2.27.1", + "license": "CC0-1.0" + }, + "node_modules/mime-db": { + "version": "1.52.0", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/ms": { "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - 
"dev": true, "license": "MIT" }, "node_modules/nanoid": { "version": "3.3.12", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.12.tgz", - "integrity": "sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ==", "dev": true, "funding": [ { @@ -1210,17 +2310,39 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/orderedmap": { + "version": "2.1.1", + "license": "MIT" + }, + "node_modules/parse5": { + "version": "7.3.0", + "dev": true, + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/pathe": { "version": "2.0.3", - "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", - "integrity": "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==", "dev": true, "license": "MIT" }, "node_modules/pathval": { "version": "2.0.1", - "resolved": "https://registry.npmjs.org/pathval/-/pathval-2.0.1.tgz", - "integrity": "sha512-//nshmD55c46FuFw26xV/xFAaB5HF9Xdap7HJBBnrKdAd6/GxDBaNA1870O79+9ueg61cZLSVc+OaFlfmObYVQ==", "dev": true, "license": "MIT", "engines": { @@ -1229,15 +2351,11 @@ }, "node_modules/picocolors": { "version": "1.1.1", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", "dev": true, "license": "ISC" }, "node_modules/picomatch": { "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", 
"engines": { @@ -1249,8 +2367,6 @@ }, "node_modules/postcss": { "version": "8.5.15", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.15.tgz", - "integrity": "sha512-FfR8sjd4em2T6fb3I2MwAJU7HWVMr9zba+enmQeeWFfCbm+UOC/0X4DS8XtpUTMwWMGbjKYP7xjfNekzyGmB3A==", "dev": true, "funding": [ { @@ -1276,10 +2392,146 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/prosemirror-changeset": { + "version": "2.4.1", + "license": "MIT", + "dependencies": { + "prosemirror-transform": "^1.0.0" + } + }, + "node_modules/prosemirror-commands": { + "version": "1.7.1", + "license": "MIT", + "dependencies": { + "prosemirror-model": "^1.0.0", + "prosemirror-state": "^1.0.0", + "prosemirror-transform": "^1.10.2" + } + }, + "node_modules/prosemirror-dropcursor": { + "version": "1.8.2", + "license": "MIT", + "dependencies": { + "prosemirror-state": "^1.0.0", + "prosemirror-transform": "^1.1.0", + "prosemirror-view": "^1.1.0" + } + }, + "node_modules/prosemirror-gapcursor": { + "version": "1.4.1", + "license": "MIT", + "dependencies": { + "prosemirror-keymap": "^1.0.0", + "prosemirror-model": "^1.0.0", + "prosemirror-state": "^1.0.0", + "prosemirror-view": "^1.0.0" + } + }, + "node_modules/prosemirror-history": { + "version": "1.5.0", + "license": "MIT", + "dependencies": { + "prosemirror-state": "^1.2.2", + "prosemirror-transform": "^1.0.0", + "prosemirror-view": "^1.31.0", + "rope-sequence": "^1.3.0" + } + }, + "node_modules/prosemirror-inputrules": { + "version": "1.5.1", + "license": "MIT", + "dependencies": { + "prosemirror-state": "^1.0.0", + "prosemirror-transform": "^1.0.0" + } + }, + "node_modules/prosemirror-keymap": { + "version": "1.2.3", + "license": "MIT", + "dependencies": { + "prosemirror-state": "^1.0.0", + "w3c-keyname": "^2.2.0" + } + }, + "node_modules/prosemirror-model": { + "version": "1.25.9", + "license": "MIT", + "dependencies": { + "orderedmap": "^2.0.0" + } + }, + "node_modules/prosemirror-schema-list": { + "version": "1.5.1", + "license": "MIT", + 
"dependencies": { + "prosemirror-model": "^1.0.0", + "prosemirror-state": "^1.0.0", + "prosemirror-transform": "^1.7.3" + } + }, + "node_modules/prosemirror-state": { + "version": "1.4.4", + "license": "MIT", + "dependencies": { + "prosemirror-model": "^1.0.0", + "prosemirror-transform": "^1.0.0", + "prosemirror-view": "^1.27.0" + } + }, + "node_modules/prosemirror-tables": { + "version": "1.8.5", + "license": "MIT", + "dependencies": { + "prosemirror-keymap": "^1.2.3", + "prosemirror-model": "^1.25.4", + "prosemirror-state": "^1.4.4", + "prosemirror-transform": "^1.10.5", + "prosemirror-view": "^1.41.4" + } + }, + "node_modules/prosemirror-transform": { + "version": "1.12.0", + "license": "MIT", + "dependencies": { + "prosemirror-model": "^1.21.0" + } + }, + "node_modules/prosemirror-view": { + "version": "1.41.9", + "license": "MIT", + "dependencies": { + "prosemirror-model": "^1.25.8", + "prosemirror-state": "^1.0.0", + "prosemirror-transform": "^1.1.0" + } + }, + "node_modules/proxy-from-env": { + "version": "2.1.0", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/rfc6902": { + "version": "5.2.0", + "license": "MIT" + }, "node_modules/rollup": { "version": "4.62.0", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.62.0.tgz", - "integrity": "sha512-nc72Wgq62I7rtDV4izT5/aaS0zxy3kttkinf9586ApknY3jZO9NYsmtc24fUckA0X7Q2v+ML4a15pdUlV5V/jA==", "dev": true, "license": "MIT", "dependencies": { @@ -1321,18 +2573,27 @@ "fsevents": "~2.3.2" } }, + "node_modules/rope-sequence": { + "version": "1.3.4", + "license": "MIT" + }, + "node_modules/saxes": { + "version": "6.0.0", + "license": "ISC", + "dependencies": { + "xmlchars": "^2.2.0" + }, + "engines": { + "node": ">=v12.22.7" + } + }, 
"node_modules/siginfo": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/siginfo/-/siginfo-2.0.0.tgz", - "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==", "dev": true, "license": "ISC" }, "node_modules/source-map-js": { "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "dev": true, "license": "BSD-3-Clause", "engines": { "node": ">=0.10.0" @@ -1340,22 +2601,16 @@ }, "node_modules/stackback": { "version": "0.0.2", - "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", - "integrity": "sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==", "dev": true, "license": "MIT" }, "node_modules/std-env": { "version": "3.10.0", - "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz", - "integrity": "sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==", "dev": true, "license": "MIT" }, "node_modules/strip-literal": { "version": "3.1.0", - "resolved": "https://registry.npmjs.org/strip-literal/-/strip-literal-3.1.0.tgz", - "integrity": "sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==", "dev": true, "license": "MIT", "dependencies": { @@ -1365,24 +2620,22 @@ "url": "https://github.com/sponsors/antfu" } }, + "node_modules/symbol-tree": { + "version": "3.2.4", + "license": "MIT" + }, "node_modules/tinybench": { "version": "2.9.0", - "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", - "integrity": "sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==", "dev": true, "license": "MIT" }, "node_modules/tinyexec": { "version": "0.3.2", - "resolved": "https://registry.npmjs.org/tinyexec/-/tinyexec-0.3.2.tgz", - 
"integrity": "sha512-KQQR9yN7R5+OSwaK0XQoj22pwHoTlgYqmUscPYoknOoWCWfj/5/ABTMRi69FrKU5ffPVh5QcFikpWJI/P1ocHA==", "dev": true, "license": "MIT" }, "node_modules/tinyglobby": { "version": "0.2.17", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.17.tgz", - "integrity": "sha512-wXR/dYpcqKmfWpEdZjiKJOwCNFndD0DMnrW/cYjVGttEkBfVgcLFHoNrlj47mjOVic9yyNu65alsgF4NQyTa2g==", "dev": true, "license": "MIT", "dependencies": { @@ -1398,8 +2651,6 @@ }, "node_modules/tinypool": { "version": "1.1.1", - "resolved": "https://registry.npmjs.org/tinypool/-/tinypool-1.1.1.tgz", - "integrity": "sha512-Zba82s87IFq9A9XmjiX5uZA/ARWDrB03OHlq+Vw1fSdt0I+4/Kutwy8BP4Y/y/aORMo61FQ0vIb5j44vSo5Pkg==", "dev": true, "license": "MIT", "engines": { @@ -1408,8 +2659,6 @@ }, "node_modules/tinyrainbow": { "version": "2.0.0", - "resolved": "https://registry.npmjs.org/tinyrainbow/-/tinyrainbow-2.0.0.tgz", - "integrity": "sha512-op4nsTR47R6p0vMUUoYl/a+ljLFVtlfaXkLQmqfLR1qHma1h/ysYk4hEXZ880bf2CYgTskvTa/e196Vd5dDQXw==", "dev": true, "license": "MIT", "engines": { @@ -1418,18 +2667,48 @@ }, "node_modules/tinyspy": { "version": "4.0.4", - "resolved": "https://registry.npmjs.org/tinyspy/-/tinyspy-4.0.4.tgz", - "integrity": "sha512-azl+t0z7pw/z958Gy9svOTuzqIk6xq+NSheJzn5MMWtWTFywIacg2wUlzKFGtt3cthx0r2SxMK0yzJOR0IES7Q==", "dev": true, "license": "MIT", "engines": { "node": ">=14.0.0" } }, + "node_modules/tldts": { + "version": "7.4.3", + "license": "MIT", + "dependencies": { + "tldts-core": "^7.4.3" + }, + "bin": { + "tldts": "bin/cli.js" + } + }, + "node_modules/tldts-core": { + "version": "7.4.3", + "license": "MIT" + }, + "node_modules/tough-cookie": { + "version": "6.0.1", + "license": "BSD-3-Clause", + "dependencies": { + "tldts": "^7.0.5" + }, + "engines": { + "node": ">=16" + } + }, + "node_modules/tr46": { + "version": "6.0.0", + "license": "MIT", + "dependencies": { + "punycode": "^2.3.1" + }, + "engines": { + "node": ">=20" + } + }, "node_modules/tsx": { "version": "4.22.4", - 
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.22.4.tgz", - "integrity": "sha512-X8EX+XV4QR5xCsrgxaED954zTDfY8KqlDtskKEL0cHhyS/P8b4IFOvGDQpsC9Q1XnLq915wEfwwY/zzskCtmhg==", "dev": true, "license": "MIT", "dependencies": { @@ -1447,8 +2726,6 @@ }, "node_modules/typescript": { "version": "5.9.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", - "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", "bin": { @@ -1461,15 +2738,10 @@ }, "node_modules/undici-types": { "version": "6.21.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", - "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true, "license": "MIT" }, "node_modules/vite": { "version": "7.3.5", - "resolved": "https://registry.npmjs.org/vite/-/vite-7.3.5.tgz", - "integrity": "sha512-KuOaNhcnGFN2zIPGA7wRmzF+lJA1sea7rHq17aiJ++9lzY1WWG6Jpwqwe1KNbRVPIqHmr8GLYx7jbrQcN/7/ww==", "dev": true, "license": "MIT", "dependencies": { @@ -1543,8 +2815,6 @@ }, "node_modules/vite-node": { "version": "3.2.4", - "resolved": "https://registry.npmjs.org/vite-node/-/vite-node-3.2.4.tgz", - "integrity": "sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==", "dev": true, "license": "MIT", "dependencies": { @@ -1634,8 +2904,6 @@ }, "node_modules/vite/node_modules/@esbuild/darwin-arm64": { "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", - "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", "cpu": [ "arm64" ], @@ -2008,8 +3276,6 @@ }, "node_modules/vite/node_modules/esbuild": { "version": "0.27.7", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", - "integrity": 
"sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", "dev": true, "hasInstallScript": true, "license": "MIT", @@ -2050,8 +3316,6 @@ }, "node_modules/vitest": { "version": "3.2.6", - "resolved": "https://registry.npmjs.org/vitest/-/vitest-3.2.6.tgz", - "integrity": "sha512-xejya+bT/j/+R/AGa1XOfRxLmNUlLtlwjRsFUILF+xHfzElmGcmFydy2gqqIrd62ptIEfwVMofd19uNWD9L7Nw==", "dev": true, "license": "MIT", "dependencies": { @@ -2121,10 +3385,48 @@ } } }, + "node_modules/w3c-keyname": { + "version": "2.2.8", + "license": "MIT" + }, + "node_modules/w3c-xmlserializer": { + "version": "5.0.0", + "license": "MIT", + "dependencies": { + "xml-name-validator": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/webidl-conversions": { + "version": "8.0.1", + "license": "BSD-2-Clause", + "engines": { + "node": ">=20" + } + }, + "node_modules/whatwg-mimetype": { + "version": "3.0.0", + "license": "MIT", + "peer": true, + "engines": { + "node": ">=12" + } + }, + "node_modules/whatwg-url": { + "version": "15.1.0", + "license": "MIT", + "dependencies": { + "tr46": "^6.0.0", + "webidl-conversions": "^8.0.0" + }, + "engines": { + "node": ">=20" + } + }, "node_modules/why-is-node-running": { "version": "2.3.0", - "resolved": "https://registry.npmjs.org/why-is-node-running/-/why-is-node-running-2.3.0.tgz", - "integrity": "sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==", "dev": true, "license": "MIT", "dependencies": { @@ -2138,14 +3440,137 @@ "node": ">=8" } }, + "node_modules/ws": { + "version": "8.21.0", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xml-name-validator": { + "version": "5.0.0", + "license": "Apache-2.0", + "engines": { + "node": 
">=18" + } + }, + "node_modules/xmlchars": { + "version": "2.2.0", + "license": "MIT" + }, + "node_modules/y-prosemirror": { + "version": "1.3.7", + "license": "MIT", + "peer": true, + "dependencies": { + "lib0": "^0.2.109" + }, + "engines": { + "node": ">=16.0.0", + "npm": ">=8.0.0" + }, + "funding": { + "type": "GitHub Sponsors ❤", + "url": "https://github.com/sponsors/dmonad" + }, + "peerDependencies": { + "prosemirror-model": "^1.7.1", + "prosemirror-state": "^1.2.3", + "prosemirror-view": "^1.9.10", + "y-protocols": "^1.0.1", + "yjs": "^13.5.38" + } + }, + "node_modules/y-protocols": { + "version": "1.0.7", + "license": "MIT", + "peer": true, + "dependencies": { + "lib0": "^0.2.85" + }, + "engines": { + "node": ">=16.0.0", + "npm": ">=8.0.0" + }, + "funding": { + "type": "GitHub Sponsors ❤", + "url": "https://github.com/sponsors/dmonad" + }, + "peerDependencies": { + "yjs": "^13.0.0" + } + }, + "node_modules/yjs": { + "version": "13.6.31", + "license": "MIT", + "dependencies": { + "lib0": "^0.2.99" + }, + "engines": { + "node": ">=16.0.0", + "npm": ">=8.0.0" + }, + "funding": { + "type": "GitHub Sponsors ❤", + "url": "https://github.com/sponsors/dmonad" + } + }, "node_modules/zod": { "version": "3.25.76", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", - "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", "funding": { "url": "https://github.com/sponsors/colinhacks" } + }, + "packages/docmost-client": { + "version": "0.1.0", + "dependencies": { + "@fellow/prosemirror-recreate-transform": "^1.2.3", + "@hocuspocus/provider": "^3.4.4", + "@hocuspocus/transformer": "^3.4.4", + "@tiptap/core": "^3.18.0", + "@tiptap/extension-highlight": "^3.26.1", + "@tiptap/extension-image": "^3.18.0", + "@tiptap/extension-subscript": "^3.26.1", + "@tiptap/extension-superscript": "^3.26.1", + "@tiptap/extension-task-item": "^3.26.1", + "@tiptap/extension-task-list": "^3.26.1", + 
"@tiptap/html": "^3.18.0", + "@tiptap/pm": "^3.18.0", + "@tiptap/starter-kit": "^3.18.0", + "axios": "^1.6.0", + "form-data": "^4.0.0", + "jsdom": "^27.4.0", + "marked": "^17.0.1", + "ws": "^8.19.0", + "yjs": "^13.6.29" + }, + "devDependencies": { + "@types/jsdom": "^27.0.0", + "@types/node": "^20.0.0", + "@types/ws": "^8.5.10", + "typescript": "^5.0.0" + } + }, + "packages/docmost-client/node_modules/@types/node": { + "version": "20.19.43", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } } } } diff --git a/package.json b/package.json index 56a7bf6..87bcbc2 100644 --- a/package.json +++ b/package.json @@ -3,19 +3,21 @@ "version": "0.1.0", "private": true, "type": "module", - "description": "Bidirectional sync daemon between Docmost articles and a local Markdown git vault.", + "description": "Bidirectional sync daemon between Docmost articles and a local Markdown git vault (git is the state store). See SPEC.md.", "license": "MIT", - "engines": { - "node": ">=20" - }, + "workspaces": ["packages/*"], + "engines": { "node": ">=20" }, "scripts": { - "build": "tsc", + "build": "npm run build -w docmost-client && tsc", "start": "node build/index.js", "dev": "tsx watch src/index.ts", "test": "vitest run", - "test:watch": "vitest" + "test:watch": "vitest", + "roundtrip": "node build/roundtrip.js", + "pull": "node build/pull.js" }, "dependencies": { + "docmost-client": "*", "dotenv": "17.4.2", "zod": "3.25.76" }, diff --git a/packages/docmost-client/package.json b/packages/docmost-client/package.json new file mode 100644 index 0000000..fe6cacd --- /dev/null +++ b/packages/docmost-client/package.json @@ -0,0 +1,44 @@ +{ + "name": "docmost-client", + "version": "0.1.0", + "private": true, + "type": "module", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "scripts": { + "build": "tsc -p tsconfig.json" + }, + "dependencies": { + 
"@fellow/prosemirror-recreate-transform": "^1.2.3", + "@hocuspocus/provider": "^3.4.4", + "@hocuspocus/transformer": "^3.4.4", + "@tiptap/core": "^3.18.0", + "@tiptap/extension-highlight": "^3.26.1", + "@tiptap/extension-image": "^3.18.0", + "@tiptap/extension-subscript": "^3.26.1", + "@tiptap/extension-superscript": "^3.26.1", + "@tiptap/extension-task-item": "^3.26.1", + "@tiptap/extension-task-list": "^3.26.1", + "@tiptap/html": "^3.18.0", + "@tiptap/pm": "^3.18.0", + "@tiptap/starter-kit": "^3.18.0", + "axios": "^1.6.0", + "form-data": "^4.0.0", + "jsdom": "^27.4.0", + "marked": "^17.0.1", + "ws": "^8.19.0", + "yjs": "^13.6.29" + }, + "devDependencies": { + "@types/jsdom": "^27.0.0", + "@types/node": "^20.0.0", + "@types/ws": "^8.5.10", + "typescript": "^5.0.0" + } +} diff --git a/packages/docmost-client/src/client.ts b/packages/docmost-client/src/client.ts new file mode 100644 index 0000000..109c1be --- /dev/null +++ b/packages/docmost-client/src/client.ts @@ -0,0 +1,2770 @@ +import FormData from "form-data"; +import axios, { AxiosInstance } from "axios"; +import { readFileSync, statSync } from "fs"; +import { basename, extname } from "path"; +import { + filterWorkspace, + filterSpace, + filterPage, + filterComment, + filterSearchResult, +} from "./lib/filters.js"; +import { HocuspocusProvider } from "@hocuspocus/provider"; +import { TiptapTransformer } from "@hocuspocus/transformer"; +import * as Y from "yjs"; +import WebSocket from "ws"; +import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js"; +import { + updatePageContentRealtime, + replacePageContent, + markdownToProseMirror, + mutatePageContent, + buildCollabWsUrl, + assertYjsEncodable, +} from "./lib/collaboration.js"; +import { docmostExtensions } from "./lib/docmost-schema.js"; +import { + serializeDocmostMarkdown, + parseDocmostMarkdown, + serializeDocmostMarkdownBody, +} from "./lib/markdown-document.js"; +import { + replaceNodeById, + deleteNodeById, + insertNodeRelative, + 
/**
 * Build a client bound to one Docmost API base URL and one set of
 * credentials. No network traffic happens here; the first login is deferred
 * until a request needs it.
 *
 * @param baseURL  API base URL used for every request made through the
 *                 internal axios instance.
 * @param email    Account e-mail used by login().
 * @param password Account password used by login().
 */
constructor(baseURL: string, email: string, password: string) {
  this.apiUrl = baseURL;
  this.email = email;
  this.password = password;

  // A default 30 s timeout keeps a hung connection from wedging a per-page
  // lock or blocking the server forever; multipart uploads pass their own
  // longer per-request timeout.
  this.client = axios.create({
    baseURL,
    timeout: 30000,
    headers: {
      "Content-Type": "application/json",
    },
  });

  // Successful responses flow through untouched.
  const passThrough = (response: any) => response;

  // Transparent one-shot re-authentication: a long-running process can
  // outlive its JWT, so on a 401/403 the stale token is dropped, a fresh
  // login is performed and the original request is replayed exactly once.
  const reauthenticateOnce = async (error: any) => {
    const config = error.config;
    const status = error.response?.status;
    const authRejected = status === 401 || status === 403;
    const targetsLogin =
      typeof config?.url === "string" && config.url.includes("/auth/login");

    // Not replayable: no config, not an auth failure, already retried once
    // (config._retry guards against loops), or the login request itself.
    if (!config || !authRejected || config._retry || targetsLogin) {
      return Promise.reject(error);
    }

    config._retry = true;
    // Forget the stale token and its default header before logging in again.
    this.token = null;
    delete this.client.defaults.headers.common["Authorization"];

    try {
      await this.login();
    } catch (loginError) {
      // Re-login failed: the caller gets the ORIGINAL error, not the login one.
      return Promise.reject(error);
    }

    // Take the Bearer value from the default header login() just wrote rather
    // than from this.token, so a token cleared in between can never yield a
    // literal "Bearer null".
    config.headers = config.headers || {};
    config.headers["Authorization"] =
      this.client.defaults.headers.common["Authorization"];
    return this.client.request(config);
  };

  this.client.interceptors.response.use(passThrough, reauthenticateOnce);
}
/**
 * Obtain a collaboration token, re-authenticating at most once.
 *
 * getCollabToken() goes through bare axios, so the response interceptor on
 * this.client does not cover it. This helper reproduces the same policy by
 * hand: make sure a token exists, try once, and if the attempt is rejected
 * with 401/403 perform a fresh login and try exactly one more time. Any other
 * failure is rethrown unchanged.
 */
private async getCollabTokenWithReauth(): Promise<string> {
  await this.ensureAuthenticated();
  try {
    return await getCollabToken(this.apiUrl, this.token!);
  } catch (firstFailure) {
    // The helper wraps the AxiosError in a plain Error but attaches the HTTP
    // status as `.status`, so look at both the raw axios shape and the
    // attached field.
    const observedStatuses = [
      axios.isAxiosError(firstFailure)
        ? firstFailure.response?.status
        : undefined,
      (firstFailure as any)?.status,
    ];
    const tokenRejected = observedStatuses.some(
      (code) => code === 401 || code === 403,
    );
    if (!tokenRejected) {
      throw firstFailure;
    }
    await this.login();
    return await getCollabToken(this.apiUrl, this.token!);
  }
}
/**
 * Connect to the collaboration websocket, read the live doc, apply
 * `transform`, write the result, and wait for the server to persist it —
 * WITHOUT acquiring the per-page lock.
 *
 * This mirrors collaboration.mutatePageContent EXCEPT that it does not call
 * withPageLock. It exists solely so replaceImage can hold ONE withPageLock
 * across its scan -> upload -> write sequence: the per-page mutex is NOT
 * reentrant, so calling the normal (self-locking) mutatePageContent inside an
 * outer withPageLock for the same pageId would deadlock. The caller MUST hold
 * the page lock for the whole operation; this helper assumes that invariant.
 *
 * `transform` receives the live ProseMirror doc and returns the NEW full doc
 * to write, or `null` to abort with no write. Errors thrown by `transform`
 * propagate to the caller (as a rejection of the returned promise).
 *
 * @param pageId      Page whose collab document (`page.<pageId>`) is opened.
 * @param collabToken Token passed to the Hocuspocus provider.
 * @param transform   Synchronous doc -> doc function (or null to abort).
 * @returns Promise resolving to the doc that was written, or to the live doc
 *          when `transform` aborted; rejects on connect/persist timeout,
 *          disconnect/close, authentication failure, or a thrown error.
 */
private mutateLiveContentUnlocked(
  pageId: string,
  collabToken: string,
  transform: (liveDoc: any) => any | null,
): Promise<any> {
  const CONNECT_TIMEOUT_MS = 25000;
  const PERSIST_TIMEOUT_MS = 20000;
  const ydoc = new Y.Doc();
  const wsUrl = buildCollabWsUrl(this.apiUrl);

  return new Promise<any>((resolve, reject) => {
    let provider: HocuspocusProvider | undefined;
    let applied = false; // onSynced may fire again on reconnect — apply once.
    let settled = false; // set by finish(); makes every later finish() a no-op
    let connectionLost = false; // set by onDisconnect/onClose
    let connectTimer: ReturnType<typeof setTimeout> | undefined;
    let persistTimer: ReturnType<typeof setTimeout> | undefined;
    let unsyncedHandler: ((data: { number: number }) => void) | undefined;
    let lastWrittenDoc: any;

    // Release timers, the unsyncedChanges listener and the provider. Failures
    // while detaching/destroying are deliberately ignored (best-effort).
    const cleanup = () => {
      if (connectTimer) clearTimeout(connectTimer);
      if (persistTimer) clearTimeout(persistTimer);
      if (provider) {
        if (unsyncedHandler) {
          try {
            provider.off("unsyncedChanges", unsyncedHandler);
          } catch (err) {}
        }
        try {
          provider.destroy();
        } catch (err) {}
      }
    };

    // Single exit point: settles the promise exactly once and cleans up.
    const finish = (err: Error | null, value?: any) => {
      if (settled) return;
      settled = true;
      cleanup();
      if (err) reject(err);
      else resolve(value);
    };

    // NOTE(review): this timer is only cleared in cleanup(), i.e. it stays
    // armed after onSynced. It can therefore fire with a "Connection timeout"
    // message while the code is actually waiting for persistence — confirm
    // whether that is intended.
    connectTimer = setTimeout(() => {
      finish(new Error("Connection timeout to collaboration server"));
    }, CONNECT_TIMEOUT_MS);

    // After the local write: resolve immediately when the provider reports no
    // unsynced changes, otherwise wait for an "unsyncedChanges" event with
    // number === 0, bounded by PERSIST_TIMEOUT_MS.
    const waitForPersistence = () => {
      if (settled) return;
      if (!provider) {
        finish(new Error("collab provider gone before persistence"));
        return;
      }
      if (provider.unsyncedChanges === 0) {
        finish(null, lastWrittenDoc);
        return;
      }
      persistTimer = setTimeout(() => {
        finish(
          new Error(
            "Timeout waiting for collaboration server to persist the update",
          ),
        );
      }, PERSIST_TIMEOUT_MS);
      unsyncedHandler = (data: { number: number }) => {
        // Only treat "0 unsynced" as success while the connection is alive.
        if (data.number === 0 && !connectionLost) {
          finish(null, lastWrittenDoc);
        }
      };
      provider.on("unsyncedChanges", unsyncedHandler);
    };

    provider = new HocuspocusProvider({
      url: wsUrl,
      name: `page.${pageId}`,
      document: ydoc,
      token: collabToken,
      // @ts-ignore - Required for Node.js environment
      WebSocketPolyfill: WebSocket,
      // Either connection-loss callback fails the operation (a no-op when the
      // promise has already settled).
      onDisconnect: () => {
        connectionLost = true;
        finish(
          new Error(
            "Collaboration connection closed before the update was persisted/synced",
          ),
        );
      },
      onClose: () => {
        connectionLost = true;
        finish(
          new Error(
            "Collaboration connection closed before the update was persisted/synced",
          ),
        );
      },
      onSynced: () => {
        if (applied || settled) return;
        applied = true;

        // CRITICAL: keep everything between reading and writing the live doc
        // synchronous (no await) so no remote update can interleave.
        let newDoc: any;
        try {
          let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
          // Fall back to an empty doc when the decoded value is not a doc-like
          // object with a content array.
          if (
            !liveDoc ||
            typeof liveDoc !== "object" ||
            !Array.isArray(liveDoc.content)
          ) {
            liveDoc = { type: "doc", content: [] };
          }

          newDoc = transform(liveDoc);

          if (newDoc == null) {
            // Transform aborted — write nothing, return the live doc.
            lastWrittenDoc = liveDoc;
            finish(null, liveDoc);
            return;
          }

          // Encode the new doc into a temporary Y.Doc, then, in a single
          // transaction, clear the live fragment and apply the encoded state.
          const tempDoc = TiptapTransformer.toYdoc(
            newDoc,
            "default",
            docmostExtensions,
          );
          const fragment = ydoc.getXmlFragment("default");
          ydoc.transact(() => {
            if (fragment.length > 0) {
              fragment.delete(0, fragment.length);
            }
            Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc));
          });
        } catch (e) {
          // Covers errors from decoding, from `transform`, and from the write.
          finish(e instanceof Error ? e : new Error(String(e)));
          return;
        }

        lastWrittenDoc = newDoc;
        waitForPersistence();
      },
      onAuthenticationFailed: () => {
        finish(
          new Error("Authentication failed for collaboration connection"),
        );
      },
    });
  });
}
/**
 * Fetch the current workspace record.
 *
 * Posts to /workspace/info and accepts either an enveloped payload
 * (`{ data: {...} }`) or a bare one, passing the record through
 * filterWorkspace. `success` is forwarded from the response body as-is.
 */
async getWorkspace() {
  await this.ensureAuthenticated();
  const { data: envelope } = await this.client.post("/workspace/info", {});
  const workspace = filterWorkspace(envelope?.data ?? envelope);
  return { data: workspace, success: envelope.success };
}
/**
 * List sidebar pages for a space. With no pageId the request returns the
 * space ROOT pages; with a pageId it returns the direct CHILDREN of that
 * page. pageId is only included in the POST body when provided (an
 * empty/undefined pageId would otherwise change the semantics on the server).
 *
 * The endpoint is server-paged, so every page is fetched and accumulated.
 * The loop ends when the server stops reporting `meta.hasNextPage`, when a
 * page comes back empty (defends against a stuck hasNextPage flag without
 * issuing further pointless requests), or at the MAX_PAGES ceiling. Hitting
 * the ceiling while more pages exist logs a warning, matching paginateAll,
 * so the caller is not silently handed an incomplete list.
 *
 * @param spaceId Space to list.
 * @param pageId  Optional parent page; omit for the space roots.
 * @returns All sidebar items collected across the fetched pages.
 */
async listSidebarPages(spaceId: string, pageId?: string) {
  await this.ensureAuthenticated();

  const MAX_PAGES = 50;
  const allItems: any[] = [];
  let page = 1;
  let hasNextPage = true;

  while (hasNextPage && page <= MAX_PAGES) {
    // Only send pageId when scoping to a page's children; omit it for roots.
    const payload: Record<string, any> = { spaceId, page };
    if (pageId) payload.pageId = pageId;

    const response = await this.client.post("/pages/sidebar-pages", payload);
    const data = response.data?.data ?? response.data;
    const items: any[] = Array.isArray(data?.items) ? data.items : [];

    // An empty page means there is nothing further to collect, whatever the
    // hasNextPage flag claims.
    if (items.length === 0) {
      hasNextPage = false;
      break;
    }

    for (const item of items) allItems.push(item);

    hasNextPage = Boolean(data?.meta?.hasNextPage);
    page++;
  }

  // The loop ended on the ceiling while the server still reported more pages:
  // the result is truncated, so say so.
  if (hasNextPage && page > MAX_PAGES) {
    console.warn(
      `listSidebarPages: results for space "${spaceId}" truncated at the ${MAX_PAGES}-page cap; more pages exist on the server`,
    );
  }

  return allItems;
}
/**
 * Enumerate every page in a space (or in a subtree, when rootPageId is given)
 * by walking the sidebar-pages tree breadth-first.
 *
 * Starting set: the children of rootPageId when provided, otherwise the space
 * root pages. Each node is collected, and when node.hasChildren is true its
 * direct children are fetched via listSidebarPages(spaceId, node.id) and
 * appended to the work list.
 *
 * Safeguards: a `visited` Set of page ids prevents re-processing a node
 * (cycles / duplicate references), and MAX_NODES bounds the number of
 * collected nodes so the walk always terminates.
 *
 * The work list is consumed with a moving head index instead of
 * Array.prototype.shift(), which re-indexes the array on every dequeue and
 * made large walks quadratic.
 *
 * @param spaceId    Space to walk.
 * @param rootPageId Optional subtree root; its children seed the walk.
 * @returns The collected sidebar nodes in breadth-first order.
 */
private async enumerateSpacePages(
  spaceId: string,
  rootPageId?: string,
): Promise<any[]> {
  const MAX_NODES = 10000;
  const result: any[] = [];
  const visited = new Set<string>();

  // Seed the work list with the starting level (subtree children or roots).
  const queue: any[] = await this.listSidebarPages(spaceId, rootPageId);
  let head = 0;

  while (head < queue.length && result.length < MAX_NODES) {
    const node = queue[head++];
    if (!node || typeof node !== "object" || !node.id) continue;

    // Skip already-seen ids to guard against cycles / duplicate references.
    if (visited.has(node.id)) continue;
    visited.add(node.id);

    result.push(node);

    if (node.hasChildren) {
      try {
        const children = await this.listSidebarPages(spaceId, node.id);
        for (const child of children) queue.push(child);
      } catch (e: any) {
        // A failure fetching one node's children must not abort the whole
        // walk: skip this branch and keep enumerating the rest. Leave a trace
        // under DEBUG so a silently missing subtree can be diagnosed.
        if (process.env.DEBUG) {
          console.error(
            `enumerateSpacePages: failed to list children of page ${node.id}`,
            e,
          );
        }
      }
    }
  }

  return result;
}
/**
 * Fetch a page and return it with its body converted to markdown.
 *
 * The direct subpages are always fetched (best-effort: a failure is logged
 * and an empty list is used) and handed to filterPage. When the markdown
 * contains the `{{SUBPAGES}}` placeholder, its first occurrence is replaced
 * by a "### Subpages" bullet list of `page:<id>` links, or removed when the
 * page has no subpages.
 *
 * @param pageId Id of the page to fetch.
 * @returns `{ data, success: true }` where data is the filtered page.
 */
async getPage(pageId: string) {
  await this.ensureAuthenticated();
  const resultData = await this.getPageRaw(pageId);

  let content = resultData.content
    ? convertProseMirrorToMarkdown(resultData.content)
    : "";

  // Always fetch subpages to provide context to the agent.
  let subpages: any[] = [];
  try {
    subpages = await this.listSidebarPages(resultData.spaceId, pageId);
  } catch (e: any) {
    console.warn("Failed to fetch subpages:", e);
  }

  // Resolve the subpages placeholder if it exists.
  if (content && content.includes("{{SUBPAGES}}")) {
    const hasSubpages = Array.isArray(subpages) && subpages.length > 0;
    const replacement = hasSubpages
      ? `### Subpages\n${subpages
          .map((p: any) => `- [${p.title}](page:${p.id})`)
          .join("\n")}`
      : "";
    // Use a replacer FUNCTION: with a string replacement, String.replace
    // interprets "$&", "$'", "$1", "$$" etc. inside it, so a subpage title
    // containing a dollar sequence would corrupt the generated list.
    content = content.replace("{{SUBPAGES}}", () => replacement);
  }

  return {
    data: filterPage(resultData, content, subpages),
    success: true,
  };
}
/**
 * Return one node's full ProseMirror subtree (lossless), looked up by
 * reference: either a block id, or `#<index>` to pick a top-level block by
 * its outline index (needed for tables/rows/cells, which carry no id).
 *
 * @param pageId Page to read.
 * @param nodeId Block id or `#<index>` reference.
 * @throws Error when the reference resolves to no node.
 */
async getNode(pageId: string, nodeId: string) {
  await this.ensureAuthenticated();
  const page = await this.getPageRaw(pageId);
  // A page without content is treated as an empty document.
  const document = page.content ?? { type: "doc", content: [] };
  const match = getNodeByRef(document, nodeId);

  if (match) {
    const { path, type, node } = match;
    return { pageId, ref: nodeId, path, type, node };
  }

  throw new Error(
    `get_node: no node found for "${nodeId}" on page ${pageId} (use a block id from get_outline, or "#<index>" for a top-level block such as a table)`,
  );
}
/**
 * Insert a row of plain-text cells into a table on the LIVE collab document.
 *
 * @param pageId   Page holding the table.
 * @param tableRef `#<index>` or a block id inside the target table.
 * @param cells    Cell texts for the new row, handed to insertTableRow.
 * @param index    Optional 0-based insert position, handed to insertTableRow.
 * @throws Error when no table resolves for `tableRef`; errors raised by the
 *         insertTableRow helper propagate.
 */
async tableInsertRow(
  pageId: string,
  tableRef: string,
  cells: string[],
  index?: number,
) {
  await this.ensureAuthenticated();
  const collabToken = await this.getCollabTokenWithReauth();

  // Lives outside the transform and is reset on every invocation, so a collab
  // retry recomputes the outcome from scratch.
  let rowAdded = false;
  await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
    rowAdded = false;
    const outcome = insertTableRow(liveDoc, tableRef, cells, index);
    rowAdded = outcome.inserted;
    // Table not found -> return null so nothing is written.
    return rowAdded ? outcome.doc : null;
  });

  if (rowAdded) {
    return { success: true, table: tableRef, inserted: true };
  }
  throw new Error(
    `table_insert_row: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`,
  );
}
/**
 * Remove the row at 0-based `index` from a table on the LIVE collab document.
 *
 * @param pageId   Page holding the table.
 * @param tableRef `#<index>` or a block id inside the target table.
 * @param index    0-based row position to delete.
 * @throws Error when no table resolves for `tableRef`; errors raised by the
 *         deleteTableRow helper propagate.
 */
async tableDeleteRow(pageId: string, tableRef: string, index: number) {
  await this.ensureAuthenticated();
  const collabToken = await this.getCollabTokenWithReauth();

  // Reset inside the transform so a collab retry starts from a clean flag.
  let rowRemoved = false;
  await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
    rowRemoved = false;
    const outcome = deleteTableRow(liveDoc, tableRef, index);
    rowRemoved = outcome.deleted;
    // Table not found -> return null so nothing is written.
    return rowRemoved ? outcome.doc : null;
  });

  if (rowRemoved) {
    return { success: true, table: tableRef, deleted: true };
  }
  throw new Error(
    `table_delete_row: no table found for "${tableRef}" on page ${pageId} (use "#<index>" from get_outline, or a block id inside the table)`,
  );
}
/**
 * Create a new page with a title and markdown content.
 *
 * Uses the /pages/import workaround (the only endpoint accepting content),
 * then moves the page under `parentPageId` when given and restores the exact
 * title: the import endpoint derives the title from the FILENAME and replaces
 * spaces with underscores, so it is explicitly re-set via /pages/update.
 *
 * @param title        Desired page title (also used for the upload filename).
 * @param content      Markdown body.
 * @param spaceId      Target space.
 * @param parentPageId Optional parent; verified to exist before importing.
 * @returns The created page, as returned by getPage().
 * @throws Error when the parent page cannot be fetched, when the import
 *         response carries no page id, or when any request fails.
 */
async createPage(
  title: string,
  content: string,
  spaceId: string,
  parentPageId?: string,
) {
  await this.ensureAuthenticated();

  if (parentPageId) {
    try {
      // Existence check only: the raw fetch is enough, there is no need to
      // convert the parent body to markdown or list its subpages.
      await this.getPageRaw(parentPageId);
    } catch (e) {
      throw new Error(`Parent page with ID ${parentPageId} not found.`);
    }
  }

  // 1. Create content via Import (multipart/form-data).
  // A FormData body is a single-use stream consumed on the first send, so it
  // cannot be replayed by this.client's response interceptor (replay fails
  // with 'socket hang up'). Multipart re-auth is therefore done here with
  // bare axios and an explicit one-shot 401/403 retry that rebuilds the body.
  const fileContent = Buffer.from(content, "utf-8");
  const importUrl = `${this.apiUrl}/pages/import`;

  // One send attempt: a FRESH form per call, and the same instance used for
  // both getHeaders() and the body so the Content-Type boundary matches.
  const sendImport = () => {
    const form = new FormData();
    form.append("spaceId", spaceId);
    form.append("file", fileContent, {
      filename: `${title || "import"}.md`,
      contentType: "text/markdown",
    });
    return axios.post(importUrl, form, {
      headers: {
        ...form.getHeaders(),
        // Read the header from this.client's defaults (set by login(), only
        // ever deleted) instead of building `Bearer ${this.token}`: a
        // concurrent 401 can null this.token mid-flight, which would
        // otherwise produce a literal "Bearer null".
        Authorization: this.client.defaults.headers.common["Authorization"],
      },
      timeout: 60000,
    });
  };

  let response;
  try {
    response = await sendImport();
  } catch (error) {
    const status = axios.isAxiosError(error)
      ? error.response?.status
      : undefined;
    if (status !== 401 && status !== 403) {
      throw error;
    }
    // Expired token: re-login and retry exactly once with a rebuilt form.
    await this.login();
    response = await sendImport();
  }

  // Fail loudly when the server answered without an id, instead of passing
  // `undefined` on to movePage / /pages/update / getPage.
  const newPageId = (response.data?.data ?? response.data)?.id;
  if (!newPageId) {
    throw new Error(
      "Page import did not return the id of the created page.",
    );
  }

  // 2. Move to parent if needed.
  if (parentPageId) {
    await this.movePage(newPageId, parentPageId);
  }

  // 3. Restore the exact title (import mangles spaces into underscores).
  if (title) {
    await this.client.post("/pages/update", { pageId: newPageId, title });
  }

  return this.getPage(newPageId);
}
/**
 * Decide whether a URL is acceptable for the given context, based on its
 * scheme alone.
 *
 * The raw-JSON write path bypasses the scheme checks the markdown path gets,
 * so this is the sanitization choke point for caller-supplied ProseMirror
 * JSON.
 *
 * - Non-string values are rejected; an empty/blank string is accepted.
 * - A URL with no leading `scheme:` (relative path, anchor, /api/files/...)
 *   is accepted in both contexts.
 * - "link": everything except javascript:, vbscript: and data: is accepted.
 * - "src":  only http:, https: and mailto: are accepted (so javascript:,
 *   vbscript:, data: and file: are all rejected).
 */
private isSafeUrl(url: unknown, context: "link" | "src"): boolean {
  if (typeof url !== "string") return false;

  const candidate = url.trim();
  if (candidate.length === 0) return true; // empty href/src is harmless

  // Remove whitespace and ASCII control characters before looking for the
  // scheme, so an embedded tab/newline cannot hide a dangerous scheme. A
  // scheme starts with a letter, followed by letters/digits/+/-/. up to ":".
  const compact = candidate.replace(/[\s\x00-\x1f]+/g, "");
  const found = compact.match(/^([a-zA-Z][a-zA-Z0-9+.-]*):/);
  if (!found) return true; // scheme-less: relative/absolute path or anchor

  const scheme = found[1].toLowerCase();

  if (context === "link") {
    // Deny-list: schemes that can execute or smuggle script when clicked.
    const blockedForLinks = ["javascript", "vbscript", "data"];
    return !blockedForLinks.includes(scheme);
  }

  // context === "src": strict allow-list.
  const allowedForSrc = ["http", "https", "mailto"];
  return allowedForSrc.includes(scheme);
}
+ if ( + node.type === "image" || + node.type === "attachment" || + node.type === "video" || + node.type === "embed" || + node.type === "youtube" || + node.type === "drawio" || + node.type === "excalidraw" || + node.type === "audio" || + node.type === "pdf" + ) { + const attrs = node.attrs || {}; + for (const key of ["src", "url"]) { + if (attrs[key] != null && !this.isSafeUrl(attrs[key], "src")) { + throw new Error( + `unsafe ${node.type} ${key} rejected: "${attrs[key]}"`, + ); + } + } + } + + if (Array.isArray(node.content)) { + for (const child of node.content) { + this.validateDocUrls(child, depth + 1); + } + } + } + + /** + * Recursively validate the STRUCTURE of a ProseMirror node (reuses the + * recursion shape of validateDocUrls). Every node must be an object with a + * string `type`; when present, `content` must be an array, `marks` must be + * an array of objects each with a string `type`, and a text node's `text` + * must be a string. Throws a clear "invalid ProseMirror document" error on + * the first violation. A max-depth guard turns an over-deep document into a + * clean error instead of a RangeError stack overflow. 
/**
 * Recursively validate the STRUCTURE of a ProseMirror node.
 *
 * Rules, checked on every node of the tree:
 *  - the node is an object with a string `type`;
 *  - a node of type "text" has a string `text` (a text node with NO `text`
 *    property at all is rejected too);
 *  - `marks`, when present, is an array of objects each with a string `type`;
 *  - `content`, when present, is an array, and each child is validated.
 *
 * @param node  Node to validate (the whole document on the initial call).
 * @param depth Current nesting depth; callers normally omit it.
 * @throws Error "invalid ProseMirror document: ..." on the first violation,
 *         including nesting deeper than MAX_DEPTH (a clean error instead of a
 *         RangeError stack overflow).
 */
private validateDocStructure(node: any, depth: number = 0): void {
  const MAX_DEPTH = 200;
  if (depth > MAX_DEPTH) {
    throw new Error(
      `invalid ProseMirror document: nesting exceeds the maximum depth of ${MAX_DEPTH}`,
    );
  }
  if (!node || typeof node !== "object" || typeof node.type !== "string") {
    throw new Error(
      "invalid ProseMirror document: every node must be an object with a string `type`",
    );
  }
  // No `"text" in node` precondition here: that guard let a text node that
  // omits `text` entirely slip through validation.
  if (node.type === "text" && typeof node.text !== "string") {
    throw new Error(
      "invalid ProseMirror document: a text node must have a string `text`",
    );
  }
  if (node.marks !== undefined) {
    if (!Array.isArray(node.marks)) {
      throw new Error(
        "invalid ProseMirror document: `marks` must be an array",
      );
    }
    for (const mark of node.marks) {
      if (!mark || typeof mark !== "object" || typeof mark.type !== "string") {
        throw new Error(
          "invalid ProseMirror document: every mark must be an object with a string `type`",
        );
      }
    }
  }
  if (node.content !== undefined) {
    if (!Array.isArray(node.content)) {
      throw new Error(
        "invalid ProseMirror document: `content` must be an array when present",
      );
    }
    for (const child of node.content) {
      this.validateDocStructure(child, depth + 1);
    }
  }
}
Update the title if one was given; otherwise there is nothing + // to do, so fail loudly rather than silently no-op. + if (doc == null) { + if (!title) { + throw new Error( + "update_page_json: nothing to update (provide content and/or title)", + ); + } + await this.client.post("/pages/update", { pageId, title }); + return { + success: true, + modified: true, + message: "Page title updated (content left unchanged).", + pageId, + }; + } + + // Validate the document shape before a full overwrite: a malformed doc + // would otherwise silently corrupt the page (full-overwrite is the + // documented behaviour; no optimistic-concurrency is applied here). + if ( + typeof doc !== "object" || + doc.type !== "doc" || + !Array.isArray(doc.content) + ) { + throw new Error( + 'content must be a ProseMirror document ({"type":"doc","content":[...]}) ' + + "where content is an array of nodes each having a string `type`", + ); + } + + // Recurse the WHOLE document so a malformed nested node (e.g. a node with a + // non-string type, a non-array content/marks, or a text node missing its + // string text) is rejected up front rather than silently corrupting the + // page on overwrite. + this.validateDocStructure(doc); + + // Sanitize URLs before writing. This closes the JSON-path bypass: unlike + // the markdown link path (which TipTap sanitizes), raw JSON could otherwise + // inject javascript:/data: link hrefs or media srcs straight into the doc. + this.validateDocUrls(doc); + + if (title) { + await this.client.post("/pages/update", { pageId, title }); + } + + const collabToken = await this.getCollabTokenWithReauth(); + await replacePageContent(pageId, doc, collabToken, this.apiUrl); + + return { + success: true, + modified: true, + message: "Page content replaced from ProseMirror JSON.", + pageId, + }; + } + + /** + * Export a page to a single self-contained Docmost-flavoured markdown file: + * meta block + body (with inline comment anchors + diagrams) + comment + * threads. 
Lossless round-trip target; see importPageMarkdown for the inverse. + */ + async exportPageMarkdown(pageId: string): Promise<string> { + await this.ensureAuthenticated(); + const page = await this.getPageRaw(pageId); + const body = page.content + ? convertProseMirrorToMarkdown(page.content) + : ""; + let comments: any[] = []; + try { + comments = await this.listComments(pageId); + } catch (e) { + // A comments fetch failure must not lose the body; export with [] and let + // the caller see the (empty) comments block. Log under DEBUG only. + if (process.env.DEBUG) console.error("export: listComments failed", e); + } + const meta = { + version: 1, + pageId: page.id, + slugId: page.slugId, + title: page.title, + spaceId: page.spaceId, + parentPageId: page.parentPageId ?? null, + }; + return serializeDocmostMarkdown(meta, body, comments); + } + + /** + * Import a self-contained Docmost markdown file back into a page. Parses out + * the meta + comments metadata blocks, converts the body to ProseMirror + * (restoring comment marks + diagrams from their inline HTML), and replaces + * the page content. Comment THREAD records are NOT written to the server in + * this version — they are preserved in the file and the inline marks are + * re-applied so the highlights survive; managing comment records stays with + * the comment tools/UI. + */ + async importPageMarkdown(pageId: string, fullMarkdown: string): Promise<any> { + await this.ensureAuthenticated(); + const { meta, body, comments } = parseDocmostMarkdown(fullMarkdown); + const doc = await markdownToProseMirror(body); + const collabToken = await this.getCollabTokenWithReauth(); + await replacePageContent(pageId, doc, collabToken, this.apiUrl); + // Collect distinct comment ids that actually became comment marks in the doc. 
+ const collectCommentIds = (node: any, acc: Set<string>): Set<string> => { + if (!node || typeof node !== "object") return acc; + if (Array.isArray(node.marks)) { + for (const mk of node.marks) { + if (mk && mk.type === "comment" && mk.attrs?.commentId) { + acc.add(mk.attrs.commentId); + } + } + } + if (Array.isArray(node.content)) { + for (const child of node.content) collectCommentIds(child, acc); + } + return acc; + }; + // Count reflects the comment marks present in the written document, so an id + // that only appears as inert text (e.g. inside a fenced code block) is not + // counted because it never becomes a comment mark. + const anchoredIds = collectCommentIds(doc, new Set<string>()); + const result: any = { + success: true, + pageId, + anchoredCommentCount: anchoredIds.size, + commentsInFile: Array.isArray(comments) ? comments.length : 0, + }; + // Warn (non-fatal) if the file was exported from a DIFFERENT page. + if (meta?.pageId && meta.pageId !== pageId) { + result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`; + } + return result; + } + + /** + * Rename a page (change its title only) without touching or resending its + * content. The slug is derived from the page record, not the body, so it is + * left intact too. + */ + async renamePage(pageId: string, title: string) { + await this.ensureAuthenticated(); + await this.client.post("/pages/update", { pageId, title }); + return { success: true, pageId, title }; + } + + /** + * Copy the WHOLE content of one page onto another, entirely server-side: the + * source's ProseMirror document is read and written verbatim onto the target + * via the live collab path, so the document never passes through the model. + * + * Only the target's BODY is replaced — its title and slug live on the page + * record (not in the content), so they are untouched. The source page is not + * modified at all. 
+ */ + async copyPageContent(sourcePageId: string, targetPageId: string) { + await this.ensureAuthenticated(); + + // A self-copy would be a no-op overwrite; reject it explicitly so a caller + // mistake surfaces as a clear error rather than a silent round-trip. + if (sourcePageId === targetPageId) { + throw new Error( + "copy_page_content: sourcePageId and targetPageId are the same page (no-op copy)", + ); + } + + const source = await this.getPageRaw(sourcePageId); + const content = source?.content; + if ( + !content || + typeof content !== "object" || + content.type !== "doc" || + !Array.isArray(content.content) + ) { + throw new Error( + `copy_page_content: source page ${sourcePageId} has no usable ProseMirror content to copy`, + ); + } + + // Defense-in-depth: run the same URL-scheme sanitizer the JSON write path + // uses, so copying never lands a javascript:/data: href/src on the target + // (parity with updatePageJson; harmless for already-stored source content). + this.validateDocUrls(content); + + const collabToken = await this.getCollabTokenWithReauth(); + await replacePageContent(targetPageId, content, collabToken, this.apiUrl); + + return { + success: true, + sourcePageId, + targetPageId, + copiedNodes: content.content.length, + }; + } + + /** + * Surgical text edits: find/replace inside text nodes of the live + * document. Preserves all block ids, marks, callouts and tables. + */ + async editPageText(pageId: string, edits: TextEdit[]) { + await this.ensureAuthenticated(); + + const collabToken = await this.getCollabTokenWithReauth(); + + // Apply the edits against the LIVE synced document, not the debounced REST + // snapshot, so concurrent human edits/comments are preserved. applyTextEdits + // throws descriptive errors on zero/multiple matches — let them propagate. 
+ let results: TextEditResult[] | undefined; + await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const r = applyTextEdits(liveDoc, edits); + results = r.results; + return r.doc; + }); + + return { + success: true, + pageId, + edits: results, + message: "Text edits applied (node ids and formatting preserved).", + }; + } + + /** + * Replace EVERY node whose attrs.id === nodeId (recursively, including nodes + * nested in callouts/tables) with the supplied node. Operates on the LIVE + * collab document so comments and concurrent edits are preserved. + * + * The replacement node's block id is preserved: if node.attrs is missing it + * is created, and if node.attrs.id is missing it is set to nodeId so the + * replacement keeps the same id it replaced. Throws if no node matches. + */ + async patchNode(pageId: string, nodeId: string, node: any) { + await this.ensureAuthenticated(); + + if (!node || typeof node !== "object" || typeof node.type !== "string") { + throw new Error( + "patch_node: `node` must be an object with a string `type`", + ); + } + // Preserve the block id WITHOUT mutating the caller's object: build a local + // copy whose attrs.id === nodeId (so the swapped-in node keeps the id of the + // node it replaces). + const target = { + ...node, + attrs: { + ...(node.attrs && typeof node.attrs === "object" ? node.attrs : {}), + }, + }; + if (target.attrs.id == null) { + target.attrs.id = nodeId; + } + + const collabToken = await this.getCollabTokenWithReauth(); + + // Track the replacement count in an outer var, reset per-transform, so a + // collab retry recomputes it cleanly (mirrors replaceImage's pattern). 
+ let replaced = 0; + await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + replaced = 0; + const { doc: nd, replaced: r } = replaceNodeById(liveDoc, nodeId, target); + replaced = r; + if (replaced === 0) return null; // no match -> skip the write entirely + return nd; + }); + + if (replaced === 0) { + throw new Error( + `patch_node: no node with id "${nodeId}" found on page ${pageId}`, + ); + } + + return { success: true, replaced, nodeId }; + } + + /** + * Insert a node relative to an anchor (or append it at the top level). + * Operates on the LIVE collab document so comments and concurrent edits are + * preserved. + * + * opts.position: + * - "append": push the node at the end of the top-level content. + * - "before"/"after": insert the node as a sibling of the anchor, just + * before/after it. Exactly one of anchorNodeId / anchorText must be given; + * anchorNodeId locates a node anywhere by attrs.id, anchorText matches the + * first top-level block whose plain text includes it. + * + * Throws if the anchor cannot be found. + */ + async insertNode( + pageId: string, + node: any, + opts: { + position: "before" | "after" | "append"; + anchorNodeId?: string; + anchorText?: string; + }, + ) { + await this.ensureAuthenticated(); + + if (!node || typeof node !== "object" || typeof node.type !== "string") { + throw new Error( + "insert_node: `node` must be an object with a string `type`", + ); + } + if ( + !opts || + (opts.position !== "before" && + opts.position !== "after" && + opts.position !== "append") + ) { + throw new Error( + 'insert_node: `position` must be one of "before", "after", "append"', + ); + } + if (opts.position === "before" || opts.position === "after") { + // before/after require EXACTLY ONE anchor (an id or a text fragment). 
+ const hasId = + typeof opts.anchorNodeId === "string" && opts.anchorNodeId.length > 0; + const hasText = + typeof opts.anchorText === "string" && opts.anchorText.length > 0; + if (hasId === hasText) { + throw new Error( + `insert_node: position "${opts.position}" requires exactly one of anchorNodeId or anchorText`, + ); + } + } + + const collabToken = await this.getCollabTokenWithReauth(); + + // Track insertion in an outer var, reset per-transform, so a collab retry + // recomputes it cleanly (mirrors replaceImage's pattern). + let inserted = false; + await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + inserted = false; + const { doc: nd, inserted: ins } = insertNodeRelative(liveDoc, node, opts); + inserted = ins; + if (!inserted) return null; // anchor not found -> skip the write entirely + return nd; + }); + + if (!inserted) { + const anchorDesc = opts.anchorNodeId + ? `anchorNodeId "${opts.anchorNodeId}"` + : `anchorText "${opts.anchorText}"`; + throw new Error( + `insert_node: anchor not found (${anchorDesc}) on page ${pageId}`, + ); + } + + return { success: true, inserted: true, position: opts.position }; + } + + /** + * Remove EVERY node whose attrs.id === nodeId (recursively, including nodes + * nested in callouts/tables) from its parent content array. Operates on the + * LIVE collab document so comments and concurrent edits are preserved. + * Throws if no node matches. + */ + async deleteNode(pageId: string, nodeId: string) { + await this.ensureAuthenticated(); + + const collabToken = await this.getCollabTokenWithReauth(); + + // Track the deletion count in an outer var, reset per-transform, so a + // collab retry recomputes it cleanly (mirrors replaceImage's pattern). 
+ let deleted = 0; + await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + deleted = 0; + const { doc: nd, deleted: d } = deleteNodeById(liveDoc, nodeId); + deleted = d; + if (deleted === 0) return null; // no match -> skip the write entirely + return nd; + }); + + if (deleted === 0) { + throw new Error( + `delete_node: no node with id "${nodeId}" found on page ${pageId}`, + ); + } + + return { success: true, deleted, nodeId }; + } + + /** Build the public share URL for a page. */ + private shareUrl(shareKey: string, slugId: string): string { + return `${this.appUrl}/share/${shareKey}/p/${slugId}`; + } + + /** Share a page publicly (idempotent) and return the public URL. */ + async sharePage(pageId: string, searchIndexing: boolean = true) { + await this.ensureAuthenticated(); + const response = await this.client.post("/shares/create", { + pageId, + includeSubPages: false, + searchIndexing, + }); + const share = response.data?.data ?? response.data; + const slugId = share.page?.slugId || (await this.getPageRaw(pageId)).slugId; + return { + shareId: share.id, + key: share.key, + pageId: share.pageId, + publicUrl: this.shareUrl(share.key, slugId), + searchIndexing: share.searchIndexing, + }; + } + + /** List all public shares in the workspace with their URLs. */ + async listShares() { + const shares = await this.paginateAll("/shares", {}); + return shares.map((s: any) => ({ + shareId: s.id, + key: s.key, + pageId: s.pageId, + pageTitle: s.page?.title, + publicUrl: s.page?.slugId ? this.shareUrl(s.key, s.page.slugId) : null, + searchIndexing: s.searchIndexing, + createdAt: s.createdAt, + })); + } + + /** Remove the public share of a page. 
*/ + async unsharePage(pageId: string) { + await this.ensureAuthenticated(); + const shares = await this.listShares(); + const share = shares.find((s: any) => s.pageId === pageId); + if (!share) { + throw new Error(`Page ${pageId} is not shared.`); + } + await this.client.post("/shares/delete", { shareId: share.shareId }); + return { success: true, removedShareId: share.shareId, pageId }; + } + + async search(query: string, spaceId?: string, limit?: number) { + await this.ensureAuthenticated(); + const payload: Record<string, any> = { query, spaceId }; + // Clamp an optional caller-supplied limit into a sane 1..100 range before + // forwarding it to the server; omit it entirely when not provided so the + // server applies its own default. + if (limit !== undefined) { + payload.limit = Math.max(1, Math.min(100, limit)); + } + const response = await this.client.post("/search", payload); + + // Normalize both response shapes: bare array and paginated { items: [...] } + const data = response.data?.data; + const items = Array.isArray(data) ? data : data?.items || []; + const filteredItems = items.map((item: any) => filterSearchResult(item)); + + return { + items: filteredItems, + success: response.data?.success || false, + }; + } + + async movePage( + pageId: string, + parentPageId: string | null, + position?: string, + ) { + await this.ensureAuthenticated(); + // Docmost requires position >= 5 chars. + const validPosition = position || "a00000"; + + return this.client + .post("/pages/move", { + pageId, + parentPageId, + position: validPosition, + }) + .then((res) => res.data); + } + + async deletePage(pageId: string) { + await this.ensureAuthenticated(); + return this.client + .post("/pages/delete", { pageId }) + .then((res) => res.data); + } + + // --- Comment methods (ported from upstream PR #3 by Max Nikitin) --- + + /** + * Normalize a comment's `content` into a ProseMirror doc object before + * markdown conversion. 
createComment/updateComment send content as a + * JSON.stringify(...) STRING, and the server stores it as-is, so on read it + * comes back as a string. convertProseMirrorToMarkdown returns "" for a + * string, so parse it first (guarded — fall back to the raw value on any + * parse failure so a non-JSON legacy value is still handled gracefully). + */ + private parseCommentContent(content: any): any { + if (typeof content !== "string") return content; + try { + return JSON.parse(content); + } catch { + return content; + } + } + + /** List all comments on a page (cursor-paginated), content as markdown. */ + async listComments(pageId: string) { + await this.ensureAuthenticated(); + let allComments: any[] = []; + let cursor: string | null = null; + + do { + const payload: Record<string, any> = { pageId, limit: 100 }; + if (cursor) payload.cursor = cursor; + + const response = await this.client.post("/comments", payload); + const data = response.data.data || response.data; + const items = data.items || []; + allComments = allComments.concat(items); + cursor = data.meta?.nextCursor || null; + } while (cursor); + + return allComments.map((comment: any) => { + const markdown = comment.content + ? convertProseMirrorToMarkdown( + this.parseCommentContent(comment.content), + ) + : ""; + return filterComment(comment, markdown); + }); + } + + async getComment(commentId: string) { + await this.ensureAuthenticated(); + const response = await this.client.post("/comments/info", { commentId }); + const comment = response.data.data || response.data; + const markdown = comment.content + ? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content)) + : ""; + return { + data: filterComment(comment, markdown), + success: true, + }; + } + + /** Create a page-level or inline comment; content is markdown. 
*/ + async createComment( + pageId: string, + content: string, + type: "page" | "inline" = "page", + selection?: string, + parentCommentId?: string, + ) { + await this.ensureAuthenticated(); + // Convert through the full Docmost schema (consistent with page paths) + const jsonContent = await markdownToProseMirror(content); + const payload: Record<string, any> = { + pageId, + content: JSON.stringify(jsonContent), + type, + }; + if (selection) payload.selection = selection; + if (parentCommentId) payload.parentCommentId = parentCommentId; + + const response = await this.client.post("/comments/create", payload); + const comment = response.data.data || response.data; + const markdown = comment.content + ? convertProseMirrorToMarkdown(this.parseCommentContent(comment.content)) + : content; + const result: any = { + data: filterComment(comment, markdown), + success: true, + }; + + // Anchor the comment in the document. The /comments/create API records the + // comment + its `selection` text, but it does NOT insert the comment MARK + // into the page content, so without this the inline comment has no + // highlight/anchor and is not clickable. Only top-level inline comments are + // anchored: replies (parentCommentId set) inherit their parent's anchor, + // and page-type comments have no text range. + if (type === "inline" && selection && !parentCommentId && comment?.id) { + const newCommentId: string = comment.id; + let anchored = false; + try { + const collabToken = await this.getCollabTokenWithReauth(); + await mutatePageContent( + pageId, + collabToken, + this.apiUrl, + (liveDoc) => { + const doc = + liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + + // Find the FIRST text node containing the selection text, then + // split it into before / marked / after, copying the node's + // existing marks onto all three parts and adding the comment mark + // only to the middle part. Returns true once a match is wrapped. 
+ const wrapInFirstMatch = ( + nodes: any[], + depth: number, + ): boolean => { + const MAX_DEPTH = 200; + if (depth > MAX_DEPTH || !Array.isArray(nodes)) return false; + for (let i = 0; i < nodes.length; i++) { + const n = nodes[i]; + if (!n || typeof n !== "object") continue; + if ( + n.type === "text" && + typeof n.text === "string" && + n.text.includes(selection) + ) { + const idx = n.text.indexOf(selection); + const before = n.text.slice(0, idx); + const middleText = selection; + const after = n.text.slice(idx + selection.length); + const baseMarks = Array.isArray(n.marks) ? n.marks : []; + // Drop any pre-existing comment mark from the marks applied to + // the middle fragment so it ends up with exactly one comment + // mark (the new one) rather than two. Other fragments and the + // base marks list are left untouched. + const middleBaseMarks = baseMarks.filter( + (m: any) => !(m && m.type === "comment"), + ); + const commentMark = { + type: "comment", + // The comment mark schema declares both commentId and + // resolved; include resolved:false for completeness. + attrs: { commentId: newCommentId, resolved: false }, + }; + const parts: any[] = []; + if (before.length > 0) { + parts.push({ ...n, text: before, marks: [...baseMarks] }); + } + parts.push({ + ...n, + text: middleText, + marks: [...middleBaseMarks, commentMark], + }); + if (after.length > 0) { + parts.push({ ...n, text: after, marks: [...baseMarks] }); + } + nodes.splice(i, 1, ...parts); + return true; + } + if (Array.isArray(n.content)) { + if (wrapInFirstMatch(n.content, depth + 1)) return true; + } + } + return false; + }; + + if (Array.isArray(doc.content) && wrapInFirstMatch(doc.content, 0)) { + anchored = true; + return doc; + } + // Selection text not found: do NOT fail (the comment already + // exists). Abort the write so nothing changes. 
+ return null; + }, + ); + } catch (e) { + // The comment record already exists; an anchoring failure must not turn + // a successful create into an error. Report anchored:false instead. + if (process.env.DEBUG) { + console.error("Failed to anchor inline comment mark:", e); + } + anchored = false; + } + result.anchored = anchored; + } + + return result; + } + + async updateComment(commentId: string, content: string) { + await this.ensureAuthenticated(); + const jsonContent = await markdownToProseMirror(content); + await this.client.post("/comments/update", { + commentId, + content: JSON.stringify(jsonContent), + }); + return { + success: true, + commentId, + message: "Comment updated successfully.", + }; + } + + async deleteComment(commentId: string) { + await this.ensureAuthenticated(); + return this.client + .post("/comments/delete", { commentId }) + .then((res) => res.data); + } + + /** + * Check for new comments across pages in a space (optionally scoped to a + * subtree): pages updated after `since` are scanned and their comments + * filtered by createdAt > since. + */ + async checkNewComments(spaceId: string, since: string, parentPageId?: string) { + await this.ensureAuthenticated(); + + const sinceDate = new Date(since); + + // Reject an unparseable `since`: comparing against an Invalid Date silently + // yields zero new comments (every `>` against NaN is false), which would + // mask a malformed input as "nothing new" instead of erroring. + if (Number.isNaN(sinceDate.getTime())) { + throw new Error( + `checkNewComments: invalid "since" date "${since}"; expected an ISO-8601 timestamp`, + ); + } + + // 1. Enumerate the FULL set of pages in scope by walking the sidebar-pages + // tree (a complete page index), NOT the bounded "/pages/recent" feed which + // caps at ~5000 recent items and silently misses comments on older pages. 
+ // + // Subtree scope: when parentPageId is given, the scope is that page ITSELF + // plus every descendant (enumerateSpacePages walks its children). Otherwise + // the scope is the whole space (all roots and their descendants). + // + // NOTE: do NOT pre-filter by page.updatedAt — creating a comment does not + // bump it (verified on a live server), so such a filter silently misses + // comments on pages that were not otherwise edited. The complete tree walk + // already restricts the scope correctly, so no recent-feed allow-list is + // needed any more. + let pagesInScope: any[]; + if (parentPageId) { + const subtree = await this.enumerateSpacePages(spaceId, parentPageId); + // Include the parent page node itself alongside its descendants. Fetch it + // so its title/id are available even though it is not returned by its own + // children listing. + let parentNode: any = { id: parentPageId }; + try { + parentNode = await this.getPageRaw(parentPageId); + } catch (e: any) { + // Fall back to a minimal node if the parent can't be fetched; its + // comments are still attempted below (the fetch there is non-fatal). + } + pagesInScope = [parentNode, ...subtree]; + } else { + pagesInScope = await this.enumerateSpacePages(spaceId); + } + + // 2. Fetch comments for each page, keep ones created after since + const results: any[] = []; + for (const page of pagesInScope) { + try { + const comments = await this.listComments(page.id); + const newComments = comments.filter( + (c: any) => new Date(c.createdAt) > sinceDate, + ); + if (newComments.length > 0) { + results.push({ + pageId: page.id, + pageTitle: page.title, + comments: newComments, + }); + } + } catch (e: any) { + // Skip pages with errors (e.g. 
deleted between calls) + } + } + + const totalNewComments = results.reduce( + (sum, r) => sum + r.comments.length, + 0, + ); + + // enumerateSpacePages caps traversal at 10000 nodes; flag when that cap was + // hit so the caller knows the scan may be incomplete (some pages skipped). + const truncated = pagesInScope.length >= 10000; + + return { + since, + scope: parentPageId ? `subtree of ${parentPageId}` : `space ${spaceId}`, + checkedPages: pagesInScope.length, + pagesWithNewComments: results.length, + totalNewComments, + truncated, + comments: results, + }; + } + + // --- Image upload / embedding --- + + /** Map a file extension to a supported image MIME type (throws otherwise). */ + private imageMimeFromPath(filePath: string): string { + const ext = extname(filePath).toLowerCase(); + const map: Record<string, string> = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".svg": "image/svg+xml", + }; + const mime = map[ext]; + if (!mime) { + throw new Error( + `unsupported image type ${ext || "(none)"}; supported: png, jpg, jpeg, gif, webp, svg`, + ); + } + return mime; + } + + /** Build a Docmost ProseMirror image node from an uploaded attachment. */ + private buildImageNode( + att: { id: string; fileName: string; fileSize?: number }, + align?: "left" | "center" | "right", + alt?: string, + ): any { + // Clean file URL, matching Docmost's native behaviour. No cache-busting + // query: the server serves the bare URL correctly, and replacement creates + // a new attachment id (a new URL) which busts caches naturally. + const src = `/api/files/${att.id}/${att.fileName}`; + const node: any = { + type: "image", + attrs: { + src, + attachmentId: att.id, + // Default to null when the server omits fileSize so the attr is never + // undefined (undefined would be dropped on serialization / break the + // ProseMirror image schema which expects size present). + size: att.fileSize ?? 
null, + align: align || "center", + width: null, + }, + }; + if (alt) node.attrs.alt = alt; + return node; + } + + /** + * Upload a local image file as an attachment of a page and return the + * attachment metadata plus a ready-to-insert ProseMirror image node. + */ + async uploadImage(pageId: string, filePath: string) { + await this.ensureAuthenticated(); + + // HOST-FS TRUST BOUNDARY: filePath comes from the MCP caller and points at + // the server host's local filesystem, so it must be validated BEFORE any + // bytes are read. Without these guards a caller could (a) read an arbitrary + // file via path traversal, (b) follow a symlink to a sensitive target, or + // (c) exhaust memory by reading a huge file. Order matters: validate the + // extension, then stat (regular-file + size cap), and only then read. + + // (a) Extension allowlist first — cheap, and rejects non-images up front. + const mime = this.imageMimeFromPath(filePath); + + // (b) Stat the path: it must be a regular file (rejects directories, FIFOs, + // devices, sockets) and stay under the size cap. statSync follows symlinks, + // so a symlink is only accepted when its TARGET is a regular file within + // the cap — the intended behaviour for a local image path. + const MAX_IMAGE_BYTES = 20 * 1024 * 1024; // 20 MiB + let stat; + try { + stat = statSync(filePath); + } catch (e: any) { + throw new Error(`Cannot stat image file at "${filePath}": ${e.message}`); + } + if (!stat.isFile()) { + throw new Error(`Not a regular file: "${filePath}"`); + } + if (stat.size > MAX_IMAGE_BYTES) { + throw new Error( + `Image too large: ${stat.size} bytes exceeds the ${MAX_IMAGE_BYTES}-byte cap`, + ); + } + + // (c) Only now read the bytes. + let fileBuffer: Buffer; + try { + fileBuffer = readFileSync(filePath); + } catch (e: any) { + throw new Error(`Cannot read image file at "${filePath}": ${e.message}`); + } + + // Build a FRESH FormData for every send attempt. 
A FormData body is a + // single-use stream that is CONSUMED on the first send, so it cannot be + // replayed by this.client's response interceptor (replaying a consumed + // stream fails with 'socket hang up'). Multipart re-auth is therefore done + // here with bare axios and an explicit one-shot 401/403 retry that rebuilds + // the body. Field order matters: text fields must precede the file part so + // the server reads them; the server always generates a fresh attachment id. + const buildForm = () => { + const form = new FormData(); + form.append("pageId", pageId); + form.append("file", fileBuffer, { + filename: basename(filePath), + contentType: mime, + }); + return form; + }; + + const url = `${this.apiUrl}/files/upload`; + let response; + try { + // Call buildForm() ONCE per attempt and reuse the instance for both + // getHeaders() and the body so the Content-Type boundary matches the body. + const form = buildForm(); + // Read the Authorization header from this.client's defaults (set by + // login(), only ever deleted — never set to null) instead of building + // `Bearer ${this.token}`: a concurrent JSON 401 can null this.token + // mid-flight, which would otherwise produce a literal "Bearer null". + // ensureAuthenticated() above guarantees login() ran, so the default + // header exists here. A 60s timeout keeps a hung upload from wedging the + // per-page lock (replaceImage holds withPageLock across this call). + response = await axios.post(url, form, { + headers: { + ...form.getHeaders(), + Authorization: this.client.defaults.headers.common["Authorization"], + }, + timeout: 60000, + }); + } catch (error) { + // On an expired-token auth error, re-login and retry exactly once with a + // freshly-rebuilt FormData (the previous one was already consumed). 
+ if ( + axios.isAxiosError(error) && + (error.response?.status === 401 || error.response?.status === 403) + ) { + await this.login(); + const form2 = buildForm(); + response = await axios.post(url, form2, { + headers: { + ...form2.getHeaders(), + Authorization: + this.client.defaults.headers.common["Authorization"], + }, + timeout: 60000, + }); + } else if (axios.isAxiosError(error)) { + // Keep the thrown message free of the raw response body (it may echo + // request data or server internals); surface only status/statusText. + // The full body is logged under DEBUG for diagnostics. + if (process.env.DEBUG) { + console.error( + "Image upload failed; response body:", + JSON.stringify(error.response?.data), + ); + } + throw new Error( + `Image upload failed: ${error.response?.status} ${error.response?.statusText}`, + ); + } else { + throw error; + } + } + // The attachment may arrive bare or wrapped in a { data } envelope. + const att = response.data?.data ?? response.data; + if (!att?.id || !att?.fileName) { + throw new Error( + "Unexpected /files/upload response: " + JSON.stringify(response.data), + ); + } + + // Some Docmost versions omit fileSize from the upload response. Fall back + // to the local stat size (the bytes we just uploaded) so callers never get + // an undefined size. + const localSize = stat.size; + const resolvedSize = att.fileSize ?? localSize; + + return { + attachmentId: att.id, + fileName: att.fileName, + fileSize: resolvedSize, + src: `/api/files/${att.id}/${att.fileName}`, + imageNode: this.buildImageNode({ ...att, fileSize: resolvedSize }), + }; + } + + /** + * Upload a local image and insert it into a page in one step. + * By default the image is appended at the end. With replaceText, the first + * top-level block whose text contains the string is replaced; with afterText, + * the image is inserted right after the first matching block. All other + * block ids are preserved (only one top-level block is added or swapped). 
+ */ + async insertImage( + pageId: string, + filePath: string, + opts: { + align?: "left" | "center" | "right"; + alt?: string; + replaceText?: string; + afterText?: string; + } = {}, + ) { + const up = await this.uploadImage(pageId, filePath); + // Reuse the node from uploadImage (clean /api/files/<id>/<file> src), then + // apply align/alt onto a shallow attrs copy. + const node: any = { ...up.imageNode, attrs: { ...up.imageNode.attrs } }; + if (opts.align) node.attrs.align = opts.align; + if (opts.alt) node.attrs.alt = opts.alt; + + const collabToken = await this.getCollabTokenWithReauth(); + + // Recursively collect the plain text of a top-level block. + const blockText = (n: any): string => { + let out = ""; + if (n.type === "text") out += n.text || ""; + for (const child of n.content || []) out += blockText(child); + return out; + }; + + // Insert into the LIVE synced document, not the debounced REST snapshot, so + // concurrent edits/comments/images are preserved and parallel insert_image + // calls (serialized by the per-page lock) each see the previous insertion. + let placement: "replaced" | "after" | "appended" | undefined; + await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => { + const doc = + liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + if (!Array.isArray(doc.content)) doc.content = []; + + if (opts.replaceText) { + // Ambiguity guard (mirrors editPageText): count matching top-level + // blocks first, so a non-unique fragment cannot silently replace the + // wrong block (e.g. text that also appears inside a callout/table). 
+ const matches = doc.content.filter((b: any) => + blockText(b).includes(opts.replaceText!), + ); + if (matches.length === 0) { + throw new Error(`replaceText not found: "${opts.replaceText}"`); + } + if (matches.length > 1) { + throw new Error( + `replaceText "${opts.replaceText}" matches ${matches.length} blocks; use a longer unique fragment`, + ); + } + const idx = doc.content.findIndex((b: any) => + blockText(b).includes(opts.replaceText!), + ); + // Data-loss guard: replaceText swaps the WHOLE top-level block, so if + // the fragment only appears nested inside a container (table, callout, + // list, blockquote) the entire structure would be destroyed. Refuse + // when the matched block is a container rather than a leaf + // paragraph/heading and point the caller at a safer tool. + const CONTAINER_TYPES = new Set([ + "table", + "callout", + "bulletList", + "orderedList", + "taskList", + "blockquote", + ]); + const matchedBlock = doc.content[idx]; + if (matchedBlock && CONTAINER_TYPES.has(matchedBlock.type)) { + throw new Error( + `replaceText matched a ${matchedBlock.type} container block; replacing it would destroy the whole structure. ` + + `Use afterText to insert near it, or update_page_json for surgical edits.`, + ); + } + doc.content.splice(idx, 1, node); + placement = "replaced"; + } else if (opts.afterText) { + // Ambiguity guard (mirrors editPageText): refuse a non-unique fragment. 
+ const matches = doc.content.filter((b: any) => + blockText(b).includes(opts.afterText!), + ); + if (matches.length === 0) { + throw new Error(`afterText not found: "${opts.afterText}"`); + } + if (matches.length > 1) { + throw new Error( + `afterText "${opts.afterText}" matches ${matches.length} blocks; use a longer unique fragment`, + ); + } + const idx = doc.content.findIndex((b: any) => + blockText(b).includes(opts.afterText!), + ); + doc.content.splice(idx + 1, 0, node); + placement = "after"; + } else { + doc.content.push(node); + placement = "appended"; + } + + return doc; + }); + + return { + success: true, + pageId, + attachmentId: up.attachmentId, + src: up.src, + placement, + }; + } + + /** + * Replace an existing image in a page with a new file. Uploads the new file as + * a brand-new attachment, which yields a fresh clean URL that both renders + * correctly and busts browser caches (the URL changed). Finds every image node + * whose attrs.attachmentId === oldAttachmentId (recursively, incl. nodes nested + * in callouts/tables) and repoints its src/attachmentId/size, preserving + * comments, alignment and alt. Operates on the live collab document so comments + * and concurrent edits are preserved. Throws if no matching image is found. + * + * The OLD attachment is left in place as an unreferenced orphan: Docmost + * exposes NO HTTP API to delete a single content attachment (verified against + * the attachment controller/service and by probing the live API — deletion + * happens only by cascade when the page, space or user is removed). This is the + * same outcome as Docmost's own editor when an image is removed/replaced. + * In-place byte overwrite is deliberately NOT used because some Docmost + * versions corrupt the attachment (HTTP 500) when its bytes are overwritten. 
+ */ + async replaceImage( + pageId: string, + oldAttachmentId: string, + filePath: string, + opts: { align?: "left" | "center" | "right"; alt?: string } = {}, + ) { + const collabToken = await this.getCollabTokenWithReauth(); + + // Hold ONE per-page lock for the WHOLE operation (scan -> upload -> write). + // Previously the scan and the write were two separate mutatePageContent + // calls, each acquiring + releasing the lock, with the upload happening in + // the UNLOCKED gap between them. A concurrent op could interleave there: it + // could remove the target image so the write pass matches nothing, leaving + // the freshly-uploaded attachment as an un-deletable orphan (Docmost has no + // API to delete a single content attachment). Acquiring the lock once and + // using the non-locking collab helper inside (the per-page mutex is NOT + // reentrant, so the self-locking mutatePageContent would deadlock here) + // closes that TOCTOU window. uploadImage hits /files/upload over plain HTTP + // and does not touch the page lock, so it is safe to call while held. + return withPageLock(pageId, async () => { + // STEP 1: read-only live check. Scan the live document for any image node + // matching oldAttachmentId BEFORE uploading anything, so a wrong/stale id + // throws without ever creating an orphan attachment. + let matchFound = false; + const scan = (nodes: any[]) => { + for (const node of nodes) { + if (!node) continue; + if ( + node.type === "image" && + node.attrs && + node.attrs.attachmentId === oldAttachmentId + ) { + matchFound = true; + } + if (Array.isArray(node.content)) scan(node.content); + } + }; + + await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => { + matchFound = false; // reset per-transform (collab may retry the read). + const doc = + liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + if (Array.isArray(doc.content)) scan(doc.content); + return null; // read-only: never write on the check pass. 
+ }); + + if (!matchFound) { + throw new Error( + `replace_image: no image with attachmentId "${oldAttachmentId}" found on page ${pageId}`, + ); + } + + // STEP 2: a match exists — upload the new file as a FRESH attachment (new + // id, new clean URL) and repoint every matching node in a second pass. + // Still inside the SAME lock, so no other op can have changed the page + // since the scan. + const up = await this.uploadImage(pageId, filePath); + + let replaced = 0; + + // Swap the source of one image node, preserving align/alt/title/geometry. + const repoint = (node: any) => { + node.attrs = { + ...node.attrs, + src: up.src, + attachmentId: up.attachmentId, + // Default to null when fileSize is unknown so the attr is never + // undefined. + size: up.fileSize ?? null, + }; + if (opts.align) node.attrs.align = opts.align; + if (opts.alt !== undefined) node.attrs.alt = opts.alt; + replaced++; + }; + + // Recursively repoint every image node (incl. ones nested in callouts/tables). + const walk = (nodes: any[]) => { + for (const node of nodes) { + if (!node) continue; + if ( + node.type === "image" && + node.attrs && + node.attrs.attachmentId === oldAttachmentId + ) { + repoint(node); + } + if (Array.isArray(node.content)) walk(node.content); + } + }; + + await this.mutateLiveContentUnlocked(pageId, collabToken, (liveDoc) => { + // Reset per-transform so collab retries recompute cleanly (no double-count). + replaced = 0; + const doc = + liveDoc && liveDoc.type === "doc" + ? liveDoc + : { type: "doc", content: [] }; + if (!Array.isArray(doc.content)) doc.content = []; + walk(doc.content); + if (replaced === 0) return null; // no match -> skip the write entirely + return doc; + }); + + if (replaced === 0) { + // The pass-1 SCAN found the target (matchFound was true) and we already + // uploaded the new attachment, but pass-2 matched nothing — a concurrent + // editor must have removed the node between the two passes. 
Do NOT throw + // here (that would leak the just-uploaded attachment AND report failure); + // instead report success with the upload flagged as an unreferenced + // orphan so the caller knows. (The early throw above still covers the + // case where pass-1 finds nothing, before any upload happens.) + return { + success: true, + replaced: 0, + pageId, + oldAttachmentId, + newAttachmentId: up.attachmentId, + src: up.src, + orphanedAttachmentId: up.attachmentId, + warning: + "target image was removed concurrently; uploaded attachment is unreferenced", + }; + } + + return { + success: true, + pageId, + replaced, + oldAttachmentId, + newAttachmentId: up.attachmentId, + src: up.src, + }; + }); + } + + // --- Page history / diff / transform --- + + /** + * List the saved versions (history snapshots) of a page, newest first. + * Docmost auto-snapshots on every save. Returns one cursor-paginated page of + * results: `{ items, nextCursor }`. The history record's id field is `id`. + */ + async listPageHistory(pageId: string, cursor?: string) { + await this.ensureAuthenticated(); + const payload: Record<string, any> = { pageId }; + if (cursor) payload.cursor = cursor; + const response = await this.client.post("/pages/history", payload); + const data = response.data?.data ?? response.data; + return { + items: data?.items ?? [], + nextCursor: data?.meta?.nextCursor ?? null, + }; + } + + /** + * Fetch a single page-history version including its lossless ProseMirror + * `content`. The version also carries pageId/title/createdAt. + */ + async getPageHistory(historyId: string) { + await this.ensureAuthenticated(); + const response = await this.client.post("/pages/history/info", { + historyId, + }); + return response.data?.data ?? response.data; + } + + /** + * "Restore" a version: Docmost has NO restore endpoint, so we take the + * version's `content` and write it as the page's current content via the live + * collab path (which itself creates a new history snapshot). 
Returns the + * affected pageId and the source historyId. + */ + async restorePageVersion(historyId: string) { + await this.ensureAuthenticated(); + const version = await this.getPageHistory(historyId); + if ( + !version || + !version.pageId || + !version.content || + typeof version.content !== "object" + ) { + throw new Error( + `restore_page_version: history ${historyId} has no usable content`, + ); + } + // Defense-in-depth: sanitize URLs in the restored content (parity with the + // JSON write path) before writing it back. + this.validateDocUrls(version.content); + const collabToken = await this.getCollabTokenWithReauth(); + await mutatePageContent( + version.pageId, + collabToken, + this.apiUrl, + () => version.content, + ); + return { pageId: version.pageId, restoredFrom: historyId }; + } + + /** + * Diff two versions of a page and return a Docmost-equivalent change set. + * `from`/`to` each resolve to a ProseMirror doc: + * - null / undefined / "current" -> the page's CURRENT content; + * - any other string -> that historyId's content. + * Returns the diff plus the resolved version metadata for each side. 
+ */ + async diffPageVersions(pageId: string, from?: string, to?: string) { + await this.ensureAuthenticated(); + + const isCurrent = (v?: string) => + v == null || v === "" || v === "current"; + + const resolveSide = async ( + v?: string, + ): Promise<{ doc: any; meta: any }> => { + if (isCurrent(v)) { + const raw = await this.getPageRaw(pageId); + return { + doc: raw.content || { type: "doc", content: [] }, + meta: { + kind: "current", + pageId, + title: raw.title, + updatedAt: raw.updatedAt, + }, + }; + } + const version = await this.getPageHistory(v as string); + // Guard a missing history record (restorePageVersion performs the same + // kind of check) so the caller gets a descriptive error instead of a + // TypeError from dereferencing `version` below. + if (!version || typeof version !== "object") { + throw new Error(`diff_page_versions: history ${v} not found`); + } + // NOTE(review): version.pageId is not compared with `pageId`; a historyId + // that belongs to another page is diffed as-is. Confirm whether that + // should be rejected. + return { + doc: version.content || { type: "doc", content: [] }, + meta: { + kind: "history", + historyId: version.id, + pageId: version.pageId, + title: version.title, + createdAt: version.createdAt, + }, + }; + }; + + const fromSide = await resolveSide(from); + const toSide = await resolveSide(to); + const diff = diffDocs(fromSide.doc, toSide.doc); + return { from: fromSide.meta, to: toSide.meta, diff }; + } + + /** + * Edit a page by running an arbitrary user-supplied JS transform against the + * live document, with a diff preview + page-history safety net. + * + * The transform string is evaluated as `(doc, ctx) => doc` inside a node:vm + * context: it gets ONLY `{ doc, ctx, structuredClone, console }` as globals + * and a 5s timeout, with no require/process/module globals. node:vm is NOT a + * security boundary (host objects such as `ctx` are reachable from inside), + * so only trusted transforms should be run. It must return a + * `{ type: "doc" }` node, which is validated structurally before any write. + * + * `ctx` exposes: + * - comments: the page's comments (fetched before the live read); + * - log: an array the transform can push diagnostics to (via console.log); + * - consume(id): mark a comment id as consumed (for deleteComments); + * - helpers: the transforms.ts primitives + commentsToFootnotes. + * + * Footnote convention used by the helpers: footnote markers are plain "[N]" + * text in the body, and the notes are an orderedList under a heading whose + * text is "Примечания переводчика" (Russian for "Translator's notes").
+ * + * dryRun (default true): read the page's current content, run the transform, + * and return `{ pushed:false, diff, log }` WITHOUT opening the collab socket. + * Otherwise the transform runs atomically inside mutatePageContent, optionally + * deletes consumed comments, and returns the new historyId + diff + log. + */ + async transformPage( + pageId: string, + transformJs: string, + opts: { dryRun?: boolean; deleteComments?: boolean } = {}, + ) { + const dryRun = opts.dryRun ?? true; + const deleteComments = opts.deleteComments ?? false; + + await this.ensureAuthenticated(); + const comments = await this.listComments(pageId); + + // Ids marked by ctx.consume(). Held in a closure variable so consume() does + // not depend on `this` and keeps working when a transform destructures it + // (`const { consume } = ctx`) or passes it around as a callback. + const consumed = new Set<string>(); + + // ctx handed to the sandbox. consume() records ids; helpers are the pure + // transform primitives. log is captured from console.log inside the sandbox. + const ctx = { + comments, + log: [] as string[], + consumed, + consume(id: string) { + consumed.add(id); + }, + helpers: { + blockText, + walk, + getList, + insertMarkerAfter, + setCalloutRange, + noteItem, + mdToInlineNodes, + commentsToFootnotes, + }, + }; + + // Captured oldDoc / newDoc for the diff (set inside runTransform). + let oldDoc: any; + let newDoc: any; + + // SYNCHRONOUS transform runner — safe to call inside mutatePageContent's + // onSynced (no await between the live read and the write). + const runTransform = (liveDoc: any): any => { + // Reset per-run state first. If the collab layer invokes the transform + // more than once (presumably on retry — TODO confirm), log lines and + // consumed ids from an abandoned attempt must not leak into the result. + ctx.log.length = 0; + consumed.clear(); + oldDoc = structuredClone(liveDoc); + const sandbox: Record<string, any> = { + doc: structuredClone(liveDoc), + ctx, + structuredClone, + console: { + log: (...a: any[]) => ctx.log.push(a.map((x) => String(x)).join(" ")), + }, + }; + // Wrap the provided string in parentheses so both an expression-arrow + // (`(doc, ctx) => {...}`) and a parenthesized function work. Run it in a + // fresh context with no require/process/module so the transform cannot + // touch fs/network/process. 5s wall-clock timeout. + // NOTE(review): node:vm is not a security mechanism. `ctx`, + // `structuredClone` and the console shim are host-realm objects, so a + // hostile transform can reach the host Function constructor through them + // (e.g. via `ctx.constructor.constructor`). Treat transformJs as trusted + // input only.
+ let fn: any; + try { + fn = vm.runInNewContext("(" + transformJs + ")", sandbox, { + timeout: 5000, + }); + } catch (e: any) { + throw new Error(`transform did not compile: ${e?.message ?? e}`); + } + if (typeof fn !== "function") { + throw new Error("transform must evaluate to a function (doc, ctx) => doc"); + } + const result = vm.runInNewContext( + "f(d, c)", + { f: fn, d: sandbox.doc, c: ctx }, + { timeout: 5000 }, + ); + if ( + !result || + typeof result !== "object" || + result.type !== "doc" || + !Array.isArray(result.content) + ) { + throw new Error( + 'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })', + ); + } + // Validate the returned doc before it can be written. + this.validateDocStructure(result); + this.validateDocUrls(result); + newDoc = result; + return result; + }; + + if (dryRun) { + // Preview only: run against the current REST snapshot, never open the + // socket. oldDoc/newDoc are captured by runTransform. + const raw = await this.getPageRaw(pageId); + const current = raw.content || { type: "doc", content: [] }; + runTransform(current); + // Exercise the same Yjs encoder the apply path uses, so the preview + // fails with the SAME descriptive error when the doc is not encodable + // instead of returning a misleadingly-green diff. + assertYjsEncodable(newDoc); + return { + pushed: false, + diff: diffDocs(oldDoc, newDoc), + log: ctx.log, + }; + } + + // Apply atomically against the live doc. + const collabToken = await this.getCollabTokenWithReauth(); + await mutatePageContent(pageId, collabToken, this.apiUrl, runTransform); + + // Optionally delete consumed comments (best-effort; a delete failure must + // not undo the successful write).
+ const deletedComments: string[] = []; + if (deleteComments) { + for (const id of ctx.consumed) { + try { + await this.deleteComment(id); + deletedComments.push(id); + } catch (e) { + if (process.env.DEBUG) { + console.error(`transform: failed to delete comment ${id}:`, e); + } + } + } + } + + // Fetch the newest historyId (Docmost snapshots on the write above). + let historyId: string | null = null; + try { + const hist = await this.listPageHistory(pageId); + historyId = hist.items?.[0]?.id ?? null; + } catch (e) { + if (process.env.DEBUG) { + console.error("transform: failed to fetch history id:", e); + } + } + + return { + pushed: true, + historyId, + diff: diffDocs(oldDoc, newDoc), + deletedComments, + log: ctx.log, + }; + } + + // --- docmost-sync additions (backport target: docmost-mcp/src/client.ts) --- + // + // REST-only helpers added for the docmost-sync engine. They reuse the + // existing patterns above (this.client.post, this.ensureAuthenticated(), + // this.paginateAll, the private enumerateSpacePages) so the diff can be + // copied back into docmost-mcp verbatim. + + /** + * List the contents of a space's trash (soft-deleted pages). + * + * Per SPEC §8 the trash endpoint is PER-SPACE — there is no workspace-wide + * variant — so callers must enumerate spaces and poll each one. The response + * items carry `deletedAt`, `parentPageId`, `spaceId` (and even `content`), + * which is enough to detect deletions precisely rather than inferring them + * from a pageId disappearing from the active tree. + */ + async listTrash(spaceId: string): Promise<any[]> { + return this.paginateAll("/pages/trash", { spaceId }); + } + + /** + * Restore a soft-deleted page from trash (resets its `deletedAt`). + * Resolves to the response body of `/pages/restore`. + */ + async restorePage(pageId: string) { + await this.ensureAuthenticated(); + const response = await this.client.post("/pages/restore", { pageId }); + return response.data; + } + + /** + * Public wrapper for a full space tree walk via sidebar-pages.
+ * + * Returns every page node in the space (or in the subtree rooted at + * rootPageId), each carrying `id`, `slugId`, `title`, `position`, + * `parentPageId`, `icon`, `hasChildren` — but NOT `content`. Use getPageRaw / + * exportPageBody per node to fetch the body. + */ + async listAllSpacePages( + spaceId: string, + rootPageId?: string, + ): Promise<any[]> { + return this.enumerateSpacePages(spaceId, rootPageId); + } + + /** + * "Changes since T" scan (SPEC §16). There is NO server-side `updatedAt` + * filter in Docmost and `/pages/recent` is CURSOR-paginated, so this is a + * descending CURSOR scan with a client-side cutoff: each page of items is + * sorted `updatedAt DESC`, we accumulate them and STOP as soon as we hit an + * item whose `updatedAt` is `<= sinceIso` (that item and everything after it + * is already known, so it is NOT included). Only items strictly newer than + * `sinceIso` are returned, in server (descending) order. + * + * - `spaceId` is optional: omit it to scan the whole workspace, pass it to + * scope to one space. + * - `sinceIso === null` means "no previous cutoff" — return just the first + * page (the most recent activity), which seeds the initial `T_last`. + * - `hardPageCap` is a safety ceiling on the number of pages fetched; if it + * is hit before the cutoff is reached a warning is logged because the + * result may be incomplete. + * + * The pagination loop itself lives in the pure, testable `collectRecentSince` + * helper below; this method only supplies a real `fetchPage` bound to the + * REST client. It mirrors the cursor pattern used by `listComments` / + * `listPageHistory` (payload `cursor` + `data.meta?.nextCursor`). + */ + async listRecentSince( + spaceId: string | undefined, + sinceIso: string | null, + hardPageCap = 50, + ): Promise<any[]> { + return collectRecentSince( + async (cursor) => { + await this.ensureAuthenticated(); + const response = await this.client.post("/pages/recent", { + limit: 100, + ...(spaceId ? 
{ spaceId } : {}), + ...(cursor ? { cursor } : {}), + }); + const data = response.data?.data ?? response.data; + return { + items: data?.items || [], + nextCursor: data?.meta?.nextCursor || null, + }; + }, + sinceIso, + hardPageCap, + ); + } + + /** + * Export a page as a self-contained markdown file with meta + body ONLY — + * NO `docmost:comments` block and WITHOUT calling `/comments` at all. + * + * This is the docmost-sync export mode (SPEC §3): the sync never touches + * comment threads, so the file carries page identity (meta) and the body, + * with comment threads surviving only as inline `<span data-comment-id>` + * anchor marks inside the body. Contrast with `exportPageMarkdown`, which + * additionally fetches and appends the comment threads block. + */ + async exportPageBody(pageId: string): Promise<string> { + const page = await this.getPageRaw(pageId); + const body = page.content + ? convertProseMirrorToMarkdown(page.content) + : ""; + const meta = { + version: 1, + pageId: page.id, + slugId: page.slugId, + title: page.title, + spaceId: page.spaceId, + parentPageId: page.parentPageId ?? null, + }; + return serializeDocmostMarkdownBody(meta, body); + } +} + +// --- docmost-sync additions (module scope) --------------------------------- +// +// Pure pagination helper extracted from DocmostClient.listRecentSince so the +// cursor-walk logic is unit-testable without a live server (the method only +// binds a real `fetchPage`). Lives at module scope because it is `export`ed; +// the class method above delegates to it. + +/** + * Walk a cursor-paginated "recent" feed (sorted updatedAt DESC) newest-first, + * collecting items strictly newer than sinceIso and stopping at the first item + * with updatedAt <= sinceIso. `fetchPage(cursor)` returns one page; dedup by id + * guards a server that ignores the cursor; hardPageCap bounds the walk. 
+ */ +export async function collectRecentSince( + fetchPage: (cursor: string | null) => Promise<{ items: any[]; nextCursor: string | null }>, + sinceIso: string | null, + hardPageCap = 50, +): Promise<any[]> { + const collected: any[] = []; + // Track every page id we have already accepted so we can dedup, AND stop when + // a fetched page yields zero NEW ids. This guards against a server that + // ignores the cursor and re-returns the same page forever: without it the + // loop would re-collect the same items up to hardPageCap. + const seen = new Set<string>(); + let cursor: string | null = null; + let pages = 0; + let reachedCutoff = false; + // True once the server reports no further page (nextCursor is empty): the + // feed was walked to its end, so the cap cannot have truncated the result. + let feedExhausted = false; + + while (pages < hardPageCap) { + const data = await fetchPage(cursor); + pages++; + const items: any[] = data.items || []; + + let newThisPage = 0; + for (const item of items) { + // Descending scan: the first item at or below the cutoff means every + // remaining item is older too, so stop scanning entirely. + // NOTE(review): `<=` on strings is lexicographic, which assumes both + // values are ISO-8601 timestamps in the same format/zone — confirm. + if ( + sinceIso !== null && + item.updatedAt != null && + item.updatedAt <= sinceIso + ) { + reachedCutoff = true; + break; + } + // Skip ids we have already accepted (a server that ignores the cursor + // will re-serve the same items); only genuinely new ids count toward + // progress and get collected. + if (item.id != null && seen.has(item.id)) { + continue; + } + if (item.id != null) seen.add(item.id); + collected.push(item); + newThisPage++; + } + + // With a null cutoff we only want the first page. + if (sinceIso === null) break; + if (reachedCutoff) break; + // No next cursor means there are no further pages to fetch. + if (!data.nextCursor) { + feedExhausted = true; + break; + } + // A page that added no unseen items means the server is not advancing (it + // ignored the cursor), so further fetches cannot make progress — stop.
+ if (newThisPage === 0) break; + + cursor = data.nextCursor; + } + + // Warn only when the cap itself may have cut the walk short. If the feed ran + // out on the very page that reached the cap, everything was collected and a + // truncation warning would be a false alarm. + if ( + sinceIso !== null && + !reachedCutoff && + !feedExhausted && + pages >= hardPageCap + ) { + console.warn( + `collectRecentSince: hit hardPageCap=${hardPageCap} before reaching the updatedAt cutoff; the result may be truncated`, + ); + } + + return collected; +} diff --git a/packages/docmost-client/src/index.ts b/packages/docmost-client/src/index.ts new file mode 100644 index 0000000..24d0549 --- /dev/null +++ b/packages/docmost-client/src/index.ts @@ -0,0 +1,23 @@ +/** + * Public surface of the `docmost-client` package. + * + * This is a NEW barrel authored for docmost-sync (it is NOT copied from + * docmost-mcp, whose `src/index.ts` is the MCP-server entry point and is + * deliberately not part of this package). It re-exports the pieces the sync + * engine and other consumers need: the REST client, the self-contained + * markdown (de)serializers, and the lossless ProseMirror <-> Markdown + * converter. + */ + +export { DocmostClient, collectRecentSince } from "./client.js"; + +export { + serializeDocmostMarkdown, + parseDocmostMarkdown, + serializeDocmostMarkdownBody, +} from "./lib/markdown-document.js"; +export type { DocmostMdMeta } from "./lib/markdown-document.js"; + +export { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js"; + +export { markdownToProseMirror } from "./lib/collaboration.js"; diff --git a/packages/docmost-client/src/lib/auth-utils.ts b/packages/docmost-client/src/lib/auth-utils.ts new file mode 100644 index 0000000..d677be2 --- /dev/null +++ b/packages/docmost-client/src/lib/auth-utils.ts @@ -0,0 +1,86 @@ +import axios from "axios"; + +export async function getCollabToken( + baseUrl: string, + apiToken: string, +): Promise<string> { + try { + const response = await axios.post( + `${baseUrl}/auth/collab-token`, + {}, + { + headers: { + Authorization: `Bearer ${apiToken}`, + "Content-Type": "application/json", + }, + }, + ); + + // console.error('Collab Token Response:',
response.data); + // Response is wrapped in { data: { token: ... } } + return response.data.data?.token || response.data.token; + } catch (error) { + if (axios.isAxiosError(error)) { + // Attach the HTTP status to the plain Error so callers (e.g. + // getCollabTokenWithReauth) can still detect a 401/403 after the + // original AxiosError has been wrapped away. + // Avoid leaking the full server response body by default; include only + // status + statusText. Append the body only when DEBUG is set. + let message = `Failed to get collab token: ${error.response?.status} ${error.response?.statusText}`; + if (process.env.DEBUG) { + message += ` - ${JSON.stringify(error.response?.data)}`; + } + const err: any = new Error(message); + err.status = error.response?.status; + throw err; + } + throw error; + } +} + +export async function performLogin( + baseUrl: string, + email: string, + password: string, +): Promise<string> { + try { + const response = await axios.post(`${baseUrl}/auth/login`, { + email, + password, + }); + + // Extract token from Set-Cookie header + const cookies = response.headers["set-cookie"]; + if (!cookies) { + throw new Error("No Set-Cookie header found in login response"); + } + // Match the cookie name exactly to avoid matching a future + // authTokenRefresh cookie (startsWith would catch it). + const authCookie = cookies.find((c: string) => { + const kv = c.split(";")[0]; + return kv.slice(0, kv.indexOf("=")) === "authToken"; + }); + if (!authCookie) { + throw new Error("No authToken cookie found in login response"); + } + + // Take everything after the FIRST "=" up to the first ";". + // Splitting on "=" would truncate base64 values containing "=" padding. + const kv = authCookie.split(";")[0]; + const token = kv.slice(kv.indexOf("=") + 1); + return token; + } catch (error: any) { + // Avoid leaking the full server response body by default; log only the + // HTTP status. Log the verbose body only when DEBUG is set. 
+ if (axios.isAxiosError(error)) { + if (process.env.DEBUG) { + console.error("Login failed:", error.response?.data); + } else { + console.error("Login failed:", error.response?.status); + } + } else { + console.error("Login failed:", error.message); + } + throw error; + } +} diff --git a/packages/docmost-client/src/lib/collaboration.ts b/packages/docmost-client/src/lib/collaboration.ts new file mode 100644 index 0000000..e8840c6 --- /dev/null +++ b/packages/docmost-client/src/lib/collaboration.ts @@ -0,0 +1,618 @@ +import { HocuspocusProvider } from "@hocuspocus/provider"; +import { TiptapTransformer } from "@hocuspocus/transformer"; +import * as Y from "yjs"; +import WebSocket from "ws"; +import { marked } from "marked"; +import { generateJSON } from "@tiptap/html"; +import { JSDOM } from "jsdom"; +import { docmostExtensions } from "./docmost-schema.js"; +import { withPageLock } from "./page-lock.js"; +import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; + +// Setup DOM environment for Tiptap HTML parsing in Node.js +const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>"); +global.window = dom.window as any; +global.document = dom.window.document; +// @ts-ignore +global.Element = dom.window.Element; +// @ts-ignore +global.WebSocket = WebSocket; +// Navigator is read-only in newer Node versions and already exists +// global.navigator = dom.window.navigator; + +/** + * Hard ceiling above which we skip callout preprocessing entirely. The linear + * scanner below has no quadratic blow-up, but we still cap input defensively so + * a pathological multi-megabyte payload cannot tie up the event loop; in that + * case the markdown is passed through verbatim (callouts are simply not + * detected) rather than risking a slow scan. + */ +const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB + +/** Matches an opening callout fence: `:::type` (type captured, lower-cased). 
*/ +const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/; +/** Matches a bare closing callout fence: `:::`. */ +const CALLOUT_CLOSE_RE = /^:::\s*$/; +/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */ +const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +/** + * Matches a CLOSING code fence: a run of backticks/tildes followed only by + * whitespace (same capture groups as CODE_FENCE_RE). Per CommonMark a closing + * fence cannot carry an info string, so a line such as "```js" inside an open + * fence is code content, not a terminator. + */ +const CODE_FENCE_CLOSE_RE = /^(\s*)(`{3,}|~{3,})\s*$/; + +/** + * Pre-process Docmost-flavoured markdown: convert `:::type ... :::` + * callout blocks (the syntax our markdown export produces) into HTML + * divs that the callout extension parses. The inner content is rendered + * through marked as regular markdown. + * + * Implemented as a single linear pass over the lines (no quadratic regex + * rescan). It: + * - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a + * `:::` line that lives inside a code fence as a callout delimiter, so a + * callout body that itself contains a fenced code block with a `:::` line is + * no longer corrupted; + * - matches an opening `:::type` line with the next CLOSING `:::` at the SAME + * nesting level, supporting NESTED callouts via a depth counter (an inner + * `:::type` opens a deeper level and consumes a matching `:::`); + * - emits the same `<div data-type="callout" data-callout-type="TYPE">` output + * (inner rendered through marked) as the previous regex implementation. + * + * Input larger than MAX_CALLOUT_PREPROCESS_BYTES is returned unchanged. An + * unterminated `:::type` opener is kept as a literal line. + */ +async function preprocessCallouts(markdown: string): Promise<string> { + // Defensive cap: skip preprocessing for pathologically large inputs. + if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) { + return markdown; + } + + // Recursively transform a slice of lines, converting top-level callouts in + // that slice into <div> blocks and rendering their inner content (which may + // itself contain nested callouts) through this same function.
+ const transform = async (lines: string[]): Promise<string> => { + const out: string[] = []; + let inCodeFence = false; + let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it + let i = 0; + + while (i < lines.length) { + const line = lines[i]; + + // Inside a code fence, only its matching closing fence is significant; + // everything else (including `:::` lines) is copied through verbatim. + // A closing fence must be bare (no info string), use the opener's + // character and be at least as long as the opener. + if (inCodeFence) { + out.push(line); + const fence = line.match(CODE_FENCE_CLOSE_RE); + if (fence && fence[2].startsWith(codeFenceMarker[0]) && + fence[2].length >= codeFenceMarker.length) { + inCodeFence = false; + codeFenceMarker = ""; + } + i++; + continue; + } + + // A code fence opening outside any callout body: enter code-fence mode. + const fenceOpen = line.match(CODE_FENCE_RE); + if (fenceOpen) { + inCodeFence = true; + codeFenceMarker = fenceOpen[2]; + out.push(line); + i++; + continue; + } + + // An opening callout fence: scan forward (with code-fence and nested + // callout awareness) for its matching closing `:::` at the same level.
+ const open = line.match(CALLOUT_OPEN_RE); + if (open) { + const type = open[1].toLowerCase(); + const bodyLines: string[] = []; + let depth = 1; + let innerInCodeFence = false; + let innerCodeFenceMarker = ""; + let j = i + 1; + for (; j < lines.length; j++) { + const bl = lines[j]; + if (innerInCodeFence) { + // Same closing rule as the outer scan: only a bare fence line closes. + const f = bl.match(CODE_FENCE_CLOSE_RE); + if (f && f[2].startsWith(innerCodeFenceMarker[0]) && + f[2].length >= innerCodeFenceMarker.length) { + innerInCodeFence = false; + innerCodeFenceMarker = ""; + } + bodyLines.push(bl); + continue; + } + const innerFence = bl.match(CODE_FENCE_RE); + if (innerFence) { + innerInCodeFence = true; + innerCodeFenceMarker = innerFence[2]; + bodyLines.push(bl); + continue; + } + if (CALLOUT_OPEN_RE.test(bl)) { + depth++; + bodyLines.push(bl); + continue; + } + if (CALLOUT_CLOSE_RE.test(bl)) { + depth--; + if (depth === 0) break; // matching close for THIS callout + bodyLines.push(bl); + continue; + } + bodyLines.push(bl); + } + + if (j < lines.length) { + // Found the matching closing fence: render the body (recursively, so + // nested callouts are handled) and emit the callout div. + const inner = await transform(bodyLines); + const renderedInner = await marked.parse(inner); + out.push( + `\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`, + ); + i = j + 1; // skip past the closing `:::` + continue; + } + // No matching close (unterminated callout): treat the opener as a + // literal line and continue, preserving the original text. + out.push(line); + i++; + continue; + } + + out.push(line); + i++; + } + + return out.join("\n"); + }; + + return transform(markdown.split("\n")); +} + +/** + * Bridge marked's checkbox lists to TipTap task lists. + * + * marked renders GitHub task list items (`- [x] done`) as a plain + * `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the + * markup TipTap's TaskList/TaskItem extensions parse.
This rewrites such lists
 * into the shape those extensions expect:
 *   TaskList parseHTML matches `ul[data-type="taskList"]`,
 *   TaskItem matches `li[data-type="taskItem"]`,
 *   the checked state is read from `data-checked === "true"`.
 *
 * Conversion rule: a list qualifies only when it has at least one direct
 * `<li>` and EVERY direct `<li>` owns a checkbox input. `<ol>` lists are
 * candidates too (marked renders `1. [x] a` as an `<ol>` of checkbox items);
 * a qualifying `<ol>` is rewritten as a `<ul>` task list. Mixed and ordinary
 * lists are left untouched. The `<p>` wrapper marked emits stays inside the
 * `<li>`.
 *
 * @param html HTML produced by marked
 * @returns the HTML with qualifying lists tagged as task lists; the input is
 *   returned verbatim when it contains no checkbox or exceeds
 *   MAX_CALLOUT_PREPROCESS_BYTES
 */
function bridgeTaskLists(html: string): string {
  // Two cheap exits before paying for a JSDOM parse: no checkbox markup at all
  // (the common case), or a payload above the defensive size cap.
  const mentionsCheckbox = /type=["']?checkbox/i.test(html);
  if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return html;
  }

  const { document } = new JSDOM(html).window;

  // Checkboxes that belong to this <li> itself: direct <input> children plus
  // inputs that are direct children of a direct <p>. Anything deeper (e.g. in
  // a nested sublist) is deliberately ignored. All of them are collected
  // because raw inline HTML may put several in one item.
  const ownCheckboxes = (li: Element): Element[] => {
    const boxes: Element[] = [];
    for (const child of Array.from(li.children)) {
      const isCheckboxInput =
        child.tagName === "INPUT" && child.getAttribute("type") === "checkbox";
      if (isCheckboxInput) {
        boxes.push(child);
      } else if (child.tagName === "P") {
        boxes.push(
          ...Array.from(
            child.querySelectorAll(":scope > input[type='checkbox']"),
          ),
        );
      }
    }
    return boxes;
  };

  for (const list of Array.from(document.querySelectorAll("ul, ol"))) {
    // Direct <li> children only; nested lists get their own loop iteration.
    const items = Array.from(list.children).filter((el) => el.tagName === "LI");
    const boxesPerItem = items.map((li) => ownCheckboxes(li));
    const qualifies =
      items.length > 0 && !boxesPerItem.some((boxes) => boxes.length === 0);
    if (!qualifies) continue;

    // An <ol> tagged as taskList would match both the orderedList rule (by
    // tag) and the taskList rule (by data-type), yielding a phantom empty
    // orderedList. Swap it for a <ul> carrying the same attributes/children.
    let target: Element = list;
    if (list.tagName === "OL") {
      const replacement = document.createElement("ul");
      for (const { name, value } of Array.from(list.attributes)) {
        replacement.setAttribute(name, value);
      }
      while (list.firstChild) {
        replacement.appendChild(list.firstChild);
      }
      list.replaceWith(replacement);
      target = replacement;
    }

    target.setAttribute("data-type", "taskList");
    items.forEach((li, idx) => {
      const boxes = boxesPerItem[idx];
      // The first checkbox decides the checked state; the rest only need to
      // be removed so they do not leak into the item's content.
      const first = boxes[0] ?? null;
      const isChecked =
        first != null &&
        (first.hasAttribute("checked") || (first as any).checked);
      li.setAttribute("data-type", "taskItem");
      li.setAttribute("data-checked", isChecked ? "true" : "false");
      boxes.forEach((box) => box.remove());
    });
  }

  return document.body.innerHTML;
}

/**
 * Convert markdown to a ProseMirror doc using the full Docmost schema:
 * callout preprocessing -> marked -> task-list bridge -> generateJSON.
 */
export async function markdownToProseMirror(
  markdownContent: string,
): Promise<any> {
  const html = await marked.parse(await preprocessCallouts(markdownContent));
  return generateJSON(bridgeTaskLists(html), docmostExtensions);
}

/**
 * Build the collaboration WebSocket URL from an API base URL:
 * switch http(s)->ws(s), strip a trailing /api, mount on /collab.
 * Shared by the live read and the mutate path so both target the same socket.
+ */ +export function buildCollabWsUrl(baseUrl: string): string { + let wsUrl = baseUrl.replace(/^http/, "ws"); + try { + const urlObj = new URL(wsUrl); + if (urlObj.pathname.endsWith("/api") || urlObj.pathname.endsWith("/api/")) { + urlObj.pathname = urlObj.pathname.replace(/\/api\/?$/, ""); + } + urlObj.pathname = urlObj.pathname.replace(/\/$/, "") + "/collab"; + // Drop any query/hash from the base URL so it is not carried into the + // collaboration ws URL. + urlObj.search = ""; + urlObj.hash = ""; + wsUrl = urlObj.toString(); + } catch (e) { + // Fallback if URL parsing fails + if (!wsUrl.endsWith("/collab")) { + wsUrl = wsUrl.replace(/\/$/, "") + "/collab"; + } + } + return wsUrl; +} + +/** + * Encode a ProseMirror doc to a Yjs document, sanitizing it first and turning + * the opaque yjs "Unexpected content type" failure into a descriptive error. + * + * `sanitizeForYjs` strips `undefined` node/mark attributes (the common cause of + * the failure); if `toYdoc` still throws, `findUnstorableAttr` is used to point + * at the offending attribute path. + */ +export function buildYDoc(doc: any): Y.Doc { + const safe = sanitizeForYjs(doc); + try { + return TiptapTransformer.toYdoc(safe, "default", docmostExtensions); + } catch (e) { + const bad = findUnstorableAttr(safe); + throw new Error( + `Failed to encode document to Yjs (toYdoc): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`, + ); + } +} + +/** + * Validate that a doc is Yjs-encodable by building (and discarding) a Y.Doc. + * Throws the same descriptive error as the apply path when it is not. Used by + * the dry-run preview so it fails identically to apply. + */ +export function assertYjsEncodable(doc: any): void { + buildYDoc(doc); +} + +/** Time we wait for the initial handshake/sync before giving up. 
*/
const CONNECT_TIMEOUT_MS = 25000;
/** Time we wait for the server to acknowledge our write before giving up. */
const PERSIST_TIMEOUT_MS = 20000;

/**
 * Safely mutate the live content of a page over the collaboration websocket.
 *
 * This is the single safe write path for every MCP content mutation. It:
 *  1. serializes per-page writes through withPageLock (no two MCP writes on
 *     the same page overlap);
 *  2. connects to Hocuspocus and waits for the initial sync so the local ydoc
 *     mirrors the authoritative server doc — INCLUDING edits/comments/images
 *     that are not yet in the debounced REST snapshot;
 *  3. inside onSynced, SYNCHRONOUSLY reads the live doc, runs `transform`, and
 *     writes the result back — with no `await` between read and write so no
 *     remote update can interleave and clobber concurrent human edits;
 *  4. waits for the server to acknowledge the write (unsyncedChanges -> 0)
 *     before resolving, so the next operation observes our change.
 *
 * `transform` receives the live ProseMirror doc and returns the NEW full
 * ProseMirror doc to write, or `null` to abort with no write (a no-op). If
 * `transform` throws, the error is propagated to the caller (not swallowed).
 *
 * Returns the doc that was written, or the live doc when the transform aborted.
 *
 * @param pageId page whose collab document ("page.<pageId>") is mutated
 * @param collabToken token passed to the Hocuspocus provider
 * @param baseUrl API base URL; converted with buildCollabWsUrl
 * @param transform synchronous function producing the replacement doc, or
 *   null/undefined to abort
 * @returns promise of the written doc (or the live doc on abort)
 * @throws Error on connect timeout, persist timeout, disconnect/close before
 *   the write is acknowledged, authentication failure, or any error thrown by
 *   `transform` / `buildYDoc`
 */
export async function mutatePageContent(
  pageId: string,
  collabToken: string,
  baseUrl: string,
  transform: (liveDoc: any) => any | null,
): Promise<any> {
  return withPageLock(pageId, () => {
    if (process.env.DEBUG) {
      console.error(`Starting realtime content mutate for page ${pageId}`);
      // Token prefix is sensitive; only log it under DEBUG.
      console.error(
        `Token prefix: ${collabToken ? collabToken.substring(0, 5) : "NONE"}...`,
      );
    }

    // NOTE(review): only the provider is destroyed in cleanup(); this Y.Doc is
    // never explicitly destroyed — confirm that is intentional.
    const ydoc = new Y.Doc();
    const wsUrl = buildCollabWsUrl(baseUrl);
    if (process.env.DEBUG) console.error(`Connecting to WebSocket: ${wsUrl}`);

    return new Promise<any>((resolve, reject) => {
      let provider: HocuspocusProvider | undefined;
      let applied = false; // onSynced may fire again on reconnect — apply once.
      // True once the promise has been resolved or rejected; makes finish()
      // idempotent.
      let settled = false;
      // Set true on disconnect/close so a reconnect-driven unsyncedChanges->0
      // cannot be mistaken for a successful persist of our write.
      let connectionLost = false;
      let connectTimer: ReturnType<typeof setTimeout> | undefined;
      let persistTimer: ReturnType<typeof setTimeout> | undefined;
      let unsyncedHandler: ((data: { number: number }) => void) | undefined;

      // Release timers, the unsyncedChanges listener and the provider. Errors
      // from off()/destroy() are deliberately ignored (best-effort teardown).
      const cleanup = () => {
        if (connectTimer) clearTimeout(connectTimer);
        if (persistTimer) clearTimeout(persistTimer);
        if (provider) {
          if (unsyncedHandler) {
            try {
              provider.off("unsyncedChanges", unsyncedHandler);
            } catch (err) {}
          }
          try {
            provider.destroy();
          } catch (err) {}
        }
      };

      // Single exit point: settles the promise exactly once, after cleanup.
      const finish = (err: Error | null, value?: any) => {
        if (settled) return;
        settled = true;
        cleanup();
        if (err) reject(err);
        else resolve(value);
      };

      // This timer is only cleared by cleanup(), so it keeps running through
      // the persistence wait as well; it is started before the provider exists.
      connectTimer = setTimeout(() => {
        finish(new Error("Connection timeout to collaboration server"));
      }, CONNECT_TIMEOUT_MS);

      // Resolve once the server has acknowledged our update. The provider
      // increments unsyncedChanges when our local update is sent and
      // decrements it when the server replies with a SyncStatus(applied=true);
      // reaching 0 means the authoritative in-memory ydoc on the server now
      // contains our write.
      const waitForPersistence = () => {
        if (settled) return;
        // A missing provider is a failure, not a success: without it the write
        // can never have been acknowledged. Only an actual unsyncedChanges===0
        // on a live provider counts as persisted.
        if (!provider) {
          finish(new Error("collab provider gone before persistence"));
          return;
        }
        // Already acknowledged (or nothing pending): resolve immediately.
        if (provider.unsyncedChanges === 0) {
          finish(null, lastWrittenDoc);
          return;
        }
        persistTimer = setTimeout(() => {
          finish(
            new Error(
              "Timeout waiting for collaboration server to persist the update",
            ),
          );
        }, PERSIST_TIMEOUT_MS);
        unsyncedHandler = (data: { number: number }) => {
          // Only treat unsyncedChanges->0 as success when the connection is
          // still up. A transient disconnect + reconnect handshake can drive
          // the counter back to 0 without our write being re-transmitted; in
          // that case let the disconnect/close error win instead.
          if (data.number === 0 && !connectionLost) {
            finish(null, lastWrittenDoc);
          }
        };
        provider.on("unsyncedChanges", unsyncedHandler);
      };

      // Value the promise resolves with; assigned in onSynced before
      // waitForPersistence() (declared above) is ever invoked.
      let lastWrittenDoc: any;

      provider = new HocuspocusProvider({
        url: wsUrl,
        name: `page.${pageId}`,
        document: ydoc,
        token: collabToken,
        // @ts-ignore - Required for Node.js environment
        WebSocketPolyfill: WebSocket,
        onConnect: () => {
          if (process.env.DEBUG) console.error("WS Connect");
        },
        // An unexpected disconnect/close while we are still waiting (during the
        // connect-wait before onSynced, or during the persistence wait after the
        // write) means the update will never be acknowledged — surface it now
        // instead of hanging until the connect/persist timeout fires. `finish`
        // is idempotent via the `settled` flag, so the onClose that our own
        // cleanup()->provider.destroy() triggers (after settled=true is set) is
        // a harmless no-op and cannot cause a double-resolve.
        onDisconnect: () => {
          if (process.env.DEBUG) console.error("WS Disconnect");
          // Mark BEFORE finish so the unsyncedChanges handler (if it races)
          // sees the connection as lost and won't report a false success.
          connectionLost = true;
          finish(
            new Error(
              "Collaboration connection closed before the update was persisted/synced",
            ),
          );
        },
        onClose: () => {
          if (process.env.DEBUG) console.error("WS Close");
          // Mark BEFORE finish so the unsyncedChanges handler (if it races)
          // sees the connection as lost and won't report a false success.
          connectionLost = true;
          finish(
            new Error(
              "Collaboration connection closed before the update was persisted/synced",
            ),
          );
        },
        onSynced: () => {
          if (applied || settled) return;
          applied = true;
          if (process.env.DEBUG) console.error("Connected and synced!");

          // CRITICAL: everything between reading the live doc and writing it
          // back must stay synchronous (no await). While the JS event loop is
          // not yielded, no incoming remote update can interleave, so any
          // already-synced concurrent edits are preserved in liveDoc.
          let newDoc: any;
          try {
            let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
            // Normalize a missing/malformed live doc to an empty document so
            // `transform` always receives a doc with a `content` array.
            if (
              !liveDoc ||
              typeof liveDoc !== "object" ||
              !Array.isArray(liveDoc.content)
            ) {
              liveDoc = { type: "doc", content: [] };
            }

            newDoc = transform(liveDoc);

            if (newDoc == null) {
              // Transform aborted — write nothing, return the live doc.
              lastWrittenDoc = liveDoc;
              finish(null, liveDoc);
              return;
            }

            // Encode first: if the new doc is not Yjs-encodable this throws
            // before the live fragment has been touched.
            const tempDoc = buildYDoc(newDoc);
            // Fetch the fragment immediately before the transact that mutates
            // it, rather than reusing a handle grabbed across the transform.
            const fragment = ydoc.getXmlFragment("default");
            // Full replace in one transaction: clear the fragment, then apply
            // the freshly encoded document's state.
            ydoc.transact(() => {
              if (fragment.length > 0) {
                fragment.delete(0, fragment.length);
              }
              Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc));
            });
          } catch (e) {
            // Includes errors thrown by transform (e.g. "afterText not found",
            // "text not found"): propagate them verbatim to the caller.
            finish(e instanceof Error ? e : new Error(String(e)));
            return;
          }

          lastWrittenDoc = newDoc;
          if (process.env.DEBUG)
            console.error("Content written, waiting for server to persist...");
          waitForPersistence();
        },
        onAuthenticationFailed: () => {
          finish(
            new Error("Authentication failed for collaboration connection"),
          );
        },
      });
    });
  });
}

/**
 * Replace the live content of a page over the collaboration websocket.
 * Accepts a ready ProseMirror JSON document; the caller controls whether
 * it was produced from markdown (ids regenerate) or edited in place
 * (existing block ids preserved).
 *
 * This is an intentional full replace (used by update_page / update_page_json),
 * but now runs under the per-page lock and waits for server persistence via
 * mutatePageContent.
 *
 * @param pageId page to overwrite
 * @param prosemirrorDoc replacement document; must be an object with
 *   `type === "doc"`
 * @param collabToken token for the collaboration connection
 * @param baseUrl API base URL
 * @throws Error "replacePageContent: invalid ProseMirror document" when the
 *   document fails the shape check, plus anything mutatePageContent rejects with
 */
export async function replacePageContent(
  pageId: string,
  prosemirrorDoc: any,
  collabToken: string,
  baseUrl: string,
): Promise<void> {
  // Fail fast on a bad document instead of deferring the failure into the
  // collaboration write (where TiptapTransformer.toYdoc(undefined) used to
  // throw). The transform must return a valid ProseMirror doc.
  if (
    prosemirrorDoc == null ||
    typeof prosemirrorDoc !== "object" ||
    prosemirrorDoc.type !== "doc"
  ) {
    throw new Error("replacePageContent: invalid ProseMirror document");
  }
  await mutatePageContent(pageId, collabToken, baseUrl, () => prosemirrorDoc);
}

/**
 * Markdown update path (kept for backwards compatibility).
 * NOTE: this re-imports the whole document — block ids are regenerated.
 * Tables and :::callout::: blocks survive thanks to the full schema.
+ */ +export async function updatePageContentRealtime( + pageId: string, + markdownContent: string, + collabToken: string, + baseUrl: string, +): Promise<void> { + const tiptapJson = await markdownToProseMirror(markdownContent); + await mutatePageContent(pageId, collabToken, baseUrl, () => tiptapJson); +} diff --git a/packages/docmost-client/src/lib/diff.ts b/packages/docmost-client/src/lib/diff.ts new file mode 100644 index 0000000..25b1914 --- /dev/null +++ b/packages/docmost-client/src/lib/diff.ts @@ -0,0 +1,319 @@ +/** + * Headless, Docmost-equivalent document diff. + * + * Docmost's history editor computes a change set with the exact pipeline below + * (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as + * editor decorations. This module runs the SAME computation but serializes the + * result to text + integrity counts instead of decorations, so a diff can be + * previewed without a browser. + * + * recreateTransform here comes from @fellow/prosemirror-recreate-transform, the + * maintained published fork of the MIT prosemirror-recreate-steps source that + * Docmost vendors in @docmost/editor-ext; it exposes the identical + * recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff }) + * signature. + * + * If recreateTransform / the changeset throws on a pathological document pair, + * we fall back to a coarse block-level text diff so the tool never hard-fails. + */ + +import { getSchema } from "@tiptap/core"; +import { Node } from "@tiptap/pm/model"; +import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset"; +import { recreateTransform } from "@fellow/prosemirror-recreate-transform"; +import { docmostExtensions } from "./docmost-schema.js"; + +/** A single inserted/deleted change with its containing-block context. */ +export interface DiffChange { + op: "insert" | "delete"; + /** Lead (plain) text of the block that contains the change, for context. */ + block: string; + /** The inserted or deleted text. 
*/ + text: string; +} + +/** Integrity counts as [old, new] tuples; footnoteMarkers as [oldList, newList]. */ +export interface DiffIntegrity { + images: [number, number]; + links: [number, number]; + tables: [number, number]; + callouts: [number, number]; + footnoteMarkers: [number[], number[]]; +} + +export interface DiffResult { + summary: { inserted: number; deleted: number; blocksChanged: number }; + integrity: DiffIntegrity; + changes: DiffChange[]; + /** Human-readable unified-ish summary. */ + markdown: string; +} + +/** Build the schema once; it is pure and reused across calls. */ +const schema = getSchema(docmostExtensions); + +/** Recursively concatenate the plain text of a JSON node. */ +function plainText(node: any): string { + if (!node || typeof node !== "object") return ""; + let out = ""; + if (typeof node.text === "string") out += node.text; + if (Array.isArray(node.content)) { + for (const child of node.content) out += plainText(child); + } + return out; +} + +/** Count nodes in a JSON doc that satisfy `pred` (recursive). */ +function countNodes(doc: any, pred: (node: any) => boolean): number { + let n = 0; + const visit = (node: any): void => { + if (!node || typeof node !== "object") return; + if (pred(node)) n++; + if (Array.isArray(node.content)) for (const c of node.content) visit(c); + }; + visit(doc); + return n; +} + +/** + * Count UNIQUE links in a JSON doc by their `href`. A single link can be split + * across several adjacent text runs (e.g. a "link+bold" run followed by a "link" + * run); counting link-bearing runs would over-count it. Walking the tree and + * collecting hrefs into a Set keys each distinct link once. Link marks with a + * missing/empty href are bucketed under a single "" key so a malformed link is + * still counted as one. 
+ */ +function countUniqueLinks(doc: any): number { + const hrefs = new Set<string>(); + const visit = (node: any): void => { + if (!node || typeof node !== "object") return; + if (node.type === "text" && Array.isArray(node.marks)) { + for (const m of node.marks) { + if (m && m.type === "link") { + const href = m.attrs && typeof m.attrs.href === "string" ? m.attrs.href : ""; + hrefs.add(href); + } + } + } + if (Array.isArray(node.content)) for (const c of node.content) visit(c); + }; + visit(doc); + return hrefs.size; +} + +/** + * Parse the ordered list of integers from `[N]` footnote markers found in the + * BODY only (every top-level block before the first "Примечания..." notes + * heading; if no such heading, the whole doc). Returned in reading order. + */ +function footnoteMarkers(doc: any, notesHeading: string): number[] { + const top: any[] = Array.isArray(doc?.content) ? doc.content : []; + const notesIdx = top.findIndex( + (n) => + n && + n.type === "heading" && + plainText(n).trim() === notesHeading, + ); + const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top; + const markers: number[] = []; + const re = /\[(\d+)\]/g; + for (const block of bodyBlocks) { + const text = plainText(block); + let m: RegExpExecArray | null; + re.lastIndex = 0; + while ((m = re.exec(text)) !== null) { + markers.push(Number(m[1])); + } + } + return markers; +} + +/** Compute the [old,new] integrity tuples for two JSON docs. 
*/ +function computeIntegrity( + oldDoc: any, + newDoc: any, + notesHeading: string, +): DiffIntegrity { + const images: [number, number] = [ + countNodes(oldDoc, (n) => n.type === "image"), + countNodes(newDoc, (n) => n.type === "image"), + ]; + const links: [number, number] = [ + countUniqueLinks(oldDoc), + countUniqueLinks(newDoc), + ]; + const tables: [number, number] = [ + countNodes(oldDoc, (n) => n.type === "table"), + countNodes(newDoc, (n) => n.type === "table"), + ]; + const callouts: [number, number] = [ + countNodes(oldDoc, (n) => n.type === "callout"), + countNodes(newDoc, (n) => n.type === "callout"), + ]; + const fns: [number[], number[]] = [ + footnoteMarkers(oldDoc, notesHeading), + footnoteMarkers(newDoc, notesHeading), + ]; + return { images, links, tables, callouts, footnoteMarkers: fns }; +} + +/** + * Resolve the lead text of the top-level block in a ProseMirror Node that + * contains the given document position. Returns "" when out of range. + */ +function blockContextAt(node: Node, pos: number): string { + try { + const clamped = Math.max(0, Math.min(pos, node.content.size)); + const $pos = node.resolve(clamped); + // depth 1 is the top-level block in a doc node. + const block = $pos.depth >= 1 ? $pos.node(1) : $pos.node(0); + const text = block.textContent || ""; + return text.length > 80 ? text.slice(0, 77) + "..." : text; + } catch { + return ""; + } +} + +/** Truncate a string for the markdown summary. */ +function truncate(s: string, n = 120): string { + return s.length > n ? s.slice(0, n - 3) + "..." : s; +} + +/** + * Coarse fallback: a block-by-block plain-text diff. Used only when the precise + * changeset pipeline throws, so the tool degrades gracefully instead of failing. + */ +function coarseDiff(oldDoc: any, newDoc: any): DiffChange[] { + const oldBlocks: any[] = Array.isArray(oldDoc?.content) ? oldDoc.content : []; + const newBlocks: any[] = Array.isArray(newDoc?.content) ? 
newDoc.content : []; + const oldTexts = oldBlocks.map(plainText); + const newTexts = newBlocks.map(plainText); + const oldSet = new Set(oldTexts); + const newSet = new Set(newTexts); + const changes: DiffChange[] = []; + for (const t of oldTexts) { + if (!newSet.has(t) && t.trim() !== "") { + changes.push({ op: "delete", block: truncate(t, 80), text: t }); + } + } + for (const t of newTexts) { + if (!oldSet.has(t) && t.trim() !== "") { + changes.push({ op: "insert", block: truncate(t, 80), text: t }); + } + } + return changes; +} + +/** Build the human-readable unified-ish markdown summary. */ +function renderMarkdown( + result: Omit<DiffResult, "markdown">, + fellBack: boolean, +): string { + const lines: string[] = []; + const { summary, integrity, changes } = result; + lines.push( + `# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`, + ); + if (fellBack) { + lines.push(""); + lines.push("> note: precise diff failed; coarse block-level diff shown."); + } + lines.push(""); + lines.push("## Integrity (old -> new)"); + lines.push(`- images: ${integrity.images[0]} -> ${integrity.images[1]}`); + lines.push(`- links: ${integrity.links[0]} -> ${integrity.links[1]}`); + lines.push(`- tables: ${integrity.tables[0]} -> ${integrity.tables[1]}`); + lines.push(`- callouts: ${integrity.callouts[0]} -> ${integrity.callouts[1]}`); + lines.push( + `- footnoteMarkers: [${integrity.footnoteMarkers[0].join(", ")}] -> [${integrity.footnoteMarkers[1].join(", ")}]`, + ); + lines.push(""); + lines.push("## Changes"); + if (changes.length === 0) { + lines.push("(no textual changes)"); + } else { + for (const c of changes) { + const sign = c.op === "insert" ? "+" : "-"; + const ctx = c.block ? 
` @ ${truncate(c.block, 60)}` : ""; + lines.push(`${sign} ${truncate(c.text)}${ctx}`); + } + } + return lines.join("\n"); +} + +/** + * Diff two ProseMirror JSON documents the way Docmost's history editor does and + * serialize the result to text + integrity counts. + * + * @param oldDocJson the earlier document + * @param newDocJson the later document + * @param notesHeading heading delimiting body from notes for footnote counting + */ +export function diffDocs( + oldDocJson: any, + newDocJson: any, + notesHeading: string = "Примечания переводчика", +): DiffResult { + const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading); + + let changes: DiffChange[] = []; + let inserted = 0; + let deleted = 0; + let fellBack = false; + const changedBlocks = new Set<string>(); + + try { + const oldNode = Node.fromJSON(schema, oldDocJson); + const newNode = Node.fromJSON(schema, newDocJson); + const tr = recreateTransform(oldNode, newNode, { + complexSteps: false, + wordDiffs: true, + simplifyDiff: true, + }); + const changeSet = ChangeSet.create(oldNode).addSteps( + tr.doc, + tr.mapping.maps, + [], + ); + const simplified = simplifyChanges(changeSet.changes, newNode); + + for (const change of simplified) { + // Deleted text lives in the OLD doc coordinate range [fromA, toA). + if (change.toA > change.fromA) { + const text = oldNode.textBetween(change.fromA, change.toA, "\n", " "); + if (text.length > 0) { + deleted += text.length; + const block = blockContextAt(oldNode, change.fromA); + changes.push({ op: "delete", block, text }); + if (block) changedBlocks.add("d:" + block); + } + } + // Inserted text lives in the NEW doc coordinate range [fromB, toB). 
+ if (change.toB > change.fromB) { + const text = newNode.textBetween(change.fromB, change.toB, "\n", " "); + if (text.length > 0) { + inserted += text.length; + const block = blockContextAt(newNode, change.fromB); + changes.push({ op: "insert", block, text }); + if (block) changedBlocks.add("i:" + block); + } + } + } + } catch { + // Pathological pair: degrade to a coarse block-level diff so we never throw. + fellBack = true; + changes = coarseDiff(oldDocJson, newDocJson); + for (const c of changes) { + if (c.op === "insert") inserted += c.text.length; + else deleted += c.text.length; + if (c.block) changedBlocks.add(c.op[0] + ":" + c.block); + } + } + + const partial: Omit<DiffResult, "markdown"> = { + summary: { inserted, deleted, blocksChanged: changedBlocks.size }, + integrity, + changes, + }; + return { ...partial, markdown: renderMarkdown(partial, fellBack) }; +} diff --git a/packages/docmost-client/src/lib/docmost-schema.ts b/packages/docmost-client/src/lib/docmost-schema.ts new file mode 100644 index 0000000..331d589 --- /dev/null +++ b/packages/docmost-client/src/lib/docmost-schema.ts @@ -0,0 +1,1065 @@ +/** + * Full TipTap extension set matching the real Docmost document schema. + * + * The default StarterKit-only schema silently destroys Docmost-specific + * nodes (callout, table) and drops attributes it does not know about + * (node ids, image sizing, link targets). Every code path that converts + * to or from ProseMirror JSON must use THIS set, otherwise a round-trip + * loses content. 
+ */ +import StarterKit from "@tiptap/starter-kit"; +import Image from "@tiptap/extension-image"; +import TaskList from "@tiptap/extension-task-list"; +import TaskItem from "@tiptap/extension-task-item"; +import Highlight from "@tiptap/extension-highlight"; +import Subscript from "@tiptap/extension-subscript"; +import Superscript from "@tiptap/extension-superscript"; +import { Node, Extension, Mark, getStyleProperty } from "@tiptap/core"; + +/** Allowed Docmost callout types; anything else falls back to "info". */ +const CALLOUT_TYPES = ["info", "warning", "danger", "success"]; +export const clampCalloutType = (value: string | null | undefined): string => + value && CALLOUT_TYPES.includes(value.toLowerCase()) + ? value.toLowerCase() + : "info"; + +/** + * Allowlist guard for CSS color values imported from HTML. + * + * Docmost interpolates stored mark colors straight into an inline style + * attribute (e.g. style="background-color: ${color}" / "color: ${color}"). + * An unsanitized value such as `red; --x: url(...)` or `red"><script>` would + * let a crafted document break out of the style attribute. We therefore only + * accept a narrow, well-formed subset of CSS <color> syntax and reject (-> null) + * anything else. + * + * Accepted forms: + * - named colors: letters only, e.g. "red", "rebeccapurple" + * - hex: #rgb, #rgba, #rrggbb, #rrggbbaa + * - functional notation: rgb()/rgba()/hsl()/hsla() containing only + * digits, %, ., commas, spaces and slashes + */ +const SAFE_COLOR_RE = + /^(?:[a-zA-Z]+|#(?:[0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|(?:rgb|rgba|hsl|hsla)\([0-9.,%/\s]+\))$/; +export const sanitizeCssColor = ( + value: string | null | undefined, +): string | null => { + if (typeof value !== "string") return null; + const color = value.trim(); + return color && SAFE_COLOR_RE.test(color) ? color : null; +}; + +/** Docmost callout (info/warning/danger/success banner). 
*/
const Callout = Node.create({
  name: "callout",
  group: "block",
  content: "block+",
  defining: true,
  addAttributes() {
    return {
      // Read the type from data-callout-type so generateJSON(html) preserves
      // it; without an explicit parseHTML every imported callout became "info".
      type: {
        default: "info",
        parseHTML: (el: HTMLElement) =>
          clampCalloutType(el.getAttribute("data-callout-type")),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-callout-type": clampCalloutType(attrs.type),
        }),
      },
      // Optional icon; only written to HTML when set.
      icon: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-icon"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.icon ? { "data-icon": attrs.icon } : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="callout"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["div", { "data-type": "callout", ...HTMLAttributes }, 0];
  },
});

/** Minimal table family: enough for schema round-trips and HTML parsing. */
const Table = Node.create({
  name: "table",
  group: "block",
  content: "tableRow+",
  isolating: true,
  parseHTML() {
    return [{ tag: "table" }];
  },
  // Rows are always wrapped in a <tbody>; no table-level attributes are rendered.
  renderHTML() {
    return ["table", ["tbody", 0]];
  },
});

/** Table row; holds any mix of data and header cells (possibly none). */
const TableRow = Node.create({
  name: "tableRow",
  content: "(tableCell | tableHeader)*",
  parseHTML() {
    return [{ tag: "tr" }];
  },
  renderHTML() {
    return ["tr", 0];
  },
});

/**
 * Attribute set shared by tableCell and tableHeader. Only `align` declares an
 * explicit HTML mapping here; the others just declare defaults.
 */
const cellAttributes = () => ({
  colspan: { default: 1 },
  rowspan: { default: 1 },
  colwidth: { default: null },
  backgroundColor: { default: null },
  backgroundColorName: { default: null },
  // Column alignment so GFM aligned tables (|:--|:-:|--:|) round-trip.
  align: {
    default: null,
    parseHTML: (el: HTMLElement) =>
      el.getAttribute("align") || el.style.textAlign || null,
    renderHTML: (attrs: Record<string, any>) =>
      attrs.align ? { align: attrs.align } : {},
  },
});

/** Data cell (<td>). renderHTML emits a bare <td> without the cell attributes. */
const TableCell = Node.create({
  name: "tableCell",
  content: "block+",
  isolating: true,
  addAttributes: cellAttributes,
  parseHTML() {
    return [{ tag: "td" }];
  },
  renderHTML() {
    return ["td", 0];
  },
});

/** Header cell (<th>). renderHTML emits a bare <th> without the cell attributes. */
const TableHeader = Node.create({
  name: "tableHeader",
  content: "block+",
  isolating: true,
  addAttributes: cellAttributes,
  parseHTML() {
    return [{ tag: "th" }];
  },
  renderHTML() {
    return ["th", 0];
  },
});

/**
 * Attributes Docmost stores on standard nodes that the stock extensions
 * do not declare. Without these, Node.fromJSON silently drops them —
 * including the block ids that heading anchors rely on.
 */
const DocmostAttributes = Extension.create({
  name: "docmostAttributes",
  addGlobalAttributes() {
    // Each entry adds the listed attributes (all defaulting to null) to the
    // named node/mark types.
    return [
      {
        types: ["heading", "paragraph"],
        attributes: {
          id: { default: null },
          indent: { default: null },
          textAlign: { default: null },
        },
      },
      {
        types: ["image"],
        attributes: {
          align: { default: null },
          attachmentId: { default: null },
          aspectRatio: { default: null },
          height: { default: null },
          placeholder: { default: null },
          size: { default: null },
          width: { default: null },
        },
      },
      {
        types: ["orderedList"],
        attributes: { type: { default: null } },
      },
      {
        types: ["link"],
        attributes: { internal: { default: null }, title: { default: null } },
      },
    ];
  },
});

/**
 * Docmost inline comment mark. Anchors a comment thread to a text range via
 * `commentId`. Without it, any document containing comment highlights fails to
 * round-trip through the schema ("There is no mark type comment in this schema"),
 * which breaks update_page_json and edit_page_text on every commented page.
 * Mirrors Docmost's @docmost/editor-ext comment mark (commentId / resolved).
+ */ +const Comment = Mark.create({ + name: "comment", + exitable: true, + inclusive: false, + addAttributes() { + return { + commentId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-comment-id"), + renderHTML: (attrs: Record<string, any>) => + attrs.commentId ? { "data-comment-id": attrs.commentId } : {}, + }, + resolved: { + default: false, + parseHTML: (el: HTMLElement) => + el.getAttribute("data-resolved") === "true", + renderHTML: (attrs: Record<string, any>) => + attrs.resolved ? { "data-resolved": "true" } : {}, + }, + }; + }, + parseHTML() { + return [{ tag: "span[data-comment-id]" }]; + }, + renderHTML({ HTMLAttributes }) { + return ["span", { class: "comment-mark", ...HTMLAttributes }, 0]; + }, +}); + +/** + * Text color mark. The markdown-converter emits colored text as + * <span style="color: ...">, but with no mark parsing it back the color was + * silently dropped on import. This mirrors TipTap's @tiptap/extension-text-style + * `textStyle` mark (the name Docmost expects) and carries a single `color` + * attribute. The parsed color is passed through the allowlist guard so a crafted + * style cannot break out of the attribute when Docmost re-renders it. + */ +const TextStyle = Mark.create({ + name: "textStyle", + addAttributes() { + return { + color: { + default: null, + parseHTML: (el: HTMLElement) => + sanitizeCssColor( + el.style.color || el.getAttribute("data-color"), + ), + renderHTML: (attrs: Record<string, any>) => { + const color = sanitizeCssColor(attrs.color); + return color ? { style: `color: ${color}` } : {}; + }, + }, + }; + }, + parseHTML() { + return [ + { + tag: "span", + // Only claim a plain colored span. Do NOT match spans that are already a + // comment mark (data-comment-id) or a mention node (data-type=mention), + // otherwise importing such HTML would silently drop the comment/mention. 
+ getAttrs: (el: HTMLElement) => + el.style.color && + !el.getAttribute("data-comment-id") && + el.getAttribute("data-type") !== "mention" + ? {} + : false, + }, + ]; + }, + renderHTML({ HTMLAttributes }) { + return ["span", HTMLAttributes, 0]; + }, +}); + +/** + * Passthrough definitions for the remaining Docmost-specific nodes. + * + * TiptapTransformer.toYdoc (the write path every mutation uses) throws + * "Unknown node type: X" for any node not registered here, so editing ANY + * page that contains one of these nodes used to fail outright. The read path + * (fromYdoc) accepts them, which is why they appear in real documents. + * + * Each node below mirrors the real @docmost/editor-ext definition's name, + * group, content, inline/atom flags and attribute keys (with the same data-* + * HTML mapping) so that a fromYdoc -> transform -> toYdoc round-trip both + * validates and preserves attributes faithfully. Interactive concerns + * (node views, commands, keyboard shortcuts, input rules, suggestion plugins) + * are intentionally omitted: the MCP server never renders these nodes, it only + * needs the schema to accept and carry them. The Callout node above is the + * pattern these follow. + */ + +/** Docmost @mention (user/page reference). Inline atom. */ +const Mention = Node.create({ + name: "mention", + group: "inline", + inline: true, + selectable: true, + atom: true, + draggable: true, + addAttributes() { + return { + id: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-id"), + renderHTML: (attrs: Record<string, any>) => + attrs.id ? { "data-id": attrs.id } : {}, + }, + label: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-label"), + renderHTML: (attrs: Record<string, any>) => + attrs.label ? { "data-label": attrs.label } : {}, + }, + entityType: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-entity-type"), + renderHTML: (attrs: Record<string, any>) => + attrs.entityType ? 
{ "data-entity-type": attrs.entityType } : {}, + }, + entityId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-entity-id"), + renderHTML: (attrs: Record<string, any>) => + attrs.entityId ? { "data-entity-id": attrs.entityId } : {}, + }, + slugId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-slug-id"), + renderHTML: (attrs: Record<string, any>) => + attrs.slugId ? { "data-slug-id": attrs.slugId } : {}, + }, + creatorId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-creator-id"), + renderHTML: (attrs: Record<string, any>) => + attrs.creatorId ? { "data-creator-id": attrs.creatorId } : {}, + }, + anchorId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-anchor-id"), + renderHTML: (attrs: Record<string, any>) => + attrs.anchorId ? { "data-anchor-id": attrs.anchorId } : {}, + }, + }; + }, + parseHTML() { + return [{ tag: 'span[data-type="mention"]' }]; + }, + renderHTML({ HTMLAttributes }) { + return ["span", { "data-type": "mention", ...HTMLAttributes }, 0]; + }, +}); + +/** Inline KaTeX expression. Carries the LaTeX source in `text`. */ +const MathInline = Node.create({ + name: "mathInline", + group: "inline", + inline: true, + atom: true, + addAttributes() { + return { + text: { default: "" }, + }; + }, + parseHTML() { + return [{ tag: 'span[data-type="mathInline"]' }]; + }, + renderHTML({ HTMLAttributes }) { + return [ + "span", + { "data-type": "mathInline", "data-katex": "true" }, + `${HTMLAttributes.text ?? ""}`, + ]; + }, +}); + +/** Block KaTeX expression. Carries the LaTeX source in `text`. 
*/
const MathBlock = Node.create({
  name: "mathBlock",
  group: "block",
  atom: true,
  isolating: true,
  addAttributes() {
    return {
      text: { default: "" },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="mathBlock"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // The LaTeX source is emitted as the element's text child.
    return [
      "div",
      { "data-type": "mathBlock", "data-katex": "true" },
      `${HTMLAttributes.text ?? ""}`,
    ];
  },
});

/** Collapsible <details> wrapper: summary + content children. */
const Details = Node.create({
  name: "details",
  group: "block",
  content: "detailsSummary detailsContent",
  defining: true,
  isolating: true,
  addAttributes() {
    return {
      open: {
        default: false,
        // `open` is a boolean HTML attribute: <details open> reports
        // getAttribute("open") === "", which is falsy and made renderHTML drop
        // the open state. Presence is what matters, so test with hasAttribute.
        parseHTML: (el: HTMLElement) => el.hasAttribute("open"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.open ? { open: "" } : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: "details" }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["details", { ...HTMLAttributes }, 0];
  },
});

/** Clickable summary line of a <details> block. */
const DetailsSummary = Node.create({
  name: "detailsSummary",
  group: "block",
  content: "inline*",
  defining: true,
  isolating: true,
  selectable: false,
  parseHTML() {
    return [{ tag: "summary" }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["summary", { "data-type": "detailsSummary", ...HTMLAttributes }, 0];
  },
});

/** Body of a <details> block. Permissive content so fromYdoc output validates. */
const DetailsContent = Node.create({
  name: "detailsContent",
  group: "block",
  // Docmost declares block* (an empty details body is valid); block+ would
  // reject a collapsed/empty details on round-trip.
  content: "block*",
  defining: true,
  selectable: false,
  parseHTML() {
    return [{ tag: 'div[data-type="detailsContent"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["div", { "data-type": "detailsContent", ...HTMLAttributes }, 0];
  },
});

/** File attachment card (non-image upload). Block atom.
*/
const Attachment = Node.create({
  name: "attachment",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes() {
    return {
      // Always rendered, falling back to an empty string.
      url: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-url"),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-attachment-url": attrs.url ?? "",
        }),
      },
      name: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-name"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.name ? { "data-attachment-name": attrs.name } : {},
      },
      mime: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-mime"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.mime ? { "data-attachment-mime": attrs.mime } : {},
      },
      // `!= null` (not truthiness) so a size of 0 is still rendered.
      size: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-size"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.size != null ? { "data-attachment-size": attrs.size } : {},
      },
      attachmentId: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.attachmentId
            ? { "data-attachment-id": attrs.attachmentId }
            : {},
      },
      // Docmost declares `placeholder` (a transient upload key, not rendered
      // to HTML). Carry it so a round-trip never hits "Unsupported attribute".
      placeholder: { default: null },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="attachment"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // No content hole: this node declares no `content`, so it is a leaf, and
    // ProseMirror's DOMSerializer throws "Content hole not allowed in a leaf
    // node spec" if a leaf's output spec contains `0`.
    return ["div", { "data-type": "attachment", ...HTMLAttributes }];
  },
});

/** Uploaded <video> player. Block atom.
*/
const Video = Node.create({
  name: "video",
  group: "block",
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes() {
    return {
      src: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("src"),
        renderHTML: (attrs: Record<string, any>) => ({ src: attrs.src ?? "" }),
      },
      // Stored as `alt`, mapped to/from the aria-label HTML attribute.
      alt: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("aria-label"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.alt ? { "aria-label": attrs.alt } : {},
      },
      attachmentId: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.attachmentId
            ? { "data-attachment-id": attrs.attachmentId }
            : {},
      },
      width: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("width"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.width != null ? { width: attrs.width } : {},
      },
      height: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("height"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.height != null ? { height: attrs.height } : {},
      },
      size: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-size"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.size != null ? { "data-size": attrs.size } : {},
      },
      align: {
        default: "center",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-align"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.align ? { "data-align": attrs.align } : {},
      },
      aspectRatio: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-aspect-ratio"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.aspectRatio != null
            ? { "data-aspect-ratio": attrs.aspectRatio }
            : {},
      },
      // Docmost declares `placeholder` (a transient upload key, not rendered
      // to HTML). Carry it so a round-trip never hits "Unsupported attribute".
      placeholder: { default: null },
    };
  },
  parseHTML() {
    return [{ tag: "video" }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["video", { controls: "true", ...HTMLAttributes }];
  },
});

/**
 * Defensive passthrough for a `youtube` node. Docmost itself has no dedicated
 * youtube node (YouTube is handled via `embed`), but the converter read path
 * references this type, so accept it as a generic block atom that preserves
 * its src so legacy/external documents survive a round-trip.
 */
const Youtube = Node.create({
  name: "youtube",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes() {
    return {
      src: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-src"),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-src": attrs.src ?? "",
        }),
      },
      width: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-width"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.width != null ? { "data-width": attrs.width } : {},
      },
      height: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-height"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.height != null ? { "data-height": attrs.height } : {},
      },
      align: {
        default: "center",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-align"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.align ? { "data-align": attrs.align } : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="youtube"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // No content hole: this node declares no `content`, so it is a leaf, and
    // ProseMirror's DOMSerializer throws "Content hole not allowed in a leaf
    // node spec" if a leaf's output spec contains `0` (Video above already
    // omits it).
    return ["div", { "data-type": "youtube", ...HTMLAttributes }];
  },
});

/** Generic embed (provider iframe). Block atom.
*/
const Embed = Node.create({
  name: "embed",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes() {
    // Unlike most passthrough nodes, every embed attribute is always written
    // to HTML (no "omit when unset" branch).
    return {
      src: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-src"),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-src": attrs.src ?? "",
        }),
      },
      provider: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-provider"),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-provider": attrs.provider ?? "",
        }),
      },
      align: {
        default: "center",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-align"),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-align": attrs.align ?? "center",
        }),
      },
      width: {
        default: 800,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-width"),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-width": attrs.width,
        }),
      },
      height: {
        default: 600,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-height"),
        renderHTML: (attrs: Record<string, any>) => ({
          "data-height": attrs.height,
        }),
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="embed"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // No content hole: this node declares no `content`, so it is a leaf, and
    // ProseMirror's DOMSerializer throws "Content hole not allowed in a leaf
    // node spec" if a leaf's output spec contains `0`.
    return ["div", { "data-type": "embed", ...HTMLAttributes }];
  },
});

/** Shared attribute set for drawio/excalidraw diagram nodes. */
const diagramAttributes = () => ({
  src: {
    default: "",
    parseHTML: (el: HTMLElement) => el.getAttribute("data-src"),
    renderHTML: (attrs: Record<string, any>) => ({
      "data-src": attrs.src ?? "",
    }),
  },
  title: {
    default: null,
    parseHTML: (el: HTMLElement) => el.getAttribute("data-title"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.title ? { "data-title": attrs.title } : {},
  },
  alt: {
    default: null,
    parseHTML: (el: HTMLElement) => el.getAttribute("data-alt"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.alt ? { "data-alt": attrs.alt } : {},
  },
  width: {
    default: null,
    parseHTML: (el: HTMLElement) => el.getAttribute("data-width"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.width != null ? { "data-width": attrs.width } : {},
  },
  height: {
    default: null,
    parseHTML: (el: HTMLElement) => el.getAttribute("data-height"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.height != null ? { "data-height": attrs.height } : {},
  },
  size: {
    default: null,
    parseHTML: (el: HTMLElement) => el.getAttribute("data-size"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.size != null ? { "data-size": attrs.size } : {},
  },
  aspectRatio: {
    default: null,
    parseHTML: (el: HTMLElement) => el.getAttribute("data-aspect-ratio"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.aspectRatio != null
        ? { "data-aspect-ratio": attrs.aspectRatio }
        : {},
  },
  align: {
    default: "center",
    parseHTML: (el: HTMLElement) => el.getAttribute("data-align"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.align ? { "data-align": attrs.align } : {},
  },
  attachmentId: {
    default: null,
    parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-id"),
    renderHTML: (attrs: Record<string, any>) =>
      attrs.attachmentId ? { "data-attachment-id": attrs.attachmentId } : {},
  },
});

/** draw.io diagram. Block atom (image-backed). */
const Drawio = Node.create({
  name: "drawio",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes: diagramAttributes,
  parseHTML() {
    return [{ tag: 'div[data-type="drawio"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // Leaf node: no content hole (see the note on Embed.renderHTML's contract
    // in ProseMirror — a `0` in a leaf's output spec makes serialization throw).
    return ["div", { "data-type": "drawio", ...HTMLAttributes }];
  },
});

/** Excalidraw diagram. Block atom (image-backed).
*/
const Excalidraw = Node.create({
  name: "excalidraw",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes: diagramAttributes,
  parseHTML() {
    return [{ tag: 'div[data-type="excalidraw"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // No content hole: this node declares no `content`, so it is a leaf, and
    // ProseMirror's DOMSerializer throws "Content hole not allowed in a leaf
    // node spec" if a leaf's output spec contains `0`.
    return ["div", { "data-type": "excalidraw", ...HTMLAttributes }];
  },
});

/** Multi-column layout container holding one or more `column` children. */
const Columns = Node.create({
  name: "columns",
  group: "block",
  content: "column+",
  defining: true,
  isolating: true,
  addAttributes() {
    return {
      layout: {
        default: "two_equal",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-layout"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.layout ? { "data-layout": attrs.layout } : {},
      },
      // "normal" is the implicit value: it is assumed when the attribute is
      // missing and is not written back to HTML.
      widthMode: {
        default: "normal",
        parseHTML: (el: HTMLElement) =>
          el.getAttribute("data-width-mode") || "normal",
        renderHTML: (attrs: Record<string, any>) =>
          attrs.widthMode && attrs.widthMode !== "normal"
            ? { "data-width-mode": attrs.widthMode }
            : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="columns"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["div", { "data-type": "columns", ...HTMLAttributes }, 0];
  },
});

/** Single column within a `columns` layout. */
const Column = Node.create({
  name: "column",
  group: "block",
  content: "block+",
  defining: true,
  isolating: true,
  selectable: false,
  addAttributes() {
    return {
      // Parsed as a number; a missing/empty data-width yields null.
      width: {
        default: null,
        parseHTML: (el: HTMLElement) => {
          const value = el.getAttribute("data-width");
          return value ? parseFloat(value) : null;
        },
        renderHTML: (attrs: Record<string, any>) =>
          attrs.width ? { "data-width": attrs.width } : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="column"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["div", { "data-type": "column", ...HTMLAttributes }, 0];
  },
});

/**
 * Subpages listing block (auto-generated index of child pages). Docmost
 * declares no attributes; the markdown-converter has a `case "subpages"`, so
 * the read path can emit it and toYdoc must accept it. Block atom.
 */
const Subpages = Node.create({
  name: "subpages",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  parseHTML() {
    return [{ tag: 'div[data-type="subpages"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // Leaf node: a `0` content hole here would make HTML serialization throw.
    return ["div", { "data-type": "subpages", ...HTMLAttributes }];
  },
});

/** Uploaded <audio> player. Block atom. Mirrors Docmost audio attrs. */
const Audio = Node.create({
  name: "audio",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes() {
    return {
      src: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("src"),
        renderHTML: (attrs: Record<string, any>) => ({ src: attrs.src ?? "" }),
      },
      attachmentId: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.attachmentId
            ? { "data-attachment-id": attrs.attachmentId }
            : {},
      },
      size: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-size"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.size != null ? { "data-size": attrs.size } : {},
      },
      // Transient upload key Docmost declares with rendered:false; carried so
      // a round-trip never hits "Unsupported attribute".
      placeholder: { default: null },
    };
  },
  parseHTML() {
    return [{ tag: "audio" }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["audio", { controls: "true", ...HTMLAttributes }];
  },
});

/** Embedded PDF viewer. Block atom. Mirrors Docmost pdf attrs. */
const Pdf = Node.create({
  name: "pdf",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes() {
    return {
      src: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("src"),
        renderHTML: (attrs: Record<string, any>) => ({ src: attrs.src ?? "" }),
      },
      name: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-name"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.name ? { "data-name": attrs.name } : {},
      },
      attachmentId: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.attachmentId
            ? { "data-attachment-id": attrs.attachmentId }
            : {},
      },
      size: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-size"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.size != null ? { "data-size": attrs.size } : {},
      },
      width: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("width"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.width != null ? { width: attrs.width } : {},
      },
      height: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("height"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.height != null ? { height: attrs.height } : {},
      },
      // Transient upload key Docmost declares with rendered:false; carried so
      // a round-trip never hits "Unsupported attribute".
      placeholder: { default: null },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="pdf"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // Leaf node: a `0` content hole here would make HTML serialization throw.
    return ["div", { "data-type": "pdf", ...HTMLAttributes }];
  },
});

/** Page break (print/export divider). Block atom; Docmost declares no attrs. */
const PageBreak = Node.create({
  name: "pageBreak",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  parseHTML() {
    return [{ tag: 'div[data-type="pageBreak"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["div", { "data-type": "pageBreak", ...HTMLAttributes }];
  },
});

/**
 * Full extension list. Image is block-level (matches Docmost); the
 * ProseMirror DOM parser hoists <img> found inside <p> automatically.
 * StarterKit v3 already bundles the link extension, configured here.
 */
export const docmostExtensions = [
  StarterKit.configure({
    codeBlock: {},
    heading: {},
    link: { openOnClick: false },
  }),
  Image.configure({ inline: false }),
  TaskList,
  TaskItem.configure({ nested: true }),
  // Highlight stores its color unescaped and Docmost interpolates it into
  // style="background-color: ${color}". Wrap the color attribute's parseHTML
  // with the same allowlist guard used by textStyle so a crafted import color
  // cannot break out of the style attribute. Multicolor behavior is preserved.
  Highlight.extend({
    addAttributes() {
      const parent = this.parent?.() ?? {};
      return {
        ...parent,
        color: {
          ...(parent as Record<string, any>).color,
          parseHTML: (el: HTMLElement) =>
            sanitizeCssColor(
              el.getAttribute("data-color") ||
                getStyleProperty(el, "background-color") ||
                el.style.backgroundColor,
            ),
        },
      };
    },
  }).configure({ multicolor: true }),
  Subscript,
  Superscript,
  // StarterKit does not provide a textStyle mark, so register ours; without it
  // generateJSON drops <span style="color: ...">, defeating the color import.
+ TextStyle, + Comment, + Callout, + Table, + TableRow, + TableCell, + TableHeader, + Mention, + MathInline, + MathBlock, + Details, + DetailsSummary, + DetailsContent, + Attachment, + Video, + Youtube, + Embed, + Drawio, + Excalidraw, + Columns, + Column, + Subpages, + Audio, + Pdf, + PageBreak, + DocmostAttributes, +]; diff --git a/packages/docmost-client/src/lib/filters.ts b/packages/docmost-client/src/lib/filters.ts new file mode 100644 index 0000000..f1104d5 --- /dev/null +++ b/packages/docmost-client/src/lib/filters.ts @@ -0,0 +1,93 @@ +/** + * Filter functions to extract only relevant information from API responses + * for better agent consumption + */ + +export function filterWorkspace(data: any) { + return { + id: data.id, + name: data.name, + description: data.description, + defaultSpaceId: data.defaultSpaceId, + createdAt: data.createdAt, + updatedAt: data.updatedAt, + deletedAt: data.deletedAt, + }; +} + +export function filterSpace(space: any) { + return { + id: space.id, + name: space.name, + description: space.description, + slug: space.slug, + visibility: space.visibility, + createdAt: space.createdAt, + updatedAt: space.updatedAt, + deletedAt: space.deletedAt, + }; +} + +export function filterGroup(group: any) { + return { + id: group.id, + name: group.name, + description: group.description, + workspaceId: group.workspaceId, + createdAt: group.createdAt, + updatedAt: group.updatedAt, + deletedAt: group.deletedAt, + }; +} + +export function filterPage(page: any, content?: string, subpages?: any[]) { + return { + id: page.id, + slugId: page.slugId, + title: page.title, + parentPageId: page.parentPageId, + spaceId: page.spaceId, + isLocked: page.isLocked, + createdAt: page.createdAt, + updatedAt: page.updatedAt, + deletedAt: page.deletedAt, + // Include converted markdown content if valid string (even empty) + ...(typeof content === "string" && { content }), + // Include subpages if provided + ...(subpages && + subpages.length > 0 && { + subpages: 
subpages.map((p) => ({ id: p.id, title: p.title })), + }), + }; +} + +export function filterComment(comment: any, markdownContent?: string) { + return { + id: comment.id, + pageId: comment.pageId, + content: markdownContent ?? comment.content, + selection: comment.selection || null, + type: comment.type || "page", + parentCommentId: comment.parentCommentId || null, + creatorId: comment.creatorId, + creatorName: comment.creator?.name || null, + createdAt: comment.createdAt, + editedAt: comment.editedAt || null, + resolvedAt: comment.resolvedAt || null, + resolvedById: comment.resolvedById || null, + }; +} + +export function filterSearchResult(result: any) { + return { + id: result.id, + title: result.title, + parentPageId: result.parentPageId, + createdAt: result.createdAt, + updatedAt: result.updatedAt, + rank: result.rank, + highlight: result.highlight, + spaceId: result.space?.id, + spaceName: result.space?.name, + }; +} diff --git a/packages/docmost-client/src/lib/json-edit.ts b/packages/docmost-client/src/lib/json-edit.ts new file mode 100644 index 0000000..d452cd9 --- /dev/null +++ b/packages/docmost-client/src/lib/json-edit.ts @@ -0,0 +1,127 @@ +/** + * Surgical text edits on a ProseMirror document without re-importing it. + * + * Each edit replaces an exact substring inside individual text nodes, + * preserving every node id, mark and attribute around it. This is the + * safe alternative to a full markdown re-import for small wording fixes. + */ + +export interface TextEdit { + find: string; + replace: string; + /** Replace every occurrence; otherwise the edit must match exactly once. */ + replaceAll?: boolean; +} + +export interface TextEditResult { + find: string; + replacements: number; +} + +/** Collect plain text of the whole document (for span-detection hints). 
*/
function collectText(node: any): string {
  // Own text first (text nodes only), then every descendant in document order.
  const pieces: string[] = [];
  if (node.type === "text") pieces.push(node.text || "");
  for (const child of node.content || []) pieces.push(collectText(child));
  return pieces.join("");
}

/** Count non-overlapping occurrences of `needle`; an empty needle counts as 0. */
function countOccurrences(haystack: string, needle: string): number {
  if (!needle) return 0;
  let hits = 0;
  for (
    let at = haystack.indexOf(needle);
    at !== -1;
    at = haystack.indexOf(needle, at + needle.length)
  ) {
    hits++;
  }
  return hits;
}

/**
 * Apply text edits to a ProseMirror doc. The input is never touched: edits run
 * against a JSON deep copy, which is returned together with a per-edit
 * replacement count.
 *
 * Throws a descriptive Error when `find` is empty, when an edit matches
 * nothing (distinguishing "absent" from "spans several text nodes"), or when
 * it matches more than once without `replaceAll` — so the caller can refine
 * `find`.
 */
export function applyTextEdits(
  doc: any,
  edits: TextEdit[],
): { doc: any; results: TextEditResult[] } {
  const copy = JSON.parse(JSON.stringify(doc));
  const results: TextEditResult[] = [];

  // Pre-order traversal: the node itself, then its children left to right.
  const walk = (node: any, visit: (n: any) => void): void => {
    visit(node);
    for (const child of node.content || []) walk(child, visit);
  };

  for (const edit of edits) {
    if (!edit.find) throw new Error("edit.find must be a non-empty string");

    // Matches are counted per text node; text crossing node boundaries does
    // not count.
    let nodeMatches = 0;
    walk(copy, (node) => {
      if (node.type === "text" && node.text) {
        nodeMatches += countOccurrences(node.text, edit.find);
      }
    });

    if (nodeMatches === 0) {
      // Tell "text not present" apart from "text spans formatting runs".
      if (collectText(copy).includes(edit.find)) {
        throw new Error(
          `Edit "${truncate(edit.find)}": the text exists in the document but spans multiple formatting runs (bold/link/italic boundaries). Use a shorter fragment that stays inside one run, or use update_page_json for structural changes.`,
        );
      }
      throw new Error(
        `Edit "${truncate(edit.find)}": text not found in the document.`,
      );
    }

    if (nodeMatches > 1 && !edit.replaceAll) {
      throw new Error(
        `Edit "${truncate(edit.find)}": matches ${nodeMatches} times. Provide a longer, unique fragment or set replaceAll: true.`,
      );
    }

    let done = 0;
    walk(copy, (node) => {
      const isHit =
        node.type === "text" && node.text && node.text.includes(edit.find);
      if (!isHit) return;

      if (edit.replaceAll) {
        done += countOccurrences(node.text, edit.find);
        node.text = node.text.split(edit.find).join(edit.replace);
        return;
      }
      if (done !== 0) return;

      // Splice by index rather than String.replace, whose replacement string
      // would expand $&, $1, $`, $', $$ instead of inserting them literally.
      const at = node.text.indexOf(edit.find);
      const before = node.text.slice(0, at);
      const after = node.text.slice(at + edit.find.length);
      node.text = before + edit.replace + after;
      done = 1;
    });

    results.push({ find: edit.find, replacements: done });
  }

  // ProseMirror forbids empty text nodes, so drop any that an edit emptied.
  const prune = (node: any): void => {
    if (!Array.isArray(node.content)) return;
    node.content = node.content.filter(
      (child: any) => !(child.type === "text" && child.text === ""),
    );
    for (const child of node.content) prune(child);
  };
  prune(copy);

  return { doc: copy, results };
}

function truncate(s: string): string {
  return s.length > 60 ? s.slice(0, 57) + "..."
: s; +} diff --git a/packages/docmost-client/src/lib/markdown-converter.ts b/packages/docmost-client/src/lib/markdown-converter.ts new file mode 100644 index 0000000..cbaa704 --- /dev/null +++ b/packages/docmost-client/src/lib/markdown-converter.ts @@ -0,0 +1,861 @@ +/** + * Convert ProseMirror/TipTap JSON content to Markdown + * Supports all Docmost-specific node types and extensions + */ +export function convertProseMirrorToMarkdown(content: any): string { + if (!content || !content.content) return ""; + + // Escape a value interpolated into an HTML double-quoted attribute value + // (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the + // ATTRIBUTE context only the quote that delimits the value and the ampersand + // that starts an entity are special, so we escape ONLY & " (and ' for safety + // when single-quoted delimiters are used). We deliberately do NOT escape < or + // >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode + // </> back inside attribute values, so escaping them would corrupt the + // stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on + // every round-trip (`a < b` -> `a < b` -> `a &lt; b`). Escaping & " + // keeps the value inert against attribute-injection while staying idempotent. + // NOTE: escape ONLY & and " here. The value is always wrapped in double + // quotes, so " is the only delimiter; ' is NOT special in a double-quoted + // value, and parse5 does not decode ' back inside attribute values, so + // escaping ' would (like < >) corrupt the value and accumulate & on every + // round-trip. Escaping & and " is idempotent (parse5 decodes them back). + const escapeAttr = (value: unknown): string => + String(value) + .replace(/&/g, "&") + .replace(/"/g, """); + + // Escape a value placed as HTML element TEXT content (between tags), where + // <, >, and & are all significant. 
Used for text rendered inside raw-HTML + // blocks (table cells / columns) so stored characters cannot inject markup. + const escapeHtmlText = (value: unknown): string => + String(value) + .replace(/&/g, "&") + .replace(/</g, "<") + .replace(/>/g, ">"); + + // Percent-encode characters that would break out of a markdown URL target + // (...) — whitespace/newlines and parentheses — so a stored src stays a + // single inert token (used for image/video/youtube srcs). + const encodeMdUrl = (value: unknown): string => + String(value || "") + .replace(/\s/g, (c: string) => (c === " " ? "%20" : encodeURIComponent(c))) + .replace(/\(/g, "%28") + .replace(/\)/g, "%29"); + + const processNode = (node: any): string => { + const type = node.type; + const nodeContent = node.content || []; + + switch (type) { + case "doc": + return nodeContent.map(processNode).join("\n\n"); + + case "paragraph": + const text = nodeContent.map(processNode).join(""); + const align = node.attrs?.textAlign; + if (align && align !== "left") { + return `<div align="${escapeAttr(align)}">${text}</div>`; + } + return text || ""; + + case "heading": + const level = node.attrs?.level || 1; + const headingText = nodeContent.map(processNode).join(""); + return "#".repeat(level) + " " + headingText; + + case "text": + let textContent = node.text || ""; + // Apply marks (bold, italic, code, etc.) + if (node.marks) { + // Markdown code spans (`...`) cannot carry inner formatting, so when a + // run has the `code` mark alongside ANY other mark, backtick syntax + // would leak literal ** / []() into the code text. In that case emit + // nested HTML (<code> innermost, the other marks wrapping it as HTML) + // so the output is at least well-formed and re-parseable. + // + // NOTE: this does NOT round-trip both marks. The schema's `code` mark + // has `excludes: "_"` (it excludes every other mark), so on import the + // co-occurring mark is always dropped — the run comes back as `code` + // only. 
We keep the emission simple and accept that the other mark is + // lost; preserving both is impossible while `code` excludes them. + // Only use the backtick form when `code` is the sole mark. + const markTypes = node.marks.map((m: any) => m.type); + const hasCode = markTypes.includes("code"); + const codeCombined = hasCode && markTypes.length > 1; + for (const mark of node.marks) { + switch (mark.type) { + case "bold": + textContent = codeCombined + ? `<strong>${textContent}</strong>` + : `**${textContent}**`; + break; + case "italic": + textContent = codeCombined + ? `<em>${textContent}</em>` + : `*${textContent}*`; + break; + case "code": + // When combined with another mark, wrap as <code> so the + // surrounding HTML marks can nest around it; otherwise use the + // plain backtick span. + textContent = codeCombined + ? `<code>${textContent}</code>` + : `\`${textContent}\``; + break; + case "link": { + const href = mark.attrs?.href || ""; + const title = mark.attrs?.title; + if (codeCombined) { + // Emit an HTML anchor so it can wrap the nested <code>. + const safeHref = escapeAttr(href); + if (title) { + textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`; + } else { + textContent = `<a href="${safeHref}">${textContent}</a>`; + } + } else if (title) { + // Emit the optional markdown link title; escape an embedded + // double-quote so it cannot terminate the title string early. + const safeTitle = String(title).replace(/"/g, '\\"'); + textContent = `[${textContent}](${href} "${safeTitle}")`; + } else { + textContent = `[${textContent}](${href})`; + } + break; + } + case "strike": + textContent = codeCombined + ? 
`<s>${textContent}</s>` + : `~~${textContent}~~`; + break; + case "underline": + textContent = `<u>${textContent}</u>`; + break; + case "subscript": + textContent = `<sub>${textContent}</sub>`; + break; + case "superscript": + textContent = `<sup>${textContent}</sup>`; + break; + case "highlight": { + // Preserve a null/empty color as a plain highlight (a bare + // <mark> with no background-color); only emit the style when a + // color is actually set, so a plain highlight is not forced to + // yellow on export. + const color = mark.attrs?.color; + textContent = color + ? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>` + : `<mark>${textContent}</mark>`; + break; + } + case "textStyle": + if (mark.attrs?.color) { + textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`; + } + break; + case "comment": { + // Emit the inline comment anchor so highlights round-trip. The + // schema's Comment mark parses span[data-comment-id] (attrs + // commentId/resolved). + const cid = mark.attrs?.commentId; + if (cid) { + const resolvedAttr = mark.attrs?.resolved + ? ` data-resolved="true"` + : ""; + textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`; + } + break; + } + } + } + } + return textContent; + + case "codeBlock": + const language = node.attrs?.language || ""; + // Strip ALL trailing newlines so the export is idempotent: marked + // re-adds exactly one trailing "\n" on import, so trimming only one + // here would let the text grow by "\n" on each round-trip. Removing + // every trailing newline makes repeated cycles stable. 
+ const code = nodeContent + .map(processNode) + .join("") + .replace(/\n+$/, ""); + return "```" + language + "\n" + code + "\n```"; + + case "bulletList": + return nodeContent + .map((item: any) => processListItem(item, "-")) + .join("\n"); + + case "orderedList": + return nodeContent + .map((item: any, index: number) => + processListItem(item, `${index + 1}.`), + ) + .join("\n"); + + case "taskList": + return nodeContent.map((item: any) => processTaskItem(item)).join("\n"); + + case "taskItem": + // Delegate to the same helper used by taskList so multi-block and + // nested task items render and indent consistently. + return processTaskItem(node); + + case "listItem": + return nodeContent.map(processNode).join("\n"); + + case "blockquote": + // Prefix EVERY line of EVERY child with "> " and separate block-level + // children with a blank ">" line so code blocks / multi-paragraph + // quotes round-trip correctly. + return nodeContent + .map((n: any) => + processNode(n) + .split("\n") + .map((line: string) => (line.length ? `> ${line}` : ">")) + .join("\n"), + ) + .join("\n>\n"); + + case "horizontalRule": + return "---"; + + case "hardBreak": + // Two trailing spaces before the newline encode a markdown hard break; + // a bare "\n" would be reimported as a soft break and lost. + return " \n"; + + case "image": + const imgAlt = node.attrs?.alt || ""; + // Neutralize characters that could break out of the markdown image + // URL: spaces/newlines and parentheses would terminate the (...) target + // and let a stored src inject following markdown/HTML. Percent-encode + // them so the URL stays a single inert token. + const imgSrc = encodeMdUrl(node.attrs?.src); + // No "caption" attribute exists in the Docmost image schema, so we do + // not emit one (the previous caption branch was dead). + return `![${imgAlt}](${imgSrc})`; + + case "video": { + // Emit the schema-matching <video> element so generateJSON rebuilds the + // node with its attrs intact. 
The schema's parseHTML reads src/aria-label + // from the standard attributes and the remaining attrs from data-*. + const attrs = node.attrs || {}; + const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`]; + if (attrs.alt) parts.push(`aria-label="${escapeAttr(attrs.alt)}"`); + if (attrs.attachmentId) + parts.push( + `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`, + ); + if (attrs.width != null) + parts.push(`width="${escapeAttr(attrs.width)}"`); + if (attrs.height != null) + parts.push(`height="${escapeAttr(attrs.height)}"`); + if (attrs.size != null) + parts.push(`data-size="${escapeAttr(attrs.size)}"`); + if (attrs.align) + parts.push(`data-align="${escapeAttr(attrs.align)}"`); + if (attrs.aspectRatio != null) + parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`); + // Wrap in a block <div> so marked treats it as a block (a bare <video> + // is inline-level HTML and marked wraps it in <p>, leaving a spurious + // empty paragraph beside the hoisted block atom). The wrapper has no + // data-type, so the schema parser ignores it and just hoists the video. + return `<div><video ${parts.join(" ")}></video></div>`; + } + + case "youtube": { + // Emit the schema-matching div[data-type="youtube"]; the schema reads + // src from data-src and width/height/align from data-* attributes. + const attrs = node.attrs || {}; + const parts: string[] = [ + `data-type="youtube"`, + `data-src="${escapeAttr(attrs.src ?? "")}"`, + ]; + if (attrs.width != null) + parts.push(`data-width="${escapeAttr(attrs.width)}"`); + if (attrs.height != null) + parts.push(`data-height="${escapeAttr(attrs.height)}"`); + if (attrs.align) + parts.push(`data-align="${escapeAttr(attrs.align)}"`); + return `<div ${parts.join(" ")}></div>`; + } + + case "table": { + // A GFM pipe table cannot represent merged cells. 
If ANY cell carries + // colspan>1 or rowspan>1, a pipe table would corrupt the grid on + // re-import, so emit the WHOLE table as raw HTML <table> instead: the + // schema's table family parseHTML (tag table/tr/td/th, with colspan/ + // rowspan read from the same-named HTML attrs and align via parseHTML) + // round-trips it faithfully. Otherwise keep the lighter GFM pipe table. + const tableRows: any[] = nodeContent; + if (tableRows.length === 0) return ""; + const hasSpan = tableRows.some((row: any) => + (row.content || []).some( + (cell: any) => + (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1, + ), + ); + + if (hasSpan) { + // Render each cell's block children to HTML (marked does NOT parse + // markdown inside a raw HTML block, so emitting markdown here would + // leak literal ** / `` into the cell). blockToHtml mirrors the schema + // HTML so inner formatting re-parses into the right marks/nodes. + const renderHtmlCell = (cell: any): string => { + const tag = cell.type === "tableHeader" ? "th" : "td"; + const a = cell.attrs || {}; + const cellParts: string[] = []; + if ((a.colspan ?? 1) > 1) + cellParts.push(`colspan="${escapeAttr(a.colspan)}"`); + if ((a.rowspan ?? 1) > 1) + cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`); + if (a.align) cellParts.push(`align="${escapeAttr(a.align)}"`); + const open = cellParts.length + ? `<${tag} ${cellParts.join(" ")}>` + : `<${tag}>`; + const inner = (cell.content || []) + .map((block: any) => blockToHtml(block)) + .join(""); + return `${open}${inner}</${tag}>`; + }; + const htmlRows = tableRows + .map( + (row: any) => + `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`, + ) + .join(""); + return `<table><tbody>${htmlRows}</tbody></table>`; + } + + // No merged cells: emit a GFM table (header row + separator) so the + // markdown can be parsed back into a table on re-import. 
+ const rows = tableRows.map(processNode); + const headerCells = tableRows[0]?.content || []; + const columns = headerCells.length || 1; + // Derive alignment markers (:--, :-:, --:) from each header cell. + const markers = Array.from({ length: columns }, (_, i) => { + const align = headerCells[i]?.attrs?.align; + switch (align) { + case "left": + return ":--"; + case "center": + return ":-:"; + case "right": + return "--:"; + default: + return "---"; + } + }); + const separator = "| " + markers.join(" | ") + " |"; + return [rows[0], separator, ...rows.slice(1)].join("\n"); + } + + case "tableRow": + return "| " + nodeContent.map(processNode).join(" | ") + " |"; + + case "tableCell": + case "tableHeader": { + // Join multiple block children with a space (not "") so adjacent blocks + // like a paragraph followed by a list don't collide into "line1- a". + // Then collapse newlines and escape pipes so a cell containing "|" or a + // line break cannot corrupt the surrounding GFM row. + return nodeContent + .map(processNode) + .join(" ") + .replace(/\r?\n/g, " ") + .replace(/\|/g, "\\|"); + } + + case "callout": + const calloutType = node.attrs?.type || "info"; + const calloutContent = nodeContent.map(processNode).join("\n"); + return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`; + + case "details": + return nodeContent.map(processNode).join("\n"); + + case "detailsSummary": + const summaryText = nodeContent.map(processNode).join(""); + return `<details>\n<summary>${summaryText}</summary>\n`; + + case "detailsContent": + const detailsText = nodeContent.map(processNode).join("\n"); + return `${detailsText}\n</details>`; + + case "mathInline": { + // The schema's `text` attribute has no parseHTML, so TipTap's default + // parser reads it from the `text` HTML attribute (NOT the element's text + // content). Emit span[data-type="mathInline"] carrying the LaTeX in a + // `text="..."` attribute so it round-trips. 
marked cannot parse $...$ + // back, so the previous form was lossy. + const inlineMath = node.attrs?.text || ""; + return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`; + } + + case "mathBlock": { + // Same as mathInline: the LaTeX must ride in the `text` HTML attribute + // for the schema's default parser to recover it. + const blockMath = node.attrs?.text || ""; + return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`; + } + + case "mention": { + // Emit span[data-type="mention"] with the schema's data-* attributes so + // generateJSON rebuilds the mention node instead of leaving "@label" + // plain text that cannot re-parse. + const attrs = node.attrs || {}; + const parts: string[] = [`data-type="mention"`]; + if (attrs.id) parts.push(`data-id="${escapeAttr(attrs.id)}"`); + if (attrs.label) + parts.push(`data-label="${escapeAttr(attrs.label)}"`); + if (attrs.entityType) + parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`); + if (attrs.entityId) + parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`); + if (attrs.slugId) + parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`); + if (attrs.creatorId) + parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`); + if (attrs.anchorId) + parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`); + // Keep the label as visible text content too; the schema reads attrs + // from data-*, so the inner text is purely cosmetic and harmless. + const mentionLabel = attrs.label || attrs.id || ""; + // The label is visible element TEXT content here (the data-* attrs above + // carry the real values), so escape it for the text context, not attrs. + return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`; + } + + case "attachment": { + // BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but + // the schema stores name/url (plus mime/size/attachmentId). 
Emit the + // schema-matching div[data-type="attachment"] with data-attachment-* + // attrs so the node round-trips instead of degrading to a markdown link. + const attrs = node.attrs || {}; + const parts: string[] = [ + `data-type="attachment"`, + `data-attachment-url="${escapeAttr(attrs.url ?? "")}"`, + ]; + if (attrs.name) + parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`); + if (attrs.mime) + parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`); + if (attrs.size != null) + parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`); + if (attrs.attachmentId) + parts.push( + `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`, + ); + return `<div ${parts.join(" ")}></div>`; + } + + case "drawio": + case "excalidraw": { + // Emit the schema-matching div[data-type=...] carrying the diagram's + // attrs as data-* (the schema's diagramAttributes reads src/title/alt/ + // width/height/size/aspectRatio/align/attachmentId from data-*), so the + // diagram round-trips instead of degrading to a lossy placeholder. + const attrs = node.attrs || {}; + const parts: string[] = [ + `data-type="${type}"`, + `data-src="${escapeAttr(attrs.src ?? 
"")}"`, + ]; + if (attrs.title != null) + parts.push(`data-title="${escapeAttr(attrs.title)}"`); + if (attrs.alt != null) parts.push(`data-alt="${escapeAttr(attrs.alt)}"`); + if (attrs.width != null) + parts.push(`data-width="${escapeAttr(attrs.width)}"`); + if (attrs.height != null) + parts.push(`data-height="${escapeAttr(attrs.height)}"`); + if (attrs.size != null) + parts.push(`data-size="${escapeAttr(attrs.size)}"`); + if (attrs.aspectRatio != null) + parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`); + if (attrs.align) + parts.push(`data-align="${escapeAttr(attrs.align)}"`); + if (attrs.attachmentId) + parts.push( + `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`, + ); + return `<div ${parts.join(" ")}></div>`; + } + + case "embed": { + // Emit the schema-matching div[data-type="embed"]; the schema reads + // src/provider/align/width/height from data-* attributes so the node + // (and its provider iframe info) survives the round-trip. + const attrs = node.attrs || {}; + const parts: string[] = [ + `data-type="embed"`, + `data-src="${escapeAttr(attrs.src ?? "")}"`, + `data-provider="${escapeAttr(attrs.provider ?? "")}"`, + ]; + if (attrs.align) + parts.push(`data-align="${escapeAttr(attrs.align)}"`); + if (attrs.width != null) + parts.push(`data-width="${escapeAttr(attrs.width)}"`); + if (attrs.height != null) + parts.push(`data-height="${escapeAttr(attrs.height)}"`); + return `<div ${parts.join(" ")}></div>`; + } + + case "audio": { + // Emit the schema-matching <audio> element (was emitting nothing). The + // schema reads src from src and attachmentId/size from data-*. + const attrs = node.attrs || {}; + const parts: string[] = [`src="${escapeAttr(attrs.src ?? 
"")}"`]; + if (attrs.attachmentId) + parts.push( + `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`, + ); + if (attrs.size != null) + parts.push(`data-size="${escapeAttr(attrs.size)}"`); + // Wrap in a block <div> for the same reason as video: a bare <audio> is + // inline-level HTML that marked would wrap in <p>. + return `<div><audio ${parts.join(" ")}></audio></div>`; + } + + case "pdf": { + // Emit the schema-matching div[data-type="pdf"] (was emitting nothing). + // The schema reads src/width/height from standard attrs and name/ + // attachmentId/size from data-*. + const attrs = node.attrs || {}; + const parts: string[] = [ + `data-type="pdf"`, + `src="${escapeAttr(attrs.src ?? "")}"`, + ]; + if (attrs.name) parts.push(`data-name="${escapeAttr(attrs.name)}"`); + if (attrs.attachmentId) + parts.push( + `data-attachment-id="${escapeAttr(attrs.attachmentId)}"`, + ); + if (attrs.size != null) + parts.push(`data-size="${escapeAttr(attrs.size)}"`); + if (attrs.width != null) + parts.push(`width="${escapeAttr(attrs.width)}"`); + if (attrs.height != null) + parts.push(`height="${escapeAttr(attrs.height)}"`); + return `<div ${parts.join(" ")}></div>`; + } + + case "columns": { + // Emit the schema-matching div[data-type="columns"] wrapper so the + // multi-column layout survives. Without a case the children were + // concatenated with no separator and the text merged. The schema reads + // layout from data-layout and widthMode from data-width-mode. The whole + // block is raw HTML, so render children via blockToHtml (NOT markdown, + // which marked would not re-parse inside a raw HTML block). 
+ const attrs = node.attrs || {}; + const parts: string[] = [`data-type="columns"`]; + if (attrs.layout) + parts.push(`data-layout="${escapeAttr(attrs.layout)}"`); + if (attrs.widthMode && attrs.widthMode !== "normal") + parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`); + const inner = nodeContent.map((n: any) => blockToHtml(n)).join(""); + return `<div ${parts.join(" ")}>${inner}</div>`; + } + + case "column": { + // Emit the schema-matching div[data-type="column"]; the schema reads the + // column width from data-width. Children are rendered as HTML so their + // formatting survives inside this raw HTML block. + const attrs = node.attrs || {}; + const parts: string[] = [`data-type="column"`]; + if (attrs.width) + parts.push(`data-width="${escapeAttr(attrs.width)}"`); + const inner = nodeContent.map((n: any) => blockToHtml(n)).join(""); + return `<div ${parts.join(" ")}>${inner}</div>`; + } + + case "subpages": + return "{{SUBPAGES}}"; + + default: + // Fallback: process children + return nodeContent.map(processNode).join(""); + } + }; + + // Render inline content (text runs + their marks) to HTML. Used by the raw + // HTML fallbacks (spanned tables, columns) where marked will NOT re-parse + // markdown, so backtick/asterisk/bracket syntax would otherwise leak as + // literal characters. Each mark is mirrored to the HTML the schema's parseHTML + // accepts so it re-imports as the matching ProseMirror mark. + const inlineToHtml = (inlineNodes: any[]): string => + (inlineNodes || []) + .map((n: any) => { + if (n.type === "hardBreak") return "<br>"; + if (n.type !== "text") { + // Inline atoms (mention, mathInline) already emit schema HTML. 
+ return processNode(n); + } + let t = escapeHtmlText(n.text || ""); + for (const mark of n.marks || []) { + switch (mark.type) { + case "bold": + t = `<strong>${t}</strong>`; + break; + case "italic": + t = `<em>${t}</em>`; + break; + case "code": + t = `<code>${t}</code>`; + break; + case "strike": + t = `<s>${t}</s>`; + break; + case "underline": + t = `<u>${t}</u>`; + break; + case "subscript": + t = `<sub>${t}</sub>`; + break; + case "superscript": + t = `<sup>${t}</sup>`; + break; + case "link": + t = `<a href="${escapeAttr(mark.attrs?.href || "")}">${t}</a>`; + break; + case "highlight": + t = mark.attrs?.color + ? `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${t}</mark>` + : `<mark>${t}</mark>`; + break; + case "textStyle": + if (mark.attrs?.color) + t = `<span style="color: ${escapeAttr(mark.attrs.color)}">${t}</span>`; + break; + case "comment": + // Inline comment anchor inside a raw-HTML container (columns / + // spanned table cells), so commented text there also round-trips. + if (mark.attrs?.commentId) { + const r = mark.attrs?.resolved ? ` data-resolved="true"` : ""; + t = `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${r}>${t}</span>`; + } + break; + } + } + return t; + }) + .join(""); + + // Emit the schema-matching <img> for an image node. Shared so the image is + // emitted as real HTML wherever a raw-HTML container needs it (inside a column + // or a spanned table cell), where markdown `![](...)` would NOT be re-parsed + // and would survive as literal text. The Image extension reads src/alt from + // the standard attributes; the Docmost extra attrs (width/height/align/size/ + // attachmentId/aspectRatio) are global attributes read from same-named DOM + // attributes, so emit them by name. + const imageToHtml = (node: any): string => { + const attrs = node.attrs || {}; + const parts: string[] = [`src="${escapeAttr(attrs.src ?? 
"")}"`]; + if (attrs.alt) parts.push(`alt="${escapeAttr(attrs.alt)}"`); + if (attrs.title) parts.push(`title="${escapeAttr(attrs.title)}"`); + if (attrs.width != null) parts.push(`width="${escapeAttr(attrs.width)}"`); + if (attrs.height != null) parts.push(`height="${escapeAttr(attrs.height)}"`); + if (attrs.align) parts.push(`align="${escapeAttr(attrs.align)}"`); + if (attrs.size != null) parts.push(`data-size="${escapeAttr(attrs.size)}"`); + if (attrs.attachmentId) + parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`); + if (attrs.aspectRatio != null) + parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`); + return `<img ${parts.join(" ")}>`; + }; + + // Emit the schema-matching div[data-type="callout"] for a callout node. The + // schema reads the banner type from data-callout-type. Children are rendered + // as HTML so they survive inside a raw-HTML container. + const calloutToHtml = (node: any): string => { + const type = (node.attrs?.type || "info").toLowerCase(); + const inner = (node.content || []).map(blockToHtml).join(""); + return `<div data-type="callout" data-callout-type="${escapeAttr(type)}">${inner}</div>`; + }; + + // Emit a schema-matching <details> tree. The schema parses <details>, + // summary[data-type="detailsSummary"], and div[data-type="detailsContent"]. + const detailsToHtml = (node: any): string => { + const inner = (node.content || []).map(blockToHtml).join(""); + return `<details>${inner}</details>`; + }; + const detailsSummaryToHtml = (node: any): string => + `<summary data-type="detailsSummary">${inlineToHtml(node.content || [])}</summary>`; + const detailsContentToHtml = (node: any): string => { + const inner = (node.content || []).map(blockToHtml).join(""); + return `<div data-type="detailsContent">${inner}</div>`; + }; + + // Emit the schema-matching taskList/taskItem HTML. 
bridgeTaskLists (in + // collaboration.ts) recognizes ul[data-type="taskList"] with + // li[data-type="taskItem"][data-checked]; emitting that directly here keeps + // task lists inside columns/cells from degrading to literal "- [ ]" text. + const taskListToHtml = (node: any): string => { + const items = (node.content || []) + .map((it: any) => { + const checked = it.attrs?.checked ? "true" : "false"; + return `<li data-type="taskItem" data-checked="${checked}">${blockChildrenToHtml(it)}</li>`; + }) + .join(""); + return `<ul data-type="taskList">${items}</ul>`; + }; + + // Render a block node to HTML for the raw-HTML containers (spanned tables, + // columns). marked does NOT re-parse markdown inside a raw-HTML block, so + // EVERY block type that can appear inside a column or a spanned cell must be + // emitted as schema-matching HTML here — never as markdown, or it would land + // as literal text on re-import. Nodes whose processNode case already produces + // schema-matching HTML (math/media/embed/attachment/nested columns/spanned + // table) are delegated to processNode; the markdown-emitting cases + // (image/blockquote/callout/details/hr/taskList) get explicit HTML here. + const blockToHtml = (block: any): string => { + const children = block.content || []; + switch (block.type) { + case "paragraph": + return `<p>${inlineToHtml(children)}</p>`; + case "heading": { + const level = block.attrs?.level || 1; + return `<h${level}>${inlineToHtml(children)}</h${level}>`; + } + case "bulletList": + return `<ul>${children + .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`) + .join("")}</ul>`; + case "orderedList": + return `<ol>${children + .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`) + .join("")}</ol>`; + case "codeBlock": { + const lang = block.attrs?.language || ""; + // The code itself is element TEXT content (between <code> tags), so it + // must escape < > & — NOT the attribute escaper. 
The language rides in + // a class ATTRIBUTE, so it uses escapeAttr. + const code = escapeHtmlText( + children + .map(processNode) + .join("") + .replace(/\n+$/, ""), + ); + const cls = lang ? ` class="language-${escapeAttr(lang)}"` : ""; + return `<pre><code${cls}>${code}</code></pre>`; + } + case "image": + return imageToHtml(block); + case "blockquote": + return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`; + case "horizontalRule": + return "<hr>"; + case "callout": + return calloutToHtml(block); + case "details": + return detailsToHtml(block); + case "detailsSummary": + return detailsSummaryToHtml(block); + case "detailsContent": + return detailsContentToHtml(block); + case "taskList": + return taskListToHtml(block); + case "taskItem": + // A bare taskItem (outside a taskList) still needs a wrapping list so + // the schema parses it; wrap it in a single-item taskList. + return taskListToHtml({ content: [block] }); + // table (incl. spanned), columns/column, math, media, embed, attachment, + // mention, etc. already emit schema-matching HTML from processNode. + case "table": + case "columns": + case "column": + case "mathBlock": + case "video": + case "audio": + case "pdf": + case "youtube": + case "embed": + case "attachment": + case "drawio": + case "excalidraw": + return processNode(block); + default: + // Any still-unhandled block type: NEVER fall back to markdown inside a + // raw-HTML block (it would become literal text). Wrap its rendered + // children in a <div> so their content is preserved; if it has no block + // children, render its inline content instead. + if (children.length && children.some((c: any) => c.type !== "text")) { + return `<div>${children.map(blockToHtml).join("")}</div>`; + } + return `<div>${inlineToHtml(children)}</div>`; + } + }; + + // Render the block children of a list item to HTML (a listItem holds block+ + // content). Mirrors processListItem but for the HTML fallback path. 
// Lay out the already-rendered child blocks of one list item beneath its
// marker.
//
// Every child is a string that may span several physical lines. The children
// are flattened into one sequence of physical lines; the very first line is
// emitted as `${prefix} ${line}` and every later line (the rest of the first
// child plus all lines of the following children) is shifted right by
// `indentWidth` spaces so it stays inside the list item. Empty lines are kept
// empty so no trailing whitespace is produced.
//
// `indentWidth` must be the width of the LIST MARKER, which the caller knows
// and this helper does not: 2 for "- " (also for task items, whose "[ ] " is
// content rather than marker) and 3/4/... for "1. "/"10. "/... ordered markers.
const indentItemChildren = (
  childStrings: string[],
  prefix: string,
  indentWidth: number,
): string => {
  const pad = " ".repeat(indentWidth);
  // split() always yields at least one entry per child, so physical line 0 is
  // exactly "first line of the first child".
  const physicalLines = childStrings.flatMap((child) => child.split("\n"));
  return physicalLines
    .map((text, position) => {
      if (position === 0) return `${prefix} ${text}`;
      return text === "" ? "" : pad + text;
    })
    .join("\n");
};
/**
 * Shape of the JSON object carried by the leading `docmost:meta` block of a
 * self-contained markdown file.
 *
 * The serializers in this file write it with `JSON.stringify`, and
 * `parseDocmostMarkdown` returns the result of `JSON.parse` typed as this
 * interface — the parsed value is NOT validated against it. Only `version` is
 * required; every other field may be absent.
 */
export interface DocmostMdMeta {
  // NOTE(review): the set of valid version numbers is not visible here — confirm.
  version: number;
  pageId?: string;
  slugId?: string;
  title?: string;
  spaceId?: string;
  // May be explicitly `null` as well as absent.
  // NOTE(review): presumably `null` marks a page without a parent — confirm.
  parentPageId?: string | null;
}
/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. The meta block is always emitted; the comments block is always
 * emitted too (with `[]` when `comments` is not an array) so the format stays
 * uniform and parsing stays simple.
 *
 * Both JSON payloads live inside HTML comments. An HTML comment ends at the
 * first `-->` (HTML parsers also accept `--!>`), so a title or comment text
 * containing that sequence would otherwise close the block early and spill the
 * rest of the JSON into the document. To prevent this, the `>` of every
 * `-->` / `--!>` inside the JSON text is written as the JSON escape `\u003e`;
 * `JSON.parse` decodes it back to `>`, so the round trip is lossless.
 *
 * @param meta     Page identity object written into the `docmost:meta` block.
 * @param body     Markdown body; `null`/`undefined` is treated as "" and the
 *                 text is trimmed.
 * @param comments Comment threads written into the `docmost:comments` block.
 * @returns The complete file text, ending with a newline.
 */
export function serializeDocmostMarkdown(
  meta: DocmostMdMeta,
  body: string,
  comments: any[],
): string {
  // The sequence can only occur inside a JSON string literal (no JSON syntax
  // token contains "--"), where `\u003e` is a valid escape. String() keeps the
  // previous output when JSON.stringify yields undefined.
  const toCommentSafeJson = (value: unknown): string =>
    String(JSON.stringify(value)).replace(/--(!?)>/g, "--$1\\u003e");

  const metaJson = toCommentSafeJson(meta);
  const commentsJson = toCommentSafeJson(Array.isArray(comments) ? comments : []);
  const trimmedBody = (body ?? "").trim();
  return (
    `<!-- docmost:meta\n${metaJson}\n-->\n\n` +
    `${trimmedBody}\n\n` +
    `<!-- docmost:comments\n${commentsJson}\n-->\n`
  );
}
+ let meta: DocmostMdMeta | null = null; + let metaEnd = 0; + const metaMatch = normalized.match(META_RE); + if (metaMatch) { + try { + meta = JSON.parse(metaMatch[1]); + } catch (e) { + throw new Error( + `Invalid docmost:meta JSON block: ${ + e instanceof Error ? e.message : String(e) + }`, + ); + } + // Body starts right after the matched meta block. + metaEnd = (metaMatch.index ?? 0) + metaMatch[0].length; + } + + // Find the LAST `<!-- docmost:comments` opener; the real file-level block is + // the final one whose closing `-->` ends the document. Any earlier literal + // occurrence inside the body (e.g. a re-pasted export) is left in the body. + let lastOpenStart = -1; + let lastOpenEnd = -1; + let m: RegExpExecArray | null; + COMMENTS_OPEN_RE.lastIndex = 0; + while ((m = COMMENTS_OPEN_RE.exec(normalized)) !== null) { + lastOpenStart = m.index; + lastOpenEnd = m.index + m[0].length; + } + + let comments: any[] | null = null; + let bodyEnd = normalized.length; + if (lastOpenStart !== -1) { + const rest = normalized.slice(lastOpenEnd); + const close = rest.match(/\r?\n-->[ \t]*\r?\n?\s*$/); // closer must end the doc + if (close) { + const jsonText = rest.slice(0, close.index); + try { + comments = JSON.parse(jsonText); + } catch (e) { + throw new Error( + `Invalid docmost:comments JSON block: ${ + e instanceof Error ? e.message : String(e) + }`, + ); + } + bodyEnd = lastOpenStart; // strip from the opener to end of document + } + } + + const body = normalized.slice(metaEnd, bodyEnd).trim(); + return { meta, body, comments }; +} + +// --- docmost-sync addition (backport target: docmost-mcp/src/lib/markdown-document.ts) --- + +/** + * Serialize a self-contained markdown file with the meta block + body ONLY — + * NO trailing `docmost:comments` block. 
/**
 * Serialize a self-contained markdown file with the meta block + body ONLY —
 * NO trailing `docmost:comments` block.
 *
 * `parseDocmostMarkdown` tolerates a missing comments block (it returns
 * `comments: null` and treats the rest as body), so a file produced here
 * round-trips cleanly through the parser.
 *
 * The meta JSON sits inside an HTML comment, which ends at the first `-->`
 * (HTML parsers also accept `--!>`). A title containing that sequence would
 * close the block early and leak the remaining JSON into the document, so the
 * `>` of every `-->` / `--!>` in the JSON text is written as the JSON escape
 * `\u003e`. `JSON.parse` decodes it back, keeping the round trip lossless.
 *
 * @param meta Page identity object written into the `docmost:meta` block.
 * @param body Markdown body; `null`/`undefined` is treated as "" and the text
 *             is trimmed.
 * @returns The file text: meta block, blank line, body, final newline.
 */
export function serializeDocmostMarkdownBody(
  meta: DocmostMdMeta,
  body: string,
): string {
  // The sequence can only appear inside a JSON string literal, where `\u003e`
  // is a valid escape. String() keeps the previous output when JSON.stringify
  // yields undefined.
  const metaJson = String(JSON.stringify(meta)).replace(
    /--(!?)>/g,
    "--$1\\u003e",
  );
  return `<!-- docmost:meta\n${metaJson}\n-->\n\n${(body ?? "").trim()}\n`;
}
/**
 * Truncate `text` to at most `n` UTF-16 code units, appending an ellipsis
 * ("…") when something was cut. Text that already fits is returned unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last
 * kept unit would be a high surrogate whose low surrogate is the first dropped
 * unit, the whole character is dropped instead, so the result never contains a
 * lone surrogate.
 */
function truncate(text: string, n: number): string {
  if (text.length <= n) return text;

  let end = n;
  // charCodeAt returns NaN for out-of-range indices (n <= 0), which fails
  // both range checks below and leaves `end` untouched.
  const lastKept = text.charCodeAt(end - 1);
  const firstDropped = text.charCodeAt(end);
  const splitsPair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff;
  if (splitsPair) end -= 1;

  return text.slice(0, end) + "…";
}
/**
 * Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
 * `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
 * table cells — compactness is the point.
 *
 * Each entry carries `{ index, type, id, firstText }`, plus type-specific
 * extras: headings add `level`; tables add `rows`/`cols` and the first row's
 * cell texts (each truncated to 40 chars) as `header`; list blocks (types
 * ending in "List") add `items`. `firstText` is the block's plain text
 * truncated to 100 chars.
 *
 * Null-safe: a missing or non-object doc/content yields `[]`, and a block (or
 * table row) whose `content` is not an array is counted as having no children
 * instead of reporting a bogus length or throwing.
 *
 * @param doc Document node whose `content` array holds the top-level blocks.
 * @returns One outline entry per top-level block, in document order.
 */
export function buildOutline(doc: any): OutlineEntry[] {
  if (!isObject(doc) || !Array.isArray(doc.content)) return [];

  const out: OutlineEntry[] = [];
  for (let i = 0; i < doc.content.length; i++) {
    const block = doc.content[i];
    const blockIsObject = isObject(block);
    const attrs = blockIsObject && isObject(block.attrs) ? block.attrs : null;
    const type = blockIsObject ? block.type : undefined;
    // Only a real array counts as children; anything else is "no children".
    const children: any[] =
      blockIsObject && Array.isArray(block.content) ? block.content : [];

    const entry: OutlineEntry = {
      index: i,
      type,
      id: attrs ? attrs.id ?? null : null,
      firstText: truncate(blockPlainText(block), 100),
    };

    if (type === "heading") {
      entry.level = attrs ? attrs.level ?? null : null;
    } else if (type === "table") {
      // Column count and header texts both come from the first row only.
      const firstRow = children[0];
      const headerCells: any[] = Array.isArray(firstRow?.content)
        ? firstRow.content
        : [];
      entry.rows = children.length;
      entry.cols = headerCells.length;
      entry.header = headerCells.map((cell: any) =>
        truncate(blockPlainText(cell), 40),
      );
    } else if (typeof type === "string" && type.endsWith("List")) {
      entry.items = children.length;
    }

    out.push(entry);
  }
  return out;
}
/**
 * Look up one node by reference; yields `{ node, path, type }` or `null` when
 * the reference matches nothing (or `doc` is not an object).
 *
 * - A `ref` shaped like `#<n>` addresses the TOP-LEVEL block at index `n` of
 *   `doc.content`.
 * - Any other `ref` is compared against `attrs.id`; the first match in
 *   pre-order (a node before its descendants, siblings left to right) wins.
 *   The root `doc` itself is never a candidate.
 *
 * `path` lists the child indices from the root down to the node, and `node` is
 * a deep clone, so the caller may mutate it freely.
 */
export function getNodeByRef(
  doc: any,
  ref: string,
): { node: any; path: number[]; type: string | undefined } | null {
  if (!isObject(doc)) return null;

  // Positional form: "#<n>".
  const positional = typeof ref === "string" ? /^#(\d+)$/.exec(ref) : null;
  if (positional !== null) {
    const position = Number(positional[1]);
    const candidate = Array.isArray(doc.content)
      ? doc.content[position]
      : undefined;
    return isObject(candidate)
      ? { node: clone(candidate), path: [position], type: candidate.type }
      : null;
  }

  // Id form: explicit-stack pre-order traversal. Children are pushed in
  // reverse so the leftmost child is popped (and therefore visited) first.
  const pending: { node: any; path: number[] }[] = [];
  const enqueueChildren = (parent: any, parentPath: number[]): void => {
    if (!isObject(parent) || !Array.isArray(parent.content)) return;
    for (let i = parent.content.length - 1; i >= 0; i--) {
      pending.push({ node: parent.content[i], path: [...parentPath, i] });
    }
  };

  enqueueChildren(doc, []);
  while (pending.length > 0) {
    const { node, path } = pending.pop()!;
    if (matchesId(node, ref)) {
      return { node: clone(node), path, type: node.type };
    }
    enqueueChildren(node, path);
  }
  return null;
}
/**
 * Drop EVERY node whose `attrs.id === nodeId`, at any depth of the tree.
 *
 * Works on a deep clone of `doc`, so the input is left untouched. Returns
 * `{ doc, deleted }`, where `deleted` counts the removed nodes. A removed
 * node's descendants go with it and are not counted separately.
 */
export function deleteNodeById(
  doc: any,
  nodeId: string,
): { doc: any; deleted: number } {
  const result = clone(doc);
  let deleted = 0;

  // Returns a NEW array holding the survivors of `siblings`; each surviving
  // container gets its own `content` replaced by its pruned children.
  const prune = (siblings: any[]): any[] => {
    const survivors: any[] = [];
    for (let i = 0; i < siblings.length; i++) {
      const candidate = siblings[i];
      if (!matchesId(candidate, nodeId)) {
        if (isObject(candidate) && Array.isArray(candidate.content)) {
          candidate.content = prune(candidate.content);
        }
        survivors.push(candidate);
      } else {
        deleted += 1;
      }
    }
    return survivors;
  };

  if (isObject(result) && Array.isArray(result.content)) {
    result.content = prune(result.content);
  }
  return { doc: result, deleted };
}
/**
 * Diagnostics helper: report where the FIRST attribute value of type
 * `undefined`, `function`, `symbol` or `bigint` sits in the tree, as a
 * human-readable path such as
 * `content[3].content[0].attrs.indent (undefined)`. Both `node.attrs` and
 * `mark.attrs` are inspected. Returns `null` when no such value exists or when
 * `doc` is not an object.
 *
 * Paths are relative to the root: the root's own attributes are reported as
 * `attrs.<key>` and its children as `content[<i>]...`. Marks are inspected on
 * every node EXCEPT the root. Only the direct values of an attrs object are
 * checked, not values nested inside them.
 */
export function findUnstorableAttr(doc: any): string | null {
  const UNSTORABLE_TYPES = new Set(["undefined", "function", "symbol", "bigint"]);

  // Join a path segment onto a base path; the root has the empty base path.
  const at = (base: string, segment: string): string =>
    base === "" ? segment : `${base}.${segment}`;

  // First offending key of one attrs object, formatted, or null.
  const firstBadAttr = (attrs: any, where: string): string | null => {
    if (!isObject(attrs)) return null;
    for (const key of Object.keys(attrs)) {
      const kind = typeof attrs[key];
      if (UNSTORABLE_TYPES.has(kind)) return `${where}.${key} (${kind})`;
    }
    return null;
  };

  // Inspection order per node: own attrs, then marks, then children.
  const inspect = (
    node: any,
    where: string,
    includeMarks: boolean,
  ): string | null => {
    if (!isObject(node)) return null;

    const ownHit = firstBadAttr(node.attrs, at(where, "attrs"));
    if (ownHit !== null) return ownHit;

    if (includeMarks && Array.isArray(node.marks)) {
      for (let i = 0; i < node.marks.length; i++) {
        const markHit = firstBadAttr(
          node.marks[i]?.attrs,
          `${at(where, `marks[${i}]`)}.attrs`,
        );
        if (markHit !== null) return markHit;
      }
    }

    if (Array.isArray(node.content)) {
      for (let i = 0; i < node.content.length; i++) {
        const childHit = inspect(
          node.content[i],
          at(where, `content[${i}]`),
          true,
        );
        if (childHit !== null) return childHit;
      }
    }
    return null;
  };

  return inspect(doc, "", false);
}
/**
 * Resolve the anchor described by `opts` and return the chain of nodes leading
 * to it: first the root `doc` (index -1), last the anchor itself. Every entry
 * is `{ node, index }`, `index` being the node's position in its parent's
 * `content`. Returns `null` when nothing matches or `doc` is not an object.
 *
 * - With `opts.anchorNodeId`: pre-order search of the whole tree (root
 *   included) for a node whose `attrs.id` equals it. `anchorText` is ignored
 *   in this case, even when the id is not found.
 * - Otherwise with `opts.anchorText`: the first TOP-LEVEL block whose plain
 *   text contains the string; the chain is then `[doc, block]`.
 */
function findAnchorChain(
  doc: any,
  opts: InsertOptions,
): { node: any; index: number }[] | null {
  if (!isObject(doc)) return null;

  const wantedId = opts.anchorNodeId;
  if (wantedId != null) {
    // Backtracking search: `trail` always mirrors the current descent path and
    // is left intact at the moment a match is found.
    const trail: { node: any; index: number }[] = [];
    const descend = (node: any, index: number): boolean => {
      if (!isObject(node)) return false;
      trail.push({ node, index });
      if (matchesId(node, wantedId)) return true;
      if (Array.isArray(node.content)) {
        for (let i = 0; i < node.content.length; i++) {
          if (descend(node.content[i], i)) return true;
        }
      }
      trail.pop();
      return false;
    };
    return descend(doc, -1) ? trail : null;
  }

  const needle = opts.anchorText;
  if (needle == null || !Array.isArray(doc.content)) return null;

  const hitIndex = doc.content.findIndex((block: any) =>
    blockPlainText(block).includes(needle),
  );
  if (hitIndex === -1) return null;
  return [
    { node: doc, index: -1 },
    { node: doc.content[hitIndex], index: hitIndex },
  ];
}
/**
 * Insert a deep clone of `node` into a deep clone of `doc`, at a place chosen
 * by `opts`. Returns `{ doc, inserted }`; `inserted` is false when `opts` is
 * not an object or the anchor cannot be resolved (the returned doc is then
 * just the clone).
 *
 * - position "append": the node is pushed onto the top-level `content`.
 * - any other position: the node is spliced next to the anchor — after it for
 *   "after", before it otherwise.
 *
 * Anchor resolution: `anchorNodeId` (matched against `attrs.id` anywhere below
 * the root) takes precedence; only when it is absent is `anchorText` used,
 * which picks the first TOP-LEVEL block whose plain text contains it.
 *
 * Table structure nodes (tableRow / tableCell / tableHeader) are handled
 * specially: they are spliced into the nearest enclosing container of the
 * required type (table for rows, tableRow for cells) rather than into the
 * anchor's direct parent.
 *
 * @throws Error when a table structure node is appended at the top level, or
 *   when its resolved anchor is not inside a container of the required type.
 */
export function insertNodeRelative(
  doc: any,
  node: any,
  opts: InsertOptions,
): { doc: any; inserted: boolean } {
  const out = clone(doc);
  const fresh = clone(node);
  const done = (inserted: boolean): { doc: any; inserted: boolean } => ({
    doc: out,
    inserted,
  });

  // Nothing actionable without options.
  if (!isObject(opts)) return done(false);

  const structural = isObject(node) && STRUCTURAL_TYPES.has(node.type);
  const wantsAfter = opts.position === "after";
  const offset = wantsAfter ? 1 : 0;

  // ---- append -------------------------------------------------------------
  if (opts.position === "append") {
    if (structural) {
      // A row/cell at the top level would be invalid nesting.
      throw new Error(
        `insert_node: cannot append a ${node.type} at the top level; use position before/after with an anchor inside the target table`,
      );
    }
    if (!isObject(out)) return done(false);
    if (!Array.isArray(out.content)) out.content = [];
    out.content.push(fresh);
    return done(true);
  }

  // ---- table structure nodes ------------------------------------------------
  if (structural) {
    const containerType = REQUIRED_CONTAINER[node.type];
    const chain = findAnchorChain(out, opts);
    if (chain == null) return done(false);

    // Scan from the anchor upwards for the nearest node of the container type
    // (the anchor itself counts).
    let containerIdx = chain.length - 1;
    while (
      containerIdx >= 0 &&
      !(
        isObject(chain[containerIdx].node) &&
        chain[containerIdx].node.type === containerType
      )
    ) {
      containerIdx--;
    }
    if (containerIdx < 0) {
      throw new Error(
        `insert_node: cannot insert a ${node.type} here — the anchor is not inside a ${containerType}. Anchor on a cell's text or a block id that lives inside the target table.`,
      );
    }

    const container = chain[containerIdx].node;
    if (!Array.isArray(container.content)) container.content = [];

    const anchorIsContainer = containerIdx === chain.length - 1;
    // Anchor IS the container: go to its very end ("after") or very start.
    // Otherwise: sit next to the container's child that leads to the anchor.
    const spliceAt = anchorIsContainer
      ? wantsAfter
        ? container.content.length
        : 0
      : chain[containerIdx + 1].index + offset;
    container.content.splice(spliceAt, 0, fresh);
    return done(true);
  }

  // ---- ordinary node, anchored by id ----------------------------------------
  const anchorId = opts.anchorNodeId;
  if (anchorId != null) {
    // Depth-first; stops at the first match and reports whether one was found.
    const spliceNextToId = (siblings: any[]): boolean => {
      for (let i = 0; i < siblings.length; i++) {
        const sibling = siblings[i];
        if (matchesId(sibling, anchorId)) {
          siblings.splice(i + offset, 0, fresh);
          return true;
        }
        if (
          isObject(sibling) &&
          Array.isArray(sibling.content) &&
          spliceNextToId(sibling.content)
        ) {
          return true;
        }
      }
      return false;
    };
    const found =
      isObject(out) && Array.isArray(out.content)
        ? spliceNextToId(out.content)
        : false;
    return done(found);
  }

  // ---- ordinary node, anchored by text (top-level blocks only) --------------
  const needle = opts.anchorText;
  if (needle != null && isObject(out) && Array.isArray(out.content)) {
    const hitIndex = out.content.findIndex((block: any) =>
      blockPlainText(block).includes(needle),
    );
    if (hitIndex !== -1) {
      out.content.splice(hitIndex + offset, 0, fresh);
      return done(true);
    }
  }

  return done(false);
}
/**
 * Produce an id that is not yet in `used` and register it there.
 *
 * The id is 12 characters drawn at random (via `Math.random`) from
 * `a-z0-9`. If the draw happens to collide with an existing id, a new one is
 * drawn until it is unique. Callers depend on uniqueness only, never on the
 * concrete value.
 */
function makeFreshId(used: Set<string>): string {
  const ALPHABET = "abcdefghijklmnopqrstuvwxyz0123456789";
  const ID_LENGTH = 12;

  const drawId = (): string =>
    Array.from(
      { length: ID_LENGTH },
      () => ALPHABET[Math.floor(Math.random() * ALPHABET.length)],
    ).join("");

  let candidate = drawId();
  while (used.has(candidate)) {
    candidate = drawId();
  }
  used.add(candidate);
  return candidate;
}
/**
 * Build the single paragraph node that holds a cell's plain text: a
 * `paragraph` with the given `id`, `indent: 0`, and one text node. An empty
 * `text` produces a paragraph whose `content` is an empty array, with no text
 * node at all.
 */
function makeCellParagraph(id: string, text: string): any {
  const content: any[] = [];
  if (text) {
    content.push({ type: "text", text });
  }
  return {
    type: "paragraph",
    attrs: { id, indent: 0 },
    content,
  };
}
+ * - `path`: index path of the table within the doc. + */ +export function readTable( + doc: any, + tableRef: string, +): { + rows: number; + cols: number; + cells: string[][]; + cellIds: (string | null)[][]; + path: number[]; +} | null { + const root = clone(doc); + const located = locateTable(root, tableRef); + if (located == null) return null; + const { table, path } = located; + + const rowNodes = Array.isArray(table.content) ? table.content : []; + const rows = rowNodes.length; + const cols = rowNodes[0]?.content?.length ?? 0; + + const cells: string[][] = []; + const cellIds: (string | null)[][] = []; + for (const rowNode of rowNodes) { + const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : []; + const rowText: string[] = []; + const rowIds: (string | null)[] = []; + for (const cellNode of cellNodes) { + rowText.push(blockPlainText(cellNode)); + // The cell's first paragraph carries the id used for patch_node. + const firstPara = Array.isArray(cellNode?.content) + ? cellNode.content[0] + : undefined; + const id = + isObject(firstPara) && isObject(firstPara.attrs) + ? firstPara.attrs.id ?? null + : null; + rowIds.push(id); + } + cells.push(rowText); + cellIds.push(rowIds); + } + + return { rows, cols, cells, cellIds, path }; +} + +/** + * Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`. + * + * The row is padded to the table's column count (`cells[i] ?? ""`); supplying + * MORE cells than columns throws. Each new cell copies `colwidth` for its + * column from the header row when present, gets a fresh-id paragraph, and a + * `colspan:1, rowspan:1` attrs. `index` (when an integer in `[0, rows]`) splices + * the row there; otherwise the row is appended at the end. 
+ */ +export function insertTableRow( + doc: any, + tableRef: string, + cells: string[], + index?: number, +): { doc: any; inserted: boolean } { + const out = clone(doc); + const located = locateTable(out, tableRef); + if (located == null) return { doc: out, inserted: false }; + const { table } = located; + + if (!Array.isArray(table.content)) table.content = []; + const rows = table.content.length; + const headerRow = table.content[0]; + const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : []; + + // Column count is the WIDEST existing row, so the guard below stays + // meaningful for ragged tables and the new row matches the table's width. + // Fall back to the supplied cell count only when the table has no rows. + let colCount = 0; + for (const r of table.content) { + if (isObject(r) && Array.isArray(r.content)) colCount = Math.max(colCount, r.content.length); + } + if (colCount === 0) colCount = Array.isArray(cells) ? cells.length : 0; + + if (Array.isArray(cells) && cells.length > colCount) { + throw new Error( + `table_insert_row: got ${cells.length} cell(s) but the table has ${colCount} column(s)`, + ); + } + + // Resolve the landing index up front so the cell-type decision and the splice + // below agree: a valid integer in [0, rows] splices there, else we append. + const landingIndex = + typeof index === "number" && Number.isInteger(index) && index >= 0 && index <= rows + ? index + : rows; + + // Seed the id generator with every id already in the doc so the new cell + // paragraph ids are unique within the whole document. + const used = new Set<string>(); + collectIds(out, used); + + const newCells: any[] = []; + for (let i = 0; i < colCount; i++) { + const text = (Array.isArray(cells) ? cells[i] : undefined) ?? ""; + const attrs: Record<string, any> = { colspan: 1, rowspan: 1 }; + // Copy this column's colwidth from the header row's cell when present. 
+ const colwidth = headerCells[i]?.attrs?.colwidth; + if (colwidth !== undefined) attrs.colwidth = colwidth; + // A row landing at index 0 becomes the new header row, so inherit the + // current header cell's type per column (Docmost uses "tableHeader" there); + // every other position is a plain data cell. + const cellType = landingIndex === 0 ? headerCells[i]?.type ?? "tableCell" : "tableCell"; + newCells.push({ + type: cellType, + attrs, + content: [makeCellParagraph(makeFreshId(used), text)], + }); + } + + const newRow = { type: "tableRow", content: newCells }; + + // Splice at the resolved landing index (append when index was omitted/invalid). + table.content.splice(landingIndex, 0, newRow); + + return { doc: out, inserted: true }; +} + +/** + * Delete the row at 0-based `index` from a table. Returns `{ doc, deleted }`. + * `deleted` is false only when the table cannot be located. Throws on an + * out-of-range index, and refuses to delete the table's only row. + */ +export function deleteTableRow( + doc: any, + tableRef: string, + index: number, +): { doc: any; deleted: boolean } { + const out = clone(doc); + const located = locateTable(out, tableRef); + if (located == null) return { doc: out, deleted: false }; + const { table } = located; + + if (!Array.isArray(table.content)) table.content = []; + const rows = table.content.length; + + if (!Number.isInteger(index) || index < 0 || index >= rows) { + throw new Error( + `table_delete_row: row index ${index} out of range (table has ${rows} row(s))`, + ); + } + if (rows <= 1) { + throw new Error( + "table_delete_row: refusing to delete the only row of the table", + ); + } + + table.content.splice(index, 1); + return { doc: out, deleted: true }; +} + +/** + * Set the plain-text content of cell `[row, col]` (0-based) to `text`. Returns + * `{ doc, updated }`; `updated` is false only when the table cannot be located. + * Throws when `row`/`col` is out of range. 
The cell's own attrs (colspan/ + * rowspan/colwidth) are preserved; its content becomes a single text paragraph + * that reuses the cell's existing first-paragraph id when present, else a fresh + * one. + */ +export function updateTableCell( + doc: any, + tableRef: string, + row: number, + col: number, + text: string, +): { doc: any; updated: boolean } { + const out = clone(doc); + const located = locateTable(out, tableRef); + if (located == null) return { doc: out, updated: false }; + const { table } = located; + + const rowNodes = Array.isArray(table.content) ? table.content : []; + const rows = rowNodes.length; + const rowNode = rowNodes[row]; + const cols = isObject(rowNode) && Array.isArray(rowNode.content) + ? rowNode.content.length + : 0; + + if ( + !Number.isInteger(row) || + row < 0 || + row >= rows || + !Number.isInteger(col) || + col < 0 || + col >= cols + ) { + throw new Error(`table_update_cell: cell [${row},${col}] out of range`); + } + + const cellNode = rowNode.content[col]; + // Reuse the cell's existing first-paragraph id, or mint a fresh unique one. + const existingPara = Array.isArray(cellNode?.content) + ? cellNode.content[0] + : undefined; + let id = + isObject(existingPara) && isObject(existingPara.attrs) + ? existingPara.attrs.id + : undefined; + if (typeof id !== "string" || id.length === 0) { + const used = new Set<string>(); + collectIds(out, used); + id = makeFreshId(used); + } + + cellNode.content = [makeCellParagraph(id, text)]; + return { doc: out, updated: true }; +} diff --git a/packages/docmost-client/src/lib/page-lock.ts b/packages/docmost-client/src/lib/page-lock.ts new file mode 100644 index 0000000..75b50c4 --- /dev/null +++ b/packages/docmost-client/src/lib/page-lock.ts @@ -0,0 +1,39 @@ +/** + * Per-page async mutex. + * + * Content writes over the collaboration websocket must never overlap for the + * same page: two concurrent full-document replaces would race on the live Yjs + * fragment. 
We serialize them with a per-pageId promise chain — each new + * operation waits for the previous one on that page to settle (success or + * failure) before it runs. Different pages never block each other. + */ + +const chains = new Map<string, Promise<unknown>>(); + +// The returned promise carries the real result/rejection of `fn` and MUST be +// awaited/handled by the caller; only the internal chaining tail swallows +// errors (purely to gate ordering). +export function withPageLock<T>( + pageId: string, + fn: () => Promise<T>, +): Promise<T> { + // Wait for the previous op on this page; swallow its error so a failure does + // not poison the queue for the next caller. + const prev = (chains.get(pageId) ?? Promise.resolve()).catch(() => {}); + const run = prev.then(fn); + + // The tail used for chaining must also swallow errors (it only gates order). + const tail = run.catch(() => {}); + chains.set(pageId, tail); + + // Drop the map entry once this op is the tail and has settled, to avoid an + // unbounded map of resolved promises. + tail.then(() => { + if (chains.get(pageId) === tail) { + chains.delete(pageId); + } + }); + + // Callers get the real result/rejection of fn. + return run; +} diff --git a/packages/docmost-client/src/lib/transforms.ts b/packages/docmost-client/src/lib/transforms.ts new file mode 100644 index 0000000..d8fba09 --- /dev/null +++ b/packages/docmost-client/src/lib/transforms.ts @@ -0,0 +1,477 @@ +/** + * Pure, network-free transform primitives for a ProseMirror/TipTap document + * tree, plus one higher-level orchestration (commentsToFootnotes). + * + * A ProseMirror node here is a plain JSON object of the shape produced by + * Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the + * `content` array; callouts, tables, lists all hold their children in + * `content`, so a single recursive walk reaches them all. 
+ * + * Conventions (matching node-ops.ts): + * - functions that produce a new document deep-clone their input and return a + * `{ doc, ... }` object; the caller's objects are never mutated. + * - functions are defensively null-safe. + * - `marks` arrays are preserved verbatim when fragments are split/reordered. + */ + +import { blockPlainText } from "./node-ops.js"; + +/** Deep-clone a JSON-serializable value without mutating the original. */ +function clone<T>(value: T): T { + if (typeof structuredClone === "function") { + return structuredClone(value); + } + // Fallback for environments without structuredClone. + return JSON.parse(JSON.stringify(value)) as T; +} + +/** True if `value` is a non-null object (and not an array). */ +function isObject(value: any): value is Record<string, any> { + return value != null && typeof value === "object" && !Array.isArray(value); +} + +/** + * Plain text of a node (re-export of node-ops' blockPlainText so transform + * authors have a single import surface). Recurses through nested content. + */ +export function blockText(node: any): string { + return blockPlainText(node); +} + +/** + * Depth-first visit of every node in the tree, including the root and the + * nested content of callouts, tables, lists, etc. `fn` is called once per node. + * Null-safe: a nullish or non-object node is ignored. + */ +export function walk(node: any, fn: (node: any) => void): void { + if (!isObject(node)) return; + fn(node); + if (Array.isArray(node.content)) { + for (const child of node.content) { + walk(child, fn); + } + } +} + +/** + * Find the FIRST node (depth-first) matching `predicate`, anywhere in the tree. + * Works even when the node carries no `attrs.id` (it searches the raw tree, not + * an id index). Returns the live node reference inside `doc` (NOT a clone), or + * null when nothing matches. Typical use: `getList(doc, n => n.type === + * "orderedList")`. 
+ */ +export function getList( + doc: any, + predicate: (node: any) => boolean, +): any | null { + let found: any | null = null; + walk(doc, (node) => { + if (found == null && predicate(node)) { + found = node; + } + }); + return found; +} + +/** Options for insertMarkerAfter. */ +export interface InsertMarkerOptions { + /** + * Limit the search to TOP-LEVEL blocks with index < beforeBlock. Used to keep + * footnote markers in the body and out of the notes section. + */ + beforeBlock?: number; +} + +/** + * Insert `marker` as a PLAIN (unmarked) text run right after the first + * occurrence of `anchor`. + * + * The text run that contains the END of the anchor is SPLIT at the anchor end, + * so all existing marks (links, bold, ...) on the surrounding text are + * preserved, while the inserted marker run carries NO marks. The marker is + * inserted as a leading-space-padded run (`" " + marker`) so it visually + * separates from the preceding word. + * + * The anchor is matched against the concatenated plain text of each top-level + * block (so an anchor that spans several text/mark runs still matches). The + * insertion happens inside the inline content array that holds the anchor's + * final character. + * + * Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is + * false when the anchor text was not found in any in-scope block. + */ +export function insertMarkerAfter( + doc: any, + anchor: string, + marker: string, + opts: InsertMarkerOptions = {}, +): { doc: any; inserted: boolean } { + const out = clone(doc); + if (!isObject(out) || !Array.isArray(out.content) || !anchor) { + return { doc: out, inserted: false }; + } + + const limit = + typeof opts.beforeBlock === "number" + ? Math.min(opts.beforeBlock, out.content.length) + : out.content.length; + + for (let b = 0; b < limit; b++) { + const block = out.content[b]; + if (!isObject(block)) continue; + // Quick reject: skip blocks whose plain text cannot contain the anchor. 
+ if (!blockPlainText(block).includes(anchor)) continue; + + // Walk the inline content arrays inside this block, tracking a running + // character offset so we can locate the inline array + text run that holds + // the END of the anchor's first occurrence. + let inserted = false; + let offset = 0; // characters of plain text seen so far in this block + const anchorEnd = (() => blockPlainText(block).indexOf(anchor) + anchor.length)(); + + // Recurse into inline-bearing containers (paragraph, heading, table cell, + // callout child paragraphs, ...). We only split inside an array of inline + // nodes (text/inline atoms); the FIRST array whose cumulative range covers + // anchorEnd receives the split + marker. + const visit = (container: any): void => { + if (inserted || !isObject(container) || !Array.isArray(container.content)) { + return; + } + const inline = container.content; + // Detect whether this array is an inline array (contains text nodes). + const hasText = inline.some( + (n: any) => isObject(n) && n.type === "text", + ); + if (hasText) { + for (let i = 0; i < inline.length; i++) { + const n = inline[i]; + const len = isObject(n) ? blockPlainText(n).length : 0; + const runStart = offset; + const runEnd = offset + len; + // The run that contains the anchor end (anchorEnd lands inside this + // run, i.e. runStart < anchorEnd <= runEnd) is the split point. + if ( + !inserted && + isObject(n) && + n.type === "text" && + typeof n.text === "string" && + anchorEnd > runStart && + anchorEnd <= runEnd + ) { + const cut = anchorEnd - runStart; // split index within this text run + const before = n.text.slice(0, cut); + const after = n.text.slice(cut); + const marks = Array.isArray(n.marks) ? n.marks : []; + const parts: any[] = []; + if (before.length > 0) { + parts.push({ ...n, text: before, marks: [...marks] }); + } + // Marker is a PLAIN run: no marks copied. Leading space separates it. 
+ parts.push({ type: "text", text: " " + marker }); + if (after.length > 0) { + parts.push({ ...n, text: after, marks: [...marks] }); + } + inline.splice(i, 1, ...parts); + inserted = true; + return; + } + offset = runEnd; + } + } else { + // Not an inline array: recurse into children (e.g. callout -> paragraph). + for (const child of inline) { + visit(child); + if (inserted) return; + } + } + }; + + visit(block); + if (inserted) { + return { doc: out, inserted: true }; + } + // If the block matched in plain text but we could not split (e.g. anchor + // lands inside an atom), fall through to the next block rather than failing. + } + + return { doc: out, inserted: false }; +} + +/** + * In the disclaimer callout, replace a `[1]…[K]` range marker with `[1]…[n]`. + * + * Docmost translations use a callout that states the footnote range, e.g. + * "[1]…[5]". When the number of notes changes, this rewrites the trailing + * number of any `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis) occurrence found in a + * callout's text nodes to `[1]…[n]`. Operates on a clone; returns + * `{ doc, changed }` where `changed` is the number of text nodes rewritten. + */ +export function setCalloutRange( + doc: any, + n: number, +): { doc: any; changed: number } { + const out = clone(doc); + let changed = 0; + // Match "[1]" + (… or ...) + "[<digits>]"; rewrite the last number to n. + const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g; + walk(out, (node) => { + if (node.type === "callout") { + walk(node, (inner) => { + if ( + inner.type === "text" && + typeof inner.text === "string" && + rangeRe.test(inner.text) + ) { + rangeRe.lastIndex = 0; + inner.text = inner.text.replace(rangeRe, `$1${n}$2`); + changed++; + } + rangeRe.lastIndex = 0; + }); + } + }); + return { doc: out, changed }; +} + +/** + * Generate a short random id for a new block's `attrs.id`. Docmost uses nanoid; + * a base36 random string is sufficient here (uniqueness within one document). 
+ */ +function freshId(): string { + return ( + Math.random().toString(36).slice(2, 12) + + Math.random().toString(36).slice(2, 6) + ); +} + +/** + * Wrap inline ProseMirror nodes in a list item: + * { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content: inlineNodes }] } + * with a fresh random block id on the paragraph. The inline nodes are cloned so + * the result shares no references with the caller's input. + */ +export function noteItem(inlineNodes: any[]): any { + const content = Array.isArray(inlineNodes) ? clone(inlineNodes) : []; + return { + type: "listItem", + content: [ + { + type: "paragraph", + attrs: { id: freshId() }, + content, + }, + ], + }; +} + +/** + * Convert a comment's markdown (e.g. `**Lead.** body...`) into inline + * ProseMirror nodes. + * + * A leading `комментарий: ` (case-insensitive) or `N. ` numeric prefix is + * stripped first. Then a minimal bold-split is applied: a leading + * `**bold lead**` run becomes a text node with a bold mark, and the remainder + * becomes a plain text node. This keeps the conversion synchronous (the + * transform sandbox runs synchronously) and dependency-free; the existing + * async markdownToProseMirror is intentionally NOT used here. + */ +export function mdToInlineNodes(markdown: string): any[] { + let md = typeof markdown === "string" ? markdown : ""; + // Strip a leading "комментарий: " prefix (case-insensitive) or a "N. " prefix. + md = md.replace(/^\s*комментарий\s*:\s*/i, ""); + md = md.replace(/^\s*\d+\.\s+/, ""); + md = md.trim(); + + if (md === "") return []; + + const nodes: any[] = []; + // Leading bold lead: **...** at the very start. + const leadMatch = /^\*\*([^*]+)\*\*\s*/.exec(md); + if (leadMatch) { + const leadText = leadMatch[1]; + nodes.push({ + type: "text", + text: leadText, + marks: [{ type: "bold" }], + }); + const rest = md.slice(leadMatch[0].length); + if (rest.length > 0) { + // Preserve the separating space that followed the bold lead. 
+ const sep = /^\*\*[^*]+\*\*(\s*)/.exec(md); + const spacing = sep ? sep[1] : ""; + nodes.push({ type: "text", text: spacing + rest }); + } + return nodes; + } + + // No bold lead: emit the whole thing as a single plain text node, with any + // remaining **bold** spans split out inline. + return splitInlineBold(md); +} + +/** + * Split a string with inline `**bold**` spans into text nodes, bolding the + * spans. Used as the no-lead fallback in mdToInlineNodes. + */ +function splitInlineBold(text: string): any[] { + const nodes: any[] = []; + const re = /\*\*([^*]+)\*\*/g; + let last = 0; + let m: RegExpExecArray | null; + while ((m = re.exec(text)) !== null) { + if (m.index > last) { + nodes.push({ type: "text", text: text.slice(last, m.index) }); + } + nodes.push({ type: "text", text: m[1], marks: [{ type: "bold" }] }); + last = m.index + m[0].length; + } + if (last < text.length) { + nodes.push({ type: "text", text: text.slice(last) }); + } + return nodes.length > 0 ? nodes : [{ type: "text", text }]; +} + +/** Options for commentsToFootnotes. */ +export interface CommentsToFootnotesOptions { + /** Heading text under which the notes orderedList lives. */ + notesHeading?: string; +} + +/** A comment shape as returned by DocmostClient.listComments. */ +export interface FootnoteComment { + id: string; + content: string; + selection?: string | null; + [k: string]: any; +} + +/** + * Turn inline comments into numbered footnotes. + * + * For each inline comment that carries a `selection`: + * 1. insert a placeholder marker (a NUL-delimited "\u0000FN<i>\u0000" + * sentinel) right after the selection text in the BODY (before the + * notes heading); + * 2. build a note list item from the comment's markdown content. 
+ * + * Then RENUMBER every footnote marker in the body by reading order: existing + * `[N]` markers and the new "\u0000FN<i>\u0000" placeholders are both replaced by a + * sequential `[seq]`, and the notes orderedList is reordered so each note lines + * up with its marker's reading-order position. Finally the disclaimer callout + * range is synced to the new note count. + * + * Returns `{ doc, consumed }` where `consumed` lists the ids of comments that + * were successfully anchored (their selection was found and a placeholder + * inserted). Operates on a clone of `doc`. + */ +export function commentsToFootnotes( + doc: any, + comments: FootnoteComment[], + opts: CommentsToFootnotesOptions = {}, +): { doc: any; consumed: string[] } { + let working = clone(doc); + const notesHeading = opts.notesHeading ?? "Примечания переводчика"; + + const top: any[] = Array.isArray(working.content) ? working.content : []; + const notesIdx = top.findIndex( + (n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading, + ); + if (notesIdx < 0) { + throw new Error(`heading "${notesHeading}" not found`); + } + // The notes orderedList lives at or after the heading. + const notesList = top + .slice(notesIdx) + .find((n) => isObject(n) && n.type === "orderedList"); + if (!notesList) { + throw new Error("notes orderedList not found"); + } + + const consumed: string[] = []; + const noteByPh = new Map<string, any>(); + + (Array.isArray(comments) ? comments : []).forEach((c, i) => { + if (!c || !c.selection) return; + // Collision-proof sentinel delimited by NUL control chars, which never occur + // in real Docmost prose — so the renumber regex below cannot mistake any body + // text (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is + // transient: the placeholder round-trips within this function (insertMarkerAfter + // inserts it, the renumber pass replaces it with "[N]"), so it never persists + // in a returned/pushed document. 
+ const ph = `\u0000FN${i}\u0000`; + // insertMarkerAfter returns a NEW cloned doc; reassign `working` and refresh + // the `top` / `notesList` references that point into it. + const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, { + beforeBlock: notesIdx, + }); + if (!r.inserted) return; + working = r.doc; + noteByPh.set(ph, noteItem(mdToInlineNodes(c.content))); + consumed.push(c.id); + }); + + // Re-resolve references into the (possibly re-cloned) working doc. + const top2: any[] = Array.isArray(working.content) ? working.content : []; + const notesList2 = top2 + .slice(notesIdx) + .find((n) => isObject(n) && n.type === "orderedList"); + if (!notesList2) { + throw new Error("notes orderedList not found"); + } + + const oldNotes: any[] = Array.isArray(notesList2.content) + ? notesList2.content + : []; + const newNotes: any[] = []; + let seq = 0; + // Match either an existing "[N]" marker or a NUL-delimited "\u0000FN<i>\u0000" + // placeholder, in reading order across the body (blocks before the notes heading). + const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g; + // Same range regex setCalloutRange uses to detect the disclaimer callout's + // "[1]…[K]" range; used here to decide whether a top-level callout is the + // disclaimer (skip) or an ordinary callout (renumber normally). + const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/; + for (let i = 0; i < notesIdx; i++) { + // Skip ONLY the disclaimer callout: its "[1]…[K]" range is NOT a footnote + // marker and is synced separately by setCalloutRange. Renumbering it here + // would consume note slots and corrupt the sequence. Other top-level + // callouts may carry legitimate "[N]" body markers and are renumbered. 
+ if ( + isObject(top2[i]) && + top2[i].type === "callout" && + disclaimerRangeRe.test(blockText(top2[i])) + ) { + continue; + } + walk(top2[i], (node) => { + if (node.type !== "text" || typeof node.text !== "string") return; + node.text = node.text.replace(re, (_m: string, oldNum: string, phIdx: string) => { + if (oldNum != null) { + const note = oldNotes[Number(oldNum) - 1]; + // Every existing body marker MUST map to a real note. An out-of-range + // marker means the document is internally inconsistent; fail loudly + // rather than silently dropping the note and desyncing the callout. + if (note === undefined) { + throw new Error( + `footnote [${oldNum}] has no matching note (notes list has ${oldNotes.length} items); document is inconsistent`, + ); + } + newNotes.push(note); + } else { + newNotes.push(noteByPh.get(`\u0000FN${phIdx}\u0000`)); + } + return `[${++seq}]`; + }); + }); + } + + // Reorder the notes list IN PLACE on `working` first, THEN sync the callout + // range. setCalloutRange clones `working`, so the reordered notes (mutated + // before the clone) are carried into its result automatically. No null-filter + // here: marker count and note count must stay exactly equal (the out-of-range + // guard above guarantees no undefined entry is ever pushed). + notesList2.content = newNotes; + const synced = setCalloutRange(working, notesList2.content.length); + + return { doc: synced.doc, consumed }; +} diff --git a/packages/docmost-client/tsconfig.json b/packages/docmost-client/tsconfig.json new file mode 100644 index 0000000..a013e0c --- /dev/null +++ b/packages/docmost-client/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src" + }, + "include": ["src/**/*"] +} diff --git a/src/pull.ts b/src/pull.ts new file mode 100644 index 0000000..be6d0ed --- /dev/null +++ b/src/pull.ts @@ -0,0 +1,137 @@ +/** + * Read-only Docmost -> filesystem mirror (SPEC §6 pull, Phase 1). 
+ * + * Walks the configured space's page tree and writes one self-contained `.md` + * per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is + * READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes + * local files. The meta block inside each file carries pageId/slugId/ + * parentPageId (identity), so no external map file is needed. + * + * Requires a `.env` with real Docmost credentials. This file must COMPILE and + * be correct, but is not expected to be run without live access. + * + * Run via: npm run pull (-> node build/pull.js) + */ +import { mkdir, writeFile } from "node:fs/promises"; +import { join } from "node:path"; +import { pathToFileURL } from "node:url"; +import { DocmostClient } from "docmost-client"; +import { loadSettings } from "./settings.js"; +import { sanitizeTitle, disambiguate } from "./sanitize.js"; + +/** Flat page node as returned by listAllSpacePages (no content). */ +interface PageNode { + id: string; + title?: string; + slugId?: string; + parentPageId?: string | null; + hasChildren?: boolean; +} + +/** + * Compute a deterministic, collision-free folder/file name for a node among its + * siblings. `usedBySibling` maps a parent key -> set of names already taken, so + * two siblings that sanitize to the same name get a stable ` ~slugId` suffix + * (SPEC §12). The name is COSMETIC; identity lives in the file's meta block. + */ +function nameForNode( + node: PageNode, + usedBySibling: Map<string, Set<string>>, +): string { + const parentKey = node.parentPageId ?? "__root__"; + let used = usedBySibling.get(parentKey); + if (!used) { + used = new Set<string>(); + usedBySibling.set(parentKey, used); + } + + let name = sanitizeTitle(node.title ?? ""); + if (used.has(name)) { + // Sibling collision: disambiguate with the stable slugId (fall back to the + // pageId if no slugId is present). + name = disambiguate(name, node.slugId ?? 
node.id); + } + used.add(name); + return name; +} + +async function main(): Promise<void> { + const s = loadSettings(); + const client = new DocmostClient( + s.docmostApiUrl, + s.docmostEmail, + s.docmostPassword, + ); + + const spaceId = s.docmostSpaceId; + const vaultRoot = s.vaultPath; + + const pages: PageNode[] = await client.listAllSpacePages(spaceId); + + // Index pages by id so the parent chain can be walked. + const byId = new Map<string, PageNode>(); + for (const p of pages) { + if (p && p.id) byId.set(p.id, p); + } + + // Resolve each node's display name once, deterministically, tracking sibling + // collisions per parent. + const usedBySibling = new Map<string, Set<string>>(); + const nameById = new Map<string, string>(); + for (const p of pages) { + if (p && p.id) nameById.set(p.id, nameForNode(p, usedBySibling)); + } + + // Build the folder path for a page by walking parentPageId to the root. The + // page's OWN name is the file stem; its ancestors become folders. A `visited` + // guard prevents an infinite loop on a malformed parent cycle. + const folderSegmentsFor = (node: PageNode): string[] => { + const ancestors: string[] = []; + const visited = new Set<string>(); + let current: PageNode | undefined = node.parentPageId + ? byId.get(node.parentPageId) + : undefined; + while (current && current.id && !visited.has(current.id)) { + visited.add(current.id); + ancestors.unshift( + nameById.get(current.id) ?? sanitizeTitle(current.title ?? ""), + ); + current = current.parentPageId + ? byId.get(current.parentPageId) + : undefined; + } + return ancestors; + }; + + let written = 0; + for (const p of pages) { + if (!p || !p.id) continue; + const segments = folderSegmentsFor(p); + const fileStem = nameById.get(p.id) ?? sanitizeTitle(p.title ?? ""); + const dir = join(vaultRoot, ...segments); + await mkdir(dir, { recursive: true }); + + // Body + meta only (no comments block) — SPEC §3. 
+ const fileMd = await client.exportPageBody(p.id); + await writeFile(join(dir, `${fileStem}.md`), fileMd, "utf8"); + written++; + } + + console.log( + `pull complete: ${written} page(s) from space ${spaceId} into ${vaultRoot}`, + ); +} + +// Only auto-run when invoked directly as the CLI entrypoint, not when this +// module is imported (e.g. by a unit test importing sanitizeTitle / path +// helpers), so the import does not trigger loadSettings() + process.exit. +const invokedDirectly = + typeof process.argv[1] === "string" && + import.meta.url === pathToFileURL(process.argv[1]).href; + +if (invokedDirectly) { + main().catch((err) => { + console.error("pull failed:", err instanceof Error ? err.stack : err); + process.exit(1); + }); +} diff --git a/src/roundtrip.ts b/src/roundtrip.ts new file mode 100644 index 0000000..14baf41 --- /dev/null +++ b/src/roundtrip.ts @@ -0,0 +1,222 @@ +/** + * Phase-0 idempotency harness (SPEC §11, "Задача №0"). + * + * git diffs byte-for-byte, so a non-deterministic export would make every pull + * produce a phantom diff -> endless commits/conflicts. Before the auto two-way + * mode can be enabled the round-trip `export -> import -> export` MUST be + * stable. This CLI proves (or disproves) that property on a given document. + * + * Usage (offline, the default for CI): + * node build/roundtrip.js --fixture path/to/doc.json + * + * Usage (live — needs a .env with real Docmost creds): + * node build/roundtrip.js --page <pageId> + * + * Exit code: 0 when the markdown is byte-stable, 1 on any markdown mismatch (so + * it is CI-able). A non-empty document-level divergence (after stripping block + * ids) is reported but does NOT fail the run — the converter reconstructs + * schema default attrs, a KNOWN finding per SPEC §11. 
/**
 * Produce a copy of a ProseMirror node tree with every `attrs.id` removed.
 *
 * Block ids are regenerated by `markdownToProseMirror` (SPEC §11), so two
 * documents can only be compared semantically once the ids are gone. Arrays
 * and objects are rebuilt recursively; primitives and `null` are returned
 * unchanged. The input tree is never mutated.
 */
export function stripBlockIds(node: any): any {
  // Primitives, null and undefined have nothing to strip.
  if (node === null || typeof node !== "object") {
    return node;
  }

  if (Array.isArray(node)) {
    return node.map((child) => stripBlockIds(child));
  }

  const copy: any = {};
  for (const [key, value] of Object.entries(node)) {
    const isAttrsBag =
      key === "attrs" && value !== null && typeof value === "object";
    if (!isAttrsBag) {
      copy[key] = stripBlockIds(value);
      continue;
    }
    // Shallow-copy the attribute bag and drop only `id`; every other
    // attribute is kept (and itself stripped recursively).
    const kept: Record<string, unknown> = {
      ...(value as Record<string, unknown>),
    };
    delete kept.id;
    copy.attrs = stripBlockIds(kept);
  }
  return copy;
}
/**
 * Deep-compare two values and report where they first differ.
 *
 * Returns `null` when the values are equal, otherwise the path of the first
 * mismatch (rooted at `path`, default "$") together with the two differing
 * values. Arrays of different length are reported at `<path>.length` with the
 * two lengths; objects are compared over the union of their own keys, so a
 * key present on only one side shows up as `undefined` on the other.
 */
function firstDivergence(
  a: any,
  b: any,
  path = "$",
): { path: string; a: any; b: any } | null {
  if (a === b) return null;

  // Only two non-null containers of the same kind (array vs. plain object)
  // can be descended into; anything else is a divergence right here.
  const comparableContainers =
    a !== null &&
    b !== null &&
    typeof a === "object" &&
    typeof b === "object" &&
    Array.isArray(a) === Array.isArray(b);
  if (!comparableContainers) {
    return { path, a, b };
  }

  if (Array.isArray(a)) {
    if (a.length !== b.length) {
      return { path: `${path}.length`, a: a.length, b: b.length };
    }
    for (const [index, left] of a.entries()) {
      const hit = firstDivergence(left, b[index], `${path}[${index}]`);
      if (hit) return hit;
    }
    return null;
  }

  // Union of keys, a's keys first, then any keys that only b has.
  const names = new Set<string>(Object.keys(a));
  for (const extra of Object.keys(b)) {
    names.add(extra);
  }
  for (const name of names) {
    const hit = firstDivergence(a[name], b[name], `${path}.${name}`);
    if (hit) return hit;
  }
  return null;
}
DEFAULT_FIXTURE}`; + console.log(`Round-trip harness — source: ${source}`); + + // export -> import -> export + const md1 = convertProseMirrorToMarkdown(doc); + const doc2 = await markdownToProseMirror(md1); + const md2 = convertProseMirrorToMarkdown(doc2); + + // 1) The byte-stable markdown property git actually needs. + const markdownStable = md1 === md2; + + // 2) Semantic comparison of the documents with block ids stripped (they are + // regenerated on import, per SPEC §11). + const normDoc = stripBlockIds(doc); + const normDoc2 = stripBlockIds(doc2); + const docDivergence = firstDivergence(normDoc, normDoc2); + const semanticStable = docDivergence === null; + + console.log(""); + console.log(`markdown byte-stable (md1 === md2): ${markdownStable}`); + console.log(`document semantically stable (ids stripped): ${semanticStable}`); + + if (!markdownStable) { + // Show the first differing line so the divergence is concrete. + const lines1 = md1.split("\n"); + const lines2 = md2.split("\n"); + const max = Math.max(lines1.length, lines2.length); + let firstLine = -1; + for (let i = 0; i < max; i++) { + if (lines1[i] !== lines2[i]) { + firstLine = i; + break; + } + } + console.log(""); + console.log("--- markdown divergence ---"); + console.log(`first differing line: ${firstLine + 1}`); + if (firstLine >= 0) { + console.log(` export #1: ${JSON.stringify(lines1[firstLine])}`); + console.log(` export #2: ${JSON.stringify(lines2[firstLine])}`); + } + console.log(` md1 length: ${md1.length}, md2 length: ${md2.length}`); + } + + if (!semanticStable && docDivergence) { + console.log(""); + console.log("--- document divergence (ids stripped) ---"); + console.log(` path: ${docDivergence.path}`); + console.log(` doc: ${JSON.stringify(docDivergence.a)}`); + console.log(` doc2: ${JSON.stringify(docDivergence.b)}`); + console.log( + " (EXPECTED per SPEC §11: the converter reconstructs schema default" + + " attrs; does not affect markdown byte-stability)", + ); + } + + // The 
/**
 * Deterministic filename strategy (SPEC §12).
 *
 * The file name is COSMETIC — the source of truth for the file<->page link is
 * `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
 * functions are intentionally dependency-free and pure, so they are trivially
 * unit-testable.
 */

// Printable characters forbidden in file names on common filesystems (mainly
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
// Spaces are NOT in this set; whitespace is normalized separately below.
// ASCII control characters (code points 0..31) are replaced in a separate pass
// (see stripControlChars) to keep this literal free of embedded control bytes.
const FORBIDDEN_PRINTABLE_RE = /[/\\<>:"|?*]/g;

// Runs of whitespace collapse to a single space. Tabs and newlines are ASCII
// control characters, so inside sanitizeTitle they have already been replaced
// with "-" by stripControlChars before this pattern is applied.
const WHITESPACE_RUN_RE = /\s+/g;

// Reserved Windows device names (case-insensitive). A bare match (with or
// without an extension) is unusable as a file name, so it is prefixed with "_".
const RESERVED_WINDOWS_NAMES = new Set([
  "con",
  "prn",
  "aux",
  "nul",
  "com1",
  "com2",
  "com3",
  "com4",
  "com5",
  "com6",
  "com7",
  "com8",
  "com9",
  "lpt1",
  "lpt2",
  "lpt3",
  "lpt4",
  "lpt5",
  "lpt6",
  "lpt7",
  "lpt8",
  "lpt9",
]);

// Cap on the sanitized length, leaving room for an extension and a
// disambiguation suffix within a filesystem path component.
// NOTE(review): the cap counts UTF-16 code units (`string.length`), not bytes;
// a long non-ASCII title can encode to more than 120 bytes — confirm the byte
// budget is acceptable for the target filesystems.
const MAX_LENGTH = 120;

/**
 * Replace every ASCII control character (code points 0..31) with "-". Done by
 * scanning code units rather than a control-range regex literal, so the source
 * file carries no embedded control bytes.
 */
function stripControlChars(input: string): string {
  let out = "";
  for (let i = 0; i < input.length; i++) {
    out += input.charCodeAt(i) < 32 ? "-" : input[i];
  }
  return out;
}

/**
 * Cut `text` down to at most `maxUnits` UTF-16 code units without splitting a
 * surrogate pair: if the cut would land between a high and a low surrogate,
 * the dangling high surrogate is dropped too, so the result never ends in half
 * of an astral character (e.g. an emoji).
 */
function truncateAtCodePoint(text: string, maxUnits: number): string {
  if (text.length <= maxUnits) return text;
  const lastKept = text.charCodeAt(maxUnits - 1);
  const firstCut = text.charCodeAt(maxUnits);
  const splitsPair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstCut >= 0xdc00 &&
    firstCut <= 0xdfff;
  return text.slice(0, splitsPair ? maxUnits - 1 : maxUnits);
}

/**
 * Sanitize a page title into a safe file-name component (WITHOUT extension).
 *
 * Steps: replace control and forbidden characters with "-", collapse
 * whitespace runs to a single space, trim, cap the length (never cutting a
 * surrogate pair in half), then guard against an empty result, an all-dots
 * result, or a reserved Windows device name by prefixing with "_".
 *
 * @param title The raw page title; `null`/`undefined` is treated as "".
 * @returns The sanitized name. The "_" guard prefix is applied after the
 *   length cap, so a guarded name may be one character longer than the cap.
 */
export function sanitizeTitle(title: string): string {
  let name = stripControlChars(title ?? "")
    .replace(FORBIDDEN_PRINTABLE_RE, "-")
    .replace(WHITESPACE_RUN_RE, " ")
    .trim();

  if (name.length > MAX_LENGTH) {
    // Re-trim: the cut may expose a trailing space.
    name = truncateAtCodePoint(name, MAX_LENGTH).trim();
  }

  // Compare the base name (before the first dot) against reserved names, so
  // both "CON" and "con.md" are caught.
  const base = name.split(".")[0]?.toLowerCase() ?? "";
  // A name that is empty, consists only of dots ("." / ".." / "..."), or is a
  // reserved Windows device name is unusable as a path component. The all-dots
  // case is a path-traversal hazard in particular: an unprefixed ".." would
  // become a parent-directory segment and let a page escape the vault, so it
  // MUST be neutralized here (becomes "_..", which is a literal file name).
  if (
    name.length === 0 ||
    /^\.+$/.test(name) ||
    RESERVED_WINDOWS_NAMES.has(base)
  ) {
    name = "_" + name;
  }

  return name;
}

/**
 * Disambiguate a sanitized name when two siblings in the same folder collapse
 * to the same name. Appends a stable suffix built from the page's `slugId`, so
 * the result stays deterministic across runs (SPEC §12: `Title ~slugId`).
 */
export function disambiguate(name: string, slugId: string): string {
  return `${name} ~${slugId}`;
}
+ **Прогноз после Фаз 1–4:** ≈ **60–65 %** (чистые lib-модули 80 %+, корневое приложение ≈ 85 %, + транспортный `client.ts` ≈ 40 %). + +> ⚠️ **Артефакт измерения покрытия.** `package.json` пакета указывает `main: dist/index.js`, +> поэтому `import from 'docmost-client'` грузит **скомпилированный `dist/`**, а не `src/`. +> v8 меряет `src/` → показывает `client.ts` 0 %, хотя `collectRecentSince` реально исполняется. +> **Перед измерением покрытия** добавить в `vitest.config.ts` alias `docmost-client → packages/docmost-client/src/index.ts` +> (или мерить по `dist` после сборки), иначе любые новые тесты библиотеки не отразятся в отчёте. +> `@vitest/coverage-v8` и скрипт `"coverage"` в проекте отсутствуют — их нужно добавить. + +## 2. Рекомендации по модулям + +### app-root (`src/`) — движок синка, конфиг, sanitize, round-trip-харнесс +- **Извлечь в чистые функции:** `folderSegmentsFor` (`pull.ts:88`, замкнута внутри `main`), + `firstDivergence`/`parseArgs` (`roundtrip.ts:101/64`, не экспортированы). +- **Unit добавить:** `firstDivergence` (равные/разные деревья, путь расхождения, циклы) — + ловит ложное «stable» при реальном расхождении (вся суть харнесса); `nameForNode` (коллизии + имён сиблингов → перезапись файлов на диске); `folderSegmentsFor` (вложенность + защита от цикла + parent A→B→A, иначе зависание); `parseArgs`; ветка invalid-value в `loadSettingsOrExit` + (`config-errors.ts:27-30`, единственный значимый пробел). +- **Integration добавить:** `pull.main` с фейковым клиентом + временной директорией + (один файл на страницу, верные папки, узлы без id пропускаются) — после R-App-4. +- **НЕ тестировать:** `index.ts` (тонкий CLI-passthrough, только `console.log`); `envSchema` + (тестировать = тестировать Zod, покрыт через `parseSettings`); тело `roundtrip.main` + (байт-стабильность уже покрыта `roundtrip.test.ts`); `invokedDirectly`-guard-блоки; + `sanitizeTitle`/`disambiguate`/`parseSettings`/`stripBlockIds` (уже ~100 %). 
+ +### client-core (`packages/docmost-client/src/client.ts`, ~2770 строк) — god-object REST+WS клиент +- **Извлечь в чистые функции:** валидаторы `isSafeUrl`/`validateDocUrls`/`validateDocStructure` + (`client.ts:905/941/1004`), `imageMimeFromPath`/`buildImageNode` (1844/1864) — поднять в `lib/` + рядом с `filters.ts`; распаковку конвертов и clamp-логику пагинации (378-393, 1505) в pure-функции. +- **Unit добавить:** XSS-allowlist `isSafeUrl`+`validateDocUrls` (`javascript:`/`data:`/`file:`, + пробельно-контрольный обход `java\tscript:`, на всех медиа-узлах) — **высший приоритет по безопасности**; + `validateDocStructure` (глубина > 200, не-string type); расширить `collectRecentSince` + (граница `updatedAt === sinceIso`, элементы без `id`/`updatedAt`); `imageMimeFromPath`+`buildImageNode`; + `paginateAll` (стоп-условия, MAX_PAGES=50 + предупреждение, clamp 1..100, оба конверта) — после R-Client-2; + `appUrl`/`shareUrl`/`parseCommentContent`; sandbox `transformPage` (`node:vm`: нет `require`/`process`/`fs`, + таймаут 5 c, не-функция/не-doc → throw) — security. +- **Integration добавить (после R-Client-1, инъекция HTTP):** авто-реавторизация + (401-интерсептор + дедуп `login` + `getCollabTokenWithReauth`: один retry, `/auth/login` не ретраится, + `loginPromise` сбрасывается в `finally`); `uploadImage` (порядок guard ext→stat→read, > 20 MiB, + пересборка FormData на 401, нет утечки тела ответа в ошибку); `createPage` (replay multipart на 401); + `checkNewComments` (битая дата → throw, а не «ничего нового»; граница `createdAt > since`; флаг truncated). +- **НЕ тестировать:** тонкие REST-passthrough (`getWorkspace`/`getSpaces`/`renamePage`/`movePage`/ + `deletePage`/`restorePage`/`listTrash` и пр.) — конверт `data.data ?? data` покрыть один раз + извлечённой функцией; делегаты в node-ops/converter/diff (тестировать в их модулях); сами axios/yjs/hocuspocus. 
+ +### markdown-conversion (`lib/markdown-converter.ts` + `markdown-document.ts`) — конвертер ProseMirror↔Markdown +- **Unit добавить:** табличная golden-матрица по типам узлов (заголовки, маркированные/кодовые + спаны, ссылки с title, картинки с пробелами/скобками в src, кодоблоки с языком + срез хвостовых `\n`, + GFM-таблицы с выравниванием, spanned-таблицы → `<table>`, blockquote, task-list, math `a < b`, + mention/attachment/callout/details/columns/медиа, hr, hard break, неизвестный тип, пустой doc → `""`); + идемпотентность экранирования (`escapeAttr` стабилен на `& "`, `encodeMdUrl` пробел→`%20`), + отступы вложенных списков (`indentItemChildren`); envelope `parseDocmostMarkdown`/`serializeDocmostMarkdown` + (восстановление meta/body/comments, CRLF, «последний `docmost:comments`-блок побеждает», throw на битом JSON); + edge/malformed-вход (`null`/`{}`/нет content, отсутствующие attrs, глубокая вложенность без переполнения стека). +- **Integration добавить:** **property-тест round-trip идемпотентности** — `md→PM→md == md` байт-в-байт + + семантическая стабильность через `stripBlockIds`. **Самый ценный тест проекта** (фантомные git-диффы — + ровно то, ради чего существует харнесс). Требует фабрику документов и генератор (см. §3). +- **НЕ тестировать:** интерфейс `DocmostMdMeta`; одиночный токен `{{SUBPAGES}}`; внутренности + `marked`/`@tiptap/html`; underline/sub/sup как отдельные тесты — свернуть в один inline-marks-кейс. + +### prosemirror-schema (`lib/docmost-schema.ts`, ~1065 строк) — ~90 % декларативный конфиг +- **Unit добавить (ровно 2, намеренно не раздуваем):** `sanitizeCssColor` (`:44`) — allowlist против + CSS/style-инъекции: принять named/hex3-8/rgb(a)/hsl(a), отвергнуть `red; --x:url()`, `expression(...)`, + `red"><script>`, пустое/не-string; `clampCalloutType` (`:21`) — нормализация enum + регистр + фолбэк `info`. +- **НЕ тестировать:** все `Node.create`/`Mark.create`/`Extension.create` spec-объекты (~26 шт.) 
и + триплеты `default`/`parseHTML`/`renderHTML` — декларативные данные, тест тавтологичен; поведение + узлов проверяется **косвенно** через round-trip (другой модуль). Closures `textStyle.getAttrs`, + `Highlight`-guard, `Column.width` — покрыть HTML-фикстурами round-trip, не лезть в приватные closures. + +### node-ops (`lib/node-ops.ts`, ~897 строк) — чистые структурные операции над деревом узлов +- **Unit добавить (все unit, высочайший ROI — JSON-вход/JSON-выход):** `insertNodeRelative` + (append/before/after, by-id/by-anchor, маршрутизация структурных узлов, throw-ветки, offset); + `insertTableRow` (индекс/паддинг/наследование типа и colwidth заголовка, OOB→append); `replaceNodeById` + (изоляция клонов на N совпадений, без рекурсии в подставленный узел); `getNodeByRef` (`#n` in/out-of-range, + дубль id → первый, гарантия клона); `updateTableCell` (переиспользование id первого параграфа, + сохранение colspan/rowspan, OOB→throw); `deleteNodeById`/`deleteTableRow` (throw vs тихий no-op); + `sanitizeForYjs`+`findUnstorableAttr` (срез `undefined`, путь до bigint/function); `buildOutline`+`readTable`+ + `blockPlainText` (cols из row-0, усечение, ragged-таблицы). **Везде** ассерт «вход не мутирован». +- **Извлечь/рефактор:** инъекция `makeFreshId` (`:591`, `Math.random()`) — для точных ассертов на id + в `insertTableRow`/`updateTableCell`; иначе проверять формат+уникальность без рефактора. +- **НЕ тестировать:** интерфейсы `OutlineEntry`/`InsertOptions`; внутренние `clone`/`isObject`/`matchesId`/ + `truncate`/`makeCellParagraph`/`locateTable` (покрыты транзитивно); недостижимый fallback `structuredClone`. 
+ +### collaboration (`lib/collaboration.ts`, ~618 строк) — чистый верх + транспортный низ (Yjs/Hocuspocus/WS) +- **Unit добавить (чистый верх, без рефактора):** `buildCollabWsUrl` (http→ws, https→wss, срез `/api`, + `/collab` ровно один раз, drop query/hash, fallback на битый URL); `buildYDoc`/`assertYjsEncodable` + (валид кодируется; `undefined`-attr санитайзится; неэнкодируемый attr → ошибка с путём; dryRun==apply); + `bridgeTaskLists` (ol со всеми чекбоксами → ul taskList, без фантомного orderedList); `preprocessCallouts` + (`:::` внутри кодоблока не считается забором; незакрытый callout); `replacePageContent` (guard не-doc → throw). +- **Unit (после рефактора R-Collab-1):** ядро `onSynced` read-transform-write — пустой live-doc → дефолт, + `transform→null` без записи, `transform throw` пробрасывается, фрагмент заменяется полностью. + **Защищает от потери данных при конкурентном редактировании** (инвариант «без `await` между read и write»). +- **Unit (после R-Collab-2):** подавление ложного успеха — `unsyncedChanges→0` при разрыве не считается + успехом (флаг `connectionLost`); ловит «ложную персистенцию» / reconnect-шторм как успешную запись. +- **Integration:** `mutatePageContent` против mock-Hocuspocus-сервера (после R-Collab-2/3 + fake-таймеры). +- **НЕ тестировать:** `updatePageContentRealtime` (passthrough); глобальная мутация `window`/`document`/ + `WebSocket` на импорте (env-обвязка); внутренности yjs/hocuspocus/marked. 
+ +### transforms (`lib/transforms.ts`, ~477 строк) — чистые примитивы трансформации документа +- **Unit добавить:** `commentsToFootnotes` — перенумерация/порядок (маркеры не по порядку массива → + `[1]..[k]` в порядке чтения, список заметок переупорядочен) **и** иммутабельность входа + throw на + несогласованности (`[9]` при 3 заметках, нет heading/orderedList); `insertMarkerAfter` (сплит по нескольким + text/mark-ранам, маркер plain, окружающие марки сохранены, scope `beforeBlock`); `setCalloutRange` + (статичность regex `lastIndex` на двух text-узлах, только внутри callout, Unicode `…` и ASCII `...`); + `mdToInlineNodes` (срез префиксов `комментарий:`/`N.`, граница `**bold**`-лида, пробел не теряется); + `walk`/`getList` (полнота обхода, live-ссылка не клон). +- **Извлечь/рефактор:** инъекция `freshId` (`:240`) — опционально, для воспроизводимого dryRun. +- **НЕ тестировать:** `blockText` (re-export `node-ops.blockPlainText`); `splitInlineBold` (внутр.); + `clone`/`isObject`/`freshId`; интерфейсы. Sandbox-eval `(doc,ctx)=>doc` живёт в `client.ts`, не здесь. + +### diff (`lib/diff.ts`, ~319 строк) — headless-дифф документов (чистый, детерминированный) +- **Unit добавить:** `diffDocs` (вставка/удаление/идентичность + пустые doc; счётчики + `inserted`/`deleted`); подсчёт целостности (images/tables/callouts old→new, дедуп ссылки, + разбитой на два рана; битая ссылка считается 1 раз); `footnoteMarkers` (граница body/notes по + `notesHeading`, порядок чтения, кастомный/отсутствующий heading); coarse-fallback (форс-исключение + precise-пути → нет throw, есть пометка о деградации, whitespace-блоки не репортятся); `blockContextAt`+ + `blocksChanged` (усечение >80, не-пустой контекст ловит проглоченный `catch`, дедуп блоков). 
+- **НЕ тестировать:** `getSchema(docmostExtensions)` (обвязка); сам алгоритм + `recreateTransform`/`ChangeSet`/`simplifyChanges`; интерфейсы; точный порядок строк секции Changes + в markdown (порядок задаёт библиотека — проверять множества/счётчики). + +### client-utils (`lib/auth-utils.ts` + `filters.ts` + `json-edit.ts` + `page-lock.ts`, 345 строк) +- **Unit добавить:** `applyTextEdits` (`json-edit.ts:45`) — полный набор: single/`replaceAll`, + multi-match без replaceAll → throw, «not found» vs «spans multiple formatting runs», **литеральная + вставка `$&`/`$1`** (явный foot-gun String.replace), обрезка пустых узлов, иммутабельность входа; + `withPageLock` (`page-lock.ts:16`) — сериализация одной страницы, конкурентность разных, ошибка не + «отравляет» очередь, реальный reject доходит до вызывающего (через deferred-промисы, **не** sleep); + `performLogin` парсинг cookie (точное имя `authToken` ≠ `authTokenRefresh`, base64-`=` не обрезается); + `filterPage` (условный spread: `content === ""` включается, не-string опускается); `getCollabToken` + (распаковка `data.data.token`→`data.token`, `err.status` выживает, тело ответа не утекает без `DEBUG`); + `filterComment` (`??` vs `||`: пустая строка markdownContent сохраняется). +- **НЕ тестировать:** `filterWorkspace`/`filterSpace`/`filterGroup` (плоские мапперы без ветвлений — + максимум один общий shape-ассерт); интерфейсы `TextEdit`/`TextEditResult`; приватные + `collectText`/`countOccurrences`/`truncate`. + +## 3. Сквозные аспекты + +- **Contract-тесты** (1 набор): между `docmost-client` и живым Docmost — записанные фикстуры/pact-стиль, + проверяющие конверты ответов (`data.data ?? data`, `items`-vs-bare-array, `meta.hasNextPage`), от которых + зависит весь клиент. Привязать к закреплённой версии Docmost; ловит дрейф контракта API. 
+- **Property-based** (через извлечение чистых функций): (1) round-trip Markdown — корона стратегии; + (2) инварианты иммутабельности node-ops; (3) идемпотентность `commentsToFootnotes`/`setCalloutRange`. + Рекомендуется dev-зависимость `fast-check` (с воспроизводимым seed + shrinking). +- **Дымовые/нагрузочные:** неприменимо (нет высоконагруженных путей); пропустить. +- **Test-data factories (нужны):** билдер ProseMirror-документов (узлы/марки) для golden+property-тестов; + фабрика конвертов REST-ответов Docmost; фабрика login/Set-Cookie-ответа; корпус фикстур для round-trip + (расширить `test/fixtures/sample-doc.json`). + +## 4. Обнаруженные антипаттерны + +- **God-объект:** `DocmostClient` — ~2770 строк, ~58 членов (auth + REST + WS + FS + comments + vm-sandbox) + в одном классе (`client.ts`). Нет шва для изоляции одной ответственности. +- **Скрытые побочные эффекты на импорте:** глобальная мутация `global.window`/`document`/`Element`/`WebSocket` + (`collaboration.ts:13-19`) — импорт модуля меняет глобал воркера; конструктор `DocmostClient` вешает + axios-интерсептор и создаёт реальный axios. +- **Нетестируемые синглтоны / общее состояние:** модульная `Map chains` (`page-lock.ts:11`) — состояние течёт + между тестами в одном воркере (изолировать `pageId`/`vi.resetModules()`); `Math.random()` в + `node-ops.ts:591` (`makeFreshId`) и `transforms.ts:240` (`freshId`) — недетерминизм id. +- **Порядко-зависимые тесты (риск):** чтение `process.env.DEBUG` в `auth-utils.ts` (set/unset + restore); + глобалы collaboration; общая `chains`-Map. +- **Артефакт покрытия dist-vs-src:** `main: dist/index.js` → тесты исполняют скомпилированный код, v8 меряет + `src` → ложные 0 % (см. §1). +- **Чистая логика в ловушке `async main()`:** `pull.ts`/`roundtrip.ts` — поэтому 0 %/19 % при наличии + тестируемой чистой логики. 
+- **`node:vm` исполняет пользовательский JS** (`client.ts::transformPage`, ~2491) — security, нужен явный тест + на отсутствие escape (`require`/`process`/`fs`) и таймаут. +- **Проглоченные ошибки:** `diff.ts:172` (`catch{return ""}` маскирует баг резолвера позиции); + пустые `catch` в cleanup collaboration. Статичный `lastIndex` regex `/g` в `transforms.ts:216`. +- **Нестабильные тесты (CI-история):** н/д — CI-история отказов отсутствует (проект на Increment 1, + тесты только базовые); пункт неактуален сейчас. + +## 5. Необходимые рефакторинги перед написанием тестов + +- **R-App-1** — извлечь `folderSegmentsFor` на верхний уровень + экспортировать `nameForNode`. + Блокирует: unit-тесты путей `pull.ts` (коллизии, защита от цикла). +- **R-App-2** — экспортировать `parseArgs` и `firstDivergence` (`roundtrip.ts`). + Блокирует: unit-тесты дивергенции и парсинга аргументов. +- **R-App-4** — инъекция клиента + fs в `pull.main`. Блокирует: integration-тест `pull.main`. +- **R-Client-1** — инъекция HTTP-клиента (axios-instance + multipart-poster). + Блокирует: все integration-тесты REST (auth-реавторизация, uploadImage, createPage, checkNewComments). +- **R-Client-2** — извлечь pure-функции маппинга ответов/конвертов/clamp. + Блокирует: перевод ~15 кейсов из integration в быстрый unit (`paginateAll`, list-endpoints). +- **R-Client-3** — инъекция collab-транспорта (`mutatePageContent`/provider-factory). + Блокирует: unit-тесты оркестрации patch/insert/delete/table/comment. +- **R-Client-4** — поднять чистые валидаторы (`isSafeUrl`/`validateDocUrls`/`validateDocStructure`/ + `imageMimeFromPath`/`buildImageNode`) в `lib/` или экспортировать. Блокирует: XSS-unit-тесты (высший приоритет). +- **R-Collab-1** — извлечь тело `onSynced` в чистую `applyTransformToYdoc(ydoc, transform)`. + Блокирует: unit ядра read-transform-write (потеря данных). +- **R-Collab-2 + R-Collab-3** — инъекция provider-factory и часов (fake-таймеры). 
+ Блокируют: тесты ложного успеха/таймаутов и integration `mutatePageContent`. +- **R-NodeOps / R-Transforms** — инъекция `makeFreshId`/`freshId` (опционально). + Блокируют: только точные ассерты на id; без рефактора — проверять формат+уникальность. +- **Инфраструктура** — добавить `@vitest/coverage-v8` + скрипт `"coverage"`; alias `docmost-client→src` + в `vitest.config.ts`; dev-deps `fast-check` (property) и mock-ws/msw (integration). Эти изменения + трогают конфиги/`package.json` — вне правки данного отчёта, заложить в Фазу 1. + +## 6. План внедрения (по фазам) + +- **Фаза 1 — чистые unit, нулевой/малый рефактор (наивысший ROI).** node-ops (8), transforms (5–6), + diff (5), client-utils (6), guards схемы (2), golden-матрица + envelope конвертера (3), + чистый верх collaboration (5), чистые валидаторы клиента после R-Client-4 (XSS + structure + image), + расширение `collectRecentSince`, app-root после R-App-1/2 (5). Плюс инфраструктура покрытия/alias. + *ROI:* мгновенно поднимает покрытие самой дефектоопасной чистой логики (потеря данных, XSS) почти без риска. +- **Фаза 2 — корона: property-тест round-trip Markdown + фабрика документов.** Ловит фантомные git-диффы и + неидемпотентность — главный класс дефектов всего инструмента синхронизации. +- **Фаза 3 — refactor-gated unit.** R-Collab-1 → ядро `onSynced` (потеря данных при конкуренции); + R-Client-2 → unit пагинации/list-endpoints; sandbox `transformPage` (security). +- **Фаза 4 — integration с DI.** R-Client-1/3 → авто-реавторизация, uploadImage, createPage, checkNewComments; + R-Collab-2/3 → подавление ложного успеха + e2e против mock-WS; R-App-4 → `pull.main`. +- **Фаза 5 — contract + E2E.** 1 contract-набор против закреплённой версии Docmost; + **2 E2E-смоука** против `docker-compose` Docmost — user journeys: (1) «pull пространства в vault» + (страницы → файлы с верной иерархией), (2) «round-trip страницы без фантомного diff». + +## 7. 
Источники + +- Отчёты **9** субагентов `module-testability-analyst` (app-root, client-core, markdown-conversion, + prosemirror-schema, node-ops, collaboration, transforms, diff, client-utils). +- Вывод coverage-инструмента: `vitest run --coverage` (provider v8), запущен оркестратором лично; + 6 тест-файлов / 33 теста зелёные; overall 2.6 % statements (артефакт dist-vs-src учтён). +- **Фильтрация предложений:** + - Шаг 1 (кросс-модульный дедуп): снято ≈ 20 (поведение схемы → round-trip; делегаты client.ts → + модули node-ops/converter/diff; `blockText` → node-ops). + - Шаг 2 (skip-list): снято ≈ 40 (декларативные spec-объекты схемы ~26, плоские фильтры 3, + type-only интерфейсы, framework-обвязка, `index.ts`, passthrough-обёртки). + - Шаг 3 (бюджет пирамиды): E2E сведены к 2; множество per-endpoint integration свёрнуты в `paginateAll` + + представительные кейсы. + - Шаг 6 (adversarial): оставлены только тесты с конкретными ассертами, падающими при реалистичной + поломке (отказ XSS-схем, байт-равенство round-trip, порядок перенумерации сносок); тавтологичные + ассерты атрибутов схемы отброшены. diff --git a/test/fixtures/sample-doc.json b/test/fixtures/sample-doc.json new file mode 100644 index 0000000..dc8c793 --- /dev/null +++ b/test/fixtures/sample-doc.json @@ -0,0 +1,144 @@ +{ + "type": "doc", + "content": [ + { + "type": "heading", + "attrs": { "level": 1, "id": "h-1" }, + "content": [{ "type": "text", "text": "Round-trip sample" }] + }, + { + "type": "paragraph", + "attrs": { "id": "p-1" }, + "content": [ + { "type": "text", "text": "This paragraph has " }, + { "type": "text", "marks": [{ "type": "bold" }], "text": "bold" }, + { "type": "text", "text": ", " }, + { "type": "text", "marks": [{ "type": "italic" }], "text": "italic" }, + { "type": "text", "text": " and a " }, + { + "type": "text", + "marks": [{ "type": "link", "attrs": { "href": "https://example.com" } }], + "text": "link" + }, + { "type": "text", "text": "." 
} + ] + }, + { + "type": "paragraph", + "attrs": { "id": "p-2" }, + "content": [ + { "type": "text", "text": "Here is a " }, + { + "type": "text", + "marks": [ + { "type": "comment", "attrs": { "commentId": "cmt-abc123", "resolved": false } } + ], + "text": "commented span" + }, + { "type": "text", "text": " that must survive the round-trip." } + ] + }, + { + "type": "bulletList", + "attrs": { "id": "ul-1" }, + "content": [ + { + "type": "listItem", + "attrs": { "id": "li-1" }, + "content": [ + { + "type": "paragraph", + "attrs": { "id": "p-3" }, + "content": [{ "type": "text", "text": "First bullet" }] + } + ] + }, + { + "type": "listItem", + "attrs": { "id": "li-2" }, + "content": [ + { + "type": "paragraph", + "attrs": { "id": "p-4" }, + "content": [{ "type": "text", "text": "Second bullet" }] + } + ] + } + ] + }, + { + "type": "table", + "attrs": { "id": "tbl-1" }, + "content": [ + { + "type": "tableRow", + "content": [ + { + "type": "tableHeader", + "attrs": { "colspan": 1, "rowspan": 1 }, + "content": [ + { + "type": "paragraph", + "content": [{ "type": "text", "text": "Name" }] + } + ] + }, + { + "type": "tableHeader", + "attrs": { "colspan": 1, "rowspan": 1 }, + "content": [ + { + "type": "paragraph", + "content": [{ "type": "text", "text": "Value" }] + } + ] + } + ] + }, + { + "type": "tableRow", + "content": [ + { + "type": "tableCell", + "attrs": { "colspan": 1, "rowspan": 1 }, + "content": [ + { + "type": "paragraph", + "content": [{ "type": "text", "text": "alpha" }] + } + ] + }, + { + "type": "tableCell", + "attrs": { "colspan": 1, "rowspan": 1 }, + "content": [ + { + "type": "paragraph", + "content": [{ "type": "text", "text": "1" }] + } + ] + } + ] + } + ] + }, + { + "type": "callout", + "attrs": { "type": "info", "id": "callout-1" }, + "content": [ + { + "type": "paragraph", + "attrs": { "id": "p-5" }, + "content": [{ "type": "text", "text": "This is an info callout." 
}] + } + ] + }, + { + "type": "codeBlock", + "attrs": { "language": "js", "id": "code-1" }, + "content": [ + { "type": "text", "text": "const a = 1;\nconsole.log(a);\n" } + ] + } + ] +} diff --git a/test/recent-since.test.ts b/test/recent-since.test.ts new file mode 100644 index 0000000..9852307 --- /dev/null +++ b/test/recent-since.test.ts @@ -0,0 +1,178 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { collectRecentSince } from 'docmost-client'; + +/** + * Unit tests for the pure cursor-pagination helper behind listRecentSince. + * `fetchPage` is faked (no network); each test models a different server + * behaviour to exercise one stop condition. + */ + +type Item = { id: string; updatedAt: string }; + +/** + * Build a fake `fetchPage` from a list of pages. Each page is served in order; + * the nextCursor of page i points at page i+1 (the last page has no cursor). + * The handed-back cursor is only decoded into a page index, not validated; an + * out-of-range or malformed cursor yields an empty page. Tracks the call count. + */ +function fakeServer(pages: Item[][]) { + let calls = 0; + const cursorFor = (i: number) => (i < pages.length - 1 ? `c${i}` : null); + const fetchPage = async (cursor: string | null) => { + // Resolve which page this cursor selects: null -> page 0, "cN" -> page N+1. + const idx = cursor === null ? 0 : Number(cursor.slice(1)) + 1; + calls++; + const items = pages[idx] ?? []; + return { items, nextCursor: cursorFor(idx) }; + }; + return { + fetchPage, + get calls() { + return calls; + }, + }; +} + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('collectRecentSince', () => { + it('stops at the cutoff page and does not fetch beyond it', async () => { + // Page 0: all newer than the cutoff. Page 1: contains the cutoff item, so + // the walk must stop here and never request page 2.
+ const server = fakeServer([ + [ + { id: 'a', updatedAt: '2026-06-16T10:00:00Z' }, + { id: 'b', updatedAt: '2026-06-16T09:00:00Z' }, + ], + [ + { id: 'c', updatedAt: '2026-06-16T08:00:00Z' }, + { id: 'd', updatedAt: '2026-06-16T05:00:00Z' }, // <= cutoff -> stop + { id: 'e', updatedAt: '2026-06-16T04:00:00Z' }, + ], + [{ id: 'f', updatedAt: '2026-06-16T03:00:00Z' }], // must NOT be fetched + ]); + + const out = await collectRecentSince( + server.fetchPage, + '2026-06-16T05:00:00Z', + ); + + // Only strictly-newer items, in server order; the cutoff item 'd' and + // everything after it is excluded. + expect(out.map((i) => i.id)).toEqual(['a', 'b', 'c']); + // Fetched page 0 and page 1 only — stopped at the cutoff page. + expect(server.calls).toBe(2); + }); + + it('dedups ids that overlap across pages', async () => { + // The cursor advances, but page boundaries overlap: 'b' appears on both + // pages. The dedup-by-id Set must keep it exactly once. + const server = fakeServer([ + [ + { id: 'a', updatedAt: '2026-06-16T10:00:00Z' }, + { id: 'b', updatedAt: '2026-06-16T09:00:00Z' }, + ], + [ + { id: 'b', updatedAt: '2026-06-16T09:00:00Z' }, // overlap + { id: 'c', updatedAt: '2026-06-16T08:00:00Z' }, + ], + ]); + + const out = await collectRecentSince( + server.fetchPage, + '2026-06-16T01:00:00Z', + ); + + expect(out.map((i) => i.id)).toEqual(['a', 'b', 'c']); + }); + + it('terminates when the server ignores the cursor (zero new items)', async () => { + // A broken server that returns the SAME first page on every call and always + // claims a nextCursor. Without the zero-new-items guard this loops to the + // cap; with it, the second fetch contributes nothing and the walk stops. 
+ let calls = 0; + const fetchPage = async (_cursor: string | null) => { + calls++; + return { + items: [ + { id: 'a', updatedAt: '2026-06-16T10:00:00Z' }, + { id: 'b', updatedAt: '2026-06-16T09:00:00Z' }, + ] as Item[], + nextCursor: 'always', // server always claims another page + }; + }; + + const out = await collectRecentSince(fetchPage, '2026-06-16T01:00:00Z'); + + // The newer items are returned exactly once (no hang, no duplicates). + expect(out.map((i) => i.id)).toEqual(['a', 'b']); + // First page collects, second page is all-seen -> stop. Capped well below + // the default hardPageCap, proving the loop terminated. + expect(calls).toBe(2); + }); + + it('returns only the first page when sinceIso is null', async () => { + const server = fakeServer([ + [ + { id: 'a', updatedAt: '2026-06-16T10:00:00Z' }, + { id: 'b', updatedAt: '2026-06-16T09:00:00Z' }, + ], + [{ id: 'c', updatedAt: '2026-06-16T08:00:00Z' }], + ]); + + const out = await collectRecentSince(server.fetchPage, null); + + expect(out.map((i) => i.id)).toEqual(['a', 'b']); + // Exactly one page fetched. + expect(server.calls).toBe(1); + }); + + it('stops at hardPageCap and warns when results may be truncated', async () => { + // Every page is all-newer-than-cutoff, every item is unique, and there is + // always a nextCursor: the only thing that can stop the walk is the cap. + let n = 0; + const fetchPage = async (_cursor: string | null) => { + const id = `id${n++}`; + return { + items: [{ id, updatedAt: '2026-06-16T10:00:00Z' }] as Item[], + nextCursor: 'next', // never runs out + }; + }; + + const warn = vi.spyOn(console, 'warn').mockImplementation(() => {}); + + const cap = 5; + const out = await collectRecentSince( + fetchPage, + '2020-01-01T00:00:00Z', + cap, + ); + + // Exactly `cap` pages were collected (one unique item each). 
+ expect(out).toHaveLength(cap); + expect(warn).toHaveBeenCalledTimes(1); + expect(String(warn.mock.calls[0][0])).toContain('hardPageCap=5'); + }); + + it('preserves server (descending) order across pages', async () => { + const server = fakeServer([ + [ + { id: 'a', updatedAt: '2026-06-16T10:00:00Z' }, + { id: 'b', updatedAt: '2026-06-16T09:00:00Z' }, + ], + [ + { id: 'c', updatedAt: '2026-06-16T08:00:00Z' }, + { id: 'd', updatedAt: '2026-06-16T07:00:00Z' }, + ], + ]); + + const out = await collectRecentSince( + server.fetchPage, + '2026-06-16T01:00:00Z', + ); + + expect(out.map((i) => i.id)).toEqual(['a', 'b', 'c', 'd']); + }); +}); diff --git a/test/roundtrip.test.ts b/test/roundtrip.test.ts new file mode 100644 index 0000000..32d8f82 --- /dev/null +++ b/test/roundtrip.test.ts @@ -0,0 +1,29 @@ +import { readFile } from 'node:fs/promises'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { + convertProseMirrorToMarkdown, + markdownToProseMirror, +} from 'docmost-client'; + +// Resolve the fixture relative to this test file so the test is CWD-independent. +const here = dirname(fileURLToPath(import.meta.url)); +const FIXTURE = join(here, 'fixtures', 'sample-doc.json'); + +describe('round-trip idempotency (SPEC §11)', () => { + it('markdown is byte-stable across export -> import -> export', async () => { + const doc = JSON.parse(await readFile(FIXTURE, 'utf8')); + + // export -> import -> export + const md1 = convertProseMirrorToMarkdown(doc); + const doc2 = await markdownToProseMirror(md1); + const md2 = convertProseMirrorToMarkdown(doc2); + + // The property git actually needs: a second export reproduces the first + // byte-for-byte. We intentionally do NOT deep-equal doc vs doc2 — the + // converter reconstructs schema default attrs (e.g. indent:null), a known + // SPEC §11 divergence that does not affect markdown stability. 
+ expect(md2).toBe(md1); + }); +}); diff --git a/test/sanitize.test.ts b/test/sanitize.test.ts new file mode 100644 index 0000000..a40383f --- /dev/null +++ b/test/sanitize.test.ts @@ -0,0 +1,96 @@ +import { describe, expect, it } from 'vitest'; +import { sanitizeTitle, disambiguate } from '../src/sanitize.js'; + +describe('sanitizeTitle', () => { + it('passes a plain title through unchanged', () => { + expect(sanitizeTitle('Getting Started')).toBe('Getting Started'); + }); + + it('replaces every forbidden printable character with a dash', () => { + // Forbidden set: / \ < > : " | ? * + expect(sanitizeTitle('a/b\\c<d>e:f"g|h?i*j')).toBe('a-b-c-d-e-f-g-h-i-j'); + }); + + it('replaces ASCII control characters with a dash', () => { + // Build the input with explicit control code points (tab=9, newline=10) to + // avoid editor escaping pitfalls. Control chars become "-" BEFORE + // whitespace is collapsed, so they survive as dashes (not a folded space). + const TAB = String.fromCharCode(9); + const NL = String.fromCharCode(10); + expect(sanitizeTitle('a b' + TAB + 'c' + NL + 'd')).toBe('a b-c-d'); + }); + + it('collapses runs of plain whitespace to a single space and trims', () => { + expect(sanitizeTitle(' hello world ')).toBe('hello world'); + }); + + it('caps the length at 120 characters', () => { + const long = 'x'.repeat(200); + const out = sanitizeTitle(long); + expect(out.length).toBe(120); + expect(out).toBe('x'.repeat(120)); + }); + + it('prefixes reserved Windows names with an underscore', () => { + expect(sanitizeTitle('CON')).toBe('_CON'); + expect(sanitizeTitle('nul')).toBe('_nul'); + // The base name (before the first dot) is what matters. 
+ expect(sanitizeTitle('con.md')).toBe('_con.md'); + }); + + it('does not flag names that merely contain a reserved word', () => { + expect(sanitizeTitle('console')).toBe('console'); + expect(sanitizeTitle('Control')).toBe('Control'); + }); + + it('returns "_" for empty or whitespace-only input', () => { + expect(sanitizeTitle('')).toBe('_'); + expect(sanitizeTitle(' ')).toBe('_'); + }); + + it('handles a title that is only forbidden characters', () => { + // Each forbidden char becomes "-", so the result is non-empty and safe. + expect(sanitizeTitle('///')).toBe('---'); + }); + + it('neutralizes all-dot names so they cannot escape the vault', () => { + // ".", "..", "..." (and whitespace-padded variants) are path-traversal + // hazards as directory segments. The result must never be a pure-dot + // segment and must contain no path separators. + for (const input of ['.', '..', '...', ' .. ']) { + const out = sanitizeTitle(input); + expect(['.', '..', '...']).not.toContain(out); + expect(/^\.+$/.test(out)).toBe(false); + expect(out).not.toContain('/'); + expect(out).not.toContain('\\'); + } + // The concrete prefixing behaviour (existing "_" safeguard). + expect(sanitizeTitle('.')).toBe('_.'); + expect(sanitizeTitle('..')).toBe('_..'); + expect(sanitizeTitle('...')).toBe('_...'); + expect(sanitizeTitle(' .. 
')).toBe('_..'); + }); + + it('is deterministic — the same input yields the same output', () => { + const title = 'Some / weird : title?'; + expect(sanitizeTitle(title)).toBe(sanitizeTitle(title)); + }); +}); + +describe('disambiguate', () => { + it('appends a stable ~slugId suffix', () => { + expect(disambiguate('Notes', 'abc123')).toBe('Notes ~abc123'); + }); + + it('is deterministic for the same name and slugId', () => { + expect(disambiguate('Notes', 'abc123')).toBe( + disambiguate('Notes', 'abc123'), + ); + }); + + it('produces distinct names for colliding siblings', () => { + const a = disambiguate('Notes', 'slug-a'); + const b = disambiguate('Notes', 'slug-b'); + expect(a).not.toBe(b); + }); +}); diff --git a/test/strip-block-ids.test.ts b/test/strip-block-ids.test.ts new file mode 100644 index 0000000..a89dc7a --- /dev/null +++ b/test/strip-block-ids.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it } from 'vitest'; +import { stripBlockIds } from '../src/roundtrip.js'; + +describe('stripBlockIds', () => { + it('removes only attrs.id, recursively, keeping every other attribute', () => { + const input = { + type: 'doc', + content: [ + { + type: 'heading', + attrs: { id: 'h1', level: 2 }, + content: [{ type: 'text', text: 'Title' }], + }, + { + type: 'callout', + attrs: { id: 'c1', kind: 'info' }, + content: [ + { + type: 'paragraph', + attrs: { id: 'p1', indent: null }, + content: [{ type: 'text', text: 'Body' }], + }, + ], + }, + ], + }; + + const out = stripBlockIds(input); + + expect(out).toEqual({ + type: 'doc', + content: [ + { + type: 'heading', + attrs: { level: 2 }, + content: [{ type: 'text', text: 'Title' }], + }, + { + type: 'callout', + attrs: { kind: 'info' }, + content: [ + { + type: 'paragraph', + attrs: { indent: null }, + content: [{ type: 'text', text: 'Body' }], + }, + ], + }, + ], + }); + // No stray `id` survives anywhere in the tree. 
+ expect(JSON.stringify(out)).not.toContain('"id"'); + }); + + it('does not mutate its input (frozen object passes through unchanged)', () => { + const inner = Object.freeze({ + type: 'paragraph', + attrs: Object.freeze({ id: 'p1', indent: null }), + content: Object.freeze([ + Object.freeze({ type: 'text', text: 'x' }), + ]), + }); + const input = Object.freeze({ + type: 'doc', + content: Object.freeze([inner]), + }); + const before = JSON.stringify(input); + + // Would throw on any write to a frozen node if the function mutated input. + const out = stripBlockIds(input); + + // Input is structurally identical after the call (no mutation). + expect(JSON.stringify(input)).toBe(before); + // The id is gone from the returned (new) tree. + expect((out.content[0].attrs as Record<string, unknown>).id).toBeUndefined(); + expect((out.content[0].attrs as Record<string, unknown>).indent).toBeNull(); + // A fresh tree is returned, not the same reference. + expect(out).not.toBe(input); + }); +}); diff --git a/tsconfig.base.json b/tsconfig.base.json new file mode 100644 index 0000000..48a1db8 --- /dev/null +++ b/tsconfig.base.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "sourceMap": true + } +}