feat(sync): scaffold monorepo, extract docmost-client, add Phase-0 harness + read-only pull
Lock the access-layer decision (REST only) and start implementation per SPEC. - monorepo (npm workspaces): packages/docmost-client = DocmostClient + lib/* copied 1:1 from docmost-mcp/src (backport target), plus bannered sync methods (listTrash, restorePage, listAllSpacePages, exportPageBody, listRecentSince / collectRecentSince cursor scan) - engine stays the root app per AGENTS.md (src/, test/, build/, data/, settings.ts); add roundtrip.ts (SPEC §11 idempotency harness), pull.ts (SPEC §6 read-only Docmost->FS mirror), sanitize.ts (SPEC §12 filenames, path-traversal-safe) - Dockerfile builds the workspace lib before the app; vitest gates CI - exportPageBody never touches /comments (SPEC §3); serializeDocmostMarkdownBody emits meta + body only - SPEC: resolve access-layer (REST), reflect root-engine layout + REST pagination - tests: sanitize (incl. dot-traversal), collectRecentSince (cutoff/dedup/cap), stripBlockIds, markdown round-trip byte-stability Note: raw ProseMirror round-trip is byte-stable in Markdown but not yet attribute-idempotent (SPEC §11 Задача №0, before Phase 2).
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
.git
|
.git
|
||||||
node_modules/
|
node_modules/
|
||||||
build/
|
build/
|
||||||
|
packages/*/dist/
|
||||||
|
packages/*/node_modules/
|
||||||
.env
|
.env
|
||||||
data/
|
data/
|
||||||
test/
|
test/
|
||||||
|
|||||||
@@ -14,6 +14,13 @@ and the phased plan before adding engine logic.
|
|||||||
|
|
||||||
## Project structure
|
## Project structure
|
||||||
|
|
||||||
|
The project is now an **npm-workspaces monorepo**. `packages/docmost-client` is
|
||||||
|
the extracted `DocmostClient` + `lib/` — a verbatim 1:1 copy of `docmost-mcp/src/`
|
||||||
|
with the sync-specific methods appended under a clear banner (changes are
|
||||||
|
backported into `docmost-mcp` manually). The **ROOT remains the engine app**
|
||||||
|
(`src/`, `test/`, `build/`, `data/`) and depends on `docmost-client`. `npm run
|
||||||
|
build` builds the lib first, then compiles the app to `build/`.
|
||||||
|
|
||||||
- `src/` — application code.
|
- `src/` — application code.
|
||||||
- `src/settings.ts` — the single config entry point (zod schema keyed by the
|
- `src/settings.ts` — the single config entry point (zod schema keyed by the
|
||||||
real ENV var names; `parseSettings` is pure, `loadSettings` reads `.env`).
|
real ENV var names; `parseSettings` is pure, `loadSettings` reads `.env`).
|
||||||
|
|||||||
@@ -2,15 +2,18 @@ FROM node:22-slim
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Dependencies first (better layer caching): copy manifests, install from lock.
|
# Dependencies first (better layer caching): copy the root manifest, the lock,
|
||||||
|
# and the workspace package manifest so `npm ci` can link the workspace.
|
||||||
COPY package.json package-lock.json ./
|
COPY package.json package-lock.json ./
|
||||||
|
COPY packages/docmost-client/package.json packages/docmost-client/package.json
|
||||||
RUN npm ci
|
RUN npm ci
|
||||||
|
|
||||||
# Runtime state directory (mounted as a volume in production).
|
# Runtime state directory (mounted as a volume in production).
|
||||||
RUN mkdir -p data
|
RUN mkdir -p data
|
||||||
|
|
||||||
# Source + TS config, then compile to build/.
|
# Source + TS config, then build the workspace lib and compile the app to build/.
|
||||||
COPY tsconfig.json ./
|
COPY tsconfig.json tsconfig.base.json ./
|
||||||
|
COPY packages/ packages/
|
||||||
COPY src/ src/
|
COPY src/ src/
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
|
|||||||
12
Makefile
12
Makefile
@@ -1,6 +1,6 @@
|
|||||||
.DEFAULT_GOAL := help
|
.DEFAULT_GOAL := help
|
||||||
|
|
||||||
.PHONY: help install env build test run dev clean
|
.PHONY: help install env build test run dev roundtrip pull clean
|
||||||
|
|
||||||
help: ## Show this help
|
help: ## Show this help
|
||||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) \
|
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) \
|
||||||
@@ -27,5 +27,11 @@ run: build ## Build and run the app
|
|||||||
dev: install ## Run in watch mode (tsx)
|
dev: install ## Run in watch mode (tsx)
|
||||||
npm run dev
|
npm run dev
|
||||||
|
|
||||||
clean: ## Remove build artifacts and node_modules
|
roundtrip: build ## Run the offline round-trip idempotency harness (SPEC §11)
|
||||||
rm -rf build node_modules
|
npm run roundtrip
|
||||||
|
|
||||||
|
pull: build ## Mirror the configured Docmost space into the vault (read-only)
|
||||||
|
npm run pull
|
||||||
|
|
||||||
|
clean: ## Remove build artifacts and node_modules (incl. the workspace lib)
|
||||||
|
rm -rf build node_modules packages/*/dist
|
||||||
|
|||||||
110
README.md
110
README.md
@@ -2,52 +2,92 @@
|
|||||||
|
|
||||||
Bidirectional sync between Docmost articles and a local Markdown git vault — the
|
Bidirectional sync between Docmost articles and a local Markdown git vault — the
|
||||||
git repository is the state store. For the full design and the phased
|
git repository is the state store. For the full design and the phased
|
||||||
implementation plan, see [`SPEC.md`](./SPEC.md).
|
implementation plan, see [`SPEC.md`](./SPEC.md) (the authoritative spec).
|
||||||
|
|
||||||
> **Status: scaffold only — the sync engine is not implemented yet.**
|
> **Status: Increment 1 — monorepo scaffold + read-only `pull` + Phase-0
|
||||||
> `src/index.ts` validates configuration and exits. The engine described in
|
> round-trip harness.** Continuous two-way sync is not implemented yet; see the
|
||||||
> `SPEC.md` is out of scope for this scaffold.
|
> phased plan in `SPEC.md`.
|
||||||
|
|
||||||
It reuses the sibling project **docmost-mcp** as a library (DocmostClient,
|
It reuses the sibling project **docmost-mcp** as a library: the `DocmostClient`
|
||||||
ProseMirror ↔ Markdown converter, collab-write).
|
REST client and the lossless ProseMirror ↔ Markdown converter are extracted into
|
||||||
|
this monorepo (so changes can be backported file-by-file).
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
|
||||||
|
This is an npm-workspaces monorepo:
|
||||||
|
|
||||||
|
- **`packages/docmost-client`** (`docmost-client`) — the Docmost REST client and
|
||||||
|
its `lib/` (converter, markdown-document, collaboration, …). Its source layout
|
||||||
|
mirrors `docmost-mcp/src/` 1:1 so diffs can be backported by copying files.
|
||||||
|
Sync-specific REST methods are added under clearly marked `docmost-sync
|
||||||
|
additions` banners.
|
||||||
|
- **the repo ROOT** — the sync engine app (`src/`, `test/`, `build/`, `data/`).
|
||||||
|
It depends on `docmost-client` and holds the config (`src/settings.ts`),
|
||||||
|
filename sanitization (`src/sanitize.ts`), the Phase-0 round-trip idempotency
|
||||||
|
harness (`src/roundtrip.ts`), and the read-only `pull` (`src/pull.ts`).
|
||||||
|
|
||||||
|
## Install & build
|
||||||
|
|
||||||
|
Requires Node >= 20.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
npm install # links the workspace packages
|
||||||
|
npm run build # builds docmost-client, then compiles the app into build/
|
||||||
|
```
|
||||||
|
|
||||||
|
`docmost-client` must build before the app (the app consumes its built output);
|
||||||
|
the root `build` script builds the lib first, then runs `tsc`.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
All config comes from ENV / `.env` (see [`.env.example`](./.env.example)), read
|
Copy [`.env.example`](./.env.example) to `.env` and fill in real values. The
|
||||||
through the single settings layer in `src/settings.ts`. A missing required
|
config is read through [`src/settings.ts`](./src/settings.ts).
|
||||||
variable fails at startup with a clear message that names it.
|
|
||||||
|
|
||||||
| Variable | Required | Default | Meaning |
|
| Variable | Required | Meaning |
|
||||||
| ------------------ | :------: | ------------ | -------------------------------------------------------------- |
|
| ------------------- | :------: | -------------------------------------------------------- |
|
||||||
| `DOCMOST_API_URL` | yes | — | Base URL of our Docmost instance (used for `/auth/login`). |
|
| `DOCMOST_API_URL` | yes | Base URL of our Docmost instance. |
|
||||||
| `DOCMOST_EMAIL` | yes | — | Docmost login email. |
|
| `DOCMOST_EMAIL` | yes | Docmost service-user login email. |
|
||||||
| `DOCMOST_PASSWORD` | yes | — | Docmost login password. |
|
| `DOCMOST_PASSWORD` | yes | Docmost service-user login password. |
|
||||||
| `DOCMOST_SPACE_ID` | yes | — | The Docmost space to mirror. |
|
| `DOCMOST_SPACE_ID` | yes | Which Docmost space to mirror. |
|
||||||
| `VAULT_PATH` | no | `data/vault` | Local git vault path (kept under `data/` for the volume). |
|
| `VAULT_PATH` | no | Local vault directory (default `data/vault`). |
|
||||||
| `GIT_REMOTE` | no | _(unset)_ | Optional git remote the vault pushes to; empty = local-only. |
|
| `GIT_REMOTE` | no | Optional git remote the vault pushes to. |
|
||||||
| `POLL_INTERVAL_MS` | no | `15000` | How often to poll Docmost for changes (ms). |
|
| `POLL_INTERVAL_MS` | no | Poll interval in ms (default `15000`). |
|
||||||
| `DEBOUNCE_MS` | no | `2000` | Debounce window for local file changes (ms). |
|
| `DEBOUNCE_MS` | no | Debounce window in ms (default `2000`). |
|
||||||
| `LOG_LEVEL` | no | `info` | One of `debug`, `info`, `warn`, `error`. |
|
| `LOG_LEVEL` | no | `debug` \| `info` \| `warn` \| `error` (default `info`). |
|
||||||
|
|
||||||
Credentials and the address of our own Docmost instance have NO default — they
|
**Real secrets go in `.env`, which is git-ignored — never commit them.** The
|
||||||
go ONLY into `.env`, never into code or inline command-line env vars.
|
git remote grants access to the whole vault, so protect it no less than Docmost
|
||||||
|
itself (SPEC §12).
|
||||||
|
|
||||||
## Quick start
|
## Running
|
||||||
|
|
||||||
|
### Round-trip idempotency harness (Phase 0, SPEC §11)
|
||||||
|
|
||||||
|
Verifies that `export → import → export` is byte-stable. Runs offline against a
|
||||||
|
fixture (the default for CI) — **no Docmost credentials needed**:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
make install # install dependencies (npm ci)
|
npm run build
|
||||||
make env # create .env from .env.example, then fill it in
|
node build/roundtrip.js --fixture test/fixtures/sample-doc.json
|
||||||
make test # run the test suite (vitest)
|
|
||||||
make run # build and run
|
|
||||||
make dev # run in watch mode (tsx)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
`make` (or `make help`) lists all targets.
|
Or against a live page (needs `.env`):
|
||||||
|
|
||||||
## Deploy
|
```sh
|
||||||
|
node build/roundtrip.js --page <pageId>
|
||||||
|
```
|
||||||
|
|
||||||
Production runs a prebuilt image from `ghcr.io` (no build on prod):
|
Exit code is 0 when the markdown is byte-stable, 1 on a markdown divergence
|
||||||
`docker-compose.yml` pulls `ghcr.io/vvzvlad/docmost-sync:latest`, mounts a
|
(CI-able). A document-level divergence after stripping block ids is a known
|
||||||
volume at `/app/data`, and [watchtower](https://containrrr.dev/watchtower/)
|
SPEC §11 finding and does not fail the run.
|
||||||
auto-updates the container when a new image is published. CI (GitHub Actions)
|
|
||||||
builds and pushes the image; the `build` job runs only after `test` passes.
|
### Pull (Docmost → filesystem mirror, SPEC §6)
|
||||||
|
|
||||||
|
Read-only mirror: walks the configured space's page tree and writes one `.md`
|
||||||
|
per page under `<VAULT_PATH>/<…ancestors>/<Title>.md`. **Requires a `.env` with
|
||||||
|
real Docmost credentials** — it makes live REST calls and does not touch Docmost
|
||||||
|
state (read-only this increment):
|
||||||
|
|
||||||
|
```sh
|
||||||
|
npm run pull
|
||||||
|
```
|
||||||
|
|||||||
10
SPEC.md
10
SPEC.md
@@ -29,8 +29,8 @@
|
|||||||
|
|
||||||
Реализация — **monorepo (npm workspaces)**: `packages/docmost-client` (выносной
|
Реализация — **monorepo (npm workspaces)**: `packages/docmost-client` (выносной
|
||||||
`DocmostClient` + `lib/*`, лейаут 1:1 с `docmost-mcp/src/` — sync-методы дописываем
|
`DocmostClient` + `lib/*`, лейаут 1:1 с `docmost-mcp/src/` — sync-методы дописываем
|
||||||
сюда, изменения бэкпортятся в `docmost-mcp` вручную) и `packages/sync` (движок
|
сюда, изменения бэкпортятся в `docmost-mcp` вручную), а движок синхронизации —
|
||||||
синхронизации).
|
приложение в корне репозитория (`src/`, по конвенциям `AGENTS.md`).
|
||||||
|
|
||||||
**Важно:** MCP-инструменты — это тонкая обёртка над HTTP API Docmost. Синк-движок
|
**Важно:** MCP-инструменты — это тонкая обёртка над HTTP API Docmost. Синк-движок
|
||||||
ходит в REST Docmost **напрямую** и волен использовать любые эндпойнты, которых
|
ходит в REST Docmost **напрямую** и волен использовать любые эндпойнты, которых
|
||||||
@@ -475,7 +475,11 @@ append|prepend|replace, format: json|markdown|html }`) перезаписыва
|
|||||||
|
|
||||||
### Подводные камни
|
### Подводные камни
|
||||||
1. Пагинация **курсорная** (`cursor` / `beforeCursor` / `limit` ≤ 100) в теле
|
1. Пагинация **курсорная** (`cursor` / `beforeCursor` / `limit` ≤ 100) в теле
|
||||||
JSON — не `page` / `offset`.
|
JSON — не `page` / `offset`. `listRecentSince` (changes-since) идёт по
|
||||||
|
`cursor`/`nextCursor` согласно этому, с обрывом по `updatedAt ≤ T_last`.
|
||||||
|
Примечание: общий `paginateAll` переиспользуемого клиента и `sidebar-pages`
|
||||||
|
исторически ходят через `page`/`limit` (сервер принимает) — на них опираются
|
||||||
|
`listTrash` / `listAllSpacePages`.
|
||||||
2. Корзина и `recent` могут быть пер-спейс → перечисляем спейсы.
|
2. Корзина и `recent` могут быть пер-спейс → перечисляем спейсы.
|
||||||
3. `content` отдают только `/info` и `/trash`; `/recent` — без тела.
|
3. `content` отдают только `/info` и `/trash`; `/recent` — без тела.
|
||||||
4. Запись тела — collab-путь, не `/update` (см. выше).
|
4. Запись тела — collab-путь, не `/update` (см. выше).
|
||||||
|
|||||||
1673
package-lock.json
generated
1673
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
14
package.json
14
package.json
@@ -3,19 +3,21 @@
|
|||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"private": true,
|
"private": true,
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"description": "Bidirectional sync daemon between Docmost articles and a local Markdown git vault.",
|
"description": "Bidirectional sync daemon between Docmost articles and a local Markdown git vault (git is the state store). See SPEC.md.",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"engines": {
|
"workspaces": ["packages/*"],
|
||||||
"node": ">=20"
|
"engines": { "node": ">=20" },
|
||||||
},
|
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"build": "tsc",
|
"build": "npm run build -w docmost-client && tsc",
|
||||||
"start": "node build/index.js",
|
"start": "node build/index.js",
|
||||||
"dev": "tsx watch src/index.ts",
|
"dev": "tsx watch src/index.ts",
|
||||||
"test": "vitest run",
|
"test": "vitest run",
|
||||||
"test:watch": "vitest"
|
"test:watch": "vitest",
|
||||||
|
"roundtrip": "node build/roundtrip.js",
|
||||||
|
"pull": "node build/pull.js"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"docmost-client": "*",
|
||||||
"dotenv": "17.4.2",
|
"dotenv": "17.4.2",
|
||||||
"zod": "3.25.76"
|
"zod": "3.25.76"
|
||||||
},
|
},
|
||||||
|
|||||||
44
packages/docmost-client/package.json
Normal file
44
packages/docmost-client/package.json
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"name": "docmost-client",
|
||||||
|
"version": "0.1.0",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"main": "dist/index.js",
|
||||||
|
"types": "dist/index.d.ts",
|
||||||
|
"exports": {
|
||||||
|
".": {
|
||||||
|
"types": "./dist/index.d.ts",
|
||||||
|
"import": "./dist/index.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc -p tsconfig.json"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@fellow/prosemirror-recreate-transform": "^1.2.3",
|
||||||
|
"@hocuspocus/provider": "^3.4.4",
|
||||||
|
"@hocuspocus/transformer": "^3.4.4",
|
||||||
|
"@tiptap/core": "^3.18.0",
|
||||||
|
"@tiptap/extension-highlight": "^3.26.1",
|
||||||
|
"@tiptap/extension-image": "^3.18.0",
|
||||||
|
"@tiptap/extension-subscript": "^3.26.1",
|
||||||
|
"@tiptap/extension-superscript": "^3.26.1",
|
||||||
|
"@tiptap/extension-task-item": "^3.26.1",
|
||||||
|
"@tiptap/extension-task-list": "^3.26.1",
|
||||||
|
"@tiptap/html": "^3.18.0",
|
||||||
|
"@tiptap/pm": "^3.18.0",
|
||||||
|
"@tiptap/starter-kit": "^3.18.0",
|
||||||
|
"axios": "^1.6.0",
|
||||||
|
"form-data": "^4.0.0",
|
||||||
|
"jsdom": "^27.4.0",
|
||||||
|
"marked": "^17.0.1",
|
||||||
|
"ws": "^8.19.0",
|
||||||
|
"yjs": "^13.6.29"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/jsdom": "^27.0.0",
|
||||||
|
"@types/node": "^20.0.0",
|
||||||
|
"@types/ws": "^8.5.10",
|
||||||
|
"typescript": "^5.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
2770
packages/docmost-client/src/client.ts
Normal file
2770
packages/docmost-client/src/client.ts
Normal file
File diff suppressed because it is too large
Load Diff
23
packages/docmost-client/src/index.ts
Normal file
23
packages/docmost-client/src/index.ts
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
/**
|
||||||
|
* Public surface of the `docmost-client` package.
|
||||||
|
*
|
||||||
|
* This is a NEW barrel authored for docmost-sync (it is NOT copied from
|
||||||
|
* docmost-mcp, whose `src/index.ts` is the MCP-server entry point and is
|
||||||
|
* deliberately not part of this package). It re-exports the pieces the sync
|
||||||
|
* engine and other consumers need: the REST client, the self-contained
|
||||||
|
* markdown (de)serializers, and the lossless ProseMirror <-> Markdown
|
||||||
|
* converter.
|
||||||
|
*/
|
||||||
|
|
||||||
|
export { DocmostClient, collectRecentSince } from "./client.js";
|
||||||
|
|
||||||
|
export {
|
||||||
|
serializeDocmostMarkdown,
|
||||||
|
parseDocmostMarkdown,
|
||||||
|
serializeDocmostMarkdownBody,
|
||||||
|
} from "./lib/markdown-document.js";
|
||||||
|
export type { DocmostMdMeta } from "./lib/markdown-document.js";
|
||||||
|
|
||||||
|
export { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
|
||||||
|
|
||||||
|
export { markdownToProseMirror } from "./lib/collaboration.js";
|
||||||
86
packages/docmost-client/src/lib/auth-utils.ts
Normal file
86
packages/docmost-client/src/lib/auth-utils.ts
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
import axios from "axios";
|
||||||
|
|
||||||
|
/**
 * Exchange an API token for a collaboration token via
 * `POST {baseUrl}/auth/collab-token`.
 *
 * @param baseUrl  Base URL that the `/auth/collab-token` path is appended to.
 * @param apiToken Token sent as `Authorization: Bearer <apiToken>`.
 * @returns The collab token, read from `data.data.token` (wrapped response)
 *          or, failing that, from `data.token`.
 * @throws Error carrying a `status` property (the HTTP status, or `undefined`
 *         when no response was received) for Axios failures; a plain Error
 *         when the response contains no token; any other error unchanged.
 */
export async function getCollabToken(
  baseUrl: string,
  apiToken: string,
): Promise<string> {
  try {
    const response = await axios.post(
      `${baseUrl}/auth/collab-token`,
      {},
      {
        headers: {
          Authorization: `Bearer ${apiToken}`,
          "Content-Type": "application/json",
        },
      },
    );

    // Response is wrapped in { data: { token: ... } }; fall back to a bare
    // { token } body. Optional chaining on `data` keeps an empty body from
    // surfacing as an opaque TypeError.
    const token: unknown = response.data?.data?.token || response.data?.token;
    if (typeof token !== "string" || token === "") {
      // Fail loudly here instead of handing `undefined` to callers that were
      // promised a string.
      throw new Error("Failed to get collab token: response contained no token");
    }
    return token;
  } catch (error) {
    if (axios.isAxiosError(error)) {
      // Attach the HTTP status to the plain Error so callers (e.g.
      // getCollabTokenWithReauth) can still detect a 401/403 after the
      // original AxiosError has been wrapped away.
      // Avoid leaking the full server response body by default; include only
      // status + statusText. Append the body only when DEBUG is set.
      const httpResponse = error.response;
      let message: string;
      if (httpResponse) {
        message = `Failed to get collab token: ${httpResponse.status} ${httpResponse.statusText}`;
        if (process.env.DEBUG) {
          message += ` - ${JSON.stringify(httpResponse.data)}`;
        }
      } else {
        // No HTTP response at all (DNS failure, refused connection, timeout):
        // report the transport error rather than "undefined undefined".
        message = `Failed to get collab token: no response from server (${error.code ?? error.message})`;
      }
      throw Object.assign(new Error(message), { status: httpResponse?.status });
    }
    throw error;
  }
}
|
||||||
|
|
||||||
|
/**
 * Log in via `POST {baseUrl}/auth/login` and return the value of the
 * `authToken` cookie from the response's `Set-Cookie` headers.
 *
 * @param baseUrl  Base URL that the `/auth/login` path is appended to.
 * @param email    Login email sent in the request body.
 * @param password Login password sent in the request body.
 * @returns The raw `authToken` cookie value (everything after the first "="
 *          up to the first ";").
 * @throws Error when the response has no `Set-Cookie` header or no usable
 *         `authToken` cookie; rethrows any request error after logging it.
 */
export async function performLogin(
  baseUrl: string,
  email: string,
  password: string,
): Promise<string> {
  try {
    const response = await axios.post(`${baseUrl}/auth/login`, {
      email,
      password,
    });

    // Extract token from Set-Cookie header
    const cookies = response.headers["set-cookie"];
    if (!cookies) {
      throw new Error("No Set-Cookie header found in login response");
    }

    // Match the cookie name exactly to avoid matching a future
    // authTokenRefresh cookie (startsWith would catch it).
    const authCookie = cookies.find((c: string) => {
      const kv = c.split(";")[0];
      const eq = kv.indexOf("=");
      // A pair without "=" has no value: without this guard, slice(0, -1)
      // would chop the last character and let e.g. "authTokenX" match.
      if (eq === -1) return false;
      // An empty value (e.g. a cookie-clearing "authToken=") is not a token.
      return kv.slice(0, eq) === "authToken" && eq < kv.length - 1;
    });
    if (!authCookie) {
      throw new Error("No authToken cookie found in login response");
    }

    // Take everything after the FIRST "=" up to the first ";".
    // Splitting on "=" would truncate base64 values containing "=" padding.
    const kv = authCookie.split(";")[0];
    return kv.slice(kv.indexOf("=") + 1);
  } catch (error: unknown) {
    // Avoid leaking the full server response body by default; log only the
    // HTTP status. Log the verbose body only when DEBUG is set.
    if (axios.isAxiosError(error)) {
      if (process.env.DEBUG) {
        console.error("Login failed:", error.response?.data);
      } else {
        console.error("Login failed:", error.response?.status);
      }
    } else {
      console.error(
        "Login failed:",
        error instanceof Error ? error.message : error,
      );
    }
    throw error;
  }
}
|
||||||
618
packages/docmost-client/src/lib/collaboration.ts
Normal file
618
packages/docmost-client/src/lib/collaboration.ts
Normal file
@@ -0,0 +1,618 @@
|
|||||||
|
import { HocuspocusProvider } from "@hocuspocus/provider";
|
||||||
|
import { TiptapTransformer } from "@hocuspocus/transformer";
|
||||||
|
import * as Y from "yjs";
|
||||||
|
import WebSocket from "ws";
|
||||||
|
import { marked } from "marked";
|
||||||
|
import { generateJSON } from "@tiptap/html";
|
||||||
|
import { JSDOM } from "jsdom";
|
||||||
|
import { docmostExtensions } from "./docmost-schema.js";
|
||||||
|
import { withPageLock } from "./page-lock.js";
|
||||||
|
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||||
|
|
||||||
|
// Tiptap's HTML parsing expects browser globals, so install a JSDOM-backed
// window/document/Element (and the `ws` WebSocket) on the Node global object.
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
globalThis.window = dom.window as any;
globalThis.document = dom.window.document;
// @ts-ignore
globalThis.Element = dom.window.Element;
// @ts-ignore
globalThis.WebSocket = WebSocket;
// `navigator` is deliberately left alone: it is read-only in newer Node
// versions and already exists.
// globalThis.navigator = dom.window.navigator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hard ceiling above which we skip callout preprocessing entirely. The linear
|
||||||
|
* scanner below has no quadratic blow-up, but we still cap input defensively so
|
||||||
|
* a pathological multi-megabyte payload cannot tie up the event loop; in that
|
||||||
|
* case the markdown is passed through verbatim (callouts are simply not
|
||||||
|
* detected) rather than risking a slow scan.
|
||||||
|
*/
|
||||||
|
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
|
||||||
|
|
||||||
|
/** Matches an opening callout fence: `:::type` (type captured; the caller lower-cases it). */
|
||||||
|
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
|
||||||
|
/** Matches a bare closing callout fence: `:::`. */
|
||||||
|
const CALLOUT_CLOSE_RE = /^:::\s*$/;
|
||||||
|
/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */
|
||||||
|
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||||
|
|
||||||
|
/**
 * Pre-process Docmost-flavoured markdown: convert `:::type ... :::`
 * callout blocks (the syntax our markdown export produces) into HTML
 * divs that the callout extension parses. The inner content is rendered
 * through marked as regular markdown.
 *
 * Implemented as a single forward scan over the lines. It:
 *  - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
 *    `:::` line that lives inside a code fence as a callout delimiter;
 *  - only leaves a code fence on a real closing fence: same marker character,
 *    at least as long as the opener, and nothing but whitespace after it. A
 *    line such as "```js" inside an open fence is code, not a close;
 *  - pairs an opening `:::type` line with the closing `:::` at the SAME
 *    nesting level via a depth counter, so callouts may nest;
 *  - emits `<div data-type="callout" data-callout-type="TYPE">` with the body
 *    rendered through marked (nested callouts are transformed recursively);
 *  - leaves an unterminated `:::type` opener in place as literal text.
 *
 * @param markdown Markdown source to scan.
 * @returns The markdown with callout blocks replaced by HTML divs; the input
 *          unchanged when its length exceeds MAX_CALLOUT_PREPROCESS_BYTES.
 */
async function preprocessCallouts(markdown: string): Promise<string> {
  // Defensive cap: skip preprocessing for pathologically large inputs.
  if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return markdown;
  }

  // True when `line` closes a code fence opened with the marker run `opener`.
  // The remainder after the fence run must be blank, otherwise the line is an
  // info-string line (e.g. "```js") and belongs to the code block's content.
  const closesFence = (line: string, opener: string): boolean => {
    const m = line.match(CODE_FENCE_RE);
    if (!m) return false;
    const run = m[2];
    return (
      run[0] === opener[0] &&
      run.length >= opener.length &&
      line.slice(m[0].length).trim() === ""
    );
  };

  // Recursively transform a slice of lines, converting top-level callouts in
  // that slice into <div> blocks and rendering their inner content (which may
  // itself contain nested callouts) through this same function.
  const transform = async (lines: string[]): Promise<string> => {
    const out: string[] = [];
    let inCodeFence = false;
    let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it
    let i = 0;

    while (i < lines.length) {
      const line = lines[i];

      // Inside a code fence, only its matching closing fence is significant;
      // everything else (including `:::` lines) is copied through verbatim.
      if (inCodeFence) {
        out.push(line);
        if (closesFence(line, codeFenceMarker)) {
          inCodeFence = false;
          codeFenceMarker = "";
        }
        i++;
        continue;
      }

      // A code fence opening outside any callout body: enter code-fence mode.
      const fenceOpen = line.match(CODE_FENCE_RE);
      if (fenceOpen) {
        inCodeFence = true;
        codeFenceMarker = fenceOpen[2];
        out.push(line);
        i++;
        continue;
      }

      // An opening callout fence: scan forward (with code-fence and nested
      // callout awareness) for its matching closing `:::` at the same level.
      const open = line.match(CALLOUT_OPEN_RE);
      if (open) {
        const type = open[1].toLowerCase();
        const bodyLines: string[] = [];
        let depth = 1;
        let innerInCodeFence = false;
        let innerCodeFenceMarker = "";
        let j = i + 1;
        for (; j < lines.length; j++) {
          const bl = lines[j];
          if (innerInCodeFence) {
            if (closesFence(bl, innerCodeFenceMarker)) {
              innerInCodeFence = false;
              innerCodeFenceMarker = "";
            }
            bodyLines.push(bl);
            continue;
          }
          const innerFence = bl.match(CODE_FENCE_RE);
          if (innerFence) {
            innerInCodeFence = true;
            innerCodeFenceMarker = innerFence[2];
            bodyLines.push(bl);
            continue;
          }
          if (CALLOUT_OPEN_RE.test(bl)) {
            depth++;
            bodyLines.push(bl);
            continue;
          }
          if (CALLOUT_CLOSE_RE.test(bl)) {
            depth--;
            if (depth === 0) break; // matching close for THIS callout
            bodyLines.push(bl);
            continue;
          }
          bodyLines.push(bl);
        }

        if (j < lines.length) {
          // Found the matching closing fence: render the body (recursively, so
          // nested callouts are handled) and emit the callout div.
          const inner = await transform(bodyLines);
          const renderedInner = await marked.parse(inner);
          out.push(
            `\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`,
          );
          i = j + 1; // skip past the closing `:::`
          continue;
        }
        // No matching close (unterminated callout): treat the opener as a
        // literal line and continue, preserving the original text.
        out.push(line);
        i++;
        continue;
      }

      out.push(line);
      i++;
    }

    return out.join("\n");
  };

  return transform(markdown.split("\n"));
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bridge marked's checkbox lists to TipTap task lists.
|
||||||
|
*
|
||||||
|
* marked renders GitHub task list items (`- [x] done`) as a plain
|
||||||
|
* `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the
|
||||||
|
* markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists
|
||||||
|
* into the shape those extensions expect:
|
||||||
|
* TaskList parseHTML matches `ul[data-type="taskList"]`,
|
||||||
|
* TaskItem matches `li[data-type="taskItem"]`,
|
||||||
|
* the checked state is read from `data-checked === "true"`.
|
||||||
|
*
|
||||||
|
* A list is only converted when it has at least one `<li>` and EVERY direct
|
||||||
|
* `<li>` contains a checkbox input. Both `<ul>` and `<ol>` are considered: a
|
||||||
|
* numbered checklist (`1. [x] a`, which marked renders as an `<ol>` of checkbox
|
||||||
|
* `<li>`s) would otherwise lose its task state. TipTap task lists are unordered,
|
||||||
|
* so a matching `<ol>` is emitted as `data-type="taskList"` exactly like a
|
||||||
|
* `<ul>`. Mixed or ordinary lists (including ordinary `<ol>` lists) are left
|
||||||
|
* untouched so they keep rendering as bullet/numbered lists. The marked `<p>`
|
||||||
|
* wrapper is kept inside the `<li>` because TaskItem content allows paragraphs.
|
||||||
|
*/
|
||||||
|
function bridgeTaskLists(html: string): string {
  // Two pass-through cases, checked in this order:
  //  1. no checkbox input anywhere in the markup -> nothing to bridge, so the
  //     JSDOM parse is skipped entirely;
  //  2. the markup is longer than MAX_CALLOUT_PREPROCESS_BYTES -> defensive
  //     cap, the input is returned verbatim instead of being parsed.
  const mentionsCheckbox = /type=["']?checkbox/i.test(html);
  if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return html;
  }

  const { document } = new JSDOM(html).window;

  // Checkboxes that belong to an <li> itself: direct <input type="checkbox">
  // children, plus inputs that are direct children of the <li>'s direct <p>
  // children. Inputs nested deeper (for example inside a child list) are not
  // collected, so an <li> that merely wraps a nested task list does not count
  // as a task item. Every matching input is returned, not just the first.
  const ownCheckboxes = (li: Element): Element[] =>
    Array.from(li.children).flatMap((child): Element[] => {
      if (child.tagName === "INPUT") {
        return child.getAttribute("type") === "checkbox" ? [child] : [];
      }
      if (child.tagName === "P") {
        return Array.from(
          child.querySelectorAll(":scope > input[type='checkbox']"),
        );
      }
      return [];
    });

  // Swap an <ol> for a <ul> carrying the same attributes and child nodes.
  // Leaving the tag as <ol> while adding data-type="taskList" would let both
  // the orderedList and the taskList parse rules match the same element.
  const toUnorderedList = (ol: Element): Element => {
    const ul = document.createElement("ul");
    for (const { name, value } of Array.from(ol.attributes)) {
      ul.setAttribute(name, value);
    }
    while (ol.firstChild) {
      ul.appendChild(ol.firstChild);
    }
    ol.replaceWith(ul);
    return ul;
  };

  // Snapshot every list up front; nested lists get their own turn in the loop.
  const candidates = Array.from(document.querySelectorAll("ul, ol"));
  for (const list of candidates) {
    // Only DIRECT <li> children decide whether this list is a checklist.
    const items = Array.from(list.children).filter(
      (el) => el.tagName === "LI",
    );
    if (items.length === 0) continue;

    const boxesPerItem = items.map((li) => ownCheckboxes(li));
    // A single item without its own checkbox keeps the list ordinary.
    if (boxesPerItem.some((boxes) => boxes.length === 0)) continue;

    const target = list.tagName === "OL" ? toUnorderedList(list) : list;
    target.setAttribute("data-type", "taskList");

    for (let i = 0; i < items.length; i++) {
      const li = items[i];
      const boxes = boxesPerItem[i];
      // The first checkbox decides the checked state; the rest are only removed.
      const first = boxes[0] ?? null;
      li.setAttribute("data-type", "taskItem");
      const checked =
        first != null &&
        (first.hasAttribute("checked") || (first as any).checked);
      li.setAttribute("data-checked", checked ? "true" : "false");
      // Strip every collected input so none leaks into the item content.
      boxes.forEach((box) => box.remove());
    }
  }

  return document.body.innerHTML;
}
|
||||||
|
|
||||||
|
/**
 * Turn a markdown string into a ProseMirror JSON document.
 *
 * Pipeline: `preprocessCallouts` -> `marked.parse` -> `bridgeTaskLists` ->
 * `generateJSON` with the Docmost extension set.
 *
 * @param markdownContent markdown source text
 * @returns the JSON produced by `generateJSON`
 */
export async function markdownToProseMirror(
  markdownContent: string,
): Promise<any> {
  const calloutReady = await preprocessCallouts(markdownContent);
  const renderedHtml = await marked.parse(calloutReady);
  return generateJSON(bridgeTaskLists(renderedHtml), docmostExtensions);
}
|
||||||
|
|
||||||
|
/**
 * Build the collaboration WebSocket URL from an API base URL.
 *
 * Steps: swap the `http`/`https` scheme prefix for `ws`/`wss`, drop a trailing
 * `/api` (or `/api/`) path segment, append `/collab`, and discard any query
 * string or fragment so they are not carried into the socket URL.
 *
 * The scheme swap is case-insensitive: URL schemes are case-insensitive and the
 * URL parser lowercases them, so `HTTPS://host/api` must still end up as
 * `wss://host/collab` rather than keeping an `https:` protocol.
 *
 * Shared by the live read and the mutate path so both target the same socket.
 *
 * @param baseUrl API base URL, e.g. `https://docs.example.com/api`
 * @returns the WebSocket URL; when `baseUrl` cannot be parsed as a URL, the
 *          scheme-swapped string with `/collab` appended (unless already there)
 */
export function buildCollabWsUrl(baseUrl: string): string {
  const swapped = baseUrl.replace(/^http/i, "ws");
  try {
    const url = new URL(swapped);
    // The anchored regex is a no-op when the path does not end in /api, so no
    // separate endsWith() guard is needed.
    const basePath = url.pathname.replace(/\/api\/?$/, "").replace(/\/$/, "");
    url.pathname = `${basePath}/collab`;
    url.search = "";
    url.hash = "";
    return url.toString();
  } catch {
    // Not parseable as a URL: fall back to plain string handling.
    return swapped.endsWith("/collab")
      ? swapped
      : swapped.replace(/\/$/, "") + "/collab";
  }
}
|
||||||
|
|
||||||
|
/**
 * Encode a ProseMirror JSON doc into a Yjs document.
 *
 * The doc is first passed through `sanitizeForYjs`, then handed to
 * `TiptapTransformer.toYdoc` under the `"default"` field. When encoding still
 * throws, the failure is re-thrown as an `Error` whose message carries the
 * original reason plus either the attribute path reported by
 * `findUnstorableAttr` or a generic hint.
 *
 * @param doc ProseMirror JSON document
 * @returns the encoded `Y.Doc`
 * @throws Error when `toYdoc` rejects the sanitized document
 */
export function buildYDoc(doc: any): Y.Doc {
  const sanitized = sanitizeForYjs(doc);
  try {
    return TiptapTransformer.toYdoc(sanitized, "default", docmostExtensions);
  } catch (cause) {
    const culprit = findUnstorableAttr(sanitized);
    const reason = cause instanceof Error ? cause.message : String(cause);
    const hint = culprit
      ? ` Offending attribute: ${culprit}.`
      : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined).";
    throw new Error(
      `Failed to encode document to Yjs (toYdoc): ${reason}.${hint}`,
    );
  }
}
|
||||||
|
|
||||||
|
/**
 * Check that a document can be encoded to Yjs.
 *
 * Runs `buildYDoc` and throws away the result, so an un-encodable document
 * raises exactly the error `buildYDoc` raises. Intended for the dry-run
 * preview, which should fail the same way the apply path does.
 *
 * @param doc ProseMirror JSON document to validate
 * @throws Error whatever `buildYDoc` throws for this document
 */
export function assertYjsEncodable(doc: any): void {
  // Only the throw matters; the built Y.Doc is intentionally discarded.
  void buildYDoc(doc);
}
|
||||||
|
|
||||||
|
/** Upper bound, in milliseconds, on the initial handshake/sync wait. */
const CONNECT_TIMEOUT_MS = 25 * 1000;

/** Upper bound, in milliseconds, on the wait for the server to acknowledge a write. */
const PERSIST_TIMEOUT_MS = 20 * 1000;
|
||||||
|
|
||||||
|
/**
 * Safely mutate the live content of a page over the collaboration websocket.
 *
 * The function:
 *   1. runs under `withPageLock(pageId, ...)`, so writes to the same page are
 *      serialized;
 *   2. opens a HocuspocusProvider for `page.<pageId>` and waits for `onSynced`,
 *      at which point the local ydoc mirrors the server document;
 *   3. inside `onSynced`, SYNCHRONOUSLY reads the live doc, runs `transform`
 *      and writes the result back. There is no `await` between read and
 *      write, so no remote update can interleave;
 *   4. waits until the provider reports `unsyncedChanges === 0` on a still-live
 *      connection before resolving.
 *
 * Timeouts: the handshake is bounded by CONNECT_TIMEOUT_MS and the
 * acknowledgement wait by PERSIST_TIMEOUT_MS. The handshake timer is cancelled
 * as soon as the sync completes, so a slow acknowledgement is always reported
 * as a persist timeout and is never cut short by the handshake timer.
 *
 * @param pageId      id of the page to mutate
 * @param collabToken token passed to the collaboration provider
 * @param baseUrl     API base URL; converted with `buildCollabWsUrl`
 * @param transform   receives the live ProseMirror doc and returns the NEW full
 *                    doc to write, or `null`/`undefined` to abort without
 *                    writing. Errors it throws are propagated to the caller.
 * @returns the doc that was written, or the live doc when the transform aborted
 * @throws Error on connect/persist timeout, authentication failure, a
 *         disconnect/close before the write is acknowledged, an encoding
 *         failure from `buildYDoc`, or anything thrown by `transform`
 */
export async function mutatePageContent(
  pageId: string,
  collabToken: string,
  baseUrl: string,
  transform: (liveDoc: any) => any | null,
): Promise<any> {
  return withPageLock(pageId, () => {
    if (process.env.DEBUG) {
      console.error(`Starting realtime content mutate for page ${pageId}`);
      // Token prefix is sensitive; only log it under DEBUG.
      console.error(
        `Token prefix: ${collabToken ? collabToken.substring(0, 5) : "NONE"}...`,
      );
    }

    const ydoc = new Y.Doc();
    const wsUrl = buildCollabWsUrl(baseUrl);
    if (process.env.DEBUG) console.error(`Connecting to WebSocket: ${wsUrl}`);

    return new Promise<any>((resolve, reject) => {
      let provider: HocuspocusProvider | undefined;
      let applied = false; // onSynced may fire again on reconnect — apply once.
      let settled = false;
      // Set true on disconnect/close so a reconnect-driven unsyncedChanges->0
      // cannot be mistaken for a successful persist of our write.
      let connectionLost = false;
      // Value handed to the caller once the write has been acknowledged.
      let lastWrittenDoc: any;
      let connectTimer: ReturnType<typeof setTimeout> | undefined;
      let persistTimer: ReturnType<typeof setTimeout> | undefined;
      let unsyncedHandler: ((data: { number: number }) => void) | undefined;

      const cleanup = () => {
        if (connectTimer) clearTimeout(connectTimer);
        if (persistTimer) clearTimeout(persistTimer);
        if (provider) {
          if (unsyncedHandler) {
            try {
              provider.off("unsyncedChanges", unsyncedHandler);
            } catch {
              // Best-effort teardown: the outcome is already decided, so a
              // failure to detach the listener must not mask it.
            }
          }
          try {
            provider.destroy();
          } catch {
            // Best-effort teardown, same reasoning as above.
          }
        }
      };

      // Settle the promise exactly once; later calls are no-ops.
      const finish = (err: Error | null, value?: any) => {
        if (settled) return;
        settled = true;
        cleanup();
        if (err) reject(err);
        else resolve(value);
      };

      // Shared by onDisconnect and onClose. The flag is set BEFORE finish so a
      // racing unsyncedChanges handler sees the connection as lost and cannot
      // report a false success. The close triggered by our own
      // cleanup()->provider.destroy() arrives after settled=true and is a no-op.
      const failOnConnectionLoss = () => {
        connectionLost = true;
        finish(
          new Error(
            "Collaboration connection closed before the update was persisted/synced",
          ),
        );
      };

      connectTimer = setTimeout(() => {
        finish(new Error("Connection timeout to collaboration server"));
      }, CONNECT_TIMEOUT_MS);

      // Resolve once the server has acknowledged our update, i.e. once the
      // provider's unsyncedChanges counter is back at 0 on a live connection.
      const waitForPersistence = () => {
        if (settled) return;
        // A missing provider is a failure, not a success: without it the write
        // can never have been acknowledged.
        if (!provider) {
          finish(new Error("collab provider gone before persistence"));
          return;
        }
        if (provider.unsyncedChanges === 0) {
          finish(null, lastWrittenDoc);
          return;
        }
        persistTimer = setTimeout(() => {
          finish(
            new Error(
              "Timeout waiting for collaboration server to persist the update",
            ),
          );
        }, PERSIST_TIMEOUT_MS);
        unsyncedHandler = (data: { number: number }) => {
          // Only treat unsyncedChanges->0 as success while the connection is
          // still up; after a disconnect the close error must win instead.
          if (data.number === 0 && !connectionLost) {
            finish(null, lastWrittenDoc);
          }
        };
        provider.on("unsyncedChanges", unsyncedHandler);
      };

      provider = new HocuspocusProvider({
        url: wsUrl,
        name: `page.${pageId}`,
        document: ydoc,
        token: collabToken,
        // @ts-ignore - Required for Node.js environment
        WebSocketPolyfill: WebSocket,
        onConnect: () => {
          if (process.env.DEBUG) console.error("WS Connect");
        },
        // A disconnect/close while we are still waiting (before onSynced, or
        // during the persistence wait) means the update will never be
        // acknowledged — surface it now instead of waiting for a timeout.
        onDisconnect: () => {
          if (process.env.DEBUG) console.error("WS Disconnect");
          failOnConnectionLoss();
        },
        onClose: () => {
          if (process.env.DEBUG) console.error("WS Close");
          failOnConnectionLoss();
        },
        onSynced: () => {
          if (applied || settled) return;
          applied = true;
          // The handshake is done, so the connect timer has served its
          // purpose. Left armed, it could fire during the persistence wait and
          // report a misleading "Connection timeout" before PERSIST_TIMEOUT_MS
          // has elapsed.
          if (connectTimer) {
            clearTimeout(connectTimer);
            connectTimer = undefined;
          }
          if (process.env.DEBUG) console.error("Connected and synced!");

          // CRITICAL: everything between reading the live doc and writing it
          // back must stay synchronous (no await). While the event loop is not
          // yielded, no incoming remote update can interleave.
          let newDoc: any;
          try {
            let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
            if (
              !liveDoc ||
              typeof liveDoc !== "object" ||
              !Array.isArray(liveDoc.content)
            ) {
              // Nothing usable in the ydoc: hand the transform an empty doc.
              liveDoc = { type: "doc", content: [] };
            }

            newDoc = transform(liveDoc);

            if (newDoc == null) {
              // Transform aborted — write nothing, return the live doc.
              lastWrittenDoc = liveDoc;
              finish(null, liveDoc);
              return;
            }

            const tempDoc = buildYDoc(newDoc);
            // Fetch the fragment immediately before the transact that mutates
            // it, rather than reusing a handle grabbed across the transform.
            const fragment = ydoc.getXmlFragment("default");
            ydoc.transact(() => {
              if (fragment.length > 0) {
                fragment.delete(0, fragment.length);
              }
              Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc));
            });
          } catch (e) {
            // Includes errors thrown by transform: propagate them verbatim.
            finish(e instanceof Error ? e : new Error(String(e)));
            return;
          }

          lastWrittenDoc = newDoc;
          if (process.env.DEBUG)
            console.error("Content written, waiting for server to persist...");
          waitForPersistence();
        },
        onAuthenticationFailed: () => {
          finish(
            new Error("Authentication failed for collaboration connection"),
          );
        },
      });
    });
  });
}
|
||||||
|
|
||||||
|
/**
 * Overwrite the live content of a page with a ready-made ProseMirror document.
 *
 * This is a deliberate full replace: the supplied document is written as-is
 * through `mutatePageContent`, whatever the live document currently holds.
 *
 * The document is validated up front (non-null object whose `type` is
 * `"doc"`) so a bad input fails immediately rather than inside the
 * collaboration write.
 *
 * @param pageId         id of the page to overwrite
 * @param prosemirrorDoc full ProseMirror JSON document to write
 * @param collabToken    token for the collaboration connection
 * @param baseUrl        API base URL
 * @throws Error when `prosemirrorDoc` is not a `doc` object, or whatever
 *         `mutatePageContent` rejects with
 */
export async function replacePageContent(
  pageId: string,
  prosemirrorDoc: any,
  collabToken: string,
  baseUrl: string,
): Promise<void> {
  const looksLikeDoc =
    prosemirrorDoc != null &&
    typeof prosemirrorDoc === "object" &&
    prosemirrorDoc.type === "doc";
  if (!looksLikeDoc) {
    throw new Error("replacePageContent: invalid ProseMirror document");
  }

  // The transform ignores the live doc: this is an unconditional replace.
  const useSuppliedDoc = () => prosemirrorDoc;
  await mutatePageContent(pageId, collabToken, baseUrl, useSuppliedDoc);
}
|
||||||
|
|
||||||
|
/**
 * Replace a page's content from markdown (kept for backwards compatibility).
 *
 * The markdown is converted with `markdownToProseMirror` and the resulting
 * document is written through `mutatePageContent` as a full replace. Because
 * the whole document is re-imported, block ids are regenerated.
 *
 * @param pageId          id of the page to overwrite
 * @param markdownContent markdown source for the new content
 * @param collabToken     token for the collaboration connection
 * @param baseUrl         API base URL
 */
export async function updatePageContentRealtime(
  pageId: string,
  markdownContent: string,
  collabToken: string,
  baseUrl: string,
): Promise<void> {
  const convertedDoc = await markdownToProseMirror(markdownContent);
  const useConvertedDoc = () => convertedDoc;
  await mutatePageContent(pageId, collabToken, baseUrl, useConvertedDoc);
}
|
||||||
319
packages/docmost-client/src/lib/diff.ts
Normal file
319
packages/docmost-client/src/lib/diff.ts
Normal file
@@ -0,0 +1,319 @@
|
|||||||
|
/**
|
||||||
|
* Headless, Docmost-equivalent document diff.
|
||||||
|
*
|
||||||
|
* Docmost's history editor computes a change set with the exact pipeline below
|
||||||
|
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
|
||||||
|
* editor decorations. This module runs the SAME computation but serializes the
|
||||||
|
* result to text + integrity counts instead of decorations, so a diff can be
|
||||||
|
* previewed without a browser.
|
||||||
|
*
|
||||||
|
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
|
||||||
|
* maintained published fork of the MIT prosemirror-recreate-steps source that
|
||||||
|
* Docmost vendors in @docmost/editor-ext; it exposes the identical
|
||||||
|
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
|
||||||
|
* signature.
|
||||||
|
*
|
||||||
|
* If recreateTransform / the changeset throws on a pathological document pair,
|
||||||
|
* we fall back to a coarse block-level text diff so the tool never hard-fails.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { getSchema } from "@tiptap/core";
|
||||||
|
import { Node } from "@tiptap/pm/model";
|
||||||
|
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
|
||||||
|
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
|
||||||
|
import { docmostExtensions } from "./docmost-schema.js";
|
||||||
|
|
||||||
|
/** One inserted or deleted piece of text, plus the block it was found in. */
export interface DiffChange {
  /** Whether the text was added (`"insert"`) or removed (`"delete"`). */
  op: "insert" | "delete";

  /** Leading plain text of the block containing the change (context only). */
  block: string;

  /** The text that was inserted or deleted. */
  text: string;
}
|
||||||
|
|
||||||
|
/**
 * Structural integrity figures for a diff. Every field is an `[old, new]`
 * pair; the counts are numbers, while `footnoteMarkers` holds the two marker
 * lists themselves.
 */
export interface DiffIntegrity {
  /** Image counts as `[old, new]`. */
  images: [number, number];

  /** Link counts as `[old, new]`. */
  links: [number, number];

  /** Table counts as `[old, new]`. */
  tables: [number, number];

  /** Callout counts as `[old, new]`. */
  callouts: [number, number];

  /** Footnote marker numbers as `[oldList, newList]`. */
  footnoteMarkers: [Array<number>, Array<number>];
}
|
||||||
|
|
||||||
|
/** Everything `diffDocs` reports about a pair of documents. */
export interface DiffResult {
  /** Character totals for insertions/deletions and the number of distinct changed-block keys. */
  summary: { inserted: number; deleted: number; blocksChanged: number };

  /** Old/new structural counts. */
  integrity: DiffIntegrity;

  /** The individual insert/delete entries. */
  changes: DiffChange[];

  /** Human-readable, unified-diff-like text rendering of the above. */
  markdown: string;
}
|
||||||
|
|
||||||
|
/**
 * ProseMirror schema derived from the Docmost extension set. Created a single
 * time at module load and shared by every `diffDocs` call.
 */
const schema = getSchema(docmostExtensions);
|
||||||
|
|
||||||
|
/**
 * Flatten a JSON node to its plain text: the node's own `text` (when it is a
 * string) followed by the plain text of each entry of `content`, in order.
 * Anything that is not a non-null object yields the empty string.
 */
function plainText(node: any): string {
  if (!node || typeof node !== "object") return "";

  const ownText: string = typeof node.text === "string" ? node.text : "";
  const children: any[] = Array.isArray(node.content) ? node.content : [];
  return children.reduce(
    (acc: string, child: any) => acc + plainText(child),
    ownText,
  );
}
|
||||||
|
|
||||||
|
/**
 * Count how many nodes in a JSON tree satisfy `pred`.
 *
 * The node itself is tested first, then each entry of its `content` array is
 * visited recursively. Values that are not non-null objects are skipped
 * without calling `pred`.
 */
function countNodes(doc: any, pred: (node: any) => boolean): number {
  if (!doc || typeof doc !== "object") return 0;

  let total = pred(doc) ? 1 : 0;
  if (Array.isArray(doc.content)) {
    for (const child of doc.content) {
      total += countNodes(child, pred);
    }
  }
  return total;
}
|
||||||
|
|
||||||
|
/**
 * Count the distinct link targets in a JSON doc.
 *
 * Every `link` mark on a text node contributes its `href` to a Set, so a link
 * that is split over several adjacent text runs is counted once rather than
 * once per run. A link mark whose `href` is missing or not a string is filed
 * under the empty-string key, so all such malformed links together add one.
 *
 * @returns the number of distinct hrefs collected
 */
function countUniqueLinks(doc: any): number {
  const seen = new Set<string>();
  // Explicit work list instead of recursion; visiting order does not matter
  // because only Set membership is recorded.
  const pending: any[] = [doc];

  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") continue;

    if (node.type === "text" && Array.isArray(node.marks)) {
      for (const mark of node.marks) {
        if (!mark || mark.type !== "link") continue;
        const href = mark.attrs ? mark.attrs.href : undefined;
        seen.add(typeof href === "string" ? href : "");
      }
    }

    if (Array.isArray(node.content)) {
      for (const child of node.content) pending.push(child);
    }
  }

  return seen.size;
}
|
||||||
|
|
||||||
|
/**
 * Collect the integers of every `[N]` footnote marker in the document BODY.
 *
 * The body is every top-level block that precedes the first heading whose
 * trimmed plain text equals `notesHeading`; when no such heading exists the
 * whole document is the body. Markers are returned in reading order and
 * repeats are kept.
 *
 * @param doc          ProseMirror JSON document
 * @param notesHeading exact heading text that starts the notes section
 */
function footnoteMarkers(doc: any, notesHeading: string): number[] {
  const topLevel: any[] = Array.isArray(doc?.content) ? doc.content : [];

  const startsNotes = (block: any): boolean =>
    Boolean(block) &&
    block.type === "heading" &&
    plainText(block).trim() === notesHeading;

  const cut = topLevel.findIndex(startsNotes);
  const body = cut === -1 ? topLevel : topLevel.slice(0, cut);

  return body.flatMap((block) =>
    Array.from(plainText(block).matchAll(/\[(\d+)\]/g), (hit) =>
      Number(hit[1]),
    ),
  );
}
|
||||||
|
|
||||||
|
/**
 * Assemble the `[old, new]` integrity figures for a pair of JSON docs: node
 * counts for images, tables and callouts, the unique-link counts, and the
 * footnote marker lists of each document.
 */
function computeIntegrity(
  oldDoc: any,
  newDoc: any,
  notesHeading: string,
): DiffIntegrity {
  // Count nodes of one type in the old doc, then in the new doc.
  const countOfType = (type: string): [number, number] => {
    const hasType = (n: any): boolean => n.type === type;
    return [countNodes(oldDoc, hasType), countNodes(newDoc, hasType)];
  };

  return {
    images: countOfType("image"),
    links: [countUniqueLinks(oldDoc), countUniqueLinks(newDoc)],
    tables: countOfType("table"),
    callouts: countOfType("callout"),
    footnoteMarkers: [
      footnoteMarkers(oldDoc, notesHeading),
      footnoteMarkers(newDoc, notesHeading),
    ],
  };
}
|
||||||
|
|
||||||
|
/**
 * Lead text of the top-level block that holds a document position.
 *
 * `pos` is clamped into `[0, node.content.size]` before being resolved. The
 * depth-1 ancestor is used when the resolved position is nested, otherwise the
 * root node itself. Text longer than 80 characters is cut to 77 plus `"..."`.
 * Any exception raised while resolving yields `""`.
 */
function blockContextAt(node: Node, pos: number): string {
  try {
    const bounded = Math.min(Math.max(pos, 0), node.content.size);
    const resolved = node.resolve(bounded);
    // Depth 1 is the top-level block of a doc; depth 0 is the doc itself.
    const container = resolved.node(Math.min(resolved.depth, 1));
    const lead = container.textContent || "";
    if (lead.length <= 80) return lead;
    return lead.slice(0, 77) + "...";
  } catch {
    return "";
  }
}
|
||||||
|
|
||||||
|
/**
 * Shorten `s` to at most `n` characters for display. Strings longer than `n`
 * keep their first `n - 3` characters and gain a `"..."` suffix; shorter ones
 * are returned unchanged.
 */
function truncate(s: string, n = 120): string {
  if (s.length <= n) return s;
  return `${s.slice(0, n - 3)}...`;
}
|
||||||
|
|
||||||
|
/**
 * Coarse fallback diff working on whole top-level blocks.
 *
 * Each top-level block is reduced to its plain text. A non-blank old text that
 * appears nowhere in the new document becomes a `delete`; a non-blank new text
 * that appears nowhere in the old document becomes an `insert`. All deletes
 * are listed before all inserts.
 */
function coarseDiff(oldDoc: any, newDoc: any): DiffChange[] {
  const blockTexts = (doc: any): string[] =>
    Array.isArray(doc?.content)
      ? doc.content.map((block: any) => plainText(block))
      : [];

  const before = blockTexts(oldDoc);
  const after = blockTexts(newDoc);
  const beforeSet = new Set(before);
  const afterSet = new Set(after);

  const asChange =
    (op: DiffChange["op"]) =>
    (text: string): DiffChange => ({ op, block: truncate(text, 80), text });
  const missingFrom = (other: Set<string>) => (text: string) =>
    !other.has(text) && text.trim() !== "";

  return [
    ...before.filter(missingFrom(afterSet)).map(asChange("delete")),
    ...after.filter(missingFrom(beforeSet)).map(asChange("insert")),
  ];
}
|
||||||
|
|
||||||
|
/**
 * Render a diff result as a short markdown report: a headline with the totals,
 * an optional note when the coarse fallback was used, the integrity table, and
 * one `+`/`-` line per change (with truncated text and block context).
 */
function renderMarkdown(
  result: Omit<DiffResult, "markdown">,
  fellBack: boolean,
): string {
  const { summary, integrity, changes } = result;

  const fallbackNote = fellBack
    ? ["", "> note: precise diff failed; coarse block-level diff shown."]
    : [];

  const changeLines =
    changes.length === 0
      ? ["(no textual changes)"]
      : changes.map((change) => {
          const marker = change.op === "insert" ? "+" : "-";
          const context = change.block
            ? ` @ ${truncate(change.block, 60)}`
            : "";
          return `${marker} ${truncate(change.text)}${context}`;
        });

  return [
    `# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`,
    ...fallbackNote,
    "",
    "## Integrity (old -> new)",
    `- images: ${integrity.images[0]} -> ${integrity.images[1]}`,
    `- links: ${integrity.links[0]} -> ${integrity.links[1]}`,
    `- tables: ${integrity.tables[0]} -> ${integrity.tables[1]}`,
    `- callouts: ${integrity.callouts[0]} -> ${integrity.callouts[1]}`,
    `- footnoteMarkers: [${integrity.footnoteMarkers[0].join(", ")}] -> [${integrity.footnoteMarkers[1].join(", ")}]`,
    "",
    "## Changes",
    ...changeLines,
  ].join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Diff two ProseMirror JSON documents and serialize the result to text plus
 * integrity counts.
 *
 * Precise path: both documents are parsed against the shared schema, a
 * transform is rebuilt with `recreateTransform`, fed into a `ChangeSet`, and
 * simplified with `simplifyChanges`. Each resulting change contributes a
 * `delete` entry (text of the old range) and/or an `insert` entry (text of the
 * new range).
 *
 * Fallback path: if anything in the precise path throws, the result is
 * rebuilt from `coarseDiff` instead. Any partial results gathered before the
 * failure are discarded first, so the totals describe the coarse diff only.
 *
 * @param oldDocJson   the earlier document
 * @param newDocJson   the later document
 * @param notesHeading heading that separates body from notes for footnote counting
 * @returns summary totals, integrity counts, the change list and a markdown report
 */
export function diffDocs(
  oldDocJson: any,
  newDocJson: any,
  notesHeading: string = "Примечания переводчика",
): DiffResult {
  const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);

  let changes: DiffChange[] = [];
  let inserted = 0;
  let deleted = 0;
  let fellBack = false;
  const changedBlocks = new Set<string>();

  // Append one change and keep the running totals in step with it.
  const record = (op: DiffChange["op"], block: string, text: string): void => {
    changes.push({ op, block, text });
    if (op === "insert") inserted += text.length;
    else deleted += text.length;
    // Keys are prefixed with the op's first letter ("i:" / "d:").
    if (block) changedBlocks.add(op[0] + ":" + block);
  };

  try {
    const oldNode = Node.fromJSON(schema, oldDocJson);
    const newNode = Node.fromJSON(schema, newDocJson);
    const tr = recreateTransform(oldNode, newNode, {
      complexSteps: false,
      wordDiffs: true,
      simplifyDiff: true,
    });
    const changeSet = ChangeSet.create(oldNode).addSteps(
      tr.doc,
      tr.mapping.maps,
      [],
    );
    const simplified = simplifyChanges(changeSet.changes, newNode);

    for (const change of simplified) {
      // Deleted text lives in the OLD doc coordinate range [fromA, toA).
      if (change.toA > change.fromA) {
        const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
        if (text.length > 0) {
          record("delete", blockContextAt(oldNode, change.fromA), text);
        }
      }
      // Inserted text lives in the NEW doc coordinate range [fromB, toB).
      if (change.toB > change.fromB) {
        const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
        if (text.length > 0) {
          record("insert", blockContextAt(newNode, change.fromB), text);
        }
      }
    }
  } catch {
    // Pathological pair: degrade to a coarse block-level diff so we never
    // throw. The failure may have happened part-way through the loop above, so
    // drop every partial result first; otherwise the coarse totals would be
    // added on top of stale precise ones.
    fellBack = true;
    changes = [];
    inserted = 0;
    deleted = 0;
    changedBlocks.clear();
    for (const c of coarseDiff(oldDocJson, newDocJson)) {
      record(c.op, c.block, c.text);
    }
  }

  const partial: Omit<DiffResult, "markdown"> = {
    summary: { inserted, deleted, blocksChanged: changedBlocks.size },
    integrity,
    changes,
  };
  return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
|
||||||
1065
packages/docmost-client/src/lib/docmost-schema.ts
Normal file
1065
packages/docmost-client/src/lib/docmost-schema.ts
Normal file
File diff suppressed because it is too large
Load Diff
93
packages/docmost-client/src/lib/filters.ts
Normal file
93
packages/docmost-client/src/lib/filters.ts
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
/**
|
||||||
|
* Filter functions to extract only relevant information from API responses
|
||||||
|
* for better agent consumption
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Reduce a raw workspace API object to the fields worth showing to an agent.
 *
 * @param data raw workspace record from the API
 * @returns a new object holding only the whitelisted workspace fields
 */
export function filterWorkspace(data: any) {
  const {
    id,
    name,
    description,
    defaultSpaceId,
    createdAt,
    updatedAt,
    deletedAt,
  } = data;
  return {
    id,
    name,
    description,
    defaultSpaceId,
    createdAt,
    updatedAt,
    deletedAt,
  };
}
|
||||||
|
|
||||||
|
/**
 * Reduce a raw space API object to its identifying and timestamp fields.
 *
 * @param space raw space record from the API
 * @returns a new object holding only the whitelisted space fields
 */
export function filterSpace(space: any) {
  const {
    id,
    name,
    description,
    slug,
    visibility,
    createdAt,
    updatedAt,
    deletedAt,
  } = space;
  return {
    id,
    name,
    description,
    slug,
    visibility,
    createdAt,
    updatedAt,
    deletedAt,
  };
}
|
||||||
|
|
||||||
|
/**
 * Reduce a raw group API object to its identifying and timestamp fields.
 *
 * @param group raw group record from the API
 * @returns a new object holding only the whitelisted group fields
 */
export function filterGroup(group: any) {
  const {
    id,
    name,
    description,
    workspaceId,
    createdAt,
    updatedAt,
    deletedAt,
  } = group;
  return {
    id,
    name,
    description,
    workspaceId,
    createdAt,
    updatedAt,
    deletedAt,
  };
}
|
||||||
|
|
||||||
|
/**
 * Reduce a raw page API object to its core fields, optionally attaching the
 * converted content and a compact list of subpages.
 *
 * @param page raw page record from the API
 * @param content converted markdown; attached whenever it is a string, even
 *   an empty one
 * @param subpages child pages; attached as `{ id, title }` pairs only when
 *   the array is non-empty
 * @returns a new object holding the whitelisted page fields plus the
 *   optional `content` / `subpages` keys
 */
export function filterPage(page: any, content?: string, subpages?: any[]) {
  const {
    id,
    slugId,
    title,
    parentPageId,
    spaceId,
    isLocked,
    createdAt,
    updatedAt,
    deletedAt,
  } = page;

  // Each optional part is either a falsy value (spreads to nothing) or a
  // one-key object that is merged into the result.
  const contentPart = typeof content === "string" && { content };
  const subpagesPart = subpages &&
    subpages.length > 0 && {
      subpages: subpages.map((child) => ({ id: child.id, title: child.title })),
    };

  return {
    id,
    slugId,
    title,
    parentPageId,
    spaceId,
    isLocked,
    createdAt,
    updatedAt,
    deletedAt,
    ...contentPart,
    ...subpagesPart,
  };
}
|
||||||
|
|
||||||
|
/**
 * Reduce a raw comment API object to the fields worth showing to an agent.
 *
 * @param comment raw comment record from the API
 * @param markdownContent converted comment body; used instead of
 *   `comment.content` unless it is null/undefined
 * @returns a new object where missing optional fields are normalised to
 *   `null` (and a missing `type` to "page")
 */
export function filterComment(comment: any, markdownContent?: string) {
  const {
    id,
    pageId,
    content: rawContent,
    selection,
    type,
    parentCommentId,
    creatorId,
    creator,
    createdAt,
    editedAt,
    resolvedAt,
    resolvedById,
  } = comment;

  // Any falsy value collapses to null.
  const orNull = (value: any) => value || null;

  return {
    id,
    pageId,
    content: markdownContent ?? rawContent,
    selection: orNull(selection),
    type: type || "page",
    parentCommentId: orNull(parentCommentId),
    creatorId,
    creatorName: orNull(creator?.name),
    createdAt,
    editedAt: orNull(editedAt),
    resolvedAt: orNull(resolvedAt),
    resolvedById: orNull(resolvedById),
  };
}
|
||||||
|
|
||||||
|
/**
 * Reduce a raw search hit to its core fields, flattening the nested space
 * object into `spaceId` / `spaceName`.
 *
 * @param result raw search result record from the API
 * @returns a new object with the whitelisted fields; the space fields are
 *   `undefined` when the hit carries no `space`
 */
export function filterSearchResult(result: any) {
  const {
    id,
    title,
    parentPageId,
    createdAt,
    updatedAt,
    rank,
    highlight,
    space,
  } = result;
  return {
    id,
    title,
    parentPageId,
    createdAt,
    updatedAt,
    rank,
    highlight,
    spaceId: space?.id,
    spaceName: space?.name,
  };
}
|
||||||
127
packages/docmost-client/src/lib/json-edit.ts
Normal file
127
packages/docmost-client/src/lib/json-edit.ts
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
/**
|
||||||
|
* Surgical text edits on a ProseMirror document without re-importing it.
|
||||||
|
*
|
||||||
|
* Each edit replaces an exact substring inside individual text nodes,
|
||||||
|
* preserving every node id, mark and attribute around it. This is the
|
||||||
|
* safe alternative to a full markdown re-import for small wording fixes.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** One literal find/replace instruction for `applyTextEdits`. */
export interface TextEdit {
  /** Exact substring to look for; must be non-empty. */
  find: string;

  /** Text inserted in place of each replaced occurrence. */
  replace: string;

  /**
   * When true, every occurrence is replaced. When omitted or false, the edit
   * must match exactly once.
   */
  replaceAll?: boolean;
}
|
||||||
|
|
||||||
|
/** Outcome of one applied `TextEdit`. */
export interface TextEditResult {
  /** The `find` string of the edit this result belongs to. */
  find: string;

  /** Number of occurrences that were replaced. */
  replacements: number;
}
|
||||||
|
|
||||||
|
/**
 * Concatenate the text of every text node under `node`, in document order
 * (used for span-detection hints).
 */
function collectText(node: any): string {
  // A text node contributes its own text first; any other node contributes "".
  const own = node.type === "text" ? "" + (node.text || "") : "";
  const children: any[] = node.content || [];
  return children.reduce(
    (acc: string, child: any) => acc + collectText(child),
    own,
  );
}
|
||||||
|
|
||||||
|
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning left
 * to right. An empty needle counts as zero matches.
 */
function countOccurrences(haystack: string, needle: string): number {
  if (!needle) return 0;
  // Splitting on the needle yields one more piece than there are matches.
  return haystack.split(needle).length - 1;
}
|
||||||
|
|
||||||
|
/**
 * Apply a sequence of literal find/replace edits to a ProseMirror JSON doc.
 *
 * The input is left untouched: edits run against a JSON deep copy, which is
 * returned together with one result entry per edit. Edits are applied in
 * order, so a later edit sees the text produced by earlier ones. Matching
 * happens inside individual text nodes only.
 *
 * @param doc ProseMirror document JSON
 * @param edits edits to apply, in order
 * @returns the edited copy plus, per edit, how many replacements were made
 * @throws Error when `find` is empty, matches nothing, or matches more than
 *   once without `replaceAll` (the message says which, so the caller can
 *   refine `find`)
 */
export function applyTextEdits(
  doc: any,
  edits: TextEdit[],
): { doc: any; results: TextEditResult[] } {
  const working = JSON.parse(JSON.stringify(doc));
  const results: TextEditResult[] = [];

  // Pre-order traversal: visit the node, then each entry of its `content`.
  const walk = (node: any, visit: (n: any) => void): void => {
    visit(node);
    for (const child of node.content || []) walk(child, visit);
  };

  for (const edit of edits) {
    if (!edit.find) throw new Error("edit.find must be a non-empty string");

    // Pass 1: how often does `find` occur inside single text nodes?
    let hits = 0;
    walk(working, (n) => {
      if (n.type === "text" && n.text) {
        hits += countOccurrences(n.text, edit.find);
      }
    });

    if (hits === 0) {
      // Tell "text not present" apart from "text spans formatting runs".
      const spansRuns = collectText(working).includes(edit.find);
      if (spansRuns) {
        throw new Error(
          `Edit "${truncate(edit.find)}": the text exists in the document but spans ` +
            `multiple formatting runs (bold/link/italic boundaries). Use a shorter ` +
            `fragment that stays inside one run, or use update_page_json for ` +
            `structural changes.`,
        );
      }
      throw new Error(
        `Edit "${truncate(edit.find)}": text not found in the document.`,
      );
    }

    if (!edit.replaceAll && hits > 1) {
      throw new Error(
        `Edit "${truncate(edit.find)}": matches ${hits} times. ` +
          `Provide a longer, unique fragment or set replaceAll: true.`,
      );
    }

    // Pass 2: rewrite the matching text nodes.
    let applied = 0;
    walk(working, (n) => {
      if (n.type !== "text" || !n.text || !n.text.includes(edit.find)) return;

      if (edit.replaceAll) {
        applied += countOccurrences(n.text, edit.find);
        n.text = n.text.split(edit.find).join(edit.replace);
        return;
      }
      if (applied !== 0) return;

      // String.replace is avoided on purpose: its replacement argument expands
      // $&, $1, $`, $' and $$ instead of inserting them literally. Cutting the
      // first occurrence out by index keeps the replacement literal.
      const at = n.text.indexOf(edit.find);
      const before = n.text.slice(0, at);
      const after = n.text.slice(at + edit.find.length);
      n.text = before + edit.replace + after;
      applied = 1;
    });

    results.push({ find: edit.find, replacements: applied });
  }

  // ProseMirror forbids empty text nodes, so remove any that an edit emptied.
  const dropEmptyText = (node: any): void => {
    if (!Array.isArray(node.content)) return;
    node.content = node.content.filter(
      (child: any) => child.type !== "text" || child.text !== "",
    );
    for (const child of node.content) dropEmptyText(child);
  };
  dropEmptyText(working);

  return { doc: working, results };
}
|
||||||
|
|
||||||
|
/**
 * Shorten a string for use in error messages: anything longer than 60
 * characters is cut to its first 57 characters followed by "...".
 */
function truncate(s: string): string {
  if (s.length <= 60) return s;
  return s.slice(0, 57) + "...";
}
|
||||||
861
packages/docmost-client/src/lib/markdown-converter.ts
Normal file
861
packages/docmost-client/src/lib/markdown-converter.ts
Normal file
@@ -0,0 +1,861 @@
|
|||||||
|
/**
|
||||||
|
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||||
|
* Supports all Docmost-specific node types and extensions
|
||||||
|
*/
|
||||||
|
export function convertProseMirrorToMarkdown(content: any): string {
|
||||||
|
if (!content || !content.content) return "";
|
||||||
|
|
||||||
|
// Escape a value interpolated into a DOUBLE-QUOTED HTML attribute value
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.).
//
// Only two characters are escaped:
//   &  ->  &amp;   (it would otherwise start an entity)
//   "  ->  &quot;  (it is the delimiter of the attribute value)
//
// `<`, `>` and `'` are deliberately left alone. Per the original design note,
// the HTML re-parser (parse5/jsdom via @tiptap/html) is not expected to decode
// &lt; / &gt; / &#39; back inside attribute values, so escaping them would
// corrupt the stored data (e.g. a math node's LaTeX `a < b`) and accumulate
// escapes on every round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`).
// `'` is not special inside a double-quoted value anyway. Escaping & and "
// keeps the value inert against attribute injection while staying idempotent
// across export/import cycles.
//
// `&` MUST be replaced first, otherwise the `&` of a freshly written `&quot;`
// would itself be escaped.
const escapeAttr = (value: unknown): string =>
  String(value)
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;");
|
||||||
|
|
||||||
|
// Escape a value placed as HTML element TEXT content (between tags), where
// <, > and & are all significant. Used for text rendered inside raw-HTML
// blocks (table cells / columns) so stored characters cannot inject markup.
//
//   &  ->  &amp;   (replaced first so later entities are not double-escaped)
//   <  ->  &lt;
//   >  ->  &gt;
const escapeHtmlText = (value: unknown): string =>
  String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
|
||||||
|
|
||||||
|
// Make a stored src safe to place inside a markdown URL target (...): every
// whitespace character and both parentheses are percent-encoded, so the URL
// stays a single inert token (used for image/video/youtube srcs). A falsy
// value becomes the empty string.
const encodeMdUrl = (value: unknown): string => {
  const raw = String(value || "");
  return raw.replace(/[\s()]/g, (ch: string) => {
    if (ch === " ") return "%20";
    if (ch === "(") return "%28";
    if (ch === ")") return "%29";
    // Any other whitespace (newline, tab, ...) gets its standard encoding.
    return encodeURIComponent(ch);
  });
};
|
||||||
|
|
||||||
|
const processNode = (node: any): string => {
|
||||||
|
const type = node.type;
|
||||||
|
const nodeContent = node.content || [];
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case "doc":
|
||||||
|
return nodeContent.map(processNode).join("\n\n");
|
||||||
|
|
||||||
|
case "paragraph":
|
||||||
|
const text = nodeContent.map(processNode).join("");
|
||||||
|
const align = node.attrs?.textAlign;
|
||||||
|
if (align && align !== "left") {
|
||||||
|
return `<div align="${escapeAttr(align)}">${text}</div>`;
|
||||||
|
}
|
||||||
|
return text || "";
|
||||||
|
|
||||||
|
case "heading":
|
||||||
|
const level = node.attrs?.level || 1;
|
||||||
|
const headingText = nodeContent.map(processNode).join("");
|
||||||
|
return "#".repeat(level) + " " + headingText;
|
||||||
|
|
||||||
|
case "text":
|
||||||
|
let textContent = node.text || "";
|
||||||
|
// Apply marks (bold, italic, code, etc.)
|
||||||
|
if (node.marks) {
|
||||||
|
// Markdown code spans (`...`) cannot carry inner formatting, so when a
|
||||||
|
// run has the `code` mark alongside ANY other mark, backtick syntax
|
||||||
|
// would leak literal ** / []() into the code text. In that case emit
|
||||||
|
// nested HTML (<code> innermost, the other marks wrapping it as HTML)
|
||||||
|
// so the output is at least well-formed and re-parseable.
|
||||||
|
//
|
||||||
|
// NOTE: this does NOT round-trip both marks. The schema's `code` mark
|
||||||
|
// has `excludes: "_"` (it excludes every other mark), so on import the
|
||||||
|
// co-occurring mark is always dropped — the run comes back as `code`
|
||||||
|
// only. We keep the emission simple and accept that the other mark is
|
||||||
|
// lost; preserving both is impossible while `code` excludes them.
|
||||||
|
// Only use the backtick form when `code` is the sole mark.
|
||||||
|
const markTypes = node.marks.map((m: any) => m.type);
|
||||||
|
const hasCode = markTypes.includes("code");
|
||||||
|
const codeCombined = hasCode && markTypes.length > 1;
|
||||||
|
for (const mark of node.marks) {
|
||||||
|
switch (mark.type) {
|
||||||
|
case "bold":
|
||||||
|
textContent = codeCombined
|
||||||
|
? `<strong>${textContent}</strong>`
|
||||||
|
: `**${textContent}**`;
|
||||||
|
break;
|
||||||
|
case "italic":
|
||||||
|
textContent = codeCombined
|
||||||
|
? `<em>${textContent}</em>`
|
||||||
|
: `*${textContent}*`;
|
||||||
|
break;
|
||||||
|
case "code":
|
||||||
|
// When combined with another mark, wrap as <code> so the
|
||||||
|
// surrounding HTML marks can nest around it; otherwise use the
|
||||||
|
// plain backtick span.
|
||||||
|
textContent = codeCombined
|
||||||
|
? `<code>${textContent}</code>`
|
||||||
|
: `\`${textContent}\``;
|
||||||
|
break;
|
||||||
|
case "link": {
|
||||||
|
const href = mark.attrs?.href || "";
|
||||||
|
const title = mark.attrs?.title;
|
||||||
|
if (codeCombined) {
|
||||||
|
// Emit an HTML anchor so it can wrap the nested <code>.
|
||||||
|
const safeHref = escapeAttr(href);
|
||||||
|
if (title) {
|
||||||
|
textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
|
||||||
|
} else {
|
||||||
|
textContent = `<a href="${safeHref}">${textContent}</a>`;
|
||||||
|
}
|
||||||
|
} else if (title) {
|
||||||
|
// Emit the optional markdown link title; escape an embedded
|
||||||
|
// double-quote so it cannot terminate the title string early.
|
||||||
|
const safeTitle = String(title).replace(/"/g, '\\"');
|
||||||
|
textContent = `[${textContent}](${href} "${safeTitle}")`;
|
||||||
|
} else {
|
||||||
|
textContent = `[${textContent}](${href})`;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case "strike":
|
||||||
|
textContent = codeCombined
|
||||||
|
? `<s>${textContent}</s>`
|
||||||
|
: `~~${textContent}~~`;
|
||||||
|
break;
|
||||||
|
case "underline":
|
||||||
|
textContent = `<u>${textContent}</u>`;
|
||||||
|
break;
|
||||||
|
case "subscript":
|
||||||
|
textContent = `<sub>${textContent}</sub>`;
|
||||||
|
break;
|
||||||
|
case "superscript":
|
||||||
|
textContent = `<sup>${textContent}</sup>`;
|
||||||
|
break;
|
||||||
|
case "highlight": {
|
||||||
|
// Preserve a null/empty color as a plain highlight (a bare
|
||||||
|
// <mark> with no background-color); only emit the style when a
|
||||||
|
// color is actually set, so a plain highlight is not forced to
|
||||||
|
// yellow on export.
|
||||||
|
const color = mark.attrs?.color;
|
||||||
|
textContent = color
|
||||||
|
? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
|
||||||
|
: `<mark>${textContent}</mark>`;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case "textStyle":
|
||||||
|
if (mark.attrs?.color) {
|
||||||
|
textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "comment": {
|
||||||
|
// Emit the inline comment anchor so highlights round-trip. The
|
||||||
|
// schema's Comment mark parses span[data-comment-id] (attrs
|
||||||
|
// commentId/resolved).
|
||||||
|
const cid = mark.attrs?.commentId;
|
||||||
|
if (cid) {
|
||||||
|
const resolvedAttr = mark.attrs?.resolved
|
||||||
|
? ` data-resolved="true"`
|
||||||
|
: "";
|
||||||
|
textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return textContent;
|
||||||
|
|
||||||
|
case "codeBlock":
|
||||||
|
const language = node.attrs?.language || "";
|
||||||
|
// Strip ALL trailing newlines so the export is idempotent: marked
|
||||||
|
// re-adds exactly one trailing "\n" on import, so trimming only one
|
||||||
|
// here would let the text grow by "\n" on each round-trip. Removing
|
||||||
|
// every trailing newline makes repeated cycles stable.
|
||||||
|
const code = nodeContent
|
||||||
|
.map(processNode)
|
||||||
|
.join("")
|
||||||
|
.replace(/\n+$/, "");
|
||||||
|
return "```" + language + "\n" + code + "\n```";
|
||||||
|
|
||||||
|
case "bulletList":
|
||||||
|
return nodeContent
|
||||||
|
.map((item: any) => processListItem(item, "-"))
|
||||||
|
.join("\n");
|
||||||
|
|
||||||
|
case "orderedList":
|
||||||
|
return nodeContent
|
||||||
|
.map((item: any, index: number) =>
|
||||||
|
processListItem(item, `${index + 1}.`),
|
||||||
|
)
|
||||||
|
.join("\n");
|
||||||
|
|
||||||
|
case "taskList":
|
||||||
|
return nodeContent.map((item: any) => processTaskItem(item)).join("\n");
|
||||||
|
|
||||||
|
case "taskItem":
|
||||||
|
// Delegate to the same helper used by taskList so multi-block and
|
||||||
|
// nested task items render and indent consistently.
|
||||||
|
return processTaskItem(node);
|
||||||
|
|
||||||
|
case "listItem":
|
||||||
|
return nodeContent.map(processNode).join("\n");
|
||||||
|
|
||||||
|
case "blockquote":
|
||||||
|
// Prefix EVERY line of EVERY child with "> " and separate block-level
|
||||||
|
// children with a blank ">" line so code blocks / multi-paragraph
|
||||||
|
// quotes round-trip correctly.
|
||||||
|
return nodeContent
|
||||||
|
.map((n: any) =>
|
||||||
|
processNode(n)
|
||||||
|
.split("\n")
|
||||||
|
.map((line: string) => (line.length ? `> ${line}` : ">"))
|
||||||
|
.join("\n"),
|
||||||
|
)
|
||||||
|
.join("\n>\n");
|
||||||
|
|
||||||
|
case "horizontalRule":
|
||||||
|
return "---";
|
||||||
|
|
||||||
|
case "hardBreak":
|
||||||
|
// Two trailing spaces before the newline encode a markdown hard break;
|
||||||
|
// a bare "\n" would be reimported as a soft break and lost.
|
||||||
|
return " \n";
|
||||||
|
|
||||||
|
case "image":
|
||||||
|
const imgAlt = node.attrs?.alt || "";
|
||||||
|
// Neutralize characters that could break out of the markdown image
|
||||||
|
// URL: spaces/newlines and parentheses would terminate the (...) target
|
||||||
|
// and let a stored src inject following markdown/HTML. Percent-encode
|
||||||
|
// them so the URL stays a single inert token.
|
||||||
|
const imgSrc = encodeMdUrl(node.attrs?.src);
|
||||||
|
// No "caption" attribute exists in the Docmost image schema, so we do
|
||||||
|
// not emit one (the previous caption branch was dead).
|
||||||
|
return ``;
|
||||||
|
|
||||||
|
case "video": {
|
||||||
|
// Emit the schema-matching <video> element so generateJSON rebuilds the
|
||||||
|
// node with its attrs intact. The schema's parseHTML reads src/aria-label
|
||||||
|
// from the standard attributes and the remaining attrs from data-*.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||||
|
if (attrs.alt) parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
|
||||||
|
if (attrs.attachmentId)
|
||||||
|
parts.push(
|
||||||
|
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||||
|
);
|
||||||
|
if (attrs.width != null)
|
||||||
|
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||||
|
if (attrs.height != null)
|
||||||
|
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||||
|
if (attrs.size != null)
|
||||||
|
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||||
|
if (attrs.align)
|
||||||
|
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||||
|
if (attrs.aspectRatio != null)
|
||||||
|
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||||
|
// Wrap in a block <div> so marked treats it as a block (a bare <video>
|
||||||
|
// is inline-level HTML and marked wraps it in <p>, leaving a spurious
|
||||||
|
// empty paragraph beside the hoisted block atom). The wrapper has no
|
||||||
|
// data-type, so the schema parser ignores it and just hoists the video.
|
||||||
|
return `<div><video ${parts.join(" ")}></video></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "youtube": {
|
||||||
|
// Emit the schema-matching div[data-type="youtube"]; the schema reads
|
||||||
|
// src from data-src and width/height/align from data-* attributes.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [
|
||||||
|
`data-type="youtube"`,
|
||||||
|
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||||
|
];
|
||||||
|
if (attrs.width != null)
|
||||||
|
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||||
|
if (attrs.height != null)
|
||||||
|
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||||
|
if (attrs.align)
|
||||||
|
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||||
|
return `<div ${parts.join(" ")}></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "table": {
|
||||||
|
// A GFM pipe table cannot represent merged cells. If ANY cell carries
|
||||||
|
// colspan>1 or rowspan>1, a pipe table would corrupt the grid on
|
||||||
|
// re-import, so emit the WHOLE table as raw HTML <table> instead: the
|
||||||
|
// schema's table family parseHTML (tag table/tr/td/th, with colspan/
|
||||||
|
// rowspan read from the same-named HTML attrs and align via parseHTML)
|
||||||
|
// round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
|
||||||
|
const tableRows: any[] = nodeContent;
|
||||||
|
if (tableRows.length === 0) return "";
|
||||||
|
const hasSpan = tableRows.some((row: any) =>
|
||||||
|
(row.content || []).some(
|
||||||
|
(cell: any) =>
|
||||||
|
(cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
if (hasSpan) {
|
||||||
|
// Render each cell's block children to HTML (marked does NOT parse
|
||||||
|
// markdown inside a raw HTML block, so emitting markdown here would
|
||||||
|
// leak literal ** / `` into the cell). blockToHtml mirrors the schema
|
||||||
|
// HTML so inner formatting re-parses into the right marks/nodes.
|
||||||
|
const renderHtmlCell = (cell: any): string => {
|
||||||
|
const tag = cell.type === "tableHeader" ? "th" : "td";
|
||||||
|
const a = cell.attrs || {};
|
||||||
|
const cellParts: string[] = [];
|
||||||
|
if ((a.colspan ?? 1) > 1)
|
||||||
|
cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
|
||||||
|
if ((a.rowspan ?? 1) > 1)
|
||||||
|
cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
|
||||||
|
if (a.align) cellParts.push(`align="${escapeAttr(a.align)}"`);
|
||||||
|
const open = cellParts.length
|
||||||
|
? `<${tag} ${cellParts.join(" ")}>`
|
||||||
|
: `<${tag}>`;
|
||||||
|
const inner = (cell.content || [])
|
||||||
|
.map((block: any) => blockToHtml(block))
|
||||||
|
.join("");
|
||||||
|
return `${open}${inner}</${tag}>`;
|
||||||
|
};
|
||||||
|
const htmlRows = tableRows
|
||||||
|
.map(
|
||||||
|
(row: any) =>
|
||||||
|
`<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`,
|
||||||
|
)
|
||||||
|
.join("");
|
||||||
|
return `<table><tbody>${htmlRows}</tbody></table>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No merged cells: emit a GFM table (header row + separator) so the
|
||||||
|
// markdown can be parsed back into a table on re-import.
|
||||||
|
const rows = tableRows.map(processNode);
|
||||||
|
const headerCells = tableRows[0]?.content || [];
|
||||||
|
const columns = headerCells.length || 1;
|
||||||
|
// Derive alignment markers (:--, :-:, --:) from each header cell.
|
||||||
|
const markers = Array.from({ length: columns }, (_, i) => {
|
||||||
|
const align = headerCells[i]?.attrs?.align;
|
||||||
|
switch (align) {
|
||||||
|
case "left":
|
||||||
|
return ":--";
|
||||||
|
case "center":
|
||||||
|
return ":-:";
|
||||||
|
case "right":
|
||||||
|
return "--:";
|
||||||
|
default:
|
||||||
|
return "---";
|
||||||
|
}
|
||||||
|
});
|
||||||
|
const separator = "| " + markers.join(" | ") + " |";
|
||||||
|
return [rows[0], separator, ...rows.slice(1)].join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
case "tableRow":
|
||||||
|
return "| " + nodeContent.map(processNode).join(" | ") + " |";
|
||||||
|
|
||||||
|
case "tableCell":
|
||||||
|
case "tableHeader": {
|
||||||
|
// Join multiple block children with a space (not "") so adjacent blocks
|
||||||
|
// like a paragraph followed by a list don't collide into "line1- a".
|
||||||
|
// Then collapse newlines and escape pipes so a cell containing "|" or a
|
||||||
|
// line break cannot corrupt the surrounding GFM row.
|
||||||
|
return nodeContent
|
||||||
|
.map(processNode)
|
||||||
|
.join(" ")
|
||||||
|
.replace(/\r?\n/g, " ")
|
||||||
|
.replace(/\|/g, "\\|");
|
||||||
|
}
|
||||||
|
|
||||||
|
case "callout":
|
||||||
|
const calloutType = node.attrs?.type || "info";
|
||||||
|
const calloutContent = nodeContent.map(processNode).join("\n");
|
||||||
|
return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
|
||||||
|
|
||||||
|
case "details":
|
||||||
|
return nodeContent.map(processNode).join("\n");
|
||||||
|
|
||||||
|
case "detailsSummary":
|
||||||
|
const summaryText = nodeContent.map(processNode).join("");
|
||||||
|
return `<details>\n<summary>${summaryText}</summary>\n`;
|
||||||
|
|
||||||
|
case "detailsContent":
|
||||||
|
const detailsText = nodeContent.map(processNode).join("\n");
|
||||||
|
return `${detailsText}\n</details>`;
|
||||||
|
|
||||||
|
case "mathInline": {
|
||||||
|
// The schema's `text` attribute has no parseHTML, so TipTap's default
|
||||||
|
// parser reads it from the `text` HTML attribute (NOT the element's text
|
||||||
|
// content). Emit span[data-type="mathInline"] carrying the LaTeX in a
|
||||||
|
// `text="..."` attribute so it round-trips. marked cannot parse $...$
|
||||||
|
// back, so the previous form was lossy.
|
||||||
|
const inlineMath = node.attrs?.text || "";
|
||||||
|
return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "mathBlock": {
|
||||||
|
// Same as mathInline: the LaTeX must ride in the `text` HTML attribute
|
||||||
|
// for the schema's default parser to recover it.
|
||||||
|
const blockMath = node.attrs?.text || "";
|
||||||
|
return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "mention": {
|
||||||
|
// Emit span[data-type="mention"] with the schema's data-* attributes so
|
||||||
|
// generateJSON rebuilds the mention node instead of leaving "@label"
|
||||||
|
// plain text that cannot re-parse.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [`data-type="mention"`];
|
||||||
|
if (attrs.id) parts.push(`data-id="${escapeAttr(attrs.id)}"`);
|
||||||
|
if (attrs.label)
|
||||||
|
parts.push(`data-label="${escapeAttr(attrs.label)}"`);
|
||||||
|
if (attrs.entityType)
|
||||||
|
parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
|
||||||
|
if (attrs.entityId)
|
||||||
|
parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
|
||||||
|
if (attrs.slugId)
|
||||||
|
parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
|
||||||
|
if (attrs.creatorId)
|
||||||
|
parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
|
||||||
|
if (attrs.anchorId)
|
||||||
|
parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
|
||||||
|
// Keep the label as visible text content too; the schema reads attrs
|
||||||
|
// from data-*, so the inner text is purely cosmetic and harmless.
|
||||||
|
const mentionLabel = attrs.label || attrs.id || "";
|
||||||
|
// The label is visible element TEXT content here (the data-* attrs above
|
||||||
|
// carry the real values), so escape it for the text context, not attrs.
|
||||||
|
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "attachment": {
|
||||||
|
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
|
||||||
|
// the schema stores name/url (plus mime/size/attachmentId). Emit the
|
||||||
|
// schema-matching div[data-type="attachment"] with data-attachment-*
|
||||||
|
// attrs so the node round-trips instead of degrading to a markdown link.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [
|
||||||
|
`data-type="attachment"`,
|
||||||
|
`data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
|
||||||
|
];
|
||||||
|
if (attrs.name)
|
||||||
|
parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
|
||||||
|
if (attrs.mime)
|
||||||
|
parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
|
||||||
|
if (attrs.size != null)
|
||||||
|
parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
|
||||||
|
if (attrs.attachmentId)
|
||||||
|
parts.push(
|
||||||
|
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||||
|
);
|
||||||
|
return `<div ${parts.join(" ")}></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "drawio":
|
||||||
|
case "excalidraw": {
|
||||||
|
// Emit the schema-matching div[data-type=...] carrying the diagram's
|
||||||
|
// attrs as data-* (the schema's diagramAttributes reads src/title/alt/
|
||||||
|
// width/height/size/aspectRatio/align/attachmentId from data-*), so the
|
||||||
|
// diagram round-trips instead of degrading to a lossy placeholder.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [
|
||||||
|
`data-type="${type}"`,
|
||||||
|
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||||
|
];
|
||||||
|
if (attrs.title != null)
|
||||||
|
parts.push(`data-title="${escapeAttr(attrs.title)}"`);
|
||||||
|
if (attrs.alt != null) parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
|
||||||
|
if (attrs.width != null)
|
||||||
|
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||||
|
if (attrs.height != null)
|
||||||
|
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||||
|
if (attrs.size != null)
|
||||||
|
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||||
|
if (attrs.aspectRatio != null)
|
||||||
|
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||||
|
if (attrs.align)
|
||||||
|
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||||
|
if (attrs.attachmentId)
|
||||||
|
parts.push(
|
||||||
|
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||||
|
);
|
||||||
|
return `<div ${parts.join(" ")}></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "embed": {
|
||||||
|
// Emit the schema-matching div[data-type="embed"]; the schema reads
|
||||||
|
// src/provider/align/width/height from data-* attributes so the node
|
||||||
|
// (and its provider iframe info) survives the round-trip.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [
|
||||||
|
`data-type="embed"`,
|
||||||
|
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||||
|
`data-provider="${escapeAttr(attrs.provider ?? "")}"`,
|
||||||
|
];
|
||||||
|
if (attrs.align)
|
||||||
|
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||||
|
if (attrs.width != null)
|
||||||
|
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||||
|
if (attrs.height != null)
|
||||||
|
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||||
|
return `<div ${parts.join(" ")}></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "audio": {
|
||||||
|
// Emit the schema-matching <audio> element (was emitting nothing). The
|
||||||
|
// schema reads src from src and attachmentId/size from data-*.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||||
|
if (attrs.attachmentId)
|
||||||
|
parts.push(
|
||||||
|
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||||
|
);
|
||||||
|
if (attrs.size != null)
|
||||||
|
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||||
|
// Wrap in a block <div> for the same reason as video: a bare <audio> is
|
||||||
|
// inline-level HTML that marked would wrap in <p>.
|
||||||
|
return `<div><audio ${parts.join(" ")}></audio></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "pdf": {
|
||||||
|
// Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
|
||||||
|
// The schema reads src/width/height from standard attrs and name/
|
||||||
|
// attachmentId/size from data-*.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [
|
||||||
|
`data-type="pdf"`,
|
||||||
|
`src="${escapeAttr(attrs.src ?? "")}"`,
|
||||||
|
];
|
||||||
|
if (attrs.name) parts.push(`data-name="${escapeAttr(attrs.name)}"`);
|
||||||
|
if (attrs.attachmentId)
|
||||||
|
parts.push(
|
||||||
|
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||||
|
);
|
||||||
|
if (attrs.size != null)
|
||||||
|
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||||
|
if (attrs.width != null)
|
||||||
|
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||||
|
if (attrs.height != null)
|
||||||
|
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||||
|
return `<div ${parts.join(" ")}></div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "columns": {
|
||||||
|
// Emit the schema-matching div[data-type="columns"] wrapper so the
|
||||||
|
// multi-column layout survives. Without a case the children were
|
||||||
|
// concatenated with no separator and the text merged. The schema reads
|
||||||
|
// layout from data-layout and widthMode from data-width-mode. The whole
|
||||||
|
// block is raw HTML, so render children via blockToHtml (NOT markdown,
|
||||||
|
// which marked would not re-parse inside a raw HTML block).
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [`data-type="columns"`];
|
||||||
|
if (attrs.layout)
|
||||||
|
parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
|
||||||
|
if (attrs.widthMode && attrs.widthMode !== "normal")
|
||||||
|
parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
|
||||||
|
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||||
|
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "column": {
|
||||||
|
// Emit the schema-matching div[data-type="column"]; the schema reads the
|
||||||
|
// column width from data-width. Children are rendered as HTML so their
|
||||||
|
// formatting survives inside this raw HTML block.
|
||||||
|
const attrs = node.attrs || {};
|
||||||
|
const parts: string[] = [`data-type="column"`];
|
||||||
|
if (attrs.width)
|
||||||
|
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||||
|
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||||
|
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||||
|
}
|
||||||
|
|
||||||
|
case "subpages":
|
||||||
|
return "{{SUBPAGES}}";
|
||||||
|
|
||||||
|
default:
|
||||||
|
// Fallback: process children
|
||||||
|
return nodeContent.map(processNode).join("");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Render a run of inline nodes (text + marks, hard breaks, inline atoms) as
// HTML. This is the inline renderer for the raw-HTML fallbacks (spanned tables,
// columns): marked does not re-parse markdown inside a raw HTML block, so
// markdown mark syntax would survive there as literal characters. Every mark is
// therefore written as the HTML element the schema's parseHTML accepts.
//
// - hardBreak      -> <br>
// - non-text nodes -> delegated to processNode (inline atoms such as mention /
//                     mathInline already emit schema HTML there)
// - text           -> escaped for the text context, then wrapped mark by mark
//                     in the order the marks are listed on the node
// Marks of an unrecognised type leave the text unchanged.
const inlineToHtml = (inlineNodes: any[]): string => {
  // Marks that map 1:1 onto an attribute-less HTML element.
  const plainTags = new Map<string, string>([
    ["bold", "strong"],
    ["italic", "em"],
    ["code", "code"],
    ["strike", "s"],
    ["underline", "u"],
    ["subscript", "sub"],
    ["superscript", "sup"],
  ]);

  // Wrap already-rendered HTML in the element for a single mark.
  const applyMark = (html: string, mark: any): string => {
    const tag = plainTags.get(mark.type);
    if (tag !== undefined) return `<${tag}>${html}</${tag}>`;

    if (mark.type === "link") {
      return `<a href="${escapeAttr(mark.attrs?.href || "")}">${html}</a>`;
    }
    if (mark.type === "highlight") {
      return mark.attrs?.color
        ? `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${html}</mark>`
        : `<mark>${html}</mark>`;
    }
    if (mark.type === "textStyle") {
      // Only a colour is mirrored; a textStyle without one adds nothing.
      return mark.attrs?.color
        ? `<span style="color: ${escapeAttr(mark.attrs.color)}">${html}</span>`
        : html;
    }
    if (mark.type === "comment") {
      // Inline comment anchor, so commented text inside a raw-HTML container
      // (columns / spanned table cells) also round-trips.
      if (!mark.attrs?.commentId) return html;
      const resolvedAttr = mark.attrs?.resolved ? ` data-resolved="true"` : "";
      return `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${resolvedAttr}>${html}</span>`;
    }
    return html;
  };

  const renderInline = (n: any): string => {
    if (n.type === "hardBreak") return "<br>";
    if (n.type !== "text") return processNode(n);
    let html = escapeHtmlText(n.text || "");
    for (const mark of n.marks || []) {
      html = applyMark(html, mark);
    }
    return html;
  };

  return (inlineNodes || []).map((n: any) => renderInline(n)).join("");
};
|
||||||
|
|
||||||
|
// Build the schema-matching <img> element for an image node. It is a shared
// helper so an image can be written as real HTML wherever a raw-HTML container
// needs it (inside a column or a spanned table cell), where markdown image
// syntax (`![alt](src)`) would NOT be re-parsed and would survive as literal
// text. src/alt/title/width/height/align are emitted under their own names;
// size, attachmentId and aspectRatio are emitted as data-size,
// data-attachment-id and data-aspect-ratio.
//
// `src` is always emitted (empty when absent). alt/title/align/attachmentId are
// emitted only when truthy; width/height/size/aspectRatio whenever they are not
// null/undefined, so a numeric 0 is kept.
const imageToHtml = (node: any): string => {
  const a = node.attrs || {};
  const html: string[] = [`src="${escapeAttr(a.src ?? "")}"`];

  // Append name="value" when the caller-evaluated presence check passed.
  const emit = (name: string, value: any, present: boolean): void => {
    if (present) html.push(`${name}="${escapeAttr(value)}"`);
  };

  emit("alt", a.alt, Boolean(a.alt));
  emit("title", a.title, Boolean(a.title));
  emit("width", a.width, a.width != null);
  emit("height", a.height, a.height != null);
  emit("align", a.align, Boolean(a.align));
  emit("data-size", a.size, a.size != null);
  emit("data-attachment-id", a.attachmentId, Boolean(a.attachmentId));
  emit("data-aspect-ratio", a.aspectRatio, a.aspectRatio != null);

  return `<img ${html.join(" ")}>`;
};
|
||||||
|
|
||||||
|
// Build the schema-matching div[data-type="callout"] for a callout node. The
// banner type travels in data-callout-type: it is taken from attrs.type,
// defaults to "info" when missing, and is lower-cased. The callout's children
// are rendered through blockToHtml so they stay valid inside a raw-HTML
// container.
const calloutToHtml = (node: any): string => {
  const bannerType = (node.attrs?.type || "info").toLowerCase();
  const body = (node.content || []).map(blockToHtml).join("");
  const openTag = `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">`;
  return `${openTag}${body}</div>`;
};
|
||||||
|
|
||||||
|
// Build the outer <details> element of a schema-matching details tree. The
// schema parses <details>, summary[data-type="detailsSummary"] and
// div[data-type="detailsContent"]; the node's children (normally the summary
// and the content node) are each rendered through blockToHtml and concatenated
// inside the wrapper.
const detailsToHtml = (node: any): string => {
  const parts: string[] = (node.content || []).map(blockToHtml);
  return `<details>${parts.join("")}</details>`;
};
|
||||||
|
// Build the <summary data-type="detailsSummary"> element of a details tree. The
// summary holds inline content, so its children go through inlineToHtml.
const detailsSummaryToHtml = (node: any): string => {
  const label = inlineToHtml(node.content || []);
  return `<summary data-type="detailsSummary">${label}</summary>`;
};
|
||||||
|
// Build the div[data-type="detailsContent"] element of a details tree. The
// content node holds blocks, so each child is rendered through blockToHtml.
const detailsContentToHtml = (node: any): string => {
  const parts: string[] = (node.content || []).map(blockToHtml);
  return `<div data-type="detailsContent">${parts.join("")}</div>`;
};
|
||||||
|
|
||||||
|
// Build schema-matching task-list HTML: ul[data-type="taskList"] containing one
// li[data-type="taskItem"][data-checked="true|false"] per item. This is the
// shape bridgeTaskLists (in collaboration.ts) recognises; writing it directly
// keeps task lists inside columns/cells from degrading to literal "- [ ]" text.
// Each item's block children are rendered through blockChildrenToHtml.
const taskListToHtml = (node: any): string => {
  const renderItem = (taskItem: any): string => {
    const state = taskItem.attrs?.checked ? "true" : "false";
    return `<li data-type="taskItem" data-checked="${state}">${blockChildrenToHtml(taskItem)}</li>`;
  };

  const rows: string[] = (node.content || []).map((taskItem: any) =>
    renderItem(taskItem),
  );
  return `<ul data-type="taskList">${rows.join("")}</ul>`;
};
|
||||||
|
|
||||||
|
// Render a block node to HTML for the raw-HTML containers (spanned tables,
// columns). marked does NOT re-parse markdown inside a raw-HTML block, so EVERY
// block type that can appear inside a column or a spanned cell must be emitted
// as schema-matching HTML here — never as markdown, or it would land as literal
// text on re-import.
//
// Dispatch:
//   - paragraph / heading / lists / codeBlock / image / blockquote / hr /
//     callout / details / taskList get explicit HTML here;
//   - nodes whose processNode case already produces schema-matching HTML
//     (table, columns/column, mathBlock, media, embed, attachment, diagrams)
//     are delegated to processNode;
//   - any other type is wrapped in a plain <div> so its content is preserved.
const blockToHtml = (block: any): string => {
  const children = block.content || [];
  switch (block.type) {
    case "paragraph":
      return `<p>${inlineToHtml(children)}</p>`;
    case "heading": {
      // The level becomes part of the TAG NAME, which cannot be escaped, so
      // coerce it to an integer and clamp it to the h1..h6 range HTML defines.
      // A missing, zero or non-numeric level falls back to 1.
      const requested = Math.trunc(Number(block.attrs?.level)) || 1;
      const level = Math.min(6, Math.max(1, requested));
      return `<h${level}>${inlineToHtml(children)}</h${level}>`;
    }
    case "bulletList":
      return `<ul>${children
        .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
        .join("")}</ul>`;
    case "orderedList":
      return `<ol>${children
        .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
        .join("")}</ol>`;
    case "codeBlock": {
      const lang = block.attrs?.language || "";
      // The code itself is element TEXT content (between <code> tags), so it
      // must escape < > & — NOT the attribute escaper. The language rides in
      // a class ATTRIBUTE, so it uses escapeAttr.
      const code = escapeHtmlText(
        children
          .map(processNode)
          .join("")
          .replace(/\n+$/, ""),
      );
      const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
      return `<pre><code${cls}>${code}</code></pre>`;
    }
    case "image":
      return imageToHtml(block);
    case "blockquote":
      return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
    case "horizontalRule":
      return "<hr>";
    case "callout":
      return calloutToHtml(block);
    case "details":
      return detailsToHtml(block);
    case "detailsSummary":
      return detailsSummaryToHtml(block);
    case "detailsContent":
      return detailsContentToHtml(block);
    case "taskList":
      return taskListToHtml(block);
    case "taskItem":
      // A bare taskItem (outside a taskList) still needs a wrapping list so
      // the schema parses it; wrap it in a single-item taskList.
      return taskListToHtml({ content: [block] });
    // table (incl. spanned), columns/column, math, media, embed, attachment,
    // etc. already emit schema-matching HTML from processNode.
    case "table":
    case "columns":
    case "column":
    case "mathBlock":
    case "video":
    case "audio":
    case "pdf":
    case "youtube":
    case "embed":
    case "attachment":
    case "drawio":
    case "excalidraw":
      return processNode(block);
    default: {
      // Any still-unhandled block type: NEVER fall back to markdown inside a
      // raw-HTML block (it would become literal text). Wrap the rendered
      // children in a <div> so their content is preserved.
      //
      // Children are treated as INLINE content when all of them are inline
      // nodes (text, hardBreak, or the inline atoms mention/mathInline). Testing
      // only for "text" would misclassify a run such as [text, hardBreak, text]
      // as blocks, and a text node pushed through blockToHtml renders as an
      // empty <div>, silently dropping its text.
      const isInline = (c: any): boolean =>
        c?.type === "text" ||
        c?.type === "hardBreak" ||
        c?.type === "mention" ||
        c?.type === "mathInline";
      if (children.length && !children.every((c: any) => isInline(c))) {
        // Block children; a stray inline node among them is still rendered as
        // inline content rather than being discarded.
        const rendered = children.map((c: any) =>
          isInline(c) ? inlineToHtml([c]) : blockToHtml(c),
        );
        return `<div>${rendered.join("")}</div>`;
      }
      return `<div>${inlineToHtml(children)}</div>`;
    }
  }
};
|
||||||
|
|
||||||
|
// HTML counterpart of processListItem for the raw-HTML fallback path: a list
// item holds block content, so render each of its block children through
// blockToHtml and concatenate the results with no separator.
const blockChildrenToHtml = (item: any): string => {
  const blocks: any[] = item.content || [];
  return blocks.map((child: any) => blockToHtml(child)).join("");
};
|
||||||
|
|
||||||
|
// Lay out the rendered children of one list item beneath its marker.
//
// Each entry of `childStrings` is one rendered child block and may span several
// lines. The first physical line of the first child is written as
// `${prefix} ${line}`; every other line — the rest of the first child and all
// lines of later children (nested lists, code blocks, extra paragraphs) — is
// prefixed with `indentWidth` spaces so it stays inside the item instead of
// collapsing to column 0. Blank lines are kept empty so no trailing whitespace
// is produced.
//
// `indentWidth` is supplied by the caller because it must equal the width of
// the LIST MARKER, which is not always the width of the visible prefix:
//   - bullet  "- "          -> 2
//   - task    "- [ ] "      -> 2 (the marker is still "- "; "[ ] " is content)
//   - ordered "1. " / "10. " -> 3 / 4 (grows with the number's digits)
// CommonMark anchors nested content to the marker column, so an under-indented
// ordered item would be re-parsed as sibling/loose content on re-import.
//
// Returns the lines joined with "\n"; an empty `childStrings` yields "".
const indentItemChildren = (
  childStrings: string[],
  prefix: string,
  indentWidth: number,
): string => {
  const pad = " ".repeat(indentWidth);
  const out: string[] = [];
  let markerPlaced = false;

  for (const child of childStrings) {
    for (const line of child.split("\n")) {
      if (!markerPlaced) {
        // The very first physical line carries the marker.
        out.push(`${prefix} ${line}`);
        markerPlaced = true;
        continue;
      }
      out.push(line === "" ? "" : pad + line);
    }
  }

  return out.join("\n");
};
|
||||||
|
|
||||||
|
// Render one bullet/ordered list item as markdown under `prefix` (e.g. "-" or
// "1."). An item with no children renders as the bare prefix.
//
// The marker actually written is `${prefix} ` (prefix plus one space), so the
// continuation indent is prefix.length + 1: "-" -> 2, "1." -> 3, "10." -> 4.
// That holds for both bullet and ordered items because for them the visible
// prefix IS the list marker.
const processListItem = (item: any, prefix: string): string => {
  const rendered: string[] = (item.content || []).map(processNode);
  return rendered.length === 0
    ? prefix
    : indentItemChildren(rendered, prefix, prefix.length + 1);
};
|
||||||
|
|
||||||
|
// Render one task-list item as markdown: "- [x]" when attrs.checked is truthy,
// otherwise "- [ ]", followed by the item's rendered children.
const processTaskItem = (item: any): string => {
  const marker = `- ${item.attrs?.checked ? "[x]" : "[ ]"}`;
  const rendered: string[] = (item.content || []).map(processNode);

  // An empty task item must still show its checkbox; without this early return
  // the indent step would yield "" and the "- [ ]"/"- [x]" row would vanish.
  if (rendered.length === 0) return marker;

  // The LIST marker of a task item is only "- " (2 columns); the checkbox is
  // item content. The continuation indent is therefore a fixed 2 and must not
  // be derived from the wider marker string.
  return indentItemChildren(rendered, marker, 2);
};
|
||||||
|
|
||||||
|
return processNode(content).trim();
|
||||||
|
}
|
||||||
156
packages/docmost-client/src/lib/markdown-document.ts
Normal file
156
packages/docmost-client/src/lib/markdown-document.ts
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
/**
|
||||||
|
* Self-contained Docmost-flavoured Markdown document (custom extensions).
|
||||||
|
*
|
||||||
|
* A single `.md` file that packages everything needed to losslessly round-trip
|
||||||
|
* a page through "download -> edit body -> re-upload":
|
||||||
|
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
|
||||||
|
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
|
||||||
|
* - a trailing `docmost:comments` block: a one-line JSON array of comment
|
||||||
|
* threads.
|
||||||
|
*
|
||||||
|
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
|
||||||
|
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
|
||||||
|
* importer without first stripping the blocks, the metadata cannot leak into the
|
||||||
|
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
|
||||||
|
* codeBlock node, so a fenced block is deliberately NOT used.)
|
||||||
|
*
|
||||||
|
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
|
||||||
|
* re-pastes an exported `.md` into a page, or a page documents this very
|
||||||
|
* format). To stay robust, parsing treats only the FINAL, document-ending
|
||||||
|
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
|
||||||
|
* opener whose closing `-->` sits at the very end of the file. Any earlier
|
||||||
|
* literal occurrence is left in the body untouched.
|
||||||
|
*
|
||||||
|
* NOTE on comments: in this version the comment THREAD records are preserved in
|
||||||
|
* the file but are NOT pushed back to the server on import — only the inline
|
||||||
|
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||||
|
* records stays with the comment tools/UI.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Page identity carried as a one-line JSON object in the leading
 * `docmost:meta` block of a self-contained markdown file. Only `version` is
 * required; every identity field is optional.
 */
export interface DocmostMdMeta {
  /** Version number of the meta block. */
  version: number;
  /** NOTE(review): presumably the Docmost page id — confirm against callers. */
  pageId?: string;
  /** NOTE(review): presumably the page's slug id — confirm against callers. */
  slugId?: string;
  /** Page title. */
  title?: string;
  /** NOTE(review): presumably the id of the containing space — confirm. */
  spaceId?: string;
  /** Parent page id; explicitly nullable in addition to being optional. */
  parentPageId?: string | null;
}
|
||||||
|
|
||||||
|
// Matches the leading `docmost:meta` block. It is anchored to the start of the
// input and tolerates leading whitespace. Capture group 1 is the text between
// the opener line and the first following line that starts with `-->` (the
// match is lazy), i.e. the JSON payload.
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
// Matches one `docmost:comments` opener line (with an optional `\r` before the
// newline). It carries the `g` flag so it can be run repeatedly to find the
// LAST opener, rather than end-anchoring a single regex, which would
// mis-capture across a literal opener that appears earlier in the body.
// Because it is global it keeps state in `lastIndex`; reset it before scanning.
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
|
||||||
|
|
||||||
|
/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. The meta block is always emitted; the comments block is
 * always emitted too (with `[]` when `comments` is empty or not an array) so
 * the format stays uniform and parsing stays simple.
 *
 * Both JSON payloads are made safe to embed in an HTML comment: any `-->` or
 * `--!>` sequence (either of which terminates an HTML comment) has its `>`
 * written as the JSON escape `\u003e`. Such a sequence can only occur inside a
 * JSON string value — e.g. a page title "A --> B" — and `JSON.parse` decodes
 * the escape back to `>`, so the parsed data is unchanged. Without this, the
 * comment would close early and the rest of the payload would leak into the
 * document whenever the file is rendered or imported without first stripping
 * the blocks.
 *
 * @param meta Page identity written into the `docmost:meta` block.
 * @param body Markdown body; surrounding whitespace is trimmed and a nullish
 *   value is treated as empty.
 * @param comments Comment thread records for the `docmost:comments` block.
 * @returns The complete file text, ending with a newline.
 */
export function serializeDocmostMarkdown(
  meta: DocmostMdMeta,
  body: string,
  comments: any[],
): string {
  // String(...) keeps the previous behaviour for a value JSON.stringify cannot
  // serialize (it returns undefined, which used to be interpolated as-is).
  const toCommentSafeJson = (value: unknown): string =>
    String(JSON.stringify(value)).replace(/--(!?)>/g, "--$1\\u003e");

  const metaJson = toCommentSafeJson(meta);
  const commentsJson = toCommentSafeJson(Array.isArray(comments) ? comments : []);
  const trimmedBody = (body ?? "").trim();
  return (
    `<!-- docmost:meta\n${metaJson}\n-->\n\n` +
    `${trimmedBody}\n\n` +
    `<!-- docmost:comments\n${commentsJson}\n-->\n`
  );
}
|
||||||
|
|
||||||
|
/**
 * Split a self-contained file back into `{ meta, body, comments }`.
 *
 * Tolerant of missing blocks: when the leading `docmost:meta` block or the
 * trailing `docmost:comments` block is absent (e.g. a hand-written plain
 * markdown file) the corresponding value is `null` and that text stays part of
 * the body. `\r\n` line endings are normalized to `\n` first.
 *
 * Only the LAST `docmost:comments` opener is considered, and only when its
 * closing `-->` ends the document (trailing whitespace allowed); any earlier
 * literal opener is left in the body untouched.
 *
 * @param full The complete file text; a nullish value is treated as "".
 * @returns The parsed meta (or `null`), the trimmed body, and the parsed
 *   comments payload (or `null`).
 * @throws Error when a block that IS present contains invalid JSON; the
 *   message names the block and includes the parser's message.
 */
export function parseDocmostMarkdown(full: string): {
  meta: DocmostMdMeta | null;
  body: string;
  comments: any[] | null;
} {
  // Parse one block's JSON, rewrapping a failure with the block's name.
  const parseBlockJson = (blockName: string, jsonText: string): any => {
    try {
      return JSON.parse(jsonText);
    } catch (e) {
      throw new Error(
        `Invalid ${blockName} JSON block: ${
          e instanceof Error ? e.message : String(e)
        }`,
      );
    }
  };

  // Normalize line endings so the anchored regexes work regardless of CRLF.
  const text = (full ?? "").replace(/\r\n/g, "\n");

  // Leading meta block: start-anchored, so it is already unambiguous.
  let meta: DocmostMdMeta | null = null;
  let bodyStart = 0;
  const metaHit = text.match(META_RE);
  if (metaHit) {
    meta = parseBlockJson("docmost:meta", metaHit[1]);
    // The body begins right after the matched meta block.
    bodyStart = (metaHit.index ?? 0) + metaHit[0].length;
  }

  // Walk every comments opener and remember only the last one.
  let openerStart = -1;
  let openerEnd = -1;
  COMMENTS_OPEN_RE.lastIndex = 0;
  for (
    let hit = COMMENTS_OPEN_RE.exec(text);
    hit !== null;
    hit = COMMENTS_OPEN_RE.exec(text)
  ) {
    openerStart = hit.index;
    openerEnd = hit.index + hit[0].length;
  }

  let comments: any[] | null = null;
  let bodyEnd = text.length;
  if (openerStart !== -1) {
    const tail = text.slice(openerEnd);
    // The closer must end the document.
    const closer = tail.match(/\r?\n-->[ \t]*\r?\n?\s*$/);
    if (closer) {
      comments = parseBlockJson("docmost:comments", tail.slice(0, closer.index));
      // Strip everything from the opener to the end of the document.
      bodyEnd = openerStart;
    }
  }

  return { meta, body: text.slice(bodyStart, bodyEnd).trim(), comments };
}
|
||||||
|
|
||||||
|
// --- docmost-sync addition (backport target: docmost-mcp/src/lib/markdown-document.ts) ---
|
||||||
|
|
||||||
|
/**
 * Serialize a self-contained markdown file with the meta block + body ONLY —
 * NO trailing `docmost:comments` block. The docmost-sync engine never touches
 * `/comments` (SPEC §3): the synced file carries just page identity (meta) and
 * the body, where comment threads survive only as inline
 * `<span data-comment-id>` anchor marks inside the body.
 *
 * `parseDocmostMarkdown` tolerates a missing comments block (it returns
 * `comments: null` and treats the rest as body), so a file produced here
 * round-trips through the parser.
 *
 * The meta JSON is made safe to embed in an HTML comment: any `-->` or `--!>`
 * sequence (either of which terminates an HTML comment) has its `>` written as
 * the JSON escape `\u003e`. Such a sequence can only occur inside a JSON
 * string value — e.g. a page title "A --> B" — and `JSON.parse` decodes the
 * escape back to `>`, so the parsed meta is unchanged. Without this, the
 * comment would close early and the rest of the meta would leak into the
 * document if the file were rendered or imported without stripping the block.
 *
 * @param meta Page identity written into the `docmost:meta` block.
 * @param body Markdown body; surrounding whitespace is trimmed and a nullish
 *   value is treated as empty.
 * @returns The file text, ending with a single newline.
 */
export function serializeDocmostMarkdownBody(
  meta: DocmostMdMeta,
  body: string,
): string {
  // String(...) keeps the previous behaviour for a value JSON.stringify cannot
  // serialize (it returns undefined, which used to be interpolated as-is).
  const metaJson = String(JSON.stringify(meta)).replace(
    /--(!?)>/g,
    "--$1\\u003e",
  );
  return `<!-- docmost:meta\n${metaJson}\n-->\n\n${(body ?? "").trim()}\n`;
}
|
||||||
897
packages/docmost-client/src/lib/node-ops.ts
Normal file
897
packages/docmost-client/src/lib/node-ops.ts
Normal file
@@ -0,0 +1,897 @@
|
|||||||
|
/**
|
||||||
|
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
|
||||||
|
* tree by node id.
|
||||||
|
*
|
||||||
|
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||||
|
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||||
|
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
|
||||||
|
* table cells hold their children in `content` just like any other block, so a
|
||||||
|
* single recursive walk reaches them all.
|
||||||
|
*
|
||||||
|
* Every exported function operates on a DEEP CLONE of the input document and
|
||||||
|
* returns the new document. The input doc and any `newNode`/`node` argument are
|
||||||
|
 * never mutated. All functions are defensively null-safe: missing or non-array
|
||||||
|
* `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Deep-clone a value without mutating the original. Uses `structuredClone`
 * when the runtime provides it and otherwise falls back to a JSON round-trip,
 * so the value is expected to be JSON-serializable.
 */
function clone<T>(value: T): T {
  return typeof structuredClone === "function"
    ? structuredClone(value)
    : (JSON.parse(JSON.stringify(value)) as T);
}
|
||||||
|
|
||||||
|
/**
 * Type guard: true only for a non-null, non-array value whose `typeof` is
 * "object". `null`, `undefined`, arrays, primitives and functions are rejected.
 */
function isObject(value: any): value is Record<string, any> {
  if (value == null) return false;
  if (Array.isArray(value)) return false;
  return typeof value === "object";
}
|
||||||
|
|
||||||
|
/**
 * True when `node` is an object whose `attrs` is an object with
 * `attrs.id === nodeId` (strict equality). Any other shape yields false.
 */
function matchesId(node: any, nodeId: string): boolean {
  if (!isObject(node)) return false;
  if (!isObject(node.attrs)) return false;
  return node.attrs.id === nodeId;
}
|
||||||
|
|
||||||
|
/**
 * Recursively concatenate all text contained in a node.
 *
 * A node contributes its own `text` when that is a string, followed by the
 * `blockPlainText` of each entry of its `content` array, in order and with no
 * separator. Nullish or non-object inputs yield "", and a missing or non-array
 * `content` contributes nothing.
 */
export function blockPlainText(node: any): string {
  if (!isObject(node)) return "";

  const ownText: string = typeof node.text === "string" ? node.text : "";
  if (!Array.isArray(node.content)) return ownText;

  return node.content.reduce(
    (acc: string, child: any) => acc + blockPlainText(child),
    ownText,
  );
}
|
||||||
|
|
||||||
|
/**
 * Truncate `text` to at most `n` UTF-16 code units, appending an ellipsis
 * ("…") when it was cut. Text of length `n` or less is returned unchanged.
 *
 * The cut never lands between the two halves of a surrogate pair: when the
 * unit just before the cut is a high surrogate and the unit just after it is
 * its low surrogate (e.g. an emoji straddling the limit), the cut moves back
 * one unit so the result does not end in a lone surrogate.
 */
function truncate(text: string, n: number): string {
  if (!(text.length > n)) return text;

  let end = n;
  const before = text.charCodeAt(end - 1);
  const after = text.charCodeAt(end);
  const splitsPair =
    before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  if (splitsPair) end -= 1;

  return text.slice(0, end) + "…";
}
|
||||||
|
|
||||||
|
/** One compact outline entry describing a single top-level block. */
export interface OutlineEntry {
  /** Position of the block within `doc.content`. */
  index: number;
  /** The block's `type`, or `undefined` when the block is not an object. */
  type: string | undefined;
  /** The block's `attrs.id`, or `null` when it has none. */
  id: string | null;
  /** The block's plain text, truncated for display. */
  firstText: string;
  /** Present for headings only: `attrs.level`, or `null` when absent. */
  level?: number | null;
  /** Present for tables only: number of rows. */
  rows?: number;
  /** Present for tables only: number of cells in the first row. */
  cols?: number;
  /** Present for tables only: truncated plain text of each first-row cell. */
  header?: string[];
  /** Present for list blocks only (bulletList/orderedList/taskList). */
  items?: number;
}
|
||||||
|
|
||||||
|
/**
 * Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
 * `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
 * table cells — compactness is the point; use `getNodeByRef` to drill into a
 * specific block.
 *
 * Each entry carries `{ index, type, id, firstText }`, plus type-specific
 * extras: headings add `level`; tables add `rows`/`cols` and the first row's
 * cell texts (truncated to 40 chars) as `header`; list blocks (types ending in
 * "List") add `items`. `firstText` is the block's plain text truncated to 100
 * chars.
 *
 * Null-safe: a missing or non-object doc/content yields `[]`, and a table or
 * list whose `content` (or first-row `content`) is missing or not an array is
 * reported with zero rows/cols/items and an empty header instead of throwing
 * or counting the characters of a string.
 *
 * @param doc The document node whose `content` array is outlined.
 * @returns One entry per top-level block, in document order.
 */
export function buildOutline(doc: any): OutlineEntry[] {
  if (!isObject(doc) || !Array.isArray(doc.content)) return [];

  // Treat anything that is not a real array as "no children".
  const asArray = (value: any): any[] => (Array.isArray(value) ? value : []);

  const out: OutlineEntry[] = [];
  for (let i = 0; i < doc.content.length; i++) {
    const block = doc.content[i];
    const type = isObject(block) ? block.type : undefined;
    const entry: OutlineEntry = {
      index: i,
      type,
      id: isObject(block) && isObject(block.attrs) ? block.attrs.id ?? null : null,
      firstText: truncate(blockPlainText(block), 100),
    };

    if (type === "heading") {
      entry.level = isObject(block.attrs) ? block.attrs.level ?? null : null;
    } else if (type === "table") {
      const rows = asArray(block.content);
      const firstRow = rows[0];
      const headerCells = asArray(isObject(firstRow) ? firstRow.content : undefined);
      entry.rows = rows.length;
      entry.cols = headerCells.length;
      entry.header = headerCells.map((cell: any) =>
        truncate(blockPlainText(cell), 40),
      );
    } else if (typeof type === "string" && type.endsWith("List")) {
      entry.items = asArray(block.content).length;
    }

    out.push(entry);
  }
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Look up one node by reference.
 *
 * - A `ref` shaped like `#<n>` (e.g. `#2`) addresses the TOP-LEVEL block at
 *   index `n` of `doc.content`. Nodes without an `attrs.id` (table, tableRow,
 *   tableCell) can only be reached this way.
 * - Any other `ref` is taken as a block id: the FIRST node, in depth-first
 *   document order, whose `attrs.id === ref` wins.
 *
 * @param doc The document to search; a non-object yields `null`.
 * @param ref `#<n>` index reference or a block id.
 * @returns `{ node, path, type }`, or `null` when nothing matches. `path`
 *   lists the child indices from the doc root to the node (`[index]` for a
 *   top-level block). `node` is a DEEP CLONE, so mutating it leaves `doc`
 *   untouched.
 */
export function getNodeByRef(
  doc: any,
  ref: string,
): { node: any; path: number[]; type: string | undefined } | null {
  if (!isObject(doc)) return null;

  // Index form: resolve directly against the top-level content array.
  const topLevel = typeof ref === "string" ? ref.match(/^#(\d+)$/) : null;
  if (topLevel) {
    const at = Number(topLevel[1]);
    const candidate = Array.isArray(doc.content) ? doc.content[at] : undefined;
    if (!isObject(candidate)) return null;
    return { node: clone(candidate), path: [at], type: candidate.type };
  }

  // Id form: pre-order walk; each child is tested before its own subtree.
  const findById = (
    parent: any,
    trail: number[],
  ): { node: any; path: number[]; type: string | undefined } | null => {
    if (!isObject(parent) || !Array.isArray(parent.content)) return null;
    for (let pos = 0; pos < parent.content.length; pos++) {
      const candidate = parent.content[pos];
      const here = trail.concat(pos);
      if (matchesId(candidate, ref)) {
        return { node: clone(candidate), path: here, type: candidate.type };
      }
      const deeper = findById(candidate, here);
      if (deeper != null) return deeper;
    }
    return null;
  };

  return findById(doc, []);
}
|
||||||
|
|
||||||
|
/**
 * Swap EVERY node whose `attrs.id === nodeId` for a deep clone of `newNode`,
 * at any depth (callouts and table cells included).
 *
 * Works on a clone of `doc`, so the input is left alone. Each match receives
 * its own clone of `newNode`, so the substitutes never share references, and a
 * freshly substituted node is not searched again.
 *
 * @param doc The document to operate on.
 * @param nodeId The `attrs.id` to replace.
 * @param newNode The replacement node.
 * @returns `{ doc, replaced }` — the edited clone and the number of nodes
 *   substituted.
 */
export function replaceNodeById(
  doc: any,
  nodeId: string,
  newNode: any,
): { doc: any; replaced: number } {
  const out = clone(doc);
  let replaced = 0;

  // Visit a parent's children: substitute matches in place, descend into the rest.
  const visit = (parent: any): void => {
    if (!isObject(parent) || !Array.isArray(parent.content)) return;
    const kids = parent.content;
    for (let k = 0; k < kids.length; k++) {
      if (matchesId(kids[k], nodeId)) {
        kids[k] = clone(newNode);
        replaced += 1;
      } else {
        visit(kids[k]);
      }
    }
  };

  visit(out);
  return { doc: out, replaced };
}
|
||||||
|
|
||||||
|
/**
 * Drop EVERY node whose `attrs.id === nodeId` from its parent's `content`
 * array, at any depth (callouts and tables included).
 *
 * Works on a clone of `doc`, so the input is left alone.
 *
 * @param doc The document to operate on.
 * @param nodeId The `attrs.id` to remove.
 * @returns `{ doc, deleted }` — the edited clone and the number of nodes
 *   removed.
 */
export function deleteNodeById(
  doc: any,
  nodeId: string,
): { doc: any; deleted: number } {
  const out = clone(doc);
  let deleted = 0;

  // Rebuild a parent's content from the children that survive, pruning each
  // survivor's own subtree first.
  const prune = (parent: any): void => {
    if (!isObject(parent) || !Array.isArray(parent.content)) return;
    const survivors: any[] = [];
    for (const kid of parent.content) {
      if (matchesId(kid, nodeId)) {
        deleted += 1;
        continue;
      }
      prune(kid);
      survivors.push(kid);
    }
    parent.content = survivors;
  };

  prune(out);
  return { doc: out, deleted };
}
|
||||||
|
|
||||||
|
/**
 * Return a deep clone of `doc` in which no node or mark `attrs` object holds a
 * key whose value is strictly `undefined`.
 *
 * This prepares the tree for Yjs, which (per the original note on this helper)
 * fails with an opaque "Unexpected content type" when asked to store an
 * `undefined` attribute value.
 *
 * Only `undefined` is removed: `null`, `false`, `0` and `""` are kept as-is.
 * The input is never mutated, and non-object inputs pass through the clone
 * untouched.
 *
 * @param doc The document to sanitise.
 * @returns The sanitised clone.
 */
export function sanitizeForYjs(doc: any): any {
  const out = clone(doc);

  // Delete the `undefined`-valued keys of one attrs object, if it is one.
  const dropUndefined = (attrs: any): void => {
    if (!isObject(attrs)) return;
    Object.keys(attrs).forEach((name) => {
      if (attrs[name] === undefined) delete attrs[name];
    });
  };

  // Explicit worklist instead of recursion; visiting order does not affect
  // the result because every attrs object is cleaned independently.
  const pending: any[] = [out];
  while (pending.length > 0) {
    const node = pending.pop();
    if (!isObject(node)) continue;

    dropUndefined(node.attrs);

    if (Array.isArray(node.marks)) {
      for (const mark of node.marks) {
        if (isObject(mark)) dropUndefined(mark.attrs);
      }
    }

    if (Array.isArray(node.content)) {
      for (const child of node.content) pending.push(child);
    }
  }

  return out;
}
|
||||||
|
|
||||||
|
/**
 * Diagnostics helper: report where the FIRST attribute value that Yjs cannot
 * store lives, i.e. a value that is `undefined`, a `function`, a `symbol` or a
 * `bigint`, in any `node.attrs` or `mark.attrs`.
 *
 * The tree is scanned in document order; for each node its own attrs come
 * first, then its marks, then its children. The root doc's attrs and children
 * are scanned, but not marks on the root itself.
 *
 * @param doc The document to inspect; a non-object yields `null`.
 * @returns A path relative to the doc root followed by the offending kind,
 *   e.g. `content[3].content[0].attrs.indent (undefined)`, or `null` when
 *   every attribute is storable.
 */
export function findUnstorableAttr(doc: any): string | null {
  const UNSTORABLE_KINDS = new Set(["undefined", "function", "symbol", "bigint"]);

  // Name of the unstorable kind of `value`, or null when it can be stored.
  const unstorableKind = (value: any): string | null => {
    const kind = typeof value;
    return UNSTORABLE_KINDS.has(kind) ? kind : null;
  };

  // First offending key of one attrs object, rendered under `where`.
  const scanAttrs = (attrs: any, where: string): string | null => {
    if (!isObject(attrs)) return null;
    for (const name of Object.keys(attrs)) {
      const kind = unstorableKind(attrs[name]);
      if (kind != null) return `${where}.${name} (${kind})`;
    }
    return null;
  };

  // Scan the children of `owner`; `prefix` is "" at the root and "<path>."
  // below it, so reported paths never start with a dot.
  const scanChildren = (owner: any, prefix: string): string | null => {
    if (!Array.isArray(owner.content)) return null;
    for (let i = 0; i < owner.content.length; i++) {
      const hit = scanNode(owner.content[i], `${prefix}content[${i}]`);
      if (hit != null) return hit;
    }
    return null;
  };

  // Scan one non-root node: attrs, then marks, then children.
  const scanNode = (node: any, where: string): string | null => {
    if (!isObject(node)) return null;

    const ownHit = scanAttrs(node.attrs, `${where}.attrs`);
    if (ownHit != null) return ownHit;

    if (Array.isArray(node.marks)) {
      for (let m = 0; m < node.marks.length; m++) {
        const markHit = scanAttrs(node.marks[m]?.attrs, `${where}.marks[${m}].attrs`);
        if (markHit != null) return markHit;
      }
    }

    return scanChildren(node, `${where}.`);
  };

  // The root has no index of its own, so its paths begin at `attrs` /
  // `content[i]` directly.
  if (!isObject(doc)) return null;
  const rootHit = scanAttrs(doc.attrs, "attrs");
  if (rootHit != null) return rootHit;
  return scanChildren(doc, "");
}
|
||||||
|
|
||||||
|
/**
 * For each table-structural node type, the type of the container it must sit
 * directly inside. `insertNodeRelative` uses this to splice rows/cells into the
 * right ancestor instead of the anchor's direct parent, which would break the
 * table's nesting.
 */
const REQUIRED_CONTAINER: Record<string, string> = {
  tableRow: "table",
  tableCell: "tableRow",
  tableHeader: "tableRow",
};

/** The table-structural node types, i.e. the keys of `REQUIRED_CONTAINER`. */
const STRUCTURAL_TYPES = new Set(Object.keys(REQUIRED_CONTAINER));
|
||||||
|
|
||||||
|
/**
 * Resolve the anchor described by `opts` and return the chain of nodes leading
 * to it, from `doc` down to and including the anchor itself.
 *
 * Each link is `{ node, index }`, where `index` is the node's position in its
 * parent's `content` array (the root doc uses -1).
 *
 * - With `anchorNodeId`: depth-first search of the whole tree (the doc node
 *   included) for `attrs.id === anchorNodeId`. `anchorText` is not consulted
 *   in this case, even when the id is not found.
 * - Otherwise, with `anchorText`: the first TOP-LEVEL block whose plain text
 *   includes it.
 *
 * @param doc The document to search; a non-object yields `null`.
 * @param opts Anchor description (`anchorNodeId` and/or `anchorText`).
 * @returns The ancestor chain, or `null` when the anchor cannot be resolved.
 */
function findAnchorChain(
  doc: any,
  opts: InsertOptions,
): { node: any; index: number }[] | null {
  if (!isObject(doc)) return null;

  if (opts.anchorNodeId != null) {
    const wanted = opts.anchorNodeId;
    // Pre-order descent that carries the chain built so far.
    const descend = (
      node: any,
      index: number,
      above: { node: any; index: number }[],
    ): { node: any; index: number }[] | null => {
      if (!isObject(node)) return null;
      const chain = above.concat({ node, index });
      if (matchesId(node, wanted)) return chain;
      if (!Array.isArray(node.content)) return null;
      for (let k = 0; k < node.content.length; k++) {
        const found = descend(node.content[k], k, chain);
        if (found != null) return found;
      }
      return null;
    };
    return descend(doc, -1, []);
  }

  if (opts.anchorText != null && Array.isArray(doc.content)) {
    const needle = opts.anchorText;
    const at = doc.content.findIndex((block: any) =>
      blockPlainText(block).includes(needle),
    );
    if (at !== -1) {
      return [
        { node: doc, index: -1 },
        { node: doc.content[at], index: at },
      ];
    }
  }

  return null;
}
|
||||||
|
|
||||||
|
/** Placement instructions for `insertNodeRelative`. */
export interface InsertOptions {
  /**
   * Where the new node goes: "append" pushes it onto the top-level content;
   * "before" / "after" place it relative to the resolved anchor.
   */
  position: "before" | "after" | "append";
  /** Preferred anchor: the node with this `attrs.id`, at any depth. */
  anchorNodeId?: string;
  /**
   * Fallback anchor when no id is given: the first TOP-LEVEL block whose
   * plain text includes this string.
   */
  anchorText?: string;
}
|
||||||
|
|
||||||
|
/**
 * Insert a deep clone of `node` into a clone of `doc`.
 *
 * Placement by `opts.position`:
 * - "append": push onto the top-level `doc.content` (created when missing).
 * - "before" / "after": resolve an anchor and splice next to it.
 *
 * Anchor resolution for before/after:
 * - with `anchorNodeId`, the first node at any depth whose `attrs.id` matches;
 *   `anchorText` is not used as a fallback when the id is given;
 * - otherwise, with `anchorText`, the first TOP-LEVEL block whose
 *   `blockPlainText` includes it.
 *
 * Table-structural nodes (tableRow / tableCell / tableHeader) are placed into
 * the nearest enclosing container of the required type (see
 * `REQUIRED_CONTAINER`) rather than the anchor's direct parent. When the
 * anchor itself is that container, the node is added at its start ("before")
 * or end ("after").
 *
 * @param doc The document to insert into (never mutated).
 * @param node The node to insert (cloned before insertion).
 * @param opts Placement options; a non-object yields `inserted: false`.
 * @returns `{ doc, inserted }`. `inserted` is false when the anchor cannot be
 *   resolved; the returned doc is then an unchanged clone.
 * @throws Error when a structural node is appended at the top level, or when
 *   its anchor is not inside a container of the required type.
 */
export function insertNodeRelative(
  doc: any,
  node: any,
  opts: InsertOptions,
): { doc: any; inserted: boolean } {
  const out = clone(doc);
  const fresh = clone(node);
  const result = (inserted: boolean): { doc: any; inserted: boolean } => ({
    doc: out,
    inserted,
  });

  // Nothing actionable without an options object.
  if (!isObject(opts)) return result(false);

  const isStructural = isObject(node) && STRUCTURAL_TYPES.has(node.type);

  if (opts.position === "append") {
    // Rows/cells cannot live at the top level of a document.
    if (isStructural) {
      throw new Error(
        `insert_node: cannot append a ${node.type} at the top level; use ` +
          `position before/after with an anchor inside the target table`,
      );
    }
    if (!isObject(out)) return result(false);
    if (!Array.isArray(out.content)) out.content = [];
    out.content.push(fresh);
    return result(true);
  }

  // Any position other than "after" splices in front of the anchor.
  const offset = opts.position === "after" ? 1 : 0;

  if (isStructural) {
    const containerType = REQUIRED_CONTAINER[node.type];
    const chain = findAnchorChain(out, opts);
    if (chain == null) return result(false);

    // Climb from the anchor towards the root until the required container
    // type is met; `depth` ends at -1 when there is none.
    let depth = chain.length - 1;
    while (
      depth >= 0 &&
      !(isObject(chain[depth].node) && chain[depth].node.type === containerType)
    ) {
      depth--;
    }
    if (depth < 0) {
      throw new Error(
        `insert_node: cannot insert a ${node.type} here — the anchor is not ` +
          `inside a ${containerType}. Anchor on a cell's text or a block id ` +
          `that lives inside the target table.`,
      );
    }

    const container = chain[depth].node;
    if (!Array.isArray(container.content)) container.content = [];

    const anchorIsContainer = depth === chain.length - 1;
    let at: number;
    if (anchorIsContainer) {
      // Anchor resolved to the container itself: add at its end or start.
      at = opts.position === "after" ? container.content.length : 0;
    } else {
      // Otherwise sit next to the container's child that leads to the anchor.
      at = chain[depth + 1].index + offset;
    }
    container.content.splice(at, 0, fresh);
    return result(true);
  }

  if (opts.anchorNodeId != null) {
    const anchorId = opts.anchorNodeId as string;
    // Pre-order search; splices beside the first match and stops.
    const spliceBeside = (siblings: any[]): boolean => {
      for (let i = 0; i < siblings.length; i++) {
        const sibling = siblings[i];
        if (matchesId(sibling, anchorId)) {
          siblings.splice(i + offset, 0, fresh);
          return true;
        }
        if (
          isObject(sibling) &&
          Array.isArray(sibling.content) &&
          spliceBeside(sibling.content)
        ) {
          return true;
        }
      }
      return false;
    };
    const inserted =
      isObject(out) && Array.isArray(out.content) ? spliceBeside(out.content) : false;
    return result(inserted);
  }

  if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
    const needle = opts.anchorText;
    const at = out.content.findIndex((block: any) =>
      blockPlainText(block).includes(needle),
    );
    if (at !== -1) {
      out.content.splice(at + offset, 0, fresh);
      return result(true);
    }
  }

  return result(false);
}
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// Table editing helpers
|
||||||
|
//
|
||||||
|
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
|
||||||
|
// table -> { type:"table", content:[tableRow...] }
|
||||||
|
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
|
||||||
|
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
|
||||||
|
// content:[paragraph...] }
|
||||||
|
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
|
||||||
|
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
|
||||||
|
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
|
||||||
|
// of the input doc (via `clone`) and never mutate their inputs.
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
|
/**
 * Add every string `attrs.id` found in `node` and its descendants to `used`.
 * The resulting set seeds `makeFreshId`, so newly generated paragraph ids
 * cannot clash with ids already in the document.
 *
 * @param node Root of the subtree to scan; non-objects are ignored.
 * @param used Set that receives the ids (mutated in place).
 */
function collectIds(node: any, used: Set<string>): void {
  if (!isObject(node)) return;

  const ownId = isObject(node.attrs) ? node.attrs.id : undefined;
  if (typeof ownId === "string") used.add(ownId);

  if (!Array.isArray(node.content)) return;
  node.content.forEach((child: any) => collectIds(child, used));
}
|
||||||
|
|
||||||
|
/**
 * Generate a random Docmost-style id — 12 characters drawn from lowercase
 * `a-z0-9` — that is not yet in `used`, then record it there.
 *
 * A candidate that is already taken is discarded and a new one is drawn.
 * Callers depend on uniqueness only, not on the exact string, so plain
 * `Math.random` is sufficient and no module-level counter has to be reset.
 *
 * @param used Ids already taken; the returned id is added to it.
 * @returns The new unique id.
 */
function makeFreshId(used: Set<string>): string {
  const ALPHABET = "abcdefghijklmnopqrstuvwxyz0123456789";
  const ID_LENGTH = 12;

  for (;;) {
    const candidate = Array.from({ length: ID_LENGTH }, () =>
      ALPHABET.charAt(Math.floor(Math.random() * ALPHABET.length)),
    ).join("");
    if (!used.has(candidate)) {
      used.add(candidate);
      return candidate;
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Find a table inside an ALREADY-CLONED doc and hand back the LIVE table node
 * (a reference into `rootClone`, so callers may mutate it) together with its
 * index path.
 *
 * - `#<n>`: the top-level block at index `n`, accepted only when its
 *   `type === "table"`.
 * - anything else: depth-first search for the first node with
 *   `attrs.id === tableRef`, then a climb through its ancestors (the match
 *   itself included) to the nearest node of `type === "table"`.
 *
 * @param rootClone The cloned document to search; a non-object yields `null`.
 * @param tableRef `#<n>` index reference, or the id of a node inside the table.
 * @returns `{ table, path }`, or `null` when no table matches — including when
 *   the id is found but has no enclosing table.
 */
function locateTable(
  rootClone: any,
  tableRef: string,
): { table: any; path: number[] } | null {
  if (!isObject(rootClone)) return null;

  // Index form: must point straight at a top-level table.
  const byIndex = typeof tableRef === "string" ? tableRef.match(/^#(\d+)$/) : null;
  if (byIndex) {
    const at = Number(byIndex[1]);
    const candidate = Array.isArray(rootClone.content)
      ? rootClone.content[at]
      : undefined;
    return isObject(candidate) && candidate.type === "table"
      ? { table: candidate, path: [at] }
      : null;
  }

  // Nearest table on a root-to-match chain, searched from the match upwards.
  const nearestTable = (
    chain: { node: any; index: number }[],
  ): { table: any; path: number[] } | null => {
    for (let up = chain.length - 1; up >= 0; up--) {
      const link = chain[up];
      if (isObject(link.node) && link.node.type === "table") {
        return {
          table: link.node,
          path: chain.slice(0, up + 1).map((step) => step.index),
        };
      }
    }
    return null;
  };

  // Id form: pre-order walk that stops at the first id match, whether or not
  // that match turns out to live inside a table.
  const descend = (
    parent: any,
    above: { node: any; index: number }[],
  ): { table: any; path: number[] } | null => {
    if (!isObject(parent) || !Array.isArray(parent.content)) return null;
    for (let i = 0; i < parent.content.length; i++) {
      const child = parent.content[i];
      const chain = [...above, { node: child, index: i }];
      if (matchesId(child, tableRef)) return nearestTable(chain);
      const found = descend(child, chain);
      if (found != null) return found;
    }
    return null;
  };

  return descend(rootClone, []);
}
|
||||||
|
|
||||||
|
/**
 * Build the single paragraph node that the table writers use as a cell's
 * content.
 *
 * @param id The paragraph's `attrs.id`.
 * @param text Plain text for the paragraph; an empty string produces a
 *   paragraph with an empty `content` array rather than an empty text node.
 * @returns A `paragraph` node with `attrs: { id, indent: 0 }`.
 */
function makeCellParagraph(id: string, text: string): any {
  const paragraph: any = { type: "paragraph", attrs: { id, indent: 0 }, content: [] };
  if (text) {
    paragraph.content.push({ type: "text", text });
  }
  return paragraph;
}
|
||||||
|
|
||||||
|
/**
 * Read a table as a matrix of cell texts and cell ids.
 *
 * @param doc The document holding the table (never mutated).
 * @param tableRef `#<n>` top-level index, or the id of a node inside the
 *   table (see `locateTable`).
 * @returns `null` when `tableRef` resolves to no table, otherwise:
 * - `rows`: number of rows;
 * - `cols`: cell count of the FIRST row only — tables may be ragged, so use
 *   the length of each `cells` / `cellIds` row for that row's real width;
 * - `cells`: each cell's `blockPlainText`;
 * - `cellIds`: each cell's FIRST child's `attrs.id`, or `null` — the id to
 *   pass to `patch_node` for rich-formatted edits;
 * - `path`: index path of the table inside the doc.
 */
export function readTable(
  doc: any,
  tableRef: string,
): {
  rows: number;
  cols: number;
  cells: string[][];
  cellIds: (string | null)[][];
  path: number[];
} | null {
  const located = locateTable(clone(doc), tableRef);
  if (located == null) return null;
  const { table, path } = located;

  const rowNodes: any[] = Array.isArray(table.content) ? table.content : [];

  // Cells of one row, tolerating rows without a content array.
  const cellsOf = (rowNode: any): any[] =>
    Array.isArray(rowNode?.content) ? rowNode.content : [];

  // Id carried by a cell's first child, or null when there is none.
  const firstParagraphId = (cellNode: any): string | null => {
    const firstPara = Array.isArray(cellNode?.content)
      ? cellNode.content[0]
      : undefined;
    return isObject(firstPara) && isObject(firstPara.attrs)
      ? firstPara.attrs.id ?? null
      : null;
  };

  const cells: string[][] = [];
  const cellIds: (string | null)[][] = [];
  for (const rowNode of rowNodes) {
    const cellNodes = cellsOf(rowNode);
    cells.push(Array.from(cellNodes, (cellNode) => blockPlainText(cellNode)));
    cellIds.push(Array.from(cellNodes, (cellNode) => firstParagraphId(cellNode)));
  }

  return {
    rows: rowNodes.length,
    cols: rowNodes[0]?.content?.length ?? 0,
    cells,
    cellIds,
    path,
  };
}
|
||||||
|
|
||||||
|
/**
 * Insert a row of plain-text cells into a table.
 *
 * The new row is as wide as the WIDEST existing row (or as wide as `cells`
 * when the table has no rows yet); missing cells are padded with "". Each new
 * cell gets `colspan: 1, rowspan: 1`, a fresh-id paragraph holding its text,
 * and — when the first row's cell in the same column has one — its own COPY
 * of that cell's `colwidth`, so the new cell never shares an array with the
 * header cell.
 *
 * A row landing at index 0 becomes the new first row, so each of its cells
 * takes the type of the current first-row cell in that column (falling back
 * to "tableCell"); rows at any other position use "tableCell".
 *
 * @param doc The document holding the table (never mutated).
 * @param tableRef `#<n>` top-level index, or the id of a node inside the table.
 * @param cells Cell texts for the new row, left to right.
 * @param index Row position to insert at. Used when it is an integer in
 *   `[0, rows]`; otherwise the row is appended at the end.
 * @returns `{ doc, inserted }`; `inserted` is false only when the table
 *   cannot be located.
 * @throws Error when `cells` has more entries than the table has columns.
 */
export function insertTableRow(
  doc: any,
  tableRef: string,
  cells: string[],
  index?: number,
): { doc: any; inserted: boolean } {
  const out = clone(doc);
  const located = locateTable(out, tableRef);
  if (located == null) return { doc: out, inserted: false };
  const { table } = located;

  if (!Array.isArray(table.content)) table.content = [];
  const rows = table.content.length;
  const headerRow = table.content[0];
  const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];

  // Column count is the WIDEST existing row, so the guard below stays
  // meaningful for ragged tables and the new row matches the table's width.
  // Fall back to the supplied cell count only when the table has no rows.
  let colCount = 0;
  for (const r of table.content) {
    if (isObject(r) && Array.isArray(r.content)) {
      colCount = Math.max(colCount, r.content.length);
    }
  }
  if (colCount === 0) colCount = Array.isArray(cells) ? cells.length : 0;

  if (Array.isArray(cells) && cells.length > colCount) {
    throw new Error(
      `table_insert_row: got ${cells.length} cell(s) but the table has ${colCount} column(s)`,
    );
  }

  // Resolve the landing index up front so the cell-type decision and the
  // splice below agree: a valid integer in [0, rows] splices there, else we
  // append.
  const landingIndex =
    typeof index === "number" && Number.isInteger(index) && index >= 0 && index <= rows
      ? index
      : rows;

  // Seed the id generator with every id already in the doc so the new cell
  // paragraph ids are unique within the whole document.
  const used = new Set<string>();
  collectIds(out, used);

  const newCells: any[] = [];
  for (let i = 0; i < colCount; i++) {
    const text = (Array.isArray(cells) ? cells[i] : undefined) ?? "";
    const attrs: Record<string, any> = { colspan: 1, rowspan: 1 };

    // Copy this column's colwidth from the first row's cell when present.
    // An array value is duplicated so the two cells do not alias one array
    // (a later in-place edit of one would otherwise change the other).
    const colwidth = headerCells[i]?.attrs?.colwidth;
    if (colwidth !== undefined) {
      attrs.colwidth = Array.isArray(colwidth) ? [...colwidth] : colwidth;
    }

    // Index 0 inherits the current first-row cell type per column (Docmost
    // uses "tableHeader" there); every other position is a plain data cell.
    const cellType =
      landingIndex === 0 ? headerCells[i]?.type ?? "tableCell" : "tableCell";

    newCells.push({
      type: cellType,
      attrs,
      content: [makeCellParagraph(makeFreshId(used), text)],
    });
  }

  const newRow = { type: "tableRow", content: newCells };

  // Splice at the resolved landing index (append when index was
  // omitted/invalid).
  table.content.splice(landingIndex, 0, newRow);

  return { doc: out, inserted: true };
}
|
||||||
|
|
||||||
|
/**
 * Remove the row at 0-based `index` from a table.
 *
 * @param doc The document holding the table (never mutated).
 * @param tableRef `#<n>` top-level index, or the id of a node inside the table.
 * @param index 0-based position of the row to remove.
 * @returns `{ doc, deleted }`; `deleted` is false only when the table cannot
 *   be located.
 * @throws Error when `index` is not an integer inside `[0, rows)`, or when the
 *   table has just one row (the last row is never removed). The range check
 *   runs first.
 */
export function deleteTableRow(
  doc: any,
  tableRef: string,
  index: number,
): { doc: any; deleted: boolean } {
  const out = clone(doc);
  const located = locateTable(out, tableRef);
  if (located == null) return { doc: out, deleted: false };

  const table = located.table;
  if (!Array.isArray(table.content)) table.content = [];
  const rowCount = table.content.length;

  const inRange = Number.isInteger(index) && index >= 0 && index < rowCount;
  if (!inRange) {
    throw new Error(
      `table_delete_row: row index ${index} out of range (table has ${rowCount} row(s))`,
    );
  }
  if (rowCount <= 1) {
    throw new Error(
      "table_delete_row: refusing to delete the only row of the table",
    );
  }

  table.content.splice(index, 1);
  return { doc: out, deleted: true };
}
|
||||||
|
|
||||||
|
/**
 * Replace the content of cell `[row, col]` (both 0-based) with a single
 * plain-text paragraph.
 *
 * The cell's own attrs (colspan / rowspan / colwidth) are left as they are.
 * The new paragraph keeps the id of the cell's current first child when that
 * id is a non-empty string; otherwise it gets a fresh id that is unique
 * within the document.
 *
 * @param doc The document holding the table (never mutated).
 * @param tableRef `#<n>` top-level index, or the id of a node inside the table.
 * @param row 0-based row index.
 * @param col 0-based column index within that row.
 * @param text New plain text for the cell.
 * @returns `{ doc, updated }`; `updated` is false only when the table cannot
 *   be located.
 * @throws Error when `row` or `col` is not an integer inside the table's /
 *   that row's bounds.
 */
export function updateTableCell(
  doc: any,
  tableRef: string,
  row: number,
  col: number,
  text: string,
): { doc: any; updated: boolean } {
  const out = clone(doc);
  const located = locateTable(out, tableRef);
  if (located == null) return { doc: out, updated: false };

  const table = located.table;
  const rowNodes = Array.isArray(table.content) ? table.content : [];
  const rowNode = rowNodes[row];
  // Width is measured on the addressed row itself, since tables may be ragged.
  const cols =
    isObject(rowNode) && Array.isArray(rowNode.content) ? rowNode.content.length : 0;

  const rowInRange = Number.isInteger(row) && row >= 0 && row < rowNodes.length;
  const colInRange = Number.isInteger(col) && col >= 0 && col < cols;
  if (!(rowInRange && colInRange)) {
    throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
  }

  const cellNode = rowNode.content[col];
  const firstPara = Array.isArray(cellNode?.content)
    ? cellNode.content[0]
    : undefined;
  const existingId =
    isObject(firstPara) && isObject(firstPara.attrs) ? firstPara.attrs.id : undefined;

  // Mint an id only when the cell has no usable one to carry over.
  const mintId = (): string => {
    const used = new Set<string>();
    collectIds(out, used);
    return makeFreshId(used);
  };
  const id =
    typeof existingId === "string" && existingId.length > 0 ? existingId : mintId();

  cellNode.content = [makeCellParagraph(id, text)];
  return { doc: out, updated: true };
}
|
||||||
39
packages/docmost-client/src/lib/page-lock.ts
Normal file
39
packages/docmost-client/src/lib/page-lock.ts
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
/**
|
||||||
|
* Per-page async mutex.
|
||||||
|
*
|
||||||
|
* Content writes over the collaboration websocket must never overlap for the
|
||||||
|
* same page: two concurrent full-document replaces would race on the live Yjs
|
||||||
|
* fragment. We serialize them with a per-pageId promise chain — each new
|
||||||
|
* operation waits for the previous one on that page to settle (success or
|
||||||
|
* failure) before it runs. Different pages never block each other.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Per-page queue tail: the error-swallowing promise of the latest queued op. */
const chains = new Map<string, Promise<unknown>>();

/**
 * Run `fn` once every operation previously queued for `pageId` has settled.
 *
 * Operations for the same `pageId` run strictly one after another, in call
 * order; a failed operation does not block the ones queued behind it.
 * Operations for different page ids are independent.
 *
 * @param pageId Key identifying the queue to join.
 * @param fn     The asynchronous operation to run.
 * @returns A promise carrying `fn`'s real result or rejection. The caller MUST
 *          await/handle it; only the internal queue tail swallows errors.
 */
export function withPageLock<T>(
  pageId: string,
  fn: () => Promise<T>,
): Promise<T> {
  const ignore = (): void => {};

  // Whatever happened to the previous op, we only care that it has finished.
  const previous = chains.get(pageId) ?? Promise.resolve();
  const result = previous.catch(ignore).then(fn);

  // The queue tail never rejects: it exists purely to order later callers.
  const gate = result.catch(ignore);
  chains.set(pageId, gate);

  // Forget the page once its queue has drained, unless a newer op has already
  // replaced the tail, so the map does not grow without bound.
  void gate.then(() => {
    if (chains.get(pageId) === gate) {
      chains.delete(pageId);
    }
  });

  return result;
}
|
||||||
477
packages/docmost-client/src/lib/transforms.ts
Normal file
477
packages/docmost-client/src/lib/transforms.ts
Normal file
@@ -0,0 +1,477 @@
|
|||||||
|
/**
|
||||||
|
* Pure, network-free transform primitives for a ProseMirror/TipTap document
|
||||||
|
* tree, plus one higher-level orchestration (commentsToFootnotes).
|
||||||
|
*
|
||||||
|
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||||
|
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||||
|
* `content` array; callouts, tables, lists all hold their children in
|
||||||
|
* `content`, so a single recursive walk reaches them all.
|
||||||
|
*
|
||||||
|
* Conventions (matching node-ops.ts):
|
||||||
|
* - functions that produce a new document deep-clone their input and return a
|
||||||
|
* `{ doc, ... }` object; the caller's objects are never mutated.
|
||||||
|
* - functions are defensively null-safe.
|
||||||
|
* - `marks` arrays are preserved verbatim when fragments are split/reordered.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { blockPlainText } from "./node-ops.js";
|
||||||
|
|
||||||
|
/**
 * Return a deep copy of `value`, leaving the original untouched.
 *
 * Uses the runtime's `structuredClone` when it exists and falls back to a JSON
 * round-trip otherwise (so, on the fallback path, only JSON-serializable data
 * survives).
 */
function clone<T>(value: T): T {
  const hasNative = typeof structuredClone === "function";
  return hasNative
    ? structuredClone(value)
    : (JSON.parse(JSON.stringify(value)) as T);
}
|
||||||
|
|
||||||
|
/**
 * Type guard: true for any non-null value whose `typeof` is "object" and that
 * is not an array.
 */
function isObject(value: any): value is Record<string, any> {
  if (value === null || value === undefined) return false;
  if (Array.isArray(value)) return false;
  return typeof value === "object";
}
|
||||||
|
|
||||||
|
/**
 * Plain text of `node`.
 *
 * Thin wrapper that forwards to node-ops' `blockPlainText`, exposed here so
 * transform authors can import everything they need from this module.
 */
export function blockText(node: any): string {
  const plain: string = blockPlainText(node);
  return plain;
}
|
||||||
|
|
||||||
|
/**
 * Pre-order, depth-first traversal of a node tree.
 *
 * `fn` is invoked once for `node` itself and then, recursively, for every
 * object found in `content` arrays below it (callouts, tables, lists, ...).
 * Anything that is not a non-array object (null, undefined, primitives,
 * arrays) is silently skipped, so the walk is safe on malformed trees.
 *
 * @param node Root of the subtree to visit.
 * @param fn   Callback invoked once per visited node, parent before children.
 */
export function walk(node: any, fn: (node: any) => void): void {
  const visitable =
    node != null && typeof node === "object" && !Array.isArray(node);
  if (!visitable) return;

  fn(node);

  // Read the children only after `fn` ran, so edits made by `fn` are honoured.
  if (!Array.isArray(node.content)) return;
  for (const kid of node.content) {
    walk(kid, fn);
  }
}
|
||||||
|
|
||||||
|
/**
 * Find the FIRST node, in depth-first (parent before children) order, for
 * which `predicate` returns true.
 *
 * The raw tree is searched, so nodes without an `attrs.id` are found too. The
 * search stops as soon as a match is found; nodes after the match are neither
 * tested nor traversed.
 *
 * Typical use: `getList(doc, n => n.type === "orderedList")`.
 *
 * @param doc       Root of the tree to search. Non-object values are skipped.
 * @param predicate Test applied to each visited node.
 * @returns The live node reference inside `doc` (NOT a clone), or null when
 *          nothing matches.
 */
export function getList(
  doc: any,
  predicate: (node: any) => boolean,
): any | null {
  const search = (node: any): any | null => {
    // Same null-safety as the tree walker: only non-array objects are nodes.
    if (node == null || typeof node !== "object" || Array.isArray(node)) {
      return null;
    }
    if (predicate(node)) return node;
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        const hit = search(child);
        if (hit !== null) return hit; // early exit: skip the rest of the tree
      }
    }
    return null;
  };
  return search(doc);
}
|
||||||
|
|
||||||
|
/** Options accepted by insertMarkerAfter. */
export interface InsertMarkerOptions {
  /**
   * Exclusive upper bound on the TOP-LEVEL block index that is searched: only
   * blocks with index < beforeBlock are considered (the value is capped at the
   * number of top-level blocks). When omitted, every top-level block is
   * searched. Used to keep footnote markers in the body and out of the notes
   * section.
   */
  beforeBlock?: number;
}
|
||||||
|
|
||||||
|
/**
 * Insert `marker` as a PLAIN (unmarked) text run immediately after the first
 * occurrence of `anchor`.
 *
 * How the position is found:
 *  - top-level blocks are scanned in order (optionally only those before
 *    `opts.beforeBlock`); a block qualifies when its concatenated plain text
 *    contains `anchor`, so an anchor spanning several text/mark runs matches;
 *  - inside that block, the inline arrays (arrays holding at least one text
 *    node) are walked while a running plain-text offset is kept; the text run
 *    in which the END of the anchor falls is split at that point.
 *
 * The two halves of the split run keep the run's other properties and a copy
 * of its marks, while the inserted run (`" " + marker`, the leading space
 * separates it from the preceding word) carries no marks at all.
 *
 * If a block's text matches but no text run can be split (e.g. the anchor ends
 * inside a non-text inline node), the scan moves on to the next block.
 *
 * @param doc    Document to copy and edit; never mutated.
 * @param anchor Text to look for. An empty anchor never matches.
 * @param marker Text to insert after the anchor.
 * @param opts   Optional search scope, see InsertMarkerOptions.
 * @returns `{ doc, inserted }`; `inserted` is false when no in-scope block
 *          yielded a split point (the returned `doc` is then an unchanged clone).
 */
export function insertMarkerAfter(
  doc: any,
  anchor: string,
  marker: string,
  opts: InsertMarkerOptions = {},
): { doc: any; inserted: boolean } {
  const result = clone(doc);
  if (!isObject(result) || !Array.isArray(result.content) || !anchor) {
    return { doc: result, inserted: false };
  }

  const blocks: any[] = result.content;
  const scopeEnd =
    typeof opts.beforeBlock === "number"
      ? Math.min(opts.beforeBlock, blocks.length)
      : blocks.length;

  for (let blockIdx = 0; blockIdx < scopeEnd; blockIdx++) {
    const candidate = blocks[blockIdx];
    if (!isObject(candidate)) continue;

    // Quick reject: the block's plain text must contain the anchor.
    const anchorStart = blockPlainText(candidate).indexOf(anchor);
    if (anchorStart < 0) continue;
    // Plain-text offset (within this block) just past the anchor's last char.
    const splitAt = anchorStart + anchor.length;

    // Plain-text characters accounted for so far while scanning inline arrays.
    let consumed = 0;

    // Tries to place the marker somewhere below `container`; true on success.
    const place = (container: any): boolean => {
      if (!isObject(container) || !Array.isArray(container.content)) {
        return false;
      }
      const children: any[] = container.content;
      const isInlineArray = children.some(
        (c: any) => isObject(c) && c.type === "text",
      );

      if (!isInlineArray) {
        // Structural container (e.g. callout -> paragraph): descend.
        for (const child of children) {
          if (place(child)) return true;
        }
        return false;
      }

      for (let k = 0; k < children.length; k++) {
        const run = children[k];
        const start = consumed;
        const end = start + (isObject(run) ? blockPlainText(run).length : 0);

        // The split point is the text run with start < splitAt <= end.
        const holdsAnchorEnd =
          isObject(run) &&
          run.type === "text" &&
          typeof run.text === "string" &&
          splitAt > start &&
          splitAt <= end;

        if (!holdsAnchorEnd) {
          consumed = end;
          continue;
        }

        const offsetInRun = splitAt - start;
        const head = run.text.slice(0, offsetInRun);
        const tail = run.text.slice(offsetInRun);
        const carried = Array.isArray(run.marks) ? run.marks : [];

        const pieces: any[] = [];
        if (head.length > 0) {
          pieces.push({ ...run, text: head, marks: [...carried] });
        }
        // The marker run is deliberately plain: no marks are copied onto it.
        pieces.push({ type: "text", text: " " + marker });
        if (tail.length > 0) {
          pieces.push({ ...run, text: tail, marks: [...carried] });
        }
        children.splice(k, 1, ...pieces);
        return true;
      }
      return false;
    };

    if (place(candidate)) {
      return { doc: result, inserted: true };
    }
    // Text matched but nothing could be split: try the next block.
  }

  return { doc: result, inserted: false };
}
|
||||||
|
|
||||||
|
/**
 * Inside callouts, rewrite the closing number of every `[1]…[K]` range marker
 * to `n`, giving `[1]…[n]`.
 *
 * Docmost translations use a callout that states the footnote range, e.g.
 * "[1]…[5]". Both the ellipsis character and the ASCII form `[1]...[K]` are
 * recognised, with optional whitespace around the ellipsis. Only text nodes
 * located somewhere inside a node of type "callout" are touched.
 *
 * @param doc Document to copy and edit; never mutated.
 * @param n   New closing number of the range.
 * @returns `{ doc, changed }` where `changed` is the number of text nodes that
 *          contained at least one range marker. Each text node is counted
 *          once, even when it sits inside nested callouts.
 */
export function setCalloutRange(
  doc: any,
  n: number,
): { doc: any; changed: number } {
  const out = clone(doc);
  // "[1]" + (… or ...) + "[<digits>]"; groups 1 and 2 are the parts kept as-is.
  const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g;
  // A callout nested in another callout is reached by both the outer and the
  // inner walk; remembering handled text nodes prevents counting them twice.
  const handled = new Set<any>();
  let changed = 0;

  walk(out, (node) => {
    if (node.type !== "callout") return;
    walk(node, (inner) => {
      if (inner.type !== "text" || typeof inner.text !== "string") return;
      if (handled.has(inner)) return;
      handled.add(inner);

      // A replacer function (rather than a "$1…$2" pattern string) keeps the
      // number from ever being read as part of a group reference, and lets us
      // detect a match without a separate stateful test() on the global regex.
      let hits = 0;
      const rewritten = inner.text.replace(
        rangeRe,
        (_match: string, head: string, tail: string) => {
          hits++;
          return `${head}${n}${tail}`;
        },
      );
      if (hits > 0) {
        inner.text = rewritten;
        changed++;
      }
    });
  });

  return { doc: out, changed };
}
|
||||||
|
|
||||||
|
/**
 * Generate a random id for a new block's `attrs.id`.
 *
 * Docmost uses nanoid; a random base-36 string is sufficient here (the id only
 * has to be unique within one document). The id is built character by
 * character so it is ALWAYS exactly 14 characters long: slicing
 * `Math.random().toString(36)` instead yields shorter (in the extreme, empty)
 * ids whenever the random value has a short base-36 representation.
 *
 * @returns A 14-character string over `[0-9a-z]`.
 */
function freshId(): string {
  const alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
  const length = 14;
  let id = "";
  for (let i = 0; i < length; i++) {
    id += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return id;
}
|
||||||
|
|
||||||
|
/**
 * Build a list item that wraps the given inline nodes in one paragraph:
 *   { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content }] }
 *
 * The paragraph receives a freshly generated block id. The inline nodes are
 * deep-cloned, so the result shares no references with the caller's input; a
 * non-array argument produces an empty paragraph.
 *
 * @param inlineNodes Inline ProseMirror nodes to place in the paragraph.
 * @returns The new listItem node.
 */
export function noteItem(inlineNodes: any[]): any {
  const inlineCopy = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
  const paragraph = {
    type: "paragraph",
    attrs: { id: freshId() },
    content: inlineCopy,
  };
  return { type: "listItem", content: [paragraph] };
}
|
||||||
|
|
||||||
|
/**
 * Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
 * ProseMirror text nodes, synchronously and without dependencies (the async
 * markdownToProseMirror is intentionally NOT used: the transform sandbox runs
 * synchronously).
 *
 * Steps:
 *  1. a leading `комментарий: ` prefix (case-insensitive) and then a leading
 *     `N. ` numeric prefix are removed, and the result is trimmed;
 *  2. an empty result yields `[]`;
 *  3. if the text starts with a `**bold lead**`, the lead becomes a text node
 *     with a bold mark and everything after it becomes ONE plain text node
 *     (keeping the whitespace that separated it from the lead);
 *  4. otherwise the text is handed to splitInlineBold.
 *
 * @param markdown Comment markdown; a non-string is treated as "".
 * @returns The inline text nodes, possibly empty.
 */
export function mdToInlineNodes(markdown: string): any[] {
  const source = typeof markdown === "string" ? markdown : "";
  const stripped = source
    .replace(/^\s*комментарий\s*:\s*/i, "")
    .replace(/^\s*\d+\.\s+/, "")
    .trim();

  if (stripped === "") return [];

  // Group 1: the lead text; group 2: the whitespace that follows the lead.
  const lead = /^\*\*([^*]+)\*\*(\s*)/.exec(stripped);
  if (lead === null) {
    // No bold lead: split any inline **bold** spans out of the plain text.
    return splitInlineBold(stripped);
  }

  const result: any[] = [
    { type: "text", text: lead[1], marks: [{ type: "bold" }] },
  ];
  const remainder = stripped.slice(lead[0].length);
  if (remainder.length > 0) {
    // Put the separating whitespace back in front of the remainder.
    result.push({ type: "text", text: `${lead[2]}${remainder}` });
  }
  return result;
}
|
||||||
|
|
||||||
|
/**
 * Break `text` into text nodes around its inline `**bold**` spans: each span
 * becomes a bold-marked node (without the asterisks) and the stretches between
 * spans become plain nodes. Text without any span (including the empty string)
 * comes back as a single plain node. This is the no-lead path of
 * mdToInlineNodes.
 */
function splitInlineBold(text: string): any[] {
  const pieces: any[] = [];
  let cursor = 0;

  for (const hit of text.matchAll(/\*\*([^*]+)\*\*/g)) {
    const start = hit.index ?? 0;
    if (start > cursor) {
      pieces.push({ type: "text", text: text.slice(cursor, start) });
    }
    pieces.push({ type: "text", text: hit[1], marks: [{ type: "bold" }] });
    cursor = start + hit[0].length;
  }

  // Trailing plain text after the last bold span.
  if (cursor < text.length) {
    pieces.push({ type: "text", text: text.slice(cursor) });
  }

  if (pieces.length === 0) {
    return [{ type: "text", text }];
  }
  return pieces;
}
|
||||||
|
|
||||||
|
/** Options for commentsToFootnotes. */
|
||||||
|
export interface CommentsToFootnotesOptions {
  /**
   * Text of the top-level heading that marks the start of the notes section;
   * the notes orderedList is looked up at or after this heading. When omitted,
   * commentsToFootnotes falls back to its built-in default heading text.
   */
  notesHeading?: string;
}
|
||||||
|
|
||||||
|
/**
 * A comment shape as returned by DocmostClient.listComments, reduced to the
 * fields commentsToFootnotes reads. Any other property is allowed and ignored.
 */
export interface FootnoteComment {
  /** Comment id; reported back in `consumed` when the comment was anchored. */
  id: string;
  /** Comment body as markdown; converted into the footnote's inline nodes. */
  content: string;
  /**
   * The document text the comment is attached to. Used as the anchor after
   * which the footnote marker is inserted; comments without it are skipped.
   */
  selection?: string | null;
  [k: string]: any;
}
|
||||||
|
|
||||||
|
/**
 * Turn inline comments into numbered footnotes.
 *
 * Phase 1 - anchor. For every comment that has a `selection`, a transient
 * placeholder ("\u0000FN<i>\u0000", where <i> is the comment's index in
 * `comments`) is inserted right after the selection text, searching only the
 * BODY (top-level blocks before the notes heading). A note list item is built
 * from the comment's markdown. Comments whose selection is not found are left
 * out.
 *
 * Phase 2 - renumber. The body is read in document order; every existing `[N]`
 * marker and every placeholder is replaced by the next sequence number, and
 * the matching note (the old N-th note, or the placeholder's new note) is
 * appended to the new notes list, so notes end up in marker order. A top-level
 * callout whose text contains a `[1]…[K]` range is treated as the disclaimer
 * and skipped.
 *
 * Phase 3 - sync. The notes orderedList content is replaced by the reordered
 * notes and the disclaimer range is updated to the new note count.
 *
 * @param doc      Document to copy and edit; never mutated.
 * @param comments Comments to convert.
 * @param opts     Optional notes-heading override.
 * @returns `{ doc, consumed }`; `consumed` lists the ids of the comments that
 *          were anchored, in input order.
 * @throws Error when the notes heading or the notes orderedList is missing, or
 *         when a body marker `[N]` has no N-th note in the existing list.
 */
export function commentsToFootnotes(
  doc: any,
  comments: FootnoteComment[],
  opts: CommentsToFootnotesOptions = {},
): { doc: any; consumed: string[] } {
  let current = clone(doc);
  const headingText = opts.notesHeading ?? "Примечания переводчика";

  const topBlocks = (d: any): any[] =>
    Array.isArray(d.content) ? d.content : [];
  const isOrderedList = (b: any): boolean =>
    isObject(b) && b.type === "orderedList";

  const initialTop = topBlocks(current);
  const notesIdx = initialTop.findIndex(
    (b) =>
      isObject(b) && b.type === "heading" && blockText(b).trim() === headingText,
  );
  if (notesIdx < 0) {
    throw new Error(`heading "${headingText}" not found`);
  }
  // The notes orderedList lives at or after the heading.
  if (!initialTop.slice(notesIdx).find(isOrderedList)) {
    throw new Error("notes orderedList not found");
  }

  // ---- Phase 1: anchor each comment with a placeholder --------------------
  const consumed: string[] = [];
  const pendingNotes = new Map<string, any>();
  const inputs = Array.isArray(comments) ? comments : [];

  for (const [pos, comment] of inputs.entries()) {
    if (!comment || !comment.selection) continue;
    // NUL-delimited sentinel: NUL never occurs in real prose, so the renumber
    // regex cannot mistake body text such as "FN2" for a placeholder. It is
    // replaced by "[N]" below and therefore never leaves this function.
    const sentinel = `\u0000FN${pos}\u0000`;
    // insertMarkerAfter returns a new cloned document on every call.
    const attempt = insertMarkerAfter(
      current,
      comment.selection.trimEnd(),
      sentinel,
      { beforeBlock: notesIdx },
    );
    if (!attempt.inserted) continue;
    current = attempt.doc;
    pendingNotes.set(sentinel, noteItem(mdToInlineNodes(comment.content)));
    consumed.push(comment.id);
  }

  // ---- Phase 2: renumber markers in reading order -------------------------
  // `current` may be a different object now, so resolve the list again.
  const body = topBlocks(current);
  const liveList = body.slice(notesIdx).find(isOrderedList);
  if (!liveList) {
    throw new Error("notes orderedList not found");
  }

  const previousNotes: any[] = Array.isArray(liveList.content)
    ? liveList.content
    : [];
  const ordered: any[] = [];
  let counter = 0;

  // Either an existing "[N]" marker (group 1) or a placeholder (group 2).
  const markerRe = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
  // Same range pattern setCalloutRange rewrites; identifies the disclaimer.
  const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;

  const renumber = (_whole: string, oldNum: string, phIdx: string): string => {
    if (oldNum != null) {
      const existing = previousNotes[Number(oldNum) - 1];
      // A marker without a note means the document is inconsistent; fail
      // loudly instead of dropping a note and desyncing the numbering.
      if (existing === undefined) {
        throw new Error(
          `footnote [${oldNum}] has no matching note (notes list has ${previousNotes.length} items); document is inconsistent`,
        );
      }
      ordered.push(existing);
    } else {
      ordered.push(pendingNotes.get(`\u0000FN${phIdx}\u0000`));
    }
    counter += 1;
    return `[${counter}]`;
  };

  for (const block of body.slice(0, notesIdx)) {
    // Skip ONLY the disclaimer callout: its "[1]…[K]" range is not a footnote
    // marker and would otherwise consume note slots. Other callouts may carry
    // real "[N]" markers and are renumbered like any other block.
    const isDisclaimer =
      isObject(block) &&
      block.type === "callout" &&
      disclaimerRangeRe.test(blockText(block));
    if (isDisclaimer) continue;

    walk(block, (node) => {
      if (node.type === "text" && typeof node.text === "string") {
        node.text = node.text.replace(markerRe, renumber);
      }
    });
  }

  // ---- Phase 3: install the reordered notes, then sync the callout --------
  // The list is mutated before setCalloutRange clones `current`, so the
  // reordered notes are part of the document it returns.
  liveList.content = ordered;
  const synced = setCalloutRange(current, ordered.length);

  return { doc: synced.doc, consumed };
}
|
||||||
8
packages/docmost-client/tsconfig.json
Normal file
8
packages/docmost-client/tsconfig.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.base.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src"
|
||||||
|
},
|
||||||
|
"include": ["src/**/*"]
|
||||||
|
}
|
||||||
137
src/pull.ts
Normal file
137
src/pull.ts
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
/**
|
||||||
|
* Read-only Docmost -> filesystem mirror (SPEC §6 pull, Phase 1).
|
||||||
|
*
|
||||||
|
* Walks the configured space's page tree and writes one self-contained `.md`
|
||||||
|
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
|
||||||
|
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
|
||||||
|
* local files. The meta block inside each file carries pageId/slugId/
|
||||||
|
* parentPageId (identity), so no external map file is needed.
|
||||||
|
*
|
||||||
|
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
|
||||||
|
* be correct, but is not expected to be run without live access.
|
||||||
|
*
|
||||||
|
* Run via: npm run pull (-> node build/pull.js)
|
||||||
|
*/
|
||||||
|
import { mkdir, writeFile } from "node:fs/promises";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { pathToFileURL } from "node:url";
|
||||||
|
import { DocmostClient } from "docmost-client";
|
||||||
|
import { loadSettings } from "./settings.js";
|
||||||
|
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||||
|
|
||||||
|
/**
 * Flat page node as returned by listAllSpacePages (no content). Only the
 * fields this script reads are declared.
 */
interface PageNode {
  /** Page id; used as the lookup key and passed to exportPageBody. */
  id: string;
  /** Page title; sanitized into the file/folder name. */
  title?: string;
  /** Preferred suffix when two siblings sanitize to the same name. */
  slugId?: string;
  /** Id of the parent page; null/absent is treated as a root page. */
  parentPageId?: string | null;
  /** Not read by this script. */
  hasChildren?: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Pick the folder/file name for `node` and reserve it among its siblings.
 *
 * The name is the sanitized title. If a sibling (a node with the same
 * `parentPageId`, or another root node) already reserved that exact name, the
 * name is disambiguated with the node's slugId, or with its page id when no
 * slugId is present (SPEC §12). The name is COSMETIC; identity lives in the
 * file's meta block.
 *
 * @param node          Page to name.
 * @param usedBySibling Parent key -> names already taken under that parent.
 *                      Updated in place with the name that is returned.
 * @returns The name to use for this node.
 */
function nameForNode(
  node: PageNode,
  usedBySibling: Map<string, Set<string>>,
): string {
  const siblingKey = node.parentPageId ?? "__root__";
  const taken = usedBySibling.get(siblingKey) ?? new Set<string>();
  usedBySibling.set(siblingKey, taken);

  const base = sanitizeTitle(node.title ?? "");
  const finalName = taken.has(base)
    ? disambiguate(base, node.slugId ?? node.id)
    : base;

  taken.add(finalName);
  return finalName;
}
|
||||||
|
|
||||||
|
/**
 * Mirror every page of the configured space into the vault directory.
 *
 * Lists the space's pages, resolves one name per page (sibling collisions are
 * disambiguated), then, page by page, creates the ancestor folders and writes
 * `<vault>/<...ancestors>/<name>.md` with the exported body. Entries without
 * an id are ignored. Logs a summary line when done.
 */
async function main(): Promise<void> {
  const settings = loadSettings();
  const client = new DocmostClient(
    settings.docmostApiUrl,
    settings.docmostEmail,
    settings.docmostPassword,
  );
  const spaceId = settings.docmostSpaceId;
  const vaultRoot = settings.vaultPath;

  const pages: PageNode[] = await client.listAllSpacePages(spaceId);
  const valid = pages.filter((page) => page && page.id);

  // One pass builds both lookups: id -> page (to walk parent chains) and
  // id -> resolved name (sibling collisions are tracked per parent).
  const byId = new Map<string, PageNode>();
  const nameById = new Map<string, string>();
  const usedBySibling = new Map<string, Set<string>>();
  for (const page of valid) {
    byId.set(page.id, page);
    nameById.set(page.id, nameForNode(page, usedBySibling));
  }

  const parentOf = (page: PageNode): PageNode | undefined =>
    page.parentPageId ? byId.get(page.parentPageId) : undefined;
  const stemOf = (page: PageNode): string =>
    nameById.get(page.id) ?? sanitizeTitle(page.title ?? "");

  // Folder path of a page = names of its ancestors, outermost first. The page's
  // own name is the file stem. `seen` stops the climb on a parent cycle.
  const folderSegmentsFor = (page: PageNode): string[] => {
    const chain: string[] = [];
    const seen = new Set<string>();
    for (
      let up = parentOf(page);
      up && up.id && !seen.has(up.id);
      up = parentOf(up)
    ) {
      seen.add(up.id);
      chain.push(stemOf(up));
    }
    return chain.reverse();
  };

  let written = 0;
  for (const page of valid) {
    const segments = folderSegmentsFor(page);
    const fileStem = stemOf(page);
    const dir = join(vaultRoot, ...segments);
    await mkdir(dir, { recursive: true });

    // Body + meta only (no comments block) — SPEC §3.
    const markdown = await client.exportPageBody(page.id);
    await writeFile(join(dir, `${fileStem}.md`), markdown, "utf8");
    written += 1;
  }

  console.log(
    `pull complete: ${written} page(s) from space ${spaceId} into ${vaultRoot}`,
  );
}
|
||||||
|
|
||||||
|
// Run main() only when this file is the script Node was started with. When the
// module is merely imported (e.g. by a unit test), nothing runs, so the import
// does not trigger loadSettings() or process.exit.
const entryScript = process.argv[1];
const invokedDirectly =
  typeof entryScript === "string" &&
  pathToFileURL(entryScript).href === import.meta.url;

if (invokedDirectly) {
  main().catch((err) => {
    const detail = err instanceof Error ? err.stack : err;
    console.error("pull failed:", detail);
    process.exit(1);
  });
}
|
||||||
222
src/roundtrip.ts
Normal file
222
src/roundtrip.ts
Normal file
@@ -0,0 +1,222 @@
|
|||||||
|
/**
|
||||||
|
* Phase-0 idempotency harness (SPEC §11, "Задача №0").
|
||||||
|
*
|
||||||
|
* git diffs byte-for-byte, so a non-deterministic export would make every pull
|
||||||
|
* produce a phantom diff -> endless commits/conflicts. Before the auto two-way
|
||||||
|
* mode can be enabled the round-trip `export -> import -> export` MUST be
|
||||||
|
* stable. This CLI proves (or disproves) that property on a given document.
|
||||||
|
*
|
||||||
|
* Usage (offline, the default for CI):
|
||||||
|
* node build/roundtrip.js --fixture path/to/doc.json
|
||||||
|
*
|
||||||
|
* Usage (live — needs a .env with real Docmost creds):
|
||||||
|
* node build/roundtrip.js --page <pageId>
|
||||||
|
*
|
||||||
|
* Exit code: 0 when the markdown is byte-stable, 1 on any markdown mismatch (so
|
||||||
|
* it is CI-able). A non-empty document-level divergence (after stripping block
|
||||||
|
* ids) is reported but does NOT fail the run — the converter reconstructs
|
||||||
|
* schema default attrs, a KNOWN finding per SPEC §11.
|
||||||
|
*/
|
||||||
|
import { readFile } from "node:fs/promises";
|
||||||
|
import { pathToFileURL } from "node:url";
|
||||||
|
import {
|
||||||
|
DocmostClient,
|
||||||
|
convertProseMirrorToMarkdown,
|
||||||
|
markdownToProseMirror,
|
||||||
|
} from "docmost-client";
|
||||||
|
import { loadSettings } from "./settings.js";
|
||||||
|
|
||||||
|
// Default fixture used when no --fixture/--page is given (offline CI path).
|
||||||
|
const DEFAULT_FIXTURE = "test/fixtures/sample-doc.json";
|
||||||
|
|
||||||
|
/**
 * Build a copy of a ProseMirror node tree in which every `attrs.id` is gone.
 *
 * Block ids are regenerated by `markdownToProseMirror` (SPEC §11), so they
 * have to be ignored when two documents are compared for semantic equality.
 *
 * - Arrays are mapped element by element.
 * - Objects are rebuilt key by key; an object-valued `attrs` key loses its
 *   `id` entry and keeps every other attribute.
 * - Anything else (primitives, `null`, `undefined`) is returned unchanged.
 *
 * @param node - Any JSON-like value, typically a ProseMirror document node.
 * @returns A new tree; the input is never mutated.
 */
export function stripBlockIds(node: any): any {
  if (Array.isArray(node)) {
    return node.map((child) => stripBlockIds(child));
  }

  if (!node || typeof node !== "object") {
    return node;
  }

  const copy: any = {};
  for (const [key, value] of Object.entries(node)) {
    const isAttrsObject =
      key === "attrs" && Boolean(value) && typeof value === "object";

    if (!isAttrsObject) {
      copy[key] = stripBlockIds(value);
      continue;
    }

    // Shallow-copy the attrs, drop `id`, then recurse into what is left.
    const withoutId: Record<string, unknown> = {
      ...(value as Record<string, unknown>),
    };
    delete withoutId.id;
    copy.attrs = stripBlockIds(withoutId);
  }
  return copy;
}
|
||||||
|
|
||||||
|
/** Command-line options understood by the round-trip harness. */
interface ParsedArgs {
  /** Path to a ProseMirror JSON fixture (offline mode). */
  fixture?: string;
  /** Docmost page id to load (live mode). */
  page?: string;
}

/**
 * Parse the harness CLI arguments (`--fixture <path>` / `--page <pageId>`).
 *
 * Unknown arguments are ignored. A recognized flag MUST be followed by a
 * non-empty value that is not itself a flag. Without this check `--page` with
 * a missing or empty value (for example an unset CI variable) would quietly
 * fall back to the default fixture, and `--fixture --page x` would try to read
 * a file literally named `--page`.
 *
 * @param argv - Arguments after the script name (`process.argv.slice(2)`).
 * @returns The parsed options; absent flags are left undefined.
 * @throws Error when `--fixture` or `--page` has no usable value.
 */
function parseArgs(argv: string[]): ParsedArgs {
  const args: ParsedArgs = {};
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    if (flag !== "--fixture" && flag !== "--page") continue;

    const value = argv[i + 1];
    if (value === undefined || value === "" || value.startsWith("--")) {
      throw new Error(`missing value for ${flag}`);
    }
    i++; // the value has been consumed

    if (flag === "--fixture") {
      args.fixture = value;
    } else {
      args.page = value;
    }
  }
  return args;
}
|
||||||
|
|
||||||
|
/**
 * Fetch the ProseMirror document the harness will round-trip.
 *
 * Without `args.page` the document is read from a JSON fixture on disk
 * (`args.fixture`, or DEFAULT_FIXTURE when no flag was given). With
 * `args.page` a DocmostClient is built from the loaded settings and the
 * page's `content` is returned.
 */
async function loadDoc(args: ParsedArgs): Promise<any> {
  if (!args.page) {
    // Offline mode: the fixture file holds the ProseMirror doc as JSON.
    const fixturePath = args.fixture ?? DEFAULT_FIXTURE;
    const text = await readFile(fixturePath, "utf8");
    return JSON.parse(text);
  }

  // Live mode: read the page's ProseMirror content from Docmost.
  const settings = loadSettings();
  const client = new DocmostClient(
    settings.docmostApiUrl,
    settings.docmostEmail,
    settings.docmostPassword,
  );
  const page = await client.getPageJson(args.page);
  return page.content;
}
|
||||||
|
|
||||||
|
/**
 * Recursive deep compare that stops at the first difference.
 *
 * @param a - Left-hand value.
 * @param b - Right-hand value.
 * @param path - Path of the current position, rooted at "$"; object keys are
 *   appended as `.key`, array indices as `[i]`, and an array length mismatch
 *   as `.length`.
 * @returns The path plus the two differing values, or null when the values
 *   are deeply equal.
 */
function firstDivergence(
  a: any,
  b: any,
  path = "$",
): { path: string; a: any; b: any } | null {
  if (a === b) return null;

  // Anything that is not a pair of non-null objects differs right here.
  const bothObjects =
    a !== null && b !== null && typeof a === "object" && typeof b === "object";
  if (!bothObjects) {
    return { path, a, b };
  }

  const leftIsArray = Array.isArray(a);
  if (leftIsArray !== Array.isArray(b)) {
    return { path, a, b };
  }

  if (leftIsArray) {
    if (a.length !== b.length) {
      return { path: `${path}.length`, a: a.length, b: b.length };
    }
    for (let index = 0; index < a.length; index += 1) {
      const found = firstDivergence(a[index], b[index], `${path}[${index}]`);
      if (found) return found;
    }
    return null;
  }

  // Plain objects: walk the union of keys, left-hand keys first.
  const unionKeys = new Set<string>(Object.keys(a).concat(Object.keys(b)));
  for (const key of unionKeys) {
    const found = firstDivergence(a[key], b[key], `${path}.${key}`);
    if (found) return found;
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Run the export -> import -> export round-trip on the selected document and
 * report two properties:
 *
 *  1. markdown byte-stability (`md1 === md2`), which decides the exit code;
 *  2. semantic stability of the documents once block ids are stripped, which
 *     is reported only (a KNOWN SPEC §11 finding) and never fails the run.
 *
 * Ends the process with exit code 0 when the markdown is stable, 1 otherwise.
 */
async function main(): Promise<void> {
  const args = parseArgs(process.argv.slice(2));
  const doc = await loadDoc(args);

  let source: string;
  if (args.page) {
    source = `page ${args.page}`;
  } else {
    source = `fixture ${args.fixture ?? DEFAULT_FIXTURE}`;
  }
  console.log(`Round-trip harness — source: ${source}`);

  // export -> import -> export
  const md1 = convertProseMirrorToMarkdown(doc);
  const doc2 = await markdownToProseMirror(md1);
  const md2 = convertProseMirrorToMarkdown(doc2);

  // The byte-stable markdown property is what git actually needs.
  const markdownStable = md1 === md2;

  // Block ids are regenerated on import (SPEC §11), so compare without them.
  const docDivergence = firstDivergence(
    stripBlockIds(doc),
    stripBlockIds(doc2),
  );
  const semanticStable = docDivergence === null;

  console.log("");
  console.log(`markdown byte-stable (md1 === md2): ${markdownStable}`);
  console.log(`document semantically stable (ids stripped): ${semanticStable}`);

  if (!markdownStable) {
    // Locate the first differing line so the divergence is concrete.
    const lines1 = md1.split("\n");
    const lines2 = md2.split("\n");
    const max = Math.max(lines1.length, lines2.length);
    let cursor = 0;
    while (cursor < max && lines1[cursor] === lines2[cursor]) {
      cursor += 1;
    }
    const firstLine = cursor < max ? cursor : -1;

    console.log("");
    console.log("--- markdown divergence ---");
    console.log(`first differing line: ${firstLine + 1}`);
    if (firstLine >= 0) {
      console.log(` export #1: ${JSON.stringify(lines1[firstLine])}`);
      console.log(` export #2: ${JSON.stringify(lines2[firstLine])}`);
    }
    console.log(` md1 length: ${md1.length}, md2 length: ${md2.length}`);
  }

  if (docDivergence !== null) {
    console.log("");
    console.log("--- document divergence (ids stripped) ---");
    console.log(` path: ${docDivergence.path}`);
    console.log(` doc: ${JSON.stringify(docDivergence.a)}`);
    console.log(` doc2: ${JSON.stringify(docDivergence.b)}`);
    console.log(
      " (EXPECTED per SPEC §11: the converter reconstructs schema default" +
        " attrs; does not affect markdown byte-stability)",
    );
  }

  // Only markdown byte-stability decides the CI outcome.
  const verdict = markdownStable
    ? "RESULT: MARKDOWN STABLE"
    : "RESULT: NOT STABLE";
  console.log("");
  console.log(verdict);
  process.exit(markdownStable ? 0 : 1);
}
|
||||||
|
|
||||||
|
// CLI entry guard. main() is started only when this file is the script Node
// was launched with; a plain import (for example a unit test importing
// stripBlockIds) must not trigger a fixture read or process.exit.
const entryScript: string | undefined = process.argv[1];
const invokedDirectly =
  typeof entryScript === "string" &&
  pathToFileURL(entryScript).href === import.meta.url;

if (invokedDirectly) {
  main().catch((failure: unknown) => {
    // Prefer the stack trace when the rejection is a real Error.
    const detail = failure instanceof Error ? failure.stack : failure;
    console.error("roundtrip failed:", detail);
    process.exit(1);
  });
}
|
||||||
109
src/sanitize.ts
Normal file
109
src/sanitize.ts
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
/**
|
||||||
|
* Deterministic filename strategy (SPEC §12).
|
||||||
|
*
|
||||||
|
* The file name is COSMETIC — the source of truth for the file<->page link is
|
||||||
|
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
|
||||||
|
* functions are intentionally dependency-free and pure, so they are trivially
|
||||||
|
* unit-testable.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Printable characters forbidden in file names on common filesystems (mainly
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
// Spaces are NOT in this set; whitespace is normalized separately below.
// ASCII control characters (code points 0..31) are handled in a separate pass
// (see stripControlChars) to keep this literal free of embedded control bytes.
const FORBIDDEN_PRINTABLE_RE = /[/\\<>:"|?*]/g;

// Runs of whitespace collapse to a single space. In sanitizeTitle this runs
// AFTER stripControlChars, so tabs / newlines (code points below 32) have
// already become "-" and never reach this regex; in practice it collapses
// spaces and non-ASCII whitespace such as U+00A0.
const WHITESPACE_RUN_RE = /\s+/g;

// Reserved Windows device names (case-insensitive). A bare match (with or
// without an extension) is unusable as a file name, so it is prefixed with "_".
const RESERVED_WINDOWS_NAMES = new Set([
  "con",
  "prn",
  "aux",
  "nul",
  "com1",
  "com2",
  "com3",
  "com4",
  "com5",
  "com6",
  "com7",
  "com8",
  "com9",
  "lpt1",
  "lpt2",
  "lpt3",
  "lpt4",
  "lpt5",
  "lpt6",
  "lpt7",
  "lpt8",
  "lpt9",
]);

// Cap on the sanitized length, leaving room for an extension and a
// disambiguation suffix within filesystem path-component limits (255 bytes on
// most FSes).
// NOTE(review): the cap counts UTF-16 code units, not bytes, so a long
// non-ASCII title can encode to more than 120 bytes — confirm the byte budget.
const MAX_LENGTH = 120;

/**
 * Replace every ASCII control character (code points 0..31) with "-". Done by
 * scanning code units rather than a control-range regex literal, so the source
 * file carries no embedded control bytes.
 */
function stripControlChars(input: string): string {
  let out = "";
  for (let i = 0; i < input.length; i++) {
    out += input.charCodeAt(i) < 32 ? "-" : input[i];
  }
  return out;
}

/**
 * Sanitize a page title into a safe file-name component (WITHOUT extension).
 *
 * Steps: replace control and forbidden characters with "-", collapse
 * whitespace runs to a single space, trim, cap the length at MAX_LENGTH, then
 * prefix "_" when the result is empty, consists only of dots, or has a
 * reserved Windows device name as its base.
 *
 * @param title - Raw page title; null / undefined is treated as "".
 * @returns A non-empty, deterministic file-name stem.
 */
export function sanitizeTitle(title: string): string {
  let name = stripControlChars(title ?? "")
    .replace(FORBIDDEN_PRINTABLE_RE, "-")
    .replace(WHITESPACE_RUN_RE, " ")
    .trim();

  if (name.length > MAX_LENGTH) {
    let cut = MAX_LENGTH;
    // Never cut a surrogate pair in half: if the last kept code unit is a high
    // surrogate, its low half would be sliced off and leave a lone surrogate
    // (not valid UTF-8 once written to disk). Drop the whole character.
    const lastKept = name.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      cut -= 1;
    }
    name = name.slice(0, cut).trim();
  }

  // Compare the base name (before the first dot) against reserved names, so
  // both "CON" and "con.md" are caught.
  const base = name.split(".")[0]?.toLowerCase() ?? "";

  // A name that is empty, consists only of dots ("." / ".." / "..."), or is a
  // reserved Windows device name is unusable as a path component. The all-dots
  // case is a path-traversal hazard in particular: an unprefixed ".." would
  // become a parent-directory segment and let a page escape the vault, so it
  // MUST be neutralized here (becomes "_..", which is a literal file name).
  if (
    name.length === 0 ||
    /^\.+$/.test(name) ||
    RESERVED_WINDOWS_NAMES.has(base)
  ) {
    name = "_" + name;
  }

  return name;
}
|
||||||
|
|
||||||
|
/**
 * Make a sanitized name unique when two siblings in one folder collapse to
 * the same name. The suffix is derived from the page's `slugId`, so the result
 * is identical on every run (SPEC §12: `Title ~slugId`).
 *
 * @param name - Already-sanitized file-name stem.
 * @param slugId - The page's slug id, used verbatim as the suffix.
 * @returns `name`, a space, a tilde, then `slugId`.
 */
export function disambiguate(name: string, slugId: string): string {
  const suffix = " ~" + slugId;
  return name + suffix;
}
|
||||||
248
test-strategy-report.md
Normal file
248
test-strategy-report.md
Normal file
@@ -0,0 +1,248 @@
|
|||||||
|
# Отчёт по тест-стратегии — docmost-sync — 2026-06-16
|
||||||
|
|
||||||
|
> Двунаправленная синхронизация статей Docmost с локальным Markdown-git-хранилищем
|
||||||
|
> (git — хранилище состояния). Монорепо: корневое приложение-движок (`src/`) +
|
||||||
|
> библиотека `packages/docmost-client` (~7.5k LOC). Стек: TypeScript ESM, Node ≥ 20,
|
||||||
|
> Vitest 3.2.6. Все тесты лежат в корневом `test/` (`include: ['test/**/*.test.ts']`).
|
||||||
|
|
||||||
|
## 1. Исполнительное резюме
|
||||||
|
|
||||||
|
- **Проанализировано модулей:** 9 (1 субагент `module-testability-analyst` на модуль, все завершились).
|
||||||
|
- **Предложено тестов (unit / integration / contract / E2E):** **50 / 7 / 1 / 2** (итого 60).
|
||||||
|
- unit = 83 % (≥ 70 % ✓), integration = 12 % (≤ 20 % ✓), E2E = 3 % и 2 шт. (≤ 5 % и ≤ 10 ✓).
|
||||||
|
- **Отклонено как малоценные:** ≈ 60 символов/областей (декларативные spec-объекты схемы,
|
||||||
|
тривиальные плоские мапперы, framework-обвязка, type-only интерфейсы, passthrough-обёртки).
|
||||||
|
- **Покрытие сейчас (проверено v8 лично):** **2.6 %** statements по обоим пакетам
|
||||||
|
(искажено огромным непокрытым `docmost-client`). Изолированно: корневое приложение ≈ **40 %**,
|
||||||
|
пакет `docmost-client` ≈ **0 %** (поведенчески покрыт лишь `collectRecentSince`).
|
||||||
|
**Прогноз после Фаз 1–4:** ≈ **60–65 %** (чистые lib-модули 80 %+, корневое приложение ≈ 85 %,
|
||||||
|
транспортный `client.ts` ≈ 40 %).
|
||||||
|
|
||||||
|
> ⚠️ **Артефакт измерения покрытия.** `package.json` пакета указывает `main: dist/index.js`,
|
||||||
|
> поэтому `import from 'docmost-client'` грузит **скомпилированный `dist/`**, а не `src/`.
|
||||||
|
> v8 меряет `src/` → показывает `client.ts` 0 %, хотя `collectRecentSince` реально исполняется.
|
||||||
|
> **Перед измерением покрытия** добавить в `vitest.config.ts` alias `docmost-client → packages/docmost-client/src/index.ts`
|
||||||
|
> (или мерить по `dist` после сборки), иначе любые новые тесты библиотеки не отразятся в отчёте.
|
||||||
|
> `@vitest/coverage-v8` и скрипт `"coverage"` в проекте отсутствуют — их нужно добавить.
|
||||||
|
|
||||||
|
## 2. Рекомендации по модулям
|
||||||
|
|
||||||
|
### app-root (`src/`) — движок синка, конфиг, sanitize, round-trip-харнесс
|
||||||
|
- **Извлечь в чистые функции:** `folderSegmentsFor` (`pull.ts:88`, замкнута внутри `main`),
|
||||||
|
`firstDivergence`/`parseArgs` (`roundtrip.ts:101/64`, не экспортированы).
|
||||||
|
- **Unit добавить:** `firstDivergence` (равные/разные деревья, путь расхождения, циклы) —
|
||||||
|
ловит ложное «stable» при реальном расхождении (вся суть харнесса); `nameForNode` (коллизии
|
||||||
|
имён сиблингов → перезапись файлов на диске); `folderSegmentsFor` (вложенность + защита от цикла
|
||||||
|
parent A→B→A, иначе зависание); `parseArgs`; ветка invalid-value в `loadSettingsOrExit`
|
||||||
|
(`config-errors.ts:27-30`, единственный значимый пробел).
|
||||||
|
- **Integration добавить:** `pull.main` с фейковым клиентом + временной директорией
|
||||||
|
(один файл на страницу, верные папки, узлы без id пропускаются) — после R-App-4.
|
||||||
|
- **НЕ тестировать:** `index.ts` (тонкий CLI-passthrough, только `console.log`); `envSchema`
|
||||||
|
(тестировать = тестировать Zod, покрыт через `parseSettings`); тело `roundtrip.main`
|
||||||
|
(байт-стабильность уже покрыта `roundtrip.test.ts`); `invokedDirectly`-guard-блоки;
|
||||||
|
`sanitizeTitle`/`disambiguate`/`parseSettings`/`stripBlockIds` (уже ~100 %).
|
||||||
|
|
||||||
|
### client-core (`packages/docmost-client/src/client.ts`, ~2770 строк) — god-object REST+WS клиент
|
||||||
|
- **Извлечь в чистые функции:** валидаторы `isSafeUrl`/`validateDocUrls`/`validateDocStructure`
|
||||||
|
(`client.ts:905/941/1004`), `imageMimeFromPath`/`buildImageNode` (1844/1864) — поднять в `lib/`
|
||||||
|
рядом с `filters.ts`; распаковку конвертов и clamp-логику пагинации (378-393, 1505) в pure-функции.
|
||||||
|
- **Unit добавить:** XSS-allowlist `isSafeUrl`+`validateDocUrls` (`javascript:`/`data:`/`file:`,
|
||||||
|
пробельно-контрольный обход `java\tscript:`, на всех медиа-узлах) — **высший приоритет по безопасности**;
|
||||||
|
`validateDocStructure` (глубина > 200, не-string type); расширить `collectRecentSince`
|
||||||
|
(граница `updatedAt === sinceIso`, элементы без `id`/`updatedAt`); `imageMimeFromPath`+`buildImageNode`;
|
||||||
|
`paginateAll` (стоп-условия, MAX_PAGES=50 + предупреждение, clamp 1..100, оба конверта) — после R-Client-2;
|
||||||
|
`appUrl`/`shareUrl`/`parseCommentContent`; sandbox `transformPage` (`node:vm`: нет `require`/`process`/`fs`,
|
||||||
|
таймаут 5 c, не-функция/не-doc → throw) — security.
|
||||||
|
- **Integration добавить (после R-Client-1, инъекция HTTP):** авто-реавторизация
|
||||||
|
(401-интерсептор + дедуп `login` + `getCollabTokenWithReauth`: один retry, `/auth/login` не ретраится,
|
||||||
|
`loginPromise` сбрасывается в `finally`); `uploadImage` (порядок guard ext→stat→read, > 20 MiB,
|
||||||
|
пересборка FormData на 401, нет утечки тела ответа в ошибку); `createPage` (replay multipart на 401);
|
||||||
|
`checkNewComments` (битая дата → throw, а не «ничего нового»; граница `createdAt > since`; флаг truncated).
|
||||||
|
- **НЕ тестировать:** тонкие REST-passthrough (`getWorkspace`/`getSpaces`/`renamePage`/`movePage`/
|
||||||
|
`deletePage`/`restorePage`/`listTrash` и пр.) — конверт `data.data ?? data` покрыть один раз
|
||||||
|
извлечённой функцией; делегаты в node-ops/converter/diff (тестировать в их модулях); сами axios/yjs/hocuspocus.
|
||||||
|
|
||||||
|
### markdown-conversion (`lib/markdown-converter.ts` + `markdown-document.ts`) — конвертер ProseMirror↔Markdown
|
||||||
|
- **Unit добавить:** табличная golden-матрица по типам узлов (заголовки, маркированные/кодовые
|
||||||
|
спаны, ссылки с title, картинки с пробелами/скобками в src, кодоблоки с языком + срез хвостовых `\n`,
|
||||||
|
GFM-таблицы с выравниванием, spanned-таблицы → `<table>`, blockquote, task-list, math `a < b`,
|
||||||
|
mention/attachment/callout/details/columns/медиа, hr, hard break, неизвестный тип, пустой doc → `""`);
|
||||||
|
идемпотентность экранирования (`escapeAttr` стабилен на `& "`, `encodeMdUrl` пробел→`%20`),
|
||||||
|
отступы вложенных списков (`indentItemChildren`); envelope `parseDocmostMarkdown`/`serializeDocmostMarkdown`
|
||||||
|
(восстановление meta/body/comments, CRLF, «последний `docmost:comments`-блок побеждает», throw на битом JSON);
|
||||||
|
edge/malformed-вход (`null`/`{}`/нет content, отсутствующие attrs, глубокая вложенность без переполнения стека).
|
||||||
|
- **Integration добавить:** **property-тест round-trip идемпотентности** — `md→PM→md == md` байт-в-байт
|
||||||
|
+ семантическая стабильность через `stripBlockIds`. **Самый ценный тест проекта** (фантомные git-диффы —
|
||||||
|
ровно то, ради чего существует харнесс). Требует фабрику документов и генератор (см. §3).
|
||||||
|
- **НЕ тестировать:** интерфейс `DocmostMdMeta`; одиночный токен `{{SUBPAGES}}`; внутренности
|
||||||
|
`marked`/`@tiptap/html`; underline/sub/sup как отдельные тесты — свернуть в один inline-marks-кейс.
|
||||||
|
|
||||||
|
### prosemirror-schema (`lib/docmost-schema.ts`, ~1065 строк) — ~90 % декларативный конфиг
|
||||||
|
- **Unit добавить (ровно 2, намеренно не раздуваем):** `sanitizeCssColor` (`:44`) — allowlist против
|
||||||
|
CSS/style-инъекции: принять named/hex3-8/rgb(a)/hsl(a), отвергнуть `red; --x:url()`, `expression(...)`,
|
||||||
|
`red"><script>`, пустое/не-string; `clampCalloutType` (`:21`) — нормализация enum + регистр + фолбэк `info`.
|
||||||
|
- **НЕ тестировать:** все `Node.create`/`Mark.create`/`Extension.create` spec-объекты (~26 шт.) и
|
||||||
|
триплеты `default`/`parseHTML`/`renderHTML` — декларативные данные, тест тавтологичен; поведение
|
||||||
|
узлов проверяется **косвенно** через round-trip (другой модуль). Closures `textStyle.getAttrs`,
|
||||||
|
`Highlight`-guard, `Column.width` — покрыть HTML-фикстурами round-trip, не лезть в приватные closures.
|
||||||
|
|
||||||
|
### node-ops (`lib/node-ops.ts`, ~897 строк) — чистые структурные операции над деревом узлов
|
||||||
|
- **Unit добавить (все unit, высочайший ROI — JSON-вход/JSON-выход):** `insertNodeRelative`
|
||||||
|
(append/before/after, by-id/by-anchor, маршрутизация структурных узлов, throw-ветки, offset);
|
||||||
|
`insertTableRow` (индекс/паддинг/наследование типа и colwidth заголовка, OOB→append); `replaceNodeById`
|
||||||
|
(изоляция клонов на N совпадений, без рекурсии в подставленный узел); `getNodeByRef` (`#n` in/out-of-range,
|
||||||
|
дубль id → первый, гарантия клона); `updateTableCell` (переиспользование id первого параграфа,
|
||||||
|
сохранение colspan/rowspan, OOB→throw); `deleteNodeById`/`deleteTableRow` (throw vs тихий no-op);
|
||||||
|
`sanitizeForYjs`+`findUnstorableAttr` (срез `undefined`, путь до bigint/function); `buildOutline`+`readTable`+
|
||||||
|
`blockPlainText` (cols из row-0, усечение, ragged-таблицы). **Везде** ассерт «вход не мутирован».
|
||||||
|
- **Извлечь/рефактор:** инъекция `makeFreshId` (`:591`, `Math.random()`) — для точных ассертов на id
|
||||||
|
в `insertTableRow`/`updateTableCell`; иначе проверять формат+уникальность без рефактора.
|
||||||
|
- **НЕ тестировать:** интерфейсы `OutlineEntry`/`InsertOptions`; внутренние `clone`/`isObject`/`matchesId`/
|
||||||
|
`truncate`/`makeCellParagraph`/`locateTable` (покрыты транзитивно); недостижимый fallback `structuredClone`.
|
||||||
|
|
||||||
|
### collaboration (`lib/collaboration.ts`, ~618 строк) — чистый верх + транспортный низ (Yjs/Hocuspocus/WS)
|
||||||
|
- **Unit добавить (чистый верх, без рефактора):** `buildCollabWsUrl` (http→ws, https→wss, срез `/api`,
|
||||||
|
`/collab` ровно один раз, drop query/hash, fallback на битый URL); `buildYDoc`/`assertYjsEncodable`
|
||||||
|
(валид кодируется; `undefined`-attr санитайзится; неэнкодируемый attr → ошибка с путём; dryRun==apply);
|
||||||
|
`bridgeTaskLists` (ol со всеми чекбоксами → ul taskList, без фантомного orderedList); `preprocessCallouts`
|
||||||
|
(`:::` внутри кодоблока не считается забором; незакрытый callout); `replacePageContent` (guard не-doc → throw).
|
||||||
|
- **Unit (после рефактора R-Collab-1):** ядро `onSynced` read-transform-write — пустой live-doc → дефолт,
|
||||||
|
`transform→null` без записи, `transform throw` пробрасывается, фрагмент заменяется полностью.
|
||||||
|
**Защищает от потери данных при конкурентном редактировании** (инвариант «без `await` между read и write»).
|
||||||
|
- **Unit (после R-Collab-2):** подавление ложного успеха — `unsyncedChanges→0` при разрыве не считается
|
||||||
|
успехом (флаг `connectionLost`); ловит «ложную персистенцию» / reconnect-шторм как успешную запись.
|
||||||
|
- **Integration:** `mutatePageContent` против mock-Hocuspocus-сервера (после R-Collab-2/3 + fake-таймеры).
|
||||||
|
- **НЕ тестировать:** `updatePageContentRealtime` (passthrough); глобальная мутация `window`/`document`/
|
||||||
|
`WebSocket` на импорте (env-обвязка); внутренности yjs/hocuspocus/marked.
|
||||||
|
|
||||||
|
### transforms (`lib/transforms.ts`, ~477 строк) — чистые примитивы трансформации документа
|
||||||
|
- **Unit добавить:** `commentsToFootnotes` — перенумерация/порядок (маркеры не по порядку массива →
|
||||||
|
`[1]..[k]` в порядке чтения, список заметок переупорядочен) **и** иммутабельность входа + throw на
|
||||||
|
несогласованности (`[9]` при 3 заметках, нет heading/orderedList); `insertMarkerAfter` (сплит по нескольким
|
||||||
|
text/mark-ранам, маркер plain, окружающие марки сохранены, scope `beforeBlock`); `setCalloutRange`
|
||||||
|
(статичность regex `lastIndex` на двух text-узлах, только внутри callout, Unicode `…` и ASCII `...`);
|
||||||
|
`mdToInlineNodes` (срез префиксов `комментарий:`/`N.`, граница `**bold**`-лида, пробел не теряется);
|
||||||
|
`walk`/`getList` (полнота обхода, live-ссылка не клон).
|
||||||
|
- **Извлечь/рефактор:** инъекция `freshId` (`:240`) — опционально, для воспроизводимого dryRun.
|
||||||
|
- **НЕ тестировать:** `blockText` (re-export `node-ops.blockPlainText`); `splitInlineBold` (внутр.);
|
||||||
|
`clone`/`isObject`/`freshId`; интерфейсы. Sandbox-eval `(doc,ctx)=>doc` живёт в `client.ts`, не здесь.
|
||||||
|
|
||||||
|
### diff (`lib/diff.ts`, ~319 строк) — headless-дифф документов (чистый, детерминированный)
|
||||||
|
- **Unit добавить:** `diffDocs` (вставка/удаление/идентичность + пустые doc; счётчики
|
||||||
|
`inserted`/`deleted`); подсчёт целостности (images/tables/callouts old→new, дедуп ссылки,
|
||||||
|
разбитой на два рана; битая ссылка считается 1 раз); `footnoteMarkers` (граница body/notes по
|
||||||
|
`notesHeading`, порядок чтения, кастомный/отсутствующий heading); coarse-fallback (форс-исключение
|
||||||
|
precise-пути → нет throw, есть пометка о деградации, whitespace-блоки не репортятся); `blockContextAt`+
|
||||||
|
`blocksChanged` (усечение >80, не-пустой контекст ловит проглоченный `catch`, дедуп блоков).
|
||||||
|
- **НЕ тестировать:** `getSchema(docmostExtensions)` (обвязка); сам алгоритм
|
||||||
|
`recreateTransform`/`ChangeSet`/`simplifyChanges`; интерфейсы; точный порядок строк секции Changes
|
||||||
|
в markdown (порядок задаёт библиотека — проверять множества/счётчики).
|
||||||
|
|
||||||
|
### client-utils (`lib/auth-utils.ts` + `filters.ts` + `json-edit.ts` + `page-lock.ts`, 345 строк)
|
||||||
|
- **Unit добавить:** `applyTextEdits` (`json-edit.ts:45`) — полный набор: single/`replaceAll`,
|
||||||
|
multi-match без replaceAll → throw, «not found» vs «spans multiple formatting runs», **литеральная
|
||||||
|
вставка `$&`/`$1`** (явный foot-gun String.replace), обрезка пустых узлов, иммутабельность входа;
|
||||||
|
`withPageLock` (`page-lock.ts:16`) — сериализация одной страницы, конкурентность разных, ошибка не
|
||||||
|
«отравляет» очередь, реальный reject доходит до вызывающего (через deferred-промисы, **не** sleep);
|
||||||
|
`performLogin` парсинг cookie (точное имя `authToken` ≠ `authTokenRefresh`, base64-`=` не обрезается);
|
||||||
|
`filterPage` (условный spread: `content === ""` включается, не-string опускается); `getCollabToken`
|
||||||
|
(распаковка `data.data.token`→`data.token`, `err.status` выживает, тело ответа не утекает без `DEBUG`);
|
||||||
|
`filterComment` (`??` vs `||`: пустая строка markdownContent сохраняется).
|
||||||
|
- **НЕ тестировать:** `filterWorkspace`/`filterSpace`/`filterGroup` (плоские мапперы без ветвлений —
|
||||||
|
максимум один общий shape-ассерт); интерфейсы `TextEdit`/`TextEditResult`; приватные
|
||||||
|
`collectText`/`countOccurrences`/`truncate`.
|
||||||
|
|
||||||
|
## 3. Сквозные аспекты
|
||||||
|
|
||||||
|
- **Contract-тесты** (1 набор): между `docmost-client` и живым Docmost — записанные фикстуры/pact-стиль,
|
||||||
|
проверяющие конверты ответов (`data.data ?? data`, `items`-vs-bare-array, `meta.hasNextPage`), от которых
|
||||||
|
зависит весь клиент. Привязать к закреплённой версии Docmost; ловит дрейф контракта API.
|
||||||
|
- **Property-based** (через извлечение чистых функций): (1) round-trip Markdown — корона стратегии;
|
||||||
|
(2) инварианты иммутабельности node-ops; (3) идемпотентность `commentsToFootnotes`/`setCalloutRange`.
|
||||||
|
Рекомендуется dev-зависимость `fast-check` (с воспроизводимым seed + shrinking).
|
||||||
|
- **Дымовые/нагрузочные:** неприменимо (нет высоконагруженных путей); пропустить.
|
||||||
|
- **Test-data factories (нужны):** билдер ProseMirror-документов (узлы/марки) для golden+property-тестов;
|
||||||
|
фабрика конвертов REST-ответов Docmost; фабрика login/Set-Cookie-ответа; корпус фикстур для round-trip
|
||||||
|
(расширить `test/fixtures/sample-doc.json`).
|
||||||
|
|
||||||
|
## 4. Обнаруженные антипаттерны
|
||||||
|
|
||||||
|
- **God-объект:** `DocmostClient` — ~2770 строк, ~58 членов (auth + REST + WS + FS + comments + vm-sandbox)
|
||||||
|
в одном классе (`client.ts`). Нет шва для изоляции одной ответственности.
|
||||||
|
- **Скрытые побочные эффекты на импорте:** глобальная мутация `global.window`/`document`/`Element`/`WebSocket`
|
||||||
|
(`collaboration.ts:13-19`) — импорт модуля меняет глобал воркера; конструктор `DocmostClient` вешает
|
||||||
|
axios-интерсептор и создаёт реальный axios.
|
||||||
|
- **Нетестируемые синглтоны / общее состояние:** модульная `Map chains` (`page-lock.ts:11`) — состояние течёт
|
||||||
|
между тестами в одном воркере (изолировать `pageId`/`vi.resetModules()`); `Math.random()` в
|
||||||
|
`node-ops.ts:591` (`makeFreshId`) и `transforms.ts:240` (`freshId`) — недетерминизм id.
|
||||||
|
- **Порядкозависимые тесты (риск):** чтение `process.env.DEBUG` в `auth-utils.ts` (set/unset + restore);
|
||||||
|
глобалы collaboration; общая `chains`-Map.
|
||||||
|
- **Артефакт покрытия dist-vs-src:** `main: dist/index.js` → тесты исполняют скомпилированный код, v8 меряет
|
||||||
|
`src` → ложные 0 % (см. §1).
|
||||||
|
- **Чистая логика в ловушке `async main()`:** `pull.ts`/`roundtrip.ts` — поэтому 0 %/19 % при наличии
|
||||||
|
тестируемой чистой логики.
|
||||||
|
- **`node:vm` исполняет пользовательский JS** (`client.ts::transformPage`, ~2491) — security, нужен явный тест
|
||||||
|
на отсутствие escape (`require`/`process`/`fs`) и таймаут.
|
||||||
|
- **Проглоченные ошибки:** `diff.ts:172` (`catch{return ""}` маскирует баг резолвера позиции);
|
||||||
|
пустые `catch` в cleanup collaboration. Статичный `lastIndex` regex `/g` в `transforms.ts:216`.
|
||||||
|
- **Нестабильные тесты (CI-история):** н/д — CI-история отказов отсутствует (проект на Increment 1,
|
||||||
|
тесты только базовые); пункт неактуален сейчас.
|
||||||
|
|
||||||
|
## 5. Необходимые рефакторинги перед написанием тестов
|
||||||
|
|
||||||
|
- **R-App-1** — извлечь `folderSegmentsFor` на верхний уровень + экспортировать `nameForNode`.
|
||||||
|
Блокирует: unit-тесты путей `pull.ts` (коллизии, защита от цикла).
|
||||||
|
- **R-App-2** — экспортировать `parseArgs` и `firstDivergence` (`roundtrip.ts`).
|
||||||
|
Блокирует: unit-тесты дивергенции и парсинга аргументов.
|
||||||
|
- **R-App-4** — инъекция клиента + fs в `pull.main`. Блокирует: integration-тест `pull.main`.
|
||||||
|
- **R-Client-1** — инъекция HTTP-клиента (axios-instance + multipart-poster).
|
||||||
|
Блокирует: все integration-тесты REST (auth-реавторизация, uploadImage, createPage, checkNewComments).
|
||||||
|
- **R-Client-2** — извлечь pure-функции маппинга ответов/конвертов/clamp.
|
||||||
|
Блокирует: перевод ~15 кейсов из integration в быстрый unit (`paginateAll`, list-endpoints).
|
||||||
|
- **R-Client-3** — инъекция collab-транспорта (`mutatePageContent`/provider-factory).
|
||||||
|
Блокирует: unit-тесты оркестрации patch/insert/delete/table/comment.
|
||||||
|
- **R-Client-4** — поднять чистые валидаторы (`isSafeUrl`/`validateDocUrls`/`validateDocStructure`/
|
||||||
|
`imageMimeFromPath`/`buildImageNode`) в `lib/` или экспортировать. Блокирует: XSS-unit-тесты (высший приоритет).
|
||||||
|
- **R-Collab-1** — извлечь тело `onSynced` в чистую `applyTransformToYdoc(ydoc, transform)`.
|
||||||
|
Блокирует: unit ядра read-transform-write (потеря данных).
|
||||||
|
- **R-Collab-2 + R-Collab-3** — инъекция provider-factory и часов (fake-таймеры).
|
||||||
|
Блокируют: тесты ложного успеха/таймаутов и integration `mutatePageContent`.
|
||||||
|
- **R-NodeOps / R-Transforms** — инъекция `makeFreshId`/`freshId` (опционально).
|
||||||
|
Блокируют: только точные ассерты на id; без рефактора — проверять формат+уникальность.
|
||||||
|
- **Инфраструктура** — добавить `@vitest/coverage-v8` + скрипт `"coverage"`; alias `docmost-client→src`
|
||||||
|
в `vitest.config.ts`; dev-deps `fast-check` (property) и mock-ws/msw (integration). Эти изменения
|
||||||
|
трогают конфиги/`package.json` — вне правки данного отчёта, заложить в Фазу 1.
|
||||||
|
|
||||||
|
## 6. План внедрения (по фазам)
|
||||||
|
|
||||||
|
- **Фаза 1 — чистые unit, нулевой/малый рефактор (наивысший ROI).** node-ops (8), transforms (5–6),
|
||||||
|
diff (5), client-utils (6), guards схемы (2), golden-матрица + envelope конвертера (3),
|
||||||
|
чистый верх collaboration (5), чистые валидаторы клиента после R-Client-4 (XSS + structure + image),
|
||||||
|
расширение `collectRecentSince`, app-root после R-App-1/2 (5). Плюс инфраструктура покрытия/alias.
|
||||||
|
*ROI:* мгновенно поднимает покрытие самой дефектоопасной чистой логики (потеря данных, XSS) почти без риска.
|
||||||
|
- **Фаза 2 — корона: property-тест round-trip Markdown + фабрика документов.** Ловит фантомные git-диффы и
|
||||||
|
неидемпотентность — главный класс дефектов всего инструмента синхронизации.
|
||||||
|
- **Фаза 3 — refactor-gated unit.** R-Collab-1 → ядро `onSynced` (потеря данных при конкуренции);
|
||||||
|
R-Client-2 → unit пагинации/list-endpoints; sandbox `transformPage` (security).
|
||||||
|
- **Фаза 4 — integration с DI.** R-Client-1/3 → авто-реавторизация, uploadImage, createPage, checkNewComments;
|
||||||
|
R-Collab-2/3 → подавление ложного успеха + e2e против mock-WS; R-App-4 → `pull.main`.
|
||||||
|
- **Фаза 5 — contract + E2E.** 1 contract-набор против закреплённой версии Docmost;
|
||||||
|
**2 E2E-смоука** против `docker-compose` Docmost — user journeys: (1) «pull пространства в vault»
|
||||||
|
(страницы → файлы с верной иерархией), (2) «round-trip страницы без фантомного diff».
|
||||||
|
|
||||||
|
## 7. Источники
|
||||||
|
|
||||||
|
- Отчёты **9** субагентов `module-testability-analyst` (app-root, client-core, markdown-conversion,
|
||||||
|
prosemirror-schema, node-ops, collaboration, transforms, diff, client-utils).
|
||||||
|
- Вывод coverage-инструмента: `vitest run --coverage` (provider v8), запущен оркестратором лично;
|
||||||
|
6 тест-файлов / 33 теста зелёные; overall 2.6 % statements (артефакт dist-vs-src учтён).
|
||||||
|
- **Фильтрация предложений:**
|
||||||
|
- Шаг 1 (кросс-модульный дедуп): снято ≈ 20 (поведение схемы → round-trip; делегаты client.ts →
|
||||||
|
модули node-ops/converter/diff; `blockText` → node-ops).
|
||||||
|
- Шаг 2 (skip-list): снято ≈ 40 (декларативные spec-объекты схемы ~26, плоские фильтры 3,
|
||||||
|
type-only интерфейсы, framework-обвязка, `index.ts`, passthrough-обёртки).
|
||||||
|
- Шаг 3 (бюджет пирамиды): E2E сведены к 2; множество per-endpoint integration свёрнуты в `paginateAll`
|
||||||
|
+ представительные кейсы.
|
||||||
|
- Шаг 6 (adversarial): оставлены только тесты с конкретными ассертами, падающими при реалистичной
|
||||||
|
поломке (отказ XSS-схем, байт-равенство round-trip, порядок перенумерации сносок); тавтологичные
|
||||||
|
ассерты атрибутов схемы отброшены.
|
||||||
144
test/fixtures/sample-doc.json
vendored
Normal file
144
test/fixtures/sample-doc.json
vendored
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
{
|
||||||
|
"type": "doc",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "heading",
|
||||||
|
"attrs": { "level": 1, "id": "h-1" },
|
||||||
|
"content": [{ "type": "text", "text": "Round-trip sample" }]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"attrs": { "id": "p-1" },
|
||||||
|
"content": [
|
||||||
|
{ "type": "text", "text": "This paragraph has " },
|
||||||
|
{ "type": "text", "marks": [{ "type": "bold" }], "text": "bold" },
|
||||||
|
{ "type": "text", "text": ", " },
|
||||||
|
{ "type": "text", "marks": [{ "type": "italic" }], "text": "italic" },
|
||||||
|
{ "type": "text", "text": " and a " },
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"marks": [{ "type": "link", "attrs": { "href": "https://example.com" } }],
|
||||||
|
"text": "link"
|
||||||
|
},
|
||||||
|
{ "type": "text", "text": "." }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"attrs": { "id": "p-2" },
|
||||||
|
"content": [
|
||||||
|
{ "type": "text", "text": "Here is a " },
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"marks": [
|
||||||
|
{ "type": "comment", "attrs": { "commentId": "cmt-abc123", "resolved": false } }
|
||||||
|
],
|
||||||
|
"text": "commented span"
|
||||||
|
},
|
||||||
|
{ "type": "text", "text": " that must survive the round-trip." }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "bulletList",
|
||||||
|
"attrs": { "id": "ul-1" },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "listItem",
|
||||||
|
"attrs": { "id": "li-1" },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"attrs": { "id": "p-3" },
|
||||||
|
"content": [{ "type": "text", "text": "First bullet" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "listItem",
|
||||||
|
"attrs": { "id": "li-2" },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"attrs": { "id": "p-4" },
|
||||||
|
"content": [{ "type": "text", "text": "Second bullet" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "table",
|
||||||
|
"attrs": { "id": "tbl-1" },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "tableRow",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "tableHeader",
|
||||||
|
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"content": [{ "type": "text", "text": "Name" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "tableHeader",
|
||||||
|
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"content": [{ "type": "text", "text": "Value" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "tableRow",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "tableCell",
|
||||||
|
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"content": [{ "type": "text", "text": "alpha" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "tableCell",
|
||||||
|
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"content": [{ "type": "text", "text": "1" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "callout",
|
||||||
|
"attrs": { "type": "info", "id": "callout-1" },
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "paragraph",
|
||||||
|
"attrs": { "id": "p-5" },
|
||||||
|
"content": [{ "type": "text", "text": "This is an info callout." }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "codeBlock",
|
||||||
|
"attrs": { "language": "js", "id": "code-1" },
|
||||||
|
"content": [
|
||||||
|
{ "type": "text", "text": "const a = 1;\nconsole.log(a);\n" }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
178
test/recent-since.test.ts
Normal file
178
test/recent-since.test.ts
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
import { collectRecentSince } from 'docmost-client';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for the pure cursor-pagination helper behind listRecentSince.
|
||||||
|
* `fetchPage` is faked (no network); each test models a different server
|
||||||
|
* behaviour to exercise one stop condition.
|
||||||
|
*/
|
||||||
|
|
||||||
|
type Item = { id: string; updatedAt: string };
|
||||||
|
|
||||||
|
/**
 * Build a fake `fetchPage` from a list of pages, for driving cursor-pagination
 * code without a network.
 *
 * Pages are served in order: a `null` cursor selects page 0 and the cursor
 * `"cN"` (handed out together with page N) selects page N+1. Every page except
 * the last carries a `nextCursor`; the last one carries `null`.
 *
 * The cursor handed back by the caller is checked against the one most recently
 * emitted (`null` before the first call). A caller that mis-threads the cursor
 * (skips a page, replays a stale cursor, starts mid-walk) makes `fetchPage`
 * reject instead of silently being served some other page.
 *
 * @param pages - The pages to serve, in server order.
 * @returns The fake `fetchPage` plus a live `calls` getter that counts every
 *   invocation, including rejected ones.
 */
function fakeServer<T>(pages: T[][]) {
  let calls = 0;
  // Cursor the next call must present; stays null until a page hands one out.
  let expectedCursor: string | null = null;
  const cursorFor = (i: number) => (i < pages.length - 1 ? `c${i}` : null);
  const fetchPage = async (cursor: string | null) => {
    calls++;
    if (cursor !== expectedCursor) {
      throw new Error(
        `fakeServer: expected cursor ${JSON.stringify(expectedCursor)} but got ${JSON.stringify(cursor)}`,
      );
    }
    // Resolve which page this cursor selects: null -> page 0, "cN" -> page N+1.
    const idx = cursor === null ? 0 : Number(cursor.slice(1)) + 1;
    const items = pages[idx] ?? [];
    const nextCursor = cursorFor(idx);
    // Remember what we emitted so the next call can be validated against it.
    expectedCursor = nextCursor;
    return { items, nextCursor };
  };
  return {
    fetchPage,
    get calls() {
      return calls;
    },
  };
}
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
vi.restoreAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('collectRecentSince', () => {
|
||||||
|
it('stops at the cutoff page and does not fetch beyond it', async () => {
|
||||||
|
// Page 0: all newer than the cutoff. Page 1: contains the cutoff item, so
|
||||||
|
// the walk must stop here and never request page 2.
|
||||||
|
const server = fakeServer([
|
||||||
|
[
|
||||||
|
{ id: 'a', updatedAt: '2026-06-16T10:00:00Z' },
|
||||||
|
{ id: 'b', updatedAt: '2026-06-16T09:00:00Z' },
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{ id: 'c', updatedAt: '2026-06-16T08:00:00Z' },
|
||||||
|
{ id: 'd', updatedAt: '2026-06-16T05:00:00Z' }, // <= cutoff -> stop
|
||||||
|
{ id: 'e', updatedAt: '2026-06-16T04:00:00Z' },
|
||||||
|
],
|
||||||
|
[{ id: 'f', updatedAt: '2026-06-16T03:00:00Z' }], // must NOT be fetched
|
||||||
|
]);
|
||||||
|
|
||||||
|
const out = await collectRecentSince(
|
||||||
|
server.fetchPage,
|
||||||
|
'2026-06-16T05:00:00Z',
|
||||||
|
);
|
||||||
|
|
||||||
|
// Only strictly-newer items, in server order; the cutoff item 'd' and
|
||||||
|
// everything after it is excluded.
|
||||||
|
expect(out.map((i) => i.id)).toEqual(['a', 'b', 'c']);
|
||||||
|
// Fetched page 0 and page 1 only — stopped at the cutoff page.
|
||||||
|
expect(server.calls).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('dedups ids that overlap across pages', async () => {
|
||||||
|
// The cursor advances, but page boundaries overlap: 'b' appears on both
|
||||||
|
// pages. The dedup-by-id Set must keep it exactly once.
|
||||||
|
const server = fakeServer([
|
||||||
|
[
|
||||||
|
{ id: 'a', updatedAt: '2026-06-16T10:00:00Z' },
|
||||||
|
{ id: 'b', updatedAt: '2026-06-16T09:00:00Z' },
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{ id: 'b', updatedAt: '2026-06-16T09:00:00Z' }, // overlap
|
||||||
|
{ id: 'c', updatedAt: '2026-06-16T08:00:00Z' },
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const out = await collectRecentSince(
|
||||||
|
server.fetchPage,
|
||||||
|
'2026-06-16T01:00:00Z',
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(out.map((i) => i.id)).toEqual(['a', 'b', 'c']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('terminates when the server ignores the cursor (zero new items)', async () => {
|
||||||
|
// A broken server that returns the SAME first page on every call and always
|
||||||
|
// claims a nextCursor. Without the zero-new-items guard this loops to the
|
||||||
|
// cap; with it, the second fetch contributes nothing and the walk stops.
|
||||||
|
let calls = 0;
|
||||||
|
const fetchPage = async (_cursor: string | null) => {
|
||||||
|
calls++;
|
||||||
|
return {
|
||||||
|
items: [
|
||||||
|
{ id: 'a', updatedAt: '2026-06-16T10:00:00Z' },
|
||||||
|
{ id: 'b', updatedAt: '2026-06-16T09:00:00Z' },
|
||||||
|
] as Item[],
|
||||||
|
nextCursor: 'always', // server always claims another page
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const out = await collectRecentSince(fetchPage, '2026-06-16T01:00:00Z');
|
||||||
|
|
||||||
|
// The newer items are returned exactly once (no hang, no duplicates).
|
||||||
|
expect(out.map((i) => i.id)).toEqual(['a', 'b']);
|
||||||
|
// First page collects, second page is all-seen -> stop. Capped well below
|
||||||
|
// the default hardPageCap, proving the loop terminated.
|
||||||
|
expect(calls).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns only the first page when sinceIso is null', async () => {
|
||||||
|
const server = fakeServer([
|
||||||
|
[
|
||||||
|
{ id: 'a', updatedAt: '2026-06-16T10:00:00Z' },
|
||||||
|
{ id: 'b', updatedAt: '2026-06-16T09:00:00Z' },
|
||||||
|
],
|
||||||
|
[{ id: 'c', updatedAt: '2026-06-16T08:00:00Z' }],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const out = await collectRecentSince(server.fetchPage, null);
|
||||||
|
|
||||||
|
expect(out.map((i) => i.id)).toEqual(['a', 'b']);
|
||||||
|
// Exactly one page fetched.
|
||||||
|
expect(server.calls).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('stops at hardPageCap and warns when results may be truncated', async () => {
|
||||||
|
// Every page is all-newer-than-cutoff, every item is unique, and there is
|
||||||
|
// always a nextCursor: the only thing that can stop the walk is the cap.
|
||||||
|
let n = 0;
|
||||||
|
const fetchPage = async (_cursor: string | null) => {
|
||||||
|
const id = `id${n++}`;
|
||||||
|
return {
|
||||||
|
items: [{ id, updatedAt: '2026-06-16T10:00:00Z' }] as Item[],
|
||||||
|
nextCursor: 'next', // never runs out
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
const warn = vi.spyOn(console, 'warn').mockImplementation(() => {});
|
||||||
|
|
||||||
|
const cap = 5;
|
||||||
|
const out = await collectRecentSince(
|
||||||
|
fetchPage,
|
||||||
|
'2020-01-01T00:00:00Z',
|
||||||
|
cap,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Exactly `cap` pages were collected (one unique item each).
|
||||||
|
expect(out).toHaveLength(cap);
|
||||||
|
expect(warn).toHaveBeenCalledTimes(1);
|
||||||
|
expect(String(warn.mock.calls[0][0])).toContain('hardPageCap=5');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves server (descending) order across pages', async () => {
|
||||||
|
const server = fakeServer([
|
||||||
|
[
|
||||||
|
{ id: 'a', updatedAt: '2026-06-16T10:00:00Z' },
|
||||||
|
{ id: 'b', updatedAt: '2026-06-16T09:00:00Z' },
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{ id: 'c', updatedAt: '2026-06-16T08:00:00Z' },
|
||||||
|
{ id: 'd', updatedAt: '2026-06-16T07:00:00Z' },
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
|
||||||
|
const out = await collectRecentSince(
|
||||||
|
server.fetchPage,
|
||||||
|
'2026-06-16T01:00:00Z',
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(out.map((i) => i.id)).toEqual(['a', 'b', 'c', 'd']);
|
||||||
|
});
|
||||||
|
});
|
||||||
29
test/roundtrip.test.ts
Normal file
29
test/roundtrip.test.ts
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
import { readFile } from 'node:fs/promises';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import { dirname, join } from 'node:path';
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
convertProseMirrorToMarkdown,
|
||||||
|
markdownToProseMirror,
|
||||||
|
} from 'docmost-client';
|
||||||
|
|
||||||
|
// Resolve the fixture relative to this test file so the test is CWD-independent.
|
||||||
|
const here = dirname(fileURLToPath(import.meta.url));
|
||||||
|
const FIXTURE = join(here, 'fixtures', 'sample-doc.json');
|
||||||
|
|
||||||
|
describe('round-trip idempotency (SPEC §11)', () => {
|
||||||
|
it('markdown is byte-stable across export -> import -> export', async () => {
|
||||||
|
const doc = JSON.parse(await readFile(FIXTURE, 'utf8'));
|
||||||
|
|
||||||
|
// export -> import -> export
|
||||||
|
const md1 = convertProseMirrorToMarkdown(doc);
|
||||||
|
const doc2 = await markdownToProseMirror(md1);
|
||||||
|
const md2 = convertProseMirrorToMarkdown(doc2);
|
||||||
|
|
||||||
|
// The property git actually needs: a second export reproduces the first
|
||||||
|
// byte-for-byte. We intentionally do NOT deep-equal doc vs doc2 — the
|
||||||
|
// converter reconstructs schema default attrs (e.g. indent:null), a known
|
||||||
|
// SPEC §11 divergence that does not affect markdown stability.
|
||||||
|
expect(md2).toBe(md1);
|
||||||
|
});
|
||||||
|
});
|
||||||
96
test/sanitize.test.ts
Normal file
96
test/sanitize.test.ts
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { sanitizeTitle, disambiguate } from '../src/sanitize.js';
|
||||||
|
|
||||||
|
describe('sanitizeTitle', () => {
|
||||||
|
it('passes a plain title through unchanged', () => {
|
||||||
|
expect(sanitizeTitle('Getting Started')).toBe('Getting Started');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('replaces every forbidden printable character with a dash', () => {
|
||||||
|
// Forbidden set: / \ < > : " | ? *
|
||||||
|
expect(sanitizeTitle('a/b\\c<d>e:f"g|h?i*j')).toBe('a-b-c-d-e-f-g-h-i-j');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('replaces ASCII control characters with a dash', () => {
|
||||||
|
// Build the input with explicit control code points (tab=9, newline=10) to
|
||||||
|
// avoid editor escaping pitfalls. Control chars become "-" BEFORE
|
||||||
|
// whitespace is collapsed, so they survive as dashes (not a folded space).
|
||||||
|
const TAB = String.fromCharCode(9);
|
||||||
|
const NL = String.fromCharCode(10);
|
||||||
|
expect(sanitizeTitle('a b' + TAB + 'c' + NL + 'd')).toBe('a b-c-d');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('collapses runs of plain whitespace to a single space and trims', () => {
|
||||||
|
expect(sanitizeTitle(' hello world ')).toBe('hello world');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('caps the length at 120 characters', () => {
|
||||||
|
const long = 'x'.repeat(200);
|
||||||
|
const out = sanitizeTitle(long);
|
||||||
|
expect(out.length).toBe(120);
|
||||||
|
expect(out).toBe('x'.repeat(120));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('prefixes reserved Windows names with an underscore', () => {
|
||||||
|
expect(sanitizeTitle('CON')).toBe('_CON');
|
||||||
|
expect(sanitizeTitle('nul')).toBe('_nul');
|
||||||
|
// The base name (before the first dot) is what matters.
|
||||||
|
expect(sanitizeTitle('con.md')).toBe('_con.md');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not flag names that merely contain a reserved word', () => {
|
||||||
|
expect(sanitizeTitle('console')).toBe('console');
|
||||||
|
expect(sanitizeTitle('Control')).toBe('Control');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns "_" for empty or whitespace-only input', () => {
|
||||||
|
expect(sanitizeTitle('')).toBe('_');
|
||||||
|
expect(sanitizeTitle(' ')).toBe('_');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles a title that is only forbidden characters', () => {
|
||||||
|
// Each forbidden char becomes "-", so the result is non-empty and safe.
|
||||||
|
expect(sanitizeTitle('///')).toBe('---');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('neutralizes all-dot names so they cannot escape the vault', () => {
|
||||||
|
// ".", "..", "..." (and whitespace-padded variants) are path-traversal
|
||||||
|
// hazards as directory segments. The result must never be a pure-dot
|
||||||
|
// segment and must contain no path separators.
|
||||||
|
for (const input of ['.', '..', '...', ' .. ']) {
|
||||||
|
const out = sanitizeTitle(input);
|
||||||
|
expect(['.', '..', '...']).not.toContain(out);
|
||||||
|
expect(/^\.+$/.test(out)).toBe(false);
|
||||||
|
expect(out).not.toContain('/');
|
||||||
|
expect(out).not.toContain('\\');
|
||||||
|
}
|
||||||
|
// The concrete prefixing behaviour (existing "_" safeguard).
|
||||||
|
expect(sanitizeTitle('.')).toBe('_.');
|
||||||
|
expect(sanitizeTitle('..')).toBe('_..');
|
||||||
|
expect(sanitizeTitle('...')).toBe('_...');
|
||||||
|
expect(sanitizeTitle(' .. ')).toBe('_..');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('is deterministic — the same input yields the same output', () => {
|
||||||
|
const title = 'Some / weird : title?';
|
||||||
|
expect(sanitizeTitle(title)).toBe(sanitizeTitle(title));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('disambiguate', () => {
|
||||||
|
it('appends a stable ~slugId suffix', () => {
|
||||||
|
expect(disambiguate('Notes', 'abc123')).toBe('Notes ~abc123');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('is deterministic for the same name and slugId', () => {
|
||||||
|
expect(disambiguate('Notes', 'abc123')).toBe(
|
||||||
|
disambiguate('Notes', 'abc123'),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('produces distinct names for colliding siblings', () => {
|
||||||
|
const a = disambiguate('Notes', 'slug-a');
|
||||||
|
const b = disambiguate('Notes', 'slug-b');
|
||||||
|
expect(a).not.toBe(b);
|
||||||
|
});
|
||||||
|
});
|
||||||
80
test/strip-block-ids.test.ts
Normal file
80
test/strip-block-ids.test.ts
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { stripBlockIds } from '../src/roundtrip.js';
|
||||||
|
|
||||||
|
describe('stripBlockIds', () => {
|
||||||
|
it('removes only attrs.id, recursively, keeping every other attribute', () => {
|
||||||
|
const input = {
|
||||||
|
type: 'doc',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'heading',
|
||||||
|
attrs: { id: 'h1', level: 2 },
|
||||||
|
content: [{ type: 'text', text: 'Title' }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'callout',
|
||||||
|
attrs: { id: 'c1', kind: 'info' },
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'paragraph',
|
||||||
|
attrs: { id: 'p1', indent: null },
|
||||||
|
content: [{ type: 'text', text: 'Body' }],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
const out = stripBlockIds(input);
|
||||||
|
|
||||||
|
expect(out).toEqual({
|
||||||
|
type: 'doc',
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'heading',
|
||||||
|
attrs: { level: 2 },
|
||||||
|
content: [{ type: 'text', text: 'Title' }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: 'callout',
|
||||||
|
attrs: { kind: 'info' },
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: 'paragraph',
|
||||||
|
attrs: { indent: null },
|
||||||
|
content: [{ type: 'text', text: 'Body' }],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
// No stray `id` survives anywhere in the tree.
|
||||||
|
expect(JSON.stringify(out)).not.toContain('"id"');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does not mutate its input (frozen object passes through unchanged)', () => {
|
||||||
|
const inner = Object.freeze({
|
||||||
|
type: 'paragraph',
|
||||||
|
attrs: Object.freeze({ id: 'p1', indent: null }),
|
||||||
|
content: Object.freeze([
|
||||||
|
Object.freeze({ type: 'text', text: 'x' }),
|
||||||
|
]),
|
||||||
|
});
|
||||||
|
const input = Object.freeze({
|
||||||
|
type: 'doc',
|
||||||
|
content: Object.freeze([inner]),
|
||||||
|
});
|
||||||
|
const before = JSON.stringify(input);
|
||||||
|
|
||||||
|
// Would throw on any write to a frozen node if the function mutated input.
|
||||||
|
const out = stripBlockIds(input);
|
||||||
|
|
||||||
|
// Input is structurally identical after the call (no mutation).
|
||||||
|
expect(JSON.stringify(input)).toBe(before);
|
||||||
|
// The id is gone from the returned (new) tree.
|
||||||
|
expect((out.content[0].attrs as Record<string, unknown>).id).toBeUndefined();
|
||||||
|
expect((out.content[0].attrs as Record<string, unknown>).indent).toBeNull();
|
||||||
|
// A fresh tree is returned, not the same reference.
|
||||||
|
expect(out).not.toBe(input);
|
||||||
|
});
|
||||||
|
});
|
||||||
13
tsconfig.base.json
Normal file
13
tsconfig.base.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2022",
|
||||||
|
"module": "Node16",
|
||||||
|
"moduleResolution": "Node16",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"declaration": true,
|
||||||
|
"sourceMap": true
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user