feat(sync): scaffold monorepo, extract docmost-client, add Phase-0 harness + read-only pull

Lock the access-layer decision (REST only) and start implementation per SPEC.

- monorepo (npm workspaces): packages/docmost-client = DocmostClient + lib/*
  copied 1:1 from docmost-mcp/src (backport target), plus bannered sync methods
  (listTrash, restorePage, listAllSpacePages, exportPageBody, listRecentSince /
  collectRecentSince cursor scan)
- engine stays the root app per AGENTS.md (src/, test/, build/, data/, settings.ts);
  add roundtrip.ts (SPEC §11 idempotency harness), pull.ts (SPEC §6 read-only
  Docmost->FS mirror), sanitize.ts (SPEC §12 filenames, path-traversal-safe)
- Dockerfile builds the workspace lib before the app; vitest gates CI
- exportPageBody never touches /comments (SPEC §3); serializeDocmostMarkdownBody
  emits meta + body only
- SPEC: resolve access-layer (REST), reflect root-engine layout + REST pagination
- tests: sanitize (incl. dot-traversal), collectRecentSince (cutoff/dedup/cap),
  stripBlockIds, markdown round-trip byte-stability

Note: raw ProseMirror round-trip is byte-stable in Markdown but not yet attribute-
idempotent (SPEC §11 Задача №0, before Phase 2).
This commit is contained in:
vvzvlad
2026-06-16 20:20:20 +03:00
parent 2f92dc4c1f
commit 447d2508ae
33 changed files with 10502 additions and 174 deletions

137
src/pull.ts Normal file
View File

@@ -0,0 +1,137 @@
/**
* Read-only Docmost -> filesystem mirror (SPEC §6 pull, Phase 1).
*
* Walks the configured space's page tree and writes one self-contained `.md`
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
* local files. The meta block inside each file carries pageId/slugId/
* parentPageId (identity), so no external map file is needed.
*
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
* be correct, but is not expected to be run without live access.
*
* Run via: npm run pull (-> node build/pull.js)
*/
import { mkdir, writeFile } from "node:fs/promises";
import { join } from "node:path";
import { pathToFileURL } from "node:url";
import { DocmostClient } from "docmost-client";
import { loadSettings } from "./settings.js";
import { sanitizeTitle, disambiguate } from "./sanitize.js";
/**
* Flat page node as returned by listAllSpacePages (no content).
*
* Only `id` is required. Every other field is optional, so the code in this
* file supplies a fallback wherever it reads one.
*/
interface PageNode {
/** Page identifier; used in this file as the key for parent lookups and for the resolved-name map. */
id: string;
/** Page title; passed through sanitizeTitle to build the file/folder name (an empty string is used when absent). */
title?: string;
/** Suffix source used to disambiguate colliding sibling names; `id` is used instead when this is absent. */
slugId?: string;
/** Id of the parent page. A missing, null, or otherwise falsy value is treated as "no parent" when the folder path is built. */
parentPageId?: string | null;
/** Declared on the node but not read by the code visible in this file. */
hasChildren?: boolean;
}
/**
 * Compute a collision-free folder/file name for a node among its siblings.
 *
 * `usedBySibling` maps a parent key -> set of names already taken under that
 * parent; the set is created on first use and the returned name is recorded in
 * it. The first sibling to claim a sanitized title keeps it as-is; a later
 * sibling that sanitizes to the same name gets a suffix derived from its
 * slugId (or its pageId when no slugId is present) via `disambiguate`
 * (SPEC §12). The name is COSMETIC; identity lives in the file's meta block.
 *
 * The result depends on call order: callers must feed nodes in a stable order
 * to get stable names.
 *
 * @param node Page to name; only `title`, `slugId`, `id` and `parentPageId` are read.
 * @param usedBySibling Mutable registry of names already handed out, per parent.
 * @returns A name not previously returned for the same parent key.
 * @throws Error if no free name is found within the retry budget (this would
 *   mean `disambiguate` does not vary its output with the suffix).
 */
function nameForNode(
  node: PageNode,
  usedBySibling: Map<string, Set<string>>,
): string {
  // `||` rather than `??`: an empty-string parentPageId is treated as "no
  // parent" when the folder path is built, so it must share the root bucket.
  const parentKey = node.parentPageId || "__root__";
  let used = usedBySibling.get(parentKey);
  if (!used) {
    used = new Set<string>();
    usedBySibling.set(parentKey, used);
  }

  const base = sanitizeTitle(node.title ?? "");
  let name = base;
  if (used.has(name)) {
    // Sibling collision: disambiguate with the stable slugId (fall back to the
    // pageId if no slugId is present).
    const suffix = node.slugId ?? node.id;
    name = disambiguate(base, suffix);

    // The suffixed form can itself be taken (e.g. a sibling whose title
    // already looks like "<base> ~<slug>"). Returning it anyway would make two
    // pages share one path and silently overwrite each other, so keep trying
    // with a numbered suffix until the name is free.
    const maxAttempts = 1000;
    for (let attempt = 2; used.has(name); attempt++) {
      if (attempt > maxAttempts) {
        throw new Error(
          `cannot find a unique name for page ${node.id} (base name "${base}")`,
        );
      }
      name = disambiguate(base, `${suffix}-${attempt}`);
    }
  }

  used.add(name);
  return name;
}
/**
 * Mirror every page of the configured Docmost space into the local vault.
 *
 * Steps:
 *  1. Load settings and build a DocmostClient from them.
 *  2. List the space's pages (flat nodes, no content) and index them by id.
 *  3. Resolve one collision-free name per page among its siblings.
 *  4. For each page, create `<vaultPath>/<...ancestor names>/` and write
 *     `<name>.md` containing whatever `exportPageBody` returns.
 *
 * Pages are exported sequentially, one request at a time. Any error from
 * settings loading, the client, or the filesystem propagates as a rejection
 * and aborts the run; files already written are left in place.
 */
async function main(): Promise<void> {
  const s = loadSettings();
  const client = new DocmostClient(
    s.docmostApiUrl,
    s.docmostEmail,
    s.docmostPassword,
  );
  const spaceId = s.docmostSpaceId;
  const vaultRoot = s.vaultPath;

  const pages: PageNode[] = await client.listAllSpacePages(spaceId);

  // Index pages by id so the parent chain can be walked. This also collapses
  // duplicate ids in the listing: every later loop iterates `byId`, so each
  // page is named and written exactly once (a duplicate would otherwise
  // collide with itself and pick up a spurious disambiguation suffix).
  const byId = new Map<string, PageNode>();
  for (const p of pages) {
    if (p && p.id) byId.set(p.id, p);
  }

  // The parent a page is actually filed under: undefined for root pages AND
  // for pages whose parentPageId points at a page missing from the listing.
  const parentOf = (node: PageNode): PageNode | undefined =>
    node.parentPageId ? byId.get(node.parentPageId) : undefined;

  // Resolve each node's display name once, tracking sibling collisions per
  // parent. The collision bucket must match the folder the file lands in, so
  // a page with an unknown parent is named as a root page; otherwise it could
  // overwrite a root page that sanitizes to the same name.
  const usedBySibling = new Map<string, Set<string>>();
  const nameById = new Map<string, string>();
  for (const p of byId.values()) {
    const parent = parentOf(p);
    nameById.set(
      p.id,
      nameForNode({ ...p, parentPageId: parent ? parent.id : null }, usedBySibling),
    );
  }

  // Build the folder path for a page by walking parentPageId to the root. The
  // page's OWN name is the file stem; its ancestors become folders. `visited`
  // starts with the page itself so a malformed parent cycle neither loops
  // forever nor lists the page as its own ancestor.
  const folderSegmentsFor = (node: PageNode): string[] => {
    const ancestors: string[] = [];
    const visited = new Set<string>([node.id]);
    let current = parentOf(node);
    while (current && !visited.has(current.id)) {
      visited.add(current.id);
      ancestors.unshift(
        nameById.get(current.id) ?? sanitizeTitle(current.title ?? ""),
      );
      current = parentOf(current);
    }
    return ancestors;
  };

  let written = 0;
  for (const p of byId.values()) {
    const segments = folderSegmentsFor(p);
    const fileStem = nameById.get(p.id) ?? sanitizeTitle(p.title ?? "");
    const dir = join(vaultRoot, ...segments);
    await mkdir(dir, { recursive: true });
    // Body + meta only (no comments block) — SPEC §3.
    const fileMd = await client.exportPageBody(p.id);
    await writeFile(join(dir, `${fileStem}.md`), fileMd, "utf8");
    written++;
  }

  console.log(
    `pull complete: ${written} page(s) from space ${spaceId} into ${vaultRoot}`,
  );
}
// CLI entrypoint guard. The pull runs only when this file is the script Node
// was started with; a plain `import` of the module (e.g. from a unit test)
// must stay side-effect free — no settings load, no network, no process exit.
const entryScript = process.argv[1];
const invokedDirectly =
  typeof entryScript === "string" &&
  pathToFileURL(entryScript).href === import.meta.url;

if (invokedDirectly) {
  main().catch((err) => {
    // Prefer the stack trace for real Error objects; print anything else raw.
    const detail = err instanceof Error ? err.stack : err;
    console.error("pull failed:", detail);
    process.exit(1);
  });
}