feat(sync): scaffold monorepo, extract docmost-client, add Phase-0 harness + read-only pull
Lock the access-layer decision (REST only) and start implementation per SPEC.

- monorepo (npm workspaces): packages/docmost-client = DocmostClient + lib/* copied 1:1 from docmost-mcp/src (backport target), plus bannered sync methods (listTrash, restorePage, listAllSpacePages, exportPageBody, listRecentSince / collectRecentSince cursor scan)
- engine stays the root app per AGENTS.md (src/, test/, build/, data/, settings.ts); add roundtrip.ts (SPEC §11 idempotency harness), pull.ts (SPEC §6 read-only Docmost->FS mirror), sanitize.ts (SPEC §12 filenames, path-traversal-safe)
- Dockerfile builds the workspace lib before the app; vitest gates CI
- exportPageBody never touches /comments (SPEC §3); serializeDocmostMarkdownBody emits meta + body only
- SPEC: resolve access-layer (REST), reflect root-engine layout + REST pagination
- tests: sanitize (incl. dot-traversal), collectRecentSince (cutoff/dedup/cap), stripBlockIds, markdown round-trip byte-stability

Note: the raw ProseMirror round-trip is byte-stable in Markdown but not yet attribute-idempotent (SPEC §11 "Задача №0" / Task No. 0, to be resolved before Phase 2).
This commit is contained in:
137
src/pull.ts
Normal file
137
src/pull.ts
Normal file
@@ -0,0 +1,137 @@
|
||||
/**
|
||||
* Read-only Docmost -> filesystem mirror (SPEC §6 pull, Phase 1).
|
||||
*
|
||||
* Walks the configured space's page tree and writes one self-contained `.md`
|
||||
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
|
||||
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
|
||||
* local files. The meta block inside each file carries pageId/slugId/
|
||||
* parentPageId (identity), so no external map file is needed.
|
||||
*
|
||||
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
|
||||
* be correct, but is not expected to be run without live access.
|
||||
*
|
||||
* Run via: npm run pull (-> node build/pull.js)
|
||||
*/
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { pathToFileURL } from "node:url";
|
||||
import { DocmostClient } from "docmost-client";
|
||||
import { loadSettings } from "./settings.js";
|
||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||
|
||||
/**
 * Flat page node as returned by listAllSpacePages (no content).
 *
 * Only `id` is required; every other field is optional and is read
 * defensively by the code in this module.
 */
interface PageNode {
  /** Page identifier; used as the key when indexing pages and names. */
  id: string;
  /** Id of the parent page; null/undefined is treated as "no parent". */
  parentPageId?: string | null;
  /** Preferred suffix when a sibling name collision must be disambiguated. */
  slugId?: string;
  /** Page title; sanitized before being used as a file or folder name. */
  title?: string;
  /** Carried on the node but not read by this module. */
  hasChildren?: boolean;
}
|
||||
|
||||
/**
 * Choose a folder/file name for `node` that is unique among its siblings.
 *
 * `usedBySibling` maps a parent key (the node's `parentPageId`, or
 * `"__root__"` when that is null/undefined) to the set of names already handed
 * out under that parent. The chosen name is recorded in that set before
 * returning, so the map is mutated on every call.
 *
 * Candidates are tried in this order until one is free:
 *   1. the sanitized title;
 *   2. the title disambiguated with `slugId` (skipped when absent or empty);
 *   3. the title disambiguated with `id`;
 *   4. the title disambiguated with `<id>-<n>` for n = 2, 3, ...
 *
 * Steps 3-4 exist because a disambiguated name can itself already be taken
 * (e.g. by a sibling whose literal title equals it). Returning a taken name
 * would make two pages share one file path. The name is COSMETIC; identity
 * lives in the file's meta block.
 *
 * @param node page whose name is being chosen
 * @param usedBySibling per-parent registry of taken names; mutated by the call
 * @returns a name that was not in the sibling set before this call
 * @throws Error if no free name is found within the attempt limit
 */
function nameForNode(
  node: PageNode,
  usedBySibling: Map<string, Set<string>>,
): string {
  const maxCounterAttempts = 1000;

  const parentKey = node.parentPageId ?? "__root__";
  let used = usedBySibling.get(parentKey);
  if (!used) {
    used = new Set<string>();
    usedBySibling.set(parentKey, used);
  }

  const base = sanitizeTitle(node.title ?? "");
  let name = base;

  if (used.has(name)) {
    // Prefer the stable slugId; an empty slugId would give an empty suffix,
    // so it is treated the same as a missing one.
    const stableSuffixes = node.slugId ? [node.slugId, node.id] : [node.id];
    for (const suffix of stableSuffixes) {
      name = disambiguate(base, suffix);
      if (!used.has(name)) break;
    }

    // Last resort: id plus a counter. Bounded so that an unexpected
    // disambiguate() result can never spin forever.
    for (let attempt = 2; used.has(name); attempt++) {
      if (attempt > maxCounterAttempts) {
        throw new Error(`cannot find a free name for page ${node.id}`);
      }
      name = disambiguate(base, `${node.id}-${attempt}`);
    }
  }

  used.add(name);
  return name;
}
|
||||
|
||||
/**
 * Pull every page of the configured space into the vault directory.
 *
 * Steps:
 *   1. load settings and construct the Docmost client;
 *   2. list the space's pages and index them by id (each id once);
 *   3. assign every page a name that is unique within the folder it will
 *      actually be written to;
 *   4. for each page, create `<vaultPath>/<...ancestor names>` and write
 *      `<name>.md` with the exported body.
 *
 * Any rejection from the client or the filesystem propagates to the caller;
 * pages already written stay on disk.
 */
async function main(): Promise<void> {
  const s = loadSettings();
  const client = new DocmostClient(
    s.docmostApiUrl,
    s.docmostEmail,
    s.docmostPassword,
  );

  const spaceId = s.docmostSpaceId;
  const vaultRoot = s.vaultPath;

  const listed: PageNode[] = await client.listAllSpacePages(spaceId);

  // Index pages by id so the parent chain can be walked. The first occurrence
  // of an id wins, so a page listed twice is named and exported only once.
  const byId = new Map<string, PageNode>();
  for (const p of listed) {
    if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
  }
  const pages = [...byId.values()];

  // The parent a page is filed under: only a parent that is present in the
  // listing counts. A dangling parentPageId means the page lands at the root.
  const parentOf = (node: PageNode): PageNode | undefined =>
    node.parentPageId ? byId.get(node.parentPageId) : undefined;

  // Resolve each node's display name once, tracking collisions per folder.
  // Pages with a dangling parent are named among the root siblings, because
  // that is where their file is written; otherwise two of them could receive
  // the same name and overwrite each other.
  const usedBySibling = new Map<string, Set<string>>();
  const nameById = new Map<string, string>();
  for (const p of pages) {
    const placed: PageNode = parentOf(p) ? p : { ...p, parentPageId: null };
    nameById.set(p.id, nameForNode(placed, usedBySibling));
  }

  // Build the folder path for a page by walking parentPageId to the root. The
  // page's OWN name is the file stem; its ancestors become folders. `visited`
  // starts with the page itself so a malformed parent cycle neither loops
  // forever nor nests the page under a folder carrying its own name.
  const folderSegmentsFor = (node: PageNode): string[] => {
    const ancestors: string[] = [];
    const visited = new Set<string>([node.id]);
    let current = parentOf(node);
    while (current && current.id && !visited.has(current.id)) {
      visited.add(current.id);
      ancestors.unshift(
        nameById.get(current.id) ?? sanitizeTitle(current.title ?? ""),
      );
      current = parentOf(current);
    }
    return ancestors;
  };

  // Pages are exported and written one at a time.
  let written = 0;
  for (const p of pages) {
    const segments = folderSegmentsFor(p);
    const fileStem = nameById.get(p.id) ?? sanitizeTitle(p.title ?? "");
    const dir = join(vaultRoot, ...segments);
    await mkdir(dir, { recursive: true });

    // Body + meta only (no comments block) — SPEC §3.
    const fileMd = await client.exportPageBody(p.id);
    await writeFile(join(dir, `${fileStem}.md`), fileMd, "utf8");
    written++;
  }

  console.log(
    `pull complete: ${written} page(s) from space ${spaceId} into ${vaultRoot}`,
  );
}
|
||||
|
||||
// CLI entrypoint guard: start the pull only when this file is the script that
// node was launched with. Merely importing the module therefore never calls
// main(), so it does not load settings or exit the process as a side effect.
const entryScript: string | undefined = process.argv[1];
const invokedDirectly =
  typeof entryScript === "string"
    ? pathToFileURL(entryScript).href === import.meta.url
    : false;

if (invokedDirectly) {
  main().catch((failure: unknown) => {
    // Prefer the stack trace when one is available; otherwise log the raw value.
    const detail = failure instanceof Error ? failure.stack : failure;
    console.error("pull failed:", detail);
    process.exit(1);
  });
}
|
||||
Reference in New Issue
Block a user