refactor(pull): extract tested vault-layout module; harden pull; close review findings
Address the Increment-1 code review (3 warnings + suggestions).

- layout: new pure src/layout.ts (buildVaultLayout) — page-tree -> vault paths, sibling + full-path collision disambiguation (sanitized ~slugId suffix), parent cycle guard; pull.ts is now a thin I/O loop
- layout: resolve orphan/root collisions at the NAME stage so an orphan ancestor can't desync its children's folder segments (fixes review Major); covered by test
- pull: per-page try/catch (one bad page no longer aborts the mirror), bounded concurrency (6), progress logging, process.exitCode=1 on partial mirror
- security: filename disambiguation suffix now passes through sanitizeTitle
- docs: AGENTS.md -> Increment 1 status/structure/run targets; pull.ts meta-block comment; collectRecentSince JSDoc (lexicographic UTC-ISO precondition)
- tests: layout (9), markdown-document round-trip (no comments block, SPEC §3), firstDivergence; export firstDivergence. 49 tests green.
This commit is contained in:
14
AGENTS.md
14
AGENTS.md
@@ -8,9 +8,11 @@ A daemon that bidirectionally syncs Docmost articles with a local Markdown git
|
|||||||
vault (git is the state store). It reuses the sibling project **docmost-mcp** as
|
vault (git is the state store). It reuses the sibling project **docmost-mcp** as
|
||||||
a library (DocmostClient, ProseMirror ↔ Markdown converter, collab-write).
|
a library (DocmostClient, ProseMirror ↔ Markdown converter, collab-write).
|
||||||
|
|
||||||
**Status: scaffold only — the sync engine is NOT implemented yet.** `src/index.ts`
|
**Status: Increment 1.** `src/index.ts` is still a thin config-validating entry,
|
||||||
is a thin stub that validates config and exits. See `SPEC.md` for the full design
|
but the engine now has a working READ-ONLY `pull` (Docmost -> FS mirror) and a
|
||||||
and the phased plan before adding engine logic.
|
Phase-0 round-trip idempotency harness. Bidirectional sync, conflict handling,
|
||||||
|
and git are NOT implemented yet (see the SPEC phases). See `SPEC.md` for the full
|
||||||
|
design and the phased plan before adding engine logic.
|
||||||
|
|
||||||
## Project structure
|
## Project structure
|
||||||
|
|
||||||
@@ -27,6 +29,10 @@ build` builds the lib first, then compiles the app to `build/`.
|
|||||||
- `src/config-errors.ts` — `loadSettingsOrExit` turns a config error into a
|
- `src/config-errors.ts` — `loadSettingsOrExit` turns a config error into a
|
||||||
clear startup message that names the missing/invalid variable, then exits.
|
clear startup message that names the missing/invalid variable, then exits.
|
||||||
- `src/index.ts` — thin entry point.
|
- `src/index.ts` — thin entry point.
|
||||||
|
- `src/sanitize.ts` — filesystem-safe filename sanitization (SPEC §12).
|
||||||
|
- `src/layout.ts` — pure page-tree -> vault path mapping.
|
||||||
|
- `src/roundtrip.ts` — Phase-0 idempotency harness (SPEC §11).
|
||||||
|
- `src/pull.ts` — read-only Docmost -> FS mirror (SPEC §6).
|
||||||
- `test/` — vitest tests (`*.test.ts`).
|
- `test/` — vitest tests (`*.test.ts`).
|
||||||
- `data/` — all mutable runtime state (the git vault lives here). Gitignored;
|
- `data/` — all mutable runtime state (the git vault lives here). Gitignored;
|
||||||
mounted as a docker volume in production. Never put code/static assets here.
|
mounted as a docker volume in production. Never put code/static assets here.
|
||||||
@@ -46,6 +52,8 @@ Relative imports inside `src/` use the `.js` extension (NodeNext), e.g.
|
|||||||
- `make test` — run the test suite (vitest).
|
- `make test` — run the test suite (vitest).
|
||||||
- `make run` — build and run the app.
|
- `make run` — build and run the app.
|
||||||
- `make dev` — run in watch mode (tsx).
|
- `make dev` — run in watch mode (tsx).
|
||||||
|
- `make roundtrip` — run the offline round-trip idempotency harness.
|
||||||
|
- `make pull` — mirror the configured space into the vault (read-only).
|
||||||
|
|
||||||
`make` (or `make help`) lists all targets.
|
`make` (or `make help`) lists all targets.
|
||||||
|
|
||||||
|
|||||||
@@ -2704,6 +2704,11 @@ export class DocmostClient {
|
|||||||
* collecting items strictly newer than sinceIso and stopping at the first item
|
* collecting items strictly newer than sinceIso and stopping at the first item
|
||||||
* with updatedAt <= sinceIso. `fetchPage(cursor)` returns one page; dedup by id
|
* with updatedAt <= sinceIso. `fetchPage(cursor)` returns one page; dedup by id
|
||||||
* guards a server that ignores the cursor; hardPageCap bounds the walk.
|
* guards a server that ignores the cursor; hardPageCap bounds the walk.
|
||||||
|
*
|
||||||
|
* Precondition: `sinceIso` and each `item.updatedAt` MUST be the SAME UTC
|
||||||
|
* ISO-8601 format that Docmost emits, because the cutoff comparison is purely
|
||||||
|
* lexicographic (string `<=`); mixed formats or non-UTC offsets would compare
|
||||||
|
* incorrectly.
|
||||||
*/
|
*/
|
||||||
export async function collectRecentSince(
|
export async function collectRecentSince(
|
||||||
fetchPage: (cursor: string | null) => Promise<{ items: any[]; nextCursor: string | null }>,
|
fetchPage: (cursor: string | null) => Promise<{ items: any[]; nextCursor: string | null }>,
|
||||||
|
|||||||
177
src/layout.ts
Normal file
177
src/layout.ts
Normal file
@@ -0,0 +1,177 @@
|
|||||||
|
/**
|
||||||
|
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||||
|
*
|
||||||
|
* Given the flat list of page nodes for a space (as returned by
|
||||||
|
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||||
|
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||||
|
* page's own name, no extension). This module is intentionally PURE and
|
||||||
|
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||||
|
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||||
|
* lives in each file's meta block (pageId / slugId).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||||
|
|
||||||
|
/**
 * Flat page node as returned by `listAllSpacePages` (no content).
 *
 * Only the fields the layout code reads are declared here; the tree shape is
 * reconstructed from `parentPageId` links rather than nested children.
 */
|
||||||
|
export interface PageNode {
|
||||||
|
/** Page identifier. Used as the Map key for the layout and as the last-resort disambiguation suffix. */
id: string;
|
||||||
|
/** Human title; sanitized to form the file stem / folder name. A missing title is treated as "". */
title?: string;
|
||||||
|
/** Preferred disambiguation suffix when two names collide (falls back to `id` when absent). */
slugId?: string;
|
||||||
|
/** Id of the parent page. Null/undefined, or an id not present in the input, places the page at the vault root. */
parentPageId?: string | null;
|
||||||
|
/** Not read by the layout code in this module. */
hasChildren?: boolean;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * A page's resolved vault destination: folder path + file stem.
 *
 * The full destination key is `[...segments, stem].join("/")`; the caller
 * appends the file extension when writing.
 */
|
||||||
|
export interface VaultEntry {
|
||||||
|
/** Folder path, root -> leaf (the page's ancestors). Empty for a root page. */
|
||||||
|
segments: string[];
|
||||||
|
/** The page's own file name without extension. */
|
||||||
|
stem: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Build the full vault layout for a space.
 *
 * Disambiguation is layered:
 *  1. Name pass — every page gets a name that is resolved against the other
 *     pages in the same bucket. The bucket is the page's parent when that
 *     parent is present in the input; root pages AND orphans (parent missing
 *     from the input) share the single `"__root__"` bucket, because both end
 *     up at the vault root. Collisions get a sanitized ` ~<slugId>` suffix
 *     (slugId/id is untrusted data and must never inject a path separator).
 *     Names are final after this pass, so folder segments derived from an
 *     ancestor's name cannot drift afterwards.
 *  2. Full-path pass — a safety net that re-stems the LATER of two pages whose
 *     complete `[...segments, stem]` path is still identical (e.g. on a
 *     malformed parent cycle). It keeps trying suffixes until the path is free
 *     and throws rather than return two identical paths.
 *
 * @param pages Flat page list for one space. Falsy entries, entries without an
 *   `id`, and repeated ids (first occurrence wins) are ignored.
 * @returns Map keyed by pageId -> `{ segments, stem }`, in first-occurrence
 *   input order. Deterministic for a given input; every full destination path
 *   (`[...segments, stem].join("/")`) is unique, so no page can silently
 *   overwrite another.
 * @throws Error if an indexed page has no resolved name, or if no free path
 *   can be found for a colliding page (both are internal invariants).
 */
export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
  const ROOT_BUCKET = "__root__";

  // Index pages by id (first occurrence wins). Every later pass iterates this
  // map, so the duplicate/invalid-entry filter lives in exactly one place. Map
  // iteration is insertion-ordered, which preserves the input order.
  const byId = new Map<string, PageNode>();
  for (const p of pages) {
    if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
  }

  // The parent node, or undefined for a root page or an orphan.
  const parentOf = (node: PageNode): PageNode | undefined =>
    node.parentPageId ? byId.get(node.parentPageId) : undefined;

  // Name pass: resolve each page's name once, per bucket. Orphans bucket at
  // the root together with real root pages so they are compared against each
  // other here, before any folder segment is derived from a name.
  const usedBySibling = new Map<string, Set<string>>();
  const nameById = new Map<string, string>();
  for (const node of byId.values()) {
    const parent = parentOf(node);
    const bucket = parent ? parent.id : ROOT_BUCKET;
    nameById.set(node.id, nameForNode(node, bucket, usedBySibling));
  }

  // Return the resolved name, or THROW if it is absent — never silently
  // recompute a different, non-disambiguated name (that would desync a folder
  // segment from its target file).
  const nameOf = (id: string): string => {
    const name = nameById.get(id);
    if (name === undefined) {
      throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
    }
    return name;
  };

  // Folder path for a page: its ancestors' names, root -> leaf. The `visited`
  // guard stops the walk on a malformed parent cycle.
  const folderSegmentsFor = (node: PageNode): string[] => {
    const leafToRoot: string[] = [];
    const visited = new Set<string>();
    for (
      let current = parentOf(node);
      current && current.id && !visited.has(current.id);
      current = parentOf(current)
    ) {
      visited.add(current.id);
      leafToRoot.push(nameOf(current.id));
    }
    // Collected leaf -> root; flip once instead of unshifting on every step.
    return leafToRoot.reverse();
  };

  // Provisional { segments, stem } for every page.
  const layout = new Map<string, VaultEntry>();
  for (const node of byId.values()) {
    layout.set(node.id, {
      segments: folderSegmentsFor(node),
      stem: nameOf(node.id),
    });
  }

  // Full-path uniqueness pass (safety net). Only the colliding LATER page is
  // re-stemmed: first with the sanitized slugId (or id), then with the
  // sanitized id, then with a numeric counter until the path is free.
  // NOTE(review): the comparison is case-sensitive; on a case-insensitive
  // filesystem two stems differing only by case would still map to one file —
  // confirm whether that is handled elsewhere.
  const usedPaths = new Set<string>();
  const pathKey = (e: VaultEntry): string => [...e.segments, e.stem].join("/");
  const collides = (e: VaultEntry): boolean => usedPaths.has(pathKey(e));
  for (const node of byId.values()) {
    const entry = layout.get(node.id);
    if (!entry) continue;

    if (collides(entry)) {
      entry.stem = disambiguate(entry.stem, sanitizeTitle(node.slugId ?? node.id));
    }
    if (collides(entry)) {
      entry.stem = disambiguate(entry.stem, sanitizeTitle(node.id));
    }
    if (collides(entry)) {
      // Even the id suffix is taken (titles are untrusted and may mimic a
      // suffix). Try a bounded run of numeric suffixes: with more candidates
      // than used paths, one of them must be free.
      const stuck = entry.stem;
      const lastTry = usedPaths.size + 2;
      for (let n = 2; n <= lastTry && collides(entry); n++) {
        entry.stem = disambiguate(stuck, String(n));
      }
      if (collides(entry)) {
        throw new Error(
          `buildVaultLayout: could not find a free path for page id ${node.id}`,
        );
      }
    }
    usedPaths.add(pathKey(entry));
  }

  return layout;
}
|
||||||
|
|
||||||
|
/**
 * Compute a deterministic name for a node that is unique among the names
 * already taken in its bucket, and record it as taken.
 *
 * The sanitized title is used as-is when free. On a collision a stable
 * ` ~slugId` suffix is appended (SPEC §12), falling back to the page id when
 * there is no slugId. The suffix is itself passed through `sanitizeTitle`,
 * because slugId/id is a second untrusted-data channel that must never leak a
 * path separator into the name. If the suffixed name is ALSO taken (duplicate
 * slugId, or a sibling whose literal title already looks like the suffixed
 * name), the sanitized id and then a numeric counter are appended until a free
 * name is found.
 *
 * The name is COSMETIC; identity lives in the meta block.
 *
 * @param node The page to name.
 * @param parentKey Bucket key supplied by the caller (the parent id, or
 *   `"__root__"` for root pages and for orphans whose parent is outside the
 *   input set, so they share one bucket).
 * @param usedBySibling Bucket key -> set of names already taken. Mutated: the
 *   bucket is created on first use and the returned name is added to it.
 * @returns The resolved name for `node`.
 */
function nameForNode(
  node: PageNode,
  parentKey: string,
  usedBySibling: Map<string, Set<string>>,
): string {
  let used = usedBySibling.get(parentKey);
  if (!used) {
    used = new Set<string>();
    usedBySibling.set(parentKey, used);
  }

  let name = sanitizeTitle(node.title ?? "");
  if (used.has(name)) {
    // Sibling collision: disambiguate with the stable, sanitized slugId (fall
    // back to the sanitized pageId if no slugId is present).
    name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
  }
  if (used.has(name)) {
    // The suffixed name is taken as well; add the page id.
    name = disambiguate(name, sanitizeTitle(node.id));
  }
  if (used.has(name)) {
    // Last resort: a bounded run of numeric suffixes. With more candidates
    // than taken names, one of them must be free.
    const stuck = name;
    const lastTry = used.size + 2;
    for (let n = 2; n <= lastTry && used.has(name); n++) {
      name = disambiguate(stuck, String(n));
    }
  }

  used.add(name);
  return name;
}
|
||||||
150
src/pull.ts
150
src/pull.ts
@@ -4,8 +4,12 @@
|
|||||||
* Walks the configured space's page tree and writes one self-contained `.md`
|
* Walks the configured space's page tree and writes one self-contained `.md`
|
||||||
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
|
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
|
||||||
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
|
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
|
||||||
* local files. The meta block inside each file carries pageId/slugId/
|
* local files. The meta block inside each file carries
|
||||||
* parentPageId (identity), so no external map file is needed.
|
* `{ version, pageId, slugId, title, spaceId, parentPageId }` (identity), so no
|
||||||
|
* external map file is needed.
|
||||||
|
*
|
||||||
|
* The pure tree -> path mapping lives in `./layout.js`; this file is a thin,
|
||||||
|
* fault-tolerant I/O loop around it.
|
||||||
*
|
*
|
||||||
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
|
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
|
||||||
* be correct, but is not expected to be run without live access.
|
* be correct, but is not expected to be run without live access.
|
||||||
@@ -17,43 +21,13 @@ import { join } from "node:path";
|
|||||||
import { pathToFileURL } from "node:url";
|
import { pathToFileURL } from "node:url";
|
||||||
import { DocmostClient } from "docmost-client";
|
import { DocmostClient } from "docmost-client";
|
||||||
import { loadSettings } from "./settings.js";
|
import { loadSettings } from "./settings.js";
|
||||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
import { buildVaultLayout, type PageNode } from "./layout.js";
|
||||||
|
|
||||||
/** Flat page node as returned by listAllSpacePages (no content). */
|
// Number of pages fetched/written concurrently. Bounded so a large space does
|
||||||
interface PageNode {
|
// not open thousands of simultaneous requests/file handles.
|
||||||
id: string;
|
const CONCURRENCY = 6;
|
||||||
title?: string;
|
// How often to log incremental progress (every N completed pages).
|
||||||
slugId?: string;
|
const PROGRESS_EVERY = 25;
|
||||||
parentPageId?: string | null;
|
|
||||||
hasChildren?: boolean;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute a deterministic, collision-free folder/file name for a node among its
|
|
||||||
* siblings. `usedBySibling` maps a parent key -> set of names already taken, so
|
|
||||||
* two siblings that sanitize to the same name get a stable ` ~slugId` suffix
|
|
||||||
* (SPEC §12). The name is COSMETIC; identity lives in the file's meta block.
|
|
||||||
*/
|
|
||||||
function nameForNode(
|
|
||||||
node: PageNode,
|
|
||||||
usedBySibling: Map<string, Set<string>>,
|
|
||||||
): string {
|
|
||||||
const parentKey = node.parentPageId ?? "__root__";
|
|
||||||
let used = usedBySibling.get(parentKey);
|
|
||||||
if (!used) {
|
|
||||||
used = new Set<string>();
|
|
||||||
usedBySibling.set(parentKey, used);
|
|
||||||
}
|
|
||||||
|
|
||||||
let name = sanitizeTitle(node.title ?? "");
|
|
||||||
if (used.has(name)) {
|
|
||||||
// Sibling collision: disambiguate with the stable slugId (fall back to the
|
|
||||||
// pageId if no slugId is present).
|
|
||||||
name = disambiguate(name, node.slugId ?? node.id);
|
|
||||||
}
|
|
||||||
used.add(name);
|
|
||||||
return name;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function main(): Promise<void> {
|
async function main(): Promise<void> {
|
||||||
const s = loadSettings();
|
const s = loadSettings();
|
||||||
@@ -67,64 +41,74 @@ async function main(): Promise<void> {
|
|||||||
const vaultRoot = s.vaultPath;
|
const vaultRoot = s.vaultPath;
|
||||||
|
|
||||||
const pages: PageNode[] = await client.listAllSpacePages(spaceId);
|
const pages: PageNode[] = await client.listAllSpacePages(spaceId);
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
|
||||||
// Index pages by id so the parent chain can be walked.
|
const total = pages.length;
|
||||||
const byId = new Map<string, PageNode>();
|
let written = 0;
|
||||||
for (const p of pages) {
|
let failed = 0;
|
||||||
if (p && p.id) byId.set(p.id, p);
|
let completed = 0;
|
||||||
}
|
let nextIndex = 0;
|
||||||
|
|
||||||
// Resolve each node's display name once, deterministically, tracking sibling
|
// Pull + write a single page. Each call is wrapped so one bad page (network
|
||||||
// collisions per parent.
|
// error, page deleted between the walk and the fetch, body conversion
|
||||||
const usedBySibling = new Map<string, Set<string>>();
|
// failure) NEVER aborts the whole pull — it is counted as a failure and the
|
||||||
const nameById = new Map<string, string>();
|
// pool moves on. Mirrors the deliberately fault-tolerant enumerateSpacePages.
|
||||||
for (const p of pages) {
|
const pullOne = async (page: PageNode): Promise<void> => {
|
||||||
if (p && p.id) nameById.set(p.id, nameForNode(p, usedBySibling));
|
if (!page || !page.id) return;
|
||||||
}
|
const entry = layout.get(page.id);
|
||||||
|
if (!entry) return; // no layout entry (e.g. duplicate/skipped id)
|
||||||
// Build the folder path for a page by walking parentPageId to the root. The
|
try {
|
||||||
// page's OWN name is the file stem; its ancestors become folders. A `visited`
|
const dir = join(vaultRoot, ...entry.segments);
|
||||||
// guard prevents an infinite loop on a malformed parent cycle.
|
await mkdir(dir, { recursive: true });
|
||||||
const folderSegmentsFor = (node: PageNode): string[] => {
|
// Body + meta only (no comments block) — SPEC §3.
|
||||||
const ancestors: string[] = [];
|
const md = await client.exportPageBody(page.id);
|
||||||
const visited = new Set<string>();
|
await writeFile(join(dir, `${entry.stem}.md`), md, "utf8");
|
||||||
let current: PageNode | undefined = node.parentPageId
|
written++;
|
||||||
? byId.get(node.parentPageId)
|
} catch (err) {
|
||||||
: undefined;
|
failed++;
|
||||||
while (current && current.id && !visited.has(current.id)) {
|
console.error(
|
||||||
visited.add(current.id);
|
`pull: failed page ${page.id}:`,
|
||||||
ancestors.unshift(
|
err instanceof Error ? err.message : String(err),
|
||||||
nameById.get(current.id) ?? sanitizeTitle(current.title ?? ""),
|
|
||||||
);
|
);
|
||||||
current = current.parentPageId
|
} finally {
|
||||||
? byId.get(current.parentPageId)
|
completed++;
|
||||||
: undefined;
|
if (completed % PROGRESS_EVERY === 0) {
|
||||||
|
console.log(`pulled ${completed}/${total}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return ancestors;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
let written = 0;
|
// A small dependency-free bounded-concurrency pool: a fixed set of runners
|
||||||
for (const p of pages) {
|
// each pull the next index until the list is exhausted.
|
||||||
if (!p || !p.id) continue;
|
const runner = async (): Promise<void> => {
|
||||||
const segments = folderSegmentsFor(p);
|
while (true) {
|
||||||
const fileStem = nameById.get(p.id) ?? sanitizeTitle(p.title ?? "");
|
const i = nextIndex++;
|
||||||
const dir = join(vaultRoot, ...segments);
|
if (i >= pages.length) return;
|
||||||
await mkdir(dir, { recursive: true });
|
await pullOne(pages[i]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Body + meta only (no comments block) — SPEC §3.
|
const runners = Array.from(
|
||||||
const fileMd = await client.exportPageBody(p.id);
|
{ length: Math.min(CONCURRENCY, pages.length) },
|
||||||
await writeFile(join(dir, `${fileStem}.md`), fileMd, "utf8");
|
() => runner(),
|
||||||
written++;
|
);
|
||||||
}
|
await Promise.all(runners);
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`pull complete: ${written} page(s) from space ${spaceId} into ${vaultRoot}`,
|
`pull complete: ${written} page(s) written, ${failed} failed, ` +
|
||||||
|
`out of ${total} from space ${spaceId} into ${vaultRoot}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Signal a partial mirror so callers/CI can react. Use process.exitCode (not
|
||||||
|
// a hard process.exit) so any buffered output is flushed cleanly.
|
||||||
|
if (failed > 0) {
|
||||||
|
process.exitCode = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only auto-run when invoked directly as the CLI entrypoint, not when this
|
// Only auto-run when invoked directly as the CLI entrypoint, not when this
|
||||||
// module is imported (e.g. by a unit test importing sanitizeTitle / path
|
// module is imported (e.g. by a unit test), so the import does not trigger
|
||||||
// helpers), so the import does not trigger loadSettings() + process.exit.
|
// loadSettings() + process.exit.
|
||||||
const invokedDirectly =
|
const invokedDirectly =
|
||||||
typeof process.argv[1] === "string" &&
|
typeof process.argv[1] === "string" &&
|
||||||
import.meta.url === pathToFileURL(process.argv[1]).href;
|
import.meta.url === pathToFileURL(process.argv[1]).href;
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ async function loadDoc(args: ParsedArgs): Promise<any> {
|
|||||||
* Find the first divergence between two values via a recursive deep compare.
|
* Find the first divergence between two values via a recursive deep compare.
|
||||||
* Returns a short path + the two differing values, or null if they are equal.
|
* Returns a short path + the two differing values, or null if they are equal.
|
||||||
*/
|
*/
|
||||||
function firstDivergence(
|
export function firstDivergence(
|
||||||
a: any,
|
a: any,
|
||||||
b: any,
|
b: any,
|
||||||
path = "$",
|
path = "$",
|
||||||
|
|||||||
40
test/divergence.test.ts
Normal file
40
test/divergence.test.ts
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { firstDivergence } from '../src/roundtrip.js';
|
||||||
|
|
||||||
|
describe('firstDivergence', () => {
|
||||||
|
it('returns null for equal nested objects', () => {
|
||||||
|
const a = { k1: { k2: 1, k3: [1, 2, 3] }, n: 'x' };
|
||||||
|
const b = { k1: { k2: 1, k3: [1, 2, 3] }, n: 'x' };
|
||||||
|
expect(firstDivergence(a, b)).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reports the correct path for a differing leaf', () => {
|
||||||
|
const a = { k1: { k2: 1 } };
|
||||||
|
const b = { k1: { k2: 2 } };
|
||||||
|
const d = firstDivergence(a, b);
|
||||||
|
expect(d).not.toBeNull();
|
||||||
|
expect(d!.path).toBe('$.k1.k2');
|
||||||
|
expect(d!.a).toBe(1);
|
||||||
|
expect(d!.b).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reports an array length mismatch at $.arr.length', () => {
|
||||||
|
const a = { arr: [1, 2, 3] };
|
||||||
|
const b = { arr: [1, 2] };
|
||||||
|
const d = firstDivergence(a, b);
|
||||||
|
expect(d).not.toBeNull();
|
||||||
|
expect(d!.path).toBe('$.arr.length');
|
||||||
|
expect(d!.a).toBe(3);
|
||||||
|
expect(d!.b).toBe(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('reports a key present only in a', () => {
|
||||||
|
const a = { only: 'here', shared: 1 };
|
||||||
|
const b = { shared: 1 };
|
||||||
|
const d = firstDivergence(a, b);
|
||||||
|
expect(d).not.toBeNull();
|
||||||
|
expect(d!.path).toBe('$.only');
|
||||||
|
expect(d!.a).toBe('here');
|
||||||
|
expect(d!.b).toBeUndefined();
|
||||||
|
});
|
||||||
|
});
|
||||||
144
test/layout.test.ts
Normal file
144
test/layout.test.ts
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { buildVaultLayout, type PageNode } from '../src/layout.js';
|
||||||
|
|
||||||
|
describe('buildVaultLayout', () => {
|
||||||
|
it('disambiguates two siblings with the same sanitized title via ~slugId', () => {
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'p1', title: 'Notes', slugId: 'slug-a', parentPageId: null },
|
||||||
|
{ id: 'p2', title: 'Notes', slugId: 'slug-b', parentPageId: null },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
expect(layout.get('p1')).toEqual({ segments: [], stem: 'Notes' });
|
||||||
|
expect(layout.get('p2')).toEqual({ segments: [], stem: 'Notes ~slug-b' });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to ~id when a colliding sibling has no slugId', () => {
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'p1', title: 'Notes', parentPageId: null },
|
||||||
|
{ id: 'p2', title: 'Notes', parentPageId: null },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
expect(layout.get('p1')?.stem).toBe('Notes');
|
||||||
|
expect(layout.get('p2')?.stem).toBe('Notes ~p2');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('does NOT collide identical titles under DIFFERENT parents (distinct segments)', () => {
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'a', title: 'Alpha', parentPageId: null },
|
||||||
|
{ id: 'b', title: 'Beta', parentPageId: null },
|
||||||
|
{ id: 'a1', title: 'Notes', parentPageId: 'a' },
|
||||||
|
{ id: 'b1', title: 'Notes', parentPageId: 'b' },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
// Same stem, but different folder segments => no disambiguation needed.
|
||||||
|
expect(layout.get('a1')).toEqual({ segments: ['Alpha'], stem: 'Notes' });
|
||||||
|
expect(layout.get('b1')).toEqual({ segments: ['Beta'], stem: 'Notes' });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('terminates on a 2-node parent cycle and yields a finite result', () => {
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'a', title: 'A', parentPageId: 'b' },
|
||||||
|
{ id: 'b', title: 'B', parentPageId: 'a' },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
// Both resolve to a finite path; the visited-guard breaks the cycle.
|
||||||
|
expect(layout.size).toBe(2);
|
||||||
|
const a = layout.get('a');
|
||||||
|
const b = layout.get('b');
|
||||||
|
expect(a).toBeDefined();
|
||||||
|
expect(b).toBeDefined();
|
||||||
|
// Each node's segment chain is bounded (no infinite walk).
|
||||||
|
expect(a!.segments.length).toBeLessThanOrEqual(2);
|
||||||
|
expect(b!.segments.length).toBeLessThanOrEqual(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('maps a root page (parentPageId null) to empty segments', () => {
|
||||||
|
const pages: PageNode[] = [{ id: 'root', title: 'Home', parentPageId: null }];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
expect(layout.get('root')).toEqual({ segments: [], stem: 'Home' });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('emits ancestors in root->leaf order for a deep chain', () => {
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'g', title: 'Grand', parentPageId: null },
|
||||||
|
{ id: 'p', title: 'Parent', parentPageId: 'g' },
|
||||||
|
{ id: 'c', title: 'Child', parentPageId: 'p' },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
expect(layout.get('c')).toEqual({
|
||||||
|
segments: ['Grand', 'Parent'],
|
||||||
|
stem: 'Child',
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('disambiguates two orphan-parent pages with the same title at the path level', () => {
|
||||||
|
// Both parents are OUTSIDE the input set, so both pages land at the vault root
|
||||||
|
// with segments: []. buildVaultLayout buckets such orphans under "__root__" in
|
||||||
|
// the name pass, so the later page is re-stemmed there and the paths are DISTINCT.
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'x', title: 'Orphan', slugId: 'sx', parentPageId: 'missing-1' },
|
||||||
|
{ id: 'y', title: 'Orphan', slugId: 'sy', parentPageId: 'missing-2' },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
const ex = layout.get('x')!;
|
||||||
|
const ey = layout.get('y')!;
|
||||||
|
const pathOf = (e: { segments: string[]; stem: string }) =>
|
||||||
|
[...e.segments, e.stem].join('/');
|
||||||
|
expect(pathOf(ex)).not.toBe(pathOf(ey));
|
||||||
|
// The first keeps the plain stem; the later one is re-stemmed.
|
||||||
|
expect(ex.stem).toBe('Orphan');
|
||||||
|
expect(ey.stem).toBe('Orphan ~sy');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('sanitizes a slugId containing a path separator before using it as a suffix', () => {
|
||||||
|
// A crafted slugId with "/" must NOT leak a path separator into the stem.
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'p1', title: 'Notes', slugId: 'a/b', parentPageId: null },
|
||||||
|
{ id: 'p2', title: 'Notes', slugId: 'c/d', parentPageId: null },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
const stem = layout.get('p2')!.stem;
|
||||||
|
expect(stem).not.toContain('/');
|
||||||
|
expect(stem).not.toContain('\\');
|
||||||
|
// The "/" was replaced by sanitizeTitle's dash substitution.
|
||||||
|
expect(stem).toBe('Notes ~c-d');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('disambiguates two ORPHAN ancestors at the NAME pass so their children stay in sync', () => {
|
||||||
|
// Two orphan PARENTS share the same title but live under DIFFERENT missing
|
||||||
|
// parents, so sibling-scoping by raw parentPageId would never compare them.
|
||||||
|
// Both bucket at the vault root, so they MUST be disambiguated in the name
|
||||||
|
// pass (sharing the "__root__" bucket) BEFORE any child folder segment is
|
||||||
|
// computed from the parent name — otherwise re-stemming a parent post-hoc
|
||||||
|
// would desync its child's folder from the parent file.
|
||||||
|
const pages: PageNode[] = [
|
||||||
|
{ id: 'p1', title: 'Dup', slugId: 's1', parentPageId: 'missing-1' },
|
||||||
|
{ id: 'p2', title: 'Dup', slugId: 's2', parentPageId: 'missing-2' },
|
||||||
|
{ id: 'c1', title: 'Child', parentPageId: 'p1' },
|
||||||
|
{ id: 'c2', title: 'Child', parentPageId: 'p2' },
|
||||||
|
];
|
||||||
|
const layout = buildVaultLayout(pages);
|
||||||
|
const p1 = layout.get('p1')!;
|
||||||
|
const p2 = layout.get('p2')!;
|
||||||
|
const c1 = layout.get('c1')!;
|
||||||
|
const c2 = layout.get('c2')!;
|
||||||
|
|
||||||
|
// The two orphan parents get DISTINCT stems, both at the root.
|
||||||
|
expect(p1.segments).toEqual([]);
|
||||||
|
expect(p2.segments).toEqual([]);
|
||||||
|
expect(p1.stem).toBe('Dup');
|
||||||
|
expect(p2.stem).toBe('Dup ~s2');
|
||||||
|
expect(p1.stem).not.toBe(p2.stem);
|
||||||
|
|
||||||
|
// Each child's folder segment EXACTLY equals its parent's resolved stem
|
||||||
|
// (no desync): the parent name is final before segments are built.
|
||||||
|
expect(c1.segments).toEqual([p1.stem]);
|
||||||
|
expect(c2.segments).toEqual([p2.stem]);
|
||||||
|
|
||||||
|
// All four full paths are unique.
|
||||||
|
const pathOf = (e: { segments: string[]; stem: string }) =>
|
||||||
|
[...e.segments, e.stem].join('/');
|
||||||
|
const paths = [p1, p2, c1, c2].map(pathOf);
|
||||||
|
expect(new Set(paths).size).toBe(paths.length);
|
||||||
|
});
|
||||||
|
});
|
||||||
66
test/markdown-document.test.ts
Normal file
66
test/markdown-document.test.ts
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
serializeDocmostMarkdownBody,
|
||||||
|
parseDocmostMarkdown,
|
||||||
|
type DocmostMdMeta,
|
||||||
|
} from 'docmost-client';
|
||||||
|
|
||||||
|
describe('serializeDocmostMarkdownBody round-trip (SPEC §3)', () => {
|
||||||
|
it('serialize -> parse preserves meta and the trimmed body, with no comments block', () => {
|
||||||
|
const meta: DocmostMdMeta = {
|
||||||
|
version: 1,
|
||||||
|
pageId: 'page-123',
|
||||||
|
slugId: 'slug-abc',
|
||||||
|
title: 'My Page',
|
||||||
|
spaceId: 'space-1',
|
||||||
|
parentPageId: 'parent-9',
|
||||||
|
};
|
||||||
|
const body = 'Hello\n\nWorld';
|
||||||
|
|
||||||
|
const file = serializeDocmostMarkdownBody(meta, body);
|
||||||
|
const parsed = parseDocmostMarkdown(file);
|
||||||
|
|
||||||
|
expect(parsed.meta).toEqual(meta);
|
||||||
|
expect(parsed.body).toBe(body);
|
||||||
|
// No trailing docmost:comments block was emitted (SPEC §3).
|
||||||
|
expect(parsed.comments).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves a null parentPageId for a root page', () => {
|
||||||
|
const meta: DocmostMdMeta = {
|
||||||
|
version: 1,
|
||||||
|
pageId: 'root-1',
|
||||||
|
slugId: 'root-slug',
|
||||||
|
title: 'Root',
|
||||||
|
spaceId: 'space-1',
|
||||||
|
parentPageId: null,
|
||||||
|
};
|
||||||
|
const file = serializeDocmostMarkdownBody(meta, 'body text');
|
||||||
|
const parsed = parseDocmostMarkdown(file);
|
||||||
|
expect(parsed.meta).toEqual(meta);
|
||||||
|
expect(parsed.comments).toBeNull();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('produces a parseable file for an empty/missing body', () => {
|
||||||
|
const meta: DocmostMdMeta = { version: 1, pageId: 'p-empty' };
|
||||||
|
|
||||||
|
// Empty string body.
|
||||||
|
const emptyFile = serializeDocmostMarkdownBody(meta, '');
|
||||||
|
expect(() => parseDocmostMarkdown(emptyFile)).not.toThrow();
|
||||||
|
const parsedEmpty = parseDocmostMarkdown(emptyFile);
|
||||||
|
expect(parsedEmpty.meta).toEqual(meta);
|
||||||
|
expect(parsedEmpty.body).toBe('');
|
||||||
|
expect(parsedEmpty.comments).toBeNull();
|
||||||
|
|
||||||
|
// Missing body (undefined) — serializer coalesces to "".
|
||||||
|
const missingFile = serializeDocmostMarkdownBody(
|
||||||
|
meta,
|
||||||
|
undefined as unknown as string,
|
||||||
|
);
|
||||||
|
expect(() => parseDocmostMarkdown(missingFile)).not.toThrow();
|
||||||
|
const parsedMissing = parseDocmostMarkdown(missingFile);
|
||||||
|
expect(parsedMissing.meta).toEqual(meta);
|
||||||
|
expect(parsedMissing.body).toBe('');
|
||||||
|
expect(parsedMissing.comments).toBeNull();
|
||||||
|
});
|
||||||
|
});
|
||||||
Reference in New Issue
Block a user