refactor(pull): extract tested vault-layout module; harden pull; close review findings
Address the Increment-1 code review (3 warnings + suggestions). - layout: new pure src/layout.ts (buildVaultLayout) — page-tree -> vault paths, sibling + full-path collision disambiguation (sanitized ~slugId suffix), parent cycle guard; pull.ts is now a thin I/O loop - layout: resolve orphan/root collisions at the NAME stage so an orphan ancestor can't desync its children's folder segments (fixes review Major); covered by test - pull: per-page try/catch (one bad page no longer aborts the mirror), bounded concurrency (6), progress logging, process.exitCode=1 on partial mirror - security: filename disambiguation suffix now passes through sanitizeTitle - docs: AGENTS.md -> Increment 1 status/structure/run targets; pull.ts meta-block comment; collectRecentSince JSDoc (lexicographic UTC-ISO precondition) - tests: layout (9), markdown-document round-trip (no comments block, SPEC §3), firstDivergence; export firstDivergence. 49 tests green.
This commit is contained in:
14
AGENTS.md
14
AGENTS.md
@@ -8,9 +8,11 @@ A daemon that bidirectionally syncs Docmost articles with a local Markdown git
|
||||
vault (git is the state store). It reuses the sibling project **docmost-mcp** as
|
||||
a library (DocmostClient, ProseMirror ↔ Markdown converter, collab-write).
|
||||
|
||||
**Status: scaffold only — the sync engine is NOT implemented yet.** `src/index.ts`
|
||||
is a thin stub that validates config and exits. See `SPEC.md` for the full design
|
||||
and the phased plan before adding engine logic.
|
||||
**Status: Increment 1.** `src/index.ts` is still a thin config-validating entry,
|
||||
but the engine now has a working READ-ONLY `pull` (Docmost -> FS mirror) and a
|
||||
Phase-0 round-trip idempotency harness. Bidirectional sync, conflict handling,
|
||||
and git are NOT implemented yet (see the SPEC phases). See `SPEC.md` for the full
|
||||
design and the phased plan before adding engine logic.
|
||||
|
||||
## Project structure
|
||||
|
||||
@@ -27,6 +29,10 @@ build` builds the lib first, then compiles the app to `build/`.
|
||||
- `src/config-errors.ts` — `loadSettingsOrExit` turns a config error into a
|
||||
clear startup message that names the missing/invalid variable, then exits.
|
||||
- `src/index.ts` — thin entry point.
|
||||
- `src/sanitize.ts` — filesystem-safe filename sanitization (SPEC §12).
|
||||
- `src/layout.ts` — pure page-tree -> vault path mapping.
|
||||
- `src/roundtrip.ts` — Phase-0 idempotency harness (SPEC §11).
|
||||
- `src/pull.ts` — read-only Docmost -> FS mirror (SPEC §6).
|
||||
- `test/` — vitest tests (`*.test.ts`).
|
||||
- `data/` — all mutable runtime state (the git vault lives here). Gitignored;
|
||||
mounted as a docker volume in production. Never put code/static assets here.
|
||||
@@ -46,6 +52,8 @@ Relative imports inside `src/` use the `.js` extension (NodeNext), e.g.
|
||||
- `make test` — run the test suite (vitest).
|
||||
- `make run` — build and run the app.
|
||||
- `make dev` — run in watch mode (tsx).
|
||||
- `make roundtrip` — run the offline round-trip idempotency harness.
|
||||
- `make pull` — mirror the configured space into the vault (read-only).
|
||||
|
||||
`make` (or `make help`) lists all targets.
|
||||
|
||||
|
||||
@@ -2704,6 +2704,11 @@ export class DocmostClient {
|
||||
* collecting items strictly newer than sinceIso and stopping at the first item
|
||||
* with updatedAt <= sinceIso. `fetchPage(cursor)` returns one page; dedup by id
|
||||
* guards a server that ignores the cursor; hardPageCap bounds the walk.
|
||||
*
|
||||
* Precondition: `sinceIso` and each `item.updatedAt` MUST be the SAME UTC
|
||||
* ISO-8601 format that Docmost emits, because the cutoff comparison is purely
|
||||
* lexicographic (string `<=`); mixed formats or non-UTC offsets would compare
|
||||
* incorrectly.
|
||||
*/
|
||||
export async function collectRecentSince(
|
||||
fetchPage: (cursor: string | null) => Promise<{ items: any[]; nextCursor: string | null }>,
|
||||
|
||||
177
src/layout.ts
Normal file
177
src/layout.ts
Normal file
@@ -0,0 +1,177 @@
|
||||
/**
|
||||
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||
*
|
||||
* Given the flat list of page nodes for a space (as returned by
|
||||
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||
* page's own name, no extension). This module is intentionally PURE and
|
||||
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||
* lives in each file's meta block (pageId / slugId).
|
||||
*/
|
||||
|
||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||
|
||||
/**
 * One entry of the flat page list produced by `listAllSpacePages`. It carries
 * tree metadata only — never the page content.
 */
export interface PageNode {
  /** Page identifier; the layout map is keyed by this value. */
  id: string;
  /** Page title; it is sanitized before being used as a file or folder name. */
  title?: string;
  /** Slug identifier; preferred suffix when a name has to be disambiguated. */
  slugId?: string;
  /** Id of the parent page; `null` or absent for a page at the root. */
  parentPageId?: string | null;
  /** Child-presence flag as delivered by the listing; the layout logic does not read it. */
  hasChildren?: boolean;
}
|
||||
|
||||
/**
 * Where a single page lands inside the vault: the folders leading to it plus
 * the name of its own file.
 */
export interface VaultEntry {
  /**
   * Names of the page's ancestors, ordered root -> leaf; each one is a folder.
   * A root page has an empty list.
   */
  segments: string[];
  /** File name of the page itself, without any extension. */
  stem: string;
}
|
||||
|
||||
/**
 * Build the full vault layout for a space.
 *
 * Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
 * deterministic for a given input (pages are processed in input order, so an
 * earlier page keeps the plain name and a later colliding page is suffixed)
 * and every full destination path (`[...segments, stem].join("/")`) is unique,
 * so no page can silently overwrite another.
 *
 * Disambiguation is layered:
 *  1. Name pass — pages that sanitize to the same title inside the same bucket
 *     get a ` ~<slugId>` suffix (the suffix is itself sanitized, since
 *     slugId/id is untrusted data that must never inject a path separator).
 *     The bucket is the parent id when the parent is part of the input;
 *     otherwise (root page, or orphan whose parent is missing) it is the
 *     shared `"__root__"` bucket, because both end up at the vault root.
 *     Names are final after this pass, before any folder segment is derived.
 *  2. Full-path pass — a safety net for collisions the name pass cannot see.
 *     Example: two same-titled pages that are each other's parent sit in
 *     different buckets, yet their cycle-truncated ancestor chains produce the
 *     same full path. Only the later page's stem is changed: first with the
 *     sanitized slugId (or id), then with the id appended, then with a numeric
 *     counter until the path is free.
 *
 * @param pages Flat page list. Falsy entries, entries without an id and
 *   repeated ids (the first occurrence wins) are ignored.
 * @returns Map from page id to its resolved `{ segments, stem }`.
 * @throws Error if an indexed page has no resolved name, or if the full-path
 *   pass cannot produce a free stem.
 */
export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
  // Index pages by id so the parent chain can be walked. The first occurrence
  // of an id wins; a Map iterates in insertion order, so `byId.values()` is
  // the de-duplicated input in its original order.
  const byId = new Map<string, PageNode>();
  for (const p of pages) {
    if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
  }

  // Name pass: resolve every page's name exactly once. Orphans share the
  // "__root__" bucket with real root pages because `folderSegmentsFor` places
  // them at the vault root too, so they must be compared against each other.
  const usedBySibling = new Map<string, Set<string>>();
  const nameById = new Map<string, string>();
  for (const p of byId.values()) {
    const parentKey =
      p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
    nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
  }

  // Look up a resolved name. Throws instead of recomputing a different,
  // non-disambiguated name, which would desync a folder segment from the file
  // it belongs to.
  const nameOf = (id: string): string => {
    const name = nameById.get(id);
    if (name === undefined) {
      throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
    }
    return name;
  };

  // Folder path of a page: the names of its ancestors, root -> leaf. The walk
  // stops at a parent that is not in the input, and the `visited` set stops it
  // on a malformed parent cycle.
  const folderSegmentsFor = (node: PageNode): string[] => {
    const leafToRoot: string[] = [];
    const visited = new Set<string>();
    let current: PageNode | undefined = node.parentPageId
      ? byId.get(node.parentPageId)
      : undefined;
    while (current && !visited.has(current.id)) {
      visited.add(current.id);
      leafToRoot.push(nameOf(current.id));
      current = current.parentPageId
        ? byId.get(current.parentPageId)
        : undefined;
    }
    return leafToRoot.reverse();
  };

  // Full-path pass. Segments depend only on the (already final) names, so each
  // entry can be built and checked against the earlier entries in one sweep.
  const layout = new Map<string, VaultEntry>();
  const usedPaths = new Set<string>();
  const pathKey = (e: VaultEntry): string => [...e.segments, e.stem].join("/");

  for (const p of byId.values()) {
    const entry: VaultEntry = {
      segments: folderSegmentsFor(p),
      stem: nameOf(p.id),
    };

    if (usedPaths.has(pathKey(entry))) {
      // First attempt: the sanitized slugId. `||` (not `??`) on purpose: an
      // empty slugId carries no identity, so fall back to the id.
      entry.stem = disambiguate(entry.stem, sanitizeTitle(p.slugId || p.id));
      if (usedPaths.has(pathKey(entry))) {
        // Second attempt: additionally append the sanitized id.
        entry.stem = disambiguate(entry.stem, sanitizeTitle(p.id));
      }
      // Even the id-suffixed stem can coincide with an existing path (another
      // page may literally carry that title), so keep going with a counter.
      // `usedPaths.size + 1` distinct candidates always contain a free one;
      // running out means the candidates are not distinct, which is reported
      // rather than letting one page overwrite another.
      const suffixed = entry.stem;
      for (let n = 2; usedPaths.has(pathKey(entry)); n++) {
        if (n > usedPaths.size + 2) {
          throw new Error(
            `buildVaultLayout: could not find a free path for page id ${p.id}`,
          );
        }
        entry.stem = disambiguate(suffixed, String(n));
      }
    }

    usedPaths.add(pathKey(entry));
    layout.set(p.id, entry);
  }

  return layout;
}
|
||||
|
||||
/**
 * Compute a deterministic name for a node that is unique among the names
 * already handed out in its bucket.
 *
 * `usedBySibling` maps a bucket key -> set of names already taken; the chosen
 * name is added to that set before returning. Candidates are tried in order:
 *  1. the sanitized title;
 *  2. the title with a ` ~slugId` suffix (SPEC §12), falling back to the page
 *     id when the slugId is missing or empty;
 *  3. the previous candidate with the page id appended (only when a slugId was
 *     used in step 2);
 *  4. the last candidate with a numeric counter appended.
 * Every suffix taken from the node is passed through `sanitizeTitle`, because
 * slugId/id is a second untrusted-data channel that must never leak a path
 * separator into the name. The name is COSMETIC; identity lives in the meta
 * block.
 *
 * @param node Page to name.
 * @param parentKey Bucket key supplied by the caller (`"__root__"` for root
 *   pages and for orphans whose parent is outside the input set, so they share
 *   one bucket).
 * @param usedBySibling Names already taken per bucket; mutated by this call.
 * @returns The resolved name.
 */
function nameForNode(
  node: PageNode,
  parentKey: string,
  usedBySibling: Map<string, Set<string>>,
): string {
  let used = usedBySibling.get(parentKey);
  if (!used) {
    used = new Set<string>();
    usedBySibling.set(parentKey, used);
  }

  let name = sanitizeTitle(node.title ?? "");
  if (used.has(name)) {
    // `||` (not `??`) on purpose: an empty slugId carries no identity, so the
    // id is used instead.
    const hasSlug = Boolean(node.slugId);
    name = disambiguate(name, sanitizeTitle(node.slugId || node.id));

    // The suffixed name can itself be taken (a sibling may literally carry
    // that title, or share the slugId). Returning it anyway would give two
    // pages in one folder the same name, so escalate to the id.
    if (hasSlug && used.has(name)) {
      name = disambiguate(name, sanitizeTitle(node.id));
    }

    // Last resort: a counter. `used.size + 1` distinct candidates always
    // contain a free one; the bound only guards against candidates that do not
    // differ, in which case the caller's full-path pass is the remaining net.
    const suffixed = name;
    for (let n = 2; used.has(name) && n <= used.size + 2; n++) {
      name = disambiguate(suffixed, String(n));
    }
  }

  used.add(name);
  return name;
}
|
||||
150
src/pull.ts
150
src/pull.ts
@@ -4,8 +4,12 @@
|
||||
* Walks the configured space's page tree and writes one self-contained `.md`
|
||||
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
|
||||
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
|
||||
* local files. The meta block inside each file carries pageId/slugId/
|
||||
* parentPageId (identity), so no external map file is needed.
|
||||
* local files. The meta block inside each file carries
|
||||
* `{ version, pageId, slugId, title, spaceId, parentPageId }` (identity), so no
|
||||
* external map file is needed.
|
||||
*
|
||||
* The pure tree -> path mapping lives in `./layout.js`; this file is a thin,
|
||||
* fault-tolerant I/O loop around it.
|
||||
*
|
||||
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
|
||||
* be correct, but is not expected to be run without live access.
|
||||
@@ -17,43 +21,13 @@ import { join } from "node:path";
|
||||
import { pathToFileURL } from "node:url";
|
||||
import { DocmostClient } from "docmost-client";
|
||||
import { loadSettings } from "./settings.js";
|
||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||
import { buildVaultLayout, type PageNode } from "./layout.js";
|
||||
|
||||
/** Flat page node as returned by listAllSpacePages (no content). */
|
||||
interface PageNode {
|
||||
id: string;
|
||||
title?: string;
|
||||
slugId?: string;
|
||||
parentPageId?: string | null;
|
||||
hasChildren?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a deterministic, collision-free folder/file name for a node among its
|
||||
* siblings. `usedBySibling` maps a parent key -> set of names already taken, so
|
||||
* two siblings that sanitize to the same name get a stable ` ~slugId` suffix
|
||||
* (SPEC §12). The name is COSMETIC; identity lives in the file's meta block.
|
||||
*/
|
||||
function nameForNode(
|
||||
node: PageNode,
|
||||
usedBySibling: Map<string, Set<string>>,
|
||||
): string {
|
||||
const parentKey = node.parentPageId ?? "__root__";
|
||||
let used = usedBySibling.get(parentKey);
|
||||
if (!used) {
|
||||
used = new Set<string>();
|
||||
usedBySibling.set(parentKey, used);
|
||||
}
|
||||
|
||||
let name = sanitizeTitle(node.title ?? "");
|
||||
if (used.has(name)) {
|
||||
// Sibling collision: disambiguate with the stable slugId (fall back to the
|
||||
// pageId if no slugId is present).
|
||||
name = disambiguate(name, node.slugId ?? node.id);
|
||||
}
|
||||
used.add(name);
|
||||
return name;
|
||||
}
|
||||
// Number of pages fetched/written concurrently. Bounded so a large space does
|
||||
// not open thousands of simultaneous requests/file handles.
|
||||
const CONCURRENCY = 6;
|
||||
// How often to log incremental progress (every N completed pages).
|
||||
const PROGRESS_EVERY = 25;
|
||||
|
||||
async function main(): Promise<void> {
|
||||
const s = loadSettings();
|
||||
@@ -67,64 +41,74 @@ async function main(): Promise<void> {
|
||||
const vaultRoot = s.vaultPath;
|
||||
|
||||
const pages: PageNode[] = await client.listAllSpacePages(spaceId);
|
||||
const layout = buildVaultLayout(pages);
|
||||
|
||||
// Index pages by id so the parent chain can be walked.
|
||||
const byId = new Map<string, PageNode>();
|
||||
for (const p of pages) {
|
||||
if (p && p.id) byId.set(p.id, p);
|
||||
}
|
||||
const total = pages.length;
|
||||
let written = 0;
|
||||
let failed = 0;
|
||||
let completed = 0;
|
||||
let nextIndex = 0;
|
||||
|
||||
// Resolve each node's display name once, deterministically, tracking sibling
|
||||
// collisions per parent.
|
||||
const usedBySibling = new Map<string, Set<string>>();
|
||||
const nameById = new Map<string, string>();
|
||||
for (const p of pages) {
|
||||
if (p && p.id) nameById.set(p.id, nameForNode(p, usedBySibling));
|
||||
}
|
||||
|
||||
// Build the folder path for a page by walking parentPageId to the root. The
|
||||
// page's OWN name is the file stem; its ancestors become folders. A `visited`
|
||||
// guard prevents an infinite loop on a malformed parent cycle.
|
||||
const folderSegmentsFor = (node: PageNode): string[] => {
|
||||
const ancestors: string[] = [];
|
||||
const visited = new Set<string>();
|
||||
let current: PageNode | undefined = node.parentPageId
|
||||
? byId.get(node.parentPageId)
|
||||
: undefined;
|
||||
while (current && current.id && !visited.has(current.id)) {
|
||||
visited.add(current.id);
|
||||
ancestors.unshift(
|
||||
nameById.get(current.id) ?? sanitizeTitle(current.title ?? ""),
|
||||
// Pull + write a single page. Each call is wrapped so one bad page (network
|
||||
// error, page deleted between the walk and the fetch, body conversion
|
||||
// failure) NEVER aborts the whole pull — it is counted as a failure and the
|
||||
// pool moves on. Mirrors the deliberately fault-tolerant enumerateSpacePages.
|
||||
const pullOne = async (page: PageNode): Promise<void> => {
|
||||
if (!page || !page.id) return;
|
||||
const entry = layout.get(page.id);
|
||||
if (!entry) return; // no layout entry (e.g. duplicate/skipped id)
|
||||
try {
|
||||
const dir = join(vaultRoot, ...entry.segments);
|
||||
await mkdir(dir, { recursive: true });
|
||||
// Body + meta only (no comments block) — SPEC §3.
|
||||
const md = await client.exportPageBody(page.id);
|
||||
await writeFile(join(dir, `${entry.stem}.md`), md, "utf8");
|
||||
written++;
|
||||
} catch (err) {
|
||||
failed++;
|
||||
console.error(
|
||||
`pull: failed page ${page.id}:`,
|
||||
err instanceof Error ? err.message : String(err),
|
||||
);
|
||||
current = current.parentPageId
|
||||
? byId.get(current.parentPageId)
|
||||
: undefined;
|
||||
} finally {
|
||||
completed++;
|
||||
if (completed % PROGRESS_EVERY === 0) {
|
||||
console.log(`pulled ${completed}/${total}`);
|
||||
}
|
||||
}
|
||||
return ancestors;
|
||||
};
|
||||
|
||||
let written = 0;
|
||||
for (const p of pages) {
|
||||
if (!p || !p.id) continue;
|
||||
const segments = folderSegmentsFor(p);
|
||||
const fileStem = nameById.get(p.id) ?? sanitizeTitle(p.title ?? "");
|
||||
const dir = join(vaultRoot, ...segments);
|
||||
await mkdir(dir, { recursive: true });
|
||||
// A small dependency-free bounded-concurrency pool: a fixed set of runners
|
||||
// each pull the next index until the list is exhausted.
|
||||
const runner = async (): Promise<void> => {
|
||||
while (true) {
|
||||
const i = nextIndex++;
|
||||
if (i >= pages.length) return;
|
||||
await pullOne(pages[i]);
|
||||
}
|
||||
};
|
||||
|
||||
// Body + meta only (no comments block) — SPEC §3.
|
||||
const fileMd = await client.exportPageBody(p.id);
|
||||
await writeFile(join(dir, `${fileStem}.md`), fileMd, "utf8");
|
||||
written++;
|
||||
}
|
||||
const runners = Array.from(
|
||||
{ length: Math.min(CONCURRENCY, pages.length) },
|
||||
() => runner(),
|
||||
);
|
||||
await Promise.all(runners);
|
||||
|
||||
console.log(
|
||||
`pull complete: ${written} page(s) from space ${spaceId} into ${vaultRoot}`,
|
||||
`pull complete: ${written} page(s) written, ${failed} failed, ` +
|
||||
`out of ${total} from space ${spaceId} into ${vaultRoot}`,
|
||||
);
|
||||
|
||||
// Signal a partial mirror so callers/CI can react. Use process.exitCode (not
|
||||
// a hard process.exit) so any buffered output is flushed cleanly.
|
||||
if (failed > 0) {
|
||||
process.exitCode = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Only auto-run when invoked directly as the CLI entrypoint, not when this
|
||||
// module is imported (e.g. by a unit test importing sanitizeTitle / path
|
||||
// helpers), so the import does not trigger loadSettings() + process.exit.
|
||||
// module is imported (e.g. by a unit test), so the import does not trigger
|
||||
// loadSettings() + process.exit.
|
||||
const invokedDirectly =
|
||||
typeof process.argv[1] === "string" &&
|
||||
import.meta.url === pathToFileURL(process.argv[1]).href;
|
||||
|
||||
@@ -98,7 +98,7 @@ async function loadDoc(args: ParsedArgs): Promise<any> {
|
||||
* Find the first divergence between two values via a recursive deep compare.
|
||||
* Returns a short path + the two differing values, or null if they are equal.
|
||||
*/
|
||||
function firstDivergence(
|
||||
export function firstDivergence(
|
||||
a: any,
|
||||
b: any,
|
||||
path = "$",
|
||||
|
||||
40
test/divergence.test.ts
Normal file
40
test/divergence.test.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { firstDivergence } from '../src/roundtrip.js';
|
||||
|
||||
// Behavioural checks for the deep-compare helper used by the round-trip harness.
describe('firstDivergence', () => {
  it('returns null for equal nested objects', () => {
    // Two structurally identical but distinct object graphs.
    const left = { k1: { k2: 1, k3: [1, 2, 3] }, n: 'x' };
    const right = { k1: { k2: 1, k3: [1, 2, 3] }, n: 'x' };

    expect(firstDivergence(left, right)).toBeNull();
  });

  it('reports the correct path for a differing leaf', () => {
    const diff = firstDivergence({ k1: { k2: 1 } }, { k1: { k2: 2 } });

    expect(diff).not.toBeNull();
    expect(diff!.path).toBe('$.k1.k2');
    expect(diff!.a).toBe(1);
    expect(diff!.b).toBe(2);
  });

  it('reports an array length mismatch at $.arr.length', () => {
    const diff = firstDivergence({ arr: [1, 2, 3] }, { arr: [1, 2] });

    expect(diff).not.toBeNull();
    expect(diff!.path).toBe('$.arr.length');
    expect(diff!.a).toBe(3);
    expect(diff!.b).toBe(2);
  });

  it('reports a key present only in a', () => {
    const withExtraKey = { only: 'here', shared: 1 };
    const withoutExtraKey = { shared: 1 };
    const diff = firstDivergence(withExtraKey, withoutExtraKey);

    expect(diff).not.toBeNull();
    expect(diff!.path).toBe('$.only');
    expect(diff!.a).toBe('here');
    expect(diff!.b).toBeUndefined();
  });
});
|
||||
144
test/layout.test.ts
Normal file
144
test/layout.test.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { buildVaultLayout, type PageNode } from '../src/layout.js';
|
||||
|
||||
// Unit tests for the pure page-tree -> vault path mapping.
describe('buildVaultLayout', () => {
  // Full destination path of an entry, as the layout module defines it.
  const fullPath = (entry: { segments: string[]; stem: string }): string =>
    [...entry.segments, entry.stem].join('/');

  it('disambiguates two siblings with the same sanitized title via ~slugId', () => {
    const layout = buildVaultLayout([
      { id: 'p1', title: 'Notes', slugId: 'slug-a', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'slug-b', parentPageId: null },
    ]);

    expect(layout.get('p1')).toEqual({ segments: [], stem: 'Notes' });
    expect(layout.get('p2')).toEqual({ segments: [], stem: 'Notes ~slug-b' });
  });

  it('falls back to ~id when a colliding sibling has no slugId', () => {
    const layout = buildVaultLayout([
      { id: 'p1', title: 'Notes', parentPageId: null },
      { id: 'p2', title: 'Notes', parentPageId: null },
    ]);

    expect(layout.get('p1')?.stem).toBe('Notes');
    expect(layout.get('p2')?.stem).toBe('Notes ~p2');
  });

  it('does NOT collide identical titles under DIFFERENT parents (distinct segments)', () => {
    const tree: PageNode[] = [
      { id: 'a', title: 'Alpha', parentPageId: null },
      { id: 'b', title: 'Beta', parentPageId: null },
      { id: 'a1', title: 'Notes', parentPageId: 'a' },
      { id: 'b1', title: 'Notes', parentPageId: 'b' },
    ];
    const layout = buildVaultLayout(tree);

    // Identical stems are fine here: the folders differ, so the paths differ.
    expect(layout.get('a1')).toEqual({ segments: ['Alpha'], stem: 'Notes' });
    expect(layout.get('b1')).toEqual({ segments: ['Beta'], stem: 'Notes' });
  });

  it('terminates on a 2-node parent cycle and yields a finite result', () => {
    const cyclic: PageNode[] = [
      { id: 'a', title: 'A', parentPageId: 'b' },
      { id: 'b', title: 'B', parentPageId: 'a' },
    ];
    const layout = buildVaultLayout(cyclic);

    // Both pages get an entry; the visited-guard cuts the cycle.
    expect(layout.size).toBe(2);
    const entryA = layout.get('a');
    const entryB = layout.get('b');
    expect(entryA).toBeDefined();
    expect(entryB).toBeDefined();
    // The ancestor walk is bounded for each of them.
    expect(entryA!.segments.length).toBeLessThanOrEqual(2);
    expect(entryB!.segments.length).toBeLessThanOrEqual(2);
  });

  it('maps a root page (parentPageId null) to empty segments', () => {
    const layout = buildVaultLayout([
      { id: 'root', title: 'Home', parentPageId: null },
    ]);

    expect(layout.get('root')).toEqual({ segments: [], stem: 'Home' });
  });

  it('emits ancestors in root->leaf order for a deep chain', () => {
    const chain: PageNode[] = [
      { id: 'g', title: 'Grand', parentPageId: null },
      { id: 'p', title: 'Parent', parentPageId: 'g' },
      { id: 'c', title: 'Child', parentPageId: 'p' },
    ];

    expect(buildVaultLayout(chain).get('c')).toEqual({
      segments: ['Grand', 'Parent'],
      stem: 'Child',
    });
  });

  it('disambiguates two orphan-parent pages with the same title at the path level', () => {
    // Both parents are OUTSIDE the input set, so both pages land at the vault
    // root with segments: []. Their raw parentPageId values differ, but the
    // layout buckets every orphan under "__root__", so the two titles are
    // compared with each other and must end up on DISTINCT paths.
    const orphans: PageNode[] = [
      { id: 'x', title: 'Orphan', slugId: 'sx', parentPageId: 'missing-1' },
      { id: 'y', title: 'Orphan', slugId: 'sy', parentPageId: 'missing-2' },
    ];
    const layout = buildVaultLayout(orphans);
    const first = layout.get('x')!;
    const second = layout.get('y')!;

    expect(fullPath(first)).not.toBe(fullPath(second));
    // Input order decides: the earlier page keeps the plain stem.
    expect(first.stem).toBe('Orphan');
    expect(second.stem).toBe('Orphan ~sy');
  });

  it('sanitizes a slugId containing a path separator before using it as a suffix', () => {
    // A crafted slugId containing "/" must not smuggle a separator into the stem.
    const layout = buildVaultLayout([
      { id: 'p1', title: 'Notes', slugId: 'a/b', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'c/d', parentPageId: null },
    ]);
    const suffixedStem = layout.get('p2')!.stem;

    expect(suffixedStem).not.toContain('/');
    expect(suffixedStem).not.toContain('\\');
    // sanitizeTitle turned the "/" into a dash.
    expect(suffixedStem).toBe('Notes ~c-d');
  });

  it('disambiguates two ORPHAN ancestors at the NAME pass so their children stay in sync', () => {
    // Two orphan PARENTS carry the same title but hang off DIFFERENT missing
    // parents, so scoping by the raw parentPageId would never compare them.
    // Both land at the vault root and therefore have to be told apart in the
    // name pass (shared "__root__" bucket) BEFORE any child derives a folder
    // segment from its parent's name; renaming a parent afterwards would leave
    // the child's folder out of step with the parent's file.
    const tree: PageNode[] = [
      { id: 'p1', title: 'Dup', slugId: 's1', parentPageId: 'missing-1' },
      { id: 'p2', title: 'Dup', slugId: 's2', parentPageId: 'missing-2' },
      { id: 'c1', title: 'Child', parentPageId: 'p1' },
      { id: 'c2', title: 'Child', parentPageId: 'p2' },
    ];
    const layout = buildVaultLayout(tree);
    const parentOne = layout.get('p1')!;
    const parentTwo = layout.get('p2')!;
    const childOne = layout.get('c1')!;
    const childTwo = layout.get('c2')!;

    // Both orphan parents sit at the root, under different stems.
    expect(parentOne.segments).toEqual([]);
    expect(parentTwo.segments).toEqual([]);
    expect(parentOne.stem).toBe('Dup');
    expect(parentTwo.stem).toBe('Dup ~s2');
    expect(parentOne.stem).not.toBe(parentTwo.stem);

    // A child's only folder segment is exactly its parent's resolved stem.
    expect(childOne.segments).toEqual([parentOne.stem]);
    expect(childTwo.segments).toEqual([parentTwo.stem]);

    // No two of the four pages share a full path.
    const paths = [parentOne, parentTwo, childOne, childTwo].map(fullPath);
    expect(new Set(paths).size).toBe(paths.length);
  });
});
|
||||
66
test/markdown-document.test.ts
Normal file
66
test/markdown-document.test.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
serializeDocmostMarkdownBody,
|
||||
parseDocmostMarkdown,
|
||||
type DocmostMdMeta,
|
||||
} from 'docmost-client';
|
||||
|
||||
// Round-trip checks: what the body serializer writes, the parser must read back.
describe('serializeDocmostMarkdownBody round-trip (SPEC §3)', () => {
  it('serialize -> parse preserves meta and the trimmed body, with no comments block', () => {
    const body = 'Hello\n\nWorld';
    const meta: DocmostMdMeta = {
      version: 1,
      pageId: 'page-123',
      slugId: 'slug-abc',
      title: 'My Page',
      spaceId: 'space-1',
      parentPageId: 'parent-9',
    };

    const roundTripped = parseDocmostMarkdown(
      serializeDocmostMarkdownBody(meta, body),
    );

    expect(roundTripped.meta).toEqual(meta);
    expect(roundTripped.body).toBe(body);
    // The body serializer must not emit a trailing docmost:comments block (SPEC §3).
    expect(roundTripped.comments).toBeNull();
  });

  it('preserves a null parentPageId for a root page', () => {
    const rootMeta: DocmostMdMeta = {
      version: 1,
      pageId: 'root-1',
      slugId: 'root-slug',
      title: 'Root',
      spaceId: 'space-1',
      parentPageId: null,
    };

    const roundTripped = parseDocmostMarkdown(
      serializeDocmostMarkdownBody(rootMeta, 'body text'),
    );

    expect(roundTripped.meta).toEqual(rootMeta);
    expect(roundTripped.comments).toBeNull();
  });

  it('produces a parseable file for an empty/missing body', () => {
    const minimalMeta: DocmostMdMeta = { version: 1, pageId: 'p-empty' };

    // Case 1: the body is an empty string.
    const fileFromEmpty = serializeDocmostMarkdownBody(minimalMeta, '');
    expect(() => parseDocmostMarkdown(fileFromEmpty)).not.toThrow();
    const fromEmpty = parseDocmostMarkdown(fileFromEmpty);
    expect(fromEmpty.meta).toEqual(minimalMeta);
    expect(fromEmpty.body).toBe('');
    expect(fromEmpty.comments).toBeNull();

    // Case 2: the body is missing (undefined); the expectations below require
    // it to read back as "".
    const fileFromMissing = serializeDocmostMarkdownBody(
      minimalMeta,
      undefined as unknown as string,
    );
    expect(() => parseDocmostMarkdown(fileFromMissing)).not.toThrow();
    const fromMissing = parseDocmostMarkdown(fileFromMissing);
    expect(fromMissing.meta).toEqual(minimalMeta);
    expect(fromMissing.body).toBe('');
    expect(fromMissing.comments).toBeNull();
  });
});
|
||||
Reference in New Issue
Block a user