refactor(pull): extract tested vault-layout module; harden pull; close review findings

Address the Increment-1 code review (3 warnings + suggestions).

- layout: new pure src/layout.ts (buildVaultLayout) — page-tree -> vault paths,
  sibling + full-path collision disambiguation (sanitized ~slugId suffix), parent
  cycle guard; pull.ts is now a thin I/O loop
- layout: resolve orphan/root collisions at the NAME stage so an orphan ancestor
  can't desync its children's folder segments (fixes review Major); covered by test
- pull: per-page try/catch (one bad page no longer aborts the mirror), bounded
  concurrency (6), progress logging, process.exitCode=1 on partial mirror
- security: filename disambiguation suffix now passes through sanitizeTitle
- docs: AGENTS.md -> Increment 1 status/structure/run targets; pull.ts meta-block
  comment; collectRecentSince JSDoc (lexicographic UTC-ISO precondition)
- tests: layout (9), markdown-document round-trip (no comments block, SPEC §3),
  firstDivergence; export firstDivergence. 49 tests green.
This commit is contained in:
vvzvlad
2026-06-16 21:09:40 +03:00
parent 447d2508ae
commit c6edd73324
8 changed files with 511 additions and 87 deletions

View File

@@ -8,9 +8,11 @@ A daemon that bidirectionally syncs Docmost articles with a local Markdown git
vault (git is the state store). It reuses the sibling project **docmost-mcp** as vault (git is the state store). It reuses the sibling project **docmost-mcp** as
a library (DocmostClient, ProseMirror ↔ Markdown converter, collab-write). a library (DocmostClient, ProseMirror ↔ Markdown converter, collab-write).
**Status: scaffold only — the sync engine is NOT implemented yet.** `src/index.ts` **Status: Increment 1.** `src/index.ts` is still a thin config-validating entry,
is a thin stub that validates config and exits. See `SPEC.md` for the full design but the engine now has a working READ-ONLY `pull` (Docmost -> FS mirror) and a
and the phased plan before adding engine logic. Phase-0 round-trip idempotency harness. Bidirectional sync, conflict handling,
and git are NOT implemented yet (see the SPEC phases). See `SPEC.md` for the full
design and the phased plan before adding engine logic.
## Project structure ## Project structure
@@ -27,6 +29,10 @@ build` builds the lib first, then compiles the app to `build/`.
- `src/config-errors.ts` — `loadSettingsOrExit` turns a config error into a - `src/config-errors.ts` — `loadSettingsOrExit` turns a config error into a
clear startup message that names the missing/invalid variable, then exits. clear startup message that names the missing/invalid variable, then exits.
- `src/index.ts` — thin entry point. - `src/index.ts` — thin entry point.
- `src/sanitize.ts` — filesystem-safe filename sanitization (SPEC §12).
- `src/layout.ts` — pure page-tree -> vault path mapping.
- `src/roundtrip.ts` — Phase-0 idempotency harness (SPEC §11).
- `src/pull.ts` — read-only Docmost -> FS mirror (SPEC §6).
- `test/` — vitest tests (`*.test.ts`). - `test/` — vitest tests (`*.test.ts`).
- `data/` — all mutable runtime state (the git vault lives here). Gitignored; - `data/` — all mutable runtime state (the git vault lives here). Gitignored;
mounted as a docker volume in production. Never put code/static assets here. mounted as a docker volume in production. Never put code/static assets here.
@@ -46,6 +52,8 @@ Relative imports inside `src/` use the `.js` extension (NodeNext), e.g.
- `make test` — run the test suite (vitest). - `make test` — run the test suite (vitest).
- `make run` — build and run the app. - `make run` — build and run the app.
- `make dev` — run in watch mode (tsx). - `make dev` — run in watch mode (tsx).
- `make roundtrip` — run the offline round-trip idempotency harness.
- `make pull` — mirror the configured space into the vault (read-only).
`make` (or `make help`) lists all targets. `make` (or `make help`) lists all targets.

View File

@@ -2704,6 +2704,11 @@ export class DocmostClient {
* collecting items strictly newer than sinceIso and stopping at the first item * collecting items strictly newer than sinceIso and stopping at the first item
* with updatedAt <= sinceIso. `fetchPage(cursor)` returns one page; dedup by id * with updatedAt <= sinceIso. `fetchPage(cursor)` returns one page; dedup by id
* guards a server that ignores the cursor; hardPageCap bounds the walk. * guards a server that ignores the cursor; hardPageCap bounds the walk.
*
* Precondition: `sinceIso` and each `item.updatedAt` MUST be the SAME UTC
* ISO-8601 format that Docmost emits, because the cutoff comparison is purely
* lexicographic (string `<=`); mixed formats or non-UTC offsets would compare
* incorrectly.
*/ */
export async function collectRecentSince( export async function collectRecentSince(
fetchPage: (cursor: string | null) => Promise<{ items: any[]; nextCursor: string | null }>, fetchPage: (cursor: string | null) => Promise<{ items: any[]; nextCursor: string | null }>,

177
src/layout.ts Normal file
View File

@@ -0,0 +1,177 @@
/**
* Pure page-tree -> vault path mapping (SPEC §12).
*
* Given the flat list of page nodes for a space (as returned by
* `listAllSpacePages`), compute for every page a deterministic, collision-free
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
* page's own name, no extension). This module is intentionally PURE and
* dependency-free apart from the sanitization helpers, so the whole tree ->
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
* lives in each file's meta block (pageId / slugId).
*/
import { sanitizeTitle, disambiguate } from "./sanitize.js";
/**
 * Flat page node as returned by `listAllSpacePages` (no content).
 *
 * Only `id` is required; every other field is optional and the layout code
 * tolerates its absence.
 */
export interface PageNode {
  /** Page id. Used as the key of the layout map and of the parent lookup. */
  id: string;
  /**
   * Id of the parent page. A falsy value (null / undefined / "") or an id that
   * is not present in the input list places the page at the vault root.
   */
  parentPageId?: string | null;
  /** Human title; sanitized into the file/folder name ("" when absent). */
  title?: string;
  /** Preferred disambiguation suffix on a name collision (falls back to `id`). */
  slugId?: string;
  /** Carried through from the listing; not read by the layout module. */
  hasChildren?: boolean;
}
/**
 * Where one page lands in the vault: the folders it sits in plus its own file
 * stem. The full destination is `[...segments, stem].join("/")`; the caller
 * adds the file extension.
 */
export interface VaultEntry {
  /** Ancestor folder names ordered root -> leaf. `[]` means the vault root. */
  segments: string[];
  /** File name of the page itself, without any extension. */
  stem: string;
}
/**
 * Build the full vault layout for a space.
 *
 * Returns a Map keyed by pageId -> `{ segments, stem }` with one entry per
 * distinct page id, in input order. Null / id-less entries are skipped and the
 * first occurrence of a duplicate id wins. The result is deterministic for a
 * given input and every full destination path
 * (`[...segments, stem].join("/")`) is unique, so no page can silently
 * overwrite another.
 *
 * Disambiguation is layered:
 *   1. Name pass. Each page gets its name via `nameForNode`, scoped to a
 *      bucket: its parent's id when that parent is present in the input,
 *      otherwise `"__root__"`. Real root pages and orphans (parent outside
 *      the input set) therefore share one bucket and are disambiguated
 *      against each other BEFORE any folder segment is derived from a name.
 *   2. Full-path pass. A safety net for anything name-scoping cannot see
 *      (e.g. pages on a malformed parent cycle). The LATER page of a
 *      colliding pair is re-stemmed with its sanitized slugId (or id), then
 *      its sanitized id, then a bounded numeric counter.
 *
 * NOTE: a re-stem in the full-path pass is not propagated into `segments`
 * that were derived from that page's name, so a descendant's folder can
 * differ from a re-stemmed ancestor's file stem. Names are cosmetic (identity
 * lives in each file's meta block), and the name pass exists to keep this
 * from happening for well-formed trees.
 *
 * @param pages Flat page list for one space (order defines which page keeps
 *   the plain name on a collision: the earlier one).
 * @returns Map of pageId -> resolved `{ segments, stem }`.
 * @throws Error if a page has no resolved name (internal invariant), or if a
 *   full-path collision is still unresolved after the bounded fallback —
 *   failing loudly is preferred over letting two pages share one file.
 */
export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
  // Index pages by id so the parent chain can be walked. Null entries, entries
  // without an id, and duplicate ids (first one wins) are dropped here ONCE;
  // every later pass iterates `uniquePages` instead of re-checking.
  const byId = new Map<string, PageNode>();
  for (const p of pages) {
    if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
  }
  // A Map iterates in insertion order, so this is "first occurrence, in input
  // order" — the same order the name and path passes must honour.
  const uniquePages = [...byId.values()];

  // Name pass. The bucket key is the node's parent ONLY when that parent is
  // actually present in `byId`; otherwise (null parent, or an orphan whose
  // parent is outside the input set) the node buckets at "__root__". Orphans
  // land at the vault root (see `folderSegmentsFor`), so they MUST share the
  // root bucket with real root pages. This makes `nameById` final before any
  // `segments` are computed.
  const usedBySibling = new Map<string, Set<string>>();
  const nameById = new Map<string, string>();
  for (const p of uniquePages) {
    const parentKey =
      p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
    nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
  }

  // Every indexed id MUST have a resolved name. Throw if one is absent rather
  // than silently recomputing a DIFFERENT, non-disambiguated name (which would
  // desync a folder segment from its target file).
  const nameOf = (id: string): string => {
    const name = nameById.get(id);
    if (name === undefined) {
      throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
    }
    return name;
  };

  // Folder path for a page: walk parentPageId up to the root, prepending each
  // ancestor's resolved name. `visited` is seeded with the page's OWN id so
  // that, on a malformed parent cycle (or a self-parented page), the walk
  // stops before the page is emitted as its own ancestor folder.
  const folderSegmentsFor = (node: PageNode): string[] => {
    const ancestors: string[] = [];
    const visited = new Set<string>([node.id]);
    let current: PageNode | undefined = node.parentPageId
      ? byId.get(node.parentPageId)
      : undefined;
    while (current && !visited.has(current.id)) {
      visited.add(current.id);
      ancestors.unshift(nameOf(current.id));
      current = current.parentPageId
        ? byId.get(current.parentPageId)
        : undefined;
    }
    return ancestors;
  };

  const pathKey = (e: VaultEntry): string => [...e.segments, e.stem].join("/");

  // Layout + full-path uniqueness pass. Segments depend only on the (already
  // final) names, so each entry can be built and de-duplicated in one sweep.
  const layout = new Map<string, VaultEntry>();
  const usedPaths = new Set<string>();
  for (const p of uniquePages) {
    const entry: VaultEntry = {
      segments: folderSegmentsFor(p),
      stem: nameOf(p.id),
    };

    if (usedPaths.has(pathKey(entry))) {
      // First attempt: disambiguate the stem with the sanitized slugId (or id).
      entry.stem = disambiguate(entry.stem, sanitizeTitle(p.slugId ?? p.id));
    }
    if (usedPaths.has(pathKey(entry))) {
      // Second attempt: append the sanitized id.
      entry.stem = disambiguate(entry.stem, sanitizeTitle(p.id));
    }
    if (usedPaths.has(pathKey(entry))) {
      // Last resort: a numeric counter. Two ids can sanitize to the same text,
      // so the id suffix alone is not a guarantee. `usedPaths.size + 1`
      // distinct candidates are enough as long as `disambiguate` yields a
      // different result per suffix (assumed — it is defined in sanitize.ts);
      // the bound keeps the loop finite even if that assumption is wrong.
      const stuckStem = entry.stem;
      const lastCounter = usedPaths.size + 2;
      for (
        let n = 2;
        n <= lastCounter && usedPaths.has(pathKey(entry));
        n++
      ) {
        entry.stem = disambiguate(stuckStem, String(n));
      }
      if (usedPaths.has(pathKey(entry))) {
        throw new Error(
          `buildVaultLayout: could not find a unique path for page id ${p.id}`,
        );
      }
    }

    usedPaths.add(pathKey(entry));
    layout.set(p.id, entry);
  }
  return layout;
}
/**
 * Compute a deterministic name for a node that is unique among the names
 * already handed out in its bucket, and record it as taken.
 *
 * `usedBySibling` maps a bucket key -> set of names already taken; the bucket
 * for `parentKey` is created on first use and mutated by this call.
 * `parentKey` is supplied by the caller (it resolves to `"__root__"` for root
 * pages AND for orphans whose parent is outside the input set, so they share
 * one bucket).
 *
 * Candidates are tried in order until one is free:
 *   1. the sanitized title (`""` is sanitized when the title is absent);
 *   2. that name with a ` ~slugId` suffix (SPEC §12), falling back to the
 *      page id when no slugId is present;
 *   3. the previous candidate with the page id appended as well;
 *   4. the previous candidate with a numeric counter (2, 3, ...).
 * Steps 3-4 matter when the suffixed name is ITSELF already taken — e.g. a
 * sibling literally titled `Notes ~slug-b`, or two pages reporting the same
 * slugId. Returning a duplicate there would let a later path-level re-stem
 * detach this page's children from its folder.
 *
 * Every slugId/id suffix is passed through `sanitizeTitle`, because it is a
 * second untrusted-data channel that must never leak a path separator into
 * the name. The name is COSMETIC; identity lives in the meta block.
 *
 * @param node Page to name.
 * @param parentKey Bucket key the name must be unique within.
 * @param usedBySibling Shared registry of taken names per bucket (mutated).
 * @returns The chosen name. If the bounded counter fallback is exhausted (only
 *   possible if `disambiguate` does not produce distinct results for distinct
 *   suffixes), the last candidate is returned even though it collides.
 */
function nameForNode(
  node: PageNode,
  parentKey: string,
  usedBySibling: Map<string, Set<string>>,
): string {
  let used = usedBySibling.get(parentKey);
  if (!used) {
    used = new Set<string>();
    usedBySibling.set(parentKey, used);
  }

  let name = sanitizeTitle(node.title ?? "");
  if (used.has(name)) {
    // Sibling collision: disambiguate with the stable, sanitized slugId (fall
    // back to the sanitized pageId if no slugId is present).
    name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
  }
  if (used.has(name)) {
    // The suffixed name is taken too: add the sanitized page id.
    name = disambiguate(name, sanitizeTitle(node.id));
  }
  if (used.has(name)) {
    // Still taken: walk a numeric counter. `used.size + 1` candidates suffice
    // when each suffix yields a distinct name; the bound keeps the loop finite
    // regardless of how `disambiguate` (defined in sanitize.ts) behaves.
    const stuckName = name;
    const lastCounter = used.size + 2;
    for (let n = 2; n <= lastCounter && used.has(name); n++) {
      name = disambiguate(stuckName, String(n));
    }
  }

  used.add(name);
  return name;
}

View File

@@ -4,8 +4,12 @@
* Walks the configured space's page tree and writes one self-contained `.md` * Walks the configured space's page tree and writes one self-contained `.md`
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is * per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes * READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
* local files. The meta block inside each file carries pageId/slugId/ * local files. The meta block inside each file carries
* parentPageId (identity), so no external map file is needed. * `{ version, pageId, slugId, title, spaceId, parentPageId }` (identity), so no
* external map file is needed.
*
* The pure tree -> path mapping lives in `./layout.js`; this file is a thin,
* fault-tolerant I/O loop around it.
* *
* Requires a `.env` with real Docmost credentials. This file must COMPILE and * Requires a `.env` with real Docmost credentials. This file must COMPILE and
* be correct, but is not expected to be run without live access. * be correct, but is not expected to be run without live access.
@@ -17,43 +21,13 @@ import { join } from "node:path";
import { pathToFileURL } from "node:url"; import { pathToFileURL } from "node:url";
import { DocmostClient } from "docmost-client"; import { DocmostClient } from "docmost-client";
import { loadSettings } from "./settings.js"; import { loadSettings } from "./settings.js";
import { sanitizeTitle, disambiguate } from "./sanitize.js"; import { buildVaultLayout, type PageNode } from "./layout.js";
/** Flat page node as returned by listAllSpacePages (no content). */ // Number of pages fetched/written concurrently. Bounded so a large space does
interface PageNode { // not open thousands of simultaneous requests/file handles.
id: string; const CONCURRENCY = 6;
title?: string; // How often to log incremental progress (every N completed pages).
slugId?: string; const PROGRESS_EVERY = 25;
parentPageId?: string | null;
hasChildren?: boolean;
}
/**
* Compute a deterministic, collision-free folder/file name for a node among its
* siblings. `usedBySibling` maps a parent key -> set of names already taken, so
* two siblings that sanitize to the same name get a stable ` ~slugId` suffix
* (SPEC §12). The name is COSMETIC; identity lives in the file's meta block.
*/
function nameForNode(
node: PageNode,
usedBySibling: Map<string, Set<string>>,
): string {
const parentKey = node.parentPageId ?? "__root__";
let used = usedBySibling.get(parentKey);
if (!used) {
used = new Set<string>();
usedBySibling.set(parentKey, used);
}
let name = sanitizeTitle(node.title ?? "");
if (used.has(name)) {
// Sibling collision: disambiguate with the stable slugId (fall back to the
// pageId if no slugId is present).
name = disambiguate(name, node.slugId ?? node.id);
}
used.add(name);
return name;
}
async function main(): Promise<void> { async function main(): Promise<void> {
const s = loadSettings(); const s = loadSettings();
@@ -67,64 +41,74 @@ async function main(): Promise<void> {
const vaultRoot = s.vaultPath; const vaultRoot = s.vaultPath;
const pages: PageNode[] = await client.listAllSpacePages(spaceId); const pages: PageNode[] = await client.listAllSpacePages(spaceId);
const layout = buildVaultLayout(pages);
// Index pages by id so the parent chain can be walked. const total = pages.length;
const byId = new Map<string, PageNode>(); let written = 0;
for (const p of pages) { let failed = 0;
if (p && p.id) byId.set(p.id, p); let completed = 0;
} let nextIndex = 0;
// Resolve each node's display name once, deterministically, tracking sibling // Pull + write a single page. Each call is wrapped so one bad page (network
// collisions per parent. // error, page deleted between the walk and the fetch, body conversion
const usedBySibling = new Map<string, Set<string>>(); // failure) NEVER aborts the whole pull — it is counted as a failure and the
const nameById = new Map<string, string>(); // pool moves on. Mirrors the deliberately fault-tolerant enumerateSpacePages.
for (const p of pages) { const pullOne = async (page: PageNode): Promise<void> => {
if (p && p.id) nameById.set(p.id, nameForNode(p, usedBySibling)); if (!page || !page.id) return;
} const entry = layout.get(page.id);
if (!entry) return; // no layout entry (e.g. duplicate/skipped id)
// Build the folder path for a page by walking parentPageId to the root. The try {
// page's OWN name is the file stem; its ancestors become folders. A `visited` const dir = join(vaultRoot, ...entry.segments);
// guard prevents an infinite loop on a malformed parent cycle. await mkdir(dir, { recursive: true });
const folderSegmentsFor = (node: PageNode): string[] => { // Body + meta only (no comments block) — SPEC §3.
const ancestors: string[] = []; const md = await client.exportPageBody(page.id);
const visited = new Set<string>(); await writeFile(join(dir, `${entry.stem}.md`), md, "utf8");
let current: PageNode | undefined = node.parentPageId written++;
? byId.get(node.parentPageId) } catch (err) {
: undefined; failed++;
while (current && current.id && !visited.has(current.id)) { console.error(
visited.add(current.id); `pull: failed page ${page.id}:`,
ancestors.unshift( err instanceof Error ? err.message : String(err),
nameById.get(current.id) ?? sanitizeTitle(current.title ?? ""),
); );
current = current.parentPageId } finally {
? byId.get(current.parentPageId) completed++;
: undefined; if (completed % PROGRESS_EVERY === 0) {
console.log(`pulled ${completed}/${total}`);
}
} }
return ancestors;
}; };
let written = 0; // A small dependency-free bounded-concurrency pool: a fixed set of runners
for (const p of pages) { // each pull the next index until the list is exhausted.
if (!p || !p.id) continue; const runner = async (): Promise<void> => {
const segments = folderSegmentsFor(p); while (true) {
const fileStem = nameById.get(p.id) ?? sanitizeTitle(p.title ?? ""); const i = nextIndex++;
const dir = join(vaultRoot, ...segments); if (i >= pages.length) return;
await mkdir(dir, { recursive: true }); await pullOne(pages[i]);
}
};
// Body + meta only (no comments block) — SPEC §3. const runners = Array.from(
const fileMd = await client.exportPageBody(p.id); { length: Math.min(CONCURRENCY, pages.length) },
await writeFile(join(dir, `${fileStem}.md`), fileMd, "utf8"); () => runner(),
written++; );
} await Promise.all(runners);
console.log( console.log(
`pull complete: ${written} page(s) from space ${spaceId} into ${vaultRoot}`, `pull complete: ${written} page(s) written, ${failed} failed, ` +
`out of ${total} from space ${spaceId} into ${vaultRoot}`,
); );
// Signal a partial mirror so callers/CI can react. Use process.exitCode (not
// a hard process.exit) so any buffered output is flushed cleanly.
if (failed > 0) {
process.exitCode = 1;
}
} }
// Only auto-run when invoked directly as the CLI entrypoint, not when this // Only auto-run when invoked directly as the CLI entrypoint, not when this
// module is imported (e.g. by a unit test importing sanitizeTitle / path // module is imported (e.g. by a unit test), so the import does not trigger
// helpers), so the import does not trigger loadSettings() + process.exit. // loadSettings() + process.exit.
const invokedDirectly = const invokedDirectly =
typeof process.argv[1] === "string" && typeof process.argv[1] === "string" &&
import.meta.url === pathToFileURL(process.argv[1]).href; import.meta.url === pathToFileURL(process.argv[1]).href;

View File

@@ -98,7 +98,7 @@ async function loadDoc(args: ParsedArgs): Promise<any> {
* Find the first divergence between two values via a recursive deep compare. * Find the first divergence between two values via a recursive deep compare.
* Returns a short path + the two differing values, or null if they are equal. * Returns a short path + the two differing values, or null if they are equal.
*/ */
function firstDivergence( export function firstDivergence(
a: any, a: any,
b: any, b: any,
path = "$", path = "$",

40
test/divergence.test.ts Normal file
View File

@@ -0,0 +1,40 @@
import { describe, expect, it } from 'vitest';
import { firstDivergence } from '../src/roundtrip.js';
describe('firstDivergence', () => {
  // Runs the comparison, asserts that a divergence WAS found, and hands it
  // back so each test only spells out the path/values it cares about.
  const divergenceOf = (left: unknown, right: unknown) => {
    const found = firstDivergence(left, right);
    expect(found).not.toBeNull();
    return found!;
  };

  it('returns null for equal nested objects', () => {
    // Two structurally identical (but distinct) object graphs.
    const left = { k1: { k2: 1, k3: [1, 2, 3] }, n: 'x' };
    const right = { k1: { k2: 1, k3: [1, 2, 3] }, n: 'x' };

    expect(firstDivergence(left, right)).toBeNull();
  });

  it('reports the correct path for a differing leaf', () => {
    const found = divergenceOf({ k1: { k2: 1 } }, { k1: { k2: 2 } });

    expect(found.path).toBe('$.k1.k2');
    expect(found.a).toBe(1);
    expect(found.b).toBe(2);
  });

  it('reports an array length mismatch at $.arr.length', () => {
    const found = divergenceOf({ arr: [1, 2, 3] }, { arr: [1, 2] });

    expect(found.path).toBe('$.arr.length');
    expect(found.a).toBe(3);
    expect(found.b).toBe(2);
  });

  it('reports a key present only in a', () => {
    const found = divergenceOf({ only: 'here', shared: 1 }, { shared: 1 });

    expect(found.path).toBe('$.only');
    expect(found.a).toBe('here');
    expect(found.b).toBeUndefined();
  });
});

144
test/layout.test.ts Normal file
View File

@@ -0,0 +1,144 @@
import { describe, expect, it } from 'vitest';
import { buildVaultLayout, type PageNode } from '../src/layout.js';
describe('buildVaultLayout', () => {
  // Full destination path of an entry: folders then stem, "/"-joined.
  const pathOf = (entry: { segments: string[]; stem: string }): string =>
    [...entry.segments, entry.stem].join('/');

  it('disambiguates two siblings with the same sanitized title via ~slugId', () => {
    const tree: PageNode[] = [
      { id: 'p1', title: 'Notes', slugId: 'slug-a', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'slug-b', parentPageId: null },
    ];

    const result = buildVaultLayout(tree);

    // The earlier page keeps the plain name; the later one gets the suffix.
    expect(result.get('p1')).toEqual({ segments: [], stem: 'Notes' });
    expect(result.get('p2')).toEqual({ segments: [], stem: 'Notes ~slug-b' });
  });

  it('falls back to ~id when a colliding sibling has no slugId', () => {
    const tree: PageNode[] = [
      { id: 'p1', title: 'Notes', parentPageId: null },
      { id: 'p2', title: 'Notes', parentPageId: null },
    ];

    const result = buildVaultLayout(tree);

    expect(result.get('p1')?.stem).toBe('Notes');
    expect(result.get('p2')?.stem).toBe('Notes ~p2');
  });

  it('does NOT collide identical titles under DIFFERENT parents (distinct segments)', () => {
    const tree: PageNode[] = [
      { id: 'a', title: 'Alpha', parentPageId: null },
      { id: 'b', title: 'Beta', parentPageId: null },
      { id: 'a1', title: 'Notes', parentPageId: 'a' },
      { id: 'b1', title: 'Notes', parentPageId: 'b' },
    ];

    const result = buildVaultLayout(tree);

    // Identical stems are fine here: the folders differ, so the paths differ.
    expect(result.get('a1')).toEqual({ segments: ['Alpha'], stem: 'Notes' });
    expect(result.get('b1')).toEqual({ segments: ['Beta'], stem: 'Notes' });
  });

  it('terminates on a 2-node parent cycle and yields a finite result', () => {
    const tree: PageNode[] = [
      { id: 'a', title: 'A', parentPageId: 'b' },
      { id: 'b', title: 'B', parentPageId: 'a' },
    ];

    const result = buildVaultLayout(tree);

    // Returning at all proves the cycle guard works; both pages are present.
    expect(result.size).toBe(2);
    const entryA = result.get('a');
    const entryB = result.get('b');
    expect(entryA).toBeDefined();
    expect(entryB).toBeDefined();
    // The ancestor chain of each page is bounded by the number of pages.
    expect(entryA!.segments.length).toBeLessThanOrEqual(2);
    expect(entryB!.segments.length).toBeLessThanOrEqual(2);
  });

  it('maps a root page (parentPageId null) to empty segments', () => {
    const tree: PageNode[] = [{ id: 'root', title: 'Home', parentPageId: null }];

    const result = buildVaultLayout(tree);

    expect(result.get('root')).toEqual({ segments: [], stem: 'Home' });
  });

  it('emits ancestors in root->leaf order for a deep chain', () => {
    const tree: PageNode[] = [
      { id: 'g', title: 'Grand', parentPageId: null },
      { id: 'p', title: 'Parent', parentPageId: 'g' },
      { id: 'c', title: 'Child', parentPageId: 'p' },
    ];

    const result = buildVaultLayout(tree);

    expect(result.get('c')).toEqual({
      segments: ['Grand', 'Parent'],
      stem: 'Child',
    });
  });

  it('disambiguates two orphan-parent pages with the same title at the path level', () => {
    // Both parents are OUTSIDE the input set, so both pages land at the vault
    // root with segments: []. Their raw parentPageIds differ, yet the two
    // pages must still end up with DISTINCT full paths.
    const tree: PageNode[] = [
      { id: 'x', title: 'Orphan', slugId: 'sx', parentPageId: 'missing-1' },
      { id: 'y', title: 'Orphan', slugId: 'sy', parentPageId: 'missing-2' },
    ];

    const result = buildVaultLayout(tree);
    const first = result.get('x')!;
    const second = result.get('y')!;

    expect(pathOf(first)).not.toBe(pathOf(second));
    // The earlier page keeps the plain stem; the later one carries its slugId.
    expect(first.stem).toBe('Orphan');
    expect(second.stem).toBe('Orphan ~sy');
  });

  it('sanitizes a slugId containing a path separator before using it as a suffix', () => {
    // A crafted slugId containing "/" must never put a separator in the stem.
    const tree: PageNode[] = [
      { id: 'p1', title: 'Notes', slugId: 'a/b', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'c/d', parentPageId: null },
    ];

    const result = buildVaultLayout(tree);
    const suffixed = result.get('p2')!.stem;

    expect(suffixed).not.toContain('/');
    expect(suffixed).not.toContain('\\');
    // The "/" comes out of sanitization as a dash.
    expect(suffixed).toBe('Notes ~c-d');
  });

  it('disambiguates two ORPHAN ancestors at the NAME pass so their children stay in sync', () => {
    // Two orphan PARENTS carry the same title but point at DIFFERENT missing
    // parents, so scoping by raw parentPageId would never compare them. Both
    // land at the vault root, so their names have to be made distinct BEFORE
    // any child folder segment is derived from them — renaming a parent
    // afterwards would leave its child's folder pointing at the old name.
    const tree: PageNode[] = [
      { id: 'p1', title: 'Dup', slugId: 's1', parentPageId: 'missing-1' },
      { id: 'p2', title: 'Dup', slugId: 's2', parentPageId: 'missing-2' },
      { id: 'c1', title: 'Child', parentPageId: 'p1' },
      { id: 'c2', title: 'Child', parentPageId: 'p2' },
    ];

    const result = buildVaultLayout(tree);
    const parent1 = result.get('p1')!;
    const parent2 = result.get('p2')!;
    const child1 = result.get('c1')!;
    const child2 = result.get('c2')!;

    // Both orphan parents sit at the root with DISTINCT stems.
    expect(parent1.segments).toEqual([]);
    expect(parent2.segments).toEqual([]);
    expect(parent1.stem).toBe('Dup');
    expect(parent2.stem).toBe('Dup ~s2');
    expect(parent1.stem).not.toBe(parent2.stem);

    // Each child's folder is EXACTLY its own parent's resolved stem.
    expect(child1.segments).toEqual([parent1.stem]);
    expect(child2.segments).toEqual([parent2.stem]);

    // No two of the four full paths coincide.
    const allPaths = [parent1, parent2, child1, child2].map(pathOf);
    expect(new Set(allPaths).size).toBe(allPaths.length);
  });
});

View File

@@ -0,0 +1,66 @@
import { describe, expect, it } from 'vitest';
import {
serializeDocmostMarkdownBody,
parseDocmostMarkdown,
type DocmostMdMeta,
} from 'docmost-client';
describe('serializeDocmostMarkdownBody round-trip (SPEC §3)', () => {
  // Serialize a meta + body pair and immediately parse the result back.
  const roundTrip = (meta: DocmostMdMeta, body: string) =>
    parseDocmostMarkdown(serializeDocmostMarkdownBody(meta, body));

  it('serialize -> parse preserves meta and the trimmed body, with no comments block', () => {
    const meta: DocmostMdMeta = {
      version: 1,
      pageId: 'page-123',
      slugId: 'slug-abc',
      title: 'My Page',
      spaceId: 'space-1',
      parentPageId: 'parent-9',
    };
    const body = 'Hello\n\nWorld';

    const parsed = roundTrip(meta, body);

    expect(parsed.meta).toEqual(meta);
    expect(parsed.body).toBe(body);
    // The body-only serializer must not emit a docmost:comments block (SPEC §3).
    expect(parsed.comments).toBeNull();
  });

  it('preserves a null parentPageId for a root page', () => {
    const meta: DocmostMdMeta = {
      version: 1,
      pageId: 'root-1',
      slugId: 'root-slug',
      title: 'Root',
      spaceId: 'space-1',
      parentPageId: null,
    };

    const parsed = roundTrip(meta, 'body text');

    expect(parsed.meta).toEqual(meta);
    expect(parsed.comments).toBeNull();
  });

  it('produces a parseable file for an empty/missing body', () => {
    const meta: DocmostMdMeta = { version: 1, pageId: 'p-empty' };

    // Same expectations for an empty string body and for a missing
    // (undefined) body, checked in that order.
    const bodies: string[] = ['', undefined as unknown as string];
    for (const body of bodies) {
      const file = serializeDocmostMarkdownBody(meta, body);

      expect(() => parseDocmostMarkdown(file)).not.toThrow();
      const parsed = parseDocmostMarkdown(file);
      expect(parsed.meta).toEqual(meta);
      expect(parsed.body).toBe('');
      expect(parsed.comments).toBeNull();
    }
  });
});