From 17500585030b3ecfeb6b17718e575609bdf47f74 Mon Sep 17 00:00:00 2001 From: vvzvlad Date: Wed, 17 Jun 2026 01:29:49 +0300 Subject: [PATCH] refactor(sync): testability seams for pull + collab; integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Behavior-preserving refactors (R-Collab-1, R-Pull-1, R-Pull-2) to unblock testing, plus the integration tests they enable. - collaboration: extract applyTransformToYdoc from onSynced; onSynced stays synchronous (NO await between Yjs read and write — SPEC §2 atomicity preserved) - pull: readExisting(deps) injectable IO; split main into pure computePullActions (plan + suppression/mass-delete decisions) + thin applyPullActions(deps) (IO); ordering and data-loss guards preserved bit-for-bit - tests (+35): collaboration-apply (atomicity/null-abort/throw-no-partial), read-existing, compute/apply-pull-actions (move-write-fail keeps old path), git temp-repo 3-way non-FF merge - transforms-extra property: constrain the generator to mutually-non-substring words (the domain where the renumber property holds) -> deterministic; document the inherited commentsToFootnotes substring-overlap comment-drop via it.fails (off the sync path, SPEC §3; backport-fix lives in docmost-mcp) - 695 -> 731 green; build clean; corpus STABLE --- .../docmost-client/src/lib/collaboration.ts | 101 +++- src/pull.ts | 551 ++++++++++++------ test/apply-pull-actions.test.ts | 417 +++++++++++++ test/collaboration-apply.test.ts | 182 ++++++ test/compute-pull-actions.test.ts | 193 ++++++ test/git-merge.test.ts | 151 +++++ test/read-existing.test.ts | 120 ++++ test/transforms-extra.test.ts | Bin 8534 -> 10628 bytes 8 files changed, 1493 insertions(+), 222 deletions(-) create mode 100644 test/apply-pull-actions.test.ts create mode 100644 test/collaboration-apply.test.ts create mode 100644 test/compute-pull-actions.test.ts create mode 100644 test/git-merge.test.ts create mode 100644 test/read-existing.test.ts diff --git a/packages/docmost-client/src/lib/collaboration.ts b/packages/docmost-client/src/lib/collaboration.ts index e8840c6..2d7a0ce 100644 --- a/packages/docmost-client/src/lib/collaboration.ts +++ b/packages/docmost-client/src/lib/collaboration.ts @@ -351,6 +351,62 @@ export function assertYjsEncodable(doc: any): void { buildYDoc(doc); } +/** + * SYNCHRONOUS read-transform-write of a live collaboration `Y.Doc` (SPEC §2). + * + * This is the extracted body of `mutatePageContent`'s `onSynced` callback. It + * MUST stay fully synchronous — there is deliberately NO `await` anywhere + * between reading the live doc and writing it back. While the JS event loop is + * not yielded, no incoming remote update can interleave, so any already-synced + * concurrent human edits in `liveDoc` are preserved rather than clobbered. The + * extraction is a testing seam ONLY; the runtime semantics are identical to the + * inline version that lived in `onSynced`. + * + * Steps: + * 1. Read the live doc via `TiptapTransformer.fromYdoc`. An empty/invalid live + * doc falls back to `{ type: "doc", content: [] }` (a valid empty doc). + * 2. Run `transform(liveDoc)`. If it returns `null` -> abort: NO write is + * performed and `{ written: false, doc: liveDoc }` is returned. If it + * THROWS, the error propagates to the caller (no partial write happens — + * the throw is before any `ydoc.transact`). + * 3. Otherwise encode the new doc (`buildYDoc`) and, inside a single + * `ydoc.transact`, clear the existing fragment and apply the new state. + * Returns `{ written: true, doc: newDoc }`. + */ +export function applyTransformToYdoc( + ydoc: Y.Doc, + transform: (liveDoc: any) => any | null, +): { written: boolean; doc: any } { + let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); + if ( + !liveDoc || + typeof liveDoc !== "object" || + !Array.isArray(liveDoc.content) + ) { + liveDoc = { type: "doc", content: [] }; + } + + const newDoc = transform(liveDoc); + + if (newDoc == null) { + // Transform aborted — write nothing, return the live doc. + return { written: false, doc: liveDoc }; + } + + const tempDoc = buildYDoc(newDoc); + // Fetch the fragment immediately before the transact that mutates it, rather + // than reusing a handle grabbed across the transform. + const fragment = ydoc.getXmlFragment("default"); + ydoc.transact(() => { + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc)); + }); + + return { written: true, doc: newDoc }; +} + /** Time we wait for the initial handshake/sync before giving up. */ const CONNECT_TIMEOUT_MS = 25000; /** Time we wait for the server to acknowledge our write before giving up. */ @@ -520,37 +576,13 @@ export async function mutatePageContent( // CRITICAL: everything between reading the live doc and writing it // back must stay synchronous (no await). While the JS event loop is // not yielded, no incoming remote update can interleave, so any - // already-synced concurrent edits are preserved in liveDoc. - let newDoc: any; + // already-synced concurrent edits are preserved in liveDoc. This is + // delegated to the SYNCHRONOUS `applyTransformToYdoc` helper — there + // is deliberately NO `await` on its result, so the read-transform- + // write stays atomic (SPEC §2). + let result: { written: boolean; doc: any }; try { - let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default"); - if ( - !liveDoc || - typeof liveDoc !== "object" || - !Array.isArray(liveDoc.content) - ) { - liveDoc = { type: "doc", content: [] }; - } - - newDoc = transform(liveDoc); - - if (newDoc == null) { - // Transform aborted — write nothing, return the live doc. - lastWrittenDoc = liveDoc; - finish(null, liveDoc); - return; - } - - const tempDoc = buildYDoc(newDoc); - // Fetch the fragment immediately before the transact that mutates - // it, rather than reusing a handle grabbed across the transform. - const fragment = ydoc.getXmlFragment("default"); - ydoc.transact(() => { - if (fragment.length > 0) { - fragment.delete(0, fragment.length); - } - Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc)); - }); + result = applyTransformToYdoc(ydoc, transform); } catch (e) { // Includes errors thrown by transform (e.g. "afterText not found", // "text not found"): propagate them verbatim to the caller. @@ -558,7 +590,14 @@ export async function mutatePageContent( return; } - lastWrittenDoc = newDoc; + if (!result.written) { + // Transform aborted — write nothing, return the live doc. + lastWrittenDoc = result.doc; + finish(null, result.doc); + return; + } + + lastWrittenDoc = result.doc; if (process.env.DEBUG) console.error("Content written, waiting for server to persist..."); waitForPersistence(); diff --git a/src/pull.ts b/src/pull.ts index 991368b..bcf96f7 100644 --- a/src/pull.ts +++ b/src/pull.ts @@ -46,6 +46,8 @@ import { planReconciliation, decideAbsenceDeletions, type LiveEntry, + type MovedEntry, + type DeletionDecision, } from "./reconcile.js"; import { stabilizePageFile, type PageMeta } from "./stabilize.js"; @@ -70,23 +72,41 @@ function segmentsToRelPath(segments: string[], stem: string): string { return [...segments, `${stem}.md`].join("/"); } +/** + * Injectable IO for `readExisting` (R-Pull-1, test-strategy report §5). The real + * `main` wires these to `git.listTrackedFiles("*.md")` and an `fs.readFile` + * rooted at the vault; tests pass fakes so the parsing/skip rules are unit- + * testable without a real git repo or filesystem. + */ +export interface ReadExistingDeps { + /** List tracked .md paths (forward-slash, vault-relative). */ + listTracked: () => Promise; + /** Read a tracked file's text by its (forward-slash) vault-relative path. */ + readFile: (relPath: string) => Promise; +} + /** * Read every tracked .md file in the vault and parse its `docmost:meta` to * recover `{ pageId, relPath }`. Files without a parseable pageId in meta are * skipped (they are not engine-tracked pages — e.g. a stray hand-written file). + * + * The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules: + * - a `readFile` rejection (tracked but missing on disk, a mid-operation race) + * -> skipped, NOT thrown; the next pull converges; + * - unparseable meta (`parseDocmostMarkdown` throws) -> skipped; + * - parseable but no `pageId` in meta -> skipped. */ -async function readExisting( - git: VaultGit, - vaultRoot: string, +export async function readExisting( + deps: ReadExistingDeps, ): Promise<{ pageId: string; relPath: string }[]> { - const tracked = await git.listTrackedFiles("*.md"); + const tracked = await deps.listTracked(); const existing: { pageId: string; relPath: string }[] = []; for (const relPath of tracked) { // git ls-files always emits forward-slash paths; normalize just in case. const rel = relPath.split(sep).join("/"); let text: string; try { - text = await readFile(relToAbs(vaultRoot, rel), "utf8"); + text = await deps.readFile(rel); } catch { // Tracked but missing on disk (mid-operation race) — skip; the next pull // converges. @@ -105,6 +125,306 @@ async function readExisting( return existing; } +/** + * Input to the PURE `computePullActions` (R-Pull-2). All data, no IO: the live + * tree nodes + completeness flag (from `listSpaceTree`) and the parsed + * `existing` tracked files (from `readExisting`). + */ +export interface PullActionsInput { + /** Live page nodes for the space (from `listSpaceTree`). */ + pages: PageNode[]; + /** Whether the live tree fetch was COMPLETE (SPEC §8 suppression). */ + treeComplete: boolean; + /** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */ + existing: { pageId: string; relPath: string }[]; +} + +/** + * The PURE decisions object computed by `computePullActions` (no IO). It holds + * the reconciliation plan plus the SPEC §8 absence-deletion decision, with the + * suppression already folded in: `toDelete` is the POST-suppression set the + * caller should actually remove (empty when `deletionDecision.apply` is false). + */ +export interface PullActions { + /** Pages to (re)write at their relPath (add + update + move target). */ + toWrite: { pageId: string; relPath: string }[]; + /** Moves: write new path, then remove old path (only on a successful write). */ + moved: MovedEntry[]; + /** + * Absence-based paths to delete AFTER suppression. Empty when the decision + * suppressed deletions this cycle, so the caller can apply it unconditionally. + */ + toDelete: string[]; + /** Why absence deletions were (or were not) applied (for logging + tests). */ + deletionDecision: DeletionDecision; + /** Tracked-file count (for the suppression log messages). */ + existingCount: number; + /** Planned absence-delete count BEFORE suppression (for the log message). */ + plannedDeleteCount: number; +} + +/** + * PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live + * tree nodes + completeness + existing tracked files and returns the full set of + * decisions with NO IO: + * + * - builds the vault layout (deterministic relPath per live page), + * - `planReconciliation` -> toWrite / moved / absence-toDelete, + * - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch + + * empty-live + mass-delete guard), folded IN here so `toDelete` is the + * POST-suppression set (empty when suppressed). + * + * Moves are NOT governed by the suppression: a moved page is present in `live`, + * so its old-path removal is real (the caller still gates it on the write + * succeeding). The expensive content fetch / file write / git ops happen in the + * thin `applyPullActions`. + */ +export function computePullActions(input: PullActionsInput): PullActions { + const { pages, treeComplete, existing } = input; + const layout = buildVaultLayout(pages); + + const live: LiveEntry[] = []; + for (const p of pages) { + if (!p || !p.id) continue; + const entry = layout.get(p.id); + if (!entry) continue; + live.push({ + pageId: p.id, + relPath: segmentsToRelPath(entry.segments, entry.stem), + }); + } + + // Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only; + // `plan.moved` carries move old-path removals separately. + const plan = planReconciliation(live, existing); + + // Decide whether the ABSENCE-based deletions may be applied this cycle + // (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard. + // Moves are NOT governed by this. + const deletionDecision = decideAbsenceDeletions({ + treeComplete, + liveCount: live.length, + existingCount: existing.length, + deleteCount: plan.toDelete.length, + }); + + return { + toWrite: plan.toWrite, + moved: plan.moved, + // Fold the suppression in: a suppressed cycle deletes nothing. + toDelete: deletionDecision.apply ? plan.toDelete : [], + deletionDecision, + existingCount: existing.length, + plannedDeleteCount: plan.toDelete.length, + }; +} + +/** + * Injectable IO for `applyPullActions` (R-Pull-2). The real `main` wires these + * to the live client, the vault git wrapper, and `node:fs/promises`; tests pass + * fakes that RECORD calls so the ordering + the move-on-success data-loss guard + * are testable without real git/fs/network. + */ +export interface ApplyPullActionsDeps { + client: Pick; + git: Pick; + /** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */ + writeFile: (absPath: string, text: string) => Promise; + /** Recursive mkdir of an ABSOLUTE directory path. */ + mkdir: (absDir: string) => Promise; + /** Remove a file by ABSOLUTE path (force: a missing file is a no-op). */ + rm: (absPath: string) => Promise; +} + +/** Outcome counters from `applyPullActions` (for the summary + tests). */ +export interface ApplyResult { + written: number; + movedApplied: number; + deleted: number; + failed: number; + committed: boolean; + merge: { ok: boolean; conflict: boolean; output: string }; +} + +/** + * THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current + * order, with all the original safety guards preserved bit-for-bit: + * + * 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize + * (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page + * never aborts the pull (bounded-concurrency pool, fault-tolerant). + * 2. apply MOVE old-path removals — ONLY when the planner marked the old path + * removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a + * failed move-write keeps the old path so the page never vanishes). + * 3. apply (post-suppression) absence deletes. + * 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted + * counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9). + * + * `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps. + */ +export async function applyPullActions( + deps: ApplyPullActionsDeps, + actions: PullActions, + vaultRoot: string, +): Promise { + const { client, git } = deps; + + // Emit the SPEC §8 suppression warnings (preserved from the original `main`). + const decision = actions.deletionDecision; + if (!decision.apply) { + if (decision.reason === "incomplete-fetch") { + console.warn( + "pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)", + ); + } else if (decision.reason === "empty-live") { + console.warn( + `pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` + + `tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` + + `Docmost is reachable.`, + ); + } else { + console.warn( + `pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` + + `tracked file(s) (mass-delete guard) — deletions suppressed this ` + + `cycle (SPEC §8). Verify the live Docmost tree, then re-run.`, + ); + } + } + + // 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11). + let written = 0; + let failed = 0; + let completed = 0; + let nextIndex = 0; + // pageIds whose write FAILED. A moved page whose new-path write failed must + // NOT have its old path removed (otherwise the page vanishes entirely). + const failedPageIds = new Set(); + + const writeOne = async (w: { + pageId: string; + relPath: string; + }): Promise => { + try { + const page = await client.getPageJson(w.pageId); + const meta: PageMeta = { + version: 1, + pageId: page.id, + slugId: page.slugId, + title: page.title, + spaceId: page.spaceId, + parentPageId: page.parentPageId ?? null, + }; + const text = await stabilizePageFile(page.content, meta); + const abs = relToAbs(vaultRoot, w.relPath); + await deps.mkdir(dirname(abs)); + await deps.writeFile(abs, text); + written++; + } catch (err) { + failed++; + failedPageIds.add(w.pageId); + console.error( + `pull: failed page ${w.pageId}:`, + err instanceof Error ? err.message : String(err), + ); + } finally { + completed++; + if (completed % PROGRESS_EVERY === 0) { + console.log(`pulled ${completed}/${actions.toWrite.length}`); + } + } + }; + + // Bounded-concurrency pool (dependency-free): a fixed set of runners each + // take the next index until the write list is exhausted. One bad page never + // aborts the whole pull (mirrors the fault-tolerant tree walk). + const runner = async (): Promise => { + while (true) { + const i = nextIndex++; + if (i >= actions.toWrite.length) return; + await writeOne(actions.toWrite[i]); + } + }; + await Promise.all( + Array.from( + { length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 }, + () => runner(), + ), + ); + + // Helper: `rm` with force:true is a no-op if the file is already gone. + const removePath = async (rel: string, what: string): Promise => { + try { + await deps.rm(relToAbs(vaultRoot, rel)); + return true; + } catch (err) { + console.error( + `pull: failed to ${what} ${rel}:`, + err instanceof Error ? err.message : String(err), + ); + return false; + } + }; + + // 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its + // old path is genuinely stale — NOT subject to the incomplete-fetch + // suppression. BUT only remove the old path when (a) the planner marked it + // removable (not reused by another live page) AND (b) the new-path write + // actually SUCCEEDED — otherwise we would delete the only copy of a page + // whose move-write failed (⭐ data-loss guard). + let movedApplied = 0; + for (const m of actions.moved) { + if (!m.removeOldPath) continue; + if (failedPageIds.has(m.pageId)) { + console.warn( + `pull: move write for ${m.pageId} failed — keeping old path ` + + `${m.fromRelPath} (SPEC §8)`, + ); + continue; + } + if (await removePath(m.fromRelPath, "remove moved old path")) movedApplied++; + } + + // 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the + // post-suppression set (empty when the decision suppressed them, SPEC §8). + let deleted = 0; + for (const rel of actions.toDelete) { + if (await removePath(rel, "delete")) deleted++; + } + + // 4. Stage + commit on `docmost` (only if there is something to commit). + // Deterministic stabilized output means unchanged pages produce identical + // bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the + // ACTUAL work applied (pages written + files deleted), not the planned size, + // so a run with failures does not over-report (SPEC §5 nit). + const subject = + deleted > 0 + ? `docmost: sync ${written} page(s), ${deleted} deleted` + : `docmost: sync ${written} page(s)`; + await git.stageAll(); + const committed = await git.commit(subject, { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + trailers: [SOURCE_TRAILER], + }); + + // Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9); + // we never push to Docmost. Push to a git remote is deferred (SPEC §7). + await git.checkout(DEFAULT_BRANCH); + const merge = await git.merge(DOCMOST_BRANCH); + if (merge.conflict) { + console.error( + "pull: merge of docmost -> main CONFLICTED. Conflict markers were left " + + "in the vault for manual resolution (SPEC §9). Nothing is pushed to " + + "Docmost (read-only). Resolve locally, then re-run.", + ); + } else if (!merge.ok) { + console.error(`pull: merge of docmost -> main failed: ${merge.output}`); + } + console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7)."); + + return { written, movedApplied, deleted, failed, committed, merge }; +} + async function main(): Promise { const s = loadSettings(); const client = new DocmostClient( @@ -143,203 +463,52 @@ async function main(): Promise { // 2. Work on the docmost mirror branch. await git.checkout(DOCMOST_BRANCH); - // 3. Fetch the live tree and compute the desired files (relPath via the pure - // sanitize + disambiguation layout). `listSpaceTree` reports completeness: - // if ANY branch's children fetch failed or the node cap was hit, the tree - // is PARTIAL and absence-based deletions must be suppressed this cycle + // 3. Fetch the live tree. `listSpaceTree` reports completeness: if ANY + // branch's children fetch failed or the node cap was hit, the tree is + // PARTIAL and absence-based deletions must be suppressed this cycle // (SPEC §8) — a missing pageId in a partial tree is NOT proof of deletion. const { pages: rawPages, complete: treeComplete } = await client.listSpaceTree(spaceId); const pages = rawPages as PageNode[]; - const layout = buildVaultLayout(pages); - const live: LiveEntry[] = []; - const liveNodeByPageId = new Map(); - for (const p of pages) { - if (!p || !p.id) continue; - const entry = layout.get(p.id); - if (!entry) continue; - live.push({ - pageId: p.id, - relPath: segmentsToRelPath(entry.segments, entry.stem), - }); - liveNodeByPageId.set(p.id, p); - } - - // 4. Parse the existing tracked .md files (pageId + relPath). - const existing = await readExisting(git, vaultRoot); - - // 5. Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only; - // `plan.moved` carries move old-path removals separately. - const plan = planReconciliation(live, existing); - - // 6. Decide whether the ABSENCE-based deletions (`plan.toDelete`) may be - // applied this cycle (SPEC §8). The pure helper folds in BOTH the - // incomplete-fetch suppression (a partial tree must not look like - // deletions) AND the mass-delete guard (defense in depth). Moves are NOT - // governed by this — a moved page is present in `live`, so its old-path - // removal is real and applied unconditionally (subject only to its write - // succeeding). - const deleteDecision = decideAbsenceDeletions({ - treeComplete, - liveCount: live.length, - existingCount: existing.length, - deleteCount: plan.toDelete.length, + // 4. Parse the existing tracked .md files (pageId + relPath). Inject the real + // git + fs IO (R-Pull-1). + const existing = await readExisting({ + listTracked: () => git.listTrackedFiles("*.md"), + readFile: (rel) => readFile(relToAbs(vaultRoot, rel), "utf8"), }); - if (!deleteDecision.apply) { - if (deleteDecision.reason === "incomplete-fetch") { - console.warn( - "pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)", - ); - } else if (deleteDecision.reason === "empty-live") { - console.warn( - `pull: live fetch returned 0 pages but ${existing.length} file(s) are ` + - `tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` + - `Docmost is reachable.`, - ); - } else { - console.warn( - `pull: plan would delete ${plan.toDelete.length} of ${existing.length} ` + - `tracked file(s) (mass-delete guard) — deletions suppressed this ` + - `cycle (SPEC §8). Verify the live Docmost tree, then re-run.`, - ); - } - } - // 7. Write each live page in its fixpoint form (normalize-on-write, SPEC §11), - // then apply move-old-path + absence-delete removals. - let written = 0; - let failed = 0; - let completed = 0; - let nextIndex = 0; - // pageIds whose write FAILED. A moved page whose new-path write failed must - // NOT have its old path removed (otherwise the page vanishes entirely). - const failedPageIds = new Set(); + // 5. Compute the pull actions (pure, SPEC §5/§8): layout + reconciliation + + // the absence-deletion suppression decision, folded together. No IO. + const actions = computePullActions({ pages, treeComplete, existing }); - const writeOne = async (w: { pageId: string; relPath: string }): Promise => { - const node = liveNodeByPageId.get(w.pageId); - if (!node) return; - try { - const page = await client.getPageJson(w.pageId); - const meta: PageMeta = { - version: 1, - pageId: page.id, - slugId: page.slugId, - title: page.title, - spaceId: page.spaceId, - parentPageId: page.parentPageId ?? null, - }; - const text = await stabilizePageFile(page.content, meta); - const abs = relToAbs(vaultRoot, w.relPath); - await mkdir(dirname(abs), { recursive: true }); - await writeFile(abs, text, "utf8"); - written++; - } catch (err) { - failed++; - failedPageIds.add(w.pageId); - console.error( - `pull: failed page ${w.pageId}:`, - err instanceof Error ? err.message : String(err), - ); - } finally { - completed++; - if (completed % PROGRESS_EVERY === 0) { - console.log(`pulled ${completed}/${plan.toWrite.length}`); - } - } - }; - - // Bounded-concurrency pool (dependency-free): a fixed set of runners each - // take the next index until the write list is exhausted. One bad page never - // aborts the whole pull (mirrors the fault-tolerant tree walk). - const runner = async (): Promise => { - while (true) { - const i = nextIndex++; - if (i >= plan.toWrite.length) return; - await writeOne(plan.toWrite[i]); - } - }; - await Promise.all( - Array.from( - { length: Math.min(CONCURRENCY, plan.toWrite.length) || 1 }, - () => runner(), - ), + // 6. Apply the actions (thin IO, exact order + guards preserved): write each + // live page in its fixpoint form (SPEC §11), apply move-old-path removals + // (only when the write succeeded) + absence deletes, then commit on + // `docmost` and merge into main (conflicts surfaced, SPEC §9). + const result = await applyPullActions( + { + client, + git, + // `applyPullActions` already calls `deps.mkdir(dirname(abs))` before each + // write, so the file write itself only needs `writeFile`. + writeFile: async (abs, text) => { + await writeFile(abs, text, "utf8"); + }, + mkdir: async (abs) => { + await mkdir(abs, { recursive: true }); + }, + rm: async (abs) => { + await rm(abs, { force: true }); + }, + }, + actions, + vaultRoot, ); - // Helper: `rm` with force:true is a no-op if the file is already gone. - const removePath = async (rel: string, what: string): Promise => { - try { - await rm(relToAbs(vaultRoot, rel), { force: true }); - return true; - } catch (err) { - console.error( - `pull: failed to ${what} ${rel}:`, - err instanceof Error ? err.message : String(err), - ); - return false; - } - }; + const { written, movedApplied, deleted, failed, committed, merge } = result; - // 7a. Apply MOVE old-path removals. A moved page IS present in `live`, so its - // old path is genuinely stale — this is NOT subject to the incomplete- - // fetch suppression. BUT only remove the old path when (a) the planner - // marked it removable (not reused by another live page) AND (b) the new- - // path write actually SUCCEEDED — otherwise we would delete the only copy - // of a page whose move-write failed. - let movedApplied = 0; - for (const m of plan.moved) { - if (!m.removeOldPath) continue; - if (failedPageIds.has(m.pageId)) { - console.warn( - `pull: move write for ${m.pageId} failed — keeping old path ` + - `${m.fromRelPath} (SPEC §8)`, - ); - continue; - } - if (await removePath(m.fromRelPath, "remove moved old path")) movedApplied++; - } - - // 7b. Apply ABSENCE-based deletions — ONLY if the decision allowed them - // (incomplete-fetch suppression + mass-delete guard, SPEC §8). - let deleted = 0; - if (deleteDecision.apply) { - for (const rel of plan.toDelete) { - if (await removePath(rel, "delete")) deleted++; - } - } - - // 8. Stage + commit on `docmost` (only if there is something to commit). - // Deterministic stabilized output means unchanged pages produce identical - // bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the - // ACTUAL work applied (pages written + files deleted), not the planned size, - // so a run with failures does not over-report (SPEC §5 nit). - const subject = - deleted > 0 - ? `docmost: sync ${written} page(s), ${deleted} deleted` - : `docmost: sync ${written} page(s)`; - await git.stageAll(); - const committed = await git.commit(subject, { - authorName: BOT_AUTHOR_NAME, - authorEmail: BOT_AUTHOR_EMAIL, - trailers: [SOURCE_TRAILER], - }); - - // 9. Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9); - // we never push to Docmost. Push to a git remote is deferred (SPEC §7). - await git.checkout(DEFAULT_BRANCH); - const merge = await git.merge(DOCMOST_BRANCH); - if (merge.conflict) { - console.error( - "pull: merge of docmost -> main CONFLICTED. Conflict markers were left " + - "in the vault for manual resolution (SPEC §9). Nothing is pushed to " + - "Docmost (read-only). Resolve locally, then re-run.", - ); - } else if (!merge.ok) { - console.error(`pull: merge of docmost -> main failed: ${merge.output}`); - } - console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7)."); - - // 10. One-line summary. + // 7. One-line summary. console.log( `pull complete: ${written} written, ${movedApplied} moved, ` + `${deleted} deleted, committed=${committed}, ` + diff --git a/test/apply-pull-actions.test.ts b/test/apply-pull-actions.test.ts new file mode 100644 index 0000000..2a3fc28 --- /dev/null +++ b/test/apply-pull-actions.test.ts @@ -0,0 +1,417 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest'; +import { applyPullActions } from '../src/pull.js'; +import type { + PullActions, + ApplyPullActionsDeps, +} from '../src/pull.js'; +import type { DeletionDecision } from '../src/reconcile.js'; + +// R-Pull-2 (test-strategy report §5): `applyPullActions` is the THIN IO half of +// the pull cycle. These tests drive it with FAKES that record every call — no +// real git, fs, or network — so the ordering and the ⭐ move-on-success +// data-loss guard are verifiable. SPEC §8 (delete suppression) + SPEC §5 (commit +// subject reflects ACTUAL counts) are asserted here. + +const VAULT = '/vault'; + +/** A getPageJson fake: returns a minimal page whose content stabilizes cheaply. */ +function makeClient(opts?: { failFor?: Set }) { + const calls: string[] = []; + const client = { + getPageJson: vi.fn(async (pageId: string) => { + calls.push(pageId); + if (opts?.failFor?.has(pageId)) { + throw new Error(`fetch failed for ${pageId}`); + } + return { + id: pageId, + slugId: `slug-${pageId}`, + title: `Title ${pageId}`, + spaceId: 'space', + parentPageId: null, + updatedAt: '2026-01-01T00:00:00.000Z', + // A trivial doc so stabilizePageFile (the real one) runs fast. + content: { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: pageId }] }, + ], + }, + }; + }), + }; + return { client, calls }; +} + +/** A git fake recording the order of ops; merge result is configurable. */ +function makeGit(merge: { ok: boolean; conflict: boolean; output?: string } = { + ok: true, + conflict: false, +}) { + const order: string[] = []; + let committedSubject: string | undefined; + const git = { + stageAll: vi.fn(async () => { + order.push('stageAll'); + }), + commit: vi.fn(async (subject: string) => { + order.push(`commit:${subject}`); + committedSubject = subject; + return true; + }), + checkout: vi.fn(async (branch: string) => { + order.push(`checkout:${branch}`); + }), + merge: vi.fn(async () => { + order.push('merge'); + return { ok: merge.ok, conflict: merge.conflict, output: merge.output ?? '' }; + }), + }; + return { + git, + order, + get committedSubject() { + return committedSubject; + }, + }; +} + +/** A recording fs fake: writes/mkdirs/rms tracked in arrays. */ +function makeFs(opts?: { failWriteFor?: Set }) { + const writes: { abs: string; text: string }[] = []; + const mkdirs: string[] = []; + const rms: string[] = []; + const fs = { + writeFile: vi.fn(async (abs: string, text: string) => { + // Fail a specific destination path if asked (to simulate a write failure). + if (opts?.failWriteFor?.has(abs)) { + throw new Error(`write failed for ${abs}`); + } + writes.push({ abs, text }); + }), + mkdir: vi.fn(async (abs: string) => { + mkdirs.push(abs); + }), + rm: vi.fn(async (abs: string) => { + rms.push(abs); + }), + }; + return { fs, writes, mkdirs, rms }; +} + +function deps( + client: any, + git: any, + fs: ReturnType, +): ApplyPullActionsDeps { + return { + client, + git, + writeFile: fs.fs.writeFile, + mkdir: fs.fs.mkdir, + rm: fs.fs.rm, + }; +} + +const APPLY: DeletionDecision = { apply: true }; + +function actions(partial: Partial): PullActions { + return { + toWrite: [], + moved: [], + toDelete: [], + deletionDecision: APPLY, + existingCount: 0, + plannedDeleteCount: 0, + ...partial, + }; +} + +beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('applyPullActions — happy path (write + commit + merge)', () => { + it('fetches, writes each page, stages, commits, checks out main, merges', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [ + { pageId: 'p1', relPath: 'A.md' }, + { pageId: 'p2', relPath: 'Sub/B.md' }, + ], + }), + VAULT, + ); + + expect(res.written).toBe(2); + expect(res.failed).toBe(0); + expect(res.committed).toBe(true); + expect(res.merge).toEqual({ ok: true, conflict: false, output: '' }); + + // Both pages were fetched and written at their absolute paths. + expect(client.getPageJson).toHaveBeenCalledTimes(2); + const writtenPaths = fs.writes.map((w) => w.abs).sort(); + expect(writtenPaths).toEqual(['/vault/A.md', '/vault/Sub/B.md']); + + // The git op order is: stageAll -> commit -> checkout main -> merge. + expect(g.order).toEqual([ + 'stageAll', + `commit:docmost: sync 2 page(s)`, + 'checkout:main', + 'merge', + ]); + }); +}); + +describe('applyPullActions — ordering (write before move/delete before commit)', () => { + it('does writes, then move-old-path removals, then deletes, then commit/merge', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + toDelete: ['Dead.md'], + plannedDeleteCount: 1, + existingCount: 3, + }), + VAULT, + ); + + // The write to the new path happened (the page was fetched first). + expect(fs.writes.map((w) => w.abs)).toEqual(['/vault/New/M.md']); + // The move old-path removal AND the absence delete both ran, old path first. + expect(fs.rms).toEqual(['/vault/Old/M.md', '/vault/Dead.md']); + // git ops happen AFTER all fs work. + expect(g.order).toEqual([ + 'stageAll', + 'commit:docmost: sync 1 page(s), 1 deleted', + 'checkout:main', + 'merge', + ]); + }); +}); + +describe('applyPullActions — ⭐ data-loss guard (move-on-success)', () => { + it('does NOT remove the OLD path when the new-path write FAILS', async () => { + // The page "m" is being moved Old/M.md -> New/M.md, but its new-path write + // FAILS. Removing the old path now would erase the only copy of the page. + // The guard must KEEP the old path. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs({ failWriteFor: new Set(['/vault/New/M.md']) }); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + }), + VAULT, + ); + + // The write failed -> recorded as a failure, nothing written. + expect(res.failed).toBe(1); + expect(res.written).toBe(0); + expect(fs.writes).toEqual([]); + // ⭐ The OLD path was NOT removed: the data-loss guard kept it. + expect(fs.rms).not.toContain('/vault/Old/M.md'); + expect(fs.rms).toEqual([]); + expect(res.movedApplied).toBe(0); + + // The commit subject reflects ACTUAL counts: 0 written, 0 deleted. + expect(g.committedSubject).toBe('docmost: sync 0 page(s)'); + }); + + it('DOES remove the old path when the new-path write SUCCEEDS', async () => { + // Same move, but the write succeeds -> the old path is safely removed. This + // is the positive control proving the guard is keyed on write success. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); // no write failures + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + }), + VAULT, + ); + + expect(res.written).toBe(1); + expect(res.movedApplied).toBe(1); + expect(fs.rms).toContain('/vault/Old/M.md'); + expect(g.committedSubject).toBe('docmost: sync 1 page(s)'); + }); + + it('honours removeOldPath:false (path reused by another live page is kept)', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'X.md', + toRelPath: 'New/M.md', + removeOldPath: false, // X.md is a live target of another page + }, + ], + }), + VAULT, + ); + + // The reused old path is never removed. + expect(fs.rms).not.toContain('/vault/X.md'); + expect(fs.rms).toEqual([]); + }); +}); + +describe('applyPullActions — deletion suppression (SPEC §8)', () => { + it('skips deletions when the decision SUPPRESSES them (toDelete already empty)', async () => { + // computePullActions empties toDelete when suppressed, but assert the applier + // ALSO does no removals and the subject omits the deleted count. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'p1', relPath: 'A.md' }], + // Suppressed: toDelete is empty even though 5 were planned. + toDelete: [], + deletionDecision: { apply: false, reason: 'incomplete-fetch' }, + plannedDeleteCount: 5, + existingCount: 6, + }), + VAULT, + ); + + expect(res.deleted).toBe(0); + expect(fs.rms).toEqual([]); + // Subject reflects 0 deleted (no ", N deleted" suffix). + expect(g.committedSubject).toBe('docmost: sync 1 page(s)'); + // The suppression warning was emitted. + expect(console.warn).toHaveBeenCalledWith( + expect.stringMatching(/tree fetch incomplete/), + ); + }); + + it('applies deletions present in toDelete when the decision allows them', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'p1', relPath: 'A.md' }], + toDelete: ['Dead1.md', 'Dead2.md'], + deletionDecision: APPLY, + plannedDeleteCount: 2, + existingCount: 5, + }), + VAULT, + ); + + expect(res.deleted).toBe(2); + expect(fs.rms).toEqual(['/vault/Dead1.md', '/vault/Dead2.md']); + // Subject reflects ACTUAL written + deleted counts. + expect(g.committedSubject).toBe('docmost: sync 1 page(s), 2 deleted'); + }); +}); + +describe('applyPullActions — commit subject reflects ACTUAL counts', () => { + it('counts only SUCCESSFUL writes when some page fetches fail', async () => { + // p2 fetch fails; the subject must say 1 page (only p1 was written), not 2. + const { client } = makeClient({ failFor: new Set(['p2']) }); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [ + { pageId: 'p1', relPath: 'A.md' }, + { pageId: 'p2', relPath: 'B.md' }, + ], + }), + VAULT, + ); + + expect(res.written).toBe(1); + expect(res.failed).toBe(1); + expect(g.committedSubject).toBe('docmost: sync 1 page(s)'); + }); +}); + +describe('applyPullActions — merge result is surfaced, not swallowed', () => { + it('returns conflict:true on a conflicting merge (no auto-resolve)', async () => { + const { client } = makeClient(); + const g = makeGit({ ok: false, conflict: true, output: 'CONFLICT' }); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }), + VAULT, + ); + expect(res.merge.conflict).toBe(true); + expect(res.merge.ok).toBe(false); + }); + + it('returns ok:false conflict:false on a non-conflict merge failure', async () => { + const { client } = makeClient(); + const g = makeGit({ ok: false, conflict: false, output: 'some error' }); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }), + VAULT, + ); + expect(res.merge.ok).toBe(false); + expect(res.merge.conflict).toBe(false); + }); +}); diff --git a/test/collaboration-apply.test.ts b/test/collaboration-apply.test.ts new file mode 100644 index 0000000..733868d --- /dev/null +++ b/test/collaboration-apply.test.ts @@ -0,0 +1,182 @@ +import { describe, expect, it } from 'vitest'; +import * as Y from 'yjs'; +import { TiptapTransformer } from '@hocuspocus/transformer'; + +// R-Collab-1 (test-strategy report §5): the SYNCHRONOUS read-transform-write +// body of `mutatePageContent`'s `onSynced` is now the exported pure-ish +// `applyTransformToYdoc(ydoc, transform)`. These tests drive it directly +// against a real `Y.Doc` — NO network, NO Hocuspocus server. They assert the +// SPEC §2 atomicity contract holds (read -> transform -> write with no await), +// plus the abort/throw/empty-doc-fallback behaviour preserved from the inline +// version. +// +// Import directly from the source .js (matches the repo's other collaboration +// tests, e.g. collaboration-mutate.test.ts). +import { + applyTransformToYdoc, + buildYDoc, +} from '../packages/docmost-client/src/lib/collaboration.js'; +import { docmostExtensions } from '../packages/docmost-client/src/lib/docmost-schema.js'; + +// A valid minimal ProseMirror doc with a single paragraph of `text`. +function docWith(text: string): any { + return { + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text }] }], + }; +} + +// Seed a Y.Doc's "default" fragment with a ProseMirror doc, exactly the way the +// live collaboration server would have it after the initial sync. We encode via +// the same TiptapTransformer path the SUT reads back through. +function seedYdoc(content: any): Y.Doc { + const seeded = buildYDoc(content); + const ydoc = new Y.Doc(); + Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(seeded)); + return ydoc; +} + +// Read the live ProseMirror doc back off a Y.Doc the same way the SUT does. +function readYdoc(ydoc: Y.Doc): any { + return TiptapTransformer.fromYdoc(ydoc, 'default'); +} + +describe('applyTransformToYdoc — synchronous read/transform/write (R-Collab-1)', () => { + it('writes back the transformed doc when transform mutates it', () => { + const ydoc = seedYdoc(docWith('original')); + + let seenLive: any; + const result = applyTransformToYdoc(ydoc, (live) => { + seenLive = live; + return docWith('rewritten'); + }); + + // The transform observed the seeded live doc... + expect(seenLive.content[0].content[0].text).toBe('original'); + // ...and the write happened. + expect(result.written).toBe(true); + expect(result.doc).toEqual(docWith('rewritten')); + // The Y.Doc fragment now holds the NEW content (old text fully replaced). + const xml = ydoc.getXmlFragment('default').toString(); + expect(xml).toContain('rewritten'); + expect(xml).not.toContain('original'); + }); + + it('is fully SYNCHRONOUS — the fragment is mutated before control returns', () => { + // The whole point of the SPEC §2 invariant: no `await` is yielded between + // reading the live doc and writing it back. We assert this structurally by + // observing the write took effect on the SAME synchronous tick — the + // function does not return a Promise, and the fragment already reflects the + // new doc the instant the call returns (no microtask hop needed). + const ydoc = seedYdoc(docWith('before')); + const ret = applyTransformToYdoc(ydoc, () => docWith('after')); + // Not a thenable: the contract is a plain synchronous value, not a Promise. + expect(typeof (ret as any).then).not.toBe('function'); + // Already written synchronously. + expect(ydoc.getXmlFragment('default').toString()).toContain('after'); + }); + + it('transform returning null ABORTS with NO write (live doc preserved)', () => { + const ydoc = seedYdoc(docWith('keepme')); + const before = ydoc.getXmlFragment('default').toString(); + + let seenLive: any; + const result = applyTransformToYdoc(ydoc, (live) => { + seenLive = live; + return null; // abort + }); + + expect(result.written).toBe(false); + // The returned doc is the live doc the transform saw (no write). + expect(result.doc).toBe(seenLive); + expect(result.doc.content[0].content[0].text).toBe('keepme'); + // The fragment is byte-identical to before: nothing was written. + expect(ydoc.getXmlFragment('default').toString()).toBe(before); + }); + + it('transform THROWING propagates and leaves NO partial write', () => { + const ydoc = seedYdoc(docWith('intact')); + const before = ydoc.getXmlFragment('default').toString(); + + expect(() => + applyTransformToYdoc(ydoc, () => { + throw new Error('boom from transform'); + }), + ).toThrow(/boom from transform/); + + // The throw happens before any ydoc.transact, so the live doc is untouched. + expect(ydoc.getXmlFragment('default').toString()).toBe(before); + expect(readYdoc(ydoc).content[0].content[0].text).toBe('intact'); + }); + + it('an empty/invalid live doc falls back to { type:"doc", content:[] }', () => { + // A brand-new Y.Doc has an empty "default" fragment; fromYdoc yields a doc + // with no content array, which the helper must coerce to a valid empty doc + // before handing it to the transform. + const ydoc = new Y.Doc(); + + let seenLive: any; + applyTransformToYdoc(ydoc, (live) => { + seenLive = live; + return null; // abort — we only care what the transform saw + }); + + expect(seenLive).toEqual({ type: 'doc', content: [] }); + }); + + it('the empty-doc fallback is still WRITABLE (transform can write into it)', () => { + const ydoc = new Y.Doc(); + const result = applyTransformToYdoc(ydoc, (live) => { + // The live doc is the empty fallback; produce real content from it. + expect(live).toEqual({ type: 'doc', content: [] }); + return docWith('seeded from empty'); + }); + expect(result.written).toBe(true); + expect(ydoc.getXmlFragment('default').toString()).toContain( + 'seeded from empty', + ); + }); + + it('preserves concurrent live content the transform chooses to keep (atomicity)', () => { + // Model the SPEC §2 concern: the live doc already contains a concurrent + // human edit. A transform that appends without discarding must not lose it, + // and because the read+write is one synchronous unit nothing can interleave. + const live = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'human edit' }] }, + ], + }; + const ydoc = seedYdoc(live); + + const result = applyTransformToYdoc(ydoc, (liveDoc) => { + // Append a machine paragraph while keeping the human's paragraph. + return { + type: 'doc', + content: [ + ...liveDoc.content, + { type: 'paragraph', content: [{ type: 'text', text: 'machine edit' }] }, + ], + }; + }); + + expect(result.written).toBe(true); + const xml = ydoc.getXmlFragment('default').toString(); + expect(xml).toContain('human edit'); + expect(xml).toContain('machine edit'); + }); +}); + +// Sanity: the helper round-trips through the real schema, proving the seed/read +// path is faithful (not a degenerate empty-fragment artifact). +describe('applyTransformToYdoc — schema fidelity', () => { + it('round-trips a paragraph through the docmost schema unchanged', () => { + const ydoc = seedYdoc(docWith('round trip')); + const got = TiptapTransformer.fromYdoc(ydoc, 'default'); + // The doc encodes/decodes against the real docmost extension set (the same + // set buildYDoc uses), so the seed/read path is the production one. + expect(got.type).toBe('doc'); + expect(got.content[0].content[0].text).toBe('round trip'); + expect(Array.isArray(docmostExtensions)).toBe(true); + }); +}); diff --git a/test/compute-pull-actions.test.ts b/test/compute-pull-actions.test.ts new file mode 100644 index 0000000..1053af8 --- /dev/null +++ b/test/compute-pull-actions.test.ts @@ -0,0 +1,193 @@ +import { describe, expect, it } from 'vitest'; +import { computePullActions } from '../src/pull.js'; +import type { PageNode } from '../src/layout.js'; + +// R-Pull-2 (test-strategy report §5): `computePullActions` is the PURE half of +// the pull cycle — layout + planReconciliation + the SPEC §8 absence-deletion +// suppression decision, folded together, with NO IO. These tests exercise it +// without git/fs/network. The thin IO applier is covered in apply-pull-actions. + +/** A live tree node (only the fields the layout / reconciliation read). */ +function node( + id: string, + title: string, + parentPageId: string | null = null, + hasChildren = false, +): PageNode { + return { id, title, slugId: id, parentPageId, hasChildren }; +} + +describe('computePullActions — normal complete fetch', () => { + it('builds toWrite from the live layout and an empty existing set (all adds)', () => { + const pages = [ + node('root', 'Root', null, true), + node('child', 'Child', 'root'), + ]; + const actions = computePullActions({ + pages, + treeComplete: true, + existing: [], + }); + // Each live page is (re)written at its deterministic layout path. + expect(actions.toWrite).toEqual([ + { pageId: 'root', relPath: 'Root.md' }, + { pageId: 'child', relPath: 'Root/Child.md' }, + ]); + expect(actions.moved).toEqual([]); + expect(actions.toDelete).toEqual([]); + expect(actions.deletionDecision).toEqual({ apply: true }); + }); + + it('plans toWrite / moved / toDelete correctly for a mixed reconciliation', () => { + const pages = [ + node('keep', 'Keep'), + node('mover', 'Mover'), + node('fresh', 'Fresh'), + ]; + // existing: keep (same path), mover (old path -> move), dead (absent -> delete). + const existing = [ + { pageId: 'keep', relPath: 'Keep.md' }, + { pageId: 'mover', relPath: 'Old/Mover.md' }, + { pageId: 'dead', relPath: 'Dead.md' }, + ]; + const actions = computePullActions({ pages, treeComplete: true, existing }); + + expect(actions.toWrite).toEqual([ + { pageId: 'keep', relPath: 'Keep.md' }, + { pageId: 'mover', relPath: 'Mover.md' }, + { pageId: 'fresh', relPath: 'Fresh.md' }, + ]); + // mover moved from Old/Mover.md to the new layout path Mover.md. + expect(actions.moved).toEqual([ + { + pageId: 'mover', + fromRelPath: 'Old/Mover.md', + toRelPath: 'Mover.md', + removeOldPath: true, + }, + ]); + // dead is absent from live -> an absence delete (decision applies it). + expect(actions.toDelete).toEqual(['Dead.md']); + expect(actions.deletionDecision).toEqual({ apply: true }); + }); + + it('a live page moved to a NEW path is in `moved`, its old path NOT in toDelete', () => { + const pages = [node('p1', 'Doc', 'newparent'), node('newparent', 'NewParent', null, true)]; + const existing = [{ pageId: 'p1', relPath: 'OldParent/Doc.md' }]; + const actions = computePullActions({ pages, treeComplete: true, existing }); + + const moved = actions.moved.find((m) => m.pageId === 'p1'); + expect(moved).toBeTruthy(); + expect(moved!.fromRelPath).toBe('OldParent/Doc.md'); + expect(moved!.toRelPath).toBe('NewParent/Doc.md'); + // The old path is a MOVE removal, NEVER an absence delete. + expect(actions.toDelete).not.toContain('OldParent/Doc.md'); + expect(actions.toDelete).toEqual([]); + }); +}); + +describe('computePullActions — SPEC §8 suppression folded in', () => { + it('INCOMPLETE fetch (treeComplete:false) SUPPRESSES absence deletions', () => { + // dead is absent from the live tree, but the tree fetch was partial -> the + // missing pageId is NOT proof of deletion, so toDelete must be EMPTY and the + // decision must report apply:false / incomplete-fetch. + const pages = [node('keep', 'Keep')]; + const existing = [ + { pageId: 'keep', relPath: 'Keep.md' }, + { pageId: 'dead', relPath: 'Dead.md' }, + ]; + const actions = computePullActions({ + pages, + treeComplete: false, + existing, + }); + + expect(actions.deletionDecision).toEqual({ + apply: false, + reason: 'incomplete-fetch', + }); + // Suppressed: nothing to delete this cycle... + expect(actions.toDelete).toEqual([]); + // ...but the planned count is still reported (for the suppression log). + expect(actions.plannedDeleteCount).toBe(1); + // Writes/updates still happen regardless of the suppression. + expect(actions.toWrite).toEqual([{ pageId: 'keep', relPath: 'Keep.md' }]); + }); + + it('MASS-DELETE guard (>50% of a non-trivial vault) SUPPRESSES deletions', () => { + // 1 live page, 10 existing tracked, 9 of them absent -> 9/10 > 50% on a + // non-trivial (>=4) vault -> mass-delete suppression. + const pages = [node('p0', 'P0')]; + const existing = [ + { pageId: 'p0', relPath: 'P0.md' }, + ...Array.from({ length: 9 }, (_, i) => ({ + pageId: `gone${i}`, + relPath: `Gone${i}.md`, + })), + ]; + const actions = computePullActions({ pages, treeComplete: true, existing }); + + expect(actions.deletionDecision).toEqual({ + apply: false, + reason: 'mass-delete', + }); + expect(actions.toDelete).toEqual([]); + expect(actions.plannedDeleteCount).toBe(9); + expect(actions.existingCount).toBe(10); + }); + + it('moves are NOT suppressed even on an incomplete fetch', () => { + // A moved page is PRESENT in live, so its move is real regardless of the + // suppression (which only governs ABSENCE deletes). + const pages = [node('m', 'Moved')]; + const existing = [{ pageId: 'm', relPath: 'Old/Moved.md' }]; + const actions = computePullActions({ + pages, + treeComplete: false, + existing, + }); + expect(actions.moved).toEqual([ + { + pageId: 'm', + fromRelPath: 'Old/Moved.md', + toRelPath: 'Moved.md', + removeOldPath: true, + }, + ]); + // No absence deletes were planned here, so the decision trivially applies. + expect(actions.toDelete).toEqual([]); + }); + + it('empty-live with tracked files SUPPRESSES (failed fetch, not a real wipe)', () => { + const existing = [ + { pageId: 'a', relPath: 'A.md' }, + { pageId: 'b', relPath: 'B.md' }, + ]; + const actions = computePullActions({ + pages: [], + treeComplete: true, + existing, + }); + expect(actions.deletionDecision).toEqual({ + apply: false, + reason: 'empty-live', + }); + expect(actions.toDelete).toEqual([]); + expect(actions.toWrite).toEqual([]); + }); +}); + +describe('computePullActions — degenerate inputs', () => { + it('skips nodes without an id and nodes with no layout entry', () => { + const pages = [ + node('p1', 'Valid'), + { id: '', title: 'NoId' } as PageNode, // skipped (no id) + ]; + const actions = computePullActions({ + pages, + treeComplete: true, + existing: [], + }); + expect(actions.toWrite).toEqual([{ pageId: 'p1', relPath: 'Valid.md' }]); + }); +}); diff --git a/test/git-merge.test.ts b/test/git-merge.test.ts new file mode 100644 index 0000000..36d61b5 --- /dev/null +++ b/test/git-merge.test.ts @@ -0,0 +1,151 @@ +import { execFile } from 'node:child_process'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { promisify } from 'node:util'; +import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { + VaultGit, + BOT_AUTHOR_NAME, + BOT_AUTHOR_EMAIL, +} from '../src/git.js'; + +// git 3-way merge integration (test-strategy report §2 git gap). The existing +// git.test.ts covers a fast-forward merge and a conflicting merge; this file +// adds the two MISSING cases against a REAL temp git repo under os.tmpdir(): +// 1. a clean NON-fast-forward 3-way merge of non-overlapping changes -> +// { ok:true, conflict:false } and a real merge commit (two parents); +// 2. a NON-conflict merge FAILURE -> { ok:false, conflict:false } so the pull +// cycle does not mislabel it a "conflict markers in vault" situation. +// The conflicting-merge case (markers + conflict:true) already lives in +// git.test.ts and is NOT duplicated here. Skips gracefully if git is missing. + +const execFileAsync = promisify(execFile); + +async function gitAvailable(): Promise { + try { + await execFileAsync('git', ['--version']); + return true; + } catch { + return false; + } +} + +/** Number of parents of HEAD (2 => a real merge commit). */ +async function headParentCount(dir: string): Promise { + const { stdout } = await execFileAsync( + 'git', + ['--no-pager', 'rev-list', '--parents', '-n', '1', 'HEAD'], + { cwd: dir }, + ); + // Output: " ..." — parents are the trailing ids. + return stdout.trim().split(/\s+/).length - 1; +} + +describe('VaultGit.merge — 3-way merge integration (temp repo)', () => { + let available = false; + let dir: string; + + beforeAll(async () => { + available = await gitAvailable(); + }); + + afterEach(async () => { + if (dir) await rm(dir, { recursive: true, force: true }); + }); + + async function freshRepo(): Promise<{ vault: string; git: VaultGit }> { + dir = await mkdtemp(join(tmpdir(), 'docmost-merge-')); + const git = new VaultGit(dir); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + return { vault: dir, git }; + } + + async function commit( + git: VaultGit, + subject: string, + author = { name: BOT_AUTHOR_NAME, email: BOT_AUTHOR_EMAIL }, + ): Promise { + await git.stageAll(); + await git.commit(subject, { + authorName: author.name, + authorEmail: author.email, + }); + } + + it('clean NON-fast-forward 3-way merge of non-overlapping changes -> merge commit', async () => { + if (!available) return; // skip gracefully when git is unavailable + const { vault, git } = await freshRepo(); + + // Seed a shared base file on main so both branches diverge from a real + // merge-base (not an empty tree). + await writeFile(join(vault, 'base.md'), 'shared base\n', 'utf8'); + await commit(git, 'base'); + // Re-create docmost from this base so the merge-base is `base`. + await execFileAsync('git', ['--no-pager', 'branch', '-f', 'docmost', 'main'], { + cwd: vault, + }); + + // docmost adds doc-only.md (a DIFFERENT file than main touches). + await git.checkout('docmost'); + await writeFile(join(vault, 'doc-only.md'), 'from docmost\n', 'utf8'); + await commit(git, 'docmost: add doc-only'); + + // main adds main-only.md AND advances past the merge-base, so the merge can + // NOT fast-forward — it must create a real 3-way merge commit. + await git.checkout('main'); + await writeFile(join(vault, 'main-only.md'), 'from main\n', 'utf8'); + await commit(git, 'local: add main-only', { + name: 'Human', + email: 'human@local', + }); + + const res = await git.merge('docmost'); + expect(res.ok).toBe(true); + expect(res.conflict).toBe(false); + + // A real (non-FF) merge: HEAD has TWO parents. + expect(await headParentCount(vault)).toBe(2); + + // Both non-overlapping changes are present on main after the merge. + const tracked = await git.listTrackedFiles(); + expect(new Set(tracked)).toEqual( + new Set(['base.md', 'main-only.md', 'doc-only.md']), + ); + }); + + it('NON-conflict merge FAILURE -> { ok:false, conflict:false } (not mislabeled a conflict)', async () => { + if (!available) return; + const { vault, git } = await freshRepo(); + + // base file on main, then fork docmost from this base. + await writeFile(join(vault, 'f.md'), 'base\n', 'utf8'); + await commit(git, 'base'); + await execFileAsync('git', ['--no-pager', 'branch', '-f', 'docmost', 'main'], { + cwd: vault, + }); + + // docmost modifies f.md (committed). + await git.checkout('docmost'); + await writeFile(join(vault, 'f.md'), 'docmost change\n', 'utf8'); + await commit(git, 'docmost: edit f'); + + // Back on main, leave an UNCOMMITTED local change to f.md. git refuses the + // merge ("Your local changes ... would be overwritten by merge") and exits + // non-zero — but there are NO unmerged index paths, so this is a clean + // FAILURE, not a conflict. `merge()` must report { ok:false, conflict:false } + // so pull.ts does not falsely claim conflict markers are in the vault. + await git.checkout('main'); + await writeFile(join(vault, 'f.md'), 'uncommitted local edit\n', 'utf8'); + // NOTE: deliberately NOT staged/committed. + + const res = await git.merge('docmost'); + expect(res.ok).toBe(false); + expect(res.conflict).toBe(false); + // The merge did not start: HEAD is still a single-parent commit. + expect(await headParentCount(vault)).toBe(1); + // And the repo is NOT left mid-merge (no MERGE_HEAD / unmerged paths). + expect(await git.isMergeInProgress()).toBe(false); + }); +}); diff --git a/test/read-existing.test.ts b/test/read-existing.test.ts new file mode 100644 index 0000000..acb2e88 --- /dev/null +++ b/test/read-existing.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, it } from 'vitest'; +import { readExisting } from '../src/pull.js'; + +// R-Pull-1 (test-strategy report §5): `readExisting` now takes injectable IO +// (`listTracked` / `readFile`), so its parsing + skip rules are unit-testable +// without a real git repo or filesystem. These tests pass fakes only — no git, +// no fs, no network. + +/** Build a valid self-contained file with a `docmost:meta` block. */ +function withMeta(meta: Record, body = '# Title\nbody\n'): string { + return `\n\n${body}`; +} + +/** A fake `readFile` backed by an in-memory map (rejects on a missing key). */ +function fakeReadFile(files: Record) { + return async (rel: string): Promise => { + if (!(rel in files)) { + throw Object.assign(new Error(`ENOENT: ${rel}`), { code: 'ENOENT' }); + } + return files[rel]; + }; +} + +describe('readExisting (R-Pull-1, injected IO)', () => { + it('recovers { pageId, relPath } for valid tracked files', async () => { + const files = { + 'Space/A.md': withMeta({ version: 1, pageId: 'p1', title: 'A' }), + 'Space/Sub/B.md': withMeta({ version: 1, pageId: 'p2', title: 'B' }), + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + expect(result).toEqual([ + { pageId: 'p1', relPath: 'Space/A.md' }, + { pageId: 'p2', relPath: 'Space/Sub/B.md' }, + ]); + }); + + it('SKIPS a file with no docmost:meta block (plain hand-written markdown)', async () => { + const files = { + 'tracked.md': withMeta({ version: 1, pageId: 'p1' }), + 'stray.md': '# Just a hand-written note\n\nNo meta here.\n', + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + // Only the engine-tracked file (with a pageId) survives. + expect(result).toEqual([{ pageId: 'p1', relPath: 'tracked.md' }]); + }); + + it('SKIPS a file whose meta has no pageId', async () => { + const files = { + 'has-id.md': withMeta({ version: 1, pageId: 'keep' }), + 'no-id.md': withMeta({ version: 1, title: 'untitled', slugId: 's' }), + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + expect(result).toEqual([{ pageId: 'keep', relPath: 'has-id.md' }]); + }); + + it('SKIPS a file with an unparseable (invalid-JSON) meta block, does not throw', async () => { + // Invalid JSON inside the meta block makes parseDocmostMarkdown throw; the + // skip-rule must swallow it and treat the file as not-engine-tracked. + const files = { + 'good.md': withMeta({ version: 1, pageId: 'good' }), + 'broken.md': '\n\nbody\n', + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + expect(result).toEqual([{ pageId: 'good', relPath: 'good.md' }]); + }); + + it('does NOT throw when readFile REJECTS (tracked but missing) — treats it as skipped', async () => { + const files = { + 'present.md': withMeta({ version: 1, pageId: 'present' }), + // "ghost.md" is listed as tracked but absent from the file map -> reject. + }; + const result = await readExisting({ + listTracked: async () => ['present.md', 'ghost.md'], + readFile: fakeReadFile(files), + }); + // The rejection is swallowed; the present file still comes through. + expect(result).toEqual([{ pageId: 'present', relPath: 'present.md' }]); + }); + + it('returns an empty list when nothing is tracked', async () => { + const result = await readExisting({ + listTracked: async () => [], + readFile: async () => { + throw new Error('should not be called'); + }, + }); + expect(result).toEqual([]); + }); + + it('combines all skip rules in one listing (only the valid files survive)', async () => { + const files = { + 'ok1.md': withMeta({ version: 1, pageId: 'a' }), + 'no-meta.md': 'plain\n', + 'no-id.md': withMeta({ version: 1, title: 'x' }), + 'broken.md': '\nbody\n', + 'ok2.md': withMeta({ version: 1, pageId: 'b' }), + // missing.md rejects on read. + }; + const result = await readExisting({ + listTracked: async () => [...Object.keys(files), 'missing.md'], + readFile: fakeReadFile(files), + }); + expect(result).toEqual([ + { pageId: 'a', relPath: 'ok1.md' }, + { pageId: 'b', relPath: 'ok2.md' }, + ]); + }); +}); diff --git a/test/transforms-extra.test.ts b/test/transforms-extra.test.ts index bd7f0e48c7bd37dad15e293434699e3ae9da2d9f..c77d5dd6179c499b8c4ec558b2a375faf5790ca0 100644 GIT binary patch delta 1779 zcmZ8i-D(?07=_Y{;6iC3gj~eot00lo$eY@^lk0+GE5)rN8%rh>W6)?dyF1S8%w}e` ziYR77X>NL}zCkZa?*e^;(yKl|A0W@r>(Vp(BZcV3`}2KgzCGVL$NxP3Y4qmzJH^9? zbfA@Wh6zPZ)6?_u`Tp_ocQia1ZjH_#kH%+5!>?(s&CF6qs89yv5>8S$ZkVJNSF;w` zJc`82ZhwRD17xnmi<~QNEitJTCt*oxM_lPVPU!8cUx){BK&>JcvPDeIsO4}uQ@Aml zrrISn2u*X>z_!Zjw?F^5s_xM|5n)n}Bvir;#+m^yOtcXyrc~Gj=#pc2%Q1%wA||A5 z5aTJAdfpA_nQ)0`0H3C`+Oqi+oOpzpPCYf8;S|dy51r7eKEYx}mQF>S>)gsk1IirE zv83m!P7(od}tfi{-C(UN2v7ep29u_UIxAg1PNc8`QQh zcS}^yYorZzBnLQ(zSzZ|FTt)1Bv-LZaR1;zmws54zteD4HC(;daP?k8fkQ-uEz@Uv zdvrzj?|b|UaT)l%=QD0Q{{HH++a<$YZWKjKTD~5U=z4Yj`l_yVCSR3acdMo-44Adt zxDNXM_-6CQhXpR6pl^pK&xW*KsT8Sj%yS>m1Zq_lbfh+LcH+gwXuH2nq0SZ*8J%Kl zVXCd$O2f>ntHPXwI!(E9cC4Rh?I1{Abq`O*gr)lB+hUlvZoCvLL~YF`h*_hv&TSd|f@iFBug_ z$AjS*0dRPBa(X&Atk60sGtas4;HcUm7(8+wA=}Go-3aPUW;oP_ee)kIbKPqS6sK<+kmTyI$hfPYGq}}%vRgA zJ{{UsP__{eh{JV2RAo~=TcBzk18$M=KD9GuSZr99_%o{C^3CSW8>^eYHb4KUd3C+J zvHW*)XW6+M-fFZR)C=sC&RrI_?=Ank`|0u(4gLR+F;sMPIM_e#SJ~Zk)#3xi_@JGY zhHXFxxiRpfTu_x?wC!`U9tVhsW3XOkgAd#%`$xy4ChhTN?iF9D=wsyh1d7Oz%{JIr T{!QJRJ9WNX`%l8P&zt`NC1*Fh delta 36 rcmZn(zUH)HrOad}xq80xV!fQyy!4U`1v^^>!_B+pWms5hHLbY-07DE1