refactor(sync): testability seams for pull + collab; integration tests

Behavior-preserving refactors (R-Collab-1, R-Pull-1, R-Pull-2) to unblock testing,
plus the integration tests they enable.

- collaboration: extract applyTransformToYdoc from onSynced; onSynced stays
  synchronous (NO await between Yjs read and write — SPEC §2 atomicity preserved)
- pull: readExisting(deps) injectable IO; split main into pure computePullActions
  (plan + suppression/mass-delete decisions) + thin applyPullActions(deps) (IO);
  ordering and data-loss guards preserved bit-for-bit
- tests (+35): collaboration-apply (atomicity/null-abort/throw-no-partial),
  read-existing, compute/apply-pull-actions (move-write-fail keeps old path),
  git temp-repo 3-way non-FF merge
- transforms-extra property: constrain the generator to mutually-non-substring
  words (the domain where the renumber property holds) -> deterministic; document
  the inherited commentsToFootnotes substring-overlap comment-drop via it.fails
  (off the sync path, SPEC §3; backport-fix lives in docmost-mcp)
- 695 -> 731 green; build clean; corpus STABLE
This commit is contained in:
vvzvlad
2026-06-17 01:29:49 +03:00
parent d9d8538846
commit 1750058503
8 changed files with 1493 additions and 222 deletions

View File

@@ -351,6 +351,62 @@ export function assertYjsEncodable(doc: any): void {
buildYDoc(doc);
}
/**
* SYNCHRONOUS read-transform-write of a live collaboration `Y.Doc` (SPEC §2).
*
* This is the extracted body of `mutatePageContent`'s `onSynced` callback. It
* MUST stay fully synchronous — there is deliberately NO `await` anywhere
* between reading the live doc and writing it back. While the JS event loop is
* not yielded, no incoming remote update can interleave, so any already-synced
* concurrent human edits in `liveDoc` are preserved rather than clobbered. The
* extraction is a testing seam ONLY; the runtime semantics are identical to the
* inline version that lived in `onSynced`.
*
* Steps:
* 1. Read the live doc via `TiptapTransformer.fromYdoc`. An empty/invalid live
* doc falls back to `{ type: "doc", content: [] }` (a valid empty doc).
* 2. Run `transform(liveDoc)`. If it returns `null` -> abort: NO write is
* performed and `{ written: false, doc: liveDoc }` is returned. If it
* THROWS, the error propagates to the caller (no partial write happens —
* the throw is before any `ydoc.transact`).
* 3. Otherwise encode the new doc (`buildYDoc`) and, inside a single
* `ydoc.transact`, clear the existing fragment and apply the new state.
* Returns `{ written: true, doc: newDoc }`.
*/
export function applyTransformToYdoc(
ydoc: Y.Doc,
transform: (liveDoc: any) => any | null,
): { written: boolean; doc: any } {
let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
if (
!liveDoc ||
typeof liveDoc !== "object" ||
!Array.isArray(liveDoc.content)
) {
liveDoc = { type: "doc", content: [] };
}
const newDoc = transform(liveDoc);
if (newDoc == null) {
// Transform aborted — write nothing, return the live doc.
return { written: false, doc: liveDoc };
}
const tempDoc = buildYDoc(newDoc);
// Fetch the fragment immediately before the transact that mutates it, rather
// than reusing a handle grabbed across the transform.
const fragment = ydoc.getXmlFragment("default");
ydoc.transact(() => {
if (fragment.length > 0) {
fragment.delete(0, fragment.length);
}
Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc));
});
return { written: true, doc: newDoc };
}
/** Time we wait for the initial handshake/sync before giving up. */
const CONNECT_TIMEOUT_MS = 25000;
/** Time we wait for the server to acknowledge our write before giving up. */
@@ -520,37 +576,13 @@ export async function mutatePageContent(
// CRITICAL: everything between reading the live doc and writing it
// back must stay synchronous (no await). While the JS event loop is
// not yielded, no incoming remote update can interleave, so any
// already-synced concurrent edits are preserved in liveDoc.
let newDoc: any;
// already-synced concurrent edits are preserved in liveDoc. This is
// delegated to the SYNCHRONOUS `applyTransformToYdoc` helper — there
// is deliberately NO `await` on its result, so the read-transform-
// write stays atomic (SPEC §2).
let result: { written: boolean; doc: any };
try {
let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
if (
!liveDoc ||
typeof liveDoc !== "object" ||
!Array.isArray(liveDoc.content)
) {
liveDoc = { type: "doc", content: [] };
}
newDoc = transform(liveDoc);
if (newDoc == null) {
// Transform aborted — write nothing, return the live doc.
lastWrittenDoc = liveDoc;
finish(null, liveDoc);
return;
}
const tempDoc = buildYDoc(newDoc);
// Fetch the fragment immediately before the transact that mutates
// it, rather than reusing a handle grabbed across the transform.
const fragment = ydoc.getXmlFragment("default");
ydoc.transact(() => {
if (fragment.length > 0) {
fragment.delete(0, fragment.length);
}
Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc));
});
result = applyTransformToYdoc(ydoc, transform);
} catch (e) {
// Includes errors thrown by transform (e.g. "afterText not found",
// "text not found"): propagate them verbatim to the caller.
@@ -558,7 +590,14 @@ export async function mutatePageContent(
return;
}
lastWrittenDoc = newDoc;
if (!result.written) {
// Transform aborted — write nothing, return the live doc.
lastWrittenDoc = result.doc;
finish(null, result.doc);
return;
}
lastWrittenDoc = result.doc;
if (process.env.DEBUG)
console.error("Content written, waiting for server to persist...");
waitForPersistence();

View File

@@ -46,6 +46,8 @@ import {
planReconciliation,
decideAbsenceDeletions,
type LiveEntry,
type MovedEntry,
type DeletionDecision,
} from "./reconcile.js";
import { stabilizePageFile, type PageMeta } from "./stabilize.js";
@@ -70,23 +72,41 @@ function segmentsToRelPath(segments: string[], stem: string): string {
return [...segments, `${stem}.md`].join("/");
}
/**
* Injectable IO for `readExisting` (R-Pull-1, test-strategy report §5). The real
* `main` wires these to `git.listTrackedFiles("*.md")` and an `fs.readFile`
* rooted at the vault; tests pass fakes so the parsing/skip rules are unit-
* testable without a real git repo or filesystem.
*/
export interface ReadExistingDeps {
/** List tracked .md paths (forward-slash, vault-relative). */
listTracked: () => Promise<string[]>;
/** Read a tracked file's text by its (forward-slash) vault-relative path. */
readFile: (relPath: string) => Promise<string>;
}
/**
* Read every tracked .md file in the vault and parse its `docmost:meta` to
* recover `{ pageId, relPath }`. Files without a parseable pageId in meta are
* skipped (they are not engine-tracked pages — e.g. a stray hand-written file).
*
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges;
* - unparseable meta (`parseDocmostMarkdown` throws) -> skipped;
* - parseable but no `pageId` in meta -> skipped.
*/
async function readExisting(
git: VaultGit,
vaultRoot: string,
export async function readExisting(
deps: ReadExistingDeps,
): Promise<{ pageId: string; relPath: string }[]> {
const tracked = await git.listTrackedFiles("*.md");
const tracked = await deps.listTracked();
const existing: { pageId: string; relPath: string }[] = [];
for (const relPath of tracked) {
// git ls-files always emits forward-slash paths; normalize just in case.
const rel = relPath.split(sep).join("/");
let text: string;
try {
text = await readFile(relToAbs(vaultRoot, rel), "utf8");
text = await deps.readFile(rel);
} catch {
// Tracked but missing on disk (mid-operation race) — skip; the next pull
// converges.
@@ -105,6 +125,306 @@ async function readExisting(
return existing;
}
/**
* Input to the PURE `computePullActions` (R-Pull-2). All data, no IO: the live
* tree nodes + completeness flag (from `listSpaceTree`) and the parsed
* `existing` tracked files (from `readExisting`).
*/
export interface PullActionsInput {
/** Live page nodes for the space (from `listSpaceTree`). */
pages: PageNode[];
/** Whether the live tree fetch was COMPLETE (SPEC §8 suppression). */
treeComplete: boolean;
/** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
existing: { pageId: string; relPath: string }[];
}
/**
* The PURE decisions object computed by `computePullActions` (no IO). It holds
* the reconciliation plan plus the SPEC §8 absence-deletion decision, with the
* suppression already folded in: `toDelete` is the POST-suppression set the
* caller should actually remove (empty when `deletionDecision.apply` is false).
*/
export interface PullActions {
/** Pages to (re)write at their relPath (add + update + move target). */
toWrite: { pageId: string; relPath: string }[];
/** Moves: write new path, then remove old path (only on a successful write). */
moved: MovedEntry[];
/**
* Absence-based paths to delete AFTER suppression. Empty when the decision
* suppressed deletions this cycle, so the caller can apply it unconditionally.
*/
toDelete: string[];
/** Why absence deletions were (or were not) applied (for logging + tests). */
deletionDecision: DeletionDecision;
/** Tracked-file count (for the suppression log messages). */
existingCount: number;
/** Planned absence-delete count BEFORE suppression (for the log message). */
plannedDeleteCount: number;
}
/**
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
* tree nodes + completeness + existing tracked files and returns the full set of
* decisions with NO IO:
*
* - builds the vault layout (deterministic relPath per live page),
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
* POST-suppression set (empty when suppressed).
*
* Moves are NOT governed by the suppression: a moved page is present in `live`,
* so its old-path removal is real (the caller still gates it on the write
* succeeding). The expensive content fetch / file write / git ops happen in the
* thin `applyPullActions`.
*/
export function computePullActions(input: PullActionsInput): PullActions {
const { pages, treeComplete, existing } = input;
const layout = buildVaultLayout(pages);
const live: LiveEntry[] = [];
for (const p of pages) {
if (!p || !p.id) continue;
const entry = layout.get(p.id);
if (!entry) continue;
live.push({
pageId: p.id,
relPath: segmentsToRelPath(entry.segments, entry.stem),
});
}
// Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
// `plan.moved` carries move old-path removals separately.
const plan = planReconciliation(live, existing);
// Decide whether the ABSENCE-based deletions may be applied this cycle
// (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard.
// Moves are NOT governed by this.
const deletionDecision = decideAbsenceDeletions({
treeComplete,
liveCount: live.length,
existingCount: existing.length,
deleteCount: plan.toDelete.length,
});
return {
toWrite: plan.toWrite,
moved: plan.moved,
// Fold the suppression in: a suppressed cycle deletes nothing.
toDelete: deletionDecision.apply ? plan.toDelete : [],
deletionDecision,
existingCount: existing.length,
plannedDeleteCount: plan.toDelete.length,
};
}
/**
* Injectable IO for `applyPullActions` (R-Pull-2). The real `main` wires these
* to the live client, the vault git wrapper, and `node:fs/promises`; tests pass
* fakes that RECORD calls so the ordering + the move-on-success data-loss guard
* are testable without real git/fs/network.
*/
export interface ApplyPullActionsDeps {
client: Pick<DocmostClient, "getPageJson">;
git: Pick<VaultGit, "stageAll" | "commit" | "checkout" | "merge">;
/** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */
writeFile: (absPath: string, text: string) => Promise<void>;
/** Recursive mkdir of an ABSOLUTE directory path. */
mkdir: (absDir: string) => Promise<void>;
/** Remove a file by ABSOLUTE path (force: a missing file is a no-op). */
rm: (absPath: string) => Promise<void>;
}
/** Outcome counters from `applyPullActions` (for the summary + tests). */
export interface ApplyResult {
written: number;
movedApplied: number;
deleted: number;
failed: number;
committed: boolean;
merge: { ok: boolean; conflict: boolean; output: string };
}
/**
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
* order, with all the original safety guards preserved bit-for-bit:
*
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
* failed move-write keeps the old path so the page never vanishes).
* 3. apply (post-suppression) absence deletes.
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
*
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
*/
export async function applyPullActions(
deps: ApplyPullActionsDeps,
actions: PullActions,
vaultRoot: string,
): Promise<ApplyResult> {
const { client, git } = deps;
// Emit the SPEC §8 suppression warnings (preserved from the original `main`).
const decision = actions.deletionDecision;
if (!decision.apply) {
if (decision.reason === "incomplete-fetch") {
console.warn(
"pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)",
);
} else if (decision.reason === "empty-live") {
console.warn(
`pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` +
`tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
`Docmost is reachable.`,
);
} else {
console.warn(
`pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` +
`tracked file(s) (mass-delete guard) — deletions suppressed this ` +
`cycle (SPEC §8). Verify the live Docmost tree, then re-run.`,
);
}
}
// 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11).
let written = 0;
let failed = 0;
let completed = 0;
let nextIndex = 0;
// pageIds whose write FAILED. A moved page whose new-path write failed must
// NOT have its old path removed (otherwise the page vanishes entirely).
const failedPageIds = new Set<string>();
const writeOne = async (w: {
pageId: string;
relPath: string;
}): Promise<void> => {
try {
const page = await client.getPageJson(w.pageId);
const meta: PageMeta = {
version: 1,
pageId: page.id,
slugId: page.slugId,
title: page.title,
spaceId: page.spaceId,
parentPageId: page.parentPageId ?? null,
};
const text = await stabilizePageFile(page.content, meta);
const abs = relToAbs(vaultRoot, w.relPath);
await deps.mkdir(dirname(abs));
await deps.writeFile(abs, text);
written++;
} catch (err) {
failed++;
failedPageIds.add(w.pageId);
console.error(
`pull: failed page ${w.pageId}:`,
err instanceof Error ? err.message : String(err),
);
} finally {
completed++;
if (completed % PROGRESS_EVERY === 0) {
console.log(`pulled ${completed}/${actions.toWrite.length}`);
}
}
};
// Bounded-concurrency pool (dependency-free): a fixed set of runners each
// take the next index until the write list is exhausted. One bad page never
// aborts the whole pull (mirrors the fault-tolerant tree walk).
const runner = async (): Promise<void> => {
while (true) {
const i = nextIndex++;
if (i >= actions.toWrite.length) return;
await writeOne(actions.toWrite[i]);
}
};
await Promise.all(
Array.from(
{ length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 },
() => runner(),
),
);
// Helper: `rm` with force:true is a no-op if the file is already gone.
const removePath = async (rel: string, what: string): Promise<boolean> => {
try {
await deps.rm(relToAbs(vaultRoot, rel));
return true;
} catch (err) {
console.error(
`pull: failed to ${what} ${rel}:`,
err instanceof Error ? err.message : String(err),
);
return false;
}
};
// 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its
// old path is genuinely stale — NOT subject to the incomplete-fetch
// suppression. BUT only remove the old path when (a) the planner marked it
// removable (not reused by another live page) AND (b) the new-path write
// actually SUCCEEDED — otherwise we would delete the only copy of a page
// whose move-write failed (⭐ data-loss guard).
let movedApplied = 0;
for (const m of actions.moved) {
if (!m.removeOldPath) continue;
if (failedPageIds.has(m.pageId)) {
console.warn(
`pull: move write for ${m.pageId} failed — keeping old path ` +
`${m.fromRelPath} (SPEC §8)`,
);
continue;
}
if (await removePath(m.fromRelPath, "remove moved old path")) movedApplied++;
}
// 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the
// post-suppression set (empty when the decision suppressed them, SPEC §8).
let deleted = 0;
for (const rel of actions.toDelete) {
if (await removePath(rel, "delete")) deleted++;
}
// 4. Stage + commit on `docmost` (only if there is something to commit).
// Deterministic stabilized output means unchanged pages produce identical
// bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the
// ACTUAL work applied (pages written + files deleted), not the planned size,
// so a run with failures does not over-report (SPEC §5 nit).
const subject =
deleted > 0
? `docmost: sync ${written} page(s), ${deleted} deleted`
: `docmost: sync ${written} page(s)`;
await git.stageAll();
const committed = await git.commit(subject, {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
trailers: [SOURCE_TRAILER],
});
// Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
// we never push to Docmost. Push to a git remote is deferred (SPEC §7).
await git.checkout(DEFAULT_BRANCH);
const merge = await git.merge(DOCMOST_BRANCH);
if (merge.conflict) {
console.error(
"pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
"in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
"Docmost (read-only). Resolve locally, then re-run.",
);
} else if (!merge.ok) {
console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
}
console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
return { written, movedApplied, deleted, failed, committed, merge };
}
async function main(): Promise<void> {
const s = loadSettings();
const client = new DocmostClient(
@@ -143,203 +463,52 @@ async function main(): Promise<void> {
// 2. Work on the docmost mirror branch.
await git.checkout(DOCMOST_BRANCH);
// 3. Fetch the live tree and compute the desired files (relPath via the pure
// sanitize + disambiguation layout). `listSpaceTree` reports completeness:
// if ANY branch's children fetch failed or the node cap was hit, the tree
// is PARTIAL and absence-based deletions must be suppressed this cycle
// 3. Fetch the live tree. `listSpaceTree` reports completeness: if ANY
// branch's children fetch failed or the node cap was hit, the tree is
// PARTIAL and absence-based deletions must be suppressed this cycle
// (SPEC §8) — a missing pageId in a partial tree is NOT proof of deletion.
const { pages: rawPages, complete: treeComplete } =
await client.listSpaceTree(spaceId);
const pages = rawPages as PageNode[];
const layout = buildVaultLayout(pages);
const live: LiveEntry[] = [];
const liveNodeByPageId = new Map<string, PageNode>();
for (const p of pages) {
if (!p || !p.id) continue;
const entry = layout.get(p.id);
if (!entry) continue;
live.push({
pageId: p.id,
relPath: segmentsToRelPath(entry.segments, entry.stem),
// 4. Parse the existing tracked .md files (pageId + relPath). Inject the real
// git + fs IO (R-Pull-1).
const existing = await readExisting({
listTracked: () => git.listTrackedFiles("*.md"),
readFile: (rel) => readFile(relToAbs(vaultRoot, rel), "utf8"),
});
liveNodeByPageId.set(p.id, p);
}
// 4. Parse the existing tracked .md files (pageId + relPath).
const existing = await readExisting(git, vaultRoot);
// 5. Compute the pull actions (pure, SPEC §5/§8): layout + reconciliation +
// the absence-deletion suppression decision, folded together. No IO.
const actions = computePullActions({ pages, treeComplete, existing });
// 5. Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
// `plan.moved` carries move old-path removals separately.
const plan = planReconciliation(live, existing);
// 6. Decide whether the ABSENCE-based deletions (`plan.toDelete`) may be
// applied this cycle (SPEC §8). The pure helper folds in BOTH the
// incomplete-fetch suppression (a partial tree must not look like
// deletions) AND the mass-delete guard (defense in depth). Moves are NOT
// governed by this — a moved page is present in `live`, so its old-path
// removal is real and applied unconditionally (subject only to its write
// succeeding).
const deleteDecision = decideAbsenceDeletions({
treeComplete,
liveCount: live.length,
existingCount: existing.length,
deleteCount: plan.toDelete.length,
});
if (!deleteDecision.apply) {
if (deleteDecision.reason === "incomplete-fetch") {
console.warn(
"pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)",
);
} else if (deleteDecision.reason === "empty-live") {
console.warn(
`pull: live fetch returned 0 pages but ${existing.length} file(s) are ` +
`tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
`Docmost is reachable.`,
);
} else {
console.warn(
`pull: plan would delete ${plan.toDelete.length} of ${existing.length} ` +
`tracked file(s) (mass-delete guard) — deletions suppressed this ` +
`cycle (SPEC §8). Verify the live Docmost tree, then re-run.`,
);
}
}
// 7. Write each live page in its fixpoint form (normalize-on-write, SPEC §11),
// then apply move-old-path + absence-delete removals.
let written = 0;
let failed = 0;
let completed = 0;
let nextIndex = 0;
// pageIds whose write FAILED. A moved page whose new-path write failed must
// NOT have its old path removed (otherwise the page vanishes entirely).
const failedPageIds = new Set<string>();
const writeOne = async (w: { pageId: string; relPath: string }): Promise<void> => {
const node = liveNodeByPageId.get(w.pageId);
if (!node) return;
try {
const page = await client.getPageJson(w.pageId);
const meta: PageMeta = {
version: 1,
pageId: page.id,
slugId: page.slugId,
title: page.title,
spaceId: page.spaceId,
parentPageId: page.parentPageId ?? null,
};
const text = await stabilizePageFile(page.content, meta);
const abs = relToAbs(vaultRoot, w.relPath);
await mkdir(dirname(abs), { recursive: true });
// 6. Apply the actions (thin IO, exact order + guards preserved): write each
// live page in its fixpoint form (SPEC §11), apply move-old-path removals
// (only when the write succeeded) + absence deletes, then commit on
// `docmost` and merge into main (conflicts surfaced, SPEC §9).
const result = await applyPullActions(
{
client,
git,
// `applyPullActions` already calls `deps.mkdir(dirname(abs))` before each
// write, so the file write itself only needs `writeFile`.
writeFile: async (abs, text) => {
await writeFile(abs, text, "utf8");
written++;
} catch (err) {
failed++;
failedPageIds.add(w.pageId);
console.error(
`pull: failed page ${w.pageId}:`,
err instanceof Error ? err.message : String(err),
);
} finally {
completed++;
if (completed % PROGRESS_EVERY === 0) {
console.log(`pulled ${completed}/${plan.toWrite.length}`);
}
}
};
// Bounded-concurrency pool (dependency-free): a fixed set of runners each
// take the next index until the write list is exhausted. One bad page never
// aborts the whole pull (mirrors the fault-tolerant tree walk).
const runner = async (): Promise<void> => {
while (true) {
const i = nextIndex++;
if (i >= plan.toWrite.length) return;
await writeOne(plan.toWrite[i]);
}
};
await Promise.all(
Array.from(
{ length: Math.min(CONCURRENCY, plan.toWrite.length) || 1 },
() => runner(),
),
},
mkdir: async (abs) => {
await mkdir(abs, { recursive: true });
},
rm: async (abs) => {
await rm(abs, { force: true });
},
},
actions,
vaultRoot,
);
// Helper: `rm` with force:true is a no-op if the file is already gone.
const removePath = async (rel: string, what: string): Promise<boolean> => {
try {
await rm(relToAbs(vaultRoot, rel), { force: true });
return true;
} catch (err) {
console.error(
`pull: failed to ${what} ${rel}:`,
err instanceof Error ? err.message : String(err),
);
return false;
}
};
const { written, movedApplied, deleted, failed, committed, merge } = result;
// 7a. Apply MOVE old-path removals. A moved page IS present in `live`, so its
// old path is genuinely stale — this is NOT subject to the incomplete-
// fetch suppression. BUT only remove the old path when (a) the planner
// marked it removable (not reused by another live page) AND (b) the new-
// path write actually SUCCEEDED — otherwise we would delete the only copy
// of a page whose move-write failed.
let movedApplied = 0;
for (const m of plan.moved) {
if (!m.removeOldPath) continue;
if (failedPageIds.has(m.pageId)) {
console.warn(
`pull: move write for ${m.pageId} failed — keeping old path ` +
`${m.fromRelPath} (SPEC §8)`,
);
continue;
}
if (await removePath(m.fromRelPath, "remove moved old path")) movedApplied++;
}
// 7b. Apply ABSENCE-based deletions — ONLY if the decision allowed them
// (incomplete-fetch suppression + mass-delete guard, SPEC §8).
let deleted = 0;
if (deleteDecision.apply) {
for (const rel of plan.toDelete) {
if (await removePath(rel, "delete")) deleted++;
}
}
// 8. Stage + commit on `docmost` (only if there is something to commit).
// Deterministic stabilized output means unchanged pages produce identical
// bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the
// ACTUAL work applied (pages written + files deleted), not the planned size,
// so a run with failures does not over-report (SPEC §5 nit).
const subject =
deleted > 0
? `docmost: sync ${written} page(s), ${deleted} deleted`
: `docmost: sync ${written} page(s)`;
await git.stageAll();
const committed = await git.commit(subject, {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
trailers: [SOURCE_TRAILER],
});
// 9. Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
// we never push to Docmost. Push to a git remote is deferred (SPEC §7).
await git.checkout(DEFAULT_BRANCH);
const merge = await git.merge(DOCMOST_BRANCH);
if (merge.conflict) {
console.error(
"pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
"in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
"Docmost (read-only). Resolve locally, then re-run.",
);
} else if (!merge.ok) {
console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
}
console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
// 10. One-line summary.
// 7. One-line summary.
console.log(
`pull complete: ${written} written, ${movedApplied} moved, ` +
`${deleted} deleted, committed=${committed}, ` +

View File

@@ -0,0 +1,417 @@
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
import { applyPullActions } from '../src/pull.js';
import type {
PullActions,
ApplyPullActionsDeps,
} from '../src/pull.js';
import type { DeletionDecision } from '../src/reconcile.js';
// R-Pull-2 (test-strategy report §5): `applyPullActions` is the THIN IO half of
// the pull cycle. These tests drive it with FAKES that record every call — no
// real git, fs, or network — so the ordering and the ⭐ move-on-success
// data-loss guard are verifiable. SPEC §8 (delete suppression) + SPEC §5 (commit
// subject reflects ACTUAL counts) are asserted here.
const VAULT = '/vault';
/** A getPageJson fake: returns a minimal page whose content stabilizes cheaply. */
function makeClient(opts?: { failFor?: Set<string> }) {
const calls: string[] = [];
const client = {
getPageJson: vi.fn(async (pageId: string) => {
calls.push(pageId);
if (opts?.failFor?.has(pageId)) {
throw new Error(`fetch failed for ${pageId}`);
}
return {
id: pageId,
slugId: `slug-${pageId}`,
title: `Title ${pageId}`,
spaceId: 'space',
parentPageId: null,
updatedAt: '2026-01-01T00:00:00.000Z',
// A trivial doc so stabilizePageFile (the real one) runs fast.
content: {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: pageId }] },
],
},
};
}),
};
return { client, calls };
}
/** A git fake recording the order of ops; merge result is configurable. */
function makeGit(merge: { ok: boolean; conflict: boolean; output?: string } = {
ok: true,
conflict: false,
}) {
const order: string[] = [];
let committedSubject: string | undefined;
const git = {
stageAll: vi.fn(async () => {
order.push('stageAll');
}),
commit: vi.fn(async (subject: string) => {
order.push(`commit:${subject}`);
committedSubject = subject;
return true;
}),
checkout: vi.fn(async (branch: string) => {
order.push(`checkout:${branch}`);
}),
merge: vi.fn(async () => {
order.push('merge');
return { ok: merge.ok, conflict: merge.conflict, output: merge.output ?? '' };
}),
};
return {
git,
order,
get committedSubject() {
return committedSubject;
},
};
}
/** A recording fs fake: writes/mkdirs/rms tracked in arrays. */
function makeFs(opts?: { failWriteFor?: Set<string> }) {
const writes: { abs: string; text: string }[] = [];
const mkdirs: string[] = [];
const rms: string[] = [];
const fs = {
writeFile: vi.fn(async (abs: string, text: string) => {
// Fail a specific destination path if asked (to simulate a write failure).
if (opts?.failWriteFor?.has(abs)) {
throw new Error(`write failed for ${abs}`);
}
writes.push({ abs, text });
}),
mkdir: vi.fn(async (abs: string) => {
mkdirs.push(abs);
}),
rm: vi.fn(async (abs: string) => {
rms.push(abs);
}),
};
return { fs, writes, mkdirs, rms };
}
function deps(
client: any,
git: any,
fs: ReturnType<typeof makeFs>,
): ApplyPullActionsDeps {
return {
client,
git,
writeFile: fs.fs.writeFile,
mkdir: fs.fs.mkdir,
rm: fs.fs.rm,
};
}
const APPLY: DeletionDecision = { apply: true };
function actions(partial: Partial<PullActions>): PullActions {
return {
toWrite: [],
moved: [],
toDelete: [],
deletionDecision: APPLY,
existingCount: 0,
plannedDeleteCount: 0,
...partial,
};
}
beforeEach(() => {
vi.spyOn(console, 'log').mockImplementation(() => {});
vi.spyOn(console, 'warn').mockImplementation(() => {});
vi.spyOn(console, 'error').mockImplementation(() => {});
});
afterEach(() => {
vi.restoreAllMocks();
});
describe('applyPullActions — happy path (write + commit + merge)', () => {
it('fetches, writes each page, stages, commits, checks out main, merges', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [
{ pageId: 'p1', relPath: 'A.md' },
{ pageId: 'p2', relPath: 'Sub/B.md' },
],
}),
VAULT,
);
expect(res.written).toBe(2);
expect(res.failed).toBe(0);
expect(res.committed).toBe(true);
expect(res.merge).toEqual({ ok: true, conflict: false, output: '' });
// Both pages were fetched and written at their absolute paths.
expect(client.getPageJson).toHaveBeenCalledTimes(2);
const writtenPaths = fs.writes.map((w) => w.abs).sort();
expect(writtenPaths).toEqual(['/vault/A.md', '/vault/Sub/B.md']);
// The git op order is: stageAll -> commit -> checkout main -> merge.
expect(g.order).toEqual([
'stageAll',
`commit:docmost: sync 2 page(s)`,
'checkout:main',
'merge',
]);
});
});
describe('applyPullActions — ordering (write before move/delete before commit)', () => {
it('does writes, then move-old-path removals, then deletes, then commit/merge', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
toDelete: ['Dead.md'],
plannedDeleteCount: 1,
existingCount: 3,
}),
VAULT,
);
// The write to the new path happened (the page was fetched first).
expect(fs.writes.map((w) => w.abs)).toEqual(['/vault/New/M.md']);
// The move old-path removal AND the absence delete both ran, old path first.
expect(fs.rms).toEqual(['/vault/Old/M.md', '/vault/Dead.md']);
// git ops happen AFTER all fs work.
expect(g.order).toEqual([
'stageAll',
'commit:docmost: sync 1 page(s), 1 deleted',
'checkout:main',
'merge',
]);
});
});
describe('applyPullActions — ⭐ data-loss guard (move-on-success)', () => {
it('does NOT remove the OLD path when the new-path write FAILS', async () => {
// The page "m" is being moved Old/M.md -> New/M.md, but its new-path write
// FAILS. Removing the old path now would erase the only copy of the page.
// The guard must KEEP the old path.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs({ failWriteFor: new Set(['/vault/New/M.md']) });
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
}),
VAULT,
);
// The write failed -> recorded as a failure, nothing written.
expect(res.failed).toBe(1);
expect(res.written).toBe(0);
expect(fs.writes).toEqual([]);
// ⭐ The OLD path was NOT removed: the data-loss guard kept it.
expect(fs.rms).not.toContain('/vault/Old/M.md');
expect(fs.rms).toEqual([]);
expect(res.movedApplied).toBe(0);
// The commit subject reflects ACTUAL counts: 0 written, 0 deleted.
expect(g.committedSubject).toBe('docmost: sync 0 page(s)');
});
it('DOES remove the old path when the new-path write SUCCEEDS', async () => {
// Same move, but the write succeeds -> the old path is safely removed. This
// is the positive control proving the guard is keyed on write success.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs(); // no write failures
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
}),
VAULT,
);
expect(res.written).toBe(1);
expect(res.movedApplied).toBe(1);
expect(fs.rms).toContain('/vault/Old/M.md');
expect(g.committedSubject).toBe('docmost: sync 1 page(s)');
});
it('honours removeOldPath:false (path reused by another live page is kept)', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'X.md',
toRelPath: 'New/M.md',
removeOldPath: false, // X.md is a live target of another page
},
],
}),
VAULT,
);
// The reused old path is never removed.
expect(fs.rms).not.toContain('/vault/X.md');
expect(fs.rms).toEqual([]);
});
});
describe('applyPullActions — deletion suppression (SPEC §8)', () => {
it('skips deletions when the decision SUPPRESSES them (toDelete already empty)', async () => {
// computePullActions empties toDelete when suppressed, but assert the applier
// ALSO does no removals and the subject omits the deleted count.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'p1', relPath: 'A.md' }],
// Suppressed: toDelete is empty even though 5 were planned.
toDelete: [],
deletionDecision: { apply: false, reason: 'incomplete-fetch' },
plannedDeleteCount: 5,
existingCount: 6,
}),
VAULT,
);
expect(res.deleted).toBe(0);
expect(fs.rms).toEqual([]);
// Subject reflects 0 deleted (no ", N deleted" suffix).
expect(g.committedSubject).toBe('docmost: sync 1 page(s)');
// The suppression warning was emitted.
expect(console.warn).toHaveBeenCalledWith(
expect.stringMatching(/tree fetch incomplete/),
);
});
it('applies deletions present in toDelete when the decision allows them', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'p1', relPath: 'A.md' }],
toDelete: ['Dead1.md', 'Dead2.md'],
deletionDecision: APPLY,
plannedDeleteCount: 2,
existingCount: 5,
}),
VAULT,
);
expect(res.deleted).toBe(2);
expect(fs.rms).toEqual(['/vault/Dead1.md', '/vault/Dead2.md']);
// Subject reflects ACTUAL written + deleted counts.
expect(g.committedSubject).toBe('docmost: sync 1 page(s), 2 deleted');
});
});
describe('applyPullActions — commit subject reflects ACTUAL counts', () => {
it('counts only SUCCESSFUL writes when some page fetches fail', async () => {
// p2 fetch fails; the subject must say 1 page (only p1 was written), not 2.
const { client } = makeClient({ failFor: new Set(['p2']) });
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [
{ pageId: 'p1', relPath: 'A.md' },
{ pageId: 'p2', relPath: 'B.md' },
],
}),
VAULT,
);
expect(res.written).toBe(1);
expect(res.failed).toBe(1);
expect(g.committedSubject).toBe('docmost: sync 1 page(s)');
});
});
describe('applyPullActions — merge result is surfaced, not swallowed', () => {
it('returns conflict:true on a conflicting merge (no auto-resolve)', async () => {
const { client } = makeClient();
const g = makeGit({ ok: false, conflict: true, output: 'CONFLICT' });
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }),
VAULT,
);
expect(res.merge.conflict).toBe(true);
expect(res.merge.ok).toBe(false);
});
it('returns ok:false conflict:false on a non-conflict merge failure', async () => {
const { client } = makeClient();
const g = makeGit({ ok: false, conflict: false, output: 'some error' });
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }),
VAULT,
);
expect(res.merge.ok).toBe(false);
expect(res.merge.conflict).toBe(false);
});
});

View File

@@ -0,0 +1,182 @@
import { describe, expect, it } from 'vitest';
import * as Y from 'yjs';
import { TiptapTransformer } from '@hocuspocus/transformer';
// R-Collab-1 (test-strategy report §5): the SYNCHRONOUS read-transform-write
// body of `mutatePageContent`'s `onSynced` is now the exported pure-ish
// `applyTransformToYdoc(ydoc, transform)`. These tests drive it directly
// against a real `Y.Doc` — NO network, NO Hocuspocus server. They assert the
// SPEC §2 atomicity contract holds (read -> transform -> write with no await),
// plus the abort/throw/empty-doc-fallback behaviour preserved from the inline
// version.
//
// Import directly from the source .js (matches the repo's other collaboration
// tests, e.g. collaboration-mutate.test.ts).
import {
applyTransformToYdoc,
buildYDoc,
} from '../packages/docmost-client/src/lib/collaboration.js';
import { docmostExtensions } from '../packages/docmost-client/src/lib/docmost-schema.js';
// A valid minimal ProseMirror doc with a single paragraph of `text`.
function docWith(text: string): any {
return {
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text }] }],
};
}
// Seed a Y.Doc's "default" fragment with a ProseMirror doc, exactly the way the
// live collaboration server would have it after the initial sync. We encode via
// the same TiptapTransformer path the SUT reads back through.
function seedYdoc(content: any): Y.Doc {
const seeded = buildYDoc(content);
const ydoc = new Y.Doc();
Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(seeded));
return ydoc;
}
// Read the live ProseMirror doc back off a Y.Doc the same way the SUT does.
function readYdoc(ydoc: Y.Doc): any {
return TiptapTransformer.fromYdoc(ydoc, 'default');
}
describe('applyTransformToYdoc — synchronous read/transform/write (R-Collab-1)', () => {
it('writes back the transformed doc when transform mutates it', () => {
const ydoc = seedYdoc(docWith('original'));
let seenLive: any;
const result = applyTransformToYdoc(ydoc, (live) => {
seenLive = live;
return docWith('rewritten');
});
// The transform observed the seeded live doc...
expect(seenLive.content[0].content[0].text).toBe('original');
// ...and the write happened.
expect(result.written).toBe(true);
expect(result.doc).toEqual(docWith('rewritten'));
// The Y.Doc fragment now holds the NEW content (old text fully replaced).
const xml = ydoc.getXmlFragment('default').toString();
expect(xml).toContain('rewritten');
expect(xml).not.toContain('original');
});
it('is fully SYNCHRONOUS — the fragment is mutated before control returns', () => {
// The whole point of the SPEC §2 invariant: no `await` is yielded between
// reading the live doc and writing it back. We assert this structurally by
// observing the write took effect on the SAME synchronous tick — the
// function does not return a Promise, and the fragment already reflects the
// new doc the instant the call returns (no microtask hop needed).
const ydoc = seedYdoc(docWith('before'));
const ret = applyTransformToYdoc(ydoc, () => docWith('after'));
// Not a thenable: the contract is a plain synchronous value, not a Promise.
expect(typeof (ret as any).then).not.toBe('function');
// Already written synchronously.
expect(ydoc.getXmlFragment('default').toString()).toContain('after');
});
it('transform returning null ABORTS with NO write (live doc preserved)', () => {
const ydoc = seedYdoc(docWith('keepme'));
const before = ydoc.getXmlFragment('default').toString();
let seenLive: any;
const result = applyTransformToYdoc(ydoc, (live) => {
seenLive = live;
return null; // abort
});
expect(result.written).toBe(false);
// The returned doc is the live doc the transform saw (no write).
expect(result.doc).toBe(seenLive);
expect(result.doc.content[0].content[0].text).toBe('keepme');
// The fragment is byte-identical to before: nothing was written.
expect(ydoc.getXmlFragment('default').toString()).toBe(before);
});
it('transform THROWING propagates and leaves NO partial write', () => {
const ydoc = seedYdoc(docWith('intact'));
const before = ydoc.getXmlFragment('default').toString();
expect(() =>
applyTransformToYdoc(ydoc, () => {
throw new Error('boom from transform');
}),
).toThrow(/boom from transform/);
// The throw happens before any ydoc.transact, so the live doc is untouched.
expect(ydoc.getXmlFragment('default').toString()).toBe(before);
expect(readYdoc(ydoc).content[0].content[0].text).toBe('intact');
});
it('an empty/invalid live doc falls back to { type:"doc", content:[] }', () => {
// A brand-new Y.Doc has an empty "default" fragment; fromYdoc yields a doc
// with no content array, which the helper must coerce to a valid empty doc
// before handing it to the transform.
const ydoc = new Y.Doc();
let seenLive: any;
applyTransformToYdoc(ydoc, (live) => {
seenLive = live;
return null; // abort — we only care what the transform saw
});
expect(seenLive).toEqual({ type: 'doc', content: [] });
});
it('the empty-doc fallback is still WRITABLE (transform can write into it)', () => {
const ydoc = new Y.Doc();
const result = applyTransformToYdoc(ydoc, (live) => {
// The live doc is the empty fallback; produce real content from it.
expect(live).toEqual({ type: 'doc', content: [] });
return docWith('seeded from empty');
});
expect(result.written).toBe(true);
expect(ydoc.getXmlFragment('default').toString()).toContain(
'seeded from empty',
);
});
it('preserves concurrent live content the transform chooses to keep (atomicity)', () => {
// Model the SPEC §2 concern: the live doc already contains a concurrent
// human edit. A transform that appends without discarding must not lose it,
// and because the read+write is one synchronous unit nothing can interleave.
const live = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'human edit' }] },
],
};
const ydoc = seedYdoc(live);
const result = applyTransformToYdoc(ydoc, (liveDoc) => {
// Append a machine paragraph while keeping the human's paragraph.
return {
type: 'doc',
content: [
...liveDoc.content,
{ type: 'paragraph', content: [{ type: 'text', text: 'machine edit' }] },
],
};
});
expect(result.written).toBe(true);
const xml = ydoc.getXmlFragment('default').toString();
expect(xml).toContain('human edit');
expect(xml).toContain('machine edit');
});
});
// Sanity: the helper round-trips through the real schema, proving the seed/read
// path is faithful (not a degenerate empty-fragment artifact).
describe('applyTransformToYdoc — schema fidelity', () => {
it('round-trips a paragraph through the docmost schema unchanged', () => {
const ydoc = seedYdoc(docWith('round trip'));
const got = TiptapTransformer.fromYdoc(ydoc, 'default');
// The doc encodes/decodes against the real docmost extension set (the same
// set buildYDoc uses), so the seed/read path is the production one.
expect(got.type).toBe('doc');
expect(got.content[0].content[0].text).toBe('round trip');
expect(Array.isArray(docmostExtensions)).toBe(true);
});
});

View File

@@ -0,0 +1,193 @@
import { describe, expect, it } from 'vitest';
import { computePullActions } from '../src/pull.js';
import type { PageNode } from '../src/layout.js';
// R-Pull-2 (test-strategy report §5): `computePullActions` is the PURE half of
// the pull cycle — layout + planReconciliation + the SPEC §8 absence-deletion
// suppression decision, folded together, with NO IO. These tests exercise it
// without git/fs/network. The thin IO applier is covered in apply-pull-actions.
/** A live tree node (only the fields the layout / reconciliation read). */
function node(
id: string,
title: string,
parentPageId: string | null = null,
hasChildren = false,
): PageNode {
return { id, title, slugId: id, parentPageId, hasChildren };
}
describe('computePullActions — normal complete fetch', () => {
it('builds toWrite from the live layout and an empty existing set (all adds)', () => {
const pages = [
node('root', 'Root', null, true),
node('child', 'Child', 'root'),
];
const actions = computePullActions({
pages,
treeComplete: true,
existing: [],
});
// Each live page is (re)written at its deterministic layout path.
expect(actions.toWrite).toEqual([
{ pageId: 'root', relPath: 'Root.md' },
{ pageId: 'child', relPath: 'Root/Child.md' },
]);
expect(actions.moved).toEqual([]);
expect(actions.toDelete).toEqual([]);
expect(actions.deletionDecision).toEqual({ apply: true });
});
it('plans toWrite / moved / toDelete correctly for a mixed reconciliation', () => {
const pages = [
node('keep', 'Keep'),
node('mover', 'Mover'),
node('fresh', 'Fresh'),
];
// existing: keep (same path), mover (old path -> move), dead (absent -> delete).
const existing = [
{ pageId: 'keep', relPath: 'Keep.md' },
{ pageId: 'mover', relPath: 'Old/Mover.md' },
{ pageId: 'dead', relPath: 'Dead.md' },
];
const actions = computePullActions({ pages, treeComplete: true, existing });
expect(actions.toWrite).toEqual([
{ pageId: 'keep', relPath: 'Keep.md' },
{ pageId: 'mover', relPath: 'Mover.md' },
{ pageId: 'fresh', relPath: 'Fresh.md' },
]);
// mover moved from Old/Mover.md to the new layout path Mover.md.
expect(actions.moved).toEqual([
{
pageId: 'mover',
fromRelPath: 'Old/Mover.md',
toRelPath: 'Mover.md',
removeOldPath: true,
},
]);
// dead is absent from live -> an absence delete (decision applies it).
expect(actions.toDelete).toEqual(['Dead.md']);
expect(actions.deletionDecision).toEqual({ apply: true });
});
it('a live page moved to a NEW path is in `moved`, its old path NOT in toDelete', () => {
const pages = [node('p1', 'Doc', 'newparent'), node('newparent', 'NewParent', null, true)];
const existing = [{ pageId: 'p1', relPath: 'OldParent/Doc.md' }];
const actions = computePullActions({ pages, treeComplete: true, existing });
const moved = actions.moved.find((m) => m.pageId === 'p1');
expect(moved).toBeTruthy();
expect(moved!.fromRelPath).toBe('OldParent/Doc.md');
expect(moved!.toRelPath).toBe('NewParent/Doc.md');
// The old path is a MOVE removal, NEVER an absence delete.
expect(actions.toDelete).not.toContain('OldParent/Doc.md');
expect(actions.toDelete).toEqual([]);
});
});
describe('computePullActions — SPEC §8 suppression folded in', () => {
it('INCOMPLETE fetch (treeComplete:false) SUPPRESSES absence deletions', () => {
// dead is absent from the live tree, but the tree fetch was partial -> the
// missing pageId is NOT proof of deletion, so toDelete must be EMPTY and the
// decision must report apply:false / incomplete-fetch.
const pages = [node('keep', 'Keep')];
const existing = [
{ pageId: 'keep', relPath: 'Keep.md' },
{ pageId: 'dead', relPath: 'Dead.md' },
];
const actions = computePullActions({
pages,
treeComplete: false,
existing,
});
expect(actions.deletionDecision).toEqual({
apply: false,
reason: 'incomplete-fetch',
});
// Suppressed: nothing to delete this cycle...
expect(actions.toDelete).toEqual([]);
// ...but the planned count is still reported (for the suppression log).
expect(actions.plannedDeleteCount).toBe(1);
// Writes/updates still happen regardless of the suppression.
expect(actions.toWrite).toEqual([{ pageId: 'keep', relPath: 'Keep.md' }]);
});
it('MASS-DELETE guard (>50% of a non-trivial vault) SUPPRESSES deletions', () => {
// 1 live page, 10 existing tracked, 9 of them absent -> 9/10 > 50% on a
// non-trivial (>=4) vault -> mass-delete suppression.
const pages = [node('p0', 'P0')];
const existing = [
{ pageId: 'p0', relPath: 'P0.md' },
...Array.from({ length: 9 }, (_, i) => ({
pageId: `gone${i}`,
relPath: `Gone${i}.md`,
})),
];
const actions = computePullActions({ pages, treeComplete: true, existing });
expect(actions.deletionDecision).toEqual({
apply: false,
reason: 'mass-delete',
});
expect(actions.toDelete).toEqual([]);
expect(actions.plannedDeleteCount).toBe(9);
expect(actions.existingCount).toBe(10);
});
it('moves are NOT suppressed even on an incomplete fetch', () => {
// A moved page is PRESENT in live, so its move is real regardless of the
// suppression (which only governs ABSENCE deletes).
const pages = [node('m', 'Moved')];
const existing = [{ pageId: 'm', relPath: 'Old/Moved.md' }];
const actions = computePullActions({
pages,
treeComplete: false,
existing,
});
expect(actions.moved).toEqual([
{
pageId: 'm',
fromRelPath: 'Old/Moved.md',
toRelPath: 'Moved.md',
removeOldPath: true,
},
]);
// No absence deletes were planned here, so the decision trivially applies.
expect(actions.toDelete).toEqual([]);
});
it('empty-live with tracked files SUPPRESSES (failed fetch, not a real wipe)', () => {
const existing = [
{ pageId: 'a', relPath: 'A.md' },
{ pageId: 'b', relPath: 'B.md' },
];
const actions = computePullActions({
pages: [],
treeComplete: true,
existing,
});
expect(actions.deletionDecision).toEqual({
apply: false,
reason: 'empty-live',
});
expect(actions.toDelete).toEqual([]);
expect(actions.toWrite).toEqual([]);
});
});
describe('computePullActions — degenerate inputs', () => {
it('skips nodes without an id and nodes with no layout entry', () => {
const pages = [
node('p1', 'Valid'),
{ id: '', title: 'NoId' } as PageNode, // skipped (no id)
];
const actions = computePullActions({
pages,
treeComplete: true,
existing: [],
});
expect(actions.toWrite).toEqual([{ pageId: 'p1', relPath: 'Valid.md' }]);
});
});

151
test/git-merge.test.ts Normal file
View File

@@ -0,0 +1,151 @@
import { execFile } from 'node:child_process';
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { promisify } from 'node:util';
import { afterEach, beforeAll, describe, expect, it } from 'vitest';
import {
VaultGit,
BOT_AUTHOR_NAME,
BOT_AUTHOR_EMAIL,
} from '../src/git.js';
// git 3-way merge integration (test-strategy report §2 git gap). The existing
// git.test.ts covers a fast-forward merge and a conflicting merge; this file
// adds the two MISSING cases against a REAL temp git repo under os.tmpdir():
// 1. a clean NON-fast-forward 3-way merge of non-overlapping changes ->
// { ok:true, conflict:false } and a real merge commit (two parents);
// 2. a NON-conflict merge FAILURE -> { ok:false, conflict:false } so the pull
// cycle does not mislabel it a "conflict markers in vault" situation.
// The conflicting-merge case (markers + conflict:true) already lives in
// git.test.ts and is NOT duplicated here. Skips gracefully if git is missing.
const execFileAsync = promisify(execFile);
async function gitAvailable(): Promise<boolean> {
try {
await execFileAsync('git', ['--version']);
return true;
} catch {
return false;
}
}
/** Number of parents of HEAD (2 => a real merge commit). */
async function headParentCount(dir: string): Promise<number> {
const { stdout } = await execFileAsync(
'git',
['--no-pager', 'rev-list', '--parents', '-n', '1', 'HEAD'],
{ cwd: dir },
);
// Output: "<commit> <parent1> <parent2?>..." — parents are the trailing ids.
return stdout.trim().split(/\s+/).length - 1;
}
describe('VaultGit.merge — 3-way merge integration (temp repo)', () => {
let available = false;
let dir: string;
beforeAll(async () => {
available = await gitAvailable();
});
afterEach(async () => {
if (dir) await rm(dir, { recursive: true, force: true });
});
async function freshRepo(): Promise<{ vault: string; git: VaultGit }> {
dir = await mkdtemp(join(tmpdir(), 'docmost-merge-'));
const git = new VaultGit(dir);
await git.ensureRepo();
await git.ensureBranch('docmost', 'main');
return { vault: dir, git };
}
async function commit(
git: VaultGit,
subject: string,
author = { name: BOT_AUTHOR_NAME, email: BOT_AUTHOR_EMAIL },
): Promise<void> {
await git.stageAll();
await git.commit(subject, {
authorName: author.name,
authorEmail: author.email,
});
}
it('clean NON-fast-forward 3-way merge of non-overlapping changes -> merge commit', async () => {
if (!available) return; // skip gracefully when git is unavailable
const { vault, git } = await freshRepo();
// Seed a shared base file on main so both branches diverge from a real
// merge-base (not an empty tree).
await writeFile(join(vault, 'base.md'), 'shared base\n', 'utf8');
await commit(git, 'base');
// Re-create docmost from this base so the merge-base is `base`.
await execFileAsync('git', ['--no-pager', 'branch', '-f', 'docmost', 'main'], {
cwd: vault,
});
// docmost adds doc-only.md (a DIFFERENT file than main touches).
await git.checkout('docmost');
await writeFile(join(vault, 'doc-only.md'), 'from docmost\n', 'utf8');
await commit(git, 'docmost: add doc-only');
// main adds main-only.md AND advances past the merge-base, so the merge can
// NOT fast-forward — it must create a real 3-way merge commit.
await git.checkout('main');
await writeFile(join(vault, 'main-only.md'), 'from main\n', 'utf8');
await commit(git, 'local: add main-only', {
name: 'Human',
email: 'human@local',
});
const res = await git.merge('docmost');
expect(res.ok).toBe(true);
expect(res.conflict).toBe(false);
// A real (non-FF) merge: HEAD has TWO parents.
expect(await headParentCount(vault)).toBe(2);
// Both non-overlapping changes are present on main after the merge.
const tracked = await git.listTrackedFiles();
expect(new Set(tracked)).toEqual(
new Set(['base.md', 'main-only.md', 'doc-only.md']),
);
});
it('NON-conflict merge FAILURE -> { ok:false, conflict:false } (not mislabeled a conflict)', async () => {
if (!available) return;
const { vault, git } = await freshRepo();
// base file on main, then fork docmost from this base.
await writeFile(join(vault, 'f.md'), 'base\n', 'utf8');
await commit(git, 'base');
await execFileAsync('git', ['--no-pager', 'branch', '-f', 'docmost', 'main'], {
cwd: vault,
});
// docmost modifies f.md (committed).
await git.checkout('docmost');
await writeFile(join(vault, 'f.md'), 'docmost change\n', 'utf8');
await commit(git, 'docmost: edit f');
// Back on main, leave an UNCOMMITTED local change to f.md. git refuses the
// merge ("Your local changes ... would be overwritten by merge") and exits
// non-zero — but there are NO unmerged index paths, so this is a clean
// FAILURE, not a conflict. `merge()` must report { ok:false, conflict:false }
// so pull.ts does not falsely claim conflict markers are in the vault.
await git.checkout('main');
await writeFile(join(vault, 'f.md'), 'uncommitted local edit\n', 'utf8');
// NOTE: deliberately NOT staged/committed.
const res = await git.merge('docmost');
expect(res.ok).toBe(false);
expect(res.conflict).toBe(false);
// The merge did not start: HEAD is still a single-parent commit.
expect(await headParentCount(vault)).toBe(1);
// And the repo is NOT left mid-merge (no MERGE_HEAD / unmerged paths).
expect(await git.isMergeInProgress()).toBe(false);
});
});

120
test/read-existing.test.ts Normal file
View File

@@ -0,0 +1,120 @@
import { describe, expect, it } from 'vitest';
import { readExisting } from '../src/pull.js';
// R-Pull-1 (test-strategy report §5): `readExisting` now takes injectable IO
// (`listTracked` / `readFile`), so its parsing + skip rules are unit-testable
// without a real git repo or filesystem. These tests pass fakes only — no git,
// no fs, no network.
/** Build a valid self-contained file with a `docmost:meta` block. */
function withMeta(meta: Record<string, unknown>, body = '# Title\nbody\n'): string {
return `<!-- docmost:meta\n${JSON.stringify(meta)}\n-->\n\n${body}`;
}
/** A fake `readFile` backed by an in-memory map (rejects on a missing key). */
function fakeReadFile(files: Record<string, string>) {
return async (rel: string): Promise<string> => {
if (!(rel in files)) {
throw Object.assign(new Error(`ENOENT: ${rel}`), { code: 'ENOENT' });
}
return files[rel];
};
}
describe('readExisting (R-Pull-1, injected IO)', () => {
it('recovers { pageId, relPath } for valid tracked files', async () => {
const files = {
'Space/A.md': withMeta({ version: 1, pageId: 'p1', title: 'A' }),
'Space/Sub/B.md': withMeta({ version: 1, pageId: 'p2', title: 'B' }),
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
readFile: fakeReadFile(files),
});
expect(result).toEqual([
{ pageId: 'p1', relPath: 'Space/A.md' },
{ pageId: 'p2', relPath: 'Space/Sub/B.md' },
]);
});
it('SKIPS a file with no docmost:meta block (plain hand-written markdown)', async () => {
const files = {
'tracked.md': withMeta({ version: 1, pageId: 'p1' }),
'stray.md': '# Just a hand-written note\n\nNo meta here.\n',
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
readFile: fakeReadFile(files),
});
// Only the engine-tracked file (with a pageId) survives.
expect(result).toEqual([{ pageId: 'p1', relPath: 'tracked.md' }]);
});
it('SKIPS a file whose meta has no pageId', async () => {
const files = {
'has-id.md': withMeta({ version: 1, pageId: 'keep' }),
'no-id.md': withMeta({ version: 1, title: 'untitled', slugId: 's' }),
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
readFile: fakeReadFile(files),
});
expect(result).toEqual([{ pageId: 'keep', relPath: 'has-id.md' }]);
});
it('SKIPS a file with an unparseable (invalid-JSON) meta block, does not throw', async () => {
// Invalid JSON inside the meta block makes parseDocmostMarkdown throw; the
// skip-rule must swallow it and treat the file as not-engine-tracked.
const files = {
'good.md': withMeta({ version: 1, pageId: 'good' }),
'broken.md': '<!-- docmost:meta\n{ this is : not, json }\n-->\n\nbody\n',
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
readFile: fakeReadFile(files),
});
expect(result).toEqual([{ pageId: 'good', relPath: 'good.md' }]);
});
it('does NOT throw when readFile REJECTS (tracked but missing) — treats it as skipped', async () => {
const files = {
'present.md': withMeta({ version: 1, pageId: 'present' }),
// "ghost.md" is listed as tracked but absent from the file map -> reject.
};
const result = await readExisting({
listTracked: async () => ['present.md', 'ghost.md'],
readFile: fakeReadFile(files),
});
// The rejection is swallowed; the present file still comes through.
expect(result).toEqual([{ pageId: 'present', relPath: 'present.md' }]);
});
it('returns an empty list when nothing is tracked', async () => {
const result = await readExisting({
listTracked: async () => [],
readFile: async () => {
throw new Error('should not be called');
},
});
expect(result).toEqual([]);
});
it('combines all skip rules in one listing (only the valid files survive)', async () => {
const files = {
'ok1.md': withMeta({ version: 1, pageId: 'a' }),
'no-meta.md': 'plain\n',
'no-id.md': withMeta({ version: 1, title: 'x' }),
'broken.md': '<!-- docmost:meta\n{bad\n-->\nbody\n',
'ok2.md': withMeta({ version: 1, pageId: 'b' }),
// missing.md rejects on read.
};
const result = await readExisting({
listTracked: async () => [...Object.keys(files), 'missing.md'],
readFile: fakeReadFile(files),
});
expect(result).toEqual([
{ pageId: 'a', relPath: 'ok1.md' },
{ pageId: 'b', relPath: 'ok2.md' },
]);
});
});

Binary file not shown.