feat(sync): add git vault layer (§5) and the Docmost->vault pull cycle (§6)
Turn the read-only mirror into a git-backed pull cycle. Read-only toward Docmost.
- git.ts (VaultGit): system-git wrapper, all ops cwd=vaultPath (vault is its own
repo under data/vault, never the source repo); ensureRepo/branches main+docmost,
commit with provenance (author/committer identity + Docmost-Sync-Source trailer,
§7.3), merge with conflict surfacing (no auto-resolve, §9), isMergeInProgress;
GIT_DIR/GIT_WORK_TREE stripped from env (§12 cwd isolation)
- stabilize.ts: normalize-on-write (one export->import->export fixpoint pass, §11)
- reconcile.ts: pure planReconciliation (add/update/move/delete by pageId) +
decideAbsenceDeletions gate
- pull.ts: write/commit on docmost -> merge into main; listSpaceTree completeness
signal suppresses absence-deletions on a partial fetch (§8); mass-delete guard;
merge-in-progress guard makes re-runs converge (§12); move old-path removal only
on successful write
- docmost-client: listSpaceTree({pages, complete}) without touching the 1:1-copied
enumerateSpacePages
- tests: reconcile planner + decideAbsenceDeletions, VaultGit incl. real temp-repo
merge conflict, listSpaceTree completeness (586 green)
Push to a git remote and the FS->Docmost direction are deferred to the next increment.
This commit is contained in:
@@ -2619,6 +2619,69 @@ export class DocmostClient {
|
|||||||
return this.enumerateSpacePages(spaceId, rootPageId);
|
return this.enumerateSpacePages(spaceId, rootPageId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Completeness-tracking variant of the space tree walk (SPEC §8).
 *
 * Runs an iterative breadth-first walk over `listSidebarPages` (the private
 * `enumerateSpacePages` is intentionally left untouched so it stays 1:1 with
 * upstream for backport). Unlike that walk, partial fetches are NOT silently
 * swallowed: the result is `{ pages, complete }`, where `complete` is `false`
 * when
 *   - the seed level could not be fetched or was not an array,
 *   - ANY node's children fetch threw or did not return an array (that branch
 *     is skipped and the walk continues), or
 *   - the hard `MAX_NODES` cap was reached while unprocessed entries remained.
 *
 * Callers use the flag to SUPPRESS absence-based deletions on an incomplete
 * fetch. Per SPEC §8 a page missing from a partial tree is NOT proof that it
 * was deleted ("deletion detection is an exact query, not an inference from
 * 'the pageId disappeared from the tree'").
 *
 * @param spaceId    Space whose page tree is walked.
 * @param rootPageId Optional page id; when given, the walk starts from that
 *                   page's children instead of the space roots.
 * @returns The pages collected in breadth-first order (each id at most once)
 *          and whether the walk covered everything it attempted to reach.
 */
async listSpaceTree(
  spaceId: string,
  rootPageId?: string,
): Promise<{ pages: any[]; complete: boolean }> {
  const MAX_NODES = 10000;
  const result: any[] = [];
  const visited = new Set<string>();
  let complete = true;

  // Seed the queue with the starting level (subtree children or roots). A
  // failure to fetch the seed level — or a non-array answer — means we know
  // nothing about the tree, so the (empty) result is reported as incomplete
  // rather than as a complete, empty space.
  let queue: any[];
  try {
    const seed = await this.listSidebarPages(spaceId, rootPageId);
    if (!Array.isArray(seed)) return { pages: result, complete: false };
    // Copy: the walk appends to the queue and must not mutate an array that
    // `listSidebarPages` may still own.
    queue = [...seed];
  } catch {
    return { pages: result, complete: false };
  }

  // `head` is the dequeue cursor; advancing an index avoids the O(n) cost of
  // `Array.prototype.shift` on every step.
  let head = 0;
  while (head < queue.length && result.length < MAX_NODES) {
    const node = queue[head++];
    if (!node || typeof node !== "object" || !node.id) continue;

    // Skip already-seen ids to guard against cycles / duplicate references.
    if (visited.has(node.id)) continue;
    visited.add(node.id);

    result.push(node);

    if (!node.hasChildren) continue;

    try {
      const children = await this.listSidebarPages(spaceId, node.id);
      if (Array.isArray(children)) {
        for (const child of children) queue.push(child);
      } else {
        // The node claims children but none could be read: partial tree.
        complete = false;
      }
    } catch {
      // A failure fetching one node's children must not abort the whole
      // walk: skip this branch and keep enumerating the rest, but RECORD
      // that the returned tree is incomplete (SPEC §8).
      complete = false;
    }
  }

  // Stopping on the node cap with unprocessed entries left also means the
  // tree is incomplete.
  if (head < queue.length && result.length >= MAX_NODES) {
    complete = false;
  }

  return { pages: result, complete };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* "Changes since T" scan (SPEC §16). There is NO server-side `updatedAt`
|
* "Changes since T" scan (SPEC §16). There is NO server-side `updatedAt`
|
||||||
* filter in Docmost and `/pages/recent` is CURSOR-paginated, so this is a
|
* filter in Docmost and `/pages/recent` is CURSOR-paginated, so this is a
|
||||||
|
|||||||
352
src/git.ts
Normal file
352
src/git.ts
Normal file
@@ -0,0 +1,352 @@
|
|||||||
|
/**
|
||||||
|
* Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
|
||||||
|
*
|
||||||
|
* IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
|
||||||
|
* which is the vault's OWN git repository (default `data/vault`), SEPARATE from
|
||||||
|
* the docmost-sync source repo. This module MUST NEVER run git against the
|
||||||
|
* source repo. `data/` is gitignored by the source repo, so a nested repo under
|
||||||
|
* `data/vault` is safe. The pull cycle is READ-ONLY toward Docmost; this module
|
||||||
|
* only touches the local vault git, never a git remote (push is deferred, see
|
||||||
|
* SPEC §7).
|
||||||
|
*
|
||||||
|
* Implementation notes:
|
||||||
|
* - We shell out via `node:child_process` `execFile` (promisified), passing
|
||||||
|
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
|
||||||
|
* if a page title / branch name contains shell metacharacters.
|
||||||
|
* - Every invocation prepends `--no-pager` so git never blocks on a pager.
|
||||||
|
* - "nothing to commit" is treated as a graceful no-op, not an error.
|
||||||
|
*/
|
||||||
|
import { execFile } from "node:child_process";
|
||||||
|
import { mkdir } from "node:fs/promises";
|
||||||
|
import { promisify } from "node:util";
|
||||||
|
|
||||||
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
|
/** Bot identity used for engine-authored vault commits (SPEC §7.3). */
|
||||||
|
export const BOT_AUTHOR_NAME = "Docmost Sync";
|
||||||
|
export const BOT_AUTHOR_EMAIL = "docmost-sync@local";
|
||||||
|
|
||||||
|
/** Default branch the vault repo is initialized on. */
|
||||||
|
export const DEFAULT_BRANCH = "main";
|
||||||
|
|
||||||
|
/**
 * Outcome of `VaultGit.merge`. A failed merge is only reported as a conflict
 * when git actually left unmerged paths behind; any other failure has both
 * flags `false`.
 */
export interface MergeResult {
  /** `true` when git exited 0, i.e. the merge applied (fast-forward or clean 3-way). */
  ok: boolean;
  /** `true` when the merge stopped on conflicts and markers remain in the worktree. */
  conflict: boolean;
  /** Combined stdout + stderr of the merge command, kept for logs and diagnostics. */
  output: string;
}
|
||||||
|
|
||||||
|
/**
 * Provenance settings for an engine-authored commit (SPEC §7.3). The same
 * name/email pair is applied to both the author and the committer.
 */
export interface CommitOptions {
  /** Name recorded as author and committer of the commit. */
  authorName: string;
  /** E-mail recorded as author and committer of the commit. */
  authorEmail: string;
  /**
   * Optional trailer lines placed after the commit message, one per entry
   * (for example `Docmost-Sync-Source: docmost`). They are the
   * machine-readable provenance the loop-guard keys on (SPEC §12,
   * "commit-attribution").
   */
  trailers?: string[];
}
|
||||||
|
|
||||||
|
/**
 * A git wrapper bound to a single vault path. Construct once per vault; every
 * method runs the system `git` binary with `cwd = vaultPath` and the
 * environment produced by `vaultGitEnv`.
 */
export class VaultGit {
  constructor(private readonly vaultPath: string) {}

  /**
   * Run `git --no-pager <args...>` in the vault.
   *
   * @param args Arguments passed to git as an array (no shell involved).
   * @returns Trimmed stdout.
   * @throws Error naming the command and carrying stderr (or stdout / the
   *         spawn message) when git exits non-zero or cannot be started.
   */
  private async run(args: string[]): Promise<string> {
    try {
      const { stdout } = await execFileAsync("git", ["--no-pager", ...args], {
        cwd: this.vaultPath,
        // Generous buffer: `git status --porcelain` / file listings on a large
        // vault can be sizable.
        maxBuffer: 64 * 1024 * 1024,
        env: vaultGitEnv(),
      });
      return stdout.trim();
    } catch (err: unknown) {
      const e = err as { stderr?: string; stdout?: string; message?: string };
      const detail = (e.stderr || e.stdout || e.message || "").toString().trim();
      throw new Error(`git ${args.join(" ")} failed: ${detail}`);
    }
  }

  /**
   * Like `run`, but returns the exit info instead of throwing. Used where a
   * non-zero exit is an expected, meaningful state (a merge conflict, a
   * "does this ref exist" probe, `diff --quiet`).
   *
   * @returns `code` is git's exit status; a failure without a numeric code
   *          (for example the binary could not be spawned) is reported as 1.
   *          `stdout` / `stderr` are returned untrimmed.
   */
  private async runRaw(
    args: string[],
  ): Promise<{ code: number; stdout: string; stderr: string }> {
    try {
      const { stdout, stderr } = await execFileAsync(
        "git",
        ["--no-pager", ...args],
        { cwd: this.vaultPath, maxBuffer: 64 * 1024 * 1024, env: vaultGitEnv() },
      );
      return { code: 0, stdout, stderr };
    } catch (err: unknown) {
      const e = err as {
        code?: number;
        stdout?: string;
        stderr?: string;
      };
      return {
        code: typeof e.code === "number" ? e.code : 1,
        stdout: e.stdout ?? "",
        stderr: e.stderr ?? "",
      };
    }
  }

  /**
   * Ensure the vault directory exists and is an initialized git repo on
   * `main` with an initial (empty) commit so branches have a common base.
   * Safe to call on every run: each step is skipped when already satisfied.
   * Sets a LOCAL bot identity for the vault repo if none is set, so engine
   * commits never fall back to a global/unset identity.
   */
  async ensureRepo(): Promise<void> {
    await mkdir(this.vaultPath, { recursive: true });

    if (!(await this.isRepo())) {
      // `init.defaultBranch` selects the initial branch on git >= 2.28 and is
      // simply an unknown (ignored) config key on older binaries, whereas
      // `init -b` would make those binaries fail outright. The `checkout -B`
      // below covers the older-git case.
      await this.run(["-c", `init.defaultBranch=${DEFAULT_BRANCH}`, "init"]);
    }

    // Set a local identity for the vault repo if unset, so engine commits have
    // a deterministic committer even on a machine with no global git config.
    if (!(await this.hasLocalConfig("user.name"))) {
      await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
    }
    if (!(await this.hasLocalConfig("user.email"))) {
      await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
    }

    // Create the initial empty commit on `main` if the repo has no commits
    // yet, so both `main` and (later) `docmost` have a common base.
    if (!(await this.hasAnyCommit())) {
      // Make sure we are on the default branch before the first commit.
      await this.run(["checkout", "-B", DEFAULT_BRANCH]);
      await this.commitRaw("init vault", {
        authorName: BOT_AUTHOR_NAME,
        authorEmail: BOT_AUTHOR_EMAIL,
        allowEmpty: true,
      });
    }
  }

  /**
   * True only if the vault directory is the ROOT of its own git work-tree.
   *
   * `--is-inside-work-tree` alone is not enough: git discovers repositories
   * by walking up parent directories, so an uninitialized vault directory
   * nested inside another checkout (the default `data/vault` sits inside the
   * source repo) would report "true" and every later command would operate
   * on that outer repo. `--show-cdup` prints the relative path up to the
   * work-tree root and is empty exactly when `cwd` is that root.
   */
  private async isRepo(): Promise<boolean> {
    const r = await this.runRaw([
      "rev-parse",
      "--is-inside-work-tree",
      "--show-cdup",
    ]);
    if (r.code !== 0) return false;
    // One output line per option, in the order given.
    const [inside = "", cdup = ""] = r.stdout.split("\n");
    return inside.trim() === "true" && cdup.trim() === "";
  }

  /** True if a LOCAL git config key is set (non-empty) in the vault repo. */
  private async hasLocalConfig(key: string): Promise<boolean> {
    const r = await this.runRaw(["config", "--local", "--get", key]);
    return r.code === 0 && r.stdout.trim().length > 0;
  }

  /** True if the repo has at least one commit (HEAD resolves). */
  private async hasAnyCommit(): Promise<boolean> {
    const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
    return r.code === 0;
  }

  /** True if a local branch with the given name exists. */
  async branchExists(name: string): Promise<boolean> {
    const r = await this.runRaw([
      "rev-parse",
      "--verify",
      `refs/heads/${name}`,
    ]);
    return r.code === 0;
  }

  /**
   * Create `name` from `fromBranch` if it does not already exist. No-op (and
   * no checkout) when the branch is already present.
   */
  async ensureBranch(name: string, fromBranch: string): Promise<void> {
    if (await this.branchExists(name)) return;
    await this.run(["branch", name, fromBranch]);
  }

  /** Name of the currently checked-out branch (`rev-parse --abbrev-ref HEAD`). */
  async currentBranch(): Promise<string> {
    return this.run(["rev-parse", "--abbrev-ref", "HEAD"]);
  }

  /** Check out an existing branch. Throws if git refuses the checkout. */
  async checkout(name: string): Promise<void> {
    await this.run(["checkout", name]);
  }

  /** Stage everything (adds, modifications, deletions) via `git add -A`. */
  async stageAll(): Promise<void> {
    await this.run(["add", "-A"]);
  }

  /**
   * True if the vault is mid-merge (an unresolved merge from a previous run,
   * SPEC §9 / §12). Detected via a resolvable `MERGE_HEAD` OR any unmerged
   * (conflicted) index entries (`git ls-files -u`). The pull cycle checks
   * this BEFORE any checkout so a left-over merge produces a clear,
   * actionable message instead of a raw "you need to resolve your current
   * index first" failure deep inside `checkout`.
   */
  async isMergeInProgress(): Promise<boolean> {
    // MERGE_HEAD resolves while a merge is in progress.
    const mergeHead = await this.runRaw([
      "rev-parse",
      "--verify",
      "--quiet",
      "MERGE_HEAD",
    ]);
    if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0) return true;
    // Belt-and-suspenders: unmerged index entries also mean the working tree
    // is mid-conflict and a checkout would refuse.
    const unmerged = await this.runRaw(["ls-files", "-u"]);
    return unmerged.code === 0 && unmerged.stdout.trim().length > 0;
  }

  /**
   * Commit the currently STAGED changes with an explicit author/committer
   * identity and the given trailers appended to the message (SPEC §7.3
   * provenance). The caller is expected to have staged its changes first
   * (e.g. via `stageAll`).
   *
   * @returns `true` if a commit was made, `false` if nothing was staged
   *          (graceful no-op).
   * @throws Error if the staged-changes probe itself fails, or if
   *         `git commit` fails.
   */
  async commit(message: string, opts: CommitOptions): Promise<boolean> {
    // Nothing staged -> nothing to commit. Treat as a no-op (SPEC §11: a
    // deterministic re-pull of unchanged pages produces identical bytes, so
    // git sees no diff and we must not error).
    const staged = await this.runRaw(["diff", "--cached", "--quiet"]);
    // `diff --cached --quiet` exits 0 when the index matches HEAD and 1 when
    // there are staged changes. Anything else is a real git failure and must
    // not be mistaken for "there is something to commit".
    if (staged.code === 0) return false;
    if (staged.code !== 1) {
      const detail = (staged.stderr || staged.stdout).trim();
      throw new Error(`git diff --cached --quiet failed: ${detail}`);
    }

    await this.commitRaw(message, opts);
    return true;
  }

  /**
   * Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
   * Builds the full message with appended trailers and sets author AND
   * committer identity via env vars, so the committer matches the author
   * instead of the repo default.
   *
   * @throws Error carrying git's output when `git commit` fails.
   */
  private async commitRaw(
    message: string,
    opts: CommitOptions & { allowEmpty?: boolean },
  ): Promise<void> {
    const fullMessage = buildCommitMessage(message, opts.trailers);
    const args = ["commit", "-m", fullMessage];
    if (opts.allowEmpty) args.push("--allow-empty");

    try {
      await execFileAsync("git", ["--no-pager", ...args], {
        cwd: this.vaultPath,
        maxBuffer: 64 * 1024 * 1024,
        env: vaultGitEnv({
          GIT_AUTHOR_NAME: opts.authorName,
          GIT_AUTHOR_EMAIL: opts.authorEmail,
          GIT_COMMITTER_NAME: opts.authorName,
          GIT_COMMITTER_EMAIL: opts.authorEmail,
        }),
      });
    } catch (err: unknown) {
      const e = err as { stderr?: string; stdout?: string; message?: string };
      // git reports some refusals (e.g. "nothing to commit") on stdout only.
      const detail = (e.stderr || e.stdout || e.message || "").toString().trim();
      throw new Error(`git commit failed: ${detail}`);
    }
  }

  /**
   * Merge `fromBranch` into the current branch (`git merge --no-edit`).
   * Conflict state is SURFACED (returned), NOT auto-resolved (SPEC §9): the
   * conflict markers are left in the worktree for manual resolution.
   *
   * @returns `ok: true` when git exited 0. Otherwise `ok: false`, with
   *          `conflict` telling whether unmerged paths exist (so an unrelated
   *          failure is not mislabelled as a conflict).
   */
  async merge(fromBranch: string): Promise<MergeResult> {
    const r = await this.runRaw(["merge", "--no-edit", fromBranch]);
    const output = `${r.stdout}\n${r.stderr}`.trim();
    if (r.code === 0) {
      return { ok: true, conflict: false, output };
    }
    // A non-zero exit on merge most commonly means a conflict. Confirm by
    // checking for unmerged paths before labelling it as one.
    const conflict = await this.hasUnmergedPaths();
    return { ok: false, conflict, output };
  }

  /** True if the index has any unmerged (conflicted) paths. */
  private async hasUnmergedPaths(): Promise<boolean> {
    const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]);
    return r.code === 0 && r.stdout.trim().length > 0;
  }

  /**
   * List tracked files (paths relative to the vault root, forward-slash
   * separated). An optional glob (a git pathspec) narrows the listing, e.g.
   * `"*.md"`.
   *
   * Uses `ls-files -z`: without it git C-quotes any path containing
   * non-ASCII or special characters (`core.quotePath`), so a file named after
   * a Cyrillic page title would come back as an escaped, double-quoted string
   * that does not exist on disk. NUL-separated output is verbatim and also
   * survives newlines in file names.
   *
   * @throws Error when `git ls-files` fails.
   */
  async listTrackedFiles(glob?: string): Promise<string[]> {
    const args = ["ls-files", "-z"];
    // `--` keeps a pathspec that starts with `-` from being read as an option.
    if (glob) args.push("--", glob);
    // `runRaw` (not `run`) so the output is not trimmed: trimming could eat
    // leading/trailing whitespace that belongs to a file name.
    const r = await this.runRaw(args);
    if (r.code !== 0) {
      const detail = (r.stderr || r.stdout).trim();
      throw new Error(`git ${args.join(" ")} failed: ${detail}`);
    }
    return r.stdout.split("\0").filter((p) => p.length > 0);
  }
}
|
||||||
|
|
||||||
|
/**
 * Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
 *
 * cwd-isolation is this module's central safety guarantee: every git command
 * MUST operate on the repository found at `cwd: vaultPath` and nothing else.
 * Git honours a family of repository-local environment variables that
 * override what it would discover from `cwd` — the repo location
 * (`GIT_DIR`, `GIT_WORK_TREE`, `GIT_COMMON_DIR`), the index
 * (`GIT_INDEX_FILE`), the object store (`GIT_OBJECT_DIRECTORY`,
 * `GIT_ALTERNATE_OBJECT_DIRECTORIES`) and the invocation prefix
 * (`GIT_PREFIX`). They are typically present when the process is started
 * from inside a git hook or another git command. All of them are stripped,
 * regardless of whether they came from `process.env` or from `extra`.
 *
 * @param extra Additional variables to set (author/committer identity, …).
 *              They override inherited values of the same name.
 * @returns A fresh environment object; `process.env` itself is not modified.
 */
function vaultGitEnv(
  extra?: Record<string, string>,
): NodeJS.ProcessEnv {
  const env: NodeJS.ProcessEnv = { ...process.env, ...extra };
  const repoRedirectingVars = [
    "GIT_DIR",
    "GIT_WORK_TREE",
    "GIT_COMMON_DIR",
    "GIT_INDEX_FILE",
    "GIT_OBJECT_DIRECTORY",
    "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    "GIT_PREFIX",
  ];
  for (const name of repoRedirectingVars) {
    delete env[name];
  }
  return env;
}
|
||||||
|
|
||||||
|
/**
 * Build a commit message with trailer lines appended (SPEC §7.3).
 *
 * The trailers form one contiguous paragraph separated from the subject by a
 * single blank line, which is what `git interpret-trailers` /
 * `git log --format=%(trailers)` require to recognise them. Blank or
 * whitespace-only entries are dropped: an empty line inside the list would
 * split the paragraph and hide every trailer before it from git's parser.
 * Exported for unit testing.
 *
 * @param subject  Commit subject (and optional body); returned unchanged when
 *                 there is no usable trailer.
 * @param trailers Trailer lines such as `Docmost-Sync-Source: docmost`.
 * @returns The message to pass to `git commit -m`.
 */
export function buildCommitMessage(
  subject: string,
  trailers?: string[],
): string {
  const usable = (trailers ?? []).filter((line) => line.trim().length > 0);
  if (usable.length === 0) return subject;
  return `${subject}\n\n${usable.join("\n")}`;
}
|
||||||
339
src/pull.ts
339
src/pull.ts
@@ -1,34 +1,110 @@
|
|||||||
/**
|
/**
|
||||||
* Read-only Docmost -> filesystem mirror (SPEC §6 pull, Phase 1).
|
* Pull cycle — Docmost -> vault (SPEC §6 "Docmost -> ФС").
|
||||||
*
|
*
|
||||||
* Walks the configured space's page tree and writes one self-contained `.md`
|
* This increment turns the read-only mirror into the git-backed pull cycle:
|
||||||
* per page under `<vaultPath>/<...ancestors>/<Title>.md`. This increment is
|
|
||||||
* READ-ONLY toward Docmost (no writes, no git) — it only fetches and writes
|
|
||||||
* local files. The meta block inside each file carries
|
|
||||||
* `{ version, pageId, slugId, title, spaceId, parentPageId }` (identity), so no
|
|
||||||
* external map file is needed.
|
|
||||||
*
|
*
|
||||||
* The pure tree -> path mapping lives in `./layout.js`; this file is a thin,
|
* 1. ensureRepo(vault); refuse if a merge is in progress (SPEC §9/§12);
|
||||||
* fault-tolerant I/O loop around it.
|
* ensureBranch("docmost", "main") (SPEC §5 branches)
|
||||||
|
* 2. checkout docmost
|
||||||
|
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
|
||||||
|
* desired `live` files (relPath via the pure sanitize/disambiguation layout)
|
||||||
|
* 4. parse `existing` tracked .md files (pageId + relPath from docmost:meta)
|
||||||
|
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
|
||||||
|
* is absence-only, moves are separate
|
||||||
|
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
|
||||||
|
* fetch (SPEC §8) and behind the mass-delete guard (defense in depth)
|
||||||
|
* 7. write each live page in its fixpoint form (normalize-on-write, SPEC §11);
|
||||||
|
* apply moved-old-path removals (only when the move write SUCCEEDED) and
|
||||||
|
* absence-delete removals (only when the decision allowed them)
|
||||||
|
* 8. stageAll + commit on `docmost` with the provenance trailer (SPEC §7.3)
|
||||||
|
* 9. checkout main + merge docmost (conflicts are surfaced, NOT auto-resolved,
|
||||||
|
* SPEC §9); push is deferred (SPEC §7)
|
||||||
|
* 10. one-line summary
|
||||||
|
*
|
||||||
|
* DIRECTION IS Docmost -> vault ONLY. Nothing here ever writes to Docmost
|
||||||
|
* (read-only: listSpaceTree + getPageJson). All git operations run against
|
||||||
|
* the vault repo (`cwd = vaultPath`), never the source repo (see ./git.ts).
|
||||||
*
|
*
|
||||||
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
|
* Requires a `.env` with real Docmost credentials. This file must COMPILE and
|
||||||
* be correct, but is not expected to be run without live access.
|
* be correct, but is NOT expected to be run without live access.
|
||||||
*
|
*
|
||||||
* Run via: npm run pull (-> node build/pull.js)
|
* Run via: npm run pull (-> node build/pull.js)
|
||||||
*/
|
*/
|
||||||
import { mkdir, writeFile } from "node:fs/promises";
|
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
||||||
import { join } from "node:path";
|
import { dirname, join, sep } from "node:path";
|
||||||
import { pathToFileURL } from "node:url";
|
import { pathToFileURL } from "node:url";
|
||||||
import { DocmostClient } from "docmost-client";
|
import { DocmostClient, parseDocmostMarkdown } from "docmost-client";
|
||||||
import { loadSettings } from "./settings.js";
|
import { loadSettings } from "./settings.js";
|
||||||
import { buildVaultLayout, type PageNode } from "./layout.js";
|
import { buildVaultLayout, type PageNode } from "./layout.js";
|
||||||
|
import {
|
||||||
|
VaultGit,
|
||||||
|
BOT_AUTHOR_NAME,
|
||||||
|
BOT_AUTHOR_EMAIL,
|
||||||
|
DEFAULT_BRANCH,
|
||||||
|
} from "./git.js";
|
||||||
|
import {
|
||||||
|
planReconciliation,
|
||||||
|
decideAbsenceDeletions,
|
||||||
|
type LiveEntry,
|
||||||
|
} from "./reconcile.js";
|
||||||
|
import { stabilizePageFile, type PageMeta } from "./stabilize.js";
|
||||||
|
|
||||||
// Number of pages fetched/written concurrently. Bounded so a large space does
|
// Engine-only mirror branch (SPEC §5): the engine writes here, humans never do.
|
||||||
// not open thousands of simultaneous requests/file handles.
|
const DOCMOST_BRANCH = "docmost";
|
||||||
|
// Machine-readable provenance the loop-guard keys on (SPEC §7.3 / §12).
|
||||||
|
const SOURCE_TRAILER = "Docmost-Sync-Source: docmost";
|
||||||
|
|
||||||
|
// Number of pages fetched/stabilized concurrently. Bounded so a large space
|
||||||
|
// does not open thousands of simultaneous requests/conversions at once.
|
||||||
const CONCURRENCY = 6;
|
const CONCURRENCY = 6;
|
||||||
// How often to log incremental progress (every N completed pages).
|
// How often to log incremental progress (every N completed pages).
|
||||||
const PROGRESS_EVERY = 25;
|
const PROGRESS_EVERY = 25;
|
||||||
|
|
||||||
|
/**
 * Resolve a vault-relative, forward-slash path against the vault root using
 * the platform's path joining rules.
 */
function relToAbs(vaultRoot: string, relPath: string): string {
  const parts = [vaultRoot, ...relPath.split("/")];
  return join(...parts);
}
|
||||||
|
|
||||||
|
/**
 * Build a vault-relative path: the directory segments followed by
 * `<stem>.md`, all joined with forward slashes.
 */
function segmentsToRelPath(segments: string[], stem: string): string {
  const fileName = `${stem}.md`;
  return segments.concat(fileName).join("/");
}
|
||||||
|
|
||||||
|
/**
 * Collect `{ pageId, relPath }` for every tracked `.md` file whose
 * `docmost:meta` yields a pageId. Files that cannot be read, whose meta
 * cannot be parsed, or that carry no pageId are left out — they are not
 * engine-tracked pages (e.g. a stray hand-written file).
 *
 * @param git       Vault git wrapper used to list tracked `*.md` files.
 * @param vaultRoot Vault root the tracked relative paths are resolved against.
 * @returns Entries in the order git listed the files.
 */
async function readExisting(
  git: VaultGit,
  vaultRoot: string,
): Promise<{ pageId: string; relPath: string }[]> {
  const found: { pageId: string; relPath: string }[] = [];
  const trackedPaths = await git.listTrackedFiles("*.md");

  for (const trackedPath of trackedPaths) {
    // git ls-files always emits forward-slash paths; normalize just in case.
    const rel = trackedPath.split(sep).join("/");

    let contents: string;
    try {
      contents = await readFile(relToAbs(vaultRoot, rel), "utf8");
    } catch {
      // Tracked but not readable on disk (mid-operation race) — skip it; the
      // next pull converges.
      continue;
    }

    let pageId: string | undefined;
    try {
      pageId = parseDocmostMarkdown(contents).meta?.pageId;
    } catch {
      // Unparseable meta — not engine-tracked; leave the file alone.
      pageId = undefined;
    }

    if (!pageId) continue;
    found.push({ pageId, relPath: rel });
  }

  return found;
}
|
||||||
|
|
||||||
async function main(): Promise<void> {
|
async function main(): Promise<void> {
|
||||||
const s = loadSettings();
|
const s = loadSettings();
|
||||||
const client = new DocmostClient(
|
const client = new DocmostClient(
|
||||||
@@ -40,75 +116,244 @@ async function main(): Promise<void> {
|
|||||||
const spaceId = s.docmostSpaceId;
|
const spaceId = s.docmostSpaceId;
|
||||||
const vaultRoot = s.vaultPath;
|
const vaultRoot = s.vaultPath;
|
||||||
|
|
||||||
const pages: PageNode[] = await client.listAllSpacePages(spaceId);
|
// 1. Ensure the vault git repo exists with main + an initial commit, and the
|
||||||
|
// engine-only `docmost` branch exists, branched from main.
|
||||||
|
const git = new VaultGit(vaultRoot);
|
||||||
|
await git.ensureRepo();
|
||||||
|
|
||||||
|
// 1b. Refuse to run on top of an unresolved merge (SPEC §9 / §12). A previous
|
||||||
|
// conflicting pull leaves the vault mid-merge; the next `checkout` would
|
||||||
|
// fail with a raw "you need to resolve your current index first". Detect
|
||||||
|
// it BEFORE any checkout and exit cleanly with an actionable message so
|
||||||
|
// re-runs converge once the human resolves (or aborts) the merge.
|
||||||
|
if (await git.isMergeInProgress()) {
|
||||||
|
console.error(
|
||||||
|
`vault has an unresolved merge at ${vaultRoot} — resolve it (or ` +
|
||||||
|
`'git merge --abort') and re-run (SPEC §9)`,
|
||||||
|
);
|
||||||
|
process.exitCode = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
await git.ensureBranch(DOCMOST_BRANCH, DEFAULT_BRANCH);
|
||||||
|
|
||||||
|
// 2. Work on the docmost mirror branch.
|
||||||
|
await git.checkout(DOCMOST_BRANCH);
|
||||||
|
|
||||||
|
// 3. Fetch the live tree and compute the desired files (relPath via the pure
|
||||||
|
// sanitize + disambiguation layout). `listSpaceTree` reports completeness:
|
||||||
|
// if ANY branch's children fetch failed or the node cap was hit, the tree
|
||||||
|
// is PARTIAL and absence-based deletions must be suppressed this cycle
|
||||||
|
// (SPEC §8) — a missing pageId in a partial tree is NOT proof of deletion.
|
||||||
|
const { pages: rawPages, complete: treeComplete } =
|
||||||
|
await client.listSpaceTree(spaceId);
|
||||||
|
const pages = rawPages as PageNode[];
|
||||||
const layout = buildVaultLayout(pages);
|
const layout = buildVaultLayout(pages);
|
||||||
|
|
||||||
const total = pages.length;
|
const live: LiveEntry[] = [];
|
||||||
|
const liveNodeByPageId = new Map<string, PageNode>();
|
||||||
|
for (const p of pages) {
|
||||||
|
if (!p || !p.id) continue;
|
||||||
|
const entry = layout.get(p.id);
|
||||||
|
if (!entry) continue;
|
||||||
|
live.push({
|
||||||
|
pageId: p.id,
|
||||||
|
relPath: segmentsToRelPath(entry.segments, entry.stem),
|
||||||
|
});
|
||||||
|
liveNodeByPageId.set(p.id, p);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Parse the existing tracked .md files (pageId + relPath).
|
||||||
|
const existing = await readExisting(git, vaultRoot);
|
||||||
|
|
||||||
|
// 5. Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
|
||||||
|
// `plan.moved` carries move old-path removals separately.
|
||||||
|
const plan = planReconciliation(live, existing);
|
||||||
|
|
||||||
|
// 6. Decide whether the ABSENCE-based deletions (`plan.toDelete`) may be
|
||||||
|
// applied this cycle (SPEC §8). The pure helper folds in BOTH the
|
||||||
|
// incomplete-fetch suppression (a partial tree must not look like
|
||||||
|
// deletions) AND the mass-delete guard (defense in depth). Moves are NOT
|
||||||
|
// governed by this — a moved page is present in `live`, so its old-path
|
||||||
|
// removal is real and applied unconditionally (subject only to its write
|
||||||
|
// succeeding).
|
||||||
|
const deleteDecision = decideAbsenceDeletions({
|
||||||
|
treeComplete,
|
||||||
|
liveCount: live.length,
|
||||||
|
existingCount: existing.length,
|
||||||
|
deleteCount: plan.toDelete.length,
|
||||||
|
});
|
||||||
|
if (!deleteDecision.apply) {
|
||||||
|
if (deleteDecision.reason === "incomplete-fetch") {
|
||||||
|
console.warn(
|
||||||
|
"pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)",
|
||||||
|
);
|
||||||
|
} else if (deleteDecision.reason === "empty-live") {
|
||||||
|
console.warn(
|
||||||
|
`pull: live fetch returned 0 pages but ${existing.length} file(s) are ` +
|
||||||
|
`tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
|
||||||
|
`Docmost is reachable.`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
console.warn(
|
||||||
|
`pull: plan would delete ${plan.toDelete.length} of ${existing.length} ` +
|
||||||
|
`tracked file(s) (mass-delete guard) — deletions suppressed this ` +
|
||||||
|
`cycle (SPEC §8). Verify the live Docmost tree, then re-run.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 7. Write each live page in its fixpoint form (normalize-on-write, SPEC §11),
|
||||||
|
// then apply move-old-path + absence-delete removals.
|
||||||
let written = 0;
|
let written = 0;
|
||||||
let failed = 0;
|
let failed = 0;
|
||||||
let completed = 0;
|
let completed = 0;
|
||||||
let nextIndex = 0;
|
let nextIndex = 0;
|
||||||
|
// pageIds whose write FAILED. A moved page whose new-path write failed must
|
||||||
|
// NOT have its old path removed (otherwise the page vanishes entirely).
|
||||||
|
const failedPageIds = new Set<string>();
|
||||||
|
|
||||||
// Pull + write a single page. Each call is wrapped so one bad page (network
|
const writeOne = async (w: { pageId: string; relPath: string }): Promise<void> => {
|
||||||
// error, page deleted between the walk and the fetch, body conversion
|
const node = liveNodeByPageId.get(w.pageId);
|
||||||
// failure) NEVER aborts the whole pull — it is counted as a failure and the
|
if (!node) return;
|
||||||
// pool moves on. Mirrors the deliberately fault-tolerant enumerateSpacePages.
|
|
||||||
const pullOne = async (page: PageNode): Promise<void> => {
|
|
||||||
if (!page || !page.id) return;
|
|
||||||
const entry = layout.get(page.id);
|
|
||||||
if (!entry) return; // no layout entry (e.g. duplicate/skipped id)
|
|
||||||
try {
|
try {
|
||||||
const dir = join(vaultRoot, ...entry.segments);
|
const page = await client.getPageJson(w.pageId);
|
||||||
await mkdir(dir, { recursive: true });
|
const meta: PageMeta = {
|
||||||
// Body + meta only (no comments block) — SPEC §3.
|
version: 1,
|
||||||
const md = await client.exportPageBody(page.id);
|
pageId: page.id,
|
||||||
await writeFile(join(dir, `${entry.stem}.md`), md, "utf8");
|
slugId: page.slugId,
|
||||||
|
title: page.title,
|
||||||
|
spaceId: page.spaceId,
|
||||||
|
parentPageId: page.parentPageId ?? null,
|
||||||
|
};
|
||||||
|
const text = await stabilizePageFile(page.content, meta);
|
||||||
|
const abs = relToAbs(vaultRoot, w.relPath);
|
||||||
|
await mkdir(dirname(abs), { recursive: true });
|
||||||
|
await writeFile(abs, text, "utf8");
|
||||||
written++;
|
written++;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
failed++;
|
failed++;
|
||||||
|
failedPageIds.add(w.pageId);
|
||||||
console.error(
|
console.error(
|
||||||
`pull: failed page ${page.id}:`,
|
`pull: failed page ${w.pageId}:`,
|
||||||
err instanceof Error ? err.message : String(err),
|
err instanceof Error ? err.message : String(err),
|
||||||
);
|
);
|
||||||
} finally {
|
} finally {
|
||||||
completed++;
|
completed++;
|
||||||
if (completed % PROGRESS_EVERY === 0) {
|
if (completed % PROGRESS_EVERY === 0) {
|
||||||
console.log(`pulled ${completed}/${total}`);
|
console.log(`pulled ${completed}/${plan.toWrite.length}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// A small dependency-free bounded-concurrency pool: a fixed set of runners
|
// Bounded-concurrency pool (dependency-free): a fixed set of runners each
|
||||||
// each pull the next index until the list is exhausted.
|
// take the next index until the write list is exhausted. One bad page never
|
||||||
|
// aborts the whole pull (mirrors the fault-tolerant tree walk).
|
||||||
const runner = async (): Promise<void> => {
|
const runner = async (): Promise<void> => {
|
||||||
while (true) {
|
while (true) {
|
||||||
const i = nextIndex++;
|
const i = nextIndex++;
|
||||||
if (i >= pages.length) return;
|
if (i >= plan.toWrite.length) return;
|
||||||
await pullOne(pages[i]);
|
await writeOne(plan.toWrite[i]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
await Promise.all(
|
||||||
|
Array.from(
|
||||||
|
{ length: Math.min(CONCURRENCY, plan.toWrite.length) || 1 },
|
||||||
|
() => runner(),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Helper: `rm` with force:true is a no-op if the file is already gone.
|
||||||
|
const removePath = async (rel: string, what: string): Promise<boolean> => {
|
||||||
|
try {
|
||||||
|
await rm(relToAbs(vaultRoot, rel), { force: true });
|
||||||
|
return true;
|
||||||
|
} catch (err) {
|
||||||
|
console.error(
|
||||||
|
`pull: failed to ${what} ${rel}:`,
|
||||||
|
err instanceof Error ? err.message : String(err),
|
||||||
|
);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const runners = Array.from(
|
// 7a. Apply MOVE old-path removals. A moved page IS present in `live`, so its
|
||||||
{ length: Math.min(CONCURRENCY, pages.length) },
|
// old path is genuinely stale — this is NOT subject to the incomplete-
|
||||||
() => runner(),
|
// fetch suppression. BUT only remove the old path when (a) the planner
|
||||||
);
|
// marked it removable (not reused by another live page) AND (b) the new-
|
||||||
await Promise.all(runners);
|
// path write actually SUCCEEDED — otherwise we would delete the only copy
|
||||||
|
// of a page whose move-write failed.
|
||||||
|
let movedApplied = 0;
|
||||||
|
for (const m of plan.moved) {
|
||||||
|
if (!m.removeOldPath) continue;
|
||||||
|
if (failedPageIds.has(m.pageId)) {
|
||||||
|
console.warn(
|
||||||
|
`pull: move write for ${m.pageId} failed — keeping old path ` +
|
||||||
|
`${m.fromRelPath} (SPEC §8)`,
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (await removePath(m.fromRelPath, "remove moved old path")) movedApplied++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 7b. Apply ABSENCE-based deletions — ONLY if the decision allowed them
|
||||||
|
// (incomplete-fetch suppression + mass-delete guard, SPEC §8).
|
||||||
|
let deleted = 0;
|
||||||
|
if (deleteDecision.apply) {
|
||||||
|
for (const rel of plan.toDelete) {
|
||||||
|
if (await removePath(rel, "delete")) deleted++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 8. Stage + commit on `docmost` (only if there is something to commit).
|
||||||
|
// Deterministic stabilized output means unchanged pages produce identical
|
||||||
|
// bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the
|
||||||
|
// ACTUAL work applied (pages written + files deleted), not the planned size,
|
||||||
|
// so a run with failures does not over-report (SPEC §5 nit).
|
||||||
|
const subject =
|
||||||
|
deleted > 0
|
||||||
|
? `docmost: sync ${written} page(s), ${deleted} deleted`
|
||||||
|
: `docmost: sync ${written} page(s)`;
|
||||||
|
await git.stageAll();
|
||||||
|
const committed = await git.commit(subject, {
|
||||||
|
authorName: BOT_AUTHOR_NAME,
|
||||||
|
authorEmail: BOT_AUTHOR_EMAIL,
|
||||||
|
trailers: [SOURCE_TRAILER],
|
||||||
|
});
|
||||||
|
|
||||||
|
// 9. Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
|
||||||
|
// we never push to Docmost. Push to a git remote is deferred (SPEC §7).
|
||||||
|
await git.checkout(DEFAULT_BRANCH);
|
||||||
|
const merge = await git.merge(DOCMOST_BRANCH);
|
||||||
|
if (merge.conflict) {
|
||||||
|
console.error(
|
||||||
|
"pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
|
||||||
|
"in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
|
||||||
|
"Docmost (read-only). Resolve locally, then re-run.",
|
||||||
|
);
|
||||||
|
} else if (!merge.ok) {
|
||||||
|
console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
|
||||||
|
}
|
||||||
|
console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
|
||||||
|
|
||||||
|
// 10. One-line summary.
|
||||||
console.log(
|
console.log(
|
||||||
`pull complete: ${written} page(s) written, ${failed} failed, ` +
|
`pull complete: ${written} written, ${movedApplied} moved, ` +
|
||||||
`out of ${total} from space ${spaceId} into ${vaultRoot}`,
|
`${deleted} deleted, committed=${committed}, ` +
|
||||||
|
`merge=${merge.conflict ? "CONFLICT" : merge.ok ? "ok" : "failed"} ` +
|
||||||
|
`(${failed} page failures) from space ${spaceId} into ${vaultRoot}`,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Signal a partial mirror so callers/CI can react. Use process.exitCode (not
|
// Signal a partial mirror / conflict so callers/CI can react. Use
|
||||||
// a hard process.exit) so any buffered output is flushed cleanly.
|
// process.exitCode (not a hard exit) so buffered output flushes cleanly.
|
||||||
if (failed > 0) {
|
if (failed > 0 || merge.conflict || !merge.ok) {
|
||||||
process.exitCode = 1;
|
process.exitCode = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only auto-run when invoked directly as the CLI entrypoint, not when this
|
// Only auto-run when invoked directly as the CLI entrypoint, not when this
|
||||||
// module is imported (e.g. by a unit test), so the import does not trigger
|
// module is imported (e.g. by a unit test), so the import does not trigger
|
||||||
// loadSettings() + process.exit.
|
// loadSettings() + git/network access.
|
||||||
const invokedDirectly =
|
const invokedDirectly =
|
||||||
typeof process.argv[1] === "string" &&
|
typeof process.argv[1] === "string" &&
|
||||||
import.meta.url === pathToFileURL(process.argv[1]).href;
|
import.meta.url === pathToFileURL(process.argv[1]).href;
|
||||||
|
|||||||
200
src/reconcile.ts
Normal file
200
src/reconcile.ts
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
/**
 * Pure reconciliation planner (SPEC §5/§6/§8).
 *
 * Given the desired live set of files (computed from the current Docmost tree)
 * and the set of files currently tracked in the vault, compute what to write,
 * what to move (old path to remove), and what to delete. Identity is `pageId`
 * (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
 * changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
 * the live tree is a DELETE.
 *
 * This module is intentionally PURE (no IO, no git) so the whole plan is
 * unit-testable. The actual file writing / git operations happen in pull.ts.
 */
|
||||||
|
|
||||||
|
/** A page that SHOULD exist in the vault at a given path. */
export interface LiveEntry {
  pageId: string;
  /** Vault-relative path (forward-slash), e.g. `Space/Parent/Child.md`. */
  relPath: string;
}

/** A page currently tracked in the vault (pageId parsed from its meta). */
export interface ExistingEntry {
  pageId: string;
  /** Vault-relative path (forward-slash) of the tracked file. */
  relPath: string;
}

/** A page to (re)write at its destination path. */
export interface WriteEntry {
  pageId: string;
  relPath: string;
}

/** A page that moved: written at its NEW relPath, with the OLD path removed. */
export interface MovedEntry {
  pageId: string;
  fromRelPath: string;
  toRelPath: string;
  /**
   * Whether the old path (`fromRelPath`) is SAFE to remove. False when another
   * live page will (re)write that exact path (path reuse): removing it would
   * destroy real data, so the caller must skip the removal. The move itself is
   * still recorded (the new path is written regardless).
   */
  removeOldPath: boolean;
}

/** The full reconciliation plan. */
export interface ReconciliationPlan {
  /**
   * Pages present in `live` -> (re)write at their relPath. This naturally
   * covers add, content-update (same path) AND move (same pageId, new path),
   * since every live page is (re)written regardless of whether it existed.
   */
  toWrite: WriteEntry[];
  /**
   * Vault-relative paths to delete because their tracked pageId is ABSENT from
   * `live` (page removed/trashed). This set is ONLY absence-based deletions —
   * the OLD paths of moved pages are NOT here (they live in `moved` and are
   * applied separately by the caller). Keeping the two apart lets pull.ts gate
   * absence deletions behind the incomplete-fetch suppression + mass-delete
   * guard (SPEC §8) while still applying real moves.
   */
  toDelete: string[];
  /**
   * Tracked pages whose relPath changed. The caller writes the page at
   * `toRelPath`, then removes `fromRelPath` — but ONLY after the new-path write
   * succeeded. The old path is NOT in `toDelete`.
   */
  moved: MovedEntry[];
}

/**
 * Compute the reconciliation plan.
 *
 * Rules:
 *  - Every `live` page is written at its relPath (covers add + update + move).
 *  - A tracked pageId present in `live` whose relPath changed is `moved`; its
 *    OLD relPath goes into `moved` ONLY (the caller removes it after the new
 *    path is written) and is NEVER added to `toDelete`.
 *  - A tracked pageId NOT present in `live` is an ABSENCE delete; its relPath
 *    is added to `toDelete`.
 *
 * Notes:
 *  - Safety filter (no data loss): no path that is a live TARGET path of any
 *    page is ever deleted/removed (a write owns it). This applies to BOTH the
 *    absence `toDelete` set AND a moved page's old-path removal — if a moved
 *    page's OLD path is reused by ANOTHER live page, the move is still
 *    recorded but flagged `removeOldPath: false`, because that path will be
 *    (re)written.
 *  - `existing` may legitimately contain duplicate pageIds (two stray files
 *    carrying the same meta pageId); each such file that is not the live target
 *    path is removed (as an absence/move) so the vault converges to exactly the
 *    live set.
 *  - If `live` itself repeats a pageId, the LAST entry's relPath is the one
 *    used for move detection, while every entry is still emitted in `toWrite`.
 *
 * Neither input array is mutated.
 *
 * @param live      Desired files, one per live page.
 * @param existing  Files currently tracked in the vault.
 * @returns The plan: `toWrite` (in `live` order), de-duplicated `toDelete`
 *          (in first-seen `existing` order) and `moved` (in `existing` order).
 */
export function planReconciliation(
  live: readonly LiveEntry[],
  existing: readonly ExistingEntry[],
): ReconciliationPlan {
  // Desired path for each live pageId.
  const liveByPageId = new Map<string, string>();
  // Set of all paths that WILL be written (never delete/remove one of these).
  const liveTargetPaths = new Set<string>();
  for (const e of live) {
    liveByPageId.set(e.pageId, e.relPath);
    liveTargetPaths.add(e.relPath);
  }

  const toWrite: WriteEntry[] = live.map((e) => ({
    pageId: e.pageId,
    relPath: e.relPath,
  }));

  const moved: MovedEntry[] = [];
  // Absence-based deletions ONLY (tracked pageId absent from `live`). Use a Set
  // so the same path coming from multiple existing rows is queued only once.
  const toDeleteSet = new Set<string>();

  for (const ex of existing) {
    const liveRel = liveByPageId.get(ex.pageId);
    if (liveRel === undefined) {
      // Tracked page is gone from the live tree -> absence delete.
      // Never queue a path a live page will (re)write (path reuse -> no loss).
      if (!liveTargetPaths.has(ex.relPath)) toDeleteSet.add(ex.relPath);
      continue;
    }
    if (liveRel !== ex.relPath) {
      // Same pageId, different path -> a MOVE. Record it so the caller can write
      // the new path first, then remove the old one. If the old path is itself a
      // live target (reused by another page), it must NOT be removed — the write
      // owns it — so flag `removeOldPath: false` (move still recorded).
      moved.push({
        pageId: ex.pageId,
        fromRelPath: ex.relPath,
        toRelPath: liveRel,
        removeOldPath: !liveTargetPaths.has(ex.relPath),
      });
    }
    // liveRel === ex.relPath -> content-update in place; nothing extra to do
    // (the write above re-emits the file; identical bytes => git no-op).
  }

  const toDelete = [...toDeleteSet];

  return { toWrite, toDelete, moved };
}
|
||||||
|
|
||||||
|
/**
 * Below this many tracked files the mass-delete fraction guard is not applied
 * (a tiny vault where deleting "most" files is normal, e.g. 1-of-2).
 */
export const MASS_DELETE_MIN_EXISTING = 4;
/** Fraction of tracked files above which a delete plan is a suspected wipe. */
export const MASS_DELETE_FRACTION = 0.5;

/** Why absence-based deletions were (or were not) applied this cycle. */
export type DeletionDecision =
  | { apply: true }
  | { apply: false; reason: "incomplete-fetch" | "empty-live" | "mass-delete" };

/**
 * Pure decision: should the ABSENCE-based deletions (`plan.toDelete`) be applied
 * this cycle? Encapsulates the SPEC §8 safety invariants so they are unit-
 * testable without live creds or git:
 *
 *  - `treeComplete === false` (a partial Docmost tree fetch) -> SUPPRESS. A page
 *    missing from a partial tree is NOT proof of deletion (SPEC §8); we must not
 *    delete merely-absent files this cycle. (Writes/updates/moves still happen.)
 *  - The live fetch returned 0 pages while files are tracked -> SUPPRESS
 *    (almost always a failed fetch, never a real "delete everything").
 *  - The plan would delete more than `MASS_DELETE_FRACTION` of a non-trivial
 *    vault -> SUPPRESS as a mass-deletion guard (defense in depth).
 *
 * The checks run in that order, so the first matching reason is reported. When
 * there are no tracked files or nothing to delete, the answer is
 * `{ apply: true }` regardless of `treeComplete` (there is nothing to guard).
 *
 * Moves are NOT governed by this decision: a moved page IS present in `live`, so
 * its old-path removal is real (handled by the caller separately).
 *
 * @param args.treeComplete   Whether the live tree fetch was complete.
 * @param args.liveCount      Number of live pages fetched.
 * @param args.existingCount  Number of files currently tracked.
 * @param args.deleteCount    Number of planned absence-based deletions.
 * @returns `{ apply: true }`, or `{ apply: false, reason }` naming the guard
 *          that suppressed the deletions.
 */
export function decideAbsenceDeletions(args: {
  treeComplete: boolean;
  liveCount: number;
  existingCount: number;
  deleteCount: number;
}): DeletionDecision {
  const { treeComplete, liveCount, existingCount, deleteCount } = args;

  // No tracked files, or nothing to delete -> trivially fine to "apply".
  if (existingCount === 0 || deleteCount === 0) return { apply: true };

  if (!treeComplete) return { apply: false, reason: "incomplete-fetch" };

  if (liveCount === 0) return { apply: false, reason: "empty-live" };

  // Strictly MORE than the fraction trips the guard; exactly half is allowed.
  if (
    existingCount >= MASS_DELETE_MIN_EXISTING &&
    deleteCount > existingCount * MASS_DELETE_FRACTION
  ) {
    return { apply: false, reason: "mass-delete" };
  }

  return { apply: true };
}
|
||||||
58
src/stabilize.ts
Normal file
58
src/stabilize.ts
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
/**
 * Normalize-on-write helper (SPEC §11 "Резолюция", i.e. "Resolution").
 *
 * git diffs byte-for-byte, so writing a page in a NON-fixpoint markdown form
 * would make the next pull re-export it to a slightly different (but stable)
 * form and produce a phantom diff -> churny commits. The converter has a couple
 * of known one-pass asymmetries (a block image after a paragraph adds an empty
 * paragraph; a diagram materializes `data-align`), all of which converge to a
 * fixpoint after ONE `export -> import -> export` round-trip.
 *
 * So at write time we run exactly that one pass and persist the fixpoint form.
 * Already-stable content is unaffected (the pass is idempotent), so re-pulls of
 * unchanged pages produce identical bytes and git sees no diff.
 */
|
||||||
|
import {
|
||||||
|
convertProseMirrorToMarkdown,
|
||||||
|
markdownToProseMirror,
|
||||||
|
serializeDocmostMarkdownBody,
|
||||||
|
type DocmostMdMeta,
|
||||||
|
} from "docmost-client";
|
||||||
|
|
||||||
|
/**
 * Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte
 * compatible so files produced here match `exportPageBody`'s output exactly.
 */
export interface PageMeta {
  /** Meta schema version; pinned to the literal `1`. */
  version: 1;
  pageId: string;
  slugId: string;
  title: string;
  spaceId: string;
  /** Parent page id, or `null` when the page has none. */
  parentPageId: string | null;
}
|
||||||
|
|
||||||
|
/**
 * Produce the self-contained `.md` file text for a page from its raw
 * ProseMirror `content` + identity meta, in the verified fixpoint form.
 *
 *   md1        = convertProseMirrorToMarkdown(content)
 *   doc2       = markdownToProseMirror(md1)          // one import...
 *   stableBody = convertProseMirrorToMarkdown(doc2)  // ...and re-export
 *   file       = serializeDocmostMarkdownBody(meta, stableBody)
 *
 * The single export->import->export pass is the verified fixpoint (SPEC §11):
 * idempotent for already-stable content, and the convergence point for the
 * known converter asymmetries.
 *
 * @param content  Raw ProseMirror document of the page (passed to the converter as-is).
 * @param meta     Identity meta serialized alongside the body.
 * @returns The full file text (meta + stabilized markdown body).
 */
export async function stabilizePageFile(
  content: unknown,
  meta: PageMeta,
): Promise<string> {
  const md1 = convertProseMirrorToMarkdown(content);
  const doc2 = await markdownToProseMirror(md1);
  const stableBody = convertProseMirrorToMarkdown(doc2);
  // The meta shape is exactly what `exportPageBody` writes; cast to the lib's
  // DocmostMdMeta (a superset with optional fields) for the serializer.
  return serializeDocmostMarkdownBody(meta as DocmostMdMeta, stableBody);
}
|
||||||
@@ -442,6 +442,105 @@ describe('checkNewComments', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// listSpaceTree — completeness signal (SPEC §8)
// ---------------------------------------------------------------------------
describe('listSpaceTree (completeness)', () => {
  // The walk seeds from /pages/sidebar-pages with only { spaceId } (roots), then
  // fetches each hasChildren node's children with { spaceId, pageId }. We route
  // by the presence of `pageId` in the request body.
  it('returns complete:true and every node for a fully-fetched tree', async () => {
    const client = new DocmostClient(BASE_URL, 'a@b.c', 'pw');
    stubLoginSuccess(globalAxiosMock());
    const imock = instanceMock(client);

    imock.onPost('/pages/sidebar-pages').reply((config) => {
      const body = JSON.parse(config.data);
      if (!body.pageId) {
        // Root level: one parent with children + one leaf.
        return [
          200,
          {
            data: {
              items: [
                { id: 'root', title: 'Root', hasChildren: true },
                { id: 'leaf', title: 'Leaf', hasChildren: false },
              ],
            },
          },
        ];
      }
      if (body.pageId === 'root') {
        return [
          200,
          { data: { items: [{ id: 'child', title: 'Child', hasChildren: false }] } },
        ];
      }
      return [200, { data: { items: [] } }];
    });

    const { pages, complete } = await client.listSpaceTree('space-1');
    expect(complete).toBe(true);
    expect(new Set(pages.map((p: any) => p.id))).toEqual(
      new Set(['root', 'leaf', 'child']),
    );
  });

  it('returns complete:false but still the other nodes when a branch fetch THROWS', async () => {
    const client = new DocmostClient(BASE_URL, 'a@b.c', 'pw');
    stubLoginSuccess(globalAxiosMock());
    const imock = instanceMock(client);

    imock.onPost('/pages/sidebar-pages').reply((config) => {
      const body = JSON.parse(config.data);
      if (!body.pageId) {
        // Two parents, both claim children; one of them will fail to expand.
        return [
          200,
          {
            data: {
              items: [
                { id: 'ok', title: 'Ok', hasChildren: true },
                { id: 'boom', title: 'Boom', hasChildren: true },
              ],
            },
          },
        ];
      }
      if (body.pageId === 'ok') {
        return [
          200,
          { data: { items: [{ id: 'okchild', title: 'OkChild', hasChildren: false }] } },
        ];
      }
      // The 'boom' branch fails -> walk must continue, completeness must drop.
      return [500, {}];
    });

    const { pages, complete } = await client.listSpaceTree('space-1');
    // The failed branch flips completeness to false...
    expect(complete).toBe(false);
    // ...but the rest of the tree is still collected (no abort, no wipe signal).
    expect(new Set(pages.map((p: any) => p.id))).toEqual(
      new Set(['ok', 'boom', 'okchild']),
    );
  });

  it('returns complete:false and no nodes when the seed (root) fetch fails', async () => {
    const client = new DocmostClient(BASE_URL, 'a@b.c', 'pw');
    stubLoginSuccess(globalAxiosMock());
    const imock = instanceMock(client);

    // Every sidebar-pages call fails -> listSidebarPages itself throws on the
    // seed, so the walk returns empty + incomplete (never "0 pages, complete").
    imock.onPost('/pages/sidebar-pages').reply(500, {});

    const { pages, complete } = await client.listSpaceTree('space-1');
    expect(complete).toBe(false);
    expect(pages).toEqual([]);
  });
});
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// AUTH: 401 interceptor + re-login dedup + getCollabTokenWithReauth
|
// AUTH: 401 interceptor + re-login dedup + getCollabTokenWithReauth
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
292
test/git.test.ts
Normal file
292
test/git.test.ts
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
import { execFile } from 'node:child_process';
|
||||||
|
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { promisify } from 'node:util';
|
||||||
|
import { afterEach, beforeAll, describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
VaultGit,
|
||||||
|
BOT_AUTHOR_NAME,
|
||||||
|
BOT_AUTHOR_EMAIL,
|
||||||
|
buildCommitMessage,
|
||||||
|
} from '../src/git.js';
|
||||||
|
|
||||||
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
|
/**
 * True if a usable `git` binary is on PATH (skip the suite otherwise).
 *
 * Runs `git --version`; any failure to execute it is reported as `false`
 * rather than thrown.
 */
async function gitAvailable(): Promise<boolean> {
  try {
    await execFileAsync('git', ['--version']);
    return true;
  } catch {
    return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Read the full commit message of HEAD (subject + body) in a repo dir.
 *
 * @param dir  Directory used as the working directory for `git log`.
 * @returns The `%B` output of the latest commit, trimmed.
 */
async function headMessage(dir: string): Promise<string> {
  const { stdout } = await execFileAsync(
    'git',
    ['--no-pager', 'log', '-1', '--pretty=%B'],
    { cwd: dir },
  );
  return stdout.trim();
}
|
||||||
|
|
||||||
|
/**
 * Read the author "Name <email>" of HEAD in a repo dir.
 *
 * @param dir  Directory used as the working directory for `git log`.
 * @returns The `%an <%ae>` output of the latest commit, trimmed.
 */
async function headAuthor(dir: string): Promise<string> {
  const { stdout } = await execFileAsync(
    'git',
    ['--no-pager', 'log', '-1', '--pretty=%an <%ae>'],
    { cwd: dir },
  );
  return stdout.trim();
}
|
||||||
|
|
||||||
|
// Pure string-building checks for the commit message helper (no git needed).
describe('buildCommitMessage (pure)', () => {
  it('returns the bare subject when there are no trailers', () => {
    expect(buildCommitMessage('subject')).toBe('subject');
    expect(buildCommitMessage('subject', [])).toBe('subject');
  });

  it('appends trailers separated from the subject by a blank line', () => {
    expect(buildCommitMessage('subject', ['Docmost-Sync-Source: docmost'])).toBe(
      'subject\n\nDocmost-Sync-Source: docmost',
    );
  });
});
|
||||||
|
|
||||||
|
/**
 * Integration tests for `VaultGit` that run the real `git` binary against a
 * throw-away repository created under the OS temp directory (never the source
 * repository).
 *
 * Every test needs a working `git`. When the `beforeAll` probe reports that git
 * is unavailable, each test is marked as SKIPPED through the Vitest test
 * context instead of returning early, so a run without git is not reported as
 * a fully green suite.
 */
describe('VaultGit (integration; temp repo)', () => {
  /** Result of the one-time `gitAvailable()` probe. */
  let available = false;
  /** Temp directory owned by the currently running test, if it created one. */
  let dir: string | undefined;

  /** Identity used for every commit that stands in for the sync bot. */
  const botIdentity = {
    authorName: BOT_AUTHOR_NAME,
    authorEmail: BOT_AUTHOR_EMAIL,
  };
  /** Provenance trailer attached to the bot's sync commits. */
  const syncTrailers = ['Docmost-Sync-Source: docmost'];

  beforeAll(async () => {
    available = await gitAvailable();
  });

  afterEach(async () => {
    // Take ownership of the path and clear it first, so a later test that
    // never calls freshDir() does not try to delete a stale directory again.
    const created = dir;
    dir = undefined;
    if (created) {
      await rm(created, { recursive: true, force: true });
    }
  });

  /**
   * Guard used at the top of every test.
   *
   * @param ctx - the Vitest test context of the running test.
   * @returns `true` when git is usable; otherwise marks the test as skipped
   *   and returns `false` so the caller can bail out.
   */
  function requireGit(ctx: { skip: () => void }): boolean {
    if (available) return true;
    ctx.skip();
    return false;
  }

  /** Make a fresh temp dir for one test (under the OS tmpdir, NOT the repo). */
  async function freshDir(): Promise<string> {
    const created = await mkdtemp(join(tmpdir(), 'docmost-vault-'));
    dir = created;
    return created;
  }

  /** Number of commits reachable from HEAD, as the trimmed string git prints. */
  async function commitCount(vault: string): Promise<string> {
    const { stdout } = await execFileAsync(
      'git',
      ['rev-list', '--count', 'HEAD'],
      { cwd: vault },
    );
    return stdout.trim();
  }

  /**
   * Commit divergent edits to the SAME file (`c.md`) on `docmost` and on
   * `main`, leaving `main` checked out. Merging `docmost` afterwards yields a
   * real content conflict.
   */
  async function divergeOnSameFile(vault: string, git: VaultGit): Promise<void> {
    await git.checkout('docmost');
    await writeFile(join(vault, 'c.md'), 'from docmost\n', 'utf8');
    await git.stageAll();
    await git.commit('docmost edit', botIdentity);

    await git.checkout('main');
    await writeFile(join(vault, 'c.md'), 'from main\n', 'utf8');
    await git.stageAll();
    await git.commit('main edit', {
      authorName: 'Human',
      authorEmail: 'human@local',
    });
  }

  it('ensureRepo creates .git + main + an initial commit', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();

    // It is a git work-tree now.
    const { stdout: insideWt } = await execFileAsync(
      'git',
      ['rev-parse', '--is-inside-work-tree'],
      { cwd: vault },
    );
    expect(insideWt.trim()).toBe('true');

    // On `main`.
    expect(await git.currentBranch()).toBe('main');

    // Has the initial commit.
    expect(await headMessage(vault)).toBe('init vault');

    // Idempotent: calling again does not create a second commit.
    await git.ensureRepo();
    expect(await commitCount(vault)).toBe('1');
  });

  it('ensureBranch creates the docmost branch from main', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();

    expect(await git.branchExists('docmost')).toBe(false);
    await git.ensureBranch('docmost', 'main');
    expect(await git.branchExists('docmost')).toBe(true);

    // Idempotent.
    await git.ensureBranch('docmost', 'main');
    expect(await git.branchExists('docmost')).toBe(true);
  });

  it('commit writes a commit with the provenance trailer and the bot identity', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();

    await writeFile(join(vault, 'page.md'), 'hello\n', 'utf8');
    await git.stageAll();
    const made = await git.commit('docmost: sync 1 page(s)', {
      ...botIdentity,
      trailers: syncTrailers,
    });
    expect(made).toBe(true);

    const msg = await headMessage(vault);
    expect(msg).toContain('docmost: sync 1 page(s)');
    expect(msg).toContain('Docmost-Sync-Source: docmost');

    const author = await headAuthor(vault);
    expect(author).toBe(`${BOT_AUTHOR_NAME} <${BOT_AUTHOR_EMAIL}>`);

    // The trailer is parseable by git itself.
    const { stdout: trailers } = await execFileAsync(
      'git',
      ['--no-pager', 'log', '-1', '--pretty=%(trailers:key=Docmost-Sync-Source,valueonly)'],
      { cwd: vault },
    );
    expect(trailers.trim()).toBe('docmost');
  });

  it('commit is a no-op when there is nothing to commit', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();

    await git.stageAll(); // nothing changed since the init commit
    const made = await git.commit('docmost: sync 0 page(s)', {
      ...botIdentity,
      trailers: syncTrailers,
    });
    expect(made).toBe(false);

    // Still exactly one commit (the init one).
    expect(await commitCount(vault)).toBe('1');
  });

  it('merge fast-forwards main to docmost', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();
    await git.ensureBranch('docmost', 'main');

    // Commit a file on docmost.
    await git.checkout('docmost');
    await writeFile(join(vault, 'a.md'), 'a\n', 'utf8');
    await git.stageAll();
    await git.commit('docmost: sync 1 page(s)', {
      ...botIdentity,
      trailers: syncTrailers,
    });

    // main has not diverged, so the merge is a clean fast-forward.
    await git.checkout('main');
    const res = await git.merge('docmost');
    expect(res.ok).toBe(true);
    expect(res.conflict).toBe(false);

    // main now contains the file and the docmost commit.
    const tracked = await git.listTrackedFiles();
    expect(tracked).toContain('a.md');
    expect(await headMessage(vault)).toContain('docmost: sync 1 page(s)');
  });

  it('merge surfaces a conflict distinctly (no auto-resolve)', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();
    await git.ensureBranch('docmost', 'main');

    // Divergent edits to the SAME file on both branches -> real conflict.
    await divergeOnSameFile(vault, git);

    const res = await git.merge('docmost');
    expect(res.ok).toBe(false);
    expect(res.conflict).toBe(true);
  });

  it('isMergeInProgress is false on a clean repo and true mid-merge', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();
    await git.ensureBranch('docmost', 'main');

    // Clean repo, no merge in progress.
    expect(await git.isMergeInProgress()).toBe(false);

    // Create a REAL conflict: divergent edits to the same file on both branches.
    await divergeOnSameFile(vault, git);

    // Merge conflicts -> the repo is now left mid-merge.
    const res = await git.merge('docmost');
    expect(res.conflict).toBe(true);
    expect(await git.isMergeInProgress()).toBe(true);

    // Aborting the merge clears the in-progress state again.
    await execFileAsync('git', ['--no-pager', 'merge', '--abort'], { cwd: vault });
    expect(await git.isMergeInProgress()).toBe(false);
  });

  it('listTrackedFiles supports a glob and returns forward-slash paths', async (ctx) => {
    if (!requireGit(ctx)) return;
    const vault = await freshDir();
    const git = new VaultGit(vault);
    await git.ensureRepo();

    await writeFile(join(vault, 'keep.md'), 'k\n', 'utf8');
    await writeFile(join(vault, 'note.txt'), 't\n', 'utf8');
    await git.stageAll();
    await git.commit('add files', botIdentity);

    const md = await git.listTrackedFiles('*.md');
    expect(md).toEqual(['keep.md']);
    const all = await git.listTrackedFiles();
    expect(new Set(all)).toEqual(new Set(['keep.md', 'note.txt']));
  });
});
|
||||||
238
test/reconcile.test.ts
Normal file
238
test/reconcile.test.ts
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
planReconciliation,
|
||||||
|
decideAbsenceDeletions,
|
||||||
|
type ExistingEntry,
|
||||||
|
type LiveEntry,
|
||||||
|
} from '../src/reconcile.js';
|
||||||
|
|
||||||
|
/**
 * Unit tests for the pure reconciliation planner. Each case feeds a set of
 * live pages and a set of already-tracked files into `planReconciliation` and
 * checks the three outputs of the plan: `toWrite`, `toDelete` and `moved`.
 */
describe('planReconciliation', () => {
  /** Build a `{ pageId, relPath }` row, usable as a live or an existing entry. */
  const row = (pageId: string, relPath: string) => ({ pageId, relPath });

  /** Build the expected `moved` record for one relocated page. */
  const relocation = (
    pageId: string,
    fromRelPath: string,
    toRelPath: string,
    removeOldPath: boolean,
  ) => ({ pageId, fromRelPath, toRelPath, removeOldPath });

  it('ADD: a new live page (not tracked) is written, nothing deleted', () => {
    const tracked: ExistingEntry[] = [];
    const current: LiveEntry[] = [row('p1', 'Space/New.md')];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    expect(toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/New.md' }]);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([]);
  });

  it('CONTENT-UPDATE: tracked page at the SAME path is rewritten, not moved/deleted', () => {
    const current: LiveEntry[] = [row('p1', 'Space/Doc.md')];
    const tracked: ExistingEntry[] = [row('p1', 'Space/Doc.md')];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    // The page is re-emitted (identical bytes => git no-op); no move, no delete.
    expect(toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Doc.md' }]);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([]);
  });

  it('MOVE: same pageId, new path -> write new + recorded as moved (NOT in toDelete)', () => {
    const current: LiveEntry[] = [row('p1', 'Space/NewParent/Doc.md')];
    const tracked: ExistingEntry[] = [row('p1', 'Space/OldParent/Doc.md')];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    expect(toWrite).toEqual([row('p1', 'Space/NewParent/Doc.md')]);
    // Removing the old path belongs to the MOVE, so it is not an absence delete.
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([
      relocation('p1', 'Space/OldParent/Doc.md', 'Space/NewParent/Doc.md', true),
    ]);
  });

  it('DELETE: a tracked pageId gone from live -> its file is deleted', () => {
    const current: LiveEntry[] = [row('p1', 'Space/Keep.md')];
    const tracked: ExistingEntry[] = [
      row('p1', 'Space/Keep.md'),
      row('p2', 'Space/Gone.md'),
    ];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    expect(toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Keep.md' }]);
    expect(toDelete).toEqual(['Space/Gone.md']);
    expect(moved).toEqual([]);
  });

  it('NO-OP: live and existing identical -> writes (re-emit) but no deletes/moves', () => {
    const current: LiveEntry[] = [row('p1', 'A.md'), row('p2', 'B.md')];
    const tracked: ExistingEntry[] = [row('p1', 'A.md'), row('p2', 'B.md')];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    expect(toWrite).toEqual(current);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([]);
  });

  it('does NOT delete an old path that another live page will write (path reuse)', () => {
    // p1 relocates X.md -> Y.md while a NEW page p2 takes over X.md. Because p2
    // writes X.md, the old X.md must survive.
    const current: LiveEntry[] = [row('p1', 'Y.md'), row('p2', 'X.md')];
    const tracked: ExistingEntry[] = [row('p1', 'X.md')];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    expect(new Set(toWrite)).toEqual(
      new Set([row('p1', 'Y.md'), row('p2', 'X.md')]),
    );
    // X.md is a live target, so nothing is deleted.
    expect(toDelete).toEqual([]);
    // The move is still reported, but flagged as non-removable: p2 writes X.md,
    // and removeOldPath:false keeps the reused path from being lost.
    expect(moved).toEqual([relocation('p1', 'X.md', 'Y.md', false)]);
  });

  it('combines add + update + move + delete in one plan', () => {
    const current: LiveEntry[] = [
      row('keep', 'Keep.md'), // update in place
      row('mover', 'New/Moved.md'), // moved
      row('fresh', 'Fresh.md'), // added
    ];
    const tracked: ExistingEntry[] = [
      row('keep', 'Keep.md'),
      row('mover', 'Old/Moved.md'),
      row('dead', 'Dead.md'), // deleted
    ];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    expect(toWrite).toEqual(current);
    expect(moved).toEqual([
      relocation('mover', 'Old/Moved.md', 'New/Moved.md', true),
    ]);
    // toDelete carries ABSENCE deletes only; the relocated old path is reported
    // through `moved`, which leaves just the genuinely-gone page (Dead.md).
    expect(toDelete).toEqual(['Dead.md']);
  });

  it('records each duplicate tracked row of a present pageId as a removable move', () => {
    // Two stray files claim pageId "dup" while the live page lives elsewhere.
    // Each stray counts as a MOVE (same pageId, different path): it is reported
    // in `moved` with removeOldPath:true and stays out of absence-based toDelete.
    const current: LiveEntry[] = [row('dup', 'Canonical.md')];
    const tracked: ExistingEntry[] = [
      row('dup', 'StrayA.md'),
      row('dup', 'StrayB.md'),
    ];

    const { toWrite, toDelete, moved } = planReconciliation(current, tracked);

    expect(toWrite).toEqual([{ pageId: 'dup', relPath: 'Canonical.md' }]);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([
      relocation('dup', 'StrayA.md', 'Canonical.md', true),
      relocation('dup', 'StrayB.md', 'Canonical.md', true),
    ]);
  });
});
|
||||||
|
|
||||||
|
/**
 * Table-driven unit tests for the absence-deletion gate. Each row names one
 * test, lists the inputs passed to `decideAbsenceDeletions`, and gives the
 * decision every one of those inputs must produce.
 */
describe('decideAbsenceDeletions (SPEC §8)', () => {
  type GateInput = Parameters<typeof decideAbsenceDeletions>[0];

  interface GateCase {
    /** Test title, registered verbatim. */
    title: string;
    /** One or more inputs; all are asserted inside the same test. */
    inputs: GateInput[];
    /** Decision expected for every input of the row. */
    expected: unknown;
  }

  const cases: GateCase[] = [
    {
      title: 'APPLIES when the tree is complete and the delete count is modest',
      inputs: [{ treeComplete: true, liveCount: 10, existingCount: 10, deleteCount: 1 }],
      expected: { apply: true },
    },
    {
      // A partial tree suppresses even a single absence delete: a pageId that
      // is missing from a partial tree is NOT proof of deletion.
      title: 'SUPPRESSES all absence deletions when the tree fetch is incomplete',
      inputs: [{ treeComplete: false, liveCount: 9, existingCount: 10, deleteCount: 1 }],
      expected: { apply: false, reason: 'incomplete-fetch' },
    },
    {
      title: 'SUPPRESSES when live returned 0 pages but files are tracked (complete flag aside)',
      inputs: [{ treeComplete: true, liveCount: 0, existingCount: 5, deleteCount: 5 }],
      expected: { apply: false, reason: 'empty-live' },
    },
    {
      title: 'SUPPRESSES over the mass-delete guard (> 50% of a non-trivial vault)',
      // 6 of 10 tracked files would go: 60% > 50%.
      inputs: [{ treeComplete: true, liveCount: 4, existingCount: 10, deleteCount: 6 }],
      expected: { apply: false, reason: 'mass-delete' },
    },
    {
      // Losing 1 of 2 files is normal in a tiny vault; the fraction guard stays quiet.
      title: 'does NOT apply the fraction guard for a tiny vault (below the floor)',
      inputs: [{ treeComplete: true, liveCount: 1, existingCount: 2, deleteCount: 1 }],
      expected: { apply: true },
    },
    {
      title: 'incomplete-fetch takes precedence over the mass-delete reason',
      inputs: [{ treeComplete: false, liveCount: 4, existingCount: 10, deleteCount: 6 }],
      expected: { apply: false, reason: 'incomplete-fetch' },
    },
    {
      title: 'trivially applies when nothing is tracked or nothing would be deleted',
      inputs: [
        { treeComplete: false, liveCount: 0, existingCount: 0, deleteCount: 0 },
        { treeComplete: false, liveCount: 5, existingCount: 5, deleteCount: 0 },
      ],
      expected: { apply: true },
    },
  ];

  // Register one test per row, in table order.
  for (const { title, inputs, expected } of cases) {
    it(title, () => {
      for (const input of inputs) {
        expect(decideAbsenceDeletions(input)).toEqual(expected);
      }
    });
  }
});
|
||||||
Reference in New Issue
Block a user