feat(sync): add git vault layer (§5) and the Docmost->vault pull cycle (§6)
Turn the read-only mirror into a git-backed pull cycle. Read-only toward Docmost.
- git.ts (VaultGit): system-git wrapper, all ops cwd=vaultPath (vault is its own
repo under data/vault, never the source repo); ensureRepo/branches main+docmost,
commit with provenance (author/committer identity + Docmost-Sync-Source trailer,
§7.3), merge with conflict surfacing (no auto-resolve, §9), isMergeInProgress;
GIT_DIR/GIT_WORK_TREE stripped from env (§12 cwd isolation)
- stabilize.ts: normalize-on-write (one export->import->export fixpoint pass, §11)
- reconcile.ts: pure planReconciliation (add/update/move/delete by pageId) +
decideAbsenceDeletions gate
- pull.ts: write/commit on docmost -> merge into main; listSpaceTree completeness
signal suppresses absence-deletions on a partial fetch (§8); mass-delete guard;
merge-in-progress guard makes re-runs converge (§12); move old-path removal only
on successful write
- docmost-client: listSpaceTree({pages, complete}) without touching the 1:1-copied
enumerateSpacePages
- tests: reconcile planner + decideAbsenceDeletions, VaultGit incl. real temp-repo
merge conflict, listSpaceTree completeness (586 green)
Push to a git remote and the FS->Docmost direction are deferred to the next increment.
This commit is contained in:
352
src/git.ts
Normal file
352
src/git.ts
Normal file
@@ -0,0 +1,352 @@
|
||||
/**
|
||||
* Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
|
||||
*
|
||||
* IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
|
||||
* which is the vault's OWN git repository (default `data/vault`), SEPARATE from
|
||||
* the docmost-sync source repo. This module MUST NEVER run git against the
|
||||
* source repo. `data/` is gitignored by the source repo, so a nested repo under
|
||||
* `data/vault` is safe. The pull cycle is READ-ONLY toward Docmost; this module
|
||||
* only touches the local vault git, never a git remote (push is deferred, see
|
||||
* SPEC §7).
|
||||
*
|
||||
* Implementation notes:
|
||||
* - We shell out via `node:child_process` `execFile` (promisified), passing
|
||||
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
|
||||
* if a page title / branch name contains shell metacharacters.
|
||||
* - Every invocation prepends `--no-pager` so git never blocks on a pager.
|
||||
* - "nothing to commit" is treated as a graceful no-op, not an error.
|
||||
*/
|
||||
import { execFile } from "node:child_process";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import { promisify } from "node:util";
|
||||
|
||||
// Promise-returning `execFile`; git is always spawned through this with an
// argument array, never through a shell.
const execFileAsync = promisify(execFile);

/** Display name of the bot identity that authors engine commits (SPEC §7.3). */
export const BOT_AUTHOR_NAME = "Docmost Sync";

/** E-mail address of the bot identity that authors engine commits (SPEC §7.3). */
export const BOT_AUTHOR_EMAIL = "docmost-sync@local";

/** Name of the branch a freshly initialized vault repo starts on. */
export const DEFAULT_BRANCH = "main";
|
||||
|
||||
/**
 * Outcome of `VaultGit.merge`: either the merge applied cleanly, or it
 * stopped (on conflicts or on some other failure).
 */
export interface MergeResult {
  /** Set when the merge applied cleanly (fast-forward or clean 3-way). */
  ok: boolean;
  /** Set when the merge stopped on conflicts, leaving markers in the worktree. */
  conflict: boolean;
  /** Git's stdout and stderr combined, kept for logging and diagnostics. */
  output: string;
}
|
||||
|
||||
/** Provenance settings for a commit written by the engine (SPEC §7.3). */
export interface CommitOptions {
  /** Name recorded as both author and committer of the commit. */
  authorName: string;
  /** E-mail recorded as both author and committer of the commit. */
  authorEmail: string;
  /**
   * Trailer lines placed at the end of the commit message (for example
   * `Docmost-Sync-Source: docmost`). They are the machine-readable provenance
   * the loop-guard keys on (SPEC §12, "commit-attribution").
   */
  trailers?: string[];
}
|
||||
|
||||
/**
 * A git wrapper bound to a single vault path. Construct once per vault; every
 * method runs the system `git` binary with `cwd = vaultPath` and the
 * environment produced by `vaultGitEnv()`.
 */
export class VaultGit {
  /**
   * stdout/stderr cap for every git invocation. Generous because status
   * output and file listings on a large vault can be sizable.
   */
  private static readonly MAX_BUFFER = 64 * 1024 * 1024;

  constructor(private readonly vaultPath: string) {}

  /**
   * Run `git --no-pager <args...>` in the vault.
   *
   * @param args Arguments passed to git as an array (no shell involved).
   * @returns git's stdout with leading/trailing whitespace trimmed.
   * @throws Error carrying stderr (or stdout / the spawn message) when git
   *   exits non-zero or cannot be started.
   */
  private async run(args: string[]): Promise<string> {
    try {
      const { stdout } = await execFileAsync("git", ["--no-pager", ...args], {
        cwd: this.vaultPath,
        maxBuffer: VaultGit.MAX_BUFFER,
        env: vaultGitEnv(),
      });
      return stdout.trim();
    } catch (err: unknown) {
      const e = err as { stderr?: string; stdout?: string; message?: string };
      const detail = (e.stderr || e.stdout || e.message || "").toString().trim();
      throw new Error(`git ${args.join(" ")} failed: ${detail}`);
    }
  }

  /**
   * Like `run`, but never throws: the exit code and both (untrimmed) output
   * streams are returned. Used where a non-zero exit is an expected,
   * meaningful state (a merge conflict, a `--quiet` diff, a missing ref).
   *
   * A failure without a numeric exit code (e.g. git could not be spawned) is
   * reported as `code: 1`.
   */
  private async runRaw(
    args: string[],
  ): Promise<{ code: number; stdout: string; stderr: string }> {
    try {
      const { stdout, stderr } = await execFileAsync(
        "git",
        ["--no-pager", ...args],
        {
          cwd: this.vaultPath,
          maxBuffer: VaultGit.MAX_BUFFER,
          env: vaultGitEnv(),
        },
      );
      return { code: 0, stdout, stderr };
    } catch (err: unknown) {
      const e = err as { code?: number; stdout?: string; stderr?: string };
      return {
        code: typeof e.code === "number" ? e.code : 1,
        stdout: e.stdout ?? "",
        stderr: e.stderr ?? "",
      };
    }
  }

  /**
   * Ensure the vault directory exists and is its own initialized git repo on
   * `main` with an initial (empty) commit, so branches have a common base.
   * Safe to call on every run: each step is skipped when already satisfied.
   * Sets a LOCAL bot identity for the vault repo if none is set, so engine
   * commits never fall back to a global/unset identity.
   *
   * @throws Error when the directory cannot be created or a git step fails.
   */
  async ensureRepo(): Promise<void> {
    await mkdir(this.vaultPath, { recursive: true });

    if (!(await this.isRepo())) {
      // `git init -b <name>` only exists on git >= 2.28. When it is rejected,
      // fall back to a plain `git init`; the `checkout -B` below then puts the
      // still-unborn HEAD on the default branch before the first commit.
      const init = await this.runRaw(["init", "-b", DEFAULT_BRANCH]);
      if (init.code !== 0) {
        await this.run(["init"]);
      }
    }

    // Give the vault repo a local identity if it has none, so engine commits
    // have a deterministic committer even without any global git config.
    if (!(await this.hasLocalConfig("user.name"))) {
      await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
    }
    if (!(await this.hasLocalConfig("user.email"))) {
      await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
    }

    // Create the initial empty commit on `main` if the repo has no commits
    // yet, so `main` and (later) `docmost` share a common base.
    if (!(await this.hasAnyCommit())) {
      // Covers the case where the repo was initialized on another branch name.
      await this.run(["checkout", "-B", DEFAULT_BRANCH]);
      await this.commitRaw("init vault", {
        authorName: BOT_AUTHOR_NAME,
        authorEmail: BOT_AUTHOR_EMAIL,
        allowEmpty: true,
      });
    }
  }

  /**
   * True only if the vault directory is the TOP LEVEL of a git work-tree.
   *
   * Checking `--is-inside-work-tree` alone is not enough: git discovers
   * repositories by walking up from `cwd`, so an uninitialized vault directory
   * nested inside another repo's work-tree (ignored directories included)
   * would also report "true", and every later command would then operate on
   * that enclosing repo. `--show-cdup` prints the relative path up to the
   * top level and is empty exactly when `cwd` is the top level itself.
   */
  private async isRepo(): Promise<boolean> {
    const r = await this.runRaw([
      "rev-parse",
      "--is-inside-work-tree",
      "--show-cdup",
    ]);
    if (r.code !== 0) return false;
    // One output line per option, in the order given.
    const [inside = "", cdup = ""] = r.stdout.split(/\r?\n/);
    return inside.trim() === "true" && cdup.trim() === "";
  }

  /** True if a LOCAL git config key is set (non-empty) in the vault repo. */
  private async hasLocalConfig(key: string): Promise<boolean> {
    const r = await this.runRaw(["config", "--local", "--get", key]);
    return r.code === 0 && r.stdout.trim().length > 0;
  }

  /** True if the repo has at least one commit (HEAD resolves). */
  private async hasAnyCommit(): Promise<boolean> {
    const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
    return r.code === 0;
  }

  /** True if a local branch with the given name exists. */
  async branchExists(name: string): Promise<boolean> {
    const r = await this.runRaw(["rev-parse", "--verify", `refs/heads/${name}`]);
    return r.code === 0;
  }

  /**
   * Create `name` from `fromBranch` if it does not already exist. No-op (and
   * no checkout) when the branch is already present.
   *
   * @throws Error when `git branch` fails.
   */
  async ensureBranch(name: string, fromBranch: string): Promise<void> {
    if (await this.branchExists(name)) return;
    await this.run(["branch", name, fromBranch]);
  }

  /**
   * Name of the currently checked-out branch, as printed by
   * `git rev-parse --abbrev-ref HEAD`.
   */
  async currentBranch(): Promise<string> {
    return this.run(["rev-parse", "--abbrev-ref", "HEAD"]);
  }

  /** Check out an existing branch. Throws when git refuses the checkout. */
  async checkout(name: string): Promise<void> {
    await this.run(["checkout", name]);
  }

  /** Stage everything (adds, modifications, deletions) via `git add -A`. */
  async stageAll(): Promise<void> {
    await this.run(["add", "-A"]);
  }

  /**
   * True if the vault is mid-merge (an unresolved merge left by a previous
   * run, SPEC §9 / §12). Detected via a resolvable `MERGE_HEAD` OR any
   * unmerged index entries (`git ls-files -u`). The pull cycle checks this
   * BEFORE any checkout so a left-over merge yields a clear, actionable
   * message instead of a raw "you need to resolve your current index first"
   * failure deep inside `checkout`.
   */
  async isMergeInProgress(): Promise<boolean> {
    const mergeHead = await this.runRaw([
      "rev-parse",
      "--verify",
      "--quiet",
      "MERGE_HEAD",
    ]);
    if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0) return true;

    // Belt-and-suspenders: unmerged index entries also mean the working tree
    // is mid-conflict and a checkout would refuse.
    const unmerged = await this.runRaw(["ls-files", "-u"]);
    return unmerged.code === 0 && unmerged.stdout.trim().length > 0;
  }

  /**
   * Commit the currently STAGED changes with an explicit author/committer
   * identity and the given trailers appended to the message (SPEC §7.3
   * provenance). The caller is expected to have staged its changes first
   * (e.g. via `stageAll`).
   *
   * @returns `true` if a commit was made, `false` if nothing was staged.
   * @throws Error when `git commit` itself fails.
   */
  async commit(message: string, opts: CommitOptions): Promise<boolean> {
    // `diff --cached --quiet` exits 0 when the index matches HEAD (nothing
    // staged) and 1 when there are staged changes. An unchanged re-pull is a
    // graceful no-op, not an error (SPEC §11).
    const staged = await this.runRaw(["diff", "--cached", "--quiet"]);
    if (staged.code === 0) return false;

    await this.commitRaw(message, opts);
    return true;
  }

  /**
   * Low-level commit shared by `commit` and `ensureRepo`'s initial commit.
   * Appends the trailers to the message and sets author AND committer
   * identity through environment variables, so the committer matches the
   * author rather than the repo default.
   *
   * @throws Error prefixed with `git commit failed:` on any failure.
   */
  private async commitRaw(
    message: string,
    opts: CommitOptions & { allowEmpty?: boolean },
  ): Promise<void> {
    const args = ["commit", "-m", buildCommitMessage(message, opts.trailers)];
    if (opts.allowEmpty) args.push("--allow-empty");

    try {
      await execFileAsync("git", ["--no-pager", ...args], {
        cwd: this.vaultPath,
        maxBuffer: VaultGit.MAX_BUFFER,
        env: vaultGitEnv({
          GIT_AUTHOR_NAME: opts.authorName,
          GIT_AUTHOR_EMAIL: opts.authorEmail,
          GIT_COMMITTER_NAME: opts.authorName,
          GIT_COMMITTER_EMAIL: opts.authorEmail,
        }),
      });
    } catch (err: unknown) {
      const e = err as { stderr?: string; message?: string };
      throw new Error(
        `git commit failed: ${(e.stderr || e.message || "").toString().trim()}`,
      );
    }
  }

  /**
   * Merge `fromBranch` into the current branch (`git merge --no-edit`).
   * Conflict state is SURFACED (returned), NOT auto-resolved (SPEC §9): the
   * conflict markers are left in the worktree for manual resolution.
   *
   * @returns `ok: true` on a zero exit. Otherwise `ok: false`, with
   *   `conflict` telling whether the index actually holds unmerged paths (so
   *   an unrelated merge failure is not mislabelled as a conflict).
   */
  async merge(fromBranch: string): Promise<MergeResult> {
    const r = await this.runRaw(["merge", "--no-edit", fromBranch]);
    const output = `${r.stdout}\n${r.stderr}`.trim();
    if (r.code === 0) {
      return { ok: true, conflict: false, output };
    }
    const conflict = await this.hasUnmergedPaths();
    return { ok: false, conflict, output };
  }

  /** True if the index has any unmerged (conflicted) paths. */
  private async hasUnmergedPaths(): Promise<boolean> {
    const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]);
    return r.code === 0 && r.stdout.trim().length > 0;
  }

  /**
   * List tracked files on the current branch (paths relative to the vault
   * root, forward-slash separated). An optional glob (a git pathspec) narrows
   * the listing, e.g. `"*.md"`.
   *
   * Uses NUL-delimited output (`-z`) so paths are returned verbatim; the
   * newline-delimited format C-quotes paths containing non-ASCII or special
   * characters. The pathspec is passed after `--` so a glob starting with `-`
   * cannot be read as an option.
   *
   * @throws Error (including stderr) when `git ls-files` fails.
   */
  async listTrackedFiles(glob?: string): Promise<string[]> {
    const args = ["ls-files", "-z"];
    if (glob) args.push("--", glob);

    const r = await this.runRaw(args);
    if (r.code !== 0) {
      const detail = (r.stderr || r.stdout).trim();
      throw new Error(`git ${args.join(" ")} failed: ${detail}`);
    }
    return r.stdout.split("\0").filter((p) => p.length > 0);
  }
}
|
||||
|
||||
/**
 * Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
 *
 * cwd-isolation is this module's central safety guarantee: every git command
 * MUST operate on the vault repo at `cwd: vaultPath` and nothing else. Git
 * honours several environment variables that override repository discovery
 * or redirect parts of the repository (they are typically present when the
 * process is started from a git hook or alias of ANOTHER repo). Any of them,
 * inherited from `process.env`, would silently point the operation away from
 * `cwd`, so they are always stripped, even if the caller passes one in
 * `extra`.
 *
 * @param extra Additional variables (e.g. author/committer identity) layered
 *   over `process.env`.
 * @returns A fresh environment object; `process.env` itself is not modified.
 */
function vaultGitEnv(
  extra?: Record<string, string>,
): NodeJS.ProcessEnv {
  // Repository-redirecting variables, a subset of what
  // `git rev-parse --local-env-vars` reports as repo-local.
  const redirecting = [
    "GIT_DIR",
    "GIT_WORK_TREE",
    "GIT_INDEX_FILE",
    "GIT_OBJECT_DIRECTORY",
    "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    "GIT_COMMON_DIR",
    "GIT_PREFIX",
  ];

  const env: NodeJS.ProcessEnv = { ...process.env, ...extra };
  for (const name of redirecting) {
    delete env[name];
  }
  return env;
}
|
||||
|
||||
/**
 * Assemble a commit message from a subject and optional trailer lines
 * (SPEC §7.3). A blank line separates the subject from the trailers so that
 * `git interpret-trailers` / `git log --format=%(trailers)` parse them as
 * trailers. With no trailers the subject is returned unchanged.
 * Exported for unit testing.
 *
 * @param subject The commit message text.
 * @param trailers Trailer lines, one `Key: value` entry each.
 * @returns The full message to pass to `git commit -m`.
 */
export function buildCommitMessage(
  subject: string,
  trailers?: string[],
): string {
  const trailerLines = trailers ?? [];
  if (trailerLines.length === 0) {
    return subject;
  }
  // subject, empty separator line, then one line per trailer.
  return [subject, "", ...trailerLines].join("\n");
}
|
||||
Reference in New Issue
Block a user