From f68168e3c152371d94195c1e1193953fcb066d8f Mon Sep 17 00:00:00 2001 From: vvzvlad Date: Wed, 17 Jun 2026 00:32:37 +0300 Subject: [PATCH] refactor(sync): unify git exec layer; fix non-ASCII paths + diagnostics (review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address a code review of the git-hardening changes. - single runRaw primitive: every git invocation funnels through it; run() is a thin throw+trim wrapper; the two direct execFileAsync bypasses (commitRaw, assertGitAvailable) removed; one unified error format - `-c core.quotepath=false` is now the argv baseline for ALL commands (was only listTrackedFiles) — removes the latent quoting asymmetry on ls-files -u / diff --name-only; persisted LOCAL config (autocrlf/safecrlf/gpgsign/ attributesFile) kept as-is in ensureRepo - preserve spawn-error message (ENOENT): use `||` not `??` (promisified execFile sets stderr to "" on spawn failure) - contextual error when pinning vault git config; module/vaultGitEnv docs corrected - README: require a system git binary on PATH for local runs - tests: --no-verify honored (failing pre-commit hook), vaultGitEnv pins, core.attributesFile=/dev/null neutralization (593 green) --- README.md | 4 +- src/git.ts | 138 +++++++++++++++++++++++++++++++---------------- test/git.test.ts | 74 +++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index f03d19c..0f78534 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,9 @@ This is an npm-workspaces monorepo: ## Install & build -Requires Node >= 20. +Requires Node >= 20. For local (non-Docker) runs a system `git` binary must +also be on `PATH` — the vault state store shells out to it for every operation +(the daemon fails fast at startup if `git` is missing). ```sh npm install # links the workspace packages diff --git a/src/git.ts b/src/git.ts index 3cb7645..c13912a 100644 --- a/src/git.ts +++ b/src/git.ts @@ -13,7 +13,10 @@ * - We shell out via `node:child_process` `execFile` (promisified), passing * ARGS AS AN ARRAY — no shell, so there is no command injection surface even * if a page title / branch name contains shell metacharacters. - * - Every invocation prepends `--no-pager` so git never blocks on a pager. + * - EVERY git invocation funnels through the single `runRaw` primitive, which + * ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git + * never blocks on a pager and always prints verbatim UTF-8 paths). There is + * no exception — even the `git --version` preflight goes through `runRaw`. * - "nothing to commit" is treated as a graceful no-op, not an error. */ import { execFile } from "node:child_process"; @@ -67,51 +70,69 @@ export class VaultGit { * with NO `cwd` (the vault dir may not exist yet at preflight time). */ async assertGitAvailable(): Promise { - try { - await execFileAsync("git", ["--version"], { env: vaultGitEnv() }); - } catch (err: unknown) { - const e = err as { message?: string }; + // Goes through the single `runRaw` primitive like every other invocation. + // `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at + // preflight time, so we must not point git at a missing directory. + const r = await this.runRaw(["--version"], { cwd: null }); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); throw new Error( "git binary not found or not runnable — install git (the vault state " + - `store requires it). Underlying error: ${(e.message ?? "").trim()}`, + `store requires it). Underlying error: ${detail}`, ); } } /** - * Run `git --no-pager ` in the vault. Returns trimmed stdout. - * Throws a clear Error (including stderr) on a non-zero exit. + * Run a git command in the vault and return trimmed stdout. THIN wrapper over + * the single `runRaw` primitive: throws a clear, unified Error (including + * stderr/stdout) on a non-zero exit. */ - private async run(args: string[]): Promise { - try { - const { stdout } = await execFileAsync("git", ["--no-pager", ...args], { - cwd: this.vaultPath, - // Generous buffer: `git status --porcelain` / file listings on a large - // vault can be sizable. - maxBuffer: 64 * 1024 * 1024, - env: vaultGitEnv(), - }); - return stdout.trim(); - } catch (err: unknown) { - const e = err as { stderr?: string; stdout?: string; message?: string }; - const detail = (e.stderr || e.stdout || e.message || "").toString().trim(); + private async run( + args: string[], + opts?: { cwd?: string | null; env?: Record }, + ): Promise { + const r = await this.runRaw(args, opts); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); throw new Error(`git ${args.join(" ")} failed: ${detail}`); } + return r.stdout.trim(); } /** - * Like `run`, but returns the full exit info instead of throwing on a - * non-zero exit. Used where a non-zero exit is an expected, meaningful state - * (e.g. a merge conflict, or a porcelain diff that "fails" deliberately). + * The ONE primitive every git invocation in this module flows through. Builds + * the full argv (`--no-pager -c core.quotepath=false `), env, cwd, and + * maxBuffer, runs git, and NEVER throws — it returns the exit info so callers + * can treat a non-zero exit as either an error (`run`) or a meaningful state + * (e.g. a merge conflict, a porcelain diff that "fails" deliberately). + * + * - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never + * blocks on a pager and always prints verbatim UTF-8 paths (no octal + * escaping/quoting). `quotepath=false` is the baseline for ALL path- + * printing commands (ls-files, diff --name-only, …). + * - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the + * vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`. + * - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras). + * - On a spawn/exec error we capture the error `message` too, so a failure + * before git could write to stderr (e.g. ENOENT) is NOT lost. */ private async runRaw( args: string[], + opts?: { cwd?: string | null; env?: Record }, ): Promise<{ code: number; stdout: string; stderr: string }> { + const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath); try { const { stdout, stderr } = await execFileAsync( "git", - ["--no-pager", ...args], - { cwd: this.vaultPath, maxBuffer: 64 * 1024 * 1024, env: vaultGitEnv() }, + ["--no-pager", "-c", "core.quotepath=false", ...args], + { + // Generous buffer: file listings / porcelain output on a large vault + // can be sizable. + ...(cwd !== undefined ? { cwd } : {}), + maxBuffer: 64 * 1024 * 1024, + env: vaultGitEnv(opts?.env), + }, ); return { code: 0, stdout, stderr }; } catch (err: unknown) { @@ -119,11 +140,15 @@ export class VaultGit { code?: number; stdout?: string; stderr?: string; + message?: string; }; return { code: typeof e.code === "number" ? e.code : 1, stdout: e.stdout ?? "", - stderr: e.stderr ?? "", + // Preserve the error message when there is no stderr (e.g. a spawn + // failure like ENOENT, where promisified execFile sets stderr to an + // EMPTY STRING — so `||`, not `??`, to fall through to `message`). + stderr: e.stderr || e.message || "", }; } } @@ -173,10 +198,24 @@ export class VaultGit { // that core.autocrlf=false does not cover). POSIX-only path, which is // fine: the daemon runs on Linux (Docker) / macOS. A system // /etc/gitattributes remains the host admin's domain (out of scope). - await this.run(["config", "core.autocrlf", "false"]); - await this.run(["config", "core.safecrlf", "false"]); - await this.run(["config", "commit.gpgsign", "false"]); - await this.run(["config", "core.attributesFile", "/dev/null"]); + // NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a + // human running git by hand in the vault must inherit the same neutralized + // behavior; a transient `-c` would not persist. (core.quotepath, by + // contrast, only affects OUR parsing of output and so is baked into the + // `runRaw` argv baseline instead.) + try { + await this.run(["config", "core.autocrlf", "false"]); + await this.run(["config", "core.safecrlf", "false"]); + await this.run(["config", "commit.gpgsign", "false"]); + await this.run(["config", "core.attributesFile", "/dev/null"]); + } catch (err: unknown) { + const detail = err instanceof Error ? err.message : String(err); + throw new Error( + `failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` + + "/.git/config is writable and not locked (e.g. stale config.lock): " + + detail, + ); + } // Create the initial empty commit on `main` if the repo has no commits yet, // so both `main` and (later) `docmost` branches have a common base. @@ -309,21 +348,21 @@ export class VaultGit { const args = ["commit", "--no-verify", "-m", fullMessage]; if (opts.allowEmpty) args.push("--allow-empty"); - await execFileAsync("git", ["--no-pager", ...args], { - cwd: this.vaultPath, - maxBuffer: 64 * 1024 * 1024, - env: vaultGitEnv({ + // Route through the single `runRaw` primitive; set author + committer + // identity via env vars (so the committer matches the author, not the repo + // default). Throw via the same unified message on a non-zero exit. + const r = await this.runRaw(args, { + env: { GIT_AUTHOR_NAME: opts.authorName, GIT_AUTHOR_EMAIL: opts.authorEmail, GIT_COMMITTER_NAME: opts.authorName, GIT_COMMITTER_EMAIL: opts.authorEmail, - }), - }).catch((err: unknown) => { - const e = err as { stderr?: string; message?: string }; - throw new Error( - `git commit failed: ${(e.stderr || e.message || "").toString().trim()}`, - ); + }, }); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ${args.join(" ")} failed: ${detail}`); + } } /** @@ -363,7 +402,9 @@ export class VaultGit { * returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`), * which `src/pull.ts` `readExisting` would then parse as garbage paths, * breaking move/duplicate detection. We defeat that two ways at once: - * - `-c core.quotepath=false` disables the octal-escape/quoting. + * - `core.quotepath=false` disables the octal-escape/quoting. It is now the + * `runRaw` argv baseline (prepended to EVERY invocation), so we no longer + * pass it inline here. * - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline * ambiguity), which we split on `\0`. * We read the RAW stdout (NOT the trimming `run()` helper, which would mangle @@ -371,11 +412,10 @@ export class VaultGit { * are returned verbatim — git already emits forward slashes. */ async listTrackedFiles(glob?: string): Promise { - const args = ["-c", "core.quotepath=false", "ls-files", "-z"]; - if (glob) args.push(glob); - const r = await this.runRaw(args); + const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]); if (r.code !== 0) { - throw new Error(`git ls-files failed: ${r.stderr.trim()}`); + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ls-files failed: ${detail}`); } return r.stdout.split("\0").filter((p) => p.length > 0); } @@ -383,6 +423,8 @@ export class VaultGit { /** * Build the environment for a vault git invocation (SPEC §12 cwd-isolation). + * Used by the single `runRaw` primitive every git command flows through, so + * these pins apply uniformly (including the `git --version` preflight). * * cwd-isolation is this module's central safety guarantee: every git command * MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An @@ -390,8 +432,10 @@ export class VaultGit { * redirect the operation away from `cwd` (e.g. to the source repo or another * checkout), defeating that guarantee. So we always strip them, regardless of * whatever else the caller adds (author/committer identity, etc.). + * + * Exported for unit testing. */ -function vaultGitEnv( +export function vaultGitEnv( extra?: Record, ): NodeJS.ProcessEnv { const env: NodeJS.ProcessEnv = { diff --git a/test/git.test.ts b/test/git.test.ts index a8a2e3a..50ed88c 100644 --- a/test/git.test.ts +++ b/test/git.test.ts @@ -4,11 +4,13 @@ import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { promisify } from 'node:util'; import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { chmod } from 'node:fs/promises'; import { VaultGit, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, buildCommitMessage, + vaultGitEnv, } from '../src/git.js'; const execFileAsync = promisify(execFile); @@ -56,6 +58,39 @@ describe('buildCommitMessage (pure)', () => { }); }); +describe('vaultGitEnv (pure)', () => { + it('pins locale, pager and prompt, and strips GIT_DIR/GIT_WORK_TREE', () => { + // Seed inputs that MUST be neutralized/stripped: a redirecting GIT_DIR and + // GIT_WORK_TREE would defeat the cwd-isolation guarantee (SPEC §12). + process.env.GIT_DIR = '/somewhere/else/.git'; + process.env.GIT_WORK_TREE = '/somewhere/else'; + try { + const env = vaultGitEnv(); + // Locale-independent output. + expect(env.LC_ALL).toBe('C'); + expect(env.LANG).toBe('C'); + // Never page, never block on an interactive prompt. + expect(env.GIT_PAGER).toBe('cat'); + expect(env.GIT_TERMINAL_PROMPT).toBe('0'); + // The redirecting vars are removed regardless of what process.env held. + expect(env.GIT_DIR).toBeUndefined(); + expect(env.GIT_WORK_TREE).toBeUndefined(); + } finally { + delete process.env.GIT_DIR; + delete process.env.GIT_WORK_TREE; + } + }); + + it('passes through caller extras (e.g. author/committer identity)', () => { + const env = vaultGitEnv({ GIT_AUTHOR_NAME: 'X', GIT_AUTHOR_EMAIL: 'x@y' }); + expect(env.GIT_AUTHOR_NAME).toBe('X'); + expect(env.GIT_AUTHOR_EMAIL).toBe('x@y'); + // Still strips the redirecting vars even with extras present. + expect(env.GIT_DIR).toBeUndefined(); + expect(env.GIT_WORK_TREE).toBeUndefined(); + }); +}); + describe('VaultGit (integration; temp repo)', () => { let available = false; let dir: string; @@ -236,6 +271,45 @@ describe('VaultGit (integration; temp repo)', () => { expect(count.trim()).toBe('1'); }); + it('commit honors --no-verify (a failing pre-commit hook does not block it)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // Commit count BEFORE: just the init commit. + const countBefore = async (): Promise => { + const { stdout } = await execFileAsync( + 'git', + ['rev-list', '--count', 'HEAD'], + { cwd: vault }, + ); + return Number(stdout.trim()); + }; + const before = await countBefore(); + + // Install an EXECUTABLE pre-commit hook that always fails. Without + // `--no-verify`, `git commit` would run it, the hook would `exit 1`, and the + // commit would be ABORTED. So this test fails (no commit created, made !== + // true) the moment `--no-verify` is removed from commitRaw. + const hookPath = join(vault, '.git', 'hooks', 'pre-commit'); + await writeFile(hookPath, '#!/bin/sh\nexit 1\n', 'utf8'); + await chmod(hookPath, 0o755); + + await writeFile(join(vault, 'hooked.md'), 'content\n', 'utf8'); + await git.stageAll(); + const made = await git.commit('commit past a failing hook', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + trailers: ['Docmost-Sync-Source: docmost'], + }); + + // The commit was reported made AND actually landed (HEAD advanced by one). + expect(made).toBe(true); + expect(await countBefore()).toBe(before + 1); + expect(await headMessage(vault)).toContain('commit past a failing hook'); + }); + it('merge fast-forwards main to docmost', async () => { if (!available) return; const vault = await freshDir();