refactor(sync): unify git exec layer; fix non-ASCII paths + diagnostics (review)

Address a code review of the git-hardening changes.

- single runRaw primitive: every git invocation funnels through it; run() is a
  thin throw+trim wrapper; the two direct execFileAsync bypasses (commitRaw,
  assertGitAvailable) removed; one unified error format
- `-c core.quotepath=false` is now the argv baseline for ALL commands (was only
  listTrackedFiles) — removes the latent quoting asymmetry on ls-files -u /
  diff --name-only; persisted LOCAL config (autocrlf/safecrlf/gpgsign/
  attributesFile) kept as-is in ensureRepo
- preserve spawn-error message (ENOENT): use `||` not `??` (promisified execFile
  sets stderr to "" on spawn failure)
- contextual error when pinning vault git config; module/vaultGitEnv docs corrected
- README: require a system git binary on PATH for local runs
- tests: --no-verify honored (failing pre-commit hook), vaultGitEnv pins,
  core.attributesFile=/dev/null neutralization (593 green)
This commit is contained in:
vvzvlad
2026-06-17 00:32:37 +03:00
parent ec0a3d47c7
commit f68168e3c1
3 changed files with 168 additions and 48 deletions

View File

@@ -28,7 +28,9 @@ This is an npm-workspaces monorepo:
## Install & build
Requires Node >= 20.
Requires Node >= 20. For local (non-Docker) runs a system `git` binary must
also be on `PATH` — the vault state store shells out to it for every operation
(the daemon fails fast at startup if `git` is missing).
```sh
npm install # links the workspace packages

View File

@@ -13,7 +13,10 @@
* - We shell out via `node:child_process` `execFile` (promisified), passing
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
* if a page title / branch name contains shell metacharacters.
* - Every invocation prepends `--no-pager` so git never blocks on a pager.
* - EVERY git invocation funnels through the single `runRaw` primitive, which
* ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git
* never blocks on a pager and always prints verbatim UTF-8 paths). There is
* no exception — even the `git --version` preflight goes through `runRaw`.
* - "nothing to commit" is treated as a graceful no-op, not an error.
*/
import { execFile } from "node:child_process";
@@ -67,51 +70,69 @@ export class VaultGit {
* with NO `cwd` (the vault dir may not exist yet at preflight time).
*/
async assertGitAvailable(): Promise<void> {
try {
await execFileAsync("git", ["--version"], { env: vaultGitEnv() });
} catch (err: unknown) {
const e = err as { message?: string };
// Goes through the single `runRaw` primitive like every other invocation.
// `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at
// preflight time, so we must not point git at a missing directory.
const r = await this.runRaw(["--version"], { cwd: null });
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(
"git binary not found or not runnable — install git (the vault state " +
`store requires it). Underlying error: ${(e.message ?? "").trim()}`,
`store requires it). Underlying error: ${detail}`,
);
}
}
/**
* Run `git --no-pager <args...>` in the vault. Returns trimmed stdout.
* Throws a clear Error (including stderr) on a non-zero exit.
* Run a git command in the vault and return trimmed stdout. THIN wrapper over
* the single `runRaw` primitive: throws a clear, unified Error (including
* stderr/stdout) on a non-zero exit.
*/
private async run(args: string[]): Promise<string> {
try {
const { stdout } = await execFileAsync("git", ["--no-pager", ...args], {
cwd: this.vaultPath,
// Generous buffer: `git status --porcelain` / file listings on a large
// vault can be sizable.
maxBuffer: 64 * 1024 * 1024,
env: vaultGitEnv(),
});
return stdout.trim();
} catch (err: unknown) {
const e = err as { stderr?: string; stdout?: string; message?: string };
const detail = (e.stderr || e.stdout || e.message || "").toString().trim();
private async run(
args: string[],
opts?: { cwd?: string | null; env?: Record<string, string> },
): Promise<string> {
const r = await this.runRaw(args, opts);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
return r.stdout.trim();
}
/**
* Like `run`, but returns the full exit info instead of throwing on a
* non-zero exit. Used where a non-zero exit is an expected, meaningful state
* (e.g. a merge conflict, or a porcelain diff that "fails" deliberately).
* The ONE primitive every git invocation in this module flows through. Builds
* the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
* maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
* can treat a non-zero exit as either an error (`run`) or a meaningful state
* (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
*
* - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
* blocks on a pager and always prints verbatim UTF-8 paths (no octal
* escaping/quoting). `quotepath=false` is the baseline for ALL path-
* printing commands (ls-files, diff --name-only, …).
* - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
* vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
* - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
* - On a spawn/exec error we capture the error `message` too, so a failure
* before git could write to stderr (e.g. ENOENT) is NOT lost.
*/
private async runRaw(
args: string[],
opts?: { cwd?: string | null; env?: Record<string, string> },
): Promise<{ code: number; stdout: string; stderr: string }> {
const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath);
try {
const { stdout, stderr } = await execFileAsync(
"git",
["--no-pager", ...args],
{ cwd: this.vaultPath, maxBuffer: 64 * 1024 * 1024, env: vaultGitEnv() },
["--no-pager", "-c", "core.quotepath=false", ...args],
{
// Generous buffer: file listings / porcelain output on a large vault
// can be sizable.
...(cwd !== undefined ? { cwd } : {}),
maxBuffer: 64 * 1024 * 1024,
env: vaultGitEnv(opts?.env),
},
);
return { code: 0, stdout, stderr };
} catch (err: unknown) {
@@ -119,11 +140,15 @@ export class VaultGit {
code?: number;
stdout?: string;
stderr?: string;
message?: string;
};
return {
code: typeof e.code === "number" ? e.code : 1,
stdout: e.stdout ?? "",
stderr: e.stderr ?? "",
// Preserve the error message when there is no stderr (e.g. a spawn
// failure like ENOENT, where promisified execFile sets stderr to an
// EMPTY STRING — so `||`, not `??`, to fall through to `message`).
stderr: e.stderr || e.message || "",
};
}
}
@@ -173,10 +198,24 @@ export class VaultGit {
// that core.autocrlf=false does not cover). POSIX-only path, which is
// fine: the daemon runs on Linux (Docker) / macOS. A system
// /etc/gitattributes remains the host admin's domain (out of scope).
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
// human running git by hand in the vault must inherit the same neutralized
// behavior; a transient `-c` would not persist. (core.quotepath, by
// contrast, only affects OUR parsing of output and so is baked into the
// `runRaw` argv baseline instead.)
try {
await this.run(["config", "core.autocrlf", "false"]);
await this.run(["config", "core.safecrlf", "false"]);
await this.run(["config", "commit.gpgsign", "false"]);
await this.run(["config", "core.attributesFile", "/dev/null"]);
} catch (err: unknown) {
const detail = err instanceof Error ? err.message : String(err);
throw new Error(
`failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` +
"/.git/config is writable and not locked (e.g. stale config.lock): " +
detail,
);
}
// Create the initial empty commit on `main` if the repo has no commits yet,
// so both `main` and (later) `docmost` branches have a common base.
@@ -309,21 +348,21 @@ export class VaultGit {
const args = ["commit", "--no-verify", "-m", fullMessage];
if (opts.allowEmpty) args.push("--allow-empty");
await execFileAsync("git", ["--no-pager", ...args], {
cwd: this.vaultPath,
maxBuffer: 64 * 1024 * 1024,
env: vaultGitEnv({
// Route through the single `runRaw` primitive; set author + committer
// identity via env vars (so the committer matches the author, not the repo
// default). Throw via the same unified message on a non-zero exit.
const r = await this.runRaw(args, {
env: {
GIT_AUTHOR_NAME: opts.authorName,
GIT_AUTHOR_EMAIL: opts.authorEmail,
GIT_COMMITTER_NAME: opts.authorName,
GIT_COMMITTER_EMAIL: opts.authorEmail,
}),
}).catch((err: unknown) => {
const e = err as { stderr?: string; message?: string };
throw new Error(
`git commit failed: ${(e.stderr || e.message || "").toString().trim()}`,
);
},
});
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
}
/**
@@ -363,7 +402,9 @@ export class VaultGit {
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
* breaking move/duplicate detection. We defeat that two ways at once:
* - `-c core.quotepath=false` disables the octal-escape/quoting.
* - `core.quotepath=false` disables the octal-escape/quoting. It is now the
* `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
* pass it inline here.
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
* ambiguity), which we split on `\0`.
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
@@ -371,11 +412,10 @@ export class VaultGit {
* are returned verbatim — git already emits forward slashes.
*/
async listTrackedFiles(glob?: string): Promise<string[]> {
const args = ["-c", "core.quotepath=false", "ls-files", "-z"];
if (glob) args.push(glob);
const r = await this.runRaw(args);
const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]);
if (r.code !== 0) {
throw new Error(`git ls-files failed: ${r.stderr.trim()}`);
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ls-files failed: ${detail}`);
}
return r.stdout.split("\0").filter((p) => p.length > 0);
}
@@ -383,6 +423,8 @@ export class VaultGit {
/**
* Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
* Used by the single `runRaw` primitive every git command flows through, so
* these pins apply uniformly (including the `git --version` preflight).
*
* cwd-isolation is this module's central safety guarantee: every git command
* MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
@@ -390,8 +432,10 @@ export class VaultGit {
* redirect the operation away from `cwd` (e.g. to the source repo or another
* checkout), defeating that guarantee. So we always strip them, regardless of
* whatever else the caller adds (author/committer identity, etc.).
*
* Exported for unit testing.
*/
function vaultGitEnv(
export function vaultGitEnv(
extra?: Record<string, string>,
): NodeJS.ProcessEnv {
const env: NodeJS.ProcessEnv = {

View File

@@ -4,11 +4,13 @@ import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { promisify } from 'node:util';
import { afterEach, beforeAll, describe, expect, it } from 'vitest';
import { chmod } from 'node:fs/promises';
import {
VaultGit,
BOT_AUTHOR_NAME,
BOT_AUTHOR_EMAIL,
buildCommitMessage,
vaultGitEnv,
} from '../src/git.js';
const execFileAsync = promisify(execFile);
@@ -56,6 +58,39 @@ describe('buildCommitMessage (pure)', () => {
});
});
describe('vaultGitEnv (pure)', () => {
it('pins locale, pager and prompt, and strips GIT_DIR/GIT_WORK_TREE', () => {
// Seed inputs that MUST be neutralized/stripped: a redirecting GIT_DIR and
// GIT_WORK_TREE would defeat the cwd-isolation guarantee (SPEC §12).
process.env.GIT_DIR = '/somewhere/else/.git';
process.env.GIT_WORK_TREE = '/somewhere/else';
try {
const env = vaultGitEnv();
// Locale-independent output.
expect(env.LC_ALL).toBe('C');
expect(env.LANG).toBe('C');
// Never page, never block on an interactive prompt.
expect(env.GIT_PAGER).toBe('cat');
expect(env.GIT_TERMINAL_PROMPT).toBe('0');
// The redirecting vars are removed regardless of what process.env held.
expect(env.GIT_DIR).toBeUndefined();
expect(env.GIT_WORK_TREE).toBeUndefined();
} finally {
delete process.env.GIT_DIR;
delete process.env.GIT_WORK_TREE;
}
});
it('passes through caller extras (e.g. author/committer identity)', () => {
const env = vaultGitEnv({ GIT_AUTHOR_NAME: 'X', GIT_AUTHOR_EMAIL: 'x@y' });
expect(env.GIT_AUTHOR_NAME).toBe('X');
expect(env.GIT_AUTHOR_EMAIL).toBe('x@y');
// Still strips the redirecting vars even with extras present.
expect(env.GIT_DIR).toBeUndefined();
expect(env.GIT_WORK_TREE).toBeUndefined();
});
});
describe('VaultGit (integration; temp repo)', () => {
let available = false;
let dir: string;
@@ -236,6 +271,45 @@ describe('VaultGit (integration; temp repo)', () => {
expect(count.trim()).toBe('1');
});
it('commit honors --no-verify (a failing pre-commit hook does not block it)', async () => {
if (!available) return;
const vault = await freshDir();
const git = new VaultGit(vault);
await git.ensureRepo();
// Commit count BEFORE: just the init commit.
const countBefore = async (): Promise<number> => {
const { stdout } = await execFileAsync(
'git',
['rev-list', '--count', 'HEAD'],
{ cwd: vault },
);
return Number(stdout.trim());
};
const before = await countBefore();
// Install an EXECUTABLE pre-commit hook that always fails. Without
// `--no-verify`, `git commit` would run it, the hook would `exit 1`, and the
// commit would be ABORTED. So this test fails (no commit created, made !==
// true) the moment `--no-verify` is removed from commitRaw.
const hookPath = join(vault, '.git', 'hooks', 'pre-commit');
await writeFile(hookPath, '#!/bin/sh\nexit 1\n', 'utf8');
await chmod(hookPath, 0o755);
await writeFile(join(vault, 'hooked.md'), 'content\n', 'utf8');
await git.stageAll();
const made = await git.commit('commit past a failing hook', {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
trailers: ['Docmost-Sync-Source: docmost'],
});
// The commit was reported made AND actually landed (HEAD advanced by one).
expect(made).toBe(true);
expect(await countBefore()).toBe(before + 1);
expect(await headMessage(vault)).toContain('commit past a failing hook');
});
it('merge fast-forwards main to docmost', async () => {
if (!available) return;
const vault = await freshDir();