fix(sync): robust git coupling — non-ASCII paths, config neutralization, runtime git
Address git-integration fragility (output is not parsed for control flow; we rely on exit codes + plumbing — but porcelain BEHAVIOR is config-sensitive, and the runtime image lacked git). - listTrackedFiles: `git -c core.quotepath=false ls-files -z` + NUL split — fixes Cyrillic/UTF-8 vault filenames being returned octal-escaped/quoted - Dockerfile: install git (node:22-slim ships none; the daemon shells out at runtime) - VaultGit env: LC_ALL=C/LANG=C, GIT_PAGER=cat, GIT_TERMINAL_PROMPT=0; keep stripping GIT_DIR/GIT_WORK_TREE (cwd-isolation, §12) - ensureRepo local config: core.autocrlf=false + core.safecrlf=false (protect §11 byte-stability from a global autocrlf=true), commit.gpgsign=false, and core.attributesFile=/dev/null (neutralize a global clean/smudge filter that would rewrite the stored blob); commit uses --no-verify (skip injected hooks) - assertGitAvailable() preflight: clear error if the git binary is missing - tests: Cyrillic listTrackedFiles, LF byte-preservation of the stored blob, local-config neutralization incl. attributesFile (590+ green)
This commit is contained in:
@@ -2,6 +2,13 @@ FROM node:22-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# The daemon shells out to the system `git` binary at runtime (git is the vault
|
||||
# state store), but node:22-slim does NOT ship git. Install it and KEEP it in
|
||||
# the final image. Placed before `npm ci` so this layer caches across rebuilds.
|
||||
# `npm prune --omit=dev` below cannot remove this — it is an OS package, not an
|
||||
# npm dependency.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Dependencies first (better layer caching): copy the root manifest, the lock,
|
||||
# and the workspace package manifest so `npm ci` can link the workspace.
|
||||
COPY package.json package-lock.json ./
|
||||
|
||||
88
src/git.ts
88
src/git.ts
@@ -58,6 +58,26 @@ export interface CommitOptions {
|
||||
export class VaultGit {
|
||||
constructor(private readonly vaultPath: string) {}
|
||||
|
||||
/**
|
||||
* Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
|
||||
* to system `git` for every vault operation, so a missing binary (e.g. a slim
|
||||
* container image without git) must fail fast with an actionable message
|
||||
* rather than a cryptic ENOENT deep inside the first real git call. Presence
|
||||
* check only — we do NOT gate on a specific version. Runs `git --version`
|
||||
* with NO `cwd` (the vault dir may not exist yet at preflight time).
|
||||
*/
|
||||
async assertGitAvailable(): Promise<void> {
|
||||
try {
|
||||
await execFileAsync("git", ["--version"], { env: vaultGitEnv() });
|
||||
} catch (err: unknown) {
|
||||
const e = err as { message?: string };
|
||||
throw new Error(
|
||||
"git binary not found or not runnable — install git (the vault state " +
|
||||
`store requires it). Underlying error: ${(e.message ?? "").trim()}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run `git --no-pager <args...>` in the vault. Returns trimmed stdout.
|
||||
* Throws a clear Error (including stderr) on a non-zero exit.
|
||||
@@ -132,6 +152,32 @@ export class VaultGit {
|
||||
await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
|
||||
}
|
||||
|
||||
// Neutralize correctness-affecting git config in the vault's LOCAL config so
|
||||
// a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
|
||||
// output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL
|
||||
// values (which override global/system) are the right scope. Set
|
||||
// UNCONDITIONALLY every run — idempotent and cheap; `git config <key>`
|
||||
// writes to `--local` by default inside the repo. These MUST be in place
|
||||
// before any add/commit/checkout that could be affected, hence they run
|
||||
// before the initial-commit block below.
|
||||
// - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
|
||||
// would rewrite LF<->CRLF on add/checkout, making our deterministic,
|
||||
// byte-stable markdown churn and breaking the round-trip invariant.
|
||||
// `false` guarantees git stores/checks out verbatim bytes.
|
||||
// - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
|
||||
// - commit.gpgsign=false — the headless daemon must never try to GPG-sign
|
||||
// a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0).
|
||||
// - core.attributesFile=/dev/null — neutralize the user's GLOBAL
|
||||
// gitattributes so a global clean/smudge filter (filter.<name>.clean)
|
||||
// cannot rewrite the STORED blob and break §11 byte-stability (a config
|
||||
// that core.autocrlf=false does not cover). POSIX-only path, which is
|
||||
// fine: the daemon runs on Linux (Docker) / macOS. A system
|
||||
// /etc/gitattributes remains the host admin's domain (out of scope).
|
||||
await this.run(["config", "core.autocrlf", "false"]);
|
||||
await this.run(["config", "core.safecrlf", "false"]);
|
||||
await this.run(["config", "commit.gpgsign", "false"]);
|
||||
await this.run(["config", "core.attributesFile", "/dev/null"]);
|
||||
|
||||
// Create the initial empty commit on `main` if the repo has no commits yet,
|
||||
// so both `main` and (later) `docmost` branches have a common base.
|
||||
if (!(await this.hasAnyCommit())) {
|
||||
@@ -257,7 +303,10 @@ export class VaultGit {
|
||||
opts: CommitOptions & { allowEmpty?: boolean },
|
||||
): Promise<void> {
|
||||
const fullMessage = buildCommitMessage(message, opts.trailers);
|
||||
const args = ["commit", "-m", fullMessage];
|
||||
// `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath
|
||||
// (or any injected hook) must never interfere with engine commits in our
|
||||
// dedicated vault repo.
|
||||
const args = ["commit", "--no-verify", "-m", fullMessage];
|
||||
if (opts.allowEmpty) args.push("--allow-empty");
|
||||
|
||||
await execFileAsync("git", ["--no-pager", ...args], {
|
||||
@@ -308,13 +357,27 @@ export class VaultGit {
|
||||
* List tracked files on the current branch (paths relative to the vault
|
||||
* root, forward-slash separated). An optional glob (a git pathspec) narrows
|
||||
* the listing, e.g. `"*.md"`.
|
||||
*
|
||||
* The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
|
||||
* (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
|
||||
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
|
||||
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
|
||||
* breaking move/duplicate detection. We defeat that two ways at once:
|
||||
* - `-c core.quotepath=false` disables the octal-escape/quoting.
|
||||
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
|
||||
* ambiguity), which we split on `\0`.
|
||||
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
|
||||
* the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
|
||||
* are returned verbatim — git already emits forward slashes.
|
||||
*/
|
||||
async listTrackedFiles(glob?: string): Promise<string[]> {
|
||||
const args = ["ls-files"];
|
||||
const args = ["-c", "core.quotepath=false", "ls-files", "-z"];
|
||||
if (glob) args.push(glob);
|
||||
const out = await this.run(args);
|
||||
if (out.length === 0) return [];
|
||||
return out.split("\n").filter((l) => l.length > 0);
|
||||
const r = await this.runRaw(args);
|
||||
if (r.code !== 0) {
|
||||
throw new Error(`git ls-files failed: ${r.stderr.trim()}`);
|
||||
}
|
||||
return r.stdout.split("\0").filter((p) => p.length > 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -331,7 +394,20 @@ export class VaultGit {
|
||||
function vaultGitEnv(
|
||||
extra?: Record<string, string>,
|
||||
): NodeJS.ProcessEnv {
|
||||
const env: NodeJS.ProcessEnv = { ...process.env, ...extra };
|
||||
const env: NodeJS.ProcessEnv = {
|
||||
...process.env,
|
||||
// Locale-independent output (defense in depth). We never parse localized
|
||||
// prose, but pinning the locale prevents a future regression where some
|
||||
// git message we DO key on is translated by an inherited LC_ALL/LANG.
|
||||
LC_ALL: "C",
|
||||
LANG: "C",
|
||||
// Never page (we already pass --no-pager, but a stray GIT_PAGER could still
|
||||
// bite) and never block on an interactive prompt (e.g. credentials) — the
|
||||
// daemon runs unattended and must not hang.
|
||||
GIT_PAGER: "cat",
|
||||
GIT_TERMINAL_PROMPT: "0",
|
||||
...extra,
|
||||
};
|
||||
delete env.GIT_DIR;
|
||||
delete env.GIT_WORK_TREE;
|
||||
return env;
|
||||
|
||||
@@ -119,6 +119,9 @@ async function main(): Promise<void> {
|
||||
// 1. Ensure the vault git repo exists with main + an initial commit, and the
|
||||
// engine-only `docmost` branch exists, branched from main.
|
||||
const git = new VaultGit(vaultRoot);
|
||||
// Preflight: fail fast (with an actionable message via main().catch) if the
|
||||
// git binary is missing — the entire vault state store relies on it.
|
||||
await git.assertGitAvailable();
|
||||
await git.ensureRepo();
|
||||
|
||||
// 1b. Refuse to run on top of an unresolved merge (SPEC §9 / §12). A previous
|
||||
|
||||
112
test/git.test.ts
112
test/git.test.ts
@@ -1,5 +1,5 @@
|
||||
import { execFile } from 'node:child_process';
|
||||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { promisify } from 'node:util';
|
||||
@@ -106,6 +106,67 @@ describe('VaultGit (integration; temp repo)', () => {
|
||||
expect(count.trim()).toBe('1');
|
||||
});
|
||||
|
||||
it('ensureRepo neutralizes correctness-affecting LOCAL config', async () => {
|
||||
if (!available) return;
|
||||
const vault = await freshDir();
|
||||
const git = new VaultGit(vault);
|
||||
await git.ensureRepo();
|
||||
|
||||
// These LOCAL values neutralize a hostile GLOBAL/system config that would
|
||||
// otherwise change porcelain BEHAVIOR and corrupt the vault (SPEC §11 for
|
||||
// core.autocrlf; gpgsign/safecrlf for the headless daemon).
|
||||
const localConfig = async (key: string): Promise<string> => {
|
||||
const { stdout } = await execFileAsync(
|
||||
'git',
|
||||
['config', '--local', '--get', key],
|
||||
{ cwd: vault },
|
||||
);
|
||||
return stdout.trim();
|
||||
};
|
||||
expect(await localConfig('core.autocrlf')).toBe('false');
|
||||
expect(await localConfig('commit.gpgsign')).toBe('false');
|
||||
expect(await localConfig('core.safecrlf')).toBe('false');
|
||||
expect(await localConfig('core.attributesFile')).toBe('/dev/null');
|
||||
|
||||
// Idempotent: a second run leaves the same single values (no duplicates).
|
||||
await git.ensureRepo();
|
||||
expect(await localConfig('core.autocrlf')).toBe('false');
|
||||
expect(await localConfig('commit.gpgsign')).toBe('false');
|
||||
expect(await localConfig('core.safecrlf')).toBe('false');
|
||||
});
|
||||
|
||||
it('preserves LF bytes verbatim on commit (SPEC §11: autocrlf=false)', async () => {
|
||||
if (!available) return;
|
||||
const vault = await freshDir();
|
||||
const git = new VaultGit(vault);
|
||||
await git.ensureRepo();
|
||||
|
||||
// Write content with explicit LF line endings. With a hostile
|
||||
// core.autocrlf=true git would translate these to CRLF in the stored blob,
|
||||
// breaking the byte-stable round-trip invariant. ensureRepo pins
|
||||
// core.autocrlf=false locally, so the stored bytes must round-trip exactly.
|
||||
const fileName = 'lf.md';
|
||||
const content = 'line1\nline2\nline3\n';
|
||||
await writeFile(join(vault, fileName), content, 'utf8');
|
||||
await git.stageAll();
|
||||
const made = await git.commit('add LF file', {
|
||||
authorName: BOT_AUTHOR_NAME,
|
||||
authorEmail: BOT_AUTHOR_EMAIL,
|
||||
});
|
||||
expect(made).toBe(true);
|
||||
|
||||
// Read the STORED blob (not the worktree file) and assert verbatim bytes:
|
||||
// still LF-only, no CRLF translation.
|
||||
const { stdout: stored } = await execFileAsync(
|
||||
'git',
|
||||
['--no-pager', 'show', `HEAD:${fileName}`],
|
||||
{ cwd: vault, encoding: 'buffer' },
|
||||
);
|
||||
const storedBuf = stored as unknown as Buffer;
|
||||
expect(storedBuf.includes(Buffer.from('\r\n'))).toBe(false);
|
||||
expect(storedBuf.toString('utf8')).toBe(content);
|
||||
});
|
||||
|
||||
it('ensureBranch creates the docmost branch from main', async () => {
|
||||
if (!available) return;
|
||||
const vault = await freshDir();
|
||||
@@ -289,4 +350,53 @@ describe('VaultGit (integration; temp repo)', () => {
|
||||
const all = await git.listTrackedFiles();
|
||||
expect(new Set(all)).toEqual(new Set(['keep.md', 'note.txt']));
|
||||
});
|
||||
|
||||
it('listTrackedFiles returns RAW UTF-8 Cyrillic paths (not octal-escaped/quoted)', async () => {
|
||||
if (!available) return;
|
||||
const vault = await freshDir();
|
||||
const git = new VaultGit(vault);
|
||||
await git.ensureRepo();
|
||||
|
||||
// The target wiki is Russian, so file names contain Cyrillic. With git's
|
||||
// DEFAULT core.quotepath=true these come back as `"\320\232..."` from
|
||||
// ls-files; `listTrackedFiles` must return them verbatim as UTF-8.
|
||||
const topName = 'Колонка.md';
|
||||
const nestedDir = 'Раздел';
|
||||
const nestedName = 'Подстраница.md';
|
||||
await writeFile(join(vault, topName), 'top\n', 'utf8');
|
||||
await mkdir(join(vault, nestedDir), { recursive: true });
|
||||
await writeFile(join(vault, nestedDir, nestedName), 'nested\n', 'utf8');
|
||||
await git.stageAll();
|
||||
await git.commit('add cyrillic files', {
|
||||
authorName: BOT_AUTHOR_NAME,
|
||||
authorEmail: BOT_AUTHOR_EMAIL,
|
||||
});
|
||||
|
||||
const md = await git.listTrackedFiles('*.md');
|
||||
// Exact UTF-8 names, forward-slash separated for the nested one — NOT an
|
||||
// escaped/quoted form like `"\320\232..."`.
|
||||
expect(new Set(md)).toEqual(
|
||||
new Set([topName, `${nestedDir}/${nestedName}`]),
|
||||
);
|
||||
// Guard explicitly against the quotepath regression: no entry is quoted or
|
||||
// contains a backslash escape sequence.
|
||||
for (const p of md) {
|
||||
expect(p.startsWith('"')).toBe(false);
|
||||
expect(p.includes('\\')).toBe(false);
|
||||
}
|
||||
|
||||
// No-glob listing also returns the raw Cyrillic names.
|
||||
const all = await git.listTrackedFiles();
|
||||
expect(all).toContain(topName);
|
||||
expect(all).toContain(`${nestedDir}/${nestedName}`);
|
||||
});
|
||||
|
||||
it('assertGitAvailable resolves when git is present', async () => {
|
||||
if (!available) return;
|
||||
const vault = await freshDir();
|
||||
const git = new VaultGit(vault);
|
||||
// No repo needed: it only probes `git --version` (and the vault dir need
|
||||
// not even exist yet).
|
||||
await expect(git.assertGitAvailable()).resolves.toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user