fix(sync): robust git coupling — non-ASCII paths, config neutralization, runtime git
Address git-integration fragility (output is not parsed for control flow; we rely on exit codes + plumbing — but porcelain BEHAVIOR is config-sensitive, and the runtime image lacked git). - listTrackedFiles: `git -c core.quotepath=false ls-files -z` + NUL split — fixes Cyrillic/UTF-8 vault filenames being returned octal-escaped/quoted - Dockerfile: install git (node:22-slim ships none; the daemon shells out at runtime) - VaultGit env: LC_ALL=C/LANG=C, GIT_PAGER=cat, GIT_TERMINAL_PROMPT=0; keep stripping GIT_DIR/GIT_WORK_TREE (cwd-isolation, §12) - ensureRepo local config: core.autocrlf=false + core.safecrlf=false (protect §11 byte-stability from a global autocrlf=true), commit.gpgsign=false, and core.attributesFile=/dev/null (neutralize a global clean/smudge filter that would rewrite the stored blob); commit uses --no-verify (skip injected hooks) - assertGitAvailable() preflight: clear error if the git binary is missing - tests: Cyrillic listTrackedFiles, LF byte-preservation of the stored blob, local-config neutralization incl. attributesFile (590+ green)
This commit is contained in:
@@ -2,6 +2,13 @@ FROM node:22-slim
|
|||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
# The daemon shells out to the system `git` binary at runtime (git is the vault
|
||||||
|
# state store), but node:22-slim does NOT ship git. Install it and KEEP it in
|
||||||
|
# the final image. Placed before `npm ci` so this layer caches across rebuilds.
|
||||||
|
# `npm prune --omit=dev` below cannot remove this — it is an OS package, not an
|
||||||
|
# npm dependency.
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Dependencies first (better layer caching): copy the root manifest, the lock,
|
# Dependencies first (better layer caching): copy the root manifest, the lock,
|
||||||
# and the workspace package manifest so `npm ci` can link the workspace.
|
# and the workspace package manifest so `npm ci` can link the workspace.
|
||||||
COPY package.json package-lock.json ./
|
COPY package.json package-lock.json ./
|
||||||
|
|||||||
88
src/git.ts
88
src/git.ts
@@ -58,6 +58,26 @@ export interface CommitOptions {
|
|||||||
export class VaultGit {
|
export class VaultGit {
|
||||||
constructor(private readonly vaultPath: string) {}
|
constructor(private readonly vaultPath: string) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
|
||||||
|
* to system `git` for every vault operation, so a missing binary (e.g. a slim
|
||||||
|
* container image without git) must fail fast with an actionable message
|
||||||
|
* rather than a cryptic ENOENT deep inside the first real git call. Presence
|
||||||
|
* check only — we do NOT gate on a specific version. Runs `git --version`
|
||||||
|
* with NO `cwd` (the vault dir may not exist yet at preflight time).
|
||||||
|
*/
|
||||||
|
async assertGitAvailable(): Promise<void> {
|
||||||
|
try {
|
||||||
|
await execFileAsync("git", ["--version"], { env: vaultGitEnv() });
|
||||||
|
} catch (err: unknown) {
|
||||||
|
const e = err as { message?: string };
|
||||||
|
throw new Error(
|
||||||
|
"git binary not found or not runnable — install git (the vault state " +
|
||||||
|
`store requires it). Underlying error: ${(e.message ?? "").trim()}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run `git --no-pager <args...>` in the vault. Returns trimmed stdout.
|
* Run `git --no-pager <args...>` in the vault. Returns trimmed stdout.
|
||||||
* Throws a clear Error (including stderr) on a non-zero exit.
|
* Throws a clear Error (including stderr) on a non-zero exit.
|
||||||
@@ -132,6 +152,32 @@ export class VaultGit {
|
|||||||
await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
|
await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Neutralize correctness-affecting git config in the vault's LOCAL config so
|
||||||
|
// a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
|
||||||
|
// output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL
|
||||||
|
// values (which override global/system) are the right scope. Set
|
||||||
|
// UNCONDITIONALLY every run — idempotent and cheap; `git config <key>`
|
||||||
|
// writes to `--local` by default inside the repo. These MUST be in place
|
||||||
|
// before any add/commit/checkout that could be affected, hence they run
|
||||||
|
// before the initial-commit block below.
|
||||||
|
// - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
|
||||||
|
// would rewrite LF<->CRLF on add/checkout, making our deterministic,
|
||||||
|
// byte-stable markdown churn and breaking the round-trip invariant.
|
||||||
|
// `false` guarantees git stores/checks out verbatim bytes.
|
||||||
|
// - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
|
||||||
|
// - commit.gpgsign=false — the headless daemon must never try to GPG-sign
|
||||||
|
// a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0).
|
||||||
|
// - core.attributesFile=/dev/null — neutralize the user's GLOBAL
|
||||||
|
// gitattributes so a global clean/smudge filter (filter.<name>.clean)
|
||||||
|
// cannot rewrite the STORED blob and break §11 byte-stability (a config
|
||||||
|
// that core.autocrlf=false does not cover). POSIX-only path, which is
|
||||||
|
// fine: the daemon runs on Linux (Docker) / macOS. A system
|
||||||
|
// /etc/gitattributes remains the host admin's domain (out of scope).
|
||||||
|
await this.run(["config", "core.autocrlf", "false"]);
|
||||||
|
await this.run(["config", "core.safecrlf", "false"]);
|
||||||
|
await this.run(["config", "commit.gpgsign", "false"]);
|
||||||
|
await this.run(["config", "core.attributesFile", "/dev/null"]);
|
||||||
|
|
||||||
// Create the initial empty commit on `main` if the repo has no commits yet,
|
// Create the initial empty commit on `main` if the repo has no commits yet,
|
||||||
// so both `main` and (later) `docmost` branches have a common base.
|
// so both `main` and (later) `docmost` branches have a common base.
|
||||||
if (!(await this.hasAnyCommit())) {
|
if (!(await this.hasAnyCommit())) {
|
||||||
@@ -257,7 +303,10 @@ export class VaultGit {
|
|||||||
opts: CommitOptions & { allowEmpty?: boolean },
|
opts: CommitOptions & { allowEmpty?: boolean },
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
const fullMessage = buildCommitMessage(message, opts.trailers);
|
const fullMessage = buildCommitMessage(message, opts.trailers);
|
||||||
const args = ["commit", "-m", fullMessage];
|
// `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath
|
||||||
|
// (or any injected hook) must never interfere with engine commits in our
|
||||||
|
// dedicated vault repo.
|
||||||
|
const args = ["commit", "--no-verify", "-m", fullMessage];
|
||||||
if (opts.allowEmpty) args.push("--allow-empty");
|
if (opts.allowEmpty) args.push("--allow-empty");
|
||||||
|
|
||||||
await execFileAsync("git", ["--no-pager", ...args], {
|
await execFileAsync("git", ["--no-pager", ...args], {
|
||||||
@@ -308,13 +357,27 @@ export class VaultGit {
|
|||||||
* List tracked files on the current branch (paths relative to the vault
|
* List tracked files on the current branch (paths relative to the vault
|
||||||
* root, forward-slash separated). An optional glob (a git pathspec) narrows
|
* root, forward-slash separated). An optional glob (a git pathspec) narrows
|
||||||
* the listing, e.g. `"*.md"`.
|
* the listing, e.g. `"*.md"`.
|
||||||
|
*
|
||||||
|
* The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
|
||||||
|
* (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
|
||||||
|
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
|
||||||
|
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
|
||||||
|
* breaking move/duplicate detection. We defeat that two ways at once:
|
||||||
|
* - `-c core.quotepath=false` disables the octal-escape/quoting.
|
||||||
|
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
|
||||||
|
* ambiguity), which we split on `\0`.
|
||||||
|
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
|
||||||
|
* the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
|
||||||
|
* are returned verbatim — git already emits forward slashes.
|
||||||
*/
|
*/
|
||||||
async listTrackedFiles(glob?: string): Promise<string[]> {
|
async listTrackedFiles(glob?: string): Promise<string[]> {
|
||||||
const args = ["ls-files"];
|
const args = ["-c", "core.quotepath=false", "ls-files", "-z"];
|
||||||
if (glob) args.push(glob);
|
if (glob) args.push(glob);
|
||||||
const out = await this.run(args);
|
const r = await this.runRaw(args);
|
||||||
if (out.length === 0) return [];
|
if (r.code !== 0) {
|
||||||
return out.split("\n").filter((l) => l.length > 0);
|
throw new Error(`git ls-files failed: ${r.stderr.trim()}`);
|
||||||
|
}
|
||||||
|
return r.stdout.split("\0").filter((p) => p.length > 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -331,7 +394,20 @@ export class VaultGit {
|
|||||||
function vaultGitEnv(
|
function vaultGitEnv(
|
||||||
extra?: Record<string, string>,
|
extra?: Record<string, string>,
|
||||||
): NodeJS.ProcessEnv {
|
): NodeJS.ProcessEnv {
|
||||||
const env: NodeJS.ProcessEnv = { ...process.env, ...extra };
|
const env: NodeJS.ProcessEnv = {
|
||||||
|
...process.env,
|
||||||
|
// Locale-independent output (defense in depth). We never parse localized
|
||||||
|
// prose, but pinning the locale prevents a future regression where some
|
||||||
|
// git message we DO key on is translated by an inherited LC_ALL/LANG.
|
||||||
|
LC_ALL: "C",
|
||||||
|
LANG: "C",
|
||||||
|
// Never page (we already pass --no-pager, but a stray GIT_PAGER could still
|
||||||
|
// bite) and never block on an interactive prompt (e.g. credentials) — the
|
||||||
|
// daemon runs unattended and must not hang.
|
||||||
|
GIT_PAGER: "cat",
|
||||||
|
GIT_TERMINAL_PROMPT: "0",
|
||||||
|
...extra,
|
||||||
|
};
|
||||||
delete env.GIT_DIR;
|
delete env.GIT_DIR;
|
||||||
delete env.GIT_WORK_TREE;
|
delete env.GIT_WORK_TREE;
|
||||||
return env;
|
return env;
|
||||||
|
|||||||
@@ -119,6 +119,9 @@ async function main(): Promise<void> {
|
|||||||
// 1. Ensure the vault git repo exists with main + an initial commit, and the
|
// 1. Ensure the vault git repo exists with main + an initial commit, and the
|
||||||
// engine-only `docmost` branch exists, branched from main.
|
// engine-only `docmost` branch exists, branched from main.
|
||||||
const git = new VaultGit(vaultRoot);
|
const git = new VaultGit(vaultRoot);
|
||||||
|
// Preflight: fail fast (with an actionable message via main().catch) if the
|
||||||
|
// git binary is missing — the entire vault state store relies on it.
|
||||||
|
await git.assertGitAvailable();
|
||||||
await git.ensureRepo();
|
await git.ensureRepo();
|
||||||
|
|
||||||
// 1b. Refuse to run on top of an unresolved merge (SPEC §9 / §12). A previous
|
// 1b. Refuse to run on top of an unresolved merge (SPEC §9 / §12). A previous
|
||||||
|
|||||||
112
test/git.test.ts
112
test/git.test.ts
@@ -1,5 +1,5 @@
|
|||||||
import { execFile } from 'node:child_process';
|
import { execFile } from 'node:child_process';
|
||||||
import { mkdtemp, rm, writeFile } from 'node:fs/promises';
|
import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
||||||
import { tmpdir } from 'node:os';
|
import { tmpdir } from 'node:os';
|
||||||
import { join } from 'node:path';
|
import { join } from 'node:path';
|
||||||
import { promisify } from 'node:util';
|
import { promisify } from 'node:util';
|
||||||
@@ -106,6 +106,67 @@ describe('VaultGit (integration; temp repo)', () => {
|
|||||||
expect(count.trim()).toBe('1');
|
expect(count.trim()).toBe('1');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('ensureRepo neutralizes correctness-affecting LOCAL config', async () => {
|
||||||
|
if (!available) return;
|
||||||
|
const vault = await freshDir();
|
||||||
|
const git = new VaultGit(vault);
|
||||||
|
await git.ensureRepo();
|
||||||
|
|
||||||
|
// These LOCAL values neutralize a hostile GLOBAL/system config that would
|
||||||
|
// otherwise change porcelain BEHAVIOR and corrupt the vault (SPEC §11 for
|
||||||
|
// core.autocrlf; gpgsign/safecrlf for the headless daemon).
|
||||||
|
const localConfig = async (key: string): Promise<string> => {
|
||||||
|
const { stdout } = await execFileAsync(
|
||||||
|
'git',
|
||||||
|
['config', '--local', '--get', key],
|
||||||
|
{ cwd: vault },
|
||||||
|
);
|
||||||
|
return stdout.trim();
|
||||||
|
};
|
||||||
|
expect(await localConfig('core.autocrlf')).toBe('false');
|
||||||
|
expect(await localConfig('commit.gpgsign')).toBe('false');
|
||||||
|
expect(await localConfig('core.safecrlf')).toBe('false');
|
||||||
|
expect(await localConfig('core.attributesFile')).toBe('/dev/null');
|
||||||
|
|
||||||
|
// Idempotent: a second run leaves the same single values (no duplicates).
|
||||||
|
await git.ensureRepo();
|
||||||
|
expect(await localConfig('core.autocrlf')).toBe('false');
|
||||||
|
expect(await localConfig('commit.gpgsign')).toBe('false');
|
||||||
|
expect(await localConfig('core.safecrlf')).toBe('false');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves LF bytes verbatim on commit (SPEC §11: autocrlf=false)', async () => {
|
||||||
|
if (!available) return;
|
||||||
|
const vault = await freshDir();
|
||||||
|
const git = new VaultGit(vault);
|
||||||
|
await git.ensureRepo();
|
||||||
|
|
||||||
|
// Write content with explicit LF line endings. With a hostile
|
||||||
|
// core.autocrlf=true git would translate these to CRLF in the stored blob,
|
||||||
|
// breaking the byte-stable round-trip invariant. ensureRepo pins
|
||||||
|
// core.autocrlf=false locally, so the stored bytes must round-trip exactly.
|
||||||
|
const fileName = 'lf.md';
|
||||||
|
const content = 'line1\nline2\nline3\n';
|
||||||
|
await writeFile(join(vault, fileName), content, 'utf8');
|
||||||
|
await git.stageAll();
|
||||||
|
const made = await git.commit('add LF file', {
|
||||||
|
authorName: BOT_AUTHOR_NAME,
|
||||||
|
authorEmail: BOT_AUTHOR_EMAIL,
|
||||||
|
});
|
||||||
|
expect(made).toBe(true);
|
||||||
|
|
||||||
|
// Read the STORED blob (not the worktree file) and assert verbatim bytes:
|
||||||
|
// still LF-only, no CRLF translation.
|
||||||
|
const { stdout: stored } = await execFileAsync(
|
||||||
|
'git',
|
||||||
|
['--no-pager', 'show', `HEAD:${fileName}`],
|
||||||
|
{ cwd: vault, encoding: 'buffer' },
|
||||||
|
);
|
||||||
|
const storedBuf = stored as unknown as Buffer;
|
||||||
|
expect(storedBuf.includes(Buffer.from('\r\n'))).toBe(false);
|
||||||
|
expect(storedBuf.toString('utf8')).toBe(content);
|
||||||
|
});
|
||||||
|
|
||||||
it('ensureBranch creates the docmost branch from main', async () => {
|
it('ensureBranch creates the docmost branch from main', async () => {
|
||||||
if (!available) return;
|
if (!available) return;
|
||||||
const vault = await freshDir();
|
const vault = await freshDir();
|
||||||
@@ -289,4 +350,53 @@ describe('VaultGit (integration; temp repo)', () => {
|
|||||||
const all = await git.listTrackedFiles();
|
const all = await git.listTrackedFiles();
|
||||||
expect(new Set(all)).toEqual(new Set(['keep.md', 'note.txt']));
|
expect(new Set(all)).toEqual(new Set(['keep.md', 'note.txt']));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('listTrackedFiles returns RAW UTF-8 Cyrillic paths (not octal-escaped/quoted)', async () => {
|
||||||
|
if (!available) return;
|
||||||
|
const vault = await freshDir();
|
||||||
|
const git = new VaultGit(vault);
|
||||||
|
await git.ensureRepo();
|
||||||
|
|
||||||
|
// The target wiki is Russian, so file names contain Cyrillic. With git's
|
||||||
|
// DEFAULT core.quotepath=true these come back as `"\320\232..."` from
|
||||||
|
// ls-files; `listTrackedFiles` must return them verbatim as UTF-8.
|
||||||
|
const topName = 'Колонка.md';
|
||||||
|
const nestedDir = 'Раздел';
|
||||||
|
const nestedName = 'Подстраница.md';
|
||||||
|
await writeFile(join(vault, topName), 'top\n', 'utf8');
|
||||||
|
await mkdir(join(vault, nestedDir), { recursive: true });
|
||||||
|
await writeFile(join(vault, nestedDir, nestedName), 'nested\n', 'utf8');
|
||||||
|
await git.stageAll();
|
||||||
|
await git.commit('add cyrillic files', {
|
||||||
|
authorName: BOT_AUTHOR_NAME,
|
||||||
|
authorEmail: BOT_AUTHOR_EMAIL,
|
||||||
|
});
|
||||||
|
|
||||||
|
const md = await git.listTrackedFiles('*.md');
|
||||||
|
// Exact UTF-8 names, forward-slash separated for the nested one — NOT an
|
||||||
|
// escaped/quoted form like `"\320\232..."`.
|
||||||
|
expect(new Set(md)).toEqual(
|
||||||
|
new Set([topName, `${nestedDir}/${nestedName}`]),
|
||||||
|
);
|
||||||
|
// Guard explicitly against the quotepath regression: no entry is quoted or
|
||||||
|
// contains a backslash escape sequence.
|
||||||
|
for (const p of md) {
|
||||||
|
expect(p.startsWith('"')).toBe(false);
|
||||||
|
expect(p.includes('\\')).toBe(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// No-glob listing also returns the raw Cyrillic names.
|
||||||
|
const all = await git.listTrackedFiles();
|
||||||
|
expect(all).toContain(topName);
|
||||||
|
expect(all).toContain(`${nestedDir}/${nestedName}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('assertGitAvailable resolves when git is present', async () => {
|
||||||
|
if (!available) return;
|
||||||
|
const vault = await freshDir();
|
||||||
|
const git = new VaultGit(vault);
|
||||||
|
// No repo needed: it only probes `git --version` (and the vault dir need
|
||||||
|
// not even exist yet).
|
||||||
|
await expect(git.assertGitAvailable()).resolves.toBeUndefined();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user