From f0778cb85aebe9d6d98d1f7a08557871ebc821ea Mon Sep 17 00:00:00 2001 From: agent_qa Date: Fri, 3 Jul 2026 07:25:48 +0300 Subject: [PATCH] fix(git-sync): self-heal a stale .git lock that wedged a space forever (D3-N3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An interrupted git operation (a hard crash / OOM-kill / abrupt container stop mid `git add`/`commit`/`checkout`) leaves a `.git/index.lock` (or a ref `*.lock`). Git then refuses EVERY subsequent operation ("Unable to create '…/index.lock': File exists"), so every poll cycle failed and the space's sync wedged INDEFINITELY with no self-heal — the whole space stopped syncing until a human ran `rm` on the lock (found via web-test restart/corruption charter, reproduced deterministically). The daemon holds the per-space Redis lock and is the vault's ONLY writer, so any `*.lock` reaching a fresh cycle is necessarily stale (no live git process holds it). Add `VaultGit.clearStaleGitLocks()` and call it in the cycle preflight, right after ensureRepo and before the mid-merge recovery — clearing index/HEAD/config/packed-refs/MERGE_HEAD/ORIG_HEAD and the engine's ref locks (best-effort, missing = no-op). Verified on the test stand: a planted stale index.lock is now cleared and the space recovers (edit reaches the vault, 0 "File exists" errors) — previously it was wedged forever. Unit test (real temp repo: index.lock blocks git add -> clear -> git add works); git-sync suite green (707). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/git-sync/src/engine/cycle.ts | 9 +++++ packages/git-sync/src/engine/git.ts | 34 ++++++++++++++++++- packages/git-sync/test/cycle.test.ts | 1 + packages/git-sync/test/git.test.ts | 25 ++++++++++++++ .../git-sync/test/redteam-push-cycle.test.ts | 2 ++ 5 files changed, 70 insertions(+), 1 deletion(-) diff --git a/packages/git-sync/src/engine/cycle.ts b/packages/git-sync/src/engine/cycle.ts index 00e17c28..68a039f3 100644 --- a/packages/git-sync/src/engine/cycle.ts +++ b/packages/git-sync/src/engine/cycle.ts @@ -116,6 +116,15 @@ export async function runCycle(deps: RunCycleDeps): Promise { await vault.assertGitAvailable(); await vault.ensureRepo(); + // 1b. CLEAR stale git lock files left by an interrupted git op (bug D3-N3). A + // hard crash / OOM-kill / abrupt container stop mid `git add`/`commit`/ + // `checkout` leaves a `.git/index.lock` (or a ref `*.lock`); git then refuses + // every later op ("Unable to create '…/index.lock': File exists"), wedging the + // space forever with no self-heal. The daemon holds the per-space Redis lock + // and is the vault's only writer, so any leftover lock here is stale — remove + // it before the merge check + any checkout/diff below. + await vault.clearStaleGitLocks(); + // 2. RECOVER from a vault left mid-merge by a PRIOR cycle (SPEC §9 wedge fix). // A leftover merge used to WEDGE THE WHOLE SPACE: this check returned // `skipped: "merge-in-progress"` so EVERY later cycle skipped the entire diff --git a/packages/git-sync/src/engine/git.ts b/packages/git-sync/src/engine/git.ts index 39d52255..df5477fc 100644 --- a/packages/git-sync/src/engine/git.ts +++ b/packages/git-sync/src/engine/git.ts @@ -19,7 +19,7 @@ * - "nothing to commit" is treated as a graceful no-op, not an error. 
*/ import { execFile } from "node:child_process"; -import { mkdir } from "node:fs/promises"; +import { mkdir, rm } from "node:fs/promises"; import { promisify } from "node:util"; const execFileAsync = promisify(execFile); @@ -322,6 +322,38 @@ export class VaultGit { * failure deep inside `checkout`. This is what makes re-runs converge * (resumability, SPEC §12). */ + /** + * Remove STALE git lock files left by an INTERRUPTED git operation (a hard + * crash / OOM-kill / abrupt container stop mid `git add`/`commit`/`checkout` + * leaves `.git/index.lock`; interrupted ref updates leave `*.lock` files). Git + * then refuses EVERY subsequent operation ("Unable to create '…/index.lock': + * File exists"), which WEDGES the space's sync loop indefinitely with no + * self-heal (bug D3-N3). The daemon holds the per-space Redis lock and is the + * vault's ONLY writer, so any leftover `*.lock` reaching a fresh cycle is + * necessarily stale (no live git process holds it) — clear them best-effort in + * the cycle preflight, alongside the mid-merge recovery. Missing files are a + * no-op (`force: true`). + */ + async clearStaleGitLocks(): Promise { + const gitDir = `${this.vaultPath}/.git`; + const locks = [ + "index.lock", + "HEAD.lock", + "config.lock", + "packed-refs.lock", + "MERGE_HEAD.lock", + "ORIG_HEAD.lock", + "refs/heads/main.lock", + "refs/heads/docmost.lock", + "refs/docmost/last-pushed.lock", + ]; + await Promise.all( + locks.map((rel) => + rm(`${gitDir}/${rel}`, { force: true }).catch(() => undefined), + ), + ); + } + async isMergeInProgress(): Promise { // MERGE_HEAD exists exactly while a merge is in progress. 
const mergeHead = await this.runRaw([ diff --git a/packages/git-sync/test/cycle.test.ts b/packages/git-sync/test/cycle.test.ts index 6347aa2d..de7e91cb 100644 --- a/packages/git-sync/test/cycle.test.ts +++ b/packages/git-sync/test/cycle.test.ts @@ -16,6 +16,7 @@ function fakeVault(overrides: Record = {}) { order, assertGitAvailable: rec("assertGitAvailable"), ensureRepo: rec("ensureRepo"), + clearStaleGitLocks: rec("clearStaleGitLocks"), isMergeInProgress: vi.fn(async () => false), ensureBranch: rec("ensureBranch"), checkout: rec("checkout"), diff --git a/packages/git-sync/test/git.test.ts b/packages/git-sync/test/git.test.ts index a3255862..cbeaec51 100644 --- a/packages/git-sync/test/git.test.ts +++ b/packages/git-sync/test/git.test.ts @@ -141,6 +141,31 @@ describe('VaultGit (integration; temp repo)', () => { expect(count.trim()).toBe('1'); }); + it('clearStaleGitLocks removes a leftover index.lock so git ops work again (bug D3-N3)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // Simulate an interrupted git op: a stale index.lock left behind. Git now + // refuses index-touching operations. + await writeFile(join(vault, '.git', 'index.lock'), ''); + await expect( + execFileAsync('git', ['add', '-A'], { cwd: vault }), + ).rejects.toThrow(/index\.lock/); + + // The preflight clears it (the daemon is the vault's sole writer, so it is stale). + await git.clearStaleGitLocks(); + + // The lock is gone and git ops succeed again. + await expect( + execFileAsync('git', ['add', '-A'], { cwd: vault }), + ).resolves.toBeDefined(); + + // Idempotent / safe when no lock exists. 
+ await expect(git.clearStaleGitLocks()).resolves.toBeUndefined(); + }); + it('ensureRepo neutralizes correctness-affecting LOCAL config', async () => { if (!available) return; const vault = await freshDir(); diff --git a/packages/git-sync/test/redteam-push-cycle.test.ts b/packages/git-sync/test/redteam-push-cycle.test.ts index 7edb4227..b7758b60 100644 --- a/packages/git-sync/test/redteam-push-cycle.test.ts +++ b/packages/git-sync/test/redteam-push-cycle.test.ts @@ -40,6 +40,7 @@ function makePushGit(opts: { const git: PushDeps['git'] = { assertGitAvailable: vi.fn(async () => {}), ensureRepo: vi.fn(async () => {}), + clearStaleGitLocks: vi.fn(async () => {}), isMergeInProgress: vi.fn(async () => false), // NO merge in progress checkout: vi.fn(async () => {}), stageAll: vi.fn(async () => {}), @@ -342,6 +343,7 @@ function fakeVault(overrides: Record = {}) { order, assertGitAvailable: rec('assertGitAvailable'), ensureRepo: rec('ensureRepo'), + clearStaleGitLocks: rec('clearStaleGitLocks'), isMergeInProgress: vi.fn(async () => false), ensureBranch: rec('ensureBranch'), checkout: rec('checkout'),