Files
gitmost/packages/git-sync/src/engine/cycle.ts
T
claude code agent 227 24b903aaf3 build(git-sync): land the @docmost/git-sync package into develop, code-only (#326 step 1 / PR-A)
The git-sync converter + engine source lived only on the #119 branch; develop
had just the dead compiled build/. Bring the whole package (src + ~700 tests)
onto develop under CI, with NO consumer wired — git-sync stays fully inert in
develop (nothing in apps/server imports it), so runtime behavior is unchanged.
This unblocks #293 (extract the shared converter package from the landed source)
and lets #119's functionality land LAST, already writing the canonical format
(per the #326 landing order).

- packages/git-sync: src (lib converter + engine) + test corpus + configs.
- Remove develop's dead committed packages/git-sync/build/; gitignore it
  (built in CI/Docker via pnpm build, never committed — no src/build drift).
- pnpm-lock.yaml: add the @docmost/git-sync importer (a missing workspace
  package in the lock is a CI blocker). `pnpm install --frozen-lockfile` passes.
- NO server integration / loader / Dockerfile runtime changes (those come with
  #119 at step 6).

Verified: tsc clean; vitest 711 passed | 1 expected-fail, 0 failures, 0 type
errors; pnpm --frozen-lockfile EXIT 0; apps/server has no git-sync import.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 06:21:41 +03:00

245 lines
11 KiB
TypeScript

import { VaultGit, DEFAULT_BRANCH } from "./git.js";
import { GitSyncClient } from "./client.types.js";
import { Settings } from "./settings.js";
import { readExisting, computePullActions, applyPullActions } from "./pull.js";
import { runPush } from "./push.js";
import { assertVaultPathSafe, type PathGuardIo } from "./path-guard.js";
/**
 * The filesystem seam of the reconcile cycle. Every primitive takes an ABSOLUTE
 * path and is injected by the caller rather than imported, which keeps the
 * engine free of direct IO and straightforward to unit-test.
 *
 * Symlink guard (see ./path-guard.ts): the `lstat`/`realpath` primitives that
 * the guard relies on are what let each read/write/mkdir be screened, so a
 * pushed symlink (e.g. `leak.md -> /etc/passwd` or `-> .env`) cannot be
 * followed to publish or overwrite a file outside the vault. Both of them MUST
 * resolve to `null` on ENOENT and reject on any other error.
 */
export interface CycleFs extends PathGuardIo {
  // --- reads ---------------------------------------------------------------

  /** Resolve with the text content of the file at `absPath`. */
  readFile: (absPath: string) => Promise<string>;

  // --- mutations -----------------------------------------------------------

  /** Create the directory `absDir`; implementations must be recursive. */
  mkdir: (absDir: string) => Promise<void>;

  /** Write `text` to the file at `absPath`. */
  writeFile: (absPath: string, text: string) => Promise<void>;

  /** Remove `absPath` with force semantics: a missing file is a no-op. */
  rm: (absPath: string) => Promise<void>;
}
/** Everything one `runCycle` invocation needs, supplied by the caller. */
export interface RunCycleDeps {
  /** Identifier of the space being reconciled. */
  spaceId: string;

  /** The Docmost seam: read from during pull, written to during push. */
  client: GitSyncClient;

  /** The git vault of this space (a real working repository). */
  vault: VaultGit;

  /**
   * Engine settings. `vaultPath` is the root against which vault-relative
   * paths are turned into absolute ones.
   */
  settings: Settings;

  /** Injected filesystem primitives (absolute paths). */
  fs: CycleFs;

  /** Sink for human-readable progress / recovery lines. */
  log: (line: string) => void;

  /**
   * Cooperative-abort signal (optional). The orchestrator ties it to the
   * per-space lock: when a heartbeat refresh cannot CONFIRM that the lock is
   * still held (CAS-miss / Redis error) the signal is aborted, and the cycle
   * bails at its next checkpoint — one sits before the pull-apply and one
   * before the push-apply, the two destructive write phases — rather than
   * writing blind after a possible lock loss.
   *
   * This is a COARSE, best-effort guard only; a fully fenced cross-process
   * single-writer still requires the fencing-token redesign (follow-up).
   */
  signal?: AbortSignal;
}
/** Outcome summary of a single `runCycle` invocation. */
export interface RunCycleResult {
  /** Whether the pull/push phases actually ran. */
  ran: boolean;

  /**
   * Reason the cycle short-circuited without running pull/push.
   *
   * NOTE(review): `runCycle` in this file recovers from a leftover merge
   * (abort / hard reset) instead of skipping, so it never populates this
   * field; presumably kept for result-shape compatibility — confirm before
   * removing.
   */
  skipped?: "merge-in-progress";

  /** Pull-phase counters plus whether the docmost -> main merge conflicted. */
  pull?: { written: number; deleted: number; conflict: boolean };

  /** Push mode as reported by the push phase, and its failure count. */
  push?: { mode: string; failures: number };

  /**
   * Copied from the push result. `true` means the push REFUSED to fast-forward
   * a divergent `docmost` mirror, i.e. the §5 invariant (`docmost` mirrors
   * what Docmost contains) is broken. Exposed on the result so whoever drives
   * `runCycle` can detect the breach without scraping logs (red-team #15).
   */
  divergentDocmost?: boolean;
}
/**
 * Run ONE full reconcile cycle for a space: PULL (Docmost -> vault) then PUSH
 * (vault -> Docmost), under the engine's required branch choreography. This is
 * the single entry point the app drives — it owns the staging order so it can
 * never drift from the engine it ships with.
 *
 * Staging (the ⭐ data-loss-critical order, SPEC §6/§9):
 *  1.  assertGitAvailable + ensureRepo (the git state store must exist).
 *  1b. clearStaleGitLocks: drop lock files left by an interrupted git op so a
 *      crash cannot wedge the space.
 *  1c. ensureMainBranch: re-create a missing `main` before it is used.
 *  2.  RECOVER from an unresolved merge left by a prior cycle: `merge --abort`,
 *      then a hard reset to HEAD if the vault is still mid-merge. The cycle
 *      does NOT skip — it continues with a fresh pull.
 *  3.  ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
 *      land on `docmost`, not `main`: applyPullActions commits on `docmost`,
 *      then checks out `main` and merges docmost -> main. Writing Docmost
 *      content straight onto `main` would clobber local file edits before push
 *      can diff them.
 *  4.  PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
 *  5.  PUSH: vault -> Docmost apply.
 *  6.  (finally) best-effort checkout of the default branch so HEAD is stable
 *      between cycles; skipped when the abort signal has fired.
 *
 * Lock POLICY lives in the caller; this owns only the mechanics. Deletes are
 * soft (Trash, reversible) and always logged, so there is no per-cycle
 * delete-cap — engine convergence is the guard against phantom deletions.
 *
 * @param deps - Injected collaborators; see {@link RunCycleDeps}.
 * @returns A summary with `ran: true` plus pull/push counters. This function
 *   never sets `skipped`.
 * @throws Whatever the vault, client, pull or push steps reject with; and the
 *   signal's abort reason when `deps.signal` is aborted at one of the two
 *   checkpoints (before pull-apply, before push).
 */
export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
  const { spaceId, client, vault, settings, fs, log, signal } = deps;
  const vaultRoot = settings.vaultPath;
  // Vault-relative -> absolute. Plain concatenation: no normalisation happens
  // here, so the guard below sees the path exactly as the engine produced it.
  const abs = (relPath: string) => `${vaultRoot}/${relPath}`;

  // SYMLINK GUARD (defense-in-depth, see ./path-guard.ts). Wrap the injected
  // read/write/mkdir primitives so EVERY engine file access is screened: a path
  // that is — or traverses — a symlink, or whose realpath escapes the vault, is
  // refused. `rm` is deliberately NOT wrapped: removing a path only deletes the
  // link itself (force, non-recursive), never the target, and we WANT to be able
  // to clean up a stray pushed symlink. A refusal THROWS; the pull/push loops
  // already isolate per-file errors (skip + log), so a single poisoned entry is
  // skipped while the rest of the space keeps syncing.
  //
  // NOTE(review): the "only deletes the link itself" argument covers a symlink
  // as the FINAL path component. A path that traverses a symlinked PARENT
  // directory is not screened on `rm` — confirm the pull phase only ever
  // removes paths it has already vetted.
  const guard = (p: string) => assertVaultPathSafe(fs, vaultRoot, p);
  const safeFs = {
    readFile: async (p: string): Promise<string> => {
      await guard(p);
      return fs.readFile(p);
    },
    writeFile: async (p: string, text: string): Promise<void> => {
      await guard(p);
      return fs.writeFile(p, text);
    },
    mkdir: async (p: string): Promise<void> => {
      await guard(p);
      return fs.mkdir(p);
    },
    rm: (p: string): Promise<void> => fs.rm(p),
  };

  // 1. The engine state store is git: make sure the repo + branches exist
  //    before any tracked-file listing or diff.
  await vault.assertGitAvailable();
  await vault.ensureRepo();

  // 1b. CLEAR stale git lock files left by an interrupted git op (bug D3-N3). A
  //     hard crash / OOM-kill / abrupt container stop mid `git add`/`commit`/
  //     `checkout` leaves a `.git/index.lock` (or a ref `*.lock`); git then
  //     refuses every later op ("Unable to create '…/index.lock': File
  //     exists"), wedging the space forever with no self-heal. Only locks OLDER
  //     than the staleness threshold are removed (a fresh lock from a
  //     concurrent replica in the TTL-lapse window is preserved), before the
  //     merge check + any checkout/diff below.
  await vault.clearStaleGitLocks();

  // 1c. RESTORE a missing `main` branch (bug D3-N1). Ref-store damage can leave
  //     an existing repo without `main`; the ensureBranch("docmost","main") +
  //     checkout below would then throw every cycle ("pathspec 'main' did not
  //     match"), wedging the space forever. Re-create it from `docmost`/HEAD
  //     before use.
  await vault.ensureMainBranch();

  // 2. RECOVER from a vault left mid-merge by a PRIOR cycle (SPEC §9 wedge fix).
  //    A leftover merge used to WEDGE THE WHOLE SPACE: this check returned
  //    `skipped: "merge-in-progress"` so EVERY later cycle skipped the entire
  //    space (all pages, both directions) forever, with no recovery. The pull
  //    phase below no longer leaves the vault mid-merge (it commits a
  //    conflicting merge with markers and isolates the one bad page), but a
  //    vault wedged by a PRE-FIX build (or a manual/interrupted git op) must
  //    still self-heal. So instead of skipping, ABORT the stale half-merge and
  //    continue — the fresh pull re-runs and, on a real conflict,
  //    commits-with-markers rather than re-wedging. A stray unmerged index that
  //    `merge --abort` can't clear (no MERGE_HEAD) is force-cleared with a hard
  //    reset to HEAD.
  if (await vault.isMergeInProgress()) {
    log(
      `vault was left mid-merge by a prior cycle — aborting the stale merge and ` +
        `continuing so the space is not wedged (SPEC §9 recovery).`,
    );
    await vault.abortMerge();
    if (await vault.isMergeInProgress()) {
      log(
        `vault still mid-merge after 'merge --abort' — hard-resetting to HEAD ` +
          `to recover (SPEC §9).`,
      );
      await vault.resetHardToHead();
    }
  }

  try {
    // 3. Pull writes happen on `docmost`; be on it BEFORE applying (see docstring).
    await vault.ensureBranch("docmost", "main");
    await vault.checkout("docmost");

    // 4. PULL ----------------------------------------------------------------
    const existing = await readExisting({
      listTracked: () => vault.listTrackedFiles("*.md"),
      readFile: (relPath) => safeFs.readFile(abs(relPath)),
    });
    const tree = await client.listSpaceTree(spaceId);
    const pullActions = computePullActions({
      pages: tree.pages,
      treeComplete: tree.complete,
      existing,
    });

    // Bail before the first destructive write phase if the lock was lost.
    signal?.throwIfAborted();
    const pullResult = await applyPullActions(
      {
        client,
        git: vault,
        // The pull phase hands over absolute paths, so no `abs()` here.
        writeFile: (absPath, text) => safeFs.writeFile(absPath, text),
        mkdir: (absDir) => safeFs.mkdir(absDir),
        rm: (absPath) => safeFs.rm(absPath),
        log,
      },
      pullActions,
      vaultRoot,
    );

    // 5. PUSH ----------------------------------------------------------------
    const pushDeps = {
      settings,
      git: vault,
      makeClient: () => client,
      // The push phase works with vault-relative paths; map them through `abs`.
      readFile: (relPath: string) => safeFs.readFile(abs(relPath)),
      writeFile: (relPath: string, text: string) =>
        safeFs.writeFile(abs(relPath), text),
      log,
    };

    // Bail before pushing to Docmost if the lock was lost during pull.
    signal?.throwIfAborted();
    const pushResult = await runPush(pushDeps, { dryRun: false });

    return {
      ran: true,
      pull: {
        written: pullResult.written,
        deleted: pullResult.deleted,
        conflict: pullResult.merge.conflict,
      },
      push: {
        mode: pushResult.mode,
        failures: pushResult.failures?.length ?? 0,
      },
      // Forward a divergent-`docmost` escalation so the caller can act on the §5
      // invariant breach without scraping logs (red-team #15).
      divergentDocmost: pushResult.divergentDocmost ?? false,
    };
  } finally {
    // STABLE SERVED HEAD (bug #3). The pull transiently checks out the read-only
    // `docmost` mirror, and the smart-HTTP host advertises whatever HEAD resolves
    // to — so a clone racing a cycle could default to `docmost`. The happy path
    // already ends on `main` (runPush), but a throw mid-pull would leave HEAD on
    // `docmost`; restore it here so the advertised default branch is `main`
    // BETWEEN cycles. Best-effort: skipped if the lock was lost (do not write the
    // working tree after a possible takeover), and a failing checkout (e.g. a
    // dirty tree from an aborted write) is never rethrown — the next cycle's
    // recovery resyncs and the read advertisement pins HEAD under the lock
    // regardless.
    if (!signal?.aborted) {
      try {
        await vault.checkout(DEFAULT_BRANCH);
      } catch (err: unknown) {
        // Still best-effort, but no longer silent: surface WHY HEAD could not
        // be restored so an operator can see a vault stuck off the default
        // branch between cycles.
        const reason = err instanceof Error ? err.message : String(err);
        try {
          log(
            `could not restore HEAD to '${DEFAULT_BRANCH}' after the cycle ` +
              `(best-effort; next cycle recovers): ${reason}`,
          );
        } catch {
          /* a throwing logger must not replace the cycle's real result/error */
        }
      }
    }
  }
}