Files
gitmost/apps/server/src/integrations/git-sync/services/vault-registry.service.ts
T
claude code agent 227 906733b5c8 fix(git-sync): address PR #119 review #4 — symlink guard, dead-code cull, changelog + warnings/suggestions
Blocking (review id 2514):
- [security] Forbid symlinks in vaults. ensureServable now sets
  core.symlinks=false in each vault's local git config (a pushed symlink is
  checked out as a plain file, never a real link), and the engine cycle wraps
  every read/write/mkdir in an lstat/realpath guard (new path-guard.ts) that
  refuses a path that is — or traverses — a symlink, or whose realpath escapes
  the vault root. Prevents a writer from publishing /etc/passwd or the server
  .env, or writing outside the vault. Adds unit tests (path-guard.test.ts) +
  a read-guard integration test (cycle.test.ts) + real lstat/realpath in the
  roundtrip integration test.
- [simplification] Delete dead lib/diff.ts + test/diff.test.ts and drop the
  now-unused @fellow/prosemirror-recreate-transform dependency.
- [documentation] Add a CHANGELOG [Unreleased] → Added entry for git-sync.

Warnings:
- [test-coverage] Cover the CREATE-branch conflict-markers guard (a new .md with
  markers and no gitmost_id is recorded as a create failure, never created).

Suggestions:
- [stability] Bound each `git config` in ensureServable with a timeout.
- [authz] Trigger endpoint resolves spaceId workspace-scoped and 404s a foreign
  space before any vault directory is created.
- [stability] Attribute git-initiated moves to the service account
  (lastUpdatedById), via an optional actor param on PageService.movePage.
- [documentation] Document the per-space autoMergeConflicts toggle in AGENTS.md.
- [test-coverage] Cover the unterminated `:::` callout fence fallback.
- [simplification] Move test-only roundtrip-helpers.ts out of src/ into test/.

Architecture:
- Move the Yjs/ProseMirror merge primitives (yjs-body-merge, three-way-merge,
  lcs + specs) into collaboration/merge/, breaking the collaboration →
  integrations/git-sync dependency cycle this PR introduced.
- Port the schema-surface drift gate to packages/mcp (the mcp schema mirror had
  none); pins 52 entries.

Deferred (with rationale in the review thread): the incremental-pull perf
warning (correctness-neutral; needs a high-water-mark design + its own tests on
the data-loss-critical path) and the redis-sync rolling-deploy mixed-version
edge (the deficient behavior is in already-released old-instance code; the new
code is correct on both sides; impact is a transient rollout-window artifact).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 15:39:12 +03:00

115 lines
5.1 KiB
TypeScript

import { Injectable, Logger } from '@nestjs/common';
import { mkdir } from 'node:fs/promises';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import type { VaultGit } from '@docmost/git-sync';
import { loadGitSync } from '../git-sync.loader';
import { EnvironmentService } from '../../environment/environment.service';
// Promise-returning form of `child_process.execFile`, built once at module load
// so callers can `await` a git invocation instead of passing a callback.
const execFileAsync = promisify(execFile);
/**
 * Resolves the on-disk vault location per space and owns the (lazily created,
 * cached) `VaultGit` instance for each one.
 *
 * Topology: one git repo per enabled space, rooted at
 * `<GIT_SYNC_DATA_DIR>/<spaceId>`. A `VaultGit` is constructed at most once per
 * space and reused across cycles — it is a thin, stateless shell-out wrapper, so
 * caching it just avoids re-resolving the path and re-running `mkdir`.
 */
@Injectable()
export class VaultRegistryService {
  private readonly logger = new Logger(VaultRegistryService.name);
  private readonly vaults = new Map<string, VaultGit>();

  constructor(private readonly environmentService: EnvironmentService) {}

  /**
   * Vault path for a space: `<GIT_SYNC_DATA_DIR>/<spaceId>` (absolute as long
   * as the configured data dir is absolute).
   *
   * SECURITY: `spaceId` becomes a path component that is later handed to
   * `mkdir`, `VaultGit` and used as the `cwd` of `git config`, so it must be
   * exactly ONE path segment. An empty id, `.`, `..`, or anything containing a
   * path separator or NUL is rejected here — the single place every vault path
   * is derived — so no caller can be steered outside the data dir.
   *
   * @param spaceId Identifier of the space whose vault is being located.
   * @returns The data dir (trailing slashes trimmed) joined with `spaceId`.
   * @throws Error when `spaceId` is not a single safe path segment.
   */
  vaultPath(spaceId: string): string {
    this.assertSingleSegment(spaceId);
    const root = this.environmentService.getGitSyncDataDir().replace(/\/+$/, '');
    return `${root}/${spaceId}`;
  }

  /**
   * Get (or lazily construct + cache) the `VaultGit` for a space, ensuring its
   * directory exists. `VaultGit.ensureRepo()` is NOT called here — the engine's
   * pull/push paths call it (and the branch/ref setup) as their first step; this
   * only guarantees the parent dir exists so a fresh space does not ENOENT.
   *
   * @param spaceId Identifier of the space.
   * @returns The cached or newly created `VaultGit` for that space.
   * @throws Error when `spaceId` is not a single safe path segment (on a cache
   *   miss; a cached id was already validated when it was inserted).
   */
  async getVault(spaceId: string): Promise<VaultGit> {
    const cached = this.vaults.get(spaceId);
    if (cached) return cached;

    // Validates the id before anything touches the filesystem.
    const path = this.vaultPath(spaceId);
    await mkdir(path, { recursive: true });

    const { VaultGit } = await loadGitSync();
    const vault = new VaultGit(path);
    this.vaults.set(spaceId, vault);
    return vault;
  }

  /**
   * Make a space's vault repo servable over smart-HTTP (the /git host). Ensures
   * the repo exists (engine `ensureRepo`: `git init -b main` + initial commit +
   * branches; idempotent), then sets the LOCAL git config a `git http-backend`
   * push needs:
   *
   * - receive.denyCurrentBranch=updateInstead — a push to the checked-out
   *   `main` updates the working tree too (the engine's human-facing branch).
   *   Requires a clean tree, which is guaranteed between cycles / under the
   *   orchestrator lock that wraps an external push.
   * - receive.denyNonFastForwards=true — block force-push so a client cannot
   *   rewrite the engine's history on `main`.
   * - http.receivepack=true / http.uploadpack=true — explicitly allow the
   *   receive/upload services over HTTP.
   * - core.symlinks=false — SECURITY (PR #119 review). A writer could push a
   *   `.md` entry that is a SYMLINK (e.g. `leak.md -> /etc/passwd` or
   *   `-> .env`); with symlinks enabled `updateInstead` would materialize a
   *   real link in the working tree, and the next push cycle would follow it
   *   and PUBLISH the target's contents as a Docmost page (server-file
   *   disclosure), or use a symlinked directory to write OUTSIDE the vault on
   *   pull. With `core.symlinks=false` git checks out such a blob as a PLAIN
   *   FILE containing the link text, never a real link, defusing the primitive
   *   at the git layer. (The engine's per-access lstat/realpath guard is the
   *   second layer — see path-guard.ts.)
   *
   * All are set idempotently (plain `git config` overwrites the local value).
   * Idempotent and safe to call before every request.
   *
   * @param spaceId Identifier of the space whose vault should be servable.
   * @returns The vault path for the space.
   * @throws Error when `spaceId` is not a single safe path segment, or when
   *   `ensureRepo` / any `git config` invocation fails or exceeds the timeout.
   */
  async ensureServable(spaceId: string): Promise<string> {
    const { vaultGitEnv } = await loadGitSync();
    const vault = await this.getVault(spaceId);
    const path = this.vaultPath(spaceId);

    // ensureRepo also verifies git is available on its first git call; it does
    // `git init -b main` + an initial commit + the engine branches. Idempotent.
    await vault.ensureRepo();

    const configs: Array<[string, string]> = [
      ['receive.denyCurrentBranch', 'updateInstead'],
      ['receive.denyNonFastForwards', 'true'],
      ['http.receivepack', 'true'],
      ['http.uploadpack', 'true'],
      ['core.symlinks', 'false'],
    ];

    // Bound each `git config` (review suggestion): this runs in the request path
    // BEFORE the watchdog, so a wedged git (a stale `.git/config.lock`) would
    // otherwise hang the request indefinitely. Mirror the engine's GIT_EXEC
    // bound via the configured backend timeout.
    const timeout = this.environmentService.getGitSyncBackendTimeoutMs();

    // Sequential on purpose: every call rewrites the same `.git/config`, and
    // git serializes those writes through `config.lock`.
    for (const [key, value] of configs) {
      await execFileAsync('git', ['config', key, value], {
        cwd: path,
        // Use the engine's cwd-isolated env (strips GIT_DIR / GIT_WORK_TREE) so
        // the config is written to THIS vault's local config, nothing else.
        env: vaultGitEnv(),
        timeout,
        maxBuffer: 10 * 1024 * 1024,
      });
    }

    return path;
  }

  /**
   * Reject any space id that could not be exactly one directory name under the
   * data dir: non-strings, the empty string, the `.` / `..` dot segments, and
   * anything containing `/`, `\` or a NUL byte.
   */
  private assertSingleSegment(spaceId: string): void {
    const unsafe =
      typeof spaceId !== 'string' ||
      spaceId.length === 0 ||
      spaceId === '.' ||
      spaceId === '..' ||
      /[\/\\\0]/.test(spaceId);
    if (unsafe) {
      throw new Error(
        `Invalid space id for vault path: ${JSON.stringify(spaceId)}`,
      );
    }
  }
}