feat(git-sync): serve spaces over smart-HTTP (gitmost as a two-way git host)

Expose each git-sync-enabled space as a clonable/pushable git repo over HTTP,
so `git clone https://<user>:<pass>@<host>/git/<spaceId>.git` works and external
pushes flow back into Docmost pages — gitmost itself acts as the git host (no
external GitHub/Gitea, no SSH).

Transport: shell out to `git http-backend` (CGI; git is already in the runtime
image) which implements the full smart-HTTP protocol (info/refs, upload-pack,
receive-pack, protocol v2). A raw Fastify route `/git/*` (mounted at the root,
outside the `/api` prefix) bridges the request/response to the CGI; passthrough
content-type parsers for the git media types stream the raw body to stdin.

Reuse the existing engine: clients push the vault's `main` branch, whose commits
beyond `refs/docmost/last-pushed` the engine already reconciles into Docmost.

- http/git-http.service.ts — auth (HTTP Basic -> AuthService.verifyUserCredentials),
  self-resolved workspace (DomainMiddleware does not run for this raw route),
  per-space gating (global + per-space gitSync flags, 404 hides existence),
  CASL authz (Read=fetch, Manage=push), dispatch.
- http/git-http-backend.service.ts — spawn `git http-backend`, binary-safe CGI
  response parsing (Status/headers/body), stream to the socket.
- http/git-http.helpers.ts — pure path parse, service->kind mapping, gate decision
  (unit-tested); rejects literal and percent-encoded path traversal.
- orchestrator: extract reusable withSpaceLock (CAS-guarded lock heartbeat so a
  long push cannot let the lock expire mid-cycle) and add ingestExternalPush
  (receive-pack + Docmost cycle under one lock; 503 on contention).
- vault-registry: ensureServable() — ensureRepo + idempotent receive.denyCurrentBranch
  =updateInstead / denyNonFastForwards / http.receivepack / http.uploadpack.
- env: GIT_SYNC_HTTP_ENABLED (defaults to GIT_SYNC_ENABLED) + validation.
- main.ts: register the /git/* route and the git content-type parsers.

Tests: pure helpers, CGI parsing, and the GitHttpService handler (auth/gate/authz
+ workspace resolution). Server tsc + git-sync/env suites green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-21 19:55:25 +03:00
committed by claude code agent 227
parent d9d1d54aaa
commit 04032ae677
12 changed files with 1655 additions and 14 deletions

View File

@@ -343,6 +343,18 @@ export class EnvironmentService {
);
}
/**
 * Reports whether gitmost serves the per-space vaults over smart-HTTP (the
 * /git host).
 *
 * - GIT_SYNC_HTTP_ENABLED unset: falls back to isGitSyncEnabled(), so enabling
 *   sync also enables the host unless it is explicitly disabled.
 * - GIT_SYNC_HTTP_ENABLED set: only the value 'true' (compared
 *   case-insensitively) turns the host on; any other value turns it off.
 */
isGitSyncHttpEnabled(): boolean {
  const configured = this.configService.get<string>('GIT_SYNC_HTTP_ENABLED');
  return configured === undefined
    ? this.isGitSyncEnabled()
    : configured.toLowerCase() === 'true';
}
/**
* Root directory holding the per-space vault repos. Defaults to
* `<DATA_DIR or ./data>/git-sync`. `DATA_DIR` is read directly (no dedicated

View File

@@ -179,6 +179,13 @@ export class EnvironmentVariables {
@IsString()
GIT_SYNC_ENABLED: string;
// Whether to serve the per-space vaults over smart-HTTP (the /git host).
// Optional; when provided it must be exactly the string 'true' or 'false'.
// When unset, defaults to GIT_SYNC_ENABLED (see isGitSyncHttpEnabled).
@IsOptional()
@IsIn(['true', 'false'])
@IsString()
GIT_SYNC_HTTP_ENABLED: string;
@IsOptional()
@IsString()
GIT_SYNC_DATA_DIR: string;

View File

@@ -4,11 +4,14 @@ import { DatabaseModule } from '@docmost/db/database.module';
import { EnvironmentModule } from '../environment/environment.module';
import { CollaborationModule } from '../../collaboration/collaboration.module';
import { PageModule } from '../../core/page/page.module';
import { AuthModule } from '../../core/auth/auth.module';
import { GitmostDataSourceService } from './services/gitmost-datasource.service';
import { GitSyncOrchestrator } from './services/git-sync.orchestrator';
import { VaultRegistryService } from './services/vault-registry.service';
import { PageChangeListener } from './listeners/page-change.listener';
import { GitSyncController } from './git-sync.controller';
import { GitHttpBackendService } from './http/git-http-backend.service';
import { GitHttpService } from './http/git-http.service';
/**
* The git-sync control plane (plan §6). Wires the native datasource, the
@@ -36,6 +39,8 @@ import { GitSyncController } from './git-sync.controller';
EnvironmentModule,
CollaborationModule,
PageModule,
// AuthModule exports AuthService (verifyUserCredentials for /git HTTP Basic).
AuthModule,
ScheduleModule,
],
controllers: [GitSyncController],
@@ -44,6 +49,12 @@ import { GitSyncController } from './git-sync.controller';
GitSyncOrchestrator,
VaultRegistryService,
PageChangeListener,
// /git smart-HTTP host (the raw Fastify route in main.ts resolves these).
GitHttpBackendService,
GitHttpService,
],
// Exported so the raw Fastify route registered in main.ts can resolve the
// handler from the Nest container (app.get(GitHttpService)).
exports: [GitHttpService],
})
export class GitSyncModule {}

View File

@@ -0,0 +1,87 @@
// Unit tests for the pure CGI-response helpers used by GitHttpBackendService.
// The header/body split MUST treat the body as binary (Buffer) and never
// stringify it; the Status: header sets the HTTP status (default 200).
import {
parseCgiResponse,
splitCgiBuffer,
} from './git-http-backend.service';
// Covers the CGI header-block parser: Status handling (default, explicit,
// case-insensitive, invalid) and header forwarding/trimming.
describe('parseCgiResponse', () => {
  it('defaults to status 200 with no Status header', () => {
    const r = parseCgiResponse('Content-Type: application/x-git-upload-pack-result');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([
      ['Content-Type', 'application/x-git-upload-pack-result'],
    ]);
  });

  it('honors a Status header and does not forward it', () => {
    const r = parseCgiResponse('Status: 404 Not Found\nContent-Type: text/plain');
    expect(r.statusCode).toBe(404);
    expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
  });

  it('parses multiple headers and trims whitespace', () => {
    const r = parseCgiResponse(
      'Status: 403 Forbidden\r\nContent-Type: text/plain \r\nX-Foo: bar ',
    );
    expect(r.statusCode).toBe(403);
    expect(r.headers).toEqual([
      ['Content-Type', 'text/plain'],
      ['X-Foo', 'bar'],
    ]);
  });

  it('ignores malformed (colon-less) lines defensively', () => {
    const r = parseCgiResponse('Content-Type: text/plain\ngarbage-line\nX-A: b');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([
      ['Content-Type', 'text/plain'],
      ['X-A', 'b'],
    ]);
  });

  // Previously titled "out-of-range" although the input is non-numeric; the
  // title now matches what is exercised.
  it('ignores a non-numeric Status code and keeps the default', () => {
    const r = parseCgiResponse('Status: not-a-number\nContent-Type: text/plain');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
  });

  // Exercises the numeric range guard (valid HTTP codes are 100..599).
  it('ignores an out-of-range Status code and keeps the default', () => {
    for (const bad of ['99 Too Low', '600 Too High', '-200 Negative']) {
      const r = parseCgiResponse(`Status: ${bad}\nContent-Type: text/plain`);
      expect(r.statusCode).toBe(200);
      // The Status header itself is never forwarded, valid or not.
      expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
    }
  });

  it('treats the Status header case-insensitively', () => {
    const r = parseCgiResponse('status: 500 Boom');
    expect(r.statusCode).toBe(500);
    expect(r.headers).toEqual([]);
  });
});
// Covers the header/body split on the raw buffer: both separator flavours,
// an empty body, and the "separator not seen yet" case.
describe('splitCgiBuffer', () => {
  /** Build a UTF-8 buffer from text. */
  const utf8 = (text: string): Buffer => Buffer.from(text, 'utf8');

  it('splits on a CRLF blank line and keeps the body as bytes', () => {
    const headerPart = utf8('Status: 200 OK\r\nContent-Type: text/plain\r\n\r\n');
    const bodyPart = Buffer.from([0x00, 0x01, 0x02, 0xff]);
    const result = splitCgiBuffer(Buffer.concat([headerPart, bodyPart]));
    expect(result).not.toBeNull();
    expect(result!.headerText).toBe('Status: 200 OK\r\nContent-Type: text/plain');
    expect(Array.from(result!.body)).toEqual([0x00, 0x01, 0x02, 0xff]);
  });

  it('splits on a bare LF blank line', () => {
    const result = splitCgiBuffer(utf8('Content-Type: text/plain\n\nhello'));
    expect(result).not.toBeNull();
    expect(result!.headerText).toBe('Content-Type: text/plain');
    expect(result!.body.toString('utf8')).toBe('hello');
  });

  it('returns an empty body when nothing follows the separator', () => {
    const result = splitCgiBuffer(utf8('Content-Type: text/plain\r\n\r\n'));
    expect(result).not.toBeNull();
    expect(result!.body.length).toBe(0);
  });

  it('returns null when there is no blank-line separator yet', () => {
    const incomplete = utf8('Content-Type: text/plain\r\nincomplete');
    expect(splitCgiBuffer(incomplete)).toBeNull();
  });
});

View File

@@ -0,0 +1,265 @@
import { Injectable, Logger } from '@nestjs/common';
import { spawn } from 'node:child_process';
import type { IncomingMessage, ServerResponse } from 'node:http';
import { vaultGitEnv } from '@docmost/git-sync';
import { EnvironmentService } from '../../environment/environment.service';
/** The parsed first part of a CGI response: the HTTP status + header pairs. */
export interface ParsedCgiResponse {
  /** HTTP status from the CGI `Status:` header; 200 when absent or invalid. */
  statusCode: number;
  /** Header pairs in emission order; names are kept verbatim (not lower-cased). */
  headers: Array<[string, string]>;
}

/**
 * Parse the CGI header block emitted by `git http-backend` into an HTTP status
 * and a list of header pairs. The input is ONLY the header text (everything up
 * to, but not including, the blank-line separator) — the binary body is split
 * off by the caller on the raw Buffer (never stringified).
 *
 * CGI semantics (RFC 3875 §6): a `Status: <code> <reason>` header sets the HTTP
 * status (default 200 when absent). Every other header is forwarded verbatim.
 * Header lines are `Name: value`. Lines that cannot be a header are ignored
 * defensively: a line without a ':' and a line whose name part is empty
 * (e.g. `: value`). An empty name must never reach `ServerResponse.setHeader`,
 * which throws on an invalid header name.
 *
 * Pure + framework-free so it is unit-testable in isolation.
 *
 * @param headerBlock The header text, lines separated by CRLF or LF.
 * @returns The status code (100..599, default 200) and the forwarded headers.
 */
export function parseCgiResponse(headerBlock: string): ParsedCgiResponse {
  let statusCode = 200;
  const headers: Array<[string, string]> = [];

  // Header lines may be separated by CRLF or LF; split on either.
  for (const line of headerBlock.split(/\r?\n/)) {
    const sep = line.indexOf(':');
    if (sep === -1) continue; // blank or colon-less line — not a header

    const name = line.slice(0, sep).trim();
    if (name.length === 0) continue; // nameless header — malformed, drop it

    const value = line.slice(sep + 1).trim();
    if (name.toLowerCase() === 'status') {
      // `Status: 404 Not Found` — the leading integer is the HTTP status code.
      const code = parseInt(value, 10);
      if (Number.isFinite(code) && code >= 100 && code <= 599) {
        statusCode = code;
      }
      continue; // never forward the CGI Status header itself
    }
    headers.push([name, value]);
  }
  return { statusCode, headers };
}
/**
 * Split a raw CGI response buffer at the FIRST blank-line boundary
 * (`\r\n\r\n` or `\n\n`, whichever occurs earliest). Returns the header text
 * and the remaining body bytes. Returns null when no blank-line separator is
 * present (an incomplete or malformed response).
 *
 * The earliest separator wins because the body is arbitrary binary: with
 * LF-terminated headers, a `\r\n\r\n` sequence may legitimately occur inside
 * the body and must not be mistaken for the header/body boundary.
 *
 * Pure (operates on Buffers, never stringifies the body) so it is testable.
 *
 * @param buf The bytes received so far from the CGI's stdout.
 * @returns The header text (separator excluded) and the body bytes after the
 *   separator, or null when no separator has been seen.
 */
export function splitCgiBuffer(
  buf: Buffer,
): { headerText: string; body: Buffer } | null {
  const crlfIdx = buf.indexOf('\r\n\r\n');
  const lfIdx = buf.indexOf('\n\n');
  if (crlfIdx === -1 && lfIdx === -1) return null;

  // Pick whichever separator starts first; the two can never start at the same
  // offset because they begin with different bytes.
  const useCrlf = crlfIdx !== -1 && (lfIdx === -1 || crlfIdx < lfIdx);
  const idx = useCrlf ? crlfIdx : lfIdx;
  const sepLen = useCrlf ? 4 : 2;

  const headerText = buf.subarray(0, idx).toString('utf8');
  const body = buf.subarray(idx + sepLen);
  return { headerText, body };
}
/**
 * A parsed git smart-HTTP request, resolved by the controller/handler and
 * passed to GitHttpBackendService.run, which maps these fields onto the CGI
 * environment of `git http-backend`.
 */
export interface GitHttpBackendRequest {
/** The space id (the on-disk vault dir name == GIT_PROJECT_ROOT child). */
spaceId: string;
/** The subpath after `<spaceId>.git/`, e.g. `info/refs` or `git-receive-pack`. */
subpath: string;
/** REQUEST_METHOD — `GET` or `POST`. Only `POST` has its body piped to the CGI. */
method: string;
/** Raw query string WITHOUT the leading '?', e.g. `service=git-receive-pack`. */
queryString: string;
/** Content-Type header value (may be empty for GET); forwarded as CONTENT_TYPE. */
contentType: string;
/**
 * The Git-Protocol request header value, or undefined when absent. Forwarded
 * as GIT_PROTOCOL only when it is non-empty.
 */
gitProtocol?: string;
/** Authenticated user email — used as REMOTE_USER (reflog identity). */
remoteUser: string;
}
/**
 * Bridges an HTTP git smart-protocol request to `git http-backend` (the CGI that
 * implements the entire smart-HTTP protocol: info/refs, upload-pack,
 * receive-pack, protocol v2, dumb fallback). We do NOT reimplement pkt-line.
 *
 * The Fastify reply is hijacked by the caller; this service streams the request
 * body to the child's stdin and writes the child's CGI response (status +
 * headers parsed from the leading header block, then the raw binary body) to the
 * Node response. Errors before any output produce a 500. Credentials are never
 * logged.
 */
@Injectable()
export class GitHttpBackendService {
  private readonly logger = new Logger(GitHttpBackendService.name);

  constructor(private readonly environmentService: EnvironmentService) {}

  /**
   * Spawn `git http-backend` for one request and bridge it to the raw Node
   * request/response. Resolves when the response has been fully written (the
   * child exited and its output was flushed), or after a 500 was sent on an
   * early failure. Never rejects — push ingestion relies on this resolving so
   * the lock-held cycle body can run afterwards.
   *
   * @param parsed The already-authorized, parsed git request.
   * @param rawReq The raw Node request; its body is piped to the CGI for POST.
   * @param rawRes The raw Node response the CGI output is written to.
   */
  async run(
    parsed: GitHttpBackendRequest,
    rawReq: IncomingMessage,
    rawRes: ServerResponse,
  ): Promise<void> {
    const projectRoot = this.environmentService.getGitSyncDataDir();
    // PATH_INFO is the repo-relative CGI path: /<spaceId>.git/<subpath>.
    const pathInfo = `/${parsed.spaceId}.git/${parsed.subpath}`;

    // Build the CGI env from the engine's cwd-isolated base (strips GIT_DIR /
    // GIT_WORK_TREE), then layer the http-backend CGI variables. GIT_PROTOCOL is
    // only set when the client sent the Git-Protocol header. PATH is preserved
    // (vaultGitEnv already copies process.env, so PATH carries through).
    const cgiEnv: Record<string, string> = {
      GIT_PROJECT_ROOT: projectRoot,
      GIT_HTTP_EXPORT_ALL: '1', // authz is done by us; no git-daemon-export-ok file
      PATH_INFO: pathInfo,
      REQUEST_METHOD: parsed.method,
      QUERY_STRING: parsed.queryString,
      CONTENT_TYPE: parsed.contentType,
      REMOTE_USER: parsed.remoteUser,
    };
    if (parsed.gitProtocol) {
      cgiEnv.GIT_PROTOCOL = parsed.gitProtocol;
    }
    // Git clients may gzip a POST body and announce it with
    // `Content-Encoding: gzip`. We pipe the body through untouched, so the CGI
    // must be told via the standard HTTP_* meta-variable (RFC 3875 §4.1.18) or
    // it would read the compressed bytes as plain pkt-lines.
    const contentEncoding = rawReq.headers?.['content-encoding'];
    if (typeof contentEncoding === 'string' && contentEncoding !== '') {
      cgiEnv.HTTP_CONTENT_ENCODING = contentEncoding;
    }
    const env = vaultGitEnv(cgiEnv);

    return new Promise<void>((resolve) => {
      // Both 'error' and 'close' may fire; resolve exactly once.
      let settled = false;
      const done = () => {
        if (settled) return;
        settled = true;
        resolve();
      };

      let child: ReturnType<typeof spawn>;
      try {
        child = spawn('git', ['http-backend'], { env });
      } catch (err) {
        this.send500(rawRes, 'spawn-failed', err);
        return done();
      }

      // Accumulate stdout until we have the full CGI header block, then write the
      // parsed status/headers and start streaming the remaining body bytes.
      let headerParsed = false;
      let pending: Buffer = Buffer.alloc(0);
      const flushHeadersAndBody = (chunk: Buffer): void => {
        pending = Buffer.concat([pending, chunk]);
        const split = splitCgiBuffer(pending);
        if (!split) return; // header block not complete yet
        headerParsed = true;
        const { statusCode, headers } = parseCgiResponse(split.headerText);
        rawRes.statusCode = statusCode;
        for (const [name, value] of headers) {
          // setHeader throws on an invalid header name/value. We are inside a
          // stream 'data' handler, so an uncaught throw here would escape as an
          // uncaught exception; drop the offending header instead.
          try {
            rawRes.setHeader(name, value);
          } catch {
            this.logger.warn(
              `git http-backend emitted an invalid header; dropped "${name.slice(0, 64)}"`,
            );
          }
        }
        if (split.body.length > 0) rawRes.write(split.body);
        pending = Buffer.alloc(0);
      };

      // NOTE(review): writes ignore backpressure (the return value of write()),
      // so a slow client makes Node buffer the child's output in memory.
      child.stdout?.on('data', (chunk: Buffer) => {
        if (headerParsed) {
          rawRes.write(chunk);
        } else {
          flushHeadersAndBody(chunk);
        }
      });

      let stderr = '';
      child.stderr?.on('data', (chunk: Buffer) => {
        // Capture for diagnostics; never echo to the client. http-backend writes
        // CGI errors here. We do NOT log the request body or any credentials.
        if (stderr.length < 8192) stderr += chunk.toString('utf8');
      });

      child.on('error', (err) => {
        if (!headerParsed && !rawRes.headersSent) {
          this.send500(rawRes, 'child-error', err);
        } else {
          // Output already started — we can only terminate the stream.
          try {
            rawRes.end();
          } catch {
            /* ignore */
          }
        }
        done();
      });

      child.on('close', (code) => {
        if (!headerParsed && !rawRes.headersSent) {
          // The child exited before emitting a complete CGI header block.
          this.logger.error(
            `git http-backend produced no valid response (exit ${code}) for ` +
              `space ${parsed.spaceId}; stderr: ${stderr.trim().slice(0, 500)}`,
          );
          this.send500(rawRes, 'no-output');
        } else {
          try {
            rawRes.end();
          } catch {
            /* ignore */
          }
        }
        done();
      });

      // Swallow EPIPE etc. on the child's stdin so a client disconnect does not
      // crash the process.
      child.stdin?.on('error', () => {
        /* ignore broken-pipe on stdin */
      });

      // Pipe the request body to the child's stdin. For GET there is no body, so
      // end stdin immediately. We pipe `rawReq` (the raw Node stream) directly so
      // large pushes are streamed, not buffered.
      if (parsed.method === 'POST' && child.stdin) {
        rawReq.pipe(child.stdin);
        rawReq.on('error', () => {
          // A failed request stream never emits 'end'; close stdin ourselves so
          // the child sees EOF.
          try {
            child.stdin?.end();
          } catch {
            /* ignore */
          }
        });
      } else {
        child.stdin?.end();
      }
    });
  }

  /**
   * Send a clean 500 without leaking credentials or the request body.
   *
   * @param rawRes The raw Node response to terminate.
   * @param reason Short machine-readable tag included in the server log.
   * @param err Optional cause; only its message is logged.
   */
  private send500(rawRes: ServerResponse, reason: string, err?: unknown): void {
    const message = err instanceof Error ? err.message : undefined;
    this.logger.error(
      `git http-backend failed (${reason})${message ? `: ${message}` : ''}`,
    );
    try {
      if (!rawRes.headersSent) {
        rawRes.statusCode = 500;
        rawRes.setHeader('Content-Type', 'text/plain');
      }
      rawRes.end('Internal server error');
    } catch {
      /* ignore */
    }
  }
}

View File

@@ -0,0 +1,183 @@
// Unit tests for the pure /git smart-HTTP helpers: URL parsing, service->kind
// mapping (read vs write), and the gating/auth decision precedence.
import {
decideGitHttpGate,
parseGitPath,
resolveServiceKind,
} from './git-http.helpers';
// Covers URL-path parsing: the accepted shapes and every rejected shape
// (missing .git, empty id, literal and percent-encoded traversal).
describe('parseGitPath', () => {
  it('parses spaceId + subpath, stripping the trailing .git', () => {
    const parsed = parseGitPath('abc123.git/info/refs');
    expect(parsed).toEqual({ spaceId: 'abc123', subpath: 'info/refs' });
  });

  it('tolerates a leading slash', () => {
    const parsed = parseGitPath('/abc.git/git-receive-pack');
    expect(parsed).toEqual({ spaceId: 'abc', subpath: 'git-receive-pack' });
  });

  it('returns an empty subpath for the bare repo root', () => {
    const parsed = parseGitPath('abc.git');
    expect(parsed).toEqual({ spaceId: 'abc', subpath: '' });
  });

  it('returns null when the first segment lacks .git', () => {
    const parsed = parseGitPath('abc/info/refs');
    expect(parsed).toBeNull();
  });

  it('returns null on an empty space id', () => {
    const parsed = parseGitPath('.git/info/refs');
    expect(parsed).toBeNull();
  });

  it('rejects path traversal', () => {
    for (const hostile of ['abc.git/../../etc/passwd', '..git/x']) {
      expect(parseGitPath(hostile)).toBeNull();
    }
  });

  it('rejects percent-encoded dot/slash traversal in the subpath (case-insensitive)', () => {
    const hostilePaths = [
      'abc.git/%2e%2e%2fetc/passwd',
      'abc.git/%2E%2E/secret',
      'abc.git/objects/%2fabsolute',
    ];
    for (const hostile of hostilePaths) {
      expect(parseGitPath(hostile)).toBeNull();
    }
  });
});
// Covers the request-shape -> operation-kind mapping (read vs write vs null).
describe('resolveServiceKind', () => {
  /** Shorthand: resolve the kind for a method/subpath/service triple. */
  const kindOf = (method: string, subpath: string, service?: string) =>
    resolveServiceKind({ method, subpath, service });

  it('GET info/refs?service=git-upload-pack -> read', () => {
    expect(kindOf('GET', 'info/refs', 'git-upload-pack')).toBe('read');
  });

  it('GET info/refs?service=git-receive-pack -> write', () => {
    expect(kindOf('GET', 'info/refs', 'git-receive-pack')).toBe('write');
  });

  it('POST git-upload-pack -> read', () => {
    expect(kindOf('POST', 'git-upload-pack')).toBe('read');
  });

  it('POST git-receive-pack -> write', () => {
    expect(kindOf('POST', 'git-receive-pack')).toBe('write');
  });

  it('a dumb-protocol GET (HEAD / objects) -> read', () => {
    expect(kindOf('GET', 'HEAD')).toBe('read');
    expect(kindOf('GET', 'objects/12/abcdef')).toBe('read');
  });

  it('info/refs with no/unknown service -> read (dumb discovery)', () => {
    expect(kindOf('GET', 'info/refs')).toBe('read');
  });

  it('an unknown POST endpoint -> null', () => {
    expect(kindOf('POST', 'whatever')).toBeNull();
  });

  it('an unsupported method -> null', () => {
    expect(kindOf('DELETE', 'git-receive-pack')).toBeNull();
  });
});
// Covers the gate precedence: 401 before everything, then 400, 404, 403.
describe('decideGitHttpGate', () => {
  // The happy-path input; each test flips only the flags it is about.
  const base = {
    hasCredentials: true,
    credentialsValid: true,
    serviceKind: 'read' as const,
    gitSyncEnabled: true,
    gitHttpEnabled: true,
    spaceExists: true,
    spaceGitSyncEnabled: true,
    permissionGranted: true,
  };

  /** Run the gate on `base` with the given fields overridden. */
  const decide = (overrides: Partial<Parameters<typeof decideGitHttpGate>[0]>) =>
    decideGitHttpGate({ ...base, ...overrides });

  it('proceeds on the happy path', () => {
    expect(decideGitHttpGate(base)).toEqual({ kind: 'proceed' });
  });

  it('401 when credentials are missing (even for a valid space)', () => {
    expect(decide({ hasCredentials: false })).toEqual({ kind: 'unauthorized' });
  });

  it('401 when credentials are present but invalid', () => {
    expect(decide({ credentialsValid: false })).toEqual({ kind: 'unauthorized' });
  });

  it('400 on an unparseable service kind', () => {
    expect(decide({ serviceKind: null })).toEqual({ kind: 'bad-request' });
  });

  it('404 when the space is not git-sync-enabled (never reveals existence)', () => {
    expect(decide({ spaceGitSyncEnabled: false })).toEqual({ kind: 'not-found' });
  });

  it('404 when the space does not exist', () => {
    expect(decide({ spaceExists: false })).toEqual({ kind: 'not-found' });
  });

  it('404 when git-sync is globally disabled', () => {
    expect(decide({ gitSyncEnabled: false })).toEqual({ kind: 'not-found' });
  });

  it('404 when the git-http host is disabled', () => {
    expect(decide({ gitHttpEnabled: false })).toEqual({ kind: 'not-found' });
  });

  it('403 when authenticated but lacking the required permission (reader on write)', () => {
    const decision = decide({ serviceKind: 'write', permissionGranted: false });
    expect(decision).toEqual({ kind: 'forbidden' });
  });

  it('still 401 (not 404) for missing creds against a disabled space', () => {
    // Anonymous probe must always get 401 first, regardless of space state.
    const decision = decide({ hasCredentials: false, spaceGitSyncEnabled: false });
    expect(decision).toEqual({ kind: 'unauthorized' });
  });
});

View File

@@ -0,0 +1,147 @@
// Pure, framework-free helpers for the /git smart-HTTP host. They carry no Nest
// / DI / concrete-service imports so the request parsing and the auth/authz
// gating DECISION can be unit-tested in isolation, and nothing here ever logs a
// password or the Authorization header.
/** The git operation a request maps to: a read (fetch/clone) or a write (push). */
export type GitHttpServiceKind = 'read' | 'write';

/** A parsed `/git/<spaceId>.git/<subpath>` URL. */
export interface ParsedGitPath {
  /** The first path segment with its trailing `.git` stripped. */
  spaceId: string;
  /** The subpath after `<spaceId>.git/` (no leading slash), e.g. `info/refs`. */
  subpath: string;
}

/**
 * Parse the `<rest>` of a `/git/<rest>` URL path into the space id and the
 * repo-relative subpath. The space id is the first path segment with its
 * trailing `.git` stripped.
 *
 * `rest` must be the pathname only — the caller strips the query string before
 * calling. Leading slashes are tolerated.
 *
 * Returns null when the shape does not match or looks hostile:
 * - the first segment does not end in `.git`, or the space id is empty;
 * - the space id is `.` or contains `..`, or the subpath has a `..` segment
 *   (rejected defensively even though http-backend resolves PATH_INFO against
 *   GIT_PROJECT_ROOT);
 * - EITHER component contains a percent-encoded dot or slash (`%2e` / `%2f`,
 *   case-insensitive). Both components are interpolated into PATH_INFO, so a
 *   downstream decoder must never be able to turn `%2e%2e%2f` back into `../`.
 *
 * @param rest The path after `/git/`, without the query string.
 * @returns The parsed components, or null when the path is rejected.
 */
export function parseGitPath(rest: string): ParsedGitPath | null {
  // Strip leading slashes, then take the first segment as `<spaceId>.git`.
  const clean = rest.replace(/^\/+/, '');
  const slash = clean.indexOf('/');
  const first = slash === -1 ? clean : clean.slice(0, slash);
  const subpath = slash === -1 ? '' : clean.slice(slash + 1);

  if (!first.endsWith('.git')) return null;
  const spaceId = first.slice(0, -'.git'.length);
  if (!spaceId) return null;

  // Literal traversal / degenerate ids. (`spaceId` cannot contain '/': it is
  // cut from the first segment.)
  if (
    spaceId === '.' ||
    spaceId.includes('..') ||
    subpath.split('/').some((seg) => seg === '..')
  ) {
    return null;
  }

  // Defense-in-depth: percent-encoded dot/slash in either component.
  const encodedTraversal = /%2e|%2f/i;
  if (encodedTraversal.test(spaceId) || encodedTraversal.test(subpath)) {
    return null;
  }

  return { spaceId, subpath };
}
/**
 * Classify a parsed git request (method + subpath + `service` query value) as a
 * read or a write. The method is compared case-insensitively.
 *
 *   GET  info/refs?service=git-receive-pack  -> write (push discovery)
 *   GET  anything else (info/refs with another or no service, HEAD,
 *        objects/…)                          -> read
 *   POST git-receive-pack                    -> write (push)
 *   POST git-upload-pack                     -> read  (fetch)
 *
 * Every other shape (an unknown POST endpoint, any other method) yields null so
 * the caller can reject the request instead of guessing.
 */
export function resolveServiceKind(input: {
  method: string;
  subpath: string;
  service?: string;
}): GitHttpServiceKind | null {
  const { subpath, service } = input;

  switch (input.method.toUpperCase()) {
    case 'GET': {
      // The only GET that counts as a write is push discovery on info/refs.
      const isPushDiscovery =
        subpath === 'info/refs' && service === 'git-receive-pack';
      return isPushDiscovery ? 'write' : 'read';
    }
    case 'POST':
      if (subpath === 'git-receive-pack') return 'write';
      if (subpath === 'git-upload-pack') return 'read';
      return null; // unknown POST endpoint
    default:
      return null; // unsupported method
  }
}
/** What the request handler must do once the gate has been evaluated. */
export type GitHttpGateDecision =
  | { kind: 'unauthorized' } // 401 + WWW-Authenticate (credentials missing or invalid)
  | { kind: 'not-found' } // 404 (space hidden, or sync / http host disabled)
  | { kind: 'forbidden' } // 403 (authenticated, but the permission is missing)
  | { kind: 'bad-request' } // 400 (git request shape could not be classified)
  | { kind: 'proceed' }; // hand the request to http-backend

/**
 * The pure auth/gating decision for a /git request. It takes only booleans the
 * handler has already resolved, so the precedence can be unit tested without a
 * database or the CASL graph.
 *
 * Checks run in this order; the first that fails decides the outcome:
 *   1. credentials missing or invalid                     -> 401
 *   2. request shape not classifiable (serviceKind null)  -> 400
 *   3. git-sync off, git-http off, space missing, or the
 *      space not git-sync-enabled                         -> 404
 *   4. required permission not granted                    -> 403
 *   5. nothing failed                                     -> proceed
 *
 * Because (1) comes first, an anonymous probe always receives 401 whatever the
 * state of the space; an authenticated caller hitting a hidden or disabled
 * space receives 404 rather than 403, so existence is never revealed.
 */
export function decideGitHttpGate(input: {
  hasCredentials: boolean;
  credentialsValid: boolean;
  serviceKind: GitHttpServiceKind | null;
  gitSyncEnabled: boolean;
  gitHttpEnabled: boolean;
  spaceExists: boolean;
  spaceGitSyncEnabled: boolean;
  permissionGranted: boolean;
}): GitHttpGateDecision {
  const authenticated = input.hasCredentials && input.credentialsValid;
  if (!authenticated) return { kind: 'unauthorized' };

  if (input.serviceKind === null) return { kind: 'bad-request' };

  const spaceIsServed =
    input.gitSyncEnabled &&
    input.gitHttpEnabled &&
    input.spaceExists &&
    input.spaceGitSyncEnabled;
  if (!spaceIsServed) return { kind: 'not-found' };

  return input.permissionGranted ? { kind: 'proceed' } : { kind: 'forbidden' };
}

View File

@@ -0,0 +1,376 @@
// Unit tests for GitHttpService — the /git smart-HTTP handler. Everything it
// depends on (backend, auth, repos, ability factory, env, orchestrator) is
// mocked so we exercise ONLY the handler wiring: workspace resolution (which is
// done HERE, not by DomainMiddleware — see FIX 1), the auth/gating precedence,
// the read-vs-write dispatch, and that a fetch does NOT take the lock.
//
// These tests deliberately NEVER set `req.raw.workspaceId`: the workspace must
// come from WorkspaceRepo. If the handler regressed to reading
// `req.raw.workspaceId`, the happy-path fetch test below would fail (the repo
// would not be consulted and the request would 401).
import { Logger, UnauthorizedException } from '@nestjs/common';
import {
SpaceCaslAction,
SpaceCaslSubject,
} from '../../../core/casl/interfaces/space-ability.type';
import { GitHttpService } from './git-http.service';
type AnyMock = jest.Mock;
/**
 * Knobs for build(): each field overrides one mocked collaborator's answer.
 * Every field is optional; build() supplies happy-path defaults.
 */
interface BuildOptions {
/** Returned by env.isSelfHosted(); env.isCloud() returns the negation. */
selfHosted?: boolean;
/** Returned by env.isGitSyncEnabled(). */
gitSyncEnabled?: boolean;
/** Returned by env.isGitSyncHttpEnabled(). */
gitHttpEnabled?: boolean;
/** What workspaceRepo.findFirst() and findByHostname() resolve to. */
workspace?: { id: string } | null;
/** What spaceRepo.findById() returns. */
space?: { id: string; settings?: unknown } | null;
/** Result of authService.verifyUserCredentials: a user, or throw 401. */
user?: { id: string; email: string } | null;
/** Whether the created ability grants the requested action. */
abilityCan?: boolean;
}
/**
 * What build() returns: the service under test plus every mock it was
 * constructed with, so tests can assert on the calls.
 */
interface Built {
service: GitHttpService;
env: Record<string, AnyMock>;
authService: { verifyUserCredentials: AnyMock };
spaceRepo: { findById: AnyMock };
workspaceRepo: { findFirst: AnyMock; findByHostname: AnyMock };
abilityFactory: { createForUser: AnyMock };
/** The `can` mock of the ability object that createForUser resolves to. */
abilityCan: AnyMock;
vaultRegistry: { ensureServable: AnyMock };
orchestrator: { ingestExternalPush: AnyMock };
backend: { run: AnyMock };
}
/**
 * Construct a GitHttpService wired entirely to jest mocks. With no options the
 * mocks describe the happy path (self-hosted, sync + http host enabled, a
 * workspace, a git-sync-enabled space, a valid user, permission granted); each
 * option overrides one of those answers.
 */
function build(opts: BuildOptions = {}): Built {
  // Destructuring defaults apply only to `undefined`, so an explicit `null`
  // (e.g. "no such space") is preserved.
  const {
    selfHosted = true,
    gitSyncEnabled = true,
    gitHttpEnabled = true,
    workspace = { id: 'ws-1' },
    space = { id: 'space-1', settings: { gitSync: { enabled: true } } },
    user = { id: 'user-1', email: 'dev@example.com' },
    abilityCan = true,
  } = opts;

  /** A mock whose async call resolves to `value`. */
  const resolving = <T>(value: T): AnyMock => jest.fn(async () => value);

  const env: Record<string, AnyMock> = {
    isSelfHosted: jest.fn(() => selfHosted),
    isCloud: jest.fn(() => !selfHosted),
    isGitSyncEnabled: jest.fn(() => gitSyncEnabled),
    isGitSyncHttpEnabled: jest.fn(() => gitHttpEnabled),
  };

  // A missing user models bad credentials: verification throws a 401.
  const verifyUserCredentials: AnyMock = jest.fn(async () => {
    if (!user) throw new UnauthorizedException();
    return user;
  });

  const abilityCanMock: AnyMock = jest.fn(() => abilityCan);

  const mocks = {
    env,
    authService: { verifyUserCredentials },
    spaceRepo: { findById: resolving(space) },
    workspaceRepo: {
      findFirst: resolving(workspace),
      findByHostname: resolving(workspace),
    },
    abilityFactory: { createForUser: resolving({ can: abilityCanMock }) },
    abilityCan: abilityCanMock,
    vaultRegistry: { ensureServable: resolving(undefined) },
    orchestrator: { ingestExternalPush: resolving(undefined) },
    backend: { run: resolving(undefined) },
  };

  // Argument order mirrors the GitHttpService constructor.
  const service = new GitHttpService(
    mocks.env as any,
    mocks.authService as any,
    mocks.spaceRepo as any,
    mocks.workspaceRepo as any,
    mocks.abilityFactory as any,
    mocks.vaultRegistry as any,
    mocks.orchestrator as any,
    mocks.backend as any,
  );

  return { service, ...mocks };
}
/**
 * Build a stand-in for a Fastify reply. The chainable `header` / `status` /
 * `send` calls and `hijack` record what they were given into `state`, which
 * the test inspects afterwards.
 */
function fakeReply() {
  const state: {
    statusCode?: number;
    headers: Record<string, string>;
    body?: unknown;
    hijacked: boolean;
    sent: boolean;
  } = { headers: {}, hijacked: false, sent: false };

  // The raw Node response — only touched on the streaming/error paths.
  const raw = {
    headersSent: false,
    writableEnded: false,
    statusCode: 200,
    setHeader: jest.fn(),
    end: jest.fn(),
  };

  const reply: any = {
    header: (name: string, value: string) => {
      state.headers[name] = value;
      return reply;
    },
    status: (code: number) => {
      state.statusCode = code;
      return reply;
    },
    send: (body: unknown) => {
      state.body = body;
      state.sent = true;
      return reply;
    },
    hijack: () => {
      state.hijacked = true;
    },
    // Mirrors Fastify's `reply.sent` flag from the recorded state.
    get sent() {
      return state.sent;
    },
    raw,
  };

  return { reply, state };
}
/**
 * Build a stand-in for a Fastify request to the /git host. `method` defaults
 * to GET and `host` to docs.example.com; the Authorization header is only
 * present when one is supplied.
 */
function fakeRequest(opts: {
  url: string;
  method?: string;
  authorization?: string;
  host?: string;
}) {
  const { url, method = 'GET', authorization, host = 'docs.example.com' } = opts;

  const headers: Record<string, string> = authorization
    ? { host, authorization }
    : { host };

  // Fastify would parse the query string; mirror that with a plain split (no
  // decoding) so params such as `service` are visible on `query`.
  const queryStart = url.indexOf('?');
  const pairs = queryStart === -1 ? [] : url.slice(queryStart + 1).split('&');
  const query: Record<string, string> = {};
  pairs.forEach((pair) => {
    const parts = pair.split('=');
    const key = parts[0];
    if (key) query[key] = parts[1] ?? '';
  });

  return {
    url,
    method,
    headers,
    query,
    // raw is intentionally WITHOUT workspaceId — the handler must resolve it
    // itself via WorkspaceRepo (a regression to req.raw.workspaceId would 401).
    raw: {},
  } as any;
}
/** Build an HTTP Basic `Authorization` header value for the given credentials. */
function basic(email: string, password: string): string {
  const encoded = Buffer.from(`${email}:${password}`).toString('base64');
  return `Basic ${encoded}`;
}
// Per-test reset: clear recorded mock calls and mute the Nest logger so the
// handler's warn/error output does not clutter negative-path tests.
beforeEach(() => {
  const silent = (): undefined => undefined;
  jest.clearAllMocks();
  jest.spyOn(Logger.prototype, 'warn').mockImplementation(silent);
  jest.spyOn(Logger.prototype, 'error').mockImplementation(silent);
});
describe('GitHttpService.handle', () => {
  // Shared fixtures: the two smart-HTTP endpoints exercised below and the
  // developer's Basic credentials.
  const FETCH_URL = '/git/space-1.git/info/refs?service=git-upload-pack';
  const PUSH_URL = '/git/space-1.git/git-receive-pack';
  const devCreds = (password = 'pw') => basic('dev@example.com', password);

  /** A GET of info/refs for git-upload-pack (the read/fetch path). */
  const fetchRequest = (extra: { authorization?: string; host?: string } = {}) =>
    fakeRequest({ url: FETCH_URL, method: 'GET', ...extra });

  /** An authenticated POST to git-receive-pack (the write/push path). */
  const pushRequest = () =>
    fakeRequest({ url: PUSH_URL, method: 'POST', authorization: devCreds() });

  it('fetch with valid creds resolves the workspace via the repo and dispatches WITHOUT the lock', async () => {
    const built = build({ selfHosted: true });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRequest({ authorization: devCreds() }), reply);

    // The workspace came from WorkspaceRepo, NOT req.raw.workspaceId.
    expect(built.workspaceRepo.findFirst).toHaveBeenCalledTimes(1);
    expect(built.authService.verifyUserCredentials).toHaveBeenCalledWith(
      { email: 'dev@example.com', password: 'pw' },
      'ws-1',
    );
    expect(built.spaceRepo.findById).toHaveBeenCalledWith('space-1', 'ws-1');
    // Read ability was evaluated.
    expect(built.abilityCan).toHaveBeenCalledWith(
      SpaceCaslAction.Read,
      SpaceCaslSubject.Page,
    );
    // It proceeded: vault prepared, reply hijacked, backend ran directly.
    expect(built.vaultRegistry.ensureServable).toHaveBeenCalledWith('space-1');
    expect(state.hijacked).toBe(true);
    expect(built.backend.run).toHaveBeenCalledTimes(1);
    // A fetch must NOT take the push lock.
    expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled();
  });

  it('cloud deployment resolves the workspace by the host subdomain', async () => {
    const built = build({ selfHosted: false });
    const { reply } = fakeReply();

    await built.service.handle(
      fetchRequest({ authorization: devCreds(), host: 'acme.example.com' }),
      reply,
    );

    expect(built.workspaceRepo.findByHostname).toHaveBeenCalledWith('acme');
    expect(built.workspaceRepo.findFirst).not.toHaveBeenCalled();
    expect(built.backend.run).toHaveBeenCalledTimes(1);
  });

  it('missing Basic credentials -> 401 with WWW-Authenticate', async () => {
    const built = build();
    const { reply, state } = fakeReply();

    // No Authorization header on this request.
    await built.service.handle(fetchRequest(), reply);

    expect(state.statusCode).toBe(401);
    expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"');
    expect(built.backend.run).not.toHaveBeenCalled();
    expect(built.authService.verifyUserCredentials).not.toHaveBeenCalled();
  });

  it('invalid Basic credentials -> 401 with WWW-Authenticate', async () => {
    const built = build({ user: null }); // verifyUserCredentials throws 401
    const { reply, state } = fakeReply();

    await built.service.handle(
      fetchRequest({ authorization: devCreds('wrong') }),
      reply,
    );

    expect(state.statusCode).toBe(401);
    expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"');
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('a write by a Read-only user -> 403 (reader cannot push)', async () => {
    const built = build({ abilityCan: false });
    const { reply, state } = fakeReply();

    await built.service.handle(pushRequest(), reply);

    // The Manage ability was checked for a write and denied.
    expect(built.abilityCan).toHaveBeenCalledWith(
      SpaceCaslAction.Manage,
      SpaceCaslSubject.Page,
    );
    expect(state.statusCode).toBe(403);
    expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled();
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('a space that is not git-sync-enabled -> 404 (existence never revealed)', async () => {
    const built = build({
      space: { id: 'space-1', settings: { gitSync: { enabled: false } } },
    });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRequest({ authorization: devCreds() }), reply);

    expect(state.statusCode).toBe(404);
    // CASL is never even evaluated for a non-candidate space.
    expect(built.abilityFactory.createForUser).not.toHaveBeenCalled();
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('git-sync globally disabled -> 404 even with valid creds', async () => {
    const built = build({ gitSyncEnabled: false });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRequest({ authorization: devCreds() }), reply);

    expect(state.statusCode).toBe(404);
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('a valid write proceeds through the orchestrator (push takes the lock)', async () => {
    const built = build({ abilityCan: true });
    const { reply, state } = fakeReply();

    await built.service.handle(pushRequest(), reply);

    expect(built.abilityCan).toHaveBeenCalledWith(
      SpaceCaslAction.Manage,
      SpaceCaslSubject.Page,
    );
    expect(state.hijacked).toBe(true);
    expect(built.orchestrator.ingestExternalPush).toHaveBeenCalledTimes(1);
    const firstCall = built.orchestrator.ingestExternalPush.mock.calls[0];
    expect(firstCall[0]).toBe('space-1');
    expect(firstCall[1]).toBe('ws-1');
  });

  it('an unresolvable workspace -> 401 (credentials cannot be validated without one)', async () => {
    const built = build({ workspace: null });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRequest({ authorization: devCreds() }), reply);

    // Without a workspace we cannot run verifyUserCredentials, so credentials
    // are not validated -> 401 (the 401-before-404 ordering is preserved: an
    // unauthenticated request never reaches the space-existence 404).
    expect(built.workspaceRepo.findFirst).toHaveBeenCalledTimes(1);
    expect(built.authService.verifyUserCredentials).not.toHaveBeenCalled();
    expect(state.statusCode).toBe(401);
    expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"');
    expect(built.backend.run).not.toHaveBeenCalled();
  });
});

View File

@@ -0,0 +1,319 @@
import { Injectable, Logger, UnauthorizedException } from '@nestjs/common';
import type { FastifyReply, FastifyRequest } from 'fastify';
import { AuthService } from '../../../core/auth/services/auth.service';
import SpaceAbilityFactory from '../../../core/casl/abilities/space-ability.factory';
import {
SpaceCaslAction,
SpaceCaslSubject,
} from '../../../core/casl/interfaces/space-ability.type';
import { SpaceRepo } from '@docmost/db/repos/space/space.repo';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { User } from '@docmost/db/types/entity.types';
import { parseBasicAuth } from '../../mcp/mcp-auth.helpers';
import { EnvironmentService } from '../../environment/environment.service';
import { VaultRegistryService } from '../services/vault-registry.service';
import {
GitSyncLockHeldError,
GitSyncOrchestrator,
} from '../services/git-sync.orchestrator';
import { GitHttpBackendService } from './git-http-backend.service';
import {
decideGitHttpGate,
parseGitPath,
resolveServiceKind,
GitHttpServiceKind,
} from './git-http.helpers';
/** Value of the `WWW-Authenticate` challenge header sent with a 401 response. */
const WWW_AUTHENTICATE = 'Basic realm="gitmost"';
/**
* The /git smart-HTTP host. Wires request parsing, the reused auth primitives
* (HTTP Basic -> AuthService.verifyUserCredentials), per-space gating
* (EnvironmentService flags + space.settings.gitSync.enabled), CASL authz
* (SpaceAbilityFactory), and dispatch to `git http-backend`:
* - fetch (read) -> ensureServable then stream http-backend directly (no lock).
* - push (write) -> ensureServable then orchestrator.ingestExternalPush, which
* runs the receive-pack under the space lock and then a Docmost cycle.
*
* Mounted at the ROOT (`/git/...`) by a raw Fastify route in main.ts (the global
* `/api` prefix does not apply). Never logs the password or Authorization header.
*/
@Injectable()
export class GitHttpService {
private readonly logger = new Logger(GitHttpService.name);
/**
 * Collaborators used by this service (all stored as private readonly fields):
 *  - environmentService: deployment mode (self-hosted / cloud) and the
 *    git-sync / git-http feature flags.
 *  - authService: verifies the HTTP Basic email/password pair.
 *  - spaceRepo / workspaceRepo: look up the target space and the workspace.
 *  - spaceAbilityFactory: builds the per-user CASL ability for the space.
 *  - vaultRegistry: prepares the space's vault before a request is served.
 *  - orchestrator: runs a push via `ingestExternalPush`.
 *  - backend: runs the git http-backend bridge for the request.
 */
constructor(
private readonly environmentService: EnvironmentService,
private readonly authService: AuthService,
private readonly spaceRepo: SpaceRepo,
private readonly workspaceRepo: WorkspaceRepo,
private readonly spaceAbilityFactory: SpaceAbilityFactory,
private readonly vaultRegistry: VaultRegistryService,
private readonly orchestrator: GitSyncOrchestrator,
private readonly backend: GitHttpBackendService,
) {}
/**
 * Work out which workspace a /git request belongs to. This route is a raw,
 * root-mounted Fastify route, so the Nest middleware that would normally
 * populate `req.raw.workspaceId` does not run for it; the lookup is repeated
 * here instead:
 *   - self-hosted deployment -> the first workspace (`workspaceRepo.findFirst()`);
 *   - cloud deployment       -> the workspace whose hostname is the first
 *                               dot-separated label of the `Host` header.
 *
 * @returns The workspace id, or null when nothing resolves (neither mode is
 *   active, no usable host label, no matching workspace, or the lookup threw).
 *   With a null workspace `handle` skips credential verification altogether,
 *   so the request cannot be authenticated.
 */
private async resolveWorkspaceId(req: FastifyRequest): Promise<string | null> {
  try {
    if (this.environmentService.isSelfHosted()) {
      const soleWorkspace = await this.workspaceRepo.findFirst();
      return soleWorkspace?.id ?? null;
    }
    if (!this.environmentService.isCloud()) return null;

    const hostHeader = this.headerValue(req.headers['host']);
    const subdomain = hostHeader?.split('.')[0] ?? '';
    if (subdomain === '') return null;

    const tenant = await this.workspaceRepo.findByHostname(subdomain);
    return tenant?.id ?? null;
  } catch (err) {
    // A lookup failure must not leak details to the client: log it and report
    // the workspace as unresolvable.
    const reason = err instanceof Error ? err.message : String(err);
    this.logger.warn(`git-http: workspace resolution error: ${reason}`);
    return null;
  }
}
/**
 * Handle one `/git/<spaceId>.git/<subpath>` request end to end.
 *
 * Steps, in order:
 *  1. Parse `req.url` into a space id + subpath and classify the request as a
 *     read (fetch) or write (push) via `resolveServiceKind`.
 *  2. Parse HTTP Basic credentials, resolve the workspace, and verify the
 *     credentials against it (verification is skipped when either is missing).
 *  3. Look up the space, read its `settings.gitSync.enabled` flag and, only for
 *     an enabled space, evaluate CASL (Manage for a write, Read otherwise).
 *  4. Feed everything to `decideGitHttpGate` and map its decision to
 *     401 / 400 / 404 / 403, or proceed.
 *  5. On proceed: make the vault servable, hijack the reply, then either run
 *     the backend directly (read) or via `orchestrator.ingestExternalPush`
 *     (write).
 *
 * After `reply.hijack()` Fastify no longer sends anything for this request, so
 * every failure path below must end the raw response itself.
 *
 * @param req   The Fastify request; its raw Node request is handed to the backend.
 * @param reply The Fastify reply; its raw Node response receives the CGI output.
 */
async handle(req: FastifyRequest, reply: FastifyReply): Promise<void> {
  const rawReq = req.raw;
  const rawRes = reply.raw;

  // --- parse the URL into spaceId + subpath -------------------------------
  const rest = this.extractRest(req.url);
  const parsedPath = rest === null ? null : parseGitPath(rest);

  // --- resolve the requested git service kind (read vs write) -------------
  const service =
    typeof req.query === 'object' && req.query !== null
      ? (req.query as Record<string, string | undefined>).service
      : undefined;
  const serviceKind: GitHttpServiceKind | null = parsedPath
    ? resolveServiceKind({
        method: req.method,
        subpath: parsedPath.subpath,
        service,
      })
    : null;

  // --- authenticate (HTTP Basic) ------------------------------------------
  const authHeader = req.headers['authorization'];
  const basic = parseBasicAuth(
    Array.isArray(authHeader) ? authHeader[0] : authHeader,
  );
  // Resolve the workspace ourselves — DomainMiddleware does NOT run for this
  // raw root route, so `req.raw.workspaceId` is never set.
  const workspaceId: string | null = await this.resolveWorkspaceId(req);
  let user: User | undefined;
  let credentialsValid = false;
  if (basic && workspaceId) {
    try {
      user = await this.authService.verifyUserCredentials(
        { email: basic.email, password: basic.password },
        workspaceId,
      );
      credentialsValid = true;
    } catch (err) {
      if (!(err instanceof UnauthorizedException)) {
        // A non-credential failure (e.g. DB error): treat as invalid creds for
        // the gate, and log without leaking the password/header.
        this.logger.warn(
          `git-http: credential check error: ${
            err instanceof Error ? err.message : String(err)
          }`,
        );
      }
      credentialsValid = false;
    }
  }

  // --- resolve the space + per-space gating + CASL ------------------------
  let spaceExists = false;
  let spaceGitSyncEnabled = false;
  let spaceId: string | undefined;
  let permissionGranted = false;
  if (credentialsValid && user && workspaceId && parsedPath && serviceKind) {
    const space = await this.spaceRepo.findById(
      parsedPath.spaceId,
      workspaceId,
    );
    if (space) {
      spaceExists = true;
      spaceId = space.id;
      spaceGitSyncEnabled =
        (space.settings as any)?.gitSync?.enabled === true;
      // Only evaluate CASL when the space is actually a sync candidate — an
      // unrelated space stays a 404 (existence is never revealed).
      if (spaceGitSyncEnabled) {
        try {
          const ability = await this.spaceAbilityFactory.createForUser(
            user,
            space.id,
          );
          const action =
            serviceKind === 'write'
              ? SpaceCaslAction.Manage
              : SpaceCaslAction.Read;
          permissionGranted = ability.can(action, SpaceCaslSubject.Page);
        } catch {
          // createForUser throws NotFoundException when the user has no role in
          // the space — that is simply "no permission" here.
          permissionGranted = false;
        }
      }
    }
  }

  // --- the gate decision (pure) -------------------------------------------
  const decision = decideGitHttpGate({
    hasCredentials: Boolean(basic),
    credentialsValid,
    serviceKind,
    gitSyncEnabled: this.environmentService.isGitSyncEnabled(),
    gitHttpEnabled: this.environmentService.isGitSyncHttpEnabled(),
    spaceExists,
    spaceGitSyncEnabled,
    permissionGranted,
  });
  if (decision.kind === 'unauthorized') {
    reply
      .header('WWW-Authenticate', WWW_AUTHENTICATE)
      .status(401)
      .send('Authentication required');
    return;
  }
  if (decision.kind === 'bad-request') {
    reply.status(400).send('Bad request');
    return;
  }
  if (decision.kind === 'not-found') {
    reply.status(404).send('Not found');
    return;
  }
  if (decision.kind === 'forbidden') {
    reply.status(403).send('Forbidden');
    return;
  }
  // decision.kind === 'proceed' — guaranteed below (narrowing for TS).
  if (!parsedPath || !serviceKind || !spaceId || !user || !workspaceId) {
    // Defensive: 'proceed' implies these are set, but keep TS + runtime safe.
    reply.status(500).send('Internal server error');
    return;
  }

  // --- dispatch to git http-backend ---------------------------------------
  const backendRequest = {
    spaceId,
    subpath: parsedPath.subpath,
    method: req.method,
    queryString: this.extractQueryString(req.url),
    contentType: this.headerValue(req.headers['content-type']) ?? '',
    gitProtocol: this.headerValue(req.headers['git-protocol']),
    remoteUser: user.email,
  };
  try {
    // Idempotently make the vault servable (repo + receive/upload config).
    await this.vaultRegistry.ensureServable(spaceId);
  } catch (err) {
    this.logger.error(
      `git-http: failed to prepare vault for space ${spaceId}: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
    if (!reply.sent) reply.status(500).send('Internal server error');
    return;
  }

  // Hijack the reply so the backend can stream the raw (possibly binary) CGI
  // response directly to the socket. From here on Fastify will not respond on
  // our behalf.
  reply.hijack();

  if (serviceKind === 'read') {
    // Fetch/clone: stream http-backend directly, no lock (read-only).
    try {
      await this.backend.run(backendRequest, rawReq, rawRes);
    } catch (err) {
      // The reply is hijacked, so a rejection here would otherwise leave the
      // client's socket open with no response. Mirror the push path: log, then
      // make sure the response is ended.
      this.logger.error(
        `git-http: fetch error for space ${spaceId}: ${
          err instanceof Error ? err.message : String(err)
        }`,
      );
      try {
        // Only report 500 if the backend had not started its own response.
        if (!rawRes.headersSent) rawRes.statusCode = 500;
        if (!rawRes.writableEnded) rawRes.end();
      } catch {
        /* ignore */
      }
    }
    return;
  }

  // Push: run the receive-pack under the space lock, then a Docmost cycle.
  try {
    await this.orchestrator.ingestExternalPush(spaceId, workspaceId, () =>
      this.backend.run(backendRequest, rawReq, rawRes),
    );
  } catch (err) {
    if (err instanceof GitSyncLockHeldError) {
      // The lock could not be acquired and the receive-pack never ran, so the
      // response is still unwritten — answer 503 so git retries.
      if (!rawRes.headersSent) {
        rawRes.statusCode = 503;
        rawRes.setHeader('Content-Type', 'text/plain');
        rawRes.setHeader('Retry-After', '1');
      }
      try {
        rawRes.end('git-sync busy, retry');
      } catch {
        /* ignore */
      }
      return;
    }
    // Any other error: the receive-pack closure handles its own response, so
    // we only log here and make sure the socket is closed.
    this.logger.error(
      `git-http: push ingestion error for space ${spaceId}: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
    try {
      if (!rawRes.writableEnded) rawRes.end();
    } catch {
      /* ignore */
    }
  }
}
/**
 * Collapse a header that may arrive as a list into a single value: the first
 * entry of an array, otherwise the value unchanged (including `undefined`).
 */
private headerValue(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}
/**
 * Return the portion of `url` that follows the `/git/` prefix, with any query
 * string (everything from the first `?`) removed.
 *
 * @returns The remaining path, or null when the path does not start with `/git/`.
 */
private extractRest(url: string): string | null {
  const gitPrefix = '/git/';
  const queryStart = url.indexOf('?');
  const pathname = queryStart < 0 ? url : url.substring(0, queryStart);
  return pathname.startsWith(gitPrefix)
    ? pathname.substring(gitPrefix.length)
    : null;
}
/**
 * Return everything after the first `?` in `url` (the raw, undecoded query
 * string), or the empty string when the URL has no `?`.
 */
private extractQueryString(url: string): string {
  const queryStart = url.indexOf('?');
  if (queryStart < 0) return '';
  return url.substring(queryStart + 1);
}
}

View File

@@ -34,6 +34,19 @@ interface EnabledSpace {
workspaceId: string;
}
/**
 * Raised by `ingestExternalPush` when it could not take the per-space lock.
 * The /git HTTP handler turns it into a 503 so the git client retries instead
 * of racing a cycle that is working on the same vault.
 *
 * `spaceId` identifies the space whose lock was unavailable.
 */
export class GitSyncLockHeldError extends Error {
  readonly spaceId: string;

  constructor(spaceId: string) {
    super(`git-sync: space ${spaceId} is busy (lock held); retry the push`);
    this.spaceId = spaceId;
    this.name = 'GitSyncLockHeldError';
  }
}
/** Small status summary returned by `runOnce` (for the admin trigger + logs). */
export interface GitSyncRunStatus {
spaceId: string;
@@ -123,6 +136,35 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
}
}
/**
 * CAS-guarded TTL refresh: extend the lock's TTL ONLY while WE still own it
 * (the stored value matches our instanceId) — never extend a lock whose stored
 * value is someone else's. Used by the heartbeat in `withSpaceLock` so a
 * long-running critical section does not outlive the lock's TTL.
 *
 * The Lua script returns the result of `pexpire` when the owner matches and 0
 * otherwise. A 0 therefore means this instance no longer owns the lock while
 * the work it protects is still running; that is logged as a warning so the
 * condition is visible instead of silent.
 *
 * Never throws: a failed or rejected refresh is logged (warn) and swallowed so
 * it cannot break the cycle it is protecting.
 *
 * @param spaceId The space whose lock key should be refreshed.
 */
private async refreshLock(spaceId: string): Promise<void> {
  const lua =
    'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("pexpire", KEYS[1], ARGV[2]) else return 0 end';
  try {
    const extended: unknown = await this.redis.eval(
      lua,
      1,
      GIT_SYNC_LOCK_PREFIX + spaceId,
      this.instanceId,
      String(GIT_SYNC_LOCK_TTL_MS),
    );
    // Strict check on 0 so only the script's explicit "not refreshed" answer
    // triggers the warning.
    if (extended === 0) {
      this.logger.warn(
        `git-sync: lock for space ${spaceId} is no longer held by this instance; TTL not refreshed`,
      );
    }
  } catch (err) {
    this.logger.warn(
      `git-sync: failed to refresh lock for space ${spaceId}: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
  }
}
// --- enabled-space enumeration (plan §10) --------------------------------
/**
@@ -188,30 +230,136 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
return { spaceId, ran: false, skipped: 'no-service-user' };
}
// In-process mutex: never run two overlapping cycles for the same space on
// this instance (the Redis lock guards cross-instance, this guards in-proc).
if (this.running.has(spaceId)) {
return { spaceId, ran: false, skipped: 'in-progress' };
}
// Redis leader lock: only the holder runs the cycle (plan §9).
if (!(await this.acquire(spaceId))) {
return { spaceId, ran: false, skipped: 'lock-held' };
}
this.running.add(spaceId);
// Run the full cycle under the per-space lock. withSpaceLock owns the
// in-process mutex (no overlapping cycles on this instance) AND the Redis
// leader lock (single writer across replicas), and returns a skip sentinel
// when it could not enter — surfaced here as the existing skipped:'in-progress'
// / 'lock-held' status so runOnce's observable behavior is unchanged.
try {
return await this.driveCycle(spaceId, workspaceId, serviceUserId);
const result = await this.withSpaceLock(spaceId, () =>
this.driveCycle(spaceId, workspaceId, serviceUserId),
);
if ('skipped' in result && !('spaceId' in result)) {
return { spaceId, ran: false, skipped: result.skipped };
}
return result;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
this.logger.error(`git-sync: cycle failed for space ${spaceId}: ${message}`);
return { spaceId, ran: false, error: message };
}
}
/**
 * Run `fn` under the per-space lock: the in-process mutex (`this.running`, no
 * overlapping work for the same space on this instance) AND the Redis lock
 * taken via `acquire`.
 *
 * The in-process mutex is reserved synchronously, BEFORE awaiting `acquire`,
 * so two callers on this instance cannot both pass the `running` check while
 * the first one is still waiting on Redis. If the Redis lock is not obtained
 * (or `acquire` throws) the reservation is rolled back.
 *
 * While `fn` runs, a heartbeat refreshes the Redis lock every ≈ TTL/3 via
 * `refreshLock`, so a long critical section does not outlive the TTL.
 *
 * @param spaceId The space to lock.
 * @param fn      The work to run while both locks are held.
 * @returns `fn`'s result, or a skip sentinel when the lock could not be taken:
 *   `{ skipped: 'in-progress' }` (this instance is already working on the
 *   space) or `{ skipped: 'lock-held' }` (`acquire` reported failure).
 * @throws Whatever `fn` (or `acquire`) throws; the heartbeat, the mutex and
 *   the Redis lock are released in a `finally` first.
 */
async withSpaceLock<T>(
  spaceId: string,
  fn: () => Promise<T>,
): Promise<T | { skipped: 'lock-held' | 'in-progress' }> {
  if (this.running.has(spaceId)) {
    return { skipped: 'in-progress' };
  }
  // Reserve the mutex before the first await to close the check-then-act gap.
  this.running.add(spaceId);
  let acquired = false;
  try {
    acquired = Boolean(await this.acquire(spaceId));
  } finally {
    // Roll the reservation back when we did not end up holding the Redis lock
    // (either `acquire` returned false or it threw).
    if (!acquired) this.running.delete(spaceId);
  }
  if (!acquired) {
    return { skipped: 'lock-held' };
  }

  // Heartbeat: periodically (≈ TTL/3) extend the lock's TTL while `fn` runs.
  // `.unref()` keeps the timer from holding the event loop open; it is ALWAYS
  // cleared in `finally`.
  const heartbeat = setInterval(() => {
    void this.refreshLock(spaceId);
  }, Math.max(1, Math.floor(GIT_SYNC_LOCK_TTL_MS / 3)));
  heartbeat.unref?.();
  try {
    return await fn();
  } finally {
    clearInterval(heartbeat);
    this.running.delete(spaceId);
    await this.release(spaceId);
  }
}
/**
 * Ingest a push that arrived over smart-HTTP (the /git host). Everything runs
 * inside `withSpaceLock`, i.e. under the same per-space lock `runOnce` uses:
 *
 *   1. `runReceivePack()` — the caller-supplied closure that performs the
 *      receive-pack and streams the HTTP response to the client.
 *   2. `driveCycle(...)` — still under the lock — to reconcile the received
 *      commits into Docmost. Skipped (with an error log) when no git-sync
 *      service user id is configured.
 *
 * A failure in step 2 is logged and NOT rethrown: by then the client has its
 * answer, and the log message notes that the poll cycle will retry. A failure
 * in step 1 propagates to the caller.
 *
 * @param spaceId        The space being pushed to.
 * @param workspaceId    The workspace that owns the space.
 * @param runReceivePack Closure that runs the receive-pack for this request.
 * @throws GitSyncLockHeldError when the lock could not be taken (the closure is
 *   then never invoked), and also — defensively — when git-sync is globally
 *   disabled. The /git HTTP handler maps this error to a 503.
 */
async ingestExternalPush(
  spaceId: string,
  workspaceId: string,
  runReceivePack: () => Promise<void>,
): Promise<void> {
  // The HTTP gate already checks the global flag; this is a second line of
  // defence so a cycle never runs while sync is off.
  if (!this.environmentService.isGitSyncEnabled()) {
    throw new GitSyncLockHeldError(spaceId);
  }
  const serviceUserId = this.environmentService.getGitSyncServiceUserId();

  const receiveThenReconcile = async (): Promise<void> => {
    // 1) Let the client's receive-pack run to completion.
    await runReceivePack();

    // 2) Reconcile into Docmost — only possible with a service user to
    //    attribute the writes to.
    if (serviceUserId) {
      try {
        await this.driveCycle(spaceId, workspaceId, serviceUserId);
      } catch (err) {
        // Swallow on purpose: the push itself already completed.
        const reason = err instanceof Error ? err.message : String(err);
        this.logger.error(
          `git-sync: post-push cycle failed for space ${spaceId} (push is ` +
            `durable; poll will retry): ${reason}`,
        );
      }
      return;
    }
    this.logger.error(
      'git-sync: GIT_SYNC_SERVICE_USER_ID is required to ingest an external ' +
        'push — the push is durable on main; skipping the immediate cycle.',
    );
  };

  const outcome = await this.withSpaceLock(spaceId, receiveThenReconcile);

  // A skip sentinel means the lock was unavailable — report it so the HTTP
  // handler can answer 503 and let git retry.
  const lockUnavailable =
    typeof outcome === 'object' && outcome !== null && 'skipped' in outcome;
  if (lockUnavailable) {
    throw new GitSyncLockHeldError(spaceId);
  }
}
/**
* The actual engine wiring (plan §11). Mirrors the engine's own `main`:
* PULL — readExisting -> computePullActions -> applyPullActions,

View File

@@ -1,8 +1,12 @@
import { Injectable, Logger } from '@nestjs/common';
import { mkdir } from 'node:fs/promises';
import { VaultGit } from '@docmost/git-sync';
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { VaultGit, vaultGitEnv } from '@docmost/git-sync';
import { EnvironmentService } from '../../environment/environment.service';
const execFileAsync = promisify(execFile);
/**
* Resolves the on-disk vault location per space and owns the (lazily created,
* cached) `VaultGit` instance for each one (plan §3/§5).
@@ -41,4 +45,49 @@ export class VaultRegistryService {
this.vaults.set(spaceId, vault);
return vault;
}
/**
 * Prepare a space's vault so it can be served over smart-HTTP (the /git host).
 *
 * First makes sure the repository exists (`vault.ensureRepo()`), then writes
 * four git config values from inside the vault directory, one after another:
 *
 *   - receive.denyCurrentBranch=updateInstead — a push to the checked-out
 *     branch also updates the working tree.
 *   - receive.denyNonFastForwards=true — refuse non-fast-forward (force) pushes.
 *   - http.receivepack=true / http.uploadpack=true — explicitly enable the
 *     receive and upload services over HTTP.
 *
 * Each value is written with a plain `git config <key> <value>`, which
 * overwrites any existing value, so repeated calls give the same result.
 * NOTE(review): every call spawns four `git config` processes and the /git
 * handler calls this before each request; concurrent calls for the same space
 * may contend on git's config lock file — confirm this is acceptable.
 *
 * @param spaceId The space whose vault should be made servable.
 * @returns The vault path from `vaultPath(spaceId)`.
 */
async ensureServable(spaceId: string): Promise<string> {
  const vault = await this.getVault(spaceId);
  const vaultDir = this.vaultPath(spaceId);

  // Creates the repository if needed; a no-op on an existing vault.
  await vault.ensureRepo();

  // Runs `git config` in the vault directory with the engine-provided env
  // (intended to keep inherited GIT_* variables from redirecting the write).
  const setConfig = (key: string, value: string) =>
    execFileAsync('git', ['config', key, value], {
      cwd: vaultDir,
      env: vaultGitEnv(),
    });

  // Sequential on purpose: each call rewrites the same config file.
  await setConfig('receive.denyCurrentBranch', 'updateInstead');
  await setConfig('receive.denyNonFastForwards', 'true');
  await setConfig('http.receivepack', 'true');
  await setConfig('http.uploadpack', 'true');

  return vaultDir;
}
}

View File

@@ -15,6 +15,7 @@ import { InternalLogFilter } from './common/logger/internal-log-filter';
import { EnvironmentService } from './integrations/environment/environment.service';
import { resolveFrameHeader } from './common/helpers';
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
import { GitHttpService } from './integrations/git-sync/http/git-http.service';
async function bootstrap() {
const app = await NestFactory.create<NestFastifyApplication>(
@@ -106,6 +107,23 @@ async function bootstrap() {
},
);
// git smart-HTTP POST bodies use these media types. Register PASSTHROUGH
// content-type parsers so Fastify does NOT buffer/parse them (it would
// otherwise reject the unknown type with 415); the /git handler streams the
// raw Node request (request.raw) to `git http-backend` stdin instead. A
// passthrough parser also bypasses the bodyLimit, so large pushes are not
// truncated (the bytes are never buffered by Fastify).
app
.getHttpAdapter()
.getInstance()
.addContentTypeParser(
[
'application/x-git-upload-pack-request',
'application/x-git-receive-pack-request',
],
(_req, payload, done) => done(null, payload),
);
app
.getHttpAdapter()
.getInstance()
@@ -153,6 +171,25 @@ async function bootstrap() {
app.useGlobalInterceptors(new TransformHttpResponseInterceptor(reflector));
app.enableShutdownHooks();
// git smart-HTTP host (the /git/<spaceId>.git/... subtree). Registered as a
// RAW Fastify route — NOT a Nest controller under the global '/api' prefix —
// so it lives at the ROOT and a single wildcard reliably captures the whole
// multi-segment subtree (avoiding the path-to-regexp v8 wildcard / global-
// prefix-exclude ambiguity in NestJS v11). The handler is resolved from the
// Nest container so all auth/authz/gating still runs. NOTE: Nest middleware
// (DomainMiddleware) does NOT run for this raw root route — it is bound to the
// Nest router under the global '/api' prefix — so request.raw.workspaceId is
// NOT populated here; GitHttpService resolves the workspace itself (mirroring
// DomainMiddleware). The Fastify wildcard '/git/*' captures the multi-segment
// subpath; the handler re-parses req.url itself.
const gitHttpService = app.get(GitHttpService);
app
.getHttpAdapter()
.getInstance()
.all('/git/*', async (request, reply) => {
await gitHttpService.handle(request as any, reply as any);
});
const logger = new Logger('NestApplication');
process.on('unhandledRejection', (reason, promise) => {