From 59a6f65f773ca4c8c4c9c5150ff2128ab825d29b Mon Sep 17 00:00:00 2001 From: claude_code Date: Sun, 21 Jun 2026 19:55:25 +0300 Subject: [PATCH] feat(git-sync): serve spaces over smart-HTTP (gitmost as a two-way git host) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose each git-sync-enabled space as a clonable/pushable git repo over HTTP, so `git clone https://<email>:<password>@<host>/git/<spaceId>.git` works and external pushes flow back into Docmost pages — gitmost itself acts as the git host (no external GitHub/Gitea, no SSH). Transport: shell out to `git http-backend` (CGI; git is already in the runtime image) which implements the full smart-HTTP protocol (info/refs, upload-pack, receive-pack, protocol v2). A raw Fastify route `/git/*` (mounted at the root, outside the `/api` prefix) bridges the request/response to the CGI; passthrough content-type parsers for the git media types stream the raw body to stdin. Reuse the existing engine: clients push the vault's `main` branch, whose commits beyond `refs/docmost/last-pushed` the engine already reconciles into Docmost. - http/git-http.service.ts — auth (HTTP Basic -> AuthService.verifyUserCredentials), self-resolved workspace (DomainMiddleware does not run for this raw route), per-space gating (global + per-space gitSync flags, 404 hides existence), CASL authz (Read=fetch, Manage=push), dispatch. - http/git-http-backend.service.ts — spawn `git http-backend`, binary-safe CGI response parsing (Status/headers/body), stream to the socket. - http/git-http.helpers.ts — pure path parse, service->kind mapping, gate decision (unit-tested); rejects literal and percent-encoded path traversal. - orchestrator: extract reusable withSpaceLock (CAS-guarded lock heartbeat so a long push cannot let the lock expire mid-cycle) and add ingestExternalPush (receive-pack + Docmost cycle under one lock; 503 on contention). 
- vault-registry: ensureServable() — ensureRepo + idempotent receive.denyCurrentBranch =updateInstead / denyNonFastForwards / http.receivepack / http.uploadpack. - env: GIT_SYNC_HTTP_ENABLED (defaults to GIT_SYNC_ENABLED) + validation. - main.ts: register the /git/* route and the git content-type parsers. Tests: pure helpers, CGI parsing, and the GitHttpService handler (auth/gate/authz + workspace resolution). Server tsc + git-sync/env suites green. Co-Authored-By: Claude Opus 4.8 --- .../environment/environment.service.ts | 12 + .../environment/environment.validation.ts | 7 + .../integrations/git-sync/git-sync.module.ts | 11 + .../http/git-http-backend.service.spec.ts | 87 ++++ .../git-sync/http/git-http-backend.service.ts | 265 ++++++++++++ .../git-sync/http/git-http.helpers.spec.ts | 183 +++++++++ .../git-sync/http/git-http.helpers.ts | 147 +++++++ .../git-sync/http/git-http.service.spec.ts | 376 ++++++++++++++++++ .../git-sync/http/git-http.service.ts | 319 +++++++++++++++ .../services/git-sync.orchestrator.ts | 174 +++++++- .../services/vault-registry.service.ts | 51 ++- apps/server/src/main.ts | 37 ++ 12 files changed, 1655 insertions(+), 14 deletions(-) create mode 100644 apps/server/src/integrations/git-sync/http/git-http-backend.service.spec.ts create mode 100644 apps/server/src/integrations/git-sync/http/git-http-backend.service.ts create mode 100644 apps/server/src/integrations/git-sync/http/git-http.helpers.spec.ts create mode 100644 apps/server/src/integrations/git-sync/http/git-http.helpers.ts create mode 100644 apps/server/src/integrations/git-sync/http/git-http.service.spec.ts create mode 100644 apps/server/src/integrations/git-sync/http/git-http.service.ts diff --git a/apps/server/src/integrations/environment/environment.service.ts b/apps/server/src/integrations/environment/environment.service.ts index 470668fe..6353f6b5 100644 --- a/apps/server/src/integrations/environment/environment.service.ts +++ 
/**
 * Reports whether the per-space vaults are served over smart-HTTP (the /git
 * host).
 *
 * An unset GIT_SYNC_HTTP_ENABLED falls back to isGitSyncEnabled(), so turning
 * sync on also turns the host on unless it is explicitly switched off. A set
 * value is compared case-insensitively against 'true'; anything else is off.
 */
isGitSyncHttpEnabled(): boolean {
  const configured = this.configService.get<string>('GIT_SYNC_HTTP_ENABLED');
  return configured === undefined
    ? this.isGitSyncEnabled()
    : configured.toLowerCase() === 'true';
}
+ @IsOptional() + @IsIn(['true', 'false']) + @IsString() + GIT_SYNC_HTTP_ENABLED: string; + @IsOptional() @IsString() GIT_SYNC_DATA_DIR: string; diff --git a/apps/server/src/integrations/git-sync/git-sync.module.ts b/apps/server/src/integrations/git-sync/git-sync.module.ts index 6a71a92d..177ce9fe 100644 --- a/apps/server/src/integrations/git-sync/git-sync.module.ts +++ b/apps/server/src/integrations/git-sync/git-sync.module.ts @@ -4,11 +4,14 @@ import { DatabaseModule } from '@docmost/db/database.module'; import { EnvironmentModule } from '../environment/environment.module'; import { CollaborationModule } from '../../collaboration/collaboration.module'; import { PageModule } from '../../core/page/page.module'; +import { AuthModule } from '../../core/auth/auth.module'; import { GitmostDataSourceService } from './services/gitmost-datasource.service'; import { GitSyncOrchestrator } from './services/git-sync.orchestrator'; import { VaultRegistryService } from './services/vault-registry.service'; import { PageChangeListener } from './listeners/page-change.listener'; import { GitSyncController } from './git-sync.controller'; +import { GitHttpBackendService } from './http/git-http-backend.service'; +import { GitHttpService } from './http/git-http.service'; /** * The git-sync control plane (plan §6). Wires the native datasource, the @@ -36,6 +39,8 @@ import { GitSyncController } from './git-sync.controller'; EnvironmentModule, CollaborationModule, PageModule, + // AuthModule exports AuthService (verifyUserCredentials for /git HTTP Basic). + AuthModule, ScheduleModule, ], controllers: [GitSyncController], @@ -44,6 +49,12 @@ import { GitSyncController } from './git-sync.controller'; GitSyncOrchestrator, VaultRegistryService, PageChangeListener, + // /git smart-HTTP host (the raw Fastify route in main.ts resolves these). 
// Unit tests for the pure CGI-response helpers used by GitHttpBackendService.
// The header/body split MUST treat the body as binary (Buffer) and never
// stringify it; the Status: header sets the HTTP status (default 200).
import {
  parseCgiResponse,
  splitCgiBuffer,
} from './git-http-backend.service';

/**
 * Split a buffer that is expected to contain a header/body separator, failing
 * the test with a clear message (instead of a non-null assertion) when the
 * separator is missing.
 */
function splitOrFail(buf: Buffer): { headerText: string; body: Buffer } {
  const split = splitCgiBuffer(buf);
  if (split === null) {
    throw new Error('expected splitCgiBuffer to find a blank-line separator');
  }
  return split;
}

describe('parseCgiResponse', () => {
  it('defaults to status 200 with no Status header', () => {
    const r = parseCgiResponse('Content-Type: application/x-git-upload-pack-result');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([
      ['Content-Type', 'application/x-git-upload-pack-result'],
    ]);
  });

  it('honors a Status header and does not forward it', () => {
    const r = parseCgiResponse('Status: 404 Not Found\nContent-Type: text/plain');
    expect(r.statusCode).toBe(404);
    expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
  });

  it('parses multiple headers and trims whitespace', () => {
    const r = parseCgiResponse(
      'Status: 403 Forbidden\r\nContent-Type: text/plain \r\nX-Foo: bar ',
    );
    expect(r.statusCode).toBe(403);
    expect(r.headers).toEqual([
      ['Content-Type', 'text/plain'],
      ['X-Foo', 'bar'],
    ]);
  });

  it('ignores malformed (colon-less) lines defensively', () => {
    const r = parseCgiResponse('Content-Type: text/plain\ngarbage-line\nX-A: b');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([
      ['Content-Type', 'text/plain'],
      ['X-A', 'b'],
    ]);
  });

  it('ignores a non-numeric Status value and keeps the default', () => {
    const r = parseCgiResponse('Status: not-a-number\nContent-Type: text/plain');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
  });

  it('ignores an out-of-range Status code and keeps the default', () => {
    // The parser only accepts 100..599; anything outside must fall back to 200
    // and the Status header must still not be forwarded.
    for (const status of ['99 Too Low', '600 Too High', '0', '-200 Negative']) {
      const r = parseCgiResponse(`Status: ${status}\nContent-Type: text/plain`);
      expect(r.statusCode).toBe(200);
      expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
    }
  });

  it('treats the Status header case-insensitively', () => {
    const r = parseCgiResponse('status: 500 Boom');
    expect(r.statusCode).toBe(500);
    expect(r.headers).toEqual([]);
  });
});

describe('splitCgiBuffer', () => {
  it('splits on a CRLF blank line and keeps the body as bytes', () => {
    const buf = Buffer.concat([
      Buffer.from('Status: 200 OK\r\nContent-Type: text/plain\r\n\r\n', 'utf8'),
      Buffer.from([0x00, 0x01, 0x02, 0xff]),
    ]);
    const split = splitOrFail(buf);
    expect(split.headerText).toBe('Status: 200 OK\r\nContent-Type: text/plain');
    expect(Array.from(split.body)).toEqual([0x00, 0x01, 0x02, 0xff]);
  });

  it('splits on a bare LF blank line', () => {
    const buf = Buffer.from('Content-Type: text/plain\n\nhello', 'utf8');
    const split = splitOrFail(buf);
    expect(split.headerText).toBe('Content-Type: text/plain');
    expect(split.body.toString('utf8')).toBe('hello');
  });

  it('returns an empty body when nothing follows the separator', () => {
    const buf = Buffer.from('Content-Type: text/plain\r\n\r\n', 'utf8');
    const split = splitOrFail(buf);
    expect(split.body.length).toBe(0);
  });

  it('returns null when there is no blank-line separator yet', () => {
    const buf = Buffer.from('Content-Type: text/plain\r\nincomplete', 'utf8');
    expect(splitCgiBuffer(buf)).toBeNull();
  });
});
/** The parsed first part of a CGI response: the HTTP status + header pairs. */
export interface ParsedCgiResponse {
  statusCode: number;
  /** Header names are kept verbatim (not lower-cased), as the CGI emitted them. */
  headers: Array<[string, string]>;
}

/**
 * Parse the CGI header block emitted by `git http-backend` into an HTTP status
 * and a list of header pairs. The input is ONLY the header text (everything up
 * to, but not including, the blank-line separator) — the binary body is split
 * off by the caller on the raw Buffer (never stringified).
 *
 * CGI semantics (RFC 3875 §6): a `Status: <code> <reason>` header sets the HTTP
 * status (default 200 when absent or not in 100..599). Every other header is
 * forwarded with its name verbatim and its value trimmed. A line without a ':'
 * is ignored defensively.
 *
 * Pure + framework-free so it is unit-testable in isolation.
 *
 * @param headerBlock CGI header text, lines separated by CRLF or LF.
 * @returns The HTTP status code and the headers to forward (Status excluded).
 */
export function parseCgiResponse(headerBlock: string): ParsedCgiResponse {
  let statusCode = 200;
  const headers: Array<[string, string]> = [];

  // Header lines may be separated by CRLF or LF; split on either.
  for (const line of headerBlock.split(/\r?\n/)) {
    if (line.length === 0) continue;
    const sep = line.indexOf(':');
    if (sep === -1) continue; // not a header line — ignore defensively
    const name = line.slice(0, sep).trim();
    const value = line.slice(sep + 1).trim();

    if (name.toLowerCase() === 'status') {
      // `Status: 404 Not Found` — the leading integer is the HTTP status code.
      const code = parseInt(value, 10);
      if (Number.isFinite(code) && code >= 100 && code <= 599) {
        statusCode = code;
      }
      continue; // never forward the CGI Status header itself
    }
    headers.push([name, value]);
  }

  return { statusCode, headers };
}

/**
 * Split a raw CGI response buffer at the FIRST blank-line boundary, whichever
 * of `\r\n\r\n` or `\n\n` occurs earliest. Returns the header text and the
 * remaining body bytes, or null when no separator is present yet (incomplete
 * or malformed response).
 *
 * Choosing the earliest separator matters because the body is arbitrary
 * binary: with an LF-terminated header block, a later CR LF CR LF sequence
 * inside the body must not be mistaken for the header boundary.
 *
 * Pure (operates on Buffers, never stringifies the body) so it is testable.
 *
 * @param buf Bytes received from the CGI so far.
 * @returns `{ headerText, body }` (body is a view onto `buf`), or null.
 */
export function splitCgiBuffer(
  buf: Buffer,
): { headerText: string; body: Buffer } | null {
  const crlfIdx = buf.indexOf('\r\n\r\n');
  const lfIdx = buf.indexOf('\n\n');
  if (crlfIdx === -1 && lfIdx === -1) return null;

  // The two patterns can never start at the same offset, so a strict
  // comparison is enough to pick the earlier one.
  const useCrlf = crlfIdx !== -1 && (lfIdx === -1 || crlfIdx < lfIdx);
  const idx = useCrlf ? crlfIdx : lfIdx;
  const sepLen = useCrlf ? 4 : 2;

  return {
    headerText: buf.subarray(0, idx).toString('utf8'),
    body: buf.subarray(idx + sepLen),
  };
}

/** A parsed git smart-HTTP request, resolved by the controller/handler. */
export interface GitHttpBackendRequest {
  /** The space id (the on-disk vault dir name == GIT_PROJECT_ROOT child). */
  spaceId: string;
  /** The subpath after `<spaceId>.git/`, e.g. `info/refs` or `git-receive-pack`. */
  subpath: string;
  /** REQUEST_METHOD — `GET` or `POST`. */
  method: string;
  /** Raw query string WITHOUT the leading '?', e.g. `service=git-receive-pack`. */
  queryString: string;
  /** Content-Type header value (may be empty for GET). */
  contentType: string;
  /** The Git-Protocol request header value, or undefined when absent. */
  gitProtocol?: string;
  /** Authenticated user email — used as REMOTE_USER (reflog identity). */
  remoteUser: string;
}
/**
 * Bridges an HTTP git smart-protocol request to `git http-backend` (the CGI that
 * implements the entire smart-HTTP protocol: info/refs, upload-pack,
 * receive-pack, protocol v2, dumb fallback). We do NOT reimplement pkt-line.
 *
 * The Fastify reply is hijacked by the caller; this service streams the request
 * body to the child's stdin and writes the child's CGI response (status +
 * headers parsed from the leading header block, then the raw binary body) to the
 * Node response. Errors before any output produce a 500. Credentials are never
 * logged.
 */
@Injectable()
export class GitHttpBackendService {
  private readonly logger = new Logger(GitHttpBackendService.name);

  constructor(private readonly environmentService: EnvironmentService) {}

  /**
   * Spawn `git http-backend` for one request and bridge it to the raw Node
   * request/response. Resolves when the response has been fully written (the
   * child exited and its output was flushed), or after a 500 was sent on an
   * early failure. Never rejects — push ingestion relies on this resolving so
   * the lock-held cycle body can run afterwards.
   *
   * The child's stdout is written to the response with backpressure: when the
   * client reads slowly, stdout is paused until the response drains, so a large
   * pack is not buffered in server memory. If the client disconnects before the
   * response is finished, the child is terminated so the promise still settles.
   *
   * @param parsed The already-authorized request (space, subpath, CGI inputs).
   * @param rawReq Raw Node request; its body is piped to the child for POST.
   * @param rawRes Raw Node response the CGI output is written to.
   */
  async run(
    parsed: GitHttpBackendRequest,
    rawReq: IncomingMessage,
    rawRes: ServerResponse,
  ): Promise<void> {
    const projectRoot = this.environmentService.getGitSyncDataDir();
    // PATH_INFO is the repo-relative CGI path: /<spaceId>.git/<subpath>.
    const pathInfo = `/${parsed.spaceId}.git/${parsed.subpath}`;

    // Build the CGI env from the engine's cwd-isolated base (strips GIT_DIR /
    // GIT_WORK_TREE), then layer the http-backend CGI variables. GIT_PROTOCOL is
    // only set when the client sent the Git-Protocol header. PATH is preserved
    // (vaultGitEnv already copies process.env, so PATH carries through).
    const cgiEnv: Record<string, string> = {
      GIT_PROJECT_ROOT: projectRoot,
      GIT_HTTP_EXPORT_ALL: '1', // authz is done by us; no git-daemon-export-ok file
      PATH_INFO: pathInfo,
      REQUEST_METHOD: parsed.method,
      QUERY_STRING: parsed.queryString,
      CONTENT_TYPE: parsed.contentType,
      REMOTE_USER: parsed.remoteUser,
    };
    if (parsed.gitProtocol) {
      cgiEnv.GIT_PROTOCOL = parsed.gitProtocol;
    }
    const env = vaultGitEnv(cgiEnv);

    return new Promise<void>((resolve) => {
      let settled = false;
      const done = (): void => {
        if (settled) return;
        settled = true;
        resolve();
      };

      const endResponse = (): void => {
        try {
          rawRes.end();
        } catch {
          /* ignore — the socket may already be gone */
        }
      };

      let child: ReturnType<typeof spawn>;
      try {
        child = spawn('git', ['http-backend'], { env });
      } catch (err) {
        this.send500(rawRes, 'spawn-failed', err);
        return done();
      }

      // If the client goes away before the response is finished, stop the
      // child. stdout is resumed as well: a paused stdout (backpressure) would
      // otherwise never reach EOF, the child's 'close' would never fire, and
      // this promise would never settle.
      let clientGone = false;
      rawRes.once('close', () => {
        if (rawRes.writableFinished) return;
        clientGone = true;
        child.kill();
        child.stdout?.resume();
      });

      // Write body bytes honoring backpressure: pause the child's stdout while
      // the response buffer is full and resume once it has drained.
      const writeBody = (bytes: Buffer): void => {
        if (clientGone || bytes.length === 0) return;
        if (!rawRes.write(bytes)) {
          child.stdout?.pause();
          rawRes.once('drain', () => child.stdout?.resume());
        }
      };

      // Accumulate stdout until we have the full CGI header block, then write the
      // parsed status/headers and start streaming the remaining body bytes.
      let headerParsed = false;
      let pending: Buffer = Buffer.alloc(0);

      const flushHeadersAndBody = (chunk: Buffer): void => {
        pending = Buffer.concat([pending, chunk]);
        const split = splitCgiBuffer(pending);
        if (!split) return; // header block not complete yet
        headerParsed = true;
        pending = Buffer.alloc(0);
        if (clientGone) return;
        const { statusCode, headers } = parseCgiResponse(split.headerText);
        rawRes.statusCode = statusCode;
        for (const [name, value] of headers) {
          rawRes.setHeader(name, value);
        }
        writeBody(split.body);
      };

      child.stdout?.on('data', (chunk: Buffer) => {
        if (headerParsed) {
          writeBody(chunk);
        } else {
          flushHeadersAndBody(chunk);
        }
      });

      let stderr = '';
      child.stderr?.on('data', (chunk: Buffer) => {
        // Capture for diagnostics; never echo to the client. http-backend writes
        // CGI errors here. We do NOT log the request body or any credentials.
        if (stderr.length < 8192) stderr += chunk.toString('utf8');
      });

      child.on('error', (err) => {
        if (!headerParsed && !rawRes.headersSent) {
          this.send500(rawRes, 'child-error', err);
        } else {
          // Output already started — we can only terminate the stream.
          endResponse();
        }
        done();
      });

      child.on('close', (code) => {
        if (clientGone) {
          // The client hung up; there is nobody to answer and nothing to report.
          endResponse();
        } else if (!headerParsed && !rawRes.headersSent) {
          // The child exited before emitting a complete CGI header block.
          this.logger.error(
            `git http-backend produced no valid response (exit ${code}) for ` +
              `space ${parsed.spaceId}; stderr: ${stderr.trim().slice(0, 500)}`,
          );
          this.send500(rawRes, 'no-output');
        } else {
          if (code !== 0) {
            // The status line is already on the wire; record the failure so a
            // truncated transfer can be diagnosed.
            this.logger.warn(
              `git http-backend exited with ${code} after output started for ` +
                `space ${parsed.spaceId}; stderr: ${stderr.trim().slice(0, 500)}`,
            );
          }
          endResponse();
        }
        done();
      });

      // Swallow EPIPE etc. on the child's stdin so a client disconnect does not
      // crash the process. Registered before piping so no error can slip by.
      child.stdin?.on('error', () => {
        /* ignore broken-pipe on stdin */
      });

      // Pipe the request body to the child's stdin. For GET there is no body, so
      // end stdin immediately. We pipe `rawReq` (the raw Node stream) directly so
      // large pushes are streamed, not buffered.
      if (parsed.method === 'POST' && child.stdin) {
        rawReq.pipe(child.stdin);
        rawReq.on('error', () => {
          try {
            child.stdin?.end();
          } catch {
            /* ignore */
          }
        });
      } else {
        child.stdin?.end();
      }
    });
  }

  /** Send a clean 500 without leaking credentials or the request body. */
  private send500(rawRes: ServerResponse, reason: string, err?: unknown): void {
    const message = err instanceof Error ? err.message : undefined;
    this.logger.error(
      `git http-backend failed (${reason})${message ? `: ${message}` : ''}`,
    );
    try {
      if (!rawRes.headersSent) {
        rawRes.statusCode = 500;
        rawRes.setHeader('Content-Type', 'text/plain');
      }
      rawRes.end('Internal server error');
    } catch {
      /* ignore */
    }
  }
}
// resolveServiceKind: maps (method, subpath, ?service=) to the operation kind
// the authz layer must check — 'read' for fetch/clone, 'write' for push, and
// null for shapes the handler must refuse.
describe('resolveServiceKind', () => {
  // Smart-HTTP ref discovery: the `service` query parameter picks the kind.
  it('GET info/refs?service=git-upload-pack -> read', () => {
    expect(
      resolveServiceKind({
        method: 'GET',
        subpath: 'info/refs',
        service: 'git-upload-pack',
      }),
    ).toBe('read');
  });

  it('GET info/refs?service=git-receive-pack -> write', () => {
    expect(
      resolveServiceKind({
        method: 'GET',
        subpath: 'info/refs',
        service: 'git-receive-pack',
      }),
    ).toBe('write');
  });

  // Smart-HTTP pack exchange: the POST endpoint name picks the kind.
  it('POST git-upload-pack -> read', () => {
    expect(
      resolveServiceKind({ method: 'POST', subpath: 'git-upload-pack' }),
    ).toBe('read');
  });

  it('POST git-receive-pack -> write', () => {
    expect(
      resolveServiceKind({ method: 'POST', subpath: 'git-receive-pack' }),
    ).toBe('write');
  });

  // Any other GET is treated as a dumb-protocol read.
  it('a dumb-protocol GET (HEAD / objects) -> read', () => {
    expect(resolveServiceKind({ method: 'GET', subpath: 'HEAD' })).toBe('read');
    expect(
      resolveServiceKind({ method: 'GET', subpath: 'objects/12/abcdef' }),
    ).toBe('read');
  });

  it('info/refs with no/unknown service -> read (dumb discovery)', () => {
    expect(resolveServiceKind({ method: 'GET', subpath: 'info/refs' })).toBe(
      'read',
    );
  });

  // Unsupported shapes resolve to null.
  it('an unknown POST endpoint -> null', () => {
    expect(resolveServiceKind({ method: 'POST', subpath: 'whatever' })).toBeNull();
  });

  it('an unsupported method -> null', () => {
    expect(
      resolveServiceKind({ method: 'DELETE', subpath: 'git-receive-pack' }),
    ).toBeNull();
  });
});

// decideGitHttpGate: the pure precedence of 401 / 400 / 404 / 403 / proceed.
describe('decideGitHttpGate', () => {
  // The all-green input; each test flips only the field(s) it exercises.
  const base = {
    hasCredentials: true,
    credentialsValid: true,
    serviceKind: 'read' as const,
    gitSyncEnabled: true,
    gitHttpEnabled: true,
    spaceExists: true,
    spaceGitSyncEnabled: true,
    permissionGranted: true,
  };

  it('proceeds on the happy path', () => {
    expect(decideGitHttpGate(base)).toEqual({ kind: 'proceed' });
  });

  it('401 when credentials are missing (even for a valid space)', () => {
    expect(
      decideGitHttpGate({ ...base, hasCredentials: false }),
    ).toEqual({ kind: 'unauthorized' });
  });

  it('401 when credentials are present but invalid', () => {
    expect(
      decideGitHttpGate({ ...base, credentialsValid: false }),
    ).toEqual({ kind: 'unauthorized' });
  });

  it('400 on an unparseable service kind', () => {
    expect(decideGitHttpGate({ ...base, serviceKind: null })).toEqual({
      kind: 'bad-request',
    });
  });

  // Every "hidden" condition collapses to the same 404.
  it('404 when the space is not git-sync-enabled (never reveals existence)', () => {
    expect(
      decideGitHttpGate({ ...base, spaceGitSyncEnabled: false }),
    ).toEqual({ kind: 'not-found' });
  });

  it('404 when the space does not exist', () => {
    expect(decideGitHttpGate({ ...base, spaceExists: false })).toEqual({
      kind: 'not-found',
    });
  });

  it('404 when git-sync is globally disabled', () => {
    expect(decideGitHttpGate({ ...base, gitSyncEnabled: false })).toEqual({
      kind: 'not-found',
    });
  });

  it('404 when the git-http host is disabled', () => {
    expect(decideGitHttpGate({ ...base, gitHttpEnabled: false })).toEqual({
      kind: 'not-found',
    });
  });

  it('403 when authenticated but lacking the required permission (reader on write)', () => {
    expect(
      decideGitHttpGate({
        ...base,
        serviceKind: 'write',
        permissionGranted: false,
      }),
    ).toEqual({ kind: 'forbidden' });
  });

  it('still 401 (not 404) for missing creds against a disabled space', () => {
    // Anonymous probe must always get 401 first, regardless of space state.
    expect(
      decideGitHttpGate({
        ...base,
        hasCredentials: false,
        spaceGitSyncEnabled: false,
      }),
    ).toEqual({ kind: 'unauthorized' });
  });
});
/** The git operation a request maps to: a read (fetch/clone) or a write (push). */
export type GitHttpServiceKind = 'read' | 'write';

/** A parsed `/git/<spaceId>.git/<subpath>` URL. */
export interface ParsedGitPath {
  spaceId: string;
  /** The subpath after `<spaceId>.git/` (no leading slash), e.g. `info/refs`. */
  subpath: string;
}

/**
 * Parse the `<rest>` of a `/git/<rest>` URL path into the space id and the
 * repo-relative subpath. The space id is the first path segment with its
 * trailing `.git` stripped.
 *
 * `rest` must be the pathname only — the caller strips the query string.
 *
 * Returns null when the shape does not match or looks like a traversal attempt:
 *  - the first segment does not end in `.git`, or the space id is empty;
 *  - the space id is `.` or contains `..`, or any subpath segment is `..`;
 *  - a percent-encoded dot or slash (`%2e` / `%2f`, case-insensitive) appears
 *    ANYWHERE in the path. Both components are interpolated into PATH_INFO, so
 *    an encoded `../` in the space id is as dangerous as one in the subpath if
 *    anything downstream decodes it.
 *
 * @param rest Path after the `/git/` prefix, with or without leading slashes.
 * @returns The parsed components, or null when the path must be refused.
 */
export function parseGitPath(rest: string): ParsedGitPath | null {
  // Strip leading slashes, then take the first segment as `<spaceId>.git`.
  const clean = rest.replace(/^\/+/, '');

  // Defense-in-depth: refuse encoded dot/slash in either component before any
  // of it is used to build PATH_INFO.
  if (/%2e|%2f/i.test(clean)) return null;

  const slash = clean.indexOf('/');
  const first = slash === -1 ? clean : clean.slice(0, slash);
  const subpath = slash === -1 ? '' : clean.slice(slash + 1);

  if (!first.endsWith('.git')) return null;
  const spaceId = first.slice(0, -'.git'.length);
  if (!spaceId) return null;

  // Reject literal traversal / degenerate ids. (`first` is a single segment,
  // so the space id can never contain '/'.)
  if (
    spaceId === '.' ||
    spaceId.includes('..') ||
    subpath.split('/').includes('..')
  ) {
    return null;
  }

  return { spaceId, subpath };
}

/**
 * Map a parsed git request (method + subpath + query) to the required operation
 * kind. The smart-HTTP shapes:
 *  - GET  info/refs?service=git-upload-pack   -> read  (fetch)
 *  - GET  info/refs?service=git-receive-pack  -> write (push)
 *  - POST git-upload-pack                     -> read  (fetch)
 *  - POST git-receive-pack                    -> write (push)
 *  - any other GET (HEAD, objects/…, info/refs without a known service)
 *                                             -> read  (dumb protocol)
 * Returns null for an unsupported shape (a POST that is neither pack endpoint,
 * or any method other than GET/POST) so the caller can reject the request
 * rather than guess; decideGitHttpGate in this file maps null to 'bad-request'.
 *
 * @param input.method HTTP method; compared case-insensitively.
 * @param input.subpath Repo-relative path as produced by parseGitPath.
 * @param input.service Value of the `service` query parameter, when present.
 */
export function resolveServiceKind(input: {
  method: string;
  subpath: string;
  service?: string;
}): GitHttpServiceKind | null {
  const { subpath, service } = input;

  switch (input.method.toUpperCase()) {
    case 'GET':
      // Only the receive-pack ref advertisement is a write; every other GET,
      // including dumb-protocol object/ref fetches, is a read.
      return subpath === 'info/refs' && service === 'git-receive-pack'
        ? 'write'
        : 'read';
    case 'POST':
      if (subpath === 'git-receive-pack') return 'write';
      if (subpath === 'git-upload-pack') return 'read';
      return null; // unknown POST endpoint
    default:
      return null; // unsupported method
  }
}

/** The outcome of the gating/auth decision the request handler must enforce. */
export type GitHttpGateDecision =
  | { kind: 'unauthorized' } // 401 + WWW-Authenticate (missing/invalid creds)
  | { kind: 'not-found' } // 404 (space hidden / sync or http disabled)
  | { kind: 'forbidden' } // 403 (authenticated but lacks the permission)
  | { kind: 'bad-request' } // 400 (unparseable git request shape)
  | { kind: 'proceed' }; // run http-backend
/**
 * Pure gating decision for a /git request, kept free of the DB / CASL graph so
 * the precedence can be unit tested. Every input is a boolean (or the service
 * kind) that the handler has already resolved.
 *
 * Precedence, first match wins:
 *  1. Basic credentials missing                      -> 401 (unauthorized)
 *  2. credentials present but invalid                -> 401 (unauthorized)
 *  3. unparseable git request shape (kind is null)   -> 400 (bad-request)
 *  4. git-sync globally disabled, git-http disabled, space missing, or space
 *     not git-sync-enabled                           -> 404 (not-found)
 *  5. authenticated but lacking the required perm    -> 403 (forbidden)
 *  6. otherwise                                      -> proceed
 *
 * Because (4) comes after (1)/(2), an anonymous probe always sees 401, while an
 * authenticated user hitting a hidden/disabled space sees 404 (not 403), which
 * never reveals whether the space exists.
 */
export function decideGitHttpGate(input: {
  hasCredentials: boolean;
  credentialsValid: boolean;
  serviceKind: GitHttpServiceKind | null;
  gitSyncEnabled: boolean;
  gitHttpEnabled: boolean;
  spaceExists: boolean;
  spaceGitSyncEnabled: boolean;
  permissionGranted: boolean;
}): GitHttpGateDecision {
  const authenticated = input.hasCredentials && input.credentialsValid;
  if (!authenticated) return { kind: 'unauthorized' };

  if (input.serviceKind === null) return { kind: 'bad-request' };

  const spaceIsServed =
    input.gitSyncEnabled &&
    input.gitHttpEnabled &&
    input.spaceExists &&
    input.spaceGitSyncEnabled;
  if (!spaceIsServed) return { kind: 'not-found' };

  return input.permissionGranted ? { kind: 'proceed' } : { kind: 'forbidden' };
}
Everything it +// depends on (backend, auth, repos, ability factory, env, orchestrator) is +// mocked so we exercise ONLY the handler wiring: workspace resolution (which is +// done HERE, not by DomainMiddleware — see FIX 1), the auth/gating precedence, +// the read-vs-write dispatch, and that a fetch does NOT take the lock. +// +// These tests deliberately NEVER set `req.raw.workspaceId`: the workspace must +// come from WorkspaceRepo. If the handler regressed to reading +// `req.raw.workspaceId`, the happy-path fetch test below would fail (the repo +// would not be consulted and the request would 401). +import { Logger, UnauthorizedException } from '@nestjs/common'; +import { + SpaceCaslAction, + SpaceCaslSubject, +} from '../../../core/casl/interfaces/space-ability.type'; +import { GitHttpService } from './git-http.service'; + +type AnyMock = jest.Mock; + +interface BuildOptions { + selfHosted?: boolean; + gitSyncEnabled?: boolean; + gitHttpEnabled?: boolean; + /** What workspaceRepo.findFirst() returns (self-hosted resolution). */ + workspace?: { id: string } | null; + /** What spaceRepo.findById() returns. */ + space?: { id: string; settings?: unknown } | null; + /** Result of authService.verifyUserCredentials: a user, or throw 401. */ + user?: { id: string; email: string } | null; + /** Whether the created ability grants the requested action. 
*/ + abilityCan?: boolean; +} + +interface Built { + service: GitHttpService; + env: Record; + authService: { verifyUserCredentials: AnyMock }; + spaceRepo: { findById: AnyMock }; + workspaceRepo: { findFirst: AnyMock; findByHostname: AnyMock }; + abilityFactory: { createForUser: AnyMock }; + abilityCan: AnyMock; + vaultRegistry: { ensureServable: AnyMock }; + orchestrator: { ingestExternalPush: AnyMock }; + backend: { run: AnyMock }; +} + +function build(opts: BuildOptions = {}): Built { + const { + selfHosted = true, + gitSyncEnabled = true, + gitHttpEnabled = true, + workspace = { id: 'ws-1' }, + space = { id: 'space-1', settings: { gitSync: { enabled: true } } }, + user = { id: 'user-1', email: 'dev@example.com' }, + abilityCan = true, + } = opts; + + const env: Record = { + isSelfHosted: jest.fn(() => selfHosted), + isCloud: jest.fn(() => !selfHosted), + isGitSyncEnabled: jest.fn(() => gitSyncEnabled), + isGitSyncHttpEnabled: jest.fn(() => gitHttpEnabled), + }; + + const authService = { + verifyUserCredentials: jest.fn(async () => { + if (!user) throw new UnauthorizedException(); + return user; + }), + }; + + const spaceRepo = { findById: jest.fn(async () => space) }; + + const workspaceRepo = { + findFirst: jest.fn(async () => workspace), + findByHostname: jest.fn(async () => workspace), + }; + + const abilityCanMock = jest.fn(() => abilityCan); + const abilityFactory = { + createForUser: jest.fn(async () => ({ can: abilityCanMock })), + }; + + const vaultRegistry = { ensureServable: jest.fn(async () => undefined) }; + const orchestrator = { ingestExternalPush: jest.fn(async () => undefined) }; + const backend = { run: jest.fn(async () => undefined) }; + + const service = new GitHttpService( + env as any, + authService as any, + spaceRepo as any, + workspaceRepo as any, + abilityFactory as any, + vaultRegistry as any, + orchestrator as any, + backend as any, + ); + + return { + service, + env, + authService, + spaceRepo, + workspaceRepo, + abilityFactory, + 
abilityCan: abilityCanMock, + vaultRegistry, + orchestrator, + backend, + }; +} + +/** A fake Fastify reply capturing the terminal status/headers/body. */ +function fakeReply() { + const state: { + statusCode?: number; + headers: Record; + body?: unknown; + hijacked: boolean; + sent: boolean; + } = { headers: {}, hijacked: false, sent: false }; + + const reply: any = { + header(name: string, value: string) { + state.headers[name] = value; + return reply; + }, + status(code: number) { + state.statusCode = code; + return reply; + }, + send(body: unknown) { + state.body = body; + state.sent = true; + return reply; + }, + hijack() { + state.hijacked = true; + }, + get sent() { + return state.sent; + }, + // The raw Node response — only touched on the streaming/error paths. + raw: { + headersSent: false, + writableEnded: false, + statusCode: 200, + setHeader: jest.fn(), + end: jest.fn(), + }, + }; + return { reply, state }; +} + +/** A fake Fastify request for a /git smart-HTTP call. */ +function fakeRequest(opts: { + url: string; + method?: string; + authorization?: string; + host?: string; +}) { + const { url, method = 'GET', authorization, host = 'docs.example.com' } = opts; + const headers: Record = { host }; + if (authorization) headers['authorization'] = authorization; + // query is parsed by Fastify; mirror the `service` param when present. + const qIdx = url.indexOf('?'); + const query: Record = {}; + if (qIdx !== -1) { + for (const pair of url.slice(qIdx + 1).split('&')) { + const [k, v] = pair.split('='); + if (k) query[k] = v ?? ''; + } + } + return { + url, + method, + headers, + query, + // raw is intentionally WITHOUT workspaceId — the handler must resolve it + // itself via WorkspaceRepo (a regression to req.raw.workspaceId would 401). 
+ raw: {}, + } as any; +} + +function basic(email: string, password: string): string { + return 'Basic ' + Buffer.from(`${email}:${password}`).toString('base64'); +} + +beforeEach(() => { + jest.clearAllMocks(); + // Silence the handler's logger.warn/error in negative-path tests. + jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined); + jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined); +}); + +describe('GitHttpService.handle', () => { + it('fetch with valid creds resolves the workspace via the repo and dispatches WITHOUT the lock', async () => { + const built = build({ selfHosted: true }); + const { reply, state } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + // The workspace came from WorkspaceRepo, NOT req.raw.workspaceId. + expect(built.workspaceRepo.findFirst).toHaveBeenCalledTimes(1); + expect(built.authService.verifyUserCredentials).toHaveBeenCalledWith( + { email: 'dev@example.com', password: 'pw' }, + 'ws-1', + ); + expect(built.spaceRepo.findById).toHaveBeenCalledWith('space-1', 'ws-1'); + // Read ability was evaluated. + expect(built.abilityCan).toHaveBeenCalledWith( + SpaceCaslAction.Read, + SpaceCaslSubject.Page, + ); + // It proceeded: vault prepared, reply hijacked, backend ran directly. + expect(built.vaultRegistry.ensureServable).toHaveBeenCalledWith('space-1'); + expect(state.hijacked).toBe(true); + expect(built.backend.run).toHaveBeenCalledTimes(1); + // A fetch must NOT take the push lock. 
+ expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled(); + }); + + it('cloud deployment resolves the workspace by the host subdomain', async () => { + const built = build({ selfHosted: false }); + const { reply } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + authorization: basic('dev@example.com', 'pw'), + host: 'acme.example.com', + }); + + await built.service.handle(req, reply); + + expect(built.workspaceRepo.findByHostname).toHaveBeenCalledWith('acme'); + expect(built.workspaceRepo.findFirst).not.toHaveBeenCalled(); + expect(built.backend.run).toHaveBeenCalledTimes(1); + }); + + it('missing Basic credentials -> 401 with WWW-Authenticate', async () => { + const built = build(); + const { reply, state } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + // no Authorization header + }); + + await built.service.handle(req, reply); + + expect(state.statusCode).toBe(401); + expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"'); + expect(built.backend.run).not.toHaveBeenCalled(); + expect(built.authService.verifyUserCredentials).not.toHaveBeenCalled(); + }); + + it('invalid Basic credentials -> 401 with WWW-Authenticate', async () => { + const built = build({ user: null }); // verifyUserCredentials throws 401 + const { reply, state } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + authorization: basic('dev@example.com', 'wrong'), + }); + + await built.service.handle(req, reply); + + expect(state.statusCode).toBe(401); + expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"'); + expect(built.backend.run).not.toHaveBeenCalled(); + }); + + it('a write by a Read-only user -> 403 (reader cannot push)', async () => { + const built = build({ abilityCan: false }); + const { reply, state } = fakeReply(); + 
const req = fakeRequest({ + url: '/git/space-1.git/git-receive-pack', + method: 'POST', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + // The Manage ability was checked for a write and denied. + expect(built.abilityCan).toHaveBeenCalledWith( + SpaceCaslAction.Manage, + SpaceCaslSubject.Page, + ); + expect(state.statusCode).toBe(403); + expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled(); + expect(built.backend.run).not.toHaveBeenCalled(); + }); + + it('a space that is not git-sync-enabled -> 404 (existence never revealed)', async () => { + const built = build({ + space: { id: 'space-1', settings: { gitSync: { enabled: false } } }, + }); + const { reply, state } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + expect(state.statusCode).toBe(404); + // CASL is never even evaluated for a non-candidate space. 
+ expect(built.abilityFactory.createForUser).not.toHaveBeenCalled(); + expect(built.backend.run).not.toHaveBeenCalled(); + }); + + it('git-sync globally disabled -> 404 even with valid creds', async () => { + const built = build({ gitSyncEnabled: false }); + const { reply, state } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + expect(state.statusCode).toBe(404); + expect(built.backend.run).not.toHaveBeenCalled(); + }); + + it('a valid write proceeds through the orchestrator (push takes the lock)', async () => { + const built = build({ abilityCan: true }); + const { reply, state } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/git-receive-pack', + method: 'POST', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + expect(built.abilityCan).toHaveBeenCalledWith( + SpaceCaslAction.Manage, + SpaceCaslSubject.Page, + ); + expect(state.hijacked).toBe(true); + expect(built.orchestrator.ingestExternalPush).toHaveBeenCalledTimes(1); + const [spaceId, workspaceId] = + built.orchestrator.ingestExternalPush.mock.calls[0]; + expect(spaceId).toBe('space-1'); + expect(workspaceId).toBe('ws-1'); + }); + + it('an unresolvable workspace -> 401 (credentials cannot be validated without one)', async () => { + const built = build({ workspace: null }); + const { reply, state } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + // Without a workspace we cannot run verifyUserCredentials, so credentials + // are not validated -> 401 (the 401-before-404 ordering is preserved: an + // unauthenticated request never reaches the space-existence 404). 
+ expect(built.workspaceRepo.findFirst).toHaveBeenCalledTimes(1); + expect(built.authService.verifyUserCredentials).not.toHaveBeenCalled(); + expect(state.statusCode).toBe(401); + expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"'); + expect(built.backend.run).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/server/src/integrations/git-sync/http/git-http.service.ts b/apps/server/src/integrations/git-sync/http/git-http.service.ts new file mode 100644 index 00000000..7d58b20d --- /dev/null +++ b/apps/server/src/integrations/git-sync/http/git-http.service.ts @@ -0,0 +1,319 @@ +import { Injectable, Logger, UnauthorizedException } from '@nestjs/common'; +import type { FastifyReply, FastifyRequest } from 'fastify'; +import { AuthService } from '../../../core/auth/services/auth.service'; +import SpaceAbilityFactory from '../../../core/casl/abilities/space-ability.factory'; +import { + SpaceCaslAction, + SpaceCaslSubject, +} from '../../../core/casl/interfaces/space-ability.type'; +import { SpaceRepo } from '@docmost/db/repos/space/space.repo'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { User } from '@docmost/db/types/entity.types'; +import { parseBasicAuth } from '../../mcp/mcp-auth.helpers'; +import { EnvironmentService } from '../../environment/environment.service'; +import { VaultRegistryService } from '../services/vault-registry.service'; +import { + GitSyncLockHeldError, + GitSyncOrchestrator, +} from '../services/git-sync.orchestrator'; +import { GitHttpBackendService } from './git-http-backend.service'; +import { + decideGitHttpGate, + parseGitPath, + resolveServiceKind, + GitHttpServiceKind, +} from './git-http.helpers'; + +const WWW_AUTHENTICATE = 'Basic realm="gitmost"'; + +/** + * The /git smart-HTTP host. 
/**
 * The /git smart-HTTP host. Wires request parsing, the reused auth primitives
 * (HTTP Basic -> AuthService.verifyUserCredentials), per-space gating
 * (EnvironmentService flags + space.settings.gitSync.enabled), CASL authz
 * (SpaceAbilityFactory), and dispatch to `git http-backend`:
 *   - fetch (read) -> ensureServable, then stream http-backend directly (no lock).
 *   - push (write) -> ensureServable, then orchestrator.ingestExternalPush, which
 *     runs the receive-pack under the space lock and then a Docmost cycle.
 *
 * Mounted at the ROOT (`/git/...`) by a raw Fastify route in main.ts (the global
 * `/api` prefix does not apply). Never logs the password or Authorization header.
 */
@Injectable()
export class GitHttpService {
  private readonly logger = new Logger(GitHttpService.name);

  constructor(
    private readonly environmentService: EnvironmentService,
    private readonly authService: AuthService,
    private readonly spaceRepo: SpaceRepo,
    private readonly workspaceRepo: WorkspaceRepo,
    private readonly spaceAbilityFactory: SpaceAbilityFactory,
    private readonly vaultRegistry: VaultRegistryService,
    private readonly orchestrator: GitSyncOrchestrator,
    private readonly backend: GitHttpBackendService,
  ) {}

  /**
   * Resolve the workspace for a /git request. Nest middleware does not run for
   * this raw root-mounted route, so `req.raw.workspaceId` is never populated and
   * the lookup is done here instead:
   *   - self-hosted (single workspace) -> workspaceRepo.findFirst();
   *   - cloud (multi-tenant)           -> first label of the Host header.
   *
   * @returns the workspace id, or null when none resolves or the lookup throws.
   *   With a null workspace `handle` cannot verify credentials, so the request
   *   is answered 401 (never 404) — the 401-before-404 ordering is preserved.
   */
  private async resolveWorkspaceId(req: FastifyRequest): Promise<string | null> {
    try {
      if (this.environmentService.isSelfHosted()) {
        const workspace = await this.workspaceRepo.findFirst();
        return workspace?.id ?? null;
      }
      if (this.environmentService.isCloud()) {
        const host = this.headerValue(req.headers['host']);
        const subdomain = host ? host.split('.')[0] : '';
        if (!subdomain) return null;
        const workspace = await this.workspaceRepo.findByHostname(subdomain);
        return workspace?.id ?? null;
      }
    } catch (err) {
      // A DB error resolving the workspace must not leak details; treat it as
      // unresolvable (credentials then stay unverified and the gate answers 401).
      this.logger.warn(
        `git-http: workspace resolution error: ${this.describeError(err)}`,
      );
    }
    return null;
  }

  /**
   * Handle one request under `/git/`. The space id and git subpath are parsed
   * from `req.url`; the gate (401 / 400 / 404 / 403 / proceed) is decided by the
   * pure `decideGitHttpGate`. On 'proceed' the Fastify reply is hijacked so the
   * binary CGI body is written directly to the raw socket. From that point on
   * Fastify no longer answers for us, so every failure path below must finish
   * the raw response itself.
   */
  async handle(req: FastifyRequest, reply: FastifyReply): Promise<void> {
    const rawReq = req.raw;
    const rawRes = reply.raw;

    // --- parse the URL into spaceId + subpath -------------------------------
    const rest = this.extractRest(req.url);
    const parsedPath = rest === null ? null : parseGitPath(rest);

    // --- resolve the requested git service kind (read vs write) -------------
    const service =
      typeof req.query === 'object' && req.query !== null
        ? (req.query as Record<string, string | undefined>).service
        : undefined;
    const serviceKind: GitHttpServiceKind | null = parsedPath
      ? resolveServiceKind({
          method: req.method,
          subpath: parsedPath.subpath,
          service,
        })
      : null;

    // --- authenticate (HTTP Basic) ------------------------------------------
    const authHeader = req.headers['authorization'];
    const basic = parseBasicAuth(
      Array.isArray(authHeader) ? authHeader[0] : authHeader,
    );
    // Resolved here because no middleware sets req.raw.workspaceId on this route.
    const workspaceId: string | null = await this.resolveWorkspaceId(req);

    let user: User | undefined;
    let credentialsValid = false;
    if (basic && workspaceId) {
      try {
        user = await this.authService.verifyUserCredentials(
          { email: basic.email, password: basic.password },
          workspaceId,
        );
        credentialsValid = true;
      } catch (err) {
        if (!(err instanceof UnauthorizedException)) {
          // A non-credential failure (e.g. DB error): treat as invalid creds for
          // the gate (a 401), and log without leaking the password/header.
          this.logger.warn(
            `git-http: credential check error: ${this.describeError(err)}`,
          );
        }
        credentialsValid = false;
      }
    }

    // --- resolve the space + per-space gating + CASL ------------------------
    let spaceExists = false;
    let spaceGitSyncEnabled = false;
    let spaceId: string | undefined;
    let permissionGranted = false;
    if (credentialsValid && user && workspaceId && parsedPath && serviceKind) {
      const space = await this.spaceRepo.findById(
        parsedPath.spaceId,
        workspaceId,
      );
      if (space) {
        spaceExists = true;
        spaceId = space.id;
        spaceGitSyncEnabled =
          (space.settings as any)?.gitSync?.enabled === true;

        // Only evaluate CASL when the space is actually a sync candidate — an
        // unrelated space stays a 404 (existence is never revealed).
        if (spaceGitSyncEnabled) {
          try {
            const ability = await this.spaceAbilityFactory.createForUser(
              user,
              space.id,
            );
            const action =
              serviceKind === 'write'
                ? SpaceCaslAction.Manage
                : SpaceCaslAction.Read;
            permissionGranted = ability.can(action, SpaceCaslSubject.Page);
          } catch {
            // Any failure building the ability (e.g. the user has no role in
            // the space) is treated as "no permission".
            permissionGranted = false;
          }
        }
      }
    }

    // --- the gate decision (pure) -------------------------------------------
    const decision = decideGitHttpGate({
      hasCredentials: Boolean(basic),
      credentialsValid,
      serviceKind,
      gitSyncEnabled: this.environmentService.isGitSyncEnabled(),
      gitHttpEnabled: this.environmentService.isGitSyncHttpEnabled(),
      spaceExists,
      spaceGitSyncEnabled,
      permissionGranted,
    });

    if (decision.kind === 'unauthorized') {
      reply
        .header('WWW-Authenticate', WWW_AUTHENTICATE)
        .status(401)
        .send('Authentication required');
      return;
    }
    if (decision.kind === 'bad-request') {
      reply.status(400).send('Bad request');
      return;
    }
    if (decision.kind === 'not-found') {
      reply.status(404).send('Not found');
      return;
    }
    if (decision.kind === 'forbidden') {
      reply.status(403).send('Forbidden');
      return;
    }

    // decision.kind === 'proceed' — the checks below only narrow types for TS.
    if (!parsedPath || !serviceKind || !spaceId || !user || !workspaceId) {
      // Defensive: 'proceed' implies these are set, but keep TS + runtime safe.
      reply.status(500).send('Internal server error');
      return;
    }

    // --- dispatch to git http-backend ---------------------------------------
    const backendRequest = {
      spaceId,
      subpath: parsedPath.subpath,
      method: req.method,
      queryString: this.extractQueryString(req.url),
      contentType: this.headerValue(req.headers['content-type']) ?? '',
      gitProtocol: this.headerValue(req.headers['git-protocol']),
      remoteUser: user.email,
    };

    try {
      // Idempotently make the vault servable (repo + receive/upload config).
      await this.vaultRegistry.ensureServable(spaceId);
    } catch (err) {
      this.logger.error(
        `git-http: failed to prepare vault for space ${spaceId}: ${this.describeError(
          err,
        )}`,
      );
      if (!reply.sent) reply.status(500).send('Internal server error');
      return;
    }

    // Hijack the reply so the backend can stream the raw (possibly binary) CGI
    // response directly to the socket.
    reply.hijack();

    if (serviceKind === 'read') {
      // Fetch/clone: stream http-backend directly, no lock (read-only).
      try {
        await this.backend.run(backendRequest, rawReq, rawRes);
      } catch (err) {
        // The reply is hijacked, so an unhandled throw here would leave the
        // socket open forever; log and finish the raw response ourselves.
        this.logger.error(
          `git-http: fetch error for space ${spaceId}: ${this.describeError(
            err,
          )}`,
        );
        this.finishRaw(rawRes, 500, 'Internal server error');
      }
      return;
    }

    // Push: run the receive-pack under the space lock, then a Docmost cycle.
    try {
      await this.orchestrator.ingestExternalPush(spaceId, workspaceId, () =>
        this.backend.run(backendRequest, rawReq, rawRes),
      );
    } catch (err) {
      if (err instanceof GitSyncLockHeldError) {
        // The lock could not be acquired and the receive-pack never ran, so the
        // response is still unwritten — answer 503 so git retries.
        this.finishRaw(rawRes, 503, 'git-sync busy, retry', {
          'Retry-After': '1',
        });
        return;
      }
      // Any other error: log it and make sure the socket is closed. If nothing
      // was written yet, answer 500 instead of an empty implicit 200.
      this.logger.error(
        `git-http: push ingestion error for space ${spaceId}: ${this.describeError(
          err,
        )}`,
      );
      this.finishRaw(rawRes, 500, 'Internal server error');
    }
  }

  /**
   * Finish a hijacked raw response on an error path. When no headers have been
   * sent yet, writes a plain-text response with `statusCode` and `body`; when a
   * response is already in flight, only closes it (appending text to a partly
   * streamed git response would corrupt it). Never throws.
   */
  private finishRaw(
    rawRes: FastifyReply['raw'],
    statusCode: number,
    body: string,
    extraHeaders: Record<string, string> = {},
  ): void {
    try {
      if (rawRes.headersSent) {
        if (!rawRes.writableEnded) rawRes.end();
        return;
      }
      rawRes.statusCode = statusCode;
      rawRes.setHeader('Content-Type', 'text/plain');
      for (const [name, value] of Object.entries(extraHeaders)) {
        rawRes.setHeader(name, value);
      }
      rawRes.end(body);
    } catch {
      /* the socket is already gone — nothing left to do */
    }
  }

  /** Render a caught value as a log-safe message string. */
  private describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Normalise a possibly-array header value to its first string. */
  private headerValue(value: string | string[] | undefined): string | undefined {
    if (Array.isArray(value)) return value[0];
    return value;
  }

  /**
   * Extract the part of the URL AFTER `/git/` and BEFORE the query string.
   * Returns null when the URL is not under `/git/`.
   */
  private extractRest(url: string): string | null {
    const qIdx = url.indexOf('?');
    const pathname = qIdx === -1 ? url : url.slice(0, qIdx);
    const prefix = '/git/';
    if (!pathname.startsWith(prefix)) return null;
    return pathname.slice(prefix.length);
  }

  /** The raw query string without the leading '?', or '' when none. */
  private extractQueryString(url: string): string {
    const qIdx = url.indexOf('?');
    return qIdx === -1 ? '' : url.slice(qIdx + 1);
  }
}

/**
 * Thrown by `ingestExternalPush` when the per-space lock cannot be acquired (a
 * poll cycle is mid-flight on this or another replica). The /git HTTP handler
 * maps it to a 503 so the git client retries rather than racing a cycle's
 * working-tree checkout/merge.
 */
export class GitSyncLockHeldError extends Error {
  constructor(public readonly spaceId: string) {
    super(`git-sync: space ${spaceId} is busy (lock held); retry the push`);
    this.name = 'GitSyncLockHeldError';
  }
}
/**
 * CAS-guarded TTL refresh for the per-space lock. The Lua script extends the
 * key's TTL only while its stored value still equals this instance's id, so a
 * lock another replica took over after our TTL expired is never extended.
 *
 * The script returns 0 when the key is missing or owned by someone else. That
 * means this instance has lost the lock while still running, which is logged as
 * a warning so the condition is visible. Redis errors are logged too. This
 * method never throws: a failed refresh must not break the work it protects.
 */
private async refreshLock(spaceId: string): Promise<void> {
  const lua =
    'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("pexpire", KEYS[1], ARGV[2]) else return 0 end';
  try {
    const extended = await this.redis.eval(
      lua,
      1,
      GIT_SYNC_LOCK_PREFIX + spaceId,
      this.instanceId,
      String(GIT_SYNC_LOCK_TTL_MS),
    );
    if (extended === 0) {
      // Ownership was lost (TTL expired or another holder took over).
      this.logger.warn(
        `git-sync: lock for space ${spaceId} is no longer held by this instance; TTL not refreshed`,
      );
    }
  } catch (err) {
    this.logger.warn(
      `git-sync: failed to refresh lock for space ${spaceId}: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
  }
}
try { - return await this.driveCycle(spaceId, workspaceId, serviceUserId); + const result = await this.withSpaceLock(spaceId, () => + this.driveCycle(spaceId, workspaceId, serviceUserId), + ); + if ('skipped' in result && !('spaceId' in result)) { + return { spaceId, ran: false, skipped: result.skipped }; + } + return result; } catch (err) { const message = err instanceof Error ? err.message : String(err); this.logger.error(`git-sync: cycle failed for space ${spaceId}: ${message}`); return { spaceId, ran: false, error: message }; + } + } + + /** + * Run `fn` under the per-space lock: the in-process mutex (no overlapping + * cycles on this instance) AND the Redis leader lock (single writer across + * replicas). Returns `fn`'s result, or a skip sentinel when the lock could not + * be acquired — `{ skipped: 'in-progress' }` (this instance is mid-cycle) or + * `{ skipped: 'lock-held' }` (another replica holds the Redis lock). The mutex + * + Redis lock are always released in a `finally`, even when `fn` throws (the + * throw propagates to the caller). This is the single reusable wrapper shared + * by `runOnce` (the poll/admin cycle) and `ingestExternalPush` (a push from a + * git client over HTTP) so both serialize against each other identically. + */ + async withSpaceLock( + spaceId: string, + fn: () => Promise, + ): Promise { + if (this.running.has(spaceId)) { + return { skipped: 'in-progress' }; + } + if (!(await this.acquire(spaceId))) { + return { skipped: 'lock-held' }; + } + this.running.add(spaceId); + // Heartbeat: periodically (≈ TTL/3) extend the lock's TTL while `fn` runs so + // a long push (client-controlled receive-pack + the Docmost cycle) cannot + // outlive the fixed TTL and let a concurrent cycle race the working tree. The + // refresh is CAS-guarded (only extends while WE own it). `.unref()` keeps the + // timer from holding the event loop open; it is ALWAYS cleared in `finally`. 
+ const heartbeat = setInterval(() => { + void this.refreshLock(spaceId); + }, Math.max(1, Math.floor(GIT_SYNC_LOCK_TTL_MS / 3))); + heartbeat.unref?.(); + try { + return await fn(); } finally { + clearInterval(heartbeat); this.running.delete(spaceId); await this.release(spaceId); } } + /** + * Ingest a push that arrived over smart-HTTP (the /git host). Under the SAME + * per-space lock the poll cycle uses, it: + * 1. runs `runReceivePack()` — the closure that spawns `git http-backend` for + * the receive-pack request and finishes streaming the HTTP response to the + * client. The client's push result is determined here. + * 2. THEN — still holding the lock — runs the full Docmost cycle (the same + * `driveCycle` body `runOnce` uses) so the freshly received commits on + * `main` flow back into Docmost pages. + * + * If the cycle body in step 2 throws, it is LOGGED but NOT rethrown: the push + * already succeeded and the commits are durable on `main`, so the poll-interval + * backstop will reconcile them on the next tick. The receive-pack itself is the + * load-bearing step. + * + * Lock contention: if the lock cannot be acquired (a poll cycle is mid-flight), + * this throws a `GitSyncLockHeldError`. The HTTP handler converts that to a 503 + * so git surfaces a retryable error to the user (chosen over blocking the + * request behind a potentially long cycle). The receive-pack is NOT run when + * the lock is held — we never write to the working tree concurrently with a + * cycle. + */ + async ingestExternalPush( + spaceId: string, + workspaceId: string, + runReceivePack: () => Promise, + ): Promise { + if (!this.environmentService.isGitSyncEnabled()) { + // The HTTP gate already checks this, but be defensive: never run a cycle + // when sync is globally off. 
+ throw new GitSyncLockHeldError(spaceId); + } + const serviceUserId = this.environmentService.getGitSyncServiceUserId(); + + const result = await this.withSpaceLock(spaceId, async () => { + // 1) Stream the receive-pack to the client (durable commits land on main). + await runReceivePack(); + + // 2) Reconcile the new commits into Docmost. A service user is required to + // attribute the writes; without one we cannot run the cycle — the commits + // are still durable and the poll backstop will pick them up once configured. + if (!serviceUserId) { + this.logger.error( + 'git-sync: GIT_SYNC_SERVICE_USER_ID is required to ingest an external ' + + 'push — the push is durable on main; skipping the immediate cycle.', + ); + return; + } + try { + await this.driveCycle(spaceId, workspaceId, serviceUserId); + } catch (err) { + // Do NOT rethrow: the push succeeded and the commits are durable on main; + // the poll-interval backstop retries the cycle. Log for visibility. + this.logger.error( + `git-sync: post-push cycle failed for space ${spaceId} (push is ` + + `durable; poll will retry): ${ + err instanceof Error ? err.message : String(err) + }`, + ); + } + return; + }); + + // The lock was held (in-progress or another replica) — surface to the caller + // so the HTTP handler can answer 503 and let git retry. + if (typeof result === 'object' && result !== null && 'skipped' in result) { + throw new GitSyncLockHeldError(spaceId); + } + } + /** * The actual engine wiring (plan §11). 
Mirrors the engine's own `main`: * PULL — readExisting -> computePullActions -> applyPullActions, diff --git a/apps/server/src/integrations/git-sync/services/vault-registry.service.ts b/apps/server/src/integrations/git-sync/services/vault-registry.service.ts index b7a50637..f9adbc86 100644 --- a/apps/server/src/integrations/git-sync/services/vault-registry.service.ts +++ b/apps/server/src/integrations/git-sync/services/vault-registry.service.ts @@ -1,8 +1,12 @@ import { Injectable, Logger } from '@nestjs/common'; import { mkdir } from 'node:fs/promises'; -import { VaultGit } from '@docmost/git-sync'; +import { execFile } from 'node:child_process'; +import { promisify } from 'node:util'; +import { VaultGit, vaultGitEnv } from '@docmost/git-sync'; import { EnvironmentService } from '../../environment/environment.service'; +const execFileAsync = promisify(execFile); + /** * Resolves the on-disk vault location per space and owns the (lazily created, * cached) `VaultGit` instance for each one (plan §3/§5). @@ -41,4 +45,49 @@ export class VaultRegistryService { this.vaults.set(spaceId, vault); return vault; } + + /** + * Make a space's vault repo servable over smart-HTTP (the /git host). Ensures + * the repo exists (engine `ensureRepo`: `git init -b main` + initial commit + + * branches; idempotent), then sets the LOCAL git config a `git http-backend` + * push needs: + * + * - receive.denyCurrentBranch=updateInstead — a push to the checked-out + * `main` updates the working tree too (the engine's human-facing branch). + * Requires a clean tree, which is guaranteed between cycles / under the + * orchestrator lock that wraps an external push. + * - receive.denyNonFastForwards=true — block force-push so a client cannot + * rewrite the engine's history on `main`. + * - http.receivepack=true / http.uploadpack=true — explicitly allow the + * receive/upload services over HTTP. + * + * All four are set idempotently (plain `git config` overwrites the local + * value). 
Returns the absolute vault path. Idempotent and safe to call before + * every request. + */ + async ensureServable(spaceId: string): Promise<string> { + const vault = await this.getVault(spaceId); + const path = this.vaultPath(spaceId); + + // ensureRepo also verifies git is available on its first git call; it does + // `git init -b main` + an initial commit + the engine branches. Idempotent. + await vault.ensureRepo(); + + const configs: Array<[string, string]> = [ + ['receive.denyCurrentBranch', 'updateInstead'], + ['receive.denyNonFastForwards', 'true'], + ['http.receivepack', 'true'], + ['http.uploadpack', 'true'], + ]; + for (const [key, value] of configs) { + await execFileAsync('git', ['config', key, value], { + cwd: path, + // Use the engine's cwd-isolated env (strips GIT_DIR / GIT_WORK_TREE) so + // the config is written to THIS vault's local config, nothing else. + env: vaultGitEnv(), + }); + } + + return path; + } } diff --git a/apps/server/src/main.ts b/apps/server/src/main.ts index 1fb140c1..ee5582b1 100644 --- a/apps/server/src/main.ts +++ b/apps/server/src/main.ts @@ -15,6 +15,7 @@ import { InternalLogFilter } from './common/logger/internal-log-filter'; import { EnvironmentService } from './integrations/environment/environment.service'; import { resolveFrameHeader } from './common/helpers'; import { resolveTrustProxy } from './integrations/environment/trust-proxy.util'; +import { GitHttpService } from './integrations/git-sync/http/git-http.service'; async function bootstrap() { const app = await NestFactory.create( @@ -106,6 +107,23 @@ async function bootstrap() { }, ); + // git smart-HTTP POST bodies use these media types. Register PASSTHROUGH + // content-type parsers so Fastify does NOT buffer/parse them (it would + // otherwise reject the unknown type with 415); the /git handler streams the + // raw Node request (request.raw) to `git http-backend` stdin instead. A
A + // passthrough parser also bypasses the bodyLimit, so large pushes are not + // truncated (the bytes are never buffered by Fastify). + app + .getHttpAdapter() + .getInstance() + .addContentTypeParser( + [ + 'application/x-git-upload-pack-request', + 'application/x-git-receive-pack-request', + ], + (_req, payload, done) => done(null, payload), + ); + app .getHttpAdapter() .getInstance() @@ -153,6 +171,25 @@ async function bootstrap() { app.useGlobalInterceptors(new TransformHttpResponseInterceptor(reflector)); app.enableShutdownHooks(); + // git smart-HTTP host (the /git/.git/... subtree). Registered as a + // RAW Fastify route — NOT a Nest controller under the global '/api' prefix — + // so it lives at the ROOT and a single wildcard reliably captures the whole + // multi-segment subtree (avoiding the path-to-regexp v8 wildcard / global- + // prefix-exclude ambiguity in NestJS v11). The handler is resolved from the + // Nest container so all auth/authz/gating still runs. NOTE: Nest middleware + // (DomainMiddleware) does NOT run for this raw root route — it is bound to the + // Nest router under the global '/api' prefix — so request.raw.workspaceId is + // NOT populated here; GitHttpService resolves the workspace itself (mirroring + // DomainMiddleware). The Fastify wildcard '/git/*' captures the multi-segment + // subpath; the handler re-parses req.url itself. + const gitHttpService = app.get(GitHttpService); + app + .getHttpAdapter() + .getInstance() + .all('/git/*', async (request, reply) => { + await gitHttpService.handle(request as any, reply as any); + }); + const logger = new Logger('NestApplication'); process.on('unhandledRejection', (reason, promise) => {