Files
gitmost/apps/server/src/integrations/git-sync/http/git-http.service.ts
T
claude code agent 227 b47751349f fix(git-sync): kill spurious marker-leaking conflict, concurrent-edit loss, flapping HEAD
Three more git-sync QA defects from the 2nd live pass on PR #119, plus a
callout-fidelity nit:

1. SPURIOUS conflict leaked raw markers into canonical main (root cause). On an
   ordinary round-trip the only difference between the docmost mirror (normalize-
   on-write) and a user's raw push is trailing/empty-line normalization, which made
   git's line-based docmost->main merge CONFLICT, and the wedge fix then committed
   the file WITH literal <<<<<<< / ======= / >>>>>>> markers onto main (git and the
   DB silently diverged for cycles). Fix: on a conflict, normalize trailing/empty
   lines on BOTH sides (showStage :2:/:3:) before comparing — a trailing-only diff
   is recognized as spurious and resolved to the clean normalized form. A GENUINE
   same-block conflict is auto-resolved to OURS (git wins, mirroring the live-doc
   3-way rule); the docmost side stays on the `docmost` branch + page history. Raw
   markers NEVER reach main again.

2. Concurrent UI<->git edit silently lost the UI side. The git->Docmost 3-way merge
   ran against a live Y.Doc that hadn't yet received the user's debounced in-flight
   edit, so git clean-applied (no conflict detected) and the edit vanished even on a
   different block. Fix: flush the pending debounced store before the merge so the
   in-flight edit is drained into the live doc first — a different-block edit is
   merged, a same-block one is detected and pinned to history (recoverable).

3. Smart-HTTP HEAD flapped to the read-only `docmost` mirror (~1/4 of clones). The
   engine transiently checks out `docmost` mid-pull and the host advertises whatever
   HEAD resolves to. Fix: VaultGit.pinHeadToMain(); the cycle restores HEAD->main in
   a finally; and the upload-pack ref advertisement is served HEAD-pinned under the
   per-space lock so it can never observe a mid-cycle HEAD.

4. (callout) clampCalloutType now mirrors the editor's GITHUB_ALERT_TYPE_MAP for
   non-schema aliases (tip->success, caution->danger, important->info) instead of
   flatly collapsing to info. The editor schema genuinely supports only the six
   banner types, so unknown types still fall back to info (by design).

Tests: deterministic real-git trailing-blank round-trip (no conflict, no markers,
in sync over 2 cycles) + genuine-conflict no-marker-leak; HEAD advertisement
stability; pre/post-flush concurrent-edit survival; serveReadAdvertisement lock
pin; widened callout-alias coverage. Engine vitest + server tsc + collaboration /
git-http / orchestrator specs all green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 22:05:32 +03:00

465 lines
19 KiB
TypeScript

import {
Injectable,
Logger,
OnModuleDestroy,
UnauthorizedException,
} from '@nestjs/common';
import type { FastifyReply, FastifyRequest } from 'fastify';
import { AuthService } from '../../../core/auth/services/auth.service';
import SpaceAbilityFactory from '../../../core/casl/abilities/space-ability.factory';
import {
SpaceCaslAction,
SpaceCaslSubject,
} from '../../../core/casl/interfaces/space-ability.type';
import { SpaceRepo } from '@docmost/db/repos/space/space.repo';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { User } from '@docmost/db/types/entity.types';
import {
parseBasicAuth,
FailedLoginLimiter,
clientIp,
isCredentialsFailure,
} from '../../mcp/mcp-auth.helpers';
import { resolveRequestWorkspace } from '../../../common/helpers/resolve-request-workspace';
import { EnvironmentService } from '../../environment/environment.service';
import { VaultRegistryService } from '../services/vault-registry.service';
import {
GitSyncLockHeldError,
GitSyncOrchestrator,
} from '../services/git-sync.orchestrator';
import { GitHttpBackendService } from './git-http-backend.service';
import {
decideGitHttpGate,
parseGitPath,
resolveServiceKind,
GitHttpServiceKind,
} from './git-http.helpers';
/** `WWW-Authenticate` challenge value attached to the 401 and 429 responses below (HTTP Basic). */
const WWW_AUTHENTICATE = 'Basic realm="gitmost"';
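/**
* The /git smart-HTTP host. Wires request parsing, the reused auth primitives
* (HTTP Basic -> AuthService.verifyUserCredentials), per-space gating
* (EnvironmentService flags + space.settings.gitSync.enabled), CASL authz
* (SpaceAbilityFactory), and dispatch to `git http-backend`:
* - read ref advertisement (GET info/refs?service=git-upload-pack, GET HEAD) ->
* ensureServable then orchestrator.serveReadAdvertisement, which serves it
* with HEAD pinned to `main` under the per-space lock.
* - every other read, plus the write-authorized but read-only receive-pack ref
* advertisement (GET info/refs?service=git-receive-pack) -> ensureServable
* then stream http-backend directly (no lock, no cycle).
* - push (POST git-receive-pack) -> ensureServable then
* orchestrator.ingestExternalPush, which runs the receive-pack under the
* space lock and then a Docmost cycle.
*
* Mounted at the ROOT (`/git/...`) by a raw Fastify route in main.ts (the global
* `/api` prefix does not apply). Never logs the password or Authorization header.
*/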
@Injectable()
export class GitHttpService implements OnModuleDestroy {
/** Nest logger labelled with this service's class name. */
private readonly logger = new Logger(GitHttpService.name);
/**
* In-process brute-force speed bump for the /git HTTP-Basic path. The raw
* `/git/*` Fastify route bypasses the Nest pipeline (so ThrottlerGuard, which is
* only on controllers, never runs) and there is no fastify rate-limit plugin, so
* without this `verifyUserCredentials` (bcrypt) would run unthrottled on every
* request once GIT_SYNC_HTTP_ENABLED is on. Mirrors the /mcp Basic path EXACTLY
* (FailedLoginLimiter, same 5/60s thresholds, the same per-IP / per-IP+email /
* global-per-email keys) so the two auth seams cannot diverge. A speed bump, not
* a hard boundary (in-process, per replica).
*/
private readonly failedLogins = new FailedLoginLimiter(5, 60_000);
/** Periodic sweep to bound limiter memory (mirrors McpService / mcp http.ts). */
private readonly sweepIntervalMs = 60_000;
/** Handle of the sweep interval: created in the constructor, cleared in onModuleDestroy. */
private readonly sweepTimer: NodeJS.Timeout;
/**
 * Receives the injected collaborators and starts the periodic sweep of the
 * failed-login limiter. The interval is unref'ed so it never holds the
 * process open on its own.
 */
constructor(
  private readonly environmentService: EnvironmentService,
  private readonly authService: AuthService,
  private readonly spaceRepo: SpaceRepo,
  private readonly workspaceRepo: WorkspaceRepo,
  private readonly spaceAbilityFactory: SpaceAbilityFactory,
  private readonly vaultRegistry: VaultRegistryService,
  private readonly orchestrator: GitSyncOrchestrator,
  private readonly backend: GitHttpBackendService,
) {
  // A sweep failure is logged and swallowed so one bad tick cannot take the
  // interval callback (and with it the process) down.
  const sweepLimiter = (): void => {
    try {
      this.failedLogins.sweep();
    } catch (sweepError) {
      this.logger.error(
        'git-http failed-login limiter sweep failed',
        sweepError as Error,
      );
    }
  };
  this.sweepTimer = setInterval(sweepLimiter, this.sweepIntervalMs);
  // The sweep timer alone must never keep the event loop alive.
  this.sweepTimer.unref?.();
}
/** OnModuleDestroy hook: stops the limiter sweep interval started in the constructor. */
onModuleDestroy(): void {
clearInterval(this.sweepTimer);
}
/**
 * Work out which workspace a /git request belongs to.
 *
 * `req.raw.workspaceId` is never populated on this raw root-mounted route, so
 * the lookup is done here by delegating to the shared
 * `resolveRequestWorkspace` helper with the request's `Host` header. Only the
 * workspace id is returned.
 *
 * @param req The incoming Fastify request (only its `host` header is read).
 * @returns The workspace id, or null when nothing resolves or the lookup
 *   throws. A failure is logged by message only, so no details leak to the
 *   client; the caller's gate turns a null into its 401/404 handling.
 */
private async resolveWorkspaceId(req: FastifyRequest): Promise<string | null> {
  try {
    const hostHeader = this.headerValue(req.headers['host']);
    const resolved = await resolveRequestWorkspace(
      this.environmentService,
      this.workspaceRepo,
      hostHeader,
    );
    return resolved?.id ?? null;
  } catch (err) {
    // Lookup failures are treated as "unresolvable" rather than surfaced.
    const reason = err instanceof Error ? err.message : String(err);
    this.logger.warn(`git-http: workspace resolution error: ${reason}`);
    return null;
  }
}
/**
 * Handle one `/git/<spaceId>.git/<subpath>` request end to end.
 *
 * Steps, in order:
 *  1. parse the URL into spaceId + subpath and resolve the service kind;
 *  2. authenticate with HTTP Basic behind the failed-login limiter;
 *  3. look up the space, its per-space git-sync flag and the CASL ability;
 *  4. let the pure `decideGitHttpGate` pick 401 / 400 / 404 / 403 / proceed;
 *  5. make the vault servable, hijack the reply and dispatch to
 *     `git http-backend` (directly, via `serveReadAdvertisement`, or via
 *     `ingestExternalPush` for the actual receive-pack POST).
 *
 * Once the reply is hijacked this method owns the raw response: every
 * dispatch path catches its own errors and ends the socket, because Fastify
 * no longer sends anything on our behalf after `hijack()`.
 *
 * @param req   The Fastify request; `req.url` is expected to start with `/git/`.
 * @param reply The Fastify reply; used normally for gate rejections and
 *   hijacked before any backend streaming.
 * @returns Resolves when the response has been sent or the raw response ended.
 */
async handle(req: FastifyRequest, reply: FastifyReply): Promise<void> {
  const rawReq = req.raw;
  const rawRes = reply.raw;
  // --- parse the URL into spaceId + subpath -------------------------------
  const rest = this.extractRest(req.url);
  const parsedPath = rest === null ? null : parseGitPath(rest);
  // --- resolve the requested git service kind (read vs write) -------------
  const service =
    typeof req.query === 'object' && req.query !== null
      ? (req.query as Record<string, string | undefined>).service
      : undefined;
  const serviceKind: GitHttpServiceKind | null = parsedPath
    ? resolveServiceKind({
        method: req.method,
        subpath: parsedPath.subpath,
        service,
      })
    : null;
  // --- authenticate (HTTP Basic) ------------------------------------------
  const authHeader = req.headers['authorization'];
  const basic = parseBasicAuth(
    Array.isArray(authHeader) ? authHeader[0] : authHeader,
  );
  // Resolve the workspace ourselves — DomainMiddleware does NOT run for this
  // raw root route, so `req.raw.workspaceId` is never set (see resolver doc).
  const workspaceId: string | null = await this.resolveWorkspaceId(req);
  let user: User | undefined;
  let credentialsValid = false;
  let throttled = false;
  if (basic && workspaceId) {
    // Brute-force speed bump, mirroring the /mcp Basic path EXACTLY. Reserve
    // ALL three keys ATOMICALLY and BEFORE bcrypt (tryReserve folds the check
    // and the increment into one synchronous step), so the (threshold+1)-th
    // attempt is rejected before verifyUserCredentials/bcrypt ever runs and
    // concurrent attempts for one email cannot all observe count=0. The
    // reservation IS the recorded failure: a genuine credential failure leaves
    // it in place, a SUCCESS clears it (reset), a non-credential error releases
    // it (so it cannot burn a victim's budget).
    const emailLc = basic.email.toLowerCase();
    const ip = clientIp(req);
    const ipKey = `ip:${ip}`;
    const ipEmailKey = `ip-email:${ip}:${emailLc}`;
    // GLOBAL per-email backstop (no IP): the only key that survives IP / XFF
    // rotation, so it is the real account-brute defense (see mcp-auth.helpers).
    const emailKey = `email:${emailLc}`;
    const ipOk = this.failedLogins.tryReserve(ipKey);
    const ipEmailOk = this.failedLogins.tryReserve(ipEmailKey);
    const emailOk = this.failedLogins.tryReserve(emailKey);
    if (!ipOk || !ipEmailOk || !emailOk) {
      // Blocked: release only the keys we actually reserved this call so an
      // already-throttled request does not over-charge keys still under budget
      // (matches the /mcp reserve model). Do NOT run bcrypt.
      if (ipOk) this.failedLogins.release(ipKey);
      if (ipEmailOk) this.failedLogins.release(ipEmailKey);
      if (emailOk) this.failedLogins.release(emailKey);
      throttled = true;
    } else {
      try {
        user = await this.authService.verifyUserCredentials(
          { email: basic.email, password: basic.password },
          workspaceId,
        );
        credentialsValid = true;
        // Success: clear the per-IP and per-IP+email budgets fully; for the
        // GLOBAL per-email key only release the one increment THIS request took
        // (do not reset() it, or a victim's own success would wipe a parallel
        // attacker's accumulated failures for that email — same rule as /mcp).
        this.failedLogins.reset(ipKey);
        this.failedLogins.reset(ipEmailKey);
        this.failedLogins.release(emailKey);
      } catch (err) {
        // Only a genuine credentials failure (wrong email/password) keeps the
        // reservation (it IS the recorded failure). Any other error — DB error,
        // etc. — is NOT a password-guess signal, so release the reservation so
        // it cannot burn a victim's limiter budget. credentialsValid stays
        // false either way (the gate then 401s).
        if (!isCredentialsFailure(err)) {
          this.failedLogins.release(ipKey);
          this.failedLogins.release(ipEmailKey);
          this.failedLogins.release(emailKey);
        }
        if (!(err instanceof UnauthorizedException)) {
          // A non-credential failure (e.g. DB error): treat as invalid creds
          // for the gate (a 401), and log without leaking the password/header.
          this.logger.warn(
            `git-http: credential check error: ${
              err instanceof Error ? err.message : String(err)
            }`,
          );
        }
        credentialsValid = false;
      }
    }
  }
  // Brute-force throttle tripped: reject BEFORE the gate (and before any space
  // lookup), so a throttled attacker gets a uniform 429 with no bcrypt and no
  // existence signal. WWW-Authenticate is still sent so a legitimate client
  // re-prompts after the window.
  if (throttled) {
    reply
      .header('WWW-Authenticate', WWW_AUTHENTICATE)
      .header('Retry-After', '60')
      .status(429)
      .send('Too many failed authentication attempts. Try again later.');
    return;
  }
  // --- resolve the space + per-space gating + CASL ------------------------
  let spaceExists = false;
  let spaceGitSyncEnabled = false;
  let spaceId: string | undefined;
  // The user has SOME role in the space. SECURITY: a non-member must get the
  // SAME 404 a missing/disabled space gets — never a 403 — or the 403↔404 split
  // would let any authenticated user brute-force slugs to learn which spaces
  // exist / have sync enabled (the leak this gate's contract forbids). 403 is
  // reserved for a MEMBER who lacks the required role (existence already known).
  let userIsSpaceMember = false;
  let permissionGranted = false;
  if (credentialsValid && user && workspaceId && parsedPath && serviceKind) {
    const space = await this.spaceRepo.findById(
      parsedPath.spaceId,
      workspaceId,
    );
    if (space) {
      spaceExists = true;
      spaceId = space.id;
      spaceGitSyncEnabled =
        (space.settings as any)?.gitSync?.enabled === true;
      // Only evaluate CASL when the space is actually a sync candidate — an
      // unrelated space stays a 404 (existence is never revealed).
      if (spaceGitSyncEnabled) {
        try {
          const ability = await this.spaceAbilityFactory.createForUser(
            user,
            space.id,
          );
          // createForUser RESOLVED -> the user holds a role in this space (it
          // throws NotFound for a non-member). Record membership BEFORE the
          // permission check: a member lacking the role -> 403; a non-member ->
          // 404 (handled by the gate via userIsSpaceMember=false below).
          userIsSpaceMember = true;
          const action =
            serviceKind === 'write'
              ? SpaceCaslAction.Manage
              : SpaceCaslAction.Read;
          permissionGranted = ability.can(action, SpaceCaslSubject.Page);
        } catch {
          // createForUser throws NotFoundException when the user has no role in
          // the space (a non-member). Leave userIsSpaceMember=false so the gate
          // returns 404, NOT 403 — a non-member must not be able to tell this
          // space apart from a non-existent one. (Any other error also falls
          // here and is treated as non-member -> 404, the safe default that
          // never reveals existence.)
          userIsSpaceMember = false;
          permissionGranted = false;
        }
      }
    }
  }
  // --- the gate decision (pure) -------------------------------------------
  const decision = decideGitHttpGate({
    hasCredentials: Boolean(basic),
    credentialsValid,
    serviceKind,
    gitSyncEnabled: this.environmentService.isGitSyncEnabled(),
    gitHttpEnabled: this.environmentService.isGitSyncHttpEnabled(),
    spaceExists,
    spaceGitSyncEnabled,
    userIsSpaceMember,
    permissionGranted,
  });
  if (decision.kind === 'unauthorized') {
    reply
      .header('WWW-Authenticate', WWW_AUTHENTICATE)
      .status(401)
      .send('Authentication required');
    return;
  }
  if (decision.kind === 'bad-request') {
    reply.status(400).send('Bad request');
    return;
  }
  if (decision.kind === 'not-found') {
    reply.status(404).send('Not found');
    return;
  }
  if (decision.kind === 'forbidden') {
    reply.status(403).send('Forbidden');
    return;
  }
  // decision.kind === 'proceed' — guaranteed below (narrowing for TS).
  if (!parsedPath || !serviceKind || !spaceId || !user || !workspaceId) {
    // Defensive: 'proceed' implies these are set, but keep TS + runtime safe.
    reply.status(500).send('Internal server error');
    return;
  }
  // --- dispatch to git http-backend ---------------------------------------
  const backendRequest = {
    spaceId,
    subpath: parsedPath.subpath,
    method: req.method,
    queryString: this.extractQueryString(req.url),
    contentType: this.headerValue(req.headers['content-type']) ?? '',
    gitProtocol: this.headerValue(req.headers['git-protocol']),
    remoteUser: user.email,
  };
  try {
    // Idempotently make the vault servable (repo + receive/upload config).
    await this.vaultRegistry.ensureServable(spaceId);
  } catch (err) {
    this.logger.error(
      `git-http: failed to prepare vault for space ${spaceId}: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
    if (!reply.sent) reply.status(500).send('Internal server error');
    return;
  }
  // Hijack the reply so the backend can stream the raw (possibly binary) CGI
  // response directly to the socket (mirrors the MCP transport pattern).
  // From here on Fastify sends nothing for us: every path below must end
  // rawRes itself, including on error.
  reply.hijack();
  // Only the ACTUAL pack-receiving write (POST git-receive-pack) runs under the
  // space lock + a Docmost cycle. Everything else streams the http-backend
  // with NO cycle: a fetch/clone (read), AND the write-AUTHORIZED but READ-ONLY
  // ref advertisement (GET info/refs?service=git-receive-pack). Running a cycle
  // on info/refs is both wasteful and HARMFUL — it holds the per-space lock, so
  // the push's immediately-following POST git-receive-pack collides with it and
  // 503s (a deterministic push failure). Authz already happened above via the
  // gate.
  const isReceivePack =
    req.method === 'POST' && parsedPath.subpath === 'git-receive-pack';
  if (serviceKind === 'read' || !isReceivePack) {
    // The clone's default branch comes from the HEAD symref advertised by the
    // upload-pack ref advertisement (or a dumb `GET HEAD`). The engine
    // transiently checks out the read-only `docmost` mirror mid-cycle, so serve
    // THAT advertisement with HEAD pinned to `main` under the per-space lock so
    // a clone never defaults to `docmost` (bug #3). Pack streaming and every
    // other read are resolved by object SHA and need no pin, so they stream
    // directly (no lock) as before.
    const isReadAdvertise =
      req.method === 'GET' &&
      ((parsedPath.subpath === 'info/refs' &&
        service === 'git-upload-pack') ||
        parsedPath.subpath === 'HEAD');
    try {
      if (isReadAdvertise) {
        await this.orchestrator.serveReadAdvertisement(spaceId, () =>
          this.backend.run(backendRequest, rawReq, rawRes),
        );
      } else {
        await this.backend.run(backendRequest, rawReq, rawRes);
      }
    } catch (err) {
      // The reply is hijacked, so an escaping rejection would leave the socket
      // open and the git client hanging. Mirror the push path instead.
      if (err instanceof GitSyncLockHeldError) {
        this.replyBusy(rawRes);
        return;
      }
      this.logger.error(
        `git-http: read streaming error for space ${spaceId}: ${
          err instanceof Error ? err.message : String(err)
        }`,
      );
      // Nothing written yet -> do not let end() default to an empty 200.
      if (!rawRes.headersSent) rawRes.statusCode = 500;
      this.endQuietly(rawRes);
    }
    return;
  }
  // Push: run the receive-pack under the space lock, then a Docmost cycle.
  try {
    await this.orchestrator.ingestExternalPush(
      spaceId,
      workspaceId,
      // The lock's lost-lock signal is threaded into the backend so the
      // receive-pack child is killed if the lock lapses mid-write (warning #3).
      (signal) => this.backend.run(backendRequest, rawReq, rawRes, signal),
    );
  } catch (err) {
    if (err instanceof GitSyncLockHeldError) {
      // The lock could not be acquired and the receive-pack never ran, so the
      // response is still unwritten — answer 503 so git retries.
      this.replyBusy(rawRes);
      return;
    }
    // Any other error: the receive-pack closure handles its own response, so
    // we only log here and make sure the socket is closed.
    this.logger.error(
      `git-http: push ingestion error for space ${spaceId}: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
    this.endQuietly(rawRes);
  }
}
/**
 * Answer a hijacked request whose per-space lock could not be acquired:
 * 503 + `Retry-After: 1` when the headers are still unsent, then end the
 * response with a short plain-text body. Errors from `end` are ignored.
 */
private replyBusy(rawRes: FastifyReply['raw']): void {
  if (!rawRes.headersSent) {
    rawRes.statusCode = 503;
    rawRes.setHeader('Content-Type', 'text/plain');
    rawRes.setHeader('Retry-After', '1');
  }
  try {
    rawRes.end('git-sync busy, retry');
  } catch {
    /* ignore */
  }
}
/** End a hijacked raw response if it is still open; never throws. */
private endQuietly(rawRes: FastifyReply['raw']): void {
  try {
    if (!rawRes.writableEnded) rawRes.end();
  } catch {
    /* ignore */
  }
}
/**
 * Collapse a header that may arrive as a list into a single value: the first
 * entry of an array, otherwise the value as given (possibly undefined).
 */
private headerValue(value: string | string[] | undefined): string | undefined {
  return Array.isArray(value) ? value[0] : value;
}
/**
 * Return the path portion of `url` that follows the `/git/` prefix, with any
 * query string (everything from the first `?`) removed.
 *
 * @param url The raw request URL.
 * @returns The remainder after `/git/`, or null when the path does not start
 *   with that prefix.
 */
private extractRest(url: string): string | null {
  const gitPrefix = '/git/';
  const queryStart = url.indexOf('?');
  const pathOnly = queryStart < 0 ? url : url.substring(0, queryStart);
  return pathOnly.startsWith(gitPrefix)
    ? pathOnly.substring(gitPrefix.length)
    : null;
}
/**
 * Return everything after the first `?` in `url` (the raw, undecoded query
 * string), or the empty string when the URL has no `?`.
 */
private extractQueryString(url: string): string {
  const queryStart = url.indexOf('?');
  if (queryStart < 0) {
    return '';
  }
  return url.substring(queryStart + 1);
}
}