fix(git-sync): address PR #119 review (#1571)

Resolve the code-review findings from comment #1571 on PR #119.

Engine (packages/git-sync):
- Idempotent CREATE on retry: before createPage, look the page up in the
  live Docmost tree by (parentPageId, title) and ADOPT it instead of
  duplicating when a prior cycle created it but failed to persist the
  pageId back to disk. Only trust a COMPLETE tree for the lookup; fall
  back to createPage otherwise. Covered by new tests incl. a complete=false
  regression-lock.
- Route applyPullActions diagnostics through an injected logger instead of
  bare console (thread log from the cycle).
- Add a timeout to the git execFile chokepoint (runRaw) so a hung git
  subprocess cannot wedge a sync cycle.
- Translate remaining Russian code comments to English.
- Remove dead standalone-CLI code (parseArgs/PushParsedArgs,
  parseSettings/envSchema, loadSettingsOrExit + config-errors.ts) and the
  matching index exports/specs; keep the Settings type.
- Fix the dangling docs link in package.json.
- Add a schema-surface snapshot guard so any drift in the vendored
  document schema is a loud, must-review CI failure (+ provenance header).

Server (apps/server):
- Add a configurable watchdog timeout to the spawned git http-backend so a
  stalled push cannot hold the per-space lock forever
  (GIT_SYNC_BACKEND_TIMEOUT_MS).
- Close the in-process TOCTOU window in SpaceLockService.withSpaceLock by
  reserving the slot synchronously before acquire.
- Add tests: removePage git-sync provenance (both branches), ensureServable
  force-push-protection git configs, and the phase-B+ datasource methods.

Docs / build:
- AGENTS.md: list git-sync as the fifth workspace package and note the
  three schema mirrors; fix the dangling git-sync-plan.md backlog link.
- pnpm-lock.yaml: add the missing @docmost/git-sync workspace link so
  pnpm install --frozen-lockfile (CI default) succeeds.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-26 00:06:44 +03:00
committed by claude code agent 227
parent 52959de2f3
commit 28d2560dfd
31 changed files with 767 additions and 462 deletions

View File

@@ -713,5 +713,65 @@ describe('PageService', () => {
expect(payload.lastUpdatedSource).toBeUndefined();
});
});
describe('removePage()', () => {
  // Contract under test: PageService.removePage passes a 4th `source` argument
  // through to pageRepo.removePage. It is 'git-sync' for a git-sync-driven
  // soft-delete (so the change-listener loop-guard skips its own write) and
  // undefined in every other case.

  // Position of `source` in pageRepo.removePage(pageId, userId, workspaceId, source).
  const SOURCE_ARG_INDEX = 3;

  // Builds a PageService whose pageRepo records removePage calls; the remaining
  // constructor collaborators are empty stubs.
  function makeService() {
    const removePage = jest.fn().mockResolvedValue(undefined);
    const pageRepo = { removePage };
    const svc = new PageService(
      pageRepo as any, // pageRepo
      {} as any, // pagePermissionRepo
      {} as any, // attachmentRepo
      {} as any, // db
      {} as any, // storageService
      {} as any, // attachmentQueue
      {} as any, // aiQueue
      {} as any, // generalQueue
      {} as any, // eventEmitter
      {} as any, // collaborationGateway
      {} as any, // watcherService
      {} as any, // transclusionService
    );
    return { svc, pageRepo };
  }

  it("forwards 'git-sync' as the source for a git-sync soft-delete", async () => {
    const { svc, pageRepo } = makeService();

    await svc.removePage('page-1', 'user-1', 'ws-1', GIT_SYNC);

    expect(pageRepo.removePage).toHaveBeenCalledTimes(1);
    const forwarded = pageRepo.removePage.mock.calls[0];
    expect(forwarded[0]).toBe('page-1');
    expect(forwarded[1]).toBe('user-1');
    expect(forwarded[2]).toBe('ws-1');
    expect(forwarded[SOURCE_ARG_INDEX]).toBe('git-sync');
  });

  it('forwards undefined as the source for a plain user delete', async () => {
    const { svc, pageRepo } = makeService();

    await svc.removePage('page-1', 'user-1', 'ws-1', USER_PROVENANCE);

    const forwarded = pageRepo.removePage.mock.calls[0];
    expect(forwarded[SOURCE_ARG_INDEX]).toBeUndefined();
  });

  it('forwards undefined as the source when no provenance is given', async () => {
    const { svc, pageRepo } = makeService();

    await svc.removePage('page-1', 'user-1', 'ws-1');

    const forwarded = pageRepo.removePage.mock.calls[0];
    expect(forwarded[SOURCE_ARG_INDEX]).toBeUndefined();
  });
});
});
});

View File

@@ -384,6 +384,21 @@ export class EnvironmentService {
return Number.isFinite(parsed) && parsed > 0 ? parsed : 15000;
}
/**
 * Spawned `git http-backend` watchdog timeout in ms (default 120000). Bounds a
 * single smart-HTTP request so a stalled `git-receive-pack` cannot hold the
 * per-space lock forever (the child is killed and a 500 sent on expiry).
 *
 * Reads `GIT_SYNC_BACKEND_TIMEOUT_MS` and sanitises it:
 * - A NaN / non-positive value falls back to the default so a bad override can
 *   never disable the watchdog.
 * - A value above 2147483647 is clamped to that ceiling. Node timers coerce a
 *   larger delay to 1ms, which would make the watchdog fire almost immediately
 *   and kill every request instead of granting a long timeout.
 *
 * @returns A positive timeout in ms that is safe to pass to `setTimeout`.
 */
getGitSyncBackendTimeoutMs(): number {
  const defaultTimeoutMs = 120000;
  // Largest delay accepted by Node timers (2^31 - 1 ms, roughly 24.8 days).
  const maxTimerDelayMs = 2147483647;
  const parsed = parseInt(
    this.configService.get<string>('GIT_SYNC_BACKEND_TIMEOUT_MS', '120000'),
    10,
  );
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return defaultTimeoutMs;
  }
  return Math.min(parsed, maxTimerDelayMs);
}
/**
* Event debounce window in ms (default 2000). A NaN / non-positive value falls
* back to the default so a bad override can never disable the debounce.

View File

@@ -38,6 +38,8 @@ function fakeChild() {
end: jest.fn(),
write: jest.fn(),
});
// The watchdog kills the child on timeout; capture the signal.
child.kill = jest.fn();
return child;
}
@@ -80,8 +82,13 @@ const baseRequest: GitHttpBackendRequest = {
remoteUser: 'alice@example.com',
};
function buildService() {
const env = { getGitSyncDataDir: jest.fn(() => '/vaults') };
// Builds a GitHttpBackendService around a stubbed environment service.
// `backendTimeoutMs` is what the stub reports as the watchdog timeout for the
// spawned git http-backend; tests inject a tiny value (or use fake timers) to
// drive the timeout branch.
function buildService(backendTimeoutMs = 120000) {
  const getGitSyncDataDir = jest.fn(() => '/vaults');
  const getGitSyncBackendTimeoutMs = jest.fn(() => backendTimeoutMs);
  const env = { getGitSyncDataDir, getGitSyncBackendTimeoutMs };
  return new GitHttpBackendService(env as any);
}
@@ -182,6 +189,56 @@ describe('GitHttpBackendService.run', () => {
await p;
});
it('(d) timeout: a child that never closes is killed and a 500 is sent', async () => {
  // The child never emits stdout/close (a stalled git-receive-pack). With a
  // tiny injected watchdog timeout the run() promise must still resolve: the
  // child is killed and a clean 500 is sent (no headers were sent yet).
  const child = fakeChild();
  spawnMock.mockReturnValue(child);
  const service = buildService(5); // 5ms watchdog
  const res = fakeRes();
  const warnSpy = jest.spyOn(Logger.prototype, 'warn');
  try {
    // run() resolves only via the watchdog firing (no close/error emitted).
    await service.run(baseRequest, fakeReq(), res);

    expect(child.kill).toHaveBeenCalledWith('SIGTERM');
    expect(warnSpy).toHaveBeenCalled();
    expect(res.statusCode).toBe(500);
    expect(res.end).toHaveBeenCalledWith('Internal server error');
  } finally {
    // The spy patches Logger.prototype, which is shared by every test in the
    // process; restore it even when an assertion above throws so the patch
    // (and its recorded calls) cannot leak into later tests.
    warnSpy.mockRestore();
  }
});
it('(d) timeout watchdog is cleared on a normal close (no kill, no 500)', async () => {
  // Happy path: the request finishes long before the watchdog window, so the
  // child must not be killed and the timeout 500 must not be sent. The close
  // handler is expected to clear the timer.
  const cgiReply = Buffer.from(
    'Status: 200 OK\r\nContent-Type: text/plain\r\n\r\nOK',
    'utf8',
  );
  jest.useFakeTimers();
  try {
    const gitChild = fakeChild();
    spawnMock.mockReturnValue(gitChild);
    const service = buildService(120000);
    const response = fakeRes();
    const pending = service.run(baseRequest, fakeReq(), response);

    // loadGitSync resolves on a real microtask; yield twice so it settles
    // while timers are faked.
    await Promise.resolve();
    await Promise.resolve();

    gitChild.stdout.emit('data', cgiReply);
    gitChild.emit('close', 0);
    await pending;

    // Jumping well past the watchdog window must not trigger it any more.
    jest.advanceTimersByTime(200000);
    expect(gitChild.kill).not.toHaveBeenCalled();
    expect(response.statusCode).toBe(200);
  } finally {
    jest.useRealTimers();
  }
});
it('spawn throwing synchronously -> 500 (spawn-failed)', async () => {
spawnMock.mockImplementation(() => {
throw new Error('spawn EACCES');

View File

@@ -176,6 +176,45 @@ export class GitHttpBackendService {
return done();
}
// Watchdog: a client that opens git-receive-pack and stalls keeps the
// child alive forever, so run() never resolves and (because this runs
// inside withSpaceLock) the per-space lock is held + heartbeat-refreshed
// indefinitely. Bound the request: on expiry kill the child, send a clean
// 500 if nothing was sent yet, and settle the promise. The log carries no
// client echo / credentials / body. `.unref()` so the timer never keeps the
// event loop alive; ALWAYS cleared in the close/error handlers below.
//
// The timeout is read exactly once so the value reported in the log line is
// guaranteed to be the delay the timer was actually armed with.
const watchdogTimeoutMs = this.environmentService.getGitSyncBackendTimeoutMs();
const timer = setTimeout(() => {
  this.logger.warn(
    `git http-backend timed out after ${watchdogTimeoutMs}ms; killing child`,
  );
  try {
    child.kill('SIGTERM');
    // Escalate to SIGKILL shortly after (2s) in case SIGTERM is ignored.
    const sigkill = setTimeout(() => {
      try {
        child.kill('SIGKILL');
      } catch {
        /* ignore */
      }
    }, 2000);
    sigkill.unref?.();
  } catch {
    /* ignore */
  }
  // Nothing sent yet -> a clean 500; otherwise the response is already in
  // flight, so the best we can do is terminate it.
  if (!headerParsed && !rawRes.headersSent) {
    this.send500(rawRes, 'timeout');
  } else {
    try {
      rawRes.end();
    } catch {
      /* ignore */
    }
  }
  done();
}, watchdogTimeoutMs);
timer.unref?.();
// Accumulate stdout until we have the full CGI header block, then write the
// parsed status/headers and start streaming the remaining body bytes.
let headerParsed = false;
@@ -221,6 +260,7 @@ export class GitHttpBackendService {
});
child.on('error', (err) => {
clearTimeout(timer);
if (!headerParsed && !rawRes.headersSent) {
this.send500(rawRes, 'child-error', err);
} else {
@@ -235,6 +275,7 @@ export class GitHttpBackendService {
});
child.on('close', (code) => {
clearTimeout(timer);
if (!headerParsed && !rawRes.headersSent) {
// The child exited before emitting a complete CGI header block.
this.logger.error(

View File

@@ -375,13 +375,109 @@ describe('GitmostDataSourceService', () => {
describe('restorePage', () => {
it('restores via the repo restore path scoped to the workspace', async () => {
const { service, mocks } = build();
await service.bind(CTX).restorePage('p1');
const res = await service.bind(CTX).restorePage('p1');
// Stamps lastUpdatedSource='git-sync' on restore (loop-guard, PR #119).
expect(mocks.pageRepo.restorePage).toHaveBeenCalledWith(
'p1',
'ws-1',
'git-sync',
);
expect(res).toEqual({ id: 'p1' });
});
});
// Phase-B+ continuous-sync methods: not yet called by the engine but wired into
// the GitSyncClient seam (PR #119 review #5). Exercised via the bound client.
describe('listRecentSince', () => {
  // Column names passed as the first argument of each `.where(...)` call.
  const whereColumnsOf = (builder: any) =>
    builder.where.mock.calls.map((call: any[]) => call[0]);

  it('queries non-deleted pages newest-first and ISO-stringifies updatedAt', async () => {
    const { service, mocks } = build([
      {
        id: 'p1',
        slugId: 's1',
        title: 'A',
        parentPageId: null,
        spaceId: 'space-1',
        updatedAt: new Date('2026-06-20T10:00:00.000Z'),
      },
    ]);

    const out = (await service
      .bind(CTX)
      .listRecentSince('space-1', '2026-06-19T00:00:00.000Z', 100)) as any[];

    // The query is built against the `pages` table with the expected chain.
    expect(mocks.db.selectFrom).toHaveBeenCalledWith('pages');
    const builder = mocks.db.selectFrom.mock.results[0].value;
    expect(builder.select).toHaveBeenCalled();
    expect(builder.orderBy).toHaveBeenCalledWith('updatedAt', 'desc');

    // deletedAt-is-null guard plus the conditional spaceId / since clauses.
    const whereColumns = whereColumnsOf(builder);
    for (const column of ['deletedAt', 'spaceId', 'updatedAt']) {
      expect(whereColumns).toContain(column);
    }
    // The cap is applied as a LIMIT.
    expect(builder.limit).toHaveBeenCalledWith(100);

    expect(out).toEqual([
      {
        id: 'p1',
        slugId: 's1',
        title: 'A',
        parentPageId: null,
        spaceId: 'space-1',
        updatedAt: '2026-06-20T10:00:00.000Z',
      },
    ]);
  });

  it('omits the spaceId / since / cap clauses when not supplied', async () => {
    const { service, mocks } = build([]);

    await service.bind(CTX).listRecentSince(undefined, null);

    const builder = mocks.db.selectFrom.mock.results[0].value;
    // Only the deletedAt-is-null guard remains; no spaceId / updatedAt> clauses
    // and no LIMIT.
    expect(whereColumnsOf(builder)).toEqual(['deletedAt']);
    expect(builder.limit).not.toHaveBeenCalled();
  });
});
describe('listTrash', () => {
  it('queries soft-deleted pages and ISO-stringifies deletedAt (null stays null)', async () => {
    // Two rows: one with a real deletion timestamp, one whose deletedAt is null.
    const datedRow = {
      id: 'p1',
      slugId: 's1',
      title: 'Trashed',
      parentPageId: null,
      spaceId: 'space-1',
      deletedAt: new Date('2026-06-21T09:00:00.000Z'),
    };
    const undatedRow = {
      id: 'p2',
      slugId: 's2',
      title: 'NoDate',
      parentPageId: null,
      spaceId: 'space-1',
      deletedAt: null,
    };
    const { service, mocks } = build([datedRow, undatedRow]);

    const out = (await service.bind(CTX).listTrash('space-1')) as any[];

    expect(mocks.db.selectFrom).toHaveBeenCalledWith('pages');
    const builder = mocks.db.selectFrom.mock.results[0].value;

    // Trash predicate (deletedAt is-not null) plus the spaceId filter.
    const whereCalls = builder.where.mock.calls;
    expect(whereCalls).toContainEqual(['deletedAt', 'is not', null]);
    expect(whereCalls).toContainEqual(['spaceId', '=', 'space-1']);
    expect(builder.orderBy).toHaveBeenCalledWith('deletedAt', 'desc');

    const [first, second] = out;
    expect(first.deletedAt).toBe('2026-06-21T09:00:00.000Z');
    expect(second.deletedAt).toBeNull();
  });
});
});

View File

@@ -111,25 +111,33 @@ export class SpaceLockService {
if (this.running.has(spaceId)) {
return { skipped: 'in-progress' };
}
if (!(await this.acquire(spaceId))) {
return { skipped: 'lock-held' };
}
// Reserve the in-process slot synchronously (before any await) so two
// concurrent same-space calls on THIS instance cannot both pass the guard and
// race acquire(). Redis NX is already authoritative across replicas; this just
// closes the in-process TOCTOU window. Released in the outer finally on every
// path (acquire-failure, fn-throw, normal completion).
this.running.add(spaceId);
// Heartbeat: periodically (≈ TTL/3) extend the lock's TTL while `fn` runs so
// a long push (client-controlled receive-pack + the Docmost cycle) cannot
// outlive the fixed TTL and let a concurrent cycle race the working tree. The
// refresh is CAS-guarded (only extends while WE own it). `.unref()` keeps the
// timer from holding the event loop open; it is ALWAYS cleared in `finally`.
const heartbeat = setInterval(() => {
void this.refreshLock(spaceId);
}, Math.max(1, Math.floor(GIT_SYNC_LOCK_TTL_MS / 3)));
heartbeat.unref?.();
try {
return await fn();
if (!(await this.acquire(spaceId))) {
return { skipped: 'lock-held' };
}
// Heartbeat: periodically (≈ TTL/3) extend the lock's TTL while `fn` runs so
// a long push (client-controlled receive-pack + the Docmost cycle) cannot
// outlive the fixed TTL and let a concurrent cycle race the working tree. The
// refresh is CAS-guarded (only extends while WE own it). `.unref()` keeps the
// timer from holding the event loop open; it is ALWAYS cleared in `finally`.
const heartbeat = setInterval(() => {
void this.refreshLock(spaceId);
}, Math.max(1, Math.floor(GIT_SYNC_LOCK_TTL_MS / 3)));
heartbeat.unref?.();
try {
return await fn();
} finally {
clearInterval(heartbeat);
await this.release(spaceId);
}
} finally {
clearInterval(heartbeat);
this.running.delete(spaceId);
await this.release(spaceId);
}
}
}

View File

@@ -7,18 +7,33 @@
// `loadGitSync()` bridge (the ESM `@docmost/git-sync` package cannot be
// `require()`d under jest), so we mock that loader rather than the package.
import { mkdir } from 'node:fs/promises';
import { execFile } from 'node:child_process';
import { loadGitSync } from '../git-sync.loader';
jest.mock('node:fs/promises', () => ({
mkdir: jest.fn(async () => undefined),
}));
// ensureServable shells out through `promisify(execFile)`, so execFile is
// replaced with a callback-style mock that reports success immediately and
// lets the promisified call resolve. Every `git config <key> <value>`
// invocation is recorded on the mock, which is how the four config writes
// (incl. the security-critical receive.denyNonFastForwards=true) are asserted.
jest.mock('node:child_process', () => {
  const execFile = jest.fn(
    (_file: string, _argv: string[], _options: any, callback: any) =>
      callback(null, { stdout: '', stderr: '' }),
  );
  return { execFile };
});
// Cheap VaultGit stub: records the path it was constructed with; no shell-out.
// Declared with a `mock`-prefixed name so jest allows referencing it inside the
// hoisted `jest.mock` factory below.
// `ensureRepo` is a resolved jest.fn so ensureServable can call it. Declared with
// a `mock`-prefixed name so jest allows referencing it inside the hoisted
// `jest.mock` factory below.
const mockVaultGit = jest
.fn()
.mockImplementation((path: string) => ({ path }));
.mockImplementation((path: string) => ({
path,
ensureRepo: jest.fn().mockResolvedValue(undefined),
}));
jest.mock('../git-sync.loader', () => ({
loadGitSync: jest.fn(async () => ({
@@ -32,6 +47,7 @@ import { VaultRegistryService } from './vault-registry.service';
type AnyMock = jest.Mock;
const mkdirMock = mkdir as unknown as AnyMock;
const execFileMock = execFile as unknown as AnyMock;
const VaultGitMock = mockVaultGit;
void loadGitSync;
@@ -78,4 +94,52 @@ describe('VaultRegistryService', () => {
});
});
});
describe('ensureServable', () => {
  // Every recorded execFile call of the form `git config ...`.
  const gitConfigCalls = () =>
    execFileMock.mock.calls.filter(
      ([cmd, args]) => cmd === 'git' && args[0] === 'config',
    );

  it('ensures the repo then writes the four force-push-protection git configs', async () => {
    const { service } = build('/vaults');

    const path = await service.ensureServable('space-1');
    expect(path).toBe('/vaults/space-1');

    // ensureRepo was invoked once on the cached vault.
    const vault = await service.getVault('space-1');
    expect((vault as any).ensureRepo).toHaveBeenCalledTimes(1);

    // The <key> <value> pair of every `git config` write, in call order.
    const recorded = gitConfigCalls();
    const keyValuePairs = recorded.map(([, args]) => [args[1], args[2]]);
    expect(keyValuePairs).toEqual([
      ['receive.denyCurrentBranch', 'updateInstead'],
      // Security-critical: blocks force-push / history rewrites on main.
      ['receive.denyNonFastForwards', 'true'],
      ['http.receivepack', 'true'],
      ['http.uploadpack', 'true'],
    ]);

    // Each config write must run with THIS vault's directory as cwd.
    recorded.forEach(([, , opts]) => {
      expect(opts.cwd).toBe('/vaults/space-1');
    });
  });

  it('rejects (and writes no git config) when ensureRepo rejects', async () => {
    const { service } = build('/vaults');
    // Make the vault's next ensureRepo call fail.
    const vault = await service.getVault('space-1');
    (vault as any).ensureRepo.mockRejectedValueOnce(new Error('init failed'));

    await expect(service.ensureServable('space-1')).rejects.toThrow(
      'init failed',
    );

    expect(gitConfigCalls()).toHaveLength(0);
  });
});
});