fix(git-sync): address review — configurable poll, always-on loop-guard, cleanup

Comprehensive-review follow-ups (APPROVE WITH SUGGESTIONS; no critical issues):
- poll interval is now actually configurable: replaced the hardcoded
  @Interval('git-sync-poll', 15000) with a dynamic SchedulerRegistry interval
  registered in onModuleInit from getGitSyncPollIntervalMs() (cleared in
  onModuleDestroy); /status and the real cadence now share one config source.
  Boots logging 'poll interval registered (Nms)'.
- loop-guard now ALWAYS applies: the lastUpdatedSource==='git-sync' skip was
  nested inside the !spaceId/!workspaceId branch, so structural self-writes
  (CREATE/MOVE/RESTORE/SOFT_DELETE, which carry spaceId+workspaceId) bypassed it
  and re-triggered cycles. Fetch the page row once, guard unconditionally, then
  resolve space/workspace.
- remove the dead PAGE_CONTENT_UPDATED subscription (it's a BullMQ job, never an
  EventEmitter event; body edits arrive via PAGE_UPDATED).
- fix the stale datasource comment (PageService DOES stamp 'git-sync' now).
- env getters: parseInt radix 10 + NaN/<=0 fallback for poll/debounce (+ max
  deletes), with 6 new environment.service.spec tests.

tsc clean; jest 723 pass; live cycle re-verified post-refactor (ran, push
applied, unflagged 92-page space untouched).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-21 16:01:37 +03:00
parent 582e1976cc
commit 593f181bbc
7 changed files with 155 additions and 46 deletions

View File

@@ -35,4 +35,46 @@ describe('EnvironmentService', () => {
expect(withEnv('not-a-number').getGitSyncMaxDeletesPerCycle()).toBe(5);
});
});
describe('getGitSyncPollIntervalMs', () => {
const withEnv = (value?: string) =>
new EnvironmentService({
get: (_key: string, fallback?: string) => value ?? fallback,
} as any);
it('defaults to 15000 when unset', () => {
expect(withEnv().getGitSyncPollIntervalMs()).toBe(15000);
});
it('parses a valid positive int', () => {
expect(withEnv('30000').getGitSyncPollIntervalMs()).toBe(30000);
});
it('falls back to 15000 for non-positive or unparseable values', () => {
expect(withEnv('0').getGitSyncPollIntervalMs()).toBe(15000);
expect(withEnv('-100').getGitSyncPollIntervalMs()).toBe(15000);
expect(withEnv('not-a-number').getGitSyncPollIntervalMs()).toBe(15000);
});
});
describe('getGitSyncDebounceMs', () => {
const withEnv = (value?: string) =>
new EnvironmentService({
get: (_key: string, fallback?: string) => value ?? fallback,
} as any);
it('defaults to 2000 when unset', () => {
expect(withEnv().getGitSyncDebounceMs()).toBe(2000);
});
it('parses a valid positive int', () => {
expect(withEnv('500').getGitSyncDebounceMs()).toBe(500);
});
it('falls back to 2000 for non-positive or unparseable values', () => {
expect(withEnv('0').getGitSyncDebounceMs()).toBe(2000);
expect(withEnv('-5').getGitSyncDebounceMs()).toBe(2000);
expect(withEnv('not-a-number').getGitSyncDebounceMs()).toBe(2000);
});
});
});

View File

@@ -360,18 +360,28 @@ export class EnvironmentService {
return this.configService.get<string>('GIT_SYNC_REMOTE_TEMPLATE');
}
/** Poll-safety interval in ms (default 15000). */
/**
* Poll-safety interval in ms (default 15000). A NaN / non-positive value falls
* back to the default so a bad override can never disable or zero the poll loop.
*/
getGitSyncPollIntervalMs(): number {
return parseInt(
const parsed = parseInt(
this.configService.get<string>('GIT_SYNC_POLL_INTERVAL_MS', '15000'),
10,
);
return Number.isFinite(parsed) && parsed > 0 ? parsed : 15000;
}
/** Event debounce window in ms (default 2000). */
/**
* Event debounce window in ms (default 2000). A NaN / non-positive value falls
* back to the default so a bad override can never disable the debounce.
*/
getGitSyncDebounceMs(): number {
return parseInt(
const parsed = parseInt(
this.configService.get<string>('GIT_SYNC_DEBOUNCE_MS', '2000'),
10,
);
return Number.isFinite(parsed) && parsed > 0 ? parsed : 2000;
}
/**
@@ -384,6 +394,7 @@ export class EnvironmentService {
getGitSyncMaxDeletesPerCycle(): number {
const parsed = parseInt(
this.configService.get<string>('GIT_SYNC_MAX_DELETES_PER_CYCLE', '5'),
10,
);
return Number.isFinite(parsed) && parsed > 0 ? parsed : 5;
}

View File

@@ -13,14 +13,16 @@ import { EventName } from '../../common/events/event.contants';
* The page lifecycle events the git-sync listener reacts to (plan §10). A change
* to any of these in an enabled space schedules a debounced sync cycle.
* - PAGE_CREATED / PAGE_UPDATED / PAGE_MOVED — structural + content edits;
* - PAGE_CONTENT_UPDATED — the collab body-save job (real name in the enum);
* - PAGE_SOFT_DELETED / PAGE_RESTORED — Trash transitions (deletes are soft);
* - PAGE_MOVED_TO_SPACE — cross-space move (cross-repo, plan §5).
*
* NOTE: body edits arrive via PAGE_UPDATED (emitted from persistence.extension),
* NOT via EventName.PAGE_CONTENT_UPDATED — that name is a BullMQ queue-job name,
* not an EventEmitter2 event, so @OnEvent would never fire for it.
*/
export const GIT_SYNC_PAGE_EVENTS = [
EventName.PAGE_CREATED,
EventName.PAGE_UPDATED,
EventName.PAGE_CONTENT_UPDATED,
EventName.PAGE_MOVED,
EventName.PAGE_MOVED_TO_SPACE,
EventName.PAGE_SOFT_DELETED,

View File

@@ -21,9 +21,10 @@ import { GitSyncController } from './git-sync.controller';
* - EnvironmentModule (global) — EnvironmentService config;
* - CollaborationModule — exports CollaborationGateway for native body writes;
* - PageModule — exports PageService for structural mutations;
* - ScheduleModule (NOT forRoot) — so @Interval is discovered. forRoot() is
* already registered globally by TelemetryModule; importing the plain module
* here avoids a duplicate scheduler registration (plan §6 note).
* - ScheduleModule (NOT forRoot) — so SchedulerRegistry is injectable (the
* orchestrator registers a DYNAMIC poll interval in onModuleInit). forRoot()
* is already registered globally by TelemetryModule; importing the plain
* module here avoids a duplicate scheduler registration (plan §6 note).
*
* RedisService is provided by the global RedisModule (app.module) and CASL's
* WorkspaceAbilityFactory by the global CaslModule — both resolve without an

View File

@@ -35,7 +35,9 @@ interface DebounceEntry {
*
* Loop-guard (best-effort, plan §10/§8.2): an event whose page row already reads
* `lastUpdatedSource === 'git-sync'` is the orchestrator's OWN write, so we skip
* it to avoid a write -> event -> sync echo. This is the cheap first guard; the
* it to avoid a write -> event -> sync echo. The guard ALWAYS runs (the page row
* is fetched for every event, structural ones included). This is the cheap first
* guard; the
* full bodyHash + updatedAt loop-guard (consuming the push side's
* `PushedPageRecord`) is a later hardening step (plan §8.2) — noted, not built
* here. The poll-safety interval still converges anything this guard drops.
@@ -53,8 +55,8 @@ export class PageChangeListener {
/**
* One handler bound to ALL git-sync page events (the array form of `@OnEvent`).
* Resolves the affected page's space + workspace, applies the cheap loop-guard,
* and schedules the debounced cycle.
* Fetches the page row once to apply the loop-guard (unconditionally) and to
* resolve the page's space + workspace, then schedules the debounced cycle.
*/
@OnEvent(GIT_SYNC_PAGE_EVENTS as unknown as string[])
async handlePageEvent(event: PageEventLike): Promise<void> {
@@ -64,21 +66,24 @@ export class PageChangeListener {
const pageId = this.firstPageId(event);
if (!pageId) return;
// Prefer a spaceId carried on the event; otherwise read the page row (also
// gives us the loop-guard source). A missing page (hard-deleted) is ignored.
let spaceId = this.eventSpaceId(event, pageId);
let workspaceId = event.workspaceId;
// The loop-guard MUST always run — even structural events that already
// carry spaceId+workspaceId could be the orchestrator's OWN write (it stamps
// lastUpdatedSource='git-sync' on create/update/move/rename + body writes).
// So ALWAYS fetch the page row: it gives us the loop-guard source AND fills
// in any missing space/workspace in a single read. A missing page
// (hard-deleted) is ignored.
const page = await this.pageRepo.findById(pageId, {
includeContent: false,
});
if (!page) return;
if (!spaceId || !workspaceId) {
const page = await this.pageRepo.findById(pageId, {
includeContent: false,
});
if (!page) return;
spaceId = spaceId ?? page.spaceId;
workspaceId = workspaceId ?? page.workspaceId;
// Loop-guard: skip our own writes (best-effort, plan §8.2).
if (page.lastUpdatedSource === 'git-sync') return;
}
// Loop-guard: skip our own writes to avoid a write -> event -> sync echo
// (best-effort, plan §8.2). Applies unconditionally now.
if (page.lastUpdatedSource === 'git-sync') return;
// Prefer ids carried on the event; fall back to the row we already fetched.
const spaceId = this.eventSpaceId(event, pageId) ?? page.spaceId;
const workspaceId = event.workspaceId ?? page.workspaceId;
if (!spaceId || !workspaceId) return;
this.schedule(spaceId, workspaceId);

View File

@@ -1,5 +1,10 @@
import { Injectable, Logger } from '@nestjs/common';
import { Interval } from '@nestjs/schedule';
import {
Injectable,
Logger,
OnModuleDestroy,
OnModuleInit,
} from '@nestjs/common';
import { SchedulerRegistry } from '@nestjs/schedule';
import { RedisService } from '@nestjs-labs/nestjs-ioredis';
import type { Redis } from 'ioredis';
import { randomUUID } from 'node:crypto';
@@ -60,18 +65,21 @@ export interface GitSyncRunStatus {
* first; per-space opt-in is now REQUIRED on top of it.
*/
@Injectable()
export class GitSyncOrchestrator {
export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
private readonly logger = new Logger(GitSyncOrchestrator.name);
private readonly redis: Redis;
/** Unique per process instance — the leader-lock value (CAS on release). */
private readonly instanceId = randomUUID();
/** In-process per-space mutex: spaceIds with a cycle currently running. */
private readonly running = new Set<string>();
/** The registered poll-interval name, or null when none is registered. */
private pollIntervalName: string | null = null;
constructor(
private readonly environmentService: EnvironmentService,
private readonly dataSource: GitmostDataSourceService,
private readonly vaultRegistry: VaultRegistryService,
private readonly schedulerRegistry: SchedulerRegistry,
redisService: RedisService,
@InjectKysely() private readonly db: KyselyDB,
) {
@@ -346,18 +354,60 @@ export class GitSyncOrchestrator {
// --- poll-safety interval (plan §10) -------------------------------------
/** Registered interval name (shared by registration + teardown). */
private static readonly POLL_INTERVAL_NAME = 'git-sync-poll';
/**
* Poll-safety loop: catches events missed by the listener and reconciles after
* downtime. Gated on GIT_SYNC_ENABLED. The interval is a fixed value because
* `@Interval` cannot read config at class-eval time — the body short-circuits
* when disabled. Each enabled space runs under its own lock (overlaps skipped).
* Register the poll-safety interval DYNAMICALLY so it honors the configured
* GIT_SYNC_POLL_INTERVAL_MS (a static `@Interval` decorator could only hardcode
* a value at class-eval time, before config is readable — diverging from what
* `/status` reports). When git-sync is disabled we register nothing.
*
* ScheduleModule: registered ONCE globally by TelemetryModule
* (ScheduleModule.forRoot()); GitSyncModule imports the plain ScheduleModule so
* @Interval is discovered without a duplicate forRoot (plan §6 note).
* ScheduleModule: forRoot() is registered ONCE globally by TelemetryModule;
* GitSyncModule imports the plain ScheduleModule so SchedulerRegistry is
* injectable without a duplicate forRoot (plan §6 note).
*/
@Interval('git-sync-poll', 15000)
async poll(): Promise<void> {
onModuleInit(): void {
if (!this.environmentService.isGitSyncEnabled()) return;
const ms = this.environmentService.getGitSyncPollIntervalMs();
const handle = setInterval(() => {
void this.pollTick();
}, ms);
// Do not keep the event loop alive solely for the poll timer.
handle.unref?.();
this.schedulerRegistry.addInterval(
GitSyncOrchestrator.POLL_INTERVAL_NAME,
handle,
);
this.pollIntervalName = GitSyncOrchestrator.POLL_INTERVAL_NAME;
this.logger.log(`git-sync: poll interval registered (${ms}ms).`);
}
/** Tear down the dynamic interval on shutdown (guard against double-delete). */
onModuleDestroy(): void {
if (!this.pollIntervalName) return;
try {
// deleteInterval clears the timer and removes it from the registry.
this.schedulerRegistry.deleteInterval(this.pollIntervalName);
} catch (err) {
this.logger.warn(
`git-sync: failed to delete poll interval: ${
err instanceof Error ? err.message : String(err)
}`,
);
} finally {
this.pollIntervalName = null;
}
}
/**
* One poll tick: catches events missed by the listener and reconciles after
* downtime. Gated on GIT_SYNC_ENABLED (defensive — the interval is only
* registered when enabled). Each enabled space runs under its own lock
* (overlaps skipped). Never throws (runOnce swallows per-space errors).
*/
private async pollTick(): Promise<void> {
if (!this.environmentService.isGitSyncEnabled()) return;
let spaces: EnabledSpace[];
try {

View File

@@ -30,14 +30,12 @@ export interface GitSyncBindContext {
}
/**
* The git-sync provenance carried into PageService writes. PageService stamps
* `lastUpdatedSource = 'agent'` only when `provenance.actor === 'agent'`; for any
* other actor it leaves the column at its default ('user'). So create/move/rename
* through PageService DO NOT yet stamp 'git-sync' on the page row — see the note
* in the report. Body writes (writeBody, §3.3) DO stamp 'git-sync' because the
* collab context's `actor: 'git-sync'` flows into PersistenceExtension. We pass a
* 'git-sync' provenance anyway so that when PageService is extended to honor it,
* the marker propagates without touching the datasource.
* The git-sync provenance carried into PageService writes. PageService.create/
* update/movePage honor this provenance and stamp `lastUpdatedSource = 'git-sync'`
* on the page row when `provenance.actor === 'git-sync'`. Body writes (writeBody,
* §3.3) likewise stamp 'git-sync' because the collab context's `actor: 'git-sync'`
* flows into PersistenceExtension. So ALL git-sync structural + body writes mark
* the row's source, which the listener's loop-guard reads to skip our own writes.
*/
const GIT_SYNC_PROVENANCE: AuthProvenanceData = {
actor: 'git-sync',