Files
gitmost/apps/server/src/integrations/git-sync/services/space-lock.service.spec.ts
T
claude code agent 227 b7e5cb6970 fix(git-sync): push 503 starvation + concurrent-edit marker leak/silent loss
Bug #1 (push 503 starvation): an external receive-pack that briefly overlapped
a poll cycle immediately 503'd because the per-space single-writer lock was
held. Add a BOUNDED retry-acquire on the PUSH path only (SpaceLockService
.withSpaceLock acquireRetry: capped exponential backoff up to ~5s); a transient
overlap now waits and succeeds, a genuinely stuck cycle still 503s after the
bound. The poll cycle passes no retry (immediate skip). Push result stays
deterministic: the receive-pack only runs once the lock is held, so a 503 never
leaves a half-applied ref.

Bug #2 (concurrent-edit marker leak + silent same-block loss):
- Marker leak (a): the push UPDATE path stripped markers for the body sent to
  Docmost but left raw <<<<<<</>>>>>>> committed on the published `main` vault
  forever (autoMergeConflicts ON). Now the cleaned body is written back to the
  vault file + recorded in writtenBack so runPush commits it on `main` and the
  vault converges to clean bytes.
- Marker leak (b): pin merge.conflictStyle=merge in ensureRepo and teach
  stripConflictMarkers/hasConflictMarkers about the diff3 `|||||||` base section
  (drop the marker AND the stale base region) so diff3/zdiff3 conflicts can
  never leak `|||||||` + base content into a page. Also scrub the 3-way merge
  BASE markdown.
- Silent same-block loss: the block 3-way merge still resolves same-block
  conflicts deterministically to git, but it is no longer silent: diff3Plan now
  reports a conflict count (mergeXmlFragments3WayWithStats), gitSyncWriteBody
  logs it, and the persistence boundary-snapshot now fires for git-sync writes
  over a non-git-sync baseline so the human's pre-merge content is preserved in
  page history (recoverable). Full both-preserved persisted-conflict UI remains
  the deferred redesign.

Tests: space-lock bounded-retry (success/stuck/poll-immediate); push vault-clean
+ diff3 |||||||  strip; ensureRepo conflictStyle pin; diff3Plan/3-way conflict
counts; persistence git-sync boundary snapshot. Server tsc clean; git-sync
vitest + server collaboration/git-sync jest all green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 20:03:21 +03:00

331 lines
12 KiB
TypeScript

// Unit tests for SpaceLockService in ISOLATION. The lock is exercised against a
// fake redis (mock `set`/`eval`) and we assert the exact ARGUMENTS passed to
// redis — the test-coverage gap this refactor (PR #119 #2) closes: acquire uses
// `SET ... PX <ttl> NX`, release uses a DEL-CAS Lua, and the heartbeat refresh
// uses a PEXPIRE-CAS Lua, all keyed by the same private instanceId.
import { Logger } from '@nestjs/common';
import { SpaceLockService } from './space-lock.service';
import {
GIT_SYNC_LOCK_PREFIX,
GIT_SYNC_LOCK_TTL_MS,
} from '../git-sync.constants';
type AnyMock = jest.Mock;
interface Built {
service: SpaceLockService;
redis: { set: AnyMock; eval: AnyMock };
}
function build(): Built {
const redis = {
// Default: lock acquired. Tests override per-case.
set: jest.fn(async () => 'OK'),
eval: jest.fn(async () => 1),
};
const redisService = { getOrThrow: jest.fn(() => redis) };
const service = new SpaceLockService(redisService as any);
return { service, redis };
}
/** Drain queued microtasks so awaited continuations inside the lock run. */
async function flushMicrotasks(): Promise<void> {
await Promise.resolve();
await Promise.resolve();
await Promise.resolve();
}
beforeEach(() => {
jest.clearAllMocks();
});
describe('SpaceLockService', () => {
describe('acquire (SET NX/PX)', () => {
it('calls redis.set with (prefix+spaceId, <instanceId>, PX, ttl, NX) and reuses the instanceId on release', async () => {
const { service, redis } = build();
const result = await service.withSpaceLock('space-1', async () => 'ok');
expect(result).toBe('ok');
// acquire arguments
expect(redis.set).toHaveBeenCalledTimes(1);
const [key, instanceId, px, ttl, nx] = redis.set.mock.calls[0];
expect(key).toBe(GIT_SYNC_LOCK_PREFIX + 'space-1');
expect(typeof instanceId).toBe('string');
expect(instanceId.length).toBeGreaterThan(0);
expect(px).toBe('PX');
expect(ttl).toBe(GIT_SYNC_LOCK_TTL_MS);
expect(nx).toBe('NX');
// release (eval) reuses the SAME instanceId as ARGV[1]
expect(redis.eval).toHaveBeenCalledTimes(1);
const [, , relKey, relInstanceId] = redis.eval.mock.calls[0];
expect(relKey).toBe(GIT_SYNC_LOCK_PREFIX + 'space-1');
expect(relInstanceId).toBe(instanceId);
});
});
describe('release (DEL-CAS Lua)', () => {
it('returns the fn result and runs a get/del CAS-compared release in finally', async () => {
const { service, redis } = build();
const result = await service.withSpaceLock('space-1', async () => 42);
expect(result).toBe(42);
expect(redis.eval).toHaveBeenCalledTimes(1);
const [lua, numKeys, key, instanceId] = redis.eval.mock.calls[0];
expect(lua).toContain('get');
expect(lua).toContain('del');
expect(lua).toContain('== ARGV[1]');
expect(numKeys).toBe(1);
expect(key).toBe(GIT_SYNC_LOCK_PREFIX + 'space-1');
expect(typeof instanceId).toBe('string');
});
});
describe('lock held by another replica', () => {
it("returns { skipped: 'lock-held' } without running fn or releasing when set != 'OK'", async () => {
const { service, redis } = build();
redis.set.mockResolvedValueOnce(null);
const fn = jest.fn(async () => 'ran');
const result = await service.withSpaceLock('space-1', fn);
expect(result).toEqual({ skipped: 'lock-held' });
expect(fn).not.toHaveBeenCalled();
// No release: we never acquired it.
expect(redis.eval).not.toHaveBeenCalled();
});
});
describe('in-process mutex', () => {
it("a second withSpaceLock on the same space mid-flight returns { skipped: 'in-progress' } without a second set", async () => {
const { service, redis } = build();
let release!: () => void;
const gate = new Promise<void>((resolve) => {
release = resolve;
});
const first = service.withSpaceLock('space-1', async () => {
await gate;
return 'first';
});
// Let the first call acquire + enter the running set.
await flushMicrotasks();
const second = await service.withSpaceLock('space-1', async () => 'second');
expect(second).toEqual({ skipped: 'in-progress' });
// Only the first call hit redis.set — the mutex short-circuits the second.
expect(redis.set).toHaveBeenCalledTimes(1);
release();
await expect(first).resolves.toBe('first');
});
});
// Bug #1 (push 503 starvation): the PUSH path passes a bounded acquireRetry so a
// transient overlap with a poll cycle is retried (and succeeds) instead of an
// immediate 503. A genuinely stuck lock still skips after the bound. The poll
// cycle passes NO retry (immediate skip), so only the push path waits.
describe('bounded acquire-retry (push path)', () => {
const retry = { timeoutMs: 5_000, baseMs: 100, maxMs: 500 };
it('retries the acquire and SUCCEEDS when the lock is briefly held then released', async () => {
const { service, redis } = build();
// First acquire attempt fails (lock briefly held by a cycle), the next
// succeeds — the bounded retry must turn this into a SUCCESS, not a skip.
redis.set
.mockResolvedValueOnce(null) // attempt 1: held
.mockResolvedValueOnce(null) // attempt 2: still held
.mockResolvedValue('OK'); // attempt 3+: released -> acquired
const fn = jest.fn(async () => 'pushed');
const result = await service.withSpaceLock('space-1', fn, {
acquireRetry: retry,
});
expect(result).toBe('pushed');
expect(fn).toHaveBeenCalledTimes(1);
expect(redis.set.mock.calls.length).toBeGreaterThanOrEqual(3);
// The acquired lock is released in finally (DEL-CAS eval).
expect(redis.eval).toHaveBeenCalledTimes(1);
expect(redis.eval.mock.calls[0][0]).toContain('del');
});
it('still skips (lock-held) after the bound when the lock stays stuck — and never runs fn', async () => {
const { service, redis } = build();
redis.set.mockResolvedValue(null); // permanently held
const fn = jest.fn(async () => 'pushed');
const result = await service.withSpaceLock('space-1', fn, {
acquireRetry: { timeoutMs: 300, baseMs: 50, maxMs: 100 },
});
expect(result).toEqual({ skipped: 'lock-held' });
expect(fn).not.toHaveBeenCalled();
// It retried more than once before giving up (bound > one interval).
expect(redis.set.mock.calls.length).toBeGreaterThan(1);
// Never acquired -> never released.
expect(redis.eval).not.toHaveBeenCalled();
});
it('without acquireRetry (poll path) a held lock skips IMMEDIATELY (single attempt)', async () => {
const { service, redis } = build();
redis.set.mockResolvedValue(null);
const fn = jest.fn(async () => 'cycle');
const result = await service.withSpaceLock('space-1', fn);
expect(result).toEqual({ skipped: 'lock-held' });
expect(redis.set).toHaveBeenCalledTimes(1); // no retry
expect(fn).not.toHaveBeenCalled();
});
});
describe('fn throwing', () => {
it('propagates the throw AND still releases (eval) in finally', async () => {
const { service, redis } = build();
const boom = new Error('boom');
await expect(
service.withSpaceLock('space-1', async () => {
throw boom;
}),
).rejects.toBe(boom);
// Release still ran despite the throw.
expect(redis.eval).toHaveBeenCalledTimes(1);
const [lua] = redis.eval.mock.calls[0];
expect(lua).toContain('del');
});
});
describe('heartbeat refresh (PEXPIRE-CAS Lua)', () => {
it('extends the lock via a pexpire CAS-Lua with the same instanceId while fn is in flight', async () => {
jest.useFakeTimers();
try {
const { service, redis } = build();
let release!: () => void;
const gate = new Promise<void>((resolve) => {
release = resolve;
});
const run = service.withSpaceLock('space-1', async () => {
await gate;
return 'done';
});
// Let acquire resolve and the running.add + setInterval registration run.
await flushMicrotasks();
// Capture the instanceId used on acquire so we can assert it is reused.
const instanceId = redis.set.mock.calls[0][1];
// Advance past one heartbeat interval (≈ TTL/3) to fire refreshLock.
jest.advanceTimersByTime(Math.floor(GIT_SYNC_LOCK_TTL_MS / 3));
await flushMicrotasks();
// The refresh eval ran (release has not, fn still awaiting the gate).
expect(redis.eval).toHaveBeenCalledTimes(1);
const [lua, numKeys, key, argInstanceId, ttlArg] =
redis.eval.mock.calls[0];
expect(lua).toContain('pexpire');
expect(lua).toContain('== ARGV[1]');
expect(numKeys).toBe(1);
expect(key).toBe(GIT_SYNC_LOCK_PREFIX + 'space-1');
expect(argInstanceId).toBe(instanceId);
expect(ttlArg).toBe(String(GIT_SYNC_LOCK_TTL_MS));
// Let fn finish; release runs in finally (second eval, the DEL-CAS).
release();
await flushMicrotasks();
await expect(run).resolves.toBe('done');
expect(redis.eval).toHaveBeenCalledTimes(2);
expect(redis.eval.mock.calls[1][0]).toContain('del');
} finally {
jest.useRealTimers();
}
});
});
// The lost-lock guard: a heartbeat refresh that cannot CONFIRM we still own the
// lock (CAS miss, res !== 1) OR that throws (Redis error) aborts the supplied
// controller so the in-flight protected fn stops instead of writing blind after
// a possible lock takeover. `withSpaceLock` threads that signal into `fn`.
describe('abort-on-lost-lock', () => {
it('aborts the in-flight fn when the heartbeat refresh CAS-MISSES (eval -> 0)', async () => {
jest.useFakeTimers();
try {
const { service, redis } = build();
let release!: () => void;
const gate = new Promise<void>((resolve) => {
release = resolve;
});
let captured: AbortSignal | undefined;
const run = service.withSpaceLock('space-1', async (signal) => {
captured = signal;
await gate;
return 'done';
});
// Let acquire resolve and the setInterval register.
await flushMicrotasks();
expect(captured).toBeDefined();
expect(captured!.aborted).toBe(false);
// The refresh CAS-misses: the key no longer holds our instanceId.
redis.eval.mockResolvedValue(0);
jest.advanceTimersByTime(Math.floor(GIT_SYNC_LOCK_TTL_MS / 3));
await flushMicrotasks();
// The lost lock aborted the protected fn's signal.
expect(captured!.aborted).toBe(true);
release();
await flushMicrotasks();
await expect(run).resolves.toBe('done');
} finally {
jest.useRealTimers();
}
});
it('aborts the in-flight fn when the heartbeat refresh THROWS (Redis error)', async () => {
jest.useFakeTimers();
try {
const { service, redis } = build();
let release!: () => void;
const gate = new Promise<void>((resolve) => {
release = resolve;
});
let captured: AbortSignal | undefined;
const run = service.withSpaceLock('space-1', async (signal) => {
captured = signal;
await gate;
return 'done';
});
await flushMicrotasks();
expect(captured!.aborted).toBe(false);
// The refresh eval rejects (Redis down). release() in finally must still
// resolve, so only reject the NEXT (heartbeat) call, then go back to OK.
redis.eval.mockRejectedValueOnce(new Error('redis down'));
jest.advanceTimersByTime(Math.floor(GIT_SYNC_LOCK_TTL_MS / 3));
await flushMicrotasks();
expect(captured!.aborted).toBe(true);
release();
await flushMicrotasks();
await expect(run).resolves.toBe('done');
} finally {
jest.useRealTimers();
}
});
});
});
// Silence the warn logger if a refresh/release path ever logs (defensive).
beforeAll(() => {
jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined);
});