d5079aa1d8
A 10-agent red-team pass on the two-way Docmost<->git sync surfaced 16 ranked findings (9 others triaged out as already-defended). Wrote a reproduction test per finding (each asserts the CORRECT behavior, so it fails on the bug), then fixed the production code so every repro goes green. All confirmed bugs: Round-trip data loss (markdown-converter.ts + docmost-schema.ts mirror): - #1 editor-ext node types silently dropped on export — ported the 8 missing canon nodes (footnoteReference/footnotesList/footnoteDefinition, htmlEmbed, status, pageEmbed, transclusionSource/Reference) into the git-sync schema mirror and added converter cases that emit their schema-matching HTML instead of flattening unknown nodes to '' (this was the critical data-loss flagged in review #1679: footnotes/htmlEmbed lost on sync). Snapshot surface updated. - #2 top-level image lost width/height/align/attachmentId — now emits an HTML <img> (like video/diagrams) when it carries layout attrs; bare images stay . Image node parses width/height as strings so they re-import. - #3 code block containing a ``` fence corrupted on round-trip — outer fence is now widened to (longest-inner-backtick-run + 1). - #16 deep nesting threw RangeError (page never synced) — added a depth guard (MAX_NODE_DEPTH=400) so the converter never overflows the stack. Push/layout/cycle (engine): - #4 disambiguation ' ~slugId' suffix corrupted Docmost titles + order-dependent layout — deterministic, order-independent sibling disambiguation; suffix is stripped from a path-derived title ONLY when the new name is exactly the old title plus the suffix (never a genuine retitle ending in ' ~token'). - #6 retry-adopt by (parent,title) clobbered the wrong duplicate-title sibling — ambiguous (parent,title) is no longer adopted (falls back to fresh create). - #12 a new child under a new parent was created at ROOT — creates are ordered parent-before-child with an in-memory created-id map for parent resolution. - #13 git conflict markers could reach Docmost — bodies are scanned and the marker lines stripped (a '=======' line is only treated as a conflict separator inside a <<<<<<< ... >>>>>>> block, so setext headings are safe). - #15 a divergent `docmost` mirror was escalated by runPush but dropped by runCycle — RunCycleResult now forwards divergentDocmost to the orchestrator. Server (merge / lock / provenance): - #9 3-way merge lost a human's block edit when git inserted an adjacent block — finer-grained diff3 region merge (via lcs) preserves non-overlapping human edits; genuine same-block conflicts still resolve git-wins. - #10 single-writer race — module-static liveLocks closes the same-process TOCTOU window, and a heartbeat refresh that cannot confirm the lock now aborts the cycle at its next write checkpoint (cooperative AbortSignal threaded through runCycle). Cross-process fencing tokens remain a follow-up. - #14 sticky-agent provenance overrode an explicit actor='git-sync' write, blinding the listener loop-guard — resolveSource now lets an explicit actor win over the sticky-agent fallback (explicit agent still wins). Verified: git-sync vitest 617 pass (+1 expected-fail), server unit jest 1541 pass, server tsc clean. A review pass over the fixes caught and corrected a title-suffix over-strip, an inert abort signal, a document-wide conflict-marker strip, and two leaf-atom content-holes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
102 lines
3.8 KiB
TypeScript
102 lines
3.8 KiB
TypeScript
// Red-team finding #10: single-writer guarantee across replicas must survive a
|
|
// TTL lapse with a swallowed heartbeat refresh. Two SpaceLockService instances
|
|
// (A, B) share ONE redis store. A holds 'X' and stays in-flight; the lock key
|
|
// then disappears (TTL expiry while refreshLock silently failed). B must NOT be
|
|
// able to acquire 'X' and run its fn concurrently with A — that would be two
|
|
// writers racing the same working tree. This test asserts the DESIRED
|
|
// single-writer behavior, so it FAILS today if the lapse lets B in.
|
|
import { Logger } from '@nestjs/common';
|
|
import { SpaceLockService } from './space-lock.service';
|
|
import { GIT_SYNC_LOCK_PREFIX } from '../git-sync.constants';
|
|
|
|
/**
|
|
* Minimal shared fake redis honoring exactly the two primitives the lock uses:
|
|
* - `SET key val PX ttl NX` → 'OK' only when the key is absent (NX semantics).
|
|
* - `eval(<get/del CAS>|<get/pexpire CAS>, 1, key, instanceId[, ttl])` →
|
|
* compares the stored value to ARGV[1] before del/pexpire (CAS).
|
|
* TTL expiry is not time-driven here; tests simulate it by mutating `store`.
|
|
*/
|
|
function makeSharedRedis() {
|
|
const store = new Map<string, string>();
|
|
return {
|
|
store,
|
|
async set(key: string, val: string, _px: 'PX', _ttl: number, nx: 'NX') {
|
|
if (nx === 'NX' && store.has(key)) return null;
|
|
store.set(key, val);
|
|
return 'OK';
|
|
},
|
|
async eval(lua: string, _numKeys: number, key: string, argInstanceId: string) {
|
|
// Only act when WE still own the key (CAS), mirroring the Lua scripts.
|
|
if (store.get(key) !== argInstanceId) return 0;
|
|
if (lua.includes('del')) {
|
|
store.delete(key);
|
|
return 1;
|
|
}
|
|
// pexpire CAS refresh: value matches, "extend" is a no-op in the fake.
|
|
return 1;
|
|
},
|
|
};
|
|
}
|
|
|
|
function buildInstance(redis: ReturnType<typeof makeSharedRedis>) {
|
|
const redisService = { getOrThrow: jest.fn(() => redis) };
|
|
return new SpaceLockService(redisService as any);
|
|
}
|
|
|
|
async function flushMicrotasks(): Promise<void> {
|
|
await Promise.resolve();
|
|
await Promise.resolve();
|
|
await Promise.resolve();
|
|
}
|
|
|
|
beforeAll(() => {
|
|
jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined);
|
|
});
|
|
|
|
describe('SpaceLockService — finding #10 single-writer across TTL lapse', () => {
|
|
it('B must not run its fn concurrently with an in-flight A after the lock key vanishes', async () => {
|
|
const redis = makeSharedRedis();
|
|
const A = buildInstance(redis);
|
|
const B = buildInstance(redis);
|
|
|
|
let aRunning = false;
|
|
let releaseA!: () => void;
|
|
const gateA = new Promise<void>((resolve) => {
|
|
releaseA = resolve;
|
|
});
|
|
|
|
// A acquires 'X' and stays in-flight awaiting the gate.
|
|
const aResult = A.withSpaceLock('X', async () => {
|
|
aRunning = true;
|
|
await gateA;
|
|
aRunning = false;
|
|
return 'A-done';
|
|
});
|
|
await flushMicrotasks();
|
|
|
|
// Sanity: A is in-flight and owns the redis key.
|
|
expect(aRunning).toBe(true);
|
|
expect(redis.store.has(GIT_SYNC_LOCK_PREFIX + 'X')).toBe(true);
|
|
|
|
// Simulate TTL lapse with a swallowed heartbeat refresh: the lock key
|
|
// disappears from the shared store while A is still running.
|
|
redis.store.delete(GIT_SYNC_LOCK_PREFIX + 'X');
|
|
|
|
// Now B tries to take 'X'. Desired: rejected as 'lock-held' (single writer);
|
|
// and under no circumstance may fn2 run while A is still in flight.
|
|
let bRanWhileARunning = false;
|
|
const bResult = await B.withSpaceLock('X', async () => {
|
|
bRanWhileARunning = aRunning; // captures whether A was still in-flight
|
|
return 'B-done';
|
|
});
|
|
|
|
// Single-writer assertions: B did NOT execute concurrently with A.
|
|
expect(bRanWhileARunning).toBe(false);
|
|
expect(bResult).toEqual({ skipped: 'lock-held' });
|
|
|
|
// Cleanup: let A finish.
|
|
releaseA();
|
|
await expect(aResult).resolves.toBe('A-done');
|
|
});
|
|
});
|