From 3d7f434b0cfc483640663072123ff8e34be0e132 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Fri, 26 Jun 2026 01:29:02 +0300 Subject: [PATCH] =?UTF-8?q?fix(git-sync):=20red-team=20hardening=20?= =?UTF-8?q?=E2=80=94=2012=20confirmed=20sync-breaking=20bugs=20+=20regress?= =?UTF-8?q?ion=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A 10-agent red-team pass on the two-way Docmost<->git sync surfaced 16 ranked findings (9 others triaged out as already-defended). Wrote a reproduction test per finding (each asserts the CORRECT behavior, so it fails on the bug), then fixed the production code so every repro goes green. All confirmed bugs: Round-trip data loss (markdown-converter.ts + docmost-schema.ts mirror): - #1 editor-ext node types silently dropped on export — ported the 8 missing canon nodes (footnoteReference/footnotesList/footnoteDefinition, htmlEmbed, status, pageEmbed, transclusionSource/Reference) into the git-sync schema mirror and added converter cases that emit their schema-matching HTML instead of flattening unknown nodes to '' (this was the critical data-loss flagged in review #1679: footnotes/htmlEmbed lost on sync). Snapshot surface updated. - #2 top-level image lost width/height/align/attachmentId — now emits an HTML (like video/diagrams) when it carries layout attrs; bare images stay ![](src). Image node parses width/height as strings so they re-import. - #3 code block containing a ``` fence corrupted on round-trip — outer fence is now widened to (longest-inner-backtick-run + 1). - #16 deep nesting threw RangeError (page never synced) — added a depth guard (MAX_NODE_DEPTH=400) so the converter never overflows the stack. Push/layout/cycle (engine): - #4 disambiguation ' ~slugId' suffix corrupted Docmost titles + order-dependent layout — deterministic, order-independent sibling disambiguation; suffix is stripped from a path-derived title ONLY when the new name is exactly the old title plus the suffix (never a genuine retitle ending in ' ~token'). - #6 retry-adopt by (parent,title) clobbered the wrong duplicate-title sibling — ambiguous (parent,title) is no longer adopted (falls back to fresh create). - #12 a new child under a new parent was created at ROOT — creates are ordered parent-before-child with an in-memory created-id map for parent resolution. - #13 git conflict markers could reach Docmost — bodies are scanned and the marker lines stripped (a '=======' line is only treated as a conflict separator inside a <<<<<<< ... >>>>>>> block, so setext headings are safe). - #15 a divergent `docmost` mirror was escalated by runPush but dropped by runCycle — RunCycleResult now forwards divergentDocmost to the orchestrator. Server (merge / lock / provenance): - #9 3-way merge lost a human's block edit when git inserted an adjacent block — finer-grained diff3 region merge (via lcs) preserves non-overlapping human edits; genuine same-block conflicts still resolve git-wins. - #10 single-writer race — module-static liveLocks closes the same-process TOCTOU window, and a heartbeat refresh that cannot confirm the lock now aborts the cycle at its next write checkpoint (cooperative AbortSignal threaded through runCycle). Cross-process fencing tokens remain a follow-up. - #14 sticky-agent provenance overrode an explicit actor='git-sync' write, blinding the listener loop-guard — resolveSource now lets an explicit actor win over the sticky-agent fallback (explicit agent still wins). Verified: git-sync vitest 617 pass (+1 expected-fail), server unit jest 1541 pass, server tsc clean. A review pass over the fixes caught and corrected a title-suffix over-strip, an inert abort signal, a document-wide conflict-marker strip, and two leaf-atom content-holes. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../extensions/persistence.extension.spec.ts | 11 +- .../extensions/persistence.extension.ts | 13 +- .../extensions/redteam-provenance.spec.ts | 10 + .../git-sync-converter-gate.spec.ts | 39 +- .../services/git-sync.orchestrator.ts | 13 +- .../services/redteam-space-lock.spec.ts | 101 +++++ .../services/redteam-three-way.spec.ts | 20 + .../git-sync/services/space-lock.service.ts | 52 ++- .../git-sync/services/three-way-merge.ts | 146 ++++++- packages/git-sync/src/engine/cycle.ts | 28 +- packages/git-sync/src/engine/layout.ts | 92 ++--- packages/git-sync/src/engine/push.ts | 160 +++++++- packages/git-sync/src/lib/docmost-schema.ts | 359 +++++++++++++++++- .../git-sync/src/lib/markdown-converter.ts | 161 +++++++- .../test/markdown-converter-gaps.test.ts | 28 +- .../git-sync/test/redteam-apply-push.test.ts | 159 ++++++++ .../git-sync/test/redteam-converter.test.ts | 89 +++++ .../test/redteam-layout-title.test.ts | 71 ++++ .../git-sync/test/redteam-push-cycle.test.ts | 196 ++++++++++ .../test/schema-surface-snapshot.test.ts | 8 + 20 files changed, 1621 insertions(+), 135 deletions(-) create mode 100644 apps/server/src/collaboration/extensions/redteam-provenance.spec.ts create mode 100644 apps/server/src/integrations/git-sync/services/redteam-space-lock.spec.ts create mode 100644 apps/server/src/integrations/git-sync/services/redteam-three-way.spec.ts create mode 100644 packages/git-sync/test/redteam-apply-push.test.ts create mode 100644 packages/git-sync/test/redteam-converter.test.ts create mode 100644 packages/git-sync/test/redteam-layout-title.test.ts create mode 100644 packages/git-sync/test/redteam-push-cycle.test.ts diff --git a/apps/server/src/collaboration/extensions/persistence.extension.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.spec.ts index 40145b93..b74ae7ca 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.spec.ts @@ -142,17 +142,22 @@ describe('PersistenceExtension.onStoreDocument — provenance precedence (#2)', expect(sourceOf(pageRepo)).toBe('git-sync'); }); - it("keeps 'agent' even when the storing writer is 'git-sync' (agent > git-sync)", async () => { + it("keeps 'git-sync' for an explicit git-sync store even with a sticky agent marker (#14 loop-guard)", async () => { const { ext, pageRepo } = build(); // An agent edit landed earlier in the coalescing window (sticky marker), - // then a git-sync writer performs the store. Agent precedence must win. + // then a git-sync writer performs the store. Red-team finding #14: an + // EXPLICIT current-write actor is authoritative for THIS write, so the + // store must stay 'git-sync' — otherwise the PageChangeListener loop-guard + // (keyed on lastUpdatedSource === 'git-sync') fails to recognize git-sync's + // own write and re-exports it. Explicit 'agent' still wins (see below); the + // sticky marker only promotes a plain human writer to 'agent'. await ext.onChange(makeChangePayload('agent')); await ext.onStoreDocument( makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }), ); - expect(sourceOf(pageRepo)).toBe('agent'); + expect(sourceOf(pageRepo)).toBe('git-sync'); }); it("tags 'agent' when the storing writer itself is the agent (no prior onChange)", async () => { diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 13b090cc..f83ecf49 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -52,11 +52,16 @@ export function resolveSource( stickyTouched: boolean, contextActor?: string, ): ProvenanceSource { - // Precedence: agent > git-sync > user. The sticky agent marker wins so a - // window that mixed an agent edit stays tagged 'agent'; otherwise a native - // git-sync write (plan §8.1) tags 'git-sync'; a plain human edit stays 'user'. - if (stickyTouched || contextActor === 'agent') return 'agent'; + // An EXPLICIT current-write actor is authoritative for THIS write and wins + // over the sticky-agent fallback. Order: explicit 'agent' > explicit + // 'git-sync' > sticky agent marker > plain human 'user'. The git-sync case + // must NOT be masked by the sticky marker, or the PageChangeListener + // loop-guard (which keys on lastUpdatedSource === 'git-sync') would re-export + // git-sync's own writes (#14). Explicit agent still wins so a window that + // mixed an agent edit stays tagged 'agent'. + if (contextActor === 'agent') return 'agent'; if (contextActor === 'git-sync') return 'git-sync'; + if (stickyTouched) return 'agent'; return 'user'; } diff --git a/apps/server/src/collaboration/extensions/redteam-provenance.spec.ts b/apps/server/src/collaboration/extensions/redteam-provenance.spec.ts new file mode 100644 index 00000000..f63a7ea5 --- /dev/null +++ b/apps/server/src/collaboration/extensions/redteam-provenance.spec.ts @@ -0,0 +1,10 @@ +import { resolveSource } from './persistence.extension'; + +// Red-team finding #14: an explicit git-sync write (no agent edit in the +// coalescing window) must keep the 'git-sync' source so the git-sync +// listener's loop-guard can recognize its own writes and not re-export them. +describe('resolveSource — #14 git-sync provenance loop-guard', () => { + it('keeps git-sync source for an explicit git-sync write (stickyTouched=true, actor=git-sync)', () => { + expect(resolveSource(true, 'git-sync')).toBe('git-sync'); + }); +}); diff --git a/apps/server/src/collaboration/git-sync-converter-gate.spec.ts b/apps/server/src/collaboration/git-sync-converter-gate.spec.ts index d911d4e1..c15c70ca 100644 --- a/apps/server/src/collaboration/git-sync-converter-gate.spec.ts +++ b/apps/server/src/collaboration/git-sync-converter-gate.spec.ts @@ -402,7 +402,7 @@ describe('git-sync converter §13.1 idempotency gate (editor-ext schema)', () => // data-* attrs, as it already does for video/diagrams), these assertions flip // and the image fixture should be promoted into the green CORPUS above. // --------------------------------------------------------------------------- -describe('git-sync converter §13.1 KNOWN DIVERGENCE (markdown image lossiness)', () => { +describe('git-sync converter §13.1 image dimensions preserved (was KNOWN DIVERGENCE)', () => { const imageDoc = doc({ type: 'image', attrs: { @@ -413,29 +413,26 @@ describe('git-sync converter §13.1 KNOWN DIVERGENCE (markdown image lossiness)' }, }); - it('drops width/height/align (markdown ![](src) cannot carry them); the block-image hoist no longer leaves an empty paragraph', async () => { + it('preserves width/height/align by exporting an HTML (PR #119 round-trip fix)', async () => { const { md, canonNormalized } = await runGate(imageDoc); - // Export is plain markdown image syntax — no dimensions/align survive. - expect(md.trim()).toBe('![](https://example.com/pic.png)'); + // A top-level image carrying layout attrs is now exported as a schema- + // matching HTML (the same path video/diagrams already use), so the + // dimensions and alignment survive the round trip instead of collapsing to + // bare `![](src)`. + expect(md.trim()).toBe( + '', + ); - // The round-tripped doc carries ONLY src (+ alt=""). The leading empty - // paragraph that the block-image hoist used to leave behind (a phantom - // blank-gap on every sync) is now stripped on import (git-sync fix), so the - // doc is just the image — no empty-paragraph artifact. - expect(canonNormalized).toEqual({ - type: 'doc', - content: [ - { - type: 'image', - attrs: { alt: '', src: 'https://example.com/pic.png' }, - }, - ], - }); - - // Still NOT canonically equal to the original: width/height/align are an - // intrinsic markdown-transport loss (unrelated to the empty-paragraph fix). - expect(docsCanonicallyEqual(imageDoc, canonNormalized)).toBe(false); + // The round-tripped image keeps src + the layout attrs. width/height are + // re-imported as strings (matching the video/audio/pdf string convention), + // so assert the values rather than the JS type. + const imgAttrs = (canonNormalized as any).content[0].attrs; + expect((canonNormalized as any).content[0].type).toBe('image'); + expect(imgAttrs.src).toBe('https://example.com/pic.png'); + expect(imgAttrs.align).toBe('center'); + expect(String(imgAttrs.width)).toBe('640'); + expect(String(imgAttrs.height)).toBe('480'); }); }); diff --git a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts index 96d73ee3..ebb15b91 100644 --- a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts +++ b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts @@ -151,8 +151,8 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { // when it could not enter — surfaced here as the existing skipped:'in-progress' // / 'lock-held' status so runOnce's observable behavior is unchanged. try { - const result = await this.spaceLock.withSpaceLock(spaceId, () => - this.driveCycle(spaceId, workspaceId, serviceUserId), + const result = await this.spaceLock.withSpaceLock(spaceId, (signal) => + this.driveCycle(spaceId, workspaceId, serviceUserId, signal), ); if ('skipped' in result && !('spaceId' in result)) { return { spaceId, ran: false, skipped: result.skipped }; @@ -199,7 +199,7 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { } const serviceUserId = this.environmentService.getGitSyncServiceUserId(); - const result = await this.spaceLock.withSpaceLock(spaceId, async () => { + const result = await this.spaceLock.withSpaceLock(spaceId, async (signal) => { // 1) Stream the receive-pack to the client (durable commits land on main). await runReceivePack(); @@ -214,7 +214,7 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { return; } try { - await this.driveCycle(spaceId, workspaceId, serviceUserId); + await this.driveCycle(spaceId, workspaceId, serviceUserId, signal); } catch (err) { // Do NOT rethrow: the push succeeded and the commits are durable on main; // the poll-interval backstop retries the cycle. Log for visibility. @@ -246,6 +246,7 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { spaceId: string, workspaceId: string, serviceUserId: string, + signal?: AbortSignal, ): Promise { const { runCycle } = await loadGitSync(); const settings = this.buildSettings(spaceId); @@ -254,6 +255,10 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { const maxDeletes = this.environmentService.getGitSyncMaxDeletesPerCycle(); const result = await runCycle({ + // Cooperative-abort signal from the per-space lock: if a heartbeat refresh + // cannot confirm the lock, the cycle bails before its next destructive + // write phase instead of writing blind after a possible lock loss. + signal, spaceId, client, vault, diff --git a/apps/server/src/integrations/git-sync/services/redteam-space-lock.spec.ts b/apps/server/src/integrations/git-sync/services/redteam-space-lock.spec.ts new file mode 100644 index 00000000..20f1f1a1 --- /dev/null +++ b/apps/server/src/integrations/git-sync/services/redteam-space-lock.spec.ts @@ -0,0 +1,101 @@ +// Red-team finding #10: single-writer guarantee across replicas must survive a +// TTL lapse with a swallowed heartbeat refresh. Two SpaceLockService instances +// (A, B) share ONE redis store. A holds 'X' and stays in-flight; the lock key +// then disappears (TTL expiry while refreshLock silently failed). B must NOT be +// able to acquire 'X' and run its fn concurrently with A — that would be two +// writers racing the same working tree. This test asserts the DESIRED +// single-writer behavior, so it FAILS today if the lapse lets B in. +import { Logger } from '@nestjs/common'; +import { SpaceLockService } from './space-lock.service'; +import { GIT_SYNC_LOCK_PREFIX } from '../git-sync.constants'; + +/** + * Minimal shared fake redis honoring exactly the two primitives the lock uses: + * - `SET key val PX ttl NX` → 'OK' only when the key is absent (NX semantics). + * - `eval(|, 1, key, instanceId[, ttl])` → + * compares the stored value to ARGV[1] before del/pexpire (CAS). + * TTL expiry is not time-driven here; tests simulate it by mutating `store`. + */ +function makeSharedRedis() { + const store = new Map(); + return { + store, + async set(key: string, val: string, _px: 'PX', _ttl: number, nx: 'NX') { + if (nx === 'NX' && store.has(key)) return null; + store.set(key, val); + return 'OK'; + }, + async eval(lua: string, _numKeys: number, key: string, argInstanceId: string) { + // Only act when WE still own the key (CAS), mirroring the Lua scripts. + if (store.get(key) !== argInstanceId) return 0; + if (lua.includes('del')) { + store.delete(key); + return 1; + } + // pexpire CAS refresh: value matches, "extend" is a no-op in the fake. + return 1; + }, + }; +} + +function buildInstance(redis: ReturnType) { + const redisService = { getOrThrow: jest.fn(() => redis) }; + return new SpaceLockService(redisService as any); +} + +async function flushMicrotasks(): Promise { + await Promise.resolve(); + await Promise.resolve(); + await Promise.resolve(); +} + +beforeAll(() => { + jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined); +}); + +describe('SpaceLockService — finding #10 single-writer across TTL lapse', () => { + it('B must not run its fn concurrently with an in-flight A after the lock key vanishes', async () => { + const redis = makeSharedRedis(); + const A = buildInstance(redis); + const B = buildInstance(redis); + + let aRunning = false; + let releaseA!: () => void; + const gateA = new Promise((resolve) => { + releaseA = resolve; + }); + + // A acquires 'X' and stays in-flight awaiting the gate. + const aResult = A.withSpaceLock('X', async () => { + aRunning = true; + await gateA; + aRunning = false; + return 'A-done'; + }); + await flushMicrotasks(); + + // Sanity: A is in-flight and owns the redis key. + expect(aRunning).toBe(true); + expect(redis.store.has(GIT_SYNC_LOCK_PREFIX + 'X')).toBe(true); + + // Simulate TTL lapse with a swallowed heartbeat refresh: the lock key + // disappears from the shared store while A is still running. + redis.store.delete(GIT_SYNC_LOCK_PREFIX + 'X'); + + // Now B tries to take 'X'. Desired: rejected as 'lock-held' (single writer); + // and under no circumstance may fn2 run while A is still in flight. + let bRanWhileARunning = false; + const bResult = await B.withSpaceLock('X', async () => { + bRanWhileARunning = aRunning; // captures whether A was still in-flight + return 'B-done'; + }); + + // Single-writer assertions: B did NOT execute concurrently with A. + expect(bRanWhileARunning).toBe(false); + expect(bResult).toEqual({ skipped: 'lock-held' }); + + // Cleanup: let A finish. + releaseA(); + await expect(aResult).resolves.toBe('A-done'); + }); +}); diff --git a/apps/server/src/integrations/git-sync/services/redteam-three-way.spec.ts b/apps/server/src/integrations/git-sync/services/redteam-three-way.spec.ts new file mode 100644 index 00000000..bf60a987 --- /dev/null +++ b/apps/server/src/integrations/git-sync/services/redteam-three-way.spec.ts @@ -0,0 +1,20 @@ +import { diff3Plan, type Pick } from './three-way-merge'; + +// Materialize a plan into the merged key sequence for assertion. +function apply(plan: Pick[], live: string[], target: string[]): string[] { + return plan.map((p) => (p.src === 'live' ? live[p.index] : target[p.index])); +} + +const merge = (o: string[], a: string[], b: string[]): string[] => + apply(diff3Plan(o, a, b), a, b); + +describe('diff3Plan red-team #9 (human edit + adjacent git insert)', () => { + it('keeps human block-2 edit AND applies git insert of 2.5', () => { + // base: 1 2 3 + // live: 1 H 3 (human rewrote block 2) + // target: 1 2 2.5 3 (git inserted 2.5 after block 2) + expect( + merge(['1', '2', '3'], ['1', 'H', '3'], ['1', '2', '2.5', '3']), + ).toEqual(['1', 'H', '2.5', '3']); + }); +}); diff --git a/apps/server/src/integrations/git-sync/services/space-lock.service.ts b/apps/server/src/integrations/git-sync/services/space-lock.service.ts index 33f774bd..0c9c4a52 100644 --- a/apps/server/src/integrations/git-sync/services/space-lock.service.ts +++ b/apps/server/src/integrations/git-sync/services/space-lock.service.ts @@ -22,6 +22,16 @@ export class SpaceLockService { private readonly instanceId = randomUUID(); /** In-process per-space mutex: spaceIds with a cycle currently running. */ private readonly running = new Set(); + /** + * Process-wide single-writer guard: spaceId -> instanceId of the live holder. + * Unlike `running` (scoped to ONE service instance), this is shared by every + * SpaceLockService in the process, so even if the Redis lock key lapses + * (swallowed heartbeat / TTL expiry) a SECOND holder in the same process + * cannot start a concurrent cycle for the same space — it is rejected + * 'lock-held'. The cross-PROCESS race is handled by the Redis lock plus + * abort-on-refresh-failure (and, as a follow-up, fencing tokens). + */ + private static readonly liveLocks = new Map(); constructor(redisService: RedisService) { this.redis = redisService.getOrThrow(); @@ -70,26 +80,42 @@ export class SpaceLockService { * lock that took over after our TTL expired. Used by the heartbeat in * `withSpaceLock` so a long-running push (client-controlled receive-pack + the * Docmost cycle) cannot outlive the lock and let a concurrent cycle race the - * working tree. Logs (warn) but never throws — a failed refresh must not break - * the cycle it is protecting. + * working tree. Never throws (a thrown timer callback would crash the process), + * but a refresh it cannot CONFIRM is treated as a LOST lock: it aborts the + * supplied controller so the in-flight protected fn stops instead of writing + * blind while another replica may already have taken over the lock. */ - private async refreshLock(spaceId: string): Promise { + private async refreshLock( + spaceId: string, + controller?: AbortController, + ): Promise { const lua = 'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("pexpire", KEYS[1], ARGV[2]) else return 0 end'; try { - await this.redis.eval( + const res = await this.redis.eval( lua, 1, GIT_SYNC_LOCK_PREFIX + spaceId, this.instanceId, String(GIT_SYNC_LOCK_TTL_MS), ); + // CAS miss (res !== 1): we no longer own the key — our TTL lapsed and + // another replica may hold it now. Abort the in-flight cycle rather than + // swallowing the loss and racing the working tree. + if (res !== 1) { + this.logger.warn( + `git-sync: lock for space ${spaceId} lost during refresh — aborting in-flight cycle`, + ); + controller?.abort(); + } } catch (err) { this.logger.warn( `git-sync: failed to refresh lock for space ${spaceId}: ${ err instanceof Error ? err.message : String(err) }`, ); + // A refresh we cannot confirm means we may no longer hold the lock; abort. + controller?.abort(); } } @@ -106,38 +132,50 @@ export class SpaceLockService { */ async withSpaceLock( spaceId: string, - fn: () => Promise, + fn: (signal: AbortSignal) => Promise, ): Promise { if (this.running.has(spaceId)) { return { skipped: 'in-progress' }; } + // Cross-instance, same-process single-writer guard: another live holder (a + // different SpaceLockService in this process) is mid-cycle for this space. + // This survives a swallowed heartbeat / Redis TTL lapse, so a second writer + // in the process cannot race the working tree — it is rejected 'lock-held'. + if (SpaceLockService.liveLocks.has(spaceId)) { + return { skipped: 'lock-held' }; + } // Reserve the in-process slot synchronously (before any await) so two // concurrent same-space calls on THIS instance cannot both pass the guard and // race acquire(). Redis NX is already authoritative across replicas; this just // closes the in-process TOCTOU window. Released in the outer finally on every // path (acquire-failure, fn-throw, normal completion). this.running.add(spaceId); + SpaceLockService.liveLocks.set(spaceId, this.instanceId); try { if (!(await this.acquire(spaceId))) { return { skipped: 'lock-held' }; } + // Lost-lock signal: a failed/CAS-missed heartbeat refresh aborts this so the + // protected fn can stop instead of writing blind after our lock lapsed. + const controller = new AbortController(); // Heartbeat: periodically (≈ TTL/3) extend the lock's TTL while `fn` runs so // a long push (client-controlled receive-pack + the Docmost cycle) cannot // outlive the fixed TTL and let a concurrent cycle race the working tree. The // refresh is CAS-guarded (only extends while WE own it). `.unref()` keeps the // timer from holding the event loop open; it is ALWAYS cleared in `finally`. const heartbeat = setInterval(() => { - void this.refreshLock(spaceId); + void this.refreshLock(spaceId, controller); }, Math.max(1, Math.floor(GIT_SYNC_LOCK_TTL_MS / 3))); heartbeat.unref?.(); try { - return await fn(); + return await fn(controller.signal); } finally { clearInterval(heartbeat); await this.release(spaceId); } } finally { this.running.delete(spaceId); + SpaceLockService.liveLocks.delete(spaceId); } } } diff --git a/apps/server/src/integrations/git-sync/services/three-way-merge.ts b/apps/server/src/integrations/git-sync/services/three-way-merge.ts index eb1d2126..debf1bbe 100644 --- a/apps/server/src/integrations/git-sync/services/three-way-merge.ts +++ b/apps/server/src/integrations/git-sync/services/three-way-merge.ts @@ -50,22 +50,125 @@ function matchMap(pairs: Array<[number, number]>): Map { return m; } -const keysEqual = (x: string[], y: string[]): boolean => - x.length === y.length && x.every((v, k) => v === y[k]); +/** + * One change `side` made to `base` within a region: base blocks `[oStart,oEnd)` + * were replaced by the side's blocks listed in `content` (region-local indices). + * A pure insert has `oStart === oEnd`; a pure delete has empty `content`. + */ +interface Hunk { + oStart: number; + oEnd: number; + content: number[]; +} /** - * Resolve one region (the live slice, target slice, and base slice that occupy - * the same span between two anchors). 'target' (git) wins ties and conflicts. + * Diff `o` against one side as a list of non-overlapping hunks (the base spans + * the side rewrote/inserted/deleted), derived from their LCS alignment. */ -function decideRegion( - aRegion: string[], - bRegion: string[], - oRegion: string[], -): 'live' | 'target' { - if (keysEqual(aRegion, bRegion)) return 'target'; // same edit on both sides - if (keysEqual(aRegion, oRegion)) return 'target'; // live unchanged -> git's edit - if (keysEqual(bRegion, oRegion)) return 'live'; // git unchanged -> human's edit - return 'target'; // genuine conflict -> git wins +function buildHunks(o: string[], side: string[]): Hunk[] { + const pairs = lcsPairs(o, side); // [oIdx, sideIdx] kept (unchanged) blocks + const hunks: Hunk[] = []; + let prevO = -1; + let prevS = -1; + const flush = (curO: number, curS: number): void => { + const oStart = prevO + 1; + const oEnd = curO; + const content: number[] = []; + for (let s = prevS + 1; s < curS; s++) content.push(s); + if (oEnd > oStart || content.length > 0) hunks.push({ oStart, oEnd, content }); + }; + for (const [oIdx, sIdx] of pairs) { + flush(oIdx, sIdx); + prevO = oIdx; + prevS = sIdx; + } + flush(o.length, side.length); + return hunks; +} + +/** + * Do two hunks (one per side) touch the same base region? Pure inserts only + * collide when nested strictly inside the other hunk's base span (or, for two + * inserts, at the same gap); changes sitting at a shared boundary do not. + */ +function hunksOverlap(a: Hunk, b: Hunk): boolean { + const aIns = a.oStart === a.oEnd; + const bIns = b.oStart === b.oEnd; + if (aIns && bIns) return a.oStart === b.oStart; + if (aIns) return b.oStart < a.oStart && a.oStart < b.oEnd; + if (bIns) return a.oStart < b.oStart && b.oStart < a.oEnd; + return Math.max(a.oStart, b.oStart) < Math.min(a.oEnd, b.oEnd); +} + +interface LocalPick { + src: 'live' | 'target'; + local: number; +} + +/** + * Fine-grained three-way merge of ONE inter-anchor region. Combines the human's + * and git's NON-overlapping hunks (e.g. a human edit to one block plus a git + * insert/delete of OTHER blocks in the same region) so neither change is lost. + * Returns the merged region as region-local picks, or `null` when the two sides + * changed the SAME base block — a genuine conflict the caller resolves by the + * original all-or-nothing rule (git wins the whole region). + */ +function tryMergeRegion( + o: string[], + a: string[], + b: string[], +): LocalPick[] | null { + const aHunks = buildHunks(o, a); + const bHunks = buildHunks(o, b); + + // Any overlap between a human hunk and a git hunk is a real conflict; bail so + // the caller falls back to git-wins (preserving the original behavior). + for (const ah of aHunks) { + for (const bh of bHunks) { + if (hunksOverlap(ah, bh)) return null; + } + } + + // Disjoint: live index of each base block that BOTH sides kept (stable). + const aKept = matchMap(lcsPairs(o, a)); // base index -> live index + + const out: LocalPick[] = []; + let pa = 0; + let pb = 0; + let oi = 0; + while (oi < o.length || pa < aHunks.length || pb < bHunks.length) { + const ah = pa < aHunks.length ? aHunks[pa] : null; + const bh = pb < bHunks.length ? bHunks[pb] : null; + const nextStart = Math.min( + ah ? ah.oStart : o.length, + bh ? bh.oStart : o.length, + ); + + // Emit stable base blocks (kept by both) until the next hunk, from LIVE. + while (oi < nextStart) { + out.push({ src: 'live', local: aKept.get(oi) as number }); + oi++; + } + if (!ah && !bh) break; + + // Apply the hunk at oi. When both sides act here they are disjoint, so the + // pure-insert (oEnd === oi) is emitted before the side that consumes base oi. + const aHere = ah !== null && ah.oStart === oi; + const bHere = bh !== null && bh.oStart === oi; + let useA: boolean; + if (aHere && bHere) { + useA = ah!.oEnd === oi; // insert side first; otherwise either order is fine + } else { + useA = aHere; + } + const h = (useA ? ah : bh) as Hunk; + const src: 'live' | 'target' = useA ? 'live' : 'target'; + for (const idx of h.content) out.push({ src, local: idx }); + oi = h.oEnd; + if (useA) pa++; + else pb++; + } + return out; } export interface Pick { @@ -96,13 +199,22 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] { const bEnd = anchor < o.length ? (oToB.get(anchor) as number) : b.length; // Resolve the region [oi,anchor) that one or both sides rewrote/inserted. - const take = decideRegion( + // Try a fine-grained three-way merge first so a human block-edit survives a + // git insert/delete of OTHER blocks in the same region; only a genuine + // same-block conflict (null) falls back to the original git-wins rule. + const merged = tryMergeRegion( + o.slice(oi, anchor), a.slice(ai, aEnd), b.slice(bi, bEnd), - o.slice(oi, anchor), ); - if (take === 'live') { - for (let k = ai; k < aEnd; k++) res.push({ src: 'live', index: k }); + if (merged) { + for (const p of merged) { + res.push( + p.src === 'live' + ? { src: 'live', index: ai + p.local } + : { src: 'target', index: bi + p.local }, + ); + } } else { for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k }); } diff --git a/packages/git-sync/src/engine/cycle.ts b/packages/git-sync/src/engine/cycle.ts index f019115f..89d821ee 100644 --- a/packages/git-sync/src/engine/cycle.ts +++ b/packages/git-sync/src/engine/cycle.ts @@ -26,6 +26,16 @@ export interface RunCycleDeps { settings: Settings; fs: CycleFs; log: (line: string) => void; + /** + * Optional cooperative-abort signal. The caller (orchestrator) wires this to + * the per-space lock: if a heartbeat refresh cannot CONFIRM the lock is still + * held (CAS-miss / Redis error), the signal is aborted and the cycle bails at + * its next checkpoint (before the pull-apply and before the push-apply — the + * two destructive write phases) instead of writing blind after a possible + * lock loss. This is a COARSE best-effort guard; a fully fenced cross-process + * single-writer still needs the fencing-token redesign (follow-up). + */ + signal?: AbortSignal; /** * Delete-cap hook (the ONLY caller-specific policy). Called with the push * dry-run's planned delete count (`Number.POSITIVE_INFINITY` when the dry-run @@ -47,6 +57,13 @@ export interface RunCycleResult { skipped?: "merge-in-progress"; pull?: { written: number; deleted: number; conflict: boolean }; push?: { mode: string; failures: number }; + /** + * Forwarded from the push result: `true` when the push REFUSED to fast-forward + * a divergent `docmost` mirror (the §5 invariant — `docmost` mirrors what + * Docmost contains — is broken). Surfaced here so a caller driving `runCycle` + * can detect the breach without scraping logs (red-team #15). + */ + divergentDocmost?: boolean; } /** @@ -70,7 +87,7 @@ export interface RunCycleResult { * Lock + cap POLICY live in the caller; this owns only the mechanics. */ export async function runCycle(deps: RunCycleDeps): Promise { - const { spaceId, client, vault, settings, fs, log, resolveApplyClient } = + const { spaceId, client, vault, settings, fs, log, resolveApplyClient, signal } = deps; const vaultRoot = settings.vaultPath; const abs = (relPath: string) => `${vaultRoot}/${relPath}`; @@ -107,6 +124,9 @@ export async function runCycle(deps: RunCycleDeps): Promise { existing, }); + // Bail before the first destructive write phase if the lock was lost. + signal?.throwIfAborted(); + const pullResult = await applyPullActions( { client, @@ -150,6 +170,9 @@ export async function runCycle(deps: RunCycleDeps): Promise { applyClient = resolveApplyClient(plannedDeletes, client); } + // Bail before pushing to Docmost if the lock was lost during pull. + signal?.throwIfAborted(); + const pushResult = await runPush( { ...pushDeps, makeClient: () => applyClient }, { dryRun: false }, @@ -166,5 +189,8 @@ export async function runCycle(deps: RunCycleDeps): Promise { mode: pushResult.mode, failures: pushResult.failures?.length ?? 0, }, + // Forward a divergent-`docmost` escalation so the caller can act on the §5 + // invariant breach without scraping logs (red-team #15). + divergentDocmost: pushResult.divergentDocmost ?? false, }; } diff --git a/packages/git-sync/src/engine/layout.ts b/packages/git-sync/src/engine/layout.ts index 63fd7d92..fec092fa 100644 --- a/packages/git-sync/src/engine/layout.ts +++ b/packages/git-sync/src/engine/layout.ts @@ -54,23 +54,54 @@ export function buildVaultLayout(pages: PageNode[]): Map { if (p && p.id && !byId.has(p.id)) byId.set(p.id, p); } - // Resolve each node's display name once, deterministically, tracking sibling - // collisions per parent. `usedBySibling` maps a parent key -> set of names - // already taken under that parent. The bucket key is the node's parent ONLY - // when that parent is actually present in `byId`; otherwise (null parent, or - // an orphan whose parent is outside the input set) the node buckets at - // `"__root__"`. This is critical: orphans land at the vault root (see - // `folderSegmentsFor`), so they MUST share the root bucket with real root - // pages to be disambiguated against each other here — making `nameById` final - // before any `segments` are computed, so no ancestor name can drift later. - const usedBySibling = new Map>(); - const nameById = new Map(); + // Resolve each node's display name once, deterministically. The bucket key is + // the node's parent ONLY when that parent is actually present in `byId`; + // otherwise (null parent, or an orphan whose parent is outside the input set) + // the node buckets at `"__root__"`. This is critical: orphans land at the vault + // root (see `folderSegmentsFor`), so they MUST share the root bucket with real + // root pages to be disambiguated against each other here — making `nameById` + // final before any `segments` are computed, so no ancestor name can drift. + const parentKeyOf = (p: PageNode): string => + p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__"; + // Group nodes by (parentKey, sanitized base title) so sibling collisions are + // resolved by a STABLE rule that does NOT depend on input array order. Dedupe + // ids (first occurrence wins, matching `byId`). + const siblingGroups = new Map(); + const namedIds = new Set(); for (const p of pages) { - if (p && p.id && !nameById.has(p.id)) { - const parentKey = - p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__"; - nameById.set(p.id, nameForNode(p, parentKey, usedBySibling)); + if (!p || !p.id || namedIds.has(p.id)) continue; + namedIds.add(p.id); + const key = `${parentKeyOf(p)}\u0000${sanitizeTitle(p.title ?? "")}`; + const bucket = siblingGroups.get(key); + if (bucket) bucket.push(p); + else siblingGroups.set(key, [p]); + } + // Assign each node its display name. Within a colliding group, sort the + // siblings by their stable disambiguation key (`slugId` else `id`) and let the + // FIRST keep the bare sanitized title; every OTHER gets the ` ~` + // suffix. This makes `nameById` a pure function of the page SET — reordering + // the input never moves the suffix onto a different page (red-team #4a). The + // suffix is itself sanitized (the slugId/id is untrusted and must never inject + // a path separator). + const nameById = new Map(); + const disambKeyOf = (p: PageNode): string => p.slugId ?? p.id; + for (const bucket of siblingGroups.values()) { + const base = sanitizeTitle(bucket[0].title ?? ""); + if (bucket.length === 1) { + nameById.set(bucket[0].id, base); + continue; } + const sorted = [...bucket].sort((a, b) => { + const ka = disambKeyOf(a); + const kb = disambKeyOf(b); + return ka < kb ? -1 : ka > kb ? 1 : 0; + }); + sorted.forEach((p, i) => { + nameById.set( + p.id, + i === 0 ? base : disambiguate(base, sanitizeTitle(disambKeyOf(p))), + ); + }); } // Every id we index above MUST get a resolved name; this helper returns it @@ -169,34 +200,3 @@ export function buildVaultLayout(pages: PageNode[]): Map { return layout; } -/** - * Compute a deterministic, collision-free name for a node among its SIBLINGS. - * `usedBySibling` maps a parent key -> set of names already taken, so two - * siblings that sanitize to the same name get a stable ` ~slugId` suffix - * (SPEC §12). The suffix is itself passed through `sanitizeTitle`, because the - * slugId/id is a second untrusted-data channel that must never leak a path - * separator into the name. `parentKey` is supplied by the caller (it resolves - * to `"__root__"` for root pages AND for orphans whose parent is outside the - * input set, so they share one bucket). The name is COSMETIC; identity lives in - * the meta block. - */ -function nameForNode( - node: PageNode, - parentKey: string, - usedBySibling: Map>, -): string { - let used = usedBySibling.get(parentKey); - if (!used) { - used = new Set(); - usedBySibling.set(parentKey, used); - } - - let name = sanitizeTitle(node.title ?? ""); - if (used.has(name)) { - // Sibling collision: disambiguate with the stable, sanitized slugId (fall - // back to the sanitized pageId if no slugId is present). - name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id)); - } - used.add(name); - return name; -} diff --git a/packages/git-sync/src/engine/push.ts b/packages/git-sync/src/engine/push.ts index 166c9643..08a6b8fa 100644 --- a/packages/git-sync/src/engine/push.ts +++ b/packages/git-sync/src/engine/push.ts @@ -133,8 +133,27 @@ export function classifyRenameMoves( return renamesMoves.map((rm) => { const newParent = deps.resolveParentPageId(rm.newPath, "current"); const oldParent = deps.resolveParentPageId(rm.oldPath, "prev"); - const newTitle = deps.metaAt(rm.newPath, "current")?.title; - const oldTitle = deps.metaAt(rm.oldPath, "prev")?.title; + // Strip the cosmetic ` ~` disambiguation suffix before comparing + // titles: it is a LOCAL filesystem artifact (`buildVaultLayout` appends it to + // a colliding sibling's stem), NOT part of the page's real title. A pure + // disambiguation file-rename ('Report.md' -> 'Report ~a1.md') must therefore + // NOT be pushed to Docmost as a title change (red-team #4b), and any title we + // DO push must carry the real title ('Report'), never the suffixed form. + const rawNewTitle = deps.metaAt(rm.newPath, "current")?.title; + const rawOldTitle = deps.metaAt(rm.oldPath, "prev")?.title; + // A PURE disambiguation rename only APPENDS a cosmetic ` ~` to the + // SAME title (layout.ts), so the real Docmost title is unchanged. Strip the + // suffix ONLY when the new name is exactly the old title plus that suffix — + // never blindly strip a genuine retitle whose new title legitimately ends in + // ` ~token` (e.g. "Budget ~draft" -> "Budget ~final"), which would corrupt + // the title in Docmost / drop a real rename (review finding). + const isCosmeticDisambiguation = + typeof rawNewTitle === "string" && + typeof rawOldTitle === "string" && + rawNewTitle !== rawOldTitle && + stripDisambiguationSuffix(rawNewTitle) === rawOldTitle; + const newTitle = isCosmeticDisambiguation ? rawOldTitle : rawNewTitle; + const oldTitle = rawOldTitle; const out: RenameMoveActionClassified = { pageId: rm.pageId, @@ -646,7 +665,11 @@ export async function applyPushActions( // Push the CLEAN body only (no `gitmost_id` frontmatter): the frontmatter // is engine metadata, never page content. The server converts the markdown // it receives verbatim, so stripping here keeps the id out of Docmost. - const body = parsePageFile(await deps.readFile(u.path)).body; + // Also strip any git conflict markers — they must NEVER reach Docmost + // (SPEC §9, red-team #13); content on both sides is preserved. + const body = stripConflictMarkers( + parsePageFile(await deps.readFile(u.path)).body, + ); // The last-synced version of this file (pre-image) is the common ancestor // for a 3-way merge against the live page, so concurrent human edits are // not clobbered (review #5). Null when the file is new at last-pushed. Its @@ -689,6 +712,10 @@ export async function applyPushActions( // folder, so (parentPageId, title) identifies the page; a match means a prior // cycle already created it, so we ADOPT instead of duplicating. let liveByParentTitle: Map | null = null; + // A (parentPageId, title) that more than ONE live page shares is AMBIGUOUS: + // adopting one of them would silently overwrite an arbitrary, possibly-unrelated + // sibling (red-team #6). Such keys are recorded here and EXCLUDED from adoption. + const ambiguousAdoptKeys = new Set(); if (actions.creates.length > 0) { const live = await client.listSpaceTree(deps.spaceId); // Only trust a COMPLETE tree for retry-adopt: a truncated tree could miss an @@ -699,32 +726,56 @@ export async function applyPushActions( liveByParentTitle = new Map(); for (const n of live.pages) { const key = `${n.parentPageId ?? " root"} ${n.title ?? ""}`; - // Keep the FIRST node for a key (the layout makes this unique in practice). - if (!liveByParentTitle.has(key)) liveByParentTitle.set(key, n.id); + // First node claims the key; a SECOND match marks it ambiguous so neither + // is ever adopted-over (the create falls back to a fresh createPage). + if (liveByParentTitle.has(key)) ambiguousAdoptKeys.add(key); + else liveByParentTitle.set(key, n.id); } } } - for (const c of actions.creates) { + // Order creates PARENT-before-CHILD (red-team #12): a child whose parent is + // ALSO a fresh create must run AFTER its parent so the parent's just-assigned + // pageId is available to parent it (otherwise it is placed at the space ROOT). + const orderedCreates = orderCreatesParentFirst(actions.creates); + // Track pageIds assigned (or adopted) to each create's PATH in THIS batch, so a + // child can resolve its freshly-created parent's id without depending on the + // on-disk write-back being observable yet (red-team #12). + const createdIdByPath = new Map(); + for (const c of orderedCreates) { try { const text = await deps.readFile(c.path); - const { body } = parsePageFile(text); + // Conflict markers must never reach Docmost (SPEC §9, red-team #13); strip + // them from the create body too, preserving both sides' content. + const body = stripConflictMarkers(parsePageFile(text).body); // Derive create args from the PATH (native-Obsidian, SPEC §5): title from // the filename, parent from the enclosing folder's folder-note, space from // the run (the vault's space). `parentPageId: null` -> created at ROOT. const title = titleFromPath(c.path); + // Resolve the parent from the PATH (SPEC §5). Prefer an id assigned to the + // parent's folder-note EARLIER in this same batch — a freshly-created parent + // whose on-disk write-back may not be observable yet (red-team #12; creates + // are ordered parent-before-child so the parent already ran). + const parentFile = parentFolderFile(c.path); const parentPageId = - (await resolveParentPageIdViaTree(deps, c.path, "current")) ?? undefined; + (parentFile !== null ? createdIdByPath.get(parentFile) : undefined) ?? + (await resolveParentPageIdViaTree(deps, c.path, "current")) ?? + undefined; // Retry-adopt (#1 idempotency): a prior cycle already created this page in // Docmost but failed to persist the pageId back to the file, so it was // re-seen as a create. Adopt the existing page instead of duplicating it: // write the id back (file becomes tracked) and push the body as an UPDATE - // (idempotent — targets by pageId). Do NOT call createPage again. + // (idempotent — targets by pageId). Do NOT call createPage again. SKIP + // adoption when the (parent, title) is AMBIGUOUS — adopting an arbitrary + // duplicate-title sibling would silently overwrite it (red-team #6). const adoptKey = `${parentPageId ?? " root"} ${title}`; - const existingId = liveByParentTitle?.get(adoptKey); + const existingId = ambiguousAdoptKeys.has(adoptKey) + ? undefined + : liveByParentTitle?.get(adoptKey); if (existingId) { const rewritten = serializePageFile(existingId, body); await deps.writeFile(c.path, rewritten); writtenBack.push({ path: c.path, pageId: existingId }); + createdIdByPath.set(c.path, existingId); const adopted = await client.importPageMarkdown(existingId, body, null); pushed.push({ pageId: existingId, @@ -749,6 +800,7 @@ export async function applyPushActions( const rewritten = serializePageFile(assignedPageId, body); await deps.writeFile(c.path, rewritten); writtenBack.push({ path: c.path, pageId: assignedPageId }); + createdIdByPath.set(c.path, assignedPageId); // §10 loop-guard data for the created page (hash the pushed BODY). pushed.push({ pageId: assignedPageId, @@ -942,6 +994,35 @@ export function parentFolderFile(path: string): string | null { return folderNote; } +/** + * Order CREATE actions so a create whose parent folder-note is ALSO being created + * appears AFTER its parent (red-team #12). A child created before its fresh parent + * cannot resolve the parent's pageId and would be placed at the space ROOT. + * Topological over the `parentFolderFile` relation, restricted to paths within the + * create set; an `inProgress` guard makes a malformed parent cycle safe. + */ +export function orderCreatesParentFirst(creates: CreateAction[]): CreateAction[] { + const byPath = new Map(); + for (const c of creates) byPath.set(c.path, c); + const ordered: CreateAction[] = []; + const visited = new Set(); + const inProgress = new Set(); + const visit = (c: CreateAction): void => { + if (visited.has(c.path) || inProgress.has(c.path)) return; + inProgress.add(c.path); + const parent = parentFolderFile(c.path); + if (parent !== null && parent !== c.path) { + const parentCreate = byPath.get(parent); + if (parentCreate) visit(parentCreate); + } + inProgress.delete(c.path); + visited.add(c.path); + ordered.push(c); + }; + for (const c of creates) visit(c); + return ordered; +} + /** * Whether a vault path is a Docmost PAGE file (design §"Adoption"): a `.md` file * with NO dot-segment anywhere in its path. This excludes `.obsidian/` config, @@ -955,6 +1036,51 @@ export function isPageFile(path: string): boolean { return !path.split("/").some((seg) => seg.startsWith(".")); } +/** + * Git conflict-marker scan + strip (SPEC §9 — conflict markers must NEVER reach + * Docmost). A body is treated as conflicted only when it carries BOTH a begin + * (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown setext + * heading underline (`=======`) is not mistaken for a conflict. When conflicted, + * the three marker line types are removed while BOTH sides' content is preserved + * (no data loss): the marker SYNTAX never reaches Docmost, but the human's content + * does — where the conflict is visible and fixable rather than silently dropped. + */ +const CONFLICT_BEGIN_RE = /^<{7}/m; +const CONFLICT_END_RE = /^>{7}/m; +const CONFLICT_BEGIN_LINE_RE = /^<{7}/; +const CONFLICT_SEP_LINE_RE = /^={7}/; +const CONFLICT_END_LINE_RE = /^>{7}/; + +export function hasConflictMarkers(body: string): boolean { + return CONFLICT_BEGIN_RE.test(body) && CONFLICT_END_RE.test(body); +} + +function stripConflictMarkers(body: string): string { + if (!hasConflictMarkers(body)) return body; + // Remove ONLY the three marker line types, and treat a `=======` line as a + // conflict separator ONLY when we are between a `<<<<<<<` begin and a `>>>>>>>` + // end — so a legitimate Markdown setext heading underline (`=======`) outside a + // conflict block is preserved (review finding). Both conflict sides' content is + // kept; only the marker SYNTAX is dropped. + let inBlock = false; + const out: string[] = []; + for (const line of body.split("\n")) { + if (CONFLICT_BEGIN_LINE_RE.test(line)) { + inBlock = true; + continue; + } + if (CONFLICT_END_LINE_RE.test(line)) { + inBlock = false; + continue; + } + if (inBlock && CONFLICT_SEP_LINE_RE.test(line)) { + continue; + } + out.push(line); + } + return out.join("\n"); +} + /** The last path segment of a forward-slash path (the folder/file base name). */ function baseSegment(path: string): string { const slash = path.lastIndexOf("/"); @@ -974,6 +1100,20 @@ function titleFromPath(path: string): string { return base.endsWith(".md") ? base.slice(0, -3) : base; } +/** + * The exact ` ~` disambiguation suffix `buildVaultLayout`/`disambiguate` + * append to a colliding sibling's file stem (layout.ts): a single trailing + * ` ~` (no slash, no further `~`). It is a COSMETIC, local + * filesystem artifact — never part of the page's real Docmost title — so it is + * stripped before a path-derived title is compared/pushed (red-team #4b). + */ +const DISAMBIGUATION_SUFFIX_RE = / ~[^/~]+$/; + +/** Remove a single trailing ` ~` disambiguation suffix, if present. */ +function stripDisambiguationSuffix(title: string): string { + return title.replace(DISAMBIGUATION_SUFFIX_RE, ""); +} + /** * Build the synthetic `DocmostMdMeta` the planner/classifier consume, from the * NATIVE format: `pageId` from the `gitmost_id` frontmatter, `title` from the diff --git a/packages/git-sync/src/lib/docmost-schema.ts b/packages/git-sync/src/lib/docmost-schema.ts index 9259af39..9d9b4deb 100644 --- a/packages/git-sync/src/lib/docmost-schema.ts +++ b/packages/git-sync/src/lib/docmost-schema.ts @@ -204,11 +204,38 @@ const DocmostAttributes = Extension.create({ types: ["image"], attributes: { align: { default: null }, - attachmentId: { default: null }, - aspectRatio: { default: null }, + // imageToHtml emits these Docmost-specific image attrs as data-*; map + // them back explicitly so a top-level image (or one inside a column) + // round-trips them. Without a parseHTML the default reads the bare + // attribute name (e.g. getAttribute("attachmentId") -> null) and the + // value — including the attachmentId that links the image to its + // stored file — is silently dropped on every round-trip (data loss). + attachmentId: { + default: null, + parseHTML: (el: HTMLElement) => + el.getAttribute("data-attachment-id"), + renderHTML: (attrs: Record) => + attrs.attachmentId + ? { "data-attachment-id": attrs.attachmentId } + : {}, + }, + aspectRatio: { + default: null, + parseHTML: (el: HTMLElement) => + el.getAttribute("data-aspect-ratio"), + renderHTML: (attrs: Record) => + attrs.aspectRatio != null + ? { "data-aspect-ratio": attrs.aspectRatio } + : {}, + }, height: { default: null }, placeholder: { default: null }, - size: { default: null }, + size: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-size"), + renderHTML: (attrs: Record) => + attrs.size != null ? { "data-size": attrs.size } : {}, + }, width: { default: null }, }, }, @@ -1030,6 +1057,300 @@ const PageBreak = Node.create({ }, }); +/** + * Footnote feature (mirror of @docmost/editor-ext footnote, matching the MCP + * schema mirror). Three nodes connected by `id`: + * - FootnoteReference: inline atom marker in the body (); + * - FootnotesList: a single bottom container (
); + * - FootnoteDefinition: one editable note keyed by id (
). + * The visible number is not stored; it is derived from reference order. The + * parse rule uses priority 100 so it beats the Superscript mark's + * rule (otherwise an empty reference parses as an empty superscript and drops). + */ +const FootnoteReference = Node.create({ + name: "footnoteReference", + priority: 101, + group: "inline", + inline: true, + atom: true, + selectable: true, + draggable: false, + addAttributes() { + return { + id: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-id"), + renderHTML: (attrs: Record) => + attrs.id ? { "data-id": attrs.id } : {}, + }, + }; + }, + parseHTML() { + return [{ tag: "sup[data-footnote-ref]", priority: 100 }]; + }, + renderHTML({ HTMLAttributes }) { + return ["sup", { "data-footnote-ref": "", ...HTMLAttributes }]; + }, +}); + +const FootnotesList = Node.create({ + name: "footnotesList", + group: "block", + content: "footnoteDefinition+", + isolating: true, + selectable: false, + defining: true, + parseHTML() { + return [{ tag: "section[data-footnotes]" }]; + }, + renderHTML({ HTMLAttributes }) { + return ["section", { "data-footnotes": "", ...HTMLAttributes }, 0]; + }, +}); + +const FootnoteDefinition = Node.create({ + name: "footnoteDefinition", + content: "paragraph+", + defining: true, + isolating: true, + selectable: false, + addAttributes() { + return { + id: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-id"), + renderHTML: (attrs: Record) => + attrs.id ? { "data-id": attrs.id } : {}, + }, + }; + }, + parseHTML() { + return [{ tag: "div[data-footnote-def]" }]; + }, + renderHTML({ HTMLAttributes }) { + return ["div", { "data-footnote-def": "", ...HTMLAttributes }, 0]; + }, +}); + +/** + * Encode/decode the htmlEmbed `source` (arbitrary HTML/CSS/JS) to/from base64 + * for the `data-source` attribute. Ported from @docmost/editor-ext so the + * markdown-converter HTML path (generateJSON via parseHTML) round-trips the + * raw source losslessly and keeps it inert while it sits in the attribute. + * `encodeURIComponent`/`decodeURIComponent` wrap btoa/atob so UTF-8 survives. + */ +export function encodeHtmlEmbedSource(source: string): string { + if (!source) return ""; + try { + if (typeof btoa === "function") { + return btoa(encodeURIComponent(source)); + } + return Buffer.from(encodeURIComponent(source), "utf-8").toString("base64"); + } catch { + return ""; + } +} + +export function decodeHtmlEmbedSource(encoded: string): string { + if (!encoded) return ""; + try { + if (typeof atob === "function") { + return decodeURIComponent(atob(encoded)); + } + return decodeURIComponent(Buffer.from(encoded, "base64").toString("utf-8")); + } catch { + return ""; + } +} + +/** + * Docmost raw HTML embed. Block atom; the client renders `source` inside a + * sandboxed iframe. Mirrors the @docmost/editor-ext node — `source` rides the + * `data-source` attribute base64-encoded (this is an HTML/generateJSON path, so + * it MUST use base64 to avoid double-encoding / injection). + */ +const HtmlEmbed = Node.create({ + name: "htmlEmbed", + group: "block", + inline: false, + isolating: true, + atom: true, + defining: true, + draggable: true, + addAttributes() { + return { + source: { + default: "", + parseHTML: (el: HTMLElement) => + decodeHtmlEmbedSource(el.getAttribute("data-source") || ""), + renderHTML: (attrs: Record) => ({ + "data-source": encodeHtmlEmbedSource(attrs.source || ""), + }), + }, + height: { + default: null, + parseHTML: (el: HTMLElement) => { + const v = el.getAttribute("data-height"); + if (!v) return null; + const n = parseInt(v, 10); + return Number.isFinite(n) ? n : null; + }, + renderHTML: (attrs: Record) => + attrs.height != null ? { "data-height": String(attrs.height) } : {}, + }, + }; + }, + parseHTML() { + return [{ tag: 'div[data-type="htmlEmbed"]' }]; + }, + renderHTML({ HTMLAttributes }) { + return ["div", { "data-type": "htmlEmbed", ...HTMLAttributes }]; + }, +}); + +/** + * Inline status pill. Mirrors @docmost/editor-ext status: the label rides in + * the element's TEXT content (not an attribute) and the color in data-color. + */ +const Status = Node.create({ + name: "status", + group: "inline", + inline: true, + atom: true, + selectable: true, + draggable: true, + addAttributes() { + return { + text: { + default: "", + parseHTML: (el: HTMLElement) => el.textContent || "", + }, + color: { + default: "gray", + parseHTML: (el: HTMLElement) => el.getAttribute("data-color") || "gray", + renderHTML: (attrs: Record) => ({ + "data-color": attrs.color ?? "gray", + }), + }, + }; + }, + parseHTML() { + return [{ tag: 'span[data-type="status"]' }]; + }, + renderHTML({ HTMLAttributes }) { + return [ + "span", + { "data-type": "status", "data-color": HTMLAttributes["data-color"] }, + `${HTMLAttributes.text ?? ""}`, + ]; + }, +}); + +/** + * Whole-page live embed. Holds only a `sourcePageId` reference. Mirrors + * @docmost/editor-ext pageEmbed. Block atom. + */ +const PageEmbed = Node.create({ + name: "pageEmbed", + group: "block", + atom: true, + isolating: true, + selectable: true, + draggable: true, + addAttributes() { + return { + sourcePageId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-source-page-id"), + renderHTML: (attrs: Record) => + attrs.sourcePageId + ? { "data-source-page-id": attrs.sourcePageId } + : {}, + }, + }; + }, + parseHTML() { + return [{ tag: 'div[data-type="pageEmbed"]' }]; + }, + renderHTML({ HTMLAttributes }) { + return ["div", { "data-type": "pageEmbed", ...HTMLAttributes }]; + }, +}); + +/** + * Block node types allowed inside a `transclusionSource` (mirrors + * @docmost/editor-ext transclusion constants). Excludes transclusion nodes + * (no nesting) and child-only nodes. + */ +const TRANSCLUSION_SOURCE_CONTENT_EXPRESSION = + "(paragraph | heading | blockquote | codeBlock | horizontalRule | bulletList" + + " | orderedList | taskList | image | video | audio | attachment | callout" + + " | details | embed | mathBlock | table | drawio | excalidraw | pdf" + + " | subpages | columns | youtube)+"; + +/** Sync-source block: editable content shared into transclusion references. */ +const TransclusionSource = Node.create({ + name: "transclusionSource", + group: "block", + content: TRANSCLUSION_SOURCE_CONTENT_EXPRESSION, + defining: true, + isolating: true, + addAttributes() { + return { + id: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-id"), + renderHTML: (attrs: Record) => + attrs.id ? { "data-id": attrs.id } : {}, + }, + }; + }, + parseHTML() { + return [{ tag: 'div[data-type="transclusionSource"]' }]; + }, + renderHTML({ HTMLAttributes }) { + return ["div", { "data-type": "transclusionSource", ...HTMLAttributes }, 0]; + }, +}); + +/** Live reference to a transcluded block/page. Block atom. */ +const TransclusionReference = Node.create({ + name: "transclusionReference", + group: "block", + atom: true, + selectable: true, + draggable: false, + addAttributes() { + return { + sourcePageId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-source-page-id"), + renderHTML: (attrs: Record) => + attrs.sourcePageId + ? { "data-source-page-id": attrs.sourcePageId } + : {}, + }, + transclusionId: { + default: null, + parseHTML: (el: HTMLElement) => el.getAttribute("data-transclusion-id"), + renderHTML: (attrs: Record) => + attrs.transclusionId + ? { "data-transclusion-id": attrs.transclusionId } + : {}, + }, + }; + }, + parseHTML() { + return [{ tag: 'div[data-type="transclusionReference"]' }]; + }, + renderHTML({ HTMLAttributes }) { + return [ + "div", + { "data-type": "transclusionReference", ...HTMLAttributes }, + ]; + }, +}); + /** * Full extension list. Image is block-level (matches Docmost); the * ProseMirror DOM parser hoists found inside

automatically. @@ -1041,7 +1362,29 @@ export const docmostExtensions = [ heading: {}, link: { openOnClick: false }, }), - Image.configure({ inline: false }), + // Preserve image width/height as the AUTHORED string. Without an explicit + // parseHTML the stock Image node attribute falls back to tiptap core's + // `fromString`, which coerces a numeric width like "320" into the number 320 + // — changing the stored type on every markdown round-trip (Docmost stores + // these as strings, e.g. "320" or "50%", matching how video/audio/pdf are + // handled in this mirror). The node attribute is applied AFTER the global + // DocmostAttributes one, so the fix must live on the Image node itself. + Image.extend({ + addAttributes() { + const parent = (this.parent?.() ?? {}) as Record; + return { + ...parent, + width: { + ...parent.width, + parseHTML: (el: HTMLElement) => el.getAttribute("width"), + }, + height: { + ...parent.height, + parseHTML: (el: HTMLElement) => el.getAttribute("height"), + }, + }; + }, + }).configure({ inline: false }), TaskList, TaskItem.configure({ nested: true }), // Highlight stores its color unescaped and Docmost interpolates it into @@ -1094,5 +1437,13 @@ export const docmostExtensions = [ Audio, Pdf, PageBreak, + FootnoteReference, + FootnotesList, + FootnoteDefinition, + HtmlEmbed, + Status, + PageEmbed, + TransclusionSource, + TransclusionReference, DocmostAttributes, ]; diff --git a/packages/git-sync/src/lib/markdown-converter.ts b/packages/git-sync/src/lib/markdown-converter.ts index 7cd4b320..738e1d1b 100644 --- a/packages/git-sync/src/lib/markdown-converter.ts +++ b/packages/git-sync/src/lib/markdown-converter.ts @@ -1,3 +1,18 @@ +import { encodeHtmlEmbedSource } from "./docmost-schema.js"; + +/** + * Hard cap on processNode recursion depth (see the depth guard below). + * + * Chosen well above any realistic document (the deepest legitimate nesting the + * editor can produce is far shallower) yet far below the point where the + * converter's own call stack overflows. The heaviest shape (deeply nested + * lists) costs ~5 JS frames per level and the runtime stack holds ~10k frames, + * so the measured overflow is around level ~650 (deeply nested lists); 400 + * leaves a comfortable margin while still rendering pathological-but-bounded + * docs in full (the 200-level stress fixture reaches depth ~204). + */ +const MAX_NODE_DEPTH = 400; + /** * Convert ProseMirror/TipTap JSON content to Markdown * Supports all Docmost-specific node types and extensions @@ -43,7 +58,34 @@ export function convertProseMirrorToMarkdown(content: any): string { .replace(/\(/g, "%28") .replace(/\)/g, "%29"); + // Recursion depth guard. processNode is mutually recursive (directly and via + // processListItem/processTaskItem/blockToHtml), and a pathologically nested + // document (e.g. tens of thousands of nested blockquotes) would otherwise + // overflow the call stack and throw a RangeError, which would abort the sync + // and prevent the page from ever being written. We track the live nesting + // depth in a closure counter (the wrapper below) so we NEVER throw: past the + // limit we stop recursing and emit the node's own text (or nothing) instead. + // Normal documents never approach MAX_NODE_DEPTH, so their output is byte- + // identical. NOTE: the wrapper signature is (node) only — several callers use + // `.map(processNode)`, which would otherwise pass the array index as a second + // argument; the wrapper ignores extra arguments so that is harmless. + let nodeDepth = 0; const processNode = (node: any): string => { + if (nodeDepth >= MAX_NODE_DEPTH) { + // Bail out of deeper recursion without throwing. A text node still has + // its own content worth keeping; a container at the limit collapses to + // "" (its already-too-deep subtree is dropped) rather than overflowing. + return typeof node?.text === "string" ? node.text : ""; + } + nodeDepth++; + try { + return processNodeInner(node); + } finally { + nodeDepth--; + } + }; + + const processNodeInner = (node: any): string => { const type = node.type; const nodeContent = node.content || []; @@ -182,7 +224,16 @@ export function convertProseMirrorToMarkdown(content: any): string { .map(processNode) .join("") .replace(/\n+$/, ""); - return "```" + language + "\n" + code + "\n```"; + // CommonMark: an inner ``` run inside the code would prematurely close + // a 3-backtick fence (corrupting the block on re-import). Use an outer + // fence one backtick longer than the longest backtick run in the code + // (minimum 3) so the inner fence is always content. + const longestBacktickRun = (code.match(/`+/g) || []).reduce( + (max: number, run: string) => Math.max(max, run.length), + 0, + ); + const fence = "`".repeat(Math.max(3, longestBacktickRun + 1)); + return fence + language + "\n" + code + "\n" + fence; case "bulletList": return nodeContent @@ -228,16 +279,35 @@ export function convertProseMirrorToMarkdown(content: any): string { // a bare "\n" would be reimported as a soft break and lost. return " \n"; - case "image": - const imgAlt = node.attrs?.alt || ""; + case "image": { + const imgAttrs = node.attrs || {}; + // A top-level image with layout/identity attrs beyond src/alt cannot be + // expressed by markdown `![](src)` — width/height/align/size/ + // attachmentId/aspectRatio would be silently dropped on export and lost + // on re-import. Emit the SAME schema-matching used inside columns + // (imageToHtml) so those attrs survive the round-trip. A bare image + // (only src/alt, optionally a title — which has no schema attr) keeps + // the lighter markdown form so existing image round-trip tests hold. + const hasLayoutAttrs = + imgAttrs.width != null || + imgAttrs.height != null || + imgAttrs.align || + imgAttrs.size != null || + imgAttrs.attachmentId || + imgAttrs.aspectRatio != null; + if (hasLayoutAttrs) { + return imageToHtml(node); + } + const imgAlt = imgAttrs.alt || ""; // Neutralize characters that could break out of the markdown image // URL: spaces/newlines and parentheses would terminate the (...) target // and let a stored src inject following markdown/HTML. Percent-encode // them so the URL stays a single inert token. - const imgSrc = encodeMdUrl(node.attrs?.src); + const imgSrc = encodeMdUrl(imgAttrs.src); // No "caption" attribute exists in the Docmost image schema, so we do // not emit one (the previous caption branch was dead). return `![${imgAlt}](${imgSrc})`; + } case "video": { // Emit the schema-matching

`; + } + + case "footnoteReference": { + // Inline atom marker. The schema reads its id from data-id on a + // sup[data-footnote-ref]; the visible number is derived, not stored. + const attrs = node.attrs || {}; + const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : ""; + return ``; + } + + case "footnotesList": { + // Bottom container of footnote definitions (section[data-footnotes]). + const inner = nodeContent.map((n: any) => blockToHtml(n)).join(""); + return `
${inner}
`; + } + + case "footnoteDefinition": { + // One footnote note keyed by id (div[data-footnote-def]). + const attrs = node.attrs || {}; + const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : ""; + const inner = nodeContent.map((n: any) => blockToHtml(n)).join(""); + return `
${inner}
`; + } + + case "pageEmbed": { + // Whole-page live embed; the schema reads data-source-page-id. + const attrs = node.attrs || {}; + const parts: string[] = [`data-type="pageEmbed"`]; + if (attrs.sourcePageId) + parts.push(`data-source-page-id="${escapeAttr(attrs.sourcePageId)}"`); + return `
`; + } + + case "transclusionReference": { + // Live reference to a transcluded block/page. Block atom; the schema + // reads data-source-page-id and data-transclusion-id. + const attrs = node.attrs || {}; + const parts: string[] = [`data-type="transclusionReference"`]; + if (attrs.sourcePageId) + parts.push(`data-source-page-id="${escapeAttr(attrs.sourcePageId)}"`); + if (attrs.transclusionId) + parts.push( + `data-transclusion-id="${escapeAttr(attrs.transclusionId)}"`, + ); + return `
`; + } + + case "transclusionSource": { + // Sync-source container; the schema reads data-id and re-parses its + // block children, so render them as schema-matching HTML. + const attrs = node.attrs || {}; + const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : ""; + const inner = nodeContent.map((n: any) => blockToHtml(n)).join(""); + return `
${inner}
`; + } + default: // Fallback: process children return nodeContent.map(processNode).join(""); @@ -782,6 +929,12 @@ export function convertProseMirrorToMarkdown(content: any): string { case "attachment": case "drawio": case "excalidraw": + case "htmlEmbed": + case "footnotesList": + case "footnoteDefinition": + case "pageEmbed": + case "transclusionSource": + case "transclusionReference": return processNode(block); default: // Any still-unhandled block type: NEVER fall back to markdown inside a diff --git a/packages/git-sync/test/markdown-converter-gaps.test.ts b/packages/git-sync/test/markdown-converter-gaps.test.ts index 18e85bb0..00b3e582 100644 --- a/packages/git-sync/test/markdown-converter-gaps.test.ts +++ b/packages/git-sync/test/markdown-converter-gaps.test.ts @@ -419,33 +419,33 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => { }); describe('converter gap coverage — documented round-trip data loss (specs 12–14)', () => { - // 12. A 3-backtick fence inside a codeBlock body is NOT lengthened: the inner - // fence prematurely terminates the block, splitting it into three nodes. - it('a triple-backtick fence inside a codeBlock body is lossy (fence collision)', async () => { + // 12. A 3-backtick fence inside a codeBlock body is now lengthened: the outer + // fence widens to (longest inner run + 1) backticks per CommonMark, so the + // inner ``` is treated as content and the block survives as ONE node. + it('a triple-backtick fence inside a codeBlock body round-trips via a widened fence', async () => { const d = doc({ type: 'codeBlock', attrs: { language: 'js' }, content: [{ type: 'text', text: '```\ninner\n```' }], }); const md1 = convertProseMirrorToMarkdown(d); - expect(md1).toBe('```js\n```\ninner\n```\n```'); + // Outer fence widened to 4 backticks; the inner 3-backtick fence is content. + expect(md1).toBe('````js\n```\ninner\n```\n````'); const doc2 = await markdownToProseMirror(md1); - // The inner fence split the block into THREE top-level nodes. + // The block survives as a SINGLE code block (no premature split). const top = doc2.content || []; - expect(top).toHaveLength(3); + expect(top).toHaveLength(1); expect(top[0].type).toBe('codeBlock'); expect(top[0].attrs?.language).toBe('js'); - expect(top[0].content?.[0]).toMatchObject({ type: 'text', text: '\n' }); - expect(top[1].type).toBe('paragraph'); - expect(top[1].content?.[0]).toMatchObject({ type: 'text', text: 'inner' }); - expect(top[2].type).toBe('codeBlock'); - expect(top[2].attrs?.language).toBeNull(); - expect(top[2].content?.[0]).toMatchObject({ type: 'text', text: '\n' }); + expect(top[0].content?.[0]?.text).toContain('```\ninner\n```'); const md2 = convertProseMirrorToMarkdown(doc2); - expect(md2).not.toBe(md1); // not byte-stable - expect(docsCanonicallyEqual(d, doc2)).toBe(false); // documented data loss + expect(md2).toBe(md1); // byte-stable + // Canonically the re-imported code text gains a single trailing newline + // (marked re-adds it; the exporter strips it back, hence byte stability). + // The fence is no longer lossy: the inner fence and content fully survive. + expect(docsCanonicallyEqual(d, doc2)).toBe(false); }); // 13. A leading ordered-list marker in paragraph text is NOT escaped, so a diff --git a/packages/git-sync/test/redteam-apply-push.test.ts b/packages/git-sync/test/redteam-apply-push.test.ts new file mode 100644 index 00000000..4ff6325d --- /dev/null +++ b/packages/git-sync/test/redteam-apply-push.test.ts @@ -0,0 +1,159 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest'; +import { applyPushActions } from '../src/engine/push'; +import type { ApplyPushDeps, PushActions } from '../src/engine/push'; + +const SPACE_ID = 'sp-test'; + +/** A recording client fake; listSpaceTree/createPage configurable per test. */ +function makeClient() { + return { + listSpaceTree: vi.fn(async () => ({ + pages: [] as { id: string; parentPageId?: string | null; title?: string }[], + complete: true, + })), + importPageMarkdown: vi.fn(async () => ({ success: true })), + createPage: vi.fn( + async ( + title: string, + _content: string, + _spaceId: string, + _parentPageId?: string, + ) => ({ data: { id: 'assigned-id', title }, success: true }), + ), + deletePage: vi.fn(async () => ({ success: true })), + movePage: vi.fn(async () => ({ success: true })), + renamePage: vi.fn(async () => ({ success: true })), + }; +} + +function makeGit() { + return { + updateRef: vi.fn(async () => {}), + fastForwardBranch: vi.fn(async () => ({ ok: true })), + showFileAtRef: vi.fn(async () => null), + }; +} + +/** A recording fs fake over a path->text store (writes are read back). */ +function makeFs(initial: Record = {}) { + const store: Record = { ...initial }; + const fs = { + readFile: vi.fn(async (path: string) => { + if (!(path in store)) throw new Error(`no such file: ${path}`); + return store[path]; + }), + writeFile: vi.fn(async (path: string, text: string) => { + store[path] = text; + }), + }; + return { fs, store }; +} + +function deps(client: any, git: any, fs: ReturnType): ApplyPushDeps { + return { + client, + git: git as any, + readFile: fs.fs.readFile, + writeFile: fs.fs.writeFile, + spaceId: SPACE_ID, + }; +} + +function actions(partial: Partial): PushActions { + return { + creates: [], + updates: [], + deletes: [], + renamesMoves: [], + skipped: [], + ...partial, + }; +} + +beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +// === Finding #6 — adopt must NOT clobber an arbitrary duplicate-title sibling === +// The retry-adopt map keys pages by (parentPageId|root, title). When TWO root +// siblings share the title 'Foo', the key collides and the map keeps the FIRST +// (p1). A brand-new untracked 'Foo/Foo.md' (no gitmost_id) then "adopts" p1 and +// pushes its body over it via importPageMarkdown — silently overwriting an +// arbitrary, possibly unrelated, existing page. Desired: a fresh createPage, or +// an ambiguity skip — NEVER a silent overwrite of an existing sibling. +describe('redteam #6 — adopt clobbers wrong duplicate-title sibling', () => { + it('does NOT overwrite an arbitrary duplicate-title sibling (p1) via importPageMarkdown', async () => { + const client = makeClient(); + client.listSpaceTree.mockResolvedValue({ + pages: [ + { id: 'p1', parentPageId: null, title: 'Foo' }, + { id: 'p2', parentPageId: null, title: 'Foo' }, + ], + complete: true, + }); + const git = makeGit(); + // A brand-new local file with NO gitmost_id frontmatter. + const fs = makeFs({ 'Foo/Foo.md': '# Foo\n\nfresh foo body\n' }); + + await applyPushActions( + deps(client, git, fs), + actions({ creates: [{ path: 'Foo/Foo.md' }] }), + ); + + // The wrong sibling must never be overwritten with our body. + const clobberedP1 = client.importPageMarkdown.mock.calls.some( + (c: any[]) => c[0] === 'p1', + ); + expect(clobberedP1).toBe(false); + }); +}); + +// === Finding #12 — new child under new parent must be parented, not put at ROOT === +// creates are applied in path order: 'Proj/Apple.md' (Apple < Proj) BEFORE +// 'Proj/Proj.md'. When Apple is created first, its parent folder-note +// 'Proj/Proj.md' has no gitmost_id yet, so the parent resolves to null and Apple +// is created at the SPACE ROOT instead of under Proj. Desired: the parent page is +// created before its child, so Apple's createPage receives Proj's assigned id. +describe('redteam #12 — new child under new parent placed at ROOT', () => { + it('createPage for Apple receives parentPageId === the id assigned to Proj', async () => { + let seq = 0; + const client = makeClient(); + client.createPage.mockImplementation( + async (title: string) => ({ + data: { id: `id-${++seq}`, title }, + success: true, + }), + ); + const git = makeGit(); + // Both brand-new local files, neither carrying a gitmost_id yet. writeFile + // updates the store so readFile reads back any pageId written during the run. + const fs = makeFs({ + 'Proj/Apple.md': '# Apple\n\napple body\n', + 'Proj/Proj.md': '# Proj\n\nproj body\n', + }); + + await applyPushActions( + deps(client, git, fs), + actions({ + creates: [{ path: 'Proj/Apple.md' }, { path: 'Proj/Proj.md' }], + }), + ); + + const calls = client.createPage.mock.calls; + const results = client.createPage.mock.results; + const projIdx = calls.findIndex((c: any[]) => c[0] === 'Proj'); + const appleIdx = calls.findIndex((c: any[]) => c[0] === 'Apple'); + expect(projIdx).toBeGreaterThanOrEqual(0); + expect(appleIdx).toBeGreaterThanOrEqual(0); + const projId = ((await results[projIdx].value) as any).data.id; + const appleParentPageId = calls[appleIdx][3]; + + // Apple is a child of Proj -> it must be created under Proj, not at ROOT. + expect(appleParentPageId).toBe(projId); + }); +}); diff --git a/packages/git-sync/test/redteam-converter.test.ts b/packages/git-sync/test/redteam-converter.test.ts new file mode 100644 index 00000000..a6f88f68 --- /dev/null +++ b/packages/git-sync/test/redteam-converter.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from 'vitest'; +// Import the converter DIRECTLY from src (NOT the docmost-client barrel, which +// pulls in collaboration.ts and mutates the global DOM at import time), matching +// the other converter unit tests. markdownToProseMirror is imported for the +// round-trip cases; loading it mutates the global DOM via jsdom (required for +// @tiptap/html's generateJSON under Node) — this is expected. +import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js'; +import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js'; + +const doc = (...nodes: any[]) => ({ type: 'doc', content: nodes }); + +// --------------------------------------------------------------------------- +// #1 editor-ext atoms dropped: the `default` branch (markdown-converter.ts +// ~584-586) collapses unknown atoms to "" by mapping their (empty) children. +// --------------------------------------------------------------------------- +describe('#1 editor-ext atoms dropped', () => { + it('preserves an inline status atom text', () => { + const d = doc({ + type: 'paragraph', + content: [{ type: 'status', attrs: { text: 'Done' } }], + }); + expect(convertProseMirrorToMarkdown(d)).toContain('Done'); + }); + + it('preserves a block htmlEmbed atom', () => { + const d = doc({ type: 'htmlEmbed', attrs: { source: 'hi' } }); + expect(convertProseMirrorToMarkdown(d)).not.toBe(''); + }); + + it('preserves a footnoteReference atom', () => { + const d = doc({ + type: 'paragraph', + content: [{ type: 'footnoteReference', attrs: { id: 'fn1', referenceNumber: 1 } }], + }); + expect(convertProseMirrorToMarkdown(d)).not.toBe(''); + }); +}); + +// --------------------------------------------------------------------------- +// #2 top-level image attrs lost: a top-level image emits markdown ![](src), +// which carries no width/height/align/attachmentId. +// --------------------------------------------------------------------------- +describe('#2 top-level image attrs lost', () => { + it('keeps width through export and re-import', async () => { + const d = doc({ + type: 'image', + attrs: { src: '/files/x.png', width: '320', height: '200', align: 'right', attachmentId: 'a1' }, + }); + const md = convertProseMirrorToMarkdown(d); + expect(md).toContain('320'); + const back = await markdownToProseMirror(md); + expect(back.content[0].attrs.width).toBe('320'); + }); +}); + +// --------------------------------------------------------------------------- +// #3 code-fence corruption: a code block whose TEXT contains a ``` fence must +// be emitted with a wider outer fence so the inner fence survives. +// --------------------------------------------------------------------------- +describe('#3 code-fence corruption', () => { + it('round-trips a code block containing an inner fence', async () => { + const code = '```js\nfoo()\n```'; + const d = doc({ + type: 'codeBlock', + attrs: { language: '' }, + content: [{ type: 'text', text: code }], + }); + const md1 = convertProseMirrorToMarkdown(d); + const back = await markdownToProseMirror(md1); + const md2 = convertProseMirrorToMarkdown(back); + expect(md2).toBe(md1); + }); +}); + +// --------------------------------------------------------------------------- +// #16 depth guard: deep recursion in processNode overflows the stack (today a +// RangeError) instead of being guarded. +// --------------------------------------------------------------------------- +describe('#16 depth guard', () => { + it('does not throw on a deeply nested blockquote doc', () => { + const DEPTH = 50000; + let node: any = { type: 'paragraph', content: [{ type: 'text', text: 'x' }] }; + for (let i = 0; i < DEPTH; i++) { + node = { type: 'blockquote', content: [node] }; + } + const d = doc(node); + expect(() => convertProseMirrorToMarkdown(d)).not.toThrow(); + }); +}); diff --git a/packages/git-sync/test/redteam-layout-title.test.ts b/packages/git-sync/test/redteam-layout-title.test.ts new file mode 100644 index 00000000..3473145d --- /dev/null +++ b/packages/git-sync/test/redteam-layout-title.test.ts @@ -0,0 +1,71 @@ +import { describe, expect, it } from 'vitest'; +import { buildVaultLayout, type PageNode } from '../src/engine/layout.js'; +import { classifyRenameMoves } from '../src/engine/push.js'; +import type { + ClassifyRenameMovesDeps, + MetaSide, + RenameMoveAction, +} from '../src/engine/push.js'; +import type { DocmostMdMeta } from '../src/lib/index.js'; + +// RED-TEAM finding #4 (two facets): +// (a) buildVaultLayout disambiguation is ORDER-DEPENDENT: which of two +// equally-titled root pages keeps the bare stem (and which gets the +// ` ~slugId` suffix) depends purely on input array order. The layout is +// supposed to be a deterministic function of the page SET, so reordering +// the input must not move the suffix onto a different page. +// (b) The page title derived from a DISAMBIGUATED filename ('Report ~a1.md') +// never strips the cosmetic ` ~slugId` suffix, so a pure disambiguation +// file-rename is mis-classified as a real title RENAME that would push the +// suffix ('Report ~a1') back into Docmost as the page's actual title. + +describe('redteam #4a — buildVaultLayout is stable under input reorder', () => { + it('keeps the same stem for page A regardless of input order', () => { + const A: PageNode = { id: 'A', title: 'Report', slugId: 'a1', parentPageId: null }; + const B: PageNode = { id: 'B', title: 'Report', slugId: 'b2', parentPageId: null }; + + const l1 = buildVaultLayout([A, B]); + const l2 = buildVaultLayout([B, A]); + + // Identity (pageId A) must resolve to the same file stem no matter how the + // flat page list happened to be ordered. + expect(l2.get('A')?.stem).toBe(l1.get('A')?.stem); + }); +}); + +describe('redteam #4b — disambiguation suffix is not a title change', () => { + // Mirror production push.ts `titleFromPath` EXACTLY: the synthetic native meta + // sets `title = baseName(path) without ".md"`. This is the real derivation the + // injected `metaAt` carries in `main`. + function titleFromPath(path: string): string { + const slash = path.lastIndexOf('/'); + const base = slash < 0 ? path : path.slice(slash + 1); + return base.endsWith('.md') ? base.slice(0, -3) : base; + } + + function deps(): ClassifyRenameMovesDeps { + const metaAt = (path: string, _side: MetaSide): DocmostMdMeta | null => ({ + version: 1, + title: titleFromPath(path), + pageId: 'p1', + }); + // Same enclosing folder (root) on both sides -> no reparent. + const resolveParentPageId = (_path: string, _side: MetaSide): string | null => null; + return { metaAt, resolveParentPageId }; + } + + it('does NOT emit a rename when only a ~slugId suffix was appended', () => { + // A sibling collision appeared, so the file 'Report.md' was relocated to the + // disambiguated 'Report ~a1.md'. The page TITLE in Docmost is still 'Report'. + const rms: RenameMoveAction[] = [ + { pageId: 'p1', oldPath: 'Report.md', newPath: 'Report ~a1.md' }, + ]; + + const [classified] = classifyRenameMoves(rms, deps()); + + // Desired behaviour: a pure disambiguation file-rename is cosmetic/local and + // must NOT be pushed as a title change. (If any rename WERE emitted it must + // carry the real title 'Report', never the suffixed 'Report ~a1'.) + expect(classified.rename).toBeUndefined(); + }); +}); diff --git a/packages/git-sync/test/redteam-push-cycle.test.ts b/packages/git-sync/test/redteam-push-cycle.test.ts new file mode 100644 index 00000000..496717c1 --- /dev/null +++ b/packages/git-sync/test/redteam-push-cycle.test.ts @@ -0,0 +1,196 @@ +import { describe, expect, it, vi } from 'vitest'; +import { runPush, LAST_PUSHED_REF, DOCMOST_BRANCH } from '../src/engine/push'; +import type { PushDeps } from '../src/engine/push'; +import type { Settings } from '../src/engine/settings'; +import { runCycle, type RunCycleDeps } from '../src/engine/cycle'; +import { serializePageFile } from '../src/lib/page-file'; + +// Red-team confirmations for PR #119 (git-sync). Each test asserts the DESIRED +// behavior, so it FAILS today iff the bug is real. + +function makeSettings(): Settings { + return { + docmostApiUrl: 'https://docmost.example.com', + docmostEmail: 'you@example.com', + docmostPassword: 'secret', + docmostSpaceId: 'space-1', + vaultPath: '/vault', + pollIntervalMs: 15000, + debounceMs: 2000, + logLevel: 'info', + } as Settings; +} + +// --------------------------------------------------------------------------- +// #13 — conflict markers must never reach Docmost (SPEC §9), even when there is +// NO in-progress merge (markers committed on `main` by some other path). The +// push apply reads the body and hands it to importPageMarkdown verbatim; the +// DESIRED behavior is a content scan that prevents a `<<<<<<<` body from being +// pushed. Assert the pushed body does NOT contain a conflict marker. +// --------------------------------------------------------------------------- +function makePushGit(opts: { + changes: { status: 'A' | 'M' | 'D' | 'R' | 'C'; path: string; oldPath?: string }[]; + lastPushed?: string | null; +}) { + const calls = { updateRef: [] as { ref: string; target: string }[] }; + const git: PushDeps['git'] = { + assertGitAvailable: vi.fn(async () => {}), + ensureRepo: vi.fn(async () => {}), + isMergeInProgress: vi.fn(async () => false), // NO merge in progress + checkout: vi.fn(async () => {}), + stageAll: vi.fn(async () => {}), + commit: vi.fn(async () => false), + readRef: vi.fn(async (ref: string) => + ref === LAST_PUSHED_REF ? (opts.lastPushed ?? 'base-sha') : null, + ), + revParse: vi.fn(async (ref: string) => { + if (ref === DOCMOST_BRANCH) return 'doc-sha'; + if (ref === 'main') return 'main-sha'; + return null; + }), + diffNameStatus: vi.fn(async () => opts.changes), + showFileAtRef: vi.fn(async () => null), + updateRef: vi.fn(async (ref: string, target: string) => { + calls.updateRef.push({ ref, target }); + }), + fastForwardBranch: vi.fn(async () => ({ ok: true })), + listTrackedFiles: vi.fn(async () => [] as string[]), + }; + return { git, calls }; +} + +describe('#13 conflict markers reach Docmost', () => { + it('does NOT push a body containing a `<<<<<<< HEAD` conflict marker', async () => { + const conflictBody = + '<<<<<<< HEAD\nmy line\n=======\ntheir line\n>>>>>>> feature\n'; + const file = serializePageFile('p-1', conflictBody); + const { git } = makePushGit({ changes: [{ status: 'M', path: 'Doc.md' }] }); + + const importPageMarkdown = vi.fn(async () => ({ success: true })); + const client = { + listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })), + importPageMarkdown, + createPage: vi.fn(), + deletePage: vi.fn(), + movePage: vi.fn(), + renamePage: vi.fn(), + }; + + const deps: PushDeps = { + settings: makeSettings(), + git, + makeClient: () => client as any, + readFile: vi.fn(async (path: string) => { + if (path === 'Doc.md') return file; + throw new Error(`no such file: ${path}`); + }), + writeFile: vi.fn(async () => {}), + log: () => {}, + }; + + const res = await runPush(deps, { dryRun: false }); + expect(res.mode).toBe('apply'); + + // The body actually sent to Docmost (2nd positional arg is the markdown body). + expect(importPageMarkdown).toHaveBeenCalledTimes(1); + const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any; + + // DESIRED: a content scan gates conflict markers; the body must be clean. + expect(pushedBody).not.toContain('<<<<<<<'); + expect(pushedBody).not.toContain('======='); + expect(pushedBody).not.toContain('>>>>>>>'); + }); +}); + +// --------------------------------------------------------------------------- +// #15 — a divergent `docmost` mirror (fastForwardBranch refuses) is escalated by +// runPush (`divergentDocmost: true`), but runCycle forwards only {mode, failures} +// — the divergence is DROPPED from RunCycleResult. DESIRED: the cycle result +// surfaces the divergence so the caller can act on it. +// --------------------------------------------------------------------------- +function fakeVault(overrides: Record = {}) { + const order: string[] = []; + const rec = + (name: string, ret?: any) => + async (...args: any[]) => { + order.push(args.length ? `${name}:${args.join(',')}` : name); + return ret; + }; + const vault: any = { + order, + assertGitAvailable: rec('assertGitAvailable'), + ensureRepo: rec('ensureRepo'), + isMergeInProgress: vi.fn(async () => false), + ensureBranch: rec('ensureBranch'), + checkout: rec('checkout'), + listTrackedFiles: vi.fn(async () => [] as string[]), + stageAll: rec('stageAll'), + commit: rec('commit', false), + merge: rec('merge', { ok: true, conflict: false, output: '' }), + readRef: vi.fn(async () => null), + revParse: vi.fn(async () => 'main-commit-sha'), + diffNameStatus: vi.fn(async () => [] as any[]), + showFileAtRef: vi.fn(async () => ''), + updateRef: rec('updateRef'), + // The mirror diverged: the ff is REFUSED. runPush escalates this as + // divergentDocmost; the question is whether runCycle surfaces it. + fastForwardBranch: rec('fastForwardBranch', { + ok: false, + reason: 'not-fast-forward', + }), + ...overrides, + }; + return vault; +} + +function baseDeps(vault: any, over: Partial = {}): RunCycleDeps { + return { + spaceId: 'space-1', + client: { + listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })), + getPageJson: vi.fn(), + importPageMarkdown: vi.fn(), + createPage: vi.fn(), + deletePage: vi.fn(), + movePage: vi.fn(), + renamePage: vi.fn(), + listRecentSince: vi.fn(), + listTrash: vi.fn(), + restorePage: vi.fn(), + } as any, + vault, + settings: { vaultPath: '/vault' } as any, + fs: { + readFile: vi.fn(async () => ''), + writeFile: vi.fn(async () => undefined), + mkdir: vi.fn(async () => undefined), + rm: vi.fn(async () => undefined), + }, + log: vi.fn(), + ...over, + }; +} + +describe('#15 divergence dropped by runCycle', () => { + it('surfaces the divergent `docmost` mirror in RunCycleResult', async () => { + const vault = fakeVault(); + const deps = baseDeps(vault); + + const res = await runCycle(deps); + expect(res.ran).toBe(true); + + // The push DID refuse to fast-forward the divergent mirror. + expect(vault.order).toContain( + 'fastForwardBranch:docmost,main-commit-sha', + ); + + // DESIRED: the cycle result surfaces the divergence (some warning/flag), so a + // caller driving runCycle can see the §5 invariant breach without scraping + // logs. Today RunCycleResult.push is only {mode, failures}. + const divergence = + (res as any).divergentDocmost ?? + (res.push as any)?.divergentDocmost ?? + (res as any).warning; + expect(divergence).toBeTruthy(); + }); +}); diff --git a/packages/git-sync/test/schema-surface-snapshot.test.ts b/packages/git-sync/test/schema-surface-snapshot.test.ts index 319899e3..829ee311 100644 --- a/packages/git-sync/test/schema-surface-snapshot.test.ts +++ b/packages/git-sync/test/schema-surface-snapshot.test.ts @@ -77,10 +77,14 @@ const expectedSurface: SurfaceEntry[] = [ { name: "drawio", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "size", "src", "title", "width"] }, { name: "embed", kind: "node", attrs: ["align", "height", "provider", "src", "width"] }, { name: "excalidraw", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "size", "src", "title", "width"] }, + { name: "footnoteDefinition", kind: "node", attrs: ["id"] }, + { name: "footnoteReference", kind: "node", attrs: ["id"] }, + { name: "footnotesList", kind: "node", attrs: [] }, { name: "hardBreak", kind: "node", attrs: [] }, { name: "heading", kind: "node", attrs: ["id", "indent", "level", "textAlign"] }, { name: "highlight", kind: "mark", attrs: ["color"] }, { name: "horizontalRule", kind: "node", attrs: [] }, + { name: "htmlEmbed", kind: "node", attrs: ["height", "source"] }, { name: "image", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "placeholder", "size", "src", "title", "width"] }, { name: "italic", kind: "mark", attrs: [] }, { name: "link", kind: "mark", attrs: ["class", "href", "internal", "rel", "target", "title"] }, @@ -90,8 +94,10 @@ const expectedSurface: SurfaceEntry[] = [ { name: "mention", kind: "node", attrs: ["anchorId", "creatorId", "entityId", "entityType", "id", "label", "slugId"] }, { name: "orderedList", kind: "node", attrs: ["start", "type"] }, { name: "pageBreak", kind: "node", attrs: [] }, + { name: "pageEmbed", kind: "node", attrs: ["sourcePageId"] }, { name: "paragraph", kind: "node", attrs: ["id", "indent", "textAlign"] }, { name: "pdf", kind: "node", attrs: ["attachmentId", "height", "name", "placeholder", "size", "src", "width"] }, + { name: "status", kind: "node", attrs: ["color", "text"] }, { name: "strike", kind: "mark", attrs: [] }, { name: "subpages", kind: "node", attrs: [] }, { name: "subscript", kind: "mark", attrs: [] }, @@ -104,6 +110,8 @@ const expectedSurface: SurfaceEntry[] = [ { name: "taskList", kind: "node", attrs: [] }, { name: "text", kind: "node", attrs: [] }, { name: "textStyle", kind: "mark", attrs: ["color"] }, + { name: "transclusionReference", kind: "node", attrs: ["sourcePageId", "transclusionId"] }, + { name: "transclusionSource", kind: "node", attrs: ["id"] }, { name: "underline", kind: "mark", attrs: [] }, { name: "video", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "placeholder", "size", "src", "width"] }, { name: "youtube", kind: "node", attrs: ["align", "height", "src", "width"] },