diff --git a/apps/server/src/collaboration/collaboration.handler.git-sync.spec.ts b/apps/server/src/collaboration/collaboration.handler.git-sync.spec.ts index 2aadfbde..a9a7bcea 100644 --- a/apps/server/src/collaboration/collaboration.handler.git-sync.spec.ts +++ b/apps/server/src/collaboration/collaboration.handler.git-sync.spec.ts @@ -147,6 +147,86 @@ describe('CollaborationHandler.gitSyncWriteBody (owner-routed body write)', () = ]); }); + it('FLUSHES the pending debounced store BEFORE merging so an in-flight edit survives (finding #2)', async () => { + // QA #119 finding #2: the 3-way merge must run against the latest live-doc + // state. A concurrent UI edit that is still in-flight (the store is debounced) + // must be drained into the live doc BEFORE git merges, or git clean-applies and + // the edit is silently dropped — even on a DIFFERENT block. Model the drain via + // the pending-store flush: when it runs, the in-flight block-0 edit lands. + const shared = new Y.Doc(); + const frag = shared.getXmlFragment('default'); + shared.transact(() => { + frag.insert( + 0, + [ + { text: 'alpha', id: 'p1' }, + { text: 'beta', id: 'p2' }, + ].map((s) => { + const el = new Y.XmlElement('paragraph'); + el.setAttribute('id', s.id); + const t = new Y.XmlText(); + t.insert(0, s.text); + el.insert(0, [t]); + return el; + }), + ); + }); + + const order: string[] = []; + const debouncer = { + isDebounced: jest.fn(() => true), + executeNow: jest.fn(async () => { + order.push('flush'); + // The in-flight client edit to block 0 only lands once the pending store + // is flushed (i.e. the event loop is drained) — BEFORE the merge. + shared.transact(() => + ((frag.get(0) as Y.XmlElement).get(0) as Y.XmlText).insert(5, ' EDIT'), + ); + }), + }; + const openDirectConnection = jest.fn(async () => ({ + transact: async (fn: (doc: Y.Doc) => void) => { + order.push('merge'); + fn(shared); + }, + disconnect: jest.fn(async () => undefined), + })); + const hocuspocus = { openDirectConnection, debouncer } as any; + + const handler = new CollaborationHandler(); + await handler.getHandlers(hocuspocus).gitSyncWriteBody('page.x', { + prosemirrorJson: pmDoc('alpha', 'beta2'), // git changes block 1 + baseProsemirrorJson: pmDoc('alpha', 'beta'), + userId: 'svc-user', + }); + + // The flush ran, and it ran BEFORE the merge transaction. + expect(debouncer.executeNow).toHaveBeenCalledTimes(1); + expect(order).toEqual(['flush', 'merge']); + // Both the in-flight block-0 edit and git's block-1 change survive — the + // pre-flush bug would have produced ['alpha', 'beta2'] (UI edit dropped). + expect(texts(shared.getXmlFragment('default'))).toEqual([ + 'alpha EDIT', + 'beta2', + ]); + }); + + it('does not flush when no store is pending (isDebounced false)', async () => { + const { hocuspocus, shared } = fakeHocuspocus([{ text: 'a', id: 'p1' }]); + const executeNow = jest.fn(); + (hocuspocus as any).debouncer = { + isDebounced: jest.fn(() => false), + executeNow, + }; + const handler = new CollaborationHandler(); + await handler.getHandlers(hocuspocus).gitSyncWriteBody('page.x', { + prosemirrorJson: pmDoc('a', 'b'), + userId: 'svc-user', + }); + expect(executeNow).not.toHaveBeenCalled(); + expect(texts(shared.getXmlFragment('default'))).toEqual(['a', 'b']); + }); + it('crash-safe: a transform failure never opens the connection or mutates the live doc', async () => { const { hocuspocus, shared } = fakeHocuspocus([{ text: 'alpha', id: 'p1' }]); const before = texts(shared.getXmlFragment('default')); diff --git a/apps/server/src/collaboration/collaboration.handler.ts b/apps/server/src/collaboration/collaboration.handler.ts index f4e4d34a..5d56150e 100644 --- a/apps/server/src/collaboration/collaboration.handler.ts +++ b/apps/server/src/collaboration/collaboration.handler.ts @@ -158,6 +158,24 @@ export class CollaborationHandler { ) : null; + // CONCURRENT-EDIT FLUSH (QA #119, finding #2). The 3-way merge below runs + // against the LIVE Y.Doc, so a concurrent UI edit is only preserved if it + // is already part of that doc. A user's edit is debounced before it lands + // (the editor batches; the collab store is debounced up to 10s), so the + // merge could otherwise run against a PRE-EDIT doc: git would then + // clean-apply (no same-block conflict detected) and the in-flight UI edit + // — even on a DIFFERENT block — would be silently dropped. + // + // Flushing the pending debounced store here (a) drains the event loop so a + // just-arrived client Yjs update is applied to the live doc BEFORE we + // merge, and (b) persists the live doc so the merge baseline is current + // even on the doc-reload-from-DB path. After the flush the merge sees the + // latest state, so an edit on a different block is MERGED (not overwritten) + // and a genuine same-block edit is detected as a conflict -> the + // boundary-snapshot in PersistenceExtension pins it to page history + // (recoverable) instead of vanishing silently. + await this.flushPendingStore(hocuspocus, documentName); + // actor:'git-sync' + the service user flow into PersistenceExtension // (lastUpdatedSource='git-sync', lastUpdatedById=userId). await this.withYdocConnection( @@ -195,6 +213,33 @@ export class CollaborationHandler { }; } + /** + * Flush any pending DEBOUNCED store for `documentName` so the live Y.Doc and the + * DB are current BEFORE a git-sync merge reads them (QA #119, finding #2 — + * concurrent UI edit silently lost). Mirrors the PersistenceExtension.onDisconnect + * flush: only acts when a store is actually pending (`isDebounced`), runs the + * SAME scheduled payload (`executeNow`, preserving the edit's context/actor), and + * never throws — a flush failure must not abort the git-sync write. Awaiting it + * also drains the event loop, so a client Yjs update sitting in the socket buffer + * is applied to the live doc before the merge transaction runs. + */ + private async flushPendingStore( + hocuspocus: Hocuspocus, + documentName: string, + ): Promise { + const debounceId = `onStoreDocument-${documentName}`; + try { + const debouncer = (hocuspocus as any)?.debouncer; + if (!debouncer?.isDebounced?.(debounceId)) return; + await debouncer.executeNow(debounceId); + } catch (err) { + this.logger.warn( + `git-sync pre-merge flush failed for ${documentName}: ` + + (err instanceof Error ? err.message : String(err)), + ); + } + } + async withYdocConnection( hocuspocus: Hocuspocus, documentName: string, diff --git a/apps/server/src/collaboration/merge/yjs-body-merge.callout.spec.ts b/apps/server/src/collaboration/merge/yjs-body-merge.callout.spec.ts index 4204cf7e..844ba2a1 100644 --- a/apps/server/src/collaboration/merge/yjs-body-merge.callout.spec.ts +++ b/apps/server/src/collaboration/merge/yjs-body-merge.callout.spec.ts @@ -152,8 +152,18 @@ describe('git-sync callout type fidelity (QA "callout type -> [!info]")', () => }); } + it('maps a known GitHub/Obsidian alias to the editor banner (tip -> success)', async () => { + // `tip` is not a schema callout type — it is an input alias the editor itself + // maps onto the supported set (GITHUB_ALERT_TYPE_MAP: tip -> success). git-sync + // mirrors that so the ingest lands on the closest banner instead of flatly info. + const content = editorPage('tip'); + const gitContent = await gitRoundTrip(content); + const co = gitContent.find((b: any) => b.type === 'callout'); + expect(co?.attrs?.type).toBe('success'); + }); + it('flattens a genuinely unknown callout type to info', async () => { - const content = editorPage('tip'); // not an editor-canonical type + const content = editorPage('banana'); // not a type and not a known alias const gitContent = await gitRoundTrip(content); const co = gitContent.find((b: any) => b.type === 'callout'); expect(co?.attrs?.type).toBe('info'); diff --git a/apps/server/src/integrations/git-sync/http/git-http.service.spec.ts b/apps/server/src/integrations/git-sync/http/git-http.service.spec.ts index b229cc62..fe68fd4e 100644 --- a/apps/server/src/integrations/git-sync/http/git-http.service.spec.ts +++ b/apps/server/src/integrations/git-sync/http/git-http.service.spec.ts @@ -46,7 +46,10 @@ interface Built { abilityFactory: { createForUser: AnyMock }; abilityCan: AnyMock; vaultRegistry: { ensureServable: AnyMock }; - orchestrator: { ingestExternalPush: AnyMock }; + orchestrator: { + ingestExternalPush: AnyMock; + serveReadAdvertisement: AnyMock; + }; backend: { run: AnyMock }; } @@ -88,7 +91,14 @@ function build(opts: BuildOptions = {}): Built { }; const vaultRegistry = { ensureServable: jest.fn(async () => undefined) }; - const orchestrator = { ingestExternalPush: jest.fn(async () => undefined) }; + const orchestrator = { + ingestExternalPush: jest.fn(async () => undefined), + // The read-advertisement wrapper pins HEAD under the lock then serves; the + // mock just runs the serve callback so the read path still hits backend.run. + serveReadAdvertisement: jest.fn( + async (_spaceId: string, serve: () => Promise) => serve(), + ), + }; const backend = { run: jest.fn(async () => undefined) }; const service = new GitHttpService( @@ -231,6 +241,48 @@ describe('GitHttpService.handle', () => { expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled(); }); + it('upload-pack ref advertisement is served HEAD-pinned via serveReadAdvertisement (bug #3)', async () => { + // GET info/refs?service=git-upload-pack carries the HEAD symref a clone reads + // for its default branch, so it must be served with HEAD pinned to `main` + // (under the lock) — not streamed raw — or a clone racing a mid-pull cycle + // would default to the read-only `docmost` mirror. + const built = build({ abilityCan: true }); + const { reply } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/info/refs?service=git-upload-pack', + method: 'GET', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + expect(built.orchestrator.serveReadAdvertisement).toHaveBeenCalledTimes(1); + expect(built.orchestrator.serveReadAdvertisement.mock.calls[0][0]).toBe( + 'space-1', + ); + // The wrapper still streams the backend (the mock runs the serve callback). + expect(built.backend.run).toHaveBeenCalledTimes(1); + expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled(); + }); + + it('a POST git-upload-pack pack fetch streams directly (no HEAD-pin needed, resolved by SHA)', async () => { + // The pack negotiation is object-SHA based; only the ref advertisement carries + // the HEAD symref, so the pack POST streams the backend directly (no lock). + const built = build({ abilityCan: true }); + const { reply } = fakeReply(); + const req = fakeRequest({ + url: '/git/space-1.git/git-upload-pack', + method: 'POST', + authorization: basic('dev@example.com', 'pw'), + }); + + await built.service.handle(req, reply); + + expect(built.orchestrator.serveReadAdvertisement).not.toHaveBeenCalled(); + expect(built.backend.run).toHaveBeenCalledTimes(1); + expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled(); + }); + it('cloud deployment resolves the workspace by the host subdomain', async () => { const built = build({ selfHosted: false }); const { reply } = fakeReply(); diff --git a/apps/server/src/integrations/git-sync/http/git-http.service.ts b/apps/server/src/integrations/git-sync/http/git-http.service.ts index 2cfcdd33..b1967bfa 100644 --- a/apps/server/src/integrations/git-sync/http/git-http.service.ts +++ b/apps/server/src/integrations/git-sync/http/git-http.service.ts @@ -376,7 +376,25 @@ export class GitHttpService implements OnModuleDestroy { const isReceivePack = req.method === 'POST' && parsedPath.subpath === 'git-receive-pack'; if (serviceKind === 'read' || !isReceivePack) { - await this.backend.run(backendRequest, rawReq, rawRes); + // The clone's default branch comes from the HEAD symref advertised by the + // upload-pack ref advertisement (or a dumb `GET HEAD`). The engine + // transiently checks out the read-only `docmost` mirror mid-cycle, so serve + // THAT advertisement with HEAD pinned to `main` under the per-space lock so + // a clone never defaults to `docmost` (bug #3). Pack streaming and every + // other read are resolved by object SHA and need no pin, so they stream + // directly (no lock) as before. + const isReadAdvertise = + req.method === 'GET' && + ((parsedPath.subpath === 'info/refs' && + service === 'git-upload-pack') || + parsedPath.subpath === 'HEAD'); + if (isReadAdvertise) { + await this.orchestrator.serveReadAdvertisement(spaceId, () => + this.backend.run(backendRequest, rawReq, rawRes), + ); + } else { + await this.backend.run(backendRequest, rawReq, rawRes); + } return; } diff --git a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts index 94094bc3..1d73e98f 100644 --- a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts +++ b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts @@ -118,6 +118,7 @@ function build(opts: BuildOptions = {}): Built { ensureBranch: jest.fn(async () => undefined), checkout: jest.fn(async () => undefined), listTrackedFiles: jest.fn(async () => []), + pinHeadToMain: jest.fn(async () => undefined), ...(vaultOverrides as Record), }; const vaultRegistry = { @@ -380,6 +381,11 @@ describe('GitSyncOrchestrator', () => { expect(order).toEqual(['receive-pack', 'cycle']); }); + // Explicit timeout: ingestExternalPush exhausts the full bounded + // acquire-retry budget (GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS = 5_000ms) before it + // gives up and throws, which races jest's DEFAULT 5_000ms test timeout — flaky + // on a loaded/slow runner. Give it headroom so it deterministically observes + // the eventual LockHeldError instead of timing out first. it('throws GitSyncLockHeldError and does NOT run the receive-pack when the lock is held', async () => { const built = build(); built.redis.set.mockResolvedValue(null); // acquire fails → lock-held @@ -392,7 +398,7 @@ describe('GitSyncOrchestrator', () => { // We must never write to the working tree concurrently with a cycle. expect(runReceivePack).not.toHaveBeenCalled(); expect(runCycleMock).not.toHaveBeenCalled(); - }); + }, 15_000); it('swallows a post-push cycle error (the push is durable; poll retries)', async () => { jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined); @@ -444,6 +450,37 @@ describe('GitSyncOrchestrator', () => { }); }); + describe('serveReadAdvertisement (bug #3 — stable advertised HEAD)', () => { + it('pins HEAD to main and serves under the space lock', async () => { + const built = build(); + const serve = jest.fn(async () => undefined); + + await built.orchestrator.serveReadAdvertisement('space-1', serve); + + // The lock was taken (redis SET NX) and released (CAS eval). + expect(built.redis.set).toHaveBeenCalledTimes(1); + expect(built.redis.eval).toHaveBeenCalled(); + // HEAD pinned BEFORE serving, on the right vault. + expect(built.vaultRegistry.getVault).toHaveBeenCalledWith('space-1'); + expect(built.vault.pinHeadToMain).toHaveBeenCalledTimes(1); + expect(serve).toHaveBeenCalledTimes(1); + const pinOrder = built.vault.pinHeadToMain.mock.invocationCallOrder[0]; + const serveOrder = serve.mock.invocationCallOrder[0]; + expect(pinOrder).toBeLessThan(serveOrder); + }); + + it('serves WITHOUT a pin/lock when git-sync is globally disabled', async () => { + const built = build({ enabled: false }); + const serve = jest.fn(async () => undefined); + + await built.orchestrator.serveReadAdvertisement('space-1', serve); + + expect(serve).toHaveBeenCalledTimes(1); + expect(built.redis.set).not.toHaveBeenCalled(); + expect(built.vault.pinHeadToMain).not.toHaveBeenCalled(); + }); + }); + describe('module lifecycle', () => { it('registers exactly one interval on init and tears it down idempotently on destroy', () => { const built = build(); diff --git a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts index b00cf58c..f9f8cf13 100644 --- a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts +++ b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.ts @@ -305,6 +305,53 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy { } } + /** + * Serve a git smart-HTTP READ ADVERTISEMENT (`GET info/refs?service=git-upload-pack` + * or a dumb `GET HEAD`) with the repo's symbolic `HEAD` deterministically pinned + * to `main` (bug #3). The advertised `HEAD` symref decides a clone's default + * branch; the engine transiently checks out the read-only `docmost` mirror during + * a cycle, so an unsynchronized advertisement could route a clone to `docmost` + * (~1/4 of clones under continuous syncing). + * + * Running the pin + the advertisement under the SAME per-space lock the cycle + * uses guarantees no cycle is mid-flight while we pin (HEAD cannot flap) and that + * the pin never corrupts a cycle's checkout. The advertisement is cheap (a ref + * listing, no pack stream), so holding the lock for it is fine. A bounded + * retry-acquire absorbs a brief overlap with a cycle; if the lock still cannot be + * taken (a long cycle), we fall back to serving WITHOUT the pin — the cycle's + * finally-restore leaves HEAD on `main` between cycles, so the advertisement is + * still almost always correct (degrades only under sustained contention). + */ + async serveReadAdvertisement( + spaceId: string, + serve: () => Promise, + ): Promise { + if (!this.environmentService.isGitSyncEnabled()) { + await serve(); + return; + } + const result = await this.spaceLock.withSpaceLock( + spaceId, + async () => { + const vault = await this.vaultRegistry.getVault(spaceId); + await vault.pinHeadToMain(); + await serve(); + }, + { + acquireRetry: { + timeoutMs: GIT_SYNC_PUSH_LOCK_RETRY_TOTAL_MS, + baseMs: GIT_SYNC_PUSH_LOCK_RETRY_BASE_MS, + maxMs: GIT_SYNC_PUSH_LOCK_RETRY_MAX_MS, + }, + }, + ); + // Lock contended for the whole budget (in-progress / another replica): serve + // anyway. `serve` (backend.run) never ran inside the lock in this case. + if (typeof result === 'object' && result !== null && 'skipped' in result) { + await serve(); + } + } + /** * Drive ONE reconcile cycle for a space. The PULL->PUSH branch choreography * lives in the engine's `runCycle` (so it can never drift from the engine it diff --git a/packages/git-sync/src/engine/cycle.ts b/packages/git-sync/src/engine/cycle.ts index 9f01ed5b..00e17c28 100644 --- a/packages/git-sync/src/engine/cycle.ts +++ b/packages/git-sync/src/engine/cycle.ts @@ -1,4 +1,4 @@ -import { VaultGit } from "./git.js"; +import { VaultGit, DEFAULT_BRANCH } from "./git.js"; import { GitSyncClient } from "./client.types.js"; import { Settings } from "./settings.js"; import { readExisting, computePullActions, applyPullActions } from "./pull.js"; @@ -142,67 +142,87 @@ export async function runCycle(deps: RunCycleDeps): Promise { } } - // 3. Pull writes happen on `docmost`; be on it BEFORE applying (see docstring). - await vault.ensureBranch("docmost", "main"); - await vault.checkout("docmost"); + try { + // 3. Pull writes happen on `docmost`; be on it BEFORE applying (see docstring). + await vault.ensureBranch("docmost", "main"); + await vault.checkout("docmost"); - // 4. PULL -------------------------------------------------------------------- - const existing = await readExisting({ - listTracked: () => vault.listTrackedFiles("*.md"), - readFile: (relPath) => safeFs.readFile(abs(relPath)), - }); + // 4. PULL ------------------------------------------------------------------ + const existing = await readExisting({ + listTracked: () => vault.listTrackedFiles("*.md"), + readFile: (relPath) => safeFs.readFile(abs(relPath)), + }); - const tree = await client.listSpaceTree(spaceId); - const pullActions = computePullActions({ - pages: tree.pages, - treeComplete: tree.complete, - existing, - }); + const tree = await client.listSpaceTree(spaceId); + const pullActions = computePullActions({ + pages: tree.pages, + treeComplete: tree.complete, + existing, + }); - // Bail before the first destructive write phase if the lock was lost. - signal?.throwIfAborted(); + // Bail before the first destructive write phase if the lock was lost. + signal?.throwIfAborted(); - const pullResult = await applyPullActions( - { - client, + const pullResult = await applyPullActions( + { + client, + git: vault, + writeFile: (absPath, text) => safeFs.writeFile(absPath, text), + mkdir: (absDir) => safeFs.mkdir(absDir), + rm: (absPath) => safeFs.rm(absPath), + log, + }, + pullActions, + vaultRoot, + ); + + // 5. PUSH ------------------------------------------------------------------ + const pushDeps = { + settings, git: vault, - writeFile: (absPath, text) => safeFs.writeFile(absPath, text), - mkdir: (absDir) => safeFs.mkdir(absDir), - rm: (absPath) => safeFs.rm(absPath), + makeClient: () => client, + readFile: (relPath: string) => safeFs.readFile(abs(relPath)), + writeFile: (relPath: string, text: string) => + safeFs.writeFile(abs(relPath), text), log, - }, - pullActions, - vaultRoot, - ); + }; - // 5. PUSH -------------------------------------------------------------------- - const pushDeps = { - settings, - git: vault, - makeClient: () => client, - readFile: (relPath: string) => safeFs.readFile(abs(relPath)), - writeFile: (relPath: string, text: string) => safeFs.writeFile(abs(relPath), text), - log, - }; + // Bail before pushing to Docmost if the lock was lost during pull. + signal?.throwIfAborted(); - // Bail before pushing to Docmost if the lock was lost during pull. - signal?.throwIfAborted(); + const pushResult = await runPush(pushDeps, { dryRun: false }); - const pushResult = await runPush(pushDeps, { dryRun: false }); - - return { - ran: true, - pull: { - written: pullResult.written, - deleted: pullResult.deleted, - conflict: pullResult.merge.conflict, - }, - push: { - mode: pushResult.mode, - failures: pushResult.failures?.length ?? 0, - }, - // Forward a divergent-`docmost` escalation so the caller can act on the §5 - // invariant breach without scraping logs (red-team #15). - divergentDocmost: pushResult.divergentDocmost ?? false, - }; + return { + ran: true, + pull: { + written: pullResult.written, + deleted: pullResult.deleted, + conflict: pullResult.merge.conflict, + }, + push: { + mode: pushResult.mode, + failures: pushResult.failures?.length ?? 0, + }, + // Forward a divergent-`docmost` escalation so the caller can act on the §5 + // invariant breach without scraping logs (red-team #15). + divergentDocmost: pushResult.divergentDocmost ?? false, + }; + } finally { + // STABLE SERVED HEAD (bug #3). The pull transiently checks out the read-only + // `docmost` mirror, and the smart-HTTP host advertises whatever HEAD resolves + // to — so a clone racing a cycle could default to `docmost`. The happy path + // already ends on `main` (runPush), but a throw mid-pull would leave HEAD on + // `docmost`; restore it here so the advertised default branch is `main` BETWEEN + // cycles. Best-effort: skipped if the lock was lost (do not write the working + // tree after a possible takeover), and a failing checkout (e.g. a dirty tree + // from an aborted write) is swallowed — the next cycle's recovery resyncs and + // the read advertisement pins HEAD under the lock regardless. + if (!signal?.aborted) { + try { + await vault.checkout(DEFAULT_BRANCH); + } catch { + /* best-effort: next cycle recovers; advertisement pins HEAD under lock */ + } + } + } } diff --git a/packages/git-sync/src/engine/git.ts b/packages/git-sync/src/engine/git.ts index 9c029235..39d52255 100644 --- a/packages/git-sync/src/engine/git.ts +++ b/packages/git-sync/src/engine/git.ts @@ -683,6 +683,43 @@ export class VaultGit { if (r.code !== 0) return null; return r.stdout; } + + /** + * Read ONE side of a conflicted file from the merge index (`git show :N:path`), + * where the stage `N` is the standard 3-way merge slot: + * 1 = merge BASE (common ancestor), 2 = OURS (the current branch = `main`), + * 3 = THEIRS (the merged-in branch = `docmost`). + * Returns the blob text, or `null` when that stage is absent (e.g. an add/add + * conflict has no base, a modify/delete conflict has only one content side). + * + * Used by the pull cycle (SPEC §9) to RESOLVE a conflicted docmost->main merge + * deterministically instead of committing raw conflict markers onto the + * published `main`: a conflict whose two sides differ ONLY in trailing/empty + * lines is SPURIOUS (normalize -> identical -> clean), and a genuine conflict is + * resolved to a clean side (no `<<<<<<<`/`>>>>>>>` markers ever reach `main`). + */ + async showStage(stage: 1 | 2 | 3, path: string): Promise { + const r = await this.runRaw(["show", `:${stage}:${path}`]); + if (r.code !== 0) return null; + return r.stdout; + } + + /** + * Pin the repo's symbolic `HEAD` to `main` WITHOUT touching the working tree or + * index (`git symbolic-ref HEAD refs/heads/main`). The smart-HTTP host advertises + * whatever `HEAD` resolves to as the clone's default branch, so a clone that + * races a cycle mid-pull (when the engine has transiently checked out the + * read-only `docmost` mirror) would otherwise default to `docmost`. Pinning HEAD + * back to the canonical writable branch makes the advertised symref deterministic. + * + * symbolic-ref only rewrites `.git/HEAD`; it does NOT move the working tree, so + * it must only ever run when the working tree is ALREADY on `main` (between + * cycles / under the per-space lock with no cycle in flight) — otherwise HEAD and + * the index would desync. Callers serialize this with the engine via the lock. + */ + async pinHeadToMain(): Promise { + await this.run(["symbolic-ref", "HEAD", `refs/heads/${DEFAULT_BRANCH}`]); + } } /** diff --git a/packages/git-sync/src/engine/pull.ts b/packages/git-sync/src/engine/pull.ts index 771735bd..b541c67a 100644 --- a/packages/git-sync/src/engine/pull.ts +++ b/packages/git-sync/src/engine/pull.ts @@ -65,6 +65,26 @@ function relToAbs(vaultRoot: string, relPath: string): string { return [vaultRoot, ...relPath.split("/")].join("/"); } +/** + * Canonicalize a file's TRAILING whitespace: drop any trailing blank / + * whitespace-only lines (and trailing spaces on the last line) and end with + * exactly one newline; an empty body becomes a single "\n". This matches + * `serializePageFile`'s trailing form (`body.trim()` + a single "\n"). + * + * Why (SPEC §9 spurious-conflict fix): the engine writes pages in their + * normalize-on-write form (one trailing newline), but a user can push a `.md` to + * `main` with EXTRA trailing/empty lines (e.g. a double-blank-line append). When + * the docmost mirror (normalized) and `main` (raw) both change near end-of-file, + * git's line-based 3-way merge reports a CONFLICT even though the only difference + * is trailing blank lines. Normalizing BOTH sides before comparing collapses that + * difference to nothing, so the pull cycle can recognize the conflict as SPURIOUS + * and resolve it cleanly instead of committing raw conflict markers onto `main`. + */ +function normalizeTrailingWhitespace(text: string): string { + const body = text.replace(/[\s]+$/, ""); + return body.length > 0 ? `${body}\n` : "\n"; +} + /** Convert an absolute/relative segment list under the vault to a relPath. */ function segmentsToRelPath(segments: string[], stem: string): string { return [...segments, `${stem}.md`].join("/"); @@ -226,6 +246,7 @@ export interface ApplyPullActionsDeps { | "merge" | "listUnmergedPaths" | "commitMerge" + | "showStage" >; /** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */ writeFile: (absPath: string, text: string) => Promise; @@ -249,10 +270,13 @@ export interface ApplyResult { committed: boolean; merge: { ok: boolean; conflict: boolean; output: string }; /** - * Vault-relative paths of the page(s) that CONFLICTED in the docmost -> main - * merge and were committed WITH conflict markers (so the rest of the space - * keeps syncing — SPEC §9 wedge fix). Empty on a clean merge. The push side - * isolates these (per-page failure when `autoMergeConflicts` is off). + * Vault-relative paths of the page(s) that had a GENUINE same-block conflict in + * the docmost -> main merge and were AUTO-RESOLVED to the git/main side (git + * wins, SPEC §9) — committed CLEAN, never with raw conflict markers. Empty on a + * clean merge AND when the only conflicts were spurious trailing-whitespace + * differences (those are normalized, not reported). Surfaced for logging / + * /status visibility; the docmost-side content stays recoverable via the + * `docmost` branch + page history. */ conflictedPaths: string[]; } @@ -422,32 +446,88 @@ export async function applyPullActions( // Merge docmost -> main. A CONFLICT must NOT wedge the whole space (the // reported bug: ONE same-line conflict on ONE page froze sync for EVERY page // in both directions because the next cycle's `isMergeInProgress` check kept - // skipping the entire space). So instead of leaving the vault mid-merge, we - // COMMIT the conflicted merge with markers in place (SPEC §9 wedge fix): the - // cleanly-merged pages land, the conflicted page carries its markers on `main` - // and is isolated by the push side (a per-page failure when `autoMergeConflicts` - // is off — the markers never reach Docmost), and the NEXT cycle is NOT wedged. - // Recovery: resolve the markers in git; the next push then sends the clean body. + // skipping the entire space). It must ALSO never commit raw `<<<<<<<`/`>>>>>>>` + // markers onto the published `main` (round-1 round-2: external clones would see + // the markers AND the body re-conflicts every cycle while git and Docmost + // silently diverge). So on a conflict we RESOLVE each conflicted file to a + // clean, marker-free form and commit that (SPEC §9): + // + // - SPURIOUS conflict — the ROOT CAUSE of the leak: the two sides differ ONLY + // in trailing/empty-line normalization (the engine writes one trailing + // newline; a user pushed extra blank lines). Once both sides are + // `normalizeTrailingWhitespace`d they are IDENTICAL, so this is no real + // conflict at all: write the normalized form. Content stays in sync; git + // and the page never diverge. + // - GENUINE same-block conflict: resolve to OURS (the `main`/git side), so git + // wins the published branch — mirroring the live-doc 3-way "git wins" rule. + // The docmost-side content is preserved on the `docmost` branch and remains + // recoverable via page history; the next push carries git's body to Docmost, + // so both sides converge. No markers ever reach `main`. await git.checkout(DEFAULT_BRANCH); const merge = await git.merge(DOCMOST_BRANCH); let conflictedPaths: string[] = []; + let mergeResult = merge; if (merge.conflict) { - conflictedPaths = await git.listUnmergedPaths(); + const unmerged = await git.listUnmergedPaths(); + const genuine: string[] = []; + for (const rel of unmerged) { + const ours = await git.showStage(2, rel); // main side + const theirs = await git.showStage(3, rel); // docmost side + if ( + ours !== null && + theirs !== null && + normalizeTrailingWhitespace(ours) === normalizeTrailingWhitespace(theirs) + ) { + // SPURIOUS: identical once trailing/empty-line normalization is applied. + // Commit the canonical (normalized) form — no conflict, no markers. + await deps.writeFile( + relToAbs(vaultRoot, rel), + normalizeTrailingWhitespace(theirs), + ); + } else { + // GENUINE conflict: resolve to the non-null side (OURS preferred so git + // wins the published branch; THEIRS kept when OURS is absent — e.g. a + // modify/delete conflict — to avoid dropping the remaining content). If + // BOTH are null (delete/delete) leave it; commitMerge's `git add -A` + // stages the deletion. + genuine.push(rel); + const resolved = ours ?? theirs; + if (resolved !== null) { + await deps.writeFile(relToAbs(vaultRoot, rel), resolved); + } + } + } + conflictedPaths = genuine; await git.commitMerge( - `docmost: sync with unresolved conflict in ${conflictedPaths.length} page(s)`, + genuine.length > 0 + ? `docmost: sync, ${genuine.length} page(s) auto-resolved (git wins, SPEC §9)` + : `docmost: sync (trailing-whitespace conflicts normalized, SPEC §9)`, { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL, trailers: [SOURCE_TRAILER], }, ); - log( - `pull: merge of docmost -> main CONFLICTED on ${conflictedPaths.length} ` + - `page(s): ${conflictedPaths.join(", ")}. Committed the merge WITH ` + - `conflict markers so the rest of the space keeps syncing (SPEC §9). The ` + - `conflicted page(s) are isolated on push (markers never reach Docmost); ` + - `resolve the markers in git to recover.`, - ); + // The committed tree is CLEAN (every conflicted file was overwritten with a + // marker-free resolution). `conflict` now reflects only the GENUINE conflicts + // that were auto-resolved (git won); a merge that conflicted ONLY on trailing + // whitespace is reported as clean so /status does not cry wolf. + mergeResult = { ok: true, conflict: genuine.length > 0, output: merge.output }; + if (genuine.length > 0) { + log( + `pull: merge of docmost -> main had ${genuine.length} GENUINE conflict(s) ` + + `auto-resolved to the git/main side (git wins, SPEC §9): ` + + `${genuine.join(", ")}. NO conflict markers were written to main; the ` + + `docmost-side content is on the 'docmost' branch and recoverable via ` + + `page history, and the next push reconciles Docmost to the git body.`, + ); + } else { + log( + `pull: merge of docmost -> main conflicted ONLY on trailing/empty-line ` + + `normalization (${unmerged.length} file(s)) — auto-normalized, no ` + + `markers, content stays in sync (SPEC §9 spurious-conflict fix).`, + ); + } } else if (!merge.ok) { log(`pull: merge of docmost -> main failed: ${merge.output}`); } @@ -459,7 +539,7 @@ export async function applyPullActions( deleted, failed, committed, - merge, + merge: mergeResult, conflictedPaths, }; } diff --git a/packages/git-sync/src/lib/docmost-schema.ts b/packages/git-sync/src/lib/docmost-schema.ts index 304a7c92..a511e8f4 100644 --- a/packages/git-sync/src/lib/docmost-schema.ts +++ b/packages/git-sync/src/lib/docmost-schema.ts @@ -59,12 +59,43 @@ function getStyleProperty(element: HTMLElement, propertyName: string): string | * `[!note]` / `[!default]` callout authored in the editor would come back as * `[!info]` after a git sync (the QA "callout type -> [!info]" fidelity loss). * `note` and `default` were previously absent and so were being flattened. + * + * The editor SCHEMA genuinely only supports these six banner types — there is no + * `tip`/`caution`/`important`/`question` callout node. So those are NOT first- + * class types we can round-trip literally; they are INPUT ALIASES (GitHub/Obsidian + * alert syntax). The editor's own paste/import path maps them onto the supported + * set (see `GITHUB_ALERT_TYPE_MAP` in + * `@docmost/editor-ext` markdown/utils/github-callout.marked.ts: + * tip -> success, caution -> danger, important -> info). We mirror that aliasing + * here so an ingested `> [!tip]` / `> [!caution]` lands on the closest real banner + * (success / danger) instead of flatly collapsing to `info` — matching exactly how + * the editor itself would interpret the same alias. A schema type always maps to + * itself first (idempotent round-trip); the alias map only rewrites NON-schema + * names; anything still unknown falls back to `info`. */ const CALLOUT_TYPES = ["default", "info", "note", "success", "warning", "danger"]; -export const clampCalloutType = (value: string | null | undefined): string => - value && CALLOUT_TYPES.includes(value.toLowerCase()) - ? value.toLowerCase() - : "info"; +/** + * NON-schema callout aliases -> their closest supported banner. Mirrors the + * editor's `GITHUB_ALERT_TYPE_MAP` for the names that are NOT already schema + * types (a schema type is preserved as-is and never consulted here). Keeping + * these in lockstep means git-sync ingest and an editor paste interpret the same + * `> [!alias]` identically. + */ +const CALLOUT_TYPE_ALIASES: Record = { + tip: "success", + caution: "danger", + important: "info", +}; +export const clampCalloutType = (value: string | null | undefined): string => { + if (!value) return "info"; + const lower = value.toLowerCase(); + // A real schema type round-trips to itself (idempotent). + if (CALLOUT_TYPES.includes(lower)) return lower; + // A known GitHub/Obsidian alias maps to the editor's closest banner. + if (CALLOUT_TYPE_ALIASES[lower]) return CALLOUT_TYPE_ALIASES[lower]; + // Anything else is collapsed to the safe default (matches the editor). + return "info"; +}; /** * Allowlist guard for CSS color values imported from HTML. diff --git a/packages/git-sync/test/apply-pull-actions.test.ts b/packages/git-sync/test/apply-pull-actions.test.ts index c4d07096..6651f9e6 100644 --- a/packages/git-sync/test/apply-pull-actions.test.ts +++ b/packages/git-sync/test/apply-pull-actions.test.ts @@ -44,12 +44,24 @@ function makeClient(opts?: { failFor?: Set }) { } /** A git fake recording the order of ops; merge result is configurable. */ -function makeGit(merge: { ok: boolean; conflict: boolean; output?: string } = { - ok: true, - conflict: false, -}) { +function makeGit( + merge: { ok: boolean; conflict: boolean; output?: string } = { + ok: true, + conflict: false, + }, + conflictStages?: { + unmerged?: string[]; + /** path -> { ours, theirs } blob content for showStage(2|3, path). */ + stages?: Record; + }, +) { const order: string[] = []; let committedSubject: string | undefined; + const unmerged = conflictStages?.unmerged ?? ['Conflicted.md']; + // Default stages: genuinely-different ours/theirs (a real same-block conflict). + const stages = conflictStages?.stages ?? { + 'Conflicted.md': { ours: 'git side\n', theirs: 'docmost side\n' }, + }; const git = { stageAll: vi.fn(async () => { order.push('stageAll'); @@ -66,7 +78,12 @@ function makeGit(merge: { ok: boolean; conflict: boolean; output?: string } = { order.push('merge'); return { ok: merge.ok, conflict: merge.conflict, output: merge.output ?? '' }; }), - listUnmergedPaths: vi.fn(async () => ['Conflicted.md']), + listUnmergedPaths: vi.fn(async () => unmerged), + showStage: vi.fn(async (stage: 1 | 2 | 3, path: string) => { + const s = stages[path]; + if (!s) return null; + return stage === 2 ? s.ours : stage === 3 ? s.theirs : null; + }), commitMerge: vi.fn(async (subject: string) => { order.push(`commitMerge:${subject}`); }), @@ -407,13 +424,22 @@ describe('applyPullActions — commit subject reflects ACTUAL counts', () => { }); describe('applyPullActions — merge result is surfaced, not swallowed', () => { - it('COMMITS a conflicting merge with markers (no wedge) and surfaces conflictedPaths', async () => { - // Regression for the WEDGE bug (QA #119): a conflicting docmost -> main merge - // must NOT be left mid-merge (which wedged the whole space). It is committed - // WITH markers so the rest of the space keeps syncing; the conflicted page is - // surfaced in `conflictedPaths` and isolated by the push side. + it('GENUINE conflict: auto-resolves to OURS (git wins), no markers, surfaces conflictedPaths', async () => { + // QA #119 round-2: a genuine same-block docmost -> main conflict must NOT be + // committed with raw markers onto `main` (external clones would see them and + // the body re-conflicts forever). It is auto-resolved to the git/main side + // (git wins, SPEC §9), the conflicted page is surfaced in `conflictedPaths`, + // and the merge is committed CLEAN (no wedge). const { client } = makeClient(); - const g = makeGit({ ok: false, conflict: true, output: 'CONFLICT' }); + const g = makeGit( + { ok: false, conflict: true, output: 'CONFLICT' }, + { + unmerged: ['Conflicted.md'], + stages: { + 'Conflicted.md': { ours: 'git wins body\n', theirs: 'docmost body\n' }, + }, + }, + ); const fs = makeFs(); const res = await applyPullActions( @@ -421,14 +447,55 @@ describe('applyPullActions — merge result is surfaced, not swallowed', () => { actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }), VAULT, ); + // A genuine conflict was detected and auto-resolved (git won): reported as a + // (now-clean) committed merge with the conflicting page surfaced. expect(res.merge.conflict).toBe(true); - expect(res.merge.ok).toBe(false); - // The merge was COMMITTED (vault no longer mid-merge) and the bad page named. - expect(g.git.commitMerge).toHaveBeenCalledTimes(1); + expect(res.merge.ok).toBe(true); expect(res.conflictedPaths).toEqual(['Conflicted.md']); + // The conflicted file was rewritten with OURS (git side) — NO markers. + const resolved = fs.writes.find((w) => w.abs === '/vault/Conflicted.md'); + expect(resolved?.text).toBe('git wins body\n'); + expect(resolved?.text).not.toContain('<<<<<<<'); + expect(resolved?.text).not.toContain('>>>>>>>'); + // The merge was COMMITTED (vault no longer mid-merge). + expect(g.git.commitMerge).toHaveBeenCalledTimes(1); expect(g.order.some((o) => o.startsWith('commitMerge:'))).toBe(true); }); + it('SPURIOUS conflict (trailing-blank only): normalizes clean, NOT reported as a conflict', async () => { + // Root-cause fix: when the two sides differ ONLY in trailing/empty lines (the + // normalize-on-write form vs a user's blank-line append), the conflict is + // spurious — both normalize to the same text. It is resolved to the normalized + // form (no markers) and NOT counted as a conflict (so /status does not cry wolf). + const { client } = makeClient(); + const g = makeGit( + { ok: false, conflict: true, output: 'CONFLICT' }, + { + unmerged: ['Trailing.md'], + stages: { + // Same content; OURS has a double-blank-line append, THEIRS is normalized. + 'Trailing.md': { ours: 'Hello world\n\n\n', theirs: 'Hello world\n' }, + }, + }, + ); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }), + VAULT, + ); + // No GENUINE conflict — reported clean. + expect(res.merge.conflict).toBe(false); + expect(res.merge.ok).toBe(true); + expect(res.conflictedPaths).toEqual([]); + // The file was rewritten to the canonical normalized form (single trailing \n). + const resolved = fs.writes.find((w) => w.abs === '/vault/Trailing.md'); + expect(resolved?.text).toBe('Hello world\n'); + // Still committed (clears the merge), but as a clean merge. + expect(g.git.commitMerge).toHaveBeenCalledTimes(1); + }); + it('returns ok:false conflict:false on a non-conflict merge failure', async () => { const { client } = makeClient(); const g = makeGit({ ok: false, conflict: false, output: 'some error' }); diff --git a/packages/git-sync/test/docmost-schema-attrs.test.ts b/packages/git-sync/test/docmost-schema-attrs.test.ts index b89fe6a4..fb7a3591 100644 --- a/packages/git-sync/test/docmost-schema-attrs.test.ts +++ b/packages/git-sync/test/docmost-schema-attrs.test.ts @@ -77,8 +77,20 @@ describe('clampCalloutType', () => { expect(clampCalloutType('success')).toBe('success'); }); + it('maps GitHub/Obsidian alert ALIASES to the editor banner (not flatly info)', () => { + // The editor schema has no tip/caution/important callout node — they are input + // aliases the editor's own paste path maps onto the supported set + // (GITHUB_ALERT_TYPE_MAP in editor-ext). git-sync mirrors that aliasing so an + // ingested `> [!tip]` / `> [!caution]` lands on the closest real banner instead + // of collapsing everything to `info`. + expect(clampCalloutType('tip')).toBe('success'); + expect(clampCalloutType('TIP')).toBe('success'); + expect(clampCalloutType('caution')).toBe('danger'); + expect(clampCalloutType('important')).toBe('info'); + }); + it('falls back to "info" for genuinely unknown types', () => { - expect(clampCalloutType('tip')).toBe('info'); + expect(clampCalloutType('question')).toBe('info'); expect(clampCalloutType('banana')).toBe('info'); }); diff --git a/packages/git-sync/test/head-advertise.test.ts b/packages/git-sync/test/head-advertise.test.ts new file mode 100644 index 00000000..4fce23c5 --- /dev/null +++ b/packages/git-sync/test/head-advertise.test.ts @@ -0,0 +1,97 @@ +import { execFile } from 'node:child_process'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { promisify } from 'node:util'; +import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { + VaultGit, + BOT_AUTHOR_NAME, + BOT_AUTHOR_EMAIL, +} from '../src/engine/git'; + +/** + * QA #119 bug #3 — the smart-HTTP host advertises whatever `HEAD` resolves to as + * a clone's default branch. The engine transiently checks out the read-only + * `docmost` mirror during a cycle, so a clone racing a cycle could default to + * `docmost`. `VaultGit.pinHeadToMain()` pins the symref back to `main` so the + * advertised HEAD is deterministic. Verified against a REAL temp git repo, + * including the actual `git upload-pack --advertise-refs` HEAD symref capability + * a clone reads. Skips gracefully if git is unavailable. + */ + +const execFileAsync = promisify(execFile); + +async function gitAvailable(): Promise { + try { + await execFileAsync('git', ['--version']); + return true; + } catch { + return false; + } +} + +describe('VaultGit.pinHeadToMain — advertised HEAD is stably main (real git)', () => { + let available = false; + let dir: string; + + beforeAll(async () => { + available = await gitAvailable(); + }); + + afterEach(async () => { + if (dir) await rm(dir, { recursive: true, force: true }); + }); + + async function headSymref(vault: string): Promise { + const { stdout } = await execFileAsync( + 'git', + ['symbolic-ref', '--short', 'HEAD'], + { cwd: vault }, + ); + return stdout.trim(); + } + + /** The HEAD symref a clone would read from `git upload-pack --advertise-refs`. */ + async function advertisedHead(vault: string): Promise { + const { stdout } = await execFileAsync( + 'git', + ['upload-pack', '--advertise-refs', vault], + { cwd: vault }, + ); + // protocol v0/v2 advertise `symref=HEAD:refs/heads/` in the caps. + const m = stdout.match(/symref=HEAD:refs\/heads\/([^\s\0]+)/); + return m ? m[1] : null; + } + + it('pins HEAD back to main after the engine checked out docmost', async () => { + if (!available) return; + dir = await mkdtemp(join(tmpdir(), 'docmost-head-')); + const git = new VaultGit(dir); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + await writeFile(join(dir, 'A.md'), 'hello\n', 'utf8'); + await git.stageAll(); + await git.commit('seed', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + }); + // Keep docmost reachable as a real branch ref. + await execFileAsync('git', ['branch', '-f', 'docmost', 'main'], { cwd: dir }); + + // Simulate a cycle mid-pull: the engine checks out the read-only mirror. + await git.checkout('docmost'); + expect(await headSymref(dir)).toBe('docmost'); + expect(await advertisedHead(dir)).toBe('docmost'); // the bug, pre-pin + + // Pin: the advertised default branch must be `main` again. + await git.pinHeadToMain(); + expect(await headSymref(dir)).toBe('main'); + expect(await advertisedHead(dir)).toBe('main'); + + // Idempotent: pinning when already on main is a clean no-op. + await git.pinHeadToMain(); + expect(await headSymref(dir)).toBe('main'); + expect(await advertisedHead(dir)).toBe('main'); + }); +}); diff --git a/packages/git-sync/test/markdown-to-prosemirror-gaps.test.ts b/packages/git-sync/test/markdown-to-prosemirror-gaps.test.ts index ebec6e9d..7fdf29e9 100644 --- a/packages/git-sync/test/markdown-to-prosemirror-gaps.test.ts +++ b/packages/git-sync/test/markdown-to-prosemirror-gaps.test.ts @@ -307,23 +307,33 @@ describe('import: highlight/textStyle color sanitization (parseHTML)', () => { }); // --------------------------------------------------------------------------- -// Spec 2. Importing an unsupported callout fence clamps the type to 'info'. +// Spec 2. Importing a non-schema callout fence resolves the type via the editor's +// alias map (known GitHub/Obsidian aliases) or clamps to 'info' (unknown). // -// preprocessCallouts emits div[data-type=callout][data-callout-type=tip]; the -// schema's Callout.type parseHTML pipes 'tip' through clampCalloutType, which -// maps the unknown type to the 'info' default. End-to-end import-side clamp. +// preprocessCallouts emits div[data-type=callout][data-callout-type=]; the +// schema's Callout.type parseHTML pipes it through clampCalloutType. A known alias +// (`tip`) maps to the editor's banner (`success`); a genuinely unknown type +// (`banana`) clamps to the 'info' default. End-to-end import-side resolution. // --------------------------------------------------------------------------- -describe('import: unsupported callout fence clamps type to info', () => { - it("imports ':::tip' as a callout whose attrs.type === 'info'", async () => { +describe('import: non-schema callout fence resolves via alias map / clamps to info', () => { + it("imports ':::tip' as a callout whose attrs.type === 'success' (alias)", async () => { const doc = await markdownToProseMirror(':::tip\nhello\n:::'); const callouts = findAll(doc, 'callout'); expect(callouts).toHaveLength(1); - expect(callouts[0].attrs.type).toBe('info'); + expect(callouts[0].attrs.type).toBe('success'); // The body paragraph survived inside the callout. expect(allText(callouts[0])).toContain('hello'); const paras = findAll(callouts[0], 'paragraph'); expect(paras.length).toBeGreaterThanOrEqual(1); }); + + it("imports ':::banana' (unknown) as a callout whose attrs.type === 'info'", async () => { + const doc = await markdownToProseMirror(':::banana\nhello\n:::'); + const callouts = findAll(doc, 'callout'); + expect(callouts).toHaveLength(1); + expect(callouts[0].attrs.type).toBe('info'); + expect(allText(callouts[0])).toContain('hello'); + }); }); // --------------------------------------------------------------------------- diff --git a/packages/git-sync/test/pull-conflict-normalize.test.ts b/packages/git-sync/test/pull-conflict-normalize.test.ts new file mode 100644 index 00000000..f5ef239d --- /dev/null +++ b/packages/git-sync/test/pull-conflict-normalize.test.ts @@ -0,0 +1,199 @@ +import { execFile } from 'node:child_process'; +import { mkdtemp, readFile, rm, writeFile, mkdir } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { promisify } from 'node:util'; +import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { + VaultGit, + BOT_AUTHOR_NAME, + BOT_AUTHOR_EMAIL, +} from '../src/engine/git'; +import { applyPullActions, type PullActions } from '../src/engine/pull'; + +/** + * QA #119 round-2 — the docmost -> main merge must NEVER commit raw conflict + * markers onto the published `main` (external clones would see them and the body + * re-conflicts every cycle while git and the DB silently diverge). These run + * against a REAL temp git repo: + * + * 1. SPURIOUS conflict (the root cause): two sides that differ ONLY in + * trailing/empty lines (normalize-on-write vs a user's blank-line append) + * must NOT conflict — they auto-normalize, no markers, and stay in sync over + * repeated cycles. + * 2. GENUINE same-block conflict: still must not leak raw markers into `main` + * (auto-resolved to the git/main side; the docmost side stays recoverable on + * the `docmost` branch). + * + * Skips gracefully if git is unavailable. + */ + +const execFileAsync = promisify(execFile); + +async function gitAvailable(): Promise { + try { + await execFileAsync('git', ['--version']); + return true; + } catch { + return false; + } +} + +/** PullActions with everything empty except the given overrides. */ +function actions(partial: Partial = {}): PullActions { + return { + toWrite: [], + moved: [], + toDelete: [], + deletionDecision: { apply: true }, + existingCount: 0, + plannedDeleteCount: 0, + ...partial, + }; +} + +/** Real-fs/real-git deps for applyPullActions (no client calls when toWrite empty). */ +function realDeps(git: VaultGit) { + return { + client: { + getPageJson: async () => { + throw new Error('getPageJson should not be called in these tests'); + }, + }, + git, + writeFile: async (abs: string, text: string) => { + await writeFile(abs, text, 'utf8'); + }, + mkdir: async (abs: string) => { + await mkdir(abs, { recursive: true }); + }, + rm: async (abs: string) => { + await rm(abs, { force: true }); + }, + log: () => {}, + }; +} + +const PAGE = (body: string) => `---\ngitmost_id: p1\n---\n\n${body}`; + +describe('pull merge — spurious vs genuine conflict (real git)', () => { + let available = false; + let dir: string; + + beforeAll(async () => { + available = await gitAvailable(); + }); + + afterEach(async () => { + if (dir) await rm(dir, { recursive: true, force: true }); + }); + + async function commitOn(git: VaultGit, subject: string): Promise { + await git.stageAll(); + await git.commit(subject, { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + }); + } + + /** + * Build a repo where `main` and `docmost` have DIVERGED from a shared base on + * the SAME file, so `applyPullActions`'s docmost -> main merge does a real + * 3-way merge. `ours`/`theirs`/`base` are the file BODIES for main/docmost/base. + */ + async function divergedRepo(opts: { + base: string; + ours: string; + theirs: string; + }): Promise<{ vault: string; git: VaultGit; file: string }> { + dir = await mkdtemp(join(tmpdir(), 'docmost-conflict-')); + const git = new VaultGit(dir); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + const file = 'Doc.md'; + + // base commit on main, then re-fork docmost from it (merge-base = base). + await writeFile(join(dir, file), PAGE(opts.base), 'utf8'); + await commitOn(git, 'base'); + await execFileAsync('git', ['branch', '-f', 'docmost', 'main'], { cwd: dir }); + + // docmost side. + await git.checkout('docmost'); + await writeFile(join(dir, file), PAGE(opts.theirs), 'utf8'); + await commitOn(git, 'docmost: change'); + + // main side (diverges from base too -> a real 3-way merge, not a ff). + await git.checkout('main'); + await writeFile(join(dir, file), PAGE(opts.ours), 'utf8'); + await commitOn(git, 'local: change'); + + // The cycle calls applyPullActions while on `docmost`. + await git.checkout('docmost'); + return { vault: dir, git, file }; + } + + it('SPURIOUS: a trailing-blank-only diff does NOT conflict, no markers, stays in sync', async () => { + if (!available) return; + // base ends "World\n\n", main appends another blank, docmost normalizes to one. + const { vault, git, file } = await divergedRepo({ + base: 'World\n\n', + ours: 'World\n\n\n', + theirs: 'World\n', + }); + + const res = await applyPullActions(realDeps(git), actions(), vault); + + // No GENUINE conflict reported. + expect(res.merge.conflict).toBe(false); + expect(res.merge.ok).toBe(true); + expect(res.conflictedPaths).toEqual([]); + // The vault is not wedged mid-merge. + expect(await git.isMergeInProgress()).toBe(false); + + // `main` carries the clean normalized body — NO conflict markers. + const onMain = await readFile(join(vault, file), 'utf8'); + expect(onMain).not.toContain('<<<<<<<'); + expect(onMain).not.toContain('======='); + expect(onMain).not.toContain('>>>>>>>'); + expect(onMain).toContain('World'); + + // A SECOND identical pull cycle is a clean no-op (git and content stay in + // sync — no re-conflict, no churn). docmost is now an ancestor of main. + await git.checkout('docmost'); + const res2 = await applyPullActions(realDeps(git), actions(), vault); + expect(res2.merge.conflict).toBe(false); + expect(res2.conflictedPaths).toEqual([]); + const onMain2 = await readFile(join(vault, file), 'utf8'); + expect(onMain2).not.toContain('<<<<<<<'); + }); + + it('GENUINE: a same-block content conflict does NOT leak raw markers into main', async () => { + if (!available) return; + const { vault, git, file } = await divergedRepo({ + base: 'Original line\n', + ours: 'Edited by GIT\n', + theirs: 'Edited by DOCMOST\n', + }); + + const res = await applyPullActions(realDeps(git), actions(), vault); + + // A genuine conflict is detected + auto-resolved (git wins) — reported, clean. + expect(res.merge.conflict).toBe(true); + expect(res.merge.ok).toBe(true); + expect(res.conflictedPaths).toEqual([file]); + expect(await git.isMergeInProgress()).toBe(false); + + const onMain = await readFile(join(vault, file), 'utf8'); + // CARDINAL invariant: no raw conflict markers ever on the published main. + expect(onMain).not.toContain('<<<<<<<'); + expect(onMain).not.toContain('======='); + expect(onMain).not.toContain('>>>>>>>'); + // Git/main side won the published branch. + expect(onMain).toContain('Edited by GIT'); + expect(onMain).not.toContain('Edited by DOCMOST'); + + // The docmost side stays recoverable on the `docmost` branch. + const onDocmost = await git.showFileAtRef('docmost', file); + expect(onDocmost).toContain('Edited by DOCMOST'); + }); +});