fix(git-sync): kill spurious marker-leaking conflict, concurrent-edit loss, flapping HEAD
Three more git-sync QA defects from the 2nd live pass on PR #119, plus a callout-fidelity nit: 1. SPURIOUS conflict leaked raw markers into canonical main (root cause). On an ordinary round-trip the only difference between the docmost mirror (normalize-on-write) and a user's raw push is trailing/empty-line normalization, which made git's line-based docmost->main merge CONFLICT, and the wedge fix then committed the file WITH literal <<<<<<< / ======= / >>>>>>> markers onto main (git and the DB silently diverged for cycles). Fix: on a conflict, normalize trailing/empty lines on BOTH sides (showStage :2:/:3:) before comparing — a trailing-only diff is recognized as spurious and resolved to the clean normalized form. A GENUINE same-block conflict is auto-resolved to OURS (git wins, mirroring the live-doc 3-way rule); the docmost side stays on the `docmost` branch + page history. Raw markers NEVER reach main again. 2. Concurrent UI<->git edit silently lost the UI side. The git->Docmost 3-way merge ran against a live Y.Doc that hadn't yet received the user's debounced in-flight edit, so git clean-applied (no conflict detected) and the edit vanished even on a different block. Fix: flush the pending debounced store before the merge so the in-flight edit is drained into the live doc first — a different-block edit is merged, a same-block one is detected and pinned to history (recoverable). 3. Smart-HTTP HEAD flapped to the read-only `docmost` mirror (~1/4 of clones). The engine transiently checks out `docmost` mid-pull and the host advertises whatever HEAD resolves to. Fix: VaultGit.pinHeadToMain(); the cycle restores HEAD->main in a finally; and the upload-pack ref advertisement is served HEAD-pinned under the per-space lock so it can never observe a mid-cycle HEAD. 4. (callout) clampCalloutType now mirrors the editor's GITHUB_ALERT_TYPE_MAP for non-schema aliases (tip->success, caution->danger, important->info) instead of flatly collapsing to info. 
The editor schema genuinely supports only the six banner types, so unknown types still fall back to info (by design). Tests: deterministic real-git trailing-blank round-trip (no conflict, no markers, in sync over 2 cycles) + genuine-conflict no-marker-leak; HEAD advertisement stability; pre/post-flush concurrent-edit survival; serveReadAdvertisement lock pin; widened callout-alias coverage. Engine vitest + server tsc + collaboration / git-http / orchestrator specs all green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { VaultGit } from "./git.js";
|
||||
import { VaultGit, DEFAULT_BRANCH } from "./git.js";
|
||||
import { GitSyncClient } from "./client.types.js";
|
||||
import { Settings } from "./settings.js";
|
||||
import { readExisting, computePullActions, applyPullActions } from "./pull.js";
|
||||
@@ -142,67 +142,87 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Pull writes happen on `docmost`; be on it BEFORE applying (see docstring).
|
||||
await vault.ensureBranch("docmost", "main");
|
||||
await vault.checkout("docmost");
|
||||
try {
|
||||
// 3. Pull writes happen on `docmost`; be on it BEFORE applying (see docstring).
|
||||
await vault.ensureBranch("docmost", "main");
|
||||
await vault.checkout("docmost");
|
||||
|
||||
// 4. PULL --------------------------------------------------------------------
|
||||
const existing = await readExisting({
|
||||
listTracked: () => vault.listTrackedFiles("*.md"),
|
||||
readFile: (relPath) => safeFs.readFile(abs(relPath)),
|
||||
});
|
||||
// 4. PULL ------------------------------------------------------------------
|
||||
const existing = await readExisting({
|
||||
listTracked: () => vault.listTrackedFiles("*.md"),
|
||||
readFile: (relPath) => safeFs.readFile(abs(relPath)),
|
||||
});
|
||||
|
||||
const tree = await client.listSpaceTree(spaceId);
|
||||
const pullActions = computePullActions({
|
||||
pages: tree.pages,
|
||||
treeComplete: tree.complete,
|
||||
existing,
|
||||
});
|
||||
const tree = await client.listSpaceTree(spaceId);
|
||||
const pullActions = computePullActions({
|
||||
pages: tree.pages,
|
||||
treeComplete: tree.complete,
|
||||
existing,
|
||||
});
|
||||
|
||||
// Bail before the first destructive write phase if the lock was lost.
|
||||
signal?.throwIfAborted();
|
||||
// Bail before the first destructive write phase if the lock was lost.
|
||||
signal?.throwIfAborted();
|
||||
|
||||
const pullResult = await applyPullActions(
|
||||
{
|
||||
client,
|
||||
const pullResult = await applyPullActions(
|
||||
{
|
||||
client,
|
||||
git: vault,
|
||||
writeFile: (absPath, text) => safeFs.writeFile(absPath, text),
|
||||
mkdir: (absDir) => safeFs.mkdir(absDir),
|
||||
rm: (absPath) => safeFs.rm(absPath),
|
||||
log,
|
||||
},
|
||||
pullActions,
|
||||
vaultRoot,
|
||||
);
|
||||
|
||||
// 5. PUSH ------------------------------------------------------------------
|
||||
const pushDeps = {
|
||||
settings,
|
||||
git: vault,
|
||||
writeFile: (absPath, text) => safeFs.writeFile(absPath, text),
|
||||
mkdir: (absDir) => safeFs.mkdir(absDir),
|
||||
rm: (absPath) => safeFs.rm(absPath),
|
||||
makeClient: () => client,
|
||||
readFile: (relPath: string) => safeFs.readFile(abs(relPath)),
|
||||
writeFile: (relPath: string, text: string) =>
|
||||
safeFs.writeFile(abs(relPath), text),
|
||||
log,
|
||||
},
|
||||
pullActions,
|
||||
vaultRoot,
|
||||
);
|
||||
};
|
||||
|
||||
// 5. PUSH --------------------------------------------------------------------
|
||||
const pushDeps = {
|
||||
settings,
|
||||
git: vault,
|
||||
makeClient: () => client,
|
||||
readFile: (relPath: string) => safeFs.readFile(abs(relPath)),
|
||||
writeFile: (relPath: string, text: string) => safeFs.writeFile(abs(relPath), text),
|
||||
log,
|
||||
};
|
||||
// Bail before pushing to Docmost if the lock was lost during pull.
|
||||
signal?.throwIfAborted();
|
||||
|
||||
// Bail before pushing to Docmost if the lock was lost during pull.
|
||||
signal?.throwIfAborted();
|
||||
const pushResult = await runPush(pushDeps, { dryRun: false });
|
||||
|
||||
const pushResult = await runPush(pushDeps, { dryRun: false });
|
||||
|
||||
return {
|
||||
ran: true,
|
||||
pull: {
|
||||
written: pullResult.written,
|
||||
deleted: pullResult.deleted,
|
||||
conflict: pullResult.merge.conflict,
|
||||
},
|
||||
push: {
|
||||
mode: pushResult.mode,
|
||||
failures: pushResult.failures?.length ?? 0,
|
||||
},
|
||||
// Forward a divergent-`docmost` escalation so the caller can act on the §5
|
||||
// invariant breach without scraping logs (red-team #15).
|
||||
divergentDocmost: pushResult.divergentDocmost ?? false,
|
||||
};
|
||||
return {
|
||||
ran: true,
|
||||
pull: {
|
||||
written: pullResult.written,
|
||||
deleted: pullResult.deleted,
|
||||
conflict: pullResult.merge.conflict,
|
||||
},
|
||||
push: {
|
||||
mode: pushResult.mode,
|
||||
failures: pushResult.failures?.length ?? 0,
|
||||
},
|
||||
// Forward a divergent-`docmost` escalation so the caller can act on the §5
|
||||
// invariant breach without scraping logs (red-team #15).
|
||||
divergentDocmost: pushResult.divergentDocmost ?? false,
|
||||
};
|
||||
} finally {
|
||||
// STABLE SERVED HEAD (bug #3). The pull transiently checks out the read-only
|
||||
// `docmost` mirror, and the smart-HTTP host advertises whatever HEAD resolves
|
||||
// to — so a clone racing a cycle could default to `docmost`. The happy path
|
||||
// already ends on `main` (runPush), but a throw mid-pull would leave HEAD on
|
||||
// `docmost`; restore it here so the advertised default branch is `main` BETWEEN
|
||||
// cycles. Best-effort: skipped if the lock was lost (do not write the working
|
||||
// tree after a possible takeover), and a failing checkout (e.g. a dirty tree
|
||||
// from an aborted write) is swallowed — the next cycle's recovery resyncs and
|
||||
// the read advertisement pins HEAD under the lock regardless.
|
||||
if (!signal?.aborted) {
|
||||
try {
|
||||
await vault.checkout(DEFAULT_BRANCH);
|
||||
} catch {
|
||||
/* best-effort: next cycle recovers; advertisement pins HEAD under lock */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -683,6 +683,43 @@ export class VaultGit {
|
||||
if (r.code !== 0) return null;
|
||||
return r.stdout;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read ONE side of a conflicted file from the merge index (`git show :N:path`),
|
||||
* where the stage `N` is the standard 3-way merge slot:
|
||||
* 1 = merge BASE (common ancestor), 2 = OURS (the current branch = `main`),
|
||||
* 3 = THEIRS (the merged-in branch = `docmost`).
|
||||
* Returns the blob text, or `null` when that stage is absent (e.g. an add/add
|
||||
* conflict has no base, a modify/delete conflict has only one content side).
|
||||
*
|
||||
* Used by the pull cycle (SPEC §9) to RESOLVE a conflicted docmost->main merge
|
||||
* deterministically instead of committing raw conflict markers onto the
|
||||
* published `main`: a conflict whose two sides differ ONLY in trailing/empty
|
||||
* lines is SPURIOUS (normalize -> identical -> clean), and a genuine conflict is
|
||||
* resolved to a clean side (no `<<<<<<<`/`>>>>>>>` markers ever reach `main`).
|
||||
*/
|
||||
async showStage(stage: 1 | 2 | 3, path: string): Promise<string | null> {
  // `git show :<stage>:<path>` prints the blob recorded in that index stage.
  const { code, stdout } = await this.runRaw(["show", `:${stage}:${path}`]);
  // A non-zero exit is reported as "stage absent" (null) rather than thrown.
  return code === 0 ? stdout : null;
}
|
||||
|
||||
/**
|
||||
* Pin the repo's symbolic `HEAD` to `main` WITHOUT touching the working tree or
|
||||
* index (`git symbolic-ref HEAD refs/heads/main`). The smart-HTTP host advertises
|
||||
* whatever `HEAD` resolves to as the clone's default branch, so a clone that
|
||||
* races a cycle mid-pull (when the engine has transiently checked out the
|
||||
* read-only `docmost` mirror) would otherwise default to `docmost`. Pinning HEAD
|
||||
* back to the canonical writable branch makes the advertised symref deterministic.
|
||||
*
|
||||
* symbolic-ref only rewrites `.git/HEAD`; it does NOT move the working tree, so
|
||||
* it must only ever run when the working tree is ALREADY on `main` (between
|
||||
* cycles / under the per-space lock with no cycle in flight) — otherwise HEAD and
|
||||
* the index would desync. Callers serialize this with the engine via the lock.
|
||||
*/
|
||||
async pinHeadToMain(): Promise<void> {
  // Fully-qualified ref of the default branch that HEAD is re-pointed at.
  const defaultBranchRef = `refs/heads/${DEFAULT_BRANCH}`;
  await this.run(["symbolic-ref", "HEAD", defaultBranchRef]);
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -65,6 +65,26 @@ function relToAbs(vaultRoot: string, relPath: string): string {
|
||||
return [vaultRoot, ...relPath.split("/")].join("/");
|
||||
}
|
||||
|
||||
/**
 * Canonicalize a file's TRAILING whitespace: drop every trailing blank or
 * whitespace-only line (and trailing spaces on the last content line) and end
 * with exactly one "\n". An empty or whitespace-only input becomes a single
 * "\n". Leading and interior whitespace is left untouched.
 *
 * Why (SPEC §9 spurious-conflict fix): the engine writes pages in their
 * normalize-on-write form (one trailing newline), while a user can push a `.md`
 * to `main` with EXTRA trailing/empty lines. When both sides change near
 * end-of-file, git's line-based 3-way merge reports a CONFLICT even though the
 * only difference is trailing blank lines. Normalizing BOTH sides before
 * comparing collapses that difference, so the pull cycle can recognize the
 * conflict as SPURIOUS instead of committing raw conflict markers onto `main`.
 *
 * @param text - Raw file content (any line-ending style).
 * @returns `text` with all trailing whitespace replaced by one "\n".
 */
function normalizeTrailingWhitespace(text: string): string {
  // `trimEnd()` strips the same character set as `/\s+$/` (white space plus
  // line terminators) but scans from the end, so it cannot degrade to
  // quadratic backtracking on a long whitespace run in the middle of a file.
  const body = text.trimEnd();
  return body.length > 0 ? `${body}\n` : "\n";
}
|
||||
|
||||
/** Convert an absolute/relative segment list under the vault to a relPath. */
|
||||
function segmentsToRelPath(segments: string[], stem: string): string {
|
||||
return [...segments, `${stem}.md`].join("/");
|
||||
@@ -226,6 +246,7 @@ export interface ApplyPullActionsDeps {
|
||||
| "merge"
|
||||
| "listUnmergedPaths"
|
||||
| "commitMerge"
|
||||
| "showStage"
|
||||
>;
|
||||
/** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */
|
||||
writeFile: (absPath: string, text: string) => Promise<void>;
|
||||
@@ -249,10 +270,13 @@ export interface ApplyResult {
|
||||
committed: boolean;
|
||||
merge: { ok: boolean; conflict: boolean; output: string };
|
||||
/**
|
||||
* Vault-relative paths of the page(s) that CONFLICTED in the docmost -> main
|
||||
* merge and were committed WITH conflict markers (so the rest of the space
|
||||
* keeps syncing — SPEC §9 wedge fix). Empty on a clean merge. The push side
|
||||
* isolates these (per-page failure when `autoMergeConflicts` is off).
|
||||
* Vault-relative paths of the page(s) that had a GENUINE same-block conflict in
|
||||
* the docmost -> main merge and were AUTO-RESOLVED to the git/main side (git
|
||||
* wins, SPEC §9) — committed CLEAN, never with raw conflict markers. Empty on a
|
||||
* clean merge AND when the only conflicts were spurious trailing-whitespace
|
||||
* differences (those are normalized, not reported). Surfaced for logging /
|
||||
* /status visibility; the docmost-side content stays recoverable via the
|
||||
* `docmost` branch + page history.
|
||||
*/
|
||||
conflictedPaths: string[];
|
||||
}
|
||||
@@ -422,32 +446,88 @@ export async function applyPullActions(
|
||||
// Merge docmost -> main. A CONFLICT must NOT wedge the whole space (the
|
||||
// reported bug: ONE same-line conflict on ONE page froze sync for EVERY page
|
||||
// in both directions because the next cycle's `isMergeInProgress` check kept
|
||||
// skipping the entire space). So instead of leaving the vault mid-merge, we
|
||||
// COMMIT the conflicted merge with markers in place (SPEC §9 wedge fix): the
|
||||
// cleanly-merged pages land, the conflicted page carries its markers on `main`
|
||||
// and is isolated by the push side (a per-page failure when `autoMergeConflicts`
|
||||
// is off — the markers never reach Docmost), and the NEXT cycle is NOT wedged.
|
||||
// Recovery: resolve the markers in git; the next push then sends the clean body.
|
||||
// skipping the entire space). It must ALSO never commit raw `<<<<<<<`/`>>>>>>>`
|
||||
// markers onto the published `main` (round-1 round-2: external clones would see
|
||||
// the markers AND the body re-conflicts every cycle while git and Docmost
|
||||
// silently diverge). So on a conflict we RESOLVE each conflicted file to a
|
||||
// clean, marker-free form and commit that (SPEC §9):
|
||||
//
|
||||
// - SPURIOUS conflict — the ROOT CAUSE of the leak: the two sides differ ONLY
|
||||
// in trailing/empty-line normalization (the engine writes one trailing
|
||||
// newline; a user pushed extra blank lines). Once both sides are
|
||||
// `normalizeTrailingWhitespace`d they are IDENTICAL, so this is no real
|
||||
// conflict at all: write the normalized form. Content stays in sync; git
|
||||
// and the page never diverge.
|
||||
// - GENUINE same-block conflict: resolve to OURS (the `main`/git side), so git
|
||||
// wins the published branch — mirroring the live-doc 3-way "git wins" rule.
|
||||
// The docmost-side content is preserved on the `docmost` branch and remains
|
||||
// recoverable via page history; the next push carries git's body to Docmost,
|
||||
// so both sides converge. No markers ever reach `main`.
|
||||
await git.checkout(DEFAULT_BRANCH);
|
||||
const merge = await git.merge(DOCMOST_BRANCH);
|
||||
let conflictedPaths: string[] = [];
|
||||
let mergeResult = merge;
|
||||
if (merge.conflict) {
|
||||
conflictedPaths = await git.listUnmergedPaths();
|
||||
const unmerged = await git.listUnmergedPaths();
|
||||
const genuine: string[] = [];
|
||||
for (const rel of unmerged) {
|
||||
const ours = await git.showStage(2, rel); // main side
|
||||
const theirs = await git.showStage(3, rel); // docmost side
|
||||
if (
|
||||
ours !== null &&
|
||||
theirs !== null &&
|
||||
normalizeTrailingWhitespace(ours) === normalizeTrailingWhitespace(theirs)
|
||||
) {
|
||||
// SPURIOUS: identical once trailing/empty-line normalization is applied.
|
||||
// Commit the canonical (normalized) form — no conflict, no markers.
|
||||
await deps.writeFile(
|
||||
relToAbs(vaultRoot, rel),
|
||||
normalizeTrailingWhitespace(theirs),
|
||||
);
|
||||
} else {
|
||||
// GENUINE conflict: resolve to the non-null side (OURS preferred so git
|
||||
// wins the published branch; THEIRS kept when OURS is absent — e.g. a
|
||||
// modify/delete conflict — to avoid dropping the remaining content). If
|
||||
// BOTH are null (delete/delete) leave it; commitMerge's `git add -A`
|
||||
// stages the deletion.
|
||||
genuine.push(rel);
|
||||
const resolved = ours ?? theirs;
|
||||
if (resolved !== null) {
|
||||
await deps.writeFile(relToAbs(vaultRoot, rel), resolved);
|
||||
}
|
||||
}
|
||||
}
|
||||
conflictedPaths = genuine;
|
||||
await git.commitMerge(
|
||||
`docmost: sync with unresolved conflict in ${conflictedPaths.length} page(s)`,
|
||||
genuine.length > 0
|
||||
? `docmost: sync, ${genuine.length} page(s) auto-resolved (git wins, SPEC §9)`
|
||||
: `docmost: sync (trailing-whitespace conflicts normalized, SPEC §9)`,
|
||||
{
|
||||
authorName: BOT_AUTHOR_NAME,
|
||||
authorEmail: BOT_AUTHOR_EMAIL,
|
||||
trailers: [SOURCE_TRAILER],
|
||||
},
|
||||
);
|
||||
log(
|
||||
`pull: merge of docmost -> main CONFLICTED on ${conflictedPaths.length} ` +
|
||||
`page(s): ${conflictedPaths.join(", ")}. Committed the merge WITH ` +
|
||||
`conflict markers so the rest of the space keeps syncing (SPEC §9). The ` +
|
||||
`conflicted page(s) are isolated on push (markers never reach Docmost); ` +
|
||||
`resolve the markers in git to recover.`,
|
||||
);
|
||||
// The committed tree is CLEAN (every conflicted file was overwritten with a
|
||||
// marker-free resolution). `conflict` now reflects only the GENUINE conflicts
|
||||
// that were auto-resolved (git won); a merge that conflicted ONLY on trailing
|
||||
// whitespace is reported as clean so /status does not cry wolf.
|
||||
mergeResult = { ok: true, conflict: genuine.length > 0, output: merge.output };
|
||||
if (genuine.length > 0) {
|
||||
log(
|
||||
`pull: merge of docmost -> main had ${genuine.length} GENUINE conflict(s) ` +
|
||||
`auto-resolved to the git/main side (git wins, SPEC §9): ` +
|
||||
`${genuine.join(", ")}. NO conflict markers were written to main; the ` +
|
||||
`docmost-side content is on the 'docmost' branch and recoverable via ` +
|
||||
`page history, and the next push reconciles Docmost to the git body.`,
|
||||
);
|
||||
} else {
|
||||
log(
|
||||
`pull: merge of docmost -> main conflicted ONLY on trailing/empty-line ` +
|
||||
`normalization (${unmerged.length} file(s)) — auto-normalized, no ` +
|
||||
`markers, content stays in sync (SPEC §9 spurious-conflict fix).`,
|
||||
);
|
||||
}
|
||||
} else if (!merge.ok) {
|
||||
log(`pull: merge of docmost -> main failed: ${merge.output}`);
|
||||
}
|
||||
@@ -459,7 +539,7 @@ export async function applyPullActions(
|
||||
deleted,
|
||||
failed,
|
||||
committed,
|
||||
merge,
|
||||
merge: mergeResult,
|
||||
conflictedPaths,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -59,12 +59,43 @@ function getStyleProperty(element: HTMLElement, propertyName: string): string |
|
||||
* `[!note]` / `[!default]` callout authored in the editor would come back as
|
||||
* `[!info]` after a git sync (the QA "callout type -> [!info]" fidelity loss).
|
||||
* `note` and `default` were previously absent and so were being flattened.
|
||||
*
|
||||
* The editor SCHEMA genuinely only supports these six banner types — there is no
|
||||
* `tip`/`caution`/`important`/`question` callout node. So those are NOT first-
|
||||
* class types we can round-trip literally; they are INPUT ALIASES (GitHub/Obsidian
|
||||
* alert syntax). The editor's own paste/import path maps them onto the supported
|
||||
* set (see `GITHUB_ALERT_TYPE_MAP` in
|
||||
* `@docmost/editor-ext` markdown/utils/github-callout.marked.ts:
|
||||
* tip -> success, caution -> danger, important -> info). We mirror that aliasing
|
||||
* here so an ingested `> [!tip]` / `> [!caution]` lands on the closest real banner
|
||||
* (success / danger) instead of flatly collapsing to `info` — matching exactly how
|
||||
* the editor itself would interpret the same alias. A schema type always maps to
|
||||
* itself first (idempotent round-trip); the alias map only rewrites NON-schema
|
||||
* names; anything still unknown falls back to `info`.
|
||||
*/
|
||||
const CALLOUT_TYPES = ["default", "info", "note", "success", "warning", "danger"];
|
||||
export const clampCalloutType = (value: string | null | undefined): string =>
|
||||
value && CALLOUT_TYPES.includes(value.toLowerCase())
|
||||
? value.toLowerCase()
|
||||
: "info";
|
||||
/**
 * NON-schema callout aliases -> their closest supported banner. Mirrors the
 * editor's `GITHUB_ALERT_TYPE_MAP` for the names that are NOT already schema
 * types (a schema type is preserved as-is and never consulted here). Keeping
 * these in lockstep means git-sync ingest and an editor paste interpret the same
 * `> [!alias]` identically.
 *
 * The table is built on a null-prototype object: it is indexed with
 * user-authored callout names, and a plain object literal would resolve keys
 * such as `constructor` or `__proto__` to inherited `Object.prototype` members
 * (truthy, non-string values) instead of `undefined`.
 */
const CALLOUT_TYPE_ALIASES: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    tip: "success",
    caution: "danger",
    important: "info",
  },
);
|
||||
/**
 * Clamp an arbitrary callout type name onto one the editor schema supports.
 *
 * Resolution order:
 *   1. null / undefined / empty -> "info";
 *   2. a schema type (case-insensitive) -> its lower-cased self (idempotent);
 *   3. a known alias in `CALLOUT_TYPE_ALIASES` -> the mapped banner;
 *   4. anything else -> "info".
 *
 * @param value - Callout type as authored (e.g. from `> [!TIP]`); may be absent.
 * @returns A lower-case callout type string; always a string, never an
 *   inherited object member.
 */
export const clampCalloutType = (value: string | null | undefined): string => {
  if (!value) return "info";
  const lower = value.toLowerCase();
  // A real schema type round-trips to itself (idempotent).
  if (CALLOUT_TYPES.includes(lower)) return lower;
  // A known GitHub/Obsidian alias maps to the editor's closest banner. Use an
  // OWN-property check: `value` is user-authored, and a bare index lookup would
  // treat inherited members (`constructor`, `__proto__`) as truthy "aliases"
  // and return a non-string.
  const alias = Object.prototype.hasOwnProperty.call(CALLOUT_TYPE_ALIASES, lower)
    ? CALLOUT_TYPE_ALIASES[lower]
    : undefined;
  // Anything else is collapsed to the safe default.
  return alias ?? "info";
};
|
||||
|
||||
/**
|
||||
* Allowlist guard for CSS color values imported from HTML.
|
||||
|
||||
Reference in New Issue
Block a user