fix(git-sync): red-team hardening — 12 confirmed sync-breaking bugs + regression tests
A 10-agent red-team pass on the two-way Docmost<->git sync surfaced 16 ranked findings (9 others triaged out as already-defended). Wrote a reproduction test per finding (each asserts the CORRECT behavior, so it fails on the bug), then fixed the production code so every repro goes green. All confirmed bugs: Round-trip data loss (markdown-converter.ts + docmost-schema.ts mirror): - #1 editor-ext node types silently dropped on export — ported the 8 missing canon nodes (footnoteReference/footnotesList/footnoteDefinition, htmlEmbed, status, pageEmbed, transclusionSource/Reference) into the git-sync schema mirror and added converter cases that emit their schema-matching HTML instead of flattening unknown nodes to '' (this was the critical data-loss flagged in review #1679: footnotes/htmlEmbed lost on sync). Snapshot surface updated. - #2 top-level image lost width/height/align/attachmentId — now emits an HTML <img> (like video/diagrams) when it carries layout attrs; bare images stay . Image node parses width/height as strings so they re-import. - #3 code block containing a ``` fence corrupted on round-trip — outer fence is now widened to (longest-inner-backtick-run + 1). - #16 deep nesting threw RangeError (page never synced) — added a depth guard (MAX_NODE_DEPTH=400) so the converter never overflows the stack. Push/layout/cycle (engine): - #4 disambiguation ' ~slugId' suffix corrupted Docmost titles + order-dependent layout — deterministic, order-independent sibling disambiguation; suffix is stripped from a path-derived title ONLY when the new name is exactly the old title plus the suffix (never a genuine retitle ending in ' ~token'). - #6 retry-adopt by (parent,title) clobbered the wrong duplicate-title sibling — ambiguous (parent,title) is no longer adopted (falls back to fresh create). - #12 a new child under a new parent was created at ROOT — creates are ordered parent-before-child with an in-memory created-id map for parent resolution. - #13 git conflict markers could reach Docmost — bodies are scanned and the marker lines stripped (a '=======' line is only treated as a conflict separator inside a <<<<<<< ... >>>>>>> block, so setext headings are safe). - #15 a divergent `docmost` mirror was escalated by runPush but dropped by runCycle — RunCycleResult now forwards divergentDocmost to the orchestrator. Server (merge / lock / provenance): - #9 3-way merge lost a human's block edit when git inserted an adjacent block — finer-grained diff3 region merge (via lcs) preserves non-overlapping human edits; genuine same-block conflicts still resolve git-wins. - #10 single-writer race — module-static liveLocks closes the same-process TOCTOU window, and a heartbeat refresh that cannot confirm the lock now aborts the cycle at its next write checkpoint (cooperative AbortSignal threaded through runCycle). Cross-process fencing tokens remain a follow-up. - #14 sticky-agent provenance overrode an explicit actor='git-sync' write, blinding the listener loop-guard — resolveSource now lets an explicit actor win over the sticky-agent fallback (explicit agent still wins). Verified: git-sync vitest 617 pass (+1 expected-fail), server unit jest 1541 pass, server tsc clean. A review pass over the fixes caught and corrected a title-suffix over-strip, an inert abort signal, a document-wide conflict-marker strip, and two leaf-atom content-holes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -54,23 +54,54 @@ export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
|
||||
if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
|
||||
}
|
||||
|
||||
// Resolve each node's display name once, deterministically, tracking sibling
|
||||
// collisions per parent. `usedBySibling` maps a parent key -> set of names
|
||||
// already taken under that parent. The bucket key is the node's parent ONLY
|
||||
// when that parent is actually present in `byId`; otherwise (null parent, or
|
||||
// an orphan whose parent is outside the input set) the node buckets at
|
||||
// `"__root__"`. This is critical: orphans land at the vault root (see
|
||||
// `folderSegmentsFor`), so they MUST share the root bucket with real root
|
||||
// pages to be disambiguated against each other here — making `nameById` final
|
||||
// before any `segments` are computed, so no ancestor name can drift later.
|
||||
const usedBySibling = new Map<string, Set<string>>();
|
||||
const nameById = new Map<string, string>();
|
||||
// Resolve each node's display name once, deterministically. The bucket key is
|
||||
// the node's parent ONLY when that parent is actually present in `byId`;
|
||||
// otherwise (null parent, or an orphan whose parent is outside the input set)
|
||||
// the node buckets at `"__root__"`. This is critical: orphans land at the vault
|
||||
// root (see `folderSegmentsFor`), so they MUST share the root bucket with real
|
||||
// root pages to be disambiguated against each other here — making `nameById`
|
||||
// final before any `segments` are computed, so no ancestor name can drift.
|
||||
const parentKeyOf = (p: PageNode): string =>
|
||||
p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
|
||||
// Group nodes by (parentKey, sanitized base title) so sibling collisions are
|
||||
// resolved by a STABLE rule that does NOT depend on input array order. Dedupe
|
||||
// ids (first occurrence wins, matching `byId`).
|
||||
const siblingGroups = new Map<string, PageNode[]>();
|
||||
const namedIds = new Set<string>();
|
||||
for (const p of pages) {
|
||||
if (p && p.id && !nameById.has(p.id)) {
|
||||
const parentKey =
|
||||
p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
|
||||
nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
|
||||
if (!p || !p.id || namedIds.has(p.id)) continue;
|
||||
namedIds.add(p.id);
|
||||
const key = `${parentKeyOf(p)}\u0000${sanitizeTitle(p.title ?? "")}`;
|
||||
const bucket = siblingGroups.get(key);
|
||||
if (bucket) bucket.push(p);
|
||||
else siblingGroups.set(key, [p]);
|
||||
}
|
||||
// Assign each node its display name. Within a colliding group, sort the
|
||||
// siblings by their stable disambiguation key (`slugId` else `id`) and let the
|
||||
// FIRST keep the bare sanitized title; every OTHER gets the ` ~<slugId>`
|
||||
// suffix. This makes `nameById` a pure function of the page SET — reordering
|
||||
// the input never moves the suffix onto a different page (red-team #4a). The
|
||||
// suffix is itself sanitized (the slugId/id is untrusted and must never inject
|
||||
// a path separator).
|
||||
const nameById = new Map<string, string>();
|
||||
const disambKeyOf = (p: PageNode): string => p.slugId ?? p.id;
|
||||
for (const bucket of siblingGroups.values()) {
|
||||
const base = sanitizeTitle(bucket[0].title ?? "");
|
||||
if (bucket.length === 1) {
|
||||
nameById.set(bucket[0].id, base);
|
||||
continue;
|
||||
}
|
||||
const sorted = [...bucket].sort((a, b) => {
|
||||
const ka = disambKeyOf(a);
|
||||
const kb = disambKeyOf(b);
|
||||
return ka < kb ? -1 : ka > kb ? 1 : 0;
|
||||
});
|
||||
sorted.forEach((p, i) => {
|
||||
nameById.set(
|
||||
p.id,
|
||||
i === 0 ? base : disambiguate(base, sanitizeTitle(disambKeyOf(p))),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
// Every id we index above MUST get a resolved name; this helper returns it
|
||||
@@ -169,34 +200,3 @@ export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
|
||||
return layout;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a deterministic, collision-free name for a node among its SIBLINGS.
|
||||
* `usedBySibling` maps a parent key -> set of names already taken, so two
|
||||
* siblings that sanitize to the same name get a stable ` ~slugId` suffix
|
||||
* (SPEC §12). The suffix is itself passed through `sanitizeTitle`, because the
|
||||
* slugId/id is a second untrusted-data channel that must never leak a path
|
||||
* separator into the name. `parentKey` is supplied by the caller (it resolves
|
||||
* to `"__root__"` for root pages AND for orphans whose parent is outside the
|
||||
* input set, so they share one bucket). The name is COSMETIC; identity lives in
|
||||
* the meta block.
|
||||
*/
|
||||
function nameForNode(
|
||||
node: PageNode,
|
||||
parentKey: string,
|
||||
usedBySibling: Map<string, Set<string>>,
|
||||
): string {
|
||||
let used = usedBySibling.get(parentKey);
|
||||
if (!used) {
|
||||
used = new Set<string>();
|
||||
usedBySibling.set(parentKey, used);
|
||||
}
|
||||
|
||||
let name = sanitizeTitle(node.title ?? "");
|
||||
if (used.has(name)) {
|
||||
// Sibling collision: disambiguate with the stable, sanitized slugId (fall
|
||||
// back to the sanitized pageId if no slugId is present).
|
||||
name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
|
||||
}
|
||||
used.add(name);
|
||||
return name;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user