fix(git-sync): close #119 blockers — dead edit-revert guard, cross-space guard, red suite (F5/S2/G1/A1/F7)

F5 (HIGH data-loss): guard #2 (GS-EDIT-REVERT) called a local key-sorting equality that
never matched a real page (block ids + materialized defaults differ), so the guard was
dead and a web edit on a git-sync space was silently reverted within one poll cycle. Use
the package's authoritative docsCanonicallyEqual (strips block id + normalizes
KNOWN_DEFAULTS), wired through the git-sync loader like sanitizeTitle; delete the dead
local canonicalize/canonicalJsonEqual.
S2 (security): importPageMarkdown targeted a page by the vault-file id without a spaceId
check (deletePage had one) — a space-A vault file carrying space-B's page id could
resurrect/overwrite/clear B's page. Mirror deletePage's guard: skip when the loaded page
lives in a different space than ctx.spaceId.
G1 (jest green): add sanitizeTitle + docsCanonicallyEqual to the loadGitSync mock; update
the converter-gate + package golden expectations to the genuinely-fixed output (paragraph
textAlign now round-trips, multi-block table cells emit HTML tables); fix the orchestrator
spec's stale mock so the per-space enabled gate (added later) is satisfied.
A1: the converter dropped heading textAlign on export (bare '## text'); emit a styled
<hN> when aligned, symmetric to paragraphs — round-trips losslessly (level + align), no
churn for unaligned headings.
F7 (docs): reword the false 'single choke point' title-strip comment; correct push.ts
docstrings that still described the removed standalone-CLI/daemon model.

Adds regression tests: the F5 acceptance test (canonically-equal content with real uuids
=> writePageBody NOT called), the S2 cross-space import guard, and the A1 heading
round-trip.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
agent_coder
2026-07-03 00:13:08 +03:00
parent 320b200ac8
commit 5d45f5a85e
9 changed files with 362 additions and 111 deletions
@@ -56,10 +56,19 @@ interface BuildOptions {
vaultOverrides?: Record<string, unknown>;
/**
* The row `buildSettings` reads for the per-space `autoMergeConflicts` flag
* (`executeTakeFirst`). Default: the SAFE off value. Pass `undefined` to model
* a missing row (no space / no settings).
* (`executeTakeFirst` on the `autoMergeConflicts`-aliased query). Default: the
* SAFE off value. Pass `undefined` to model a missing row (no space / no
* settings).
*/
settingsRow?: { autoMergeConflicts: boolean } | undefined;
/**
* The per-space opt-in flag `runOnce` reads via `isSpaceGitSyncEnabled`
* (`executeTakeFirst` on the `enabled`-aliased query — a DIFFERENT query from
* the `autoMergeConflicts` read above; commit c838fdee added it). Default
* `true` so a build() space passes the per-space gate and the cycle proceeds.
* Set `false` to model a space that did NOT opt in (skipped:'space-not-enabled').
*/
spaceEnabled?: boolean;
}
interface Built {
@@ -83,6 +92,7 @@ function build(opts: BuildOptions = {}): Built {
pollIntervalMs = 15000,
debounceMs = 2000,
vaultOverrides = {},
spaceEnabled = true,
} = opts;
// Distinguish "key omitted" (default off row) from "key present but undefined"
// (a deliberately MISSING settings row).
@@ -138,15 +148,36 @@ function build(opts: BuildOptions = {}): Built {
};
const redisService = { getOrThrow: jest.fn(() => redis) };
// Chainable Kysely stub. `buildSettings` reads the space's
// `gitSync.autoMergeConflicts` flag via
// `selectFrom('spaces').select(...).where('id','=',id).executeTakeFirst()`;
// default it to the SAFE off value. `enabledSpaces` uses `.execute()`.
// Chainable Kysely stub. TWO distinct single-row queries run through the same
// builder and both end in `executeTakeFirst`, so the stub must tell them apart
// by the column ALIAS the real code selects (they are otherwise identical):
// - `isSpaceGitSyncEnabled` (per-space opt-in gate in `runOnce`, added by
// c838fdee): `.select(sql\`...->>'enabled' = 'true'\`.as('enabled'))` ->
// the code reads `row?.enabled`. Return `{ enabled: spaceEnabled }`.
// - `buildSettings`: `.select(sql\`...->>'autoMergeConflicts' = 'true'\`
// .as('autoMergeConflicts'))` -> the code reads `row?.autoMergeConflicts`.
// Return `settingsRow` (default SAFE off; `undefined` models a missing row).
// `enabledSpaces` uses `.select([...strings]).execute()` (no alias, no
// executeTakeFirst) and is stubbed/replaced in the tests that exercise it.
const db = (() => {
let lastAlias: string | undefined;
const aliasOf = (sel: unknown): string | undefined => {
try {
const node = (sel as { toOperationNode?: () => any })?.toOperationNode?.();
return node?.kind === 'AliasNode' ? node.alias?.name : undefined;
} catch {
return undefined;
}
};
const builder: any = {
select: () => builder,
select: (sel: unknown) => {
const alias = aliasOf(sel);
if (alias) lastAlias = alias;
return builder;
},
where: () => builder,
executeTakeFirst: async () => settingsRow,
executeTakeFirst: async () =>
lastAlias === 'enabled' ? { enabled: spaceEnabled } : settingsRow,
execute: async () => [],
};
return { selectFrom: () => builder };
@@ -43,10 +43,26 @@ jest.mock('../git-sync.loader', () => ({
type: 'doc',
content: [{ type: 'paragraph' }],
}),
// renamePage funnels the current title through sanitizeTitle to detect the
// sanitized-stem echo; identity is the correct default here (none of the
// rename fixtures use filename-hostile chars, so the sanitized form equals
// the input and the guard never fires).
sanitizeTitle: (title: string) => title,
// importPageMarkdown guard #2 uses docsCanonicallyEqual to skip a no-op
// re-ingest. A key-order-insensitive JSON compare is a sufficient stand-in
// for the unit tests (the real semantic equality is covered by the
// @docmost/git-sync converter tests); returning false for genuinely
// different docs lets the write paths under test proceed.
docsCanonicallyEqual: (a: unknown, b: unknown) =>
JSON.stringify(a) === JSON.stringify(b),
})),
}));
import { GitmostDataSourceService } from './gitmost-datasource.service';
// The loader is mocked above; this binding is the hoisted jest.fn, so a single
// test can swap the runtime bridge (e.g. a smarter `docsCanonicallyEqual`) via
// `mockResolvedValueOnce` without perturbing the default used by every other test.
import { loadGitSync } from '../git-sync.loader';
// Focused unit/contract test for the native GitSyncClient adapter.
// No DB, no real collab server: the repos/services/gateway are mocked and we
@@ -261,6 +277,79 @@ describe('GitmostDataSourceService', () => {
expect(res.updatedAt).toBeUndefined();
});
// F5 acceptance, criterion (b): guard #2 (docsCanonicallyEqual) must SKIP the
// re-ingest when the freshly-parsed doc is canonically equal to the page's
// current DB content even though the two are NOT byte-identical — the DB copy
// carries the real per-block uuids (comments anchor to them) while a fresh
// parse has none. Skipping is what protects a concurrent, not-yet-flushed
// human edit from being clobbered by an idempotent poll re-ingest.
//
// NON-VACUITY: `currentContent` here differs from `doc` in raw JSON (it has an
// `attrs.id` uuid `doc` lacks). The default mock `docsCanonicallyEqual` is a
// plain `JSON.stringify` compare — it would return FALSE for these inputs, so
// if guard #2 were removed (or reverted to a canonicalJsonEqual that does not
// strip ids) writePageBody WOULD be called and this test would fail. We model
// the package's authoritative id-stripping equality (returns TRUE here) by
// swapping in a `docsCanonicallyEqual` that normalizes away block ids, proving
// it is the SEMANTIC equality — not a byte compare — that suppresses the write.
it('does NOT call writePageBody (guard #2) when content is canonically equal despite differing block ids (F5 criterion b)', async () => {
const { service, mocks } = build();
const realUuid = '11111111-1111-4111-8111-111111111111';
// The DB row's content carries a REAL per-block uuid; a fresh parse does not.
// These two docs are canonically equal (id-only difference) but NOT
// byte-identical, so a naive JSON compare treats the page as "changed".
mocks.pageRepo.findById.mockResolvedValue({
id: 'p1',
updatedAt: new Date('2026-06-20T11:00:00.000Z'),
content: {
type: 'doc',
content: [{ type: 'paragraph', attrs: { id: realUuid } }],
},
});
// Swap the runtime bridge for THIS call only: same passthrough parse/convert
// as the default mock (so `doc` is the id-less `{type:'doc',[paragraph]}`),
// but `docsCanonicallyEqual` strips block ids before comparing — the real
// converter's semantics. It returns TRUE for (doc, currentContent) here.
const stripIds = (node: any): any => {
if (Array.isArray(node)) return node.map(stripIds);
if (node && typeof node === 'object') {
const out: any = {};
for (const [k, v] of Object.entries(node)) {
if (k === 'attrs' && v && typeof v === 'object') {
const { id: _id, ...rest } = v as Record<string, unknown>;
if (Object.keys(rest).length) out.attrs = stripIds(rest);
} else {
out[k] = stripIds(v);
}
}
return out;
}
return node;
};
(loadGitSync as jest.Mock).mockResolvedValueOnce({
parseDocmostMarkdown: (md: string) => ({ meta: {}, body: md }),
markdownToProseMirror: async () => ({
type: 'doc',
content: [{ type: 'paragraph' }],
}),
sanitizeTitle: (title: string) => title,
docsCanonicallyEqual: (a: unknown, b: unknown) =>
JSON.stringify(stripIds(a)) === JSON.stringify(stripIds(b)),
});
// No baseMarkdown -> guard #1 (fullMarkdown === baseMarkdown) cannot fire, so
// the outcome is decided purely by guard #2.
const res = await service
.bind(CTX)
.importPageMarkdown('p1', '# Hello\n\nworld');
// Guard #2 fired: the redundant re-ingest is skipped, so the concurrent
// human edit in the live doc is NOT clobbered.
expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled();
// The unchanged page's updatedAt is still surfaced from the DB row.
expect(res.updatedAt).toBe('2026-06-20T11:00:00.000Z');
});
// The 2-way path (no base) is covered above; this exercises the THREE-WAY
// branch that only fires when a `baseMarkdown` is supplied (review #5). The
// merge dispatch itself now lives in the collab handler (gitSyncWriteBody);
@@ -290,6 +379,43 @@ describe('GitmostDataSourceService', () => {
);
});
});
// The cross-space confused-deputy guard (review S2) fires only when
// ctx.spaceId is bound. A vault file in space A can carry space B's pageId;
// without this check the reconciling space could overwrite B's page body — a
// write it has no authority over. When the resolved page already lives in a
// DIFFERENT space, the import is SKIPPED (writePageBody not called) and the
// page's own updatedAt is returned unchanged.
describe('cross-space guard (ctx.spaceId bound, review S2)', () => {
it('does NOT call writePageBody when the target page lives in another space', async () => {
const { service, mocks } = build();
// The resolved page is in space-2, but the reconciling context is space-1.
// Its content DIFFERS from the parsed doc, so guard #2 (docsCanonicallyEqual)
// cannot be what suppresses the write — proving NON-VACUITY: without the S2
// guard the flow would reach writeBody and writePageBody WOULD be called.
mocks.pageRepo.findById.mockResolvedValue({
id: 'p1',
deletedAt: null,
spaceId: 'space-2',
updatedAt: new Date('2026-06-21T09:00:00.000Z'),
content: {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'B body' }] },
],
},
});
const res = await service
.bind(CTX_SPACE)
.importPageMarkdown('p1', '# Hello\n\nworld');
// The cross-space page is preserved: no body write happened.
expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled();
// Early-return shape: only the page's own updatedAt is surfaced.
expect(res).toEqual({ updatedAt: '2026-06-21T09:00:00.000Z' });
});
});
});
describe('createPage', () => {
@@ -46,34 +46,6 @@ const GIT_SYNC_PROVENANCE: AuthProvenanceData = {
aiChatId: null,
};
/**
* Recursively serialize a JSON value with object keys sorted, so two values that
* differ ONLY in key order (or are otherwise structurally identical) compare
* equal. Arrays keep their order (order is meaningful in ProseMirror docs).
* Used to detect a semantically no-op body ingest despite an unstable
* markdown<->ProseMirror round-trip (see importPageMarkdown guard #2).
*/
function canonicalize(value: unknown): unknown {
if (Array.isArray(value)) return value.map(canonicalize);
if (value && typeof value === 'object') {
const out: Record<string, unknown> = {};
for (const key of Object.keys(value as Record<string, unknown>).sort()) {
out[key] = canonicalize((value as Record<string, unknown>)[key]);
}
return out;
}
return value;
}
/** True iff `a` and `b` are equal ignoring object key order. */
function canonicalJsonEqual(a: unknown, b: unknown): boolean {
try {
return JSON.stringify(canonicalize(a)) === JSON.stringify(canonicalize(b));
} catch {
return false;
}
}
/**
* Native, in-process implementation of the engine's `GitSyncClient` seam
* Reads go through repositories (PageRepo/SpaceRepo); body writes go
@@ -222,6 +194,28 @@ export class GitmostDataSourceService {
const currentPage = await this.pageRepo.findById(pageId, {
includeContent: true,
});
// Cross-space confused-deputy guard (review S2). The target `pageId` comes
// from THIS space's vault file frontmatter, but a file in space A could carry
// space B's page id. Without this check that file could resurrect (via
// restorePage), overwrite the body (writeBody), or clear the content of B's
// page — a cross-space write the reconciling space has no authority over.
// Mirror deletePage's guard (same `ctx.spaceId` source, same fail-safe
// direction): when the reconciling space is known and the resolved page
// already lives in a DIFFERENT space, skip — touch nothing. Only applies when
// the page exists; a not-found page (no currentPage) still proceeds as before
// so a legitimate same-space ingest is unaffected.
if (
ctx.spaceId &&
currentPage &&
currentPage.spaceId !== ctx.spaceId
) {
this.logger.log(
`git-sync[${ctx.spaceId}] skip import of page ${pageId}: page lives in space ${currentPage.spaceId} (cross-space vault reference; page preserved)`,
);
return {
updatedAt: new Date(currentPage.updatedAt).toISOString(),
};
}
// Revert-of-delete undelete (review warning). If the target page is currently
// SOFT-DELETED, this ingest is a git-revert that re-added the page's file
// (the push classifier saw an add carrying a known pageId -> UPDATE). Writing
@@ -246,19 +240,26 @@ export class GitmostDataSourceService {
};
}
const { parseDocmostMarkdown, markdownToProseMirror } = await loadGitSync();
const { parseDocmostMarkdown, markdownToProseMirror, docsCanonicallyEqual } =
await loadGitSync();
const { body } = parseDocmostMarkdown(fullMarkdown);
const doc = await markdownToProseMirror(body);
// Idempotency guard #2 (defense-in-depth). Even when the vault file text
// differs cosmetically, the PARSED body can be SEMANTICALLY identical to the
// page's current Docmost content — the markdown<->ProseMirror round-trip is
// not byte-stable (e.g. JSON key order `{text,type}` vs `{type,text}`,
// default attrs), so upstream change-detection mis-flags such pages as
// changed every cycle. Compare by CANONICAL JSON (recursively key-sorted); if
// the incoming body already equals current content, this ingest is a no-op —
// skip it so a concurrent live edit is never clobbered and the vault never
// churns. A genuine content change is not canonically equal, so it proceeds.
// not byte-stable, so upstream change-detection mis-flags such pages as
// changed every cycle. But the divergence is NOT just JSON key order: a fresh
// `markdownToProseMirror(doc)` carries new/null block ids and materialized
// schema default attrs, whereas `currentContent` (from the DB) carries the
// real per-block uuids (to which comments are anchored) and KNOWN_DEFAULTS.
// A key-order-only compare therefore NEVER matches a real collab page, so use
// the package's authoritative `docsCanonicallyEqual` — the same equality the
// converter's round-trip losslessness tests use, which strips block ids and
// normalizes KNOWN_DEFAULTS. If the incoming body already equals current
// content, this ingest is a no-op — skip it so a concurrent live edit is
// never clobbered and the vault never churns. A genuine content change is not
// canonically equal, so it proceeds.
const currentContent =
typeof currentPage?.content === 'string'
? (() => {
@@ -269,7 +270,7 @@ export class GitmostDataSourceService {
}
})()
: currentPage?.content;
if (currentContent && canonicalJsonEqual(doc, currentContent)) {
if (currentContent && docsCanonicallyEqual(doc, currentContent)) {
return {
updatedAt: new Date(currentPage!.updatedAt).toISOString(),
};
@@ -447,10 +448,12 @@ export class GitmostDataSourceService {
// LOCAL filesystem artifact and must NEVER become the page's real Docmost
// title. A filename-derived title can carry it back in on ingest (observed:
// intermittent same-title collision left a page permanently titled
// "Title ~<slugId>"). Strip it at this single choke point every git-sync
// title write funnels through — but ONLY when the trailing token equals THIS
// page's own slugId, so a genuine user title that legitimately ends in
// ` ~token` is never corrupted (slugId is a random nanoid; no real collision).
// "Title ~<slugId>"). Strip it here on the RENAME path (this is where a
// filename-derived title lands as a page's real title); other title-write
// paths (e.g. createPage / importPageMarkdown) are separate and not covered
// by this choke point. Strip ONLY when the trailing token equals THIS page's
// own slugId, so a genuine user title that legitimately ends in ` ~token` is
// never corrupted (slugId is a random nanoid; no real collision).
const suffix = ` ~${page.slugId}`;
const cleanTitle =
page.slugId && title.endsWith(suffix)