fix(git-sync): close #119 blockers — dead edit-revert guard, cross-space guard, red suite (F5/S2/G1/A1/F7)
F5 (HIGH data-loss): guard #2 (GS-EDIT-REVERT) called a local key-sorting equality that never matched a real page (block ids + materialized defaults differ), so the guard was dead and a web edit on a git-sync space was silently reverted within one poll cycle. Use the package's authoritative docsCanonicallyEqual (strips block id + normalizes KNOWN_DEFAULTS), wired through the git-sync loader like sanitizeTitle; delete the dead local canonicalize/canonicalJsonEqual. S2 (security): importPageMarkdown targeted a page by the vault-file id without a spaceId check (deletePage had one) — a space-A vault file carrying space-B's page id could resurrect/overwrite/clear B's page. Mirror deletePage's guard: skip when the loaded page lives in a different space than ctx.spaceId. G1 (jest green): add sanitizeTitle + docsCanonicallyEqual to the loadGitSync mock; update the converter-gate + package golden expectations to the genuinely-fixed output (paragraph textAlign now round-trips, multi-block table cells emit HTML tables); fix the orchestrator spec's stale mock so the per-space enabled gate (added later) is satisfied. A1: the converter dropped heading textAlign on export (bare '## text'); emit a styled <hN> when aligned, symmetric to paragraphs — round-trips losslessly (level + align), no churn for unaligned headings. F7 (docs): reword the false 'single choke point' title-strip comment; correct push.ts docstrings that still described the removed standalone-CLI/daemon model. Adds regression tests: the F5 acceptance test (canonically-equal content with real uuids => writePageBody NOT called), the S2 cross-space import guard, and the A1 heading round-trip. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -56,10 +56,19 @@ interface BuildOptions {
|
||||
vaultOverrides?: Record<string, unknown>;
|
||||
/**
|
||||
* The row `buildSettings` reads for the per-space `autoMergeConflicts` flag
|
||||
* (`executeTakeFirst`). Default: the SAFE off value. Pass `undefined` to model
|
||||
* a missing row (no space / no settings).
|
||||
* (`executeTakeFirst` on the `autoMergeConflicts`-aliased query). Default: the
|
||||
* SAFE off value. Pass `undefined` to model a missing row (no space / no
|
||||
* settings).
|
||||
*/
|
||||
settingsRow?: { autoMergeConflicts: boolean } | undefined;
|
||||
/**
|
||||
* The per-space opt-in flag `runOnce` reads via `isSpaceGitSyncEnabled`
|
||||
* (`executeTakeFirst` on the `enabled`-aliased query — a DIFFERENT query from
|
||||
* the `autoMergeConflicts` read above; commit c838fdee added it). Default
|
||||
* `true` so a build() space passes the per-space gate and the cycle proceeds.
|
||||
* Set `false` to model a space that did NOT opt in (skipped:'space-not-enabled').
|
||||
*/
|
||||
spaceEnabled?: boolean;
|
||||
}
|
||||
|
||||
interface Built {
|
||||
@@ -83,6 +92,7 @@ function build(opts: BuildOptions = {}): Built {
|
||||
pollIntervalMs = 15000,
|
||||
debounceMs = 2000,
|
||||
vaultOverrides = {},
|
||||
spaceEnabled = true,
|
||||
} = opts;
|
||||
// Distinguish "key omitted" (default off row) from "key present but undefined"
|
||||
// (a deliberately MISSING settings row).
|
||||
@@ -138,15 +148,36 @@ function build(opts: BuildOptions = {}): Built {
|
||||
};
|
||||
const redisService = { getOrThrow: jest.fn(() => redis) };
|
||||
|
||||
// Chainable Kysely stub. `buildSettings` reads the space's
|
||||
// `gitSync.autoMergeConflicts` flag via
|
||||
// `selectFrom('spaces').select(...).where('id','=',id).executeTakeFirst()`;
|
||||
// default it to the SAFE off value. `enabledSpaces` uses `.execute()`.
|
||||
// Chainable Kysely stub. TWO distinct single-row queries run through the same
|
||||
// builder and both end in `executeTakeFirst`, so the stub must tell them apart
|
||||
// by the column ALIAS the real code selects (they are otherwise identical):
|
||||
// - `isSpaceGitSyncEnabled` (per-space opt-in gate in `runOnce`, added by
|
||||
// c838fdee): `.select(sql\`...->>'enabled' = 'true'\`.as('enabled'))` ->
|
||||
// the code reads `row?.enabled`. Return `{ enabled: spaceEnabled }`.
|
||||
// - `buildSettings`: `.select(sql\`...->>'autoMergeConflicts' = 'true'\`
|
||||
// .as('autoMergeConflicts'))` -> the code reads `row?.autoMergeConflicts`.
|
||||
// Return `settingsRow` (default SAFE off; `undefined` models a missing row).
|
||||
// `enabledSpaces` uses `.select([...strings]).execute()` (no alias, no
|
||||
// executeTakeFirst) and is stubbed/replaced in the tests that exercise it.
|
||||
const db = (() => {
|
||||
let lastAlias: string | undefined;
|
||||
const aliasOf = (sel: unknown): string | undefined => {
|
||||
try {
|
||||
const node = (sel as { toOperationNode?: () => any })?.toOperationNode?.();
|
||||
return node?.kind === 'AliasNode' ? node.alias?.name : undefined;
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
};
|
||||
const builder: any = {
|
||||
select: () => builder,
|
||||
select: (sel: unknown) => {
|
||||
const alias = aliasOf(sel);
|
||||
if (alias) lastAlias = alias;
|
||||
return builder;
|
||||
},
|
||||
where: () => builder,
|
||||
executeTakeFirst: async () => settingsRow,
|
||||
executeTakeFirst: async () =>
|
||||
lastAlias === 'enabled' ? { enabled: spaceEnabled } : settingsRow,
|
||||
execute: async () => [],
|
||||
};
|
||||
return { selectFrom: () => builder };
|
||||
|
||||
@@ -43,10 +43,26 @@ jest.mock('../git-sync.loader', () => ({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph' }],
|
||||
}),
|
||||
// renamePage funnels the current title through sanitizeTitle to detect the
|
||||
// sanitized-stem echo; identity is the correct default here (none of the
|
||||
// rename fixtures use filename-hostile chars, so the sanitized form equals
|
||||
// the input and the guard never fires).
|
||||
sanitizeTitle: (title: string) => title,
|
||||
// importPageMarkdown guard #2 uses docsCanonicallyEqual to skip a no-op
|
||||
// re-ingest. A key-order-insensitive JSON compare is a sufficient stand-in
|
||||
// for the unit tests (the real semantic equality is covered by the
|
||||
// @docmost/git-sync converter tests); returning false for genuinely
|
||||
// different docs lets the write paths under test proceed.
|
||||
docsCanonicallyEqual: (a: unknown, b: unknown) =>
|
||||
JSON.stringify(a) === JSON.stringify(b),
|
||||
})),
|
||||
}));
|
||||
|
||||
import { GitmostDataSourceService } from './gitmost-datasource.service';
|
||||
// The loader is mocked above; this binding is the hoisted jest.fn, so a single
|
||||
// test can swap the runtime bridge (e.g. a smarter `docsCanonicallyEqual`) via
|
||||
// `mockResolvedValueOnce` without perturbing the default used by every other test.
|
||||
import { loadGitSync } from '../git-sync.loader';
|
||||
|
||||
// Focused unit/contract test for the native GitSyncClient adapter.
|
||||
// No DB, no real collab server: the repos/services/gateway are mocked and we
|
||||
@@ -261,6 +277,79 @@ describe('GitmostDataSourceService', () => {
|
||||
expect(res.updatedAt).toBeUndefined();
|
||||
});
|
||||
|
||||
// F5 acceptance, criterion (b): guard #2 (docsCanonicallyEqual) must SKIP the
|
||||
// re-ingest when the freshly-parsed doc is canonically equal to the page's
|
||||
// current DB content even though the two are NOT byte-identical — the DB copy
|
||||
// carries the real per-block uuids (comments anchor to them) while a fresh
|
||||
// parse has none. Skipping is what protects a concurrent, not-yet-flushed
|
||||
// human edit from being clobbered by an idempotent poll re-ingest.
|
||||
//
|
||||
// NON-VACUITY: `currentContent` here differs from `doc` in raw JSON (it has an
|
||||
// `attrs.id` uuid `doc` lacks). The default mock `docsCanonicallyEqual` is a
|
||||
// plain `JSON.stringify` compare — it would return FALSE for these inputs, so
|
||||
// if guard #2 were removed (or reverted to a canonicalJsonEqual that does not
|
||||
// strip ids) writePageBody WOULD be called and this test would fail. We model
|
||||
// the package's authoritative id-stripping equality (returns TRUE here) by
|
||||
// swapping in a `docsCanonicallyEqual` that normalizes away block ids, proving
|
||||
// it is the SEMANTIC equality — not a byte compare — that suppresses the write.
|
||||
it('does NOT call writePageBody (guard #2) when content is canonically equal despite differing block ids (F5 criterion b)', async () => {
|
||||
const { service, mocks } = build();
|
||||
const realUuid = '11111111-1111-4111-8111-111111111111';
|
||||
// The DB row's content carries a REAL per-block uuid; a fresh parse does not.
|
||||
// These two docs are canonically equal (id-only difference) but NOT
|
||||
// byte-identical, so a naive JSON compare treats the page as "changed".
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
updatedAt: new Date('2026-06-20T11:00:00.000Z'),
|
||||
content: {
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', attrs: { id: realUuid } }],
|
||||
},
|
||||
});
|
||||
// Swap the runtime bridge for THIS call only: same passthrough parse/convert
|
||||
// as the default mock (so `doc` is the id-less `{type:'doc',[paragraph]}`),
|
||||
// but `docsCanonicallyEqual` strips block ids before comparing — the real
|
||||
// converter's semantics. It returns TRUE for (doc, currentContent) here.
|
||||
const stripIds = (node: any): any => {
|
||||
if (Array.isArray(node)) return node.map(stripIds);
|
||||
if (node && typeof node === 'object') {
|
||||
const out: any = {};
|
||||
for (const [k, v] of Object.entries(node)) {
|
||||
if (k === 'attrs' && v && typeof v === 'object') {
|
||||
const { id: _id, ...rest } = v as Record<string, unknown>;
|
||||
if (Object.keys(rest).length) out.attrs = stripIds(rest);
|
||||
} else {
|
||||
out[k] = stripIds(v);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
return node;
|
||||
};
|
||||
(loadGitSync as jest.Mock).mockResolvedValueOnce({
|
||||
parseDocmostMarkdown: (md: string) => ({ meta: {}, body: md }),
|
||||
markdownToProseMirror: async () => ({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph' }],
|
||||
}),
|
||||
sanitizeTitle: (title: string) => title,
|
||||
docsCanonicallyEqual: (a: unknown, b: unknown) =>
|
||||
JSON.stringify(stripIds(a)) === JSON.stringify(stripIds(b)),
|
||||
});
|
||||
|
||||
// No baseMarkdown -> guard #1 (fullMarkdown === baseMarkdown) cannot fire, so
|
||||
// the outcome is decided purely by guard #2.
|
||||
const res = await service
|
||||
.bind(CTX)
|
||||
.importPageMarkdown('p1', '# Hello\n\nworld');
|
||||
|
||||
// Guard #2 fired: the redundant re-ingest is skipped, so the concurrent
|
||||
// human edit in the live doc is NOT clobbered.
|
||||
expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled();
|
||||
// The unchanged page's updatedAt is still surfaced from the DB row.
|
||||
expect(res.updatedAt).toBe('2026-06-20T11:00:00.000Z');
|
||||
});
|
||||
|
||||
// The 2-way path (no base) is covered above; this exercises the THREE-WAY
|
||||
// branch that only fires when a `baseMarkdown` is supplied (review #5). The
|
||||
// merge dispatch itself now lives in the collab handler (gitSyncWriteBody);
|
||||
@@ -290,6 +379,43 @@ describe('GitmostDataSourceService', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// The cross-space confused-deputy guard (review S2) fires only when
|
||||
// ctx.spaceId is bound. A vault file in space A can carry space B's pageId;
|
||||
// without this check the reconciling space could overwrite B's page body — a
|
||||
// write it has no authority over. When the resolved page already lives in a
|
||||
// DIFFERENT space, the import is SKIPPED (writePageBody not called) and the
|
||||
// page's own updatedAt is returned unchanged.
|
||||
describe('cross-space guard (ctx.spaceId bound, review S2)', () => {
|
||||
it('does NOT call writePageBody when the target page lives in another space', async () => {
|
||||
const { service, mocks } = build();
|
||||
// The resolved page is in space-2, but the reconciling context is space-1.
|
||||
// Its content DIFFERS from the parsed doc, so guard #2 (docsCanonicallyEqual)
|
||||
// cannot be what suppresses the write — proving NON-VACUITY: without the S2
|
||||
// guard the flow would reach writeBody and writePageBody WOULD be called.
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
deletedAt: null,
|
||||
spaceId: 'space-2',
|
||||
updatedAt: new Date('2026-06-21T09:00:00.000Z'),
|
||||
content: {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'B body' }] },
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
const res = await service
|
||||
.bind(CTX_SPACE)
|
||||
.importPageMarkdown('p1', '# Hello\n\nworld');
|
||||
|
||||
// The cross-space page is preserved: no body write happened.
|
||||
expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled();
|
||||
// Early-return shape: only the page's own updatedAt is surfaced.
|
||||
expect(res).toEqual({ updatedAt: '2026-06-21T09:00:00.000Z' });
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('createPage', () => {
|
||||
|
||||
@@ -46,34 +46,6 @@ const GIT_SYNC_PROVENANCE: AuthProvenanceData = {
|
||||
aiChatId: null,
|
||||
};
|
||||
|
||||
/**
|
||||
* Recursively serialize a JSON value with object keys sorted, so two values that
|
||||
* differ ONLY in key order (or are otherwise structurally identical) compare
|
||||
* equal. Arrays keep their order (order is meaningful in ProseMirror docs).
|
||||
* Used to detect a semantically no-op body ingest despite an unstable
|
||||
* markdown<->ProseMirror round-trip (see importPageMarkdown guard #2).
|
||||
*/
|
||||
function canonicalize(value: unknown): unknown {
|
||||
if (Array.isArray(value)) return value.map(canonicalize);
|
||||
if (value && typeof value === 'object') {
|
||||
const out: Record<string, unknown> = {};
|
||||
for (const key of Object.keys(value as Record<string, unknown>).sort()) {
|
||||
out[key] = canonicalize((value as Record<string, unknown>)[key]);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/** True iff `a` and `b` are equal ignoring object key order. */
|
||||
function canonicalJsonEqual(a: unknown, b: unknown): boolean {
|
||||
try {
|
||||
return JSON.stringify(canonicalize(a)) === JSON.stringify(canonicalize(b));
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Native, in-process implementation of the engine's `GitSyncClient` seam
|
||||
* Reads go through repositories (PageRepo/SpaceRepo); body writes go
|
||||
@@ -222,6 +194,28 @@ export class GitmostDataSourceService {
|
||||
const currentPage = await this.pageRepo.findById(pageId, {
|
||||
includeContent: true,
|
||||
});
|
||||
// Cross-space confused-deputy guard (review S2). The target `pageId` comes
|
||||
// from THIS space's vault file frontmatter, but a file in space A could carry
|
||||
// space B's page id. Without this check that file could resurrect (via
|
||||
// restorePage), overwrite the body (writeBody), or clear the content of B's
|
||||
// page — a cross-space write the reconciling space has no authority over.
|
||||
// Mirror deletePage's guard (same `ctx.spaceId` source, same fail-safe
|
||||
// direction): when the reconciling space is known and the resolved page
|
||||
// already lives in a DIFFERENT space, skip — touch nothing. Only applies when
|
||||
// the page exists; a not-found page (no currentPage) still proceeds as before
|
||||
// so a legitimate same-space ingest is unaffected.
|
||||
if (
|
||||
ctx.spaceId &&
|
||||
currentPage &&
|
||||
currentPage.spaceId !== ctx.spaceId
|
||||
) {
|
||||
this.logger.log(
|
||||
`git-sync[${ctx.spaceId}] skip import of page ${pageId}: page lives in space ${currentPage.spaceId} (cross-space vault reference; page preserved)`,
|
||||
);
|
||||
return {
|
||||
updatedAt: new Date(currentPage.updatedAt).toISOString(),
|
||||
};
|
||||
}
|
||||
// Revert-of-delete undelete (review warning). If the target page is currently
|
||||
// SOFT-DELETED, this ingest is a git-revert that re-added the page's file
|
||||
// (the push classifier saw an add carrying a known pageId -> UPDATE). Writing
|
||||
@@ -246,19 +240,26 @@ export class GitmostDataSourceService {
|
||||
};
|
||||
}
|
||||
|
||||
const { parseDocmostMarkdown, markdownToProseMirror } = await loadGitSync();
|
||||
const { parseDocmostMarkdown, markdownToProseMirror, docsCanonicallyEqual } =
|
||||
await loadGitSync();
|
||||
const { body } = parseDocmostMarkdown(fullMarkdown);
|
||||
const doc = await markdownToProseMirror(body);
|
||||
|
||||
// Idempotency guard #2 (defense-in-depth). Even when the vault file text
|
||||
// differs cosmetically, the PARSED body can be SEMANTICALLY identical to the
|
||||
// page's current Docmost content — the markdown<->ProseMirror round-trip is
|
||||
// not byte-stable (e.g. JSON key order `{text,type}` vs `{type,text}`,
|
||||
// default attrs), so upstream change-detection mis-flags such pages as
|
||||
// changed every cycle. Compare by CANONICAL JSON (recursively key-sorted); if
|
||||
// the incoming body already equals current content, this ingest is a no-op —
|
||||
// skip it so a concurrent live edit is never clobbered and the vault never
|
||||
// churns. A genuine content change is not canonically equal, so it proceeds.
|
||||
// not byte-stable, so upstream change-detection mis-flags such pages as
|
||||
// changed every cycle. But the divergence is NOT just JSON key order: a fresh
|
||||
// `markdownToProseMirror(doc)` carries new/null block ids and materialized
|
||||
// schema default attrs, whereas `currentContent` (from the DB) carries the
|
||||
// real per-block uuids (to which comments are anchored) and KNOWN_DEFAULTS.
|
||||
// A key-order-only compare therefore NEVER matches a real collab page, so use
|
||||
// the package's authoritative `docsCanonicallyEqual` — the same equality the
|
||||
// converter's round-trip losslessness tests use, which strips block ids and
|
||||
// normalizes KNOWN_DEFAULTS. If the incoming body already equals current
|
||||
// content, this ingest is a no-op — skip it so a concurrent live edit is
|
||||
// never clobbered and the vault never churns. A genuine content change is not
|
||||
// canonically equal, so it proceeds.
|
||||
const currentContent =
|
||||
typeof currentPage?.content === 'string'
|
||||
? (() => {
|
||||
@@ -269,7 +270,7 @@ export class GitmostDataSourceService {
|
||||
}
|
||||
})()
|
||||
: currentPage?.content;
|
||||
if (currentContent && canonicalJsonEqual(doc, currentContent)) {
|
||||
if (currentContent && docsCanonicallyEqual(doc, currentContent)) {
|
||||
return {
|
||||
updatedAt: new Date(currentPage!.updatedAt).toISOString(),
|
||||
};
|
||||
@@ -447,10 +448,12 @@ export class GitmostDataSourceService {
|
||||
// LOCAL filesystem artifact and must NEVER become the page's real Docmost
|
||||
// title. A filename-derived title can carry it back in on ingest (observed:
|
||||
// intermittent same-title collision left a page permanently titled
|
||||
// "Title ~<slugId>"). Strip it at this single choke point every git-sync
|
||||
// title write funnels through — but ONLY when the trailing token equals THIS
|
||||
// page's own slugId, so a genuine user title that legitimately ends in
|
||||
// ` ~token` is never corrupted (slugId is a random nanoid; no real collision).
|
||||
// "Title ~<slugId>"). Strip it here on the RENAME path (this is where a
|
||||
// filename-derived title lands as a page's real title); other title-write
|
||||
// paths (e.g. createPage / importPageMarkdown) are separate and not covered
|
||||
// by this choke point. Strip ONLY when the trailing token equals THIS page's
|
||||
// own slugId, so a genuine user title that legitimately ends in ` ~token` is
|
||||
// never corrupted (slugId is a random nanoid; no real collision).
|
||||
const suffix = ` ~${page.slugId}`;
|
||||
const cleanTitle =
|
||||
page.slugId && title.endsWith(suffix)
|
||||
|
||||
Reference in New Issue
Block a user