fix(git-sync): close #119 blockers — dead edit-revert guard, cross-space guard, red suite (F5/S2/G1/A1/F7)

F5 (HIGH data-loss): guard #2 (GS-EDIT-REVERT) called a local key-sorting equality that
never matched a real page (block ids + materialized defaults differ), so the guard was
dead and a web edit on a git-sync space was silently reverted within one poll cycle. Use
the package's authoritative docsCanonicallyEqual (strips block id + normalizes
KNOWN_DEFAULTS), wired through the git-sync loader like sanitizeTitle; delete the dead
local canonicalize/canonicalJsonEqual.
S2 (security): importPageMarkdown targeted a page by the vault-file id without a spaceId
check (deletePage had one) — a space-A vault file carrying space-B's page id could
resurrect/overwrite/clear B's page. Mirror deletePage's guard: skip when the loaded page
lives in a different space than ctx.spaceId.
G1 (jest green): add sanitizeTitle + docsCanonicallyEqual to the loadGitSync mock; update
the converter-gate + package golden expectations to the genuinely-fixed output (paragraph
textAlign now round-trips, multi-block table cells emit HTML tables); fix the orchestrator
spec's stale mock so the per-space enabled gate (added later) is satisfied.
A1: the converter dropped heading textAlign on export (bare '## text'); emit a styled
<hN> when aligned, symmetric to paragraphs — round-trips losslessly (level + align), no
churn for unaligned headings.
F7 (docs): reword the false 'single choke point' title-strip comment; correct push.ts
docstrings that still described the removed standalone-CLI/daemon model.

Adds regression tests: the F5 acceptance test (canonically-equal content with real uuids
=> writePageBody NOT called), the S2 cross-space import guard, and the A1 heading
round-trip.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
agent_coder
2026-07-03 00:13:08 +03:00
parent 320b200ac8
commit 5d45f5a85e
9 changed files with 362 additions and 111 deletions
@@ -111,6 +111,17 @@ const CORPUS: Record<string, any> = {
para(text('Second paragraph.')),
),
// A non-default paragraph alignment now round-trips (item #7 fix): it exports
// as `<p style="text-align:center">` and the schema's paragraph parseHTML
// reads `style="text-align"` back onto `textAlign` on import, so the alignment
// survives the full editor-ext write path. Promoted from the old KNOWN
// DIVERGENCE block (which only heading alignment still occupies).
'aligned paragraph (textAlign center)': doc({
type: 'paragraph',
attrs: { textAlign: 'center' },
content: [text('centered')],
}),
'inline marks (bold/italic/strike/code)': doc(
para(
text('normal '),
@@ -446,40 +457,20 @@ describe('git-sync converter §13.1 image dimensions preserved (was KNOWN DIVERG
});
// ---------------------------------------------------------------------------
// KNOWN DIVERGENCE — text alignment (item #7; isolated, not silently dropped).
// KNOWN DIVERGENCE — HEADING text alignment (item #7; isolated, not silently
// dropped). PARAGRAPH alignment now round-trips (exported as
// `<p style="text-align:...">`, re-parsed by the paragraph parseHTML) and lives
// in the green CORPUS above; only HEADING alignment still diverges:
//
// editor-ext registers TextAlign for heading+paragraph, and the SERVER schema
// fully supports it — the loss is intrinsic to the MARKDOWN transport:
// • A heading's `textAlign` is NOT exported at all — a heading emits plain
// markdown `## text` with no alignment syntax — so any non-default heading
// alignment is dropped on a full round trip.
//
// • A paragraph's `textAlign` is EXPORTED as `<div align="...">text</div>`
// (markdown-converter case "paragraph"), but on import the converter's
// docmost-schema declares `textAlign` WITHOUT a parseHTML mapping, so the
// `align` attribute is never recovered -> it imports as `textAlign:null`
// and canonicalizes away. A heading's alignment is not even exported.
// • Therefore any non-default alignment is dropped on a full round trip.
//
// If the converter is ever taught to parse `align`/`text-align` back onto the
// block, this assertion flips and an aligned-paragraph fixture should be
// promoted into the green CORPUS above.
// If the converter is ever taught to export + re-parse heading alignment, this
// assertion flips and an aligned-heading fixture should be promoted into the
// green CORPUS above.
// ---------------------------------------------------------------------------
describe('git-sync converter §13.1 KNOWN DIVERGENCE (text alignment dropped)', () => {
it('drops a paragraph textAlign on the markdown round trip', async () => {
const alignedDoc = doc({
type: 'paragraph',
attrs: { textAlign: 'center' },
content: [text('centered')],
});
const { canonNormalized } = await runGate(alignedDoc);
// The round-tripped paragraph carries no alignment.
expect(canonNormalized).toEqual({
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'centered' }] }],
});
expect(docsCanonicallyEqual(alignedDoc, canonNormalized)).toBe(false);
});
describe('git-sync converter §13.1 KNOWN DIVERGENCE (heading text alignment dropped)', () => {
it('drops a heading textAlign (headings do not export alignment at all)', async () => {
const alignedHeading = doc({
type: 'heading',
@@ -7,6 +7,7 @@ import type {
parseDocmostMarkdown as parseDocmostMarkdownFn,
markdownToProseMirror as markdownToProseMirrorFn,
sanitizeTitle as sanitizeTitleFn,
docsCanonicallyEqual as docsCanonicallyEqualFn,
} from '@docmost/git-sync';
/**
@@ -22,6 +23,7 @@ interface GitSyncModule {
parseDocmostMarkdown: typeof parseDocmostMarkdownFn;
markdownToProseMirror: typeof markdownToProseMirrorFn;
sanitizeTitle: typeof sanitizeTitleFn;
docsCanonicallyEqual: typeof docsCanonicallyEqualFn;
}
// The CJS->ESM dynamic-import bridge lives in one shared helper
@@ -56,10 +56,19 @@ interface BuildOptions {
vaultOverrides?: Record<string, unknown>;
/**
* The row `buildSettings` reads for the per-space `autoMergeConflicts` flag
* (`executeTakeFirst`). Default: the SAFE off value. Pass `undefined` to model
* a missing row (no space / no settings).
* (`executeTakeFirst` on the `autoMergeConflicts`-aliased query). Default: the
* SAFE off value. Pass `undefined` to model a missing row (no space / no
* settings).
*/
settingsRow?: { autoMergeConflicts: boolean } | undefined;
/**
* The per-space opt-in flag `runOnce` reads via `isSpaceGitSyncEnabled`
* (`executeTakeFirst` on the `enabled`-aliased query — a DIFFERENT query from
* the `autoMergeConflicts` read above; commit c838fdee added it). Default
* `true` so a build() space passes the per-space gate and the cycle proceeds.
* Set `false` to model a space that did NOT opt in (skipped:'space-not-enabled').
*/
spaceEnabled?: boolean;
}
interface Built {
@@ -83,6 +92,7 @@ function build(opts: BuildOptions = {}): Built {
pollIntervalMs = 15000,
debounceMs = 2000,
vaultOverrides = {},
spaceEnabled = true,
} = opts;
// Distinguish "key omitted" (default off row) from "key present but undefined"
// (a deliberately MISSING settings row).
@@ -138,15 +148,36 @@ function build(opts: BuildOptions = {}): Built {
};
const redisService = { getOrThrow: jest.fn(() => redis) };
// Chainable Kysely stub. `buildSettings` reads the space's
// `gitSync.autoMergeConflicts` flag via
// `selectFrom('spaces').select(...).where('id','=',id).executeTakeFirst()`;
// default it to the SAFE off value. `enabledSpaces` uses `.execute()`.
// Chainable Kysely stub. TWO distinct single-row queries run through the same
// builder and both end in `executeTakeFirst`, so the stub must tell them apart
// by the column ALIAS the real code selects (they are otherwise identical):
// - `isSpaceGitSyncEnabled` (per-space opt-in gate in `runOnce`, added by
// c838fdee): `.select(sql\`...->>'enabled' = 'true'\`.as('enabled'))` ->
// the code reads `row?.enabled`. Return `{ enabled: spaceEnabled }`.
// - `buildSettings`: `.select(sql\`...->>'autoMergeConflicts' = 'true'\`
// .as('autoMergeConflicts'))` -> the code reads `row?.autoMergeConflicts`.
// Return `settingsRow` (default SAFE off; `undefined` models a missing row).
// `enabledSpaces` uses `.select([...strings]).execute()` (no alias, no
// executeTakeFirst) and is stubbed/replaced in the tests that exercise it.
const db = (() => {
let lastAlias: string | undefined;
const aliasOf = (sel: unknown): string | undefined => {
try {
const node = (sel as { toOperationNode?: () => any })?.toOperationNode?.();
return node?.kind === 'AliasNode' ? node.alias?.name : undefined;
} catch {
return undefined;
}
};
const builder: any = {
select: () => builder,
select: (sel: unknown) => {
const alias = aliasOf(sel);
if (alias) lastAlias = alias;
return builder;
},
where: () => builder,
executeTakeFirst: async () => settingsRow,
executeTakeFirst: async () =>
lastAlias === 'enabled' ? { enabled: spaceEnabled } : settingsRow,
execute: async () => [],
};
return { selectFrom: () => builder };
@@ -43,10 +43,26 @@ jest.mock('../git-sync.loader', () => ({
type: 'doc',
content: [{ type: 'paragraph' }],
}),
// renamePage funnels the current title through sanitizeTitle to detect the
// sanitized-stem echo; identity is the correct default here (none of the
// rename fixtures use filename-hostile chars, so the sanitized form equals
// the input and the guard never fires).
sanitizeTitle: (title: string) => title,
// importPageMarkdown guard #2 uses docsCanonicallyEqual to skip a no-op
// re-ingest. A key-order-insensitive JSON compare is a sufficient stand-in
// for the unit tests (the real semantic equality is covered by the
// @docmost/git-sync converter tests); returning false for genuinely
// different docs lets the write paths under test proceed.
docsCanonicallyEqual: (a: unknown, b: unknown) =>
JSON.stringify(a) === JSON.stringify(b),
})),
}));
import { GitmostDataSourceService } from './gitmost-datasource.service';
// The loader is mocked above; this binding is the hoisted jest.fn, so a single
// test can swap the runtime bridge (e.g. a smarter `docsCanonicallyEqual`) via
// `mockResolvedValueOnce` without perturbing the default used by every other test.
import { loadGitSync } from '../git-sync.loader';
// Focused unit/contract test for the native GitSyncClient adapter.
// No DB, no real collab server: the repos/services/gateway are mocked and we
@@ -261,6 +277,79 @@ describe('GitmostDataSourceService', () => {
expect(res.updatedAt).toBeUndefined();
});
// F5 acceptance, criterion (b): guard #2 (docsCanonicallyEqual) must SKIP the
// re-ingest when the freshly-parsed doc is canonically equal to the page's
// current DB content even though the two are NOT byte-identical — the DB copy
// carries the real per-block uuids (comments anchor to them) while a fresh
// parse has none. Skipping is what protects a concurrent, not-yet-flushed
// human edit from being clobbered by an idempotent poll re-ingest.
//
// NON-VACUITY: `currentContent` here differs from `doc` in raw JSON (it has an
// `attrs.id` uuid `doc` lacks). The default mock `docsCanonicallyEqual` is a
// plain `JSON.stringify` compare — it would return FALSE for these inputs, so
// if guard #2 were removed (or reverted to a canonicalJsonEqual that does not
// strip ids) writePageBody WOULD be called and this test would fail. We model
// the package's authoritative id-stripping equality (returns TRUE here) by
// swapping in a `docsCanonicallyEqual` that normalizes away block ids, proving
// it is the SEMANTIC equality — not a byte compare — that suppresses the write.
it('does NOT call writePageBody (guard #2) when content is canonically equal despite differing block ids (F5 criterion b)', async () => {
const { service, mocks } = build();
const realUuid = '11111111-1111-4111-8111-111111111111';
// The DB row's content carries a REAL per-block uuid; a fresh parse does not.
// These two docs are canonically equal (id-only difference) but NOT
// byte-identical, so a naive JSON compare treats the page as "changed".
mocks.pageRepo.findById.mockResolvedValue({
id: 'p1',
updatedAt: new Date('2026-06-20T11:00:00.000Z'),
content: {
type: 'doc',
content: [{ type: 'paragraph', attrs: { id: realUuid } }],
},
});
// Swap the runtime bridge for THIS call only: same passthrough parse/convert
// as the default mock (so `doc` is the id-less `{type:'doc',[paragraph]}`),
// but `docsCanonicallyEqual` strips block ids before comparing — the real
// converter's semantics. It returns TRUE for (doc, currentContent) here.
const stripIds = (node: any): any => {
if (Array.isArray(node)) return node.map(stripIds);
if (node && typeof node === 'object') {
const out: any = {};
for (const [k, v] of Object.entries(node)) {
if (k === 'attrs' && v && typeof v === 'object') {
const { id: _id, ...rest } = v as Record<string, unknown>;
if (Object.keys(rest).length) out.attrs = stripIds(rest);
} else {
out[k] = stripIds(v);
}
}
return out;
}
return node;
};
(loadGitSync as jest.Mock).mockResolvedValueOnce({
parseDocmostMarkdown: (md: string) => ({ meta: {}, body: md }),
markdownToProseMirror: async () => ({
type: 'doc',
content: [{ type: 'paragraph' }],
}),
sanitizeTitle: (title: string) => title,
docsCanonicallyEqual: (a: unknown, b: unknown) =>
JSON.stringify(stripIds(a)) === JSON.stringify(stripIds(b)),
});
// No baseMarkdown -> guard #1 (fullMarkdown === baseMarkdown) cannot fire, so
// the outcome is decided purely by guard #2.
const res = await service
.bind(CTX)
.importPageMarkdown('p1', '# Hello\n\nworld');
// Guard #2 fired: the redundant re-ingest is skipped, so the concurrent
// human edit in the live doc is NOT clobbered.
expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled();
// The unchanged page's updatedAt is still surfaced from the DB row.
expect(res.updatedAt).toBe('2026-06-20T11:00:00.000Z');
});
// The 2-way path (no base) is covered above; this exercises the THREE-WAY
// branch that only fires when a `baseMarkdown` is supplied (review #5). The
// merge dispatch itself now lives in the collab handler (gitSyncWriteBody);
@@ -290,6 +379,43 @@ describe('GitmostDataSourceService', () => {
);
});
});
// The cross-space confused-deputy guard (review S2) fires only when
// ctx.spaceId is bound. A vault file in space A can carry space B's pageId;
// without this check the reconciling space could overwrite B's page body — a
// write it has no authority over. When the resolved page already lives in a
// DIFFERENT space, the import is SKIPPED (writePageBody not called) and the
// page's own updatedAt is returned unchanged.
describe('cross-space guard (ctx.spaceId bound, review S2)', () => {
it('does NOT call writePageBody when the target page lives in another space', async () => {
const { service, mocks } = build();
// The resolved page is in space-2, but the reconciling context is space-1.
// Its content DIFFERS from the parsed doc, so guard #2 (docsCanonicallyEqual)
// cannot be what suppresses the write — proving NON-VACUITY: without the S2
// guard the flow would reach writeBody and writePageBody WOULD be called.
mocks.pageRepo.findById.mockResolvedValue({
id: 'p1',
deletedAt: null,
spaceId: 'space-2',
updatedAt: new Date('2026-06-21T09:00:00.000Z'),
content: {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'B body' }] },
],
},
});
const res = await service
.bind(CTX_SPACE)
.importPageMarkdown('p1', '# Hello\n\nworld');
// The cross-space page is preserved: no body write happened.
expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled();
// Early-return shape: only the page's own updatedAt is surfaced.
expect(res).toEqual({ updatedAt: '2026-06-21T09:00:00.000Z' });
});
});
});
describe('createPage', () => {
@@ -46,34 +46,6 @@ const GIT_SYNC_PROVENANCE: AuthProvenanceData = {
aiChatId: null,
};
/**
* Recursively serialize a JSON value with object keys sorted, so two values that
* differ ONLY in key order (or are otherwise structurally identical) compare
* equal. Arrays keep their order (order is meaningful in ProseMirror docs).
* Used to detect a semantically no-op body ingest despite an unstable
* markdown<->ProseMirror round-trip (see importPageMarkdown guard #2).
*/
function canonicalize(value: unknown): unknown {
if (Array.isArray(value)) return value.map(canonicalize);
if (value && typeof value === 'object') {
const out: Record<string, unknown> = {};
for (const key of Object.keys(value as Record<string, unknown>).sort()) {
out[key] = canonicalize((value as Record<string, unknown>)[key]);
}
return out;
}
return value;
}
/** True iff `a` and `b` are equal ignoring object key order. */
function canonicalJsonEqual(a: unknown, b: unknown): boolean {
try {
return JSON.stringify(canonicalize(a)) === JSON.stringify(canonicalize(b));
} catch {
return false;
}
}
/**
* Native, in-process implementation of the engine's `GitSyncClient` seam
* Reads go through repositories (PageRepo/SpaceRepo); body writes go
@@ -222,6 +194,28 @@ export class GitmostDataSourceService {
const currentPage = await this.pageRepo.findById(pageId, {
includeContent: true,
});
// Cross-space confused-deputy guard (review S2). The target `pageId` comes
// from THIS space's vault file frontmatter, but a file in space A could carry
// space B's page id. Without this check that file could resurrect (via
// restorePage), overwrite the body (writeBody), or clear the content of B's
// page — a cross-space write the reconciling space has no authority over.
// Mirror deletePage's guard (same `ctx.spaceId` source, same fail-safe
// direction): when the reconciling space is known and the resolved page
// already lives in a DIFFERENT space, skip — touch nothing. Only applies when
// the page exists; a not-found page (no currentPage) still proceeds as before
// so a legitimate same-space ingest is unaffected.
if (
ctx.spaceId &&
currentPage &&
currentPage.spaceId !== ctx.spaceId
) {
this.logger.log(
`git-sync[${ctx.spaceId}] skip import of page ${pageId}: page lives in space ${currentPage.spaceId} (cross-space vault reference; page preserved)`,
);
return {
updatedAt: new Date(currentPage.updatedAt).toISOString(),
};
}
// Revert-of-delete undelete (review warning). If the target page is currently
// SOFT-DELETED, this ingest is a git-revert that re-added the page's file
// (the push classifier saw an add carrying a known pageId -> UPDATE). Writing
@@ -246,19 +240,26 @@ export class GitmostDataSourceService {
};
}
const { parseDocmostMarkdown, markdownToProseMirror } = await loadGitSync();
const { parseDocmostMarkdown, markdownToProseMirror, docsCanonicallyEqual } =
await loadGitSync();
const { body } = parseDocmostMarkdown(fullMarkdown);
const doc = await markdownToProseMirror(body);
// Idempotency guard #2 (defense-in-depth). Even when the vault file text
// differs cosmetically, the PARSED body can be SEMANTICALLY identical to the
// page's current Docmost content — the markdown<->ProseMirror round-trip is
// not byte-stable (e.g. JSON key order `{text,type}` vs `{type,text}`,
// default attrs), so upstream change-detection mis-flags such pages as
// changed every cycle. Compare by CANONICAL JSON (recursively key-sorted); if
// the incoming body already equals current content, this ingest is a no-op —
// skip it so a concurrent live edit is never clobbered and the vault never
// churns. A genuine content change is not canonically equal, so it proceeds.
// not byte-stable, so upstream change-detection mis-flags such pages as
// changed every cycle. But the divergence is NOT just JSON key order: a fresh
// `markdownToProseMirror(doc)` carries new/null block ids and materialized
// schema default attrs, whereas `currentContent` (from the DB) carries the
// real per-block uuids (to which comments are anchored) and KNOWN_DEFAULTS.
// A key-order-only compare therefore NEVER matches a real collab page, so use
// the package's authoritative `docsCanonicallyEqual` — the same equality the
// converter's round-trip losslessness tests use, which strips block ids and
// normalizes KNOWN_DEFAULTS. If the incoming body already equals current
// content, this ingest is a no-op — skip it so a concurrent live edit is
// never clobbered and the vault never churns. A genuine content change is not
// canonically equal, so it proceeds.
const currentContent =
typeof currentPage?.content === 'string'
? (() => {
@@ -269,7 +270,7 @@ export class GitmostDataSourceService {
}
})()
: currentPage?.content;
if (currentContent && canonicalJsonEqual(doc, currentContent)) {
if (currentContent && docsCanonicallyEqual(doc, currentContent)) {
return {
updatedAt: new Date(currentPage!.updatedAt).toISOString(),
};
@@ -447,10 +448,12 @@ export class GitmostDataSourceService {
// LOCAL filesystem artifact and must NEVER become the page's real Docmost
// title. A filename-derived title can carry it back in on ingest (observed:
// intermittent same-title collision left a page permanently titled
// "Title ~<slugId>"). Strip it at this single choke point every git-sync
// title write funnels through — but ONLY when the trailing token equals THIS
// page's own slugId, so a genuine user title that legitimately ends in
// ` ~token` is never corrupted (slugId is a random nanoid; no real collision).
// "Title ~<slugId>"). Strip it here on the RENAME path (this is where a
// filename-derived title lands as a page's real title); other title-write
// paths (e.g. createPage / importPageMarkdown) are separate and not covered
// by this choke point. Strip ONLY when the trailing token equals THIS page's
// own slugId, so a genuine user title that legitimately ends in ` ~token` is
// never corrupted (slugId is a random nanoid; no real collision).
const suffix = ` ~${page.slugId}`;
const cleanTitle =
page.slugId && title.endsWith(suffix)
+20 -17
View File
@@ -1373,11 +1373,12 @@ function extractUpdatedAt(result: unknown): { updatedAt?: string } {
// --- runnable push orchestration (`runPush`) ---------------------------------
//
// `runPush` is the FS->Docmost twin of `pull.ts`'s `main`: it wires the VaultGit
// `runPush` is the FS->Docmost twin of the pull direction: it wires the VaultGit
// diff/ref primitives + the PURE `computePushActions` planner + the THIN
// `applyPushActions` applier into one runnable cycle. SAFE BY DEFAULT — the
// engine's FIRST write path to Docmost defaults to DRY-RUN (plan only, NO
// Docmost writes, NO ref advance); an explicit `--apply` is the ONLY path that
// `applyPushActions` applier into one runnable cycle. It is driven IN-PROCESS by
// the NestJS server (there is no standalone CLI). SAFE BY DEFAULT — a cycle
// defaults to DRY-RUN (plan only, NO Docmost writes, NO ref advance) unless the
// caller passes `opts.dryRun === false`; that apply path is the ONLY one that
// builds a client and mutates Docmost.
//
// Every external effect is injected (`PushDeps`): production wires the live
@@ -1388,7 +1389,7 @@ function extractUpdatedAt(result: unknown): { updatedAt?: string } {
* push direction (SPEC §7.3). The provenance is carried by the trailer (below),
* which the loop-guard keys on; the identity is for history readability only.
* When the vault repo already has a configured `user.name`/`user.email`, git
* uses that for the working-tree commit; this is the fallback the daemon stamps.
* uses that for the working-tree commit; this is the fallback the engine stamps.
*/
export const LOCAL_AUTHOR_NAME = "Local";
export const LOCAL_AUTHOR_EMAIL = "local@local";
@@ -1400,7 +1401,7 @@ export const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
* Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that
* touches the outside world is here so tests pass fakes). `makeClient` is a
* FACTORY, not a client — a dry-run must build NO client at all (it is never
* called), and only `--apply` invokes it.
* called), and only the apply path (`opts.dryRun === false`) invokes it.
*/
export interface PushDeps {
settings: Settings;
@@ -1420,7 +1421,7 @@ export interface PushDeps {
| "fastForwardBranch"
| "listTrackedFiles"
>;
/** Build a real client — called ONLY on `--apply`, never on dry-run. */
/** Build a real client — called ONLY on the apply path, never on dry-run. */
makeClient: (settings: Settings) => ApplyPushDeps["client"];
/** Read a file's full text by its vault-relative (forward-slash) path. */
readFile: (path: string) => Promise<string>;
@@ -1448,12 +1449,12 @@ export interface PushRunResult {
renamesMoves: number;
skipped: number;
};
/** The applier's structured result — ONLY present on the `--apply` path. */
/** The applier's structured result — ONLY present on the apply path. */
applied?: ApplyPushResult;
/**
* True when `applyPushActions` REFUSED to fast-forward a divergent `docmost`
* mirror (SPEC §5 invariant broken). Escalated (logged prominently) and folded
* into the CLI's non-zero exit.
* mirror (SPEC §5 invariant broken). Logged prominently and surfaced on this
* result so the caller (the server orchestrator) escalates it.
*/
divergentDocmost?: boolean;
/** Per-page failures from the applier (empty/absent on a clean run). */
@@ -1461,11 +1462,13 @@ export interface PushRunResult {
}
/**
* Run one FS->Docmost push cycle (SPEC §6 "FS -> Docmost"), DRY-RUN BY DEFAULT.
* Run one FS->Docmost push cycle (SPEC §6 "FS -> Docmost"), DRY-RUN BY DEFAULT
* (the apply path runs only when `opts.dryRun === false`).
*
* Steps (mirrors `pull.ts`):
* 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
* non-zero-ish result) if a merge is in progress — never push on top of an
* an `aborted: "merge-in-progress"` result) if a merge is in progress —
* never push on top of an
* unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
* Docmost (SPEC §9).
* 2. Checkout `main` (the human-facing branch the push reads from).
@@ -1478,12 +1481,12 @@ export interface PushRunResult {
* the PURE `computePushActions`.
* 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
* calls, NO ref advance.
* 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
* 7. Apply path (`opts.dryRun === false`): build the client, run `applyPushActions(..., pushedCommit=main)`,
* then (a) if any pageIds were written back (creates), commit them on `main`
* with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
* new commit so the recorded pageIds are persisted in what Docmost mirrors;
* (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
* WARNING and a non-zero-ish flag. Then log a one-line summary.
* WARNING and the `divergentDocmost` result flag. Then log a one-line summary.
*/
export async function runPush(
deps: PushDeps,
@@ -1492,8 +1495,8 @@ export async function runPush(
const { git, settings, log } = deps;
const dryRun = opts.dryRun;
// 1. Preflight git. Fail fast (actionable message via main().catch) if the git
// binary is missing — the vault state store relies on it.
// 1. Preflight git. Fail fast (the thrown error propagates to the caller) if
// the git binary is missing — the vault state store relies on it.
await git.assertGitAvailable();
await git.ensureRepo();
@@ -1622,7 +1625,7 @@ export async function runPush(
return { mode: "dry-run", base, pushedCommit, planned };
}
// 7. --apply: build the REAL client and execute. This is the ONLY write path.
// 7. Apply path: build the REAL client and execute. This is the ONLY write path.
const client = deps.makeClient(settings);
const applied = await applyPushActions(
{
@@ -154,6 +154,21 @@ export function convertProseMirrorToMarkdown(content: any): string {
case "heading":
const level = node.attrs?.level || 1;
const headingText = nodeContent.map(processNode).join("");
const headingAlign = node.attrs?.textAlign;
if (headingAlign && headingAlign !== "left") {
// Emit alignment as a styled `<hN>` so it round-trips losslessly,
// symmetric to the paragraph case above (review F5/A1). The bare
// `## text` markdown form carries NO alignment, so an aligned heading
// would silently drop textAlign on export. A styled `<hN>` re-parses:
// the heading parse rule (tag:"h1".."h6") matches and the textAlign
// global-attribute parseHTML (docmost-schema) reads the style back,
// preserving BOTH level and textAlign. escapeAttr keeps the align
// value injection-safe, exactly like the paragraph arm.
return `<h${level} style="text-align:${escapeAttr(headingAlign)}">${headingText}</h${level}>`;
}
// No alignment (or the default "left"): keep the plain `## text`
// markdown form — HTML-ifying an unaligned heading would be needless
// churn, exactly as the paragraph case keeps plain text when unaligned.
return "#".repeat(level) + " " + headingText;
case "text":
@@ -256,9 +256,11 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => {
expect(out).toBe('> ```js\n> a\n> b\n> ```');
});
// 4. A GFM body cell with TWO block children (paragraph + bulletList): joined
// by a space, the list's newline collapsed so the row stays intact.
it('a GFM body cell with paragraph+list joins them by a space (no "p1- a")', () => {
// 4. A body cell with TWO block children (paragraph + bulletList) cannot be a
// GFM pipe row (inline-only). #8 emits the WHOLE table as HTML <table> so
// the paragraph and the list each survive as their own block instead of
// being lossily flattened into one "p1 - a" pipe cell.
it('a table cell with paragraph+list emits an HTML <table> (blocks preserved)', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'table',
@@ -285,7 +287,9 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => {
],
}),
);
expect(out).toBe('| h |\n| --- |\n| p1 - a |');
expect(out).toBe(
'<table><tbody><tr><th><p>h</p></th></tr><tr><td><p>p1</p><ul><li><p>a</p></li></ul></td></tr></tbody></table>',
);
});
// 5. code + link co-occur: the schema's `code` mark excludes all other marks
@@ -391,8 +395,11 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => {
expect(out).toBe('> - x\n> - y');
});
// 11. GFM (non-spanned) cell: multi-block space-join + pipe-escape + newline-collapse.
it('a GFM cell escapes a literal pipe and collapses newlines across two paragraphs', () => {
// 11. A non-spanned cell with TWO block paragraphs: #8 emits the whole table
// as HTML <table>, so each paragraph stays its own <p> and the literal
// pipe needs no escaping inside HTML text (the old GFM path space-joined
// the blocks into one line and escaped the pipe to \|).
it('a table cell with two paragraphs emits an HTML <table> (blocks kept, no pipe-escape)', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'table',
@@ -413,7 +420,9 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => {
],
}),
);
expect(out).toBe('| h |\n| --- |\n| a\\|b c |');
expect(out).toBe(
'<table><tbody><tr><th><p>h</p></th></tr><tr><td><p>a|b</p><p>c</p></td></tr></tbody></table>',
);
});
});
@@ -775,3 +784,62 @@ describe('converter gap coverage — raw-HTML container round-trips (specs 15–
expect(md1).not.toContain('$x_i$');
});
});
// ===========================================================================
// 30. heading.textAlign round-trip (A1). The paragraph case already exports a
// non-default alignment as a styled `<p style="text-align:…">` that re-parses
// losslessly; headings used to emit only the bare `## text` form, silently
// DROPPING textAlign on export. The heading case is now symmetric: an aligned
// heading exports as `<hN style="text-align:…">` and re-parses back to a heading
// carrying BOTH the level and the textAlign, so the round-trip is lossless; an
// UNaligned heading still emits the bare `## text` markdown form (no churn).
// ===========================================================================
const alignedHeading = (level: number, align: string, ...inline: any[]) => ({
type: 'heading',
attrs: { level, textAlign: align },
content: inline,
});
describe('heading.textAlign round-trip (A1)', () => {
it('an aligned heading exports as <hN style="text-align:…"> (not bare ##)', () => {
expect(convertProseMirrorToMarkdown(doc(alignedHeading(2, 'center', text('Title'))))).toBe(
'<h2 style="text-align:center">Title</h2>',
);
});
it('survives export -> import -> export losslessly (level AND textAlign preserved)', async () => {
const input = alignedHeading(2, 'center', text('Title'));
const { md1, doc2, md2 } = await roundTrip(input);
// Export direction: a styled <hN>, injection-safe via escapeAttr.
expect(md1).toBe('<h2 style="text-align:center">Title</h2>');
// Import direction: re-parses to a heading node with the level AND textAlign
// (the raw <hN style> HTML block flows through marked -> generateJSON, where
// the heading parse rule matches and the textAlign global attr reads the
// style back). Byte-stable second export closes the loop.
const h = doc2.content[0];
expect(h.type).toBe('heading');
expect(h.attrs.level).toBe(2);
expect(h.attrs.textAlign).toBe('center');
expect(md2).toBe(md1);
// Canonical equality of the re-parsed doc against the original input doc.
expect(docsCanonicallyEqual(doc2, doc(input))).toBe(true);
});
it('a right-aligned h3 round-trips its level and alignment', async () => {
const { doc2 } = await roundTrip(alignedHeading(3, 'right', text('Head')));
const h = doc2.content[0];
expect(h.type).toBe('heading');
expect(h.attrs.level).toBe(3);
expect(h.attrs.textAlign).toBe('right');
});
it('an UNaligned heading still emits the bare "## text" form (no HTML churn)', () => {
const bare = convertProseMirrorToMarkdown(doc(heading(2, text('Plain'))));
expect(bare).toBe('## Plain');
expect(bare).not.toContain('<h2');
// The default "left" alignment is likewise NOT wrapped.
expect(
convertProseMirrorToMarkdown(doc(alignedHeading(2, 'left', text('Plain')))),
).toBe('## Plain');
});
});
@@ -129,10 +129,16 @@ describe('inline-mark matrix (underline/sub/sup/highlight±color/textStyle/comme
});
});
describe('paragraph.textAlign -> <div align>', () => {
it('non-default alignment wraps the paragraph in <div align="...">', () => {
describe('paragraph.textAlign -> <p style="text-align:...">', () => {
it('non-default alignment emits an HTML <p style="text-align:...">', () => {
// #7 fix: a non-default paragraph alignment now round-trips. It is exported
// as an HTML `<p style="text-align:center">` (the schema's paragraph
// parseHTML reads `style="text-align"` back onto `textAlign` on import), so
// the alignment survives instead of collapsing to bare text. (The old
// `<div align="center">` form was NOT re-parsed onto the paragraph and was
// therefore lossy.)
expect(c({ type: 'paragraph', attrs: { textAlign: 'center' }, content: [text('x')] })).toBe(
'<div align="center">x</div>',
'<p style="text-align:center">x</p>',
);
});
@@ -190,10 +196,14 @@ describe('escaping idempotence (SPEC §11 phantom-diff guard)', () => {
});
});
describe('table-cell sanitization (| and newline must not corrupt the GFM row)', () => {
it('escapes a literal pipe and collapses an inter-block newline in a cell', () => {
// A cell with a pipe in one paragraph and a second block paragraph: the pipe
// is escaped to \| and the block join (a space) keeps the row intact.
describe('multi-block table cell -> HTML <table> (#8: GFM pipes cannot hold block content)', () => {
it('emits the whole table as HTML <table> so a multi-paragraph cell survives', () => {
// A cell holding TWO block paragraphs cannot be represented by a GFM pipe
// row (one inline line only) — the old GFM path collapsed the two blocks
// into one line ("a\|b c"), losing the block boundary and forcing a fragile
// pipe-escape. #8 emits the WHOLE table as raw HTML <table> instead: the
// schema's table-family parseHTML round-trips it, each paragraph stays its
// own <p>, and the literal pipe needs no escaping inside HTML text.
const out = c({
type: 'table',
content: [
@@ -205,7 +215,9 @@ describe('table-cell sanitization (| and newline must not corrupt the GFM row)',
]},
],
});
expect(out).toBe('| H |\n| --- |\n| a\\|b c |');
expect(out).toBe(
'<table><tbody><tr><th><p>H</p></th></tr><tr><td><p>a|b</p><p>c</p></td></tr></tbody></table>',
);
});
});