From 5d45f5a85eb43e3b0b483fa133b6a381aec32fad Mon Sep 17 00:00:00 2001 From: agent_coder Date: Fri, 3 Jul 2026 00:13:08 +0300 Subject: [PATCH] =?UTF-8?q?fix(git-sync):=20close=20#119=20blockers=20?= =?UTF-8?q?=E2=80=94=20dead=20edit-revert=20guard,=20cross-space=20guard,?= =?UTF-8?q?=20red=20suite=20(F5/S2/G1/A1/F7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit F5 (HIGH data-loss): guard #2 (GS-EDIT-REVERT) called a local key-sorting equality that never matched a real page (block ids + materialized defaults differ), so the guard was dead and a web edit on a git-sync space was silently reverted within one poll cycle. Use the package's authoritative docsCanonicallyEqual (strips block id + normalizes KNOWN_DEFAULTS), wired through the git-sync loader like sanitizeTitle; delete the dead local canonicalize/canonicalJsonEqual. S2 (security): importPageMarkdown targeted a page by the vault-file id without a spaceId check (deletePage had one) — a space-A vault file carrying space-B's page id could resurrect/overwrite/clear B's page. Mirror deletePage's guard: skip when the loaded page lives in a different space than ctx.spaceId. G1 (jest green): add sanitizeTitle + docsCanonicallyEqual to the loadGitSync mock; update the converter-gate + package golden expectations to the genuinely-fixed output (paragraph textAlign now round-trips, multi-block table cells emit HTML tables); fix the orchestrator spec's stale mock so the per-space enabled gate (added later) is satisfied. A1: the converter dropped heading textAlign on export (bare '## text'); emit a styled when aligned, symmetric to paragraphs — round-trips losslessly (level + align), no churn for unaligned headings. F7 (docs): reword the false 'single choke point' title-strip comment; correct push.ts docstrings that still described the removed standalone-CLI/daemon model. Adds regression tests: the F5 acceptance test (canonically-equal content with real uuids => writePageBody NOT called), the S2 cross-space import guard, and the A1 heading round-trip. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../git-sync-converter-gate.spec.ts | 53 +++----- .../integrations/git-sync/git-sync.loader.ts | 2 + .../services/git-sync.orchestrator.spec.ts | 47 +++++-- .../gitmost-datasource.service.spec.ts | 126 ++++++++++++++++++ .../services/gitmost-datasource.service.ts | 83 ++++++------ packages/git-sync/src/engine/push.ts | 37 ++--- .../git-sync/src/lib/markdown-converter.ts | 15 +++ .../test/markdown-converter-gaps.test.ts | 82 +++++++++++- .../test/markdown-converter-golden.test.ts | 28 ++-- 9 files changed, 362 insertions(+), 111 deletions(-) diff --git a/apps/server/src/collaboration/git-sync-converter-gate.spec.ts b/apps/server/src/collaboration/git-sync-converter-gate.spec.ts index 0c928748..73a3a0b8 100644 --- a/apps/server/src/collaboration/git-sync-converter-gate.spec.ts +++ b/apps/server/src/collaboration/git-sync-converter-gate.spec.ts @@ -111,6 +111,17 @@ const CORPUS: Record = { para(text('Second paragraph.')), ), + // A non-default paragraph alignment now round-trips (item #7 fix): it exports + // as `

` and the schema's paragraph parseHTML + // reads `style="text-align"` back onto `textAlign` on import, so the alignment + // survives the full editor-ext write path. Promoted from the old KNOWN + // DIVERGENCE block (which only heading alignment still occupies). + 'aligned paragraph (textAlign center)': doc({ + type: 'paragraph', + attrs: { textAlign: 'center' }, + content: [text('centered')], + }), + 'inline marks (bold/italic/strike/code)': doc( para( text('normal '), @@ -446,40 +457,20 @@ describe('git-sync converter §13.1 image dimensions preserved (was KNOWN DIVERG }); // --------------------------------------------------------------------------- -// KNOWN DIVERGENCE — text alignment (item #7; isolated, not silently dropped). +// KNOWN DIVERGENCE — HEADING text alignment (item #7; isolated, not silently +// dropped). PARAGRAPH alignment now round-trips (exported as +// `

`, re-parsed by the paragraph parseHTML) and lives +// in the green CORPUS above; only HEADING alignment still diverges: // -// editor-ext registers TextAlign for heading+paragraph, and the SERVER schema -// fully supports it — the loss is intrinsic to the MARKDOWN transport: +// • A heading's `textAlign` is NOT exported at all — a heading emits plain +// markdown `## text` with no alignment syntax — so any non-default heading +// alignment is dropped on a full round trip. // -// • A paragraph's `textAlign` is EXPORTED as `

text
` -// (markdown-converter case "paragraph"), but on import the converter's -// docmost-schema declares `textAlign` WITHOUT a parseHTML mapping, so the -// `align` attribute is never recovered -> it imports as `textAlign:null` -// and canonicalizes away. A heading's alignment is not even exported. -// • Therefore any non-default alignment is dropped on a full round trip. -// -// If the converter is ever taught to parse `align`/`text-align` back onto the -// block, this assertion flips and an aligned-paragraph fixture should be -// promoted into the green CORPUS above. +// If the converter is ever taught to export + re-parse heading alignment, this +// assertion flips and an aligned-heading fixture should be promoted into the +// green CORPUS above. // --------------------------------------------------------------------------- -describe('git-sync converter §13.1 KNOWN DIVERGENCE (text alignment dropped)', () => { - it('drops a paragraph textAlign on the markdown round trip', async () => { - const alignedDoc = doc({ - type: 'paragraph', - attrs: { textAlign: 'center' }, - content: [text('centered')], - }); - - const { canonNormalized } = await runGate(alignedDoc); - - // The round-tripped paragraph carries no alignment. - expect(canonNormalized).toEqual({ - type: 'doc', - content: [{ type: 'paragraph', content: [{ type: 'text', text: 'centered' }] }], - }); - expect(docsCanonicallyEqual(alignedDoc, canonNormalized)).toBe(false); - }); - +describe('git-sync converter §13.1 KNOWN DIVERGENCE (heading text alignment dropped)', () => { it('drops a heading textAlign (headings do not export alignment at all)', async () => { const alignedHeading = doc({ type: 'heading', diff --git a/apps/server/src/integrations/git-sync/git-sync.loader.ts b/apps/server/src/integrations/git-sync/git-sync.loader.ts index bc025021..234a3c55 100644 --- a/apps/server/src/integrations/git-sync/git-sync.loader.ts +++ b/apps/server/src/integrations/git-sync/git-sync.loader.ts @@ -7,6 +7,7 @@ import type { parseDocmostMarkdown as parseDocmostMarkdownFn, markdownToProseMirror as markdownToProseMirrorFn, sanitizeTitle as sanitizeTitleFn, + docsCanonicallyEqual as docsCanonicallyEqualFn, } from '@docmost/git-sync'; /** @@ -22,6 +23,7 @@ interface GitSyncModule { parseDocmostMarkdown: typeof parseDocmostMarkdownFn; markdownToProseMirror: typeof markdownToProseMirrorFn; sanitizeTitle: typeof sanitizeTitleFn; + docsCanonicallyEqual: typeof docsCanonicallyEqualFn; } // The CJS->ESM dynamic-import bridge lives in one shared helper diff --git a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts index f9ce6e03..cd0d3fe6 100644 --- a/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts +++ b/apps/server/src/integrations/git-sync/services/git-sync.orchestrator.spec.ts @@ -56,10 +56,19 @@ interface BuildOptions { vaultOverrides?: Record; /** * The row `buildSettings` reads for the per-space `autoMergeConflicts` flag - * (`executeTakeFirst`). Default: the SAFE off value. Pass `undefined` to model - * a missing row (no space / no settings). + * (`executeTakeFirst` on the `autoMergeConflicts`-aliased query). Default: the + * SAFE off value. Pass `undefined` to model a missing row (no space / no + * settings). */ settingsRow?: { autoMergeConflicts: boolean } | undefined; + /** + * The per-space opt-in flag `runOnce` reads via `isSpaceGitSyncEnabled` + * (`executeTakeFirst` on the `enabled`-aliased query — a DIFFERENT query from + * the `autoMergeConflicts` read above; commit c838fdee added it). Default + * `true` so a build() space passes the per-space gate and the cycle proceeds. + * Set `false` to model a space that did NOT opt in (skipped:'space-not-enabled'). + */ + spaceEnabled?: boolean; } interface Built { @@ -83,6 +92,7 @@ function build(opts: BuildOptions = {}): Built { pollIntervalMs = 15000, debounceMs = 2000, vaultOverrides = {}, + spaceEnabled = true, } = opts; // Distinguish "key omitted" (default off row) from "key present but undefined" // (a deliberately MISSING settings row). @@ -138,15 +148,36 @@ function build(opts: BuildOptions = {}): Built { }; const redisService = { getOrThrow: jest.fn(() => redis) }; - // Chainable Kysely stub. `buildSettings` reads the space's - // `gitSync.autoMergeConflicts` flag via - // `selectFrom('spaces').select(...).where('id','=',id).executeTakeFirst()`; - // default it to the SAFE off value. `enabledSpaces` uses `.execute()`. + // Chainable Kysely stub. TWO distinct single-row queries run through the same + // builder and both end in `executeTakeFirst`, so the stub must tell them apart + // by the column ALIAS the real code selects (they are otherwise identical): + // - `isSpaceGitSyncEnabled` (per-space opt-in gate in `runOnce`, added by + // c838fdee): `.select(sql\`...->>'enabled' = 'true'\`.as('enabled'))` -> + // the code reads `row?.enabled`. Return `{ enabled: spaceEnabled }`. + // - `buildSettings`: `.select(sql\`...->>'autoMergeConflicts' = 'true'\` + // .as('autoMergeConflicts'))` -> the code reads `row?.autoMergeConflicts`. + // Return `settingsRow` (default SAFE off; `undefined` models a missing row). + // `enabledSpaces` uses `.select([...strings]).execute()` (no alias, no + // executeTakeFirst) and is stubbed/replaced in the tests that exercise it. const db = (() => { + let lastAlias: string | undefined; + const aliasOf = (sel: unknown): string | undefined => { + try { + const node = (sel as { toOperationNode?: () => any })?.toOperationNode?.(); + return node?.kind === 'AliasNode' ? node.alias?.name : undefined; + } catch { + return undefined; + } + }; const builder: any = { - select: () => builder, + select: (sel: unknown) => { + const alias = aliasOf(sel); + if (alias) lastAlias = alias; + return builder; + }, where: () => builder, - executeTakeFirst: async () => settingsRow, + executeTakeFirst: async () => + lastAlias === 'enabled' ? { enabled: spaceEnabled } : settingsRow, execute: async () => [], }; return { selectFrom: () => builder }; diff --git a/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.spec.ts b/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.spec.ts index 87838690..af8ec9c0 100644 --- a/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.spec.ts +++ b/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.spec.ts @@ -43,10 +43,26 @@ jest.mock('../git-sync.loader', () => ({ type: 'doc', content: [{ type: 'paragraph' }], }), + // renamePage funnels the current title through sanitizeTitle to detect the + // sanitized-stem echo; identity is the correct default here (none of the + // rename fixtures use filename-hostile chars, so the sanitized form equals + // the input and the guard never fires). + sanitizeTitle: (title: string) => title, + // importPageMarkdown guard #2 uses docsCanonicallyEqual to skip a no-op + // re-ingest. A key-order-insensitive JSON compare is a sufficient stand-in + // for the unit tests (the real semantic equality is covered by the + // @docmost/git-sync converter tests); returning false for genuinely + // different docs lets the write paths under test proceed. + docsCanonicallyEqual: (a: unknown, b: unknown) => + JSON.stringify(a) === JSON.stringify(b), })), })); import { GitmostDataSourceService } from './gitmost-datasource.service'; +// The loader is mocked above; this binding is the hoisted jest.fn, so a single +// test can swap the runtime bridge (e.g. a smarter `docsCanonicallyEqual`) via +// `mockResolvedValueOnce` without perturbing the default used by every other test. +import { loadGitSync } from '../git-sync.loader'; // Focused unit/contract test for the native GitSyncClient adapter. // No DB, no real collab server: the repos/services/gateway are mocked and we @@ -261,6 +277,79 @@ describe('GitmostDataSourceService', () => { expect(res.updatedAt).toBeUndefined(); }); + // F5 acceptance, criterion (b): guard #2 (docsCanonicallyEqual) must SKIP the + // re-ingest when the freshly-parsed doc is canonically equal to the page's + // current DB content even though the two are NOT byte-identical — the DB copy + // carries the real per-block uuids (comments anchor to them) while a fresh + // parse has none. Skipping is what protects a concurrent, not-yet-flushed + // human edit from being clobbered by an idempotent poll re-ingest. + // + // NON-VACUITY: `currentContent` here differs from `doc` in raw JSON (it has an + // `attrs.id` uuid `doc` lacks). The default mock `docsCanonicallyEqual` is a + // plain `JSON.stringify` compare — it would return FALSE for these inputs, so + // if guard #2 were removed (or reverted to a canonicalJsonEqual that does not + // strip ids) writePageBody WOULD be called and this test would fail. We model + // the package's authoritative id-stripping equality (returns TRUE here) by + // swapping in a `docsCanonicallyEqual` that normalizes away block ids, proving + // it is the SEMANTIC equality — not a byte compare — that suppresses the write. + it('does NOT call writePageBody (guard #2) when content is canonically equal despite differing block ids (F5 criterion b)', async () => { + const { service, mocks } = build(); + const realUuid = '11111111-1111-4111-8111-111111111111'; + // The DB row's content carries a REAL per-block uuid; a fresh parse does not. + // These two docs are canonically equal (id-only difference) but NOT + // byte-identical, so a naive JSON compare treats the page as "changed". + mocks.pageRepo.findById.mockResolvedValue({ + id: 'p1', + updatedAt: new Date('2026-06-20T11:00:00.000Z'), + content: { + type: 'doc', + content: [{ type: 'paragraph', attrs: { id: realUuid } }], + }, + }); + // Swap the runtime bridge for THIS call only: same passthrough parse/convert + // as the default mock (so `doc` is the id-less `{type:'doc',[paragraph]}`), + // but `docsCanonicallyEqual` strips block ids before comparing — the real + // converter's semantics. It returns TRUE for (doc, currentContent) here. + const stripIds = (node: any): any => { + if (Array.isArray(node)) return node.map(stripIds); + if (node && typeof node === 'object') { + const out: any = {}; + for (const [k, v] of Object.entries(node)) { + if (k === 'attrs' && v && typeof v === 'object') { + const { id: _id, ...rest } = v as Record; + if (Object.keys(rest).length) out.attrs = stripIds(rest); + } else { + out[k] = stripIds(v); + } + } + return out; + } + return node; + }; + (loadGitSync as jest.Mock).mockResolvedValueOnce({ + parseDocmostMarkdown: (md: string) => ({ meta: {}, body: md }), + markdownToProseMirror: async () => ({ + type: 'doc', + content: [{ type: 'paragraph' }], + }), + sanitizeTitle: (title: string) => title, + docsCanonicallyEqual: (a: unknown, b: unknown) => + JSON.stringify(stripIds(a)) === JSON.stringify(stripIds(b)), + }); + + // No baseMarkdown -> guard #1 (fullMarkdown === baseMarkdown) cannot fire, so + // the outcome is decided purely by guard #2. + const res = await service + .bind(CTX) + .importPageMarkdown('p1', '# Hello\n\nworld'); + + // Guard #2 fired: the redundant re-ingest is skipped, so the concurrent + // human edit in the live doc is NOT clobbered. + expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled(); + // The unchanged page's updatedAt is still surfaced from the DB row. + expect(res.updatedAt).toBe('2026-06-20T11:00:00.000Z'); + }); + // The 2-way path (no base) is covered above; this exercises the THREE-WAY // branch that only fires when a `baseMarkdown` is supplied (review #5). The // merge dispatch itself now lives in the collab handler (gitSyncWriteBody); @@ -290,6 +379,43 @@ describe('GitmostDataSourceService', () => { ); }); }); + + // The cross-space confused-deputy guard (review S2) fires only when + // ctx.spaceId is bound. A vault file in space A can carry space B's pageId; + // without this check the reconciling space could overwrite B's page body — a + // write it has no authority over. When the resolved page already lives in a + // DIFFERENT space, the import is SKIPPED (writePageBody not called) and the + // page's own updatedAt is returned unchanged. + describe('cross-space guard (ctx.spaceId bound, review S2)', () => { + it('does NOT call writePageBody when the target page lives in another space', async () => { + const { service, mocks } = build(); + // The resolved page is in space-2, but the reconciling context is space-1. + // Its content DIFFERS from the parsed doc, so guard #2 (docsCanonicallyEqual) + // cannot be what suppresses the write — proving NON-VACUITY: without the S2 + // guard the flow would reach writeBody and writePageBody WOULD be called. + mocks.pageRepo.findById.mockResolvedValue({ + id: 'p1', + deletedAt: null, + spaceId: 'space-2', + updatedAt: new Date('2026-06-21T09:00:00.000Z'), + content: { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'B body' }] }, + ], + }, + }); + + const res = await service + .bind(CTX_SPACE) + .importPageMarkdown('p1', '# Hello\n\nworld'); + + // The cross-space page is preserved: no body write happened. + expect(mocks.collabGateway.writePageBody).not.toHaveBeenCalled(); + // Early-return shape: only the page's own updatedAt is surfaced. + expect(res).toEqual({ updatedAt: '2026-06-21T09:00:00.000Z' }); + }); + }); }); describe('createPage', () => { diff --git a/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts b/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts index 867630dd..f7c8eb32 100644 --- a/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts +++ b/apps/server/src/integrations/git-sync/services/gitmost-datasource.service.ts @@ -46,34 +46,6 @@ const GIT_SYNC_PROVENANCE: AuthProvenanceData = { aiChatId: null, }; -/** - * Recursively serialize a JSON value with object keys sorted, so two values that - * differ ONLY in key order (or are otherwise structurally identical) compare - * equal. Arrays keep their order (order is meaningful in ProseMirror docs). - * Used to detect a semantically no-op body ingest despite an unstable - * markdown<->ProseMirror round-trip (see importPageMarkdown guard #2). - */ -function canonicalize(value: unknown): unknown { - if (Array.isArray(value)) return value.map(canonicalize); - if (value && typeof value === 'object') { - const out: Record = {}; - for (const key of Object.keys(value as Record).sort()) { - out[key] = canonicalize((value as Record)[key]); - } - return out; - } - return value; -} - -/** True iff `a` and `b` are equal ignoring object key order. */ -function canonicalJsonEqual(a: unknown, b: unknown): boolean { - try { - return JSON.stringify(canonicalize(a)) === JSON.stringify(canonicalize(b)); - } catch { - return false; - } -} - /** * Native, in-process implementation of the engine's `GitSyncClient` seam * Reads go through repositories (PageRepo/SpaceRepo); body writes go @@ -222,6 +194,28 @@ export class GitmostDataSourceService { const currentPage = await this.pageRepo.findById(pageId, { includeContent: true, }); + // Cross-space confused-deputy guard (review S2). The target `pageId` comes + // from THIS space's vault file frontmatter, but a file in space A could carry + // space B's page id. Without this check that file could resurrect (via + // restorePage), overwrite the body (writeBody), or clear the content of B's + // page — a cross-space write the reconciling space has no authority over. + // Mirror deletePage's guard (same `ctx.spaceId` source, same fail-safe + // direction): when the reconciling space is known and the resolved page + // already lives in a DIFFERENT space, skip — touch nothing. Only applies when + // the page exists; a not-found page (no currentPage) still proceeds as before + // so a legitimate same-space ingest is unaffected. + if ( + ctx.spaceId && + currentPage && + currentPage.spaceId !== ctx.spaceId + ) { + this.logger.log( + `git-sync[${ctx.spaceId}] skip import of page ${pageId}: page lives in space ${currentPage.spaceId} (cross-space vault reference; page preserved)`, + ); + return { + updatedAt: new Date(currentPage.updatedAt).toISOString(), + }; + } // Revert-of-delete undelete (review warning). If the target page is currently // SOFT-DELETED, this ingest is a git-revert that re-added the page's file // (the push classifier saw an add carrying a known pageId -> UPDATE). Writing @@ -246,19 +240,26 @@ export class GitmostDataSourceService { }; } - const { parseDocmostMarkdown, markdownToProseMirror } = await loadGitSync(); + const { parseDocmostMarkdown, markdownToProseMirror, docsCanonicallyEqual } = + await loadGitSync(); const { body } = parseDocmostMarkdown(fullMarkdown); const doc = await markdownToProseMirror(body); // Idempotency guard #2 (defense-in-depth). Even when the vault file text // differs cosmetically, the PARSED body can be SEMANTICALLY identical to the // page's current Docmost content — the markdown<->ProseMirror round-trip is - // not byte-stable (e.g. JSON key order `{text,type}` vs `{type,text}`, - // default attrs), so upstream change-detection mis-flags such pages as - // changed every cycle. Compare by CANONICAL JSON (recursively key-sorted); if - // the incoming body already equals current content, this ingest is a no-op — - // skip it so a concurrent live edit is never clobbered and the vault never - // churns. A genuine content change is not canonically equal, so it proceeds. + // not byte-stable, so upstream change-detection mis-flags such pages as + // changed every cycle. But the divergence is NOT just JSON key order: a fresh + // `markdownToProseMirror(doc)` carries new/null block ids and materialized + // schema default attrs, whereas `currentContent` (from the DB) carries the + // real per-block uuids (to which comments are anchored) and KNOWN_DEFAULTS. + // A key-order-only compare therefore NEVER matches a real collab page, so use + // the package's authoritative `docsCanonicallyEqual` — the same equality the + // converter's round-trip losslessness tests use, which strips block ids and + // normalizes KNOWN_DEFAULTS. If the incoming body already equals current + // content, this ingest is a no-op — skip it so a concurrent live edit is + // never clobbered and the vault never churns. A genuine content change is not + // canonically equal, so it proceeds. const currentContent = typeof currentPage?.content === 'string' ? (() => { @@ -269,7 +270,7 @@ export class GitmostDataSourceService { } })() : currentPage?.content; - if (currentContent && canonicalJsonEqual(doc, currentContent)) { + if (currentContent && docsCanonicallyEqual(doc, currentContent)) { return { updatedAt: new Date(currentPage!.updatedAt).toISOString(), }; @@ -447,10 +448,12 @@ export class GitmostDataSourceService { // LOCAL filesystem artifact and must NEVER become the page's real Docmost // title. A filename-derived title can carry it back in on ingest (observed: // intermittent same-title collision left a page permanently titled - // "Title ~"). Strip it at this single choke point every git-sync - // title write funnels through — but ONLY when the trailing token equals THIS - // page's own slugId, so a genuine user title that legitimately ends in - // ` ~token` is never corrupted (slugId is a random nanoid; no real collision). + // "Title ~"). Strip it here on the RENAME path (this is where a + // filename-derived title lands as a page's real title); other title-write + // paths (e.g. createPage / importPageMarkdown) are separate and not covered + // by this choke point. Strip ONLY when the trailing token equals THIS page's + // own slugId, so a genuine user title that legitimately ends in ` ~token` is + // never corrupted (slugId is a random nanoid; no real collision). const suffix = ` ~${page.slugId}`; const cleanTitle = page.slugId && title.endsWith(suffix) diff --git a/packages/git-sync/src/engine/push.ts b/packages/git-sync/src/engine/push.ts index 7a1db5bd..7bb82fee 100644 --- a/packages/git-sync/src/engine/push.ts +++ b/packages/git-sync/src/engine/push.ts @@ -1373,11 +1373,12 @@ function extractUpdatedAt(result: unknown): { updatedAt?: string } { // --- runnable push orchestration (`runPush`) --------------------------------- // -// `runPush` is the FS->Docmost twin of `pull.ts`'s `main`: it wires the VaultGit +// `runPush` is the FS->Docmost twin of the pull direction: it wires the VaultGit // diff/ref primitives + the PURE `computePushActions` planner + the THIN -// `applyPushActions` applier into one runnable cycle. SAFE BY DEFAULT — the -// engine's FIRST write path to Docmost defaults to DRY-RUN (plan only, NO -// Docmost writes, NO ref advance); an explicit `--apply` is the ONLY path that +// `applyPushActions` applier into one runnable cycle. It is driven IN-PROCESS by +// the NestJS server (there is no standalone CLI). SAFE BY DEFAULT — a cycle +// defaults to DRY-RUN (plan only, NO Docmost writes, NO ref advance) unless the +// caller passes `opts.dryRun === false`; that apply path is the ONLY one that // builds a client and mutates Docmost. // // Every external effect is injected (`PushDeps`): production wires the live @@ -1388,7 +1389,7 @@ function extractUpdatedAt(result: unknown): { updatedAt?: string } { * push direction (SPEC §7.3). The provenance is carried by the trailer (below), * which the loop-guard keys on; the identity is for history readability only. * When the vault repo already has a configured `user.name`/`user.email`, git - * uses that for the working-tree commit; this is the fallback the daemon stamps. + * uses that for the working-tree commit; this is the fallback the engine stamps. */ export const LOCAL_AUTHOR_NAME = "Local"; export const LOCAL_AUTHOR_EMAIL = "local@local"; @@ -1400,7 +1401,7 @@ export const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local"; * Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that * touches the outside world is here so tests pass fakes). `makeClient` is a * FACTORY, not a client — a dry-run must build NO client at all (it is never - * called), and only `--apply` invokes it. + * called), and only the apply path (`opts.dryRun === false`) invokes it. */ export interface PushDeps { settings: Settings; @@ -1420,7 +1421,7 @@ export interface PushDeps { | "fastForwardBranch" | "listTrackedFiles" >; - /** Build a real client — called ONLY on `--apply`, never on dry-run. */ + /** Build a real client — called ONLY on the apply path, never on dry-run. */ makeClient: (settings: Settings) => ApplyPushDeps["client"]; /** Read a file's full text by its vault-relative (forward-slash) path. */ readFile: (path: string) => Promise; @@ -1448,12 +1449,12 @@ export interface PushRunResult { renamesMoves: number; skipped: number; }; - /** The applier's structured result — ONLY present on the `--apply` path. */ + /** The applier's structured result — ONLY present on the apply path. */ applied?: ApplyPushResult; /** * True when `applyPushActions` REFUSED to fast-forward a divergent `docmost` - * mirror (SPEC §5 invariant broken). Escalated (logged prominently) and folded - * into the CLI's non-zero exit. + * mirror (SPEC §5 invariant broken). Logged prominently and surfaced on this + * result so the caller (the server orchestrator) escalates it. */ divergentDocmost?: boolean; /** Per-page failures from the applier (empty/absent on a clean run). */ @@ -1461,11 +1462,13 @@ export interface PushRunResult { } /** - * Run one FS->Docmost push cycle (SPEC §6 "FS -> Docmost"), DRY-RUN BY DEFAULT. + * Run one FS->Docmost push cycle (SPEC §6 "FS -> Docmost"), DRY-RUN BY DEFAULT + * (the apply path runs only when `opts.dryRun === false`). * * Steps (mirrors `pull.ts`): * 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message + - * non-zero-ish result) if a merge is in progress — never push on top of an + * an `aborted: "merge-in-progress"` result) if a merge is in progress — + * never push on top of an * unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach * Docmost (SPEC §9). * 2. Checkout `main` (the human-facing branch the push reads from). @@ -1478,12 +1481,12 @@ export interface PushRunResult { * the PURE `computePushActions`. * 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost * calls, NO ref advance. - * 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`, + * 7. Apply path (`opts.dryRun === false`): build the client, run `applyPushActions(..., pushedCommit=main)`, * then (a) if any pageIds were written back (creates), commit them on `main` * with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the * new commit so the recorded pageIds are persisted in what Docmost mirrors; * (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent - * WARNING and a non-zero-ish flag. Then log a one-line summary. + * WARNING and the `divergentDocmost` result flag. Then log a one-line summary. */ export async function runPush( deps: PushDeps, @@ -1492,8 +1495,8 @@ export async function runPush( const { git, settings, log } = deps; const dryRun = opts.dryRun; - // 1. Preflight git. Fail fast (actionable message via main().catch) if the git - // binary is missing — the vault state store relies on it. + // 1. Preflight git. Fail fast (the thrown error propagates to the caller) if + // the git binary is missing — the vault state store relies on it. await git.assertGitAvailable(); await git.ensureRepo(); @@ -1622,7 +1625,7 @@ export async function runPush( return { mode: "dry-run", base, pushedCommit, planned }; } - // 7. --apply: build the REAL client and execute. This is the ONLY write path. + // 7. Apply path: build the REAL client and execute. This is the ONLY write path. const client = deps.makeClient(settings); const applied = await applyPushActions( { diff --git a/packages/git-sync/src/lib/markdown-converter.ts b/packages/git-sync/src/lib/markdown-converter.ts index 72506de6..c8227cbf 100644 --- a/packages/git-sync/src/lib/markdown-converter.ts +++ b/packages/git-sync/src/lib/markdown-converter.ts @@ -154,6 +154,21 @@ export function convertProseMirrorToMarkdown(content: any): string { case "heading": const level = node.attrs?.level || 1; const headingText = nodeContent.map(processNode).join(""); + const headingAlign = node.attrs?.textAlign; + if (headingAlign && headingAlign !== "left") { + // Emit alignment as a styled `` so it round-trips losslessly, + // symmetric to the paragraph case above (review F5/A1). The bare + // `## text` markdown form carries NO alignment, so an aligned heading + // would silently drop textAlign on export. A styled `` re-parses: + // the heading parse rule (tag:"h1".."h6") matches and the textAlign + // global-attribute parseHTML (docmost-schema) reads the style back, + // preserving BOTH level and textAlign. escapeAttr keeps the align + // value injection-safe, exactly like the paragraph arm. + return `${headingText}`; + } + // No alignment (or the default "left"): keep the plain `## text` + // markdown form — HTML-ifying an unaligned heading would be needless + // churn, exactly as the paragraph case keeps plain text when unaligned. return "#".repeat(level) + " " + headingText; case "text": diff --git a/packages/git-sync/test/markdown-converter-gaps.test.ts b/packages/git-sync/test/markdown-converter-gaps.test.ts index f08684ce..47ab05bc 100644 --- a/packages/git-sync/test/markdown-converter-gaps.test.ts +++ b/packages/git-sync/test/markdown-converter-gaps.test.ts @@ -256,9 +256,11 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => { expect(out).toBe('> ```js\n> a\n> b\n> ```'); }); - // 4. A GFM body cell with TWO block children (paragraph + bulletList): joined - // by a space, the list's newline collapsed so the row stays intact. - it('a GFM body cell with paragraph+list joins them by a space (no "p1- a")', () => { + // 4. A body cell with TWO block children (paragraph + bulletList) cannot be a + // GFM pipe row (inline-only). #8 emits the WHOLE table as HTML so + // the paragraph and the list each survive as their own block instead of + // being lossily flattened into one "p1 - a" pipe cell. + it('a table cell with paragraph+list emits an HTML
(blocks preserved)', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'table', @@ -285,7 +287,9 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => { ], }), ); - expect(out).toBe('| h |\n| --- |\n| p1 - a |'); + expect(out).toBe( + '

h

p1

  • a

', + ); }); // 5. code + link co-occur: the schema's `code` mark excludes all other marks @@ -391,8 +395,11 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => { expect(out).toBe('> - x\n> - y'); }); - // 11. GFM (non-spanned) cell: multi-block space-join + pipe-escape + newline-collapse. - it('a GFM cell escapes a literal pipe and collapses newlines across two paragraphs', () => { + // 11. A non-spanned cell with TWO block paragraphs: #8 emits the whole table + // as HTML , so each paragraph stays its own

and the literal + // pipe needs no escaping inside HTML text (the old GFM path space-joined + // the blocks into one line and escaped the pipe to \|). + it('a table cell with two paragraphs emits an HTML

(blocks kept, no pipe-escape)', () => { const out = convertProseMirrorToMarkdown( doc({ type: 'table', @@ -413,7 +420,9 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => { ], }), ); - expect(out).toBe('| h |\n| --- |\n| a\\|b c |'); + expect(out).toBe( + '

h

a|b

c

', + ); }); }); @@ -775,3 +784,62 @@ describe('converter gap coverage — raw-HTML container round-trips (specs 15– expect(md1).not.toContain('$x_i$'); }); }); + +// =========================================================================== +// 30. heading.textAlign round-trip (A1). The paragraph case already exports a +// non-default alignment as a styled `

` that re-parses +// losslessly; headings used to emit only the bare `## text` form, silently +// DROPPING textAlign on export. The heading case is now symmetric: an aligned +// heading exports as `` and re-parses back to a heading +// carrying BOTH the level and the textAlign, so the round-trip is lossless; an +// UNaligned heading still emits the bare `## text` markdown form (no churn). +// =========================================================================== +const alignedHeading = (level: number, align: string, ...inline: any[]) => ({ + type: 'heading', + attrs: { level, textAlign: align }, + content: inline, +}); + +describe('heading.textAlign round-trip (A1)', () => { + it('an aligned heading exports as (not bare ##)', () => { + expect(convertProseMirrorToMarkdown(doc(alignedHeading(2, 'center', text('Title'))))).toBe( + '

Title

', + ); + }); + + it('survives export -> import -> export losslessly (level AND textAlign preserved)', async () => { + const input = alignedHeading(2, 'center', text('Title')); + const { md1, doc2, md2 } = await roundTrip(input); + // Export direction: a styled , injection-safe via escapeAttr. + expect(md1).toBe('

Title

'); + // Import direction: re-parses to a heading node with the level AND textAlign + // (the raw HTML block flows through marked -> generateJSON, where + // the heading parse rule matches and the textAlign global attr reads the + // style back). Byte-stable second export closes the loop. + const h = doc2.content[0]; + expect(h.type).toBe('heading'); + expect(h.attrs.level).toBe(2); + expect(h.attrs.textAlign).toBe('center'); + expect(md2).toBe(md1); + // Canonical equality of the re-parsed doc against the original input doc. + expect(docsCanonicallyEqual(doc2, doc(input))).toBe(true); + }); + + it('a right-aligned h3 round-trips its level and alignment', async () => { + const { doc2 } = await roundTrip(alignedHeading(3, 'right', text('Head'))); + const h = doc2.content[0]; + expect(h.type).toBe('heading'); + expect(h.attrs.level).toBe(3); + expect(h.attrs.textAlign).toBe('right'); + }); + + it('an UNaligned heading still emits the bare "## text" form (no HTML churn)', () => { + const bare = convertProseMirrorToMarkdown(doc(heading(2, text('Plain')))); + expect(bare).toBe('## Plain'); + expect(bare).not.toContain('
', () => { - it('non-default alignment wraps the paragraph in
', () => { +describe('paragraph.textAlign ->

', () => { + it('non-default alignment emits an HTML

', () => { + // #7 fix: a non-default paragraph alignment now round-trips. It is exported + // as an HTML `

` (the schema's paragraph + // parseHTML reads `style="text-align"` back onto `textAlign` on import), so + // the alignment survives instead of collapsing to bare text. (The old + // `

` form was NOT re-parsed onto the paragraph and was + // therefore lossy.) expect(c({ type: 'paragraph', attrs: { textAlign: 'center' }, content: [text('x')] })).toBe( - '
x
', + '

x

', ); }); @@ -190,10 +196,14 @@ describe('escaping idempotence (SPEC §11 phantom-diff guard)', () => { }); }); -describe('table-cell sanitization (| and newline must not corrupt the GFM row)', () => { - it('escapes a literal pipe and collapses an inter-block newline in a cell', () => { - // A cell with a pipe in one paragraph and a second block paragraph: the pipe - // is escaped to \| and the block join (a space) keeps the row intact. +describe('multi-block table cell -> HTML (#8: GFM pipes cannot hold block content)', () => { + it('emits the whole table as HTML
so a multi-paragraph cell survives', () => { + // A cell holding TWO block paragraphs cannot be represented by a GFM pipe + // row (one inline line only) — the old GFM path collapsed the two blocks + // into one line ("a\|b c"), losing the block boundary and forcing a fragile + // pipe-escape. #8 emits the WHOLE table as raw HTML
instead: the + // schema's table-family parseHTML round-trips it, each paragraph stays its + // own

, and the literal pipe needs no escaping inside HTML text. const out = c({ type: 'table', content: [ @@ -205,7 +215,9 @@ describe('table-cell sanitization (| and newline must not corrupt the GFM row)', ]}, ], }); - expect(out).toBe('| H |\n| --- |\n| a\\|b c |'); + expect(out).toBe( + '

H

a|b

c

', + ); }); });