refactor(git-sync): internalize the engine — first-class ESM, no vendoring bridge (#119 review)

Closes the architecture item from the #119 review: drop the "vendored from
docmost-sync" framing and the CJS↔ESM `Function('import()')` bridge so the engine
is a normal first-class gitmost package.

Part 1 — vendoring markers removed (prose only, zero behavior change): reworded
"VENDORED into gitmost" / "vendored from docmost-sync" / "Engine LOGIC is
byte-identical" / "it's a port" comments across the engine. Behavior-bearing
strings are untouched: BOT_AUTHOR_NAME/EMAIL and the `Docmost-Sync-Source:`
provenance trailers (changing them would break git authorship + the loop-guard).

Part 2 — the package is now ESM (matching the sibling @docmost/mcp): `type: module`,
tsconfig Node16, `.js` extensions on relative imports, and a static
`import { marked }` replacing the `new Function('return import(...)')` /
`loadMarked` hack — the bridge is GONE from the package. The CommonJS NestJS
server loads the now-ESM engine via a new `git-sync.loader.ts` that mirrors the
existing `docmost-client.loader.ts` mcp loader exactly (Function-indirected
dynamic import + cached promise + retry-on-reject). The 4 server consumers
(orchestrator/datasource/vault-registry/git-http-backend) call `await loadGitSync()`
for value exports; types stay `import type` (erased). The converter-gate spec —
which needs the real converter — loads the package's TS source via a jest
moduleNameMapper + isolatedModules (documented in that spec); the other git-sync
specs mock the loader.

Verified: engine builds pure ESM (no Function/require leftover), vitest 614,
editor-ext build, server + client tsc, full server jest 1397/0. Live stand
smoke-test: server starts clean on the ESM engine (no ERR_REQUIRE_ESM), a real
sync cycle runs through the loader, and the basic e2e suite is 12/12 (clone via
git-http-backend, push, pull, delete, 3-way merge — all through the new loader).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-24 14:23:40 +03:00
parent 3a91e0eca9
commit 5da12e89f9
29 changed files with 270 additions and 205 deletions

View File

@@ -1,9 +1,7 @@
/**
* docmost-sync ADDITION (not present in docmost-mcp).
*
* Semantic canonicalization of ProseMirror/TipTap documents for the Phase-0
* round-trip idempotency check (SPEC §11, "Задача №0", option (б): compare a
* CANONICALIZED form rather than raw bytes).
* Semantic canonicalization of ProseMirror/TipTap documents for the round-trip
* idempotency check (SPEC §11, "Задача №0", option (б): compare a CANONICALIZED
* form rather than raw bytes).
*
* `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g.
* `indent: null` where the source omitted it) and regenerates per-block ids on
@@ -12,8 +10,7 @@
* normalizes a document so that two semantically-equal docs compare deep-equal
* regardless of block ids and absent-vs-explicit-default-null attributes.
*
* This file is intentionally a NEW, self-contained module so it is trivial to
* backport into docmost-mcp without touching existing code.
* It is a self-contained module with no external dependencies.
*/
/**

View File

@@ -21,7 +21,7 @@ import { getSchema } from "@tiptap/core";
import { Node } from "@tiptap/pm/model";
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
import { docmostExtensions } from "./docmost-schema";
import { docmostExtensions } from "./docmost-schema.js";
/** A single inserted/deleted change with its containing-block context. */
export interface DiffChange {

View File

@@ -1,28 +1,26 @@
/**
* Public surface of the vendored pure converter (the `lib/` half of the
* docmost-sync `docmost-client` package). This barrel re-exports only the
* Public surface of the pure converter (`lib/`). This barrel re-exports the
* PURE, IO-free pieces the sync engine needs: the self-contained markdown
* (de)serializers, the lossless ProseMirror <-> Markdown converter, the
* markdown -> ProseMirror import path, and semantic canonicalization for the
* round-trip idempotency check (SPEC §11).
*
* The REST client, websocket/collab write-path, auth-utils and page-lock from
* the upstream package are deliberately NOT vendored (the gitmost server writes
* natively).
* There is no REST client, websocket/collab write-path, auth-utils or page-lock
* here — the gitmost server writes natively.
*/
export {
serializeDocmostMarkdown,
parseDocmostMarkdown,
serializeDocmostMarkdownBody,
} from "./markdown-document";
export type { DocmostMdMeta } from "./markdown-document";
} from "./markdown-document.js";
export type { DocmostMdMeta } from "./markdown-document.js";
export { convertProseMirrorToMarkdown } from "./markdown-converter";
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
export { markdownToProseMirror } from "./markdown-to-prosemirror";
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
export {
canonicalizeContent,
docsCanonicallyEqual,
} from "./canonicalize";
export { parsePageFile, serializePageFile } from "./page-file";
} from "./canonicalize.js";
export { parsePageFile, serializePageFile } from "./page-file.js";

View File

@@ -135,11 +135,9 @@ export function parseDocmostMarkdown(full: string): {
return { meta, body, comments };
}
// --- docmost-sync addition (backport target: docmost-mcp/src/lib/markdown-document.ts) ---
/**
* Serialize a self-contained markdown file with the meta block + body ONLY —
* NO trailing `docmost:comments` block. The docmost-sync engine never touches
* NO trailing `docmost:comments` block. The sync engine never touches
* `/comments` (SPEC §3): the synced file carries just page identity (meta) and
* the body, where comment threads survive only as inline `<span
* data-comment-id>` anchor marks inside the body.

View File

@@ -1,67 +1,16 @@
/**
* Pure markdown -> ProseMirror conversion (extracted from docmost-sync's
* `packages/docmost-client/src/lib/collaboration.ts`).
* Pure markdown -> ProseMirror conversion.
*
* Only the PURE converter path is vendored here: `markdownToProseMirror`
* (marked -> HTML -> generateJSON) plus the two pre/post processors it needs
* (`preprocessCallouts`, `bridgeTaskLists`). The collaboration/websocket
* write-path (Hocuspocus, Yjs, `ws`, `withPageLock`, `sanitizeForYjs`) that
* lives in the same upstream file is intentionally NOT vendored — the gitmost
* server writes page bodies natively through the collab gateway.
* The converter path is `markdownToProseMirror` (marked -> HTML ->
* generateJSON) plus the two pre/post processors it needs (`preprocessCallouts`,
* `bridgeTaskLists`). The gitmost server writes the resulting page bodies
* natively through the collab gateway, so no websocket/Yjs write-path lives
* here.
*/
import { generateJSON } from "@tiptap/html";
import { JSDOM } from "jsdom";
import { docmostExtensions } from "./docmost-schema";
/**
* Structural type for the bits of the `marked` ESM module we use: just the
* `marked` named export's `parse` method (markdown -> HTML string).
*/
interface MarkedModule {
marked: { parse(markdown: string): string | Promise<string> };
}
// `marked` is ESM-only. Under this package's CommonJS build TS would otherwise
// downlevel a literal `import()` to `require()`, which cannot load an ESM-only
// module. Indirect through `Function` so the real dynamic `import()` survives
// compilation and loads ESM from CommonJS at runtime in Node (same trick as
// apps/server/src/core/ai-chat/tools/docmost-client.loader.ts).
const esmImport = new Function(
"specifier",
"return import(specifier)",
) as (specifier: string) => Promise<unknown>;
// Memoize the in-flight/loaded module so the dynamic import runs at most once.
let markedPromise: Promise<MarkedModule> | null = null;
/**
* Lazily load the ESM-only `marked` module (cached).
*
* In the built CommonJS package (Node, jest with ts-jest) the `esmImport`
* Function trick performs a real dynamic `import()` of the ESM module. Under
* vitest, however, the transformed module is evaluated without a dynamic-import
* callback, so `new Function('return import(...)')` throws "A dynamic import
* callback was not specified"; there `require('marked')` succeeds because the
* test runner's loader interops ESM. We therefore try the Function import first
* and fall back to `require` so BOTH runtimes resolve `marked` transparently.
*/
async function loadMarked(): Promise<MarkedModule["marked"]> {
if (!markedPromise) {
markedPromise = (esmImport("marked") as Promise<MarkedModule>)
.catch(() => {
// Function-trick import is unavailable (e.g. under vitest's evaluator):
// fall back to require, which the test runner can interop for ESM.
// eslint-disable-next-line @typescript-eslint/no-var-requires
return require("marked") as MarkedModule;
})
.catch((err) => {
// Do not cache a rejected import — allow the next call to retry.
markedPromise = null;
throw err;
});
}
return (await markedPromise).marked;
}
import { marked } from "marked";
import { docmostExtensions } from "./docmost-schema.js";
// Setup DOM environment for Tiptap HTML parsing in Node.js
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
@@ -110,8 +59,6 @@ async function preprocessCallouts(markdown: string): Promise<string> {
return markdown;
}
const marked = await loadMarked();
// Recursively transform a slice of lines, converting top-level callouts in
// that slice into <div> blocks and rendering their inner content (which may
// itself contain nested callouts) through this same function.
@@ -379,7 +326,6 @@ function stripEmptyParagraphs(node: any): any {
export async function markdownToProseMirror(
markdownContent: string,
): Promise<any> {
const marked = await loadMarked();
const withCallouts = await preprocessCallouts(markdownContent);
const html = await marked.parse(withCallouts);
const bridged = bridgeTaskLists(html);