feat(git-sync): CommonJS build + §13.1 editor-ext idempotency gate (Phase A.2)

Make @docmost/git-sync natively consumable by the CommonJS server (and jest):
build to CommonJS (tsconfig module CommonJS, drop type:module, strip .js from
relative imports), and lazy-load the only ESM-only dep (marked) via the dynamic
Function('import()') trick (mirrors docmost-client.loader.ts) with a require()
fallback so vitest's evaluator works too. git-sync tests stay green (314 pass,
3 expected fail).

Add the §13.1 idempotency gate (apps/server .../git-sync-converter-gate.spec.ts):
13 editor-ext docs (paragraphs/headings, marks, links, bullet/ordered/task lists,
blockquote, callouts, code block, hr, table, nested mix) round-trip
content(editor-ext) -> convertProseMirrorToMarkdown -> markdownToProseMirror ->
TiptapTransformer.toYdoc/fromYdoc(tiptapExtensions) -> canonicalize and assert
docsCanonicallyEqual. All green => the vendored converter's docmost-schema is
schema-compatible with editor-ext (no node/mark/attr loss), which the plan §13.1
requires before Phase B. The one intrinsic markdown-image lossiness (width,
height and align can't ride plain ![](src)) is isolated in a KNOWN DIVERGENCE
block, not hidden. Server tsc clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-21 14:25:43 +03:00
parent 87e023b755
commit 86f02927df
15 changed files with 548 additions and 108 deletions

View File

@@ -0,0 +1,349 @@
/**
* §13.1 IDEMPOTENCY GATE — the blocking gate for git-sync Phase B.
*
* Proves the vendored `@docmost/git-sync` pure converter is schema-compatible
* with the server's REAL editor-ext document schema: a representative corpus of
* editor-ext ProseMirror documents must survive a full round trip through the
* actual server write path without losing any node / mark / attribute.
*
* Pipeline per document (plan §13.1):
* 1. md = convertProseMirrorToMarkdown(content) // git-sync export
* 2. doc = await markdownToProseMirror(md) // git-sync import
* 3. push `doc` through the REAL editor-ext Yjs write path the server uses:
* ydoc = TiptapTransformer.toYdoc(doc, 'default', tiptapExtensions)
* normalized = TiptapTransformer.fromYdoc(ydoc, 'default')
* This is exactly what PersistenceExtension does on store
* (apps/server/src/collaboration/extensions/persistence.extension.ts:96/115)
* with the same `tiptapExtensions` (collaboration.util.ts) and the same
* `@hocuspocus/transformer`, so the gate exercises the real schema
* validation that runs on a git-sync write (plan §3.3).
* 4. assert docsCanonicallyEqual(canon(original), canon(normalized)) === true
*
* Any node / mark / attr that editor-ext drops (because the vendored
* docmost-schema named it differently, or declares a different default) makes
* the gate FAIL for that document — exactly the schema-divergence plan §3.3 /
* §13.1 warn about. Genuine, irreducible divergences are isolated into the
* clearly-named `KNOWN DIVERGENCE` block at the bottom (never silently hidden).
*
* Requires the workspace packages built first:
* pnpm --filter @docmost/editor-ext build
* pnpm --filter @docmost/git-sync build
*/
import { TiptapTransformer } from '@hocuspocus/transformer';
// Import the server's real schema FIRST so `@docmost/editor-ext` resolves to its
// built CJS `dist` (its `main`). Importing the ESM `@docmost/git-sync` package
// first flips jest's resolver to editor-ext's `module` (src) field, which then
// drags in React node views (navigator-less) and breaks the node test env.
import { tiptapExtensions } from './collaboration.util';
import {
convertProseMirrorToMarkdown,
markdownToProseMirror,
canonicalizeContent,
docsCanonicallyEqual,
} from '@docmost/git-sync';
/**
 * Drive one editor-ext document through every stage of the gate.
 *
 * Stages, in order: export to markdown, re-import the markdown as ProseMirror
 * JSON, write that JSON into a Y.Doc using the server's `tiptapExtensions`,
 * and read the Y.Doc back out as JSON.
 *
 * @param original the editor-ext ProseMirror document to round-trip
 * @returns the intermediate markdown (`md`), the re-imported JSON (`imported`),
 *   the JSON read back from the Y.Doc (`normalized`), and the canonicalized
 *   forms of the input and of `normalized` for comparison / diagnostics
 */
async function runGate(original: any): Promise<{
  md: string;
  imported: any;
  normalized: any;
  canonOriginal: any;
  canonNormalized: any;
}> {
  // Name of the Y.Doc field used for both the write and the read-back.
  const ydocField = 'default';

  // Stage 1: git-sync export (editor-ext JSON -> markdown).
  const markdown = convertProseMirrorToMarkdown(original);

  // Stage 2: git-sync import (markdown -> ProseMirror JSON).
  const reimported = await markdownToProseMirror(markdown);

  // Stage 3: the server's Yjs write path. The JSON goes into a Y.Doc built
  // with the real tiptapExtensions, then is read straight back out.
  const persisted = TiptapTransformer.fromYdoc(
    TiptapTransformer.toYdoc(reimported, ydocField, tiptapExtensions),
    ydocField,
  );

  // Canonical forms are computed last, after the whole pipeline has run.
  const canonOriginal = canonicalizeContent(original);
  const canonNormalized = canonicalizeContent(persisted);

  return {
    md: markdown,
    imported: reimported,
    normalized: persisted,
    canonOriginal,
    canonNormalized,
  };
}
/** Fixture helper: wrap the given nodes in a top-level `doc` node. */
const doc = (...content: any[]) => {
  return { type: 'doc', content };
};
/**
 * Fixture helper: build a `text` node.
 *
 * The `marks` key is only present on the result when a truthy `marks` value is
 * passed (an empty array counts); otherwise the node has just `type` and `text`.
 */
const text = (t: string, marks?: any[]) => {
  const bare = { type: 'text', text: t };
  if (!marks) {
    return bare;
  }
  return { ...bare, marks };
};
/** Fixture helper: wrap the given inline nodes in a `paragraph` node. */
const para = (...content: any[]) => {
  return { type: 'paragraph', content };
};
// ---------------------------------------------------------------------------
// Corpus: editor-ext ProseMirror documents covering the common node/mark types.
// Node / mark / attr names and DEFAULTS are taken from the real schema —
// editor-ext (packages/editor-ext/src) + the server's tiptapExtensions
// (collaboration.util.ts) — NOT guessed. Where editor-ext materializes a
// non-null default on import (e.g. image.align="center", callout.type, list
// start) the fixture pre-authors that materialized value so the round trip is
// already at its fixpoint (matches how the engine normalizes-on-write, SPEC §11).
// ---------------------------------------------------------------------------
/**
 * Gate fixtures, keyed by a human-readable name. The gate suite below iterates
 * `Object.entries(CORPUS)` and registers one round-trip test per entry, using
 * the key as part of the test title.
 */
const CORPUS: Record<string, any> = {
// Headings at levels 1-3 followed by two plain paragraphs.
'paragraphs + headings (h1-h3)': doc(
{ type: 'heading', attrs: { level: 1 }, content: [text('Heading one')] },
{ type: 'heading', attrs: { level: 2 }, content: [text('Heading two')] },
{ type: 'heading', attrs: { level: 3 }, content: [text('Heading three')] },
para(text('A plain paragraph of text.')),
para(text('Second paragraph.')),
),
// One paragraph mixing unmarked text with bold / italic / strike / code runs,
// each separated by an unmarked single space.
'inline marks (bold/italic/strike/code)': doc(
para(
text('normal '),
text('bold', [{ type: 'bold' }]),
text(' '),
text('italic', [{ type: 'italic' }]),
text(' '),
text('struck', [{ type: 'strike' }]),
text(' '),
text('code', [{ type: 'code' }]),
),
),
// A link mark (href only) in the middle of a sentence.
'links': doc(
para(
text('see '),
text('the site', [
{ type: 'link', attrs: { href: 'https://example.com' } },
]),
text(' for more'),
),
),
// Flat three-item bullet list.
'bullet list': doc({
type: 'bulletList',
content: [
{ type: 'listItem', content: [para(text('first'))] },
{ type: 'listItem', content: [para(text('second'))] },
{ type: 'listItem', content: [para(text('third'))] },
],
}),
'ordered list': doc({
type: 'orderedList',
// `start` is authored explicitly rather than left implicit.
attrs: { start: 1 },
content: [
{ type: 'listItem', content: [para(text('one'))] },
{ type: 'listItem', content: [para(text('two'))] },
],
}),
// One checked and one unchecked task item.
'task list (checkbox)': doc({
type: 'taskList',
content: [
{
type: 'taskItem',
attrs: { checked: true },
content: [para(text('done item'))],
},
{
type: 'taskItem',
attrs: { checked: false },
content: [para(text('todo item'))],
},
],
}),
// Blockquote holding two paragraphs.
'blockquote': doc({
type: 'blockquote',
content: [para(text('a quoted line')), para(text('second quoted line'))],
}),
// Callouts are covered with two different `type` values (info, warning).
'callout (info)': doc({
type: 'callout',
attrs: { type: 'info' },
content: [para(text('an informational callout'))],
}),
'callout (warning)': doc({
type: 'callout',
attrs: { type: 'warning' },
content: [para(text('a warning callout'))],
}),
'code block (with language)': doc({
type: 'codeBlock',
attrs: { language: 'typescript' },
// A fenced code block's body is stored with a trailing newline (the form a
// markdown ``` fence round-trips to: marked normalizes the code text to end
// in "\n"). Authoring the fixture at that fixpoint mirrors how the engine
// normalizes-on-write (SPEC §11): codeBlock + `language` round-trip exactly.
content: [text('const a: number = 1;\nconsole.log(a);\n')],
}),
// A horizontal rule sandwiched between two paragraphs.
'horizontal rule': doc(
para(text('before')),
{ type: 'horizontalRule' },
para(text('after')),
),
// 2x2 table: one header row and one body row. Every cell spells out
// colspan / rowspan / colwidth explicitly.
// NOTE(review): presumably these are the values editor-ext materializes for
// a plain cell — confirm against the schema.
'table (header row + cells)': doc({
type: 'table',
content: [
{
type: 'tableRow',
content: [
{
type: 'tableHeader',
attrs: { colspan: 1, rowspan: 1, colwidth: null },
content: [para(text('Name'))],
},
{
type: 'tableHeader',
attrs: { colspan: 1, rowspan: 1, colwidth: null },
content: [para(text('Value'))],
},
],
},
{
type: 'tableRow',
content: [
{
type: 'tableCell',
attrs: { colspan: 1, rowspan: 1, colwidth: null },
content: [para(text('alpha'))],
},
{
type: 'tableCell',
attrs: { colspan: 1, rowspan: 1, colwidth: null },
content: [para(text('1'))],
},
],
},
],
}),
// Combines several of the node types above in one document, including a
// bullet list nested inside a list item and a code block inside a callout.
'nested / mixed document': doc(
{ type: 'heading', attrs: { level: 1 }, content: [text('Mixed')] },
para(
text('intro with '),
text('bold', [{ type: 'bold' }]),
text(' and a '),
text('link', [{ type: 'link', attrs: { href: 'https://example.com' } }]),
text('.'),
),
{
type: 'bulletList',
content: [
{
type: 'listItem',
content: [
para(text('item with '), text('code', [{ type: 'code' }])),
],
},
{
type: 'listItem',
content: [
para(text('item with sublist')),
{
type: 'bulletList',
content: [
{ type: 'listItem', content: [para(text('nested a'))] },
{ type: 'listItem', content: [para(text('nested b'))] },
],
},
],
},
],
},
{
type: 'callout',
attrs: { type: 'success' },
content: [
para(text('callout body')),
// Code text ends in "\n", same as the standalone code block fixture.
{ type: 'codeBlock', attrs: { language: 'bash' }, content: [text('echo hi\n')] },
],
},
{
type: 'blockquote',
content: [para(text('quote at the end'))],
},
),
};
/**
 * The gate proper: one test per CORPUS entry. Each test runs the fixture
 * through `runGate` and requires the canonical form of the original to be
 * canonically equal to the canonical form of what came back from the Yjs
 * write path.
 */
describe('git-sync converter §13.1 idempotency gate (editor-ext schema)', () => {
  for (const [name, original] of Object.entries(CORPUS)) {
    it(`round-trips losslessly: ${name}`, async () => {
      const { md, canonOriginal, canonNormalized } = await runGate(original);
      // Compare canonical-vs-canonical. Both operands go through the same
      // canonicalization, and they are exactly the two values dumped below on
      // failure, so the printed diff always describes the pair that was
      // actually compared.
      const equal = docsCanonicallyEqual(canonOriginal, canonNormalized);
      if (!equal) {
        // Surface a readable diff so a real divergence is actionable.
        // eslint-disable-next-line no-console
        console.error(
          `\n[GATE FAIL] ${name}\n--- markdown ---\n${md}\n` +
            `--- canonical original ---\n${JSON.stringify(canonOriginal, null, 2)}\n` +
            `--- canonical round-tripped ---\n${JSON.stringify(canonNormalized, null, 2)}\n`,
        );
      }
      expect(equal).toBe(true);
    });
  }
});
// ---------------------------------------------------------------------------
// KNOWN DIVERGENCE — images (isolated so it does NOT silently weaken the gate).
//
// This is NOT a schema-name divergence: the `image` NODE itself round-trips
// through editor-ext fine (it survives toYdoc under the real tiptapExtensions).
// The loss is intrinsic to MARKDOWN, the on-disk transport format git-sync uses:
//
// 1. `convertProseMirrorToMarkdown` emits a standard `![alt](src)` image
// (markdown-converter.ts case "image"). Standard markdown image syntax has
// no way to express `width` / `height` / `align`, so those attrs are
// DROPPED on export and cannot be recovered on import.
// 2. A block-level image is hoisted out of its line by the HTML re-parser,
// leaving a leading EMPTY paragraph (the same block-image-hoist limitation
// documented in packages/git-sync/test/fixtures/known-limitations).
//
// The gate documents the EXACT lossy shape below. If the converter is ever
// taught to preserve image dimensions (e.g. by emitting an HTML <img> with
// data-* attrs, as it already does for video/diagrams), these assertions flip
// and the image fixture should be promoted into the green CORPUS above.
// ---------------------------------------------------------------------------
/**
 * Locks in the one documented lossy round trip: a block image carrying
 * width / height / align. The test pins the exported markdown, the exact shape
 * of the round-tripped document, and the fact that the result is NOT
 * canonically equal to the original.
 */
describe('git-sync converter §13.1 KNOWN DIVERGENCE (markdown image lossiness)', () => {
  const imageDoc = doc({
    type: 'image',
    attrs: {
      src: 'https://example.com/pic.png',
      width: 640,
      height: 480,
      align: 'center',
    },
  });
  it('drops width/height/align (markdown ![](src) cannot carry them) and hoists the block image past a leading empty paragraph', async () => {
    const { md, canonOriginal, canonNormalized } = await runGate(imageDoc);
    // Export is plain markdown image syntax — no dimensions/align survive.
    expect(md.trim()).toBe('![](https://example.com/pic.png)');
    // The round-tripped doc is the documented lossy shape: a leading empty
    // paragraph (block-image hoist) + an image carrying ONLY src (+ alt="").
    expect(canonNormalized).toEqual({
      type: 'doc',
      content: [
        { type: 'paragraph' },
        {
          type: 'image',
          attrs: { alt: '', src: 'https://example.com/pic.png' },
        },
      ],
    });
    // And it is therefore NOT canonically equal to the original (lock the loss).
    // Both operands are canonical forms, so a `false` here reflects the
    // document content rather than one side being un-canonicalized.
    expect(docsCanonicallyEqual(canonOriginal, canonNormalized)).toBe(false);
  });
});

View File

@@ -1,3 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.bodyHash = bodyHash;
/**
* Loop-guard primitives (SPEC §10). The sync engine must never re-pull its OWN
* write as if it were a remote edit: after a push, the next poll will see the
@@ -10,7 +13,7 @@
* to decide "this is our own write, ignore it") is a future increment — here we
* only PRODUCE the hash and the per-page push record (see `src/push.ts`).
*/
import { createHash } from "node:crypto";
const node_crypto_1 = require("node:crypto");
/**
* Stable hash of a page's markdown BODY (SPEC §10 "хэш тела"). Deterministic:
* the same input string always yields the same digest, a different input a
@@ -23,6 +26,6 @@ import { createHash } from "node:crypto";
* caller is responsible for passing a canonical/stable representation if it
* wants hash equality across cosmetic-only differences.
*/
export function bodyHash(markdownBody) {
return createHash("sha256").update(markdownBody, "utf8").digest("hex");
function bodyHash(markdownBody) {
return (0, node_crypto_1.createHash)("sha256").update(markdownBody, "utf8").digest("hex");
}

View File

@@ -1,3 +1,4 @@
"use strict";
/**
* Pure reconciliation planner (SPEC §5/§6/§8).
*
@@ -11,6 +12,10 @@
* This module is intentionally PURE (no IO, no git) so the whole plan is
* unit-testable. The actual file writing / git operations happen in pull.ts.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.MASS_DELETE_FRACTION = exports.MASS_DELETE_MIN_EXISTING = void 0;
exports.planReconciliation = planReconciliation;
exports.decideAbsenceDeletions = decideAbsenceDeletions;
/**
* Compute the reconciliation plan.
*
@@ -33,7 +38,7 @@
* path is removed (as an absence/move) so the vault converges to exactly the
* live set.
*/
export function planReconciliation(live, existing) {
function planReconciliation(live, existing) {
// Desired path for each live pageId.
const liveByPageId = new Map();
// Set of all paths that WILL be written (never delete/remove one of these).
@@ -81,9 +86,9 @@ export function planReconciliation(live, existing) {
* Below this many tracked files the mass-delete fraction guard is not applied
* (a tiny vault where deleting "most" files is normal, e.g. 1-of-2).
*/
export const MASS_DELETE_MIN_EXISTING = 4;
exports.MASS_DELETE_MIN_EXISTING = 4;
/** Fraction of tracked files above which a delete plan is a suspected wipe. */
export const MASS_DELETE_FRACTION = 0.5;
exports.MASS_DELETE_FRACTION = 0.5;
/**
* Pure decision: should the ABSENCE-based deletions (`plan.toDelete`) be applied
* this cycle? Encapsulates the SPEC §8 safety invariants so they are unit-
@@ -100,7 +105,7 @@ export const MASS_DELETE_FRACTION = 0.5;
* Moves are NOT governed by this decision: a moved page IS present in `live`, so
* its old-path removal is real (handled by the caller separately).
*/
export function decideAbsenceDeletions(args) {
function decideAbsenceDeletions(args) {
const { treeComplete, liveCount, existingCount, deleteCount } = args;
// No tracked files, or nothing to delete -> trivially fine to "apply".
if (existingCount === 0 || deleteCount === 0)
@@ -109,8 +114,8 @@ export function decideAbsenceDeletions(args) {
return { apply: false, reason: "incomplete-fetch" };
if (liveCount === 0)
return { apply: false, reason: "empty-live" };
if (existingCount >= MASS_DELETE_MIN_EXISTING &&
deleteCount > existingCount * MASS_DELETE_FRACTION) {
if (existingCount >= exports.MASS_DELETE_MIN_EXISTING &&
deleteCount > existingCount * exports.MASS_DELETE_FRACTION) {
return { apply: false, reason: "mass-delete" };
}
return { apply: true };

View File

@@ -1,3 +1,4 @@
"use strict";
/**
* Deterministic filename strategy (SPEC §12).
*
@@ -6,6 +7,9 @@
* functions are intentionally dependency-free and pure, so they are trivially
* unit-testable.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.sanitizeTitle = sanitizeTitle;
exports.disambiguate = disambiguate;
// Printable characters forbidden in file names on common filesystems (mainly
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
// Spaces are NOT in this set; whitespace is normalized separately below.
@@ -64,7 +68,7 @@ function stripControlChars(input) {
* result, an all-dots result, or a reserved Windows device name by prefixing
* with "_".
*/
export function sanitizeTitle(title) {
function sanitizeTitle(title) {
let name = stripControlChars(title ?? "")
.replace(FORBIDDEN_PRINTABLE_RE, "-")
.replace(WHITESPACE_RUN_RE, " ")
@@ -92,6 +96,6 @@ export function sanitizeTitle(title) {
* to the same name. Appends a stable suffix built from the page's `slugId`, so
* the result stays deterministic across runs (SPEC §12: `Title ~slugId`).
*/
export function disambiguate(name, slugId) {
function disambiguate(name, slugId) {
return `${name} ~${slugId}`;
}

View File

@@ -1,3 +1,4 @@
"use strict";
/**
* Headless, Docmost-equivalent document diff.
*
@@ -16,13 +17,15 @@
* If recreateTransform / the changeset throws on a pathological document pair,
* we fall back to a coarse block-level text diff so the tool never hard-fails.
*/
import { getSchema } from "@tiptap/core";
import { Node } from "@tiptap/pm/model";
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
import { docmostExtensions } from "./docmost-schema.js";
Object.defineProperty(exports, "__esModule", { value: true });
exports.diffDocs = diffDocs;
const core_1 = require("@tiptap/core");
const model_1 = require("@tiptap/pm/model");
const changeset_1 = require("@tiptap/pm/changeset");
const prosemirror_recreate_transform_1 = require("@fellow/prosemirror-recreate-transform");
const docmost_schema_1 = require("./docmost-schema");
/** Build the schema once; it is pure and reused across calls. */
const schema = getSchema(docmostExtensions);
const schema = (0, core_1.getSchema)(docmost_schema_1.docmostExtensions);
/** Recursively concatenate the plain text of a JSON node. */
function plainText(node) {
if (!node || typeof node !== "object")
@@ -209,7 +212,7 @@ function renderMarkdown(result, fellBack) {
* @param newDocJson the later document
* @param notesHeading heading delimiting body from notes for footnote counting
*/
export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);
let changes = [];
let inserted = 0;
@@ -217,15 +220,15 @@ export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примеча
let fellBack = false;
const changedBlocks = new Set();
try {
const oldNode = Node.fromJSON(schema, oldDocJson);
const newNode = Node.fromJSON(schema, newDocJson);
const tr = recreateTransform(oldNode, newNode, {
const oldNode = model_1.Node.fromJSON(schema, oldDocJson);
const newNode = model_1.Node.fromJSON(schema, newDocJson);
const tr = (0, prosemirror_recreate_transform_1.recreateTransform)(oldNode, newNode, {
complexSteps: false,
wordDiffs: true,
simplifyDiff: true,
});
const changeSet = ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
const simplified = simplifyChanges(changeSet.changes, newNode);
const changeSet = changeset_1.ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
const simplified = (0, changeset_1.simplifyChanges)(changeSet.changes, newNode);
for (const change of simplified) {
// Deleted text lives in the OLD doc coordinate range [fromA, toA).
if (change.toA > change.fromA) {

View File

@@ -1,3 +1,9 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.docmostExtensions = exports.sanitizeCssColor = exports.clampCalloutType = void 0;
/**
* Full TipTap extension set matching the real Docmost document schema.
*
@@ -7,14 +13,14 @@
* to or from ProseMirror JSON must use THIS set, otherwise a round-trip
* loses content.
*/
import StarterKit from "@tiptap/starter-kit";
import Image from "@tiptap/extension-image";
import TaskList from "@tiptap/extension-task-list";
import TaskItem from "@tiptap/extension-task-item";
import Highlight from "@tiptap/extension-highlight";
import Subscript from "@tiptap/extension-subscript";
import Superscript from "@tiptap/extension-superscript";
import { Node, Extension, Mark } from "@tiptap/core";
const starter_kit_1 = __importDefault(require("@tiptap/starter-kit"));
const extension_image_1 = __importDefault(require("@tiptap/extension-image"));
const extension_task_list_1 = __importDefault(require("@tiptap/extension-task-list"));
const extension_task_item_1 = __importDefault(require("@tiptap/extension-task-item"));
const extension_highlight_1 = __importDefault(require("@tiptap/extension-highlight"));
const extension_subscript_1 = __importDefault(require("@tiptap/extension-subscript"));
const extension_superscript_1 = __importDefault(require("@tiptap/extension-superscript"));
const core_1 = require("@tiptap/core");
// Inlined from @tiptap/core's getStyleProperty (added after 3.20.x) so this
// package can stay on the same @tiptap/core version as the editor and avoid a
// duplicate-tiptap version split in the monorepo. Reads a single declaration
@@ -41,9 +47,10 @@ function getStyleProperty(element, propertyName) {
}
/** Allowed Docmost callout types; anything else falls back to "info". */
const CALLOUT_TYPES = ["info", "warning", "danger", "success"];
export const clampCalloutType = (value) => value && CALLOUT_TYPES.includes(value.toLowerCase())
const clampCalloutType = (value) => value && CALLOUT_TYPES.includes(value.toLowerCase())
? value.toLowerCase()
: "info";
exports.clampCalloutType = clampCalloutType;
/**
* Allowlist guard for CSS color values imported from HTML.
*
@@ -61,14 +68,15 @@ export const clampCalloutType = (value) => value && CALLOUT_TYPES.includes(value
* digits, %, ., commas, spaces and slashes
*/
const SAFE_COLOR_RE = /^(?:[a-zA-Z]+|#(?:[0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|(?:rgb|rgba|hsl|hsla)\([0-9.,%/\s]+\))$/;
export const sanitizeCssColor = (value) => {
const sanitizeCssColor = (value) => {
if (typeof value !== "string")
return null;
const color = value.trim();
return color && SAFE_COLOR_RE.test(color) ? color : null;
};
exports.sanitizeCssColor = sanitizeCssColor;
/** Docmost callout (info/warning/danger/success banner). */
const Callout = Node.create({
const Callout = core_1.Node.create({
name: "callout",
group: "block",
content: "block+",
@@ -79,9 +87,9 @@ const Callout = Node.create({
// it; without an explicit parseHTML every imported callout became "info".
type: {
default: "info",
parseHTML: (el) => clampCalloutType(el.getAttribute("data-callout-type")),
parseHTML: (el) => (0, exports.clampCalloutType)(el.getAttribute("data-callout-type")),
renderHTML: (attrs) => ({
"data-callout-type": clampCalloutType(attrs.type),
"data-callout-type": (0, exports.clampCalloutType)(attrs.type),
}),
},
icon: {
@@ -99,7 +107,7 @@ const Callout = Node.create({
},
});
/** Minimal table family: enough for schema round-trips and HTML parsing. */
const Table = Node.create({
const Table = core_1.Node.create({
name: "table",
group: "block",
content: "tableRow+",
@@ -111,7 +119,7 @@ const Table = Node.create({
return ["table", ["tbody", 0]];
},
});
const TableRow = Node.create({
const TableRow = core_1.Node.create({
name: "tableRow",
content: "(tableCell | tableHeader)*",
parseHTML() {
@@ -134,7 +142,7 @@ const cellAttributes = () => ({
renderHTML: (attrs) => attrs.align ? { align: attrs.align } : {},
},
});
const TableCell = Node.create({
const TableCell = core_1.Node.create({
name: "tableCell",
content: "block+",
isolating: true,
@@ -146,7 +154,7 @@ const TableCell = Node.create({
return ["td", 0];
},
});
const TableHeader = Node.create({
const TableHeader = core_1.Node.create({
name: "tableHeader",
content: "block+",
isolating: true,
@@ -163,7 +171,7 @@ const TableHeader = Node.create({
* do not declare. Without these, Node.fromJSON silently drops them —
* including the block ids that heading anchors rely on.
*/
const DocmostAttributes = Extension.create({
const DocmostAttributes = core_1.Extension.create({
name: "docmostAttributes",
addGlobalAttributes() {
return [
@@ -205,7 +213,7 @@ const DocmostAttributes = Extension.create({
* which breaks update_page_json and edit_page_text on every commented page.
* Mirrors Docmost's @docmost/editor-ext comment mark (commentId / resolved).
*/
const Comment = Mark.create({
const Comment = core_1.Mark.create({
name: "comment",
exitable: true,
inclusive: false,
@@ -238,15 +246,15 @@ const Comment = Mark.create({
* attribute. The parsed color is passed through the allowlist guard so a crafted
* style cannot break out of the attribute when Docmost re-renders it.
*/
const TextStyle = Mark.create({
const TextStyle = core_1.Mark.create({
name: "textStyle",
addAttributes() {
return {
color: {
default: null,
parseHTML: (el) => sanitizeCssColor(el.style.color || el.getAttribute("data-color")),
parseHTML: (el) => (0, exports.sanitizeCssColor)(el.style.color || el.getAttribute("data-color")),
renderHTML: (attrs) => {
const color = sanitizeCssColor(attrs.color);
const color = (0, exports.sanitizeCssColor)(attrs.color);
return color ? { style: `color: ${color}` } : {};
},
},
@@ -289,7 +297,7 @@ const TextStyle = Mark.create({
* pattern these follow.
*/
/** Docmost @mention (user/page reference). Inline atom. */
const Mention = Node.create({
const Mention = core_1.Node.create({
name: "mention",
group: "inline",
inline: true,
@@ -343,7 +351,7 @@ const Mention = Node.create({
},
});
/** Inline KaTeX expression. Carries the LaTeX source in `text`. */
const MathInline = Node.create({
const MathInline = core_1.Node.create({
name: "mathInline",
group: "inline",
inline: true,
@@ -365,7 +373,7 @@ const MathInline = Node.create({
},
});
/** Block KaTeX expression. Carries the LaTeX source in `text`. */
const MathBlock = Node.create({
const MathBlock = core_1.Node.create({
name: "mathBlock",
group: "block",
atom: true,
@@ -387,7 +395,7 @@ const MathBlock = Node.create({
},
});
/** Collapsible <details> wrapper: summary + content children. */
const Details = Node.create({
const Details = core_1.Node.create({
name: "details",
group: "block",
content: "detailsSummary detailsContent",
@@ -410,7 +418,7 @@ const Details = Node.create({
},
});
/** Clickable summary line of a <details> block. */
const DetailsSummary = Node.create({
const DetailsSummary = core_1.Node.create({
name: "detailsSummary",
group: "block",
content: "inline*",
@@ -425,7 +433,7 @@ const DetailsSummary = Node.create({
},
});
/** Body of a <details> block. Permissive content so fromYdoc output validates. */
const DetailsContent = Node.create({
const DetailsContent = core_1.Node.create({
name: "detailsContent",
group: "block",
// Docmost declares block* (an empty details body is valid); block+ would
@@ -441,7 +449,7 @@ const DetailsContent = Node.create({
},
});
/** File attachment card (non-image upload). Block atom. */
const Attachment = Node.create({
const Attachment = core_1.Node.create({
name: "attachment",
group: "block",
inline: false,
@@ -493,7 +501,7 @@ const Attachment = Node.create({
},
});
/** Uploaded <video> player. Block atom. */
const Video = Node.create({
const Video = core_1.Node.create({
name: "video",
group: "block",
isolating: true,
@@ -564,7 +572,7 @@ const Video = Node.create({
* references this type, so accept it as a generic block atom that preserves
* its src so legacy/external documents survive a round-trip.
*/
const Youtube = Node.create({
const Youtube = core_1.Node.create({
name: "youtube",
group: "block",
inline: false,
@@ -606,7 +614,7 @@ const Youtube = Node.create({
},
});
/** Generic embed (provider iframe). Block atom. */
const Embed = Node.create({
const Embed = core_1.Node.create({
name: "embed",
group: "block",
inline: false,
@@ -713,7 +721,7 @@ const diagramAttributes = () => ({
},
});
/** draw.io diagram. Block atom (image-backed). */
const Drawio = Node.create({
const Drawio = core_1.Node.create({
name: "drawio",
group: "block",
inline: false,
@@ -730,7 +738,7 @@ const Drawio = Node.create({
},
});
/** Excalidraw diagram. Block atom (image-backed). */
const Excalidraw = Node.create({
const Excalidraw = core_1.Node.create({
name: "excalidraw",
group: "block",
inline: false,
@@ -747,7 +755,7 @@ const Excalidraw = Node.create({
},
});
/** Multi-column layout container holding one or more `column` children. */
const Columns = Node.create({
const Columns = core_1.Node.create({
name: "columns",
group: "block",
content: "column+",
@@ -777,7 +785,7 @@ const Columns = Node.create({
},
});
/** Single column within a `columns` layout. */
const Column = Node.create({
const Column = core_1.Node.create({
name: "column",
group: "block",
content: "block+",
@@ -808,7 +816,7 @@ const Column = Node.create({
* declares no attributes; the markdown-converter has a `case "subpages"`, so
* the read path can emit it and toYdoc must accept it. Block atom.
*/
const Subpages = Node.create({
const Subpages = core_1.Node.create({
name: "subpages",
group: "block",
inline: false,
@@ -824,7 +832,7 @@ const Subpages = Node.create({
},
});
/** Uploaded <audio> player. Block atom. Mirrors Docmost audio attrs. */
const Audio = Node.create({
const Audio = core_1.Node.create({
name: "audio",
group: "block",
inline: false,
@@ -864,7 +872,7 @@ const Audio = Node.create({
},
});
/** Embedded PDF viewer. Block atom. Mirrors Docmost pdf attrs. */
const Pdf = Node.create({
const Pdf = core_1.Node.create({
name: "pdf",
group: "block",
inline: false,
@@ -919,7 +927,7 @@ const Pdf = Node.create({
},
});
/** Page break (print/export divider). Block atom; Docmost declares no attrs. */
const PageBreak = Node.create({
const PageBreak = core_1.Node.create({
name: "pageBreak",
group: "block",
inline: false,
@@ -939,35 +947,35 @@ const PageBreak = Node.create({
* ProseMirror DOM parser hoists <img> found inside <p> automatically.
* StarterKit v3 already bundles the link extension, configured here.
*/
export const docmostExtensions = [
StarterKit.configure({
exports.docmostExtensions = [
starter_kit_1.default.configure({
codeBlock: {},
heading: {},
link: { openOnClick: false },
}),
Image.configure({ inline: false }),
TaskList,
TaskItem.configure({ nested: true }),
extension_image_1.default.configure({ inline: false }),
extension_task_list_1.default,
extension_task_item_1.default.configure({ nested: true }),
// Highlight stores its color unescaped and Docmost interpolates it into
// style="background-color: ${color}". Wrap the color attribute's parseHTML
// with the same allowlist guard used by textStyle so a crafted import color
// cannot break out of the style attribute. Multicolor behavior is preserved.
Highlight.extend({
extension_highlight_1.default.extend({
addAttributes() {
const parent = this.parent?.() ?? {};
return {
...parent,
color: {
...parent.color,
parseHTML: (el) => sanitizeCssColor(el.getAttribute("data-color") ||
parseHTML: (el) => (0, exports.sanitizeCssColor)(el.getAttribute("data-color") ||
getStyleProperty(el, "background-color") ||
el.style.backgroundColor),
},
};
},
}).configure({ multicolor: true }),
Subscript,
Superscript,
extension_subscript_1.default,
extension_superscript_1.default,
// StarterKit does not provide a textStyle mark, so register ours; without it
// generateJSON drops <span style="color: ...">, defeating the color import.
TextStyle,

View File

@@ -1,3 +1,4 @@
"use strict";
/**
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
* tree by node id.
@@ -13,6 +14,19 @@
* never mutated. All functions are defensively null-safe: missing/!Array
* `content`, non-object nodes, and absent `attrs` are tolerated.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.blockPlainText = blockPlainText;
exports.buildOutline = buildOutline;
exports.getNodeByRef = getNodeByRef;
exports.replaceNodeById = replaceNodeById;
exports.deleteNodeById = deleteNodeById;
exports.sanitizeForYjs = sanitizeForYjs;
exports.findUnstorableAttr = findUnstorableAttr;
exports.insertNodeRelative = insertNodeRelative;
exports.readTable = readTable;
exports.insertTableRow = insertTableRow;
exports.deleteTableRow = deleteTableRow;
exports.updateTableCell = updateTableCell;
/** Deep-clone a JSON-serializable value without mutating the original. */
function clone(value) {
if (typeof structuredClone === "function") {
@@ -36,7 +50,7 @@ function matchesId(node, nodeId) {
* joined `blockPlainText` of their `content` children. Returns "" for nullish
* or non-object inputs.
*/
export function blockPlainText(node) {
function blockPlainText(node) {
if (!isObject(node))
return "";
let out = "";
@@ -66,7 +80,7 @@ function truncate(text, n) {
* `firstText` is the block's plain text truncated to 100 chars. Null-safe:
* a missing or non-object doc/content yields `[]`.
*/
export function buildOutline(doc) {
function buildOutline(doc) {
if (!isObject(doc) || !Array.isArray(doc.content))
return [];
const out = [];
@@ -109,7 +123,7 @@ export function buildOutline(doc) {
* (so a top-level block is `[index]`). The returned `node` is a DEEP CLONE,
* so callers can mutate it without touching the input doc. Null-safe.
*/
export function getNodeByRef(doc, ref) {
function getNodeByRef(doc, ref) {
if (!isObject(doc))
return null;
// "#<n>": index into the top-level content array.
@@ -149,7 +163,7 @@ export function getNodeByRef(doc, ref) {
* is the number of nodes substituted. A fresh clone of `newNode` is used for
* each match so they do not share references.
*/
export function replaceNodeById(doc, nodeId, newNode) {
function replaceNodeById(doc, nodeId, newNode) {
const out = clone(doc);
let replaced = 0;
// Walk a content array, replacing direct matches and recursing into the
@@ -180,7 +194,7 @@ export function replaceNodeById(doc, nodeId, newNode) {
* Operates on a clone of `doc`; returns `{ doc, deleted }` where `deleted` is
* the number of nodes removed.
*/
export function deleteNodeById(doc, nodeId) {
function deleteNodeById(doc, nodeId) {
const out = clone(doc);
let deleted = 0;
// Filter a content array in place, dropping matches and recursing into the
@@ -214,7 +228,7 @@ export function deleteNodeById(doc, nodeId) {
* returns it; the input is never mutated. Defensively null-safe like the rest
* of the file.
*/
export function sanitizeForYjs(doc) {
function sanitizeForYjs(doc) {
const out = clone(doc);
// Drop every key whose value is strictly `undefined` from an attrs object.
const stripUndefined = (attrs) => {
@@ -252,7 +266,7 @@ export function sanitizeForYjs(doc) {
* (e.g. `content[3].content[0].attrs.indent (undefined)`). Returns `null` when
* every attribute is storable. Null-safe.
*/
export function findUnstorableAttr(doc) {
function findUnstorableAttr(doc) {
const isUnstorable = (value) => {
if (value === undefined)
return "undefined";
@@ -384,7 +398,7 @@ function findAnchorChain(doc, opts) {
* false when the anchor could not be resolved (the doc is returned unchanged
* apart from being cloned).
*/
export function insertNodeRelative(doc, node, opts) {
function insertNodeRelative(doc, node, opts) {
const out = clone(doc);
const fresh = clone(node);
// Defensive: stay null-safe like the other exports — a missing opts means
@@ -605,7 +619,7 @@ function makeCellParagraph(id, text) {
* so callers can `patch_node` a cell for rich-formatted edits.
* - `path`: index path of the table within the doc.
*/
export function readTable(doc, tableRef) {
function readTable(doc, tableRef) {
const root = clone(doc);
const located = locateTable(root, tableRef);
if (located == null)
@@ -645,7 +659,7 @@ export function readTable(doc, tableRef) {
* `colspan:1, rowspan:1` attrs. `index` (when an integer in `[0, rows]`) splices
* the row there; otherwise the row is appended at the end.
*/
export function insertTableRow(doc, tableRef, cells, index) {
function insertTableRow(doc, tableRef, cells, index) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
@@ -706,7 +720,7 @@ export function insertTableRow(doc, tableRef, cells, index) {
* `deleted` is false only when the table cannot be located. Throws on an
* out-of-range index, and refuses to delete the table's only row.
*/
export function deleteTableRow(doc, tableRef, index) {
function deleteTableRow(doc, tableRef, index) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
@@ -732,7 +746,7 @@ export function deleteTableRow(doc, tableRef, index) {
* that reuses the cell's existing first-paragraph id when present, else a fresh
* one.
*/
export function updateTableCell(doc, tableRef, row, col, text) {
function updateTableCell(doc, tableRef, row, col, text) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)

View File

@@ -3,11 +3,13 @@
"version": "0.1.0",
"description": "Vendored pure converter + pure sync engine for the Docmost <-> git Markdown sync (Phase A). See docs/git-sync-plan.md.",
"private": true,
"type": "module",
"main": "./build/index.js",
"types": "./build/index.d.ts",
"exports": {
".": "./build/index.js"
".": {
"types": "./build/index.d.ts",
"default": "./build/index.js"
}
},
"scripts": {
"build": "tsc",

View File

@@ -10,7 +10,7 @@
* lives in each file's meta block (pageId / slugId).
*/
import { sanitizeTitle, disambiguate } from "./sanitize.js";
import { sanitizeTitle, disambiguate } from "./sanitize";
/** Flat page node as returned by `listAllSpacePages` (no content). */
export interface PageNode {

View File

@@ -17,7 +17,7 @@ import {
markdownToProseMirror,
serializeDocmostMarkdownBody,
type DocmostMdMeta,
} from "../lib/index.js";
} from "../lib/index";
/**
* Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte

View File

@@ -15,8 +15,8 @@ export {
markdownToProseMirror,
canonicalizeContent,
docsCanonicallyEqual,
} from "./lib/index.js";
export type { DocmostMdMeta } from "./lib/index.js";
} from "./lib/index";
export type { DocmostMdMeta } from "./lib/index";
// Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize,
// loop-guard body hash.
@@ -25,7 +25,7 @@ export {
decideAbsenceDeletions,
MASS_DELETE_MIN_EXISTING,
MASS_DELETE_FRACTION,
} from "./engine/reconcile.js";
} from "./engine/reconcile";
export type {
LiveEntry,
ExistingEntry,
@@ -33,14 +33,14 @@ export type {
MovedEntry,
ReconciliationPlan,
DeletionDecision,
} from "./engine/reconcile.js";
} from "./engine/reconcile";
export { buildVaultLayout } from "./engine/layout.js";
export type { PageNode, VaultEntry } from "./engine/layout.js";
export { buildVaultLayout } from "./engine/layout";
export type { PageNode, VaultEntry } from "./engine/layout";
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
export { sanitizeTitle, disambiguate } from "./engine/sanitize";
export { stabilizePageFile } from "./engine/stabilize.js";
export type { PageMeta } from "./engine/stabilize.js";
export { stabilizePageFile } from "./engine/stabilize";
export type { PageMeta } from "./engine/stabilize";
export { bodyHash } from "./engine/loop-guard.js";
export { bodyHash } from "./engine/loop-guard";

View File

@@ -21,7 +21,7 @@ import { getSchema } from "@tiptap/core";
import { Node } from "@tiptap/pm/model";
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
import { docmostExtensions } from "./docmost-schema.js";
import { docmostExtensions } from "./docmost-schema";
/** A single inserted/deleted change with its containing-block context. */
export interface DiffChange {

View File

@@ -14,14 +14,14 @@ export {
serializeDocmostMarkdown,
parseDocmostMarkdown,
serializeDocmostMarkdownBody,
} from "./markdown-document.js";
export type { DocmostMdMeta } from "./markdown-document.js";
} from "./markdown-document";
export type { DocmostMdMeta } from "./markdown-document";
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
export { convertProseMirrorToMarkdown } from "./markdown-converter";
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
export { markdownToProseMirror } from "./markdown-to-prosemirror";
export {
canonicalizeContent,
docsCanonicallyEqual,
} from "./canonicalize.js";
} from "./canonicalize";

View File

@@ -9,10 +9,59 @@
* lives in the same upstream file is intentionally NOT vendored — the gitmost
* server writes page bodies natively through the collab gateway (plan §3.3).
*/
import { marked } from "marked";
import { generateJSON } from "@tiptap/html";
import { JSDOM } from "jsdom";
import { docmostExtensions } from "./docmost-schema.js";
import { docmostExtensions } from "./docmost-schema";
/**
 * Minimal structural view of the ESM `marked` module namespace.
 *
 * Only the surface this file consumes is described: the `marked` named export
 * and its `parse` method, which turns a markdown string into an HTML string
 * (returned directly or through a promise).
 */
interface MarkedModule {
  marked: {
    parse(markdown: string): string | Promise<string>;
  };
}
/** Signature of a native dynamic `import()` call, typed loosely on purpose. */
type DynamicImport = (specifier: string) => Promise<unknown>;

// `marked` ships as ESM only. With this package compiled to CommonJS, TypeScript
// rewrites a literal `import()` expression into `require()`, and `require()`
// cannot load an ESM-only module. Building the call through the `Function`
// constructor hides it from the compiler, so a genuine dynamic `import()` is
// what runs in Node at runtime (same trick as
// apps/server/src/core/ai-chat/tools/docmost-client.loader.ts).
const esmImport = new Function(
  "specifier",
  "return import(specifier)",
) as DynamicImport;
// Memoize the in-flight/loaded module so the dynamic import runs at most once.
// Reset to `null` when loading fails so a later call can retry.
let markedPromise: Promise<MarkedModule> | null = null;

/**
 * Lazily load the ESM-only `marked` module (cached).
 *
 * Loading strategy:
 *  1. Try the `esmImport` Function trick, which performs a real dynamic
 *     `import()` of the ESM module in the built CommonJS package (Node, jest
 *     with ts-jest).
 *  2. If that rejects, fall back to `require("marked")`. Under vitest the
 *     transformed module is evaluated without a dynamic-import callback, so the
 *     Function-based import fails with "A dynamic import callback was not
 *     specified"; there `require` succeeds because the test runner's loader
 *     interops ESM.
 *
 * When BOTH strategies fail, the rejection carries the reasons of both
 * attempts (the import failure is no longer silently discarded): the message
 * names both, `cause` holds the `require` error and `importError` holds the
 * dynamic-import error.
 *
 * @returns The `marked` named export of the loaded module.
 * @throws Error when neither the dynamic import nor the `require` fallback can
 *   load `marked`. The failed attempt is not cached, so the next call retries.
 */
async function loadMarked(): Promise<MarkedModule["marked"]> {
  if (!markedPromise) {
    markedPromise = (esmImport("marked") as Promise<MarkedModule>)
      .catch((importErr: unknown) => {
        // Function-trick import is unavailable (e.g. under vitest's evaluator):
        // fall back to require, which the test runner can interop for ESM.
        try {
          // eslint-disable-next-line @typescript-eslint/no-var-requires
          return require("marked") as MarkedModule;
        } catch (requireErr: unknown) {
          const reasonOf = (e: unknown): string =>
            e instanceof Error ? e.message : String(e);
          // Surface both failures: on the production CommonJS path the import
          // error is usually the real culprit, and it must not be lost.
          throw Object.assign(
            new Error(
              `Unable to load "marked": dynamic import failed (${reasonOf(importErr)}); ` +
                `require fallback failed (${reasonOf(requireErr)})`,
            ),
            { cause: requireErr, importError: importErr },
          );
        }
      })
      .catch((err: unknown) => {
        // Do not cache a rejected import — allow the next call to retry.
        markedPromise = null;
        throw err;
      });
  }
  return (await markedPromise).marked;
}
// Setup DOM environment for Tiptap HTML parsing in Node.js
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
@@ -61,6 +110,8 @@ async function preprocessCallouts(markdown: string): Promise<string> {
return markdown;
}
const marked = await loadMarked();
// Recursively transform a slice of lines, converting top-level callouts in
// that slice into <div> blocks and rendering their inner content (which may
// itself contain nested callouts) through this same function.
@@ -290,6 +341,7 @@ function bridgeTaskLists(html: string): string {
export async function markdownToProseMirror(
markdownContent: string,
): Promise<any> {
const marked = await loadMarked();
const withCallouts = await preprocessCallouts(markdownContent);
const html = await marked.parse(withCallouts);
const bridged = bridgeTaskLists(html);

View File

@@ -1,8 +1,8 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "Node16",
"moduleResolution": "Node16",
"module": "CommonJS",
"moduleResolution": "Node",
"outDir": "./build",
"rootDir": "./src",
"strict": true,