diff --git a/.gitignore b/.gitignore index 2619e48e..6db827eb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ data # compiled output /dist -node_modules/ +node_modules # git-sync compiled output (built in CI/Docker via `pnpm build`, never committed, # so src/ and prod can never silently diverge). diff --git a/packages/git-sync/package.json b/packages/git-sync/package.json index cce08975..251725c5 100644 --- a/packages/git-sync/package.json +++ b/packages/git-sync/package.json @@ -20,6 +20,7 @@ }, "license": "MIT", "dependencies": { + "@docmost/prosemirror-markdown": "workspace:*", "@tiptap/core": "3.20.4", "@tiptap/extension-highlight": "3.20.4", "@tiptap/extension-image": "3.20.4", diff --git a/packages/git-sync/src/engine/pull.ts b/packages/git-sync/src/engine/pull.ts index b541c67a..3d7868d5 100644 --- a/packages/git-sync/src/engine/pull.ts +++ b/packages/git-sync/src/engine/pull.ts @@ -31,7 +31,7 @@ */ import { dirname } from "node:path"; import { sep } from "node:path"; -import { parsePageFile, serializePageFile } from "../lib/page-file.js"; +import { parsePageFile, serializePageFile } from "@docmost/prosemirror-markdown"; import type { GitSyncClient } from "./client.types.js"; import { buildVaultLayout, type PageNode } from "./layout.js"; import { diff --git a/packages/git-sync/src/engine/push.ts b/packages/git-sync/src/engine/push.ts index 63d28530..903931be 100644 --- a/packages/git-sync/src/engine/push.ts +++ b/packages/git-sync/src/engine/push.ts @@ -26,8 +26,11 @@ * the gitmost server drives the engine in-process (there is no standalone CLI * entry point). */ -import { type DocmostMdMeta } from "../lib/index.js"; -import { parsePageFile, serializePageFile } from "../lib/page-file.js"; +import { + type DocmostMdMeta, + parsePageFile, + serializePageFile, +} from "@docmost/prosemirror-markdown"; import type { GitSyncClient } from "./client.types.js"; import type { DiffEntry } from "./git.js"; import { VaultGit, DEFAULT_BRANCH } from "./git.js"; diff --git a/packages/git-sync/src/engine/stabilize.ts b/packages/git-sync/src/engine/stabilize.ts index a075b634..ce1acdcf 100644 --- a/packages/git-sync/src/engine/stabilize.ts +++ b/packages/git-sync/src/engine/stabilize.ts @@ -17,7 +17,7 @@ import { markdownToProseMirror, serializeDocmostMarkdownBody, type DocmostMdMeta, -} from "../lib/index.js"; +} from "@docmost/prosemirror-markdown"; /** * Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte diff --git a/packages/git-sync/src/index.ts b/packages/git-sync/src/index.ts index a52ca8d3..8c9e87eb 100644 --- a/packages/git-sync/src/index.ts +++ b/packages/git-sync/src/index.ts @@ -8,6 +8,10 @@ */ // Pure converter (markdown <-> ProseMirror, file envelope, canonicalization). +// Re-exported from the standalone `@docmost/prosemirror-markdown` package, +// which is the single source of truth for the converter core; git-sync keeps +// only the engine (vault/git/orchestrator) and re-surfaces the converter for +// in-process consumers of the git-sync barrel. export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, @@ -16,8 +20,8 @@ export { markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, -} from "./lib/index.js"; -export type { DocmostMdMeta } from "./lib/index.js"; +} from "@docmost/prosemirror-markdown"; +export type { DocmostMdMeta } from "@docmost/prosemirror-markdown"; // Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize, // loop-guard body hash. @@ -123,4 +127,4 @@ export { } from "./engine/path-guard.js"; export type { PathGuardIo, VaultPathUnsafeReason } from "./engine/path-guard.js"; -export { parsePageFile, serializePageFile } from "./lib/page-file.js"; +export { parsePageFile, serializePageFile } from "@docmost/prosemirror-markdown"; diff --git a/packages/git-sync/src/lib/canonicalize.ts b/packages/git-sync/src/lib/canonicalize.ts deleted file mode 100644 index 99ff5bc6..00000000 --- a/packages/git-sync/src/lib/canonicalize.ts +++ /dev/null @@ -1,247 +0,0 @@ -/** - * Semantic canonicalization of ProseMirror/TipTap documents for the round-trip - * idempotency check (SPEC §11, "Task #0", option (b): compare a CANONICALIZED - * form rather than raw bytes). - * - * `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g. - * `indent: null` where the source omitted it) and regenerates per-block ids on - * every import. A raw deep-equal of the source doc against the re-imported doc - * therefore diverges even when the two are semantically identical. This module - * normalizes a document so that two semantically-equal docs compare deep-equal - * regardless of block ids and absent-vs-explicit-default-null attributes. - * - * It is a self-contained module with no external dependencies. - */ - -/** - * Known NON-NULL schema defaults that `markdownToProseMirror` materializes on - * import, keyed by node/mark type → { attr: defaultValue }. - * - * Why this exists: `canonicalizeAttrs` already treats an absent attr as - * equivalent to an explicit `null`/`undefined`. But several Docmost schema - * attributes default to a NON-null value, so import fills them in even when the - * source omitted them — making "attr absent" diverge from "attr at its default - * value" under a raw deep-equal. To keep "absent ≡ explicit-default", we ALSO - * drop any attr whose value equals its known schema default. A non-default - * value (e.g. `orderedList.start: 5`) is NOT a default, so it is KEPT. - * - * Every entry below was read from `packages/docmost-client/src/lib/ - * docmost-schema.ts` (the line refs are the exact `default:` declarations) and - * confirmed to be materialized by an export→import→export round-trip: - * - mark `link` target / rel — DocmostAttributes + StarterKit link. - * StarterKit's link extension defaults `target: "_blank"` and - * `rel: "noopener noreferrer nofollow"`; both materialize on import - * (empirically confirmed) even when the source had only `href`. - * - mark `comment` resolved — docmost-schema.ts L213-214 (`default: false`). - * - node `orderedList` start — provided by StarterKit's orderedList - * (`default: 1`); materializes on import (empirically confirmed). - * - node `drawio`/`excalidraw`/`video`/`youtube`/`embed` align — the diagram - * attribute set and the media nodes declare `align: { default: "center" }` - * (docmost-schema.ts L745-750 diagramAttributes; L564 video; L626 youtube; - * L667 embed). The diagram `align` is the one the round-trip materializes - * (docmost-schema.ts L745); the media/embed entries normalize the SAME - * `align` default for consistency. Note: this only normalizes `align` — - * full canonical stability of `embed` is separately limited by the - * converter coercing numeric `width`/`height` to strings, which is outside - * canonicalize's scope. - * - * NOTE: `image` has NO non-null align default — its `align` defaults to `null` - * (docmost-schema.ts L174), so it is already handled by the null-drop rule and - * is intentionally NOT listed here. - */ -const KNOWN_DEFAULTS: Record> = { - // mark types - link: { - target: "_blank", - rel: "noopener noreferrer nofollow", - }, - comment: { - resolved: false, - }, - // node types - orderedList: { - start: 1, - }, - drawio: { - align: "center", - }, - excalidraw: { - align: "center", - }, - video: { - align: "center", - }, - youtube: { - align: "center", - }, - embed: { - align: "center", - }, -}; - -/** - * Prune an `attrs` object in place on a fresh copy: drop keys whose value is - * `null` or `undefined` (an absent attribute and an explicit default of `null` - * are semantically equivalent here). Optionally also drop a node-level `id` - * (block ids are regenerated on import, SPEC §11). ALSO drop any attr whose - * value equals the node/mark `type`'s known NON-null schema default - * (`KNOWN_DEFAULTS`), so "attr absent" ≡ "attr at its default value" — without - * this, the import-materialized `link.target`/`comment.resolved`/ - * `orderedList.start`/diagram `align` defaults would be a phantom diff. Every - * non-default attribute value is KEPT (level, language, src, href, commentId, - * width, a non-default `start`/`align`, ...). - * - * Returns the pruned attrs object, or `undefined` if nothing meaningful is - * left (so the caller can drop the `attrs` key entirely: `{attrs:{}}` ≡ no - * attrs). - */ -function canonicalizeAttrs( - attrs: Record, - dropId: boolean, - type: string | undefined, -): Record | undefined { - const defaults = type ? KNOWN_DEFAULTS[type] : undefined; - const out: Record = {}; - // Stable key order so a JSON.stringify of the canonical form is comparable - // regardless of the input's key order. - for (const key of Object.keys(attrs).sort()) { - // Block ids are regenerated on import; drop them on NODE attrs only. - if (dropId && key === "id") continue; - const value = attrs[key]; - // Absent ≡ explicit-default-null/undefined. - if (value === null || value === undefined) continue; - // Absent ≡ explicit known non-null default (e.g. link.target="_blank"). - // A non-default value (e.g. orderedList.start=5) does NOT match, so it is - // kept. The `comment` mark's `commentId` is never a default, so it always - // survives (SPEC §3); only its `resolved: false` default is normalized away. - if (defaults && key in defaults && value === defaults[key]) continue; - out[key] = value; - } - return Object.keys(out).length > 0 ? out : undefined; -} - -/** - * Return a DEEP COPY of a ProseMirror node tree, canonicalized so that two - * semantically-equal documents compare deep-equal. Rules (applied recursively - * to the node, its `content`, and its `marks`): - * - * 1. Remove node-level `attrs.id` (regenerated on import). Mark attrs are NOT - * touched for `id` (marks carry no block id; only their meaningful attrs). - * 2. In any `attrs` object (node OR mark) drop keys whose value is `null`/ - * `undefined` (absent ≡ explicit default null) OR equals that node/mark - * type's known non-null schema default (absent ≡ explicit default). - * Keep every non-default value. The type is passed into the attrs - * normalizer so it can look up `KNOWN_DEFAULTS`. - * 3. If an `attrs` object becomes empty after pruning, drop the `attrs` key. - * 4. Preserve `marks` (including the `comment` mark and its `commentId` — a - * meaningful anchor per SPEC §3; never strip it). - * 5. Preserve `text`, `type`, and `content` order exactly. - * 6. Never mutate the input. - */ -export function canonicalizeContent(node: any): any { - if (Array.isArray(node)) { - return node.map((child) => canonicalizeContent(child)); - } - if (node === null || typeof node !== "object") { - // Primitive leaf (string/number/boolean/null): returned as-is. - return node; - } - - // A node is a mark when it has a `type` but never carries block `content` - // and lives inside a `marks` array. We cannot tell from the node alone, so - // we distinguish at the recursion site: node `attrs` drop `id`, mark `attrs` - // do not. This is handled by passing a `dropId` flag down for the `attrs` - // key specifically (nodes) vs the `marks[].attrs` path (marks). - const out: Record = {}; - for (const key of Object.keys(node)) { - if (key === "attrs" && node.attrs && typeof node.attrs === "object") { - // Node-level attrs: drop the block id, null/undefined attrs, and any - // attr at this node type's known non-null schema default. - const canon = canonicalizeAttrs( - node.attrs as Record, - true, - typeof node.type === "string" ? node.type : undefined, - ); - if (canon !== undefined) out.attrs = canon; - // else: drop the `attrs` key entirely (rule 3). - } else if (key === "marks" && Array.isArray(node.marks)) { - // Marks: keep them all (incl. comment); canonicalize their attrs but do - // NOT drop `id` (a mark's `id` would be a meaningful attr, not a block - // id). An empty marks array is dropped so `marks:[]` ≡ no marks. - const marks = (node.marks as any[]).map((mark) => canonicalizeMark(mark)); - if (marks.length > 0) out.marks = marks; - } else { - out[key] = canonicalizeContent(node[key]); - } - } - return out; -} - -/** - * Canonicalize a single mark: keep `type`, prune its `attrs` (null/undefined - * AND known non-null defaults dropped, empty attrs removed) but NEVER drop a - * mark's attribute as a "block id" — marks have no block id, only meaningful - * attrs (href, commentId, color, level, ...). Meaningful NON-default attrs - * survive (the `comment` mark's `commentId` is never a default, so it always - * survives — SPEC §3); only known defaults like `link.target="_blank"`, - * `link.rel="noopener…"` and `comment.resolved=false` are normalized away. - */ -function canonicalizeMark(mark: any): any { - if (mark === null || typeof mark !== "object") return mark; - const out: Record = {}; - for (const key of Object.keys(mark)) { - if (key === "attrs" && mark.attrs && typeof mark.attrs === "object") { - const canon = canonicalizeAttrs( - mark.attrs as Record, - false, - typeof mark.type === "string" ? mark.type : undefined, - ); - if (canon !== undefined) out.attrs = canon; - } else { - out[key] = canonicalizeContent(mark[key]); - } - } - return out; -} - -/** - * Deep structural equality of two values that is key-order-insensitive. - * Used to compare canonical forms. (`canonicalizeContent` already emits - * `attrs` in a stable key order, but the top-level node keys preserve input - * order, so we compare structurally rather than by string.) - */ -function deepEqual(a: any, b: any): boolean { - if (a === b) return true; - if (typeof a !== typeof b) return false; - if (a === null || b === null) return a === b; - if (typeof a !== "object") return false; - - const aIsArr = Array.isArray(a); - const bIsArr = Array.isArray(b); - if (aIsArr !== bIsArr) return false; - - if (aIsArr) { - if (a.length !== b.length) return false; - for (let i = 0; i < a.length; i++) { - if (!deepEqual(a[i], b[i])) return false; - } - return true; - } - - const aKeys = Object.keys(a); - const bKeys = Object.keys(b); - if (aKeys.length !== bKeys.length) return false; - for (const k of aKeys) { - if (!Object.prototype.hasOwnProperty.call(b, k)) return false; - if (!deepEqual(a[k], b[k])) return false; - } - return true; -} - -/** - * True when two ProseMirror documents are semantically equal: equal after - * canonicalization (block ids stripped, absent-vs-default-null normalized). - */ -export function docsCanonicallyEqual(a: any, b: any): boolean { - return deepEqual(canonicalizeContent(a), canonicalizeContent(b)); -} diff --git a/packages/git-sync/src/lib/docmost-schema.ts b/packages/git-sync/src/lib/docmost-schema.ts deleted file mode 100644 index 276efe90..00000000 --- a/packages/git-sync/src/lib/docmost-schema.ts +++ /dev/null @@ -1,1544 +0,0 @@ -/** - * Full TipTap extension set matching the real Docmost document schema. - * - * The default StarterKit-only schema silently destroys Docmost-specific - * nodes (callout, table) and drops attributes it does not know about - * (node ids, image sizing, link targets). Every code path that converts - * to or from ProseMirror JSON must use THIS set, otherwise a round-trip - * loses content. - * - * PROVENANCE / KEEP IN SYNC: this file is a VENDORED MIRROR of the canonical - * Docmost document schema in `@docmost/editor-ext`. The node/mark/attribute - * surface MUST be kept in sync with editor-ext — anything present there but - * missing here is silently dropped on a round-trip (data loss). The exported - * `docmostExtensions` surface is guarded by `test/schema-surface-snapshot.test.ts`, - * which fails loudly on any drift; when it does, re-verify parity against - * `@docmost/editor-ext` before updating the snapshot. - */ -import StarterKit from "@tiptap/starter-kit"; -import Image from "@tiptap/extension-image"; -import TaskList from "@tiptap/extension-task-list"; -import TaskItem from "@tiptap/extension-task-item"; -import Highlight from "@tiptap/extension-highlight"; -import Subscript from "@tiptap/extension-subscript"; -import Superscript from "@tiptap/extension-superscript"; -import { Node, Extension, Mark } from "@tiptap/core"; - -// Inlined from @tiptap/core's getStyleProperty (added after 3.20.x) so this -// package can stay on the same @tiptap/core version as the editor and avoid a -// duplicate-tiptap version split in the monorepo. Reads a single declaration -// from an element's inline `style` attribute, last-wins, case-insensitive. -function getStyleProperty(element: HTMLElement, propertyName: string): string | null { - const styleAttr = element.getAttribute("style"); - if (!styleAttr) { - return null; - } - const decls = styleAttr.split(";").map((decl) => decl.trim()).filter(Boolean); - const target = propertyName.toLowerCase(); - for (let i = decls.length - 1; i >= 0; i -= 1) { - const decl = decls[i]; - const colonIndex = decl.indexOf(":"); - if (colonIndex === -1) { - continue; - } - const prop = decl.slice(0, colonIndex).trim().toLowerCase(); - if (prop === target) { - return decl.slice(colonIndex + 1).trim(); - } - } - return null; -} - -/** - * Allowed Docmost callout types; anything else falls back to "info". - * - * This MUST stay in lockstep with the editor's canonical set - * (`getValidCalloutType` in `@docmost/editor-ext` callout/utils.ts: - * default | info | note | success | warning | danger). A type missing here is - * silently flattened to "info" on the markdown -> ProseMirror round-trip, so a - * `[!note]` / `[!default]` callout authored in the editor would come back as - * `[!info]` after a git sync (the QA "callout type -> [!info]" fidelity loss). - * `note` and `default` were previously absent and so were being flattened. - * - * The editor SCHEMA genuinely only supports these six banner types — there is no - * `tip`/`caution`/`important`/`question` callout node. So those are NOT first- - * class types we can round-trip literally; they are INPUT ALIASES (GitHub/Obsidian - * alert syntax). The editor's own paste/import path maps them onto the supported - * set (see `GITHUB_ALERT_TYPE_MAP` in - * `@docmost/editor-ext` markdown/utils/github-callout.marked.ts: - * tip -> success, caution -> danger, important -> info). We mirror that aliasing - * here so an ingested `> [!tip]` / `> [!caution]` lands on the closest real banner - * (success / danger) instead of flatly collapsing to `info` — matching exactly how - * the editor itself would interpret the same alias. A schema type always maps to - * itself first (idempotent round-trip); the alias map only rewrites NON-schema - * names; anything still unknown falls back to `info`. - */ -const CALLOUT_TYPES = ["default", "info", "note", "success", "warning", "danger"]; -/** - * NON-schema callout aliases -> their closest supported banner. Mirrors the - * editor's `GITHUB_ALERT_TYPE_MAP` for the names that are NOT already schema - * types (a schema type is preserved as-is and never consulted here). Keeping - * these in lockstep means git-sync ingest and an editor paste interpret the same - * `> [!alias]` identically. - */ -const CALLOUT_TYPE_ALIASES: Record = { - tip: "success", - caution: "danger", - important: "info", -}; -export const clampCalloutType = (value: string | null | undefined): string => { - if (!value) return "info"; - const lower = value.toLowerCase(); - // A real schema type round-trips to itself (idempotent). - if (CALLOUT_TYPES.includes(lower)) return lower; - // A known GitHub/Obsidian alias maps to the editor's closest banner. - if (CALLOUT_TYPE_ALIASES[lower]) return CALLOUT_TYPE_ALIASES[lower]; - // Anything else is collapsed to the safe default (matches the editor). - return "info"; -}; - -/** - * Allowlist guard for CSS color values imported from HTML. - * - * Docmost interpolates stored mark colors straight into an inline style - * attribute (e.g. style="background-color: ${color}" / "color: ${color}"). - * An unsanitized value such as `red; --x: url(...)` or `red">