feat(git-sync): vendor pure converter + engine into @docmost/git-sync (Phase A.1)
First step of docs/git-sync-plan.md. New workspace package @docmost/git-sync vendoring the PURE parts from docmost-sync (HEAD b03eb35): - lib: markdown-converter, markdown-document, canonicalize, docmost-schema, node-ops, diff, and an extracted markdown-to-prosemirror (only the pure marked->HTML->generateJSON path from upstream collaboration.ts; no websocket). - engine (pure, no IO): reconcile, layout, sanitize, stabilize, loop-guard. Ported the upstream pure-module + round-trip corpus tests (vitest): 314 pass, 3 expected upstream known-limitation fails. tsc clean. No server wiring yet. docmost-schema inlines getStyleProperty (as packages/mcp does — @tiptap/core 3.20.4 doesn't export it). IO engine (pull/push/git/settings) deferred to later Phase A/B steps; the editor-ext idempotency gate (plan §13.1) is the next step. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,170 +0,0 @@
|
||||
/**
|
||||
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||
*
|
||||
* Given the flat list of page nodes for a space (as returned by
|
||||
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||
* page's own name, no extension). This module is intentionally PURE and
|
||||
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||
* lives in each file's meta block (pageId / slugId).
|
||||
*/
|
||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||
/**
 * Build the full vault layout for a space.
 *
 * Returns a Map keyed by pageId -> `{ segments, stem }`, where `segments` is
 * the folder path (root -> leaf) and `stem` is the file name without
 * extension. The result is deterministic for a given input, and the full
 * destination path (`[...segments, stem].join("/")`) of every page is made
 * unique so no page can silently overwrite another.
 *
 * Duplicate ids in `pages` are collapsed up front: the FIRST occurrence of an
 * id wins and is the only one consulted by every later pass (including its
 * `hasChildren` flag). Entries that are falsy or have no `id` are ignored.
 *
 * Disambiguation is layered:
 *   1. Sibling collisions (same sanitized title under the same parent) are
 *      resolved by `nameForNode` with a suffix derived from the sanitized
 *      slugId (or id).
 *   2. A final full-path pass catches residual collisions that sibling
 *      scoping cannot see.
 *
 * @param pages Flat list of page nodes; each is read for `id`, `parentPageId`,
 *   `title`, `slugId` and `hasChildren`.
 * @returns Map of pageId -> `{ segments: string[], stem: string }`.
 * @throws Error if an indexed page id has no resolved name (internal
 *   invariant violation).
 */
export function buildVaultLayout(pages) {
    // Index pages by id so the parent chain can be walked. First occurrence of
    // an id wins; a Map iterates in insertion order, so `uniquePages` keeps the
    // input order of those first occurrences.
    const byId = new Map();
    for (const p of pages) {
        if (p && p.id && !byId.has(p.id))
            byId.set(p.id, p);
    }
    // Single deduped work list shared by EVERY pass below. Iterating the raw
    // `pages` array in some passes would re-apply per-page transforms once per
    // duplicate occurrence (e.g. nesting a folder-note twice).
    const uniquePages = [...byId.values()];

    // Resolve each node's display name once, tracking sibling collisions per
    // parent. The bucket key is the node's parent ONLY when that parent is
    // present in `byId`; otherwise (null parent, or an orphan whose parent is
    // outside the input set) the node buckets at "__root__", because
    // `folderSegmentsFor` places such orphans at the vault root too. Names are
    // final before any `segments` are computed.
    const usedBySibling = new Map();
    const nameById = new Map();
    for (const p of uniquePages) {
        const parentKey = p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
        nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
    }

    // Look up a resolved name, THROWING if it is absent rather than silently
    // recomputing a different, non-disambiguated name (which would desync a
    // folder segment from its target file).
    const nameOf = (id) => {
        const name = nameById.get(id);
        if (name === undefined) {
            throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
        }
        return name;
    };

    // Folder path for a page: walk parentPageId towards the root, collecting
    // ancestor names root-first. The `visited` set stops the walk on a
    // malformed parent cycle instead of looping forever.
    const folderSegmentsFor = (node) => {
        const ancestors = [];
        const visited = new Set();
        let current = node.parentPageId
            ? byId.get(node.parentPageId)
            : undefined;
        while (current && current.id && !visited.has(current.id)) {
            visited.add(current.id);
            ancestors.unshift(nameOf(current.id));
            current = current.parentPageId
                ? byId.get(current.parentPageId)
                : undefined;
        }
        return ancestors;
    };

    // First pass: provisional { segments, stem } for every page.
    const layout = new Map();
    for (const p of uniquePages) {
        layout.set(p.id, {
            segments: folderSegmentsFor(p),
            stem: nameOf(p.id),
        });
    }

    // FOLDER-NOTE transform: a page WITH CHILDREN lives at
    // `<…>/<stem>/<stem>.md` (its body sits inside its own folder, next to its
    // children); a leaf stays `<…>/<stem>.md`. Children's segments already
    // point into the parent's folder, so only the parent's own file relocates.
    // Applied exactly once per page id.
    for (const p of uniquePages) {
        if (!p.hasChildren)
            continue;
        const entry = layout.get(p.id);
        entry.segments = [...entry.segments, entry.stem];
    }

    // Final full-path uniqueness pass (safety net). Folder-notes (pages with
    // children) are processed FIRST so a parent claims its canonical
    // `<name>/<name>.md` before a same-named child; the later page is the one
    // that gets re-stemmed. `uniquePages` is already a private copy, and the
    // sort is stable, so input order is kept within each group.
    const usedPaths = new Set();
    const pathKey = (e) => [...e.segments, e.stem].join("/");
    const ordered = [...uniquePages].sort((a, b) => Number(Boolean(b.hasChildren)) - Number(Boolean(a.hasChildren)));
    for (const p of ordered) {
        const entry = layout.get(p.id);
        if (usedPaths.has(pathKey(entry))) {
            // First attempt: disambiguate the stem with the sanitized slugId
            // (or id when no slugId is present).
            entry.stem = disambiguate(entry.stem, sanitizeTitle(p.slugId ?? p.id));
            if (usedPaths.has(pathKey(entry))) {
                // Still colliding: append the sanitized id as a last resort
                // (ids are assumed unique across the space).
                entry.stem = disambiguate(entry.stem, sanitizeTitle(p.id));
            }
        }
        usedPaths.add(pathKey(entry));
    }
    return layout;
}
|
||||
/**
 * Pick the display name for `node` within its sibling bucket.
 *
 * The base name is the sanitized title (an absent title is treated as ""). If
 * that name is already taken in the bucket identified by `parentKey`, the name
 * is passed through `disambiguate` together with the sanitized slugId (or the
 * sanitized id when no slugId is present). The suffix source is sanitized too,
 * since it is a second data channel that ends up in a path component.
 *
 * Side effect: the chosen name is recorded in `usedBySibling` under
 * `parentKey`, creating the bucket on first use.
 *
 * @param node Page node; read for `title`, `slugId` and `id`.
 * @param parentKey Bucket key chosen by the caller.
 * @param usedBySibling Map of parent key -> Set of names already taken.
 * @returns The name assigned to `node`.
 */
function nameForNode(node, parentKey, usedBySibling) {
    // Fetch the bucket for this parent, creating it lazily.
    const taken = usedBySibling.get(parentKey) || new Set();
    usedBySibling.set(parentKey, taken);

    const base = sanitizeTitle(node.title ?? "");
    // Only a name that is already claimed by a sibling gets the suffix.
    const resolved = taken.has(base)
        ? disambiguate(base, sanitizeTitle(node.slugId ?? node.id))
        : base;

    taken.add(resolved);
    return resolved;
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/**
|
||||
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
|
||||
* round-trip harness that drives these lives in the package's tests, not in the
|
||||
* engine.
|
||||
*/
|
||||
/**
|
||||
* Recursively strip every `attrs.id` from a ProseMirror node tree. Block ids
|
||||
* are regenerated by `markdownToProseMirror` (SPEC §11), so they must be
|
||||
* ignored when comparing the semantic shape of two documents. Returns a NEW
|
||||
* tree; the input is not mutated.
|
||||
*/
|
||||
export declare function stripBlockIds(node: any): any;
|
||||
/**
|
||||
* Find the first divergence between two values via a recursive deep compare.
|
||||
* Returns a short path + the two differing values, or null if they are equal.
|
||||
*/
|
||||
export declare function firstDivergence(a: any, b: any, path?: string): {
|
||||
path: string;
|
||||
a: any;
|
||||
b: any;
|
||||
} | null;
|
||||
@@ -1,70 +0,0 @@
|
||||
/**
|
||||
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
|
||||
* round-trip harness that drives these lives in the package's tests, not in the
|
||||
* engine.
|
||||
*/
|
||||
/**
 * Recursively strip the `id` key from every `attrs` object in a node tree and
 * return a NEW tree; the input is never mutated.
 *
 * - Arrays are mapped element by element.
 * - Plain objects are copied key by key; when a key is `attrs` and its value
 *   is a non-null object, that object's `id` is dropped and the remaining
 *   attributes are processed recursively.
 * - Every other value (primitives, null, undefined) is returned as-is.
 *
 * The copy is assembled with `Object.fromEntries`, which defines each key as
 * an own data property. A plain `out[key] = value` assignment would treat an
 * own `"__proto__"` key (which `JSON.parse` can produce) as a prototype
 * change, silently dropping the key and letting input data alter the copy's
 * prototype chain.
 *
 * @param node Any value; typically a ProseMirror JSON node or fragment.
 * @returns A structurally equal copy without any `attrs.id`.
 */
export function stripBlockIds(node) {
    if (Array.isArray(node)) {
        return node.map((child) => stripBlockIds(child));
    }
    if (node === null || typeof node !== "object") {
        return node;
    }
    const entries = Object.keys(node).map((key) => {
        const value = node[key];
        if (key === "attrs" && value && typeof value === "object") {
            // Drop the `id` attr; keep every other attribute.
            const { id: _droppedId, ...rest } = value;
            return [key, stripBlockIds(rest)];
        }
        return [key, stripBlockIds(value)];
    });
    return Object.fromEntries(entries);
}
|
||||
/**
 * Find the first divergence between two values via a recursive deep compare.
 *
 * Comparison rules:
 * - Values that are `===` are equal; a pair of `NaN`s is also treated as
 *   equal (plain `===` would report every NaN attribute as a divergence).
 * - Differing types, a null on either side, or differing primitives diverge
 *   at the current path.
 * - An array never equals a non-array. Arrays of different length diverge at
 *   `<path>.length`; otherwise elements are compared in order at
 *   `<path>[i]`.
 * - Objects are compared over the union of both sides' own enumerable keys
 *   at `<path>.<key>`; a missing key therefore compares equal to a key whose
 *   value is `undefined`.
 *
 * @param a First value.
 * @param b Second value.
 * @param path Path label for the current position; defaults to "$".
 * @returns `{ path, a, b }` describing the first differing spot, or null when
 *   the values are equal.
 */
export function firstDivergence(a, b, path = "$") {
    if (a === b)
        return null;
    // NaN is the only value not equal to itself under ===.
    if (Number.isNaN(a) && Number.isNaN(b))
        return null;

    const kind = typeof a;
    if (kind !== typeof b || a === null || b === null || kind !== "object") {
        return { path, a, b };
    }

    const aIsArr = Array.isArray(a);
    if (aIsArr !== Array.isArray(b)) {
        return { path, a, b };
    }

    if (aIsArr) {
        if (a.length !== b.length) {
            return { path: `${path}.length`, a: a.length, b: b.length };
        }
        for (let i = 0; i < a.length; i++) {
            const found = firstDivergence(a[i], b[i], `${path}[${i}]`);
            if (found)
                return found;
        }
        return null;
    }

    // Union of keys so a key present on only one side is still visited.
    const keys = new Set([...Object.keys(a), ...Object.keys(b)]);
    for (const key of keys) {
        const found = firstDivergence(a[key], b[key], `${path}.${key}`);
        if (found)
            return found;
    }
    return null;
}
|
||||
41
packages/git-sync/build/engine/stabilize.d.ts
vendored
41
packages/git-sync/build/engine/stabilize.d.ts
vendored
@@ -1,41 +0,0 @@
|
||||
/**
|
||||
* Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte
|
||||
* compatible so files produced here match `exportPageBody`'s output exactly.
|
||||
*/
|
||||
export interface PageMeta {
|
||||
version: 1;
|
||||
pageId: string;
|
||||
slugId: string;
|
||||
title: string;
|
||||
spaceId: string;
|
||||
parentPageId: string | null;
|
||||
}
|
||||
/**
|
||||
* Produce the self-contained `.md` file text for a page from its raw
|
||||
* ProseMirror `content` + identity meta, in the verified fixpoint form.
|
||||
*
|
||||
* md1 = convertProseMirrorToMarkdown(content)
|
||||
* doc2 = markdownToProseMirror(md1) // one import...
|
||||
* stableBody = convertProseMirrorToMarkdown(doc2) // ...and re-export
|
||||
* file = serializeDocmostMarkdownBody(meta, stableBody)
|
||||
*
|
||||
* The single export->import->export pass is the verified fixpoint (SPEC §11):
|
||||
* idempotent for already-stable content, and the convergence point for the
|
||||
* known converter asymmetries.
|
||||
*/
|
||||
export declare function stabilizePageFile(content: unknown, meta: PageMeta): Promise<string>;
|
||||
/**
|
||||
* The fixpoint markdown BODY for a page's ProseMirror `content`, WITHOUT any meta
|
||||
* envelope:
|
||||
*
|
||||
* md1 = convertProseMirrorToMarkdown(content) // export...
|
||||
* doc2 = markdownToProseMirror(md1) // ...import...
|
||||
* stableBody = convertProseMirrorToMarkdown(doc2) // ...re-export
|
||||
*
|
||||
* The single export->import->export pass is the verified fixpoint (SPEC §11):
|
||||
* idempotent for already-stable content, and the convergence point for the known
|
||||
* converter asymmetries. The native-Obsidian writer (`serializePageFile`) wraps
|
||||
* this body with a minimal `gitmost_id` frontmatter; determinism here is what
|
||||
* keeps re-pulls of an unchanged page byte-identical (no churn, loop-guard).
|
||||
*/
|
||||
export declare function stabilizePageBody(content: unknown): Promise<string>;
|
||||
31
packages/git-sync/build/index.d.ts
vendored
31
packages/git-sync/build/index.d.ts
vendored
@@ -1,31 +0,0 @@
|
||||
/**
|
||||
* Public surface of `@docmost/git-sync`.
|
||||
*
|
||||
* Exposes the pure converter (markdown <-> ProseMirror, file envelope,
|
||||
* canonicalization) and the sync engine (reconcile planner, vault layout,
|
||||
* pull/push, the git wrapper, and the settings parser) that the gitmost server
|
||||
* drives in-process.
|
||||
*/
|
||||
export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, parseDocmostMarkdown, convertProseMirrorToMarkdown, markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, } from "./lib/index.js";
|
||||
export type { DocmostMdMeta } from "./lib/index.js";
|
||||
export { planReconciliation, decideAbsenceDeletions, MASS_DELETE_MIN_EXISTING, MASS_DELETE_FRACTION, } from "./engine/reconcile.js";
|
||||
export type { LiveEntry, ExistingEntry, WriteEntry, MovedEntry, ReconciliationPlan, DeletionDecision, } from "./engine/reconcile.js";
|
||||
export { buildVaultLayout } from "./engine/layout.js";
|
||||
export type { PageNode, VaultEntry } from "./engine/layout.js";
|
||||
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
|
||||
export { stabilizePageFile } from "./engine/stabilize.js";
|
||||
export type { PageMeta } from "./engine/stabilize.js";
|
||||
export { bodyHash } from "./engine/loop-guard.js";
|
||||
export type { GitSyncClient, GitSyncPageNodeLite } from "./engine/client.types.js";
|
||||
export { VaultGit, vaultGitEnv, buildCommitMessage, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./engine/git.js";
|
||||
export type { DiffEntry, MergeResult, CommitOptions } from "./engine/git.js";
|
||||
export { readExisting, computePullActions, applyPullActions, } from "./engine/pull.js";
|
||||
export type { ReadExistingDeps, PullActionsInput, PullActions, ApplyPullActionsDeps, ApplyResult, } from "./engine/pull.js";
|
||||
export { classifyRenameMoves, computePushActions, applyPushActions, runPush, parentFolderFile, parseArgs, LAST_PUSHED_REF, DOCMOST_BRANCH, LOCAL_AUTHOR_NAME, LOCAL_AUTHOR_EMAIL, LOCAL_SOURCE_TRAILER, } from "./engine/push.js";
|
||||
export type { CreateAction, UpdateAction, DeleteAction, RenameMoveAction, RenameMoveActionClassified, ClassifyRenameMovesDeps, PushActions, PushActionsInput, MetaSide, ApplyPushDeps, WrittenBackPage, PushedPageRecord, PushFailure, PushNoop, ApplyPushResult, PushDeps, PushRunResult, PushParsedArgs, } from "./engine/push.js";
|
||||
export { parseSettings, envSchema } from "./engine/settings.js";
|
||||
export type { Settings } from "./engine/settings.js";
|
||||
export { loadSettingsOrExit } from "./engine/config-errors.js";
|
||||
export { runCycle } from "./engine/cycle.js";
|
||||
export type { RunCycleDeps, RunCycleResult, CycleFs, } from "./engine/cycle.js";
|
||||
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
|
||||
@@ -1,24 +0,0 @@
|
||||
/**
|
||||
* Public surface of `@docmost/git-sync`.
|
||||
*
|
||||
* Exposes the pure converter (markdown <-> ProseMirror, file envelope,
|
||||
* canonicalization) and the sync engine (reconcile planner, vault layout,
|
||||
* pull/push, the git wrapper, and the settings parser) that the gitmost server
|
||||
* drives in-process.
|
||||
*/
|
||||
// Pure converter (markdown <-> ProseMirror, file envelope, canonicalization).
|
||||
export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, parseDocmostMarkdown, convertProseMirrorToMarkdown, markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, } from "./lib/index.js";
|
||||
// Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize,
|
||||
// loop-guard body hash.
|
||||
export { planReconciliation, decideAbsenceDeletions, MASS_DELETE_MIN_EXISTING, MASS_DELETE_FRACTION, } from "./engine/reconcile.js";
|
||||
export { buildVaultLayout } from "./engine/layout.js";
|
||||
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
|
||||
export { stabilizePageFile } from "./engine/stabilize.js";
|
||||
export { bodyHash } from "./engine/loop-guard.js";
|
||||
export { VaultGit, vaultGitEnv, buildCommitMessage, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./engine/git.js";
|
||||
export { readExisting, computePullActions, applyPullActions, } from "./engine/pull.js";
|
||||
export { classifyRenameMoves, computePushActions, applyPushActions, runPush, parentFolderFile, parseArgs, LAST_PUSHED_REF, DOCMOST_BRANCH, LOCAL_AUTHOR_NAME, LOCAL_AUTHOR_EMAIL, LOCAL_SOURCE_TRAILER, } from "./engine/push.js";
|
||||
export { parseSettings, envSchema } from "./engine/settings.js";
|
||||
export { loadSettingsOrExit } from "./engine/config-errors.js";
|
||||
export { runCycle } from "./engine/cycle.js";
|
||||
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
|
||||
38
packages/git-sync/build/lib/canonicalize.d.ts
vendored
38
packages/git-sync/build/lib/canonicalize.d.ts
vendored
@@ -1,38 +0,0 @@
|
||||
/**
|
||||
* Semantic canonicalization of ProseMirror/TipTap documents for the round-trip
|
||||
* idempotency check (SPEC §11, "Задача №0", option (б): compare a CANONICALIZED
|
||||
* form rather than raw bytes).
|
||||
*
|
||||
* `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g.
|
||||
* `indent: null` where the source omitted it) and regenerates per-block ids on
|
||||
* every import. A raw deep-equal of the source doc against the re-imported doc
|
||||
* therefore diverges even when the two are semantically identical. This module
|
||||
* normalizes a document so that two semantically-equal docs compare deep-equal
|
||||
* regardless of block ids and absent-vs-explicit-default-null attributes.
|
||||
*
|
||||
* It is a self-contained module with no external dependencies.
|
||||
*/
|
||||
/**
|
||||
* Return a DEEP COPY of a ProseMirror node tree, canonicalized so that two
|
||||
* semantically-equal documents compare deep-equal. Rules (applied recursively
|
||||
* to the node, its `content`, and its `marks`):
|
||||
*
|
||||
* 1. Remove node-level `attrs.id` (regenerated on import). Mark attrs are NOT
|
||||
* touched for `id` (marks carry no block id; only their meaningful attrs).
|
||||
* 2. In any `attrs` object (node OR mark) drop keys whose value is `null`/
|
||||
* `undefined` (absent ≡ explicit default null) OR equals that node/mark
|
||||
* type's known non-null schema default (absent ≡ explicit default).
|
||||
* Keep every non-default value. The type is passed into the attrs
|
||||
* normalizer so it can look up `KNOWN_DEFAULTS`.
|
||||
* 3. If an `attrs` object becomes empty after pruning, drop the `attrs` key.
|
||||
* 4. Preserve `marks` (including the `comment` mark and its `commentId` — a
|
||||
* meaningful anchor per SPEC §3; never strip it).
|
||||
* 5. Preserve `text`, `type`, and `content` order exactly.
|
||||
* 6. Never mutate the input.
|
||||
*/
|
||||
export declare function canonicalizeContent(node: any): any;
|
||||
/**
|
||||
* True when two ProseMirror documents are semantically equal: equal after
|
||||
* canonicalization (block ids stripped, absent-vs-default-null normalized).
|
||||
*/
|
||||
export declare function docsCanonicallyEqual(a: any, b: any): boolean;
|
||||
16
packages/git-sync/build/lib/index.d.ts
vendored
16
packages/git-sync/build/lib/index.d.ts
vendored
@@ -1,16 +0,0 @@
|
||||
/**
|
||||
* Public surface of the pure converter (`lib/`). This barrel re-exports the
|
||||
* PURE, IO-free pieces the sync engine needs: the self-contained markdown
|
||||
* (de)serializers, the lossless ProseMirror <-> Markdown converter, the
|
||||
* markdown -> ProseMirror import path, and semantic canonicalization for the
|
||||
* round-trip idempotency check (SPEC §11).
|
||||
*
|
||||
* There is no REST client, websocket/collab write-path, auth-utils or page-lock
|
||||
* here — the gitmost server writes natively.
|
||||
*/
|
||||
export { serializeDocmostMarkdown, parseDocmostMarkdown, serializeDocmostMarkdownBody, } from "./markdown-document.js";
|
||||
export type { DocmostMdMeta } from "./markdown-document.js";
|
||||
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
|
||||
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
|
||||
export { canonicalizeContent, docsCanonicallyEqual, } from "./canonicalize.js";
|
||||
export { parsePageFile, serializePageFile } from "./page-file.js";
|
||||
@@ -1,15 +0,0 @@
|
||||
/**
|
||||
* Public surface of the pure converter (`lib/`). This barrel re-exports the
|
||||
* PURE, IO-free pieces the sync engine needs: the self-contained markdown
|
||||
* (de)serializers, the lossless ProseMirror <-> Markdown converter, the
|
||||
* markdown -> ProseMirror import path, and semantic canonicalization for the
|
||||
* round-trip idempotency check (SPEC §11).
|
||||
*
|
||||
* There is no REST client, websocket/collab write-path, auth-utils or page-lock
|
||||
* here — the gitmost server writes natively.
|
||||
*/
|
||||
export { serializeDocmostMarkdown, parseDocmostMarkdown, serializeDocmostMarkdownBody, } from "./markdown-document.js";
|
||||
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
|
||||
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
|
||||
export { canonicalizeContent, docsCanonicallyEqual, } from "./canonicalize.js";
|
||||
export { parsePageFile, serializePageFile } from "./page-file.js";
|
||||
@@ -1,801 +0,0 @@
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
*/
|
||||
export function convertProseMirrorToMarkdown(content) {
|
||||
if (!content || !content.content)
|
||||
return "";
|
||||
// Escape a value interpolated into an HTML double-quoted attribute value
|
||||
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the
|
||||
// ATTRIBUTE context only the quote that delimits the value and the ampersand
|
||||
// that starts an entity are special, so we escape ONLY & " (and ' for safety
|
||||
// when single-quoted delimiters are used). We deliberately do NOT escape < or
|
||||
// >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode
|
||||
// &lt;/&gt; back inside attribute values, so escaping them would corrupt the
|
||||
// stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on
|
||||
// every round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`). Escaping & "
|
||||
// keeps the value inert against attribute-injection while staying idempotent.
|
||||
// NOTE: escape ONLY & and " here. The value is always wrapped in double
|
||||
// quotes, so " is the only delimiter; ' is NOT special in a double-quoted
|
||||
// value, and parse5 does not decode ' back inside attribute values, so
|
||||
// escaping ' would (like < >) corrupt the value and accumulate & on every
|
||||
// round-trip. Escaping & and " is idempotent (parse5 decodes them back).
|
||||
const escapeAttr = (value) => String(value)
|
||||
.replace(/&/g, "&")
|
||||
.replace(/"/g, """);
|
||||
// Escape a value placed as HTML element TEXT content (between tags), where
|
||||
// <, >, and & are all significant. Used for text rendered inside raw-HTML
|
||||
// blocks (table cells / columns) so stored characters cannot inject markup.
|
||||
const escapeHtmlText = (value) => String(value)
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">");
|
||||
// Percent-encode characters that would break out of a markdown URL target
|
||||
// (...) — whitespace/newlines and parentheses — so a stored src stays a
|
||||
// single inert token (used for image/video/youtube srcs).
|
||||
const encodeMdUrl = (value) => String(value || "")
|
||||
.replace(/\s/g, (c) => (c === " " ? "%20" : encodeURIComponent(c)))
|
||||
.replace(/\(/g, "%28")
|
||||
.replace(/\)/g, "%29");
|
||||
const processNode = (node) => {
|
||||
const type = node.type;
|
||||
const nodeContent = node.content || [];
|
||||
switch (type) {
|
||||
case "doc":
|
||||
return nodeContent.map(processNode).join("\n\n");
|
||||
case "paragraph":
|
||||
const text = nodeContent.map(processNode).join("");
|
||||
const align = node.attrs?.textAlign;
|
||||
if (align && align !== "left") {
|
||||
return `<div align="${escapeAttr(align)}">${text}</div>`;
|
||||
}
|
||||
return text || "";
|
||||
case "heading":
|
||||
const level = node.attrs?.level || 1;
|
||||
const headingText = nodeContent.map(processNode).join("");
|
||||
return "#".repeat(level) + " " + headingText;
|
||||
case "text":
|
||||
let textContent = node.text || "";
|
||||
// Apply marks (bold, italic, code, etc.)
|
||||
if (node.marks) {
|
||||
// The schema's `code` mark declares `excludes: "_"` — it excludes every
|
||||
// other inline mark — so the editor can NEVER produce a text run that
|
||||
// carries `code` together with another mark, and on import any
|
||||
// co-occurring mark is always dropped (the run comes back as code-only).
|
||||
// The lossless, byte-stable behavior is therefore: when a run has the
|
||||
// `code` mark, emit ONLY the backtick code span and ignore every other
|
||||
// mark, so md1 is already code-only and md2 === md1. Runs WITHOUT a code
|
||||
// mark are rendered exactly as before.
|
||||
const markTypes = node.marks.map((m) => m.type);
|
||||
const hasCode = markTypes.includes("code");
|
||||
if (hasCode) {
|
||||
textContent = `\`${textContent}\``;
|
||||
return textContent;
|
||||
}
|
||||
const codeCombined = false;
|
||||
for (const mark of node.marks) {
|
||||
switch (mark.type) {
|
||||
case "bold":
|
||||
textContent = codeCombined
|
||||
? `<strong>${textContent}</strong>`
|
||||
: `**${textContent}**`;
|
||||
break;
|
||||
case "italic":
|
||||
textContent = codeCombined
|
||||
? `<em>${textContent}</em>`
|
||||
: `*${textContent}*`;
|
||||
break;
|
||||
case "code":
|
||||
// When combined with another mark, wrap as <code> so the
|
||||
// surrounding HTML marks can nest around it; otherwise use the
|
||||
// plain backtick span.
|
||||
textContent = codeCombined
|
||||
? `<code>${textContent}</code>`
|
||||
: `\`${textContent}\``;
|
||||
break;
|
||||
case "link": {
|
||||
const href = mark.attrs?.href || "";
|
||||
const title = mark.attrs?.title;
|
||||
if (codeCombined) {
|
||||
// Emit an HTML anchor so it can wrap the nested <code>.
|
||||
const safeHref = escapeAttr(href);
|
||||
if (title) {
|
||||
textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
|
||||
}
|
||||
else {
|
||||
textContent = `<a href="${safeHref}">${textContent}</a>`;
|
||||
}
|
||||
}
|
||||
else if (title) {
|
||||
// Emit the optional markdown link title; escape an embedded
|
||||
// double-quote so it cannot terminate the title string early.
|
||||
const safeTitle = String(title).replace(/"/g, '\\"');
|
||||
textContent = `[${textContent}](${href} "${safeTitle}")`;
|
||||
}
|
||||
else {
|
||||
textContent = `[${textContent}](${href})`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "strike":
|
||||
textContent = codeCombined
|
||||
? `<s>${textContent}</s>`
|
||||
: `~~${textContent}~~`;
|
||||
break;
|
||||
case "underline":
|
||||
textContent = `<u>${textContent}</u>`;
|
||||
break;
|
||||
case "subscript":
|
||||
textContent = `<sub>${textContent}</sub>`;
|
||||
break;
|
||||
case "superscript":
|
||||
textContent = `<sup>${textContent}</sup>`;
|
||||
break;
|
||||
case "highlight": {
|
||||
// Preserve a null/empty color as a plain highlight (a bare
|
||||
// <mark> with no background-color); only emit the style when a
|
||||
// color is actually set, so a plain highlight is not forced to
|
||||
// yellow on export.
|
||||
const color = mark.attrs?.color;
|
||||
textContent = color
|
||||
? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
|
||||
: `<mark>${textContent}</mark>`;
|
||||
break;
|
||||
}
|
||||
case "textStyle":
|
||||
if (mark.attrs?.color) {
|
||||
textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
case "comment": {
|
||||
// Emit the inline comment anchor so highlights round-trip. The
|
||||
// schema's Comment mark parses span[data-comment-id] (attrs
|
||||
// commentId/resolved).
|
||||
const cid = mark.attrs?.commentId;
|
||||
if (cid) {
|
||||
const resolvedAttr = mark.attrs?.resolved
|
||||
? ` data-resolved="true"`
|
||||
: "";
|
||||
textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return textContent;
|
||||
case "codeBlock":
|
||||
const language = node.attrs?.language || "";
|
||||
// Strip ALL trailing newlines so the export is idempotent: marked
|
||||
// re-adds exactly one trailing "\n" on import, so trimming only one
|
||||
// here would let the text grow by "\n" on each round-trip. Removing
|
||||
// every trailing newline makes repeated cycles stable.
|
||||
const code = nodeContent
|
||||
.map(processNode)
|
||||
.join("")
|
||||
.replace(/\n+$/, "");
|
||||
return "```" + language + "\n" + code + "\n```";
|
||||
case "bulletList":
|
||||
return nodeContent
|
||||
.map((item) => processListItem(item, "-"))
|
||||
.join("\n");
|
||||
case "orderedList":
|
||||
return nodeContent
|
||||
.map((item, index) => processListItem(item, `${index + 1}.`))
|
||||
.join("\n");
|
||||
case "taskList":
|
||||
return nodeContent.map((item) => processTaskItem(item)).join("\n");
|
||||
case "taskItem":
|
||||
// Delegate to the same helper used by taskList so multi-block and
|
||||
// nested task items render and indent consistently.
|
||||
return processTaskItem(node);
|
||||
case "listItem":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
case "blockquote":
|
||||
// Prefix EVERY line of EVERY child with "> " and separate block-level
|
||||
// children with a blank ">" line so code blocks / multi-paragraph
|
||||
// quotes round-trip correctly.
|
||||
return nodeContent
|
||||
.map((n) => processNode(n)
|
||||
.split("\n")
|
||||
.map((line) => (line.length ? `> ${line}` : ">"))
|
||||
.join("\n"))
|
||||
.join("\n>\n");
|
||||
case "horizontalRule":
|
||||
return "---";
|
||||
case "hardBreak":
|
||||
// Two trailing spaces before the newline encode a markdown hard break;
|
||||
// a bare "\n" would be reimported as a soft break and lost.
|
||||
return " \n";
|
||||
case "image":
|
||||
const imgAlt = node.attrs?.alt || "";
|
||||
// Neutralize characters that could break out of the markdown image
|
||||
// URL: spaces/newlines and parentheses would terminate the (...) target
|
||||
// and let a stored src inject following markdown/HTML. Percent-encode
|
||||
// them so the URL stays a single inert token.
|
||||
const imgSrc = encodeMdUrl(node.attrs?.src);
|
||||
// No "caption" attribute exists in the Docmost image schema, so we do
|
||||
// not emit one (the previous caption branch was dead).
|
||||
return ``;
|
||||
case "video": {
|
||||
// Emit the schema-matching <video> element so generateJSON rebuilds the
|
||||
// node with its attrs intact. The schema's parseHTML reads src/aria-label
|
||||
// from the standard attributes and the remaining attrs from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.alt)
|
||||
parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
// Wrap in a block <div> so marked treats it as a block (a bare <video>
|
||||
// is inline-level HTML and marked wraps it in <p>, leaving a spurious
|
||||
// empty paragraph beside the hoisted block atom). The wrapper has no
|
||||
// data-type, so the schema parser ignores it and just hoists the video.
|
||||
return `<div><video ${parts.join(" ")}></video></div>`;
|
||||
}
|
||||
case "youtube": {
|
||||
// Emit the schema-matching div[data-type="youtube"]; the schema reads
|
||||
// src from data-src and width/height/align from data-* attributes.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="youtube"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "table": {
|
||||
// A GFM pipe table cannot represent merged cells. If ANY cell carries
|
||||
// colspan>1 or rowspan>1, a pipe table would corrupt the grid on
|
||||
// re-import, so emit the WHOLE table as raw HTML <table> instead: the
|
||||
// schema's table family parseHTML (tag table/tr/td/th, with colspan/
|
||||
// rowspan read from the same-named HTML attrs and align via parseHTML)
|
||||
// round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
|
||||
const tableRows = nodeContent;
|
||||
if (tableRows.length === 0)
|
||||
return "";
|
||||
const hasSpan = tableRows.some((row) => (row.content || []).some((cell) => (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1));
|
||||
if (hasSpan) {
|
||||
// Render each cell's block children to HTML (marked does NOT parse
|
||||
// markdown inside a raw HTML block, so emitting markdown here would
|
||||
// leak literal ** / `` into the cell). blockToHtml mirrors the schema
|
||||
// HTML so inner formatting re-parses into the right marks/nodes.
|
||||
const renderHtmlCell = (cell) => {
|
||||
const tag = cell.type === "tableHeader" ? "th" : "td";
|
||||
const a = cell.attrs || {};
|
||||
const cellParts = [];
|
||||
if ((a.colspan ?? 1) > 1)
|
||||
cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
|
||||
if ((a.rowspan ?? 1) > 1)
|
||||
cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
|
||||
if (a.align)
|
||||
cellParts.push(`align="${escapeAttr(a.align)}"`);
|
||||
const open = cellParts.length
|
||||
? `<${tag} ${cellParts.join(" ")}>`
|
||||
: `<${tag}>`;
|
||||
const inner = (cell.content || [])
|
||||
.map((block) => blockToHtml(block))
|
||||
.join("");
|
||||
return `${open}${inner}</${tag}>`;
|
||||
};
|
||||
const htmlRows = tableRows
|
||||
.map((row) => `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`)
|
||||
.join("");
|
||||
return `<table><tbody>${htmlRows}</tbody></table>`;
|
||||
}
|
||||
// No merged cells: emit a GFM table (header row + separator) so the
|
||||
// markdown can be parsed back into a table on re-import.
|
||||
const rows = tableRows.map(processNode);
|
||||
const headerCells = tableRows[0]?.content || [];
|
||||
const columns = headerCells.length || 1;
|
||||
// Derive alignment markers (:--, :-:, --:) from each header cell.
|
||||
const markers = Array.from({ length: columns }, (_, i) => {
|
||||
const align = headerCells[i]?.attrs?.align;
|
||||
switch (align) {
|
||||
case "left":
|
||||
return ":--";
|
||||
case "center":
|
||||
return ":-:";
|
||||
case "right":
|
||||
return "--:";
|
||||
default:
|
||||
return "---";
|
||||
}
|
||||
});
|
||||
const separator = "| " + markers.join(" | ") + " |";
|
||||
return [rows[0], separator, ...rows.slice(1)].join("\n");
|
||||
}
|
||||
case "tableRow":
|
||||
return "| " + nodeContent.map(processNode).join(" | ") + " |";
|
||||
case "tableCell":
|
||||
case "tableHeader": {
|
||||
// Join multiple block children with a space (not "") so adjacent blocks
|
||||
// like a paragraph followed by a list don't collide into "line1- a".
|
||||
// Then collapse newlines and escape pipes so a cell containing "|" or a
|
||||
// line break cannot corrupt the surrounding GFM row.
|
||||
return nodeContent
|
||||
.map(processNode)
|
||||
.join(" ")
|
||||
.replace(/\r?\n/g, " ")
|
||||
.replace(/\|/g, "\\|");
|
||||
}
|
||||
case "callout":
|
||||
const calloutType = node.attrs?.type || "info";
|
||||
const calloutContent = nodeContent.map(processNode).join("\n");
|
||||
return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
|
||||
case "details":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
case "detailsSummary":
|
||||
const summaryText = nodeContent.map(processNode).join("");
|
||||
return `<details>\n<summary>${summaryText}</summary>\n`;
|
||||
case "detailsContent":
|
||||
const detailsText = nodeContent.map(processNode).join("\n");
|
||||
return `${detailsText}\n</details>`;
|
||||
case "mathInline": {
|
||||
// The schema's `text` attribute has no parseHTML, so TipTap's default
|
||||
// parser reads it from the `text` HTML attribute (NOT the element's text
|
||||
// content). Emit span[data-type="mathInline"] carrying the LaTeX in a
|
||||
// `text="..."` attribute so it round-trips. marked cannot parse $...$
|
||||
// back, so the previous form was lossy.
|
||||
const inlineMath = node.attrs?.text || "";
|
||||
return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
|
||||
}
|
||||
case "mathBlock": {
|
||||
// Same as mathInline: the LaTeX must ride in the `text` HTML attribute
|
||||
// for the schema's default parser to recover it.
|
||||
const blockMath = node.attrs?.text || "";
|
||||
return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
|
||||
}
|
||||
case "mention": {
|
||||
// Emit span[data-type="mention"] with the schema's data-* attributes so
|
||||
// generateJSON rebuilds the mention node instead of leaving "@label"
|
||||
// plain text that cannot re-parse.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`data-type="mention"`];
|
||||
if (attrs.id)
|
||||
parts.push(`data-id="${escapeAttr(attrs.id)}"`);
|
||||
if (attrs.label)
|
||||
parts.push(`data-label="${escapeAttr(attrs.label)}"`);
|
||||
if (attrs.entityType)
|
||||
parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
|
||||
if (attrs.entityId)
|
||||
parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
|
||||
if (attrs.slugId)
|
||||
parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
|
||||
if (attrs.creatorId)
|
||||
parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
|
||||
if (attrs.anchorId)
|
||||
parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
|
||||
// Keep the label as visible text content too; the schema reads attrs
|
||||
// from data-*, so the inner text is purely cosmetic and harmless.
|
||||
const mentionLabel = attrs.label || attrs.id || "";
|
||||
// The label is visible element TEXT content here (the data-* attrs above
|
||||
// carry the real values), so escape it for the text context, not attrs.
|
||||
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
|
||||
}
|
||||
case "attachment": {
|
||||
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
|
||||
// the schema stores name/url (plus mime/size/attachmentId). Emit the
|
||||
// schema-matching div[data-type="attachment"] with data-attachment-*
|
||||
// attrs so the node round-trips instead of degrading to a markdown link.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="attachment"`,
|
||||
`data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
|
||||
];
|
||||
if (attrs.name)
|
||||
parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.mime)
|
||||
parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "drawio":
|
||||
case "excalidraw": {
|
||||
// Emit the schema-matching div[data-type=...] carrying the diagram's
|
||||
// attrs as data-* (the schema's diagramAttributes reads src/title/alt/
|
||||
// width/height/size/aspectRatio/align/attachmentId from data-*), so the
|
||||
// diagram round-trips instead of degrading to a lossy placeholder.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="${type}"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.title != null)
|
||||
parts.push(`data-title="${escapeAttr(attrs.title)}"`);
|
||||
if (attrs.alt != null)
|
||||
parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "embed": {
|
||||
// Emit the schema-matching div[data-type="embed"]; the schema reads
|
||||
// src/provider/align/width/height from data-* attributes so the node
|
||||
// (and its provider iframe info) survives the round-trip.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="embed"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
`data-provider="${escapeAttr(attrs.provider ?? "")}"`,
|
||||
];
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "audio": {
|
||||
// Emit the schema-matching <audio> element (was emitting nothing). The
|
||||
// schema reads src from src and attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
// Wrap in a block <div> for the same reason as video: a bare <audio> is
|
||||
// inline-level HTML that marked would wrap in <p>.
|
||||
return `<div><audio ${parts.join(" ")}></audio></div>`;
|
||||
}
|
||||
case "pdf": {
|
||||
// Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
|
||||
// The schema reads src/width/height from standard attrs and name/
|
||||
// attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="pdf"`,
|
||||
`src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.name)
|
||||
parts.push(`data-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "columns": {
|
||||
// Emit the schema-matching div[data-type="columns"] wrapper so the
|
||||
// multi-column layout survives. Without a case the children were
|
||||
// concatenated with no separator and the text merged. The schema reads
|
||||
// layout from data-layout and widthMode from data-width-mode. The whole
|
||||
// block is raw HTML, so render children via blockToHtml (NOT markdown,
|
||||
// which marked would not re-parse inside a raw HTML block).
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`data-type="columns"`];
|
||||
if (attrs.layout)
|
||||
parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
|
||||
if (attrs.widthMode && attrs.widthMode !== "normal")
|
||||
parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
|
||||
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
case "column": {
|
||||
// Emit the schema-matching div[data-type="column"]; the schema reads the
|
||||
// column width from data-width. Children are rendered as HTML so their
|
||||
// formatting survives inside this raw HTML block.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`data-type="column"`];
|
||||
if (attrs.width)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
case "pageBreak":
|
||||
// Emit the schema-matching div[data-type="pageBreak"] so marked passes
|
||||
// it through as a block and generateJSON rebuilds the pageBreak atom.
|
||||
// Without this case the node fell through to `default` and rendered ""
|
||||
// (the divider silently disappeared and could not round-trip).
|
||||
return `<div data-type="pageBreak"></div>`;
|
||||
case "subpages":
|
||||
return "{{SUBPAGES}}";
|
||||
default:
|
||||
// Fallback: process children
|
||||
return nodeContent.map(processNode).join("");
|
||||
}
|
||||
};
|
||||
// Convert a run of inline nodes (text with marks, hard breaks, inline atoms)
// into HTML. This is the inline renderer for the raw-HTML fallbacks (spanned
// tables, columns): markdown syntax placed inside a raw HTML block is not
// re-parsed on import, so every mark is written as the HTML element that the
// schema parses back into the same mark.
//
// - hardBreak        -> <br>
// - other non-text   -> delegated to processNode (inline atoms such as
//                       mention / mathInline already emit HTML there)
// - text             -> HTML-escaped, then wrapped once per mark, in mark order
//
// Unknown mark types leave the text untouched. A missing/empty node list
// renders as "".
const inlineToHtml = (inlineNodes) => {
    // Marks that map one-to-one onto a bare HTML element.
    const plainTags = new Map([
        ["bold", "strong"],
        ["italic", "em"],
        ["code", "code"],
        ["strike", "s"],
        ["underline", "u"],
        ["subscript", "sub"],
        ["superscript", "sup"],
    ]);
    // Wrap already-rendered HTML in the element for a single mark.
    const applyMark = (html, mark) => {
        const tag = plainTags.get(mark.type);
        if (tag) {
            return `<${tag}>${html}</${tag}>`;
        }
        if (mark.type === "link") {
            return `<a href="${escapeAttr(mark.attrs?.href || "")}">${html}</a>`;
        }
        if (mark.type === "highlight") {
            // A highlight without a color stays a bare <mark>.
            if (mark.attrs?.color) {
                return `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${html}</mark>`;
            }
            return `<mark>${html}</mark>`;
        }
        if (mark.type === "textStyle") {
            // Only a color is carried; a textStyle without one adds nothing.
            if (mark.attrs?.color) {
                return `<span style="color: ${escapeAttr(mark.attrs.color)}">${html}</span>`;
            }
            return html;
        }
        if (mark.type === "comment") {
            // Inline comment anchor; skipped entirely when there is no id.
            if (!mark.attrs?.commentId) {
                return html;
            }
            const resolvedAttr = mark.attrs?.resolved ? ` data-resolved="true"` : "";
            return `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${resolvedAttr}>${html}</span>`;
        }
        return html;
    };
    const pieces = [];
    for (const inline of inlineNodes || []) {
        if (inline.type === "hardBreak") {
            pieces.push("<br>");
        }
        else if (inline.type !== "text") {
            pieces.push(processNode(inline));
        }
        else {
            let html = escapeHtmlText(inline.text || "");
            for (const mark of inline.marks || []) {
                html = applyMark(html, mark);
            }
            pieces.push(html);
        }
    }
    return pieces.join("");
};
|
||||
// Build the <img> element for an image node. Used wherever an image sits
// inside a raw-HTML container (a column or a spanned table cell): markdown
// image syntax there would not be re-parsed on import and would survive as
// literal text, so the image has to be real HTML.
//
// `src` is always written (empty string when absent). alt/title/align/
// attachmentId are written only when truthy; width/height/size/aspectRatio
// are written whenever they are not null/undefined (so 0 is kept). Every
// value goes through escapeAttr.
const imageToHtml = (node) => {
    const a = node.attrs || {};
    const whenTruthy = (value) => Boolean(value);
    const whenPresent = (value) => value != null;
    // [html attribute name, value, inclusion rule] in output order.
    const optionalAttrs = [
        ["alt", a.alt, whenTruthy],
        ["title", a.title, whenTruthy],
        ["width", a.width, whenPresent],
        ["height", a.height, whenPresent],
        ["align", a.align, whenTruthy],
        ["data-size", a.size, whenPresent],
        ["data-attachment-id", a.attachmentId, whenTruthy],
        ["data-aspect-ratio", a.aspectRatio, whenPresent],
    ];
    const rendered = [`src="${escapeAttr(a.src ?? "")}"`];
    for (const [name, value, include] of optionalAttrs) {
        if (include(value)) {
            rendered.push(`${name}="${escapeAttr(value)}"`);
        }
    }
    return `<img ${rendered.join(" ")}>`;
};
|
||||
// Render a callout node as div[data-type="callout"], carrying the banner type
// (lower-cased, defaulting to "info") in data-callout-type. The callout's
// children are rendered through blockToHtml so they stay HTML inside a
// raw-HTML container.
const calloutToHtml = (node) => {
    const rawType = node.attrs?.type || "info";
    const bannerType = rawType.toLowerCase();
    const body = (node.content || []).map(blockToHtml).join("");
    return `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">${body}</div>`;
};
|
||||
// Render a details node as a <details> element wrapping its children, each
// rendered through blockToHtml (which routes detailsSummary / detailsContent
// children to their own HTML renderers).
const detailsToHtml = (node) => {
    const parts = node.content || [];
    const body = parts.map(blockToHtml).join("");
    return `<details>${body}</details>`;
};
|
||||
// Render a detailsSummary node as summary[data-type="detailsSummary"]; its
// content is inline, so it goes through inlineToHtml.
const detailsSummaryToHtml = (node) => {
    const label = inlineToHtml(node.content || []);
    return `<summary data-type="detailsSummary">${label}</summary>`;
};
|
||||
// Render a detailsContent node as div[data-type="detailsContent"] with its
// block children rendered through blockToHtml.
const detailsContentToHtml = (node) => {
    const blocks = node.content || [];
    return `<div data-type="detailsContent">${blocks.map(blockToHtml).join("")}</div>`;
};
|
||||
// Render a taskList node as ul[data-type="taskList"] whose items are
// li[data-type="taskItem"] carrying data-checked="true" / "false". Each
// item's block children are rendered with blockChildrenToHtml. Emitting this
// HTML directly keeps task lists inside columns / spanned cells from
// degrading to literal "- [ ]" text.
const taskListToHtml = (node) => {
    let itemsHtml = "";
    for (const taskItem of node.content || []) {
        const state = taskItem.attrs?.checked ? "true" : "false";
        itemsHtml += `<li data-type="taskItem" data-checked="${state}">${blockChildrenToHtml(taskItem)}</li>`;
    }
    return `<ul data-type="taskList">${itemsHtml}</ul>`;
};
|
||||
// Render one block node as HTML for the raw-HTML containers (spanned tables,
// columns). Markdown inside a raw-HTML block is not re-parsed on import, so
// every block that can appear in a column or a spanned cell must come out as
// schema-matching HTML here, never as markdown.
//
// - paragraph / heading / lists / codeBlock / image / blockquote / hr /
//   callout / details* / taskList / taskItem get explicit HTML below.
// - Nodes whose processNode case already emits schema-matching HTML (table,
//   columns/column, mathBlock, media, embed, attachment, diagrams, pageBreak)
//   are delegated to processNode.
// - Anything else is wrapped in a plain <div> so its content is preserved.
//
// Returns the HTML string for `block`.
const blockToHtml = (block) => {
    const children = block.content || [];
    // Shared renderer for <ul>/<ol>: one <li> per list item.
    const listToHtml = (tag) => `<${tag}>${children
        .map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
        .join("")}</${tag}>`;
    switch (block.type) {
        case "paragraph":
            return `<p>${inlineToHtml(children)}</p>`;
        case "heading": {
            // The level becomes part of the TAG NAME, where escapeAttr cannot
            // help, and it comes from stored document JSON. Coerce it to an
            // integer and clamp to the valid h1..h6 range so a malformed value
            // can never inject markup; anything non-numeric falls back to 1.
            const requested = Number(block.attrs?.level);
            const level = Number.isFinite(requested)
                ? Math.min(6, Math.max(1, Math.trunc(requested)))
                : 1;
            return `<h${level}>${inlineToHtml(children)}</h${level}>`;
        }
        case "bulletList":
            return listToHtml("ul");
        case "orderedList":
            return listToHtml("ol");
        case "codeBlock": {
            const lang = block.attrs?.language || "";
            // The code is element TEXT content (between <code> tags), so it is
            // escaped with escapeHtmlText; the language rides in a class
            // ATTRIBUTE, so it uses escapeAttr. Trailing newlines are stripped,
            // as in the markdown codeBlock path.
            const code = escapeHtmlText(children
                .map(processNode)
                .join("")
                .replace(/\n+$/, ""));
            const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
            return `<pre><code${cls}>${code}</code></pre>`;
        }
        case "image":
            return imageToHtml(block);
        case "blockquote":
            return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
        case "horizontalRule":
            return "<hr>";
        case "callout":
            return calloutToHtml(block);
        case "details":
            return detailsToHtml(block);
        case "detailsSummary":
            return detailsSummaryToHtml(block);
        case "detailsContent":
            return detailsContentToHtml(block);
        case "taskList":
            return taskListToHtml(block);
        case "taskItem":
            // A bare taskItem (outside a taskList) still needs a wrapping list
            // so the schema parses it; wrap it in a single-item taskList.
            return taskListToHtml({ content: [block] });
        // These already emit schema-matching HTML from processNode. pageBreak
        // belongs here too: processNode renders it as
        // div[data-type="pageBreak"], whereas the default branch below would
        // reduce it to an empty <div> and silently drop the divider.
        case "table":
        case "columns":
        case "column":
        case "mathBlock":
        case "video":
        case "audio":
        case "pdf":
        case "youtube":
        case "embed":
        case "attachment":
        case "drawio":
        case "excalidraw":
        case "pageBreak":
            return processNode(block);
        default:
            // Any still-unhandled block type: NEVER fall back to markdown
            // inside a raw-HTML block (it would become literal text). Wrap its
            // rendered children in a <div>; if it has no non-text children,
            // render its inline content instead.
            if (children.length && children.some((c) => c.type !== "text")) {
                return `<div>${children.map(blockToHtml).join("")}</div>`;
            }
            return `<div>${inlineToHtml(children)}</div>`;
    }
};
|
||||
// HTML counterpart of processListItem for the raw-HTML fallback path: render
// every block child of a list item with blockToHtml and concatenate the
// results with no separator. An item without content renders as "".
const blockChildrenToHtml = (item) => {
    const blocks = item.content || [];
    let html = "";
    for (const child of blocks) {
        html += blockToHtml(child);
    }
    return html;
};
|
||||
// Lay out the already-rendered children of one list item under its marker.
//
// `childStrings` holds one (possibly multi-line) string per child block.
// Only the first physical line of the first child is prefixed with
// `${prefix} `. Every other non-empty line (the rest of the first child and
// all lines of later children such as nested lists, code blocks or extra
// paragraphs) is indented by `indentWidth` spaces so it stays inside the
// item instead of collapsing to column 0. Empty lines are emitted empty, with
// no trailing whitespace.
//
// `indentWidth` is the width of the LIST marker, which the caller supplies
// because it is not always the visible prefix width:
//   - bullet  "- "          -> 2
//   - task    "- [ ] "      -> still 2 (the checkbox is content, not marker)
//   - ordered "1. " / "10. " -> 3 / 4, growing with the number of digits
//
// Returns the lines joined with "\n" ("" when there are no children).
const indentItemChildren = (childStrings, prefix, indentWidth) => {
    const pad = " ".repeat(indentWidth);
    return childStrings
        .flatMap((child, childIndex) => child.split("\n").map((line, lineIndex) => {
            const isMarkerLine = childIndex === 0 && lineIndex === 0;
            if (isMarkerLine) {
                return `${prefix} ${line}`;
            }
            return line.length ? `${pad}${line}` : "";
        }))
        .join("\n");
};
|
||||
// Render one bullet/ordered list item as markdown under the marker `prefix`
// ("-" for bullets, "1." / "10." ... for ordered items).
//
// An item with no children is just the bare prefix. Otherwise every child is
// rendered with processNode and laid out by indentItemChildren. The emitted
// marker is `${prefix} `, so the continuation indent is prefix.length + 1
// (2 for "-", 3 for "1.", 4 for "10.").
const processListItem = (item, prefix) => {
    const rendered = (item.content || []).map(processNode);
    if (rendered.length > 0) {
        const markerWidth = prefix.length + 1;
        return indentItemChildren(rendered, prefix, markerWidth);
    }
    return prefix;
};
|
||||
// Render one task item as markdown: "- [x]" when attrs.checked is truthy,
// "- [ ]" otherwise, followed by the item's children.
//
// An item with no children still returns its checkbox prefix so the row does
// not vanish. With children, the continuation indent is a fixed 2 columns:
// the list marker of a task item is only "- "; the checkbox is item content
// and must not widen the indent.
const processTaskItem = (item) => {
    const TASK_MARKER_WIDTH = 2;
    const box = item.attrs?.checked ? "[x]" : "[ ]";
    const prefix = `- ${box}`;
    const rendered = (item.content || []).map(processNode);
    return rendered.length === 0
        ? prefix
        : indentItemChildren(rendered, prefix, TASK_MARKER_WIDTH);
};
|
||||
return processNode(content).trim();
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
/**
|
||||
* Self-contained Docmost-flavoured Markdown document (custom extensions).
|
||||
*
|
||||
* A single `.md` file that packages everything needed to losslessly round-trip
|
||||
* a page through "download -> edit body -> re-upload":
|
||||
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
|
||||
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
|
||||
* - a trailing `docmost:comments` block: a one-line JSON array of comment
|
||||
* threads.
|
||||
*
|
||||
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
|
||||
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
|
||||
* importer without first stripping the blocks, the metadata cannot leak into the
|
||||
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
|
||||
* codeBlock node, so a fenced block is deliberately NOT used.)
|
||||
*
|
||||
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
|
||||
* re-pastes an exported `.md` into a page, or a page documents this very
|
||||
* format). To stay robust, parsing treats only the FINAL, document-ending
|
||||
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
|
||||
* opener whose closing `-->` sits at the very end of the file. Any earlier
|
||||
* literal occurrence is left in the body untouched.
|
||||
*
|
||||
* NOTE on comments: in this version the comment THREAD records are preserved in
|
||||
* the file but are NOT pushed back to the server on import — only the inline
|
||||
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||
* records stays with the comment tools/UI.
|
||||
*/
|
||||
/**
 * Payload of the leading `docmost:meta` block: the page identity stored as a
 * one-line JSON object at the top of a self-contained markdown file.
 *
 * Only `version` is required; every identity field is optional.
 */
export interface DocmostMdMeta {
    /** Version number of the meta payload. NOTE(review): presumably the file-format version; confirm against the serializer. */
    version: number;
    /** Page identifier. */
    pageId?: string;
    /** Page slug identifier. */
    slugId?: string;
    /** Page title. */
    title?: string;
    /** Identifier of the space the page belongs to. */
    spaceId?: string;
    /** Identifier of the parent page; may be `null`. NOTE(review): `null` presumably marks a root page; confirm. */
    parentPageId?: string | null;
}
|
||||
/**
 * Build the complete self-contained markdown file from its three parts, in
 * order: the meta block, the markdown body, and the comments block.
 *
 * Both metadata blocks are always written. When there are no comments the
 * comments block is still emitted, holding `[]`, which keeps the format
 * uniform and the parser simple.
 *
 * @param meta Page identity written into the meta block.
 * @param body Markdown body of the page.
 * @param comments Comment thread records written into the comments block.
 * @returns The assembled file contents.
 */
export declare function serializeDocmostMarkdown(meta: DocmostMdMeta, body: string, comments: any[]): string;
|
||||
/**
 * Take a self-contained file apart again: meta, body, and comments.
 *
 * The parser is tolerant of missing blocks. A file without a meta block or
 * without a comments block (for example a hand-written plain-markdown file)
 * yields `null` for that part, and the whole input is treated as the body.
 * `\r\n` line endings are accepted.
 *
 * @param full Complete file contents.
 * @returns The parsed `meta` (or `null`), the `body`, and the `comments`
 *   array (or `null`).
 * @throws Error Only when a block that IS present contains JSON that fails to
 *   parse; a missing block never throws.
 */
export declare function parseDocmostMarkdown(full: string): {
  meta: DocmostMdMeta | null;
  body: string;
  comments: any[] | null;
};
|
||||
/**
 * Build a self-contained markdown file that consists of the meta block and
 * the body only; no trailing `docmost:comments` block is written.
 *
 * This is the form used for synced files: the sync engine never touches
 * `/comments` (SPEC §3), so a synced file carries page identity plus the
 * body, and comment threads survive only as inline
 * `<span data-comment-id>` anchor marks inside that body.
 *
 * The output round-trips through `parseDocmostMarkdown`, which tolerates the
 * absent comments block by returning `comments: null`.
 *
 * @param meta Page identity to store in the meta block.
 * @param body Markdown body of the page.
 * @returns The assembled file contents.
 */
export declare function serializeDocmostMarkdownBody(
  meta: DocmostMdMeta,
  body: string,
): string;
|
||||
@@ -1,306 +0,0 @@
|
||||
/**
|
||||
* Pure markdown -> ProseMirror conversion.
|
||||
*
|
||||
* The converter path is `markdownToProseMirror` (marked -> HTML ->
|
||||
* generateJSON) plus the two pre/post processors it needs (`preprocessCallouts`,
|
||||
* `bridgeTaskLists`). The gitmost server writes the resulting page bodies
|
||||
* natively through the collab gateway, so no websocket/Yjs write-path lives
|
||||
* here.
|
||||
*/
|
||||
import { generateJSON } from "@tiptap/html";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { marked } from "marked";
|
||||
import { docmostExtensions } from "./docmost-schema.js";
|
||||
// Tiptap's HTML parsing needs a DOM. Under Node.js we create one JSDOM window
// and publish its `window`, `document` and `Element` as globals.
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
Object.assign(global, {
  window: dom.window,
  document: dom.window.document,
  Element: dom.window.Element,
});
|
||||
/**
|
||||
* Hard ceiling above which we skip callout preprocessing entirely. The linear
|
||||
* scanner below has no quadratic blow-up, but we still cap input defensively so
|
||||
* a pathological multi-megabyte payload cannot tie up the event loop; in that
|
||||
* case the markdown is passed through verbatim (callouts are simply not
|
||||
* detected) rather than risking a slow scan.
|
||||
*/
|
||||
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
|
||||
/** Matches an opening callout fence: `:::type` (type captured, lower-cased). */
|
||||
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
|
||||
/** Matches a bare closing callout fence: `:::`. */
|
||||
const CALLOUT_CLOSE_RE = /^:::\s*$/;
|
||||
/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */
|
||||
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
/**
 * Pre-process Docmost-flavoured markdown: rewrite `:::type ... :::` callout
 * blocks (the syntax the markdown export produces) into
 * `<div data-type="callout" data-callout-type="TYPE">` elements whose inner
 * content has been rendered through `marked`.
 *
 * The input is scanned line by line:
 *  - Fenced code regions (backtick or tilde fences) are tracked, and a `:::`
 *    line inside one is never treated as a callout delimiter.
 *  - A fence is closed only by a line whose fence run uses the same
 *    character, is at least as long as the opener, and is followed by nothing
 *    but further fence characters and whitespace. A line such as "```js"
 *    inside a "```" fence therefore stays code; a closing fence may not carry
 *    an info string.
 *  - An opening `:::type` is paired with the closing `:::` at the same
 *    nesting level, using a depth counter; nested callouts are rendered by
 *    recursing on the collected body.
 *  - An opener without a matching close is emitted as a literal line.
 *
 * NOTE: the scan is not strictly linear in the worst case. Each opener starts
 * its own forward scan, so many unterminated openers or deep nesting re-read
 * lines. The size cap below bounds the total work.
 *
 * @param markdown Markdown source text.
 * @returns The markdown with terminated callouts replaced by HTML `<div>`
 *   blocks, or the input unchanged when it exceeds the size cap.
 */
async function preprocessCallouts(markdown) {
  // Defensive cap: skip preprocessing for pathologically large inputs.
  if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return markdown;
  }

  // Fence run (backticks/tildes) when `line` starts a code fence, else "".
  const fenceRunOf = (line) => {
    const m = line.match(CODE_FENCE_RE);
    return m ? m[2] : "";
  };

  // True when `line` closes the fence that was opened with `marker`.
  const closesFence = (line, marker) => {
    const m = line.match(CODE_FENCE_RE);
    if (!m) {
      return false;
    }
    const run = m[2];
    if (run[0] !== marker[0] || run.length < marker.length) {
      return false;
    }
    // Anything other than fence characters/whitespace after the run is an
    // info string, which makes this line content rather than a closing fence.
    return /^[`~]*\s*$/.test(line.slice(m[0].length));
  };

  // Transform one slice of lines. Callouts found at the top level of the slice
  // become <div> blocks; their bodies go through this same function first so
  // nested callouts are converted too.
  const transform = async (lines) => {
    const out = [];
    let openFence = ""; // marker of the currently open code fence, "" if none
    let i = 0;
    while (i < lines.length) {
      const line = lines[i];

      // Inside a code fence every line is copied verbatim; only the matching
      // closing fence changes state.
      if (openFence) {
        out.push(line);
        if (closesFence(line, openFence)) {
          openFence = "";
        }
        i++;
        continue;
      }

      const startedFence = fenceRunOf(line);
      if (startedFence) {
        openFence = startedFence;
        out.push(line);
        i++;
        continue;
      }

      const open = line.match(CALLOUT_OPEN_RE);
      if (!open) {
        out.push(line);
        i++;
        continue;
      }

      // Opening callout fence: collect body lines until the closing `:::`
      // that brings the nesting depth back to zero.
      const type = open[1].toLowerCase();
      const bodyLines = [];
      let depth = 1;
      let innerFence = "";
      let j = i + 1;
      for (; j < lines.length; j++) {
        const bl = lines[j];
        if (innerFence) {
          if (closesFence(bl, innerFence)) {
            innerFence = "";
          }
        } else {
          const run = fenceRunOf(bl);
          if (run) {
            innerFence = run;
          } else if (CALLOUT_OPEN_RE.test(bl)) {
            depth++;
          } else if (CALLOUT_CLOSE_RE.test(bl) && --depth === 0) {
            break; // matching close for THIS callout; not part of the body
          }
        }
        bodyLines.push(bl);
      }

      if (j >= lines.length) {
        // Unterminated callout: keep the opener as literal text and resume
        // with the next line, preserving the original content.
        out.push(line);
        i++;
        continue;
      }

      const inner = await transform(bodyLines);
      const renderedInner = await marked.parse(inner);
      out.push(`\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`);
      i = j + 1; // skip past the closing `:::`
    }
    return out.join("\n");
  };

  return transform(markdown.split("\n"));
}
|
||||
/**
 * Rewrite marked's checkbox lists into the markup TipTap's task-list
 * extensions parse.
 *
 * marked renders `- [x] done` as an ordinary list whose items hold an
 * `<input type="checkbox">`. TipTap instead matches
 * `ul[data-type="taskList"]` and `li[data-type="taskItem"]`, reading the
 * state from `data-checked`. For every `<ul>`/`<ol>` that has at least one
 * direct `<li>` and whose direct `<li>`s ALL own a checkbox, this function:
 *  - replaces an `<ol>` by a `<ul>` (attributes and children carried over), so
 *    the element is not both an ordered list and a task list;
 *  - sets `data-type="taskList"` on the list;
 *  - sets `data-type="taskItem"` and `data-checked="true"|"false"` on each
 *    item, taking the state from the item's first own checkbox;
 *  - removes all of the item's own checkbox inputs.
 * Other lists are left untouched.
 *
 * @param html HTML produced by marked.
 * @returns The rewritten body HTML. The input is returned unchanged when it
 *   contains no checkbox input or exceeds the size cap.
 */
function bridgeTaskLists(html) {
  // Fast exits before the JSDOM parse: nothing to bridge, or input too large.
  const mentionsCheckbox = /type=["']?checkbox/i.test(html);
  if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return html;
  }

  const doc = new JSDOM(html).window.document;

  // Checkboxes owned by `li` itself: direct <input type="checkbox"> children
  // plus those that are direct children of the <li>'s direct <p> children.
  // Checkboxes deeper down (e.g. in a nested list) do not count.
  const ownCheckboxes = (li) =>
    Array.from(li.children).flatMap((child) => {
      if (child.tagName === "INPUT") {
        return child.getAttribute("type") === "checkbox" ? [child] : [];
      }
      if (child.tagName === "P") {
        return Array.from(child.querySelectorAll(":scope > input[type='checkbox']"));
      }
      return [];
    });

  for (const list of Array.from(doc.querySelectorAll("ul, ol"))) {
    // Direct <li> children only; nested lists get their own loop iteration.
    const items = Array.from(list.children).filter((el) => el.tagName === "LI");
    if (items.length === 0) {
      continue;
    }
    const boxesPerItem = items.map((li) => ownCheckboxes(li));
    if (boxesPerItem.some((boxes) => boxes.length === 0)) {
      continue; // at least one ordinary item: not a task list
    }

    // Swap a qualifying <ol> for a <ul> so only the task-list rule matches it.
    let target = list;
    if (list.tagName === "OL") {
      const replacement = doc.createElement("ul");
      for (const { name, value } of Array.from(list.attributes)) {
        replacement.setAttribute(name, value);
      }
      while (list.firstChild) {
        replacement.appendChild(list.firstChild);
      }
      list.replaceWith(replacement);
      target = replacement;
    }
    target.setAttribute("data-type", "taskList");

    items.forEach((li, idx) => {
      const boxes = boxesPerItem[idx];
      // The first checkbox decides the state; the rest are only removed.
      const first = boxes[0] ?? null;
      const isChecked =
        first != null && (first.hasAttribute("checked") || first.checked);
      li.setAttribute("data-type", "taskItem");
      li.setAttribute("data-checked", isChecked ? "true" : "false");
      for (const box of boxes) {
        box.remove();
      }
    });
  }

  return doc.body.innerHTML;
}
|
||||
/**
 * Return a copy of `node` with content-less paragraphs removed at every
 * level of the tree.
 *
 * Background: a block-level atom whose markdown form is inline (e.g. the
 * block image's `![alt](src)`, or a bare media element) is wrapped in a <p>
 * by marked; the schema then hoists the atom out and leaves an empty
 * paragraph sibling, which would add a phantom blank gap on the next export.
 *
 * Rules:
 *  - Only nodes with `type === "paragraph"` whose `content` is missing or an
 *    empty array are removed.
 *  - A node without a `content` array (atom/leaf, or a falsy value) is
 *    returned as-is.
 *  - If removal would leave a non-empty container with no children at all,
 *    its first (processed) child is kept, so containers that require block
 *    content keep one empty paragraph.
 *
 * @param node A ProseMirror JSON node (or a falsy value).
 * @returns The same value for leaves; otherwise a shallow copy with cleaned
 *   `content`.
 */
function stripEmptyParagraphs(node) {
  const children = node ? node.content : undefined;
  if (!Array.isArray(children)) {
    // Atom / leaf node: nothing to recurse into.
    return node;
  }

  const processed = [];
  const survivors = [];
  for (const child of children) {
    const next = stripEmptyParagraphs(child);
    processed.push(next);
    const hasChildren = Array.isArray(next?.content) && next.content.length > 0;
    const isEmptyParagraph = !!next && next.type === "paragraph" && !hasChildren;
    if (!isEmptyParagraph) {
      survivors.push(next);
    }
  }

  // Schema-validity guard: never turn a container that had children into an
  // empty one; retain a single (empty) paragraph instead.
  const cleaned =
    survivors.length === 0 && processed.length > 0 ? [processed[0]] : survivors;
  return { ...node, content: cleaned };
}
|
||||
/**
 * Convert markdown to a ProseMirror JSON doc using the Docmost extensions.
 *
 * Pipeline: callout preprocessing -> marked (HTML) -> task-list bridging ->
 * `generateJSON` -> removal of empty paragraphs.
 *
 * @param markdownContent Markdown source text.
 * @returns A promise for the resulting document JSON.
 */
export async function markdownToProseMirror(markdownContent) {
  const html = await marked.parse(await preprocessCallouts(markdownContent));
  const doc = generateJSON(bridgeTaskLists(html), docmostExtensions);
  return stripEmptyParagraphs(doc);
}
|
||||
@@ -1 +0,0 @@
|
||||
{"version":"4.1.6","results":[[":test/node-ops.test.ts",{"duration":73.83617300000003,"failed":false}],[":test/markdown-converter.test.ts",{"duration":52.24364600000001,"failed":false}],[":test/diff.test.ts",{"duration":48.002140000000054,"failed":false}],[":test/node-ops-extra.test.ts",{"duration":64.79457399999995,"failed":false}],[":test/reconcile.test.ts",{"duration":13.454662000000042,"failed":false}],[":test/canonicalize.test.ts",{"duration":15.510864999999967,"failed":false}],[":test/markdown-roundtrip.property.test.ts",{"duration":10142.778976,"failed":false}],[":test/stabilize.test.ts",{"duration":180.60366900000008,"failed":false}],[":test/canonicalize-extra.test.ts",{"duration":265.1806279999996,"failed":false}],[":test/loop-guard.test.ts",{"duration":9.12148000000002,"failed":false}],[":test/markdown-document.test.ts",{"duration":9.338571000000002,"failed":false}],[":test/sanitize.test.ts",{"duration":20.903294999999957,"failed":false}],[":test/markdown-converter-golden.test.ts",{"duration":20.178874000000008,"failed":false}],[":test/roundtrip-corpus.test.ts",{"duration":375.9727969999999,"failed":false}],[":test/layout.test.ts",{"duration":25.806564999999978,"failed":false}],[":test/markdown-document-envelope.test.ts",{"duration":17.760928999999976,"failed":false}],[":test/roundtrip.test.ts",{"duration":202.1052659999998,"failed":false}],[":test/compute-push-actions.test.ts",{"duration":18.895632999999975,"failed":false}],[":test/apply-pull-actions.test.ts",{"duration":312.7543149999997,"failed":false}],[":test/git.test.ts",{"duration":2510.628562,"failed":false}],[":test/run-push.test.ts",{"duration":52.35109799999998,"failed":false}],[":test/compute-pull-actions.test.ts",{"duration":12.83178799999996,"failed":false}],[":test/apply-push-actions.test.ts",{"duration":40.049105,"failed":false}],[":test/classify-rename-moves.test.ts",{"duration":11.772115999999983,"failed":false}],[":test/git-merge.test.ts",{"duration":394.734729,"failed":false}],[":test/r
ead-existing.test.ts",{"duration":9.485771000000113,"failed":false}],[":test/config-errors-invalid.test.ts",{"duration":22.83441799999997,"failed":false}],[":test/run-push-realgit.test.ts",{"duration":341.63427,"failed":false}],[":test/settings.test.ts",{"duration":18.815516000000002,"failed":false}],[":test/config-errors.test.ts",{"duration":22.358415000000036,"failed":false}],[":test/git-sync-client.contract.test-d.ts",{"duration":0,"failed":false}],[":test/engine-gaps.test.ts",{"duration":107.23285100000021,"failed":false}],[":test/markdown-converter-gaps.test.ts",{"duration":397.53935699999965,"failed":false}],[":test/git-integration-gaps.test.ts",{"duration":401.41072199999996,"failed":false}],[":test/markdown-to-prosemirror-gaps.test.ts",{"duration":446.77069600000004,"failed":false}],[":test/zzprobe.test.ts",{"duration":206.321958,"failed":false}],[":test/_probe_rt.test.ts",{"duration":113.90998200000013,"failed":false}],[":test/_probe2.test.ts",{"duration":87.88095900000008,"failed":false}],[":test/zz-probe.test.ts",{"duration":61.425263000000086,"failed":false}],[":test/zzz-probe.test.ts",{"duration":128.94683599999985,"failed":true}],[":test/_probe.test.ts",{"duration":135.79946900000004,"failed":false}],[":test/__probe.test.ts",{"duration":5.685652999999945,"failed":false}],[":test/markdown-converter-html-marks.test.ts",{"duration":10.321619999999996,"failed":false}],[":test/_probe/probe.test.ts",{"duration":71.38958900000011,"failed":false}],[":test/media-roundtrip.test.ts",{"duration":196.99739999999997,"failed":false}],[":test/diagram-roundtrip.test.ts",{"duration":82.55217999999968,"failed":false}],[":test/git-error-paths.test.ts",{"duration":303.43118300000003,"failed":false}],[":test/zzprobe2.test.ts",{"duration":54.94561099999987,"failed":false}],[":test/zzprobe3.test.ts",{"duration":77.88595900000018,"failed":false}],[":test/docmost-schema-attrs.test.ts",{"duration":10.282551000000012,"failed":false}],[":test/_valid_probe.test.ts",{"duration":92.3
5715300000015,"failed":false}],[":test/strip-empty-paragraphs-validity.test.ts",{"duration":127.7716620000001,"failed":false}],[":test/cycle.test.ts",{"duration":17.375657000000047,"failed":false}],[":test/cycle-roundtrip.test.ts",{"duration":582.6821960000002,"failed":false}],[":test/vault-index.test.ts",{"duration":9.033900000000017,"failed":false}],[":test/page-file.test.ts",{"duration":7.111135999999988,"failed":false}]]}
|
||||
41
packages/git-sync/package.json
Normal file
41
packages/git-sync/package.json
Normal file
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"name": "@docmost/git-sync",
|
||||
"version": "0.1.0",
|
||||
"description": "Vendored pure converter + pure sync engine for the Docmost <-> git Markdown sync (Phase A). See docs/git-sync-plan.md.",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"main": "./build/index.js",
|
||||
"types": "./build/index.d.ts",
|
||||
"exports": {
|
||||
".": "./build/index.js"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"watch": "tsc --watch",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest"
|
||||
},
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@fellow/prosemirror-recreate-transform": "^1.2.3",
|
||||
"@tiptap/core": "3.20.4",
|
||||
"@tiptap/extension-highlight": "3.20.4",
|
||||
"@tiptap/extension-image": "3.20.4",
|
||||
"@tiptap/extension-subscript": "3.20.4",
|
||||
"@tiptap/extension-superscript": "3.20.4",
|
||||
"@tiptap/extension-task-item": "3.20.4",
|
||||
"@tiptap/extension-task-list": "3.20.4",
|
||||
"@tiptap/html": "3.20.4",
|
||||
"@tiptap/pm": "3.20.4",
|
||||
"@tiptap/starter-kit": "3.20.4",
|
||||
"jsdom": "25.0.0",
|
||||
"marked": "17.0.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/jsdom": "^21.1.7",
|
||||
"@types/node": "^20.0.0",
|
||||
"fast-check": "^4.8.0",
|
||||
"typescript": "^5.0.0",
|
||||
"vitest": "4.1.6"
|
||||
}
|
||||
}
|
||||
177
packages/git-sync/src/engine/layout.ts
Normal file
177
packages/git-sync/src/engine/layout.ts
Normal file
@@ -0,0 +1,177 @@
|
||||
/**
|
||||
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||
*
|
||||
* Given the flat list of page nodes for a space (as returned by
|
||||
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||
* page's own name, no extension). This module is intentionally PURE and
|
||||
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||
* lives in each file's meta block (pageId / slugId).
|
||||
*/
|
||||
|
||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||
|
||||
/**
 * One page of a space in flat form, as returned by `listAllSpacePages`
 * (no content).
 */
export interface PageNode {
  /** Page identifier; layout results are keyed by it. */
  id: string;
  /** Page title; sanitized to form the page's vault name. */
  title?: string;
  /** Slug identifier; preferred suffix when a name must be disambiguated. */
  slugId?: string;
  /** Identifier of the parent page; absent or `null` when there is none. */
  parentPageId?: string | null;
  /** Whether the page has child pages. */
  hasChildren?: boolean;
}
|
||||
|
||||
/** Where a page lives in the vault: its folder path and its file stem. */
export interface VaultEntry {
  /**
   * Names of the page's ancestors, ordered root -> leaf; these form the
   * folder path. A root page has an empty array.
   */
  segments: string[];
  /** File name of the page itself, without extension. */
  stem: string;
}
|
||||
|
||||
/**
 * Compute the vault layout of a whole space.
 *
 * The result maps pageId -> `{ segments, stem }`. It is deterministic for a
 * given input, and the full destination paths
 * (`[...segments, stem].join("/")`) are made unique so that one page cannot
 * silently overwrite another.
 *
 * Uniqueness is established in two layers:
 *   1. Names are resolved per parent: siblings that sanitize to the same
 *      title get a ` ~<slugId>` suffix. The suffix is sanitized too, because
 *      slugId/id is untrusted data that must not inject a path separator.
 *      Pages with no parent and pages whose parent is not in the input share
 *      one root bucket, since both end up at the vault root.
 *   2. A final pass over full paths re-stems any page whose path is already
 *      taken, first with the sanitized slugId (or id), then with the
 *      sanitized id.
 *
 * Entries without an `id` are ignored; for a repeated `id` the first entry
 * wins.
 *
 * @param pages Flat list of the space's page nodes.
 * @returns Map from pageId to the page's vault destination.
 */
export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
  const ROOT_BUCKET = "__root__";

  // De-duplicated index of usable pages (first occurrence wins). A Map keeps
  // insertion order, so iterating it visits pages in input order.
  const byId = new Map<string, PageNode>();
  for (const page of pages) {
    if (page && page.id && !byId.has(page.id)) byId.set(page.id, page);
  }

  // Name pass: fix every page's name before any folder path is built, so an
  // ancestor's name cannot change afterwards. The bucket is the parent id only
  // when that parent is part of the input; otherwise it is the root bucket.
  const usedBySibling = new Map<string, Set<string>>();
  const nameById = new Map<string, string>();
  for (const page of byId.values()) {
    const parentId = page.parentPageId;
    const bucket = parentId && byId.has(parentId) ? parentId : ROOT_BUCKET;
    nameById.set(page.id, nameForNode(page, bucket, usedBySibling));
  }

  // Strict lookup: a missing name is a bug, so fail loudly instead of
  // recomputing a name that might differ from the resolved one.
  const nameOf = (id: string): string => {
    const resolved = nameById.get(id);
    if (resolved === undefined) {
      throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
    }
    return resolved;
  };

  const pageFor = (id: string | null | undefined): PageNode | undefined =>
    id ? byId.get(id) : undefined;

  // Ancestor names of `node`, root first. The walk stops at a missing parent,
  // and `seen` stops it on a malformed parent cycle.
  const folderSegmentsFor = (node: PageNode): string[] => {
    const leafToRoot: string[] = [];
    const seen = new Set<string>();
    for (
      let cursor = pageFor(node.parentPageId);
      cursor && cursor.id && !seen.has(cursor.id);
      cursor = pageFor(cursor.parentPageId)
    ) {
      seen.add(cursor.id);
      leafToRoot.push(nameOf(cursor.id));
    }
    return leafToRoot.reverse();
  };

  // Provisional layout: ancestors become folders, the own name the stem.
  const layout = new Map<string, VaultEntry>();
  for (const page of byId.values()) {
    layout.set(page.id, {
      segments: folderSegmentsFor(page),
      stem: nameOf(page.id),
    });
  }

  // Safety net on full paths: the later of two colliding pages is re-stemmed.
  const takenPaths = new Set<string>();
  const fullPath = (e: VaultEntry): string => [...e.segments, e.stem].join("/");
  for (const page of byId.values()) {
    const entry = layout.get(page.id);
    if (!entry) continue;

    if (takenPaths.has(fullPath(entry))) {
      // First attempt: suffix with the sanitized slugId (or id).
      entry.stem = disambiguate(entry.stem, sanitizeTitle(page.slugId ?? page.id));
      if (takenPaths.has(fullPath(entry))) {
        // Last resort: suffix with the sanitized id as well.
        entry.stem = disambiguate(entry.stem, sanitizeTitle(page.id));
      }
    }
    takenPaths.add(fullPath(entry));
  }

  return layout;
}
|
||||
|
||||
/**
 * Pick a deterministic name for `node` that is not yet used by any of its
 * siblings, and record it as used.
 *
 * The base name is the sanitized title. If a sibling already holds it, the
 * name is suffixed via `disambiguate` with the sanitized slugId (or the
 * sanitized id when there is no slugId). If THAT name is taken as well — for
 * example because a sibling's literal title equals the suffixed form, or two
 * siblings share a slugId — the sanitized id is appended as a second
 * fallback, mirroring the final pass of `buildVaultLayout`. Without this
 * second step a duplicate sibling name would be returned silently, and two
 * parent pages could end up sharing one folder.
 *
 * Every suffix goes through `sanitizeTitle`, because slugId/id is untrusted
 * data that must never leak a path separator into a name. The name is
 * COSMETIC; identity lives in the meta block.
 *
 * @param node Page to name.
 * @param parentKey Bucket key chosen by the caller: the parent id, or
 *   `"__root__"` for root pages and for orphans whose parent is outside the
 *   input set.
 * @param usedBySibling Per-bucket sets of names already taken; updated in
 *   place.
 * @returns The resolved name.
 */
function nameForNode(
  node: PageNode,
  parentKey: string,
  usedBySibling: Map<string, Set<string>>,
): string {
  let used = usedBySibling.get(parentKey);
  if (!used) {
    used = new Set<string>();
    usedBySibling.set(parentKey, used);
  }

  let name = sanitizeTitle(node.title ?? "");
  if (used.has(name)) {
    // Sibling collision: suffix with the stable, sanitized slugId (or id).
    name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
    if (used.has(name)) {
      // The suffixed form is taken too: append the sanitized id as well.
      name = disambiguate(name, sanitizeTitle(node.id));
    }
  }
  used.add(name);
  return name;
}
|
||||
29
packages/git-sync/src/engine/loop-guard.ts
Normal file
29
packages/git-sync/src/engine/loop-guard.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
/**
|
||||
* Loop-guard primitives (SPEC §10). The sync engine must never re-pull its OWN
|
||||
* write as if it were a remote edit: after a push, the next poll will see the
|
||||
* page it just wrote with a fresh `updatedAt`. To suppress that, we key on two
|
||||
* signals — the body HASH of what we pushed (this module) and the `updatedAt`
|
||||
* returned by the write — recorded per page at push time.
|
||||
*
|
||||
* This module owns the PURE, deterministic body-hash. The CONSUMPTION on the
|
||||
* pull side (comparing an incoming page's body hash against the last pushed hash
|
||||
* to decide "this is our own write, ignore it") is a future increment — here we
|
||||
* only PRODUCE the hash and the per-page push record (see `src/push.ts`).
|
||||
*/
|
||||
import { createHash } from "node:crypto";
|
||||
|
||||
/**
 * Stable hash of a page's markdown BODY (the "body hash" of SPEC §10).
 *
 * The string is hashed exactly as given: encoded as UTF-8, digested with
 * SHA-256, and returned as lowercase hex. Equal inputs always give equal
 * digests. No normalization is applied, so a caller that wants equal hashes
 * across cosmetic-only differences must pass a canonical representation.
 * What counts as "the body" is the caller's decision.
 *
 * @param markdownBody The body text to hash.
 * @returns The hex-encoded SHA-256 digest.
 */
export function bodyHash(markdownBody: string): string {
  const hasher = createHash("sha256");
  hasher.update(markdownBody, "utf8");
  return hasher.digest("hex");
}
|
||||
200
packages/git-sync/src/engine/reconcile.ts
Normal file
200
packages/git-sync/src/engine/reconcile.ts
Normal file
@@ -0,0 +1,200 @@
|
||||
/**
|
||||
* Pure reconciliation planner (SPEC §5/§6/§8).
|
||||
*
|
||||
* Given the desired live set of files (computed from the current Docmost tree)
|
||||
* and the set of files currently tracked in the vault, compute what to write,
|
||||
* what to move (old path to remove), and what to delete. Identity is `pageId`
|
||||
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
|
||||
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
|
||||
* the live tree is a DELETE.
|
||||
*
|
||||
* This module is intentionally PURE (no IO, no git) so the whole plan is
|
||||
* unit-testable. The actual file writing / git operations happen in pull.ts.
|
||||
*/
|
||||
|
||||
/**
 * One page of the desired (live) state: the page identified by `pageId` is
 * expected to exist in the vault at `relPath`.
 */
export interface LiveEntry {
  /** Stable page identity; the key used to match against tracked files. */
  pageId: string;
  /** Forward-slash, vault-relative destination, e.g. `Space/Parent/Child.md`. */
  relPath: string;
}
|
||||
|
||||
/**
 * One file that is currently tracked in the vault, paired with the `pageId`
 * that was parsed from its meta.
 */
export interface ExistingEntry {
  /** Page identity read from the tracked file's meta. */
  pageId: string;
  /** Forward-slash, vault-relative path where the tracked file lives now. */
  relPath: string;
}
|
||||
|
||||
/** A planned write: the page `pageId` is (re)written at `relPath`. */
export interface WriteEntry {
  /** Identity of the page to write. */
  pageId: string;
  /** Vault-relative destination of the write. */
  relPath: string;
}
|
||||
|
||||
/**
 * A tracked page whose path changed: it is written at `toRelPath`, and the
 * file at `fromRelPath` is the leftover of its previous location.
 */
export interface MovedEntry {
  /** Identity of the page that moved. */
  pageId: string;
  /** Path where the page is tracked today (the old location). */
  fromRelPath: string;
  /** Path where the page is written by this plan (the new location). */
  toRelPath: string;
  /**
   * `true` when the old path may be removed. `false` when some live page is
   * going to (re)write that very path (path reuse): deleting it would destroy
   * real data, so the caller has to skip the removal. The move is recorded
   * either way, and the new path is written regardless.
   */
  removeOldPath: boolean;
}
|
||||
|
||||
/** Everything the caller has to do to make the vault match the live tree. */
export interface ReconciliationPlan {
  /**
   * One entry per live page, to be (re)written at its `relPath`. Because every
   * live page is written whether or not it already existed, this single list
   * covers additions, in-place content updates and the new location of moves.
   */
  toWrite: WriteEntry[];
  /**
   * Absence-based deletions ONLY: vault-relative paths whose tracked pageId no
   * longer appears in the live set (page removed/trashed). Old paths of moved
   * pages are deliberately kept out of this list (they are reported through
   * `moved`), so that pull.ts can put absence deletions behind the
   * incomplete-fetch suppression and mass-delete guard (SPEC §8) while real
   * moves are still applied.
   */
  toDelete: string[];
  /**
   * Tracked pages whose path changed. The caller writes `toRelPath` first and
   * removes `fromRelPath` only after that write succeeded. These old paths
   * never appear in `toDelete`.
   */
  moved: MovedEntry[];
}
|
||||
|
||||
/**
 * Build the reconciliation plan from the live set and the tracked set.
 *
 * What ends up where:
 *  - `toWrite`  : one entry per `live` page (add, update and move all reduce
 *                 to "write the page at its live path").
 *  - `moved`    : every tracked file whose pageId is live but whose path
 *                 differs from the live path. Its old path is reported here
 *                 and NEVER in `toDelete`; the caller removes it after the new
 *                 path has been written.
 *  - `toDelete` : the path of every tracked file whose pageId is not live
 *                 (absence delete), de-duplicated.
 *
 * Safety rule (no data loss): a path that some live page is going to write is
 * never scheduled for removal. For absence deletes the path is simply left
 * out of `toDelete`; for moves the entry is still recorded but carries
 * `removeOldPath: false`.
 *
 * `existing` may contain several files with the same pageId. Each one that
 * does not sit at the live path is reported (as a move when the pageId is
 * live, as an absence delete otherwise), so the vault converges to exactly
 * the live set.
 *
 * @param live Pages that should exist, with their destination paths.
 * @param existing Files currently tracked, with the pageId from their meta.
 * @returns The writes, absence deletions and moves to perform.
 */
export function planReconciliation(
  live: LiveEntry[],
  existing: ExistingEntry[],
): ReconciliationPlan {
  // pageId -> desired path. If a pageId occurs twice in `live`, the later
  // entry wins (Map constructor semantics, same as repeated `set`).
  const desiredPathOf = new Map<string, string>(
    live.map((entry): [string, string] => [entry.pageId, entry.relPath]),
  );
  // Every path that is going to be written; none of these may be removed.
  const writeTargets = new Set<string>(live.map((entry) => entry.relPath));

  const toWrite: WriteEntry[] = live.map(({ pageId, relPath }) => ({
    pageId,
    relPath,
  }));

  const moved: MovedEntry[] = [];
  // A Set so that one path reported by several tracked rows is queued once.
  const absentPaths = new Set<string>();

  for (const { pageId, relPath: trackedPath } of existing) {
    const desiredPath = desiredPathOf.get(pageId);
    const pathIsReused = writeTargets.has(trackedPath);

    if (desiredPath === undefined) {
      // pageId vanished from the live tree -> absence delete, unless a live
      // page is about to (re)write this very path.
      if (!pathIsReused) absentPaths.add(trackedPath);
    } else if (desiredPath !== trackedPath) {
      // Same page, new location -> move. The old path is only removable when
      // no live page owns it.
      moved.push({
        pageId,
        fromRelPath: trackedPath,
        toRelPath: desiredPath,
        removeOldPath: !pathIsReused,
      });
    }
    // Otherwise the file already sits at its live path: the write in
    // `toWrite` refreshes it in place and nothing else is needed.
  }

  return { toWrite, toDelete: Array.from(absentPaths), moved };
}
|
||||
|
||||
/**
 * Minimum number of tracked files for the mass-delete fraction guard to kick
 * in. Smaller vaults are exempt because deleting "most" of a tiny vault is
 * normal (e.g. 1 of 2).
 */
export const MASS_DELETE_MIN_EXISTING = 4;
/**
 * A delete plan that removes MORE than this fraction of the tracked files is
 * treated as a suspected wipe.
 */
export const MASS_DELETE_FRACTION = 0.5;

/**
 * Outcome of the absence-deletion gate: either the deletions are applied, or
 * they are withheld together with the reason why.
 */
export type DeletionDecision =
  | { apply: true }
  | { apply: false; reason: "incomplete-fetch" | "empty-live" | "mass-delete" };

/**
 * Decide whether this cycle may apply the ABSENCE-based deletions
 * (`plan.toDelete`). Pure, so the SPEC §8 safety invariants can be unit-tested
 * without credentials or git.
 *
 * Checks, in this order (the first match wins):
 *  1. No tracked files, or nothing to delete -> apply (trivially safe).
 *  2. `treeComplete` is false -> withhold, "incomplete-fetch". A page missing
 *     from a partial tree is not proof that it was deleted.
 *  3. `liveCount` is 0 -> withhold, "empty-live". An empty live set while
 *     files are tracked is treated as a failed fetch, not a real wipe.
 *  4. At least `MASS_DELETE_MIN_EXISTING` files are tracked and strictly more
 *     than `MASS_DELETE_FRACTION` of them would go -> withhold, "mass-delete".
 *  5. Otherwise -> apply.
 *
 * Moves are outside the scope of this decision: a moved page is present in
 * the live set, and its old-path removal is handled separately by the caller.
 *
 * @param args.treeComplete Whether the live tree fetch was complete.
 * @param args.liveCount Number of pages in the live set.
 * @param args.existingCount Number of files currently tracked.
 * @param args.deleteCount Number of absence deletions in the plan.
 * @returns `{ apply: true }`, or `{ apply: false, reason }`.
 */
export function decideAbsenceDeletions(args: {
  treeComplete: boolean;
  liveCount: number;
  existingCount: number;
  deleteCount: number;
}): DeletionDecision {
  const { treeComplete, liveCount, existingCount, deleteCount } = args;

  const nothingAtStake = existingCount === 0 || deleteCount === 0;
  if (nothingAtStake) return { apply: true };

  const looksLikeWipe =
    existingCount >= MASS_DELETE_MIN_EXISTING &&
    deleteCount > existingCount * MASS_DELETE_FRACTION;

  let reason: Extract<DeletionDecision, { apply: false }>["reason"] | undefined;
  if (!treeComplete) {
    reason = "incomplete-fetch";
  } else if (liveCount === 0) {
    reason = "empty-live";
  } else if (looksLikeWipe) {
    reason = "mass-delete";
  }

  return reason === undefined ? { apply: true } : { apply: false, reason };
}
|
||||
77
packages/git-sync/src/engine/roundtrip-helpers.ts
Normal file
77
packages/git-sync/src/engine/roundtrip-helpers.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
* Pure helpers extracted from the docmost-sync Phase-0 idempotency harness
|
||||
* (`src/roundtrip.ts`). Only the IO-free comparison utilities are vendored —
|
||||
* the CLI scaffold (`--fixture`/`--page`/`--corpus`, `loadSettings`, the
|
||||
* `DocmostClient` live path and `process.exit`) is NOT vendored (plan §2.1:
|
||||
* the roundtrip harness moves into the package's tests, not the engine).
|
||||
*/
|
||||
|
||||
/**
 * Produce a copy of a ProseMirror-style JSON tree in which no `attrs` object
 * carries an `id` key. Block ids are regenerated by `markdownToProseMirror`
 * (SPEC §11), so two documents must be compared without them.
 *
 * Arrays are mapped element by element, objects are rebuilt key by key (key
 * order preserved), and anything else is returned unchanged. Every `attrs`
 * value that is a non-null object loses its `id`, wherever it occurs in the
 * tree; all other attributes are kept. The input is never mutated.
 *
 * @param node Any JSON-like value (node, array of nodes, or primitive).
 * @returns A new tree without `attrs.id`, or `node` itself for primitives.
 */
export function stripBlockIds(node: any): any {
  if (Array.isArray(node)) return node.map(stripBlockIds);
  if (!node || typeof node !== "object") return node;

  const copy: any = {};
  for (const [key, value] of Object.entries(node)) {
    const isAttrsObject =
      key === "attrs" && Boolean(value) && typeof value === "object";
    if (!isAttrsObject) {
      copy[key] = stripBlockIds(value);
      continue;
    }
    // Shallow-copy the attrs, drop `id`, then recurse into what is left.
    const withoutId: Record<string, unknown> = {
      ...(value as Record<string, unknown>),
    };
    delete withoutId.id;
    copy.attrs = stripBlockIds(withoutId);
  }
  return copy;
}
|
||||
|
||||
/**
 * Locate the first place where two values differ, walking them recursively.
 *
 * Comparison rules:
 *  - Strictly equal values (`===`) match.
 *  - Values of different `typeof`, a `null` on either side, or two unequal
 *    non-objects diverge at the current path.
 *  - An array never matches a non-array.
 *  - Arrays of different length diverge at `<path>.length` (the reported
 *    values are the two lengths); otherwise elements are compared in order.
 *  - Objects are compared over the union of their keys: the keys of `a`
 *    first, then the keys only `b` has.
 *
 * @param a First value.
 * @param b Second value.
 * @param path Label of the current position; `"$"` denotes the root.
 * @returns `{ path, a, b }` for the first divergence, or `null` if none.
 */
export function firstDivergence(
  a: any,
  b: any,
  path = "$",
): { path: string; a: any; b: any } | null {
  if (a === b) return null;

  const mismatchHere = { path, a, b };

  // Only two non-null objects (arrays included) can still turn out equal.
  const bothObjects =
    typeof a === "object" && typeof b === "object" && a !== null && b !== null;
  if (!bothObjects) return mismatchHere;

  if (Array.isArray(a) !== Array.isArray(b)) return mismatchHere;

  if (Array.isArray(a)) {
    if (a.length !== b.length) {
      return { path: `${path}.length`, a: a.length, b: b.length };
    }
    for (let index = 0; index < a.length; index += 1) {
      const found = firstDivergence(a[index], b[index], `${path}[${index}]`);
      if (found !== null) return found;
    }
    return null;
  }

  const allKeys = new Set<string>([...Object.keys(a), ...Object.keys(b)]);
  for (const key of allKeys) {
    const found = firstDivergence(a[key], b[key], `${path}.${key}`);
    if (found !== null) return found;
  }
  return null;
}
|
||||
109
packages/git-sync/src/engine/sanitize.ts
Normal file
109
packages/git-sync/src/engine/sanitize.ts
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* Deterministic filename strategy (SPEC §12).
|
||||
*
|
||||
* The file name is COSMETIC — the source of truth for the file<->page link is
|
||||
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
|
||||
* functions are intentionally dependency-free and pure, so they are trivially
|
||||
* unit-testable.
|
||||
*/
|
||||
|
||||
// Printable characters forbidden in file names on common filesystems (mainly
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
// Spaces are NOT in this set; whitespace is normalized separately below.
// ASCII control characters (code points 0..31) are handled in a separate pass
// (see stripControlChars) to keep this literal free of embedded control bytes.
const FORBIDDEN_PRINTABLE_RE = /[/\\<>:"|?*]/g;

// Runs of whitespace collapse to a single space. Note that tabs, newlines and
// the other ASCII whitespace controls never reach this pattern: they are code
// points below 32 and have already been turned into "-" by stripControlChars.
// What is left to collapse is the plain space plus non-ASCII whitespace matched
// by `\s` (e.g. U+00A0, U+2028).
const WHITESPACE_RUN_RE = /\s+/g;

// Reserved Windows device names (case-insensitive). A bare match (with or
// without an extension) is unusable as a file name, so it is prefixed with "_".
const RESERVED_WINDOWS_NAMES = new Set([
  "con",
  "prn",
  "aux",
  "nul",
  "com1",
  "com2",
  "com3",
  "com4",
  "com5",
  "com6",
  "com7",
  "com8",
  "com9",
  "lpt1",
  "lpt2",
  "lpt3",
  "lpt4",
  "lpt5",
  "lpt6",
  "lpt7",
  "lpt8",
  "lpt9",
]);

// Cap on the sanitized length, leaving room for an extension and a
// disambiguation suffix within a path component (255 bytes on most FSes).
// NOTE: the cap counts UTF-16 code units, not bytes. A non-ASCII title can
// encode to more than MAX_LENGTH bytes (up to 3 bytes per code unit in UTF-8),
// so this is a character budget, not a hard byte guarantee.
const MAX_LENGTH = 120;

/**
 * Replace every ASCII control character (code units 0..31, which includes tab,
 * LF and CR) with "-". Implemented as a scan over code units rather than a
 * control-range regex literal, so the source file carries no embedded control
 * bytes.
 */
function stripControlChars(input: string): string {
  let out = "";
  for (let i = 0; i < input.length; i++) {
    out += input.charCodeAt(i) < 32 ? "-" : input[i];
  }
  return out;
}

/**
 * Sanitize a page title into a safe file-name component (WITHOUT extension).
 *
 * Steps, in order:
 *  1. Replace ASCII control characters and the forbidden printable characters
 *     (`/ \ < > : " | ? *`) with "-".
 *  2. Collapse each remaining whitespace run to one space and trim both ends.
 *  3. Cap the result at `MAX_LENGTH` UTF-16 code units and trim again. The cut
 *     never lands between the two halves of a surrogate pair: if it would, the
 *     whole character is dropped, so the result never ends in a lone high
 *     surrogate created by truncation.
 *  4. Prefix with "_" when the result is empty, consists only of dots, or its
 *     base name (the part before the first dot, case-insensitive) is a
 *     reserved Windows device name.
 *
 * @param title The raw page title. A nullish value is treated as "".
 * @returns A non-empty file-name component.
 */
export function sanitizeTitle(title: string): string {
  let name = stripControlChars(title ?? "")
    .replace(FORBIDDEN_PRINTABLE_RE, "-")
    .replace(WHITESPACE_RUN_RE, " ")
    .trim();

  if (name.length > MAX_LENGTH) {
    let end = MAX_LENGTH;
    // A plain slice at MAX_LENGTH could keep the high half of a surrogate pair
    // (e.g. an emoji straddling the boundary) and drop its low half, leaving
    // an ill-formed string. Back off by one unit in that case.
    const lastKept = name.charCodeAt(end - 1);
    const firstDropped = name.charCodeAt(end);
    const cutsSurrogatePair =
      lastKept >= 0xd800 &&
      lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 &&
      firstDropped <= 0xdfff;
    if (cutsSurrogatePair) end -= 1;
    name = name.slice(0, end).trim();
  }

  // Compare the base name (before the first dot) against reserved names, so
  // both "CON" and "con.md" are caught.
  const base = name.split(".")[0]?.toLowerCase() ?? "";
  // A name that is empty, consists only of dots ("." / ".." / "..."), or is a
  // reserved Windows device name is unusable as a path component. The all-dots
  // case is a path-traversal hazard in particular: an unprefixed ".." would
  // become a parent-directory segment and let a page escape the vault, so it
  // MUST be neutralized here (becomes "_..", which is a literal file name).
  if (
    name.length === 0 ||
    /^\.+$/.test(name) ||
    RESERVED_WINDOWS_NAMES.has(base)
  ) {
    name = "_" + name;
  }

  return name;
}
|
||||
|
||||
/**
 * Make a sanitized name unique among siblings that collapsed to the same name
 * by appending a space, a tilde and the given suffix (SPEC §12:
 * `Title ~slugId`). The suffix comes from stable page identity, so the result
 * is the same on every run.
 *
 * @param name The already sanitized, colliding name.
 * @param slugId The stable suffix to append (the page's slugId).
 * @returns `name`, followed by " ~" and `slugId`.
 */
export function disambiguate(name: string, slugId: string): string {
  const separator = " ~";
  return name + separator + slugId;
}
|
||||
@@ -12,7 +12,26 @@
|
||||
* Already-stable content is unaffected (the pass is idempotent), so re-pulls of
|
||||
* unchanged pages produce identical bytes and git sees no diff.
|
||||
*/
|
||||
import { convertProseMirrorToMarkdown, markdownToProseMirror, serializeDocmostMarkdownBody, } from "../lib/index.js";
|
||||
import {
|
||||
convertProseMirrorToMarkdown,
|
||||
markdownToProseMirror,
|
||||
serializeDocmostMarkdownBody,
|
||||
type DocmostMdMeta,
|
||||
} from "../lib/index.js";
|
||||
|
||||
/**
 * Identity meta written into a page file, in the shape `exportPageBody`
 * builds it (SPEC §4). The shape is kept identical so that files produced
 * through this module match `exportPageBody`'s output byte for byte.
 */
export interface PageMeta {
  /** Meta format version; only `1` is accepted by this type. */
  version: 1;
  pageId: string;
  slugId: string;
  title: string;
  spaceId: string;
  /** Id of the parent page, or `null` (presumably a top-level page; confirm). */
  parentPageId: string | null;
}
|
||||
|
||||
/**
|
||||
* Produce the self-contained `.md` file text for a page from its raw
|
||||
* ProseMirror `content` + identity meta, in the verified fixpoint form.
|
||||
@@ -26,27 +45,14 @@ import { convertProseMirrorToMarkdown, markdownToProseMirror, serializeDocmostMa
|
||||
* idempotent for already-stable content, and the convergence point for the
|
||||
* known converter asymmetries.
|
||||
*/
|
||||
export async function stabilizePageFile(content, meta) {
|
||||
// The meta shape is exactly what `exportPageBody` writes; cast to the lib's
|
||||
// DocmostMdMeta (a superset with optional fields) for the serializer.
|
||||
return serializeDocmostMarkdownBody(meta, await stabilizePageBody(content));
|
||||
}
|
||||
/**
|
||||
* The fixpoint markdown BODY for a page's ProseMirror `content`, WITHOUT any meta
|
||||
* envelope:
|
||||
*
|
||||
* md1 = convertProseMirrorToMarkdown(content) // export...
|
||||
* doc2 = markdownToProseMirror(md1) // ...import...
|
||||
* stableBody = convertProseMirrorToMarkdown(doc2) // ...re-export
|
||||
*
|
||||
* The single export->import->export pass is the verified fixpoint (SPEC §11):
|
||||
* idempotent for already-stable content, and the convergence point for the known
|
||||
* converter asymmetries. The native-Obsidian writer (`serializePageFile`) wraps
|
||||
* this body with a minimal `gitmost_id` frontmatter; determinism here is what
|
||||
* keeps re-pulls of an unchanged page byte-identical (no churn, loop-guard).
|
||||
*/
|
||||
export async function stabilizePageBody(content) {
|
||||
const md1 = convertProseMirrorToMarkdown(content);
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
return convertProseMirrorToMarkdown(doc2);
|
||||
export async function stabilizePageFile(
  content: unknown,
  meta: PageMeta,
): Promise<string> {
  // One export -> import -> re-export pass: the raw content is rendered to
  // markdown, parsed back into a ProseMirror document, and that re-imported
  // document is rendered once more to obtain the stable body.
  const reimported = await markdownToProseMirror(
    convertProseMirrorToMarkdown(content),
  );
  // `meta` has exactly the shape `exportPageBody` writes; the serializer takes
  // the lib's DocmostMdMeta (a superset with optional fields), hence the cast.
  return serializeDocmostMarkdownBody(
    meta as DocmostMdMeta,
    convertProseMirrorToMarkdown(reimported),
  );
}
|
||||
46
packages/git-sync/src/index.ts
Normal file
46
packages/git-sync/src/index.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
/**
|
||||
* Public surface of `@docmost/git-sync`.
|
||||
*
|
||||
* Phase A (plan §12.A) vendors only the PURE converter + pure engine modules
|
||||
* from docmost-sync. Server integration (GitmostDataSource, orchestrator,
|
||||
* VaultGit, pull/push) is added in later steps.
|
||||
*/
|
||||
|
||||
// Pure converter (markdown <-> ProseMirror, file envelope, canonicalization).
|
||||
export {
|
||||
serializeDocmostMarkdown,
|
||||
serializeDocmostMarkdownBody,
|
||||
parseDocmostMarkdown,
|
||||
convertProseMirrorToMarkdown,
|
||||
markdownToProseMirror,
|
||||
canonicalizeContent,
|
||||
docsCanonicallyEqual,
|
||||
} from "./lib/index.js";
|
||||
export type { DocmostMdMeta } from "./lib/index.js";
|
||||
|
||||
// Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize,
|
||||
// loop-guard body hash.
|
||||
export {
|
||||
planReconciliation,
|
||||
decideAbsenceDeletions,
|
||||
MASS_DELETE_MIN_EXISTING,
|
||||
MASS_DELETE_FRACTION,
|
||||
} from "./engine/reconcile.js";
|
||||
export type {
|
||||
LiveEntry,
|
||||
ExistingEntry,
|
||||
WriteEntry,
|
||||
MovedEntry,
|
||||
ReconciliationPlan,
|
||||
DeletionDecision,
|
||||
} from "./engine/reconcile.js";
|
||||
|
||||
export { buildVaultLayout } from "./engine/layout.js";
|
||||
export type { PageNode, VaultEntry } from "./engine/layout.js";
|
||||
|
||||
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
|
||||
|
||||
export { stabilizePageFile } from "./engine/stabilize.js";
|
||||
export type { PageMeta } from "./engine/stabilize.js";
|
||||
|
||||
export { bodyHash } from "./engine/loop-guard.js";
|
||||
@@ -1,7 +1,9 @@
|
||||
/**
|
||||
* Semantic canonicalization of ProseMirror/TipTap documents for the round-trip
|
||||
* idempotency check (SPEC §11, "Задача №0", option (б): compare a CANONICALIZED
|
||||
* form rather than raw bytes).
|
||||
* docmost-sync ADDITION (not present in docmost-mcp).
|
||||
*
|
||||
* Semantic canonicalization of ProseMirror/TipTap documents for the Phase-0
|
||||
* round-trip idempotency check (SPEC §11, "Задача №0", option (б): compare a
|
||||
* CANONICALIZED form rather than raw bytes).
|
||||
*
|
||||
* `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g.
|
||||
* `indent: null` where the source omitted it) and regenerates per-block ids on
|
||||
@@ -10,8 +12,10 @@
|
||||
* normalizes a document so that two semantically-equal docs compare deep-equal
|
||||
* regardless of block ids and absent-vs-explicit-default-null attributes.
|
||||
*
|
||||
* It is a self-contained module with no external dependencies.
|
||||
* This file is intentionally a NEW, self-contained module so it is trivial to
|
||||
* backport into docmost-mcp without touching existing code.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Known NON-NULL schema defaults that `markdownToProseMirror` materializes on
|
||||
* import, keyed by node/mark type → { attr: defaultValue }.
|
||||
@@ -48,35 +52,36 @@
|
||||
* (docmost-schema.ts L174), so it is already handled by the null-drop rule and
|
||||
* is intentionally NOT listed here.
|
||||
*/
|
||||
const KNOWN_DEFAULTS = {
|
||||
// mark types
|
||||
link: {
|
||||
target: "_blank",
|
||||
rel: "noopener noreferrer nofollow",
|
||||
},
|
||||
comment: {
|
||||
resolved: false,
|
||||
},
|
||||
// node types
|
||||
orderedList: {
|
||||
start: 1,
|
||||
},
|
||||
drawio: {
|
||||
align: "center",
|
||||
},
|
||||
excalidraw: {
|
||||
align: "center",
|
||||
},
|
||||
video: {
|
||||
align: "center",
|
||||
},
|
||||
youtube: {
|
||||
align: "center",
|
||||
},
|
||||
embed: {
|
||||
align: "center",
|
||||
},
|
||||
const KNOWN_DEFAULTS: Record<string, Record<string, unknown>> = {
  // --- mark types ---
  link: { target: "_blank", rel: "noopener noreferrer nofollow" },
  comment: { resolved: false },

  // --- node types ---
  orderedList: { start: 1 },
  // The remaining node types listed here share one default: centered alignment.
  drawio: { align: "center" },
  excalidraw: { align: "center" },
  video: { align: "center" },
  youtube: { align: "center" },
  embed: { align: "center" },
};
|
||||
|
||||
/**
|
||||
* Prune an `attrs` object in place on a fresh copy: drop keys whose value is
|
||||
* `null` or `undefined` (an absent attribute and an explicit default of `null`
|
||||
@@ -93,29 +98,31 @@ const KNOWN_DEFAULTS = {
|
||||
* left (so the caller can drop the `attrs` key entirely: `{attrs:{}}` ≡ no
|
||||
* attrs).
|
||||
*/
|
||||
function canonicalizeAttrs(attrs, dropId, type) {
|
||||
const defaults = type ? KNOWN_DEFAULTS[type] : undefined;
|
||||
const out = {};
|
||||
// Stable key order so a JSON.stringify of the canonical form is comparable
|
||||
// regardless of the input's key order.
|
||||
for (const key of Object.keys(attrs).sort()) {
|
||||
// Block ids are regenerated on import; drop them on NODE attrs only.
|
||||
if (dropId && key === "id")
|
||||
continue;
|
||||
const value = attrs[key];
|
||||
// Absent ≡ explicit-default-null/undefined.
|
||||
if (value === null || value === undefined)
|
||||
continue;
|
||||
// Absent ≡ explicit known non-null default (e.g. link.target="_blank").
|
||||
// A non-default value (e.g. orderedList.start=5) does NOT match, so it is
|
||||
// kept. The `comment` mark's `commentId` is never a default, so it always
|
||||
// survives (SPEC §3); only its `resolved: false` default is normalized away.
|
||||
if (defaults && key in defaults && value === defaults[key])
|
||||
continue;
|
||||
out[key] = value;
|
||||
}
|
||||
return Object.keys(out).length > 0 ? out : undefined;
|
||||
function canonicalizeAttrs(
  attrs: Record<string, unknown>,
  dropId: boolean,
  type: string | undefined,
): Record<string, unknown> | undefined {
  const typeDefaults = type ? KNOWN_DEFAULTS[type] : undefined;

  // An attribute carries no information (and is therefore dropped) when it is
  //  - the block id, on NODE attrs only (`dropId`): ids are regenerated on import;
  //  - null/undefined: absent ≡ explicit null default;
  //  - equal to this type's known non-null default (e.g. link.target="_blank").
  // A non-default value (e.g. orderedList.start=5) matches none of these and is
  // kept; so is the `comment` mark's `commentId`, which is never a default
  // (SPEC §3) — only its `resolved: false` default is normalized away.
  const isRedundant = (key: string, value: unknown): boolean =>
    (dropId && key === "id") ||
    value === null ||
    value === undefined ||
    (typeDefaults !== undefined &&
      key in typeDefaults &&
      value === typeDefaults[key]);

  // Keys are visited in sorted order, so a JSON.stringify of the canonical
  // form does not depend on the key order of the input.
  const pruned: Record<string, unknown> = {};
  Object.keys(attrs)
    .sort()
    .forEach((key) => {
      const value = attrs[key];
      if (!isRedundant(key, value)) pruned[key] = value;
    });

  // Nothing left -> report `undefined` so the caller can omit `attrs` entirely.
  return Object.keys(pruned).length === 0 ? undefined : pruned;
}
|
||||
|
||||
/**
|
||||
* Return a DEEP COPY of a ProseMirror node tree, canonicalized so that two
|
||||
* semantically-equal documents compare deep-equal. Rules (applied recursively
|
||||
@@ -134,43 +141,45 @@ function canonicalizeAttrs(attrs, dropId, type) {
|
||||
* 5. Preserve `text`, `type`, and `content` order exactly.
|
||||
* 6. Never mutate the input.
|
||||
*/
|
||||
export function canonicalizeContent(node) {
|
||||
if (Array.isArray(node)) {
|
||||
return node.map((child) => canonicalizeContent(child));
|
||||
export function canonicalizeContent(node: any): any {
|
||||
if (Array.isArray(node)) {
|
||||
return node.map((child) => canonicalizeContent(child));
|
||||
}
|
||||
if (node === null || typeof node !== "object") {
|
||||
// Primitive leaf (string/number/boolean/null): returned as-is.
|
||||
return node;
|
||||
}
|
||||
|
||||
// A node is a mark when it has a `type` but never carries block `content`
|
||||
// and lives inside a `marks` array. We cannot tell from the node alone, so
|
||||
// we distinguish at the recursion site: node `attrs` drop `id`, mark `attrs`
|
||||
// do not. This is handled by passing a `dropId` flag down for the `attrs`
|
||||
// key specifically (nodes) vs the `marks[].attrs` path (marks).
|
||||
const out: Record<string, unknown> = {};
|
||||
for (const key of Object.keys(node)) {
|
||||
if (key === "attrs" && node.attrs && typeof node.attrs === "object") {
|
||||
// Node-level attrs: drop the block id, null/undefined attrs, and any
|
||||
// attr at this node type's known non-null schema default.
|
||||
const canon = canonicalizeAttrs(
|
||||
node.attrs as Record<string, unknown>,
|
||||
true,
|
||||
typeof node.type === "string" ? node.type : undefined,
|
||||
);
|
||||
if (canon !== undefined) out.attrs = canon;
|
||||
// else: drop the `attrs` key entirely (rule 3).
|
||||
} else if (key === "marks" && Array.isArray(node.marks)) {
|
||||
// Marks: keep them all (incl. comment); canonicalize their attrs but do
|
||||
// NOT drop `id` (a mark's `id` would be a meaningful attr, not a block
|
||||
// id). An empty marks array is dropped so `marks:[]` ≡ no marks.
|
||||
const marks = (node.marks as any[]).map((mark) => canonicalizeMark(mark));
|
||||
if (marks.length > 0) out.marks = marks;
|
||||
} else {
|
||||
out[key] = canonicalizeContent(node[key]);
|
||||
}
|
||||
if (node === null || typeof node !== "object") {
|
||||
// Primitive leaf (string/number/boolean/null): returned as-is.
|
||||
return node;
|
||||
}
|
||||
// A node is a mark when it has a `type` but never carries block `content`
|
||||
// and lives inside a `marks` array. We cannot tell from the node alone, so
|
||||
// we distinguish at the recursion site: node `attrs` drop `id`, mark `attrs`
|
||||
// do not. This is handled by passing a `dropId` flag down for the `attrs`
|
||||
// key specifically (nodes) vs the `marks[].attrs` path (marks).
|
||||
const out = {};
|
||||
for (const key of Object.keys(node)) {
|
||||
if (key === "attrs" && node.attrs && typeof node.attrs === "object") {
|
||||
// Node-level attrs: drop the block id, null/undefined attrs, and any
|
||||
// attr at this node type's known non-null schema default.
|
||||
const canon = canonicalizeAttrs(node.attrs, true, typeof node.type === "string" ? node.type : undefined);
|
||||
if (canon !== undefined)
|
||||
out.attrs = canon;
|
||||
// else: drop the `attrs` key entirely (rule 3).
|
||||
}
|
||||
else if (key === "marks" && Array.isArray(node.marks)) {
|
||||
// Marks: keep them all (incl. comment); canonicalize their attrs but do
|
||||
// NOT drop `id` (a mark's `id` would be a meaningful attr, not a block
|
||||
// id). An empty marks array is dropped so `marks:[]` ≡ no marks.
|
||||
const marks = node.marks.map((mark) => canonicalizeMark(mark));
|
||||
if (marks.length > 0)
|
||||
out.marks = marks;
|
||||
}
|
||||
else {
|
||||
out[key] = canonicalizeContent(node[key]);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Canonicalize a single mark: keep `type`, prune its `attrs` (null/undefined
|
||||
* AND known non-null defaults dropped, empty attrs removed) but NEVER drop a
|
||||
@@ -180,66 +189,62 @@ export function canonicalizeContent(node) {
|
||||
* survives — SPEC §3); only known defaults like `link.target="_blank"`,
|
||||
* `link.rel="noopener…"` and `comment.resolved=false` are normalized away.
|
||||
*/
|
||||
function canonicalizeMark(mark) {
|
||||
if (mark === null || typeof mark !== "object")
|
||||
return mark;
|
||||
const out = {};
|
||||
for (const key of Object.keys(mark)) {
|
||||
if (key === "attrs" && mark.attrs && typeof mark.attrs === "object") {
|
||||
const canon = canonicalizeAttrs(mark.attrs, false, typeof mark.type === "string" ? mark.type : undefined);
|
||||
if (canon !== undefined)
|
||||
out.attrs = canon;
|
||||
}
|
||||
else {
|
||||
out[key] = canonicalizeContent(mark[key]);
|
||||
}
|
||||
function canonicalizeMark(mark: any): any {
|
||||
if (mark === null || typeof mark !== "object") return mark;
|
||||
const out: Record<string, unknown> = {};
|
||||
for (const key of Object.keys(mark)) {
|
||||
if (key === "attrs" && mark.attrs && typeof mark.attrs === "object") {
|
||||
const canon = canonicalizeAttrs(
|
||||
mark.attrs as Record<string, unknown>,
|
||||
false,
|
||||
typeof mark.type === "string" ? mark.type : undefined,
|
||||
);
|
||||
if (canon !== undefined) out.attrs = canon;
|
||||
} else {
|
||||
out[key] = canonicalizeContent(mark[key]);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deep structural equality of two values that is key-order-insensitive.
|
||||
* Used to compare canonical forms. (`canonicalizeContent` already emits
|
||||
* `attrs` in a stable key order, but the top-level node keys preserve input
|
||||
* order, so we compare structurally rather than by string.)
|
||||
*/
|
||||
function deepEqual(a, b) {
|
||||
if (a === b)
|
||||
return true;
|
||||
if (typeof a !== typeof b)
|
||||
return false;
|
||||
if (a === null || b === null)
|
||||
return a === b;
|
||||
if (typeof a !== "object")
|
||||
return false;
|
||||
const aIsArr = Array.isArray(a);
|
||||
const bIsArr = Array.isArray(b);
|
||||
if (aIsArr !== bIsArr)
|
||||
return false;
|
||||
if (aIsArr) {
|
||||
if (a.length !== b.length)
|
||||
return false;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
if (!deepEqual(a[i], b[i]))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
const aKeys = Object.keys(a);
|
||||
const bKeys = Object.keys(b);
|
||||
if (aKeys.length !== bKeys.length)
|
||||
return false;
|
||||
for (const k of aKeys) {
|
||||
if (!Object.prototype.hasOwnProperty.call(b, k))
|
||||
return false;
|
||||
if (!deepEqual(a[k], b[k]))
|
||||
return false;
|
||||
/**
 * Key-order-insensitive deep structural equality.
 *
 * Identical values (`===`) are equal. Otherwise both sides must be non-null
 * objects of the same kind: arrays need the same length and pairwise-equal
 * elements in order; other objects need the same number of own enumerable keys,
 * with every key of `a` being an own property of `b` holding a deeply-equal
 * value. An array never equals a non-array object.
 *
 * @param a first value
 * @param b second value
 * @returns true when the two values are structurally equal
 */
function deepEqual(a: any, b: any): boolean {
  if (a === b) return true;

  // The values are not identical, so anything other than a pair of non-null
  // objects (a null, differing types, unequal primitives) is unequal.
  const bothObjects =
    typeof a === "object" && typeof b === "object" && a !== null && b !== null;
  if (!bothObjects) return false;

  const leftIsArray = Array.isArray(a);
  if (leftIsArray !== Array.isArray(b)) return false;

  if (leftIsArray) {
    if (a.length !== b.length) return false;
    for (const [idx, item] of a.entries()) {
      if (!deepEqual(item, b[idx])) return false;
    }
    return true;
  }

  const keys = Object.keys(a);
  if (keys.length !== Object.keys(b).length) return false;
  return keys.every(
    (key) =>
      Object.prototype.hasOwnProperty.call(b, key) && deepEqual(a[key], b[key]),
  );
}
|
||||
|
||||
/**
|
||||
* True when two ProseMirror documents are semantically equal: equal after
|
||||
* canonicalization (block ids stripped, absent-vs-default-null normalized).
|
||||
*/
|
||||
export function docsCanonicallyEqual(a, b) {
|
||||
return deepEqual(canonicalizeContent(a), canonicalizeContent(b));
|
||||
/**
 * Semantic equality of two ProseMirror documents: each document is passed
 * through `canonicalizeContent` and the two canonical forms are then compared
 * structurally with `deepEqual`.
 *
 * @param a first document (ProseMirror JSON)
 * @param b second document (ProseMirror JSON)
 * @returns true when the canonical forms are deeply equal
 */
export function docsCanonicallyEqual(a: any, b: any): boolean {
  const left = canonicalizeContent(a);
  const right = canonicalizeContent(b);
  return deepEqual(left, right);
}
|
||||
319
packages/git-sync/src/lib/diff.ts
Normal file
319
packages/git-sync/src/lib/diff.ts
Normal file
@@ -0,0 +1,319 @@
|
||||
/**
|
||||
* Headless, Docmost-equivalent document diff.
|
||||
*
|
||||
* Docmost's history editor computes a change set with the exact pipeline below
|
||||
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
|
||||
* editor decorations. This module runs the SAME computation but serializes the
|
||||
* result to text + integrity counts instead of decorations, so a diff can be
|
||||
* previewed without a browser.
|
||||
*
|
||||
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
|
||||
* maintained published fork of the MIT prosemirror-recreate-steps source that
|
||||
* Docmost vendors in @docmost/editor-ext; it exposes the identical
|
||||
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
|
||||
* signature.
|
||||
*
|
||||
* If recreateTransform / the changeset throws on a pathological document pair,
|
||||
* we fall back to a coarse block-level text diff so the tool never hard-fails.
|
||||
*/
|
||||
|
||||
import { getSchema } from "@tiptap/core";
|
||||
import { Node } from "@tiptap/pm/model";
|
||||
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
|
||||
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
|
||||
import { docmostExtensions } from "./docmost-schema.js";
|
||||
|
||||
/**
 * One inserted or deleted run of text, together with the block it occurred in.
 */
export interface DiffChange {
  /** Whether the text was added ("insert") or removed ("delete"). */
  op: "insert" | "delete";
  /** Lead (plain) text of the block that contains the change, for context. */
  block: string;
  /** The inserted or deleted text itself. */
  text: string;
}
|
||||
|
||||
/**
 * Integrity figures for a pair of documents. Every count is an `[old, new]`
 * tuple; `footnoteMarkers` is an `[oldList, newList]` pair of marker numbers.
 */
export interface DiffIntegrity {
  /** Number of `image` nodes. */
  images: [number, number];
  /** Number of distinct link hrefs. */
  links: [number, number];
  /** Number of `table` nodes. */
  tables: [number, number];
  /** Number of `callout` nodes. */
  callouts: [number, number];
  /** `[N]` footnote marker numbers found in the body, in reading order. */
  footnoteMarkers: [number[], number[]];
}
|
||||
|
||||
/** Complete outcome of a document diff. */
export interface DiffResult {
  /**
   * Totals: length of inserted text, length of deleted text, and the number of
   * distinct changed-block keys.
   */
  summary: { inserted: number; deleted: number; blocksChanged: number };
  /** Old-vs-new integrity figures (images, links, tables, callouts, footnotes). */
  integrity: DiffIntegrity;
  /** Individual insert/delete changes, in the order they were found. */
  changes: DiffChange[];
  /** Human-readable unified-ish summary. */
  markdown: string;
}
|
||||
|
||||
/** Build the schema once; it is pure and reused across calls. */
|
||||
// Module-level ProseMirror schema derived from `docmostExtensions`; it is the
// schema handed to `Node.fromJSON` when the documents to diff are parsed.
const schema = getSchema(docmostExtensions);
|
||||
|
||||
/**
 * Flatten a JSON node to plain text: the node's own `text` (when it is a
 * string) followed by the plain text of each child in `content`, in order.
 * Anything that is not an object yields "".
 */
function plainText(node: any): string {
  if (!node || typeof node !== "object") return "";

  const own: string = typeof node.text === "string" ? node.text : "";
  if (!Array.isArray(node.content)) return own;

  return node.content.reduce(
    (acc: string, child: any) => acc + plainText(child),
    own,
  );
}
|
||||
|
||||
/**
 * Count the object nodes of a JSON doc for which `pred` is truthy. The walk is
 * pre-order: a node is tested first, then each entry of its `content` array.
 * Non-object entries are ignored.
 */
function countNodes(doc: any, pred: (node: any) => boolean): number {
  const tally = (node: any): number => {
    if (!node || typeof node !== "object") return 0;
    let total = pred(node) ? 1 : 0;
    if (Array.isArray(node.content)) {
      for (const child of node.content) total += tally(child);
    }
    return total;
  };
  return tally(doc);
}
|
||||
|
||||
/**
 * Count the DISTINCT links in a JSON doc, keyed by `href`.
 *
 * One link may be split over several adjacent text runs (for example a
 * "link+bold" run followed by a plain "link" run), so counting link-bearing
 * runs would over-count. Instead every `link` mark on a text node contributes
 * its href to a Set. A link mark whose href is missing or not a string is
 * recorded under the single key "", so malformed links count once in total.
 */
function countUniqueLinks(doc: any): number {
  const seen = new Set<string>();
  // Explicit work-list; visiting order is irrelevant because only the set of
  // hrefs matters.
  const pending: any[] = [doc];

  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") continue;

    if (node.type === "text" && Array.isArray(node.marks)) {
      for (const mark of node.marks) {
        if (!mark || mark.type !== "link") continue;
        const href = mark.attrs?.href;
        seen.add(typeof href === "string" ? href : "");
      }
    }

    if (Array.isArray(node.content)) {
      for (const child of node.content) pending.push(child);
    }
  }

  return seen.size;
}
|
||||
|
||||
/**
 * Collect, in reading order, the integers of every `[N]` footnote marker in
 * the BODY of a document. The body is every top-level block before the first
 * heading whose trimmed plain text equals `notesHeading`; when there is no such
 * heading the whole document is the body.
 *
 * @param doc          ProseMirror JSON document
 * @param notesHeading exact (trimmed) text of the heading that starts the notes
 * @returns the marker numbers in the order they appear
 */
function footnoteMarkers(doc: any, notesHeading: string): number[] {
  const blocks: any[] = Array.isArray(doc?.content) ? doc.content : [];

  const startsNotes = (block: any): boolean =>
    Boolean(block) &&
    block.type === "heading" &&
    plainText(block).trim() === notesHeading;

  const cut = blocks.findIndex(startsNotes);
  const body = cut >= 0 ? blocks.slice(0, cut) : blocks;

  const pattern = /\[(\d+)\]/g;
  const found: number[] = [];
  for (const block of body) {
    const text = plainText(block);
    // The regex is global and shared across blocks: restart it for each text.
    pattern.lastIndex = 0;
    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      found.push(Number(hit[1]));
    }
  }
  return found;
}
|
||||
|
||||
/**
 * Build the integrity figures for two JSON docs: `[old, new]` counts of image,
 * table and callout nodes and of distinct links, plus the `[oldList, newList]`
 * footnote marker lists.
 */
function computeIntegrity(
  oldDoc: any,
  newDoc: any,
  notesHeading: string,
): DiffIntegrity {
  // [old, new] count of nodes whose `type` equals the given name.
  const countOfType = (type: string): [number, number] => [
    countNodes(oldDoc, (n) => n.type === type),
    countNodes(newDoc, (n) => n.type === type),
  ];

  return {
    images: countOfType("image"),
    links: [countUniqueLinks(oldDoc), countUniqueLinks(newDoc)],
    tables: countOfType("table"),
    callouts: countOfType("callout"),
    footnoteMarkers: [
      footnoteMarkers(oldDoc, notesHeading),
      footnoteMarkers(newDoc, notesHeading),
    ],
  };
}
|
||||
|
||||
/**
 * Lead text of the top-level block of `node` that contains document position
 * `pos`. The position is clamped into `[0, node.content.size]` before being
 * resolved, the text is capped at 80 characters (77 + "..."), and any error
 * raised while resolving yields "".
 */
function blockContextAt(node: Node, pos: number): string {
  try {
    const safePos = Math.min(Math.max(pos, 0), node.content.size);
    const resolved = node.resolve(safePos);
    // Depth 1 is the top-level block of a doc node; at depth 0 use the root.
    const topBlock = resolved.node(resolved.depth >= 1 ? 1 : 0);
    const lead = topBlock.textContent || "";
    if (lead.length <= 80) return lead;
    return lead.slice(0, 77) + "...";
  } catch {
    return "";
  }
}
|
||||
|
||||
/**
 * Shorten a string for the markdown summary.
 *
 * A string no longer than `n` is returned unchanged. A longer one is cut so the
 * result, including the trailing "...", is exactly `n` characters long.
 *
 * @param s the text to shorten
 * @param n maximum length of the result (default 120)
 * @returns `s` itself, or its first `n - 3` characters followed by "..."
 */
function truncate(s: string, n = 120): string {
  if (!(s.length > n)) return s;
  // With a budget below the ellipsis length, `n - 3` is negative and `slice`
  // would then count from the END of the string, returning far more than `n`
  // characters. Emit as much of the ellipsis as fits instead.
  if (n < 3) return "...".slice(0, Math.max(0, n));
  return s.slice(0, n - 3) + "...";
}
|
||||
|
||||
/**
 * Coarse fallback diff over top-level blocks, used only when the precise
 * changeset pipeline throws. Each document is reduced to the plain text of its
 * top-level blocks; a non-blank old text that does not occur among the new
 * texts is reported as a delete, and a non-blank new text that does not occur
 * among the old texts as an insert. Deletes are listed before inserts.
 */
function coarseDiff(oldDoc: any, newDoc: any): DiffChange[] {
  const blockTexts = (doc: any): string[] =>
    (Array.isArray(doc?.content) ? doc.content : []).map(plainText);

  const oldTexts = blockTexts(oldDoc);
  const newTexts = blockTexts(newDoc);
  const inOld = new Set(oldTexts);
  const inNew = new Set(newTexts);

  const changes: DiffChange[] = [];
  // Report every non-blank text of `texts` that the other side does not have.
  const collect = (
    texts: string[],
    otherSide: Set<string>,
    op: DiffChange["op"],
  ): void => {
    for (const text of texts) {
      if (otherSide.has(text) || text.trim() === "") continue;
      changes.push({ op, block: truncate(text, 80), text });
    }
  };

  collect(oldTexts, inNew, "delete");
  collect(newTexts, inOld, "insert");
  return changes;
}
|
||||
|
||||
/**
 * Render the human-readable, unified-ish markdown summary of a diff: a title
 * line with the totals, an optional note when the coarse fallback was used, the
 * integrity figures, and one `+` / `-` line per change.
 *
 * @param result   the diff result without its `markdown` field
 * @param fellBack true when the coarse block-level fallback produced `changes`
 */
function renderMarkdown(
  result: Omit<DiffResult, "markdown">,
  fellBack: boolean,
): string {
  const { summary, integrity, changes } = result;
  const arrow = (pair: [number, number]): string => `${pair[0]} -> ${pair[1]}`;
  const [oldMarkers, newMarkers] = integrity.footnoteMarkers;

  const header: string[] = [
    `# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`,
  ];
  if (fellBack) {
    header.push("", "> note: precise diff failed; coarse block-level diff shown.");
  }

  const integritySection: string[] = [
    "",
    "## Integrity (old -> new)",
    `- images: ${arrow(integrity.images)}`,
    `- links: ${arrow(integrity.links)}`,
    `- tables: ${arrow(integrity.tables)}`,
    `- callouts: ${arrow(integrity.callouts)}`,
    `- footnoteMarkers: [${oldMarkers.join(", ")}] -> [${newMarkers.join(", ")}]`,
  ];

  const changeLines: string[] =
    changes.length === 0
      ? ["(no textual changes)"]
      : changes.map((c) => {
          const sign = c.op === "insert" ? "+" : "-";
          const ctx = c.block ? ` @ ${truncate(c.block, 60)}` : "";
          return `${sign} ${truncate(c.text)}${ctx}`;
        });

  return [...header, ...integritySection, "", "## Changes", ...changeLines].join(
    "\n",
  );
}
|
||||
|
||||
/**
 * Compute the precise change list with the same pipeline Docmost's history
 * editor uses (recreateTransform -> ChangeSet.addSteps -> simplifyChanges).
 * For every simplified change the deleted text (old-doc range [fromA, toA)) is
 * emitted before the inserted text (new-doc range [fromB, toB)); empty texts
 * are skipped. Any error from parsing or from the pipeline propagates to the
 * caller.
 */
function preciseChanges(oldDocJson: any, newDocJson: any): DiffChange[] {
  const oldNode = Node.fromJSON(schema, oldDocJson);
  const newNode = Node.fromJSON(schema, newDocJson);
  const tr = recreateTransform(oldNode, newNode, {
    complexSteps: false,
    wordDiffs: true,
    simplifyDiff: true,
  });
  const changeSet = ChangeSet.create(oldNode).addSteps(
    tr.doc,
    tr.mapping.maps,
    [],
  );
  const simplified = simplifyChanges(changeSet.changes, newNode);

  const changes: DiffChange[] = [];
  for (const change of simplified) {
    // Deleted text lives in the OLD doc coordinate range [fromA, toA).
    if (change.toA > change.fromA) {
      const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
      if (text.length > 0) {
        const block = blockContextAt(oldNode, change.fromA);
        changes.push({ op: "delete", block, text });
      }
    }
    // Inserted text lives in the NEW doc coordinate range [fromB, toB).
    if (change.toB > change.fromB) {
      const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
      if (text.length > 0) {
        const block = blockContextAt(newNode, change.fromB);
        changes.push({ op: "insert", block, text });
      }
    }
  }
  return changes;
}

/**
 * Diff two ProseMirror JSON documents the way Docmost's history editor does and
 * serialize the result to text + integrity counts.
 *
 * The precise changeset pipeline is tried first. If it throws on a pathological
 * document pair, the result degrades to a coarse block-level text diff (flagged
 * in the markdown summary) so this function never throws for that reason. The
 * summary totals are always tallied from the change list that is actually
 * returned, so a pipeline failure part-way through cannot leave stale partial
 * counts behind.
 *
 * @param oldDocJson the earlier document
 * @param newDocJson the later document
 * @param notesHeading heading delimiting body from notes for footnote counting
 * @returns summary totals, integrity figures, the change list and its markdown
 */
export function diffDocs(
  oldDocJson: any,
  newDocJson: any,
  notesHeading: string = "Примечания переводчика",
): DiffResult {
  const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);

  let changes: DiffChange[];
  let fellBack = false;
  try {
    changes = preciseChanges(oldDocJson, newDocJson);
  } catch {
    // Pathological pair: degrade to a coarse block-level diff so we never throw.
    fellBack = true;
    changes = coarseDiff(oldDocJson, newDocJson);
  }

  // Single tally over the final list, shared by the precise and coarse paths.
  // Block keys are prefixed with the op initial ("d:" / "i:") so a delete and
  // an insert with the same block context count as separate entries.
  let inserted = 0;
  let deleted = 0;
  const changedBlocks = new Set<string>();
  for (const c of changes) {
    if (c.op === "insert") inserted += c.text.length;
    else deleted += c.text.length;
    if (c.block) changedBlocks.add(c.op[0] + ":" + c.block);
  }

  const partial: Omit<DiffResult, "markdown"> = {
    summary: { inserted, deleted, blocksChanged: changedBlocks.size },
    integrity,
    changes,
  };
  return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
|
||||
1090
packages/git-sync/src/lib/docmost-schema.ts
Normal file
1090
packages/git-sync/src/lib/docmost-schema.ts
Normal file
File diff suppressed because it is too large
Load Diff
27
packages/git-sync/src/lib/index.ts
Normal file
27
packages/git-sync/src/lib/index.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Public surface of the vendored pure converter (the `lib/` half of the
|
||||
* docmost-sync `docmost-client` package). This barrel re-exports only the
|
||||
* PURE, IO-free pieces the sync engine needs: the self-contained markdown
|
||||
* (de)serializers, the lossless ProseMirror <-> Markdown converter, the
|
||||
* markdown -> ProseMirror import path, and semantic canonicalization for the
|
||||
* round-trip idempotency check (SPEC §11).
|
||||
*
|
||||
* The REST client, websocket/collab write-path, auth-utils and page-lock from
|
||||
* the upstream package are deliberately NOT vendored (the gitmost server writes
|
||||
* natively — plan §2.2/§2.3).
|
||||
*/
|
||||
export {
|
||||
serializeDocmostMarkdown,
|
||||
parseDocmostMarkdown,
|
||||
serializeDocmostMarkdownBody,
|
||||
} from "./markdown-document.js";
|
||||
export type { DocmostMdMeta } from "./markdown-document.js";
|
||||
|
||||
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
|
||||
|
||||
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
|
||||
|
||||
export {
|
||||
canonicalizeContent,
|
||||
docsCanonicallyEqual,
|
||||
} from "./canonicalize.js";
|
||||
861
packages/git-sync/src/lib/markdown-converter.ts
Normal file
861
packages/git-sync/src/lib/markdown-converter.ts
Normal file
@@ -0,0 +1,861 @@
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
*/
|
||||
export function convertProseMirrorToMarkdown(content: any): string {
|
||||
if (!content || !content.content) return "";
|
||||
|
||||
// Escape a value interpolated into an HTML double-quoted attribute value
|
||||
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the
|
||||
// ATTRIBUTE context only the quote that delimits the value and the ampersand
|
||||
// that starts an entity are special, so we escape ONLY & " (and ' for safety
|
||||
// when single-quoted delimiters are used). We deliberately do NOT escape < or
|
||||
// >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode
|
||||
// &lt;/&gt; back inside attribute values, so escaping them would corrupt the
|
||||
// stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on
|
||||
// every round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`). Escaping & "
|
||||
// keeps the value inert against attribute-injection while staying idempotent.
|
||||
// NOTE: escape ONLY & and " here. The value is always wrapped in double
|
||||
// quotes, so " is the only delimiter; ' is NOT special in a double-quoted
|
||||
// value, and parse5 does not decode an escaped ' (e.g. &#39;) back inside attribute values, so
|
||||
// escaping ' would (like < >) corrupt the value and accumulate &amp; on every
|
||||
// round-trip. Escaping & and " is idempotent (parse5 decodes them back).
|
||||
const escapeAttr = (value: unknown): string =>
|
||||
String(value)
|
||||
.replace(/&/g, "&")
|
||||
.replace(/"/g, """);
|
||||
|
||||
// Escape a value placed as HTML element TEXT content (between tags), where
|
||||
// <, >, and & are all significant. Used for text rendered inside raw-HTML
|
||||
// blocks (table cells / columns) so stored characters cannot inject markup.
|
||||
const escapeHtmlText = (value: unknown): string =>
|
||||
String(value)
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">");
|
||||
|
||||
// Percent-encode characters that would break out of a markdown URL target
|
||||
// (...) — whitespace/newlines and parentheses — so a stored src stays a
|
||||
// single inert token (used for image/video/youtube srcs).
|
||||
const encodeMdUrl = (value: unknown): string =>
|
||||
String(value || "")
|
||||
.replace(/\s/g, (c: string) => (c === " " ? "%20" : encodeURIComponent(c)))
|
||||
.replace(/\(/g, "%28")
|
||||
.replace(/\)/g, "%29");
|
||||
|
||||
const processNode = (node: any): string => {
|
||||
const type = node.type;
|
||||
const nodeContent = node.content || [];
|
||||
|
||||
switch (type) {
|
||||
case "doc":
|
||||
return nodeContent.map(processNode).join("\n\n");
|
||||
|
||||
case "paragraph":
|
||||
const text = nodeContent.map(processNode).join("");
|
||||
const align = node.attrs?.textAlign;
|
||||
if (align && align !== "left") {
|
||||
return `<div align="${escapeAttr(align)}">${text}</div>`;
|
||||
}
|
||||
return text || "";
|
||||
|
||||
case "heading":
|
||||
const level = node.attrs?.level || 1;
|
||||
const headingText = nodeContent.map(processNode).join("");
|
||||
return "#".repeat(level) + " " + headingText;
|
||||
|
||||
case "text":
|
||||
let textContent = node.text || "";
|
||||
// Apply marks (bold, italic, code, etc.)
|
||||
if (node.marks) {
|
||||
// Markdown code spans (`...`) cannot carry inner formatting, so when a
|
||||
// run has the `code` mark alongside ANY other mark, backtick syntax
|
||||
// would leak literal ** / []() into the code text. In that case emit
|
||||
// nested HTML (<code> innermost, the other marks wrapping it as HTML)
|
||||
// so the output is at least well-formed and re-parseable.
|
||||
//
|
||||
// NOTE: this does NOT round-trip both marks. The schema's `code` mark
|
||||
// has `excludes: "_"` (it excludes every other mark), so on import the
|
||||
// co-occurring mark is always dropped — the run comes back as `code`
|
||||
// only. We keep the emission simple and accept that the other mark is
|
||||
// lost; preserving both is impossible while `code` excludes them.
|
||||
// Only use the backtick form when `code` is the sole mark.
|
||||
const markTypes = node.marks.map((m: any) => m.type);
|
||||
const hasCode = markTypes.includes("code");
|
||||
const codeCombined = hasCode && markTypes.length > 1;
|
||||
for (const mark of node.marks) {
|
||||
switch (mark.type) {
|
||||
case "bold":
|
||||
textContent = codeCombined
|
||||
? `<strong>${textContent}</strong>`
|
||||
: `**${textContent}**`;
|
||||
break;
|
||||
case "italic":
|
||||
textContent = codeCombined
|
||||
? `<em>${textContent}</em>`
|
||||
: `*${textContent}*`;
|
||||
break;
|
||||
case "code":
|
||||
// When combined with another mark, wrap as <code> so the
|
||||
// surrounding HTML marks can nest around it; otherwise use the
|
||||
// plain backtick span.
|
||||
textContent = codeCombined
|
||||
? `<code>${textContent}</code>`
|
||||
: `\`${textContent}\``;
|
||||
break;
|
||||
case "link": {
|
||||
const href = mark.attrs?.href || "";
|
||||
const title = mark.attrs?.title;
|
||||
if (codeCombined) {
|
||||
// Emit an HTML anchor so it can wrap the nested <code>.
|
||||
const safeHref = escapeAttr(href);
|
||||
if (title) {
|
||||
textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
|
||||
} else {
|
||||
textContent = `<a href="${safeHref}">${textContent}</a>`;
|
||||
}
|
||||
} else if (title) {
|
||||
// Emit the optional markdown link title; escape an embedded
|
||||
// double-quote so it cannot terminate the title string early.
|
||||
const safeTitle = String(title).replace(/"/g, '\\"');
|
||||
textContent = `[${textContent}](${href} "${safeTitle}")`;
|
||||
} else {
|
||||
textContent = `[${textContent}](${href})`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "strike":
|
||||
textContent = codeCombined
|
||||
? `<s>${textContent}</s>`
|
||||
: `~~${textContent}~~`;
|
||||
break;
|
||||
case "underline":
|
||||
textContent = `<u>${textContent}</u>`;
|
||||
break;
|
||||
case "subscript":
|
||||
textContent = `<sub>${textContent}</sub>`;
|
||||
break;
|
||||
case "superscript":
|
||||
textContent = `<sup>${textContent}</sup>`;
|
||||
break;
|
||||
case "highlight": {
|
||||
// Preserve a null/empty color as a plain highlight (a bare
|
||||
// <mark> with no background-color); only emit the style when a
|
||||
// color is actually set, so a plain highlight is not forced to
|
||||
// yellow on export.
|
||||
const color = mark.attrs?.color;
|
||||
textContent = color
|
||||
? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
|
||||
: `<mark>${textContent}</mark>`;
|
||||
break;
|
||||
}
|
||||
case "textStyle":
|
||||
if (mark.attrs?.color) {
|
||||
textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
case "comment": {
|
||||
// Emit the inline comment anchor so highlights round-trip. The
|
||||
// schema's Comment mark parses span[data-comment-id] (attrs
|
||||
// commentId/resolved).
|
||||
const cid = mark.attrs?.commentId;
|
||||
if (cid) {
|
||||
const resolvedAttr = mark.attrs?.resolved
|
||||
? ` data-resolved="true"`
|
||||
: "";
|
||||
textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return textContent;
|
||||
|
||||
case "codeBlock":
|
||||
const language = node.attrs?.language || "";
|
||||
// Strip ALL trailing newlines so the export is idempotent: marked
|
||||
// re-adds exactly one trailing "\n" on import, so trimming only one
|
||||
// here would let the text grow by "\n" on each round-trip. Removing
|
||||
// every trailing newline makes repeated cycles stable.
|
||||
const code = nodeContent
|
||||
.map(processNode)
|
||||
.join("")
|
||||
.replace(/\n+$/, "");
|
||||
return "```" + language + "\n" + code + "\n```";
|
||||
|
||||
case "bulletList":
|
||||
return nodeContent
|
||||
.map((item: any) => processListItem(item, "-"))
|
||||
.join("\n");
|
||||
|
||||
case "orderedList":
|
||||
return nodeContent
|
||||
.map((item: any, index: number) =>
|
||||
processListItem(item, `${index + 1}.`),
|
||||
)
|
||||
.join("\n");
|
||||
|
||||
case "taskList":
|
||||
return nodeContent.map((item: any) => processTaskItem(item)).join("\n");
|
||||
|
||||
case "taskItem":
|
||||
// Delegate to the same helper used by taskList so multi-block and
|
||||
// nested task items render and indent consistently.
|
||||
return processTaskItem(node);
|
||||
|
||||
case "listItem":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
|
||||
case "blockquote":
|
||||
// Prefix EVERY line of EVERY child with "> " and separate block-level
|
||||
// children with a blank ">" line so code blocks / multi-paragraph
|
||||
// quotes round-trip correctly.
|
||||
return nodeContent
|
||||
.map((n: any) =>
|
||||
processNode(n)
|
||||
.split("\n")
|
||||
.map((line: string) => (line.length ? `> ${line}` : ">"))
|
||||
.join("\n"),
|
||||
)
|
||||
.join("\n>\n");
|
||||
|
||||
case "horizontalRule":
|
||||
return "---";
|
||||
|
||||
case "hardBreak":
|
||||
// Two trailing spaces before the newline encode a markdown hard break;
|
||||
// a bare "\n" would be reimported as a soft break and lost.
|
||||
return " \n";
|
||||
|
||||
case "image":
|
||||
const imgAlt = node.attrs?.alt || "";
|
||||
// Neutralize characters that could break out of the markdown image
|
||||
// URL: spaces/newlines and parentheses would terminate the (...) target
|
||||
// and let a stored src inject following markdown/HTML. Percent-encode
|
||||
// them so the URL stays a single inert token.
|
||||
const imgSrc = encodeMdUrl(node.attrs?.src);
|
||||
// No "caption" attribute exists in the Docmost image schema, so we do
|
||||
// not emit one (the previous caption branch was dead).
|
||||
return ``;
|
||||
|
||||
case "video": {
|
||||
// Emit the schema-matching <video> element so generateJSON rebuilds the
|
||||
// node with its attrs intact. The schema's parseHTML reads src/aria-label
|
||||
// from the standard attributes and the remaining attrs from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.alt) parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
// Wrap in a block <div> so marked treats it as a block (a bare <video>
|
||||
// is inline-level HTML and marked wraps it in <p>, leaving a spurious
|
||||
// empty paragraph beside the hoisted block atom). The wrapper has no
|
||||
// data-type, so the schema parser ignores it and just hoists the video.
|
||||
return `<div><video ${parts.join(" ")}></video></div>`;
|
||||
}
|
||||
|
||||
case "youtube": {
|
||||
// Emit the schema-matching div[data-type="youtube"]; the schema reads
|
||||
// src from data-src and width/height/align from data-* attributes.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="youtube"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "table": {
|
||||
// A GFM pipe table cannot represent merged cells. If ANY cell carries
|
||||
// colspan>1 or rowspan>1, a pipe table would corrupt the grid on
|
||||
// re-import, so emit the WHOLE table as raw HTML <table> instead: the
|
||||
// schema's table family parseHTML (tag table/tr/td/th, with colspan/
|
||||
// rowspan read from the same-named HTML attrs and align via parseHTML)
|
||||
// round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
|
||||
const tableRows: any[] = nodeContent;
|
||||
if (tableRows.length === 0) return "";
|
||||
const hasSpan = tableRows.some((row: any) =>
|
||||
(row.content || []).some(
|
||||
(cell: any) =>
|
||||
(cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1,
|
||||
),
|
||||
);
|
||||
|
||||
if (hasSpan) {
|
||||
// Render each cell's block children to HTML (marked does NOT parse
|
||||
// markdown inside a raw HTML block, so emitting markdown here would
|
||||
// leak literal ** / `` into the cell). blockToHtml mirrors the schema
|
||||
// HTML so inner formatting re-parses into the right marks/nodes.
|
||||
const renderHtmlCell = (cell: any): string => {
|
||||
const tag = cell.type === "tableHeader" ? "th" : "td";
|
||||
const a = cell.attrs || {};
|
||||
const cellParts: string[] = [];
|
||||
if ((a.colspan ?? 1) > 1)
|
||||
cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
|
||||
if ((a.rowspan ?? 1) > 1)
|
||||
cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
|
||||
if (a.align) cellParts.push(`align="${escapeAttr(a.align)}"`);
|
||||
const open = cellParts.length
|
||||
? `<${tag} ${cellParts.join(" ")}>`
|
||||
: `<${tag}>`;
|
||||
const inner = (cell.content || [])
|
||||
.map((block: any) => blockToHtml(block))
|
||||
.join("");
|
||||
return `${open}${inner}</${tag}>`;
|
||||
};
|
||||
const htmlRows = tableRows
|
||||
.map(
|
||||
(row: any) =>
|
||||
`<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`,
|
||||
)
|
||||
.join("");
|
||||
return `<table><tbody>${htmlRows}</tbody></table>`;
|
||||
}
|
||||
|
||||
// No merged cells: emit a GFM table (header row + separator) so the
|
||||
// markdown can be parsed back into a table on re-import.
|
||||
const rows = tableRows.map(processNode);
|
||||
const headerCells = tableRows[0]?.content || [];
|
||||
const columns = headerCells.length || 1;
|
||||
// Derive alignment markers (:--, :-:, --:) from each header cell.
|
||||
const markers = Array.from({ length: columns }, (_, i) => {
|
||||
const align = headerCells[i]?.attrs?.align;
|
||||
switch (align) {
|
||||
case "left":
|
||||
return ":--";
|
||||
case "center":
|
||||
return ":-:";
|
||||
case "right":
|
||||
return "--:";
|
||||
default:
|
||||
return "---";
|
||||
}
|
||||
});
|
||||
const separator = "| " + markers.join(" | ") + " |";
|
||||
return [rows[0], separator, ...rows.slice(1)].join("\n");
|
||||
}
|
||||
|
||||
case "tableRow":
|
||||
return "| " + nodeContent.map(processNode).join(" | ") + " |";
|
||||
|
||||
case "tableCell":
|
||||
case "tableHeader": {
|
||||
// Join multiple block children with a space (not "") so adjacent blocks
|
||||
// like a paragraph followed by a list don't collide into "line1- a".
|
||||
// Then collapse newlines and escape pipes so a cell containing "|" or a
|
||||
// line break cannot corrupt the surrounding GFM row.
|
||||
return nodeContent
|
||||
.map(processNode)
|
||||
.join(" ")
|
||||
.replace(/\r?\n/g, " ")
|
||||
.replace(/\|/g, "\\|");
|
||||
}
|
||||
|
||||
case "callout":
|
||||
const calloutType = node.attrs?.type || "info";
|
||||
const calloutContent = nodeContent.map(processNode).join("\n");
|
||||
return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
|
||||
|
||||
case "details":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
|
||||
case "detailsSummary":
|
||||
const summaryText = nodeContent.map(processNode).join("");
|
||||
return `<details>\n<summary>${summaryText}</summary>\n`;
|
||||
|
||||
case "detailsContent":
|
||||
const detailsText = nodeContent.map(processNode).join("\n");
|
||||
return `${detailsText}\n</details>`;
|
||||
|
||||
case "mathInline": {
|
||||
// The schema's `text` attribute has no parseHTML, so TipTap's default
|
||||
// parser reads it from the `text` HTML attribute (NOT the element's text
|
||||
// content). Emit span[data-type="mathInline"] carrying the LaTeX in a
|
||||
// `text="..."` attribute so it round-trips. marked cannot parse $...$
|
||||
// back, so the previous form was lossy.
|
||||
const inlineMath = node.attrs?.text || "";
|
||||
return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
|
||||
}
|
||||
|
||||
case "mathBlock": {
|
||||
// Same as mathInline: the LaTeX must ride in the `text` HTML attribute
|
||||
// for the schema's default parser to recover it.
|
||||
const blockMath = node.attrs?.text || "";
|
||||
return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
|
||||
}
|
||||
|
||||
case "mention": {
|
||||
// Emit span[data-type="mention"] with the schema's data-* attributes so
|
||||
// generateJSON rebuilds the mention node instead of leaving "@label"
|
||||
// plain text that cannot re-parse.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="mention"`];
|
||||
if (attrs.id) parts.push(`data-id="${escapeAttr(attrs.id)}"`);
|
||||
if (attrs.label)
|
||||
parts.push(`data-label="${escapeAttr(attrs.label)}"`);
|
||||
if (attrs.entityType)
|
||||
parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
|
||||
if (attrs.entityId)
|
||||
parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
|
||||
if (attrs.slugId)
|
||||
parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
|
||||
if (attrs.creatorId)
|
||||
parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
|
||||
if (attrs.anchorId)
|
||||
parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
|
||||
// Keep the label as visible text content too; the schema reads attrs
|
||||
// from data-*, so the inner text is purely cosmetic and harmless.
|
||||
const mentionLabel = attrs.label || attrs.id || "";
|
||||
// The label is visible element TEXT content here (the data-* attrs above
|
||||
// carry the real values), so escape it for the text context, not attrs.
|
||||
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
|
||||
}
|
||||
|
||||
case "attachment": {
|
||||
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
|
||||
// the schema stores name/url (plus mime/size/attachmentId). Emit the
|
||||
// schema-matching div[data-type="attachment"] with data-attachment-*
|
||||
// attrs so the node round-trips instead of degrading to a markdown link.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="attachment"`,
|
||||
`data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
|
||||
];
|
||||
if (attrs.name)
|
||||
parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.mime)
|
||||
parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "drawio":
|
||||
case "excalidraw": {
|
||||
// Emit the schema-matching div[data-type=...] carrying the diagram's
|
||||
// attrs as data-* (the schema's diagramAttributes reads src/title/alt/
|
||||
// width/height/size/aspectRatio/align/attachmentId from data-*), so the
|
||||
// diagram round-trips instead of degrading to a lossy placeholder.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="${type}"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.title != null)
|
||||
parts.push(`data-title="${escapeAttr(attrs.title)}"`);
|
||||
if (attrs.alt != null) parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "embed": {
|
||||
// Emit the schema-matching div[data-type="embed"]; the schema reads
|
||||
// src/provider/align/width/height from data-* attributes so the node
|
||||
// (and its provider iframe info) survives the round-trip.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="embed"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
`data-provider="${escapeAttr(attrs.provider ?? "")}"`,
|
||||
];
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "audio": {
|
||||
// Emit the schema-matching <audio> element (was emitting nothing). The
|
||||
// schema reads src from src and attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
// Wrap in a block <div> for the same reason as video: a bare <audio> is
|
||||
// inline-level HTML that marked would wrap in <p>.
|
||||
return `<div><audio ${parts.join(" ")}></audio></div>`;
|
||||
}
|
||||
|
||||
case "pdf": {
|
||||
// Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
|
||||
// The schema reads src/width/height from standard attrs and name/
|
||||
// attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="pdf"`,
|
||||
`src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.name) parts.push(`data-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "columns": {
|
||||
// Emit the schema-matching div[data-type="columns"] wrapper so the
|
||||
// multi-column layout survives. Without a case the children were
|
||||
// concatenated with no separator and the text merged. The schema reads
|
||||
// layout from data-layout and widthMode from data-width-mode. The whole
|
||||
// block is raw HTML, so render children via blockToHtml (NOT markdown,
|
||||
// which marked would not re-parse inside a raw HTML block).
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="columns"`];
|
||||
if (attrs.layout)
|
||||
parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
|
||||
if (attrs.widthMode && attrs.widthMode !== "normal")
|
||||
parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
|
||||
case "column": {
|
||||
// Emit the schema-matching div[data-type="column"]; the schema reads the
|
||||
// column width from data-width. Children are rendered as HTML so their
|
||||
// formatting survives inside this raw HTML block.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="column"`];
|
||||
if (attrs.width)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
|
||||
case "subpages":
|
||||
return "{{SUBPAGES}}";
|
||||
|
||||
default:
|
||||
// Fallback: process children
|
||||
return nodeContent.map(processNode).join("");
|
||||
}
|
||||
};
|
||||
|
||||
// Convert a run of inline nodes (text with marks, hard breaks, inline atoms)
// into HTML. This is the inline renderer for the raw-HTML fallbacks (spanned
// tables, columns): marked does not re-parse markdown inside a raw HTML block,
// so markdown punctuation emitted there would survive as literal characters.
// Every mark is written as the HTML element the schema's parseHTML accepts, so
// it re-imports as the same ProseMirror mark.
const inlineToHtml = (inlineNodes: any[]): string => {
  // Marks that map 1:1 onto a bare wrapper element.
  const simpleMarkTags = new Map<string, string>([
    ["bold", "strong"],
    ["italic", "em"],
    ["code", "code"],
    ["strike", "s"],
    ["underline", "u"],
    ["subscript", "sub"],
    ["superscript", "sup"],
  ]);

  // Wrap already-rendered HTML in the element for a single mark. Marks with no
  // known HTML form (or missing the attribute they need) leave `html` as is.
  const applyMark = (html: string, mark: any): string => {
    const tag = simpleMarkTags.get(mark.type);
    if (tag !== undefined) return `<${tag}>${html}</${tag}>`;

    if (mark.type === "link") {
      return `<a href="${escapeAttr(mark.attrs?.href || "")}">${html}</a>`;
    }
    if (mark.type === "highlight") {
      if (mark.attrs?.color) {
        return `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${html}</mark>`;
      }
      return `<mark>${html}</mark>`;
    }
    if (mark.type === "textStyle") {
      if (!mark.attrs?.color) return html;
      return `<span style="color: ${escapeAttr(mark.attrs.color)}">${html}</span>`;
    }
    if (mark.type === "comment") {
      // Inline comment anchor: emitted so commented text inside a raw-HTML
      // container (columns / spanned table cells) also round-trips.
      if (!mark.attrs?.commentId) return html;
      const resolvedAttr = mark.attrs?.resolved ? ` data-resolved="true"` : "";
      return `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${resolvedAttr}>${html}</span>`;
    }
    return html;
  };

  const renderInline = (inline: any): string => {
    if (inline.type === "hardBreak") return "<br>";
    // Inline atoms (mention, mathInline) already emit schema HTML.
    if (inline.type !== "text") return processNode(inline);

    // Escape the text first, then wrap it mark by mark in array order (the
    // first mark ends up innermost).
    let html = escapeHtmlText(inline.text || "");
    for (const mark of inline.marks || []) {
      html = applyMark(html, mark);
    }
    return html;
  };

  return (inlineNodes || []).map((inline: any) => renderInline(inline)).join("");
};
|
||||
|
||||
// Build the schema-matching <img> element for an image node. Kept as a shared
// helper so an image is written as real HTML wherever a raw-HTML container
// needs it (inside a column or a spanned table cell); markdown image syntax
// (`![alt](src)`) would NOT be re-parsed there and would survive as literal
// text. The Image extension reads src/alt from the standard attributes; the
// Docmost extra attrs (width/height/align/size/attachmentId/aspectRatio) are
// global attributes read from the DOM attribute names emitted below.
const imageToHtml = (node: any): string => {
  const imageAttrs = node.attrs || {};
  const htmlAttrs: string[] = [];
  // Append one escaped name="value" pair.
  const emit = (name: string, value: any): void => {
    htmlAttrs.push(`${name}="${escapeAttr(value)}"`);
  };

  // src is always present (empty string when the node has none).
  emit("src", imageAttrs.src ?? "");

  // Text-like attributes are skipped when empty; numeric-like ones only when
  // null/undefined, so a legitimate 0 is still written.
  if (imageAttrs.alt) emit("alt", imageAttrs.alt);
  if (imageAttrs.title) emit("title", imageAttrs.title);
  if (imageAttrs.width != null) emit("width", imageAttrs.width);
  if (imageAttrs.height != null) emit("height", imageAttrs.height);
  if (imageAttrs.align) emit("align", imageAttrs.align);
  if (imageAttrs.size != null) emit("data-size", imageAttrs.size);
  if (imageAttrs.attachmentId) emit("data-attachment-id", imageAttrs.attachmentId);
  if (imageAttrs.aspectRatio != null) emit("data-aspect-ratio", imageAttrs.aspectRatio);

  return `<img ${htmlAttrs.join(" ")}>`;
};
|
||||
|
||||
// Render a callout node as div[data-type="callout"]. The banner type travels
// in data-callout-type (lower-cased, defaulting to "info"), and the children
// are rendered as HTML so they survive inside a raw-HTML container.
const calloutToHtml = (node: any): string => {
  const rawType = node.attrs?.type || "info";
  const bannerType = rawType.toLowerCase();
  const bodyHtml = (node.content || [])
    .map((child: any) => blockToHtml(child))
    .join("");
  return `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">${bodyHtml}</div>`;
};
|
||||
|
||||
// Render a details node as a <details> element wrapping its HTML-rendered
// children. The schema parses <details>, summary[data-type="detailsSummary"],
// and div[data-type="detailsContent"].
const detailsToHtml = (node: any): string => {
  const parts: string[] = (node.content || []).map((child: any) =>
    blockToHtml(child),
  );
  return `<details>${parts.join("")}</details>`;
};
|
||||
// Render a detailsSummary node: its inline content inside a tagged <summary>.
const detailsSummaryToHtml = (node: any): string => {
  const summaryHtml = inlineToHtml(node.content || []);
  return `<summary data-type="detailsSummary">${summaryHtml}</summary>`;
};
|
||||
// Render a detailsContent node: its block children inside a tagged <div>.
const detailsContentToHtml = (node: any): string => {
  const parts: string[] = (node.content || []).map((child: any) =>
    blockToHtml(child),
  );
  return `<div data-type="detailsContent">${parts.join("")}</div>`;
};
|
||||
|
||||
// Render a taskList node as ul[data-type="taskList"] whose items are
// li[data-type="taskItem"][data-checked]. That is the shape bridgeTaskLists is
// described as recognizing, so writing it directly keeps task lists inside
// columns/cells from degrading to literal "- [ ]" text.
const taskListToHtml = (node: any): string => {
  // One <li> per task item; data-checked is always written as "true"/"false".
  const renderItem = (taskItem: any): string => {
    const state = taskItem.attrs?.checked ? "true" : "false";
    const innerHtml = blockChildrenToHtml(taskItem);
    return `<li data-type="taskItem" data-checked="${state}">${innerHtml}</li>`;
  };

  const listHtml = (node.content || [])
    .map((taskItem: any) => renderItem(taskItem))
    .join("");
  return `<ul data-type="taskList">${listHtml}</ul>`;
};
|
||||
|
||||
// Render a block node to HTML for the raw-HTML containers (spanned tables,
// columns). marked does NOT re-parse markdown inside a raw-HTML block, so
// EVERY block type that can appear inside a column or a spanned cell must be
// emitted as schema-matching HTML here — never as markdown, or it would land
// as literal text on re-import. Nodes whose processNode case already produces
// schema-matching HTML (math/media/embed/attachment/nested columns/spanned
// table) are delegated to processNode; the markdown-emitting cases
// (image/blockquote/callout/details/hr/taskList) get explicit HTML here.
//
// Returns the HTML string for `block`. Unknown block types are wrapped in a
// plain <div> so their content is preserved rather than dropped.
const blockToHtml = (block: any): string => {
  const children = block.content || [];

  // Node types that inlineToHtml renders (text runs, hard breaks, and the
  // inline atoms it forwards to processNode). Used by the fallback branch to
  // tell inline children apart from block children.
  const isInlineNode = (n: any): boolean =>
    n.type === "text" ||
    n.type === "hardBreak" ||
    n.type === "mention" ||
    n.type === "mathInline";

  switch (block.type) {
    case "paragraph":
      return `<p>${inlineToHtml(children)}</p>`;
    case "heading": {
      const level = block.attrs?.level || 1;
      return `<h${level}>${inlineToHtml(children)}</h${level}>`;
    }
    case "bulletList":
      return `<ul>${children
        .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
        .join("")}</ul>`;
    case "orderedList":
      return `<ol>${children
        .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
        .join("")}</ol>`;
    case "codeBlock": {
      const lang = block.attrs?.language || "";
      // The code itself is element TEXT content (between <code> tags), so it
      // must escape < > & — NOT the attribute escaper. The language rides in
      // a class ATTRIBUTE, so it uses escapeAttr.
      const code = escapeHtmlText(
        children
          .map(processNode)
          .join("")
          .replace(/\n+$/, ""),
      );
      const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
      return `<pre><code${cls}>${code}</code></pre>`;
    }
    case "image":
      return imageToHtml(block);
    case "blockquote":
      return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
    case "horizontalRule":
      return "<hr>";
    case "callout":
      return calloutToHtml(block);
    case "details":
      return detailsToHtml(block);
    case "detailsSummary":
      return detailsSummaryToHtml(block);
    case "detailsContent":
      return detailsContentToHtml(block);
    case "taskList":
      return taskListToHtml(block);
    case "taskItem":
      // A bare taskItem (outside a taskList) still needs a wrapping list so
      // the schema parses it; wrap it in a single-item taskList.
      return taskListToHtml({ content: [block] });
    // table (incl. spanned), columns/column, math, media, embed, attachment,
    // etc. already emit schema-matching HTML from processNode.
    case "table":
    case "columns":
    case "column":
    case "mathBlock":
    case "video":
    case "audio":
    case "pdf":
    case "youtube":
    case "embed":
    case "attachment":
    case "drawio":
    case "excalidraw":
      return processNode(block);
    default: {
      // An inline node handed in directly (text, hard break, inline atom) has
      // no block form; render it through the inline path instead of emitting
      // an empty <div> and losing it.
      if (isInlineNode(block)) return inlineToHtml([block]);

      // Any still-unhandled block type: NEVER fall back to markdown inside a
      // raw-HTML block (it would become literal text). Wrap its rendered
      // children in a <div>. Each child is dispatched by kind: testing only
      // for `type !== "text"` misclassified hardBreak/mention/mathInline as
      // blocks, which pushed the sibling text nodes through blockToHtml and
      // produced empty <div></div> wrappers (content lost).
      const inner = children
        .map((c: any) => (isInlineNode(c) ? inlineToHtml([c]) : blockToHtml(c)))
        .join("");
      return `<div>${inner}</div>`;
    }
  }
};
|
||||
|
||||
// HTML counterpart of processListItem for the raw-HTML fallback path: render
// each block child of a list item (a listItem holds block+ content) with
// blockToHtml and concatenate the results with no separator.
const blockChildrenToHtml = (item: any): string => {
  const rendered: string[] = [];
  (item.content || []).forEach((child: any) => {
    rendered.push(blockToHtml(child));
  });
  return rendered.join("");
};
|
||||
|
||||
// Lay out the rendered children of one list item beneath its marker.
//
// `childStrings` are the item's child blocks, each possibly multi-line. The
// first physical line overall is prefixed with `${prefix} `; every other line
// (the rest of the first child and all lines of later children such as nested
// lists, code blocks, or extra paragraphs) is indented by `indentWidth`
// spaces so it stays inside the list item instead of collapsing to column 0.
// Blank lines stay empty rather than carrying trailing whitespace.
//
// `indentWidth` is the LIST marker width, which is not always the visible
// prefix width, so callers pass it explicitly:
//   - bullet  "- "          -> 2 columns
//   - task    "- [ ] "      -> marker is still "- " (the "[ ] " is content), 2
//   - ordered "1. "/"10. "  -> 3/4 columns, scaling with the number's digits
// CommonMark anchors nested content to the marker column, so under-indenting
// an ordered item would re-parse its children as siblings/loose content.
//
// Returns the lines joined with "\n" ("" when there are no children).
const indentItemChildren = (
  childStrings: string[],
  prefix: string,
  indentWidth: number,
): string => {
  const pad = " ".repeat(indentWidth);
  // Flatten every child into its physical lines; index 0 is then the first
  // line of the first child, the only one that carries the marker.
  const physicalLines = childStrings.flatMap((child) => child.split("\n"));
  return physicalLines
    .map((text, position) => {
      if (position === 0) return `${prefix} ${text}`;
      return text.length ? `${pad}${text}` : "";
    })
    .join("\n");
};
|
||||
|
||||
// Render one bullet/ordered list item as markdown under `prefix` (e.g. "-" or
// "1."). An item with no children is just the bare prefix.
const processListItem = (item: any, prefix: string): string => {
  const rendered = (item.content || []).map(processNode);
  // The marker is written as `${prefix} `, so its width (and therefore the
  // continuation indent) is prefix.length + 1: "-" -> 2, "1." -> 3, "10." -> 4.
  // For these list kinds the visible prefix IS the list marker.
  return rendered.length === 0
    ? prefix
    : indentItemChildren(rendered, prefix, prefix.length + 1);
};
|
||||
|
||||
// Render one task item as markdown: "- [x]" when checked, "- [ ]" otherwise,
// followed by its rendered children.
const processTaskItem = (item: any): string => {
  const marker = item.attrs?.checked ? "- [x]" : "- [ ]";
  const rendered = (item.content || []).map(processNode);
  // An empty task item must still emit its checkbox marker; indenting zero
  // children would yield "" and the row would vanish.
  if (rendered.length === 0) return marker;
  // Only "- " (2 columns) is the list marker; the checkbox is item content.
  // The continuation indent is therefore a fixed 2, not derived from the
  // wider marker string.
  return indentItemChildren(rendered, marker, 2);
};
|
||||
|
||||
return processNode(content).trim();
|
||||
}
|
||||
@@ -26,6 +26,16 @@
|
||||
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||
* records stays with the comment tools/UI.
|
||||
*/
|
||||
|
||||
/**
 * Shape of the JSON object carried in the leading `<!-- docmost:meta ... -->`
 * block of a self-contained markdown file. It is what
 * `serializeDocmostMarkdown` stringifies and what `parseDocmostMarkdown`
 * returns as `meta`. The parser assigns the `JSON.parse` result directly, so
 * these fields are not validated at runtime. Only `version` is required.
 */
export interface DocmostMdMeta {
|
||||
version: number;
|
||||
pageId?: string;
|
||||
slugId?: string;
|
||||
title?: string;
|
||||
spaceId?: string;
|
||||
parentPageId?: string | null;
|
||||
}
|
||||
|
||||
// Match the leading meta block (allow leading whitespace). Capture group 1 is
|
||||
// the JSON text between the markers.
|
||||
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
|
||||
@@ -33,20 +43,28 @@ const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
|
||||
// rather than end-anchoring a single regex (which would mis-capture across a
|
||||
// literal opener that appears earlier in the body).
|
||||
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
|
||||
|
||||
/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. The meta block is always emitted; the comments block is always
 * emitted too (with `[]` when there are no comments) so the format stays uniform
 * and parsing stays simple.
 *
 * @param meta - Page metadata, written as single-line JSON inside the
 *   `docmost:meta` comment block.
 * @param body - Markdown body; surrounding whitespace is trimmed and a
 *   null/undefined body is treated as empty.
 * @param comments - Comment records; any non-array value is serialized as `[]`.
 * @returns The file text, ending with a newline after the comments block.
 */
export function serializeDocmostMarkdown(
  meta: DocmostMdMeta,
  body: string,
  comments: any[],
): string {
  const metaJson = JSON.stringify(meta);
  const commentsJson = JSON.stringify(Array.isArray(comments) ? comments : []);
  const trimmedBody = (body ?? "").trim();
  return (
    `<!-- docmost:meta\n${metaJson}\n-->\n\n` +
    `${trimmedBody}\n\n` +
    `<!-- docmost:comments\n${commentsJson}\n-->\n`
  );
}
|
||||
|
||||
/**
|
||||
* Split a self-contained file back into its parts. Tolerant: if the meta or
|
||||
* comments block is missing (e.g. a hand-written plain-markdown file), the
|
||||
@@ -55,56 +73,73 @@ export function serializeDocmostMarkdown(meta, body, comments) {
|
||||
* inside a block that IS present is surfaced as a thrown Error with a clear
|
||||
* message. Robust to `\r\n` line endings.
|
||||
*/
|
||||
export function parseDocmostMarkdown(full: string): {
  meta: DocmostMdMeta | null;
  body: string;
  comments: any[] | null;
} {
  // Normalize line endings so the anchored regexes work regardless of CRLF.
  const normalized = (full ?? "").replace(/\r\n/g, "\n");

  // Extract the leading meta block (start-anchored — already unambiguous).
  // `meta` stays null and the body starts at offset 0 when there is none.
  let meta: DocmostMdMeta | null = null;
  let metaEnd = 0;
  const metaMatch = normalized.match(META_RE);
  if (metaMatch) {
    try {
      meta = JSON.parse(metaMatch[1]);
    } catch (e) {
      throw new Error(
        `Invalid docmost:meta JSON block: ${
          e instanceof Error ? e.message : String(e)
        }`,
      );
    }
    // Body starts right after the matched meta block.
    metaEnd = (metaMatch.index ?? 0) + metaMatch[0].length;
  }

  // Find the LAST `<!-- docmost:comments` opener; the real file-level block is
  // the final one whose closing `-->` ends the document. Any earlier literal
  // occurrence inside the body (e.g. a re-pasted export) is left in the body.
  let lastOpenStart = -1;
  let lastOpenEnd = -1;
  let m: RegExpExecArray | null;
  // The regex is global (stateful), so rewind it before scanning.
  COMMENTS_OPEN_RE.lastIndex = 0;
  while ((m = COMMENTS_OPEN_RE.exec(normalized)) !== null) {
    lastOpenStart = m.index;
    lastOpenEnd = m.index + m[0].length;
  }

  // `comments` stays null (and the text stays in the body) unless the last
  // opener is followed by a closer that ends the document.
  let comments: any[] | null = null;
  let bodyEnd = normalized.length;
  if (lastOpenStart !== -1) {
    const rest = normalized.slice(lastOpenEnd);
    const close = rest.match(/\r?\n-->[ \t]*\r?\n?\s*$/); // closer must end the doc
    if (close) {
      const jsonText = rest.slice(0, close.index);
      try {
        comments = JSON.parse(jsonText);
      } catch (e) {
        throw new Error(
          `Invalid docmost:comments JSON block: ${
            e instanceof Error ? e.message : String(e)
          }`,
        );
      }
      bodyEnd = lastOpenStart; // strip from the opener to end of document
    }
  }

  const body = normalized.slice(metaEnd, bodyEnd).trim();
  return { meta, body, comments };
}
|
||||
|
||||
// --- docmost-sync addition (backport target: docmost-mcp/src/lib/markdown-document.ts) ---
|
||||
|
||||
/**
|
||||
* Serialize a self-contained markdown file with the meta block + body ONLY —
|
||||
* NO trailing `docmost:comments` block. The docmost-sync engine never touches
|
||||
* `/comments` (SPEC §3): the synced file carries just page identity (meta) and
|
||||
* the body, where comment threads survive only as inline `<span
|
||||
* data-comment-id>` anchor marks inside the body.
|
||||
@@ -113,6 +148,9 @@ export function parseDocmostMarkdown(full) {
|
||||
* `comments: null` and treats the rest as body), so a file produced here
|
||||
* round-trips cleanly through the parser.
|
||||
*/
|
||||
export function serializeDocmostMarkdownBody(
  meta: DocmostMdMeta,
  body: string,
): string {
  // Meta block, a blank line, then the trimmed body (a null/undefined body is
  // treated as empty) and a single trailing newline. No comments block.
  return `<!-- docmost:meta\n${JSON.stringify(meta)}\n-->\n\n${(body ?? "").trim()}\n`;
}
|
||||
297
packages/git-sync/src/lib/markdown-to-prosemirror.ts
Normal file
297
packages/git-sync/src/lib/markdown-to-prosemirror.ts
Normal file
@@ -0,0 +1,297 @@
|
||||
/**
|
||||
* Pure markdown -> ProseMirror conversion (extracted from docmost-sync's
|
||||
* `packages/docmost-client/src/lib/collaboration.ts`).
|
||||
*
|
||||
* Only the PURE converter path is vendored here: `markdownToProseMirror`
|
||||
* (marked -> HTML -> generateJSON) plus the two pre/post processors it needs
|
||||
* (`preprocessCallouts`, `bridgeTaskLists`). The collaboration/websocket
|
||||
* write-path (Hocuspocus, Yjs, `ws`, `withPageLock`, `sanitizeForYjs`) that
|
||||
* lives in the same upstream file is intentionally NOT vendored — the gitmost
|
||||
* server writes page bodies natively through the collab gateway (plan §3.3).
|
||||
*/
|
||||
import { marked } from "marked";
|
||||
import { generateJSON } from "@tiptap/html";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { docmostExtensions } from "./docmost-schema.js";
|
||||
|
||||
// Set up a DOM environment for Tiptap HTML parsing in Node.js. One empty JSDOM
// document is created when this module is first loaded, and its `window`,
// `document` and `Element` are published as process-wide globals.
// NOTE(review): this is a module-load side effect that overwrites any existing
// globals of the same name — confirm no other package in the process relies on
// different values.
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
global.window = dom.window as any;
global.document = dom.window.document;
// @ts-ignore
global.Element = dom.window.Element;
|
||||
|
||||
/**
 * Hard ceiling above which callout preprocessing (and the task-list bridge) is
 * skipped entirely, so a pathological multi-megabyte payload cannot tie up the
 * event loop; in that case the input is passed through verbatim (callouts are
 * simply not detected) rather than risking a slow scan.
 *
 * Despite the name, the value is compared against `String.length`, i.e. UTF-16
 * code units, so it is only approximately a byte count.
 */
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB

/**
 * Matches an opening callout fence: `:::type`. Group 1 captures the type as
 * written; the caller lower-cases it.
 */
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
/** Matches a bare closing callout fence: `:::` (trailing whitespace allowed). */
const CALLOUT_CLOSE_RE = /^:::\s*$/;
/**
 * Matches the START of a code-fence line (``` or ~~~, three or more). Group 1
 * is the leading whitespace, group 2 the fence marker run. The pattern is not
 * anchored at the end, so anything may follow the marker (e.g. an info string).
 */
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
|
||||
/**
 * Pre-process Docmost-flavoured markdown: convert `:::type ... :::` callout
 * blocks (the syntax our markdown export produces) into HTML divs that the
 * callout extension parses. The inner content is rendered through marked as
 * regular markdown.
 *
 * The input is split into lines and scanned forward. The scanner:
 *   - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
 *     `:::` line inside a code fence as a callout delimiter. A fence is closed
 *     only by a line made of the same fence character, at least as long as the
 *     opener, and followed by nothing but whitespace — so a line such as
 *     "```js" inside an open fence is content, not a closing fence;
 *   - matches an opening `:::type` line with the next closing `:::` at the
 *     SAME nesting level, supporting nested callouts via a depth counter;
 *   - emits `<div data-type="callout" data-callout-type="TYPE">…</div>` with
 *     the body rendered recursively (nested callouts first, then marked).
 *
 * An opener with no matching close is emitted as a literal line. Note that
 * such an opener is only recognised as unterminated after scanning to the end
 * of the current slice, and nested bodies are re-scanned by the recursion, so
 * inputs with many unterminated or deeply nested openers cost more than one
 * pass; the size cap below bounds that cost.
 *
 * @param markdown Markdown source, possibly containing `:::` callouts.
 * @returns The markdown with every terminated callout replaced by its HTML
 *   div; returned unchanged when it exceeds the size cap.
 */
async function preprocessCallouts(markdown: string): Promise<string> {
  // Defensive cap: skip preprocessing for pathologically large inputs.
  if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return markdown;
  }

  // True when `line` closes the code fence that was opened with `marker`:
  // same fence character, a run at least as long, and only whitespace after
  // the run (a closing fence cannot carry an info string).
  const closesFence = (line: string, marker: string): boolean => {
    const m = line.match(CODE_FENCE_RE);
    if (m === null) return false;
    const run = m[2];
    if (!run.startsWith(marker[0]) || run.length < marker.length) return false;
    return line.slice(m[0].length).trim() === "";
  };

  // Recursively transform a slice of lines, converting top-level callouts in
  // that slice into <div> blocks and rendering their inner content (which may
  // itself contain nested callouts) through this same function.
  const transform = async (lines: string[]): Promise<string> => {
    const out: string[] = [];
    let inCodeFence = false;
    let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it
    let i = 0;

    while (i < lines.length) {
      const line = lines[i];

      // Inside a code fence, only its matching closing fence is significant;
      // everything else (including `:::` lines) is copied through verbatim.
      if (inCodeFence) {
        out.push(line);
        if (closesFence(line, codeFenceMarker)) {
          inCodeFence = false;
          codeFenceMarker = "";
        }
        i++;
        continue;
      }

      // A code fence opening outside any callout body: enter code-fence mode.
      const fenceOpen = line.match(CODE_FENCE_RE);
      if (fenceOpen) {
        inCodeFence = true;
        codeFenceMarker = fenceOpen[2];
        out.push(line);
        i++;
        continue;
      }

      // An opening callout fence: scan forward (with code-fence and nested
      // callout awareness) for its matching closing `:::` at the same level.
      const open = line.match(CALLOUT_OPEN_RE);
      if (open) {
        const type = open[1].toLowerCase();
        const bodyLines: string[] = [];
        let depth = 1;
        let innerInCodeFence = false;
        let innerCodeFenceMarker = "";
        let j = i + 1;
        for (; j < lines.length; j++) {
          const bl = lines[j];
          if (innerInCodeFence) {
            if (closesFence(bl, innerCodeFenceMarker)) {
              innerInCodeFence = false;
              innerCodeFenceMarker = "";
            }
            bodyLines.push(bl);
            continue;
          }
          const innerFence = bl.match(CODE_FENCE_RE);
          if (innerFence) {
            innerInCodeFence = true;
            innerCodeFenceMarker = innerFence[2];
            bodyLines.push(bl);
            continue;
          }
          if (CALLOUT_OPEN_RE.test(bl)) {
            depth++;
            bodyLines.push(bl);
            continue;
          }
          if (CALLOUT_CLOSE_RE.test(bl)) {
            depth--;
            if (depth === 0) break; // matching close for THIS callout
            bodyLines.push(bl);
            continue;
          }
          bodyLines.push(bl);
        }

        if (j < lines.length) {
          // Found the matching closing fence: render the body (recursively, so
          // nested callouts are handled) and emit the callout div.
          const inner = await transform(bodyLines);
          const renderedInner = await marked.parse(inner);
          out.push(
            `\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`,
          );
          i = j + 1; // skip past the closing `:::`
          continue;
        }
        // No matching close (unterminated callout): treat the opener as a
        // literal line and continue, preserving the original text.
        out.push(line);
        i++;
        continue;
      }

      out.push(line);
      i++;
    }

    return out.join("\n");
  };

  return transform(markdown.split("\n"));
}
|
||||
|
||||
/**
 * Rewrite marked's checkbox lists into the markup TipTap's task-list
 * extensions parse.
 *
 * marked renders GitHub task items (`- [x] done`) as an ordinary list whose
 * `<li>` holds `<input type="checkbox">`; TipTap instead expects
 * `ul[data-type="taskList"]` containing `li[data-type="taskItem"]` with the
 * state in `data-checked` ("true" / "false").
 *
 * A list (`<ul>` or `<ol>`) is converted only when it has at least one direct
 * `<li>` and EVERY direct `<li>` owns a checkbox — either as a direct child or
 * as a direct child of one of its `<p>` children. Checkboxes nested deeper
 * (for example inside a child list) do not count, so a bullet item that merely
 * contains a task sublist is left alone. A qualifying `<ol>` is replaced by a
 * `<ul>` carrying the same attributes and children, so that only the taskList
 * parse rule matches it. Mixed and ordinary lists are left untouched.
 *
 * @param html HTML produced by marked.
 * @returns The HTML with qualifying lists rewritten; the input is returned
 *   verbatim when it contains no checkbox or exceeds the size cap.
 */
function bridgeTaskLists(html: string): string {
  // Two cheap early-outs before paying for a JSDOM parse: nothing to bridge
  // (the common case), or an input too large to be worth parsing again.
  const mentionsCheckbox = /type=["']?checkbox/i.test(html);
  if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return html;
  }

  const { document } = new JSDOM(html).window;

  // Checkboxes that belong to this <li> itself: direct <input type="checkbox">
  // children plus those sitting directly inside one of its direct <p> children.
  // All of them are gathered because raw inline HTML can put several in one item.
  const ownCheckboxes = (li: Element): Element[] => {
    const boxes: Element[] = [];
    for (const child of Array.from(li.children)) {
      if (child.tagName === "INPUT") {
        if (child.getAttribute("type") === "checkbox") boxes.push(child);
      } else if (child.tagName === "P") {
        boxes.push(
          ...Array.from(
            child.querySelectorAll(":scope > input[type='checkbox']"),
          ),
        );
      }
    }
    return boxes;
  };

  // The list snapshot is taken up front; nested lists get their own turn.
  for (const list of Array.from(document.querySelectorAll("ul, ol"))) {
    const items = Array.from(list.children).filter(
      (el) => el.tagName === "LI",
    );
    if (items.length === 0) continue;

    const boxesPerItem = items.map((li) => ownCheckboxes(li));
    // One item without its own checkbox disqualifies the whole list.
    if (boxesPerItem.some((boxes) => boxes.length === 0)) continue;

    // A numbered checklist must not stay an <ol>: tagged as a taskList it would
    // match both the orderedList and the taskList parse rules. Swap it for a
    // <ul> that inherits its attributes and child nodes.
    let target: Element = list;
    if (list.tagName === "OL") {
      const replacement = document.createElement("ul");
      for (const { name, value } of Array.from(list.attributes)) {
        replacement.setAttribute(name, value);
      }
      while (list.firstChild) {
        replacement.appendChild(list.firstChild);
      }
      list.replaceWith(replacement);
      target = replacement;
    }

    target.setAttribute("data-type", "taskList");
    for (let idx = 0; idx < items.length; idx++) {
      const li = items[idx];
      const boxes = boxesPerItem[idx];
      // The first checkbox decides the state; the rest only need removing.
      const first = boxes[0] ?? null;
      li.setAttribute("data-type", "taskItem");
      const checked =
        first != null &&
        (first.hasAttribute("checked") || (first as any).checked);
      li.setAttribute("data-checked", checked ? "true" : "false");
      // Strip every own checkbox so none leaks into the item's content.
      for (const box of boxes) {
        box.remove();
      }
    }
  }
  return document.body.innerHTML;
}
|
||||
|
||||
/**
 * Convert markdown to a ProseMirror JSON document using the Docmost schema.
 *
 * Pipeline: callout fences are turned into HTML divs, the result is rendered
 * to HTML by marked, checkbox lists are bridged to task-list markup, and the
 * final HTML is parsed with `generateJSON` against `docmostExtensions`.
 *
 * @param markdownContent Markdown source text.
 * @returns The document JSON produced by `generateJSON`.
 */
export async function markdownToProseMirror(
  markdownContent: string,
): Promise<any> {
  const calloutsAsHtml = await preprocessCallouts(markdownContent);
  const rendered = await marked.parse(calloutsAsHtml);
  return generateJSON(bridgeTaskLists(rendered), docmostExtensions);
}
|
||||
897
packages/git-sync/src/lib/node-ops.ts
Normal file
897
packages/git-sync/src/lib/node-ops.ts
Normal file
@@ -0,0 +1,897 @@
|
||||
/**
|
||||
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
|
||||
* tree by node id.
|
||||
*
|
||||
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
|
||||
* table cells hold their children in `content` just like any other block, so a
|
||||
* single recursive walk reaches them all.
|
||||
*
|
||||
* Every exported function operates on a DEEP CLONE of the input document and
|
||||
* returns the new document. The input doc and any `newNode`/`node` argument are
|
||||
 * never mutated. All functions are defensively null-safe: a missing or
 * non-array `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||
*/
|
||||
|
||||
/**
 * Return a deep copy of a JSON-serializable value, leaving the original
 * untouched. Uses `structuredClone` when the runtime provides it and falls
 * back to a JSON round-trip otherwise.
 */
function clone<T>(value: T): T {
  return typeof structuredClone === "function"
    ? structuredClone(value)
    : (JSON.parse(JSON.stringify(value)) as T);
}
|
||||
|
||||
/** Type guard: `value` is a plain (non-null, non-array) object. */
function isObject(value: any): value is Record<string, any> {
  if (value === null || value === undefined) return false;
  if (Array.isArray(value)) return false;
  return typeof value === "object";
}
|
||||
|
||||
/** True when `node` is an object whose `attrs` object has `id === nodeId`. */
function matchesId(node: any, nodeId: string): boolean {
  if (!isObject(node)) return false;
  const attrs = node.attrs;
  return isObject(attrs) && attrs.id === nodeId;
}
|
||||
|
||||
/**
 * Collect all text contained in a node, depth-first.
 *
 * A node's own `text` (when it is a string) comes first, followed by the plain
 * text of each entry of its `content` array in order. Anything that is not an
 * object yields the empty string.
 *
 * @param node A ProseMirror JSON node (or any value).
 * @returns The concatenated text, "" when there is none.
 */
export function blockPlainText(node: any): string {
  if (!isObject(node)) return "";
  const parts: string[] = [];
  if (typeof node.text === "string") {
    parts.push(node.text);
  }
  if (Array.isArray(node.content)) {
    for (let i = 0; i < node.content.length; i++) {
      parts.push(blockPlainText(node.content[i]));
    }
  }
  return parts.join("");
}
|
||||
|
||||
/**
 * Truncate `text` to at most `n` UTF-16 code units, appending an ellipsis
 * ("…") when anything was cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if the
 * `n`-th unit would be a high surrogate whose low surrogate is being dropped,
 * the cut moves back by one so the result does not end in a lone surrogate.
 *
 * @param text The string to shorten.
 * @param n Maximum number of code units to keep (before the ellipsis).
 * @returns `text` unchanged when it fits, otherwise the shortened text + "…".
 */
function truncate(text: string, n: number): string {
  // Written as a negated `>` so a NaN limit still returns the text unchanged.
  if (!(text.length > n)) return text;

  let end = n;
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const splitsPair =
      last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair) end -= 1;
  }
  return text.slice(0, end) + "…";
}
|
||||
|
||||
/** One compact outline entry for a single top-level block. */
export interface OutlineEntry {
  /** Position of the block within the document's top-level `content`. */
  index: number;
  /** The block's `type`; `undefined` when the entry is not an object. */
  type: string | undefined;
  /** The block's `attrs.id`, or `null` when it has none. */
  id: string | null;
  /** The block's plain text, truncated for display. */
  firstText: string;
  /** Present for headings only. */
  level?: number | null;
  /** Present for tables only. */
  rows?: number;
  cols?: number;
  header?: string[];
  /** Present for list blocks only (bulletList/orderedList/taskList). */
  items?: number;
}
|
||||
|
||||
/**
 * Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
 * `doc.content`). Deliberately does NOT recurse into paragraphs, list items,
 * or table cells — compactness is the point; use `getNodeByRef` to drill into
 * a specific block.
 *
 * Each entry carries `{ index, type, id, firstText }`, plus type-specific
 * extras:
 *   - headings add `level` (`null` when absent);
 *   - tables add `rows`, `cols` (cell count of the first row) and `header`
 *     (the first row's cell texts, each truncated to 40 chars);
 *   - list blocks (types ending in "List") add `items`.
 * `firstText` is the block's plain text truncated to 100 chars.
 *
 * Null-safe: a missing or non-object doc/content yields `[]`, and a table or
 * list whose `content` (or whose first row's `content`) is missing or not an
 * array is reported with zero counts and an empty header instead of throwing.
 *
 * @param doc A ProseMirror JSON document.
 * @returns One entry per top-level block, in document order.
 */
export function buildOutline(doc: any): OutlineEntry[] {
  if (!isObject(doc) || !Array.isArray(doc.content)) return [];

  const out: OutlineEntry[] = [];
  for (let i = 0; i < doc.content.length; i++) {
    const block = doc.content[i];
    const type = isObject(block) ? block.type : undefined;
    const entry: OutlineEntry = {
      index: i,
      type,
      id: isObject(block) && isObject(block.attrs) ? block.attrs.id ?? null : null,
      firstText: truncate(blockPlainText(block), 100),
    };

    if (type === "heading") {
      entry.level = isObject(block.attrs) ? block.attrs.level ?? null : null;
    } else if (type === "table") {
      // Only trust `content` when it really is an array; a malformed table
      // must not make the outline throw or report a bogus length.
      const rows: any[] = Array.isArray(block.content) ? block.content : [];
      const firstRow = rows[0];
      const headerCells: any[] =
        isObject(firstRow) && Array.isArray(firstRow.content)
          ? firstRow.content
          : [];
      entry.rows = rows.length;
      entry.cols = headerCells.length;
      entry.header = headerCells.map((cell: any) =>
        truncate(blockPlainText(cell), 40),
      );
    } else if (typeof type === "string" && type.endsWith("List")) {
      entry.items = Array.isArray(block.content) ? block.content.length : 0;
    }

    out.push(entry);
  }
  return out;
}
|
||||
|
||||
/**
 * Look up one node by reference.
 *
 * Two reference forms are understood:
 *   - `#<n>` (for example `#2`): the TOP-LEVEL block at index `n` of
 *     `doc.content`. This is the only way to address table / tableRow /
 *     tableCell nodes, which carry no `attrs.id`.
 *   - anything else: a block id. The tree below `doc` is searched depth-first
 *     and the FIRST node with `attrs.id === ref` wins.
 *
 * @param doc A ProseMirror JSON document.
 * @param ref `#<index>` or a node id.
 * @returns `{ node, path, type }`, where `path` lists the child indices from
 *   the doc root down to the node (a top-level block is `[index]`) and `node`
 *   is a DEEP CLONE that may be mutated freely; `null` when nothing matches.
 */
export function getNodeByRef(
  doc: any,
  ref: string,
): { node: any; path: number[]; type: string | undefined } | null {
  if (!isObject(doc)) return null;

  // Form 1 — "#<n>": direct index into the top-level content array.
  const byIndex = typeof ref === "string" ? ref.match(/^#(\d+)$/) : null;
  if (byIndex) {
    const position = Number(byIndex[1]);
    const candidate = Array.isArray(doc.content)
      ? doc.content[position]
      : undefined;
    return isObject(candidate)
      ? { node: clone(candidate), path: [position], type: candidate.type }
      : null;
  }

  // Form 2 — id lookup: pre-order walk; each child is tested before its own
  // subtree is explored.
  const locate = (
    parent: any,
    trail: number[],
  ): { node: any; path: number[]; type: string } | null => {
    if (!isObject(parent) || !Array.isArray(parent.content)) return null;
    const children: any[] = parent.content;
    for (let k = 0; k < children.length; k++) {
      const child = children[k];
      const here = trail.concat(k);
      if (matchesId(child, ref)) {
        return { node: clone(child), path: here, type: child.type };
      }
      const deeper = locate(child, here);
      if (deeper != null) return deeper;
    }
    return null;
  };

  return locate(doc, []);
}
|
||||
|
||||
/**
 * Substitute a deep clone of `newNode` for EVERY node whose
 * `attrs.id === nodeId`, wherever it sits in the tree (callouts and table
 * cells included).
 *
 * Works on a clone of `doc`. Each match receives its own clone of `newNode`,
 * so the substituted nodes share no references, and a freshly substituted
 * node is not searched again.
 *
 * @param doc A ProseMirror JSON document (not mutated).
 * @param nodeId The id to look for.
 * @param newNode The replacement node (not mutated).
 * @returns `{ doc, replaced }` — the new document and the number of
 *   substitutions made.
 */
export function replaceNodeById(
  doc: any,
  nodeId: string,
  newNode: any,
): { doc: any; replaced: number } {
  const out = clone(doc);
  let replaced = 0;

  // Visit one content array: swap matching entries in place, descend into the
  // children of everything else.
  const substituteIn = (siblings: any[]): void => {
    siblings.forEach((child, position) => {
      if (matchesId(child, nodeId)) {
        siblings[position] = clone(newNode);
        replaced += 1;
        return;
      }
      if (isObject(child) && Array.isArray(child.content)) {
        substituteIn(child.content);
      }
    });
  };

  if (isObject(out) && Array.isArray(out.content)) {
    substituteIn(out.content);
  }
  return { doc: out, replaced };
}
|
||||
|
||||
/**
 * Drop EVERY node whose `attrs.id === nodeId` from its parent's `content`
 * array, at any depth (callouts and tables included).
 *
 * Works on a clone of `doc`; the input is left untouched.
 *
 * @param doc A ProseMirror JSON document (not mutated).
 * @param nodeId The id to remove.
 * @returns `{ doc, deleted }` — the new document and the number of nodes
 *   removed.
 */
export function deleteNodeById(
  doc: any,
  nodeId: string,
): { doc: any; deleted: number } {
  const out = clone(doc);
  let deleted = 0;

  // Build the surviving list for one content array; survivors have their own
  // `content` pruned before they are kept.
  const prune = (siblings: any[]): any[] => {
    const survivors: any[] = [];
    for (let k = 0; k < siblings.length; k++) {
      const child = siblings[k];
      if (matchesId(child, nodeId)) {
        deleted += 1;
      } else {
        if (isObject(child) && Array.isArray(child.content)) {
          child.content = prune(child.content);
        }
        survivors.push(child);
      }
    }
    return survivors;
  };

  if (isObject(out) && Array.isArray(out.content)) {
    out.content = prune(out.content);
  }
  return { doc: out, deleted };
}
|
||||
|
||||
/**
 * Return a deep clone of `doc` in which every node attribute and mark
 * attribute whose value is strictly `undefined` has been deleted, so the
 * result is safe to hand to Yjs (which throws an opaque "Unexpected content
 * type" when asked to store an `undefined` attribute value).
 *
 * Only `undefined` is removed; `null`, `false`, `0` and `""` are legitimate
 * JSON-storable values and stay. The input is never mutated, and non-object
 * nodes / missing `attrs`, `marks` or `content` are tolerated.
 *
 * @param doc A ProseMirror JSON document.
 * @returns The sanitized clone.
 */
export function sanitizeForYjs(doc: any): any {
  const out = clone(doc);

  // Delete the keys of one attrs object that hold `undefined`.
  const dropUndefinedKeys = (attrs: any): void => {
    if (!isObject(attrs)) return;
    Object.keys(attrs).forEach((name) => {
      if (attrs[name] === undefined) delete attrs[name];
    });
  };

  // Clean a node's own attrs, then its marks' attrs, then its children.
  const visit = (node: any): void => {
    if (!isObject(node)) return;
    dropUndefinedKeys(node.attrs);

    const marks: any[] = Array.isArray(node.marks) ? node.marks : [];
    for (const mark of marks) {
      if (isObject(mark)) dropUndefinedKeys(mark.attrs);
    }

    const children: any[] = Array.isArray(node.content) ? node.content : [];
    for (const child of children) {
      visit(child);
    }
  };

  visit(out);
  return out;
}
|
||||
|
||||
/**
 * Diagnostics helper: find the FIRST attribute value that Yjs cannot store —
 * an `undefined`, a `function`, a `symbol` or a `bigint` — and describe where
 * it is.
 *
 * The root document's own `attrs` are checked first, then each top-level block
 * in order. Within a block the order is: its `attrs`, each mark's `attrs`,
 * then its children (depth-first). Paths are relative to the document root,
 * e.g. `content[3].content[0].attrs.indent (undefined)` or `attrs.foo (bigint)`
 * for a root attribute.
 *
 * @param doc A ProseMirror JSON document.
 * @returns The path and kind of the first offending attribute, or `null` when
 *   every attribute is storable (or `doc` is not an object).
 */
export function findUnstorableAttr(doc: any): string | null {
  if (!isObject(doc)) return null;

  const UNSTORABLE_KINDS = ["undefined", "function", "symbol", "bigint"];

  // Inspect one attrs object; report "<basePath>.<key> (<kind>)" or null.
  const scanAttrs = (attrs: any, basePath: string): string | null => {
    if (!isObject(attrs)) return null;
    for (const key of Object.keys(attrs)) {
      const kind = typeof attrs[key];
      if (UNSTORABLE_KINDS.includes(kind)) {
        return `${basePath}.${key} (${kind})`;
      }
    }
    return null;
  };

  // Inspect a content array; `prefix` is "" at the root and "<path>." below it.
  const scanChildren = (children: any, prefix: string): string | null => {
    if (!Array.isArray(children)) return null;
    for (let i = 0; i < children.length; i++) {
      const found = scanNode(children[i], `${prefix}content[${i}]`);
      if (found != null) return found;
    }
    return null;
  };

  // Inspect one non-root node: attrs, then marks, then children.
  const scanNode = (node: any, path: string): string | null => {
    if (!isObject(node)) return null;

    const ownHit = scanAttrs(node.attrs, `${path}.attrs`);
    if (ownHit != null) return ownHit;

    if (Array.isArray(node.marks)) {
      for (let i = 0; i < node.marks.length; i++) {
        const markHit = scanAttrs(
          node.marks[i]?.attrs,
          `${path}.marks[${i}].attrs`,
        );
        if (markHit != null) return markHit;
      }
    }

    return scanChildren(node.content, `${path}.`);
  };

  // The root is handled apart from `scanNode`: its paths carry no leading
  // segment and its `marks` are not inspected.
  const rootHit = scanAttrs(doc.attrs, "attrs");
  if (rootHit != null) return rootHit;
  return scanChildren(doc.content, "");
}
|
||||
|
||||
/**
 * Table structural node types and the container each must live directly inside.
 * Used by `insertNodeRelative` to splice rows/cells into the correct ancestor
 * rather than blindly into the anchor's direct parent (which would corrupt the
 * table's nesting).
 */
// Node types that are only valid inside a table structure.
const STRUCTURAL_TYPES = new Set(["tableRow", "tableCell", "tableHeader"]);
// For each structural type, the node type whose `content` must hold it.
const REQUIRED_CONTAINER: Record<string, string> = {
  tableRow: "table",
  tableCell: "tableRow",
  tableHeader: "tableRow",
};
|
||||
|
||||
/**
 * Resolve an anchor and return the chain of ancestors leading to it, from
 * `doc` down to and including the matched node.
 *
 * Each link is `{ node, index }`, `index` being the node's position inside its
 * parent's `content` array; the root doc uses index -1.
 *
 * Resolution rules:
 *   - when `opts.anchorNodeId` is set, the whole tree (root included) is
 *     searched depth-first for a node with that id, and `anchorText` is not
 *     consulted even if the id is not found;
 *   - otherwise, when `opts.anchorText` is set, only TOP-LEVEL blocks are
 *     scanned and the first whose plain text includes it is used.
 *
 * @returns The ancestor chain, or `null` when the anchor cannot be resolved.
 */
function findAnchorChain(
  doc: any,
  opts: InsertOptions,
): { node: any; index: number }[] | null {
  if (!isObject(doc)) return null;

  const { anchorNodeId, anchorText } = opts;

  // Anchor by id: depth-first, carrying the ancestor links along.
  if (anchorNodeId != null) {
    const descend = (
      node: any,
      index: number,
      ancestors: { node: any; index: number }[],
    ): { node: any; index: number }[] | null => {
      if (!isObject(node)) return null;
      const chain = [...ancestors, { node, index }];
      if (matchesId(node, anchorNodeId)) return chain;
      const children: any[] = Array.isArray(node.content) ? node.content : [];
      for (let k = 0; k < children.length; k++) {
        const found = descend(children[k], k, chain);
        if (found != null) return found;
      }
      return null;
    };
    return descend(doc, -1, []);
  }

  // Anchor by text: top-level blocks only, first match wins.
  if (anchorText != null && Array.isArray(doc.content)) {
    const blocks: any[] = doc.content;
    for (let k = 0; k < blocks.length; k++) {
      if (!blockPlainText(blocks[k]).includes(anchorText)) continue;
      return [
        { node: doc, index: -1 },
        { node: blocks[k], index: k },
      ];
    }
  }

  return null;
}
|
||||
|
||||
/** Options controlling where `insertNodeRelative` places the new node. */
export interface InsertOptions {
  /**
   * "append" pushes onto the top-level content and needs no anchor;
   * "before" / "after" place the node next to the resolved anchor.
   */
  position: "before" | "after" | "append";
  /** Resolve the anchor by node id anywhere in the tree (preferred). */
  anchorNodeId?: string;
  /** Fallback: first TOP-LEVEL block whose plain text includes this string. */
  anchorText?: string;
}
|
||||
|
||||
/**
 * Insert a deep clone of `node` into a clone of `doc`.
 *
 * Placement by `opts.position`:
 *   - "append": push onto the top-level `doc.content` (created if missing).
 *   - "before" / "after": resolve an anchor and splice the node immediately
 *     before / after it in the anchor's parent `content` array.
 *
 * Anchor resolution for before/after:
 *   - with `anchorNodeId`, the first node anywhere in the tree whose
 *     `attrs.id` equals it;
 *   - otherwise, with `anchorText`, the first TOP-LEVEL block whose
 *     `blockPlainText` includes it.
 *
 * Table structural nodes (tableRow / tableCell / tableHeader) are special:
 * they are spliced into the nearest enclosing container of the required type
 * (table for rows, tableRow for cells) next to the child that leads to the
 * anchor. When the anchor itself is that container, the node goes to the
 * container's start ("before") or end ("after").
 *
 * @param doc A ProseMirror JSON document (not mutated).
 * @param node The node to insert (not mutated).
 * @param opts Position and anchor selection.
 * @returns `{ doc, inserted }`; `inserted` is false when `opts` is not an
 *   object or the anchor could not be resolved, in which case `doc` is an
 *   unchanged clone.
 * @throws Error when a structural node is appended at the top level, or when
 *   the resolved anchor is not inside a container of the required type.
 */
export function insertNodeRelative(
  doc: any,
  node: any,
  opts: InsertOptions,
): { doc: any; inserted: boolean } {
  const out = clone(doc);
  const fresh = clone(node);
  const result = (inserted: boolean): { doc: any; inserted: boolean } => ({
    doc: out,
    inserted,
  });

  // Without options there is nothing actionable to do.
  if (!isObject(opts)) return result(false);

  const structural = isObject(node) && STRUCTURAL_TYPES.has(node.type);

  // --- "append": top-level push -------------------------------------------
  if (opts.position === "append") {
    // Rows and cells cannot live at the top level.
    if (structural) {
      throw new Error(
        `insert_node: cannot append a ${node.type} at the top level; use ` +
          `position before/after with an anchor inside the target table`,
      );
    }
    if (!isObject(out)) return result(false);
    if (!Array.isArray(out.content)) out.content = [];
    out.content.push(fresh);
    return result(true);
  }

  const offset = opts.position === "after" ? 1 : 0;

  // --- structural insert: land at the right level of the table -------------
  if (structural) {
    const containerType = REQUIRED_CONTAINER[node.type];
    const chain = findAnchorChain(out, opts);
    if (chain == null) return result(false);

    // Walk up from the anchor (inclusive) to the deepest node of the required
    // container type.
    let containerIdx = chain.length - 1;
    while (containerIdx >= 0) {
      const candidate = chain[containerIdx].node;
      if (isObject(candidate) && candidate.type === containerType) break;
      containerIdx -= 1;
    }

    if (containerIdx < 0) {
      throw new Error(
        `insert_node: cannot insert a ${node.type} here — the anchor is not ` +
          `inside a ${containerType}. Anchor on a cell's text or a block id ` +
          `that lives inside the target table.`,
      );
    }

    const container = chain[containerIdx].node;
    if (!Array.isArray(container.content)) container.content = [];

    const anchorIsContainer = containerIdx === chain.length - 1;
    if (anchorIsContainer) {
      // The anchor IS the container: go to its end ("after") or its start.
      const at = opts.position === "after" ? container.content.length : 0;
      container.content.splice(at, 0, fresh);
    } else {
      // Splice next to the container's child that lies on the anchor's path.
      const siblingIndex = chain[containerIdx + 1].index;
      container.content.splice(siblingIndex + offset, 0, fresh);
    }
    return result(true);
  }

  // --- ordinary insert, anchor by id: first match anywhere below the root --
  if (opts.anchorNodeId != null) {
    const targetId = opts.anchorNodeId as string;
    const spliceNextTo = (siblings: any[]): boolean => {
      for (let k = 0; k < siblings.length; k++) {
        const child = siblings[k];
        if (matchesId(child, targetId)) {
          siblings.splice(k + offset, 0, fresh);
          return true;
        }
        if (
          isObject(child) &&
          Array.isArray(child.content) &&
          spliceNextTo(child.content)
        ) {
          return true;
        }
      }
      return false;
    };
    const inserted =
      isObject(out) && Array.isArray(out.content)
        ? spliceNextTo(out.content)
        : false;
    return result(inserted);
  }

  // --- ordinary insert, anchor by text: top-level blocks only --------------
  if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
    const blocks: any[] = out.content;
    for (let k = 0; k < blocks.length; k++) {
      if (blockPlainText(blocks[k]).includes(opts.anchorText)) {
        blocks.splice(k + offset, 0, fresh);
        return result(true);
      }
    }
  }

  return result(false);
}
|
||||
|
||||
// ===========================================================================
|
||||
// Table editing helpers
|
||||
//
|
||||
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
|
||||
// table -> { type:"table", content:[tableRow...] }
|
||||
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
|
||||
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
|
||||
// content:[paragraph...] }
|
||||
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
|
||||
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
|
||||
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
|
||||
// of the input doc (via `clone`) and never mutate their inputs.
|
||||
// ===========================================================================
|
||||
|
||||
/**
 * Gather every string `attrs.id` found in `node` and its descendants into
 * `used`. The set is mutated in place; nothing is returned. Values that
 * `isObject` rejects are ignored, as are ids that are not strings.
 *
 * Typically called before `makeFreshId` so newly generated ids avoid the ones
 * already present.
 */
function collectIds(node: any, used: Set<string>): void {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // nodes are still visited in document (pre-order) sequence.
  const pending: any[] = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!isObject(current)) continue;

    const attrs = current.attrs;
    if (isObject(attrs) && typeof attrs.id === "string") {
      used.add(attrs.id);
    }

    const children = current.content;
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i--) {
        pending.push(children[i]);
      }
    }
  }
}
|
||||
|
||||
/**
 * Generate a random 12-character id over `a-z0-9` that is not yet in `used`,
 * record it in `used`, and return it.
 *
 * A candidate that collides with an existing entry is discarded and a new one
 * is drawn, so the returned id is always unique with respect to `used`.
 * Randomness comes from `Math.random`; callers should rely only on uniqueness,
 * never on the exact string.
 */
function makeFreshId(used: Set<string>): string {
  const ID_LENGTH = 12;
  const chars = "abcdefghijklmnopqrstuvwxyz0123456789";

  for (;;) {
    const picked: string[] = [];
    while (picked.length < ID_LENGTH) {
      picked.push(chars.charAt(Math.floor(Math.random() * chars.length)));
    }
    const candidate = picked.join("");

    // Retry on collision; otherwise claim the id and hand it back.
    if (!used.has(candidate)) {
      used.add(candidate);
      return candidate;
    }
  }
}
|
||||
|
||||
/**
 * Find the table that `tableRef` points at inside `rootClone` and return the
 * node itself (not a copy) together with its index path from the root, or
 * null when nothing matches. Because the returned `table` is a reference into
 * `rootClone`, callers that intend to mutate it should pass an already-cloned
 * document.
 *
 * Two reference forms are understood:
 * - `#<n>` selects the top-level block at index `n`, and only succeeds when
 *   that block's `type` is `"table"`.
 * - anything else is treated as a node id: the tree is searched depth-first
 *   for a node that `matchesId`, and the nearest ancestor (or the node itself)
 *   of type `"table"` is returned.
 */
function locateTable(
  rootClone: any,
  tableRef: string,
): { table: any; path: number[] } | null {
  if (!isObject(rootClone)) return null;

  // Form 1: "#<n>" — positional lookup among the top-level blocks.
  const positional =
    typeof tableRef === "string" ? tableRef.match(/^#(\d+)$/) : null;
  if (positional) {
    const position = Number(positional[1]);
    const topLevel = Array.isArray(rootClone.content)
      ? rootClone.content[position]
      : undefined;
    return isObject(topLevel) && topLevel.type === "table"
      ? { table: topLevel, path: [position] }
      : null;
  }

  // Form 2: id lookup. `trail` always holds the chain root-child ... current
  // node for the node being examined; entries are pushed on the way down and
  // popped on the way back up.
  const trail: { node: any; index: number }[] = [];

  // Closest entry in `trail` (deepest first) whose node is a table.
  const nearestTable = (): { table: any; path: number[] } | null => {
    for (let depth = trail.length - 1; depth >= 0; depth--) {
      const candidate = trail[depth].node;
      if (isObject(candidate) && candidate.type === "table") {
        return {
          table: candidate,
          path: trail.slice(0, depth + 1).map((step) => step.index),
        };
      }
    }
    return null;
  };

  const descend = (parent: any): { table: any; path: number[] } | null => {
    if (!isObject(parent) || !Array.isArray(parent.content)) return null;
    const children = parent.content;
    for (let i = 0; i < children.length; i++) {
      const child = children[i];
      trail.push({ node: child, index: i });

      if (matchesId(child, tableRef)) {
        // Id found: answer with its enclosing table, or null when it has
        // none. Either way the remaining siblings at this level are skipped.
        const resolved = nearestTable();
        trail.pop();
        return resolved;
      }

      const hit = descend(child);
      trail.pop();
      if (hit != null) return hit;
    }
    return null;
  };

  return descend(rootClone);
}
|
||||
|
||||
/**
 * Build the single paragraph node used as a table cell's content.
 *
 * The paragraph carries `attrs: { id, indent: 0 }`. A non-empty `text` becomes
 * one text node; an empty string yields an empty `content` array.
 */
function makeCellParagraph(id: string, text: string): any {
  const content: any[] = [];
  if (text) {
    content.push({ type: "text", text });
  }
  return { type: "paragraph", attrs: { id, indent: 0 }, content };
}
|
||||
|
||||
/**
 * Read a table as a matrix. Returns null when `tableRef` resolves to no table.
 * The input `doc` is cloned first and never mutated.
 *
 * Fields of the result:
 * - `rows`: number of rows in the table.
 * - `cols`: the cell count of the FIRST row only. Tables may be ragged, so use
 *   the per-row length of `cells` / `cellIds` for each row's actual width.
 * - `cells`: `string[][]` holding each cell's `blockPlainText`.
 * - `cellIds`: `(string|null)[][]` holding the id of each cell's FIRST child
 *   node. The entry is null when the cell has no first child, the child has
 *   no attrs, or its id is not a non-empty string — so every non-null entry
 *   is usable as an id, matching the declared type and the check
 *   `updateTableCell` applies before reusing an id.
 * - `path`: index path of the table within the doc.
 */
export function readTable(
  doc: any,
  tableRef: string,
): {
  rows: number;
  cols: number;
  cells: string[][];
  cellIds: (string | null)[][];
  path: number[];
} | null {
  const root = clone(doc);
  const located = locateTable(root, tableRef);
  if (located == null) return null;
  const { table, path } = located;

  const rowNodes = Array.isArray(table.content) ? table.content : [];
  const rows = rowNodes.length;
  const cols = rowNodes[0]?.content?.length ?? 0;

  const cells: string[][] = [];
  const cellIds: (string | null)[][] = [];
  for (const rowNode of rowNodes) {
    const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : [];
    const rowText: string[] = [];
    const rowIds: (string | null)[] = [];
    for (const cellNode of cellNodes) {
      rowText.push(blockPlainText(cellNode));

      // The cell's first child carries the id callers use to address the cell.
      const firstPara = Array.isArray(cellNode?.content)
        ? cellNode.content[0]
        : undefined;
      const rawId =
        isObject(firstPara) && isObject(firstPara.attrs)
          ? firstPara.attrs.id
          : undefined;
      // Only a non-empty string is a usable id; anything else (missing,
      // numeric, empty) is reported as null so the result honours its type.
      rowIds.push(typeof rawId === "string" && rawId.length > 0 ? rawId : null);
    }
    cells.push(rowText);
    cellIds.push(rowIds);
  }

  return { rows, cols, cells, cellIds, path };
}
|
||||
|
||||
/**
 * Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`,
 * where `doc` is a modified clone of the input and `inserted` is false only
 * when `tableRef` resolves to no table.
 *
 * - Width: the new row has as many cells as the WIDEST existing row (or as
 *   many as `cells` supplies when the table has no cells at all). Missing
 *   entries are padded with `""`.
 * - Each new cell gets `colspan: 1, rowspan: 1`, a paragraph with a fresh id
 *   that is unique within the document, and — when the first row's cell in
 *   the same column has one — a COPY of that cell's `colwidth`.
 * - Position: `index`, when an integer in `[0, rows]`, is where the row is
 *   spliced; any other value appends the row at the end.
 * - Cell type: a row landing at index 0 inherits each column's cell type from
 *   the current first row; every other position uses `"tableCell"`.
 *
 * @throws Error when `cells` has more entries than the table has columns.
 */
export function insertTableRow(
  doc: any,
  tableRef: string,
  cells: string[],
  index?: number,
): { doc: any; inserted: boolean } {
  const out = clone(doc);
  const located = locateTable(out, tableRef);
  if (located == null) return { doc: out, inserted: false };
  const { table } = located;

  if (!Array.isArray(table.content)) table.content = [];
  const rows = table.content.length;
  const headerRow = table.content[0];
  const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];
  // Tolerate a non-array `cells` by treating it as "no cells supplied".
  const supplied: string[] = Array.isArray(cells) ? cells : [];

  // Column count is the WIDEST existing row, so the guard below stays
  // meaningful for ragged tables and the new row matches the table's width.
  // Fall back to the supplied cell count only when no row has any cells.
  let colCount = 0;
  for (const r of table.content) {
    if (isObject(r) && Array.isArray(r.content)) {
      colCount = Math.max(colCount, r.content.length);
    }
  }
  if (colCount === 0) colCount = supplied.length;

  if (supplied.length > colCount) {
    throw new Error(
      `table_insert_row: got ${supplied.length} cell(s) but the table has ${colCount} column(s)`,
    );
  }

  // Resolve the landing index up front so the cell-type decision and the splice
  // below agree: a valid integer in [0, rows] splices there, else we append.
  const landingIndex =
    typeof index === "number" && Number.isInteger(index) && index >= 0 && index <= rows
      ? index
      : rows;

  // Seed the id generator with every id already in the doc so the new cell
  // paragraph ids are unique within the whole document.
  const used = new Set<string>();
  collectIds(out, used);

  const newCells: any[] = [];
  for (let i = 0; i < colCount; i++) {
    const text = supplied[i] ?? "";
    const attrs: Record<string, any> = { colspan: 1, rowspan: 1 };

    // Copy this column's colwidth from the first row's cell when present.
    // Arrays are duplicated so the new cell does not share a mutable array
    // with the header cell inside the returned document.
    const colwidth = headerCells[i]?.attrs?.colwidth;
    if (colwidth !== undefined) {
      attrs.colwidth = Array.isArray(colwidth) ? [...colwidth] : colwidth;
    }

    // A row landing at index 0 becomes the new first row, so inherit the
    // current first-row cell's type per column; every other position is a
    // plain data cell.
    const cellType =
      landingIndex === 0 ? headerCells[i]?.type ?? "tableCell" : "tableCell";

    newCells.push({
      type: cellType,
      attrs,
      content: [makeCellParagraph(makeFreshId(used), text)],
    });
  }

  const newRow = { type: "tableRow", content: newCells };

  // Splice at the resolved landing index (append when index was omitted/invalid).
  table.content.splice(landingIndex, 0, newRow);

  return { doc: out, inserted: true };
}
|
||||
|
||||
/**
 * Remove the row at 0-based `index` from the table `tableRef` refers to.
 *
 * Works on a clone of `doc` and returns `{ doc, deleted }`. `deleted` is false
 * only when no table could be located; in that case the clone is returned
 * unchanged.
 *
 * @throws Error when `index` is not an integer within `[0, rows)`.
 * @throws Error when the table has a single row (the last row is never removed).
 */
export function deleteTableRow(
  doc: any,
  tableRef: string,
  index: number,
): { doc: any; deleted: boolean } {
  const out = clone(doc);
  const found = locateTable(out, tableRef);
  if (found == null) {
    return { doc: out, deleted: false };
  }

  const target = found.table;
  if (!Array.isArray(target.content)) {
    target.content = [];
  }
  const rowCount = target.content.length;

  // The range check runs first, so an empty table reports "out of range".
  const indexInRange = Number.isInteger(index) && index >= 0 && index < rowCount;
  if (!indexInRange) {
    throw new Error(
      `table_delete_row: row index ${index} out of range (table has ${rowCount} row(s))`,
    );
  }

  if (rowCount <= 1) {
    throw new Error(
      "table_delete_row: refusing to delete the only row of the table",
    );
  }

  target.content.splice(index, 1);
  return { doc: out, deleted: true };
}
|
||||
|
||||
/**
 * Replace the content of cell `[row, col]` (both 0-based) with a single
 * plain-text paragraph holding `text`.
 *
 * Works on a clone of `doc` and returns `{ doc, updated }`; `updated` is false
 * only when `tableRef` resolves to no table. The cell node's own attrs are
 * left untouched — only its `content` is replaced. The new paragraph keeps the
 * id of the cell's previous first child when that id is a non-empty string;
 * otherwise a fresh document-unique id is generated.
 *
 * @throws Error when `row` or `col` is not an integer inside the table / row.
 */
export function updateTableCell(
  doc: any,
  tableRef: string,
  row: number,
  col: number,
  text: string,
): { doc: any; updated: boolean } {
  const out = clone(doc);
  const found = locateTable(out, tableRef);
  if (found == null) {
    return { doc: out, updated: false };
  }

  const tableRows = Array.isArray(found.table.content) ? found.table.content : [];

  // Validate the row first, then measure that row to validate the column.
  const rowValid = Number.isInteger(row) && row >= 0 && row < tableRows.length;
  const targetRow = rowValid ? tableRows[row] : undefined;
  const width =
    isObject(targetRow) && Array.isArray(targetRow.content)
      ? targetRow.content.length
      : 0;
  const colValid = Number.isInteger(col) && col >= 0 && col < width;

  if (!(rowValid && colValid)) {
    throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
  }

  const targetCell = targetRow.content[col];

  // Prefer the id of the cell's current first child so references to the cell
  // stay valid across the edit.
  const previousFirst = Array.isArray(targetCell?.content)
    ? targetCell.content[0]
    : undefined;
  let paragraphId: any;
  if (isObject(previousFirst) && isObject(previousFirst.attrs)) {
    paragraphId = previousFirst.attrs.id;
  }

  const reusable = typeof paragraphId === "string" && paragraphId.length !== 0;
  if (!reusable) {
    // No usable id: mint one that collides with nothing already in the doc.
    const taken = new Set<string>();
    collectIds(out, taken);
    paragraphId = makeFreshId(taken);
  }

  targetCell.content = [makeCellParagraph(paragraphId, text)];
  return { doc: out, updated: true };
}
|
||||
205
packages/git-sync/test/canonicalize-extra.test.ts
Normal file
205
packages/git-sync/test/canonicalize-extra.test.ts
Normal file
@@ -0,0 +1,205 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import fc from 'fast-check';
|
||||
// Barrel import (R-Infra alias resolves this to packages/docmost-client/src so
|
||||
// coverage measures the real source, not stale dist).
|
||||
import { canonicalizeContent, docsCanonicallyEqual } from 'docmost-client';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gaps NOT covered by canonicalize.test.ts (test-strategy report §2 diff):
|
||||
// - the *.align family (drawio/excalidraw/video/youtube/embed): a "center"
|
||||
// default is dropped, a non-default value is kept;
|
||||
// - comment.resolved: TRUE is PRESERVED (only resolved:false is normalized);
|
||||
// - link.target / link.rel NON-default values are kept;
|
||||
// - property: canonicalizeContent is a fixpoint, docsCanonicallyEqual is
|
||||
// reflexive and symmetric.
|
||||
// The base file already covers id-stripping, null-drop, link/comment/orderedList
|
||||
// default-drop, key-order insensitivity, and a real-diff negative — not re-added.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('canonicalizeContent — *.align default family', () => {
  // Node types for which an align of "center" is expected to be treated as a
  // default and removed.
  const alignTypes = ['drawio', 'excalidraw', 'video', 'youtube', 'embed'];

  // Canonicalize a minimal node of `type` carrying the given align value and
  // hand back the resulting attrs.
  const attrsAfter = (type: string, id: string, src: string, align: string) =>
    canonicalizeContent({ type, attrs: { id, src, align } }).attrs;

  alignTypes.forEach((type) => {
    it(`${type}: align "center" (the schema default) is dropped`, () => {
      // Only the meaningful src is left once id and the default align go.
      expect(attrsAfter(type, 'n-1', '/x', 'center')).toEqual({ src: '/x' });
    });

    it(`${type}: a NON-default align (e.g. "right") is kept`, () => {
      expect(attrsAfter(type, 'n-1', '/x', 'right')).toEqual({
        src: '/x',
        align: 'right',
      });
    });
  });

  it('image align is NOT in KNOWN_DEFAULTS: a non-null align survives, null is dropped', () => {
    // A real value on an image must be kept.
    expect(attrsAfter('image', 'i-1', '/a.png', 'left')).toEqual({
      src: '/a.png',
      align: 'left',
    });
    // "center" is not a default for image, so it must be kept too — this
    // guards against the media-family rule matching too broadly.
    expect(attrsAfter('image', 'i-2', '/b.png', 'center')).toEqual({
      src: '/b.png',
      align: 'center',
    });
  });
});
|
||||
|
||||
describe('canonicalizeContent — comment.resolved:true preserved (SPEC §11 L66)', () => {
  // Build a text node carrying a single comment mark with the given attrs.
  const commented = (text: string, attrs: Record<string, unknown>) => ({
    type: 'text',
    text,
    marks: [{ type: 'comment', attrs }],
  });

  it('keeps resolved:true (a legitimate change, not a default to normalize away)', () => {
    const canonical = canonicalizeContent(
      commented('anchored', { commentId: 'cmt-1', resolved: true }),
    );
    // resolved:true is not a default, so it stays next to the commentId;
    // otherwise resolved and unresolved comments would compare as equal.
    const expectedMarks = [
      { type: 'comment', attrs: { commentId: 'cmt-1', resolved: true } },
    ];
    expect(canonical.marks).toEqual(expectedMarks);
  });

  it('a resolved:true comment is NOT canonically equal to an unresolved one', () => {
    const resolved = commented('x', { commentId: 'c', resolved: true });
    const open = commented('x', { commentId: 'c' });
    expect(docsCanonicallyEqual(resolved, open)).toBe(false);
  });
});
|
||||
|
||||
describe('canonicalizeContent — link non-default target/rel kept', () => {
  // Canonicalize a one-character text node with a single link mark and return
  // the resulting marks array.
  const linkMarksAfter = (attrs: Record<string, string>) =>
    canonicalizeContent({
      type: 'text',
      text: 'l',
      marks: [{ type: 'link', attrs }],
    }).marks;

  it('keeps a NON-default link.target (e.g. "_self")', () => {
    const linkAttrs = { href: 'https://e.com', target: '_self' };
    // "_self" differs from the "_blank" default, so target has to survive.
    expect(linkMarksAfter({ ...linkAttrs })).toEqual([
      { type: 'link', attrs: linkAttrs },
    ]);
  });

  it('keeps a NON-default link.rel', () => {
    const linkAttrs = { href: 'https://e.com', rel: 'nofollow' };
    expect(linkMarksAfter({ ...linkAttrs })).toEqual([
      { type: 'link', attrs: linkAttrs },
    ]);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Property-based oracle checks (SPEC §11). The generated trees mix node/mark
|
||||
// types, ids, null attrs, known-default attrs and meaningful attrs, so the
|
||||
// invariants are exercised across the whole canonicalization surface.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Arbitrary for a single attribute value: a short non-empty string, a small
// integer (0-9), a boolean, or null. Combined with the attribute keys chosen
// by the node arbitrary, this lets generated trees contain ids, null attrs
// and values that may coincide with schema defaults.
const shortStringArb = fc.string({ minLength: 1, maxLength: 6 });
const smallIntArb = fc.integer({ min: 0, max: 9 });
const attrValueArb = fc.oneof(
  shortStringArb,
  smallIntArb,
  fc.boolean(),
  fc.constant(null),
);
|
||||
|
||||
// Recursive arbitrary for a ProseMirror-like node. Only `type` is always
// present; `text`, `attrs`, `marks` and `content` are each optional. Children
// are generated through the `tie('node')` self-reference, at most two per
// level.
const nodeArb: fc.Arbitrary<any> = fc.letrec((tie) => {
  const nodeTypeArb = fc.constantFrom(
    'paragraph',
    'heading',
    'orderedList',
    'drawio',
    'video',
    'text',
  );

  // Up to four node attrs drawn from a fixed key set.
  const nodeAttrsArb = fc.dictionary(
    fc.constantFrom('id', 'level', 'start', 'align', 'src', 'indent', 'keep'),
    attrValueArb,
    { maxKeys: 4 },
  );

  // Up to three mark attrs; '_blank' is added to the value pool explicitly.
  const markAttrsArb = fc.dictionary(
    fc.constantFrom('href', 'target', 'rel', 'commentId', 'resolved'),
    fc.oneof(attrValueArb, fc.constant('_blank')),
    { maxKeys: 3 },
  );

  const markArb = fc.record({
    type: fc.constantFrom('bold', 'link', 'comment'),
    attrs: fc.option(markAttrsArb, { nil: undefined }),
  });

  return {
    node: fc.record(
      {
        type: nodeTypeArb,
        text: fc.option(fc.string({ minLength: 0, maxLength: 5 }), { nil: undefined }),
        attrs: fc.option(nodeAttrsArb, { nil: undefined }),
        marks: fc.option(fc.array(markArb, { maxLength: 2 }), { nil: undefined }),
        content: fc.option(fc.array(tie('node'), { maxLength: 2 }), { nil: undefined }),
      },
      { requiredKeys: ['type'] },
    ),
  };
}).node;
|
||||
|
||||
describe('canonicalizeContent — property invariants (SPEC §11 oracle)', () => {
  // Number of generated cases per property.
  const NUM_RUNS = 300;

  it('is a fixpoint: f(f(x)) === f(x)', () => {
    const fixpoint = fc.property(nodeArb, (tree) => {
      const firstPass = canonicalizeContent(tree);
      const secondPass = canonicalizeContent(firstPass);
      // Canonicalizing an already-canonical tree must change nothing.
      expect(secondPass).toEqual(firstPass);
    });
    fc.assert(fixpoint, { numRuns: NUM_RUNS });
  });

  it('docsCanonicallyEqual is reflexive: equal(x, x) is always true', () => {
    const reflexive = fc.property(nodeArb, (tree) => {
      expect(docsCanonicallyEqual(tree, tree)).toBe(true);
    });
    fc.assert(reflexive, { numRuns: NUM_RUNS });
  });

  it('docsCanonicallyEqual is symmetric: equal(a, b) === equal(b, a)', () => {
    const symmetric = fc.property(nodeArb, nodeArb, (left, right) => {
      const forward = docsCanonicallyEqual(left, right);
      const backward = docsCanonicallyEqual(right, left);
      expect(forward).toBe(backward);
    });
    fc.assert(symmetric, { numRuns: NUM_RUNS });
  });
});
|
||||
302
packages/git-sync/test/canonicalize.test.ts
Normal file
302
packages/git-sync/test/canonicalize.test.ts
Normal file
@@ -0,0 +1,302 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import via the package barrel to also assert the symbols are re-exported.
|
||||
import { canonicalizeContent, docsCanonicallyEqual } from 'docmost-client';
|
||||
|
||||
describe('canonicalizeContent', () => {
  it('strips node-level attrs.id, recursively', () => {
    const heading = {
      type: 'heading',
      attrs: { id: 'h-1', level: 2 },
      content: [{ type: 'text', text: 'Title' }],
    };
    const canonical = canonicalizeContent({ type: 'doc', content: [heading] });
    expect(canonical.content[0].attrs).toEqual({ level: 2 });
    // The serialized canonical tree must not mention an `id` key at all.
    expect(JSON.stringify(canonical)).not.toContain('"id"');
  });

  it('drops null/undefined attrs but keeps every non-null attr', () => {
    const paragraphAttrs = {
      id: 'p-1',
      indent: null,
      textAlign: undefined,
      level: 0,
      keep: 'yes',
    };
    const canonical = canonicalizeContent({
      type: 'paragraph',
      attrs: paragraphAttrs,
      content: [],
    });
    // null and undefined entries disappear; a falsy-but-real value (0) stays.
    expect(canonical.attrs).toEqual({ keep: 'yes', level: 0 });
  });

  it('removes an attrs object that becomes empty after pruning', () => {
    const body = [{ type: 'text', text: 'x' }];
    const canonical = canonicalizeContent({
      type: 'paragraph',
      attrs: { id: 'p-1', indent: null, textAlign: null },
      content: body,
    });
    // With only an id and null values inside, the attrs key itself goes away.
    expect('attrs' in canonical).toBe(false);
    expect(canonical).toEqual({
      type: 'paragraph',
      content: [{ type: 'text', text: 'x' }],
    });
  });

  it('treats {attrs:{}} as equivalent to no attrs', () => {
    const emptyAttrs = canonicalizeContent({ type: 'paragraph', attrs: {} });
    const noAttrs = canonicalizeContent({ type: 'paragraph' });
    expect(emptyAttrs).toEqual(noAttrs);
  });

  it('keeps comment marks + commentId but normalizes resolved:false default (SPEC §3 anchor)', () => {
    const commentMark = {
      type: 'comment',
      attrs: { commentId: 'cmt-1', resolved: false },
    };
    const canonical = canonicalizeContent({
      type: 'text',
      text: 'anchored',
      marks: [commentMark],
    });
    // The mark and its commentId remain; only `resolved: false` is removed.
    expect(canonical.marks).toEqual([
      { type: 'comment', attrs: { commentId: 'cmt-1' } },
    ]);
  });

  it('drops known non-null schema defaults (link target/rel, comment resolved)', () => {
    const linkMark = {
      type: 'link',
      attrs: {
        href: 'https://example.com/page',
        target: '_blank',
        rel: 'noopener noreferrer nofollow',
      },
    };
    const canonical = canonicalizeContent({
      type: 'text',
      text: 'a link',
      marks: [linkMark],
    });
    // href is meaningful and kept; the default target/rel values are removed.
    expect(canonical.marks).toEqual([
      { type: 'link', attrs: { href: 'https://example.com/page' } },
    ]);
  });

  it('keeps a NON-default value that happens to share an attr name (orderedList start:5)', () => {
    const canonical = canonicalizeContent({
      type: 'orderedList',
      attrs: { id: 'ol-1', start: 5 },
      content: [],
    });
    // 5 is not the default start (1), so it has to be preserved.
    expect(canonical.attrs).toEqual({ start: 5 });
  });

  it('keeps meaningful node/mark attrs (level, language, href, src, width)', () => {
    const codeBlock = {
      type: 'codeBlock',
      attrs: { id: 'c-1', language: 'js' },
      content: [{ type: 'text', text: 'x' }],
    };
    const image = {
      type: 'image',
      attrs: { id: 'i-1', src: '/a.png', width: 100, height: null },
    };
    const linkedParagraph = {
      type: 'paragraph',
      content: [
        {
          type: 'text',
          text: 'link',
          marks: [{ type: 'link', attrs: { href: 'https://e.com' } }],
        },
      ],
    };
    const canonical = canonicalizeContent({
      type: 'doc',
      content: [codeBlock, image, linkedParagraph],
    });
    const [codeOut, imageOut, paragraphOut] = canonical.content;
    expect(codeOut.attrs).toEqual({ language: 'js' });
    expect(imageOut.attrs).toEqual({ src: '/a.png', width: 100 });
    expect(paragraphOut.content[0].marks[0].attrs).toEqual({
      href: 'https://e.com',
    });
  });

  it('preserves text, type and content order exactly', () => {
    const paragraph = {
      type: 'paragraph',
      content: [
        { type: 'text', text: 'one' },
        { type: 'text', text: 'two', marks: [{ type: 'bold' }] },
        { type: 'text', text: 'three' },
      ],
    };
    const canonical = canonicalizeContent(paragraph);
    const texts = canonical.content.map((n: any) => n.text);
    expect(texts).toEqual(['one', 'two', 'three']);
    expect(canonical.content[1].marks).toEqual([{ type: 'bold' }]);
  });

  it('drops an empty marks array (marks:[] === no marks)', () => {
    const canonical = canonicalizeContent({ type: 'text', text: 'x', marks: [] });
    expect('marks' in canonical).toBe(false);
  });

  it('does not mutate its input (frozen tree passes through unchanged)', () => {
    // Freeze every level so any write to the input would be rejected.
    const frozenText = Object.freeze({ type: 'text', text: 'x' });
    const frozenParagraph = Object.freeze({
      type: 'paragraph',
      attrs: Object.freeze({ id: 'p-1', indent: null }),
      content: Object.freeze([frozenText]),
    });
    const input = Object.freeze({
      type: 'doc',
      content: Object.freeze([frozenParagraph]),
    });
    const snapshot = JSON.stringify(input);
    const canonical = canonicalizeContent(input);
    // The input serializes exactly as it did before the call...
    expect(JSON.stringify(input)).toBe(snapshot);
    // ...and the result is a different object with the attrs pruned away.
    expect(canonical).not.toBe(input);
    expect('attrs' in canonical.content[0]).toBe(false);
  });
});
|
||||
|
||||
/**
 * docsCanonicallyEqual: each scenario hands a pair of nodes to the comparison
 * and pins the verdict. Pairs that differ only by block ids, by attrs left at
 * their defaults, or by attr key order must compare equal; a different text,
 * a different meaningful attr, or a dropped mark must not.
 */
describe('docsCanonicallyEqual', () => {
  /** One scenario: the test title, the two nodes to compare, the expected verdict. */
  type EqualityCase = { title: string; a: any; b: any; equal: boolean };

  /** A doc holding a single empty level-1 heading with the given block id. */
  const headingDoc = (id: string) => ({
    type: 'doc',
    content: [{ type: 'heading', attrs: { id, level: 1 }, content: [] }],
  });

  /** A paragraph with the given attrs whose only content is the text "x". */
  const paragraphX = (attrs: Record<string, unknown>) => ({
    type: 'paragraph',
    attrs,
    content: [{ type: 'text', text: 'x' }],
  });

  /** A text run carrying exactly one mark of the given type and attrs. */
  const markedText = (text: string, markType: string, attrs: Record<string, unknown>) => ({
    type: 'text',
    text,
    marks: [{ type: markType, attrs }],
  });

  const cases: EqualityCase[] = [
    {
      title: 'is true when docs differ only by block ids',
      a: headingDoc('h-1'),
      b: headingDoc('h-DIFFERENT'),
      equal: true,
    },
    {
      title: 'is true when one side omits an attr the other sets to default null',
      a: paragraphX({ id: 'p-1' }),
      b: paragraphX({ id: 'p-2', indent: null, textAlign: null }),
      equal: true,
    },
    {
      title: 'is key-order-insensitive for attrs',
      // Same attrs, deliberately written in opposite key order.
      a: { type: 'image', attrs: { src: '/a.png', width: 10 } },
      b: { type: 'image', attrs: { width: 10, src: '/a.png' } },
      equal: true,
    },
    {
      title: 'is false for a real text difference',
      a: { type: 'text', text: 'hello' },
      b: { type: 'text', text: 'world' },
      equal: false,
    },
    {
      title: 'is false for a real attr difference (different level)',
      a: { type: 'heading', attrs: { id: 'x', level: 1 } },
      b: { type: 'heading', attrs: { id: 'y', level: 2 } },
      equal: false,
    },
    {
      title: 'is false when a meaningful mark attr differs (commentId)',
      a: markedText('x', 'comment', { commentId: 'cmt-1' }),
      b: markedText('x', 'comment', { commentId: 'cmt-2' }),
      equal: false,
    },
    {
      title: 'is true when a link has only href vs one with the schema-default target/rel',
      a: markedText('link', 'link', { href: 'https://example.com' }),
      b: markedText('link', 'link', {
        href: 'https://example.com',
        target: '_blank',
        rel: 'noopener noreferrer nofollow',
      }),
      equal: true,
    },
    {
      title: 'is true when an orderedList omits start vs one with the default start:1',
      a: { type: 'orderedList', content: [] },
      b: { type: 'orderedList', attrs: { start: 1 }, content: [] },
      equal: true,
    },
    {
      title: 'is false when an orderedList has a non-default start (5 vs absent)',
      a: { type: 'orderedList', content: [] },
      b: { type: 'orderedList', attrs: { start: 5 }, content: [] },
      equal: false,
    },
    {
      title: 'is true when a comment mark omits resolved vs one with the default false',
      a: markedText('x', 'comment', { commentId: 'cmt-1' }),
      b: markedText('x', 'comment', { commentId: 'cmt-1', resolved: false }),
      equal: true,
    },
    {
      title: 'is false when a comment mark is dropped entirely',
      a: markedText('x', 'comment', { commentId: 'cmt-1' }),
      b: { type: 'text', text: 'x' },
      equal: false,
    },
  ];

  // Register one test per scenario, in the order listed above.
  for (const { title, a, b, equal } of cases) {
    it(title, () => {
      expect(docsCanonicallyEqual(a, b)).toBe(equal);
    });
  }
});
|
||||
377
packages/git-sync/test/diff.test.ts
Normal file
377
packages/git-sync/test/diff.test.ts
Normal file
@@ -0,0 +1,377 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { diffDocs } from '../src/lib/diff.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ProseMirror JSON builders. diffDocs accepts plain JSON docs (it parses them
|
||||
// through the Docmost schema internally), so we only need minimal node shapes.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Builds a paragraph node. When `text` is omitted (or empty) the paragraph is
 * empty: its `content` array has no entries.
 */
const para = (text?: string) => {
  const content = text ? [{ type: 'text', text }] : [];
  return { type: 'paragraph', content };
};
|
||||
|
||||
/** Builds a heading node holding one text run; `level` defaults to 2. */
const heading = (text: string, level = 2) => {
  const run = { type: 'text', text };
  return { type: 'heading', attrs: { level }, content: [run] };
};
|
||||
|
||||
/** Wraps the given block nodes, in order, in a top-level `doc` node. */
const doc = (...blocks: any[]) => {
  return { type: 'doc', content: blocks };
};
|
||||
|
||||
/** Builds an image node (an atom) with an empty `attrs` object. */
const image = () => {
  return { type: 'image', attrs: {} };
};
|
||||
|
||||
/** Builds an `info` callout whose body is one paragraph of `text` (default "note"). */
const callout = (text = 'note') => {
  const body = para(text);
  return { type: 'callout', attrs: { type: 'info' }, content: [body] };
};
|
||||
|
||||
/** Builds a 1x1 table: one row, one cell, one paragraph holding `cell` (default "c"). */
const table = (cell = 'c') => {
  const onlyCell = { type: 'tableCell', content: [para(cell)] };
  const onlyRow = { type: 'tableRow', content: [onlyCell] };
  return { type: 'table', content: [onlyRow] };
};
|
||||
|
||||
/**
 * Builds a paragraph with one text run bearing a link mark.
 *
 * When `href` is `undefined` the link mark gets an empty attrs object (no
 * `href` key at all). Any `extraMarks` are appended after the link mark.
 */
const linkPara = (text: string, href: string | undefined, extraMarks: any[] = []) => {
  const linkAttrs = href === undefined ? {} : { href };
  const linkMark = { type: 'link', attrs: linkAttrs };
  const run = { type: 'text', text, marks: [linkMark, ...extraMarks] };
  return { type: 'paragraph', content: [run] };
};
|
||||
|
||||
/**
 * The diff.ts default for the notes-heading argument (the text is Russian for
 * "Translator's notes"). Tests build a heading with this text and call
 * diffDocs WITHOUT a notesHeading argument to exercise that default.
 */
const DEFAULT_NOTES_HEADING = 'Примечания переводчика';
|
||||
|
||||
describe('diffDocs', () => {
|
||||
/**
 * Precise path: character-level insert/delete changes, their summary counts,
 * and the "(no textual changes)" sentinel for an empty change list.
 */
describe('textual changes (precise path)', () => {
  /** Keeps only the changes carrying the given op. */
  const withOp = <T extends { op: string }>(changes: readonly T[], op: string): T[] =>
    changes.filter((change) => change.op === op);

  /** A fresh doc node with no blocks at all. */
  const emptyDoc = () => ({ type: 'doc', content: [] });

  /** Asserts the shape shared by every "nothing changed" scenario. */
  const expectNoChanges = (result: ReturnType<typeof diffDocs>) => {
    expect(result.changes).toHaveLength(0);
    expect(result.summary).toEqual({ inserted: 0, deleted: 0, blocksChanged: 0 });
    // The Changes section renders the sentinel line for an empty change list.
    expect(result.markdown).toContain('(no textual changes)');
  };

  it('reports no changes for two identical docs', () => {
    const same = doc(para('hello world'));
    expectNoChanges(diffDocs(same, same));
  });

  it('counts a pure insertion ("abc" -> "abcXY") and captures the inserted substring', () => {
    const { summary, changes } = diffDocs(doc(para('abc')), doc(para('abcXY')));

    expect(summary.inserted).toBe(2);
    expect(summary.deleted).toBe(0);
    // Exactly one insert change whose text equals the inserted substring.
    const added = withOp(changes, 'insert');
    expect(added).toHaveLength(1);
    expect(added[0].text).toBe('XY');
    // No deletions on a pure insertion.
    expect(withOp(changes, 'delete')).toHaveLength(0);
  });

  it('counts a pure deletion ("abcXY" -> "abc") and captures the deleted substring', () => {
    const { summary, changes } = diffDocs(doc(para('abcXY')), doc(para('abc')));

    expect(summary.deleted).toBe(2);
    expect(summary.inserted).toBe(0);
    const removed = withOp(changes, 'delete');
    expect(removed).toHaveLength(1);
    expect(removed[0].text).toBe('XY');
    expect(withOp(changes, 'insert')).toHaveLength(0);
  });

  it('reports a word modification as a matched delete + insert with exact substrings', () => {
    const { summary, changes } = diffDocs(doc(para('hello world')), doc(para('hello there')));

    // "world" (5) removed, "there" (5) added.
    expect(summary.inserted).toBe(5);
    expect(summary.deleted).toBe(5);

    const removedTexts = withOp(changes, 'delete').map((change) => change.text);
    const addedTexts = withOp(changes, 'insert').map((change) => change.text);
    expect(removedTexts).toContain('world');
    expect(addedTexts).toContain('there');
  });

  it('handles two empty docs without error', () => {
    expectNoChanges(diffDocs(emptyDoc(), emptyDoc()));
  });

  it('reports an insertion into an empty doc', () => {
    const { summary, changes } = diffDocs(emptyDoc(), doc(para('brand new')));

    expect(summary.inserted).toBeGreaterThan(0);
    const added = withOp(changes, 'insert');
    expect(added.length).toBeGreaterThan(0);
    // The inserted text is the new paragraph's content.
    expect(added.map((change) => change.text).join('')).toContain('brand new');
  });
});
|
||||
|
||||
/**
 * Integrity tuples: for each tracked kind diffDocs reports an [old, new] pair,
 * and the same pairs are rendered in the markdown's Integrity section.
 */
describe('integrity counting', () => {
  it('counts images, tables and callouts as old -> new tuples', () => {
    // old: 1 image, 1 callout, 1 table   new: 2 images, 0 callouts, 1 table
    const oldDoc = doc(image(), callout(), table());
    const newDoc = doc(image(), image(), table());
    const { integrity } = diffDocs(oldDoc, newDoc);

    expect(integrity.images).toEqual([1, 2]);
    expect(integrity.callouts).toEqual([1, 0]);
    expect(integrity.tables).toEqual([1, 1]);
  });

  it('renders the integrity section verbatim in the markdown', () => {
    const oldDoc = doc(image(), callout(), table());
    const newDoc = doc(image(), image(), table());
    const { markdown } = diffDocs(oldDoc, newDoc);

    // The integrity block is our own formatting, so exact lines are asserted.
    expect(markdown).toContain('## Integrity (old -> new)');
    expect(markdown).toContain('- images: 1 -> 2');
    expect(markdown).toContain('- callouts: 1 -> 0');
    expect(markdown).toContain('- tables: 1 -> 1');
  });

  it('counts a single link split across two adjacent runs (shared href) as one link', () => {
    // Two text runs, both bearing a link to the SAME href; one also bold.
    const d = doc({
      type: 'paragraph',
      content: [
        { type: 'text', text: 'foo', marks: [{ type: 'link', attrs: { href: 'http://x' } }, { type: 'bold' }] },
        { type: 'text', text: 'bar', marks: [{ type: 'link', attrs: { href: 'http://x' } }] },
      ],
    });
    const { integrity } = diffDocs(d, d);

    // Counting by unique href collapses the two runs into one link.
    expect(integrity.links).toEqual([1, 1]);
  });

  it('counts distinct hrefs separately', () => {
    const d = doc({
      type: 'paragraph',
      content: [
        { type: 'text', text: 'one', marks: [{ type: 'link', attrs: { href: 'http://a' } }] },
        { type: 'text', text: 'two', marks: [{ type: 'link', attrs: { href: 'http://b' } }] },
      ],
    });
    const { integrity } = diffDocs(d, d);
    expect(integrity.links).toEqual([2, 2]);
  });

  it('counts a link mark with a missing href once (bucketed under "")', () => {
    // Per source: a missing/empty href is collected under a single "" key, so a
    // malformed link is still counted exactly once.
    // linkPara returns a bare paragraph; wrap it in doc(...) so diffDocs is
    // given a document root, as in the other link-counting cases here.
    const d = doc(linkPara('orphan', undefined));
    const { integrity } = diffDocs(d, d);
    expect(integrity.links).toEqual([1, 1]);
  });
});
|
||||
|
||||
/**
 * Footnote markers: bracketed numbers are collected from the body only;
 * anything after the notes heading is left out of both the old and new lists.
 */
describe('footnoteMarkers', () => {
  /** Diffs a doc against itself and returns its [old, new] marker lists. */
  const markersOf = (d: any, notesHeading?: string) =>
    (notesHeading === undefined ? diffDocs(d, d) : diffDocs(d, d, notesHeading)).integrity
      .footnoteMarkers;

  it('excludes markers after the default notes heading and preserves reading order', () => {
    // Body has [1] then [2]; the [99] sits AFTER the notes heading and must be
    // excluded from both old and new marker lists.
    const body = para('intro [1] middle [2]');
    const notesTitle = heading(DEFAULT_NOTES_HEADING);
    const notesBody = para('[99] footnote body');
    const markers = markersOf(doc(body, notesTitle, notesBody));

    expect(markers).toEqual([
      [1, 2],
      [1, 2],
    ]);
    // Reading order: [1] precedes [2].
    expect(markers[1]).toEqual([1, 2]);
  });

  it('honors a custom notesHeading argument', () => {
    const markers = markersOf(doc(para('a [1]'), heading('Notes'), para('[5] excluded')), 'Notes');

    // With the matching custom heading, [5] is excluded.
    expect(markers).toEqual([[1], [1]]);
  });

  it('includes every marker when no notes heading is present', () => {
    // No heading equals the notesHeading -> the whole doc is the body.
    const markers = markersOf(doc(para('a [1] b [2]'), para('[3]')));

    expect(markers).toEqual([
      [1, 2, 3],
      [1, 2, 3],
    ]);
  });

  it('renders the footnoteMarkers integrity line verbatim', () => {
    const d = doc(para('x [1] y [2]'), heading(DEFAULT_NOTES_HEADING), para('[9]'));
    const rendered = diffDocs(d, d).markdown;
    expect(rendered).toContain('- footnoteMarkers: [1, 2] -> [1, 2]');
  });
});
|
||||
|
||||
/**
 * Coarse fallback: when the precise pipeline throws, diffDocs must degrade to
 * a block-level diff (flagged in the markdown) instead of propagating the error.
 */
describe('coarse fallback', () => {
  // An unknown node type makes Node.fromJSON reject the doc, which throws
  // inside the precise pipeline and triggers the coarse block-level fallback.
  // (Confirmed by running the module: `{ type: '___nope' }` is not in the
  // schema, so parsing throws and `fellBack` becomes true.)
  const unknownNode = () => ({ type: '___nope' });

  /** The tail of the note the markdown carries whenever the fallback ran. */
  const FALLBACK_NOTE = 'coarse block-level diff shown.';

  it('degrades to a coarse block-level diff instead of throwing', () => {
    const before = doc(para('keep this'), unknownNode());
    const after = doc(para('keep this'), para('new block'));

    // Must not throw.
    const { markdown, changes } = diffDocs(before, after);

    // The fallback note appears in the markdown header area.
    expect(markdown).toContain('precise diff failed; coarse block-level diff shown.');
    // Only the genuinely new block is reported; the unchanged "keep this"
    // block is not.
    const added = changes.filter((change) => change.op === 'insert');
    expect(added).toHaveLength(1);
    expect(added[0].text).toBe('new block');
  });

  it('does not report whitespace-only blocks in the fallback path', () => {
    // New doc adds a block whose plain text is only whitespace; coarseDiff
    // skips blocks whose trimmed text is empty.
    const before = doc(unknownNode(), para('kept'));
    const after = doc(para('kept'), para(' '));

    const { markdown, changes, summary } = diffDocs(before, after);

    // Fallback was taken (precise path threw on the unknown node).
    expect(markdown).toContain(FALLBACK_NOTE);
    // No change is reported: "kept" is unchanged and " " is whitespace-only.
    expect(changes).toHaveLength(0);
    expect(summary).toEqual({ inserted: 0, deleted: 0, blocksChanged: 0 });
  });

  it('still computes integrity (images/tables/callouts/footnotes) in the coarse-fallback branch', () => {
    // Regression guard: integrity is computed BEFORE the try/catch, so a
    // pathological pair that forces the fallback must NOT zero the integrity
    // counts. The unknown node forces the precise path to throw (fellBack).
    const before = doc(image(), callout(), table(), para('a [1]'), unknownNode());
    const after = doc(image(), image(), table(), para('b [2] [3]'));
    const { markdown, integrity } = diffDocs(before, after);

    // The fallback was taken...
    expect(markdown).toContain(FALLBACK_NOTE);
    // ...yet every integrity tuple is the real count, not [0,0].
    expect(integrity.images).toEqual([1, 2]);
    expect(integrity.callouts).toEqual([1, 0]);
    expect(integrity.tables).toEqual([1, 1]);
    // Footnote markers are counted from both docs even under the fallback.
    expect(integrity.footnoteMarkers).toEqual([[1], [2, 3]]);
  });

  it('reports both a deletion and an insertion in the fallback path', () => {
    const before = doc(para('old paragraph'), unknownNode());
    const after = doc(para('new paragraph'));

    const { markdown, changes, summary } = diffDocs(before, after);

    expect(markdown).toContain(FALLBACK_NOTE);
    const removedTexts = changes.filter((change) => change.op === 'delete').map((change) => change.text);
    const addedTexts = changes.filter((change) => change.op === 'insert').map((change) => change.text);
    // "old paragraph" no longer present -> deletion; "new paragraph" -> insertion.
    expect(removedTexts).toContain('old paragraph');
    expect(addedTexts).toContain('new paragraph');
    // Character counts accumulate from the reported texts.
    expect(summary.deleted).toBe('old paragraph'.length);
    expect(summary.inserted).toBe('new paragraph'.length);
  });
});
|
||||
|
||||
/**
 * DiffChange.block: the block-context string attached to each change, its
 * 80-character truncation, and how it keys the `blocksChanged` count.
 */
describe('blockContextAt (DiffChange.block)', () => {
  /** The insert changes produced by diffing single-paragraph docs `from` -> `to`. */
  const insertsBetween = (from: string, to: string) =>
    diffDocs(doc(para(from)), doc(para(to))).changes.filter((change) => change.op === 'insert');

  it('truncates a >80-char block context with an ellipsis and keeps it non-empty', () => {
    // A 100-char paragraph with a one-char edit; the block context guards a
    // swallowed catch and must produce a truncated, non-empty string.
    const longText = 'X'.repeat(100);
    const added = insertsBetween(longText, longText + 'Z');

    expect(added).toHaveLength(1);
    const context = added[0].block;
    expect(context.length).toBeGreaterThan(0);
    // Truncation rule: 77 chars + "..." = length 80, ending with "...".
    expect(context.endsWith('...')).toBe(true);
    expect(context).toHaveLength(80);
  });

  it('keeps a short block context untruncated', () => {
    const added = insertsBetween('abc', 'abcXY');
    expect(added[0].block).toBe('abcXY');
    expect(added[0].block.endsWith('...')).toBe(false);
  });

  it('dedups blocksChanged by op + block context (multiple edits in one block count once per op)', () => {
    // Two separate word edits inside a single paragraph produce 4 changes
    // (2 deletes + 2 inserts) but only 2 distinct block keys:
    // "d:the quick brown fox" and "i:the slow brown wolf".
    const before = doc(para('the quick brown fox'));
    const after = doc(para('the slow brown wolf'));
    const { changes, summary } = diffDocs(before, after);

    expect(changes.length).toBe(4);
    expect(summary.blocksChanged).toBe(2);
  });

  it('counts one block key per op for edits spread across two blocks', () => {
    // Edits in two different paragraphs -> 4 distinct block keys.
    const before = doc(para('first line here'), para('second line here'));
    const after = doc(para('first line HERE'), para('second line HERE'));

    expect(diffDocs(before, after).summary.blocksChanged).toBe(4);
  });
});
|
||||
|
||||
/** Markdown report: the summary header line and the per-change "+ " / "- " lines. */
describe('markdown rendering', () => {
  it('puts the summary counts in the markdown header', () => {
    const { markdown } = diffDocs(doc(para('abc')), doc(para('abcXY')));
    expect(markdown).toContain('# Diff: 2 inserted / 0 deleted (1 blocks changed)');
  });

  it('renders each change with its op sign (loose membership, library-controlled order)', () => {
    const { markdown } = diffDocs(doc(para('hello world')), doc(para('hello there')));

    // The Changes section is ordered by the diff library; assert membership,
    // not an exact ordered string. Scope to lines AFTER the "## Changes"
    // heading, since integrity lines also begin with "- ".
    const allLines = markdown.split('\n');
    const headingAt = allLines.indexOf('## Changes');
    expect(headingAt).toBeGreaterThanOrEqual(0);

    const afterHeading = allLines.slice(headingAt + 1);
    const removedLines = afterHeading.filter((line) => line.startsWith('- '));
    const addedLines = afterHeading.filter((line) => line.startsWith('+ '));
    expect(removedLines.some((line) => line.includes('world'))).toBe(true);
    expect(addedLines.some((line) => line.includes('there'))).toBe(true);
    // One delete line and one insert line.
    expect(removedLines).toHaveLength(1);
    expect(addedLines).toHaveLength(1);
  });
});
|
||||
});
|
||||
36
packages/git-sync/test/fixtures/corpus/01-headings-paragraphs.json
vendored
Normal file
36
packages/git-sync/test/fixtures/corpus/01-headings-paragraphs.json
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "heading",
|
||||
"attrs": { "level": 1 },
|
||||
"content": [{ "type": "text", "text": "Level one heading" }]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "A plain paragraph of text." }]
|
||||
},
|
||||
{
|
||||
"type": "heading",
|
||||
"attrs": { "level": 2 },
|
||||
"content": [{ "type": "text", "text": "Level two heading" }]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{ "type": "text", "text": "First line of a paragraph" },
|
||||
{ "type": "hardBreak" },
|
||||
{ "type": "text", "text": "second line after a hard break." }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "heading",
|
||||
"attrs": { "level": 3 },
|
||||
"content": [{ "type": "text", "text": "Level three heading" }]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Closing paragraph." }]
|
||||
}
|
||||
]
|
||||
}
|
||||
62
packages/git-sync/test/fixtures/corpus/02-inline-marks.json
vendored
Normal file
62
packages/git-sync/test/fixtures/corpus/02-inline-marks.json
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{ "type": "text", "marks": [{ "type": "bold" }], "text": "bold" },
|
||||
{ "type": "text", "text": " " },
|
||||
{ "type": "text", "marks": [{ "type": "italic" }], "text": "italic" },
|
||||
{ "type": "text", "text": " " },
|
||||
{ "type": "text", "marks": [{ "type": "code" }], "text": "code" },
|
||||
{ "type": "text", "text": " " },
|
||||
{ "type": "text", "marks": [{ "type": "strike" }], "text": "strike" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"marks": [
|
||||
{
|
||||
"type": "link",
|
||||
"attrs": {
|
||||
"href": "https://example.com/page"
|
||||
}
|
||||
}
|
||||
],
|
||||
"text": "a link"
|
||||
},
|
||||
{ "type": "text", "text": ", " },
|
||||
{
|
||||
"type": "text",
|
||||
"marks": [{ "type": "highlight" }],
|
||||
"text": "highlighted"
|
||||
},
|
||||
{ "type": "text", "text": ", base" },
|
||||
{ "type": "text", "marks": [{ "type": "subscript" }], "text": "sub" },
|
||||
{ "type": "text", "text": " and base" },
|
||||
{ "type": "text", "marks": [{ "type": "superscript" }], "text": "sup" },
|
||||
{ "type": "text", "text": "." }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{ "type": "text", "text": "Here is a " },
|
||||
{
|
||||
"type": "text",
|
||||
"marks": [
|
||||
{
|
||||
"type": "comment",
|
||||
"attrs": { "commentId": "cmt-xyz789" }
|
||||
}
|
||||
],
|
||||
"text": "commented anchor span"
|
||||
},
|
||||
{ "type": "text", "text": " that must survive (SPEC §3)." }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
113
packages/git-sync/test/fixtures/corpus/03-lists.json
vendored
Normal file
113
packages/git-sync/test/fixtures/corpus/03-lists.json
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "bulletList",
|
||||
"content": [
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "First bullet" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Second bullet with a nested list" }]
|
||||
},
|
||||
{
|
||||
"type": "bulletList",
|
||||
"content": [
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Nested bullet A" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Nested bullet B" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "orderedList",
|
||||
"content": [
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "First ordered item" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Second ordered item" }]
|
||||
},
|
||||
{
|
||||
"type": "orderedList",
|
||||
"content": [
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Nested ordered one" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "taskList",
|
||||
"content": [
|
||||
{
|
||||
"type": "taskItem",
|
||||
"attrs": { "checked": true },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Done task" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "taskItem",
|
||||
"attrs": { "checked": false },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Pending task" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
38
packages/git-sync/test/fixtures/corpus/04-blocks.json
vendored
Normal file
38
packages/git-sync/test/fixtures/corpus/04-blocks.json
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "blockquote",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "A quoted line." }]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "A second quoted paragraph." }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "horizontalRule"
|
||||
},
|
||||
{
|
||||
"type": "codeBlock",
|
||||
"attrs": { "language": "js" },
|
||||
"content": [
|
||||
{ "type": "text", "text": "const a = 1;\nconsole.log(a);\n" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "callout",
|
||||
"attrs": { "type": "warning" },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "This is a warning callout." }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
85
packages/git-sync/test/fixtures/corpus/05-table.json
vendored
Normal file
85
packages/git-sync/test/fixtures/corpus/05-table.json
vendored
Normal file
@@ -0,0 +1,85 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "table",
|
||||
"content": [
|
||||
{
|
||||
"type": "tableRow",
|
||||
"content": [
|
||||
{
|
||||
"type": "tableHeader",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Name" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableHeader",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Value" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableRow",
|
||||
"content": [
|
||||
{
|
||||
"type": "tableCell",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "alpha" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableCell",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "1" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableRow",
|
||||
"content": [
|
||||
{
|
||||
"type": "tableCell",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "beta" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableCell",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "2" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
17
packages/git-sync/test/fixtures/corpus/06-diagrams.json
vendored
Normal file
17
packages/git-sync/test/fixtures/corpus/06-diagrams.json
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "A drawio and an excalidraw diagram follow." }]
|
||||
},
|
||||
{
|
||||
"type": "drawio",
|
||||
"attrs": { "src": "/api/files/def/flow.drawio", "align": "center", "attachmentId": "att-1" }
|
||||
},
|
||||
{
|
||||
"type": "excalidraw",
|
||||
"attrs": { "src": "/api/files/ghi/sketch.excalidraw", "align": "center", "attachmentId": "att-2" }
|
||||
}
|
||||
]
|
||||
}
|
||||
35
packages/git-sync/test/fixtures/corpus/07-textstyle-mention.json
vendored
Normal file
35
packages/git-sync/test/fixtures/corpus/07-textstyle-mention.json
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{ "type": "text", "text": "Some " },
|
||||
{
|
||||
"type": "text",
|
||||
"marks": [{ "type": "textStyle", "attrs": { "color": "#ff0000" } }],
|
||||
"text": "red colored"
|
||||
},
|
||||
{ "type": "text", "text": " text." }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{ "type": "text", "text": "Ping " },
|
||||
{
|
||||
"type": "mention",
|
||||
"attrs": {
|
||||
"id": "m-1",
|
||||
"label": "Alice",
|
||||
"entityType": "user",
|
||||
"entityId": "u-1",
|
||||
"slugId": "s-1",
|
||||
"creatorId": "c-1"
|
||||
}
|
||||
},
|
||||
{ "type": "text", "text": " please." }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
15
packages/git-sync/test/fixtures/corpus/08-details.json
vendored
Normal file
15
packages/git-sync/test/fixtures/corpus/08-details.json
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "details",
|
||||
"attrs": { "open": false },
|
||||
"content": [
|
||||
{ "type": "detailsSummary", "content": [{ "type": "text", "text": "Click to expand" }] },
|
||||
{ "type": "detailsContent", "content": [
|
||||
{ "type": "paragraph", "content": [{ "type": "text", "text": "Hidden body paragraph." }] }
|
||||
]}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
17
packages/git-sync/test/fixtures/corpus/09-columns.json
vendored
Normal file
17
packages/git-sync/test/fixtures/corpus/09-columns.json
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "columns",
|
||||
"attrs": { "layout": "two", "widthMode": "normal" },
|
||||
"content": [
|
||||
{ "type": "column", "attrs": { "width": 50 }, "content": [
|
||||
{ "type": "paragraph", "content": [{ "type": "text", "text": "Left column." }] }
|
||||
]},
|
||||
{ "type": "column", "attrs": { "width": 50 }, "content": [
|
||||
{ "type": "paragraph", "content": [{ "type": "text", "text": "Right column." }] }
|
||||
]}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
13
packages/git-sync/test/fixtures/corpus/10-mention-in-heading.json
vendored
Normal file
13
packages/git-sync/test/fixtures/corpus/10-mention-in-heading.json
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "heading",
|
||||
"attrs": { "level": 2 },
|
||||
"content": [
|
||||
{ "type": "text", "text": "Notes for " },
|
||||
{ "type": "mention", "attrs": { "id": "m-2", "label": "Bob", "entityType": "user", "entityId": "u-2", "slugId": "s-2", "creatorId": "c-2" } }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
21
packages/git-sync/test/fixtures/known-limitations/image-diagrams.json
vendored
Normal file
21
packages/git-sync/test/fixtures/known-limitations/image-diagrams.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "An image followed by two diagrams." }]
|
||||
},
|
||||
{
|
||||
"type": "image",
|
||||
"attrs": { "src": "/api/files/abc/diagram.png", "alt": "A picture" }
|
||||
},
|
||||
{
|
||||
"type": "drawio",
|
||||
"attrs": { "src": "/api/files/def/flow.drawio", "attachmentId": "att-1" }
|
||||
},
|
||||
{
|
||||
"type": "excalidraw",
|
||||
"attrs": { "src": "/api/files/ghi/sketch.excalidraw", "attachmentId": "att-2" }
|
||||
}
|
||||
]
|
||||
}
|
||||
151
packages/git-sync/test/fixtures/sample-doc.json
vendored
Normal file
151
packages/git-sync/test/fixtures/sample-doc.json
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "heading",
|
||||
"attrs": { "level": 1, "id": "h-1" },
|
||||
"content": [{ "type": "text", "text": "Round-trip sample" }]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"attrs": { "id": "p-1" },
|
||||
"content": [
|
||||
{ "type": "text", "text": "This paragraph has " },
|
||||
{ "type": "text", "marks": [{ "type": "bold" }], "text": "bold" },
|
||||
{ "type": "text", "text": ", " },
|
||||
{ "type": "text", "marks": [{ "type": "italic" }], "text": "italic" },
|
||||
{ "type": "text", "text": " and a " },
|
||||
{
|
||||
"type": "text",
|
||||
"marks": [
|
||||
{
|
||||
"type": "link",
|
||||
"attrs": {
|
||||
"href": "https://example.com"
|
||||
}
|
||||
}
|
||||
],
|
||||
"text": "link"
|
||||
},
|
||||
{ "type": "text", "text": "." }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"attrs": { "id": "p-2" },
|
||||
"content": [
|
||||
{ "type": "text", "text": "Here is a " },
|
||||
{
|
||||
"type": "text",
|
||||
"marks": [
|
||||
{ "type": "comment", "attrs": { "commentId": "cmt-abc123", "resolved": false } }
|
||||
],
|
||||
"text": "commented span"
|
||||
},
|
||||
{ "type": "text", "text": " that must survive the round-trip." }
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "bulletList",
|
||||
"attrs": { "id": "ul-1" },
|
||||
"content": [
|
||||
{
|
||||
"type": "listItem",
|
||||
"attrs": { "id": "li-1" },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"attrs": { "id": "p-3" },
|
||||
"content": [{ "type": "text", "text": "First bullet" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "listItem",
|
||||
"attrs": { "id": "li-2" },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"attrs": { "id": "p-4" },
|
||||
"content": [{ "type": "text", "text": "Second bullet" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "table",
|
||||
"attrs": { "id": "tbl-1" },
|
||||
"content": [
|
||||
{
|
||||
"type": "tableRow",
|
||||
"content": [
|
||||
{
|
||||
"type": "tableHeader",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Name" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableHeader",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "Value" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableRow",
|
||||
"content": [
|
||||
{
|
||||
"type": "tableCell",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "alpha" }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tableCell",
|
||||
"attrs": { "colspan": 1, "rowspan": 1 },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [{ "type": "text", "text": "1" }]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "callout",
|
||||
"attrs": { "type": "info", "id": "callout-1" },
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"attrs": { "id": "p-5" },
|
||||
"content": [{ "type": "text", "text": "This is an info callout." }]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "codeBlock",
|
||||
"attrs": { "language": "js", "id": "code-1" },
|
||||
"content": [
|
||||
{ "type": "text", "text": "const a = 1;\nconsole.log(a);\n" }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
144
packages/git-sync/test/layout.test.ts
Normal file
144
packages/git-sync/test/layout.test.ts
Normal file
@@ -0,0 +1,144 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { buildVaultLayout, type PageNode } from '../src/engine/layout.js';
|
||||
|
||||
/**
 * Unit tests for `buildVaultLayout`, the page-tree -> vault path mapping.
 *
 * Every case feeds a flat `PageNode[]` and inspects the returned Map entry
 * (`{ segments, stem }`) for individual page ids. The cases cover sibling
 * disambiguation (`~slugId`, falling back to `~id`), parent cycles, orphaned
 * parents, ancestor ordering and suffix sanitization.
 */
describe('buildVaultLayout', () => {
  // Full destination path of one layout entry: folder segments plus the stem.
  const pathOf = (e: { segments: string[]; stem: string }) =>
    [...e.segments, e.stem].join('/');

  it('disambiguates two siblings with the same sanitized title via ~slugId', () => {
    const pages: PageNode[] = [
      { id: 'p1', title: 'Notes', slugId: 'slug-a', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'slug-b', parentPageId: null },
    ];
    const layout = buildVaultLayout(pages);
    // The first page keeps the plain stem; only the later one gets a suffix.
    expect(layout.get('p1')).toEqual({ segments: [], stem: 'Notes' });
    expect(layout.get('p2')).toEqual({ segments: [], stem: 'Notes ~slug-b' });
  });

  it('falls back to ~id when a colliding sibling has no slugId', () => {
    const pages: PageNode[] = [
      { id: 'p1', title: 'Notes', parentPageId: null },
      { id: 'p2', title: 'Notes', parentPageId: null },
    ];
    const layout = buildVaultLayout(pages);
    expect(layout.get('p1')?.stem).toBe('Notes');
    expect(layout.get('p2')?.stem).toBe('Notes ~p2');
  });

  it('does NOT collide identical titles under DIFFERENT parents (distinct segments)', () => {
    const pages: PageNode[] = [
      { id: 'a', title: 'Alpha', parentPageId: null },
      { id: 'b', title: 'Beta', parentPageId: null },
      { id: 'a1', title: 'Notes', parentPageId: 'a' },
      { id: 'b1', title: 'Notes', parentPageId: 'b' },
    ];
    const layout = buildVaultLayout(pages);
    // Same stem, but different folder segments => no disambiguation needed.
    expect(layout.get('a1')).toEqual({ segments: ['Alpha'], stem: 'Notes' });
    expect(layout.get('b1')).toEqual({ segments: ['Beta'], stem: 'Notes' });
  });

  it('terminates on a 2-node parent cycle and yields a finite result', () => {
    const pages: PageNode[] = [
      { id: 'a', title: 'A', parentPageId: 'b' },
      { id: 'b', title: 'B', parentPageId: 'a' },
    ];
    const layout = buildVaultLayout(pages);
    // Both resolve to a finite path; the visited-guard breaks the cycle.
    expect(layout.size).toBe(2);
    const a = layout.get('a');
    const b = layout.get('b');
    expect(a).toBeDefined();
    expect(b).toBeDefined();
    // Each node's segment chain is bounded (no infinite walk).
    expect(a!.segments.length).toBeLessThanOrEqual(2);
    expect(b!.segments.length).toBeLessThanOrEqual(2);
  });

  it('maps a root page (parentPageId null) to empty segments', () => {
    const pages: PageNode[] = [{ id: 'root', title: 'Home', parentPageId: null }];
    const layout = buildVaultLayout(pages);
    expect(layout.get('root')).toEqual({ segments: [], stem: 'Home' });
  });

  it('emits ancestors in root->leaf order for a deep chain', () => {
    const pages: PageNode[] = [
      { id: 'g', title: 'Grand', parentPageId: null },
      { id: 'p', title: 'Parent', parentPageId: 'g' },
      { id: 'c', title: 'Child', parentPageId: 'p' },
    ];
    const layout = buildVaultLayout(pages);
    expect(layout.get('c')).toEqual({
      segments: ['Grand', 'Parent'],
      stem: 'Child',
    });
  });

  it('disambiguates two orphan-parent pages with the same title at the path level', () => {
    // Both parents are OUTSIDE the input set, so both pages bucket at the root
    // with segments: []. Sibling-scoping cannot see this (different parentKeys),
    // so the final full-path pass must produce DISTINCT paths.
    const pages: PageNode[] = [
      { id: 'x', title: 'Orphan', slugId: 'sx', parentPageId: 'missing-1' },
      { id: 'y', title: 'Orphan', slugId: 'sy', parentPageId: 'missing-2' },
    ];
    const layout = buildVaultLayout(pages);
    const ex = layout.get('x')!;
    const ey = layout.get('y')!;
    expect(pathOf(ex)).not.toBe(pathOf(ey));
    // The first keeps the plain stem; the later one is re-stemmed.
    expect(ex.stem).toBe('Orphan');
    expect(ey.stem).toBe('Orphan ~sy');
  });

  it('sanitizes a slugId containing a path separator before using it as a suffix', () => {
    // A crafted slugId with "/" must NOT leak a path separator into the stem.
    const pages: PageNode[] = [
      { id: 'p1', title: 'Notes', slugId: 'a/b', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'c/d', parentPageId: null },
    ];
    const layout = buildVaultLayout(pages);
    const stem = layout.get('p2')!.stem;
    expect(stem).not.toContain('/');
    expect(stem).not.toContain('\\');
    // The "/" was replaced by sanitizeTitle's dash substitution.
    expect(stem).toBe('Notes ~c-d');
  });

  it('disambiguates two ORPHAN ancestors at the NAME pass so their children stay in sync', () => {
    // Two orphan PARENTS share the same title but live under DIFFERENT missing
    // parents, so sibling-scoping by raw parentPageId would never compare them.
    // Both bucket at the vault root, so they MUST be disambiguated in the name
    // pass (sharing the "__root__" bucket) BEFORE any child folder segment is
    // computed from the parent name — otherwise re-stemming a parent post-hoc
    // would desync its child's folder from the parent file.
    const pages: PageNode[] = [
      { id: 'p1', title: 'Dup', slugId: 's1', parentPageId: 'missing-1' },
      { id: 'p2', title: 'Dup', slugId: 's2', parentPageId: 'missing-2' },
      { id: 'c1', title: 'Child', parentPageId: 'p1' },
      { id: 'c2', title: 'Child', parentPageId: 'p2' },
    ];
    const layout = buildVaultLayout(pages);
    const p1 = layout.get('p1')!;
    const p2 = layout.get('p2')!;
    const c1 = layout.get('c1')!;
    const c2 = layout.get('c2')!;

    // The two orphan parents get DISTINCT stems, both at the root.
    expect(p1.segments).toEqual([]);
    expect(p2.segments).toEqual([]);
    expect(p1.stem).toBe('Dup');
    expect(p2.stem).toBe('Dup ~s2');
    expect(p1.stem).not.toBe(p2.stem);

    // Each child's folder segment EXACTLY equals its parent's resolved stem
    // (no desync): the parent name is final before segments are built.
    expect(c1.segments).toEqual([p1.stem]);
    expect(c2.segments).toEqual([p2.stem]);

    // All four full paths are unique.
    const paths = [p1, p2, c1, c2].map(pathOf);
    expect(new Set(paths).size).toBe(paths.length);
  });
});
|
||||
41
packages/git-sync/test/loop-guard.test.ts
Normal file
41
packages/git-sync/test/loop-guard.test.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { createHash } from 'node:crypto';
|
||||
import { bodyHash } from '../src/engine/loop-guard.js';
|
||||
|
||||
// Loop-guard body hash (SPEC §10 "body hash"). The hash is the signal a future
// pull-side poll-suppression uses to recognize our OWN write. It MUST be
// deterministic (same input -> same hash) and discriminating (different input ->
// different hash).

describe('bodyHash (pure, SPEC §10)', () => {
  it('is deterministic — same input yields the same hash', () => {
    const body = '# Title\n\nsome body with <span data-comment-id="x">mark</span>\n';
    expect(bodyHash(body)).toBe(bodyHash(body));
  });

  it('differs for different input', () => {
    expect(bodyHash('alpha')).not.toBe(bodyHash('beta'));
    // Even a one-character difference produces a different digest.
    expect(bodyHash('alpha')).not.toBe(bodyHash('alphb'));
  });

  it('returns lowercase sha256 hex (64 chars)', () => {
    const h = bodyHash('hello');
    expect(h).toMatch(/^[0-9a-f]{64}$/);
    // Matches an independent sha256 of the same UTF-8 bytes.
    expect(h).toBe(createHash('sha256').update('hello', 'utf8').digest('hex'));
  });

  it('hashes the empty string to the well-known sha256 empty digest', () => {
    expect(bodyHash('')).toBe(
      'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855',
    );
  });

  it('is sensitive to UTF-8 content (Cyrillic body)', () => {
    // NOTE(review): the two strings look identical; presumably the second ends
    // in a Latin "a" homoglyph instead of the Cyrillic letter — confirm the
    // bytes were preserved when copying this literal.
    expect(bodyHash('Колонка')).not.toBe(bodyHash('Колонкa'));
    expect(bodyHash('Колонка')).toBe(
      createHash('sha256').update('Колонка', 'utf8').digest('hex'),
    );
  });
});
|
||||
227
packages/git-sync/test/markdown-converter-golden.test.ts
Normal file
227
packages/git-sync/test/markdown-converter-golden.test.ts
Normal file
@@ -0,0 +1,227 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
|
||||
// collaboration.ts and mutates global DOM at import time).
|
||||
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
|
||||
|
||||
// markdown-converter.ts is the weakest pure module (report §2). These golden
|
||||
// tests close the gaps the base markdown-converter.test.ts leaves open:
|
||||
// columns/column wrapper, embed/audio/pdf (used to emit nothing), drawio/
|
||||
// excalidraw data-align presence rule, the remaining inline-mark matrix,
|
||||
// paragraph.textAlign, subpages + unknown-in-container fallback, escaping
|
||||
// idempotence, table-cell pipe/newline sanitization, and empty/single-column
|
||||
// tables. Cases already asserted in the base file are NOT repeated.
|
||||
|
||||
const doc = (...nodes: any[]) => {
  // Minimal ProseMirror document whose children are exactly the given nodes.
  const wrapper = { type: 'doc', content: nodes };
  return wrapper;
};
|
||||
const c = (node: any) => {
  // Convert a single node by wrapping it in a one-child doc first.
  const wrapped = doc(node);
  return convertProseMirrorToMarkdown(wrapped);
};
|
||||
// Build a text node; the `marks` key is only present when marks are passed.
const text = (t: string, marks?: any[]) =>
  marks ? { type: 'text', text: t, marks } : { type: 'text', text: t };
|
||||
const para = (...inline: any[]) => {
  // Paragraph node holding the given inline children, in order.
  const paragraph = { type: 'paragraph', content: inline };
  return paragraph;
};
|
||||
|
||||
// Golden tests for the `columns` / `column` nodes, which are expected to be
// emitted as nested raw-HTML <div data-type="..."> wrappers.
describe('columns / column (raw-HTML layout wrapper)', () => {
  it('wraps a multi-column layout as nested data-type divs with the children inside (regression: children unwrapped)', () => {
    const out = c({
      type: 'columns',
      attrs: { layout: 'two' },
      content: [
        { type: 'column', attrs: { width: 50 }, content: [para(text('L'))] },
        { type: 'column', content: [para(text('R'))] },
      ],
    });
    // Only the column that carries a width gets a data-width attribute.
    expect(out).toBe(
      '<div data-type="columns" data-layout="two">' +
        '<div data-type="column" data-width="50"><p>L</p></div>' +
        '<div data-type="column"><p>R</p></div>' +
        '</div>',
    );
  });

  it('omits the default widthMode "normal" but emits a non-default one', () => {
    const normal = c({
      type: 'columns',
      attrs: { layout: 'two', widthMode: 'normal' },
      content: [{ type: 'column', content: [para(text('x'))] }],
    });
    expect(normal).not.toContain('data-width-mode');

    const wide = c({
      type: 'columns',
      attrs: { layout: 'two', widthMode: 'full' },
      content: [{ type: 'column', content: [para(text('x'))] }],
    });
    expect(wide).toContain('data-width-mode="full"');
  });
});
|
||||
|
||||
// Golden tests pinning the exact HTML emitted for embed, audio and pdf nodes.
describe('embed / audio / pdf (previously emitted nothing — invisible regression)', () => {
  it('embed emits div[data-type="embed"] with src/provider', () => {
    expect(c({ type: 'embed', attrs: { src: 'https://x.com/e', provider: 'iframe' } })).toBe(
      '<div data-type="embed" data-src="https://x.com/e" data-provider="iframe"></div>',
    );
  });

  it('audio emits a div-wrapped <audio> with src', () => {
    expect(c({ type: 'audio', attrs: { src: '/a.mp3' } })).toBe(
      '<div><audio src="/a.mp3"></audio></div>',
    );
  });

  it('pdf emits div[data-type="pdf"] with src and name', () => {
    // Note the expected markup uses a plain `src` attribute here, not `data-src`.
    expect(c({ type: 'pdf', attrs: { src: '/d.pdf', name: 'd.pdf' } })).toBe(
      '<div data-type="pdf" src="/d.pdf" data-name="d.pdf"></div>',
    );
  });
});
|
||||
|
||||
// Golden tests for when diagram nodes carry a data-align attribute: absent
// when `align` is unset, present when an explicit align is given.
describe('drawio / excalidraw data-align asymmetry (SPEC §11)', () => {
  it('drawio: data-align is ABSENT when align is unset', () => {
    const out = c({ type: 'drawio', attrs: { src: '/d.drawio' } });
    expect(out).toBe('<div data-type="drawio" data-src="/d.drawio"></div>');
    expect(out).not.toContain('data-align');
  });

  it('drawio: data-align is PRESENT for a non-default align', () => {
    expect(c({ type: 'drawio', attrs: { src: '/d.drawio', align: 'right' } })).toBe(
      '<div data-type="drawio" data-src="/d.drawio" data-align="right"></div>',
    );
  });

  it('excalidraw: data-align is ABSENT when align is unset', () => {
    const out = c({ type: 'excalidraw', attrs: { src: '/e.excalidraw' } });
    expect(out).toBe('<div data-type="excalidraw" data-src="/e.excalidraw"></div>');
    expect(out).not.toContain('data-align');
  });
});
|
||||
|
||||
// Golden tests for the inline marks that are rendered as HTML tags.
describe('inline-mark matrix (underline/sub/sup/highlight±color/textStyle/comment)', () => {
  it('emits the schema HTML for each remaining inline mark in one matrix', () => {
    // Each row: the marks applied to the text "m" and the exact expected output.
    const cases: [any[], string][] = [
      [[{ type: 'underline' }], '<u>m</u>'],
      [[{ type: 'subscript' }], '<sub>m</sub>'],
      [[{ type: 'superscript' }], '<sup>m</sup>'],
      [[{ type: 'highlight' }], '<mark>m</mark>'],
      [
        [{ type: 'highlight', attrs: { color: '#ff0000' } }],
        '<mark style="background-color: #ff0000">m</mark>',
      ],
      [
        [{ type: 'textStyle', attrs: { color: '#00ff00' } }],
        '<span style="color: #00ff00">m</span>',
      ],
      [
        [{ type: 'comment', attrs: { commentId: 'cid-1' } }],
        '<span data-comment-id="cid-1">m</span>',
      ],
      [
        [{ type: 'comment', attrs: { commentId: 'cid-1', resolved: true } }],
        '<span data-comment-id="cid-1" data-resolved="true">m</span>',
      ],
    ];
    for (const [marks, expected] of cases) {
      expect(c(para(text('m', marks)))).toBe(expected);
    }
  });

  it('a textStyle mark with no color emits nothing (plain text passes through)', () => {
    expect(c(para(text('plain', [{ type: 'textStyle', attrs: {} }])))).toBe('plain');
  });

  it('a comment mark with no commentId emits nothing (plain text)', () => {
    expect(c(para(text('plain', [{ type: 'comment', attrs: {} }])))).toBe('plain');
  });
});
|
||||
|
||||
// Golden tests for paragraph alignment: only a non-default textAlign is
// expected to wrap the paragraph text in <div align="...">.
describe('paragraph.textAlign -> <div align>', () => {
  it('non-default alignment wraps the paragraph in <div align="...">', () => {
    expect(c({ type: 'paragraph', attrs: { textAlign: 'center' }, content: [text('x')] })).toBe(
      '<div align="center">x</div>',
    );
  });

  it('textAlign "left" (the default) is NOT wrapped', () => {
    expect(c({ type: 'paragraph', attrs: { textAlign: 'left' }, content: [text('x')] })).toBe('x');
  });
});
|
||||
|
||||
// Golden tests for the subpages placeholder and for how unknown block types
// are rendered, both inside a raw-HTML container and at the top level.
describe('subpages token + unknown-in-container fallback', () => {
  it('subpages emits the {{SUBPAGES}} placeholder token', () => {
    expect(c({ type: 'subpages' })).toBe('{{SUBPAGES}}');
  });

  it('an unknown block inside a raw-HTML container is wrapped in <div> (never markdown)', () => {
    // Inside columns the children are rendered as HTML; an unknown block type
    // must NOT fall back to markdown (which would land as literal text on
    // re-import). It is wrapped in a <div> so its children survive.
    const out = c({
      type: 'columns',
      attrs: { layout: 'two' },
      content: [
        { type: 'column', content: [{ type: 'weirdBlock', content: [para(text('kept'))] }] },
      ],
    });
    expect(out).toBe(
      '<div data-type="columns" data-layout="two">' +
        '<div data-type="column"><div><p>kept</p></div></div>' +
        '</div>',
    );
  });

  it('an unknown TOP-LEVEL block falls back to its children only (markdown context)', () => {
    expect(c({ type: 'totallyUnknown', content: [text('inner')] })).toBe('inner');
  });
});
|
||||
|
||||
// Golden tests for attribute escaping and markdown URL encoding. The expected
// strings spell the HTML entities out literally: the converter output is plain
// text, so "&amp;" here means those five characters, not a decoded "&".
describe('escaping idempotence (SPEC §11 phantom-diff guard)', () => {
  it('escapeAttr escapes ONLY & and " in an attribute context, and is idempotent', () => {
    // The mathBlock `text` attr goes through escapeAttr. & -> &amp;, " -> &quot;.
    const once = c({ type: 'mathBlock', attrs: { text: 'a & "b"' } });
    expect(once).toBe(
      '<div data-type="mathBlock" data-katex="true" text="a &amp; &quot;b&quot;"></div>',
    );
    // < and > are deliberately NOT escaped (would accumulate on round-trips).
    const angled = c({ type: 'mathBlock', attrs: { text: 'a < b > c' } });
    expect(angled).toContain('text="a < b > c"');
    // The entity forms must be absent; the raw characters are present above.
    expect(angled).not.toContain('&lt;');
    expect(angled).not.toContain('&gt;');
  });

  it('encodeMdUrl turns a space into %20 in an image src (single inert URL token)', () => {
    // Image syntax is ![alt](src); the space in the src is percent-encoded.
    expect(c({ type: 'image', attrs: { alt: 'c', src: '/my pic.png' } })).toBe(
      '![c](/my%20pic.png)',
    );
  });
});
|
||||
|
||||
// Golden test for characters inside a table cell that would otherwise break
// the single-line GFM row syntax.
describe('table-cell sanitization (| and newline must not corrupt the GFM row)', () => {
  it('escapes a literal pipe and collapses an inter-block newline in a cell', () => {
    // A cell with a pipe in one paragraph and a second block paragraph: the pipe
    // is escaped to \| and the block join (a space) keeps the row intact.
    const headerRow = {
      type: 'tableRow',
      content: [{ type: 'tableHeader', content: [para(text('H'))] }],
    };
    const bodyRow = {
      type: 'tableRow',
      content: [{ type: 'tableCell', content: [para(text('a|b')), para(text('c'))] }],
    };
    const out = c({ type: 'table', content: [headerRow, bodyRow] });
    expect(out).toBe('| H |\n| --- |\n| a\\|b c |');
  });
});
|
||||
|
||||
// Golden tests for the degenerate table shapes: no rows, and a single column.
describe('empty / single-column tables', () => {
  it('a table with no rows renders as the empty string', () => {
    expect(c({ type: 'table', content: [] })).toBe('');
  });

  it('a single-column GFM table emits one column with a "---" separator', () => {
    const out = c({
      type: 'table',
      content: [
        { type: 'tableRow', content: [{ type: 'tableHeader', content: [para(text('Only'))] }] },
        { type: 'tableRow', content: [{ type: 'tableCell', content: [para(text('v'))] }] },
      ],
    });
    expect(out).toBe('| Only |\n| --- |\n| v |');
  });
});
|
||||
507
packages/git-sync/test/markdown-converter.test.ts
Normal file
507
packages/git-sync/test/markdown-converter.test.ts
Normal file
@@ -0,0 +1,507 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
|
||||
// collaboration.ts and mutates global DOM at import time).
|
||||
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
|
||||
|
||||
// Wrap a single node in a minimal ProseMirror doc. The top-level converter
|
||||
// joins doc children with "\n\n" and then .trim()s the whole output, so a
|
||||
// single-node doc yields exactly that node's rendered (and trimmed) string.
|
||||
const doc = (...nodes: any[]) => {
  // The given nodes become the doc's direct children, in call order.
  const children = nodes;
  return { type: 'doc', content: children };
};
|
||||
// Convenience: a text node, optionally with marks. The `marks` key is left
// off entirely when no marks are passed.
const text = (t: string, marks?: any[]) =>
  marks ? { type: 'text', text: t, marks } : { type: 'text', text: t };
|
||||
// Convenience: a paragraph wrapping inline children.
|
||||
const para = (...inline: any[]) => {
  // The inline nodes become the paragraph's content, in call order.
  const children = inline;
  return { type: 'paragraph', content: children };
};
|
||||
|
||||
describe('convertProseMirrorToMarkdown', () => {
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('headings', () => {
|
||||
it('emits the right number of "#" for levels 1-6', () => {
|
||||
for (let level = 1; level <= 6; level++) {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({ type: 'heading', attrs: { level }, content: [text('H')] }),
|
||||
);
|
||||
expect(out).toBe('#'.repeat(level) + ' H');
|
||||
}
|
||||
});
|
||||
|
||||
it('defaults to level 1 when level is missing', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({ type: 'heading', content: [text('NoLevel')] }),
|
||||
);
|
||||
expect(out).toBe('# NoLevel');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('text marks', () => {
|
||||
it('bold', () => {
|
||||
expect(
|
||||
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'bold' }])))),
|
||||
).toBe('**x**');
|
||||
});
|
||||
|
||||
it('italic', () => {
|
||||
expect(
|
||||
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'italic' }])))),
|
||||
).toBe('*x*');
|
||||
});
|
||||
|
||||
it('strike', () => {
|
||||
expect(
|
||||
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'strike' }])))),
|
||||
).toBe('~~x~~');
|
||||
});
|
||||
|
||||
it('inline code (sole mark) uses backtick span', () => {
|
||||
expect(
|
||||
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'code' }])))),
|
||||
).toBe('`x`');
|
||||
});
|
||||
|
||||
it('code + another mark switches to nested HTML (no backtick form)', () => {
|
||||
// marks array order drives nesting: bold first wraps, then code wraps that.
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc(para(text('x', [{ type: 'bold' }, { type: 'code' }]))),
|
||||
);
|
||||
expect(out).toBe('<code><strong>x</strong></code>');
|
||||
});
|
||||
|
||||
it('code + strike combo emits <code> wrapping <s>', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc(para(text('x', [{ type: 'strike' }, { type: 'code' }]))),
|
||||
);
|
||||
expect(out).toBe('<code><s>x</s></code>');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('links', () => {
|
||||
it('href only', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc(para(text('site', [{ type: 'link', attrs: { href: 'https://e.com' } }]))),
|
||||
);
|
||||
expect(out).toBe('[site](https://e.com)');
|
||||
});
|
||||
|
||||
it('href + title with an embedded double quote is escaped', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc(
|
||||
para(
|
||||
text('site', [
|
||||
{ type: 'link', attrs: { href: 'https://e.com', title: 'a "b" c' } },
|
||||
]),
|
||||
),
|
||||
),
|
||||
);
|
||||
// The markdown link-title form escapes the inner " as \".
|
||||
expect(out).toBe('[site](https://e.com "a \\"b\\" c")');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('image', () => {
|
||||
it('percent-encodes spaces and parentheses in src', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'image',
|
||||
attrs: { alt: 'cap', src: '/files/my pic (1).png' },
|
||||
}),
|
||||
);
|
||||
// space -> %20, ( -> %28, ) -> %29
|
||||
expect(out).toBe('');
|
||||
});
|
||||
|
||||
it('empty alt and missing src render harmlessly', () => {
|
||||
const out = convertProseMirrorToMarkdown(doc({ type: 'image', attrs: {} }));
|
||||
expect(out).toBe('![]()');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('codeBlock', () => {
|
||||
it('with language', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'codeBlock',
|
||||
attrs: { language: 'ts' },
|
||||
content: [text('const a = 1;')],
|
||||
}),
|
||||
);
|
||||
expect(out).toBe('```ts\nconst a = 1;\n```');
|
||||
});
|
||||
|
||||
it('without language emits empty info string', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({ type: 'codeBlock', content: [text('plain')] }),
|
||||
);
|
||||
expect(out).toBe('```\nplain\n```');
|
||||
});
|
||||
|
||||
it('strips ALL trailing newlines for idempotency', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({ type: 'codeBlock', content: [text('a\n\n\n')] }),
|
||||
);
|
||||
// Every trailing "\n" is removed, then exactly one is re-added by the fence.
|
||||
expect(out).toBe('```\na\n```');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('lists', () => {
|
||||
it('bullet list', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'bulletList',
|
||||
content: [
|
||||
{ type: 'listItem', content: [para(text('one'))] },
|
||||
{ type: 'listItem', content: [para(text('two'))] },
|
||||
],
|
||||
}),
|
||||
);
|
||||
expect(out).toBe('- one\n- two');
|
||||
});
|
||||
|
||||
it('ordered list numbers items sequentially', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'orderedList',
|
||||
content: [
|
||||
{ type: 'listItem', content: [para(text('a'))] },
|
||||
{ type: 'listItem', content: [para(text('b'))] },
|
||||
{ type: 'listItem', content: [para(text('c'))] },
|
||||
],
|
||||
}),
|
||||
);
|
||||
expect(out).toBe('1. a\n2. b\n3. c');
|
||||
});
|
||||
|
||||
it('nested bullet list indents the child by the 2-col marker width', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'bulletList',
|
||||
content: [
|
||||
{
|
||||
type: 'listItem',
|
||||
content: [
|
||||
para(text('parent')),
|
||||
{
|
||||
type: 'bulletList',
|
||||
content: [{ type: 'listItem', content: [para(text('child'))] }],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
// First line carries the marker; the nested list is indented 2 columns.
|
||||
expect(out).toBe('- parent\n - child');
|
||||
});
|
||||
|
||||
it('nested ordered list indents by the wider 3-col marker width', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'orderedList',
|
||||
content: [
|
||||
{
|
||||
type: 'listItem',
|
||||
content: [
|
||||
para(text('parent')),
|
||||
{
|
||||
type: 'orderedList',
|
||||
content: [{ type: 'listItem', content: [para(text('child'))] }],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
// "1. " is 3 columns wide, so the continuation indent is 3 spaces.
|
||||
expect(out).toBe('1. parent\n 1. child');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('task list', () => {
|
||||
it('unchecked and checked items', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'taskList',
|
||||
content: [
|
||||
{ type: 'taskItem', attrs: { checked: false }, content: [para(text('todo'))] },
|
||||
{ type: 'taskItem', attrs: { checked: true }, content: [para(text('done'))] },
|
||||
],
|
||||
}),
|
||||
);
|
||||
expect(out).toBe('- [ ] todo\n- [x] done');
|
||||
});
|
||||
|
||||
it('empty task item keeps its marker', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'taskList',
|
||||
content: [{ type: 'taskItem', attrs: { checked: false }, content: [] }],
|
||||
}),
|
||||
);
|
||||
expect(out).toBe('- [ ]');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('blockquote', () => {
  // A quote with one paragraph is a single "> "-prefixed line.
  it('single paragraph quote prefixes the line', () => {
    const quote = { type: 'blockquote', content: [para(text('quoted'))] };
    expect(convertProseMirrorToMarkdown(doc(quote))).toBe('> quoted');
  });

  // Paragraphs inside one quote are separated by a bare ">" line.
  it('multi-paragraph quote separates blocks with a bare ">" line', () => {
    const quote = {
      type: 'blockquote',
      content: [para(text('first')), para(text('second'))],
    };
    const markdown = convertProseMirrorToMarkdown(doc(quote));
    expect(markdown).toBe('> first\n>\n> second');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('breaks and rules', () => {
  // A horizontalRule node renders as a thematic break.
  it('horizontal rule', () => {
    expect(
      convertProseMirrorToMarkdown(doc({ type: 'horizontalRule' })),
    ).toBe('---');
  });

  // A hardBreak inside a paragraph renders as a markdown hard line break:
  // TWO trailing spaces followed by a newline. A single trailing space would
  // only be a soft break, so the expectation must contain both spaces.
  it('hard break emits two trailing spaces then newline', () => {
    const out = convertProseMirrorToMarkdown(
      doc(para(text('a'), { type: 'hardBreak' }, text('b'))),
    );
    expect(out).toBe('a  \nb');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('tables', () => {
  // Header-cell `align` attrs drive the delimiter row; a header without an
  // align attr gets the neutral "---" marker.
  it('GFM table emits alignment markers derived from header cells', () => {
    const headerRow = {
      type: 'tableRow',
      content: [
        { type: 'tableHeader', attrs: { align: 'left' }, content: [para(text('L'))] },
        { type: 'tableHeader', attrs: { align: 'center' }, content: [para(text('C'))] },
        { type: 'tableHeader', attrs: { align: 'right' }, content: [para(text('R'))] },
        { type: 'tableHeader', content: [para(text('N'))] },
      ],
    };
    const bodyRow = {
      type: 'tableRow',
      content: ['1', '2', '3', '4'].map((label) => ({
        type: 'tableCell',
        content: [para(text(label))],
      })),
    };
    const table = { type: 'table', content: [headerRow, bodyRow] };

    const markdown = convertProseMirrorToMarkdown(doc(table));

    const expectedLines = [
      '| L | C | R | N |',
      '| :-- | :-: | --: | --- |',
      '| 1 | 2 | 3 | 4 |',
    ];
    expect(markdown).toBe(expectedLines.join('\n'));
  });

  // A table with a spanning cell is emitted as raw HTML rather than a pipe table.
  it('spanned table (colspan/rowspan) emits raw <table> HTML', () => {
    const spanningHeaderRow = {
      type: 'tableRow',
      content: [
        {
          type: 'tableHeader',
          attrs: { colspan: 2 },
          content: [para(text('wide'))],
        },
      ],
    };
    const plainRow = {
      type: 'tableRow',
      content: [
        { type: 'tableCell', content: [para(text('a'))] },
        { type: 'tableCell', content: [para(text('b'))] },
      ],
    };

    const html = convertProseMirrorToMarkdown(
      doc({ type: 'table', content: [spanningHeaderRow, plainRow] }),
    );

    expect(html).toBe(
      [
        '<table><tbody>',
        '<tr><th colspan="2"><p>wide</p></th></tr>',
        '<tr><td><p>a</p></td><td><p>b</p></td></tr>',
        '</tbody></table>',
      ].join(''),
    );
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('callout and details', () => {
  // The callout type attr is lowercased when written into the ":::" fence.
  it('callout uses lowercased type fence', () => {
    const callout = {
      type: 'callout',
      attrs: { type: 'WARNING' },
      content: [para(text('beware'))],
    };
    const markdown = convertProseMirrorToMarkdown(doc(callout));
    expect(markdown).toBe(':::warning\nbeware\n:::');
  });

  // With no type attr at all the fence falls back to "info".
  it('callout defaults to info', () => {
    const callout = { type: 'callout', content: [para(text('hi'))] };
    expect(convertProseMirrorToMarkdown(doc(callout))).toBe(':::info\nhi\n:::');
  });

  it('details emits summary + content wrapped in <details>', () => {
    const summary = { type: 'detailsSummary', content: [text('Title')] };
    const body = { type: 'detailsContent', content: [para(text('Body'))] };
    const markdown = convertProseMirrorToMarkdown(
      doc({ type: 'details', content: [summary, body] }),
    );
    // details joins its children with "\n"; summary opens, content closes.
    expect(markdown).toBe('<details>\n<summary>Title</summary>\n\nBody\n</details>');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('math', () => {
  // Math nodes carry their LaTeX in a `text` HTML attribute. For idempotency
  // the converter must leave "<" and ">" literal inside that attribute; only
  // entity-significant characters (& and ") are escaped.
  it('inline math carries LaTeX in a text attr WITHOUT escaping < or >', () => {
    const out = convertProseMirrorToMarkdown(
      doc(para({ type: 'mathInline', attrs: { text: 'a < b' } })),
    );
    // < and > must NOT be HTML-escaped (idempotency); only & and " would be.
    expect(out).toBe(
      '<span data-type="mathInline" data-katex="true" text="a < b"></span>',
    );
    // Guard against the escaped form specifically: the output legitimately
    // contains literal "<" (it is an HTML tag), so the needle is the entity.
    expect(out).not.toContain('&lt;');
  });

  it('block math carries LaTeX in a text attr WITHOUT escaping < or >', () => {
    const out = convertProseMirrorToMarkdown(
      doc({ type: 'mathBlock', attrs: { text: 'x > y & z' } }),
    );
    // & IS escaped (entity-significant), but < and > are NOT.
    expect(out).toBe(
      '<div data-type="mathBlock" data-katex="true" text="x > y &amp; z"></div>',
    );
    // Neither angle bracket may appear in its entity-escaped form.
    expect(out).not.toContain('&lt;');
    expect(out).not.toContain('&gt;');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('inline atoms and media', () => {
  // Mentions serialize to a span whose data-* attrs mirror the node attrs and
  // whose visible text is "@" + label.
  it('mention emits schema span with data-* attrs and visible label', () => {
    const mention = {
      type: 'mention',
      attrs: { id: 'u1', label: 'Alice', entityType: 'user' },
    };
    const html = convertProseMirrorToMarkdown(doc(para(mention)));
    expect(html).toBe(
      '<span data-type="mention" data-id="u1" data-label="Alice" data-entity-type="user">@Alice</span>',
    );
  });

  it('attachment emits div with schema data-attachment-* attrs', () => {
    const attachment = {
      type: 'attachment',
      attrs: { url: '/files/x.zip', name: 'x.zip', mime: 'application/zip', size: 99 },
    };
    const html = convertProseMirrorToMarkdown(doc(attachment));
    const expected = [
      '<div data-type="attachment" data-attachment-url="/files/x.zip" ',
      'data-attachment-name="x.zip" data-attachment-mime="application/zip" ',
      'data-attachment-size="99"></div>',
    ].join('');
    expect(html).toBe(expected);
  });

  // The node's `alt` is surfaced as aria-label on the <video> element.
  it('video emits a <div>-wrapped <video> with schema attrs', () => {
    const video = {
      type: 'video',
      attrs: { src: '/v.mp4', alt: 'clip', width: 640 },
    };
    const html = convertProseMirrorToMarkdown(doc(video));
    expect(html).toBe(
      '<div><video src="/v.mp4" aria-label="clip" width="640"></video></div>',
    );
  });

  it('youtube emits a div[data-type="youtube"] with data-src', () => {
    const youtube = {
      type: 'youtube',
      attrs: { src: 'https://youtu.be/abc', width: 560, height: 315 },
    };
    const html = convertProseMirrorToMarkdown(doc(youtube));
    const expected = [
      '<div data-type="youtube" data-src="https://youtu.be/abc" ',
      'data-width="560" data-height="315"></div>',
    ].join('');
    expect(html).toBe(expected);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('edge cases', () => {
  // Degenerate inputs all serialize to the empty string.
  it('null content returns ""', () => {
    const markdown = convertProseMirrorToMarkdown(null);
    expect(markdown).toBe('');
  });

  it('empty object returns ""', () => {
    const markdown = convertProseMirrorToMarkdown({});
    expect(markdown).toBe('');
  });

  it('doc with no content returns ""', () => {
    const markdown = convertProseMirrorToMarkdown({ type: 'doc' });
    expect(markdown).toBe('');
  });

  it('unknown node type falls back to children-only (no throw, text preserved)', () => {
    const unknownNode = { type: 'totallyUnknownType', content: [text('kept')] };
    expect(convertProseMirrorToMarkdown(doc(unknownNode))).toBe('kept');
  });

  it('deeply nested structure does not stack-overflow', () => {
    // Wrap a leaf list item in 200 further levels; each level is a list item
    // holding a paragraph plus a bullet list containing the previous level.
    const leaf: any = { type: 'listItem', content: [para(text('leaf'))] };
    const outermost = Array.from({ length: 200 }).reduce<any>(
      (inner) => ({
        type: 'listItem',
        content: [para(text('lvl')), { type: 'bulletList', content: [inner] }],
      }),
      leaf,
    );
    const root = doc({ type: 'bulletList', content: [outermost] });

    expect(() => convertProseMirrorToMarkdown(root)).not.toThrow();

    const markdown = convertProseMirrorToMarkdown(root);
    expect(markdown).toContain('leaf');
    expect(markdown.startsWith('- lvl')).toBe(true);
  });
});
|
||||
});
|
||||
218
packages/git-sync/test/markdown-document-envelope.test.ts
Normal file
218
packages/git-sync/test/markdown-document-envelope.test.ts
Normal file
@@ -0,0 +1,218 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
|
||||
// collaboration.ts and mutates global DOM at import time).
|
||||
import {
|
||||
serializeDocmostMarkdown,
|
||||
parseDocmostMarkdown,
|
||||
serializeDocmostMarkdownBody,
|
||||
type DocmostMdMeta,
|
||||
} from '../src/lib/markdown-document.js';
|
||||
|
||||
const meta: DocmostMdMeta = {
|
||||
version: 1,
|
||||
pageId: 'p1',
|
||||
slugId: 's1',
|
||||
title: 'Hello',
|
||||
spaceId: 'sp1',
|
||||
parentPageId: null,
|
||||
};
|
||||
|
||||
describe('serializeDocmostMarkdown / parseDocmostMarkdown', () => {
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('round-trip', () => {
|
||||
it('round-trips meta, body, and comments', () => {
|
||||
const body = '# Title\n\nSome **body** text.';
|
||||
const comments = [{ id: 'c1', text: 'a note' }];
|
||||
const full = serializeDocmostMarkdown(meta, body, comments);
|
||||
const parsed = parseDocmostMarkdown(full);
|
||||
expect(parsed.meta).toEqual(meta);
|
||||
expect(parsed.body).toBe(body);
|
||||
expect(parsed.comments).toEqual(comments);
|
||||
});
|
||||
|
||||
it('emits a comments block with [] even when there are no comments', () => {
|
||||
const full = serializeDocmostMarkdown(meta, 'body', []);
|
||||
expect(full).toContain('<!-- docmost:comments\n[]\n-->');
|
||||
const parsed = parseDocmostMarkdown(full);
|
||||
expect(parsed.comments).toEqual([]);
|
||||
expect(parsed.body).toBe('body');
|
||||
});
|
||||
|
||||
it('non-array comments arg is normalized to [] in the serialized output', () => {
|
||||
const full = serializeDocmostMarkdown(meta, 'body', null as any);
|
||||
expect(full).toContain('<!-- docmost:comments\n[]\n-->');
|
||||
});
|
||||
|
||||
it('trims surrounding whitespace from the body on serialize', () => {
|
||||
const full = serializeDocmostMarkdown(meta, '\n\n body \n\n', []);
|
||||
const parsed = parseDocmostMarkdown(full);
|
||||
expect(parsed.body).toBe('body');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('missing blocks (tolerant parsing)', () => {
|
||||
it('missing meta block yields meta:null', () => {
|
||||
const input = 'Just a body.\n\n<!-- docmost:comments\n[]\n-->\n';
|
||||
const parsed = parseDocmostMarkdown(input);
|
||||
expect(parsed.meta).toBeNull();
|
||||
expect(parsed.body).toBe('Just a body.');
|
||||
expect(parsed.comments).toEqual([]);
|
||||
});
|
||||
|
||||
it('missing comments block yields comments:null and treats all as body', () => {
|
||||
const input =
|
||||
'<!-- docmost:meta\n' + JSON.stringify(meta) + '\n-->\n\nbody only';
|
||||
const parsed = parseDocmostMarkdown(input);
|
||||
expect(parsed.meta).toEqual(meta);
|
||||
expect(parsed.comments).toBeNull();
|
||||
expect(parsed.body).toBe('body only');
|
||||
});
|
||||
|
||||
it('plain markdown with neither block: meta and comments null, whole input is body', () => {
|
||||
const input = '# Plain\n\nNo envelope here.';
|
||||
const parsed = parseDocmostMarkdown(input);
|
||||
expect(parsed.meta).toBeNull();
|
||||
expect(parsed.comments).toBeNull();
|
||||
expect(parsed.body).toBe(input);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('CRLF normalization', () => {
|
||||
it('parses a CRLF-encoded document the same as LF', () => {
|
||||
const lf = serializeDocmostMarkdown(meta, 'line one\nline two', [
|
||||
{ id: 'c1' },
|
||||
]);
|
||||
const crlf = lf.replace(/\n/g, '\r\n');
|
||||
const parsed = parseDocmostMarkdown(crlf);
|
||||
expect(parsed.meta).toEqual(meta);
|
||||
expect(parsed.body).toBe('line one\nline two');
|
||||
expect(parsed.comments).toEqual([{ id: 'c1' }]);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('only the final document-ending comments block is captured', () => {
|
||||
it('an earlier literal docmost:comments opener inside the body stays in the body', () => {
|
||||
// The body documents the format and contains a literal opener that does
|
||||
// NOT end the document. Only the trailing block is treated as metadata.
|
||||
const bodyWithLiteral =
|
||||
'Here is how the format looks:\n\n<!-- docmost:comments\n[{"fake":true}]\n-->\n\nand more prose after it.';
|
||||
const full = serializeDocmostMarkdown(meta, bodyWithLiteral, [
|
||||
{ id: 'real' },
|
||||
]);
|
||||
const parsed = parseDocmostMarkdown(full);
|
||||
// The real (final) block parses into the comments...
|
||||
expect(parsed.comments).toEqual([{ id: 'real' }]);
|
||||
// ...and the earlier literal opener is preserved verbatim in the body.
|
||||
expect(parsed.body).toContain(
|
||||
'<!-- docmost:comments\n[{"fake":true}]\n-->',
|
||||
);
|
||||
expect(parsed.body).toContain('and more prose after it.');
|
||||
});
|
||||
|
||||
it('a literal opener whose closer does NOT end the doc is left entirely in the body', () => {
|
||||
// No real trailing block: the opener is not document-ending, so comments
|
||||
// stays null and nothing is stripped.
|
||||
const input =
|
||||
'<!-- docmost:meta\n' +
|
||||
JSON.stringify(meta) +
|
||||
'\n-->\n\nbody start\n\n<!-- docmost:comments\n[]\n-->\n\ntrailing text not ending the doc';
|
||||
const parsed = parseDocmostMarkdown(input);
|
||||
expect(parsed.comments).toBeNull();
|
||||
expect(parsed.body).toContain('<!-- docmost:comments');
|
||||
expect(parsed.body).toContain('trailing text not ending the doc');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('end-anchored comments closer tolerates CRLF + trailing whitespace', () => {
|
||||
it('captures the final comments block when its "-->" closer has CRLF and trailing spaces', () => {
|
||||
// The closer regex is /\r?\n-->[ \t]*\r?\n?\s*$/. Build a document whose
|
||||
// trailing comments block uses CRLF line endings AND has trailing spaces
|
||||
// after the "-->" closer, then assert it is still recognised as the
|
||||
// document-ending block (and the body is not polluted by it).
|
||||
const metaLine = JSON.stringify(meta);
|
||||
const crlfDoc =
|
||||
`<!-- docmost:meta\r\n${metaLine}\r\n-->\r\n\r\n` +
|
||||
`the body line\r\n\r\n` +
|
||||
`<!-- docmost:comments\r\n[{"id":"c-crlf"}]\r\n--> \r\n`;
|
||||
const parsed = parseDocmostMarkdown(crlfDoc);
|
||||
expect(parsed.meta).toEqual(meta);
|
||||
expect(parsed.body).toBe('the body line');
|
||||
expect(parsed.comments).toEqual([{ id: 'c-crlf' }]);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('malformed JSON throws a clear error', () => {
|
||||
it('throws on malformed meta JSON', () => {
|
||||
const input = '<!-- docmost:meta\n{not valid json}\n-->\n\nbody';
|
||||
expect(() => parseDocmostMarkdown(input)).toThrow(/docmost:meta JSON/);
|
||||
});
|
||||
|
||||
it('throws on malformed comments JSON', () => {
|
||||
const input = 'body\n\n<!-- docmost:comments\n[not, valid]\n-->\n';
|
||||
expect(() => parseDocmostMarkdown(input)).toThrow(/docmost:comments JSON/);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('serializeDocmostMarkdownBody', () => {
  // Serialize with the body-only serializer, then parse the result back.
  const viaBodySerializer = (m: DocmostMdMeta, body: string) =>
    parseDocmostMarkdown(serializeDocmostMarkdownBody(m, body));

  it('emits NO comments block', () => {
    const serialized = serializeDocmostMarkdownBody(meta, 'just the body');
    expect(serialized).not.toContain('docmost:comments');
    expect(serialized).toContain('<!-- docmost:meta');
  });

  it('serialize -> parse preserves meta and the trimmed body, comments null (SPEC §3)', () => {
    const fullMeta: DocmostMdMeta = {
      version: 1,
      pageId: 'page-123',
      slugId: 'slug-abc',
      title: 'My Page',
      spaceId: 'space-1',
      parentPageId: 'parent-9',
    };
    const body = 'Hello\n\nWorld';

    const result = viaBodySerializer(fullMeta, body);

    expect(result.meta).toEqual(fullMeta);
    expect(result.body).toBe(body);
    expect(result.comments).toBeNull();
  });

  it('preserves a null parentPageId for a root page', () => {
    const result = viaBodySerializer(meta, 'body text');
    expect(result.meta).toEqual(meta);
    expect(result.comments).toBeNull();
  });

  it('produces a parseable file for an empty or missing body', () => {
    const minimal: DocmostMdMeta = { version: 1, pageId: 'p-empty' };

    // Explicitly empty body.
    const fromEmpty = viaBodySerializer(minimal, '');
    expect(fromEmpty.meta).toEqual(minimal);
    expect(fromEmpty.body).toBe('');
    expect(fromEmpty.comments).toBeNull();

    // Missing body (undefined) — serializer coalesces to "".
    const fromMissing = viaBodySerializer(minimal, undefined as unknown as string);
    expect(fromMissing.meta).toEqual(minimal);
    expect(fromMissing.body).toBe('');
    expect(fromMissing.comments).toBeNull();
  });

  it('trims the body', () => {
    const result = viaBodySerializer(meta, '\n\n hi \n');
    expect(result.body).toBe('hi');
  });
});
|
||||
66
packages/git-sync/test/markdown-document.test.ts
Normal file
66
packages/git-sync/test/markdown-document.test.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
serializeDocmostMarkdownBody,
|
||||
parseDocmostMarkdown,
|
||||
type DocmostMdMeta,
|
||||
} from 'docmost-client';
|
||||
|
||||
describe('serializeDocmostMarkdownBody round-trip (SPEC §3)', () => {
  it('serialize -> parse preserves meta and the trimmed body, with no comments block', () => {
    const childMeta: DocmostMdMeta = {
      version: 1,
      pageId: 'page-123',
      slugId: 'slug-abc',
      title: 'My Page',
      spaceId: 'space-1',
      parentPageId: 'parent-9',
    };
    const body = 'Hello\n\nWorld';

    const result = parseDocmostMarkdown(serializeDocmostMarkdownBody(childMeta, body));

    expect(result.meta).toEqual(childMeta);
    expect(result.body).toBe(body);
    // No trailing docmost:comments block was emitted (SPEC §3).
    expect(result.comments).toBeNull();
  });

  it('preserves a null parentPageId for a root page', () => {
    const rootMeta: DocmostMdMeta = {
      version: 1,
      pageId: 'root-1',
      slugId: 'root-slug',
      title: 'Root',
      spaceId: 'space-1',
      parentPageId: null,
    };

    const result = parseDocmostMarkdown(serializeDocmostMarkdownBody(rootMeta, 'body text'));

    expect(result.meta).toEqual(rootMeta);
    expect(result.comments).toBeNull();
  });

  it('produces a parseable file for an empty/missing body', () => {
    const minimalMeta: DocmostMdMeta = { version: 1, pageId: 'p-empty' };

    // Empty string body.
    const serializedEmpty = serializeDocmostMarkdownBody(minimalMeta, '');
    expect(() => parseDocmostMarkdown(serializedEmpty)).not.toThrow();
    const fromEmpty = parseDocmostMarkdown(serializedEmpty);
    expect(fromEmpty.meta).toEqual(minimalMeta);
    expect(fromEmpty.body).toBe('');
    expect(fromEmpty.comments).toBeNull();

    // Missing body (undefined) — serializer coalesces to "".
    const serializedMissing = serializeDocmostMarkdownBody(
      minimalMeta,
      undefined as unknown as string,
    );
    expect(() => parseDocmostMarkdown(serializedMissing)).not.toThrow();
    const fromMissing = parseDocmostMarkdown(serializedMissing);
    expect(fromMissing.meta).toEqual(minimalMeta);
    expect(fromMissing.body).toBe('');
    expect(fromMissing.comments).toBeNull();
  });
});
|
||||
698
packages/git-sync/test/markdown-roundtrip.property.test.ts
Normal file
698
packages/git-sync/test/markdown-roundtrip.property.test.ts
Normal file
@@ -0,0 +1,698 @@
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import fc from 'fast-check';
|
||||
|
||||
// These property tests run real ProseMirror<->Markdown conversion × NUM_RUNS, so
|
||||
// each takes ~4–5s. Inputs are DETERMINISTIC (fixed SEED below) — the only source
|
||||
// of flakiness is wall-clock: under the full suite's parallel worker load they can
|
||||
// exceed vitest's default 5000ms per-test timeout. Give them ample headroom so CI
|
||||
// (which gates the docker build, AGENTS.md) is deterministic regardless of load.
|
||||
vi.setConfig({ testTimeout: 30000 });
|
||||
// Import the converter DIRECTLY from src (NOT the docmost-client barrel) so we
|
||||
// match the path used by the other converter unit tests.
|
||||
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
|
||||
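// markdownToProseMirror is imported from markdown-to-prosemirror.ts, the pure
// marked -> HTML -> generateJSON path extracted from upstream collaboration.ts.
// NOTE(review): upstream, importing it mutated the global DOM via jsdom at module
// load time (required for @tiptap/html's generateJSON to run under Node) —
// confirm the extracted module still does this.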
|
||||
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
|
||||
import { stripBlockIds } from '../src/engine/roundtrip-helpers.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// WHY THIS TEST EXISTS (SPEC §11 / "Задача №0", i.e. "Task #0")
|
||||
//
|
||||
// git is the state store, and git diffs byte-for-byte. The sync daemon does
|
||||
// `export(markdown) -> import(ProseMirror) -> export(markdown)` on every pull,
|
||||
// so if the *second* export differs from the first by even one byte, every
|
||||
// pull produces a phantom diff -> endless commits/conflicts. The single
|
||||
// property git actually needs is therefore MARKDOWN BYTE-STABILITY:
|
||||
//
|
||||
// md2 := export(import(export(doc))) MUST equal md1 := export(doc)
|
||||
//
|
||||
// This file fuzzes that invariant with fast-check over randomly generated,
|
||||
// representative Docmost ProseMirror documents.
|
||||
//
|
||||
// ---------------------------------------------------------------------------
|
||||
// THE "SUPPORTED SPACE" PROBLEM
|
||||
//
|
||||
// A NAIVE generator surfaces two different kinds of `md2 !== md1`:
|
||||
//
|
||||
// (a) GENUINE converter limitations — documented below as `it.fails` repros.
|
||||
// (b) Inputs the converter LEGITIMATELY normalizes, i.e. markdown that is
|
||||
// ambiguous or that the schema rewrites to a canonical form. These are
|
||||
// NOT byte-stable by construction and are NOT bugs; the fix is to keep
|
||||
// the generator inside the byte-stable / supported space.
|
||||
//
|
||||
// The following were all empirically confirmed (by probing the live converter)
|
||||
// and are EXCLUDED from / canonicalized by the byte-stable arbitrary. Each is a
|
||||
// markdown ambiguity or a schema/ProseMirror normalization, NOT a converter bug.
|
||||
//
|
||||
// * Text that re-triggers block/inline markdown syntax on re-parse:
|
||||
// - a leading `>`/`*`/`-`/`#`/`1.` turns a paragraph into a blockquote/
|
||||
// list/heading;
|
||||
// - `a  b` (2+ spaces) collapses to `a b`;
|
||||
// - `<b>` / `</div>` parse as real HTML tags (and run-concatenation can
|
||||
// form `<word>` across a run boundary);
|
||||
// - `&amp;` / `&lt;` decode back to `&` / `<`;
|
||||
// - a lone backtick is a code-span delimiter and re-pairs globally.
|
||||
// -> The text arbitrary emits space-joined tokens that BEGIN and END with an
|
||||
// alphanumeric word, with any single special char confined to the middle
|
||||
// (space-flanked). Every char the task requires (* _ [ ] ( ) | < > &, and
|
||||
// more) is covered this way; the backtick is exercised via code spans.
|
||||
// * A purely numeric image `alt` ("0") or link `title` ("0") is parsed back as
|
||||
// a NUMBER and dropped by the converter's `value || ""` -> alt/title always
|
||||
// carry at least one letter.
|
||||
// * Callout types other than info/success/warning/danger normalize to `info`
|
||||
// (schema only knows those four) -> generator restricts to those four.
|
||||
// * A list item / callout / blockquote with MULTIPLE block children: the
|
||||
// converter joins them with a single "\n", which marked re-parses as ONE
|
||||
// merged paragraph ("- p1\n p2" -> "- p1 p2"). -> container bodies hold a
|
||||
// SINGLE paragraph, optionally plus ONE nested list for lists.
|
||||
// * `orderedList.start` / `1)` markers normalize to `1.` -> not emitted.
|
||||
// * Two sibling lists sharing a marker family (bullet/task use "-", ordered
|
||||
// uses "1.") MERGE into one list -> no two list blocks are adjacent.
|
||||
// * TWO consecutive hard breaks render a blank line that marked eats as a
|
||||
// paragraph break, and a trailing hard break is trimmed -> consecutive/
|
||||
// trailing hard breaks are collapsed/removed.
|
||||
// * Adjacent text runs with IDENTICAL marks ("**a****b****c**" -> "**abc**").
|
||||
// A real ProseMirror doc never stores split same-mark runs (the editor
|
||||
// coalesces them) -> the generator merges them too (normalizeInline).
|
||||
//
|
||||
// The GENUINE, real-but-intentional non-roundtrip limitations are kept HONEST as
|
||||
// `it.fails` blocks below (so the suite stays green only because they are marked
|
||||
// expected-to-fail, never by hiding them):
|
||||
//
|
||||
// 1. The `code` mark COMBINED with any other mark. The converter emits nested
|
||||
// HTML (`<strong><code>x</code></strong>`), but the schema's `code` mark
|
||||
// declares `excludes: "_"`, so on import every co-occurring mark is dropped
|
||||
// and the run comes back as `code` only -> md2 == "`x`". Acknowledged in
|
||||
// markdown-converter.ts (the long comment above the marks switch);
|
||||
// impossible to round-trip both while `code` excludes them.
|
||||
// 2. A BLOCK-level `image` placed BETWEEN other blocks. The Docmost image node
|
||||
// is block-level but `![alt](src)` is inline; marked wraps it in a <p>, the
|
||||
// schema hoists the <img> out and leaves an empty paragraph sibling, which
|
||||
// injects an extra blank gap on the second export. An image IS byte-stable
|
||||
// as the sole block (edge artifacts get trimmed) — covered by a green test.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Run a full export -> import -> export cycle and return both markdown strings.
|
||||
/**
 * Perform one export -> import -> export cycle on a ProseMirror document.
 *
 * @param doc - ProseMirror document JSON to serialize.
 * @returns `md1` (first markdown export), `doc2` (the document re-imported
 *   from `md1`) and `md2` (markdown exported from `doc2`).
 */
async function roundTrip(doc: unknown): Promise<{ md1: string; md2: string; doc2: any }> {
  const firstExport = convertProseMirrorToMarkdown(doc);
  const reimported = await markdownToProseMirror(firstExport);
  const secondExport = convertProseMirrorToMarkdown(reimported);
  return { md1: firstExport, md2: secondExport, doc2: reimported };
}
|
||||
|
||||
// Fixed seed value; the note at the top of this file relies on it to keep the
// generated inputs deterministic. NOTE(review): presumably passed to fast-check
// by the property tests further down — confirm at the call sites.
const SEED = 42;
|
||||
// Number of runs per property; per the note at the top of this file, each
// property test performs a real conversion this many times.
const NUM_RUNS = 100;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Inline text arbitraries
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Alphanumeric "word" (no markdown-significant characters). Length 1..6.
|
||||
// Arbitrary for a single alphanumeric "word" of 1..6 characters. The regex is
// anchored at both ends with a {1,6} quantifier, so every generated string is
// already non-empty and no extra length filter is needed.
const wordArb = fc.stringMatching(/^[A-Za-z0-9]{1,6}$/);
|
||||
|
||||
// A SINGLE markdown-significant character, emitted only as an isolated,
|
||||
// space-flanked token. Every char the task calls out plus a few more; each was
|
||||
// verified byte-stable in this position.
|
||||
//
|
||||
// NOTE: the backtick (`) is DELIBERATELY excluded from free-floating plain
|
||||
// text. A lone backtick is a markdown code-span DELIMITER, so its round-trip
|
||||
// depends on GLOBAL backtick pairing: a stray backtick in running text adjacent
|
||||
// to a real code span ("A ` " + `code`) re-pairs into a different code span and
|
||||
// loses a space — genuinely outside the byte-stable space. The backtick is
|
||||
// still fully exercised as the `code`-mark delimiter and inside code blocks.
|
||||
// Picks exactly one markdown-significant character per draw. The backtick is
// intentionally absent from this list.
const specialCharArb = fc.constantFrom(
  // emphasis
  '*', '_',
  // brackets / parens / braces
  '[', ']', '(', ')', '{', '}',
  // table pipe, angle brackets, ampersand
  '|', '<', '>', '&',
  // remaining punctuation
  '#', '!', '~', '=', '+', '-',
);
|
||||
|
||||
// Build a "safe special" text string: a space-joined sequence of tokens that
|
||||
// always BEGINS and ENDS with an alphanumeric word, with any isolated special
|
||||
// chars confined to the MIDDLE (each space-flanked by words).
|
||||
//
|
||||
// Both boundary guarantees matter:
|
||||
// * Leading word: the line never opens with a block/inline trigger
|
||||
// (">", "*", "-", "#", "1." ...).
|
||||
// * Trailing word: adjacent text runs CONCATENATE with no separator, so a run
|
||||
// ending in a bare "<" beside a run starting with a letter would form a fake
|
||||
// HTML tag ("...0 <" + "A >" -> "0 <A >"), which marked/jsdom strips. Ending
|
||||
// every run with an alphanumeric word keeps every special internal and
|
||||
// space-flanked even after concatenation.
|
||||
// Space-joined token string: a leading word, 0..3 middle tokens (each either a
// word or one special character), and a trailing word.
const safeTextArb: fc.Arbitrary<string> = fc
  .tuple(
    wordArb,
    fc.array(fc.oneof(wordArb, specialCharArb), { minLength: 0, maxLength: 3 }),
    wordArb,
  )
  .map(([head, inner, tail]) => {
    const tokens = [head, ...inner, tail];
    return tokens.join(' ');
  });
|
||||
|
||||
// A plain alphanumeric phrase (1..3 words) for places where even isolated
|
||||
// specials are not wanted (e.g. code-block language, mention labels).
|
||||
// One to three alphanumeric words joined by single spaces.
const phraseArb: fc.Arbitrary<string> = fc
  .array(wordArb, { minLength: 1, maxLength: 3 })
  .map((words) => {
    const phrase = words.join(' ');
    return phrase;
  });
|
||||
|
||||
// A phrase guaranteed to contain at least one letter. Used for image alt text:
|
||||
// a PURELY numeric alt (e.g. "0", "00") is parsed back by the schema as a
|
||||
// NUMBER, and the converter's `alt || ""` then treats the number 0 as falsy and
|
||||
// DROPS the alt ("![0](src)" -> "![](src)") — not byte-stable. A letter anywhere in
|
||||
// the alt keeps it a string and avoids the coercion.
|
||||
// Phrase whose first token is 1..4 ASCII letters, followed by 0..2 further
// alphanumeric words, all joined by single spaces.
const letterPhraseArb: fc.Arbitrary<string> = fc
  .tuple(
    fc.stringMatching(/^[A-Za-z]{1,4}$/),
    fc.array(wordArb, { minLength: 0, maxLength: 2 }),
  )
  .map(([lettersOnly, trailingWords]) => {
    const tokens = [lettersOnly, ...trailingWords];
    return tokens.join(' ');
  });
|
||||
|
||||
|
||||
// One text run in one of five shapes: unmarked, a single formatting mark
// (bold / italic / strike), a SOLE `code` mark, a link, or an inline comment
// anchor. `code` is never paired with another mark here — that pairing is the
// known non-round-tripping case exercised separately in an it.fails test. Every
// mark wraps safe text, which stays stable even with isolated specials inside.
const markedTextRunArb: fc.Arbitrary<any> = fc.oneof(
  // Unmarked text.
  safeTextArb.map((value) => {
    return { type: 'text', text: value };
  }),
  // Exactly one formatting mark.
  fc
    .tuple(safeTextArb, fc.constantFrom('bold', 'italic', 'strike'))
    .map(([value, markType]) => {
      const marks = [{ type: markType }];
      return { type: 'text', text: value, marks };
    }),
  // `code` on its own (a backtick span). Safe text is backtick-free, so the span
  // can never contain an inner backtick that would be ambiguous to re-parse.
  safeTextArb.map((value) => {
    const marks = [{ type: 'code' }];
    return { type: 'text', text: value, marks };
  }),
  // Link: plain-phrase label, an href free of parens and whitespace, and an
  // optional title. A purely numeric title is coerced to a number and dropped
  // on re-import (same quirk as image alt), so titles always carry a letter.
  fc
    .tuple(
      phraseArb,
      fc.webUrl().filter((url) => !/[()\s]/.test(url)),
      fc.option(letterPhraseArb, { nil: undefined }),
    )
    .map(([label, href, title]) => {
      const attrs = title ? { href, title } : { href };
      return { type: 'text', text: label, marks: [{ type: 'link', attrs }] };
    }),
  // Inline COMMENT anchor (SPEC §3): a span[data-comment-id] that must survive
  // the round-trip byte-for-byte. The id is an alphanumeric token (nothing that
  // could break an attribute) and `resolved` is only present when true; both
  // forms were verified byte-stable.
  fc
    .tuple(safeTextArb, fc.stringMatching(/^[A-Za-z0-9]{4,10}$/), fc.boolean())
    .map(([value, commentId, resolved]) => {
      const attrs = resolved ? { commentId, resolved: true } : { commentId };
      return { type: 'text', text: value, marks: [{ type: 'comment', attrs }] };
    }),
);
|
||||
|
||||
// Inline math node whose LaTeX comes from a fixed menu, including samples with a
// bare `<` (e.g. `a < b`) that must not be mistaken for HTML.
const mathInlineArb: fc.Arbitrary<any> = fc
  .constantFrom('a < b', 'x^2 + y^2', 'a < b < c', '\\frac{1}{2}', 'E = mc^2')
  .map((latex) => {
    const attrs = { text: latex };
    return { type: 'mathInline', attrs };
  });
|
||||
|
||||
// Mention node (schema attrs): the label is a plain phrase, while `id` and
// `entityId` are generated UUIDs. The entity type is always 'user'.
const mentionArb: fc.Arbitrary<any> = fc
  .tuple(phraseArb, fc.uuid(), fc.uuid())
  .map(([label, id, entityId]) => {
    const attrs = { id, label, entityType: 'user', entityId };
    return { type: 'mention', attrs };
  });
|
||||
|
||||
// A hard line break node; it carries no attributes.
const hardBreakArb: fc.Arbitrary<any> = fc.constant({
  type: 'hardBreak',
});
|
||||
|
||||
// Bring a generated inline-content array into the shape ProseMirror itself would
// store, then trim the edges markdown cannot represent. Used for paragraph and
// heading inline content alike.
//
//   1) Adjacent `text` runs with IDENTICAL marks are MERGED. A real editor never
//      keeps two neighbouring runs with the same mark set, and leaving them
//      split also breaks byte-stability: three adjacent bold runs export as
//      "**a****b****c**", whose inner "****" is ambiguous and re-parses as a
//      single "**abc**".
//   2) CONSECUTIVE hard breaks collapse to one. Two in a row render as
//      "  \n  \n"; marked reads the whitespace-only middle line as a paragraph
//      break, so "a  \n  \nb" comes back as "a\n\nb". A single break is stable.
//   3) TRAILING hard breaks are dropped: "...  \n" sits at the paragraph edge
//      and is removed by the converter's .trim().

/** True when two mark arrays serialize identically (a missing array counts as empty). */
const sameMarks = (a: any[] | undefined, b: any[] | undefined): boolean => {
  const left = JSON.stringify(a ?? []);
  const right = JSON.stringify(b ?? []);
  return left === right;
};

/**
 * Returns a new array with rules 1-3 applied. Text nodes are shallow-copied
 * before being stored, so merging never writes into a caller's node; every
 * other node is passed through by reference.
 */
function normalizeInline(nodes: any[]): any[] {
  const result: any[] = [];
  nodes.forEach((current) => {
    const last = result[result.length - 1];
    const isBreak = current.type === 'hardBreak';
    const isText = current.type === 'text';

    // Rule 2: swallow a hard break that directly follows another one.
    if (isBreak && last && last.type === 'hardBreak') {
      return;
    }

    // Rule 1: fold this run into the previous one when the mark sets agree.
    if (isText && last && last.type === 'text' && sameMarks(last.marks, current.marks)) {
      last.text += current.text;
      return;
    }

    // Copy text nodes so a later in-place merge cannot touch a shared value.
    result.push(isText ? { ...current } : current);
  });

  // Rule 3: strip hard breaks from the tail, but never empty the array.
  let keep = result.length;
  while (keep > 1 && result[keep - 1].type === 'hardBreak') {
    keep -= 1;
  }
  result.length = keep;
  return result;
}
|
||||
|
||||
// Paragraph inline content: a mandatory leading marked text run followed by up
// to four more items — mostly text runs, occasionally an inline atom
// (math / mention) or a hard break. Leading with text guarantees the paragraph
// never opens with a block trigger. The result is passed through
// normalizeInline.
const inlineContentArb: fc.Arbitrary<any[]> = fc
  .tuple(
    markedTextRunArb,
    fc.array(
      fc.oneof(
        { weight: 5, arbitrary: markedTextRunArb },
        { weight: 1, arbitrary: mathInlineArb },
        { weight: 1, arbitrary: mentionArb },
        { weight: 1, arbitrary: hardBreakArb },
      ),
      { minLength: 0, maxLength: 4 },
    ),
  )
  .map(([head, tail]) => {
    const sequence = [head].concat(tail);
    return normalizeInline(sequence);
  });
|
||||
|
||||
// Heading inline content — the paragraph menu MINUS hard breaks. A hard break
// inside an ATX heading ("# a  \nb") is NOT byte-stable: marked does not honour
// it, so the text re-parses as the heading "# a" plus a separate paragraph "b"
// (md2 = "# a\n\nb"). Math, mentions and links inside a heading were verified
// fine and stay available.
const headingInlineContentArb: fc.Arbitrary<any[]> = fc
  .tuple(
    markedTextRunArb,
    fc.array(
      fc.oneof(
        { weight: 5, arbitrary: markedTextRunArb },
        { weight: 1, arbitrary: mathInlineArb },
        { weight: 1, arbitrary: mentionArb },
      ),
      { minLength: 0, maxLength: 4 },
    ),
  )
  .map(([head, tail]) => {
    const sequence = [head].concat(tail);
    return normalizeInline(sequence);
  });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Block arbitraries
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Paragraph block wrapping generated inline content.
const paragraphArb: fc.Arbitrary<any> = inlineContentArb.map((inline) => {
  return { type: 'paragraph', content: inline };
});
|
||||
|
||||
// Heading block: level 1..6 with hard-break-free inline content.
const headingArb: fc.Arbitrary<any> = fc
  .tuple(fc.integer({ min: 1, max: 6 }), headingInlineContentArb)
  .map(([level, content]) => {
    const attrs = { level };
    return { type: 'heading', attrs, content };
  });
|
||||
|
||||
// Code block: 1..4 lines of safe text joined by newlines (specials inside are
// inert within a fenced block). The language is either one lowercase token from
// a fixed menu or the empty string when absent.
const codeBlockArb: fc.Arbitrary<any> = fc
  .tuple(
    fc.option(fc.constantFrom('js', 'ts', 'python', 'go', 'rust', 'bash'), {
      nil: '',
    }),
    fc
      .array(safeTextArb, { minLength: 1, maxLength: 4 })
      .map((lines) => lines.join('\n')),
  )
  .map(([language, code]) => {
    const body = [{ type: 'text', text: code }];
    return { type: 'codeBlock', attrs: { language }, content: body };
  });
|
||||
|
||||
// Blockquote holding exactly one paragraph.
const blockquoteArb: fc.Arbitrary<any> = paragraphArb.map((paragraph) => {
  return { type: 'blockquote', content: [paragraph] };
});
|
||||
|
||||
// Horizontal rule; it has no attributes or content.
const horizontalRuleArb: fc.Arbitrary<any> = fc.constant({ type: 'horizontalRule' });
|
||||
|
||||
// Callout with a single paragraph child; the callout type is limited to the
// four variants the schema knows.
const calloutArb: fc.Arbitrary<any> = fc
  .tuple(fc.constantFrom('info', 'success', 'warning', 'danger'), paragraphArb)
  .map(([variant, paragraph]) => {
    return { type: 'callout', attrs: { type: variant }, content: [paragraph] };
  });
|
||||
|
||||
// Block math node; LaTeX comes from a fixed menu that includes bare `<` samples
// and the empty string.
const mathBlockArb: fc.Arbitrary<any> = fc
  .constantFrom('a < b', 'a < b < c', '\\sum_{i=0}^{n} i', 'x = \\frac{-b}{2a}', '')
  .map((latex) => {
    const attrs = { text: latex };
    return { type: 'mathBlock', attrs };
  });
|
||||
|
||||
// Image block with a generated web URL as `src` and an `alt` that is either
// empty or a letter-bearing phrase.
const imageArb: fc.Arbitrary<any> = fc
  .tuple(
    fc.webUrl(),
    // Alt text never contains brackets or parens (they would leak into the
    // markdown image syntax and break byte-stability) and is never purely
    // numeric (a numeric alt is coerced to a number and dropped on re-import),
    // so a non-empty alt always carries at least one letter.
    fc.option(letterPhraseArb, { nil: '' }),
  )
  .map((pair) => {
    const [src, alt] = pair;
    return { type: 'image', attrs: { src, alt } };
  });
|
||||
|
||||
// A list item: ONE paragraph, optionally followed by ONE nested bullet list of
// 1..3 flat items (a single level of nesting). `allowNest` decides whether the
// nested list may be generated at all; when false the item is always flat.
function listItemArb(allowNest: boolean): fc.Arbitrary<any> {
  // A flat item: a single paragraph and nothing else.
  const flatItemArb = paragraphArb.map((paragraph) => ({
    type: 'listItem',
    content: [paragraph],
  }));

  if (allowNest) {
    const optionalChildrenArb = fc.option(
      fc.array(flatItemArb, { minLength: 1, maxLength: 3 }),
      { nil: undefined },
    );
    return fc.tuple(paragraphArb, optionalChildrenArb).map(([paragraph, children]) => {
      const content = children
        ? [paragraph, { type: 'bulletList', content: children }]
        : [paragraph];
      return { type: 'listItem', content };
    });
  }

  return flatItemArb;
}
|
||||
|
||||
// Bullet list of 1..4 items, each of which may carry one nested bullet list.
const bulletListArb: fc.Arbitrary<any> = fc
  .array(listItemArb(true), { minLength: 1, maxLength: 4 })
  .map((entries) => {
    return { type: 'bulletList', content: entries };
  });
|
||||
|
||||
// Ordered list of 1..4 items, each of which may carry one nested bullet list.
const orderedListArb: fc.Arbitrary<any> = fc
  .array(listItemArb(true), { minLength: 1, maxLength: 4 })
  .map((entries) => {
    return { type: 'orderedList', content: entries };
  });
|
||||
|
||||
// Task item: a checked flag, ONE paragraph, and optionally ONE nested bullet
// list made of 1..2 flat list items.
const taskItemArb: fc.Arbitrary<any> = fc
  .tuple(
    fc.boolean(),
    paragraphArb,
    fc.option(fc.array(listItemArb(false), { minLength: 1, maxLength: 2 }), {
      nil: undefined,
    }),
  )
  .map(([checked, paragraph, children]) => {
    const content = children
      ? [paragraph, { type: 'bulletList', content: children }]
      : [paragraph];
    return { type: 'taskItem', attrs: { checked }, content };
  });
|
||||
|
||||
// Task list of 1..4 task items.
const taskListArb: fc.Arbitrary<any> = fc
  .array(taskItemArb, { minLength: 1, maxLength: 4 })
  .map((entries) => {
    return { type: 'taskList', content: entries };
  });
|
||||
|
||||
// GFM table: one header row plus 1..3 body rows over a fixed column count
// (1..3). Each header cell carries its column's alignment (or none); body cells
// are generated without an align attr. Every cell holds a single paragraph with
// one plain phrase.
const tableArb: fc.Arbitrary<any> = fc
  .integer({ min: 1, max: 3 })
  .chain((columnCount) => {
    // One cell of the requested kind; `align` is only written when provided.
    function makeCellArb(isHeader: boolean, align?: string) {
      return phraseArb.map((label) => ({
        type: isHeader ? 'tableHeader' : 'tableCell',
        attrs: align ? { align } : {},
        content: [{ type: 'paragraph', content: [{ type: 'text', text: label }] }],
      }));
    }

    // Exactly one alignment choice per column.
    const alignmentsArb = fc.array(
      fc.constantFrom(undefined, 'left', 'center', 'right'),
      { minLength: columnCount, maxLength: columnCount },
    );

    // The null array only decides HOW MANY body rows there are; the cells are
    // generated in the next stage.
    const bodyRowSlotsArb = fc.array(fc.constant(null), { minLength: 1, maxLength: 3 });

    return fc.tuple(alignmentsArb, bodyRowSlotsArb).chain(([alignments, slots]) => {
      const headerRowArb = fc
        .tuple(...alignments.map((alignment) => makeCellArb(true, alignment)))
        .map((cells) => ({ type: 'tableRow', content: cells }));

      const bodyRowArbs = slots.map(() =>
        fc
          .tuple(...alignments.map(() => makeCellArb(false)))
          .map((cells) => ({ type: 'tableRow', content: cells })),
      );

      return fc
        .tuple(headerRowArb, fc.tuple(...bodyRowArbs))
        .map(([header, bodyRows]) => {
          return { type: 'table', content: [header, ...bodyRows] };
        });
    });
  });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Top-level document arbitrary
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Weighted menu of the block nodes that stay byte-stable when SEQUENCED with
// other blocks. `image` is deliberately ABSENT. The Docmost `image` node is
// BLOCK-level, but its markdown form `![alt](src)` is INLINE: marked wraps it in
// a <p>, the schema hoists the block <img> out and leaves an EMPTY paragraph
// beside it, and on the second export that stray paragraph injects extra blank
// lines between siblings ("p\n\n<image>\n\nq" -> "p\n\n<image>\n\n\n\nq"). An
// image is only byte-stable as the SOLE block (the edge artifacts get .trim()'d
// away), so it is covered by dedicated tests rather than mixed in here.
const blockArb: fc.Arbitrary<any> = fc.oneof(
  { weight: 6, arbitrary: paragraphArb },
  { weight: 3, arbitrary: headingArb },
  { weight: 2, arbitrary: codeBlockArb },
  { weight: 2, arbitrary: bulletListArb },
  { weight: 2, arbitrary: orderedListArb },
  { weight: 2, arbitrary: taskListArb },
  { weight: 2, arbitrary: blockquoteArb },
  { weight: 2, arbitrary: tableArb },
  { weight: 2, arbitrary: calloutArb },
  { weight: 1, arbitrary: horizontalRuleArb },
  { weight: 1, arbitrary: mathBlockArb },
);
|
||||
|
||||
// Node type names that count as a list block.
const LIST_TYPES = new Set([
  'bulletList',
  'orderedList',
  'taskList',
]);
|
||||
|
||||
// A bounded document of 1..8 blocks. It is kept small because every run performs
// a real marked + jsdom parse, and small inputs also shrink quickly.
//
// Post-processing removes any list block that would directly follow another
// list block. Sibling lists sharing a marker family (bullet and task both use
// "-", ordered uses "1.") are MERGED by markdown when only a blank line
// separates them ("- a\n\n- b" -> one list -> "- a\n- b"), which is not
// byte-stable. Lists of different families, or lists separated by a non-list
// block, would be fine, but dropping every back-to-back list is the simple rule
// that is always correct.
const docArb: fc.Arbitrary<any> = fc
  .array(blockArb, { minLength: 1, maxLength: 8 })
  .map((blocks) => {
    const kept = blocks.reduce<any[]>((acc, block) => {
      const last = acc[acc.length - 1];
      const followsList =
        Boolean(last) && LIST_TYPES.has(last.type) && LIST_TYPES.has(block.type);
      if (!followsList) {
        acc.push(block);
      }
      return acc;
    }, []);
    // The first block is always kept, so `kept` cannot be empty; the fallback
    // simply states the non-empty-document invariant explicitly.
    const content = kept.length ? kept : blocks.slice(0, 1);
    return { type: 'doc', content };
  });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// The properties
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('markdown <-> ProseMirror round-trip (property-based)', () => {
  // Build the fast-check run parameters; a fresh object per assertion.
  const runOptions = (numRuns: number = NUM_RUNS) => ({ numRuns, seed: SEED });

  it('the generator covers every targeted node type at least once', () => {
    // Coverage sanity check, not a correctness property: confirm that within
    // NUM_RUNS the arbitrary really produces the intended variety of nodes.
    const seen = new Set<string>();

    // Record the type of a node, of each of its marks, and of all descendants.
    function collect(node: any): void {
      if (!node || typeof node !== 'object') return;
      if (node.type) seen.add(node.type);
      (node.marks ?? []).forEach((mark: any) => {
        seen.add(`mark:${mark.type}`);
      });
      (node.content ?? []).forEach((child: any) => {
        collect(child);
      });
    }

    fc.assert(
      fc.property(docArb, (doc) => {
        collect(doc);
        return true;
      }),
      runOptions(),
    );

    // Block types, inline atoms and marks that must each have appeared.
    // 'image' is absent on purpose: it has its own dedicated tests.
    const expectedTypes = [
      'paragraph',
      'heading',
      'codeBlock',
      'bulletList',
      'orderedList',
      'taskList',
      'blockquote',
      'table',
      'callout',
      'horizontalRule',
      'mathBlock',
      'mention',
      'mathInline',
      'hardBreak',
      'mark:bold',
      'mark:italic',
      'mark:strike',
      'mark:code',
      'mark:link',
      'mark:comment',
    ];
    for (const t of expectedTypes) {
      expect(seen, `expected the generator to produce ${t}`).toContain(t);
    }
  });

  it('markdown is byte-stable across export -> import -> export', async () => {
    // What git needs: the second export reproduces the first byte-for-byte.
    const property = fc.asyncProperty(docArb, async (doc) => {
      const result = await roundTrip(doc);
      expect(result.md2).toBe(result.md1);
    });
    await fc.assert(property, runOptions());
  });

  it('the document is semantically stable on a second cycle (ids stripped)', async () => {
    // doc vs doc2 is deliberately NOT compared: the converter reconstructs
    // schema default attrs on the FIRST import (a known SPEC §11 divergence).
    // Once the markdown is byte-stable, though, importing the SAME markdown
    // twice must give structurally identical docs apart from the regenerated
    // block ids — so doc2 (import of md1) is compared with doc3 (import of
    // md2 == md1) after stripping ids.
    const property = fc.asyncProperty(docArb, async (doc) => {
      const firstExport = convertProseMirrorToMarkdown(doc);
      const doc2 = await markdownToProseMirror(firstExport);
      const secondExport = convertProseMirrorToMarkdown(doc2);
      // Guard: the comparison below is only meaningful for byte-stable markdown.
      expect(secondExport).toBe(firstExport);
      const doc3 = await markdownToProseMirror(secondExport);
      expect(stripBlockIds(doc3)).toEqual(stripBlockIds(doc2));
    });
    await fc.assert(property, runOptions());
  });

  it('a SOLE image block is byte-stable', async () => {
    // With the image as the only block, the stray empty paragraph the schema
    // leaves beside the hoisted block <img> sits at a document edge and is
    // removed by the converter's final .trim().
    const property = fc.asyncProperty(imageArb, async (image) => {
      const soleImageDoc = { type: 'doc', content: [image] };
      const result = await roundTrip(soleImageDoc);
      expect(result.md2).toBe(result.md1);
    });
    await fc.assert(property, runOptions());
  });

  // -------------------------------------------------------------------------
  // KNOWN, DOCUMENTED non-roundtrip bug #2 (kept honest as it.fails).
  //
  // BUG: a block-level `image` placed BETWEEN other blocks is not byte-stable.
  // The Docmost image node is BLOCK-level but its markdown form `![alt](src)` is
  // INLINE. marked wraps the inline image in a <p>; the schema then hoists the
  // block <img> out of that <p>, leaving an EMPTY paragraph as a sibling. On the
  // second export that empty paragraph renders as "" and the "\n\n" doc join
  // injects an extra blank gap:
  //   "p\n\n<image>\n\nq" -> "p\n\n<image>\n\n\n\nq"   (=> md2 !== md1).
  // The minimal repro is the three-block document built in the test below.
  // Not "fixed" — the source must not change; documented and exercised here.
  // -------------------------------------------------------------------------
  it.fails('BUG: a block image between other blocks is not byte-stable', async () => {
    const plainParagraph = (value: string) => ({
      type: 'paragraph',
      content: [{ type: 'text', text: value }],
    });
    const doc = {
      type: 'doc',
      content: [
        plainParagraph('p'),
        { type: 'image', attrs: { src: 'http://a.aa', alt: 'x' } },
        plainParagraph('q'),
      ],
    };
    const result = await roundTrip(doc);
    expect(result.md2).toBe(result.md1);
  });

  // -------------------------------------------------------------------------
  // KNOWN, DOCUMENTED non-roundtrip bug #1 (kept honest as it.fails).
  //
  // BUG: the `code` mark combined with ANY other mark does NOT round-trip.
  // The converter emits nested HTML so the output is well-formed, e.g.
  //   marks [code, bold] -> md1 = "<strong><code>x</code></strong>"
  // but the schema's `code` mark declares `excludes: "_"`, so on import the
  // co-occurring mark is dropped and the run comes back as code-only:
  //   md2 = "`x`"   (=> md2 !== md1).
  // Minimal repro doc:
  //   { type:'doc', content:[ { type:'paragraph', content:[
  //     { type:'text', text:'x', marks:[{type:'code'},{type:'bold'}] } ] } ] }
  // markdown-converter.ts acknowledges this (the long comment above the marks
  // switch): both marks cannot be preserved while `code` excludes them.
  // Documented here, not "fixed", because the source must not change.
  // -------------------------------------------------------------------------
  it.fails(
    'BUG: code mark combined with another mark is not byte-stable',
    async () => {
      // A single-paragraph doc whose only run carries `code` plus one other mark.
      const codeComboArb = fc
        .tuple(safeTextArb, fc.constantFrom('bold', 'italic', 'strike'))
        .map(([value, other]) => {
          const run = {
            type: 'text',
            text: value,
            marks: [{ type: 'code' }, { type: other }],
          };
          return {
            type: 'doc',
            content: [{ type: 'paragraph', content: [run] }],
          };
        });
      const property = fc.asyncProperty(codeComboArb, async (doc) => {
        const result = await roundTrip(doc);
        expect(result.md2).toBe(result.md1);
      });
      await fc.assert(property, runOptions(20));
    },
  );
});
|
||||
268
packages/git-sync/test/node-ops-extra.test.ts
Normal file
268
packages/git-sync/test/node-ops-extra.test.ts
Normal file
@@ -0,0 +1,268 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import fc from 'fast-check';
|
||||
import {
|
||||
getNodeByRef,
|
||||
replaceNodeById,
|
||||
insertNodeRelative,
|
||||
insertTableRow,
|
||||
updateTableCell,
|
||||
sanitizeForYjs,
|
||||
findUnstorableAttr,
|
||||
buildOutline,
|
||||
} from '../src/lib/node-ops.js';
|
||||
|
||||
// Gaps NOT covered by node-ops.test.ts (test-strategy report §2). The base file
|
||||
// is comprehensive; these add only the missing edges: newNode-arg immutability,
|
||||
// anchor-is-container routing, malformed opts, ragged/empty/no-colwidth/non-int
|
||||
// insertTableRow, getNodeByRef non-object/#-1, updateTableCell empty-id refresh,
|
||||
// outline 100/40 boundary, malformed marks, and the makeFreshId property.
|
||||
|
||||
/** Text node; the `marks` key is only present when a truthy marks array is given. */
function text(value: string, marks?: any[]): any {
  return marks
    ? { type: 'text', text: value, marks }
    : { type: 'text', text: value };
}

/** Paragraph with the given id and indent 0; an empty value yields empty content. */
function para(id: string, value = ''): any {
  const content = value ? [text(value)] : [];
  return { type: 'paragraph', attrs: { id, indent: 0 }, content };
}

/**
 * Table cell or header cell with colspan/rowspan 1 (overridable through
 * `extraAttrs`). A null/undefined `paraId` produces a cell with no content.
 */
function cell(
  type: 'tableCell' | 'tableHeader',
  paraId: string | null,
  value = '',
  extraAttrs: Record<string, any> = {},
): any {
  const attrs = { colspan: 1, rowspan: 1, ...extraAttrs };
  const content = paraId == null ? [] : [para(paraId, value)];
  return { type, attrs, content };
}

/** Table row wrapping the given cells. */
function row(cells: any[]): any {
  return { type: 'tableRow', content: cells };
}

/** Document node whose content is the given blocks, in order. */
function doc(...content: any[]): any {
  return { type: 'doc', content };
}
|
||||
|
||||
// ===========================================================================
|
||||
describe('replaceNodeById — newNode ARGUMENT immutability', () => {
  it('does not mutate the caller-supplied newNode after replacement', () => {
    // Immutability of the doc argument is covered in the base file. This pins
    // the OTHER input: the replacement node must be deep-cloned so the result
    // and the caller's newNode can be mutated independently of each other.
    const original = doc(para('p0', 'old'), para('p1', 'old2'));
    const newNode = {
      type: 'paragraph',
      attrs: { id: 'new' },
      content: [text('NEW')],
    };
    const before = structuredClone(newNode);

    const outcome = replaceNodeById(original, 'p0', newNode);
    const insertedCopy = outcome.doc.content[0];

    // Changing the inserted copy must leave the argument untouched...
    insertedCopy.content.push(text('mutated'));
    expect(newNode).toEqual(before);

    // ...and changing the argument afterwards must leave the copy untouched.
    newNode.content.push(text('later'));
    expect(outcome.doc.content[0].content).toEqual([text('NEW'), text('mutated')]);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
describe('insertNodeRelative — container routing and malformed opts', () => {
  // Id of the paragraph inside the first cell of a row.
  const firstCellId = (r: any) => r.content[0].content[0].attrs.id;
  // One-row table whose only cell contains the anchor text.
  const anchorTable = () => ({
    type: 'table',
    content: [row([cell('tableCell', 'r0', 'hello cell')])],
  });
  // The structural row being inserted.
  const freshRow = () => row([cell('tableCell', 'rNew', 'NEW')]);

  it('routes a structural row when anchorText resolves to the TABLE block itself', () => {
    // anchorText scans top-level blocks only, so it resolves to the whole
    // table; the matched container IS the anchor (containerIdx ===
    // chain.length-1), so a row inserted "after" must be appended inside the
    // table rather than spliced beside a row.
    const outcome = insertNodeRelative(doc(anchorTable()), freshRow(), {
      position: 'after',
      anchorText: 'hello cell',
    });
    expect(outcome.inserted).toBe(true);
    const rowIds = outcome.doc.content[0].content.map(firstCellId);
    expect(rowIds).toEqual(['r0', 'rNew']);
  });

  it('prepends a structural row when anchorText resolves to the table and position is "before"', () => {
    const outcome = insertNodeRelative(doc(anchorTable()), freshRow(), {
      position: 'before',
      anchorText: 'hello cell',
    });
    const rowIds = outcome.doc.content[0].content.map(firstCellId);
    expect(rowIds).toEqual(['rNew', 'r0']);
  });

  it('is a no-op (inserted:false) for a malformed opts object', () => {
    const original = doc(para('p0'));
    const outcome = insertNodeRelative(original, para('n'), null as any);
    expect(outcome.inserted).toBe(false);
    expect(outcome.doc).toEqual(original);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
describe('insertTableRow — column count and index edge cases', () => {
  // A table whose header row has 1 column while its body row has 2.
  function ragged() {
    const headerRow = row([cell('tableHeader', 'h0', 'H0')]);
    const bodyRow = row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]);
    return { type: 'table', content: [headerRow, bodyRow] };
  }

  // A regular 1-column table: one header row and one body row.
  function oneColumnTable(headerText: string) {
    return {
      type: 'table',
      content: [
        row([cell('tableHeader', 'h0', headerText)]),
        row([cell('tableCell', 'c0', 'A')]),
      ],
    };
  }

  it('derives the column count from the WIDEST row (ragged table)', () => {
    // The guard measures against the widest row (2), so 3 cells must throw...
    const tooWide = () => insertTableRow(doc(ragged()), '#0', ['X', 'Y', 'Z']);
    expect(tooWide).toThrow(/got 3 cell\(s\) but the table has 2 column\(s\)/);

    // ...while a 2-cell row is built to the widest width (2), not the header's 1.
    const outcome = insertTableRow(doc(ragged()), '#0', ['X', 'Y']);
    const addedRow = outcome.doc.content[0].content[2];
    expect(addedRow.content).toHaveLength(2);
  });

  it('an EMPTY table falls back to the supplied cell count', () => {
    const emptyTable = { type: 'table', content: [] };
    const outcome = insertTableRow(doc(emptyTable), '#0', ['A', 'B']);
    expect(outcome.inserted).toBe(true);
    const addedRow = outcome.doc.content[0].content[0];
    expect(addedRow.content).toHaveLength(2);
  });

  it('omits colwidth entirely when the header cell has none (no undefined leak)', () => {
    const noColwidth = oneColumnTable('H');
    const outcome = insertTableRow(doc(noColwidth), '#0', ['X']);
    const addedRow = outcome.doc.content[0].content[2];
    const newCellAttrs = addedRow.content[0].attrs;
    // The key must be absent altogether, not present as colwidth: undefined.
    expect('colwidth' in newCellAttrs).toBe(false);
  });

  it('APPENDS for a non-integer or negative index (does not throw)', () => {
    const t = oneColumnTable('H');

    const frac = insertTableRow(doc(t), '#0', ['X'], 1.5);
    expect(frac.inserted).toBe(true);
    expect(frac.doc.content[0].content).toHaveLength(3); // appended at the end

    const neg = insertTableRow(doc(t), '#0', ['X'], -1);
    expect(neg.doc.content[0].content).toHaveLength(3);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
describe('getNodeByRef — malformed refs', () => {
  it('returns null for a non-object block at a valid #n index', () => {
    const withNullBlock = { type: 'doc', content: [null] };
    const found = getNodeByRef(withNullBlock, '#0');
    expect(found).toBeNull();
  });

  it('returns null for "#-1" (the index regex does not match a negative)', () => {
    const single = doc(para('p0'));
    // "#-1" is neither of the "#<digits>" form nor the id of any block.
    const found = getNodeByRef(single, '#-1');
    expect(found).toBeNull();
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
describe('updateTableCell — fresh id when the first paragraph has an empty id', () => {
  it('mints a fresh id when the existing first paragraph id is the empty string', () => {
    // Body cell whose only paragraph carries an empty-string id.
    const emptyIdCell = {
      type: 'tableCell',
      attrs: { colspan: 1, rowspan: 1 },
      content: [{ type: 'paragraph', attrs: { id: '' }, content: [text('old')] }],
    };
    const table = {
      type: 'table',
      content: [row([cell('tableHeader', 'h0', 'H')]), row([emptyIdCell])],
    };

    const outcome = updateTableCell(doc(table), '#0', 1, 0, 'new');

    const updatedCell = outcome.doc.content[0].content[1].content[0];
    const newId = updatedCell.content[0].attrs.id;
    // An empty id counts as missing, so a fresh Docmost-style id is minted.
    expect(newId).toMatch(/^[a-z0-9]{12}$/);
    expect(newId).not.toBe('');
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
describe('buildOutline — exact 100 / 40 char truncation boundaries', () => {
  it('does NOT truncate firstText at exactly 100 chars but DOES at 101', () => {
    const hundred = 'x'.repeat(100);

    // Exactly at the cap: returned unchanged, without an ellipsis.
    const at100 = buildOutline(doc(para('p', hundred)));
    expect(at100[0].firstText).toBe('x'.repeat(100));
    expect(at100[0].firstText.endsWith('…')).toBe(false);

    // One character over the cap: cut to 100 and suffixed with an ellipsis.
    const at101 = buildOutline(doc(para('p', 'x'.repeat(101))));
    expect(at101[0].firstText).toBe('x'.repeat(100) + '…');
  });

  it('does NOT truncate a header cell at exactly 40 chars but DOES at 41', () => {
    // Single-row table whose only header cell holds `length` "y" characters.
    const headerTable = (length: number) => ({
      type: 'table',
      content: [row([cell('tableHeader', 'h', 'y'.repeat(length))])],
    });

    const outlineAt40 = buildOutline(doc(headerTable(40)));
    expect(outlineAt40[0].header).toEqual(['y'.repeat(40)]);

    const outlineAt41 = buildOutline(doc(headerTable(41)));
    expect(outlineAt41[0].header).toEqual(['y'.repeat(40) + '…']);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
describe('sanitizeForYjs / findUnstorableAttr — malformed marks array', () => {
  /**
   * Fresh fixture on every call: one paragraph whose text node carries a
   * `null` entry in its marks array followed by a link mark that has an
   * explicitly-undefined attribute.
   */
  function docWithNullMark(): any {
    const linkMark = { type: 'link', attrs: { href: 'u', gone: undefined } };
    return doc({
      type: 'paragraph',
      attrs: { id: 'p' },
      content: [text('x', [null, linkMark])],
    });
  }

  it('sanitizeForYjs skips a null mark and strips undefined on the real one', () => {
    const cleaned = sanitizeForYjs(docWithNullMark());
    const [nullMark, linkMark] = cleaned.content[0].content[0].marks;
    // The null entry must survive as-is rather than crash the walk.
    expect(nullMark).toBeNull();
    // Only the undefined-valued key is removed from the real mark.
    expect(linkMark.attrs).toEqual({ href: 'u' });
  });

  it('findUnstorableAttr skips a null mark and reports the real undefined attr path', () => {
    const expectedPath = 'content[0].content[0].marks[1].attrs.gone (undefined)';
    expect(findUnstorableAttr(docWithNullMark())).toBe(expectedPath);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
describe('makeFreshId — format and uniqueness (property, via insertTableRow)', () => {
  it('every minted cell-paragraph id matches ^[a-z0-9]{12}$ and is globally unique', () => {
    const mintedIdsAreWellFormed = fc.property(fc.integer({ min: 1, max: 5 }), (cols) => {
      // Header-only table with `cols` columns whose paragraphs already carry
      // the ids "pre-0".."pre-(cols-1)". Inserting a row at index 1 mints one
      // fresh paragraph id per new cell.
      const headerRow = row(
        Array.from({ length: cols }, (_, i) => cell('tableHeader', `pre-${i}`, `H${i}`)),
      );
      const input = doc({ type: 'table', content: [headerRow] });
      const values = Array<string>(cols).fill('v');

      const result = insertTableRow(input, '#0', values, 1);
      const insertedRow = result.doc.content[0].content[1];
      const minted: any[] = insertedRow.content.map((c: any) => c.content[0].attrs.id);

      // 1) every id has the expected shape
      minted.forEach((id) => {
        expect(id).toMatch(/^[a-z0-9]{12}$/);
      });
      // 2) no duplicates inside the new row
      expect(new Set(minted).size).toBe(minted.length);
      // 3) none of them reuses a pre-existing id
      minted.forEach((id) => {
        expect(id.startsWith('pre-')).toBe(false);
      });
    });

    fc.assert(mintedIdsAreWellFormed, { numRuns: 100 });
  });
});
|
||||
908
packages/git-sync/test/node-ops.test.ts
Normal file
908
packages/git-sync/test/node-ops.test.ts
Normal file
@@ -0,0 +1,908 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
blockPlainText,
|
||||
buildOutline,
|
||||
getNodeByRef,
|
||||
replaceNodeById,
|
||||
deleteNodeById,
|
||||
sanitizeForYjs,
|
||||
findUnstorableAttr,
|
||||
insertNodeRelative,
|
||||
readTable,
|
||||
insertTableRow,
|
||||
deleteTableRow,
|
||||
updateTableCell,
|
||||
} from '../src/lib/node-ops.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tiny ProseMirror/TipTap JSON fixture builders. These produce the exact plain
|
||||
// JSON shape Docmost uses: { type, attrs?, content?, text?, marks? }.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fixture builder for a text leaf node.
 *
 * @param value the node's text
 * @param marks optional marks array; the `marks` key is only present on the
 *   result when a truthy value is supplied (an empty array counts).
 * @returns `{ type: 'text', text }`, plus `marks` when given
 */
function text(value: string, marks?: any[]): any {
  return marks
    ? { type: 'text', text: value, marks }
    : { type: 'text', text: value };
}
|
||||
|
||||
/**
 * Fixture builder for a paragraph block.
 *
 * @param id value stored in `attrs.id` (alongside `indent: 0`)
 * @param value paragraph text; when empty the paragraph has no children,
 *   otherwise it holds exactly one text leaf
 */
function para(id: string, value = ''): any {
  const children = value ? [text(value)] : [];
  return { type: 'paragraph', attrs: { id, indent: 0 }, content: children };
}
|
||||
|
||||
/**
 * Fixture builder for a heading block with a single text child.
 *
 * @param id value stored in `attrs.id`
 * @param level value stored in `attrs.level`
 * @param value heading text
 */
function heading(id: string, level: number, value: string): any {
  const attrs = { id, level };
  const content = [text(value)];
  return { type: 'heading', attrs, content };
}
|
||||
|
||||
/**
 * Fixture builder for a table cell or header cell.
 *
 * @param type node type of the cell
 * @param paraId id of the single wrapped paragraph; `null` produces a cell
 *   with an empty content array (no paragraph at all)
 * @param value text of the wrapped paragraph
 * @param extraAttrs merged over the default `{ colspan: 1, rowspan: 1 }`
 */
function cell(
  type: 'tableCell' | 'tableHeader',
  paraId: string | null,
  value = '',
  extraAttrs: Record<string, any> = {},
): any {
  const children: any[] = [];
  if (paraId != null) {
    children.push(para(paraId, value));
  }
  return {
    type,
    attrs: { colspan: 1, rowspan: 1, ...extraAttrs },
    content: children,
  };
}
|
||||
|
||||
/**
 * Fixture builder for a table row.
 *
 * @param cells the row's cells, stored as-is (not copied) under `content`
 */
function row(cells: any[]): any {
  const tableRow = { type: 'tableRow', content: cells };
  return tableRow;
}
|
||||
|
||||
/**
 * Fixture builder for a document root.
 *
 * @param content the top-level blocks, in order; no arguments gives an empty
 *   `content` array
 */
function doc(...content: any[]): any {
  const root = { type: 'doc', content };
  return root;
}
|
||||
|
||||
// ===========================================================================
|
||||
// blockPlainText
|
||||
// ===========================================================================
|
||||
describe('blockPlainText', () => {
  it('returns the text of a plain text node', () => {
    const leaf = text('hello');
    expect(blockPlainText(leaf)).toBe('hello');
  });

  it('concatenates text from nested containers', () => {
    // Two direct text leaves plus one leaf wrapped in a nested container.
    const nested = { type: 'span', content: [text('baz')] };
    const paragraph = {
      type: 'paragraph',
      content: [text('foo'), text('bar'), nested],
    };
    expect(blockPlainText(paragraph)).toBe('foobarbaz');
  });

  it('returns "" for nullish or non-object inputs', () => {
    // The last entry is an array: arrays are not treated as node objects here.
    const rejected: any[] = [null, undefined, 'a string', 42, [text('x')]];
    for (const input of rejected) {
      expect(blockPlainText(input)).toBe('');
    }
  });

  it('uses BOTH text and nested content of a node, text first', () => {
    const hybrid = { type: 'weird', text: 'A', content: [text('B'), text('C')] };
    expect(blockPlainText(hybrid)).toBe('ABC');
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// buildOutline
|
||||
// ===========================================================================
|
||||
describe('buildOutline', () => {
  /** Wrap rows into a table node. */
  const tableOf = (...rows: any[]) => ({ type: 'table', content: rows });

  it('captures heading level, id and firstText', () => {
    const expected = [{ index: 0, type: 'heading', id: 'h1', firstText: 'Title', level: 2 }];
    expect(buildOutline(doc(heading('h1', 2, 'Title')))).toEqual(expected);
  });

  it('reports table rows/cols and header texts (cols from row 0)', () => {
    const headerRow = row([cell('tableHeader', 'a', 'H1'), cell('tableHeader', 'b', 'H2')]);
    const dataRow = row([cell('tableCell', 'c', 'x'), cell('tableCell', 'd', 'y')]);

    const entry = buildOutline(doc(tableOf(headerRow, dataRow)))[0];

    expect(entry.type).toBe('table');
    expect(entry.rows).toBe(2);
    expect(entry.cols).toBe(2);
    expect(entry.header).toEqual(['H1', 'H2']);
  });

  it('derives cols from row 0 for a ragged table', () => {
    const narrowHeader = row([cell('tableHeader', 'a', 'H1')]); // 1 column
    const widerData = row([cell('tableCell', 'b', 'x'), cell('tableCell', 'c', 'y')]); // 2 columns

    const entry = buildOutline(doc(tableOf(narrowHeader, widerData)))[0];

    expect(entry.rows).toBe(2);
    // The column count is taken from row 0 only, not the widest row.
    expect(entry.cols).toBe(1);
    expect(entry.header).toEqual(['H1']);
  });

  it('reports item count for any *List block', () => {
    const bulletList = {
      type: 'bulletList',
      attrs: { id: 'l1' },
      content: Array.from({ length: 3 }, () => ({ type: 'listItem' })),
    };

    const entry = buildOutline(doc(bulletList))[0];

    expect(entry.type).toBe('bulletList');
    expect(entry.items).toBe(3);
  });

  it('returns [] for an empty or non-object doc', () => {
    const degenerate: any[] = [
      null,
      { type: 'doc' }, // no content array at all
      { type: 'doc', content: [] },
      'nope',
    ];
    for (const input of degenerate) {
      expect(buildOutline(input)).toEqual([]);
    }
  });

  it('falls back to null id when a block has no attrs.id', () => {
    const anonymous = { type: 'paragraph', content: [text('hi')] };
    const entry = buildOutline(doc(anonymous))[0];
    expect(entry.id).toBeNull();
    expect(entry.firstText).toBe('hi');
  });

  it('truncates firstText to 100 chars with an ellipsis', () => {
    const entry = buildOutline(doc(para('p', 'x'.repeat(150))))[0];
    expect(entry.firstText).toBe(`${'x'.repeat(100)}…`);
    // 100 kept characters plus the single ellipsis character.
    expect(entry.firstText.length).toBe(101);
  });

  it('truncates table header cell text to 40 chars', () => {
    const wideHeader = row([cell('tableHeader', 'a', 'y'.repeat(60))]);
    const entry = buildOutline(doc(tableOf(wideHeader)))[0];
    expect(entry.header).toEqual([`${'y'.repeat(40)}…`]);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// getNodeByRef
|
||||
// ===========================================================================
|
||||
describe('getNodeByRef', () => {
  it('resolves a top-level block by #n', () => {
    const input = doc(para('p0', 'zero'), para('p1', 'one'));

    const found = getNodeByRef(input, '#1');

    expect(found).not.toBeNull();
    expect(found!.path).toEqual([1]);
    expect(found!.type).toBe('paragraph');
    expect(found!.node.attrs.id).toBe('p1');
  });

  it('returns null for #n out of range', () => {
    const singleBlock = doc(para('p0'));
    for (const ref of ['#5', '#1']) {
      expect(getNodeByRef(singleBlock, ref)).toBeNull();
    }
  });

  it('finds a nested node by id with the correct path', () => {
    const table = {
      type: 'table',
      content: [row([cell('tableCell', 'deep', 'found me')])],
    };

    const found = getNodeByRef(doc(para('p0'), table), 'deep');

    expect(found).not.toBeNull();
    // Index chain: doc.content[1] (table) -> content[0] (row)
    //           -> content[0] (cell)     -> content[0] (paragraph)
    expect(found!.path).toEqual([1, 0, 0, 0]);
    expect(found!.type).toBe('paragraph');
  });

  it('returns null when the id is not found', () => {
    expect(getNodeByRef(doc(para('p0')), 'missing')).toBeNull();
  });

  it('returns the FIRST node for a duplicate id', () => {
    const input = doc(para('dup', 'first'), para('dup', 'second'));

    const found = getNodeByRef(input, 'dup');

    expect(found!.path).toEqual([0]);
    expect(blockPlainText(found!.node)).toBe('first');
  });

  it('returns null for a non-object doc', () => {
    expect(getNodeByRef(null, '#0')).toBeNull();
    expect(getNodeByRef('x', 'id')).toBeNull();
  });

  it('returns a CLONE — mutating it does not touch the input doc', () => {
    const input = doc(para('p0', 'orig'));
    const before = structuredClone(input);

    // Mutate both a scalar attr and a nested array on the returned node.
    const found = getNodeByRef(input, 'p0');
    found!.node.attrs.id = 'mutated';
    found!.node.content.push(text('extra'));

    expect(input).toEqual(before);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// replaceNodeById
|
||||
// ===========================================================================
|
||||
describe('replaceNodeById', () => {
  /** A brand-new replacement paragraph on every call, so tests never share one. */
  function makeReplacement(): any {
    return { type: 'paragraph', attrs: { id: 'new' }, content: [text('NEW')] };
  }

  it('reports replaced:0 when nothing matches', () => {
    const input = doc(para('p0'));

    const result = replaceNodeById(input, 'missing', makeReplacement());

    expect(result.replaced).toBe(0);
    expect(result.doc).toEqual(input);
  });

  it('replaces a single match', () => {
    const input = doc(para('p0', 'old'), para('p1'));

    const result = replaceNodeById(input, 'p0', makeReplacement());

    expect(result.replaced).toBe(1);
    const [first, second] = result.doc.content;
    expect(first).toEqual(makeReplacement());
    expect(second.attrs.id).toBe('p1');
  });

  it('replaces N matches', () => {
    const input = doc(para('dup', 'a'), para('keep'), para('dup', 'b'));

    const result = replaceNodeById(input, 'dup', makeReplacement());

    expect(result.replaced).toBe(2);
    const [first, middle, last] = result.doc.content;
    expect(first).toEqual(makeReplacement());
    expect(middle.attrs.id).toBe('keep');
    expect(last).toEqual(makeReplacement());
  });

  it('replaces a nested match inside a table cell', () => {
    const input = doc({
      type: 'table',
      content: [row([cell('tableCell', 'inner', 'x')])],
    });

    const result = replaceNodeById(input, 'inner', makeReplacement());

    expect(result.replaced).toBe(1);
    // table -> row 0 -> cell 0 -> first child
    const swapped = result.doc.content[0].content[0].content[0].content[0];
    expect(swapped).toEqual(makeReplacement());
  });

  it('does NOT recurse into the substituted node', () => {
    // The replacement reuses the id being searched for; if the walker descended
    // into it, the count would go past one.
    const sameIdReplacement = {
      type: 'paragraph',
      attrs: { id: 'target' },
      content: [text('R')],
    };

    const result = replaceNodeById(doc(para('target')), 'target', sameIdReplacement);

    expect(result.replaced).toBe(1);
  });

  it('gives each match a SEPARATE clone', () => {
    const input = doc(para('dup'), para('dup'));

    const result = replaceNodeById(input, 'dup', makeReplacement());
    // Mutate the first substituted node only.
    result.doc.content[0].content.push(text('mutated'));

    // If both slots shared one object, the second would show the mutation too.
    expect(result.doc.content[1]).toEqual(makeReplacement());
  });

  it('does not mutate the input doc', () => {
    const input = doc(para('p0', 'old'));
    const before = structuredClone(input);

    replaceNodeById(input, 'p0', makeReplacement());

    expect(input).toEqual(before);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// deleteNodeById
|
||||
// ===========================================================================
|
||||
describe('deleteNodeById', () => {
  /** `attrs.id` of every node in a content array, in order. */
  const idsOf = (nodes: any[]) => nodes.map((n: any) => n.attrs.id);

  it('reports deleted:0 when nothing matches', () => {
    const input = doc(para('p0'));

    const result = deleteNodeById(input, 'missing');

    expect(result.deleted).toBe(0);
    expect(result.doc).toEqual(input);
  });

  it('deletes a single match', () => {
    const input = doc(para('p0'), para('p1'), para('p2'));

    const result = deleteNodeById(input, 'p1');

    expect(result.deleted).toBe(1);
    expect(idsOf(result.doc.content)).toEqual(['p0', 'p2']);
  });

  it('deletes N matches', () => {
    const input = doc(para('dup'), para('keep'), para('dup'));

    const result = deleteNodeById(input, 'dup');

    expect(result.deleted).toBe(2);
    expect(idsOf(result.doc.content)).toEqual(['keep']);
  });

  it('deletes a nested node and preserves sibling order', () => {
    // Container with three paragraph children, placed between two top-level
    // paragraphs; the middle child is the one removed.
    const container = {
      type: 'callout',
      attrs: { id: 'cal' },
      content: [para('a', 'A'), para('b', 'B'), para('c', 'C')],
    };
    const input = doc(para('outer0'), container, para('outer1'));

    const result = deleteNodeById(input, 'b');

    expect(result.deleted).toBe(1);
    // Remaining children of the container stay in their original order.
    expect(idsOf(result.doc.content[1].content)).toEqual(['a', 'c']);
    // The top level is unchanged.
    expect(idsOf(result.doc.content)).toEqual(['outer0', 'cal', 'outer1']);
  });

  it('does not mutate the input doc (deep-equal before/after)', () => {
    const input = doc(para('p0'), para('p1'));
    const before = structuredClone(input);

    deleteNodeById(input, 'p0');

    expect(input).toEqual(before);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// sanitizeForYjs
|
||||
// ===========================================================================
|
||||
describe('sanitizeForYjs', () => {
  it('strips undefined keys from node.attrs', () => {
    const input = doc({ type: 'paragraph', attrs: { id: 'p', gone: undefined, kept: 1 } });

    const cleanedAttrs = sanitizeForYjs(input).content[0].attrs;

    expect('gone' in cleanedAttrs).toBe(false);
    expect(cleanedAttrs).toEqual({ id: 'p', kept: 1 });
  });

  it('strips undefined keys from mark.attrs', () => {
    const linkMark = { type: 'link', attrs: { href: 'u', gone: undefined } };
    const input = doc({
      type: 'paragraph',
      attrs: { id: 'p' },
      content: [text('hi', [linkMark])],
    });

    const cleanedMarkAttrs = sanitizeForYjs(input).content[0].content[0].marks[0].attrs;

    expect('gone' in cleanedMarkAttrs).toBe(false);
    expect(cleanedMarkAttrs).toEqual({ href: 'u' });
  });

  it('PRESERVES null, false, 0 and "" (only undefined is dropped)', () => {
    // Every falsy value except `undefined` is expected to pass through.
    const falsyAttrs = { a: null, b: false, c: 0, d: '', e: undefined };

    const cleaned = sanitizeForYjs(doc({ type: 'paragraph', attrs: falsyAttrs }));

    expect(cleaned.content[0].attrs).toEqual({ a: null, b: false, c: 0, d: '' });
  });

  it('recurses into nested content', () => {
    // The undefined attr sits three levels down: table -> row -> cell.
    const dirtyCell = cell('tableCell', null, '', { gone: undefined, colwidth: null });
    const input = doc({ type: 'table', content: [row([dirtyCell])] });

    const cleanedCellAttrs = sanitizeForYjs(input).content[0].content[0].content[0].attrs;

    expect('gone' in cleanedCellAttrs).toBe(false);
    expect(cleanedCellAttrs.colwidth).toBeNull();
  });

  it('does not mutate the input doc', () => {
    const input = doc({ type: 'paragraph', attrs: { id: 'p', gone: undefined } });
    // structuredClone keeps a key whose value is an explicit `undefined`.
    const before = structuredClone(input);

    sanitizeForYjs(input);

    expect(input).toEqual(before);
    // The key itself must still exist on the caller's object.
    expect('gone' in input.content[0].attrs).toBe(true);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// findUnstorableAttr
|
||||
// ===========================================================================
|
||||
describe('findUnstorableAttr', () => {
  /** A single-paragraph doc whose paragraph carries exactly the given attrs. */
  const docWithAttrs = (attrs: Record<string, any>) => doc({ type: 'paragraph', attrs });

  it('returns null for a fully storable doc', () => {
    expect(findUnstorableAttr(doc(para('p0', 'clean')))).toBeNull();
  });

  it('detects an undefined node attr with its path and kind', () => {
    // The offending block is the third one, so the reported index is 2.
    const offending = { type: 'paragraph', attrs: { id: 'c', x: undefined } };
    const input = doc(para('a'), para('b'), offending);
    expect(findUnstorableAttr(input)).toBe('content[2].attrs.x (undefined)');
  });

  it('detects a function attr', () => {
    const input = docWithAttrs({ fn: () => 1 });
    expect(findUnstorableAttr(input)).toBe('content[0].attrs.fn (function)');
  });

  it('detects a symbol attr', () => {
    const input = docWithAttrs({ s: Symbol('x') });
    expect(findUnstorableAttr(input)).toBe('content[0].attrs.s (symbol)');
  });

  it('detects a bigint attr', () => {
    const input = docWithAttrs({ big: 10n });
    expect(findUnstorableAttr(input)).toBe('content[0].attrs.big (bigint)');
  });

  it('detects an unstorable mark attr with the marks[i] path', () => {
    // The first text leaf has no marks; the second carries the bad mark attr.
    const badMark = { type: 'link', attrs: { x: undefined } };
    const input = doc({
      type: 'paragraph',
      attrs: { id: 'p' },
      content: [text('hi'), text('yo', [badMark])],
    });
    expect(findUnstorableAttr(input)).toBe(
      'content[0].content[1].marks[0].attrs.x (undefined)',
    );
  });

  it('returns the FIRST hit only', () => {
    const firstBad = { type: 'paragraph', attrs: { first: undefined } };
    const secondBad = { type: 'paragraph', attrs: { second: undefined } };
    expect(findUnstorableAttr(doc(firstBad, secondBad))).toBe(
      'content[0].attrs.first (undefined)',
    );
  });

  it('returns null for a non-object doc', () => {
    expect(findUnstorableAttr(null)).toBeNull();
    expect(findUnstorableAttr('x')).toBeNull();
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// insertNodeRelative
|
||||
// ===========================================================================
|
||||
describe('insertNodeRelative', () => {
  /** The node being inserted in most tests: an ordinary paragraph. */
  const newBlock = (id: string, value = '') => para(id, value);
  /** `attrs.id` of every node in a content array, in order. */
  const idsOf = (nodes: any[]) => nodes.map((n: any) => n.attrs.id);
  /** A one-column table with one row per given `[paragraphId, text]` pair. */
  const singleColumnTable = (...cells: Array<[string, string]>) => ({
    type: 'table',
    content: cells.map(([id, value]) => row([cell('tableCell', id, value)])),
  });

  it('appends a node to top-level content', () => {
    const result = insertNodeRelative(doc(para('p0')), newBlock('new', 'N'), {
      position: 'append',
    });
    expect(result.inserted).toBe(true);
    expect(idsOf(result.doc.content)).toEqual(['p0', 'new']);
  });

  it('creates a content array when appending to a doc without one', () => {
    const bareDoc = { type: 'doc' };
    const result = insertNodeRelative(bareDoc, newBlock('new'), { position: 'append' });
    expect(result.inserted).toBe(true);
    expect(idsOf(result.doc.content)).toEqual(['new']);
  });

  it('inserts before a node by id (top level)', () => {
    const input = doc(para('p0'), para('p1'));
    const result = insertNodeRelative(input, newBlock('new'), {
      position: 'before',
      anchorNodeId: 'p1',
    });
    expect(result.inserted).toBe(true);
    expect(idsOf(result.doc.content)).toEqual(['p0', 'new', 'p1']);
  });

  it('inserts after a node by id (top level)', () => {
    const input = doc(para('p0'), para('p1'));
    const result = insertNodeRelative(input, newBlock('new'), {
      position: 'after',
      anchorNodeId: 'p0',
    });
    expect(result.inserted).toBe(true);
    expect(idsOf(result.doc.content)).toEqual(['p0', 'new', 'p1']);
  });

  it('inserts before a NESTED anchor by id, into its own parent content', () => {
    const input = doc(singleColumnTable(['inner', 'x']));

    const result = insertNodeRelative(input, newBlock('new'), {
      position: 'before',
      anchorNodeId: 'inner',
    });

    expect(result.inserted).toBe(true);
    // A non-structural node lands next to the anchor, i.e. inside the cell
    // (table -> row 0 -> cell 0), ahead of the anchor paragraph.
    const cellChildren = result.doc.content[0].content[0].content[0].content;
    expect(idsOf(cellChildren)).toEqual(['new', 'inner']);
  });

  it('inserts by anchorText against top-level blocks (substring match)', () => {
    const input = doc(para('p0', 'hello world'), para('p1', 'other'));
    // 'world' is only a substring of the first paragraph's text.
    const result = insertNodeRelative(input, newBlock('new'), {
      position: 'after',
      anchorText: 'world',
    });
    expect(result.inserted).toBe(true);
    expect(idsOf(result.doc.content)).toEqual(['p0', 'new', 'p1']);
  });

  it('returns inserted:false when the anchor cannot be resolved', () => {
    const input = doc(para('p0'));

    // Unknown id anchor.
    const viaId = insertNodeRelative(input, newBlock('new'), {
      position: 'after',
      anchorNodeId: 'nope',
    });
    expect(viaId.inserted).toBe(false);
    expect(viaId.doc).toEqual(input);

    // Text anchor that matches no block.
    const viaText = insertNodeRelative(input, newBlock('new'), {
      position: 'before',
      anchorText: 'zzz',
    });
    expect(viaText.inserted).toBe(false);
    expect(viaText.doc).toEqual(input);
  });

  it('routes a structural tableRow to the nearest table container', () => {
    const input = doc(singleColumnTable(['r0c0', 'A'], ['r1c0', 'B']));
    const extraRow = row([cell('tableCell', 'rNew', 'NEW')]);

    // The anchor is a paragraph inside row 0's cell; the row being inserted
    // must end up as a sibling of that row, directly after it.
    const result = insertNodeRelative(input, extraRow, {
      position: 'after',
      anchorNodeId: 'r0c0',
    });

    expect(result.inserted).toBe(true);
    const firstCellParaId = (r: any) => r.content[0].content[0].attrs.id;
    const rowOrder = result.doc.content[0].content.map(firstCellParaId);
    expect(rowOrder).toEqual(['r0c0', 'rNew', 'r1c0']);
  });

  it('throws when appending a structural node at the top level', () => {
    const input = doc(para('p0'));
    const looseRow = row([cell('tableCell', 'x', 'X')]);
    const attempt = () => insertNodeRelative(input, looseRow, { position: 'append' });
    expect(attempt).toThrow(/cannot append a tableRow at the top level/);
  });

  it('throws when a structural anchor is not inside the required container', () => {
    // The anchor exists, but it is a top-level paragraph with no table above it.
    const input = doc(para('p0', 'loose'));
    const looseRow = row([cell('tableCell', 'x', 'X')]);
    const attempt = () =>
      insertNodeRelative(input, looseRow, { position: 'after', anchorNodeId: 'p0' });
    expect(attempt).toThrow(/the anchor is not inside a table/);
  });

  it('honours offset: before vs after place the node on the correct side', () => {
    const input = doc(para('a'), para('b'), para('c'));

    const placedBefore = insertNodeRelative(input, newBlock('N'), {
      position: 'before',
      anchorNodeId: 'b',
    });
    expect(idsOf(placedBefore.doc.content)).toEqual(['a', 'N', 'b', 'c']);

    const placedAfter = insertNodeRelative(input, newBlock('N'), {
      position: 'after',
      anchorNodeId: 'b',
    });
    expect(idsOf(placedAfter.doc.content)).toEqual(['a', 'b', 'N', 'c']);
  });

  it('does not mutate the input doc or the node argument', () => {
    const input = doc(para('p0'));
    const inputBefore = structuredClone(input);
    const inserted = newBlock('new', 'N');
    const insertedBefore = structuredClone(inserted);

    insertNodeRelative(input, inserted, { position: 'append' });

    expect(input).toEqual(inputBefore);
    expect(inserted).toEqual(insertedBefore);
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// readTable
|
||||
// ===========================================================================
|
||||
describe('readTable', () => {
  /** A fresh 2x2 table on every call: one header row, one data row. */
  function makeTable(): any {
    const headerRow = row([cell('tableHeader', 'h0', 'H0'), cell('tableHeader', 'h1', 'H1')]);
    const dataRow = row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]);
    return { type: 'table', content: [headerRow, dataRow] };
  }

  it('reads a table by #n', () => {
    const input = doc(para('p0'), makeTable());

    const table = readTable(input, '#1');

    expect(table).not.toBeNull();
    expect(table!.rows).toBe(2);
    expect(table!.cols).toBe(2);
    expect(table!.cells).toEqual([['H0', 'H1'], ['A', 'B']]);
    expect(table!.cellIds).toEqual([['h0', 'h1'], ['c0', 'c1']]);
    expect(table!.path).toEqual([1]);
  });

  it('climbs from an inner paragraph id up to the table', () => {
    // 'c1' is the id of a paragraph nested inside a data cell, not of the table.
    const table = readTable(doc(makeTable()), 'c1');

    expect(table).not.toBeNull();
    expect(table!.path).toEqual([0]);
    expect(table!.cells).toEqual([['H0', 'H1'], ['A', 'B']]);
  });

  it('reports per-row widths via cells for a ragged table', () => {
    const ragged = {
      type: 'table',
      content: [
        row([cell('tableHeader', 'h0', 'H0')]),
        row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]),
      ],
    };

    const table = readTable(doc(ragged), '#0');

    // `cols` follows row 0, while `cells` / `cellIds` keep each row's real width.
    expect(table!.cols).toBe(1);
    expect(table!.cells).toEqual([['H0'], ['A', 'B']]);
    expect(table!.cellIds).toEqual([['h0'], ['c0', 'c1']]);
  });

  it('reports null cellId for an empty cell with no paragraph', () => {
    const paragraphlessCell = cell('tableCell', null);
    const input = doc({
      type: 'table',
      content: [row([paragraphlessCell, cell('tableCell', 'c1', 'B')])],
    });

    const table = readTable(input, '#0');

    expect(table!.cells).toEqual([['', 'B']]);
    expect(table!.cellIds).toEqual([[null, 'c1']]);
  });

  it('returns null when the ref matches no table', () => {
    const tableless = doc(para('p0'));
    // '#0' resolves to a paragraph, 'missing' resolves to nothing, and 'p0'
    // resolves to a node that has no enclosing table.
    for (const ref of ['#0', 'missing', 'p0']) {
      expect(readTable(tableless, ref)).toBeNull();
    }
  });
});
|
||||
|
||||
// ===========================================================================
|
||||
// insertTableRow
|
||||
// ===========================================================================
|
||||
describe('insertTableRow', () => {
|
||||
const makeTable = () => ({
|
||||
type: 'table',
|
||||
content: [
|
||||
row([
|
||||
cell('tableHeader', 'h0', 'H0', { colwidth: [120] }),
|
||||
cell('tableHeader', 'h1', 'H1', { colwidth: [240] }),
|
||||
]),
|
||||
row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]),
|
||||
],
|
||||
});
|
||||
|
||||
/** First-paragraph ids of every cell in a row, for ordering assertions. */
|
||||
const rowCellParaIds = (r: any): (string | undefined)[] =>
|
||||
r.content.map((c: any) => c.content[0]?.attrs?.id);
|
||||
/** Cell text of a row. */
|
||||
const rowTexts = (r: any): string[] =>
|
||||
r.content.map((c: any) => blockPlainText(c));
|
||||
|
||||
it('appends a row when index is omitted', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = insertTableRow(d, '#0', ['X', 'Y']);
|
||||
expect(res.inserted).toBe(true);
|
||||
const rows = res.doc.content[0].content;
|
||||
expect(rows.length).toBe(3);
|
||||
expect(rowTexts(rows[2])).toEqual(['X', 'Y']);
|
||||
});
|
||||
|
||||
it('splices at a middle index', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = insertTableRow(d, '#0', ['X', 'Y'], 1);
|
||||
const rows = res.doc.content[0].content;
|
||||
expect(rows.length).toBe(3);
|
||||
expect(rowTexts(rows[1])).toEqual(['X', 'Y']); // new row at index 1
|
||||
expect(rowTexts(rows[2])).toEqual(['A', 'B']); // old data row pushed down
|
||||
});
|
||||
|
||||
it('splices at the end index', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = insertTableRow(d, '#0', ['X', 'Y'], 2); // rows == 2, valid end index
|
||||
const rows = res.doc.content[0].content;
|
||||
expect(rows.length).toBe(3);
|
||||
expect(rowTexts(rows[2])).toEqual(['X', 'Y']);
|
||||
});
|
||||
|
||||
it('APPENDS (does not throw) for an out-of-range index', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = insertTableRow(d, '#0', ['X', 'Y'], 99);
|
||||
const rows = res.doc.content[0].content;
|
||||
expect(res.inserted).toBe(true);
|
||||
expect(rows.length).toBe(3);
|
||||
expect(rowTexts(rows[2])).toEqual(['X', 'Y']); // appended at the end
|
||||
});
|
||||
|
||||
it('throws when given more cells than columns', () => {
|
||||
const d = doc(makeTable());
|
||||
expect(() => insertTableRow(d, '#0', ['X', 'Y', 'Z'])).toThrow(
|
||||
/got 3 cell\(s\) but the table has 2 column\(s\)/,
|
||||
);
|
||||
});
|
||||
|
||||
it('pads a short row to the column count', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = insertTableRow(d, '#0', ['only']);
|
||||
const rows = res.doc.content[0].content;
|
||||
expect(rowTexts(rows[2])).toEqual(['only', '']); // padded with empty cell
|
||||
});
|
||||
|
||||
it('copies colwidth from the header row for each column', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = insertTableRow(d, '#0', ['X', 'Y']);
|
||||
const newRow = res.doc.content[0].content[2];
|
||||
expect(newRow.content[0].attrs.colwidth).toEqual([120]);
|
||||
expect(newRow.content[1].attrs.colwidth).toEqual([240]);
|
||||
expect(newRow.content[0].attrs).toMatchObject({ colspan: 1, rowspan: 1 });
|
||||
});
|
||||
|
||||
it('index 0 inherits the header cell TYPE', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = insertTableRow(d, '#0', ['X', 'Y'], 0);
|
||||
const newRow = res.doc.content[0].content[0];
|
||||
expect(newRow.content.every((c: any) => c.type === 'tableHeader')).toBe(true);
|
||||
// A non-zero index produces plain data cells instead.
|
||||
const res2 = insertTableRow(d, '#0', ['X', 'Y'], 1);
|
||||
const dataRow = res2.doc.content[0].content[1];
|
||||
expect(dataRow.content.every((c: any) => c.type === 'tableCell')).toBe(true);
|
||||
});
|
||||
|
||||
it('mints unique, well-formed paragraph ids for new cells', () => {
|
||||
const d = doc(makeTable());
|
||||
const existing = new Set(['h0', 'h1', 'c0', 'c1']);
|
||||
const res = insertTableRow(d, '#0', ['X', 'Y']);
|
||||
const newRow = res.doc.content[0].content[2];
|
||||
const ids = rowCellParaIds(newRow) as string[];
|
||||
for (const id of ids) {
|
||||
expect(typeof id).toBe('string');
|
||||
expect(id).toMatch(/^[a-z0-9]{12}$/); // Docmost-style 12-char id
|
||||
expect(existing.has(id)).toBe(false); // unique vs pre-existing ids
|
||||
}
|
||||
expect(new Set(ids).size).toBe(ids.length); // unique within the row
|
||||
});
|
||||
|
||||
it('returns inserted:false when the table cannot be located', () => {
|
||||
const d = doc(para('p0'));
|
||||
const res = insertTableRow(d, 'missing', ['X']);
|
||||
expect(res.inserted).toBe(false);
|
||||
expect(res.doc).toEqual(d);
|
||||
});
|
||||
|
||||
it('does not mutate the input doc', () => {
|
||||
const d = doc(makeTable());
|
||||
const snapshot = structuredClone(d);
|
||||
insertTableRow(d, '#0', ['X', 'Y'], 1);
|
||||
expect(d).toEqual(snapshot);
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// deleteTableRow
|
||||
// ===========================================================================
|
||||
describe('deleteTableRow', () => {
|
||||
const makeTable = () => ({
|
||||
type: 'table',
|
||||
content: [
|
||||
row([cell('tableHeader', 'h0', 'H')]),
|
||||
row([cell('tableCell', 'c0', 'A')]),
|
||||
row([cell('tableCell', 'c1', 'B')]),
|
||||
],
|
||||
});
|
||||
const firstId = (r: any) => r.content[0].content[0].attrs.id;
|
||||
|
||||
it('deletes a middle row and preserves siblings', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = deleteTableRow(d, '#0', 1);
|
||||
expect(res.deleted).toBe(true);
|
||||
expect(res.doc.content[0].content.map(firstId)).toEqual(['h0', 'c1']);
|
||||
});
|
||||
|
||||
it('deletes the first row', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = deleteTableRow(d, '#0', 0);
|
||||
expect(res.doc.content[0].content.map(firstId)).toEqual(['c0', 'c1']);
|
||||
});
|
||||
|
||||
it('deletes the last row', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = deleteTableRow(d, '#0', 2);
|
||||
expect(res.doc.content[0].content.map(firstId)).toEqual(['h0', 'c0']);
|
||||
});
|
||||
|
||||
it('throws on an out-of-range index', () => {
|
||||
const d = doc(makeTable());
|
||||
expect(() => deleteTableRow(d, '#0', 99)).toThrow(/out of range/);
|
||||
expect(() => deleteTableRow(d, '#0', -1)).toThrow(/out of range/);
|
||||
});
|
||||
|
||||
it('throws when asked to delete the only row', () => {
|
||||
const single = {
|
||||
type: 'table',
|
||||
content: [row([cell('tableCell', 'c0', 'A')])],
|
||||
};
|
||||
expect(() => deleteTableRow(doc(single), '#0', 0)).toThrow(
|
||||
/refusing to delete the only row/,
|
||||
);
|
||||
});
|
||||
|
||||
it('returns deleted:false when the table cannot be located', () => {
|
||||
const d = doc(para('p0'));
|
||||
const res = deleteTableRow(d, 'missing', 0);
|
||||
expect(res.deleted).toBe(false);
|
||||
expect(res.doc).toEqual(d);
|
||||
});
|
||||
|
||||
it('does not mutate the input doc', () => {
|
||||
const d = doc(makeTable());
|
||||
const snapshot = structuredClone(d);
|
||||
deleteTableRow(d, '#0', 1);
|
||||
expect(d).toEqual(snapshot);
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// updateTableCell
|
||||
// ===========================================================================
|
||||
describe('updateTableCell', () => {
|
||||
const makeTable = () => ({
|
||||
type: 'table',
|
||||
content: [
|
||||
row([cell('tableHeader', 'h0', 'H0'), cell('tableHeader', 'h1', 'H1')]),
|
||||
row([
|
||||
cell('tableCell', 'c0', 'A', { colspan: 2, rowspan: 3, colwidth: [200] }),
|
||||
cell('tableCell', 'c1', 'B'),
|
||||
]),
|
||||
],
|
||||
});
|
||||
|
||||
it('sets the cell text', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = updateTableCell(d, '#0', 1, 1, 'NEW');
|
||||
expect(res.updated).toBe(true);
|
||||
expect(blockPlainText(res.doc.content[0].content[1].content[1])).toBe('NEW');
|
||||
});
|
||||
|
||||
it('REUSES the existing first-paragraph id', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = updateTableCell(d, '#0', 1, 0, 'changed');
|
||||
const para0 = res.doc.content[0].content[1].content[0].content[0];
|
||||
expect(para0.attrs.id).toBe('c0'); // critical: id reused, not regenerated
|
||||
expect(para0.content[0].text).toBe('changed');
|
||||
});
|
||||
|
||||
it('mints a fresh id when the cell had no paragraph', () => {
|
||||
const table = {
|
||||
type: 'table',
|
||||
content: [row([cell('tableCell', null), cell('tableCell', 'c1', 'B')])],
|
||||
};
|
||||
const d = doc(table);
|
||||
const res = updateTableCell(d, '#0', 0, 0, 'now has text');
|
||||
const newPara = res.doc.content[0].content[0].content[0].content[0];
|
||||
expect(typeof newPara.attrs.id).toBe('string');
|
||||
expect(newPara.attrs.id).toMatch(/^[a-z0-9]{12}$/);
|
||||
expect(newPara.attrs.id).not.toBe('c1'); // unique vs existing ids
|
||||
expect(newPara.content[0].text).toBe('now has text');
|
||||
});
|
||||
|
||||
it('PRESERVES the cell colspan/rowspan/colwidth (only content replaced)', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = updateTableCell(d, '#0', 1, 0, 'x');
|
||||
const cellNode = res.doc.content[0].content[1].content[0];
|
||||
expect(cellNode.attrs).toEqual({ colspan: 2, rowspan: 3, colwidth: [200] });
|
||||
});
|
||||
|
||||
it('throws when row or col is out of range', () => {
|
||||
const d = doc(makeTable());
|
||||
expect(() => updateTableCell(d, '#0', 5, 0, 'x')).toThrow(/out of range/);
|
||||
expect(() => updateTableCell(d, '#0', 0, 5, 'x')).toThrow(/out of range/);
|
||||
expect(() => updateTableCell(d, '#0', -1, 0, 'x')).toThrow(/out of range/);
|
||||
});
|
||||
|
||||
it('an empty string yields an empty paragraph content array', () => {
|
||||
const d = doc(makeTable());
|
||||
const res = updateTableCell(d, '#0', 1, 1, '');
|
||||
const cellPara = res.doc.content[0].content[1].content[1].content[0];
|
||||
expect(cellPara.type).toBe('paragraph');
|
||||
expect(cellPara.content).toEqual([]); // empty string -> empty content
|
||||
expect(cellPara.attrs.id).toBe('c1'); // id still reused
|
||||
});
|
||||
|
||||
it('returns updated:false when the table cannot be located', () => {
|
||||
const d = doc(para('p0'));
|
||||
const res = updateTableCell(d, 'missing', 0, 0, 'x');
|
||||
expect(res.updated).toBe(false);
|
||||
expect(res.doc).toEqual(d);
|
||||
});
|
||||
|
||||
it('does not mutate the input doc', () => {
|
||||
const d = doc(makeTable());
|
||||
const snapshot = structuredClone(d);
|
||||
updateTableCell(d, '#0', 1, 1, 'NEW');
|
||||
expect(d).toEqual(snapshot);
|
||||
});
|
||||
});
|
||||
238
packages/git-sync/test/reconcile.test.ts
Normal file
238
packages/git-sync/test/reconcile.test.ts
Normal file
@@ -0,0 +1,238 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
planReconciliation,
|
||||
decideAbsenceDeletions,
|
||||
type ExistingEntry,
|
||||
type LiveEntry,
|
||||
} from '../src/engine/reconcile.js';
|
||||
|
||||
describe('planReconciliation', () => {
|
||||
it('ADD: a new live page (not tracked) is written, nothing deleted', () => {
|
||||
const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/New.md' }];
|
||||
const existing: ExistingEntry[] = [];
|
||||
const plan = planReconciliation(live, existing);
|
||||
expect(plan.toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/New.md' }]);
|
||||
expect(plan.toDelete).toEqual([]);
|
||||
expect(plan.moved).toEqual([]);
|
||||
});
|
||||
|
||||
it('CONTENT-UPDATE: tracked page at the SAME path is rewritten, not moved/deleted', () => {
|
||||
const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Doc.md' }];
|
||||
const existing: ExistingEntry[] = [{ pageId: 'p1', relPath: 'Space/Doc.md' }];
|
||||
const plan = planReconciliation(live, existing);
|
||||
// Still written (re-emitted; identical bytes => git no-op), no move/delete.
|
||||
expect(plan.toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Doc.md' }]);
|
||||
expect(plan.toDelete).toEqual([]);
|
||||
expect(plan.moved).toEqual([]);
|
||||
});
|
||||
|
||||
it('MOVE: same pageId, new path -> write new + recorded as moved (NOT in toDelete)', () => {
|
||||
const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/NewParent/Doc.md' }];
|
||||
const existing: ExistingEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'Space/OldParent/Doc.md' },
|
||||
];
|
||||
const plan = planReconciliation(live, existing);
|
||||
expect(plan.toWrite).toEqual([
|
||||
{ pageId: 'p1', relPath: 'Space/NewParent/Doc.md' },
|
||||
]);
|
||||
// The old path is a MOVE removal, NOT an absence delete -> not in toDelete.
|
||||
expect(plan.toDelete).toEqual([]);
|
||||
expect(plan.moved).toEqual([
|
||||
{
|
||||
pageId: 'p1',
|
||||
fromRelPath: 'Space/OldParent/Doc.md',
|
||||
toRelPath: 'Space/NewParent/Doc.md',
|
||||
removeOldPath: true,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('DELETE: a tracked pageId gone from live -> its file is deleted', () => {
|
||||
const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Keep.md' }];
|
||||
const existing: ExistingEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'Space/Keep.md' },
|
||||
{ pageId: 'p2', relPath: 'Space/Gone.md' },
|
||||
];
|
||||
const plan = planReconciliation(live, existing);
|
||||
expect(plan.toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Keep.md' }]);
|
||||
expect(plan.toDelete).toEqual(['Space/Gone.md']);
|
||||
expect(plan.moved).toEqual([]);
|
||||
});
|
||||
|
||||
it('NO-OP: live and existing identical -> writes (re-emit) but no deletes/moves', () => {
|
||||
const live: LiveEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'A.md' },
|
||||
{ pageId: 'p2', relPath: 'B.md' },
|
||||
];
|
||||
const existing: ExistingEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'A.md' },
|
||||
{ pageId: 'p2', relPath: 'B.md' },
|
||||
];
|
||||
const plan = planReconciliation(live, existing);
|
||||
expect(plan.toWrite).toEqual(live);
|
||||
expect(plan.toDelete).toEqual([]);
|
||||
expect(plan.moved).toEqual([]);
|
||||
});
|
||||
|
||||
it('does NOT delete an old path that another live page will write (path reuse)', () => {
|
||||
// p1 moves from X.md to Y.md; p2 is a NEW page taking over X.md. The old
|
||||
// X.md must NOT be deleted, because p2 writes it.
|
||||
const live: LiveEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'Y.md' },
|
||||
{ pageId: 'p2', relPath: 'X.md' },
|
||||
];
|
||||
const existing: ExistingEntry[] = [{ pageId: 'p1', relPath: 'X.md' }];
|
||||
const plan = planReconciliation(live, existing);
|
||||
expect(new Set(plan.toWrite)).toEqual(
|
||||
new Set([
|
||||
{ pageId: 'p1', relPath: 'Y.md' },
|
||||
{ pageId: 'p2', relPath: 'X.md' },
|
||||
]),
|
||||
);
|
||||
// X.md is a live target, so nothing is deleted.
|
||||
expect(plan.toDelete).toEqual([]);
|
||||
// The move is still recorded, but its old path is NOT removable (p2 writes
|
||||
// X.md): removeOldPath:false protects the reused path from data loss.
|
||||
expect(plan.moved).toEqual([
|
||||
{ pageId: 'p1', fromRelPath: 'X.md', toRelPath: 'Y.md', removeOldPath: false },
|
||||
]);
|
||||
});
|
||||
|
||||
it('combines add + update + move + delete in one plan', () => {
|
||||
const live: LiveEntry[] = [
|
||||
{ pageId: 'keep', relPath: 'Keep.md' }, // update in place
|
||||
{ pageId: 'mover', relPath: 'New/Moved.md' }, // moved
|
||||
{ pageId: 'fresh', relPath: 'Fresh.md' }, // added
|
||||
];
|
||||
const existing: ExistingEntry[] = [
|
||||
{ pageId: 'keep', relPath: 'Keep.md' },
|
||||
{ pageId: 'mover', relPath: 'Old/Moved.md' },
|
||||
{ pageId: 'dead', relPath: 'Dead.md' }, // deleted
|
||||
];
|
||||
const plan = planReconciliation(live, existing);
|
||||
expect(plan.toWrite).toEqual(live);
|
||||
expect(plan.moved).toEqual([
|
||||
{
|
||||
pageId: 'mover',
|
||||
fromRelPath: 'Old/Moved.md',
|
||||
toRelPath: 'New/Moved.md',
|
||||
removeOldPath: true,
|
||||
},
|
||||
]);
|
||||
// toDelete is ABSENCE-only now: the moved old path lives in `moved`, so only
|
||||
// the genuinely-gone page (Dead.md) is here.
|
||||
expect(plan.toDelete).toEqual(['Dead.md']);
|
||||
});
|
||||
|
||||
it('records each duplicate tracked row of a present pageId as a removable move', () => {
|
||||
// Two stray files both claim pageId "dup"; the live page lives elsewhere.
|
||||
// Each stray is a MOVE (same pageId, different path) -> recorded in `moved`
|
||||
// with removeOldPath:true, NOT in absence-based toDelete.
|
||||
const live: LiveEntry[] = [{ pageId: 'dup', relPath: 'Canonical.md' }];
|
||||
const existing: ExistingEntry[] = [
|
||||
{ pageId: 'dup', relPath: 'StrayA.md' },
|
||||
{ pageId: 'dup', relPath: 'StrayB.md' },
|
||||
];
|
||||
const plan = planReconciliation(live, existing);
|
||||
expect(plan.toWrite).toEqual([{ pageId: 'dup', relPath: 'Canonical.md' }]);
|
||||
expect(plan.toDelete).toEqual([]);
|
||||
expect(plan.moved).toEqual([
|
||||
{
|
||||
pageId: 'dup',
|
||||
fromRelPath: 'StrayA.md',
|
||||
toRelPath: 'Canonical.md',
|
||||
removeOldPath: true,
|
||||
},
|
||||
{
|
||||
pageId: 'dup',
|
||||
fromRelPath: 'StrayB.md',
|
||||
toRelPath: 'Canonical.md',
|
||||
removeOldPath: true,
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('decideAbsenceDeletions (SPEC §8)', () => {
|
||||
it('APPLIES when the tree is complete and the delete count is modest', () => {
|
||||
const d = decideAbsenceDeletions({
|
||||
treeComplete: true,
|
||||
liveCount: 10,
|
||||
existingCount: 10,
|
||||
deleteCount: 1,
|
||||
});
|
||||
expect(d).toEqual({ apply: true });
|
||||
});
|
||||
|
||||
it('SUPPRESSES all absence deletions when the tree fetch is incomplete', () => {
|
||||
// Even a single absence delete is suppressed on a partial tree (a missing
|
||||
// pageId in a partial tree is NOT proof of deletion).
|
||||
const d = decideAbsenceDeletions({
|
||||
treeComplete: false,
|
||||
liveCount: 9,
|
||||
existingCount: 10,
|
||||
deleteCount: 1,
|
||||
});
|
||||
expect(d).toEqual({ apply: false, reason: 'incomplete-fetch' });
|
||||
});
|
||||
|
||||
it('SUPPRESSES when live returned 0 pages but files are tracked (complete flag aside)', () => {
|
||||
const d = decideAbsenceDeletions({
|
||||
treeComplete: true,
|
||||
liveCount: 0,
|
||||
existingCount: 5,
|
||||
deleteCount: 5,
|
||||
});
|
||||
expect(d).toEqual({ apply: false, reason: 'empty-live' });
|
||||
});
|
||||
|
||||
it('SUPPRESSES over the mass-delete guard (> 50% of a non-trivial vault)', () => {
|
||||
const d = decideAbsenceDeletions({
|
||||
treeComplete: true,
|
||||
liveCount: 4,
|
||||
existingCount: 10,
|
||||
deleteCount: 6, // 60% > 50%
|
||||
});
|
||||
expect(d).toEqual({ apply: false, reason: 'mass-delete' });
|
||||
});
|
||||
|
||||
it('does NOT apply the fraction guard for a tiny vault (below the floor)', () => {
|
||||
// 1-of-2 is normal in a tiny vault; the fraction guard does not fire.
|
||||
const d = decideAbsenceDeletions({
|
||||
treeComplete: true,
|
||||
liveCount: 1,
|
||||
existingCount: 2,
|
||||
deleteCount: 1,
|
||||
});
|
||||
expect(d).toEqual({ apply: true });
|
||||
});
|
||||
|
||||
it('incomplete-fetch takes precedence over the mass-delete reason', () => {
|
||||
const d = decideAbsenceDeletions({
|
||||
treeComplete: false,
|
||||
liveCount: 4,
|
||||
existingCount: 10,
|
||||
deleteCount: 6,
|
||||
});
|
||||
expect(d).toEqual({ apply: false, reason: 'incomplete-fetch' });
|
||||
});
|
||||
|
||||
it('trivially applies when nothing is tracked or nothing would be deleted', () => {
|
||||
expect(
|
||||
decideAbsenceDeletions({
|
||||
treeComplete: false,
|
||||
liveCount: 0,
|
||||
existingCount: 0,
|
||||
deleteCount: 0,
|
||||
}),
|
||||
).toEqual({ apply: true });
|
||||
expect(
|
||||
decideAbsenceDeletions({
|
||||
treeComplete: false,
|
||||
liveCount: 5,
|
||||
existingCount: 5,
|
||||
deleteCount: 0,
|
||||
}),
|
||||
).toEqual({ apply: true });
|
||||
});
|
||||
});
|
||||
104
packages/git-sync/test/roundtrip-corpus.test.ts
Normal file
104
packages/git-sync/test/roundtrip-corpus.test.ts
Normal file
@@ -0,0 +1,104 @@
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { readdirSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
convertProseMirrorToMarkdown,
|
||||
markdownToProseMirror,
|
||||
docsCanonicallyEqual,
|
||||
} from 'docmost-client';
|
||||
|
||||
// Resolve fixtures relative to this test file so the test is CWD-independent.
|
||||
const here = dirname(fileURLToPath(import.meta.url));
|
||||
const CORPUS_DIR = join(here, 'fixtures', 'corpus');
|
||||
const KNOWN_LIMITATIONS_DIR = join(here, 'fixtures', 'known-limitations');
|
||||
|
||||
/**
 * Push one ProseMirror document through export -> import -> export.
 *
 * @param doc ProseMirror document JSON to round-trip.
 * @returns `md1` (first markdown export), `doc2` (the document re-imported
 *   from `md1`) and `md2` (the markdown exported from `doc2`).
 */
async function roundTrip(doc: any) {
  const firstExport = convertProseMirrorToMarkdown(doc);
  const reimported = await markdownToProseMirror(firstExport);
  const secondExport = convertProseMirrorToMarkdown(reimported);
  return { md1: firstExport, md2: secondExport, doc2: reimported };
}
|
||||
|
||||
describe('round-trip corpus (SPEC §11)', () => {
|
||||
// Discover the corpus synchronously at collection time so each fixture gets
|
||||
// its own `it` with the file name in the test title.
|
||||
const files = readdirSync(CORPUS_DIR)
|
||||
.filter((name) => name.endsWith('.json'))
|
||||
.sort();
|
||||
|
||||
it('has a non-empty corpus', () => {
|
||||
expect(files.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
for (const name of files) {
|
||||
it(`${name}: markdown byte-stable AND canonically stable`, async () => {
|
||||
const doc = JSON.parse(await readFile(join(CORPUS_DIR, name), 'utf8'));
|
||||
const { md1, md2, doc2 } = await roundTrip(doc);
|
||||
|
||||
// 1) The byte-stable markdown property git actually needs.
|
||||
expect(md2, `${name}: markdown not byte-stable`).toBe(md1);
|
||||
// 2) Semantic stability (block ids stripped, default-null normalized).
|
||||
expect(
|
||||
docsCanonicallyEqual(doc, doc2),
|
||||
`${name}: document not canonically stable`,
|
||||
).toBe(true);
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// KNOWN CONVERTER LIMITATIONS (isolated so they do NOT make CI red).
|
||||
//
|
||||
// SPEC §11 explicitly flags images and diagrams as high round-trip risk. These
|
||||
// fixtures are kept OUT of the green corpus above and asserted with `it.fails`
|
||||
// so the documented divergence is locked in (the test FAILS if the converter
|
||||
// ever starts round-tripping them — at which point promote the fixture into
|
||||
// the corpus). The precise divergences for `image-diagrams.json` are:
|
||||
//
|
||||
// * A BLOCK-LEVEL image preceded by a paragraph is NOT byte-stable on the
|
||||
// FIRST re-export. The HTML re-parser hoists the block <img> out of its
|
||||
//     line and leaves an empty paragraph behind, so `paragraph` + `image`
|
||||
// re-imports as paragraph + empty-paragraph + image; the empty paragraph
|
||||
// adds one blank line, so export #2 grows by a one-time "\n\n" (md1 !== md2).
|
||||
// This is NOT non-convergence: the growth happens exactly ONCE. The doc
|
||||
// CONVERGES to a fixpoint after one extra `export→import→export` pass — the
|
||||
// empty paragraph is already present after the first import, so export #2
|
||||
// and export #3 are byte-identical (md2 === md3, verified).
|
||||
//
|
||||
// * drawio / excalidraw diagrams gain `data-align="center"` on the second
|
||||
// export: the schema's diagram `align` attribute has a NON-null default of
|
||||
// "center", which materializes on import; the converter only emits
|
||||
// data-align when set, so it appears on export #2 but not #1. Like the
|
||||
// image case, this is one-time and converges after one extra pass.
|
||||
//
|
||||
// * A STANDALONE block image (no preceding paragraph) IS byte-stable from
|
||||
// export #1 (md1 === md2) — but it is still NOT canonically stable: on
|
||||
// import the bare <img> is wrapped, gaining a leading EMPTY paragraph, so
|
||||
// the canonical doc differs by that spurious paragraph node even though the
|
||||
// markdown bytes match.
|
||||
//
|
||||
// Resolution (SPEC §11, "normalize-on-write"): rather than deep-fixing the
|
||||
// converter, the engine runs ONE `export→import→export` pass when writing into
|
||||
// the vault; from that fixpoint onward the form is byte-stable, so git sees no
|
||||
// phantom diff. The green corpus above avoids these one-time asymmetries by
|
||||
// pre-authoring the materialized defaults (e.g. `align: "center"` on the
|
||||
// diagrams in 06-diagrams.json) so a single pass is already at the fixpoint.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('round-trip KNOWN LIMITATIONS (SPEC §11 image/diagram risk)', () => {
|
||||
it.fails(
|
||||
'image-diagrams.json is NOT byte-stable on export #1 (block image hoist + diagram align default; converges after one extra pass — SPEC §11 normalize-on-write)',
|
||||
async () => {
|
||||
const doc = JSON.parse(
|
||||
await readFile(join(KNOWN_LIMITATIONS_DIR, 'image-diagrams.json'), 'utf8'),
|
||||
);
|
||||
const { md1, md2 } = await roundTrip(doc);
|
||||
// This assertion FAILS today (documented divergence). `it.fails` turns a
|
||||
// failing body into a PASS; if the converter is fixed this flips and the
|
||||
// test goes red, prompting promotion into the green corpus.
|
||||
expect(md2).toBe(md1);
|
||||
},
|
||||
);
|
||||
});
|
||||
29
packages/git-sync/test/roundtrip.test.ts
Normal file
29
packages/git-sync/test/roundtrip.test.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
convertProseMirrorToMarkdown,
|
||||
markdownToProseMirror,
|
||||
} from 'docmost-client';
|
||||
|
||||
// Resolve the fixture relative to this test file so the test is CWD-independent.
|
||||
const here = dirname(fileURLToPath(import.meta.url));
|
||||
const FIXTURE = join(here, 'fixtures', 'sample-doc.json');
|
||||
|
||||
describe('round-trip idempotency (SPEC §11)', () => {
  it('markdown is byte-stable across export -> import -> export', async () => {
    const rawFixture = await readFile(FIXTURE, 'utf8');
    const original = JSON.parse(rawFixture);

    // Export once, re-import that markdown, then export the re-imported doc.
    const firstExport = convertProseMirrorToMarkdown(original);
    const reimported = await markdownToProseMirror(firstExport);
    const secondExport = convertProseMirrorToMarkdown(reimported);

    // Git only needs the second export to match the first byte-for-byte.
    // The source and re-imported documents are deliberately NOT deep-compared:
    // the converter reconstructs schema default attrs (e.g. indent:null), a
    // known SPEC §11 divergence that leaves the markdown output unaffected.
    expect(secondExport).toBe(firstExport);
  });
});
|
||||
96
packages/git-sync/test/sanitize.test.ts
Normal file
96
packages/git-sync/test/sanitize.test.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { sanitizeTitle, disambiguate } from '../src/engine/sanitize.js';
|
||||
|
||||
describe('sanitizeTitle', () => {
|
||||
it('passes a plain title through unchanged', () => {
|
||||
expect(sanitizeTitle('Getting Started')).toBe('Getting Started');
|
||||
});
|
||||
|
||||
it('replaces every forbidden printable character with a dash', () => {
|
||||
// Forbidden set: / \ < > : " | ? *
|
||||
expect(sanitizeTitle('a/b\\c<d>e:f"g|h?i*j')).toBe('a-b-c-d-e-f-g-h-i-j');
|
||||
});
|
||||
|
||||
it('replaces ASCII control characters with a dash', () => {
|
||||
// Build the input with explicit control code points (tab=9, newline=10) to
|
||||
// avoid editor escaping pitfalls. Control chars become "-" BEFORE
|
||||
// whitespace is collapsed, so they survive as dashes (not a folded space).
|
||||
const TAB = String.fromCharCode(9);
|
||||
const NL = String.fromCharCode(10);
|
||||
expect(sanitizeTitle('a b' + TAB + 'c' + NL + 'd')).toBe('a b-c-d');
|
||||
});
|
||||
|
||||
it('collapses runs of plain whitespace to a single space and trims', () => {
|
||||
expect(sanitizeTitle(' hello world ')).toBe('hello world');
|
||||
});
|
||||
|
||||
it('caps the length at 120 characters', () => {
|
||||
const long = 'x'.repeat(200);
|
||||
const out = sanitizeTitle(long);
|
||||
expect(out.length).toBe(120);
|
||||
expect(out).toBe('x'.repeat(120));
|
||||
});
|
||||
|
||||
it('prefixes reserved Windows names with an underscore', () => {
|
||||
expect(sanitizeTitle('CON')).toBe('_CON');
|
||||
expect(sanitizeTitle('nul')).toBe('_nul');
|
||||
// The base name (before the first dot) is what matters.
|
||||
expect(sanitizeTitle('con.md')).toBe('_con.md');
|
||||
});
|
||||
|
||||
it('does not flag names that merely contain a reserved word', () => {
|
||||
expect(sanitizeTitle('console')).toBe('console');
|
||||
expect(sanitizeTitle('Control')).toBe('Control');
|
||||
});
|
||||
|
||||
it('returns "_" for empty or whitespace-only input', () => {
|
||||
expect(sanitizeTitle('')).toBe('_');
|
||||
expect(sanitizeTitle(' ')).toBe('_');
|
||||
});
|
||||
|
||||
it('handles a title that is only forbidden characters', () => {
|
||||
// Each forbidden char becomes "-", so the result is non-empty and safe.
|
||||
expect(sanitizeTitle('///')).toBe('---');
|
||||
});
|
||||
|
||||
it('neutralizes all-dot names so they cannot escape the vault', () => {
|
||||
// ".", "..", "..." (and whitespace-padded variants) are path-traversal
|
||||
// hazards as directory segments. The result must never be a pure-dot
|
||||
// segment and must contain no path separators.
|
||||
for (const input of ['.', '..', '...', ' .. ']) {
|
||||
const out = sanitizeTitle(input);
|
||||
expect(['.', '..', '...']).not.toContain(out);
|
||||
expect(/^\.+$/.test(out)).toBe(false);
|
||||
expect(out).not.toContain('/');
|
||||
expect(out).not.toContain('\\');
|
||||
}
|
||||
// The concrete prefixing behaviour (existing "_" safeguard).
|
||||
expect(sanitizeTitle('.')).toBe('_.');
|
||||
expect(sanitizeTitle('..')).toBe('_..');
|
||||
expect(sanitizeTitle('...')).toBe('_...');
|
||||
expect(sanitizeTitle(' .. ')).toBe('_..');
|
||||
});
|
||||
|
||||
it('is deterministic — the same input yields the same output', () => {
|
||||
const title = 'Some / weird : title?';
|
||||
expect(sanitizeTitle(title)).toBe(sanitizeTitle(title));
|
||||
});
|
||||
});
|
||||
|
||||
describe('disambiguate', () => {
  it('appends a stable ~slugId suffix', () => {
    const suffixed = disambiguate('Notes', 'abc123');
    expect(suffixed).toBe('Notes ~abc123');
  });

  it('is deterministic for the same name and slugId', () => {
    // Two calls with identical arguments must agree.
    const firstCall = disambiguate('Notes', 'abc123');
    const secondCall = disambiguate('Notes', 'abc123');
    expect(firstCall).toBe(secondCall);
  });

  it('produces distinct names for colliding siblings', () => {
    // Same display name, different slug ids -> different resulting names.
    const [forSlugA, forSlugB] = ['slug-a', 'slug-b'].map((slugId) =>
      disambiguate('Notes', slugId),
    );
    expect(forSlugA).not.toBe(forSlugB);
  });
});
|
||||
90
packages/git-sync/test/stabilize.test.ts
Normal file
90
packages/git-sync/test/stabilize.test.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { stabilizePageFile, type PageMeta } from '../src/engine/stabilize.js';
|
||||
// markdownToProseMirror lives in src/lib/markdown-to-prosemirror.ts; importing it mutates the
|
||||
// global DOM via jsdom at module load time (required for @tiptap/html under Node).
|
||||
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
|
||||
import { parseDocmostMarkdown } from '../src/lib/markdown-document.js';
|
||||
|
||||
// stabilize.ts (SPEC §11 normalize-on-write) was 0% covered (only the gated e2e
|
||||
// touched it). stabilizePageFile is import-testable: build a small ProseMirror
|
||||
// content + meta and assert (1) the normalize-on-write pass reaches a fixpoint
|
||||
// (a SECOND pass over the written body is byte-identical), and (2) the meta is
|
||||
// serialized verbatim, including a null parentPageId.
|
||||
|
||||
const meta: PageMeta = {
|
||||
version: 1,
|
||||
pageId: 'pg-1',
|
||||
slugId: 'sl-1',
|
||||
title: 'My Title',
|
||||
spaceId: 'sp-1',
|
||||
parentPageId: null,
|
||||
};
|
||||
|
||||
describe('stabilizePageFile — normalize-on-write fixpoint (SPEC §11)', () => {
  type WrittenFile = Awaited<ReturnType<typeof stabilizePageFile>>;

  /**
   * Feeds an already-written file back through the pipeline: extract its body,
   * import that body to a ProseMirror document, and stabilize the result again
   * with the same `meta`. Returns both the extracted body and the second file
   * so each test can assert on them.
   */
  const reimportAndStabilize = async (written: WrittenFile) => {
    const { body } = parseDocmostMarkdown(written);
    const reimported = await markdownToProseMirror(body);
    const rewritten = await stabilizePageFile(reimported, meta);
    return { body, rewritten };
  };

  it('reaches a byte-identical fixpoint after one extra export/import/export pass', async () => {
    // Diagrams are the canonical one-pass asymmetry. drawio's `align` default
    // ("center") only materializes on import, so a NAIVE export would change on
    // its second export. stabilizePageFile performs the convergence pass at
    // write time, which means the written body must already sit at the
    // fixpoint: importing it again and re-stabilizing gives the same bytes.
    const diagramDoc = {
      type: 'doc',
      content: [
        { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
        { type: 'drawio', attrs: { src: '/d.drawio' } },
        { type: 'paragraph', content: [{ type: 'text', text: 'outro' }] },
      ],
    };

    const firstWrite = await stabilizePageFile(diagramDoc, meta);

    // Second pass over the written body — it has to match the first write
    // byte for byte (this is the fixpoint property git relies on).
    const { body, rewritten } = await reimportAndStabilize(firstWrite);
    expect(rewritten).toBe(firstWrite);

    // The stabilized body carries the materialized diagram default. That proves
    // the convergence pass really ran, rather than two naive exports merely
    // happening to agree.
    expect(body).toContain('data-align="center"');
  });

  it('already-stable content is unchanged by the pass (idempotent)', async () => {
    // Plain prose is a fixpoint to begin with, so stabilizing once and
    // stabilizing twice must agree.
    const proseDoc = {
      type: 'doc',
      content: [{ type: 'paragraph', content: [{ type: 'text', text: 'just plain text' }] }],
    };

    const firstWrite = await stabilizePageFile(proseDoc, meta);
    const { body, rewritten } = await reimportAndStabilize(firstWrite);

    expect(rewritten).toBe(firstWrite);
    expect(body).toBe('just plain text');
  });
});
|
||||
|
||||
describe('stabilizePageFile — meta serialization', () => {
  /**
   * Builds a fresh minimal document (a single paragraph containing "x") on
   * every call, so no document object is shared between tests.
   */
  const oneParagraphDoc = () => ({
    type: 'doc',
    content: [{ type: 'paragraph', content: [{ type: 'text', text: 'x' }] }],
  });

  it('preserves a null parentPageId verbatim in the meta block', async () => {
    const written = await stabilizePageFile(oneParagraphDoc(), meta);
    const { meta: roundTripped } = parseDocmostMarkdown(written);

    // The entire meta survives the round trip; for a root page parentPageId
    // must come back as exactly null.
    expect(roundTripped).toEqual(meta);
    expect(roundTripped?.parentPageId).toBeNull();

    // The sync body serializer omits the trailing docmost:comments block.
    expect(written).not.toContain('docmost:comments');
  });

  it('keeps a non-null parentPageId as-is', async () => {
    const nestedMeta: PageMeta = { ...meta, parentPageId: 'parent-99' };
    const written = await stabilizePageFile(oneParagraphDoc(), nestedMeta);

    expect(parseDocmostMarkdown(written).meta).toEqual(nestedMeta);
  });
});
|
||||
15
packages/git-sync/tsconfig.json
Normal file
15
packages/git-sync/tsconfig.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "Node16",
|
||||
"moduleResolution": "Node16",
|
||||
"outDir": "./build",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true
|
||||
},
|
||||
"include": ["src/**/*"]
|
||||
}
|
||||
23
packages/git-sync/vitest.config.ts
Normal file
23
packages/git-sync/vitest.config.ts
Normal file
@@ -0,0 +1,23 @@
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import path from 'node:path';
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
// The tests ported from docmost-sync still import the converter through the
// upstream package barrel specifier `docmost-client`. Only the PURE half of
// that package was vendored (into `src/lib`), so the specifier is aliased to
// the local lib barrel, which re-exports everything those tests use:
// converter, canonicalize, markdown envelope, markdownToProseMirror.
const packageDir = path.dirname(fileURLToPath(import.meta.url));

// Module specifier -> absolute file path substitutions applied at resolve time.
const moduleAliases = {
  'docmost-client': path.resolve(packageDir, 'src/lib/index.ts'),
};

export default defineConfig({
  resolve: { alias: moduleAliases },
  test: {
    environment: 'node',
    include: ['test/**/*.test.ts'],
  },
});
|
||||
Reference in New Issue
Block a user