feat(git-sync): vendor pure converter + engine into @docmost/git-sync (Phase A.1)

First step of docs/git-sync-plan.md. New workspace package @docmost/git-sync
vendoring the PURE parts from docmost-sync (HEAD b03eb35):
- lib: markdown-converter, markdown-document, canonicalize, docmost-schema,
  node-ops, diff, and an extracted markdown-to-prosemirror (only the pure
  marked->HTML->generateJSON path from upstream collaboration.ts; no websocket).
- engine (pure, no IO): reconcile, layout, sanitize, stabilize, loop-guard.
Ported the upstream pure-module + round-trip corpus tests (vitest): 314 pass,
3 expected upstream known-limitation fails. tsc clean. No server wiring yet.

docmost-schema inlines getStyleProperty (as packages/mcp does — @tiptap/core
3.20.4 doesn't export it). IO engine (pull/push/git/settings) deferred to later
Phase A/B steps; the editor-ext idempotency gate (plan §13.1) is the next step.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-21 13:55:23 +03:00
parent fad1aa0501
commit 87e023b755
61 changed files with 9729 additions and 1817 deletions

View File

@@ -0,0 +1,205 @@
import { describe, expect, it } from 'vitest';
import fc from 'fast-check';
// Barrel import (R-Infra alias resolves this to packages/docmost-client/src so
// coverage measures the real source, not stale dist).
import { canonicalizeContent, docsCanonicallyEqual } from 'docmost-client';
// ---------------------------------------------------------------------------
// Gaps NOT covered by canonicalize.test.ts (test-strategy report §2 diff):
// - the *.align family (drawio/excalidraw/video/youtube/embed): a "center"
// default is dropped, a non-default value is kept;
// - comment.resolved: TRUE is PRESERVED (only resolved:false is normalized);
// - link.target / link.rel NON-default values are kept;
// - property: canonicalizeContent is a fixpoint, docsCanonicallyEqual is
// reflexive and symmetric.
// The base file already covers id-stripping, null-drop, link/comment/orderedList
// default-drop, key-order insensitivity, and a real-diff negative — not re-added.
// ---------------------------------------------------------------------------
// Covers the `align` attr: for the diagram/media family a "center" value is a
// schema default and is dropped; for `image` only the null-drop rule applies.
describe('canonicalizeContent — *.align default family', () => {
  // Every diagram/media node whose schema `align` defaults to "center".
  const alignTypes = ['drawio', 'excalidraw', 'video', 'youtube', 'embed'];

  for (const type of alignTypes) {
    it(`${type}: align "center" (the schema default) is dropped`, () => {
      const out = canonicalizeContent({
        type,
        attrs: { id: 'n-1', src: '/x', align: 'center' },
      });
      // align==default removed (and the id stripped); the meaningful src survives.
      expect(out.attrs).toEqual({ src: '/x' });
    });

    it(`${type}: a NON-default align (e.g. "right") is kept`, () => {
      const out = canonicalizeContent({
        type,
        attrs: { id: 'n-1', src: '/x', align: 'right' },
      });
      expect(out.attrs).toEqual({ src: '/x', align: 'right' });
    });
  }

  it('image align is NOT in KNOWN_DEFAULTS: a non-null align survives, null is dropped', () => {
    // image.align defaults to null, so it is handled by the null-drop rule and
    // a real value ("left") must be kept (no spurious default match).
    const kept = canonicalizeContent({
      type: 'image',
      attrs: { id: 'i-1', src: '/a.png', align: 'left' },
    });
    expect(kept.attrs).toEqual({ src: '/a.png', align: 'left' });

    // An image with align:"center" must KEEP it (center is NOT a default for
    // image, only for the diagram/media family) — guards against over-matching.
    const center = canonicalizeContent({
      type: 'image',
      attrs: { id: 'i-2', src: '/b.png', align: 'center' },
    });
    expect(center.attrs).toEqual({ src: '/b.png', align: 'center' });

    // The second half of the title: an explicit null align is pruned like any
    // other null attr, leaving only the meaningful src.
    const dropped = canonicalizeContent({
      type: 'image',
      attrs: { id: 'i-3', src: '/c.png', align: null },
    });
    expect(dropped.attrs).toEqual({ src: '/c.png' });
  });
});
// resolved:true is a real state change on a comment mark, so canonicalization
// must neither strip it nor treat it as equal to an unresolved comment.
describe('canonicalizeContent — comment.resolved:true preserved (SPEC §11 L66)', () => {
  it('keeps resolved:true (a legitimate change, not a default to normalize away)', () => {
    const resolvedMark = {
      type: 'comment',
      attrs: { commentId: 'cmt-1', resolved: true },
    };
    const canonical = canonicalizeContent({
      type: 'text',
      text: 'anchored',
      marks: [resolvedMark],
    });
    // Both the anchor id and the non-default resolved flag must come through.
    expect(canonical.marks).toEqual([
      { type: 'comment', attrs: { commentId: 'cmt-1', resolved: true } },
    ]);
  });

  it('a resolved:true comment is NOT canonically equal to an unresolved one', () => {
    // Same text and commentId on both sides; only the resolved flag differs.
    const resolvedRun = {
      type: 'text',
      text: 'x',
      marks: [{ type: 'comment', attrs: { commentId: 'c', resolved: true } }],
    };
    const unresolvedRun = {
      type: 'text',
      text: 'x',
      marks: [{ type: 'comment', attrs: { commentId: 'c' } }],
    };
    const equal = docsCanonicallyEqual(resolvedRun, unresolvedRun);
    expect(equal).toBe(false);
  });
});
// Only the schema-default link target/rel are normalized away; any other value
// is meaningful and has to survive canonicalization.
describe('canonicalizeContent — link non-default target/rel kept', () => {
  it('keeps a NON-default link.target (e.g. "_self")', () => {
    const linkedRun = {
      type: 'text',
      text: 'l',
      marks: [{ type: 'link', attrs: { href: 'https://e.com', target: '_self' } }],
    };
    const canonical = canonicalizeContent(linkedRun);
    // "_self" differs from the "_blank" default, so target stays.
    expect(canonical.marks).toEqual([
      { type: 'link', attrs: { href: 'https://e.com', target: '_self' } },
    ]);
  });

  it('keeps a NON-default link.rel', () => {
    const linkedRun = {
      type: 'text',
      text: 'l',
      marks: [{ type: 'link', attrs: { href: 'https://e.com', rel: 'nofollow' } }],
    };
    const canonical = canonicalizeContent(linkedRun);
    expect(canonical.marks).toEqual([
      { type: 'link', attrs: { href: 'https://e.com', rel: 'nofollow' } },
    ]);
  });
});
// ---------------------------------------------------------------------------
// Property-based oracle checks (SPEC §11). The generated trees mix node/mark
// types, ids, null attrs, known-default attrs and meaningful attrs, so the
// invariants are exercised across the whole canonicalization surface.
// ---------------------------------------------------------------------------
// An attribute value: a meaningful value (string / small integer / boolean), a
// null or undefined, or a known schema default ("center" for the *.align
// family; the integer and boolean branches can also yield values such as 1 and
// false). A block id arises when one of these values lands under the `id` key.
// Together these let pruning, id-drop, null-drop and default-drop all fire on
// generated inputs.
const attrValueArb = fc.oneof(
  fc.string({ minLength: 1, maxLength: 6 }),
  fc.integer({ min: 0, max: 9 }),
  fc.boolean(),
  fc.constant(null),
  // Explicit undefined exercises the same drop rule as null.
  fc.constant(undefined),
  // A random short string practically never equals "center", so inject the
  // align default explicitly to reach the default-drop path.
  fc.constant('center'),
);
// Recursive arbitrary for ProseMirror-like nodes. Only `type` is always
// present; text, attrs, marks and content are each optional. Children recurse
// through tie('node'), with at most two per level.
const nodeArb: fc.Arbitrary<any> = fc.letrec((tie) => {
  // Node types include ones with known defaults (orderedList, drawio, video).
  const nodeType = fc.constantFrom(
    'paragraph',
    'heading',
    'orderedList',
    'drawio',
    'video',
    'text',
  );

  // Up to four node attrs drawn from a small key pool that includes `id`.
  const nodeAttrs = fc.dictionary(
    fc.constantFrom('id', 'level', 'start', 'align', 'src', 'indent', 'keep'),
    attrValueArb,
    { maxKeys: 4 },
  );

  // Up to three mark attrs; "_blank" is added so a link target can hit its default.
  const markAttrs = fc.dictionary(
    fc.constantFrom('href', 'target', 'rel', 'commentId', 'resolved'),
    fc.oneof(attrValueArb, fc.constant('_blank')),
    { maxKeys: 3 },
  );

  const mark = fc.record({
    type: fc.constantFrom('bold', 'link', 'comment'),
    attrs: fc.option(markAttrs, { nil: undefined }),
  });

  return {
    node: fc.record(
      {
        type: nodeType,
        text: fc.option(fc.string({ minLength: 0, maxLength: 5 }), { nil: undefined }),
        attrs: fc.option(nodeAttrs, { nil: undefined }),
        marks: fc.option(fc.array(mark, { maxLength: 2 }), { nil: undefined }),
        content: fc.option(fc.array(tie('node'), { maxLength: 2 }), { nil: undefined }),
      },
      { requiredKeys: ['type'] },
    ),
  };
}).node;
// Property-based checks over generated trees: canonicalization is idempotent,
// and canonical equality is reflexive and symmetric. Each runs 300 cases.
describe('canonicalizeContent — property invariants (SPEC §11 oracle)', () => {
  it('is a fixpoint: f(f(x)) === f(x)', () => {
    const idempotent = fc.property(nodeArb, (tree) => {
      const firstPass = canonicalizeContent(tree);
      const secondPass = canonicalizeContent(firstPass);
      // A second pass over an already-canonical tree must change nothing.
      expect(secondPass).toEqual(firstPass);
    });
    fc.assert(idempotent, { numRuns: 300 });
  });

  it('docsCanonicallyEqual is reflexive: equal(x, x) is always true', () => {
    const reflexive = fc.property(nodeArb, (tree) => {
      expect(docsCanonicallyEqual(tree, tree)).toBe(true);
    });
    fc.assert(reflexive, { numRuns: 300 });
  });

  it('docsCanonicallyEqual is symmetric: equal(a, b) === equal(b, a)', () => {
    const symmetric = fc.property(nodeArb, nodeArb, (left, right) => {
      const forward = docsCanonicallyEqual(left, right);
      const backward = docsCanonicallyEqual(right, left);
      expect(forward).toBe(backward);
    });
    fc.assert(symmetric, { numRuns: 300 });
  });
});

View File

@@ -0,0 +1,302 @@
import { describe, expect, it } from 'vitest';
// Import via the package barrel to also assert the symbols are re-exported.
import { canonicalizeContent, docsCanonicallyEqual } from 'docmost-client';
// Base behavior of canonicalizeContent: id stripping, null/undefined pruning,
// known-default dropping, empty attrs/marks removal, and input immutability.
describe('canonicalizeContent', () => {
  it('strips node-level attrs.id, recursively', () => {
    const input = {
      type: 'doc',
      content: [
        {
          type: 'heading',
          attrs: { id: 'h-1', level: 2 },
          content: [{ type: 'text', text: 'Title' }],
        },
      ],
    };
    const out = canonicalizeContent(input);
    expect(out.content[0].attrs).toEqual({ level: 2 });
    // No `id` survives anywhere in the canonical tree.
    expect(JSON.stringify(out)).not.toContain('"id"');
  });

  it('drops null/undefined attrs but keeps every non-null attr', () => {
    const out = canonicalizeContent({
      type: 'paragraph',
      attrs: {
        id: 'p-1',
        indent: null,
        textAlign: undefined,
        level: 0,
        // A falsy-but-non-null boolean: must be kept, just like the 0 above.
        flag: false,
        keep: 'yes',
      },
      content: [],
    });
    // null/undefined gone; non-null values (incl. the falsy 0 and false) kept.
    expect(out.attrs).toEqual({ keep: 'yes', level: 0, flag: false });
  });

  it('removes an attrs object that becomes empty after pruning', () => {
    const out = canonicalizeContent({
      type: 'paragraph',
      attrs: { id: 'p-1', indent: null, textAlign: null },
      content: [{ type: 'text', text: 'x' }],
    });
    // attrs had only an id + null defaults -> the whole attrs key is dropped.
    expect('attrs' in out).toBe(false);
    expect(out).toEqual({
      type: 'paragraph',
      content: [{ type: 'text', text: 'x' }],
    });
  });

  it('treats {attrs:{}} as equivalent to no attrs', () => {
    const withEmpty = canonicalizeContent({ type: 'paragraph', attrs: {} });
    const without = canonicalizeContent({ type: 'paragraph' });
    expect(withEmpty).toEqual(without);
  });

  it('keeps comment marks + commentId but normalizes resolved:false default (SPEC §3 anchor)', () => {
    const out = canonicalizeContent({
      type: 'text',
      text: 'anchored',
      marks: [
        { type: 'comment', attrs: { commentId: 'cmt-1', resolved: false } },
      ],
    });
    // The comment mark is preserved; commentId (a meaningful anchor) survives,
    // but the `resolved: false` schema default is normalized away.
    expect(out.marks).toEqual([
      { type: 'comment', attrs: { commentId: 'cmt-1' } },
    ]);
  });

  it('drops known non-null schema defaults (link target/rel, comment resolved)', () => {
    const out = canonicalizeContent({
      type: 'text',
      text: 'a link',
      marks: [
        {
          type: 'link',
          attrs: {
            href: 'https://example.com/page',
            target: '_blank',
            rel: 'noopener noreferrer nofollow',
          },
        },
      ],
    });
    // href (non-default) kept; target/rel (schema defaults) dropped.
    expect(out.marks).toEqual([
      { type: 'link', attrs: { href: 'https://example.com/page' } },
    ]);
  });

  it('keeps a NON-default value that happens to share an attr name (orderedList start:5)', () => {
    const out = canonicalizeContent({
      type: 'orderedList',
      attrs: { id: 'ol-1', start: 5 },
      content: [],
    });
    // start:5 is NOT the default (1), so it must survive.
    expect(out.attrs).toEqual({ start: 5 });
  });

  it('keeps meaningful node/mark attrs (level, language, href, src, width)', () => {
    const out = canonicalizeContent({
      type: 'doc',
      content: [
        {
          type: 'codeBlock',
          attrs: { id: 'c-1', language: 'js' },
          content: [{ type: 'text', text: 'x' }],
        },
        {
          type: 'image',
          attrs: { id: 'i-1', src: '/a.png', width: 100, height: null },
        },
        {
          type: 'paragraph',
          content: [
            {
              type: 'text',
              text: 'link',
              marks: [{ type: 'link', attrs: { href: 'https://e.com' } }],
            },
          ],
        },
      ],
    });
    expect(out.content[0].attrs).toEqual({ language: 'js' });
    // The null height is pruned; src and width stay.
    expect(out.content[1].attrs).toEqual({ src: '/a.png', width: 100 });
    expect(out.content[2].content[0].marks[0].attrs).toEqual({
      href: 'https://e.com',
    });
  });

  it('preserves text, type and content order exactly', () => {
    const input = {
      type: 'paragraph',
      content: [
        { type: 'text', text: 'one' },
        { type: 'text', text: 'two', marks: [{ type: 'bold' }] },
        { type: 'text', text: 'three' },
      ],
    };
    const out = canonicalizeContent(input);
    expect(out.content.map((n: any) => n.text)).toEqual([
      'one',
      'two',
      'three',
    ]);
    expect(out.content[1].marks).toEqual([{ type: 'bold' }]);
  });

  it('drops an empty marks array (marks:[] === no marks)', () => {
    const out = canonicalizeContent({ type: 'text', text: 'x', marks: [] });
    expect('marks' in out).toBe(false);
  });

  it('does not mutate its input (frozen tree passes through unchanged)', () => {
    // Every level is frozen, so an in-place write would throw in strict mode.
    const input = Object.freeze({
      type: 'doc',
      content: Object.freeze([
        Object.freeze({
          type: 'paragraph',
          attrs: Object.freeze({ id: 'p-1', indent: null }),
          content: Object.freeze([Object.freeze({ type: 'text', text: 'x' })]),
        }),
      ]),
    });
    const before = JSON.stringify(input);
    const out = canonicalizeContent(input);
    // Input is structurally identical after the call.
    expect(JSON.stringify(input)).toBe(before);
    // A fresh tree is returned.
    expect(out).not.toBe(input);
    expect('attrs' in out.content[0]).toBe(false);
  });
});
// docsCanonicallyEqual must ignore noise (block ids, null attrs, schema
// defaults, attr key order) and still report every meaningful difference.
describe('docsCanonicallyEqual', () => {
  it('is true when docs differ only by block ids', () => {
    const left = {
      type: 'doc',
      content: [
        { type: 'heading', attrs: { id: 'h-1', level: 1 }, content: [] },
      ],
    };
    const right = {
      type: 'doc',
      content: [
        { type: 'heading', attrs: { id: 'h-DIFFERENT', level: 1 }, content: [] },
      ],
    };
    expect(docsCanonicallyEqual(left, right)).toBe(true);
  });

  it('is true when one side omits an attr the other sets to default null', () => {
    const sparse = {
      type: 'paragraph',
      attrs: { id: 'p-1' },
      content: [{ type: 'text', text: 'x' }],
    };
    const explicitNulls = {
      type: 'paragraph',
      attrs: { id: 'p-2', indent: null, textAlign: null },
      content: [{ type: 'text', text: 'x' }],
    };
    expect(docsCanonicallyEqual(sparse, explicitNulls)).toBe(true);
  });

  it('is key-order-insensitive for attrs', () => {
    const srcFirst = { type: 'image', attrs: { src: '/a.png', width: 10 } };
    const widthFirst = { type: 'image', attrs: { width: 10, src: '/a.png' } };
    expect(docsCanonicallyEqual(srcFirst, widthFirst)).toBe(true);
  });

  it('is false for a real text difference', () => {
    const hello = { type: 'text', text: 'hello' };
    const world = { type: 'text', text: 'world' };
    expect(docsCanonicallyEqual(hello, world)).toBe(false);
  });

  it('is false for a real attr difference (different level)', () => {
    const levelOne = { type: 'heading', attrs: { id: 'x', level: 1 } };
    const levelTwo = { type: 'heading', attrs: { id: 'y', level: 2 } };
    expect(docsCanonicallyEqual(levelOne, levelTwo)).toBe(false);
  });

  it('is false when a meaningful mark attr differs (commentId)', () => {
    const first = {
      type: 'text',
      text: 'x',
      marks: [{ type: 'comment', attrs: { commentId: 'cmt-1' } }],
    };
    const second = {
      type: 'text',
      text: 'x',
      marks: [{ type: 'comment', attrs: { commentId: 'cmt-2' } }],
    };
    expect(docsCanonicallyEqual(first, second)).toBe(false);
  });

  it('is true when a link has only href vs one with the schema-default target/rel', () => {
    const hrefOnly = {
      type: 'text',
      text: 'link',
      marks: [{ type: 'link', attrs: { href: 'https://example.com' } }],
    };
    const withDefaults = {
      type: 'text',
      text: 'link',
      marks: [
        {
          type: 'link',
          attrs: {
            href: 'https://example.com',
            target: '_blank',
            rel: 'noopener noreferrer nofollow',
          },
        },
      ],
    };
    expect(docsCanonicallyEqual(hrefOnly, withDefaults)).toBe(true);
  });

  it('is true when an orderedList omits start vs one with the default start:1', () => {
    const implicitStart = { type: 'orderedList', content: [] };
    const explicitStart = { type: 'orderedList', attrs: { start: 1 }, content: [] };
    expect(docsCanonicallyEqual(implicitStart, explicitStart)).toBe(true);
  });

  it('is false when an orderedList has a non-default start (5 vs absent)', () => {
    const implicitStart = { type: 'orderedList', content: [] };
    const startsAtFive = { type: 'orderedList', attrs: { start: 5 }, content: [] };
    expect(docsCanonicallyEqual(implicitStart, startsAtFive)).toBe(false);
  });

  it('is true when a comment mark omits resolved vs one with the default false', () => {
    const implicitOpen = {
      type: 'text',
      text: 'x',
      marks: [{ type: 'comment', attrs: { commentId: 'cmt-1' } }],
    };
    const explicitOpen = {
      type: 'text',
      text: 'x',
      marks: [{ type: 'comment', attrs: { commentId: 'cmt-1', resolved: false } }],
    };
    expect(docsCanonicallyEqual(implicitOpen, explicitOpen)).toBe(true);
  });

  it('is false when a comment mark is dropped entirely', () => {
    const commented = {
      type: 'text',
      text: 'x',
      marks: [{ type: 'comment', attrs: { commentId: 'cmt-1' } }],
    };
    const bare = { type: 'text', text: 'x' };
    expect(docsCanonicallyEqual(commented, bare)).toBe(false);
  });
});

View File

@@ -0,0 +1,377 @@
import { describe, expect, it } from 'vitest';
import { diffDocs } from '../src/lib/diff.js';
// ---------------------------------------------------------------------------
// ProseMirror JSON builders. diffDocs accepts plain JSON docs (it parses them
// through the Docmost schema internally), so we only need minimal node shapes.
// ---------------------------------------------------------------------------
/**
 * Builds a paragraph node. A missing (or empty) `text` yields an empty
 * paragraph whose content array has no entries.
 */
const para = (text?: string) => {
  const runs = text ? [{ type: 'text', text }] : [];
  return { type: 'paragraph', content: runs };
};
/** Builds a heading node holding one text run; `level` defaults to 2. */
const heading = (text: string, level = 2) => {
  const run = { type: 'text', text };
  return { type: 'heading', attrs: { level }, content: [run] };
};
/** Wraps the given blocks, in order, as the content of a top-level doc node. */
const doc = (...content: any[]) => {
  return { type: 'doc', content };
};
/** Builds an image node (an atom) with an empty attrs object. */
const image = () => {
  return { type: 'image', attrs: {} };
};
/** Builds an "info" callout wrapping a single paragraph (text defaults to 'note'). */
const callout = (text = 'note') => {
  const body = para(text);
  return { type: 'callout', attrs: { type: 'info' }, content: [body] };
};
/** Builds a 1x1 table: one row, one cell, one paragraph (text defaults to 'c'). */
const table = (cell = 'c') => {
  const onlyCell = { type: 'tableCell', content: [para(cell)] };
  const onlyRow = { type: 'tableRow', content: [onlyCell] };
  return { type: 'table', content: [onlyRow] };
};
/**
 * Builds a paragraph with one text run bearing a link mark. An `undefined`
 * href produces a link mark with empty attrs; `extraMarks` are appended after
 * the link mark.
 */
const linkPara = (text: string, href: string | undefined, extraMarks: any[] = []) => {
  const linkAttrs = href === undefined ? {} : { href };
  const marks = [{ type: 'link', attrs: linkAttrs }, ...extraMarks];
  const run = { type: 'text', text, marks };
  return { type: 'paragraph', content: [run] };
};
/**
 * The diff.ts default for the notes-heading argument (Russian for
 * "Translator's notes"). Tests below that call diffDocs without a third
 * argument build their notes heading from this constant, so it has to match
 * that default exactly.
 */
const DEFAULT_NOTES_HEADING = 'Примечания переводчика';
describe('diffDocs', () => {
// Precise (character-level) diff path: insert/delete counts, the captured
// substrings, and the empty-document edge cases.
describe('textual changes (precise path)', () => {
  it('reports no changes for two identical docs', () => {
    const same = doc(para('hello world'));
    const { changes, summary, markdown } = diffDocs(same, same);
    expect(changes).toHaveLength(0);
    expect(summary).toEqual({ inserted: 0, deleted: 0, blocksChanged: 0 });
    // An empty change list renders as the sentinel line.
    expect(markdown).toContain('(no textual changes)');
  });

  it('counts a pure insertion ("abc" -> "abcXY") and captures the inserted substring', () => {
    const { changes, summary } = diffDocs(doc(para('abc')), doc(para('abcXY')));
    expect(summary.inserted).toBe(2);
    expect(summary.deleted).toBe(0);
    // Exactly one insert change, carrying exactly the added substring.
    const added = changes.filter((change) => change.op === 'insert');
    expect(added).toHaveLength(1);
    expect(added[0].text).toBe('XY');
    // A pure insertion reports no deletions.
    const removed = changes.filter((change) => change.op === 'delete');
    expect(removed).toHaveLength(0);
  });

  it('counts a pure deletion ("abcXY" -> "abc") and captures the deleted substring', () => {
    const { changes, summary } = diffDocs(doc(para('abcXY')), doc(para('abc')));
    expect(summary.deleted).toBe(2);
    expect(summary.inserted).toBe(0);
    const removed = changes.filter((change) => change.op === 'delete');
    expect(removed).toHaveLength(1);
    expect(removed[0].text).toBe('XY');
    const added = changes.filter((change) => change.op === 'insert');
    expect(added).toHaveLength(0);
  });

  it('reports a word modification as a matched delete + insert with exact substrings', () => {
    const { changes, summary } = diffDocs(
      doc(para('hello world')),
      doc(para('hello there')),
    );
    // "world" (5 chars) removed, "there" (5 chars) added.
    expect(summary.inserted).toBe(5);
    expect(summary.deleted).toBe(5);
    const removed = changes.filter((change) => change.op === 'delete');
    const added = changes.filter((change) => change.op === 'insert');
    expect(removed.map((change) => change.text)).toContain('world');
    expect(added.map((change) => change.text)).toContain('there');
  });

  it('handles two empty docs without error', () => {
    const { changes, summary, markdown } = diffDocs(
      { type: 'doc', content: [] },
      { type: 'doc', content: [] },
    );
    expect(changes).toHaveLength(0);
    expect(summary).toEqual({ inserted: 0, deleted: 0, blocksChanged: 0 });
    expect(markdown).toContain('(no textual changes)');
  });

  it('reports an insertion into an empty doc', () => {
    const { changes, summary } = diffDocs(
      { type: 'doc', content: [] },
      doc(para('brand new')),
    );
    expect(summary.inserted).toBeGreaterThan(0);
    const added = changes.filter((change) => change.op === 'insert');
    expect(added.length).toBeGreaterThan(0);
    // Joined together, the inserted pieces contain the new paragraph's text.
    const addedText = added.map((change) => change.text).join('');
    expect(addedText).toContain('brand new');
  });
});
// Integrity tuples are [old, new] counts of structural elements (images,
// callouts, tables, links) reported alongside the textual diff.
describe('integrity counting', () => {
  it('counts images, tables and callouts as old -> new tuples', () => {
    // old: 1 image, 1 callout, 1 table
    // new: 2 images, 0 callouts, 1 table
    const oldDoc = doc(image(), callout(), table());
    const newDoc = doc(image(), image(), table());
    const { integrity } = diffDocs(oldDoc, newDoc);
    expect(integrity.images).toEqual([1, 2]);
    expect(integrity.callouts).toEqual([1, 0]);
    expect(integrity.tables).toEqual([1, 1]);
  });

  it('renders the integrity section verbatim in the markdown', () => {
    const oldDoc = doc(image(), callout(), table());
    const newDoc = doc(image(), image(), table());
    const { markdown } = diffDocs(oldDoc, newDoc);
    // The integrity block is our own formatting, so exact lines are asserted.
    expect(markdown).toContain('## Integrity (old -> new)');
    expect(markdown).toContain('- images: 1 -> 2');
    expect(markdown).toContain('- callouts: 1 -> 0');
    expect(markdown).toContain('- tables: 1 -> 1');
  });

  it('counts a single link split across two adjacent runs (shared href) as one link', () => {
    // Two text runs, both bearing a link to the SAME href; one also bold.
    const d = doc({
      type: 'paragraph',
      content: [
        { type: 'text', text: 'foo', marks: [{ type: 'link', attrs: { href: 'http://x' } }, { type: 'bold' }] },
        { type: 'text', text: 'bar', marks: [{ type: 'link', attrs: { href: 'http://x' } }] },
      ],
    });
    const { integrity } = diffDocs(d, d);
    // Counting by unique href collapses the two runs into one link.
    expect(integrity.links).toEqual([1, 1]);
  });

  it('counts distinct hrefs separately', () => {
    const d = doc({
      type: 'paragraph',
      content: [
        { type: 'text', text: 'one', marks: [{ type: 'link', attrs: { href: 'http://a' } }] },
        { type: 'text', text: 'two', marks: [{ type: 'link', attrs: { href: 'http://b' } }] },
      ],
    });
    const { integrity } = diffDocs(d, d);
    expect(integrity.links).toEqual([2, 2]);
  });

  it('counts a link mark with a missing href once (bucketed under "")', () => {
    // Per source: a missing/empty href is collected under a single "" key, so a
    // malformed link is still counted exactly once.
    // linkPara returns a bare paragraph, so wrap it in doc(): diffDocs takes
    // whole documents, as in every other test in this file.
    const d = doc(linkPara('orphan', undefined));
    const { integrity } = diffDocs(d, d);
    expect(integrity.links).toEqual([1, 1]);
  });
});
// Footnote markers ("[n]") are collected from the body only: anything after
// the notes heading is excluded, and the order of appearance is kept.
describe('footnoteMarkers', () => {
  it('excludes markers after the default notes heading and preserves reading order', () => {
    // [1] and [2] live in the body; [99] follows the notes heading and must be
    // absent from both the old and the new marker list.
    const withNotes = doc(
      para('intro [1] middle [2]'),
      heading(DEFAULT_NOTES_HEADING),
      para('[99] footnote body'),
    );
    const { footnoteMarkers } = diffDocs(withNotes, withNotes).integrity;
    expect(footnoteMarkers).toEqual([
      [1, 2],
      [1, 2],
    ]);
    // Reading order: [1] precedes [2].
    expect(footnoteMarkers[1]).toEqual([1, 2]);
  });

  it('honors a custom notesHeading argument', () => {
    const withCustomNotes = doc(para('a [1]'), heading('Notes'), para('[5] excluded'));
    const { footnoteMarkers } = diffDocs(withCustomNotes, withCustomNotes, 'Notes').integrity;
    // The heading matches the custom argument, so [5] is left out.
    expect(footnoteMarkers).toEqual([[1], [1]]);
  });

  it('includes every marker when no notes heading is present', () => {
    // Nothing matches the notes heading, so the whole doc counts as body.
    const bodyOnly = doc(para('a [1] b [2]'), para('[3]'));
    const { footnoteMarkers } = diffDocs(bodyOnly, bodyOnly).integrity;
    expect(footnoteMarkers).toEqual([
      [1, 2, 3],
      [1, 2, 3],
    ]);
  });

  it('renders the footnoteMarkers integrity line verbatim', () => {
    const withNotes = doc(
      para('x [1] y [2]'),
      heading(DEFAULT_NOTES_HEADING),
      para('[9]'),
    );
    const rendered = diffDocs(withNotes, withNotes).markdown;
    expect(rendered).toContain('- footnoteMarkers: [1, 2] -> [1, 2]');
  });
});
// Coarse block-level fallback. Each test plants a node of unknown type
// ('___nope') in the old doc; per the original author's note this makes schema
// parsing throw inside the precise pipeline, which triggers the fallback.
describe('coarse fallback', () => {
  it('degrades to a coarse block-level diff instead of throwing', () => {
    const before = doc(para('keep this'), { type: '___nope' });
    const after = doc(para('keep this'), para('new block'));
    // The call itself must not throw.
    const { markdown, changes } = diffDocs(before, after);
    // The fallback note is rendered into the markdown.
    expect(markdown).toContain('precise diff failed; coarse block-level diff shown.');
    // Only the new block is reported; the unchanged "keep this" block is not.
    const added = changes.filter((change) => change.op === 'insert');
    expect(added).toHaveLength(1);
    expect(added[0].text).toBe('new block');
  });

  it('does not report whitespace-only blocks in the fallback path', () => {
    // The new doc gains a block whose plain text is only whitespace; the
    // coarse diff skips blocks whose trimmed text is empty.
    const before = doc({ type: '___nope' }, para('kept'));
    const after = doc(para('kept'), para(' '));
    const { markdown, changes, summary } = diffDocs(before, after);
    // The fallback was taken (the precise path threw on the unknown node).
    expect(markdown).toContain('coarse block-level diff shown.');
    // Nothing is reported: "kept" is unchanged and " " is whitespace-only.
    expect(changes).toHaveLength(0);
    expect(summary).toEqual({ inserted: 0, deleted: 0, blocksChanged: 0 });
  });

  it('still computes integrity (images/tables/callouts/footnotes) in the coarse-fallback branch', () => {
    // Regression guard: integrity is computed BEFORE the try/catch, so a pair
    // that forces the fallback must NOT zero the integrity counts.
    const before = doc(image(), callout(), table(), para('a [1]'), { type: '___nope' });
    const after = doc(image(), image(), table(), para('b [2] [3]'));
    const { markdown, integrity } = diffDocs(before, after);
    // The fallback was taken...
    expect(markdown).toContain('coarse block-level diff shown.');
    // ...yet every integrity tuple holds the real count, not [0,0].
    expect(integrity.images).toEqual([1, 2]);
    expect(integrity.callouts).toEqual([1, 0]);
    expect(integrity.tables).toEqual([1, 1]);
    // Footnote markers are gathered from both docs even under the fallback.
    expect(integrity.footnoteMarkers).toEqual([[1], [2, 3]]);
  });

  it('reports both a deletion and an insertion in the fallback path', () => {
    const before = doc(para('old paragraph'), { type: '___nope' });
    const after = doc(para('new paragraph'));
    const { markdown, changes, summary } = diffDocs(before, after);
    expect(markdown).toContain('coarse block-level diff shown.');
    const removed = changes.filter((change) => change.op === 'delete');
    const added = changes.filter((change) => change.op === 'insert');
    // "old paragraph" is gone -> deletion; "new paragraph" appears -> insertion.
    expect(removed.map((change) => change.text)).toContain('old paragraph');
    expect(added.map((change) => change.text)).toContain('new paragraph');
    // Character counts are the lengths of the reported texts.
    expect(summary.deleted).toBe('old paragraph'.length);
    expect(summary.inserted).toBe('new paragraph'.length);
  });
});
// DiffChange.block is the text context of the block a change falls in. These
// tests pin its truncation rule and how summary.blocksChanged dedups on it.
describe('blockContextAt (DiffChange.block)', () => {
  it('truncates a >80-char block context with an ellipsis and keeps it non-empty', () => {
    // A 100-char paragraph with a one-char edit; the block context guards a
    // swallowed catch and must produce a truncated, non-empty string.
    const hundredChars = 'X'.repeat(100);
    const { changes } = diffDocs(
      doc(para(hundredChars)),
      doc(para(hundredChars + 'Z')),
    );
    const added = changes.filter((change) => change.op === 'insert');
    expect(added).toHaveLength(1);
    const context = added[0].block;
    expect(context.length).toBeGreaterThan(0);
    // Truncation rule: 77 chars + "..." = length 80, ending with "...".
    expect(context.endsWith('...')).toBe(true);
    expect(context).toHaveLength(80);
  });

  it('keeps a short block context untruncated', () => {
    const { changes } = diffDocs(doc(para('abc')), doc(para('abcXY')));
    const added = changes.filter((change) => change.op === 'insert');
    expect(added[0].block).toBe('abcXY');
    expect(added[0].block.endsWith('...')).toBe(false);
  });

  it('dedups blocksChanged by op + block context (multiple edits in one block count once per op)', () => {
    // Two word edits inside one paragraph give 4 changes (2 deletes + 2
    // inserts) but only 2 distinct block keys:
    // "d:the quick brown fox" and "i:the slow brown wolf".
    const before = doc(para('the quick brown fox'));
    const after = doc(para('the slow brown wolf'));
    const { changes, summary } = diffDocs(before, after);
    expect(changes.length).toBe(4);
    expect(summary.blocksChanged).toBe(2);
  });

  it('counts one block key per op for edits spread across two blocks', () => {
    // Edits in two different paragraphs -> 4 distinct block keys.
    const before = doc(para('first line here'), para('second line here'));
    const after = doc(para('first line HERE'), para('second line HERE'));
    const { summary } = diffDocs(before, after);
    expect(summary.blocksChanged).toBe(4);
  });
});
// Shape of the rendered markdown report: the summary header and the signed
// change lines under "## Changes".
describe('markdown rendering', () => {
  it('puts the summary counts in the markdown header', () => {
    const { markdown } = diffDocs(doc(para('abc')), doc(para('abcXY')));
    expect(markdown).toContain(
      '# Diff: 2 inserted / 0 deleted (1 blocks changed)',
    );
  });

  it('renders each change with its op sign (loose membership, library-controlled order)', () => {
    const { markdown } = diffDocs(
      doc(para('hello world')),
      doc(para('hello there')),
    );
    // The diff library decides the order of change lines, so only membership
    // is asserted. Look only at lines AFTER the "## Changes" heading, because
    // integrity lines also begin with "- ".
    const allLines = markdown.split('\n');
    const headingAt = allLines.indexOf('## Changes');
    expect(headingAt).toBeGreaterThanOrEqual(0);
    const signed = allLines
      .slice(headingAt + 1)
      .filter((line) => line.startsWith('+ ') || line.startsWith('- '));
    const removedLines = signed.filter((line) => line.startsWith('- '));
    const addedLines = signed.filter((line) => line.startsWith('+ '));
    expect(removedLines.some((line) => line.includes('world'))).toBe(true);
    expect(addedLines.some((line) => line.includes('there'))).toBe(true);
    // One delete line and one insert line.
    expect(removedLines).toHaveLength(1);
    expect(addedLines).toHaveLength(1);
  });
});
});

View File

@@ -0,0 +1,36 @@
{
"type": "doc",
"content": [
{
"type": "heading",
"attrs": { "level": 1 },
"content": [{ "type": "text", "text": "Level one heading" }]
},
{
"type": "paragraph",
"content": [{ "type": "text", "text": "A plain paragraph of text." }]
},
{
"type": "heading",
"attrs": { "level": 2 },
"content": [{ "type": "text", "text": "Level two heading" }]
},
{
"type": "paragraph",
"content": [
{ "type": "text", "text": "First line of a paragraph" },
{ "type": "hardBreak" },
{ "type": "text", "text": "second line after a hard break." }
]
},
{
"type": "heading",
"attrs": { "level": 3 },
"content": [{ "type": "text", "text": "Level three heading" }]
},
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Closing paragraph." }]
}
]
}

View File

@@ -0,0 +1,62 @@
{
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{ "type": "text", "marks": [{ "type": "bold" }], "text": "bold" },
{ "type": "text", "text": " " },
{ "type": "text", "marks": [{ "type": "italic" }], "text": "italic" },
{ "type": "text", "text": " " },
{ "type": "text", "marks": [{ "type": "code" }], "text": "code" },
{ "type": "text", "text": " " },
{ "type": "text", "marks": [{ "type": "strike" }], "text": "strike" }
]
},
{
"type": "paragraph",
"content": [
{
"type": "text",
"marks": [
{
"type": "link",
"attrs": {
"href": "https://example.com/page"
}
}
],
"text": "a link"
},
{ "type": "text", "text": ", " },
{
"type": "text",
"marks": [{ "type": "highlight" }],
"text": "highlighted"
},
{ "type": "text", "text": ", base" },
{ "type": "text", "marks": [{ "type": "subscript" }], "text": "sub" },
{ "type": "text", "text": " and base" },
{ "type": "text", "marks": [{ "type": "superscript" }], "text": "sup" },
{ "type": "text", "text": "." }
]
},
{
"type": "paragraph",
"content": [
{ "type": "text", "text": "Here is a " },
{
"type": "text",
"marks": [
{
"type": "comment",
"attrs": { "commentId": "cmt-xyz789" }
}
],
"text": "commented anchor span"
},
{ "type": "text", "text": " that must survive (SPEC §3)." }
]
}
]
}

View File

@@ -0,0 +1,113 @@
{
"type": "doc",
"content": [
{
"type": "bulletList",
"content": [
{
"type": "listItem",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "First bullet" }]
}
]
},
{
"type": "listItem",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Second bullet with a nested list" }]
},
{
"type": "bulletList",
"content": [
{
"type": "listItem",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Nested bullet A" }]
}
]
},
{
"type": "listItem",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Nested bullet B" }]
}
]
}
]
}
]
}
]
},
{
"type": "orderedList",
"content": [
{
"type": "listItem",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "First ordered item" }]
}
]
},
{
"type": "listItem",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Second ordered item" }]
},
{
"type": "orderedList",
"content": [
{
"type": "listItem",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Nested ordered one" }]
}
]
}
]
}
]
}
]
},
{
"type": "taskList",
"content": [
{
"type": "taskItem",
"attrs": { "checked": true },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Done task" }]
}
]
},
{
"type": "taskItem",
"attrs": { "checked": false },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Pending task" }]
}
]
}
]
}
]
}

View File

@@ -0,0 +1,38 @@
{
"type": "doc",
"content": [
{
"type": "blockquote",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "A quoted line." }]
},
{
"type": "paragraph",
"content": [{ "type": "text", "text": "A second quoted paragraph." }]
}
]
},
{
"type": "horizontalRule"
},
{
"type": "codeBlock",
"attrs": { "language": "js" },
"content": [
{ "type": "text", "text": "const a = 1;\nconsole.log(a);\n" }
]
},
{
"type": "callout",
"attrs": { "type": "warning" },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "This is a warning callout." }]
}
]
}
]
}

View File

@@ -0,0 +1,85 @@
{
"type": "doc",
"content": [
{
"type": "table",
"content": [
{
"type": "tableRow",
"content": [
{
"type": "tableHeader",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Name" }]
}
]
},
{
"type": "tableHeader",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Value" }]
}
]
}
]
},
{
"type": "tableRow",
"content": [
{
"type": "tableCell",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "alpha" }]
}
]
},
{
"type": "tableCell",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "1" }]
}
]
}
]
},
{
"type": "tableRow",
"content": [
{
"type": "tableCell",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "beta" }]
}
]
},
{
"type": "tableCell",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "2" }]
}
]
}
]
}
]
}
]
}

View File

@@ -0,0 +1,17 @@
{
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "A drawio and an excalidraw diagram follow." }]
},
{
"type": "drawio",
"attrs": { "src": "/api/files/def/flow.drawio", "align": "center", "attachmentId": "att-1" }
},
{
"type": "excalidraw",
"attrs": { "src": "/api/files/ghi/sketch.excalidraw", "align": "center", "attachmentId": "att-2" }
}
]
}

View File

@@ -0,0 +1,35 @@
{
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{ "type": "text", "text": "Some " },
{
"type": "text",
"marks": [{ "type": "textStyle", "attrs": { "color": "#ff0000" } }],
"text": "red colored"
},
{ "type": "text", "text": " text." }
]
},
{
"type": "paragraph",
"content": [
{ "type": "text", "text": "Ping " },
{
"type": "mention",
"attrs": {
"id": "m-1",
"label": "Alice",
"entityType": "user",
"entityId": "u-1",
"slugId": "s-1",
"creatorId": "c-1"
}
},
{ "type": "text", "text": " please." }
]
}
]
}

View File

@@ -0,0 +1,15 @@
{
"type": "doc",
"content": [
{
"type": "details",
"attrs": { "open": false },
"content": [
{ "type": "detailsSummary", "content": [{ "type": "text", "text": "Click to expand" }] },
{ "type": "detailsContent", "content": [
{ "type": "paragraph", "content": [{ "type": "text", "text": "Hidden body paragraph." }] }
]}
]
}
]
}

View File

@@ -0,0 +1,17 @@
{
"type": "doc",
"content": [
{
"type": "columns",
"attrs": { "layout": "two", "widthMode": "normal" },
"content": [
{ "type": "column", "attrs": { "width": 50 }, "content": [
{ "type": "paragraph", "content": [{ "type": "text", "text": "Left column." }] }
]},
{ "type": "column", "attrs": { "width": 50 }, "content": [
{ "type": "paragraph", "content": [{ "type": "text", "text": "Right column." }] }
]}
]
}
]
}

View File

@@ -0,0 +1,13 @@
{
"type": "doc",
"content": [
{
"type": "heading",
"attrs": { "level": 2 },
"content": [
{ "type": "text", "text": "Notes for " },
{ "type": "mention", "attrs": { "id": "m-2", "label": "Bob", "entityType": "user", "entityId": "u-2", "slugId": "s-2", "creatorId": "c-2" } }
]
}
]
}

View File

@@ -0,0 +1,21 @@
{
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "An image followed by two diagrams." }]
},
{
"type": "image",
"attrs": { "src": "/api/files/abc/diagram.png", "alt": "A picture" }
},
{
"type": "drawio",
"attrs": { "src": "/api/files/def/flow.drawio", "attachmentId": "att-1" }
},
{
"type": "excalidraw",
"attrs": { "src": "/api/files/ghi/sketch.excalidraw", "attachmentId": "att-2" }
}
]
}

View File

@@ -0,0 +1,151 @@
{
"type": "doc",
"content": [
{
"type": "heading",
"attrs": { "level": 1, "id": "h-1" },
"content": [{ "type": "text", "text": "Round-trip sample" }]
},
{
"type": "paragraph",
"attrs": { "id": "p-1" },
"content": [
{ "type": "text", "text": "This paragraph has " },
{ "type": "text", "marks": [{ "type": "bold" }], "text": "bold" },
{ "type": "text", "text": ", " },
{ "type": "text", "marks": [{ "type": "italic" }], "text": "italic" },
{ "type": "text", "text": " and a " },
{
"type": "text",
"marks": [
{
"type": "link",
"attrs": {
"href": "https://example.com"
}
}
],
"text": "link"
},
{ "type": "text", "text": "." }
]
},
{
"type": "paragraph",
"attrs": { "id": "p-2" },
"content": [
{ "type": "text", "text": "Here is a " },
{
"type": "text",
"marks": [
{ "type": "comment", "attrs": { "commentId": "cmt-abc123", "resolved": false } }
],
"text": "commented span"
},
{ "type": "text", "text": " that must survive the round-trip." }
]
},
{
"type": "bulletList",
"attrs": { "id": "ul-1" },
"content": [
{
"type": "listItem",
"attrs": { "id": "li-1" },
"content": [
{
"type": "paragraph",
"attrs": { "id": "p-3" },
"content": [{ "type": "text", "text": "First bullet" }]
}
]
},
{
"type": "listItem",
"attrs": { "id": "li-2" },
"content": [
{
"type": "paragraph",
"attrs": { "id": "p-4" },
"content": [{ "type": "text", "text": "Second bullet" }]
}
]
}
]
},
{
"type": "table",
"attrs": { "id": "tbl-1" },
"content": [
{
"type": "tableRow",
"content": [
{
"type": "tableHeader",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Name" }]
}
]
},
{
"type": "tableHeader",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "Value" }]
}
]
}
]
},
{
"type": "tableRow",
"content": [
{
"type": "tableCell",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "alpha" }]
}
]
},
{
"type": "tableCell",
"attrs": { "colspan": 1, "rowspan": 1 },
"content": [
{
"type": "paragraph",
"content": [{ "type": "text", "text": "1" }]
}
]
}
]
}
]
},
{
"type": "callout",
"attrs": { "type": "info", "id": "callout-1" },
"content": [
{
"type": "paragraph",
"attrs": { "id": "p-5" },
"content": [{ "type": "text", "text": "This is an info callout." }]
}
]
},
{
"type": "codeBlock",
"attrs": { "language": "js", "id": "code-1" },
"content": [
{ "type": "text", "text": "const a = 1;\nconsole.log(a);\n" }
]
}
]
}

View File

@@ -0,0 +1,144 @@
import { describe, expect, it } from 'vitest';
import { buildVaultLayout, type PageNode } from '../src/engine/layout.js';
describe('buildVaultLayout', () => {
  // Renders a layout entry as one "segment/…/stem" string so that complete
  // vault paths (not just stems) can be compared for uniqueness.
  const fullPath = (entry: { segments: string[]; stem: string }): string =>
    entry.segments.concat(entry.stem).join('/');

  it('disambiguates two siblings with the same sanitized title via ~slugId', () => {
    const siblings: PageNode[] = [
      { id: 'p1', title: 'Notes', slugId: 'slug-a', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'slug-b', parentPageId: null },
    ];

    const result = buildVaultLayout(siblings);

    // The first page keeps the plain stem; the second gets the "~slugId" suffix.
    expect(result.get('p1')).toEqual({ segments: [], stem: 'Notes' });
    expect(result.get('p2')).toEqual({ segments: [], stem: 'Notes ~slug-b' });
  });

  it('falls back to ~id when a colliding sibling has no slugId', () => {
    const siblings: PageNode[] = [
      { id: 'p1', title: 'Notes', parentPageId: null },
      { id: 'p2', title: 'Notes', parentPageId: null },
    ];

    const result = buildVaultLayout(siblings);

    expect(result.get('p1')?.stem).toBe('Notes');
    expect(result.get('p2')?.stem).toBe('Notes ~p2');
  });

  it('does NOT collide identical titles under DIFFERENT parents (distinct segments)', () => {
    const tree: PageNode[] = [
      { id: 'a', title: 'Alpha', parentPageId: null },
      { id: 'b', title: 'Beta', parentPageId: null },
      { id: 'a1', title: 'Notes', parentPageId: 'a' },
      { id: 'b1', title: 'Notes', parentPageId: 'b' },
    ];

    const result = buildVaultLayout(tree);

    // Identical stems are fine here: the folder segments already differ, so
    // neither child needs a disambiguating suffix.
    expect(result.get('a1')).toEqual({ segments: ['Alpha'], stem: 'Notes' });
    expect(result.get('b1')).toEqual({ segments: ['Beta'], stem: 'Notes' });
  });

  it('terminates on a 2-node parent cycle and yields a finite result', () => {
    const cyclic: PageNode[] = [
      { id: 'a', title: 'A', parentPageId: 'b' },
      { id: 'b', title: 'B', parentPageId: 'a' },
    ];

    const result = buildVaultLayout(cyclic);

    // Both pages must come back with a finite path despite the cycle.
    expect(result.size).toBe(2);
    const entryA = result.get('a');
    const entryB = result.get('b');
    expect(entryA).toBeDefined();
    expect(entryB).toBeDefined();
    // The ancestor chain of each node is bounded (the walk did not run away).
    expect(entryA!.segments.length).toBeLessThanOrEqual(2);
    expect(entryB!.segments.length).toBeLessThanOrEqual(2);
  });

  it('maps a root page (parentPageId null) to empty segments', () => {
    const onlyRoot: PageNode[] = [{ id: 'root', title: 'Home', parentPageId: null }];

    const result = buildVaultLayout(onlyRoot);

    expect(result.get('root')).toEqual({ segments: [], stem: 'Home' });
  });

  it('emits ancestors in root->leaf order for a deep chain', () => {
    const chain: PageNode[] = [
      { id: 'g', title: 'Grand', parentPageId: null },
      { id: 'p', title: 'Parent', parentPageId: 'g' },
      { id: 'c', title: 'Child', parentPageId: 'p' },
    ];

    const result = buildVaultLayout(chain);

    expect(result.get('c')).toEqual({
      segments: ['Grand', 'Parent'],
      stem: 'Child',
    });
  });

  it('disambiguates two orphan-parent pages with the same title at the path level', () => {
    // Neither parent is part of the input, so both pages are expected at the
    // root (segments: []). Their parentPageIds differ, so a purely
    // sibling-scoped comparison would never pair them up; the resulting full
    // paths must still be DISTINCT.
    const orphans: PageNode[] = [
      { id: 'x', title: 'Orphan', slugId: 'sx', parentPageId: 'missing-1' },
      { id: 'y', title: 'Orphan', slugId: 'sy', parentPageId: 'missing-2' },
    ];

    const result = buildVaultLayout(orphans);
    const entryX = result.get('x')!;
    const entryY = result.get('y')!;

    expect(fullPath(entryX)).not.toBe(fullPath(entryY));
    // First page keeps the plain stem; the later one is re-stemmed.
    expect(entryX.stem).toBe('Orphan');
    expect(entryY.stem).toBe('Orphan ~sy');
  });

  it('sanitizes a slugId containing a path separator before using it as a suffix', () => {
    // A crafted slugId containing "/" must not leak a path separator into the stem.
    const crafted: PageNode[] = [
      { id: 'p1', title: 'Notes', slugId: 'a/b', parentPageId: null },
      { id: 'p2', title: 'Notes', slugId: 'c/d', parentPageId: null },
    ];

    const result = buildVaultLayout(crafted);
    const suffixedStem = result.get('p2')!.stem;

    expect(suffixedStem).not.toContain('/');
    expect(suffixedStem).not.toContain('\\');
    // The "/" is expected to come out as a dash.
    expect(suffixedStem).toBe('Notes ~c-d');
  });

  it('disambiguates two ORPHAN ancestors at the NAME pass so their children stay in sync', () => {
    // Two orphan PARENTS carry the same title but point at DIFFERENT missing
    // parents, so comparing by raw parentPageId would never pair them. Both
    // end up at the vault root, so their names must already be distinct when
    // the children's folder segments are derived from them; otherwise a
    // child's folder would no longer match its parent's file name.
    const family: PageNode[] = [
      { id: 'p1', title: 'Dup', slugId: 's1', parentPageId: 'missing-1' },
      { id: 'p2', title: 'Dup', slugId: 's2', parentPageId: 'missing-2' },
      { id: 'c1', title: 'Child', parentPageId: 'p1' },
      { id: 'c2', title: 'Child', parentPageId: 'p2' },
    ];

    const result = buildVaultLayout(family);
    const parent1 = result.get('p1')!;
    const parent2 = result.get('p2')!;
    const child1 = result.get('c1')!;
    const child2 = result.get('c2')!;

    // Both orphan parents sit at the root with DISTINCT stems.
    expect(parent1.segments).toEqual([]);
    expect(parent2.segments).toEqual([]);
    expect(parent1.stem).toBe('Dup');
    expect(parent2.stem).toBe('Dup ~s2');
    expect(parent1.stem).not.toBe(parent2.stem);
    // Each child's single folder segment equals its parent's final stem.
    expect(child1.segments).toEqual([parent1.stem]);
    expect(child2.segments).toEqual([parent2.stem]);
    // All four full paths are unique.
    const allPaths = [parent1, parent2, child1, child2].map(fullPath);
    expect(new Set(allPaths).size).toBe(allPaths.length);
  });
});

View File

@@ -0,0 +1,41 @@
import { describe, expect, it } from 'vitest';
import { createHash } from 'node:crypto';
import { bodyHash } from '../src/engine/loop-guard.js';
// Loop-guard body hash (SPEC §10 "body hash"). A future pull-side
// poll-suppression is meant to use this hash to recognize our OWN write, so it
// MUST be deterministic (equal input -> equal hash) and discriminating
// (different input -> different hash).
describe('bodyHash (pure, SPEC §10)', () => {
  // Independent reference digest: sha256 over the UTF-8 bytes, as lowercase hex.
  const sha256Hex = (input: string): string =>
    createHash('sha256').update(input, 'utf8').digest('hex');

  it('is deterministic — same input yields the same hash', () => {
    const body = '# Title\n\nsome body with <span data-comment-id="x">mark</span>\n';
    const first = bodyHash(body);
    const second = bodyHash(body);
    expect(first).toBe(second);
  });

  it('differs for different input', () => {
    const alpha = bodyHash('alpha');
    expect(alpha).not.toBe(bodyHash('beta'));
    // A single differing character is enough to change the digest.
    expect(bodyHash('alpha')).not.toBe(bodyHash('alphb'));
  });

  it('returns lowercase sha256 hex (64 chars)', () => {
    const digest = bodyHash('hello');
    expect(digest).toMatch(/^[0-9a-f]{64}$/);
    // Must agree with an independently computed sha256 of the same bytes.
    expect(digest).toBe(sha256Hex('hello'));
  });

  it('hashes the empty string to the well-known sha256 empty digest', () => {
    const emptyDigest =
      'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855';
    expect(bodyHash('')).toBe(emptyDigest);
  });

  it('is sensitive to UTF-8 content (Cyrillic body)', () => {
    // NOTE(review): the two literals below look alike; the second appears to
    // end in a Latin "a" instead of the Cyrillic one — keep them byte-exact.
    expect(bodyHash('Колонка')).not.toBe(bodyHash('Колонкa'));
    expect(bodyHash('Колонка')).toBe(sha256Hex('Колонка'));
  });
});

View File

@@ -0,0 +1,227 @@
import { describe, expect, it } from 'vitest';
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
// collaboration.ts and mutates global DOM at import time).
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
// markdown-converter.ts is the weakest pure module (report §2). These golden
// tests close the gaps the base markdown-converter.test.ts leaves open:
// columns/column wrapper, embed/audio/pdf (used to emit nothing), drawio/
// excalidraw data-align presence rule, the remaining inline-mark matrix,
// paragraph.textAlign, subpages + unknown-in-container fallback, escaping
// idempotence, table-cell pipe/newline sanitization, and empty/single-column
// tables. Cases already asserted in the base file are NOT repeated.
// Builds a minimal ProseMirror-style doc whose content is the given nodes.
function doc(...nodes: any[]) {
  return { type: 'doc', content: nodes };
}
// Shorthand: wrap one node in a doc and run it through the converter.
function c(node: any) {
  const wrapped = doc(node);
  return convertProseMirrorToMarkdown(wrapped);
}
// Builds a text node; the `marks` key is only present when marks are passed
// (an empty array still counts as passed).
const text = (t: string, marks?: any[]) => {
  if (marks) {
    return { type: 'text', text: t, marks };
  }
  return { type: 'text', text: t };
};
// Builds a paragraph node around the given inline children.
function para(...inline: any[]) {
  return { type: 'paragraph', content: inline };
}
describe('columns / column (raw-HTML layout wrapper)', () => {
  it('wraps a multi-column layout as nested data-type divs with the children inside (regression: children unwrapped)', () => {
    const leftColumn = { type: 'column', attrs: { width: 50 }, content: [para(text('L'))] };
    const rightColumn = { type: 'column', content: [para(text('R'))] };

    const html = c({
      type: 'columns',
      attrs: { layout: 'two' },
      content: [leftColumn, rightColumn],
    });

    // Only the column that carries a width gets a data-width attribute.
    expect(html).toBe(
      [
        '<div data-type="columns" data-layout="two">',
        '<div data-type="column" data-width="50"><p>L</p></div>',
        '<div data-type="column"><p>R</p></div>',
        '</div>',
      ].join(''),
    );
  });

  it('omits the default widthMode "normal" but emits a non-default one', () => {
    // Same single-column layout rendered with a given widthMode.
    const renderWithWidthMode = (widthMode: string) =>
      c({
        type: 'columns',
        attrs: { layout: 'two', widthMode },
        content: [{ type: 'column', content: [para(text('x'))] }],
      });

    expect(renderWithWidthMode('normal')).not.toContain('data-width-mode');
    expect(renderWithWidthMode('full')).toContain('data-width-mode="full"');
  });
});
describe('embed / audio / pdf (previously emitted nothing — invisible regression)', () => {
  it('embed emits div[data-type="embed"] with src/provider', () => {
    const html = c({ type: 'embed', attrs: { src: 'https://x.com/e', provider: 'iframe' } });
    expect(html).toBe(
      '<div data-type="embed" data-src="https://x.com/e" data-provider="iframe"></div>',
    );
  });

  it('audio emits a div-wrapped <audio> with src', () => {
    const html = c({ type: 'audio', attrs: { src: '/a.mp3' } });
    expect(html).toBe('<div><audio src="/a.mp3"></audio></div>');
  });

  it('pdf emits div[data-type="pdf"] with src and name', () => {
    const html = c({ type: 'pdf', attrs: { src: '/d.pdf', name: 'd.pdf' } });
    // Note the expected markup uses a plain `src` attribute here, not `data-src`.
    expect(html).toBe('<div data-type="pdf" src="/d.pdf" data-name="d.pdf"></div>');
  });
});
describe('drawio / excalidraw data-align asymmetry (SPEC §11)', () => {
  it('drawio: data-align is ABSENT when align is unset', () => {
    const html = c({ type: 'drawio', attrs: { src: '/d.drawio' } });
    expect(html).toBe('<div data-type="drawio" data-src="/d.drawio"></div>');
    expect(html).not.toContain('data-align');
  });

  it('drawio: data-align is PRESENT for a non-default align', () => {
    const html = c({ type: 'drawio', attrs: { src: '/d.drawio', align: 'right' } });
    expect(html).toBe(
      '<div data-type="drawio" data-src="/d.drawio" data-align="right"></div>',
    );
  });

  it('excalidraw: data-align is ABSENT when align is unset', () => {
    const html = c({ type: 'excalidraw', attrs: { src: '/e.excalidraw' } });
    expect(html).toBe('<div data-type="excalidraw" data-src="/e.excalidraw"></div>');
    expect(html).not.toContain('data-align');
  });
});
describe('inline-mark matrix (underline/sub/sup/highlight±color/textStyle/comment)', () => {
  // Renders the given text as a one-paragraph doc carrying the given marks.
  const renderMarked = (content: string, marks: any[]) => c(para(text(content, marks)));

  it('emits the schema HTML for each remaining inline mark in one matrix', () => {
    // Each row: [marks applied to the text "m", expected converter output].
    const matrix: [any[], string][] = [
      [[{ type: 'underline' }], '<u>m</u>'],
      [[{ type: 'subscript' }], '<sub>m</sub>'],
      [[{ type: 'superscript' }], '<sup>m</sup>'],
      [[{ type: 'highlight' }], '<mark>m</mark>'],
      [
        [{ type: 'highlight', attrs: { color: '#ff0000' } }],
        '<mark style="background-color: #ff0000">m</mark>',
      ],
      [
        [{ type: 'textStyle', attrs: { color: '#00ff00' } }],
        '<span style="color: #00ff00">m</span>',
      ],
      [
        [{ type: 'comment', attrs: { commentId: 'cid-1' } }],
        '<span data-comment-id="cid-1">m</span>',
      ],
      [
        [{ type: 'comment', attrs: { commentId: 'cid-1', resolved: true } }],
        '<span data-comment-id="cid-1" data-resolved="true">m</span>',
      ],
    ];

    matrix.forEach(([marks, expected]) => {
      expect(renderMarked('m', marks)).toBe(expected);
    });
  });

  it('a textStyle mark with no color emits nothing (plain text passes through)', () => {
    const colorless = [{ type: 'textStyle', attrs: {} }];
    expect(renderMarked('plain', colorless)).toBe('plain');
  });

  it('a comment mark with no commentId emits nothing (plain text)', () => {
    const anonymous = [{ type: 'comment', attrs: {} }];
    expect(renderMarked('plain', anonymous)).toBe('plain');
  });
});
describe('paragraph.textAlign -> <div align>', () => {
  // A paragraph containing the text "x" with the given textAlign attribute.
  const aligned = (textAlign: string) => ({
    type: 'paragraph',
    attrs: { textAlign },
    content: [text('x')],
  });

  it('non-default alignment wraps the paragraph in <div align="...">', () => {
    const html = c(aligned('center'));
    expect(html).toBe('<div align="center">x</div>');
  });

  it('textAlign "left" (the default) is NOT wrapped', () => {
    const markdown = c(aligned('left'));
    expect(markdown).toBe('x');
  });
});
describe('subpages token + unknown-in-container fallback', () => {
  it('subpages emits the {{SUBPAGES}} placeholder token', () => {
    const rendered = c({ type: 'subpages' });
    expect(rendered).toBe('{{SUBPAGES}}');
  });

  it('an unknown block inside a raw-HTML container is wrapped in <div> (never markdown)', () => {
    // Children of `columns` are rendered as HTML. An unknown block type in
    // there must NOT drop back to markdown (it would land as literal text on
    // re-import); it is expected to be wrapped in a <div> so its children
    // survive.
    const unknownBlock = { type: 'weirdBlock', content: [para(text('kept'))] };

    const html = c({
      type: 'columns',
      attrs: { layout: 'two' },
      content: [{ type: 'column', content: [unknownBlock] }],
    });

    expect(html).toBe(
      [
        '<div data-type="columns" data-layout="two">',
        '<div data-type="column"><div><p>kept</p></div></div>',
        '</div>',
      ].join(''),
    );
  });

  it('an unknown TOP-LEVEL block falls back to its children only (markdown context)', () => {
    const rendered = c({ type: 'totallyUnknown', content: [text('inner')] });
    expect(rendered).toBe('inner');
  });
});
describe('escaping idempotence (SPEC §11 phantom-diff guard)', () => {
  it('escapeAttr escapes ONLY & and " in an attribute context, and is idempotent', () => {
    // Renders a mathBlock whose `text` attribute holds the given value.
    const renderMath = (value: string) => c({ type: 'mathBlock', attrs: { text: value } });

    // In the attribute value, & becomes &amp; and " becomes &quot;.
    const escaped = renderMath('a & "b"');
    expect(escaped).toBe(
      '<div data-type="mathBlock" data-katex="true" text="a &amp; &quot;b&quot;"></div>',
    );

    // < and > are deliberately left alone (escaping them would accumulate
    // across round-trips).
    const withAngles = renderMath('a < b > c');
    expect(withAngles).toContain('text="a < b > c"');
    expect(withAngles).not.toContain('&lt;');
    expect(withAngles).not.toContain('&gt;');
  });

  it('encodeMdUrl turns a space into %20 in an image src (single inert URL token)', () => {
    const markdown = c({ type: 'image', attrs: { alt: 'c', src: '/my pic.png' } });
    expect(markdown).toBe('![c](/my%20pic.png)');
  });
});
describe('table-cell sanitization (| and newline must not corrupt the GFM row)', () => {
  it('escapes a literal pipe and collapses an inter-block newline in a cell', () => {
    // The body cell holds two block paragraphs, the first containing a pipe.
    // Expected: the pipe comes out as \| and the two blocks are joined by a
    // single space, so the row stays on one line.
    const headerRow = {
      type: 'tableRow',
      content: [{ type: 'tableHeader', content: [para(text('H'))] }],
    };
    const bodyRow = {
      type: 'tableRow',
      content: [{ type: 'tableCell', content: [para(text('a|b')), para(text('c'))] }],
    };

    const markdown = c({ type: 'table', content: [headerRow, bodyRow] });

    expect(markdown).toBe(['| H |', '| --- |', '| a\\|b c |'].join('\n'));
  });
});
describe('empty / single-column tables', () => {
  it('a table with no rows renders as the empty string', () => {
    const rendered = c({ type: 'table', content: [] });
    expect(rendered).toBe('');
  });

  it('a single-column GFM table emits one column with a "---" separator', () => {
    const headerRow = {
      type: 'tableRow',
      content: [{ type: 'tableHeader', content: [para(text('Only'))] }],
    };
    const bodyRow = {
      type: 'tableRow',
      content: [{ type: 'tableCell', content: [para(text('v'))] }],
    };

    const markdown = c({ type: 'table', content: [headerRow, bodyRow] });

    expect(markdown).toBe(['| Only |', '| --- |', '| v |'].join('\n'));
  });
});

View File

@@ -0,0 +1,507 @@
import { describe, expect, it } from 'vitest';
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
// collaboration.ts and mutates global DOM at import time).
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
// Minimal ProseMirror doc around the given node(s). The tests below rely on a
// single-node doc converting to exactly that node's rendered, trimmed string
// (the converter is described as joining doc children with "\n\n" and then
// trimming the whole output).
function doc(...nodes: any[]) {
  return { type: 'doc', content: nodes };
}
// Helper: a text node. The `marks` key is only present when marks are passed
// (an empty array still counts as passed).
const text = (t: string, marks?: any[]) => {
  if (marks) {
    return { type: 'text', text: t, marks };
  }
  return { type: 'text', text: t };
};
// Helper: a paragraph node around the given inline children.
function para(...inline: any[]) {
  return { type: 'paragraph', content: inline };
}
describe('convertProseMirrorToMarkdown', () => {
// ---------------------------------------------------------------------------
describe('headings', () => {
it('emits the right number of "#" for levels 1-6', () => {
for (let level = 1; level <= 6; level++) {
const out = convertProseMirrorToMarkdown(
doc({ type: 'heading', attrs: { level }, content: [text('H')] }),
);
expect(out).toBe('#'.repeat(level) + ' H');
}
});
it('defaults to level 1 when level is missing', () => {
const out = convertProseMirrorToMarkdown(
doc({ type: 'heading', content: [text('NoLevel')] }),
);
expect(out).toBe('# NoLevel');
});
});
// ---------------------------------------------------------------------------
describe('text marks', () => {
it('bold', () => {
expect(
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'bold' }])))),
).toBe('**x**');
});
it('italic', () => {
expect(
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'italic' }])))),
).toBe('*x*');
});
it('strike', () => {
expect(
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'strike' }])))),
).toBe('~~x~~');
});
it('inline code (sole mark) uses backtick span', () => {
expect(
convertProseMirrorToMarkdown(doc(para(text('x', [{ type: 'code' }])))),
).toBe('`x`');
});
it('code + another mark switches to nested HTML (no backtick form)', () => {
// marks array order drives nesting: bold first wraps, then code wraps that.
const out = convertProseMirrorToMarkdown(
doc(para(text('x', [{ type: 'bold' }, { type: 'code' }]))),
);
expect(out).toBe('<code><strong>x</strong></code>');
});
it('code + strike combo emits <code> wrapping <s>', () => {
const out = convertProseMirrorToMarkdown(
doc(para(text('x', [{ type: 'strike' }, { type: 'code' }]))),
);
expect(out).toBe('<code><s>x</s></code>');
});
});
// ---------------------------------------------------------------------------
describe('links', () => {
it('href only', () => {
const out = convertProseMirrorToMarkdown(
doc(para(text('site', [{ type: 'link', attrs: { href: 'https://e.com' } }]))),
);
expect(out).toBe('[site](https://e.com)');
});
it('href + title with an embedded double quote is escaped', () => {
const out = convertProseMirrorToMarkdown(
doc(
para(
text('site', [
{ type: 'link', attrs: { href: 'https://e.com', title: 'a "b" c' } },
]),
),
),
);
// The markdown link-title form escapes the inner " as \".
expect(out).toBe('[site](https://e.com "a \\"b\\" c")');
});
});
// ---------------------------------------------------------------------------
describe('image', () => {
it('percent-encodes spaces and parentheses in src', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'image',
attrs: { alt: 'cap', src: '/files/my pic (1).png' },
}),
);
// space -> %20, ( -> %28, ) -> %29
expect(out).toBe('![cap](/files/my%20pic%20%281%29.png)');
});
it('empty alt and missing src render harmlessly', () => {
const out = convertProseMirrorToMarkdown(doc({ type: 'image', attrs: {} }));
expect(out).toBe('![]()');
});
});
// ---------------------------------------------------------------------------
describe('codeBlock', () => {
it('with language', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'codeBlock',
attrs: { language: 'ts' },
content: [text('const a = 1;')],
}),
);
expect(out).toBe('```ts\nconst a = 1;\n```');
});
it('without language emits empty info string', () => {
const out = convertProseMirrorToMarkdown(
doc({ type: 'codeBlock', content: [text('plain')] }),
);
expect(out).toBe('```\nplain\n```');
});
it('strips ALL trailing newlines for idempotency', () => {
const out = convertProseMirrorToMarkdown(
doc({ type: 'codeBlock', content: [text('a\n\n\n')] }),
);
// Every trailing "\n" is removed, then exactly one is re-added by the fence.
expect(out).toBe('```\na\n```');
});
});
// ---------------------------------------------------------------------------
describe('lists', () => {
it('bullet list', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'bulletList',
content: [
{ type: 'listItem', content: [para(text('one'))] },
{ type: 'listItem', content: [para(text('two'))] },
],
}),
);
expect(out).toBe('- one\n- two');
});
it('ordered list numbers items sequentially', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'orderedList',
content: [
{ type: 'listItem', content: [para(text('a'))] },
{ type: 'listItem', content: [para(text('b'))] },
{ type: 'listItem', content: [para(text('c'))] },
],
}),
);
expect(out).toBe('1. a\n2. b\n3. c');
});
it('nested bullet list indents the child by the 2-col marker width', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'bulletList',
content: [
{
type: 'listItem',
content: [
para(text('parent')),
{
type: 'bulletList',
content: [{ type: 'listItem', content: [para(text('child'))] }],
},
],
},
],
}),
);
// First line carries the marker; the nested list is indented 2 columns.
expect(out).toBe('- parent\n - child');
});
it('nested ordered list indents by the wider 3-col marker width', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'orderedList',
content: [
{
type: 'listItem',
content: [
para(text('parent')),
{
type: 'orderedList',
content: [{ type: 'listItem', content: [para(text('child'))] }],
},
],
},
],
}),
);
// "1. " is 3 columns wide, so the continuation indent is 3 spaces.
expect(out).toBe('1. parent\n 1. child');
});
});
// ---------------------------------------------------------------------------
describe('task list', () => {
it('unchecked and checked items', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'taskList',
content: [
{ type: 'taskItem', attrs: { checked: false }, content: [para(text('todo'))] },
{ type: 'taskItem', attrs: { checked: true }, content: [para(text('done'))] },
],
}),
);
expect(out).toBe('- [ ] todo\n- [x] done');
});
it('empty task item keeps its marker', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'taskList',
content: [{ type: 'taskItem', attrs: { checked: false }, content: [] }],
}),
);
expect(out).toBe('- [ ]');
});
});
// ---------------------------------------------------------------------------
describe('blockquote', () => {
it('single paragraph quote prefixes the line', () => {
const out = convertProseMirrorToMarkdown(
doc({ type: 'blockquote', content: [para(text('quoted'))] }),
);
expect(out).toBe('> quoted');
});
it('multi-paragraph quote separates blocks with a bare ">" line', () => {
const out = convertProseMirrorToMarkdown(
doc({
type: 'blockquote',
content: [para(text('first')), para(text('second'))],
}),
);
expect(out).toBe('> first\n>\n> second');
});
});
// ---------------------------------------------------------------------------
describe('breaks and rules', () => {
it('horizontal rule', () => {
expect(
convertProseMirrorToMarkdown(doc({ type: 'horizontalRule' })),
).toBe('---');
});
it('hard break emits two trailing spaces then newline', () => {
const out = convertProseMirrorToMarkdown(
doc(para(text('a'), { type: 'hardBreak' }, text('b'))),
);
expect(out).toBe('a \nb');
});
});
// ---------------------------------------------------------------------------
describe('tables', () => {
  // Every cell in these fixtures carries exactly one paragraph of plain text.
  const cellContent = (label: string) => [para(text(label))];

  it('GFM table emits alignment markers derived from header cells', () => {
    // Three explicitly aligned headers plus one with no attrs at all, which is
    // expected to produce the neutral "---" delimiter.
    const headerCells = [
      { type: 'tableHeader', attrs: { align: 'left' }, content: cellContent('L') },
      { type: 'tableHeader', attrs: { align: 'center' }, content: cellContent('C') },
      { type: 'tableHeader', attrs: { align: 'right' }, content: cellContent('R') },
      { type: 'tableHeader', content: cellContent('N') },
    ];
    const bodyCells = ['1', '2', '3', '4'].map((label) => ({
      type: 'tableCell',
      content: cellContent(label),
    }));
    const table = {
      type: 'table',
      content: [
        { type: 'tableRow', content: headerCells },
        { type: 'tableRow', content: bodyCells },
      ],
    };

    const expected =
      '| L | C | R | N |\n' +
      '| :-- | :-: | --: | --- |\n' +
      '| 1 | 2 | 3 | 4 |';
    expect(convertProseMirrorToMarkdown(doc(table))).toBe(expected);
  });

  it('spanned table (colspan/rowspan) emits raw <table> HTML', () => {
    // A colspan on the header cell is what this case expects to trigger the
    // raw-HTML output instead of a GFM pipe table.
    const wideHeaderRow = {
      type: 'tableRow',
      content: [
        { type: 'tableHeader', attrs: { colspan: 2 }, content: cellContent('wide') },
      ],
    };
    const bodyRow = {
      type: 'tableRow',
      content: [
        { type: 'tableCell', content: cellContent('a') },
        { type: 'tableCell', content: cellContent('b') },
      ],
    };

    const out = convertProseMirrorToMarkdown(
      doc({ type: 'table', content: [wideHeaderRow, bodyRow] }),
    );

    const expected = [
      '<table><tbody>',
      '<tr><th colspan="2"><p>wide</p></th></tr>',
      '<tr><td><p>a</p></td><td><p>b</p></td></tr>',
      '</tbody></table>',
    ].join('');
    expect(out).toBe(expected);
  });
});
// ---------------------------------------------------------------------------
describe('callout and details', () => {
  it('callout uses lowercased type fence', () => {
    // The attr is upper-case on purpose; the fence must come out lower-case.
    const callout = {
      type: 'callout',
      attrs: { type: 'WARNING' },
      content: [para(text('beware'))],
    };
    expect(convertProseMirrorToMarkdown(doc(callout))).toBe(
      ':::warning\nbeware\n:::',
    );
  });

  it('callout defaults to info', () => {
    // No attrs at all: the expected fence type is "info".
    const callout = { type: 'callout', content: [para(text('hi'))] };
    expect(convertProseMirrorToMarkdown(doc(callout))).toBe(':::info\nhi\n:::');
  });

  it('details emits summary + content wrapped in <details>', () => {
    const summary = { type: 'detailsSummary', content: [text('Title')] };
    const body = { type: 'detailsContent', content: [para(text('Body'))] };
    const out = convertProseMirrorToMarkdown(
      doc({ type: 'details', content: [summary, body] }),
    );
    // Expected layout: opening tag, summary line, blank line, body, closing tag.
    expect(out).toBe('<details>\n<summary>Title</summary>\n\nBody\n</details>');
  });
});
// ---------------------------------------------------------------------------
describe('math', () => {
  it('inline math carries LaTeX in a text attr WITHOUT escaping < or >', () => {
    const inlineMath = { type: 'mathInline', attrs: { text: 'a < b' } };
    const out = convertProseMirrorToMarkdown(doc(para(inlineMath)));

    // The "<" must appear raw in the attribute value, never as an entity.
    const expected =
      '<span data-type="mathInline" data-katex="true" text="a < b"></span>';
    expect(out).toBe(expected);
    expect(out).not.toContain('&lt;');
  });

  it('block math carries LaTeX in a text attr WITHOUT escaping < or >', () => {
    const blockMath = { type: 'mathBlock', attrs: { text: 'x > y & z' } };
    const out = convertProseMirrorToMarkdown(doc(blockMath));

    // "&" is expected as "&amp;", while ">" stays raw.
    const expected =
      '<div data-type="mathBlock" data-katex="true" text="x > y &amp; z"></div>';
    expect(out).toBe(expected);
    for (const entity of ['&lt;', '&gt;']) {
      expect(out).not.toContain(entity);
    }
  });
});
// ---------------------------------------------------------------------------
describe('inline atoms and media', () => {
  it('mention emits schema span with data-* attrs and visible label', () => {
    const mention = {
      type: 'mention',
      attrs: { id: 'u1', label: 'Alice', entityType: 'user' },
    };
    const out = convertProseMirrorToMarkdown(doc(para(mention)));

    expect(out).toBe(
      '<span data-type="mention" data-id="u1" data-label="Alice" data-entity-type="user">@Alice</span>',
    );
  });

  it('attachment emits div with schema data-attachment-* attrs', () => {
    const attachment = {
      type: 'attachment',
      attrs: {
        url: '/files/x.zip',
        name: 'x.zip',
        mime: 'application/zip',
        size: 99,
      },
    };
    const out = convertProseMirrorToMarkdown(doc(attachment));

    // Pieces keep their trailing spaces so the joined string is one tag.
    const expected = [
      '<div data-type="attachment" data-attachment-url="/files/x.zip" ',
      'data-attachment-name="x.zip" data-attachment-mime="application/zip" ',
      'data-attachment-size="99"></div>',
    ].join('');
    expect(out).toBe(expected);
  });

  it('video emits a <div>-wrapped <video> with schema attrs', () => {
    const video = {
      type: 'video',
      attrs: { src: '/v.mp4', alt: 'clip', width: 640 },
    };
    const out = convertProseMirrorToMarkdown(doc(video));

    // The `alt` attr is expected to surface as aria-label on the <video>.
    expect(out).toBe(
      '<div><video src="/v.mp4" aria-label="clip" width="640"></video></div>',
    );
  });

  it('youtube emits a div[data-type="youtube"] with data-src', () => {
    const youtube = {
      type: 'youtube',
      attrs: { src: 'https://youtu.be/abc', width: 560, height: 315 },
    };
    const out = convertProseMirrorToMarkdown(doc(youtube));

    const expected = [
      '<div data-type="youtube" data-src="https://youtu.be/abc" ',
      'data-width="560" data-height="315"></div>',
    ].join('');
    expect(out).toBe(expected);
  });
});
// ---------------------------------------------------------------------------
describe('edge cases', () => {
  // Degenerate inputs that must all convert to the empty string.
  const emptyInputs: Array<[string, unknown]> = [
    ['null content returns ""', null],
    ['empty object returns ""', {}],
    ['doc with no content returns ""', { type: 'doc' }],
  ];
  for (const [title, input] of emptyInputs) {
    it(title, () => {
      expect(convertProseMirrorToMarkdown(input)).toBe('');
    });
  }

  it('unknown node type falls back to children-only (no throw, text preserved)', () => {
    const unknownNode = { type: 'totallyUnknownType', content: [text('kept')] };
    expect(convertProseMirrorToMarkdown(doc(unknownNode))).toBe('kept');
  });

  it('deeply nested structure does not stack-overflow', () => {
    // Wrap a leaf item in 200 levels of listItem -> bulletList nesting.
    let item: any = { type: 'listItem', content: [para(text('leaf'))] };
    for (let remaining = 200; remaining > 0; remaining--) {
      const nestedList = { type: 'bulletList', content: [item] };
      item = { type: 'listItem', content: [para(text('lvl')), nestedList] };
    }
    const root = doc({ type: 'bulletList', content: [item] });

    // First prove the conversion does not throw, then inspect its output.
    expect(() => convertProseMirrorToMarkdown(root)).not.toThrow();
    const out = convertProseMirrorToMarkdown(root);
    expect(out).toContain('leaf');
    expect(out.startsWith('- lvl')).toBe(true);
  });
});
});

View File

@@ -0,0 +1,218 @@
import { describe, expect, it } from 'vitest';
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
// collaboration.ts and mutates global DOM at import time).
import {
serializeDocmostMarkdown,
parseDocmostMarkdown,
serializeDocmostMarkdownBody,
type DocmostMdMeta,
} from '../src/lib/markdown-document.js';
// Shared metadata fixture used by the tests below. It describes a root page:
// parentPageId is explicitly null rather than omitted.
const meta: DocmostMdMeta = {
  version: 1,
  pageId: 'p1',
  slugId: 's1',
  title: 'Hello',
  spaceId: 'sp1',
  parentPageId: null,
};
describe('serializeDocmostMarkdown / parseDocmostMarkdown', () => {
  // Exact text of an empty trailing comments block as the serializer writes it.
  const emptyCommentsBlock = '<!-- docmost:comments\n[]\n-->';
  // The shared meta fixture wrapped in its envelope block (LF line endings).
  const metaBlock = `<!-- docmost:meta\n${JSON.stringify(meta)}\n-->`;

  // ---------------------------------------------------------------------------
  describe('round-trip', () => {
    it('round-trips meta, body, and comments', () => {
      const body = '# Title\n\nSome **body** text.';
      const comments = [{ id: 'c1', text: 'a note' }];

      const parsed = parseDocmostMarkdown(
        serializeDocmostMarkdown(meta, body, comments),
      );

      expect(parsed.meta).toEqual(meta);
      expect(parsed.body).toBe(body);
      expect(parsed.comments).toEqual(comments);
    });

    it('emits a comments block with [] even when there are no comments', () => {
      const serialized = serializeDocmostMarkdown(meta, 'body', []);
      expect(serialized).toContain(emptyCommentsBlock);

      const { comments, body } = parseDocmostMarkdown(serialized);
      expect(comments).toEqual([]);
      expect(body).toBe('body');
    });

    it('non-array comments arg is normalized to [] in the serialized output', () => {
      // Deliberately passes null where an array is expected.
      const serialized = serializeDocmostMarkdown(meta, 'body', null as any);
      expect(serialized).toContain(emptyCommentsBlock);
    });

    it('trims surrounding whitespace from the body on serialize', () => {
      const serialized = serializeDocmostMarkdown(meta, '\n\n body \n\n', []);
      expect(parseDocmostMarkdown(serialized).body).toBe('body');
    });
  });

  // ---------------------------------------------------------------------------
  describe('missing blocks (tolerant parsing)', () => {
    it('missing meta block yields meta:null', () => {
      const parsed = parseDocmostMarkdown(
        'Just a body.\n\n<!-- docmost:comments\n[]\n-->\n',
      );
      expect(parsed.meta).toBeNull();
      expect(parsed.body).toBe('Just a body.');
      expect(parsed.comments).toEqual([]);
    });

    it('missing comments block yields comments:null and treats all as body', () => {
      const parsed = parseDocmostMarkdown(`${metaBlock}\n\nbody only`);
      expect(parsed.meta).toEqual(meta);
      expect(parsed.comments).toBeNull();
      expect(parsed.body).toBe('body only');
    });

    it('plain markdown with neither block: meta and comments null, whole input is body', () => {
      const plain = '# Plain\n\nNo envelope here.';
      const parsed = parseDocmostMarkdown(plain);
      expect(parsed.meta).toBeNull();
      expect(parsed.comments).toBeNull();
      expect(parsed.body).toBe(plain);
    });
  });

  // ---------------------------------------------------------------------------
  describe('CRLF normalization', () => {
    it('parses a CRLF-encoded document the same as LF', () => {
      const comments = [{ id: 'c1' }];
      const lfDoc = serializeDocmostMarkdown(meta, 'line one\nline two', comments);
      // Re-encode every line ending as CRLF before parsing.
      const crlfDoc = lfDoc.replace(/\n/g, '\r\n');

      const parsed = parseDocmostMarkdown(crlfDoc);

      expect(parsed.meta).toEqual(meta);
      expect(parsed.body).toBe('line one\nline two');
      expect(parsed.comments).toEqual([{ id: 'c1' }]);
    });
  });

  // ---------------------------------------------------------------------------
  describe('only the final document-ending comments block is captured', () => {
    it('an earlier literal docmost:comments opener inside the body stays in the body', () => {
      // The body itself quotes a comments block followed by more prose, so that
      // quoted block does not end the document. The serializer then appends
      // the real trailing block.
      const literalBlock = '<!-- docmost:comments\n[{"fake":true}]\n-->';
      const bodyWithLiteral = `Here is how the format looks:\n\n${literalBlock}\n\nand more prose after it.`;

      const parsed = parseDocmostMarkdown(
        serializeDocmostMarkdown(meta, bodyWithLiteral, [{ id: 'real' }]),
      );

      // Comments come from the final block only...
      expect(parsed.comments).toEqual([{ id: 'real' }]);
      // ...and the quoted block survives verbatim inside the body.
      expect(parsed.body).toContain(
        '<!-- docmost:comments\n[{"fake":true}]\n-->',
      );
      expect(parsed.body).toContain('and more prose after it.');
    });

    it('a literal opener whose closer does NOT end the doc is left entirely in the body', () => {
      // There is no trailing block here: text follows the closer, so nothing
      // may be stripped and comments must stay null.
      const input = `${metaBlock}\n\nbody start\n\n<!-- docmost:comments\n[]\n-->\n\ntrailing text not ending the doc`;

      const parsed = parseDocmostMarkdown(input);

      expect(parsed.comments).toBeNull();
      expect(parsed.body).toContain('<!-- docmost:comments');
      expect(parsed.body).toContain('trailing text not ending the doc');
    });
  });

  // ---------------------------------------------------------------------------
  describe('end-anchored comments closer tolerates CRLF + trailing whitespace', () => {
    it('captures the final comments block when its "-->" closer has CRLF and trailing spaces', () => {
      // The whole document uses CRLF line endings and the final "-->" closer
      // is followed by trailing whitespace. The block must still be recognised
      // as the document-ending comments block and kept out of the body.
      const crlfDoc = [
        `<!-- docmost:meta\r\n${JSON.stringify(meta)}\r\n-->\r\n\r\n`,
        `the body line\r\n\r\n`,
        `<!-- docmost:comments\r\n[{"id":"c-crlf"}]\r\n--> \r\n`,
      ].join('');

      const parsed = parseDocmostMarkdown(crlfDoc);

      expect(parsed.meta).toEqual(meta);
      expect(parsed.body).toBe('the body line');
      expect(parsed.comments).toEqual([{ id: 'c-crlf' }]);
    });
  });

  // ---------------------------------------------------------------------------
  describe('malformed JSON throws a clear error', () => {
    it('throws on malformed meta JSON', () => {
      const broken = '<!-- docmost:meta\n{not valid json}\n-->\n\nbody';
      expect(() => parseDocmostMarkdown(broken)).toThrow(/docmost:meta JSON/);
    });

    it('throws on malformed comments JSON', () => {
      const broken = 'body\n\n<!-- docmost:comments\n[not, valid]\n-->\n';
      expect(() => parseDocmostMarkdown(broken)).toThrow(/docmost:comments JSON/);
    });
  });
});
describe('serializeDocmostMarkdownBody', () => {
  // Serialize with the body-only serializer, then parse the result straight back.
  const bodyRoundTrip = (m: DocmostMdMeta, body: string) =>
    parseDocmostMarkdown(serializeDocmostMarkdownBody(m, body));

  it('emits NO comments block', () => {
    const serialized = serializeDocmostMarkdownBody(meta, 'just the body');
    expect(serialized).not.toContain('docmost:comments');
    expect(serialized).toContain('<!-- docmost:meta');
  });

  it('serialize -> parse preserves meta and the trimmed body, comments null (SPEC §3)', () => {
    const fullMeta: DocmostMdMeta = {
      version: 1,
      pageId: 'page-123',
      slugId: 'slug-abc',
      title: 'My Page',
      spaceId: 'space-1',
      parentPageId: 'parent-9',
    };
    const body = 'Hello\n\nWorld';

    const parsed = bodyRoundTrip(fullMeta, body);

    expect(parsed.meta).toEqual(fullMeta);
    expect(parsed.body).toBe(body);
    expect(parsed.comments).toBeNull();
  });

  it('preserves a null parentPageId for a root page', () => {
    // The shared `meta` fixture has parentPageId: null.
    const parsed = bodyRoundTrip(meta, 'body text');
    expect(parsed.meta).toEqual(meta);
    expect(parsed.comments).toBeNull();
  });

  it('produces a parseable file for an empty or missing body', () => {
    const minimal: DocmostMdMeta = { version: 1, pageId: 'p-empty' };

    // Case 1: an explicit empty-string body.
    const parsedEmpty = bodyRoundTrip(minimal, '');
    expect(parsedEmpty.meta).toEqual(minimal);
    expect(parsedEmpty.body).toBe('');
    expect(parsedEmpty.comments).toBeNull();

    // Case 2: body is undefined at runtime (cast past the type checker); the
    // parsed body is expected to be "".
    const parsedMissing = bodyRoundTrip(minimal, undefined as unknown as string);
    expect(parsedMissing.meta).toEqual(minimal);
    expect(parsedMissing.body).toBe('');
    expect(parsedMissing.comments).toBeNull();
  });

  it('trims the body', () => {
    expect(bodyRoundTrip(meta, '\n\n hi \n').body).toBe('hi');
  });
});

View File

@@ -0,0 +1,66 @@
import { describe, expect, it } from 'vitest';
import {
serializeDocmostMarkdownBody,
parseDocmostMarkdown,
type DocmostMdMeta,
} from 'docmost-client';
describe('serializeDocmostMarkdownBody round-trip (SPEC §3)', () => {
  it('serialize -> parse preserves meta and the trimmed body, with no comments block', () => {
    const body = 'Hello\n\nWorld';
    const meta: DocmostMdMeta = {
      version: 1,
      pageId: 'page-123',
      slugId: 'slug-abc',
      title: 'My Page',
      spaceId: 'space-1',
      parentPageId: 'parent-9',
    };

    const parsed = parseDocmostMarkdown(serializeDocmostMarkdownBody(meta, body));

    expect(parsed.meta).toEqual(meta);
    expect(parsed.body).toBe(body);
    // The body-only serializer writes no trailing docmost:comments block (SPEC §3).
    expect(parsed.comments).toBeNull();
  });

  it('preserves a null parentPageId for a root page', () => {
    const meta: DocmostMdMeta = {
      version: 1,
      pageId: 'root-1',
      slugId: 'root-slug',
      title: 'Root',
      spaceId: 'space-1',
      parentPageId: null,
    };

    const parsed = parseDocmostMarkdown(
      serializeDocmostMarkdownBody(meta, 'body text'),
    );

    expect(parsed.meta).toEqual(meta);
    expect(parsed.comments).toBeNull();
  });

  it('produces a parseable file for an empty/missing body', () => {
    const meta: DocmostMdMeta = { version: 1, pageId: 'p-empty' };

    // Empty string body: parsing must not throw and must yield an empty body.
    const emptyFile = serializeDocmostMarkdownBody(meta, '');
    expect(() => parseDocmostMarkdown(emptyFile)).not.toThrow();
    const fromEmpty = parseDocmostMarkdown(emptyFile);
    expect(fromEmpty.meta).toEqual(meta);
    expect(fromEmpty.body).toBe('');
    expect(fromEmpty.comments).toBeNull();

    // Body undefined at runtime (cast past the type checker): same expectations.
    const missingFile = serializeDocmostMarkdownBody(
      meta,
      undefined as unknown as string,
    );
    expect(() => parseDocmostMarkdown(missingFile)).not.toThrow();
    const fromMissing = parseDocmostMarkdown(missingFile);
    expect(fromMissing.meta).toEqual(meta);
    expect(fromMissing.body).toBe('');
    expect(fromMissing.comments).toBeNull();
  });
});

View File

@@ -0,0 +1,698 @@
import { describe, expect, it, vi } from 'vitest';
import fc from 'fast-check';
// Each property test below performs a real ProseMirror <-> Markdown conversion
// NUM_RUNS times and takes roughly 4–5s. The generated inputs are deterministic
// (fixed SEED), so wall-clock time is the only source of flakiness: under the
// full suite's parallel worker load a test can overrun vitest's default 5000ms
// per-test timeout. Raise the limit to 30s so CI (which gates the docker build,
// see AGENTS.md) stays deterministic regardless of load.
vi.setConfig({ testTimeout: 30_000 });
// Import the converter DIRECTLY from src (NOT the docmost-client barrel) so we
// match the path used by the other converter unit tests.
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
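// markdownToProseMirror is imported from markdown-to-prosemirror.ts, the pure
// marked -> HTML -> generateJSON path extracted from upstream collaboration.ts.
// NOTE(review): the upstream module mutated the global DOM via jsdom at import
// time so @tiptap/html's generateJSON can run under Node — confirm the extracted
// module still does this.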
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
import { stripBlockIds } from '../src/engine/roundtrip-helpers.js';
// ---------------------------------------------------------------------------
// WHY THIS TEST EXISTS (SPEC §11 / "Задача №0", i.e. "Task No. 0")
//
// git is the state store, and git diffs byte-for-byte. The sync daemon does
// `export(markdown) -> import(ProseMirror) -> export(markdown)` on every pull,
// so if the *second* export differs from the first by even one byte, every
// pull produces a phantom diff -> endless commits/conflicts. The single
// property git actually needs is therefore MARKDOWN BYTE-STABILITY:
//
// md2 := export(import(export(doc))) MUST equal md1 := export(doc)
//
// This file fuzzes that invariant with fast-check over randomly generated,
// representative Docmost ProseMirror documents.
//
// ---------------------------------------------------------------------------
// THE "SUPPORTED SPACE" PROBLEM
//
// A NAIVE generator surfaces two different kinds of `md2 !== md1`:
//
// (a) GENUINE converter limitations — documented below as `it.fails` repros.
// (b) Inputs the converter LEGITIMATELY normalizes, i.e. markdown that is
// ambiguous or that the schema rewrites to a canonical form. These are
// NOT byte-stable by construction and are NOT bugs; the fix is to keep
// the generator inside the byte-stable / supported space.
//
// The following were all empirically confirmed (by probing the live converter)
// and are EXCLUDED from / canonicalized by the byte-stable arbitrary. Each is a
// markdown ambiguity or a schema/ProseMirror normalization, NOT a converter bug.
//
// * Text that re-triggers block/inline markdown syntax on re-parse:
// - a leading `>`/`*`/`-`/`#`/`1.` turns a paragraph into a blockquote/
// list/heading;
// - `a  b` (2+ spaces) collapses to `a b`;
// - `<b>` / `</div>` parse as real HTML tags (and run-concatenation can
// form `<word>` across a run boundary);
// - `&amp;` / `&lt;` decode back to `&` / `<`;
// - a lone backtick is a code-span delimiter and re-pairs globally.
// -> The text arbitrary emits space-joined tokens that BEGIN and END with an
// alphanumeric word, with any single special char confined to the middle
// (space-flanked). Every char the task requires (* _ [ ] ( ) | < > &, and
// more) is covered this way; the backtick is exercised via code spans.
// * A purely numeric image `alt` ("0") or link `title` ("0") is parsed back as
// a NUMBER and dropped by the converter's `value || ""` -> alt/title always
// carry at least one letter.
// * Callout types other than info/success/warning/danger normalize to `info`
// (schema only knows those four) -> generator restricts to those four.
// * A list item / callout / blockquote with MULTIPLE block children: the
// converter joins them with a single "\n", which marked re-parses as ONE
// merged paragraph ("- p1\n p2" -> "- p1 p2"). -> container bodies hold a
// SINGLE paragraph, optionally plus ONE nested list for lists.
// * `orderedList.start` / `1)` markers normalize to `1.` -> not emitted.
// * Two sibling lists sharing a marker family (bullet/task use "-", ordered
// uses "1.") MERGE into one list -> no two list blocks are adjacent.
// * TWO consecutive hard breaks render a blank line that marked eats as a
// paragraph break, and a trailing hard break is trimmed -> consecutive/
// trailing hard breaks are collapsed/removed.
// * Adjacent text runs with IDENTICAL marks ("**a****b****c**" -> "**abc**").
// A real ProseMirror doc never stores split same-mark runs (the editor
// coalesces them) -> the generator merges them too (normalizeInline).
//
// The GENUINE, real-but-intentional non-roundtrip limitations are kept HONEST as
// `it.fails` blocks below (so the suite stays green only because they are marked
// expected-to-fail, never by hiding them):
//
// 1. The `code` mark COMBINED with any other mark. The converter emits nested
// HTML (`<strong><code>x</code></strong>`), but the schema's `code` mark
// declares `excludes: "_"`, so on import every co-occurring mark is dropped
// and the run comes back as `code` only -> md2 == "`x`". Acknowledged in
// markdown-converter.ts (the long comment above the marks switch);
// impossible to round-trip both while `code` excludes them.
// 2. A BLOCK-level `image` placed BETWEEN other blocks. The Docmost image node
// is block-level but `![](url)` is inline; marked wraps it in a <p>, the
// schema hoists the <img> out and leaves an empty paragraph sibling, which
// injects an extra blank gap on the second export. An image IS byte-stable
// as the sole block (edge artifacts get trimmed) — covered by a green test.
// ---------------------------------------------------------------------------
/**
 * Performs one full export -> import -> export cycle on a ProseMirror document.
 *
 * @param doc ProseMirror document JSON to export.
 * @returns `md1` (first export), `doc2` (the document re-imported from `md1`)
 *   and `md2` (the export of `doc2`). Byte-stability means `md2 === md1`.
 */
async function roundTrip(doc: unknown): Promise<{ md1: string; md2: string; doc2: any }> {
  const firstExport = convertProseMirrorToMarkdown(doc);
  const reimported = await markdownToProseMirror(firstExport);
  return {
    md1: firstExport,
    md2: convertProseMirrorToMarkdown(reimported),
    doc2: reimported,
  };
}
// Fixed fast-check seed so every run draws the same inputs.
// NOTE(review): presumably passed as the `seed` option of fc.assert — the call
// sites are outside this view; confirm.
const SEED = 42;
// Number of generated cases per property.
// NOTE(review): presumably the `numRuns` option of fc.assert — confirm.
const NUM_RUNS = 100;
// ---------------------------------------------------------------------------
// Inline text arbitraries
// ---------------------------------------------------------------------------
// One alphanumeric "word" of 1..6 characters, with no markdown-significant
// characters. The regex already enforces the minimum length of 1; the
// non-empty filter is only a defensive guard.
const wordArb = fc
  .stringMatching(/^[A-Za-z0-9]{1,6}$/)
  .filter((word) => word !== '');
// Exactly ONE markdown-significant character, meant to appear only as an
// isolated, space-flanked token. The set covers every character the task names
// plus a few extras; each was verified byte-stable in that position.
//
// The backtick (`) is left out ON PURPOSE. A lone backtick is a code-span
// DELIMITER, so whether it round-trips depends on GLOBAL backtick pairing: a
// stray backtick next to a real code span ("A ` " + `code`) re-pairs into a
// different code span and loses a space, which is outside the byte-stable
// space. Backticks are still exercised as the `code`-mark delimiter and inside
// code blocks.
//
// The string below is spread into its individual characters, in this order:
// * _ [ ] ( ) { } | < > & # ! ~ = + -
const specialCharArb = fc.constantFrom(...'*_[](){}|<>&#!~=+-');
// "Safe special" text: tokens joined by single spaces, where the FIRST and LAST
// token are always alphanumeric words and up to three middle tokens may each be
// a word or one isolated special character.
//
// Why both ends must be words:
//   * Leading word — the line can never open with a block/inline trigger
//     (">", "*", "-", "#", "1." ...).
//   * Trailing word — adjacent text runs CONCATENATE with no separator, so a run
//     ending in a bare "<" next to a run starting with a letter would form a
//     fake HTML tag ("...0 <" + "A >" -> "0 <A >") that marked/jsdom strips.
//     Ending each run with a word keeps every special internal and space-flanked
//     even after concatenation.
const safeTextArb: fc.Arbitrary<string> = fc
  .tuple(
    wordArb,
    fc.array(fc.oneof(wordArb, specialCharArb), { minLength: 0, maxLength: 3 }),
    wordArb,
  )
  .map((parts) => {
    const [head, inner, tail] = parts;
    return [head].concat(inner, tail).join(' ');
  });
// A purely alphanumeric phrase of 1..3 space-separated words, for positions
// where even isolated special characters are unwanted (e.g. link text and
// mention labels).
const phraseArb: fc.Arbitrary<string> = fc
  .array(wordArb, { minLength: 1, maxLength: 3 })
  .map((words) => {
    return words.join(' ');
  });
// A phrase that always contains at least one letter: a 1..4 letter head word
// followed by up to two ordinary words. Used for image alt text and link
// titles. A PURELY numeric value (e.g. "0", "00") is parsed back by the schema
// as a NUMBER, and the converter's `alt || ""` then treats 0 as falsy and DROPS
// it ("![0](u)" -> "![](u)"), which is not byte-stable. Any letter keeps the
// value a string and avoids that coercion.
const letterPhraseArb: fc.Arbitrary<string> = fc
  .tuple(
    fc.stringMatching(/^[A-Za-z]{1,4}$/),
    fc.array(wordArb, { minLength: 0, maxLength: 2 }),
  )
  .map((parts) => {
    const [head, rest] = parts;
    return [head].concat(rest).join(' ');
  });
// One text run, drawn from five alternatives: unmarked text, text with a single
// formatting mark (bold/italic/strike), text with a SOLE `code` mark, a link,
// or an inline comment anchor. `code` is never combined with another mark here;
// that combination is the known limitation exercised separately in an it.fails
// block. The run text is always "safe" text, which stays stable even when it
// contains isolated specials.
const markedTextRunArb: fc.Arbitrary<any> = (() => {
  // Shapes a text node. `marks` is omitted entirely for unmarked text.
  const run = (body: string, mark?: Record<string, unknown>): any =>
    mark === undefined
      ? { type: 'text', text: body }
      : { type: 'text', text: body, marks: [mark] };

  // Unmarked text.
  const plain = safeTextArb.map((body) => run(body));

  // Exactly one formatting mark.
  const formatted = fc
    .tuple(safeTextArb, fc.constantFrom('bold', 'italic', 'strike'))
    .map(([body, kind]) => run(body, { type: kind }));

  // Sole code mark (backtick span). safeTextArb never yields a backtick, so the
  // span content cannot hold an inner backtick that would re-parse ambiguously.
  const code = safeTextArb.map((body) => run(body, { type: 'code' }));

  // Link: plain phrase as text, an href free of parens and whitespace, and an
  // optional title. The title always carries a letter, because a purely numeric
  // title is coerced to a number and dropped on re-import (same quirk as the
  // image alt).
  const link = fc
    .tuple(
      phraseArb,
      fc.webUrl().filter((url) => !/[()\s]/.test(url)),
      fc.option(letterPhraseArb, { nil: undefined }),
    )
    .map(([label, href, title]) =>
      run(label, { type: 'link', attrs: title ? { href, title } : { href } }),
    );

  // Inline COMMENT anchor (SPEC §3): a span[data-comment-id] that must survive
  // the round-trip byte-for-byte. commentId is a 4..10 char alphanumeric token
  // (no attribute-breaking chars). `resolved` is set only when true; both forms
  // were verified byte-stable.
  const comment = fc
    .tuple(safeTextArb, fc.stringMatching(/^[A-Za-z0-9]{4,10}$/), fc.boolean())
    .map(([body, commentId, resolved]) =>
      run(body, {
        type: 'comment',
        attrs: resolved ? { commentId, resolved: true } : { commentId },
      }),
    );

  return fc.oneof(plain, formatted, code, link, comment);
})();
// Inline math node whose LaTeX comes from a fixed list that includes the
// `a < b` case the task asks for.
const mathInlineArb: fc.Arbitrary<any> = fc
  .constantFrom('a < b', 'x^2 + y^2', 'a < b < c', '\\frac{1}{2}', 'E = mc^2')
  .map((latex) => {
    return { type: 'mathInline', attrs: { text: latex } };
  });
// Mention node with the schema attrs: the label is a plain phrase, `id` and
// `entityId` are independently drawn UUIDs, and entityType is always "user".
const mentionArb: fc.Arbitrary<any> = fc
  .tuple(phraseArb, fc.uuid(), fc.uuid())
  .map((drawn) => {
    const [label, id, entityId] = drawn;
    return {
      type: 'mention',
      attrs: { id, label, entityType: 'user', entityId },
    };
  });
// Constant arbitrary: every draw is a `hardBreak` node with no attrs.
const hardBreakArb: fc.Arbitrary<any> = fc.constant({ type: 'hardBreak' });
// Brings a generated inline-content array into the canonical form ProseMirror
// itself would store, then trims the markdown-fragile edges. Used for both
// paragraph and heading inline content.
//
//   1) Adjacent `text` runs with IDENTICAL marks are MERGED. A real ProseMirror
//      document never keeps two neighbouring runs with the same mark set (the
//      editor coalesces them). Left split, three adjacent bold runs export as
//      "**a****b****c**", whose inner "****" boundaries are ambiguous and
//      re-parse as a single "**abc**".
//   2) CONSECUTIVE hard breaks are collapsed to one. Two in a row render as
//      "  \n  \n"; marked treats the whitespace-only middle line as a paragraph
//      break, so "a  \n  \nb" re-parses to "a\n\nb". A SINGLE hard break
//      round-trips.
//   3) TRAILING hard breaks are dropped: "...  \n" sits at the paragraph edge
//      and is removed by the converter's .trim(). The first node is never
//      removed, even if it is itself a hard break.

// Two mark lists are "the same" when their JSON serializations match; a missing
// list counts as empty.
const sameMarks = (a: any[] | undefined, b: any[] | undefined): boolean => {
  const left = JSON.stringify(a ?? []);
  const right = JSON.stringify(b ?? []);
  return left === right;
};

function normalizeInline(nodes: any[]): any[] {
  const result: any[] = [];

  for (let i = 0; i < nodes.length; i++) {
    const current = nodes[i];
    const last = result[result.length - 1];

    switch (current.type) {
      case 'hardBreak':
        // Keep only the first of a run of hard breaks.
        if (!(last && last.type === 'hardBreak')) {
          result.push(current);
        }
        break;
      case 'text':
        if (last && last.type === 'text' && sameMarks(last.marks, current.marks)) {
          // Same marks as the previous run: fold this text into it.
          last.text += current.text;
        } else {
          // Store a shallow copy so the in-place merge above never mutates a
          // node object that is shared with the caller.
          result.push({ ...current });
        }
        break;
      default:
        result.push(current);
    }
  }

  // Find the end of the content once trailing hard breaks are discounted,
  // always keeping at least one node, then truncate in place.
  let end = result.length;
  while (end > 1 && result[end - 1].type === 'hardBreak') {
    end--;
  }
  result.length = end;
  return result;
}
// Paragraph inline content: one leading marked text run followed by up to four
// more nodes, each a text run (weight 5), inline math, a mention, or a hard
// break (weight 1 each). The leading node is always a text run so the paragraph
// never opens with a block trigger. The result is passed through
// normalizeInline.
const inlineContentArb: fc.Arbitrary<any[]> = fc
  .tuple(
    markedTextRunArb,
    fc.array(
      fc.oneof(
        { weight: 5, arbitrary: markedTextRunArb },
        { weight: 1, arbitrary: mathInlineArb },
        { weight: 1, arbitrary: mentionArb },
        { weight: 1, arbitrary: hardBreakArb },
      ),
      { minLength: 0, maxLength: 4 },
    ),
  )
  .map((drawn) => {
    const [leading, following] = drawn;
    return normalizeInline([leading, ...following]);
  });
// HEADING inline content: the same recipe as a paragraph, minus hard breaks.
// A hard break inside an ATX heading ("# a  \nb") is NOT byte-stable: marked
// does not honour it there, so the text re-parses as the heading "# a" plus a
// separate paragraph "b" (md2 = "# a\n\nb"). Math, mentions and links inside a
// heading were verified fine and remain available.
const headingInlineContentArb: fc.Arbitrary<any[]> = fc
  .tuple(
    markedTextRunArb,
    fc.array(
      fc.oneof(
        { weight: 5, arbitrary: markedTextRunArb },
        { weight: 1, arbitrary: mathInlineArb },
        { weight: 1, arbitrary: mentionArb },
      ),
      { minLength: 0, maxLength: 4 },
    ),
  )
  .map((drawn) => {
    const [leading, following] = drawn;
    return normalizeInline([leading, ...following]);
  });
// ---------------------------------------------------------------------------
// Block arbitraries
// ---------------------------------------------------------------------------
// A paragraph node wrapping normalized inline content.
const paragraphArb: fc.Arbitrary<any> = inlineContentArb.map((inline) => {
  return { type: 'paragraph', content: inline };
});
// A heading node of level 1..6 with hard-break-free inline content.
const headingArb: fc.Arbitrary<any> = fc
  .tuple(fc.integer({ min: 1, max: 6 }), headingInlineContentArb)
  .map((drawn) => {
    const [level, content] = drawn;
    return { type: 'heading', attrs: { level }, content };
  });
// Code block: the body is 1..4 lines of safe text joined with "\n" (isolated
// specials inside are inert within a fenced block). The language is either ''
// (none) or one of six fixed lowercase tokens.
const codeBlockArb: fc.Arbitrary<any> = fc
  .tuple(
    fc.option(fc.constantFrom('js', 'ts', 'python', 'go', 'rust', 'bash'), {
      nil: '',
    }),
    fc
      .array(safeTextArb, { minLength: 1, maxLength: 4 })
      .map((sourceLines) => sourceLines.join('\n')),
  )
  .map((drawn) => {
    const [language, source] = drawn;
    return {
      type: 'codeBlock',
      attrs: { language },
      content: [{ type: 'text', text: source }],
    };
  });
/** A blockquote wrapping exactly one generated paragraph. */
const blockquoteArb: fc.Arbitrary<any> = paragraphArb.map((quoted) => {
  return { type: 'blockquote', content: [quoted] };
});
/** A horizontal rule carries no attrs or content, so it is a single constant. */
const horizontalRuleArb: fc.Arbitrary<any> = fc.constant({ type: 'horizontalRule' });
// Callout: exactly ONE paragraph child, with the callout type limited to the
// four values the schema knows.
const calloutArb: fc.Arbitrary<any> = fc
  .tuple(fc.constantFrom('info', 'success', 'warning', 'danger'), paragraphArb)
  .map((pair) => {
    const [type, body] = pair;
    return { type: 'callout', attrs: { type }, content: [body] };
  });
/** A math block whose source text is picked from a small fixed corpus (including empty). */
const mathBlockArb: fc.Arbitrary<any> = fc
  .constantFrom('a < b', 'a < b < c', '\\sum_{i=0}^{n} i', 'x = \\frac{-b}{2a}', '')
  .map((formula) => {
    return { type: 'mathBlock', attrs: { text: formula } };
  });
/** An image node: a generated web URL plus an optional alt text. */
const imageArb: fc.Arbitrary<any> = fc
  .tuple(
    fc.webUrl(),
    // The alt text is either empty or a phrase containing at least one letter.
    // Brackets/parens are excluded because they leak into the markdown image
    // syntax (not byte-stable), and a purely numeric alt would be coerced to a
    // number and dropped (see letterPhraseArb).
    fc.option(letterPhraseArb, { nil: '' }),
  )
  .map((pair) => {
    const [src, alt] = pair;
    return { type: 'image', attrs: { src, alt } };
  });
// A simple list item: ONE paragraph, optionally followed by ONE nested bullet
// list (a single level of nesting). `allowNest` decides whether the nested list
// may appear at all; nested items themselves are never nested further.
function listItemArb(allowNest: boolean): fc.Arbitrary<any> {
  // A leaf item: just one paragraph.
  const flatItemArb = paragraphArb.map((para) => ({
    type: 'listItem',
    content: [para],
  }));
  if (!allowNest) {
    return flatItemArb;
  }
  // Either no nested list (undefined) or 1..3 leaf items for it.
  const nestedItemsArb = fc.option(
    fc.array(flatItemArb, { minLength: 1, maxLength: 3 }),
    { nil: undefined },
  );
  return fc.tuple(paragraphArb, nestedItemsArb).map((pair) => {
    const [para, nestedItems] = pair;
    const content: any[] = [para];
    if (nestedItems) {
      content.push({ type: 'bulletList', content: nestedItems });
    }
    return { type: 'listItem', content };
  });
}
/** A bullet list of 1..4 items, each of which may carry one nested bullet list. */
const bulletListArb: fc.Arbitrary<any> = fc
  .array(listItemArb(true), { minLength: 1, maxLength: 4 })
  .map((entries) => {
    return { type: 'bulletList', content: entries };
  });
/** An ordered list of 1..4 items, each of which may carry one nested bullet list. */
const orderedListArb: fc.Arbitrary<any> = fc
  .array(listItemArb(true), { minLength: 1, maxLength: 4 })
  .map((entries) => {
    return { type: 'orderedList', content: entries };
  });
// Task item: a checked flag, ONE paragraph, and optionally ONE nested bullet
// list of 1..2 leaf (non-nesting) list items.
const taskItemArb: fc.Arbitrary<any> = fc
  .tuple(
    fc.boolean(),
    paragraphArb,
    fc.option(
      fc.array(listItemArb(false), { minLength: 1, maxLength: 2 }),
      { nil: undefined },
    ),
  )
  .map((triple) => {
    const [checked, para, nestedItems] = triple;
    const content: any[] = [para];
    if (nestedItems) {
      content.push({ type: 'bulletList', content: nestedItems });
    }
    return { type: 'taskItem', attrs: { checked }, content };
  });
/** A task list of 1..4 task items. */
const taskListArb: fc.Arbitrary<any> = fc
  .array(taskItemArb, { minLength: 1, maxLength: 4 })
  .map((entries) => {
    return { type: 'taskList', content: entries };
  });
// GFM table: a header row + 1..3 body rows, with a fixed column count (1..3) and
// per-column alignment. Cells hold a single short paragraph of safe text.
// Only HEADER cells carry the `align` attr (and only when an alignment was
// drawn for that column); body cells are always generated with empty attrs.
const tableArb: fc.Arbitrary<any> = fc
.integer({ min: 1, max: 3 })
.chain((cols) => {
// One cell: a header or body cell wrapping a single text paragraph. `align`
// is written into attrs only when it is truthy; otherwise attrs is {}.
const cellArb = (header: boolean, align?: string) =>
phraseArb.map((t) => ({
type: header ? 'tableHeader' : 'tableCell',
attrs: align ? { align } : {},
content: [{ type: 'paragraph', content: [{ type: 'text', text: t }] }],
}));
// Exactly `cols` alignment choices, one per column (undefined = no alignment).
const alignsArb = fc.array(
fc.constantFrom(undefined, 'left', 'center', 'right'),
{ minLength: cols, maxLength: cols },
);
return fc
.tuple(
alignsArb,
fc.array(
fc.constant(null), // body-row placeholders; cells filled below
{ minLength: 1, maxLength: 3 },
),
)
.chain(([aligns, bodyRows]) => {
// Header row: one header cell per column, carrying that column's alignment.
const headerRow = fc
.tuple(...aligns.map((a) => cellArb(true, a)))
.map((cells) => ({ type: 'tableRow', content: cells }));
// One arbitrary per body-row placeholder; every body row has the same
// width as the header (aligns.length cells) and no alignment attrs.
const bodyRowArbs = bodyRows.map(() =>
fc
.tuple(...aligns.map(() => cellArb(false)))
.map((cells) => ({ type: 'tableRow', content: cells })),
);
// Assemble the table: header first, then the body rows in order.
return fc
.tuple(headerRow, fc.tuple(...bodyRowArbs))
.map(([h, body]) => ({ type: 'table', content: [h, ...body] }));
});
});
// ---------------------------------------------------------------------------
// Top-level document arbitrary
// ---------------------------------------------------------------------------
// The full menu of block nodes that are byte-stable when SEQUENCED with other
// blocks. Each entry is a weighted choice: paragraphs (6) are the most likely,
// headings (3) next, most container/leaf blocks share weight 2, and horizontal
// rules and math blocks (1) are the rarest.
// NOTE: `image` is deliberately NOT in this menu — see the dedicated
// image tests below. The Docmost `image` node is BLOCK-level, but its markdown
// form `![](url)` is INLINE; marked wraps it in a <p>, the schema then hoists
// the block <img> out and leaves an EMPTY paragraph beside it, so on the second
// export the stray empty paragraph injects extra blank lines between siblings
// ("p\n\n![](u)\n\nq" -> "p\n\n\n\n![](u)\n\nq"). An image is only byte-stable
// when it is the SOLE block (the edge artifacts get .trim()'d away). It is
// therefore covered by its own targeted tests, not mixed into multi-block docs.
const blockArb: fc.Arbitrary<any> = fc.oneof(
{ weight: 6, arbitrary: paragraphArb },
{ weight: 3, arbitrary: headingArb },
{ weight: 2, arbitrary: codeBlockArb },
{ weight: 2, arbitrary: bulletListArb },
{ weight: 2, arbitrary: orderedListArb },
{ weight: 2, arbitrary: taskListArb },
{ weight: 2, arbitrary: blockquoteArb },
{ weight: 2, arbitrary: tableArb },
{ weight: 2, arbitrary: calloutArb },
{ weight: 1, arbitrary: horizontalRuleArb },
{ weight: 1, arbitrary: mathBlockArb },
);
/** Node types treated as "list blocks" when checking for adjacent lists. */
const LIST_TYPES = new Set<string>([
  'bulletList',
  'orderedList',
  'taskList',
]);
// A bounded document: 1..8 block nodes. Kept small so each run stays cheap
// (every run performs a real marked + jsdom parse) and shrinking stays fast.
//
// Post-processing rule: two LIST blocks never sit directly next to each other.
// Sibling lists that share a marker family — bullet and task lists both use
// "-", ordered lists use "1." — are MERGED by markdown into one list when only
// a blank line separates them ("- a\n\n- b" -> one list -> "- a\n- b"), which
// is not byte-stable. A non-list block between two lists keeps them apart, and
// so does a different marker family, but dropping every back-to-back list is
// the simplest rule that is always correct: a list block is discarded whenever
// the most recently kept block is also a list.
const docArb: fc.Arbitrary<any> = fc
  .array(blockArb, { minLength: 1, maxLength: 8 })
  .map((blocks) => {
    const kept = blocks.reduce<any[]>((acc, candidate) => {
      const last = acc[acc.length - 1];
      const wouldTouchAnotherList =
        Boolean(last) &&
        LIST_TYPES.has(last.type) &&
        LIST_TYPES.has(candidate.type);
      // Keep everything except a list that would directly follow another list.
      if (!wouldTouchAnotherList) {
        acc.push(candidate);
      }
      return acc;
    }, []);
    // The first block is always kept, so `kept` cannot actually be empty; the
    // fallback just makes the "never an empty document" invariant explicit.
    return { type: 'doc', content: kept.length > 0 ? kept : blocks.slice(0, 1) };
  });
// ---------------------------------------------------------------------------
// The properties
// ---------------------------------------------------------------------------
describe('markdown <-> ProseMirror round-trip (property-based)', () => {
  // Shared assertion for the byte-stability tests: export -> import -> export
  // must reproduce the first markdown export exactly.
  const expectByteStable = async (candidate: any): Promise<void> => {
    const { md1, md2 } = await roundTrip(candidate);
    expect(md2).toBe(md1);
  };

  it('the generator covers every targeted node type at least once', () => {
    // Coverage sanity check, not a correctness property: confirms the arbitrary
    // really produces the intended variety of nodes and marks within NUM_RUNS.
    const seen = new Set<string>();
    // Walk a generated tree and record every node type and "mark:<type>".
    const record = (root: any): void => {
      const pending: any[] = [root];
      while (pending.length > 0) {
        const node = pending.pop();
        if (!node || typeof node !== 'object') continue;
        if (node.type) seen.add(node.type);
        for (const mark of node.marks ?? []) seen.add(`mark:${mark.type}`);
        for (const child of node.content ?? []) pending.push(child);
      }
    };
    fc.assert(
      fc.property(docArb, (generated) => {
        record(generated);
        return true;
      }),
      { numRuns: NUM_RUNS, seed: SEED },
    );
    // Block types, inline nodes and marks that must all have shown up.
    const expectedLabels = [
      'paragraph',
      'heading',
      'codeBlock',
      'bulletList',
      'orderedList',
      'taskList',
      'blockquote',
      'table',
      'callout',
      'horizontalRule',
      'mathBlock',
      // 'image' is covered by its own dedicated tests, not docArb.
      'mention',
      'mathInline',
      'hardBreak',
      'mark:bold',
      'mark:italic',
      'mark:strike',
      'mark:code',
      'mark:link',
      'mark:comment',
    ];
    for (const label of expectedLabels) {
      expect(seen, `expected the generator to produce ${label}`).toContain(label);
    }
  });

  it('markdown is byte-stable across export -> import -> export', async () => {
    // The guarantee git relies on: the second export equals the first, byte for byte.
    await fc.assert(fc.asyncProperty(docArb, expectByteStable), {
      numRuns: NUM_RUNS,
      seed: SEED,
    });
  });

  it('the document is semantically stable on a second cycle (ids stripped)', async () => {
    // An optional, stronger-feeling property. The original doc is NOT compared
    // with its first import: the converter reconstructs schema default attrs on
    // the FIRST import (a known SPEC §11 divergence). However, once the markdown
    // is byte-stable, importing the SAME markdown twice must give structurally
    // identical docs apart from the regenerated block ids. So the import of md1
    // is compared with the import of md2 (== md1) after stripping ids.
    await fc.assert(
      fc.asyncProperty(docArb, async (original) => {
        const firstExport = convertProseMirrorToMarkdown(original);
        const firstImport = await markdownToProseMirror(firstExport);
        const secondExport = convertProseMirrorToMarkdown(firstImport);
        // Guard: the comparison below is only meaningful for byte-stable markdown.
        expect(secondExport).toBe(firstExport);
        const secondImport = await markdownToProseMirror(secondExport);
        expect(stripBlockIds(secondImport)).toEqual(stripBlockIds(firstImport));
      }),
      { numRuns: NUM_RUNS, seed: SEED },
    );
  });

  it('a SOLE image block is byte-stable', async () => {
    // When the image is the only block, the stray empty paragraph the schema
    // leaves beside the hoisted block <img> sits at a document edge, where the
    // converter's final .trim() removes it.
    await fc.assert(
      fc.asyncProperty(imageArb, (image) =>
        expectByteStable({ type: 'doc', content: [image] }),
      ),
      { numRuns: NUM_RUNS, seed: SEED },
    );
  });

  // -------------------------------------------------------------------------
  // KNOWN, DOCUMENTED non-roundtrip bug #2 (kept honest as it.fails).
  //
  // BUG: a block-level `image` placed BETWEEN other blocks is not byte-stable.
  // The Docmost image node is BLOCK-level, yet its markdown form `![](url)` is
  // INLINE. marked wraps the inline image in a <p>; the schema then hoists the
  // block <img> out of that <p> and leaves an EMPTY paragraph as a sibling. On
  // the second export the empty paragraph renders as "" and the "\n\n" doc
  // join inserts an extra blank gap:
  //   "p\n\n![x](u)\n\nq"  ->  "p\n\n\n\n![x](u)\n\nq"   (=> md2 !== md1).
  // The doc built in the test body is the minimal repro.
  // Not "fixed" — the source must not change; documented and exercised here.
  // -------------------------------------------------------------------------
  it.fails('BUG: a block image between other blocks is not byte-stable', async () => {
    const before = { type: 'paragraph', content: [{ type: 'text', text: 'p' }] };
    const image = { type: 'image', attrs: { src: 'http://a.aa', alt: 'x' } };
    const after = { type: 'paragraph', content: [{ type: 'text', text: 'q' }] };
    await expectByteStable({ type: 'doc', content: [before, image, after] });
  });

  // -------------------------------------------------------------------------
  // KNOWN, DOCUMENTED non-roundtrip bug #1 (kept honest as it.fails).
  //
  // BUG: the `code` mark combined with ANY other mark does NOT round-trip.
  // The converter emits nested HTML so the output is well-formed, e.g.
  //   marks [code, bold]  ->  md1 = "<strong><code>x</code></strong>"
  // but the schema's `code` mark declares `excludes: "_"`, so on import the
  // co-occurring mark is dropped and the run comes back as code-only:
  //   md2 = "`x`"   (=> md2 !== md1).
  // Minimal repro doc:
  //   { type:'doc', content:[ { type:'paragraph', content:[
  //       { type:'text', text:'x', marks:[{type:'code'},{type:'bold'}] } ] } ] }
  // markdown-converter.ts acknowledges this (the long comment above the marks
  // switch): both marks cannot be preserved while `code` excludes the others.
  // Documented here, not "fixed", because the source must not change.
  // -------------------------------------------------------------------------
  it.fails(
    'BUG: code mark combined with another mark is not byte-stable',
    async () => {
      // One paragraph with a single text run marked `code` + one other mark.
      const codeComboArb = fc
        .tuple(safeTextArb, fc.constantFrom('bold', 'italic', 'strike'))
        .map((pair) => {
          const [runText, otherMark] = pair;
          const run = {
            type: 'text',
            text: runText,
            marks: [{ type: 'code' }, { type: otherMark }],
          };
          return {
            type: 'doc',
            content: [{ type: 'paragraph', content: [run] }],
          };
        });
      await fc.assert(fc.asyncProperty(codeComboArb, expectByteStable), {
        numRuns: 20,
        seed: SEED,
      });
    },
  );
});

View File

@@ -0,0 +1,268 @@
import { describe, expect, it } from 'vitest';
import fc from 'fast-check';
import {
getNodeByRef,
replaceNodeById,
insertNodeRelative,
insertTableRow,
updateTableCell,
sanitizeForYjs,
findUnstorableAttr,
buildOutline,
} from '../src/lib/node-ops.js';
// Gaps NOT covered by node-ops.test.ts (test-strategy report §2). The base file
// is comprehensive; these add only the missing edges: newNode-arg immutability,
// anchor-is-container routing, malformed opts, ragged/empty/no-colwidth/non-int
// insertTableRow, getNodeByRef non-object/#-1, updateTableCell empty-id refresh,
// outline 100/40 boundary, malformed marks, and the makeFreshId property.
/** Builds a text leaf node; `marks` is attached only when a truthy value is given. */
const text = (value: string, marks?: any[]): any =>
  marks
    ? { type: 'text', text: value, marks }
    : { type: 'text', text: value };
/** Builds a paragraph with the given id; a non-empty `value` becomes its only text child. */
const para = (id: string, value = ''): any => {
  const content = value ? [text(value)] : [];
  return { type: 'paragraph', attrs: { id, indent: 0 }, content };
};
/**
 * Builds a table cell or header cell. A null/undefined `paraId` yields an empty
 * cell; otherwise the cell wraps one paragraph. `extraAttrs` are merged over
 * the default colspan/rowspan of 1.
 */
const cell = (
  type: 'tableCell' | 'tableHeader',
  paraId: string | null,
  value = '',
  extraAttrs: Record<string, any> = {},
): any => {
  const attrs = { colspan: 1, rowspan: 1, ...extraAttrs };
  const content = paraId == null ? [] : [para(paraId, value)];
  return { type, attrs, content };
};
/** Wraps the given cells in a tableRow node. */
const row = (cells: any[]): any => {
  return { type: 'tableRow', content: cells };
};
/** Builds a doc root whose content is the given top-level blocks. */
const doc = (...blocks: any[]): any => {
  return { type: 'doc', content: blocks };
};
// ===========================================================================
describe('replaceNodeById — newNode ARGUMENT immutability', () => {
  it('does not mutate the caller-supplied newNode after replacement', () => {
    // The base file covers immutability of the doc argument; this test pins the
    // OTHER input. The replacement node has to be deep-cloned so that changes to
    // the result never reach the caller's newNode, and the other way round.
    const source = doc(para('p0', 'old'), para('p1', 'old2'));
    const replacement = {
      type: 'paragraph',
      attrs: { id: 'new' },
      content: [text('NEW')],
    };
    const replacementBefore = structuredClone(replacement);

    const outcome = replaceNodeById(source, 'p0', replacement);
    const insertedCopy = outcome.doc.content[0];

    // Direction 1: changing the inserted copy leaves the argument alone.
    insertedCopy.content.push(text('mutated'));
    expect(replacement).toEqual(replacementBefore);

    // Direction 2: changing the argument later leaves the inserted copy alone.
    replacement.content.push(text('later'));
    expect(insertedCopy.content).toEqual([text('NEW'), text('mutated')]);
  });
});
// ===========================================================================
describe('insertNodeRelative — container routing and malformed opts', () => {
  // A one-row table whose only cell reads "hello cell".
  const anchorTable = () => ({
    type: 'table',
    content: [row([cell('tableCell', 'r0', 'hello cell')])],
  });
  // The structural row being inserted.
  const rowToInsert = () => row([cell('tableCell', 'rNew', 'NEW')]);
  // The id of the paragraph in each row's first cell, in row order.
  const firstCellIds = (table: any) =>
    table.content.map((r: any) => r.content[0].content[0].attrs.id);

  it('routes a structural row when anchorText resolves to the TABLE block itself', () => {
    // anchorText only scans top-level blocks, so it resolves to the whole
    // table; the matched container IS the anchor (containerIdx ===
    // chain.length-1). A row inserted "after" must therefore be appended inside
    // the table rather than spliced beside a row.
    const outcome = insertNodeRelative(doc(anchorTable()), rowToInsert(), {
      position: 'after',
      anchorText: 'hello cell',
    });
    expect(outcome.inserted).toBe(true);
    expect(firstCellIds(outcome.doc.content[0])).toEqual(['r0', 'rNew']);
  });

  it('prepends a structural row when anchorText resolves to the table and position is "before"', () => {
    const outcome = insertNodeRelative(doc(anchorTable()), rowToInsert(), {
      position: 'before',
      anchorText: 'hello cell',
    });
    expect(firstCellIds(outcome.doc.content[0])).toEqual(['rNew', 'r0']);
  });

  it('is a no-op (inserted:false) for a malformed opts object', () => {
    const source = doc(para('p0'));
    const outcome = insertNodeRelative(source, para('n'), null as any);
    expect(outcome.inserted).toBe(false);
    expect(outcome.doc).toEqual(source);
  });
});
// ===========================================================================
describe('insertTableRow — column count and index edge cases', () => {
  // A ragged table: the header row has 1 column, the body row has 2.
  const ragged = () => {
    const narrowHeader = row([cell('tableHeader', 'h0', 'H0')]);
    const wideBody = row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]);
    return { type: 'table', content: [narrowHeader, wideBody] };
  };
  // A regular 1-column table: one header row and one body row.
  const singleColumn = () => ({
    type: 'table',
    content: [
      row([cell('tableHeader', 'h0', 'H')]),
      row([cell('tableCell', 'c0', 'A')]),
    ],
  });

  it('derives the column count from the WIDEST row (ragged table)', () => {
    // The guard measures against the widest row (2), so 3 cells is rejected...
    const tooWide = () => insertTableRow(doc(ragged()), '#0', ['X', 'Y', 'Z']);
    expect(tooWide).toThrow(/got 3 cell\(s\) but the table has 2 column\(s\)/);
    // ...while a 2-cell row ends up 2 wide (the widest width), not the header's 1.
    const outcome = insertTableRow(doc(ragged()), '#0', ['X', 'Y']);
    const insertedRow = outcome.doc.content[0].content[2];
    expect(insertedRow.content).toHaveLength(2);
  });

  it('an EMPTY table falls back to the supplied cell count', () => {
    const emptyTable = { type: 'table', content: [] };
    const outcome = insertTableRow(doc(emptyTable), '#0', ['A', 'B']);
    expect(outcome.inserted).toBe(true);
    const insertedRow = outcome.doc.content[0].content[0];
    expect(insertedRow.content).toHaveLength(2);
  });

  it('omits colwidth entirely when the header cell has none (no undefined leak)', () => {
    const outcome = insertTableRow(doc(singleColumn()), '#0', ['X']);
    const insertedCell = outcome.doc.content[0].content[2].content[0];
    // The key must be absent, not present with the value undefined.
    expect('colwidth' in insertedCell.attrs).toBe(false);
  });

  it('APPENDS for a non-integer or negative index (does not throw)', () => {
    // The same table object is deliberately reused for both calls.
    const table = singleColumn();
    const fractional = insertTableRow(doc(table), '#0', ['X'], 1.5);
    expect(fractional.inserted).toBe(true);
    expect(fractional.doc.content[0].content).toHaveLength(3); // appended at the end
    const negative = insertTableRow(doc(table), '#0', ['X'], -1);
    expect(negative.doc.content[0].content).toHaveLength(3);
  });
});
// ===========================================================================
describe('getNodeByRef — malformed refs', () => {
  it('returns null for a non-object block at a valid #n index', () => {
    // Index 0 exists, but the block stored there is null.
    const withNullBlock = { type: 'doc', content: [null] };
    const hit = getNodeByRef(withNullBlock, '#0');
    expect(hit).toBeNull();
  });

  it('returns null for "#-1" (the index regex does not match a negative)', () => {
    // "#-1" is neither of the "#<digits>" form nor equal to any block id.
    const source = doc(para('p0'));
    const hit = getNodeByRef(source, '#-1');
    expect(hit).toBeNull();
  });
});
// ===========================================================================
describe('updateTableCell — fresh id when the first paragraph has an empty id', () => {
  it('mints a fresh id when the existing first paragraph id is the empty string', () => {
    // The body cell's first paragraph carries id: '' on purpose.
    const emptyIdParagraph = {
      type: 'paragraph',
      attrs: { id: '' },
      content: [text('old')],
    };
    const bodyCell = {
      type: 'tableCell',
      attrs: { colspan: 1, rowspan: 1 },
      content: [emptyIdParagraph],
    };
    const table = {
      type: 'table',
      content: [row([cell('tableHeader', 'h0', 'H')]), row([bodyCell])],
    };

    const outcome = updateTableCell(doc(table), '#0', 1, 0, 'new');

    const updatedParagraph = outcome.doc.content[0].content[1].content[0].content[0];
    const mintedId = updatedParagraph.attrs.id;
    // An empty id counts as missing, so a fresh Docmost-style id is expected.
    expect(mintedId).toMatch(/^[a-z0-9]{12}$/);
    expect(mintedId).not.toBe('');
  });
});
// ===========================================================================
describe('buildOutline — exact 100 / 40 char truncation boundaries', () => {
  // A table with a single header cell holding the given text.
  const headerOnlyTable = (headerText: string) => ({
    type: 'table',
    content: [row([cell('tableHeader', 'h', headerText)])],
  });

  it('does NOT truncate firstText at exactly 100 chars but DOES at 101', () => {
    const exactlyAtCap = 'x'.repeat(100);
    const [atCap] = [buildOutline(doc(para('p', exactlyAtCap)))[0]];
    expect(atCap.firstText).toBe('x'.repeat(100)); // boundary: not cut
    expect(atCap.firstText.endsWith('…')).toBe(false);

    const oneOverCap = 'x'.repeat(101);
    const overCap = buildOutline(doc(para('p', oneOverCap)))[0];
    expect(overCap.firstText).toBe('x'.repeat(100) + '…'); // first char over the cap
  });

  it('does NOT truncate a header cell at exactly 40 chars but DOES at 41', () => {
    const atCap = buildOutline(doc(headerOnlyTable('y'.repeat(40))))[0];
    expect(atCap.header).toEqual(['y'.repeat(40)]);

    const overCap = buildOutline(doc(headerOnlyTable('y'.repeat(41))))[0];
    expect(overCap.header).toEqual(['y'.repeat(40) + '…']);
  });
});
// ===========================================================================
describe('sanitizeForYjs / findUnstorableAttr — malformed marks array', () => {
  // A paragraph whose text run has a marks array of [null, link], where the
  // link mark carries one attr explicitly set to undefined.
  const malformed = () => {
    const linkMark = { type: 'link', attrs: { href: 'u', gone: undefined } };
    const paragraph = {
      type: 'paragraph',
      attrs: { id: 'p' },
      content: [text('x', [null, linkMark])],
    };
    return doc(paragraph);
  };

  it('sanitizeForYjs skips a null mark and strips undefined on the real one', () => {
    const sanitized = sanitizeForYjs(malformed());
    const [nullMark, linkMark] = [
      sanitized.content[0].content[0].marks[0],
      sanitized.content[0].content[0].marks[1],
    ];
    expect(nullMark).toBeNull(); // the null mark is left as-is, without a crash
    expect(linkMark.attrs).toEqual({ href: 'u' }); // the undefined attr is gone
  });

  it('findUnstorableAttr skips a null mark and reports the real undefined attr path', () => {
    const reported = findUnstorableAttr(malformed());
    expect(reported).toBe('content[0].content[0].marks[1].attrs.gone (undefined)');
  });
});
// ===========================================================================
describe('makeFreshId — format and uniqueness (property, via insertTableRow)', () => {
  it('every minted cell-paragraph id matches ^[a-z0-9]{12}$ and is globally unique', () => {
    const idShape = /^[a-z0-9]{12}$/;
    fc.assert(
      fc.property(fc.integer({ min: 1, max: 5 }), (cols) => {
        // A header-only table of `cols` columns whose paragraphs carry the
        // pre-existing ids "pre-0".."pre-<cols-1>". Inserting a row at index 1
        // produces one new cell paragraph per column, each with a minted id.
        const headerCells = Array.from({ length: cols }, (_, i) =>
          cell('tableHeader', `pre-${i}`, `H${i}`),
        );
        const source = doc({ type: 'table', content: [row(headerCells)] });
        const cellValues = Array.from({ length: cols }, () => 'v');

        const outcome = insertTableRow(source, '#0', cellValues, 1);

        const insertedRow = outcome.doc.content[0].content[1];
        const mintedIds: string[] = insertedRow.content.map(
          (c: any) => c.content[0].attrs.id,
        );
        // Format: every minted id has the expected 12-char shape.
        for (const id of mintedIds) {
          expect(id).toMatch(idShape);
        }
        // Uniqueness is checked within the inserted row of this run...
        expect(new Set(mintedIds).size).toBe(mintedIds.length);
        // ...and no minted id may look like one of the pre-existing "pre-" ids.
        for (const id of mintedIds) {
          expect(id.startsWith('pre-')).toBe(false);
        }
      }),
      { numRuns: 100 },
    );
  });
});

View File

@@ -0,0 +1,908 @@
import { describe, expect, it } from 'vitest';
import {
blockPlainText,
buildOutline,
getNodeByRef,
replaceNodeById,
deleteNodeById,
sanitizeForYjs,
findUnstorableAttr,
insertNodeRelative,
readTable,
insertTableRow,
deleteTableRow,
updateTableCell,
} from '../src/lib/node-ops.js';
// ---------------------------------------------------------------------------
// Tiny ProseMirror/TipTap JSON fixture builders. These produce the exact plain
// JSON shape Docmost uses: { type, attrs?, content?, text?, marks? }.
// ---------------------------------------------------------------------------
/** Builds a text leaf node; `marks` is attached only when a truthy value is passed. */
function text(value: string, marks?: any[]): any {
  if (!marks) {
    return { type: 'text', text: value };
  }
  return { type: 'text', text: value, marks };
}
/** Builds a paragraph block with an id; a non-empty `value` becomes its single text child. */
function para(id: string, value = ''): any {
  const children = value ? [text(value)] : [];
  const attrs = { id, indent: 0 };
  return { type: 'paragraph', attrs, content: children };
}
/** Builds a heading block with the given id and level and a single text child. */
function heading(id: string, level: number, value: string): any {
  const attrs = { id, level };
  const content = [text(value)];
  return { type: 'heading', attrs, content };
}
/**
 * Builds a table cell (or header cell). A null/undefined `paraId` gives an
 * empty cell; otherwise the cell wraps a single paragraph. `extraAttrs` are
 * merged over the default colspan/rowspan of 1.
 */
function cell(
  type: 'tableCell' | 'tableHeader',
  paraId: string | null,
  value = '',
  extraAttrs: Record<string, any> = {},
): any {
  const content = paraId == null ? [] : [para(paraId, value)];
  return {
    type,
    attrs: { colspan: 1, rowspan: 1, ...extraAttrs },
    content,
  };
}
/** Wraps the given cells in a tableRow node. */
function row(cells: any[]): any {
  const node = { type: 'tableRow', content: cells };
  return node;
}
/** Builds a doc root whose content is the given top-level blocks. */
function doc(...blocks: any[]): any {
  const root = { type: 'doc', content: blocks };
  return root;
}
// ===========================================================================
// blockPlainText
// ===========================================================================
describe('blockPlainText', () => {
  it('returns the text of a plain text node', () => {
    const leaf = text('hello');
    expect(blockPlainText(leaf)).toBe('hello');
  });

  it('concatenates text from nested containers', () => {
    // Two direct text children plus one more nested a level deeper.
    const innerSpan = { type: 'span', content: [text('baz')] };
    const paragraph = {
      type: 'paragraph',
      content: [text('foo'), text('bar'), innerSpan],
    };
    expect(blockPlainText(paragraph)).toBe('foobarbaz');
  });

  it('returns "" for nullish or non-object inputs', () => {
    // The last entry is an array: arrays do not count as node objects here.
    const nonNodes: any[] = [null, undefined, 'a string', 42, [text('x')]];
    for (const input of nonNodes) {
      expect(blockPlainText(input)).toBe('');
    }
  });

  it('uses BOTH text and nested content of a node, text first', () => {
    const hybrid = { type: 'weird', text: 'A', content: [text('B'), text('C')] };
    expect(blockPlainText(hybrid)).toBe('ABC');
  });
});
// ===========================================================================
// buildOutline
// ===========================================================================
describe('buildOutline', () => {
  // Wraps rows in a table node.
  const tableOf = (...rows: any[]) => ({ type: 'table', content: rows });

  it('captures heading level, id and firstText', () => {
    const entries = buildOutline(doc(heading('h1', 2, 'Title')));
    const expected = [
      { index: 0, type: 'heading', id: 'h1', firstText: 'Title', level: 2 },
    ];
    expect(entries).toEqual(expected);
  });

  it('reports table rows/cols and header texts (cols from row 0)', () => {
    const table = tableOf(
      row([cell('tableHeader', 'a', 'H1'), cell('tableHeader', 'b', 'H2')]),
      row([cell('tableCell', 'c', 'x'), cell('tableCell', 'd', 'y')]),
    );
    const entry = buildOutline(doc(table))[0];
    expect(entry.type).toBe('table');
    expect(entry.rows).toBe(2);
    expect(entry.cols).toBe(2);
    expect(entry.header).toEqual(['H1', 'H2']);
  });

  it('derives cols from row 0 for a ragged table', () => {
    const table = tableOf(
      row([cell('tableHeader', 'a', 'H1')]), // row 0: 1 column
      row([cell('tableCell', 'b', 'x'), cell('tableCell', 'c', 'y')]), // row 1: 2 columns
    );
    const entry = buildOutline(doc(table))[0];
    expect(entry.rows).toBe(2);
    expect(entry.cols).toBe(1); // only row 0 determines cols
    expect(entry.header).toEqual(['H1']);
  });

  it('reports item count for any *List block', () => {
    const threeItems = [{ type: 'listItem' }, { type: 'listItem' }, { type: 'listItem' }];
    const list = { type: 'bulletList', attrs: { id: 'l1' }, content: threeItems };
    const entry = buildOutline(doc(list))[0];
    expect(entry.type).toBe('bulletList');
    expect(entry.items).toBe(3);
  });

  it('returns [] for an empty or non-object doc', () => {
    expect(buildOutline(null)).toEqual([]);
    expect(buildOutline({ type: 'doc' })).toEqual([]); // content array missing
    expect(buildOutline({ type: 'doc', content: [] })).toEqual([]);
    expect(buildOutline('nope')).toEqual([]);
  });

  it('falls back to null id when a block has no attrs.id', () => {
    const attrless = { type: 'paragraph', content: [text('hi')] };
    const entry = buildOutline(doc(attrless))[0];
    expect(entry.id).toBeNull();
    expect(entry.firstText).toBe('hi');
  });

  it('truncates firstText to 100 chars with an ellipsis', () => {
    const oversized = 'x'.repeat(150);
    const entry = buildOutline(doc(para('p', oversized)))[0];
    expect(entry.firstText).toBe('x'.repeat(100) + '…');
    expect(entry.firstText.length).toBe(101); // 100 kept chars + the ellipsis
  });

  it('truncates table header cell text to 40 chars', () => {
    const oversized = 'y'.repeat(60);
    const table = tableOf(row([cell('tableHeader', 'a', oversized)]));
    const entry = buildOutline(doc(table))[0];
    expect(entry.header).toEqual(['y'.repeat(40) + '…']);
  });
});
// ===========================================================================
// getNodeByRef
// ===========================================================================
describe('getNodeByRef', () => {
  it('resolves a top-level block by #n', () => {
    const source = doc(para('p0', 'zero'), para('p1', 'one'));
    const found = getNodeByRef(source, '#1');
    expect(found).not.toBeNull();
    expect(found!.path).toEqual([1]);
    expect(found!.type).toBe('paragraph');
    expect(found!.node.attrs.id).toBe('p1');
  });

  it('returns null for #n out of range', () => {
    // A one-block doc: both a far index and the first index past the end miss.
    const source = doc(para('p0'));
    expect(getNodeByRef(source, '#5')).toBeNull();
    expect(getNodeByRef(source, '#1')).toBeNull();
  });

  it('finds a nested node by id with the correct path', () => {
    const table = {
      type: 'table',
      content: [row([cell('tableCell', 'deep', 'found me')])],
    };
    const source = doc(para('p0'), table);
    const found = getNodeByRef(source, 'deep');
    expect(found).not.toBeNull();
    // Path: doc.content[1] (table) -> row 0 -> cell 0 -> paragraph 0.
    expect(found!.path).toEqual([1, 0, 0, 0]);
    expect(found!.type).toBe('paragraph');
  });

  it('returns null when the id is not found', () => {
    const source = doc(para('p0'));
    expect(getNodeByRef(source, 'missing')).toBeNull();
  });

  it('returns the FIRST node for a duplicate id', () => {
    const source = doc(para('dup', 'first'), para('dup', 'second'));
    const found = getNodeByRef(source, 'dup');
    expect(found!.path).toEqual([0]);
    expect(blockPlainText(found!.node)).toBe('first');
  });

  it('returns null for a non-object doc', () => {
    expect(getNodeByRef(null, '#0')).toBeNull();
    expect(getNodeByRef('x', 'id')).toBeNull();
  });

  it('returns a CLONE — mutating it does not touch the input doc', () => {
    const source = doc(para('p0', 'orig'));
    const sourceBefore = structuredClone(source);
    const found = getNodeByRef(source, 'p0');
    // Change both an attr and the content of the returned node.
    found!.node.attrs.id = 'mutated';
    found!.node.content.push(text('extra'));
    expect(source).toEqual(sourceBefore);
  });
});
// ===========================================================================
// replaceNodeById
// ===========================================================================
describe('replaceNodeById', () => {
  // A fresh replacement paragraph per call, so tests never share one instance.
  const newNode = () => {
    return { type: 'paragraph', attrs: { id: 'new' }, content: [text('NEW')] };
  };

  it('reports replaced:0 when nothing matches', () => {
    const source = doc(para('p0'));
    const outcome = replaceNodeById(source, 'missing', newNode());
    expect(outcome.replaced).toBe(0);
    expect(outcome.doc).toEqual(source);
  });

  it('replaces a single match', () => {
    const source = doc(para('p0', 'old'), para('p1'));
    const outcome = replaceNodeById(source, 'p0', newNode());
    expect(outcome.replaced).toBe(1);
    const [first, second] = [outcome.doc.content[0], outcome.doc.content[1]];
    expect(first).toEqual(newNode());
    expect(second.attrs.id).toBe('p1');
  });

  it('replaces N matches', () => {
    const source = doc(para('dup', 'a'), para('keep'), para('dup', 'b'));
    const outcome = replaceNodeById(source, 'dup', newNode());
    expect(outcome.replaced).toBe(2);
    const blocks = outcome.doc.content;
    expect(blocks[0]).toEqual(newNode());
    expect(blocks[1].attrs.id).toBe('keep');
    expect(blocks[2]).toEqual(newNode());
  });

  it('replaces a nested match inside a table cell', () => {
    const table = {
      type: 'table',
      content: [row([cell('tableCell', 'inner', 'x')])],
    };
    const outcome = replaceNodeById(doc(table), 'inner', newNode());
    expect(outcome.replaced).toBe(1);
    // table -> row 0 -> cell 0 -> paragraph 0
    const cellParagraph = outcome.doc.content[0].content[0].content[0].content[0];
    expect(cellParagraph).toEqual(newNode());
  });

  it('does NOT recurse into the substituted node', () => {
    // The replacement has the very id being replaced; it must not be replaced again.
    const source = doc(para('target'));
    const sameIdReplacement = {
      type: 'paragraph',
      attrs: { id: 'target' },
      content: [text('R')],
    };
    const outcome = replaceNodeById(source, 'target', sameIdReplacement);
    expect(outcome.replaced).toBe(1); // 1, not 2: the new node is not revisited
  });

  it('gives each match a SEPARATE clone', () => {
    const source = doc(para('dup'), para('dup'));
    const outcome = replaceNodeById(source, 'dup', newNode());
    const [firstCopy, secondCopy] = [outcome.doc.content[0], outcome.doc.content[1]];
    firstCopy.content.push(text('mutated'));
    // Changing the first copy must leave the second one pristine.
    expect(secondCopy).toEqual(newNode());
  });

  it('does not mutate the input doc', () => {
    const source = doc(para('p0', 'old'));
    const sourceBefore = structuredClone(source);
    replaceNodeById(source, 'p0', newNode());
    expect(source).toEqual(sourceBefore);
  });
});
// ===========================================================================
// deleteNodeById
// ===========================================================================
describe('deleteNodeById', () => {
  /** Collects `attrs.id` from a list of sibling nodes, in order. */
  const idsOf = (nodes: any) => nodes.map((node: any) => node.attrs.id);

  it('reports deleted:0 when nothing matches', () => {
    const source = doc(para('p0'));
    const outcome = deleteNodeById(source, 'missing');
    expect(outcome.deleted).toBe(0);
    expect(outcome.doc).toEqual(source);
  });

  it('deletes a single match', () => {
    const source = doc(para('p0'), para('p1'), para('p2'));
    const outcome = deleteNodeById(source, 'p1');
    expect(outcome.deleted).toBe(1);
    expect(idsOf(outcome.doc.content)).toEqual(['p0', 'p2']);
  });

  it('deletes N matches', () => {
    const source = doc(para('dup'), para('keep'), para('dup'));
    const outcome = deleteNodeById(source, 'dup');
    expect(outcome.deleted).toBe(2);
    expect(idsOf(outcome.doc.content)).toEqual(['keep']);
  });

  it('deletes a nested node and preserves sibling order', () => {
    // Container with three paragraph children; the middle child is removed and
    // both the inner and the outer sibling sequences must keep their order.
    const container = {
      type: 'callout',
      attrs: { id: 'cal' },
      content: [para('a', 'A'), para('b', 'B'), para('c', 'C')],
    };
    const source = doc(para('outer0'), container, para('outer1'));
    const outcome = deleteNodeById(source, 'b');
    expect(outcome.deleted).toBe(1);
    // Children of the container, minus the deleted one.
    expect(idsOf(outcome.doc.content[1].content)).toEqual(['a', 'c']);
    // Top-level blocks are unaffected.
    expect(idsOf(outcome.doc.content)).toEqual(['outer0', 'cal', 'outer1']);
  });

  it('does not mutate the input doc (deep-equal before/after)', () => {
    const source = doc(para('p0'), para('p1'));
    const before = structuredClone(source);
    deleteNodeById(source, 'p0');
    expect(source).toEqual(before);
  });
});
// ===========================================================================
// sanitizeForYjs
// ===========================================================================
describe('sanitizeForYjs', () => {
  // NOTE: `toEqual` treats a key whose value is `undefined` as if it were
  // absent, so it cannot prove on its own that such a key was removed. Every
  // test that claims a key was dropped therefore also asserts with the `in`
  // operator.

  it('strips undefined keys from node.attrs', () => {
    const d = doc({ type: 'paragraph', attrs: { id: 'p', gone: undefined, kept: 1 } });
    const res = sanitizeForYjs(d);
    expect('gone' in res.content[0].attrs).toBe(false);
    expect(res.content[0].attrs).toEqual({ id: 'p', kept: 1 });
  });

  it('strips undefined keys from mark.attrs', () => {
    const d = doc({
      type: 'paragraph',
      attrs: { id: 'p' },
      content: [text('hi', [{ type: 'link', attrs: { href: 'u', gone: undefined } }])],
    });
    const res = sanitizeForYjs(d);
    const markAttrs = res.content[0].content[0].marks[0].attrs;
    expect('gone' in markAttrs).toBe(false);
    expect(markAttrs).toEqual({ href: 'u' });
  });

  it('PRESERVES null, false, 0 and "" (only undefined is dropped)', () => {
    const d = doc({
      type: 'paragraph',
      attrs: { a: null, b: false, c: 0, d: '', e: undefined },
    });
    const res = sanitizeForYjs(d);
    const attrs = res.content[0].attrs;
    // Falsy-but-defined values must survive untouched.
    expect(attrs).toEqual({ a: null, b: false, c: 0, d: '' });
    // `toEqual` above would also pass if `e: undefined` were still present,
    // so check explicitly that the undefined-valued key was removed.
    expect('e' in attrs).toBe(false);
  });

  it('recurses into nested content', () => {
    const d = doc({
      type: 'table',
      content: [row([cell('tableCell', null, '', { gone: undefined, colwidth: null })])],
    });
    const res = sanitizeForYjs(d);
    // table -> row -> cell
    const cellAttrs = res.content[0].content[0].content[0].attrs;
    expect('gone' in cellAttrs).toBe(false);
    expect(cellAttrs.colwidth).toBeNull();
  });

  it('does not mutate the input doc', () => {
    const d = doc({ type: 'paragraph', attrs: { id: 'p', gone: undefined } });
    // structuredClone preserves an explicit `undefined` value key, so snapshot it.
    const snapshot = structuredClone(d);
    sanitizeForYjs(d);
    expect(d).toEqual(snapshot);
    // `toEqual` ignores undefined-valued keys, so confirm separately that the
    // key is still present on the input.
    expect('gone' in d.content[0].attrs).toBe(true);
  });
});
// ===========================================================================
// findUnstorableAttr
// ===========================================================================
describe('findUnstorableAttr', () => {
  it('returns null for a fully storable doc', () => {
    expect(findUnstorableAttr(doc(para('p0', 'clean')))).toBeNull();
  });

  it('detects an undefined node attr with its path and kind', () => {
    // The offending node is the third top-level block, hence `content[2]`.
    const offender = { type: 'paragraph', attrs: { id: 'c', x: undefined } };
    const report = findUnstorableAttr(doc(para('a'), para('b'), offender));
    expect(report).toBe('content[2].attrs.x (undefined)');
  });

  it('detects a function attr', () => {
    const report = findUnstorableAttr(doc({ type: 'paragraph', attrs: { fn: () => 1 } }));
    expect(report).toBe('content[0].attrs.fn (function)');
  });

  it('detects a symbol attr', () => {
    const report = findUnstorableAttr(doc({ type: 'paragraph', attrs: { s: Symbol('x') } }));
    expect(report).toBe('content[0].attrs.s (symbol)');
  });

  it('detects a bigint attr', () => {
    const report = findUnstorableAttr(doc({ type: 'paragraph', attrs: { big: 10n } }));
    expect(report).toBe('content[0].attrs.big (bigint)');
  });

  it('detects an unstorable mark attr with the marks[i] path', () => {
    // First text node is clean; the second one carries the bad mark attr.
    const badMark = { type: 'link', attrs: { x: undefined } };
    const source = doc({
      type: 'paragraph',
      attrs: { id: 'p' },
      content: [text('hi'), text('yo', [badMark])],
    });
    expect(findUnstorableAttr(source)).toBe(
      'content[0].content[1].marks[0].attrs.x (undefined)',
    );
  });

  it('returns the FIRST hit only', () => {
    const source = doc(
      { type: 'paragraph', attrs: { first: undefined } },
      { type: 'paragraph', attrs: { second: undefined } },
    );
    expect(findUnstorableAttr(source)).toBe('content[0].attrs.first (undefined)');
  });

  it('returns null for a non-object doc', () => {
    expect(findUnstorableAttr(null)).toBeNull();
    expect(findUnstorableAttr('x')).toBeNull();
  });
});
// ===========================================================================
// insertNodeRelative
// ===========================================================================
describe('insertNodeRelative', () => {
  /** The node being inserted in most tests: a plain paragraph. */
  const block = (id: string, value = '') => para(id, value);
  /** Collects `attrs.id` from a list of sibling nodes, in order. */
  const idsOf = (nodes: any) => nodes.map((node: any) => node.attrs.id);

  it('appends a node to top-level content', () => {
    const source = doc(para('p0'));
    const outcome = insertNodeRelative(source, block('new', 'N'), { position: 'append' });
    expect(outcome.inserted).toBe(true);
    expect(idsOf(outcome.doc.content)).toEqual(['p0', 'new']);
  });

  it('creates a content array when appending to a doc without one', () => {
    const outcome = insertNodeRelative({ type: 'doc' }, block('new'), { position: 'append' });
    expect(outcome.inserted).toBe(true);
    expect(idsOf(outcome.doc.content)).toEqual(['new']);
  });

  it('inserts before a node by id (top level)', () => {
    const source = doc(para('p0'), para('p1'));
    const outcome = insertNodeRelative(source, block('new'), {
      position: 'before',
      anchorNodeId: 'p1',
    });
    expect(outcome.inserted).toBe(true);
    expect(idsOf(outcome.doc.content)).toEqual(['p0', 'new', 'p1']);
  });

  it('inserts after a node by id (top level)', () => {
    const source = doc(para('p0'), para('p1'));
    const outcome = insertNodeRelative(source, block('new'), {
      position: 'after',
      anchorNodeId: 'p0',
    });
    expect(outcome.inserted).toBe(true);
    expect(idsOf(outcome.doc.content)).toEqual(['p0', 'new', 'p1']);
  });

  it('inserts before a NESTED anchor by id, into its own parent content', () => {
    const source = doc({
      type: 'table',
      content: [row([cell('tableCell', 'inner', 'x')])],
    });
    const outcome = insertNodeRelative(source, block('new'), {
      position: 'before',
      anchorNodeId: 'inner',
    });
    expect(outcome.inserted).toBe(true);
    // A non-structural node lands in the cell's own content, ahead of the anchor paragraph.
    const insideCell = outcome.doc.content[0].content[0].content[0].content;
    expect(idsOf(insideCell)).toEqual(['new', 'inner']);
  });

  it('inserts by anchorText against top-level blocks (substring match)', () => {
    const source = doc(para('p0', 'hello world'), para('p1', 'other'));
    const outcome = insertNodeRelative(source, block('new'), {
      position: 'after',
      anchorText: 'world',
    });
    expect(outcome.inserted).toBe(true);
    expect(idsOf(outcome.doc.content)).toEqual(['p0', 'new', 'p1']);
  });

  it('returns inserted:false when the anchor cannot be resolved', () => {
    const source = doc(para('p0'));

    const viaId = insertNodeRelative(source, block('new'), {
      position: 'after',
      anchorNodeId: 'nope',
    });
    expect(viaId.inserted).toBe(false);
    expect(viaId.doc).toEqual(source);

    const viaText = insertNodeRelative(source, block('new'), {
      position: 'before',
      anchorText: 'zzz',
    });
    expect(viaText.inserted).toBe(false);
    expect(viaText.doc).toEqual(source);
  });

  it('routes a structural tableRow to the nearest table container', () => {
    const source = doc({
      type: 'table',
      content: [
        row([cell('tableCell', 'r0c0', 'A')]),
        row([cell('tableCell', 'r1c0', 'B')]),
      ],
    });
    const extraRow = row([cell('tableCell', 'rNew', 'NEW')]);
    // The anchor is the paragraph inside row 0's cell; "after" is expected to
    // place the new row right behind row 0.
    const outcome = insertNodeRelative(source, extraRow, {
      position: 'after',
      anchorNodeId: 'r0c0',
    });
    expect(outcome.inserted).toBe(true);
    // Identify each row by the id of the paragraph in its first cell.
    const firstCellParaId = (r: any) => r.content[0].content[0].attrs.id;
    const rowKeys = outcome.doc.content[0].content.map(firstCellParaId);
    expect(rowKeys).toEqual(['r0c0', 'rNew', 'r1c0']);
  });

  it('throws when appending a structural node at the top level', () => {
    const source = doc(para('p0'));
    const extraRow = row([cell('tableCell', 'x', 'X')]);
    const attempt = () => insertNodeRelative(source, extraRow, { position: 'append' });
    expect(attempt).toThrow(/cannot append a tableRow at the top level/);
  });

  it('throws when a structural anchor is not inside the required container', () => {
    // The anchor is a top-level paragraph with no enclosing table.
    const source = doc(para('p0', 'loose'));
    const extraRow = row([cell('tableCell', 'x', 'X')]);
    const attempt = () =>
      insertNodeRelative(source, extraRow, { position: 'after', anchorNodeId: 'p0' });
    expect(attempt).toThrow(/the anchor is not inside a table/);
  });

  it('honours offset: before vs after place the node on the correct side', () => {
    const source = doc(para('a'), para('b'), para('c'));

    const placedBefore = insertNodeRelative(source, block('N'), {
      position: 'before',
      anchorNodeId: 'b',
    });
    expect(idsOf(placedBefore.doc.content)).toEqual(['a', 'N', 'b', 'c']);

    const placedAfter = insertNodeRelative(source, block('N'), {
      position: 'after',
      anchorNodeId: 'b',
    });
    expect(idsOf(placedAfter.doc.content)).toEqual(['a', 'b', 'N', 'c']);
  });

  it('does not mutate the input doc or the node argument', () => {
    const source = doc(para('p0'));
    const sourceBefore = structuredClone(source);
    const incoming = block('new', 'N');
    const incomingBefore = structuredClone(incoming);
    insertNodeRelative(source, incoming, { position: 'append' });
    expect(source).toEqual(sourceBefore);
    expect(incoming).toEqual(incomingBefore);
  });
});
// ===========================================================================
// readTable
// ===========================================================================
describe('readTable', () => {
  /** Fresh 2x2 table: one header row (h0, h1) and one data row (c0, c1). */
  function makeTable() {
    return {
      type: 'table',
      content: [
        row([cell('tableHeader', 'h0', 'H0'), cell('tableHeader', 'h1', 'H1')]),
        row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]),
      ],
    };
  }

  it('reads a table by #n', () => {
    // The table is the second top-level block, so it is addressed as '#1'.
    const source = doc(para('p0'), makeTable());
    const info = readTable(source, '#1');
    expect(info).not.toBeNull();
    expect(info!.rows).toBe(2);
    expect(info!.cols).toBe(2);
    expect(info!.cells).toEqual([['H0', 'H1'], ['A', 'B']]);
    expect(info!.cellIds).toEqual([['h0', 'h1'], ['c0', 'c1']]);
    expect(info!.path).toEqual([1]);
  });

  it('climbs from an inner paragraph id up to the table', () => {
    const source = doc(makeTable());
    // 'c1' is the id of a paragraph inside a data cell, not of the table itself.
    const info = readTable(source, 'c1');
    expect(info).not.toBeNull();
    expect(info!.path).toEqual([0]);
    expect(info!.cells).toEqual([['H0', 'H1'], ['A', 'B']]);
  });

  it('reports per-row widths via cells for a ragged table', () => {
    const ragged = {
      type: 'table',
      content: [
        row([cell('tableHeader', 'h0', 'H0')]),
        row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]),
      ],
    };
    const info = readTable(doc(ragged), '#0');
    // `cols` reflects row 0 only ...
    expect(info!.cols).toBe(1);
    // ... while `cells` / `cellIds` keep each row's real width.
    expect(info!.cells).toEqual([['H0'], ['A', 'B']]);
    expect(info!.cellIds).toEqual([['h0'], ['c0', 'c1']]);
  });

  it('reports null cellId for an empty cell with no paragraph', () => {
    const withEmptyCell = {
      type: 'table',
      content: [row([cell('tableCell', null), cell('tableCell', 'c1', 'B')])],
    };
    const info = readTable(doc(withEmptyCell), '#0');
    expect(info!.cells).toEqual([['', 'B']]);
    expect(info!.cellIds).toEqual([[null, 'c1']]);
  });

  it('returns null when the ref matches no table', () => {
    const source = doc(para('p0'));
    // '#0' addresses a paragraph, which is not a table.
    expect(readTable(source, '#0')).toBeNull();
    // Unknown id.
    expect(readTable(source, 'missing')).toBeNull();
    // Known id, but nothing table-like encloses it.
    expect(readTable(source, 'p0')).toBeNull();
  });
});
// ===========================================================================
// insertTableRow
// ===========================================================================
describe('insertTableRow', () => {
  /** Fresh 2x2 table whose header cells carry distinct colwidth values. */
  function makeTable() {
    return {
      type: 'table',
      content: [
        row([
          cell('tableHeader', 'h0', 'H0', { colwidth: [120] }),
          cell('tableHeader', 'h1', 'H1', { colwidth: [240] }),
        ]),
        row([cell('tableCell', 'c0', 'A'), cell('tableCell', 'c1', 'B')]),
      ],
    };
  }

  /** Id of the first paragraph in each cell of a row (undefined when a cell has none). */
  const rowCellParaIds = (r: any): (string | undefined)[] =>
    r.content.map((c: any) => c.content[0]?.attrs?.id);

  /** Plain text of each cell in a row. */
  const rowTexts = (r: any): string[] => r.content.map((c: any) => blockPlainText(c));

  it('appends a row when index is omitted', () => {
    const source = doc(makeTable());
    const outcome = insertTableRow(source, '#0', ['X', 'Y']);
    expect(outcome.inserted).toBe(true);
    const tableRows = outcome.doc.content[0].content;
    expect(tableRows.length).toBe(3);
    expect(rowTexts(tableRows[2])).toEqual(['X', 'Y']);
  });

  it('splices at a middle index', () => {
    const source = doc(makeTable());
    const outcome = insertTableRow(source, '#0', ['X', 'Y'], 1);
    const tableRows = outcome.doc.content[0].content;
    expect(tableRows.length).toBe(3);
    // The new row occupies index 1 and the former data row shifts to index 2.
    expect(rowTexts(tableRows[1])).toEqual(['X', 'Y']);
    expect(rowTexts(tableRows[2])).toEqual(['A', 'B']);
  });

  it('splices at the end index', () => {
    const source = doc(makeTable());
    // The table has 2 rows, so index 2 is the valid "one past the end" slot.
    const outcome = insertTableRow(source, '#0', ['X', 'Y'], 2);
    const tableRows = outcome.doc.content[0].content;
    expect(tableRows.length).toBe(3);
    expect(rowTexts(tableRows[2])).toEqual(['X', 'Y']);
  });

  it('APPENDS (does not throw) for an out-of-range index', () => {
    const source = doc(makeTable());
    const outcome = insertTableRow(source, '#0', ['X', 'Y'], 99);
    const tableRows = outcome.doc.content[0].content;
    expect(outcome.inserted).toBe(true);
    expect(tableRows.length).toBe(3);
    // The row ends up last.
    expect(rowTexts(tableRows[2])).toEqual(['X', 'Y']);
  });

  it('throws when given more cells than columns', () => {
    const source = doc(makeTable());
    const attempt = () => insertTableRow(source, '#0', ['X', 'Y', 'Z']);
    expect(attempt).toThrow(/got 3 cell\(s\) but the table has 2 column\(s\)/);
  });

  it('pads a short row to the column count', () => {
    const source = doc(makeTable());
    const outcome = insertTableRow(source, '#0', ['only']);
    const tableRows = outcome.doc.content[0].content;
    // The missing second cell is filled with an empty one.
    expect(rowTexts(tableRows[2])).toEqual(['only', '']);
  });

  it('copies colwidth from the header row for each column', () => {
    const source = doc(makeTable());
    const outcome = insertTableRow(source, '#0', ['X', 'Y']);
    const added = outcome.doc.content[0].content[2];
    expect(added.content[0].attrs.colwidth).toEqual([120]);
    expect(added.content[1].attrs.colwidth).toEqual([240]);
    expect(added.content[0].attrs).toMatchObject({ colspan: 1, rowspan: 1 });
  });

  it('index 0 inherits the header cell TYPE', () => {
    const source = doc(makeTable());

    const atTop = insertTableRow(source, '#0', ['X', 'Y'], 0);
    const topRow = atTop.doc.content[0].content[0];
    expect(topRow.content.every((c: any) => c.type === 'tableHeader')).toBe(true);

    // Any other index yields ordinary data cells.
    const atOne = insertTableRow(source, '#0', ['X', 'Y'], 1);
    const bodyRow = atOne.doc.content[0].content[1];
    expect(bodyRow.content.every((c: any) => c.type === 'tableCell')).toBe(true);
  });

  it('mints unique, well-formed paragraph ids for new cells', () => {
    const source = doc(makeTable());
    const preexisting = new Set(['h0', 'h1', 'c0', 'c1']);
    const outcome = insertTableRow(source, '#0', ['X', 'Y']);
    const added = outcome.doc.content[0].content[2];
    const mintedIds = rowCellParaIds(added) as string[];
    mintedIds.forEach((id) => {
      expect(typeof id).toBe('string');
      // 12 lowercase alphanumerics (the Docmost-style id shape).
      expect(id).toMatch(/^[a-z0-9]{12}$/);
      // Must not collide with an id already present in the table.
      expect(preexisting.has(id)).toBe(false);
    });
    // No duplicates among the freshly minted ids either.
    expect(new Set(mintedIds).size).toBe(mintedIds.length);
  });

  it('returns inserted:false when the table cannot be located', () => {
    const source = doc(para('p0'));
    const outcome = insertTableRow(source, 'missing', ['X']);
    expect(outcome.inserted).toBe(false);
    expect(outcome.doc).toEqual(source);
  });

  it('does not mutate the input doc', () => {
    const source = doc(makeTable());
    const before = structuredClone(source);
    insertTableRow(source, '#0', ['X', 'Y'], 1);
    expect(source).toEqual(before);
  });
});
// ===========================================================================
// deleteTableRow
// ===========================================================================
describe('deleteTableRow', () => {
  /** Fresh single-column table: header row h0, then data rows c0 and c1. */
  function makeTable() {
    return {
      type: 'table',
      content: [
        row([cell('tableHeader', 'h0', 'H')]),
        row([cell('tableCell', 'c0', 'A')]),
        row([cell('tableCell', 'c1', 'B')]),
      ],
    };
  }

  /** Identifies a row by the id of the paragraph inside its first cell. */
  const firstId = (r: any) => r.content[0].content[0].attrs.id;

  it('deletes a middle row and preserves siblings', () => {
    const source = doc(makeTable());
    const outcome = deleteTableRow(source, '#0', 1);
    expect(outcome.deleted).toBe(true);
    const remaining = outcome.doc.content[0].content.map(firstId);
    expect(remaining).toEqual(['h0', 'c1']);
  });

  it('deletes the first row', () => {
    const source = doc(makeTable());
    const outcome = deleteTableRow(source, '#0', 0);
    const remaining = outcome.doc.content[0].content.map(firstId);
    expect(remaining).toEqual(['c0', 'c1']);
  });

  it('deletes the last row', () => {
    const source = doc(makeTable());
    const outcome = deleteTableRow(source, '#0', 2);
    const remaining = outcome.doc.content[0].content.map(firstId);
    expect(remaining).toEqual(['h0', 'c0']);
  });

  it('throws on an out-of-range index', () => {
    const source = doc(makeTable());
    // Past the end, then below zero.
    expect(() => deleteTableRow(source, '#0', 99)).toThrow(/out of range/);
    expect(() => deleteTableRow(source, '#0', -1)).toThrow(/out of range/);
  });

  it('throws when asked to delete the only row', () => {
    const oneRowTable = {
      type: 'table',
      content: [row([cell('tableCell', 'c0', 'A')])],
    };
    const attempt = () => deleteTableRow(doc(oneRowTable), '#0', 0);
    expect(attempt).toThrow(/refusing to delete the only row/);
  });

  it('returns deleted:false when the table cannot be located', () => {
    const source = doc(para('p0'));
    const outcome = deleteTableRow(source, 'missing', 0);
    expect(outcome.deleted).toBe(false);
    expect(outcome.doc).toEqual(source);
  });

  it('does not mutate the input doc', () => {
    const source = doc(makeTable());
    const before = structuredClone(source);
    deleteTableRow(source, '#0', 1);
    expect(source).toEqual(before);
  });
});
// ===========================================================================
// updateTableCell
// ===========================================================================
describe('updateTableCell', () => {
  /**
   * Fresh 2x2 table. The data cell at (row 1, col 0) carries explicit
   * colspan/rowspan/colwidth attrs so their preservation can be asserted.
   */
  function makeTable() {
    return {
      type: 'table',
      content: [
        row([cell('tableHeader', 'h0', 'H0'), cell('tableHeader', 'h1', 'H1')]),
        row([
          cell('tableCell', 'c0', 'A', { colspan: 2, rowspan: 3, colwidth: [200] }),
          cell('tableCell', 'c1', 'B'),
        ]),
      ],
    };
  }

  it('sets the cell text', () => {
    const source = doc(makeTable());
    const outcome = updateTableCell(source, '#0', 1, 1, 'NEW');
    expect(outcome.updated).toBe(true);
    const touchedCell = outcome.doc.content[0].content[1].content[1];
    expect(blockPlainText(touchedCell)).toBe('NEW');
  });

  it('REUSES the existing first-paragraph id', () => {
    const source = doc(makeTable());
    const outcome = updateTableCell(source, '#0', 1, 0, 'changed');
    // table -> row 1 -> cell 0 -> first paragraph
    const firstPara = outcome.doc.content[0].content[1].content[0].content[0];
    // The id must be carried over, never regenerated.
    expect(firstPara.attrs.id).toBe('c0');
    expect(firstPara.content[0].text).toBe('changed');
  });

  it('mints a fresh id when the cell had no paragraph', () => {
    const source = doc({
      type: 'table',
      content: [row([cell('tableCell', null), cell('tableCell', 'c1', 'B')])],
    });
    const outcome = updateTableCell(source, '#0', 0, 0, 'now has text');
    const createdPara = outcome.doc.content[0].content[0].content[0].content[0];
    expect(typeof createdPara.attrs.id).toBe('string');
    expect(createdPara.attrs.id).toMatch(/^[a-z0-9]{12}$/);
    // Must differ from the id already used by the neighbouring cell.
    expect(createdPara.attrs.id).not.toBe('c1');
    expect(createdPara.content[0].text).toBe('now has text');
  });

  it('PRESERVES the cell colspan/rowspan/colwidth (only content replaced)', () => {
    const source = doc(makeTable());
    const outcome = updateTableCell(source, '#0', 1, 0, 'x');
    const touchedCell = outcome.doc.content[0].content[1].content[0];
    expect(touchedCell.attrs).toEqual({ colspan: 2, rowspan: 3, colwidth: [200] });
  });

  it('throws when row or col is out of range', () => {
    const source = doc(makeTable());
    // Row too large, column too large, negative row.
    expect(() => updateTableCell(source, '#0', 5, 0, 'x')).toThrow(/out of range/);
    expect(() => updateTableCell(source, '#0', 0, 5, 'x')).toThrow(/out of range/);
    expect(() => updateTableCell(source, '#0', -1, 0, 'x')).toThrow(/out of range/);
  });

  it('an empty string yields an empty paragraph content array', () => {
    const source = doc(makeTable());
    const outcome = updateTableCell(source, '#0', 1, 1, '');
    const firstPara = outcome.doc.content[0].content[1].content[1].content[0];
    expect(firstPara.type).toBe('paragraph');
    // '' produces no text node at all.
    expect(firstPara.content).toEqual([]);
    // The paragraph id is still carried over.
    expect(firstPara.attrs.id).toBe('c1');
  });

  it('returns updated:false when the table cannot be located', () => {
    const source = doc(para('p0'));
    const outcome = updateTableCell(source, 'missing', 0, 0, 'x');
    expect(outcome.updated).toBe(false);
    expect(outcome.doc).toEqual(source);
  });

  it('does not mutate the input doc', () => {
    const source = doc(makeTable());
    const before = structuredClone(source);
    updateTableCell(source, '#0', 1, 1, 'NEW');
    expect(source).toEqual(before);
  });
});

View File

@@ -0,0 +1,238 @@
import { describe, expect, it } from 'vitest';
import {
planReconciliation,
decideAbsenceDeletions,
type ExistingEntry,
type LiveEntry,
} from '../src/engine/reconcile.js';
describe('planReconciliation', () => {
  it('ADD: a new live page (not tracked) is written, nothing deleted', () => {
    const liveEntries: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/New.md' }];
    const tracked: ExistingEntry[] = [];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    expect(toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/New.md' }]);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([]);
  });

  it('CONTENT-UPDATE: tracked page at the SAME path is rewritten, not moved/deleted', () => {
    const liveEntries: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Doc.md' }];
    const tracked: ExistingEntry[] = [{ pageId: 'p1', relPath: 'Space/Doc.md' }];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    // The page is re-emitted (identical bytes => git no-op) without any move or delete.
    expect(toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Doc.md' }]);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([]);
  });

  it('MOVE: same pageId, new path -> write new + recorded as moved (NOT in toDelete)', () => {
    const liveEntries: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/NewParent/Doc.md' }];
    const tracked: ExistingEntry[] = [{ pageId: 'p1', relPath: 'Space/OldParent/Doc.md' }];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    expect(toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/NewParent/Doc.md' }]);
    // Removal of the old path belongs to the move record, not to absence deletes.
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([
      {
        pageId: 'p1',
        fromRelPath: 'Space/OldParent/Doc.md',
        toRelPath: 'Space/NewParent/Doc.md',
        removeOldPath: true,
      },
    ]);
  });

  it('DELETE: a tracked pageId gone from live -> its file is deleted', () => {
    const liveEntries: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Keep.md' }];
    const tracked: ExistingEntry[] = [
      { pageId: 'p1', relPath: 'Space/Keep.md' },
      { pageId: 'p2', relPath: 'Space/Gone.md' },
    ];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    expect(toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Keep.md' }]);
    expect(toDelete).toEqual(['Space/Gone.md']);
    expect(moved).toEqual([]);
  });

  it('NO-OP: live and existing identical -> writes (re-emit) but no deletes/moves', () => {
    const liveEntries: LiveEntry[] = [
      { pageId: 'p1', relPath: 'A.md' },
      { pageId: 'p2', relPath: 'B.md' },
    ];
    const tracked: ExistingEntry[] = [
      { pageId: 'p1', relPath: 'A.md' },
      { pageId: 'p2', relPath: 'B.md' },
    ];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    expect(toWrite).toEqual(liveEntries);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([]);
  });

  it('does NOT delete an old path that another live page will write (path reuse)', () => {
    // Scenario: p1 relocates from X.md to Y.md while a brand-new page p2 takes
    // over X.md. Since p2 writes X.md, that path must survive.
    const liveEntries: LiveEntry[] = [
      { pageId: 'p1', relPath: 'Y.md' },
      { pageId: 'p2', relPath: 'X.md' },
    ];
    const tracked: ExistingEntry[] = [{ pageId: 'p1', relPath: 'X.md' }];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    // Compared as sets: the order of the writes is not asserted here.
    const expectedWrites = new Set([
      { pageId: 'p1', relPath: 'Y.md' },
      { pageId: 'p2', relPath: 'X.md' },
    ]);
    expect(new Set(toWrite)).toEqual(expectedWrites);
    // X.md is still a live target, hence no deletions.
    expect(toDelete).toEqual([]);
    // The move is recorded, but flagged removeOldPath:false so the reused path
    // (now written by p2) is protected from data loss.
    expect(moved).toEqual([
      { pageId: 'p1', fromRelPath: 'X.md', toRelPath: 'Y.md', removeOldPath: false },
    ]);
  });

  it('combines add + update + move + delete in one plan', () => {
    const liveEntries: LiveEntry[] = [
      { pageId: 'keep', relPath: 'Keep.md' }, // updated in place
      { pageId: 'mover', relPath: 'New/Moved.md' }, // relocated
      { pageId: 'fresh', relPath: 'Fresh.md' }, // newly added
    ];
    const tracked: ExistingEntry[] = [
      { pageId: 'keep', relPath: 'Keep.md' },
      { pageId: 'mover', relPath: 'Old/Moved.md' },
      { pageId: 'dead', relPath: 'Dead.md' }, // no longer live
    ];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    expect(toWrite).toEqual(liveEntries);
    expect(moved).toEqual([
      {
        pageId: 'mover',
        fromRelPath: 'Old/Moved.md',
        toRelPath: 'New/Moved.md',
        removeOldPath: true,
      },
    ]);
    // toDelete is absence-only: the relocated page's old path is reported via
    // `moved`, which leaves only the genuinely vanished page (Dead.md).
    expect(toDelete).toEqual(['Dead.md']);
  });

  it('records each duplicate tracked row of a present pageId as a removable move', () => {
    // Two stray files both claim pageId "dup" while the live page sits at a
    // third path. Each stray counts as a MOVE (same pageId, different path):
    // it is reported in `moved` with removeOldPath:true, not in toDelete.
    const liveEntries: LiveEntry[] = [{ pageId: 'dup', relPath: 'Canonical.md' }];
    const tracked: ExistingEntry[] = [
      { pageId: 'dup', relPath: 'StrayA.md' },
      { pageId: 'dup', relPath: 'StrayB.md' },
    ];

    const { toWrite, toDelete, moved } = planReconciliation(liveEntries, tracked);

    expect(toWrite).toEqual([{ pageId: 'dup', relPath: 'Canonical.md' }]);
    expect(toDelete).toEqual([]);
    expect(moved).toEqual([
      {
        pageId: 'dup',
        fromRelPath: 'StrayA.md',
        toRelPath: 'Canonical.md',
        removeOldPath: true,
      },
      {
        pageId: 'dup',
        fromRelPath: 'StrayB.md',
        toRelPath: 'Canonical.md',
        removeOldPath: true,
      },
    ]);
  });
});
describe('decideAbsenceDeletions (SPEC §8)', () => {
  it('APPLIES when the tree is complete and the delete count is modest', () => {
    const decision = decideAbsenceDeletions({
      treeComplete: true,
      liveCount: 10,
      existingCount: 10,
      deleteCount: 1,
    });
    expect(decision).toEqual({ apply: true });
  });

  it('SUPPRESSES all absence deletions when the tree fetch is incomplete', () => {
    // A pageId missing from a PARTIAL tree does not prove the page was deleted,
    // so even one absence delete is held back.
    const decision = decideAbsenceDeletions({
      treeComplete: false,
      liveCount: 9,
      existingCount: 10,
      deleteCount: 1,
    });
    expect(decision).toEqual({ apply: false, reason: 'incomplete-fetch' });
  });

  it('SUPPRESSES when live returned 0 pages but files are tracked (complete flag aside)', () => {
    const decision = decideAbsenceDeletions({
      treeComplete: true,
      liveCount: 0,
      existingCount: 5,
      deleteCount: 5,
    });
    expect(decision).toEqual({ apply: false, reason: 'empty-live' });
  });

  it('SUPPRESSES over the mass-delete guard (> 50% of a non-trivial vault)', () => {
    const decision = decideAbsenceDeletions({
      treeComplete: true,
      liveCount: 4,
      existingCount: 10,
      deleteCount: 6, // 6 of 10 = 60%, above the 50% guard
    });
    expect(decision).toEqual({ apply: false, reason: 'mass-delete' });
  });

  it('does NOT apply the fraction guard for a tiny vault (below the floor)', () => {
    // Losing 1 of 2 files is ordinary for a tiny vault, so the fraction guard stays quiet.
    const decision = decideAbsenceDeletions({
      treeComplete: true,
      liveCount: 1,
      existingCount: 2,
      deleteCount: 1,
    });
    expect(decision).toEqual({ apply: true });
  });

  it('incomplete-fetch takes precedence over the mass-delete reason', () => {
    // Same counts as the mass-delete case, but with an incomplete tree.
    const decision = decideAbsenceDeletions({
      treeComplete: false,
      liveCount: 4,
      existingCount: 10,
      deleteCount: 6,
    });
    expect(decision).toEqual({ apply: false, reason: 'incomplete-fetch' });
  });

  it('trivially applies when nothing is tracked or nothing would be deleted', () => {
    // Nothing tracked at all.
    const nothingTracked = decideAbsenceDeletions({
      treeComplete: false,
      liveCount: 0,
      existingCount: 0,
      deleteCount: 0,
    });
    expect(nothingTracked).toEqual({ apply: true });

    // Files are tracked, but none of them would be deleted.
    const nothingToDelete = decideAbsenceDeletions({
      treeComplete: false,
      liveCount: 5,
      existingCount: 5,
      deleteCount: 0,
    });
    expect(nothingToDelete).toEqual({ apply: true });
  });
});

View File

@@ -0,0 +1,104 @@
import { readFile } from 'node:fs/promises';
import { readdirSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import { describe, expect, it } from 'vitest';
import {
convertProseMirrorToMarkdown,
markdownToProseMirror,
docsCanonicallyEqual,
} from 'docmost-client';
// Resolve fixtures relative to this test file so the test is CWD-independent.
const here = dirname(fileURLToPath(import.meta.url));
const CORPUS_DIR = join(here, 'fixtures', 'corpus');
const KNOWN_LIMITATIONS_DIR = join(here, 'fixtures', 'known-limitations');
/**
 * Pushes one document through export -> import -> export.
 *
 * @param doc ProseMirror-style document JSON to start from.
 * @returns `md1` (first export), `doc2` (the document re-imported from `md1`)
 *   and `md2` (the export of `doc2`).
 */
async function roundTrip(doc: any) {
  const md1 = convertProseMirrorToMarkdown(doc);
  const doc2 = await markdownToProseMirror(md1);
  return { md1, md2: convertProseMirrorToMarkdown(doc2), doc2 };
}
describe('round-trip corpus (SPEC §11)', () => {
  // The fixture list is read synchronously while the suite is being collected,
  // which lets every fixture become its own `it` titled with the file name.
  const isJson = (name: string) => name.endsWith('.json');
  const files = readdirSync(CORPUS_DIR).filter(isJson).sort();

  it('has a non-empty corpus', () => {
    expect(files.length).toBeGreaterThan(0);
  });

  files.forEach((name) => {
    it(`${name}: markdown byte-stable AND canonically stable`, async () => {
      const raw = await readFile(join(CORPUS_DIR, name), 'utf8');
      const doc = JSON.parse(raw);
      const { md1, md2, doc2 } = await roundTrip(doc);

      // 1) Byte-for-byte identical markdown: the property git relies on.
      expect(md2, `${name}: markdown not byte-stable`).toBe(md1);

      // 2) Semantic equality after canonicalization (block ids stripped,
      //    default-null normalized).
      const canonicallyEqual = docsCanonicallyEqual(doc, doc2);
      expect(canonicallyEqual, `${name}: document not canonically stable`).toBe(true);
    });
  });
});
// ---------------------------------------------------------------------------
// KNOWN CONVERTER LIMITATIONS (isolated so they do NOT make CI red).
//
// SPEC §11 explicitly flags images and diagrams as high round-trip risk. These
// fixtures are kept OUT of the green corpus above and asserted with `it.fails`
// so the documented divergence is locked in (the test FAILS if the converter
// ever starts round-tripping them — at which point promote the fixture into
// the corpus). The precise divergences for `image-diagrams.json` are:
//
// * A BLOCK-LEVEL image preceded by a paragraph is NOT byte-stable on the
// FIRST re-export. The HTML re-parser hoists the block <img> out of its
// line and leaves an empty paragraph behind, so `paragraph` + `![..](..)`
// re-imports as paragraph + empty-paragraph + image; the empty paragraph
// adds one blank line, so export #2 grows by a one-time "\n\n" (md1 !== md2).
// This is NOT non-convergence: the growth happens exactly ONCE. The doc
// CONVERGES to a fixpoint after one extra `export→import→export` pass — the
// empty paragraph is already present after the first import, so export #2
// and export #3 are byte-identical (md2 === md3, verified).
//
// * drawio / excalidraw diagrams gain `data-align="center"` on the second
// export: the schema's diagram `align` attribute has a NON-null default of
// "center", which materializes on import; the converter only emits
// data-align when set, so it appears on export #2 but not #1. Like the
// image case, this is one-time and converges after one extra pass.
//
// * A STANDALONE block image (no preceding paragraph) IS byte-stable from
// export #1 (md1 === md2) — but it is still NOT canonically stable: on
// import the bare <img> is wrapped, gaining a leading EMPTY paragraph, so
// the canonical doc differs by that spurious paragraph node even though the
// markdown bytes match.
//
// Resolution (SPEC §11, "normalize-on-write"): rather than deep-fixing the
// converter, the engine runs ONE `export→import→export` pass when writing into
// the vault; from that fixpoint onward the form is byte-stable, so git sees no
// phantom diff. The green corpus above avoids these one-time asymmetries by
// pre-authoring the materialized defaults (e.g. `align: "center"` on the
// diagrams in 06-diagrams.json) so a single pass is already at the fixpoint.
// ---------------------------------------------------------------------------
describe('round-trip KNOWN LIMITATIONS (SPEC §11 image/diagram risk)', () => {
  it.fails(
    'image-diagrams.json is NOT byte-stable on export #1 (block image hoist + diagram align default; converges after one extra pass — SPEC §11 normalize-on-write)',
    async () => {
      const fixturePath = join(KNOWN_LIMITATIONS_DIR, 'image-diagrams.json');
      const raw = await readFile(fixturePath, 'utf8');
      const { md1, md2 } = await roundTrip(JSON.parse(raw));

      // The assertion below does not hold today (the documented divergence).
      // Because the test is registered with `it.fails`, a failing body is
      // reported as a PASS; once the converter is fixed the body succeeds, the
      // test turns red, and the fixture should be promoted into the green
      // corpus.
      expect(md2).toBe(md1);
    },
  );
});

View File

@@ -0,0 +1,29 @@
import { readFile } from 'node:fs/promises';
import { fileURLToPath } from 'node:url';
import { dirname, join } from 'node:path';
import { describe, expect, it } from 'vitest';
import {
convertProseMirrorToMarkdown,
markdownToProseMirror,
} from 'docmost-client';
// Anchor the fixture to this test file's own directory so the test does not
// depend on the working directory it is launched from.
const FIXTURE = join(
  dirname(fileURLToPath(import.meta.url)),
  'fixtures',
  'sample-doc.json',
);

describe('round-trip idempotency (SPEC §11)', () => {
  it('markdown is byte-stable across export -> import -> export', async () => {
    const source = JSON.parse(await readFile(FIXTURE, 'utf8'));

    // export -> import -> export
    const firstExport = convertProseMirrorToMarkdown(source);
    const reimported = await markdownToProseMirror(firstExport);
    const secondExport = convertProseMirrorToMarkdown(reimported);

    // Git only needs the second export to reproduce the first byte-for-byte.
    // The source and re-imported documents are deliberately NOT deep-compared:
    // the converter reconstructs schema default attrs (e.g. indent:null), a
    // known SPEC §11 divergence that leaves markdown stability unaffected.
    expect(secondExport).toBe(firstExport);
  });
});

View File

@@ -0,0 +1,96 @@
import { describe, expect, it } from 'vitest';
import { sanitizeTitle, disambiguate } from '../src/engine/sanitize.js';
describe('sanitizeTitle', () => {
  it('passes a plain title through unchanged', () => {
    expect(sanitizeTitle('Getting Started')).toBe('Getting Started');
  });

  it('replaces every forbidden printable character with a dash', () => {
    // Forbidden set: / \ < > : " | ? *
    expect(sanitizeTitle('a/b\\c<d>e:f"g|h?i*j')).toBe('a-b-c-d-e-f-g-h-i-j');
  });

  it('replaces ASCII control characters with a dash', () => {
    // Build the input with explicit control code points (tab=9, newline=10) to
    // avoid editor escaping pitfalls. Control chars become "-" BEFORE
    // whitespace is collapsed, so they survive as dashes (not a folded space).
    const TAB = String.fromCharCode(9);
    const NL = String.fromCharCode(10);
    expect(sanitizeTitle('a b' + TAB + 'c' + NL + 'd')).toBe('a b-c-d');
  });

  it('collapses runs of plain whitespace to a single space and trims', () => {
    // Single leading/trailing spaces: exercises trimming only.
    expect(sanitizeTitle(' hello world ')).toBe('hello world');

    // Real multi-space RUNS (leading, inner, trailing). The runs are built with
    // repeat() so the input provably contains them even if an editor or a
    // rendering layer collapses consecutive spaces inside a string literal.
    const spaces = (count: number) => ' '.repeat(count);
    const withRuns = spaces(2) + 'hello' + spaces(4) + 'world' + spaces(3);
    expect(sanitizeTitle(withRuns)).toBe('hello world');
  });

  it('caps the length at 120 characters', () => {
    const long = 'x'.repeat(200);
    const out = sanitizeTitle(long);
    expect(out.length).toBe(120);
    expect(out).toBe('x'.repeat(120));
  });

  it('prefixes reserved Windows names with an underscore', () => {
    expect(sanitizeTitle('CON')).toBe('_CON');
    expect(sanitizeTitle('nul')).toBe('_nul');
    // The base name (before the first dot) is what matters.
    expect(sanitizeTitle('con.md')).toBe('_con.md');
  });

  it('does not flag names that merely contain a reserved word', () => {
    expect(sanitizeTitle('console')).toBe('console');
    expect(sanitizeTitle('Control')).toBe('Control');
  });

  it('returns "_" for empty or whitespace-only input', () => {
    expect(sanitizeTitle('')).toBe('_');
    expect(sanitizeTitle(' ')).toBe('_');
  });

  it('handles a title that is only forbidden characters', () => {
    // Each forbidden char becomes "-", so the result is non-empty and safe.
    expect(sanitizeTitle('///')).toBe('---');
  });

  it('neutralizes all-dot names so they cannot escape the vault', () => {
    // ".", "..", "..." (and whitespace-padded variants) are path-traversal
    // hazards as directory segments. The result must never be a pure-dot
    // segment and must contain no path separators.
    for (const input of ['.', '..', '...', ' .. ']) {
      const out = sanitizeTitle(input);
      expect(['.', '..', '...']).not.toContain(out);
      expect(/^\.+$/.test(out)).toBe(false);
      expect(out).not.toContain('/');
      expect(out).not.toContain('\\');
    }
    // The concrete prefixing behaviour (existing "_" safeguard).
    expect(sanitizeTitle('.')).toBe('_.');
    expect(sanitizeTitle('..')).toBe('_..');
    expect(sanitizeTitle('...')).toBe('_...');
    expect(sanitizeTitle(' .. ')).toBe('_..');
  });

  it('is deterministic — the same input yields the same output', () => {
    const title = 'Some / weird : title?';
    expect(sanitizeTitle(title)).toBe(sanitizeTitle(title));
  });
});
describe('disambiguate', () => {
  it('appends a stable ~slugId suffix', () => {
    const suffixed = disambiguate('Notes', 'abc123');
    expect(suffixed).toBe('Notes ~abc123');
  });

  it('is deterministic for the same name and slugId', () => {
    // Two independent calls with identical arguments must agree.
    const first = disambiguate('Notes', 'abc123');
    const second = disambiguate('Notes', 'abc123');
    expect(first).toBe(second);
  });

  it('produces distinct names for colliding siblings', () => {
    // Same display name, different slug ids: the results must differ.
    const [a, b] = ['slug-a', 'slug-b'].map((slugId) =>
      disambiguate('Notes', slugId),
    );
    expect(a).not.toBe(b);
  });
});

View File

@@ -0,0 +1,90 @@
import { describe, expect, it } from 'vitest';
import { stabilizePageFile, type PageMeta } from '../src/engine/stabilize.js';
// markdownToProseMirror lives in lib/markdown-to-prosemirror.ts (extracted from
// upstream collaboration.ts); importing it mutates the global DOM via jsdom at
// module load time (required for @tiptap/html under Node).
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
import { parseDocmostMarkdown } from '../src/lib/markdown-document.js';
// stabilize.ts (SPEC §11 normalize-on-write) was 0% covered (only the gated e2e
// touched it). stabilizePageFile is import-testable: build a small ProseMirror
// content + meta and assert (1) the normalize-on-write pass reaches a fixpoint
// (a SECOND pass over the written body is byte-identical), and (2) the meta is
// serialized verbatim, including a null parentPageId.
//
// Shared page metadata passed to stabilizePageFile by the tests below.
// parentPageId is null here; the non-null case is derived from this object by
// spreading it and overriding that single field.
const meta: PageMeta = {
version: 1,
pageId: 'pg-1',
slugId: 'sl-1',
title: 'My Title',
spaceId: 'sp-1',
parentPageId: null,
};
describe('stabilizePageFile — normalize-on-write fixpoint (SPEC §11)', () => {
  /**
   * Second pass over an already-written page file: parse out its body,
   * re-import that body and stabilize the result again with the same meta.
   * Returns the parsed body together with the re-written file.
   */
  async function secondPass(written: string) {
    const { body } = parseDocmostMarkdown(written);
    const reimported = await markdownToProseMirror(body);
    const rewritten = await stabilizePageFile(reimported, meta);
    return { body, rewritten };
  }

  it('reaches a byte-identical fixpoint after one extra export/import/export pass', async () => {
    // A diagram is the canonical one-pass asymmetry: drawio's `align` default
    // of "center" materializes on import, so a NAIVE export differs on the
    // second export. stabilizePageFile runs the convergence pass at write time,
    // so what it writes must already be the fixpoint.
    const content = {
      type: 'doc',
      content: [
        { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
        { type: 'drawio', attrs: { src: '/d.drawio' } },
        { type: 'paragraph', content: [{ type: 'text', text: 'outro' }] },
      ],
    };

    const file1 = await stabilizePageFile(content, meta);
    const { body: body1, rewritten: file2 } = await secondPass(file1);

    // The fixpoint property git relies on: the second pass reproduces the
    // first one byte-for-byte.
    expect(file2).toBe(file1);

    // The materialized diagram default is present in the stabilized body (proof
    // that the convergence pass actually ran, not just that two naive exports
    // happened to match).
    expect(body1).toContain('data-align="center"');
  });

  it('already-stable content is unchanged by the pass (idempotent)', async () => {
    // Plain prose is already a fixpoint; stabilizing it once and twice agree.
    const content = {
      type: 'doc',
      content: [{ type: 'paragraph', content: [{ type: 'text', text: 'just plain text' }] }],
    };

    const file1 = await stabilizePageFile(content, meta);
    const { body: body1, rewritten: file2 } = await secondPass(file1);

    expect(file2).toBe(file1);
    expect(body1).toBe('just plain text');
  });
});
describe('stabilizePageFile — meta serialization', () => {
  // Minimal one-paragraph document. Built fresh on every call so the tests
  // never share a content object.
  const tinyDoc = () => ({
    type: 'doc',
    content: [{ type: 'paragraph', content: [{ type: 'text', text: 'x' }] }],
  });

  it('preserves a null parentPageId verbatim in the meta block', async () => {
    const file = await stabilizePageFile(tinyDoc(), meta);
    const { meta: parsedMeta } = parseDocmostMarkdown(file);

    // The whole meta round-trips, and parentPageId is exactly null (root page).
    expect(parsedMeta).toEqual(meta);
    expect(parsedMeta!.parentPageId).toBeNull();

    // No trailing docmost:comments block — the sync body serializer omits it.
    expect(file).not.toContain('docmost:comments');
  });

  it('keeps a non-null parentPageId as-is', async () => {
    const childMeta: PageMeta = { ...meta, parentPageId: 'parent-99' };
    const file = await stabilizePageFile(tinyDoc(), childMeta);
    const { meta: parsedMeta } = parseDocmostMarkdown(file);
    expect(parsedMeta).toEqual(childMeta);
  });
});