First step of docs/git-sync-plan.md. New workspace package @docmost/git-sync vendoring the PURE parts from docmost-sync (HEAD b03eb35): - lib: markdown-converter, markdown-document, canonicalize, docmost-schema, node-ops, diff, and an extracted markdown-to-prosemirror (only the pure marked->HTML->generateJSON path from upstream collaboration.ts; no websocket). - engine (pure, no IO): reconcile, layout, sanitize, stabilize, loop-guard. Ported the upstream pure-module + round-trip corpus tests (vitest): 314 pass, 3 expected upstream known-limitation fails. tsc clean. No server wiring yet. docmost-schema inlines getStyleProperty (as packages/mcp does — @tiptap/core 3.20.4 doesn't export it). IO engine (pull/push/git/settings) deferred to later Phase A/B steps; the editor-ext idempotency gate (plan §13.1) is the next step. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
105 lines
5.0 KiB
TypeScript
105 lines
5.0 KiB
TypeScript
import { readFile } from 'node:fs/promises';
|
|
import { readdirSync } from 'node:fs';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { dirname, join } from 'node:path';
|
|
import { describe, expect, it } from 'vitest';
|
|
import {
|
|
convertProseMirrorToMarkdown,
|
|
markdownToProseMirror,
|
|
docsCanonicallyEqual,
|
|
} from 'docmost-client';
|
|
|
|
// Anchor every fixture path to this test file's own directory (not the
// process CWD) so the suite behaves the same wherever it is launched from.
const thisFile = fileURLToPath(import.meta.url);
const here = dirname(thisFile);
const fixturesRoot = join(here, 'fixtures');
// Fixtures that must round-trip cleanly.
const CORPUS_DIR = join(fixturesRoot, 'corpus');
// Fixtures with a documented, expected round-trip divergence.
const KNOWN_LIMITATIONS_DIR = join(fixturesRoot, 'known-limitations');
|
|
|
|
/**
 * Push one document through export -> import -> export.
 *
 * @param doc - ProseMirror document JSON to serialize.
 * @returns `md1` (markdown from the first export), `doc2` (the document
 *   obtained by re-importing `md1`) and `md2` (markdown exported from `doc2`).
 */
async function roundTrip(doc: any) {
  const firstExport = convertProseMirrorToMarkdown(doc);
  const reimported = await markdownToProseMirror(firstExport);
  const secondExport = convertProseMirrorToMarkdown(reimported);
  return {
    md1: firstExport,
    md2: secondExport,
    doc2: reimported,
  };
}
|
|
|
|
describe('round-trip corpus (SPEC §11)', () => {
  // The directory listing is read synchronously, while the suite is being
  // collected, so that one `it` per fixture can be registered with the
  // fixture's file name in its title.
  const fixtureNames: string[] = [];
  for (const entry of readdirSync(CORPUS_DIR)) {
    if (entry.endsWith('.json')) {
      fixtureNames.push(entry);
    }
  }
  fixtureNames.sort();

  // Without this, an empty corpus directory would register no per-fixture
  // tests and the suite would pass without checking anything.
  it('has a non-empty corpus', () => {
    expect(fixtureNames.length).toBeGreaterThan(0);
  });

  fixtureNames.forEach((name) => {
    it(`${name}: markdown byte-stable AND canonically stable`, async () => {
      const raw = await readFile(join(CORPUS_DIR, name), 'utf8');
      const original = JSON.parse(raw);
      const result = await roundTrip(original);

      // 1) Byte stability: the second export must equal the first exactly,
      //    which is the property git needs to avoid phantom diffs.
      expect(result.md2, `${name}: markdown not byte-stable`).toBe(result.md1);

      // 2) Semantic stability: the re-imported document must be canonically
      //    equal to the original (block ids stripped, default-null normalized).
      const canonicallyEqual = docsCanonicallyEqual(original, result.doc2);
      expect(
        canonicallyEqual,
        `${name}: document not canonically stable`,
      ).toBe(true);
    });
  });
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// KNOWN CONVERTER LIMITATIONS (isolated so they do NOT make CI red).
|
|
//
|
|
// SPEC §11 explicitly flags images and diagrams as high round-trip risk. These
|
|
// fixtures are kept OUT of the green corpus above and asserted with `it.fails`
|
|
// so the documented divergence is locked in (the test FAILS if the converter
|
|
// ever starts round-tripping them — at which point promote the fixture into
|
|
// the corpus). The precise divergences for `image-diagrams.json` are:
|
|
//
|
|
// * A BLOCK-LEVEL image preceded by a paragraph is NOT byte-stable on the
|
|
// FIRST re-export. The HTML re-parser hoists the block <img> out of its
|
|
// line and leaves an empty paragraph behind, so `paragraph` + `![alt](src)`
|
|
// re-imports as paragraph + empty-paragraph + image; the empty paragraph
|
|
// adds one blank line, so export #2 grows by a one-time "\n\n" (md1 !== md2).
|
|
// This is NOT non-convergence: the growth happens exactly ONCE. The doc
|
|
// CONVERGES to a fixpoint after one extra `export→import→export` pass — the
|
|
// empty paragraph is already present after the first import, so export #2
|
|
// and export #3 are byte-identical (md2 === md3, verified).
|
|
//
|
|
// * drawio / excalidraw diagrams gain `data-align="center"` on the second
|
|
// export: the schema's diagram `align` attribute has a NON-null default of
|
|
// "center", which materializes on import; the converter only emits
|
|
// data-align when set, so it appears on export #2 but not #1. Like the
|
|
// image case, this is one-time and converges after one extra pass.
|
|
//
|
|
// * A STANDALONE block image (no preceding paragraph) IS byte-stable from
|
|
// export #1 (md1 === md2) — but it is still NOT canonically stable: on
|
|
// import the bare <img> is wrapped, gaining a leading EMPTY paragraph, so
|
|
// the canonical doc differs by that spurious paragraph node even though the
|
|
// markdown bytes match.
|
|
//
|
|
// Resolution (SPEC §11, "normalize-on-write"): rather than deep-fixing the
|
|
// converter, the engine runs ONE `export→import→export` pass when writing into
|
|
// the vault; from that fixpoint onward the form is byte-stable, so git sees no
|
|
// phantom diff. The green corpus above avoids these one-time asymmetries by
|
|
// pre-authoring the materialized defaults (e.g. `align: "center"` on the
|
|
// diagrams in 06-diagrams.json) so a single pass is already at the fixpoint.
|
|
// ---------------------------------------------------------------------------
|
|
describe('round-trip KNOWN LIMITATIONS (SPEC §11 image/diagram risk)', () => {
  const fixturePath = join(KNOWN_LIMITATIONS_DIR, 'image-diagrams.json');

  /** Read and parse the known-limitation fixture from disk. */
  const loadFixture = async () => JSON.parse(await readFile(fixturePath, 'utf8'));

  // Guard for the `it.fails` case below. `it.fails` reports a PASS whenever
  // its body throws for ANY reason, so a missing or corrupt fixture, or a
  // converter that crashes, would be indistinguishable from the documented
  // byte-stability divergence. This ordinary test goes red in those cases, so
  // the expected failure below can only be caused by its `expect`.
  it('image-diagrams.json loads and round-trips without throwing', async () => {
    const doc = await loadFixture();
    await expect(roundTrip(doc)).resolves.toBeDefined();
  });

  it.fails(
    'image-diagrams.json is NOT byte-stable on export #1 (block image hoist + diagram align default; converges after one extra pass — SPEC §11 normalize-on-write)',
    async () => {
      const doc = await loadFixture();
      const { md1, md2 } = await roundTrip(doc);
      // This assertion FAILS today (documented divergence). `it.fails` turns a
      // failing body into a PASS; if the converter is fixed this flips and the
      // test goes red, prompting promotion into the green corpus.
      expect(md2).toBe(md1);
    },
  );
});
|