feat(git-sync): vendor pure converter + engine into @docmost/git-sync (Phase A.1)

First step of docs/git-sync-plan.md. New workspace package @docmost/git-sync
vendoring the PURE parts from docmost-sync (HEAD b03eb35):
- lib: markdown-converter, markdown-document, canonicalize, docmost-schema,
  node-ops, diff, and an extracted markdown-to-prosemirror (only the pure
  marked->HTML->generateJSON path from upstream collaboration.ts; no websocket).
- engine (pure, no IO): reconcile, layout, sanitize, stabilize, loop-guard.
Ported the upstream pure-module + round-trip corpus tests (vitest): 314 pass,
3 expected upstream known-limitation fails. tsc clean. No server wiring yet.

docmost-schema inlines getStyleProperty (as packages/mcp does — @tiptap/core
3.20.4 doesn't export it). IO engine (pull/push/git/settings) deferred to later
Phase A/B steps; the editor-ext idempotency gate (plan §13.1) is the next step.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-21 13:55:23 +03:00
parent acf6d85b07
commit 6861b19bde
108 changed files with 14801 additions and 1 deletions

View File

@@ -0,0 +1,218 @@
import { describe, expect, it } from 'vitest';
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
// collaboration.ts and mutates global DOM at import time).
import {
serializeDocmostMarkdown,
parseDocmostMarkdown,
serializeDocmostMarkdownBody,
type DocmostMdMeta,
} from '../src/lib/markdown-document.js';
// Shared metadata fixture handed to the serializers by most tests in this
// file. `parentPageId` is explicitly null; the "preserves a null
// parentPageId for a root page" test relies on that value surviving a
// serialize -> parse round-trip.
const meta: DocmostMdMeta = {
version: 1,
pageId: 'p1',
slugId: 's1',
title: 'Hello',
spaceId: 'sp1',
parentPageId: null,
};
describe('serializeDocmostMarkdown / parseDocmostMarkdown', () => {
// ---------------------------------------------------------------------------
describe('round-trip', () => {
  // Type of the serializer's third ("comments") parameter, taken from the
  // function itself so the deliberately invalid argument below can be cast
  // without resorting to `any`.
  type CommentsArg = Parameters<typeof serializeDocmostMarkdown>[2];

  // Serialize followed by parse must hand back exactly what went in.
  it('round-trips meta, body, and comments', () => {
    const body = '# Title\n\nSome **body** text.';
    const comments = [{ id: 'c1', text: 'a note' }];
    const full = serializeDocmostMarkdown(meta, body, comments);
    const parsed = parseDocmostMarkdown(full);
    expect(parsed.meta).toEqual(meta);
    expect(parsed.body).toBe(body);
    expect(parsed.comments).toEqual(comments);
  });

  // An empty comment list is still written out as an explicit `[]` block and
  // parses back to an empty array.
  it('emits a comments block with [] even when there are no comments', () => {
    const full = serializeDocmostMarkdown(meta, 'body', []);
    expect(full).toContain('<!-- docmost:comments\n[]\n-->');
    const parsed = parseDocmostMarkdown(full);
    expect(parsed.comments).toEqual([]);
    expect(parsed.body).toBe('body');
  });

  // Simulates a caller that bypasses the type system by passing null; the
  // double cast mirrors the `undefined as unknown as string` convention used
  // elsewhere in this file instead of switching type checking off with `any`.
  it('non-array comments arg is normalized to [] in the serialized output', () => {
    const full = serializeDocmostMarkdown(
      meta,
      'body',
      null as unknown as CommentsArg,
    );
    expect(full).toContain('<!-- docmost:comments\n[]\n-->');
  });

  // Leading/trailing blank lines and spaces around the body are dropped.
  it('trims surrounding whitespace from the body on serialize', () => {
    const full = serializeDocmostMarkdown(meta, '\n\n body \n\n', []);
    const parsed = parseDocmostMarkdown(full);
    expect(parsed.body).toBe('body');
  });
});
// ---------------------------------------------------------------------------
describe('missing blocks (tolerant parsing)', () => {
  // Only a trailing comments block is present: meta comes back as null.
  it('missing meta block yields meta:null', () => {
    const source = 'Just a body.\n\n<!-- docmost:comments\n[]\n-->\n';
    const result = parseDocmostMarkdown(source);

    expect(result.meta).toBeNull();
    expect(result.body).toBe('Just a body.');
    expect(result.comments).toEqual([]);
  });

  // Only a leading meta block is present: everything after it is the body.
  it('missing comments block yields comments:null and treats all as body', () => {
    const metaJson = JSON.stringify(meta);
    const source = `<!-- docmost:meta\n${metaJson}\n-->\n\nbody only`;
    const result = parseDocmostMarkdown(source);

    expect(result.meta).toEqual(meta);
    expect(result.comments).toBeNull();
    expect(result.body).toBe('body only');
  });

  // No envelope at all: the input passes through untouched as the body.
  it('plain markdown with neither block: meta and comments null, whole input is body', () => {
    const plain = '# Plain\n\nNo envelope here.';
    const result = parseDocmostMarkdown(plain);

    expect(result.meta).toBeNull();
    expect(result.comments).toBeNull();
    expect(result.body).toBe(plain);
  });
});
// ---------------------------------------------------------------------------
describe('CRLF normalization', () => {
  // Take a document produced by the serializer, convert every line ending to
  // CRLF, and check the parser yields the same LF-based result.
  it('parses a CRLF-encoded document the same as LF', () => {
    const unixDoc = serializeDocmostMarkdown(meta, 'line one\nline two', [
      { id: 'c1' },
    ]);
    const windowsDoc = unixDoc.split('\n').join('\r\n');

    const result = parseDocmostMarkdown(windowsDoc);

    expect(result.meta).toEqual(meta);
    expect(result.body).toBe('line one\nline two');
    expect(result.comments).toEqual([{ id: 'c1' }]);
  });
});
// ---------------------------------------------------------------------------
describe('only the final document-ending comments block is captured', () => {
  it('an earlier literal docmost:comments opener inside the body stays in the body', () => {
    // A page whose prose documents the file format, so the body itself
    // contains something that looks like a comments block but is followed by
    // more text and therefore does not end the document.
    const literalBlock = '<!-- docmost:comments\n[{"fake":true}]\n-->';
    const proseBody = `Here is how the format looks:\n\n${literalBlock}\n\nand more prose after it.`;

    const serialized = serializeDocmostMarkdown(meta, proseBody, [
      { id: 'real' },
    ]);
    const result = parseDocmostMarkdown(serialized);

    // Only the trailing block written by the serializer becomes comments...
    expect(result.comments).toEqual([{ id: 'real' }]);
    // ...while the look-alike block remains in the body verbatim.
    expect(result.body).toContain(literalBlock);
    expect(result.body).toContain('and more prose after it.');
  });

  it('a literal opener whose closer does NOT end the doc is left entirely in the body', () => {
    // Hand-built document with a meta block and a comments-style block that
    // is followed by more text; there is no genuine trailing block, so the
    // parser must report comments as null and strip nothing.
    const metaJson = JSON.stringify(meta);
    const source = `<!-- docmost:meta\n${metaJson}\n-->\n\nbody start\n\n<!-- docmost:comments\n[]\n-->\n\ntrailing text not ending the doc`;

    const result = parseDocmostMarkdown(source);

    expect(result.comments).toBeNull();
    expect(result.body).toContain('<!-- docmost:comments');
    expect(result.body).toContain('trailing text not ending the doc');
  });
});
// ---------------------------------------------------------------------------
describe('end-anchored comments closer tolerates CRLF + trailing whitespace', () => {
  it('captures the final comments block when its "-->" closer has CRLF and trailing spaces', () => {
    // Assemble the document line by line and join with CRLF. The final
    // "--> " entry carries trailing whitespace after the closer, and the
    // empty last entry yields the terminating CRLF. The parser is expected
    // to still treat this as the document-ending comments block and keep it
    // out of the body.
    const crlfLines = [
      '<!-- docmost:meta',
      JSON.stringify(meta),
      '-->',
      '',
      'the body line',
      '',
      '<!-- docmost:comments',
      '[{"id":"c-crlf"}]',
      '--> ',
      '',
    ];
    const result = parseDocmostMarkdown(crlfLines.join('\r\n'));

    expect(result.meta).toEqual(meta);
    expect(result.body).toBe('the body line');
    expect(result.comments).toEqual([{ id: 'c-crlf' }]);
  });
});
// ---------------------------------------------------------------------------
describe('malformed JSON throws a clear error', () => {
  // The error message must name the block whose JSON payload is broken.
  it('throws on malformed meta JSON', () => {
    const parseBrokenMeta = () =>
      parseDocmostMarkdown('<!-- docmost:meta\n{not valid json}\n-->\n\nbody');
    expect(parseBrokenMeta).toThrow(/docmost:meta JSON/);
  });

  it('throws on malformed comments JSON', () => {
    const parseBrokenComments = () =>
      parseDocmostMarkdown('body\n\n<!-- docmost:comments\n[not, valid]\n-->\n');
    expect(parseBrokenComments).toThrow(/docmost:comments JSON/);
  });
});
});
describe('serializeDocmostMarkdownBody', () => {
// The body-only serializer writes the meta envelope but never a comments one.
it('emits NO comments block', () => {
  const serialized = serializeDocmostMarkdownBody(meta, 'just the body');

  expect(serialized).not.toContain('docmost:comments');
  expect(serialized).toContain('<!-- docmost:meta');
});
// Round-trip through the body-only serializer with every meta field set,
// including a non-null parent.
it('serialize -> parse preserves meta and the trimmed body, comments null (SPEC §3)', () => {
  const pageMeta: DocmostMdMeta = {
    version: 1,
    pageId: 'page-123',
    slugId: 'slug-abc',
    title: 'My Page',
    spaceId: 'space-1',
    parentPageId: 'parent-9',
  };
  const markdownBody = 'Hello\n\nWorld';

  const result = parseDocmostMarkdown(
    serializeDocmostMarkdownBody(pageMeta, markdownBody),
  );

  expect(result.meta).toEqual(pageMeta);
  expect(result.body).toBe(markdownBody);
  expect(result.comments).toBeNull();
});
// The shared `meta` fixture has parentPageId: null; it must come back as-is.
it('preserves a null parentPageId for a root page', () => {
  const result = parseDocmostMarkdown(
    serializeDocmostMarkdownBody(meta, 'body text'),
  );

  expect(result.meta).toEqual(meta);
  expect(result.comments).toBeNull();
});
it('produces a parseable file for an empty or missing body', () => {
  const minimalMeta: DocmostMdMeta = { version: 1, pageId: 'p-empty' };

  // First an empty string, then a missing body (undefined forced past the
  // type checker). Both are expected to serialize to a file that parses
  // back with an empty body and no comments.
  const bodyInputs = ['', undefined as unknown as string];

  for (const bodyInput of bodyInputs) {
    const file = serializeDocmostMarkdownBody(minimalMeta, bodyInput);
    const result = parseDocmostMarkdown(file);

    expect(result.meta).toEqual(minimalMeta);
    expect(result.body).toBe('');
    expect(result.comments).toBeNull();
  }
});
// Whitespace surrounding the body is dropped by the body-only serializer.
it('trims the body', () => {
  const serialized = serializeDocmostMarkdownBody(meta, '\n\n hi \n');
  const { body } = parseDocmostMarkdown(serialized);

  expect(body).toBe('hi');
});
});