Review finding #5: the git -> page body write (writeBody) did a full-body replace
(delete-all + re-insert) on the shared Yjs doc. Applied while a human is editing
the page, it discarded their in-flight changes; and TiptapTransformer.toYdoc ran
AFTER the fragment was cleared, so a conversion failure could leave the page with
an empty body.
Fixes:
- Active-session guard: CollaborationGateway.getActiveEditorCount(documentName)
reports live human (websocket) editor sessions for a doc, excluding server-side
direct connections. writeBody now throws ActiveEditSessionError when an editor
is connected. The engine's push loop already isolates each importPageMarkdown in
try/catch and does not advance the loop-guard on failure, so the write is simply
retried on the next poll once the editor disconnects — never a clobber.
- Crash-safe conversion: build the replacement Yjs update BEFORE opening the
connection / clearing the fragment, so a transform failure can never leave the
body empty.
Also updates the server-side converter gate spec to the corrected round-trip
shape: the block-image hoist no longer leaves a leading empty paragraph (the
git-sync converter fix in 7d39c16b, now reaching the built package).
A true merge of git content into a live Yjs session is out of scope (it needs a
real 3-way text merge with no shared update lineage); deferring the write while a
page is being edited is the safe, owner-approved minimum.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
518 lines
19 KiB
TypeScript
518 lines
19 KiB
TypeScript
/**
|
|
* §13.1 IDEMPOTENCY GATE — the blocking gate for git-sync Phase B.
|
|
*
|
|
* Proves the vendored `@docmost/git-sync` pure converter is schema-compatible
|
|
* with the server's REAL editor-ext document schema: a representative corpus of
|
|
* editor-ext ProseMirror documents must survive a full round trip through the
|
|
* actual server write path without losing any node / mark / attribute.
|
|
*
|
|
* Pipeline per document (plan §13.1):
|
|
* 1. md = convertProseMirrorToMarkdown(content) // git-sync export
|
|
* 2. doc = await markdownToProseMirror(md) // git-sync import
|
|
* 3. push `doc` through the REAL editor-ext Yjs write path the server uses:
|
|
* ydoc = TiptapTransformer.toYdoc(doc, 'default', tiptapExtensions)
|
|
* normalized = TiptapTransformer.fromYdoc(ydoc, 'default')
|
|
* This is exactly what PersistenceExtension does on store
|
|
* (apps/server/src/collaboration/extensions/persistence.extension.ts:96/115)
|
|
* with the same `tiptapExtensions` (collaboration.util.ts) and the same
|
|
* `@hocuspocus/transformer`, so the gate exercises the real schema
|
|
* validation that runs on a git-sync write (plan §3.3).
|
|
* 4. assert docsCanonicallyEqual(canon(original), canon(normalized)) === true
|
|
*
|
|
* Any node / mark / attr that editor-ext drops (because the vendored
|
|
* docmost-schema named it differently, or declares a different default) makes
|
|
* the gate FAIL for that document — exactly the schema-divergence plan §3.3 /
|
|
* §13.1 warn about. Genuine, irreducible divergences are isolated into the
|
|
* clearly-named `KNOWN DIVERGENCE` block at the bottom (never silently hidden).
|
|
*
|
|
* Requires the workspace packages built first:
|
|
* pnpm --filter @docmost/editor-ext build
|
|
* pnpm --filter @docmost/git-sync build
|
|
*/
|
|
import { TiptapTransformer } from '@hocuspocus/transformer';
|
|
// Import the server's real schema FIRST so `@docmost/editor-ext` resolves to its
|
|
// built CJS `dist` (its `main`). Importing the ESM `@docmost/git-sync` package
|
|
// first flips jest's resolver to editor-ext's `module` (src) field, which then
|
|
// drags in React node views (navigator-less) and breaks the node test env.
|
|
import { tiptapExtensions } from './collaboration.util';
|
|
import {
|
|
convertProseMirrorToMarkdown,
|
|
markdownToProseMirror,
|
|
canonicalizeContent,
|
|
docsCanonicallyEqual,
|
|
} from '@docmost/git-sync';
|
|
|
|
/**
|
|
* Run a single editor-ext document through the full gate pipeline and return
|
|
* the canonical original vs the canonical doc as it lands after the real Yjs
|
|
* write path, plus the intermediate markdown for diagnostics.
|
|
*/
|
|
async function runGate(original: any): Promise<{
|
|
md: string;
|
|
imported: any;
|
|
normalized: any;
|
|
canonOriginal: any;
|
|
canonNormalized: any;
|
|
}> {
|
|
// 1) editor-ext JSON -> markdown (git-sync export).
|
|
const md = convertProseMirrorToMarkdown(original);
|
|
|
|
// 2) markdown -> ProseMirror JSON (git-sync import, docmost-schema).
|
|
const imported = await markdownToProseMirror(md);
|
|
|
|
// 3) push through the REAL editor-ext schema via the server's Yjs write path.
|
|
// toYdoc validates `imported` against tiptapExtensions (throws on an
|
|
// unknown node, drops unknown attrs); fromYdoc reads it back as the
|
|
// normalized editor-ext JSON the server would persist.
|
|
const ydoc = TiptapTransformer.toYdoc(imported, 'default', tiptapExtensions);
|
|
const normalized = TiptapTransformer.fromYdoc(ydoc, 'default');
|
|
|
|
return {
|
|
md,
|
|
imported,
|
|
normalized,
|
|
canonOriginal: canonicalizeContent(original),
|
|
canonNormalized: canonicalizeContent(normalized),
|
|
};
|
|
}
|
|
|
|
const doc = (...content: any[]) => ({ type: 'doc', content });
|
|
const text = (t: string, marks?: any[]) =>
|
|
marks ? { type: 'text', text: t, marks } : { type: 'text', text: t };
|
|
const para = (...content: any[]) => ({ type: 'paragraph', content });
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Corpus: editor-ext ProseMirror documents covering the common node/mark types.
|
|
// Node / mark / attr names and DEFAULTS are taken from the real schema —
|
|
// editor-ext (packages/editor-ext/src) + the server's tiptapExtensions
|
|
// (collaboration.util.ts) — NOT guessed. Where editor-ext materializes a
|
|
// non-null default on import (e.g. image.align="center", callout.type, list
|
|
// start) the fixture pre-authors that materialized value so the round trip is
|
|
// already at its fixpoint (matches how the engine normalizes-on-write, SPEC §11).
|
|
// ---------------------------------------------------------------------------
|
|
const CORPUS: Record<string, any> = {
|
|
'paragraphs + headings (h1-h3)': doc(
|
|
{ type: 'heading', attrs: { level: 1 }, content: [text('Heading one')] },
|
|
{ type: 'heading', attrs: { level: 2 }, content: [text('Heading two')] },
|
|
{ type: 'heading', attrs: { level: 3 }, content: [text('Heading three')] },
|
|
para(text('A plain paragraph of text.')),
|
|
para(text('Second paragraph.')),
|
|
),
|
|
|
|
'inline marks (bold/italic/strike/code)': doc(
|
|
para(
|
|
text('normal '),
|
|
text('bold', [{ type: 'bold' }]),
|
|
text(' '),
|
|
text('italic', [{ type: 'italic' }]),
|
|
text(' '),
|
|
text('struck', [{ type: 'strike' }]),
|
|
text(' '),
|
|
text('code', [{ type: 'code' }]),
|
|
),
|
|
),
|
|
|
|
'links': doc(
|
|
para(
|
|
text('see '),
|
|
text('the site', [
|
|
{ type: 'link', attrs: { href: 'https://example.com' } },
|
|
]),
|
|
text(' for more'),
|
|
),
|
|
),
|
|
|
|
'bullet list': doc({
|
|
type: 'bulletList',
|
|
content: [
|
|
{ type: 'listItem', content: [para(text('first'))] },
|
|
{ type: 'listItem', content: [para(text('second'))] },
|
|
{ type: 'listItem', content: [para(text('third'))] },
|
|
],
|
|
}),
|
|
|
|
'ordered list': doc({
|
|
type: 'orderedList',
|
|
attrs: { start: 1 },
|
|
content: [
|
|
{ type: 'listItem', content: [para(text('one'))] },
|
|
{ type: 'listItem', content: [para(text('two'))] },
|
|
],
|
|
}),
|
|
|
|
'task list (checkbox)': doc({
|
|
type: 'taskList',
|
|
content: [
|
|
{
|
|
type: 'taskItem',
|
|
attrs: { checked: true },
|
|
content: [para(text('done item'))],
|
|
},
|
|
{
|
|
type: 'taskItem',
|
|
attrs: { checked: false },
|
|
content: [para(text('todo item'))],
|
|
},
|
|
],
|
|
}),
|
|
|
|
'blockquote': doc({
|
|
type: 'blockquote',
|
|
content: [para(text('a quoted line')), para(text('second quoted line'))],
|
|
}),
|
|
|
|
'callout (info)': doc({
|
|
type: 'callout',
|
|
attrs: { type: 'info' },
|
|
content: [para(text('an informational callout'))],
|
|
}),
|
|
|
|
'callout (warning)': doc({
|
|
type: 'callout',
|
|
attrs: { type: 'warning' },
|
|
content: [para(text('a warning callout'))],
|
|
}),
|
|
|
|
'code block (with language)': doc({
|
|
type: 'codeBlock',
|
|
attrs: { language: 'typescript' },
|
|
// A fenced code block's body is stored with a trailing newline (the form a
|
|
// markdown ``` fence round-trips to: marked normalizes the code text to end
|
|
// in "\n"). Authoring the fixture at that fixpoint mirrors how the engine
|
|
// normalizes-on-write (SPEC §11): codeBlock + `language` round-trip exactly.
|
|
content: [text('const a: number = 1;\nconsole.log(a);\n')],
|
|
}),
|
|
|
|
'horizontal rule': doc(
|
|
para(text('before')),
|
|
{ type: 'horizontalRule' },
|
|
para(text('after')),
|
|
),
|
|
|
|
'table (header row + cells)': doc({
|
|
type: 'table',
|
|
content: [
|
|
{
|
|
type: 'tableRow',
|
|
content: [
|
|
{
|
|
type: 'tableHeader',
|
|
attrs: { colspan: 1, rowspan: 1, colwidth: null },
|
|
content: [para(text('Name'))],
|
|
},
|
|
{
|
|
type: 'tableHeader',
|
|
attrs: { colspan: 1, rowspan: 1, colwidth: null },
|
|
content: [para(text('Value'))],
|
|
},
|
|
],
|
|
},
|
|
{
|
|
type: 'tableRow',
|
|
content: [
|
|
{
|
|
type: 'tableCell',
|
|
attrs: { colspan: 1, rowspan: 1, colwidth: null },
|
|
content: [para(text('alpha'))],
|
|
},
|
|
{
|
|
type: 'tableCell',
|
|
attrs: { colspan: 1, rowspan: 1, colwidth: null },
|
|
content: [para(text('1'))],
|
|
},
|
|
],
|
|
},
|
|
],
|
|
}),
|
|
|
|
// --- editor-ext nodes/marks beyond the original corpus (item #7) ----------
|
|
// Each of these was verified to round-trip CLEANLY through the real gate
|
|
// (export -> markdown -> import -> editor-ext Yjs write path). Fixtures are
|
|
// pre-authored at the engine's normalize-on-write fixpoint (SPEC §11), e.g.
|
|
// details carries the materialized `open:false`, and color marks use the
|
|
// `rgb(...)` form the HTML re-parser normalizes to.
|
|
|
|
'mention (user)': doc(
|
|
para(
|
|
text('hi '),
|
|
{
|
|
type: 'mention',
|
|
attrs: {
|
|
id: 'user-123',
|
|
label: 'Alice',
|
|
entityType: 'user',
|
|
entityId: 'user-123',
|
|
creatorId: 'creator-1',
|
|
},
|
|
},
|
|
text(' there'),
|
|
),
|
|
),
|
|
|
|
'inline math': doc(
|
|
para(
|
|
text('inline '),
|
|
{ type: 'mathInline', attrs: { text: 'x^2' } },
|
|
text(' math'),
|
|
),
|
|
),
|
|
|
|
'block math': doc({ type: 'mathBlock', attrs: { text: 'x^2 + y^2 = z^2' } }),
|
|
|
|
'details (collapsible)': doc({
|
|
type: 'details',
|
|
// `open:false` is the value editor-ext materializes on import; pre-authoring
|
|
// it puts the fixture at its round-trip fixpoint.
|
|
attrs: { open: false },
|
|
content: [
|
|
{ type: 'detailsSummary', content: [text('Summary line')] },
|
|
{ type: 'detailsContent', content: [para(text('hidden body'))] },
|
|
],
|
|
}),
|
|
|
|
'highlight (mark, no color)': doc(
|
|
para(
|
|
text('a '),
|
|
text('highlighted', [{ type: 'highlight' }]),
|
|
text(' word'),
|
|
),
|
|
),
|
|
|
|
'highlight (mark, with color)': doc(
|
|
para(
|
|
text('a '),
|
|
text('red', [{ type: 'highlight', attrs: { color: 'rgb(255, 0, 0)' } }]),
|
|
text(' word'),
|
|
),
|
|
),
|
|
|
|
'subscript': doc(
|
|
para(text('H'), text('2', [{ type: 'subscript' }]), text('O')),
|
|
),
|
|
|
|
'superscript': doc(
|
|
para(text('E=mc'), text('2', [{ type: 'superscript' }])),
|
|
),
|
|
|
|
'text color (textStyle)': doc(
|
|
// The HTML re-parser normalizes CSS colors to the `rgb(...)` form, so the
|
|
// fixture pre-authors that form; a `#hex` color would round-trip to the
|
|
// equivalent rgb() and is therefore a value-normalization divergence (see
|
|
// the KNOWN DIVERGENCE block below).
|
|
para(text('green', [{ type: 'textStyle', attrs: { color: 'rgb(0, 255, 0)' } }])),
|
|
),
|
|
|
|
'nested / mixed document': doc(
|
|
{ type: 'heading', attrs: { level: 1 }, content: [text('Mixed')] },
|
|
para(
|
|
text('intro with '),
|
|
text('bold', [{ type: 'bold' }]),
|
|
text(' and a '),
|
|
text('link', [{ type: 'link', attrs: { href: 'https://example.com' } }]),
|
|
text('.'),
|
|
),
|
|
{
|
|
type: 'bulletList',
|
|
content: [
|
|
{
|
|
type: 'listItem',
|
|
content: [
|
|
para(text('item with '), text('code', [{ type: 'code' }])),
|
|
],
|
|
},
|
|
{
|
|
type: 'listItem',
|
|
content: [
|
|
para(text('item with sublist')),
|
|
{
|
|
type: 'bulletList',
|
|
content: [
|
|
{ type: 'listItem', content: [para(text('nested a'))] },
|
|
{ type: 'listItem', content: [para(text('nested b'))] },
|
|
],
|
|
},
|
|
],
|
|
},
|
|
],
|
|
},
|
|
{
|
|
type: 'callout',
|
|
attrs: { type: 'success' },
|
|
content: [
|
|
para(text('callout body')),
|
|
{ type: 'codeBlock', attrs: { language: 'bash' }, content: [text('echo hi\n')] },
|
|
],
|
|
},
|
|
{
|
|
type: 'blockquote',
|
|
content: [para(text('quote at the end'))],
|
|
},
|
|
),
|
|
};
|
|
|
|
describe('git-sync converter §13.1 idempotency gate (editor-ext schema)', () => {
|
|
for (const [name, original] of Object.entries(CORPUS)) {
|
|
it(`round-trips losslessly: ${name}`, async () => {
|
|
const { md, canonOriginal, canonNormalized } = await runGate(original);
|
|
|
|
const equal = docsCanonicallyEqual(original, canonNormalized);
|
|
if (!equal) {
|
|
// Surface a readable diff so a real divergence is actionable.
|
|
// eslint-disable-next-line no-console
|
|
console.error(
|
|
`\n[GATE FAIL] ${name}\n--- markdown ---\n${md}\n` +
|
|
`--- canonical original ---\n${JSON.stringify(canonOriginal, null, 2)}\n` +
|
|
`--- canonical round-tripped ---\n${JSON.stringify(canonNormalized, null, 2)}\n`,
|
|
);
|
|
}
|
|
expect(equal).toBe(true);
|
|
});
|
|
}
|
|
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// KNOWN DIVERGENCE — images (isolated so it does NOT silently weaken the gate).
|
|
//
|
|
// This is NOT a schema-name divergence: the `image` NODE itself round-trips
|
|
// through editor-ext fine (it survives toYdoc under the real tiptapExtensions).
|
|
// The loss is intrinsic to MARKDOWN, the on-disk transport format git-sync uses:
|
|
//
|
|
// 1. `convertProseMirrorToMarkdown` emits a standard `` image
|
|
// (markdown-converter.ts case "image"). Standard markdown image syntax has
|
|
// no way to express `width` / `height` / `align`, so those attrs are
|
|
// DROPPED on export and cannot be recovered on import.
|
|
// 2. A block-level image is hoisted out of its line by the HTML re-parser,
|
|
// leaving a leading EMPTY paragraph (the same block-image-hoist limitation
|
|
// documented in packages/git-sync/test/fixtures/known-limitations).
|
|
//
|
|
// The gate documents the EXACT lossy shape below. If the converter is ever
|
|
// taught to preserve image dimensions (e.g. by emitting an HTML <img> with
|
|
// data-* attrs, as it already does for video/diagrams), these assertions flip
|
|
// and the image fixture should be promoted into the green CORPUS above.
|
|
// ---------------------------------------------------------------------------
|
|
describe('git-sync converter §13.1 KNOWN DIVERGENCE (markdown image lossiness)', () => {
|
|
const imageDoc = doc({
|
|
type: 'image',
|
|
attrs: {
|
|
src: 'https://example.com/pic.png',
|
|
width: 640,
|
|
height: 480,
|
|
align: 'center',
|
|
},
|
|
});
|
|
|
|
it('drops width/height/align (markdown  cannot carry them); the block-image hoist no longer leaves an empty paragraph', async () => {
|
|
const { md, canonNormalized } = await runGate(imageDoc);
|
|
|
|
// Export is plain markdown image syntax — no dimensions/align survive.
|
|
expect(md.trim()).toBe('');
|
|
|
|
// The round-tripped doc carries ONLY src (+ alt=""). The leading empty
|
|
// paragraph that the block-image hoist used to leave behind (a phantom
|
|
// blank-gap on every sync) is now stripped on import (git-sync fix), so the
|
|
// doc is just the image — no empty-paragraph artifact.
|
|
expect(canonNormalized).toEqual({
|
|
type: 'doc',
|
|
content: [
|
|
{
|
|
type: 'image',
|
|
attrs: { alt: '', src: 'https://example.com/pic.png' },
|
|
},
|
|
],
|
|
});
|
|
|
|
// Still NOT canonically equal to the original: width/height/align are an
|
|
// intrinsic markdown-transport loss (unrelated to the empty-paragraph fix).
|
|
expect(docsCanonicallyEqual(imageDoc, canonNormalized)).toBe(false);
|
|
});
|
|
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// KNOWN DIVERGENCE — text alignment (item #7; isolated, not silently dropped).
|
|
//
|
|
// editor-ext registers TextAlign for heading+paragraph, and the SERVER schema
|
|
// fully supports it — the loss is intrinsic to the MARKDOWN transport:
|
|
//
|
|
// • A paragraph's `textAlign` is EXPORTED as `<div align="...">text</div>`
|
|
// (markdown-converter case "paragraph"), but on import the converter's
|
|
// docmost-schema declares `textAlign` WITHOUT a parseHTML mapping, so the
|
|
// `align` attribute is never recovered -> it imports as `textAlign:null`
|
|
// and canonicalizes away. A heading's alignment is not even exported.
|
|
// • Therefore any non-default alignment is dropped on a full round trip.
|
|
//
|
|
// If the converter is ever taught to parse `align`/`text-align` back onto the
|
|
// block, this assertion flips and an aligned-paragraph fixture should be
|
|
// promoted into the green CORPUS above.
|
|
// ---------------------------------------------------------------------------
|
|
describe('git-sync converter §13.1 KNOWN DIVERGENCE (text alignment dropped)', () => {
|
|
it('drops a paragraph textAlign on the markdown round trip', async () => {
|
|
const alignedDoc = doc({
|
|
type: 'paragraph',
|
|
attrs: { textAlign: 'center' },
|
|
content: [text('centered')],
|
|
});
|
|
|
|
const { canonNormalized } = await runGate(alignedDoc);
|
|
|
|
// The round-tripped paragraph carries no alignment.
|
|
expect(canonNormalized).toEqual({
|
|
type: 'doc',
|
|
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'centered' }] }],
|
|
});
|
|
expect(docsCanonicallyEqual(alignedDoc, canonNormalized)).toBe(false);
|
|
});
|
|
|
|
it('drops a heading textAlign (headings do not export alignment at all)', async () => {
|
|
const alignedHeading = doc({
|
|
type: 'heading',
|
|
attrs: { level: 2, textAlign: 'center' },
|
|
content: [text('centered heading')],
|
|
});
|
|
|
|
const { md, canonNormalized } = await runGate(alignedHeading);
|
|
|
|
// Export is a plain markdown heading — no alignment syntax.
|
|
expect(md.trim()).toBe('## centered heading');
|
|
expect(docsCanonicallyEqual(alignedHeading, canonNormalized)).toBe(false);
|
|
});
|
|
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// KNOWN DIVERGENCE — textStyle color is VALUE-NORMALIZED, not lost (item #7).
|
|
//
|
|
// The textStyle/color mark itself round-trips (the green CORPUS has the rgb()
|
|
// form). But a `#hex` color is normalized to the equivalent `rgb(...)` string
|
|
// by the HTML re-parser on import, and canonicalize.ts does NOT normalize color
|
|
// formats — so a `#hex` original is not STRING-identical to its round trip even
|
|
// though the color is semantically preserved. Locked here so the boundary is
|
|
// explicit: author color fixtures in rgb() form to stay in the green corpus.
|
|
// ---------------------------------------------------------------------------
|
|
describe('git-sync converter §13.1 KNOWN DIVERGENCE (textStyle color #hex -> rgb)', () => {
|
|
it('normalizes a #hex text color to rgb() (semantically preserved, string-divergent)', async () => {
|
|
const hexDoc = doc(
|
|
para(text('green', [{ type: 'textStyle', attrs: { color: '#00ff00' } }])),
|
|
);
|
|
|
|
const { canonNormalized } = await runGate(hexDoc);
|
|
|
|
// Color survives, but as the normalized rgb() string.
|
|
expect(canonNormalized).toEqual({
|
|
type: 'doc',
|
|
content: [
|
|
{
|
|
type: 'paragraph',
|
|
content: [
|
|
{
|
|
type: 'text',
|
|
text: 'green',
|
|
marks: [{ type: 'textStyle', attrs: { color: 'rgb(0, 255, 0)' } }],
|
|
},
|
|
],
|
|
},
|
|
],
|
|
});
|
|
// Not string-identical to the #hex original.
|
|
expect(docsCanonicallyEqual(hexDoc, canonNormalized)).toBe(false);
|
|
});
|
|
});
|