e2a3b5fc4d
Ten media/embed node types move their TOP-LEVEL serialization off raw schema HTML onto a readable markdown target plus an always-emitted discriminator comment whose NAME selects the node type. The schema-HTML form is retained on the raw-HTML/columns path (comments are dropped by the DOM parse stage there).

  image-form  ![](src)<!--name …-->                         youtube, video, audio, drawio, excalidraw
  link-form   [text](src)<!--name …-->                      pdf, attachment, embed (text=filename/provider)
  standalone  <!--pageembed …--> / <!--transclusion …-->    pageEmbed, transclusionReference

The comment NAME is the node-type discriminator and is ALWAYS emitted, even when the attr JSON is empty (`<!--youtube-->`), so a bare `![](src)` is never mistaken for an `image` and a bare `[t](u)` stays a plain link — no URL-sniffing. src rides in the markdown target; every other non-default attr (incl. the id links attachmentId/sourcePageId/transclusionId) rides in the comment JSON (stable key order, numerics stringified, align="center" omitted).

New src/lib/media-html.ts: byte-exact builders reproducing the schema HTML each old processNode case returned. Both the serializer's raw-HTML path (blockToHtml, now de-delegated from `return processNode(block)` to explicit per-type cases) and the importer call these, so serialize and parse cannot drift.

Import (applyCommentDirectives): image-form binds the preceding <img> (src from it), link-form the preceding <a> (src=href, text=filename/provider), standalone replaces the comment (same leading-doc-level handling as #5). Each rebuilds the schema element via the media-html builder, then swaps it in; the empty-<p> hoist is absorbed by stripEmptyParagraphs. Fail-open: wrong element/position/name or malformed JSON -> inert, no throw.

Link-form visible text is escaped (escapeLinkText) for the FULL set of CommonMark inline-active punctuation (\ ` * _ ~ [ ] < & ! ( )), not just [ ] \: the label is parsed as inline content, so a filename/provider like `report *v2*.pdf` or `![a](b).pdf` would otherwise lose the markup (or fragment the parse) when the importer reads a.textContent back — a data-loss regression vs the old data-attachment-name form. Adversarial round-trip fixtures lock byte- and value-stability for emphasis/code/strike/autolink/entity/image markers and nested-link names.

Tests: new media-comments.test.ts (40 cases: per-type exact md + lossless byte-stable round-trip incl. id links, minimal-node discriminator-still-emitted, in-column schema-HTML form, discriminator integrity, fail-open, active-punct filenames). Goldens in media-roundtrip / markdown-converter-golden / markdown-converter / diagram-roundtrip updated to the md+comment form (columns stay schema-HTML). The former known-limitation image-diagrams fixture is now byte- AND canonically-stable (canon #8 omits the diagram align="center" default) and was promoted from an it.fails into the green corpus (11-image-diagrams.json). git-sync stabilize.test.ts: the "diagram materializes data-align=center" fixpoint moved into a column (where the raw-HTML asymmetry still holds), since top level is now byte-stable.

package vitest: 540 passed; tsc clean. git-sync: 268 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
102 lines
4.5 KiB
TypeScript
102 lines
4.5 KiB
TypeScript
import { describe, expect, it } from 'vitest';
|
|
import { stabilizePageFile, type PageMeta } from '../src/engine/stabilize.js';
|
|
// markdownToProseMirror lives in collaboration.ts; importing it mutates the
|
|
// global DOM via jsdom at module load time (required for @tiptap/html under Node).
|
|
import { markdownToProseMirror } from '@docmost/prosemirror-markdown';
|
|
import { parseDocmostMarkdown } from '@docmost/prosemirror-markdown';
|
|
|
|
// stabilize.ts (SPEC §11 normalize-on-write) was 0% covered (only the gated e2e
|
|
// touched it). stabilizePageFile is import-testable: build a small ProseMirror
|
|
// content + meta and assert (1) the normalize-on-write pass reaches a fixpoint
|
|
// (a SECOND pass over the written body is byte-identical), and (2) the meta is
|
|
// serialized verbatim, including a null parentPageId.
|
|
|
|
/**
 * Page metadata fixture used by the tests in this file.
 *
 * `parentPageId` is `null` on purpose: the meta-serialization tests assert
 * that a null parent comes back from the written file exactly as `null`.
 * Tests that need a different parent spread this object and override the field.
 */
const meta: PageMeta = {
  version: 1,
  pageId: 'pg-1',
  slugId: 'sl-1',
  title: 'My Title',
  spaceId: 'sp-1',
  parentPageId: null,
};
|
|
|
|
describe('stabilizePageFile — normalize-on-write fixpoint (SPEC §11)', () => {
  /**
   * Runs the export -> import -> export cycle the fixpoint tests share:
   * stabilizes `content`, parses the body back out of the written file,
   * re-imports that body as a ProseMirror doc and stabilizes it a second time.
   *
   * @returns the first written file, the body parsed from it, and the file
   *          produced by the second stabilize pass.
   */
  const stabilizeTwice = async (content: Parameters<typeof stabilizePageFile>[0]) => {
    const firstFile = await stabilizePageFile(content, meta);
    const { body } = parseDocmostMarkdown(firstFile);
    const reimported = await markdownToProseMirror(body);
    const secondFile = await stabilizePageFile(reimported, meta);
    return { firstFile, body, secondFile };
  };

  it('reaches a byte-identical fixpoint after one extra export/import/export pass', async () => {
    // The canonical one-pass asymmetry is a diagram nested in a column. On the
    // raw-HTML/columns path the diagram's `align` default ("center") is
    // materialized on import, so a NAIVE first export and a second export
    // disagree. (#293 canon #8 made the TOP-LEVEL diagram form —
    // `<!--drawio …-->` — byte-stable by omitting that default, which leaves
    // the asymmetry only on the columns path, where the schema
    // `<div data-type="drawio">` form is retained.)
    // Because stabilizePageFile performs the convergence pass at write time,
    // what it writes must already be the fixpoint: importing that body again
    // and re-stabilizing has to reproduce the same bytes.
    const drawioColumn = {
      type: 'column',
      content: [{ type: 'drawio', attrs: { src: '/d.drawio' } }],
    };
    const textColumn = {
      type: 'column',
      content: [{ type: 'paragraph', content: [{ type: 'text', text: 'side' }] }],
    };
    const content = {
      type: 'doc',
      content: [
        { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
        {
          type: 'columns',
          attrs: { layout: 'two_equal' },
          content: [drawioColumn, textColumn],
        },
        { type: 'paragraph', content: [{ type: 'text', text: 'outro' }] },
      ],
    };

    const { firstFile, body, secondFile } = await stabilizeTwice(content);

    // The second pass must match the first byte for byte — this is the
    // fixpoint property git relies on.
    expect(secondFile).toBe(firstFile);

    // The stabilized body carries the materialized diagram default, which shows
    // the convergence pass really ran rather than two naive exports merely
    // happening to agree.
    expect(body).toContain('data-align="center"');
  });

  it('already-stable content is unchanged by the pass (idempotent)', async () => {
    // Plain prose is a fixpoint to begin with, so one pass and two passes agree.
    const content = {
      type: 'doc',
      content: [{ type: 'paragraph', content: [{ type: 'text', text: 'just plain text' }] }],
    };

    const { firstFile, body, secondFile } = await stabilizeTwice(content);

    expect(secondFile).toBe(firstFile);
    expect(body).toBe('just plain text');
  });
});
|
|
|
|
describe('stabilizePageFile — meta serialization', () => {
  /**
   * Builds the smallest document these tests need: a doc holding one
   * paragraph with the text "x". A fresh object is returned on every call so
   * the tests never share a mutable fixture.
   */
  const singleParagraphDoc = () => ({
    type: 'doc',
    content: [{ type: 'paragraph', content: [{ type: 'text', text: 'x' }] }],
  });

  it('preserves a null parentPageId verbatim in the meta block', async () => {
    const file = await stabilizePageFile(singleParagraphDoc(), meta);
    const parsed = parseDocmostMarkdown(file);

    // The whole meta round-trips, and parentPageId is exactly null (root page).
    expect(parsed.meta).toEqual(meta);
    // Optional chaining instead of a non-null assertion: if the meta block were
    // missing, the value is `undefined` and `toBeNull()` fails with a matcher
    // diff rather than the test dying on a TypeError.
    expect(parsed.meta?.parentPageId).toBeNull();

    // No trailing docmost:comments block — the sync body serializer omits it.
    expect(file).not.toContain('docmost:comments');
  });

  it('keeps a non-null parentPageId as-is', async () => {
    const childMeta: PageMeta = { ...meta, parentPageId: 'parent-99' };

    const file = await stabilizePageFile(singleParagraphDoc(), childMeta);

    expect(parseDocmostMarkdown(file).meta).toEqual(childMeta);
  });
});
|