Merge pull request 'fix(converter): стабильность round-trip image/медиа — «» ≡ absent (класс defaults-instability)' (#350) from fix/media-roundtrip-stability into develop

Reviewed-on: #350
This commit was merged in pull request #350.
This commit is contained in:
2026-07-04 21:30:12 +03:00
3 changed files with 646 additions and 6 deletions
@@ -635,13 +635,17 @@ const Attachment = Node.create({
},
name: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-name"),
// Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default.
parseHTML: (el: HTMLElement) =>
el.getAttribute("data-attachment-name") || null,
renderHTML: (attrs: Record<string, any>) =>
attrs.name ? { "data-attachment-name": attrs.name } : {},
},
mime: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-mime"),
// Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default.
parseHTML: (el: HTMLElement) =>
el.getAttribute("data-attachment-mime") || null,
renderHTML: (attrs: Record<string, any>) =>
attrs.mime ? { "data-attachment-mime": attrs.mime } : {},
},
@@ -689,7 +693,10 @@ const Video = Node.create({
},
alt: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("aria-label"),
// Empty-string-vs-absent idempotency: coerce "" back to the default so a
// stray empty `aria-label` never materializes `alt: ""` on a video stored
// with no alt (same GS-EDIT-REVERT class as the image `alt` fix).
parseHTML: (el: HTMLElement) => el.getAttribute("aria-label") || null,
renderHTML: (attrs: Record<string, any>) =>
attrs.alt ? { "aria-label": attrs.alt } : {},
},
@@ -864,13 +871,15 @@ const diagramAttributes = () => ({
},
title: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("data-title"),
// Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default.
parseHTML: (el: HTMLElement) => el.getAttribute("data-title") || null,
renderHTML: (attrs: Record<string, any>) =>
attrs.title ? { "data-title": attrs.title } : {},
},
alt: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("data-alt"),
// Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default.
parseHTML: (el: HTMLElement) => el.getAttribute("data-alt") || null,
renderHTML: (attrs: Record<string, any>) =>
attrs.alt ? { "data-alt": attrs.alt } : {},
},
@@ -1106,7 +1115,8 @@ const Pdf = Node.create({
},
name: {
default: null,
parseHTML: (el: HTMLElement) => el.getAttribute("data-name"),
// Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default.
parseHTML: (el: HTMLElement) => el.getAttribute("data-name") || null,
renderHTML: (attrs: Record<string, any>) =>
attrs.name ? { "data-name": attrs.name } : {},
},
@@ -1491,6 +1501,29 @@ export const docmostExtensions = [
...parent.height,
parseHTML: (el: HTMLElement) => el.getAttribute("height"),
},
// Empty-string-vs-absent idempotency (GS-EDIT-REVERT class). `marked`
// renders `![](src)` as `<img alt="">`, so the stock Image `alt`
// parseHTML (`getAttribute("alt")`) materializes `alt: ""` on an image
// that was stored with NO alt (attr absent). That is a false diff against
// the editor-stored form (a no-alt image has alt ABSENT, not ""), so a
// git-sync / ai-chat touch of a page with a plain image produced phantom
// churn. Coerce an empty string back to the attr's default (null) so the
// import is idempotent. A real alt survives verbatim (`|| undefined` keeps
// the truthy value; the default fills the empty case). `title` is coerced
// the same way for the whole class, even though `marked` does not
// currently emit `title=""` — defence in depth against any path that does.
// NOTE: this DIVERGES from editor-ext's literal image `alt` parseHTML
// (`getAttribute("alt")`, which returns "" verbatim), but CONVERGES on
// editor-ext's real STORED shape: an editor image inserted without alt
// renders with no `alt` attribute and re-parses as absent, never "".
alt: {
...parent.alt,
parseHTML: (el: HTMLElement) => el.getAttribute("alt") || null,
},
title: {
...parent.title,
parseHTML: (el: HTMLElement) => el.getAttribute("title") || null,
},
};
},
}).configure({ inline: false }),
@@ -0,0 +1,443 @@
/**
* Reusable round-trip-STABILITY matrix helper (fixtures-first).
*
* A single stored node authored WITHOUT a given string attribute (attr
* absent / undefined) must not gain a phantom EMPTY-STRING value after a
* markdown round-trip — the "empty-string-vs-absent" churn class. This helper,
* given a node spec, drives a matrix of attribute combinations through the REAL
* converter (`convertProseMirrorToMarkdown` -> `markdownToProseMirror`) and
* asserts byte-stability on two contours:
*
* 1. RAW round-trip: for the node under test, every attribute the round-trip
* materializes must equal what the INPUT authored — an authored attr keeps
* its value, an ABSENT attr may only reappear at its SCHEMA DEFAULT. If an
* absent attr comes back as a NON-default value (e.g. `alt: ""` where the
* default is `null`), that is an instability and is reported precisely as
* `type.attr: absent -> "<got>"`. This is the contour git-sync / stored
* JSON diffs on, so masking it only in `canonicalize` would leave the noise.
*
* 2. CANONICAL round-trip: `canonicalizeContent(original)` must deep-equal
* `canonicalizeContent(roundtrip)` (a second, semantic contour).
*
* The ONLY normalization the helper treats as allowed (not an instability) is
* the DOCUMENTED numeric width/height/size/aspectRatio -> string coercion the
* converter performs on purpose (a stored numeric `640` re-parses via
* `getAttribute` as the string `"640"`). It is encoded here as an explicit
* per-spec `numericStringAttrs` set applied to BOTH contours, NOT a silent skip.
*
* The helper is node-type agnostic: image and the whole media family share the
* `align !== "center"` predicate + `<!--name {…}-->` comment machinery, so one
* matrix guards the shared class.
*/
import { getSchema } from "@tiptap/core";
import {
convertProseMirrorToMarkdown,
markdownToProseMirror,
canonicalizeContent,
docmostExtensions,
} from "../src/lib/index.js";
import { firstDivergence } from "./roundtrip-helpers.js";
/** One attribute's two probe values. */
export interface AttrMatrixEntry {
/** Attribute name on the node. */
attr: string;
/**
* The "default" pick. `undefined` means the attribute is OMITTED entirely
* (the absent case — the one that can materialize an empty string on import).
* A concrete value is authored verbatim.
*/
default: unknown;
/** A representative NON-default value to exercise (must survive verbatim). */
nonDefault: unknown;
/**
* Marks the attr as a member of the EMPTY-STRING class the fix targets: a
* string attr whose schema default is `null`/absent and whose parseHTML
* coerces `"" -> default` (image/drawio `alt`+`title`, video `alt` via
* aria-label, pdf/attachment `name`, attachment `mime`). Set true to also
* drive the THIRD-STATE convergence case (see runConvergenceCase) for this
* attr. Attrs whose default is NOT null (e.g. embed `provider`, default "")
* or that are not `""`-coerced (control attrs) are left unset.
*/
emptyStringClass?: boolean;
}
/** A node type + the attribute matrix to sweep for it. */
export interface NodeStabilitySpec {
/** Node type (e.g. "image", "video"). */
type: string;
/** Attributes always present on the node (e.g. `{ src: "/i.png" }`). */
baseAttrs?: Record<string, unknown>;
/** Attributes to sweep at default and non-default. */
attrMatrix: AttrMatrixEntry[];
/**
* Attributes whose numeric -> string coercion on round-trip is DOCUMENTED and
* intentional; compared modulo `String(x)` on both sides. Defaults to the
* converter's known sizing set.
*/
numericStringAttrs?: string[];
}
/** A single unstable finding, legible enough to tie a gate-lock to. */
export interface Instability {
type: string;
attr: string;
/** What the input authored: the literal value, or the ABSENT sentinel. */
authored: unknown | typeof ABSENT;
/** What the round-trip produced. */
got: unknown;
/** What a stable round-trip should have produced (authored value or default). */
expected: unknown;
}
/** One matrix cell's result. */
export interface ComboResult {
label: string;
authored: Record<string, unknown>;
/** RAW-contour instabilities on the node under test. */
raw: Instability[];
/** CANONICAL-contour divergence (path + values) or null when equal. */
canonical: { path: string; a: unknown; b: unknown } | null;
/** True when the node type failed to round-trip at all (structural loss). */
missing: boolean;
md: string;
}
/** Whole-matrix report for one node spec. */
export interface MatrixReport {
type: string;
combos: ComboResult[];
}
/** Sentinel marking an attribute the input did NOT author. */
export const ABSENT = Symbol("ABSENT");
const DEFAULT_NUMERIC_STRING_ATTRS = [
"width",
"height",
"size",
"aspectRatio",
];
// The ProseMirror schema the converter targets — its attribute `default`s are
// the authoritative "what an absent attr should re-materialize as" oracle.
const schema = getSchema(docmostExtensions);
/** Read the schema default for every attribute of a node type. */
function schemaDefaults(type: string): Record<string, unknown> {
const specAttrs = (schema.nodes[type]?.spec?.attrs ?? {}) as Record<
string,
{ default: unknown }
>;
const out: Record<string, unknown> = {};
for (const [k, v] of Object.entries(specAttrs)) out[k] = v.default;
return out;
}
/** Find the first node of a given type anywhere in a PM doc tree. */
function findFirst(node: any, type: string): any {
if (node && node.type === type) return node;
for (const child of node?.content ?? []) {
const hit = findFirst(child, type);
if (hit) return hit;
}
return null;
}
/** Coerce a scalar for the documented numeric->string comparison. */
const numStr = (x: unknown): unknown => (x == null ? x : String(x));
/**
* Enumerate the cartesian product of the matrix: every attribute independently
* at its default (index 0) or non-default (index 1) pick. The all-default
* corner is included (the baseline). Small by construction (2^N over a handful
* of at-risk string attrs).
*/
function enumerateCombos(matrix: AttrMatrixEntry[]): number[][] {
let combos: number[][] = [[]];
for (let i = 0; i < matrix.length; i++) {
const next: number[][] = [];
for (const c of combos) {
next.push([...c, 0]);
next.push([...c, 1]);
}
combos = next;
}
return combos;
}
/** Build the authored attrs for one combo pick vector. */
function authoredAttrs(
spec: NodeStabilitySpec,
picks: number[],
): Record<string, unknown> {
const attrs: Record<string, unknown> = { ...(spec.baseAttrs ?? {}) };
spec.attrMatrix.forEach((entry, i) => {
if (picks[i] === 1) {
attrs[entry.attr] = entry.nonDefault;
} else if (entry.default !== undefined) {
attrs[entry.attr] = entry.default;
}
// default === undefined -> OMIT the attr entirely (the absent case).
});
return attrs;
}
/** Human-readable label for a combo (which attrs are at non-default). */
function comboLabel(spec: NodeStabilitySpec, picks: number[]): string {
const on = spec.attrMatrix
.filter((_, i) => picks[i] === 1)
.map((e) => e.attr);
return on.length === 0 ? "<all-default>" : on.join("+");
}
/**
* Run the full stability matrix for one node spec and return a structured
* report (does NOT throw — the caller asserts, so a failure can print the whole
* report). Every combo runs the real export->import pipeline once.
*/
export async function runStabilityMatrix(
spec: NodeStabilitySpec,
): Promise<MatrixReport> {
const numericStringAttrs = new Set(
spec.numericStringAttrs ?? DEFAULT_NUMERIC_STRING_ATTRS,
);
const defaults = schemaDefaults(spec.type);
const combos: ComboResult[] = [];
for (const picks of enumerateCombos(spec.attrMatrix)) {
const authored = authoredAttrs(spec, picks);
const doc = { type: "doc", content: [{ type: spec.type, attrs: authored }] };
const md = convertProseMirrorToMarkdown(doc);
const rt = await markdownToProseMirror(md);
const node = findFirst(rt, spec.type);
const result: ComboResult = {
label: comboLabel(spec, picks),
authored,
raw: [],
canonical: null,
missing: node == null,
md,
};
if (node != null) {
// RAW contour: every materialized attr must equal the authored value, or
// (for an absent attr) the schema default — modulo the documented numeric
// string coercion.
const rtAttrs = (node.attrs ?? {}) as Record<string, unknown>;
for (const key of Object.keys(rtAttrs)) {
const authoredHas = Object.prototype.hasOwnProperty.call(authored, key);
const expected = authoredHas ? authored[key] : defaults[key];
let got = rtAttrs[key];
let exp = expected;
if (numericStringAttrs.has(key)) {
got = numStr(got);
exp = numStr(exp);
}
if (firstDivergence(got, exp) !== null) {
result.raw.push({
type: spec.type,
attr: key,
authored: authoredHas ? authored[key] : ABSENT,
got: rtAttrs[key],
expected,
});
}
}
// CANONICAL contour: canonical forms deep-equal, modulo the same numeric
// string coercion (applied to both trees so a documented coercion is not
// counted as a divergence).
const ca = normalizeNumeric(canonicalizeContent(doc), numericStringAttrs);
const cb = normalizeNumeric(canonicalizeContent(rt), numericStringAttrs);
result.canonical = firstDivergence(ca, cb);
}
combos.push(result);
}
return { type: spec.type, combos };
}
/**
* Deep-copy a canonical tree, coercing the documented numeric->string attrs to
* their string form so an intentional `640 -> "640"` coercion is not reported
* as a canonical divergence. Only touches the listed attribute keys.
*/
function normalizeNumeric(node: any, attrs: Set<string>): any {
if (Array.isArray(node)) return node.map((n) => normalizeNumeric(n, attrs));
if (node === null || typeof node !== "object") return node;
const out: Record<string, unknown> = {};
for (const key of Object.keys(node)) {
if (key === "attrs" && node.attrs && typeof node.attrs === "object") {
const a: Record<string, unknown> = {};
for (const [k, v] of Object.entries(node.attrs)) {
a[k] = attrs.has(k) ? numStr(v) : v;
}
out.attrs = a;
} else {
out[key] = normalizeNumeric(node[key], attrs);
}
}
return out;
}
/** Flatten a report to just its unstable combos (for a terse assertion). */
export function unstableCombos(report: MatrixReport): ComboResult[] {
return report.combos.filter(
(c) => c.missing || c.raw.length > 0 || c.canonical !== null,
);
}
// ---------------------------------------------------------------------------
// THIRD STATE: an EXPLICITLY-STORED empty string on a string attr.
//
// The matrix above sweeps TWO states per string attr: absent/default and a
// non-default value — and asserts FIRST-pass byte-stability for both. There is
// a third, degenerate state the matrix does NOT cover: the attr stored as a
// LITERAL `""`. This is DISTINCT from "the node never had the attr": a user
// types an alt in the editor, then deletes it, and Tiptap's
// `updateAttributes({ alt: "" })` persists a literal `alt: ""` in the stored
// JSON. There is no absent-vs-"" distinction in the DOM once serialized, so the
// fix's `getAttribute("alt") || null` coercion canonicalizes BOTH to the
// default (`null`).
//
// Consequence — and this is CORRECT, not a bug: a doc carrying an explicit `""`
// converges to the default on the FIRST round-trip (a ONE-TIME diff: `"" ->
// null`), then is byte-stable from the SECOND round-trip on (idempotent). So
// this state must be pinned with a DIFFERENT contract than the matrix's:
// - do NOT assert first-pass byte-stability (the first pass legitimately
// changes `""` -> default), and
// - DO assert the first pass converges to the default AND the second pass is
// idempotent (rt2 deep-equals rt1).
//
// A future sync/QA pass diffing stored pages will see this one-time `"" -> null`
// normalization exactly once per affected node; it is the converter canon, not
// corruption, and must not be flagged as data loss.
// ---------------------------------------------------------------------------
/** Result of the third-state ("explicit empty string") convergence probe. */
export interface ConvergenceResult {
type: string;
attr: string;
/** The schema default the attr must converge to on pass 1 (null / absent). */
expectedDefault: unknown;
/** rt1's materialized value for the attr — must equal `expectedDefault`. */
firstPassValue: unknown;
/** True when the node round-tripped AND rt1 converged the attr to default. */
convergedToDefault: boolean;
/** rt1-vs-rt2 divergence; MUST be null (idempotent from pass 2 on). */
secondPassDivergence: { path: string; a: unknown; b: unknown } | null;
/** True when the node type failed to round-trip at all (structural loss). */
missing: boolean;
}
/** Round-trip a full PM doc through the real converter once. */
async function roundtripDoc(doc: any): Promise<any> {
return markdownToProseMirror(convertProseMirrorToMarkdown(doc));
}
/**
* Third-state convergence probe for one string attr of the empty-string class.
*
* (a) builds a doc with the attr EXPLICITLY set to `""` (baseAttrs + `""`),
* (b) rt1 = roundtrip(doc); asserts rt1's attr equals the schema default — the
* documented ONE-TIME `"" -> default` normalization (NOT byte-stable vs the
* `""` input, so first-pass stability is deliberately NOT asserted here),
* (c) rt2 = roundtrip(rt1); asserts rt2 deep-equals rt1 — idempotent from the
* second round-trip on.
*
* Returns a structured result (does NOT throw) so the caller can assert and
* print. Reusable across the whole node family: drive it for every attr flagged
* `emptyStringClass` on every spec (see convergenceCasesFor / the test driver).
*/
export async function runConvergenceCase(
spec: NodeStabilitySpec,
attr: string,
): Promise<ConvergenceResult> {
const expectedDefault = schemaDefaults(spec.type)[attr];
// (a) The degenerate third state: attr persisted as a LITERAL "".
const authored = { ...(spec.baseAttrs ?? {}), [attr]: "" };
const doc = { type: "doc", content: [{ type: spec.type, attrs: authored }] };
// (b) First round-trip: "" must normalize to the default (a one-time diff).
const rt1 = await roundtripDoc(doc);
const node1 = findFirst(rt1, spec.type);
const firstPassValue = node1?.attrs?.[attr];
const convergedToDefault =
node1 != null && firstDivergence(firstPassValue, expectedDefault) === null;
// (c) Second round-trip: must be byte-stable (rt2 deep-equals rt1). We compare
// the WHOLE docs — both are converter OUTPUTS already in the same materialized
// form (numeric attrs are strings on both sides), so no numeric normalization
// is needed here, unlike the raw/canonical contours above.
const rt2 = node1 != null ? await roundtripDoc(rt1) : rt1;
const secondPassDivergence =
node1 != null ? firstDivergence(rt1, rt2) : null;
return {
type: spec.type,
attr,
expectedDefault,
firstPassValue,
convergedToDefault,
secondPassDivergence,
missing: node1 == null,
};
}
/** The attrs of a spec flagged as members of the empty-string class. */
export function convergenceCasesFor(spec: NodeStabilitySpec): string[] {
return spec.attrMatrix
.filter((e) => e.emptyStringClass)
.map((e) => e.attr);
}
/** True when a convergence result honours the "converges once, then stable" contract. */
export function convergenceOk(r: ConvergenceResult): boolean {
return !r.missing && r.convergedToDefault && r.secondPassDivergence === null;
}
/** Render a convergence result as a legible one-liner for a failed assertion. */
export function formatConvergence(r: ConvergenceResult): string {
if (r.missing) return `${r.type}.${r.attr}: DID-NOT-ROUND-TRIP`;
const parts: string[] = [];
if (!r.convergedToDefault) {
parts.push(
`pass1 did NOT converge: got ${JSON.stringify(r.firstPassValue)} (expected default ${JSON.stringify(r.expectedDefault)})`,
);
}
if (r.secondPassDivergence) {
parts.push(
`pass2 NOT idempotent @ ${r.secondPassDivergence.path}: ${JSON.stringify(r.secondPassDivergence.a)} vs ${JSON.stringify(r.secondPassDivergence.b)}`,
);
}
const status = parts.length === 0 ? "converges-once-then-stable" : parts.join("; ");
return `${r.type}.${r.attr}: ${status}`;
}
/** Render a report as a legible multi-line string for a failed assertion. */
export function formatReport(report: MatrixReport): string {
const lines: string[] = [`node "${report.type}":`];
for (const c of report.combos) {
const flags: string[] = [];
if (c.missing) flags.push("DID-NOT-ROUND-TRIP");
for (const i of c.raw) {
const authored =
i.authored === ABSENT ? "absent" : JSON.stringify(i.authored);
flags.push(
`RAW ${i.type}.${i.attr}: ${authored} -> ${JSON.stringify(i.got)} (expected ${JSON.stringify(i.expected)})`,
);
}
if (c.canonical) {
flags.push(
`CANON @ ${c.canonical.path}: ${JSON.stringify(c.canonical.a)} vs ${JSON.stringify(c.canonical.b)}`,
);
}
const status = flags.length === 0 ? "stable" : flags.join("; ");
lines.push(` [${c.label}] ${status}`);
}
return lines.join("\n");
}
@@ -0,0 +1,164 @@
import { describe, expect, it } from "vitest";
import {
runStabilityMatrix,
unstableCombos,
formatReport,
runConvergenceCase,
convergenceCasesFor,
convergenceOk,
formatConvergence,
type NodeStabilitySpec,
} from "./roundtrip-stability.helper.js";
// ---------------------------------------------------------------------------
// Round-trip STABILITY matrix for image + the media family.
//
// Guards the "empty-string-vs-absent" churn class (GS-EDIT-REVERT family): a
// stored node authored WITHOUT a string attr (alt/title/caption/aria-label/...)
// must not gain a phantom `attr: ""` after `markdownToProseMirror(convert…)`.
// Each spec sweeps the at-risk string attrs at DEFAULT (absent) and at a real
// NON-default value; the helper asserts both the RAW round-trip (attrs equal the
// input's, modulo the documented numeric width/height/size/aspectRatio -> string
// coercion) and the CANONICAL round-trip (canonical forms deep-equal).
//
// The image + media family share the `align !== "center"` predicate and the
// `<!--name {…}-->` comment machinery, so one matrix guards the shared class.
// align is NOT part of this class (it round-trips correctly) and is not swept.
// ---------------------------------------------------------------------------
const SPECS: NodeStabilitySpec[] = [
{
// Image carries the most at-risk string attrs. `alt` is the one marked
// materializes as `<img alt="">` on `![](src)` import (the real bug); title
// and caption are covered as the same class. attachmentId is a string attr
// that must stay absent when unset (control).
type: "image",
baseAttrs: { src: "/i.png" },
attrMatrix: [
{ attr: "alt", default: undefined, nonDefault: "a real alt text", emptyStringClass: true },
{ attr: "title", default: undefined, nonDefault: "a real title", emptyStringClass: true },
{ attr: "caption", default: undefined, nonDefault: "a real caption" },
{ attr: "attachmentId", default: undefined, nonDefault: "att-42" },
],
},
{
// Video's `alt` rides the `aria-label` attribute (media aria-label at risk).
type: "video",
baseAttrs: { src: "/v.mp4" },
attrMatrix: [
{ attr: "alt", default: undefined, nonDefault: "a clip", emptyStringClass: true },
{ attr: "attachmentId", default: undefined, nonDefault: "att-1" },
],
},
{
// Audio carries no alt/title; attachmentId is its only optional string attr.
type: "audio",
baseAttrs: { src: "/a.mp3" },
attrMatrix: [
{ attr: "attachmentId", default: undefined, nonDefault: "att-2" },
],
},
{
// pdf: link-form media. `name` (filename) is its at-risk string attr.
type: "pdf",
baseAttrs: { src: "/d.pdf" },
attrMatrix: [
{ attr: "name", default: undefined, nonDefault: "report.pdf", emptyStringClass: true },
{ attr: "attachmentId", default: undefined, nonDefault: "att-3" },
],
},
{
// attachment: link-form media (file card). `name` + `mime` string attrs.
type: "attachment",
baseAttrs: { url: "/f.zip" },
attrMatrix: [
{ attr: "name", default: undefined, nonDefault: "bundle.zip", emptyStringClass: true },
{ attr: "mime", default: undefined, nonDefault: "application/zip", emptyStringClass: true },
{ attr: "attachmentId", default: undefined, nonDefault: "att-4" },
],
},
{
// embed: link-form media. `provider` is its at-risk string attr (schema
// default ""). embed's numeric width/height defaults (800/600) are a SEPARATE,
// documented limitation OUTSIDE the empty-string class: they are not in
// canonicalize's KNOWN_DEFAULTS, so an ABSENT width/height re-imports as the
// 800/600 default and diverges canonically (see the note in canonicalize.ts).
// That is canonicalize-owned and out of scope here, so we author the
// dimensions at their defaults (as real editor embeds carry them) to keep this
// guard focused on the empty-string/provider class.
// provider's schema default is "" (NOT null), so a re-imported "" is the
// correct value, not a phantom — it is outside the null-default empty-string
// class. We author it at its "" default (the default pick) so the sweep still
// asserts a non-default provider ("youtube") round-trips, without tripping the
// canonicalize KNOWN_DEFAULTS gap for embed's non-null defaults.
type: "embed",
baseAttrs: { src: "https://example.com/x", width: 800, height: 600 },
attrMatrix: [
{ attr: "provider", default: "", nonDefault: "youtube" },
],
},
{
// drawio: image-form diagram. `title` + `alt` string attrs (data-title/-alt).
type: "drawio",
baseAttrs: { src: "blob:drawio" },
attrMatrix: [
{ attr: "title", default: undefined, nonDefault: "flow chart", emptyStringClass: true },
{ attr: "alt", default: undefined, nonDefault: "an alt", emptyStringClass: true },
{ attr: "attachmentId", default: undefined, nonDefault: "att-5" },
],
},
{
// excalidraw: image-form diagram, same shared diagramAttributes set.
type: "excalidraw",
baseAttrs: { src: "blob:excalidraw" },
attrMatrix: [
{ attr: "title", default: undefined, nonDefault: "sketch", emptyStringClass: true },
{ attr: "alt", default: undefined, nonDefault: "an alt", emptyStringClass: true },
{ attr: "attachmentId", default: undefined, nonDefault: "att-6" },
],
},
];
describe("round-trip stability matrix (image + media family)", () => {
for (const spec of SPECS) {
it(`${spec.type}: no attr materializes an empty-string / phantom value`, async () => {
const report = await runStabilityMatrix(spec);
const unstable = unstableCombos(report);
// On failure, print the WHOLE matrix so which (attr, value) combos are
// unstable is legible.
expect(unstable, `\n${formatReport(report)}\n`).toEqual([]);
});
}
});
// ---------------------------------------------------------------------------
// THIRD STATE: an attr EXPLICITLY stored as a literal "" (GS-EDIT-REVERT: a user
// typed alt/title/name/... then deleted it, so Tiptap persisted `attr: ""` — a
// value DISTINCT from "attr was never set"). Unlike the absent case above, this
// state is NOT first-pass byte-stable: the fix's `"" -> default` coercion is a
// deliberate ONE-TIME normalization on the FIRST sync round-trip, stable
// thereafter. We therefore assert a DIFFERENT contract — "converges to default
// on pass 1, then idempotent from pass 2 on" — for every empty-string-class attr
// across the whole node family (image/video/pdf/attachment/drawio/excalidraw).
//
// IMPORTANT for a future sync/QA pass: the pass-1 `"" -> null` diff is the
// converter canon, not corruption. It appears at most once per affected node and
// must NOT be flagged as "the converter is losing/corrupting page data".
// ---------------------------------------------------------------------------
describe("round-trip third state: explicit empty string converges once, then idempotent", () => {
for (const spec of SPECS) {
for (const attr of convergenceCasesFor(spec)) {
it(`${spec.type}.${attr}: "" normalizes to default on pass 1, byte-stable from pass 2`, async () => {
const r = await runConvergenceCase(spec, attr);
// Pass 1 must converge "" -> the schema default (the one-time diff) and
// pass 2 (roundtrip of pass-1 output) must be byte-stable. formatConvergence
// prints exactly which half failed.
expect(convergenceOk(r), `\n${formatConvergence(r)}\n`).toBe(true);
// Spell the contract out explicitly so the intent is legible in the test:
expect(r.convergedToDefault, `\n${formatConvergence(r)}\n`).toBe(true);
expect(r.firstPassValue).toEqual(r.expectedDefault);
expect(r.secondPassDivergence, `\n${formatConvergence(r)}\n`).toBeNull();
});
}
}
});