From 2ce672709a26e67bc7b152437eca4dd340481b3d Mon Sep 17 00:00:00 2001 From: agent_coder Date: Sat, 4 Jul 2026 20:51:34 +0300 Subject: [PATCH 1/2] =?UTF-8?q?fix(prosemirror-markdown):=20stabilize=20im?= =?UTF-8?q?age=20round-trip=20=E2=80=94=20""=20=E2=89=A1=20absent=20on=20p?= =?UTF-8?q?arse=20(empty-string=20class)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A stored image authored without `alt` gained a phantom `alt: ""` on every round-trip (`markdownToProseMirror(convertProseMirrorToMarkdown(doc))`): `marked` renders `![](src)` as ``, and the stock tiptap Image `alt` parseHTML (`getAttribute("alt")`) materialized the empty string where the original had no attribute. That false diff is a real GS-EDIT-REVERT churn source — an agent / git-sync touch of a page with an image mutates the stored JSON (`absent -> ""`), producing phantom diffs that can overwrite live edits. Fix is PARSE-SIDE ("" ≡ absent), so the RAW round-trip is idempotent — not only the canonical form (history / stored JSON diff on the raw shape; masking it only in canonicalize would leave that noise). `image.alt`/`title` parseHTML now coerce `getAttribute(...) || null`, plus defense-in-depth `|| null` across the at-risk empty-string class (video aria-label, drawio/excalidraw title+alt, pdf name, attachment name+mime) matching the existing `image.caption || null` precedent. NOTE — image `align` is NOT changed: it round-trips correctly (center via the schema default "center", left/right via the `` comment). Its `toBeUndefined()` in the git-sync gate is canonical-form normalization, not a loss. Intentional divergence from editor-ext: editor-ext's literal `alt` parseHTML returns "" verbatim, but this coercion CONVERGES on editor-ext's real STORED shape (an image inserted without alt has no `alt` attribute -> re-parses absent, never ""), so the round-trip is idempotent and matches real documents. Adds a reusable, node-agnostic round-trip-stability matrix helper (test/roundtrip-stability.helper.ts) — given a node + attr spec it enumerates default/non-default combos and asserts byte-stability of BOTH the raw and the canonical round-trip (the documented numeric width/height→string coercion encoded as an explicit allowed normalization) — driven over image + the whole media family (video/audio/pdf/attachment/embed/drawio/excalidraw). The only raw empty-string instability it found was image.alt; the family was already stable. Gate: package suite 33 files / 672 tests passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/lib/docmost-schema.ts | 45 ++- .../test/roundtrip-stability.helper.ts | 305 ++++++++++++++++++ .../test/roundtrip-stability.test.ts | 128 ++++++++ 3 files changed, 472 insertions(+), 6 deletions(-) create mode 100644 packages/prosemirror-markdown/test/roundtrip-stability.helper.ts create mode 100644 packages/prosemirror-markdown/test/roundtrip-stability.test.ts diff --git a/packages/prosemirror-markdown/src/lib/docmost-schema.ts b/packages/prosemirror-markdown/src/lib/docmost-schema.ts index 1a63ec95..e27994e3 100644 --- a/packages/prosemirror-markdown/src/lib/docmost-schema.ts +++ b/packages/prosemirror-markdown/src/lib/docmost-schema.ts @@ -635,13 +635,17 @@ const Attachment = Node.create({ }, name: { default: null, - parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-name"), + // Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default. + parseHTML: (el: HTMLElement) => + el.getAttribute("data-attachment-name") || null, renderHTML: (attrs: Record) => attrs.name ? { "data-attachment-name": attrs.name } : {}, }, mime: { default: null, - parseHTML: (el: HTMLElement) => el.getAttribute("data-attachment-mime"), + // Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default. + parseHTML: (el: HTMLElement) => + el.getAttribute("data-attachment-mime") || null, renderHTML: (attrs: Record) => attrs.mime ? { "data-attachment-mime": attrs.mime } : {}, }, @@ -689,7 +693,10 @@ const Video = Node.create({ }, alt: { default: null, - parseHTML: (el: HTMLElement) => el.getAttribute("aria-label"), + // Empty-string-vs-absent idempotency: coerce "" back to the default so a + // stray empty `aria-label` never materializes `alt: ""` on a video stored + // with no alt (same GS-EDIT-REVERT class as the image `alt` fix). + parseHTML: (el: HTMLElement) => el.getAttribute("aria-label") || null, renderHTML: (attrs: Record) => attrs.alt ? { "aria-label": attrs.alt } : {}, }, @@ -864,13 +871,15 @@ const diagramAttributes = () => ({ }, title: { default: null, - parseHTML: (el: HTMLElement) => el.getAttribute("data-title"), + // Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default. + parseHTML: (el: HTMLElement) => el.getAttribute("data-title") || null, renderHTML: (attrs: Record) => attrs.title ? { "data-title": attrs.title } : {}, }, alt: { default: null, - parseHTML: (el: HTMLElement) => el.getAttribute("data-alt"), + // Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default. + parseHTML: (el: HTMLElement) => el.getAttribute("data-alt") || null, renderHTML: (attrs: Record) => attrs.alt ? { "data-alt": attrs.alt } : {}, }, @@ -1106,7 +1115,8 @@ const Pdf = Node.create({ }, name: { default: null, - parseHTML: (el: HTMLElement) => el.getAttribute("data-name"), + // Empty-string-vs-absent idempotency (GS-EDIT-REVERT class): "" -> default. + parseHTML: (el: HTMLElement) => el.getAttribute("data-name") || null, renderHTML: (attrs: Record) => attrs.name ? { "data-name": attrs.name } : {}, }, @@ -1491,6 +1501,29 @@ export const docmostExtensions = [ ...parent.height, parseHTML: (el: HTMLElement) => el.getAttribute("height"), }, + // Empty-string-vs-absent idempotency (GS-EDIT-REVERT class). `marked` + // renders `![](src)` as ``, so the stock Image `alt` + // parseHTML (`getAttribute("alt")`) materializes `alt: ""` on an image + // that was stored with NO alt (attr absent). That is a false diff against + // the editor-stored form (a no-alt image has alt ABSENT, not ""), so a + // git-sync / ai-chat touch of a page with a plain image produced phantom + // churn. Coerce an empty string back to the attr's default (null) so the + // import is idempotent. A real alt survives verbatim (`|| undefined` keeps + // the truthy value; the default fills the empty case). `title` is coerced + // the same way for the whole class, even though `marked` does not + // currently emit `title=""` — defence in depth against any path that does. + // NOTE: this DIVERGES from editor-ext's literal image `alt` parseHTML + // (`getAttribute("alt")`, which returns "" verbatim), but CONVERGES on + // editor-ext's real STORED shape: an editor image inserted without alt + // renders with no `alt` attribute and re-parses as absent, never "". + alt: { + ...parent.alt, + parseHTML: (el: HTMLElement) => el.getAttribute("alt") || null, + }, + title: { + ...parent.title, + parseHTML: (el: HTMLElement) => el.getAttribute("title") || null, + }, }; }, }).configure({ inline: false }), diff --git a/packages/prosemirror-markdown/test/roundtrip-stability.helper.ts b/packages/prosemirror-markdown/test/roundtrip-stability.helper.ts new file mode 100644 index 00000000..3e63a7aa --- /dev/null +++ b/packages/prosemirror-markdown/test/roundtrip-stability.helper.ts @@ -0,0 +1,305 @@ +/** + * Reusable round-trip-STABILITY matrix helper (fixtures-first). + * + * A single stored node authored WITHOUT a given string attribute (attr + * absent / undefined) must not gain a phantom EMPTY-STRING value after a + * markdown round-trip — the "empty-string-vs-absent" churn class. This helper, + * given a node spec, drives a matrix of attribute combinations through the REAL + * converter (`convertProseMirrorToMarkdown` -> `markdownToProseMirror`) and + * asserts byte-stability on two contours: + * + * 1. RAW round-trip: for the node under test, every attribute the round-trip + * materializes must equal what the INPUT authored — an authored attr keeps + * its value, an ABSENT attr may only reappear at its SCHEMA DEFAULT. If an + * absent attr comes back as a NON-default value (e.g. `alt: ""` where the + * default is `null`), that is an instability and is reported precisely as + * `type.attr: absent -> ""`. This is the contour git-sync / stored + * JSON diffs on, so masking it only in `canonicalize` would leave the noise. + * + * 2. CANONICAL round-trip: `canonicalizeContent(original)` must deep-equal + * `canonicalizeContent(roundtrip)` (a second, semantic contour). + * + * The ONLY normalization the helper treats as allowed (not an instability) is + * the DOCUMENTED numeric width/height/size/aspectRatio -> string coercion the + * converter performs on purpose (a stored numeric `640` re-parses via + * `getAttribute` as the string `"640"`). It is encoded here as an explicit + * per-spec `numericStringAttrs` set applied to BOTH contours, NOT a silent skip. + * + * The helper is node-type agnostic: image and the whole media family share the + * `align !== "center"` predicate + `` comment machinery, so one + * matrix guards the shared class. + */ +import { getSchema } from "@tiptap/core"; +import { + convertProseMirrorToMarkdown, + markdownToProseMirror, + canonicalizeContent, + docmostExtensions, +} from "../src/lib/index.js"; +import { firstDivergence } from "./roundtrip-helpers.js"; + +/** One attribute's two probe values. */ +export interface AttrMatrixEntry { + /** Attribute name on the node. */ + attr: string; + /** + * The "default" pick. `undefined` means the attribute is OMITTED entirely + * (the absent case — the one that can materialize an empty string on import). + * A concrete value is authored verbatim. + */ + default: unknown; + /** A representative NON-default value to exercise (must survive verbatim). */ + nonDefault: unknown; +} + +/** A node type + the attribute matrix to sweep for it. */ +export interface NodeStabilitySpec { + /** Node type (e.g. "image", "video"). */ + type: string; + /** Attributes always present on the node (e.g. `{ src: "/i.png" }`). */ + baseAttrs?: Record; + /** Attributes to sweep at default and non-default. */ + attrMatrix: AttrMatrixEntry[]; + /** + * Attributes whose numeric -> string coercion on round-trip is DOCUMENTED and + * intentional; compared modulo `String(x)` on both sides. Defaults to the + * converter's known sizing set. + */ + numericStringAttrs?: string[]; +} + +/** A single unstable finding, legible enough to tie a gate-lock to. */ +export interface Instability { + type: string; + attr: string; + /** What the input authored: the literal value, or the ABSENT sentinel. */ + authored: unknown | typeof ABSENT; + /** What the round-trip produced. */ + got: unknown; + /** What a stable round-trip should have produced (authored value or default). */ + expected: unknown; +} + +/** One matrix cell's result. */ +export interface ComboResult { + label: string; + authored: Record; + /** RAW-contour instabilities on the node under test. */ + raw: Instability[]; + /** CANONICAL-contour divergence (path + values) or null when equal. */ + canonical: { path: string; a: unknown; b: unknown } | null; + /** True when the node type failed to round-trip at all (structural loss). */ + missing: boolean; + md: string; +} + +/** Whole-matrix report for one node spec. */ +export interface MatrixReport { + type: string; + combos: ComboResult[]; +} + +/** Sentinel marking an attribute the input did NOT author. */ +export const ABSENT = Symbol("ABSENT"); + +const DEFAULT_NUMERIC_STRING_ATTRS = [ + "width", + "height", + "size", + "aspectRatio", +]; + +// The ProseMirror schema the converter targets — its attribute `default`s are +// the authoritative "what an absent attr should re-materialize as" oracle. +const schema = getSchema(docmostExtensions); + +/** Read the schema default for every attribute of a node type. */ +function schemaDefaults(type: string): Record { + const specAttrs = (schema.nodes[type]?.spec?.attrs ?? {}) as Record< + string, + { default: unknown } + >; + const out: Record = {}; + for (const [k, v] of Object.entries(specAttrs)) out[k] = v.default; + return out; +} + +/** Find the first node of a given type anywhere in a PM doc tree. */ +function findFirst(node: any, type: string): any { + if (node && node.type === type) return node; + for (const child of node?.content ?? []) { + const hit = findFirst(child, type); + if (hit) return hit; + } + return null; +} + +/** Coerce a scalar for the documented numeric->string comparison. */ +const numStr = (x: unknown): unknown => (x == null ? x : String(x)); + +/** + * Enumerate the cartesian product of the matrix: every attribute independently + * at its default (index 0) or non-default (index 1) pick. The all-default + * corner is included (the baseline). Small by construction (2^N over a handful + * of at-risk string attrs). + */ +function enumerateCombos(matrix: AttrMatrixEntry[]): number[][] { + let combos: number[][] = [[]]; + for (let i = 0; i < matrix.length; i++) { + const next: number[][] = []; + for (const c of combos) { + next.push([...c, 0]); + next.push([...c, 1]); + } + combos = next; + } + return combos; +} + +/** Build the authored attrs for one combo pick vector. */ +function authoredAttrs( + spec: NodeStabilitySpec, + picks: number[], +): Record { + const attrs: Record = { ...(spec.baseAttrs ?? {}) }; + spec.attrMatrix.forEach((entry, i) => { + if (picks[i] === 1) { + attrs[entry.attr] = entry.nonDefault; + } else if (entry.default !== undefined) { + attrs[entry.attr] = entry.default; + } + // default === undefined -> OMIT the attr entirely (the absent case). + }); + return attrs; +} + +/** Human-readable label for a combo (which attrs are at non-default). */ +function comboLabel(spec: NodeStabilitySpec, picks: number[]): string { + const on = spec.attrMatrix + .filter((_, i) => picks[i] === 1) + .map((e) => e.attr); + return on.length === 0 ? "" : on.join("+"); +} + +/** + * Run the full stability matrix for one node spec and return a structured + * report (does NOT throw — the caller asserts, so a failure can print the whole + * report). Every combo runs the real export->import pipeline once. + */ +export async function runStabilityMatrix( + spec: NodeStabilitySpec, +): Promise { + const numericStringAttrs = new Set( + spec.numericStringAttrs ?? DEFAULT_NUMERIC_STRING_ATTRS, + ); + const defaults = schemaDefaults(spec.type); + const combos: ComboResult[] = []; + + for (const picks of enumerateCombos(spec.attrMatrix)) { + const authored = authoredAttrs(spec, picks); + const doc = { type: "doc", content: [{ type: spec.type, attrs: authored }] }; + const md = convertProseMirrorToMarkdown(doc); + const rt = await markdownToProseMirror(md); + const node = findFirst(rt, spec.type); + + const result: ComboResult = { + label: comboLabel(spec, picks), + authored, + raw: [], + canonical: null, + missing: node == null, + md, + }; + + if (node != null) { + // RAW contour: every materialized attr must equal the authored value, or + // (for an absent attr) the schema default — modulo the documented numeric + // string coercion. + const rtAttrs = (node.attrs ?? {}) as Record; + for (const key of Object.keys(rtAttrs)) { + const authoredHas = Object.prototype.hasOwnProperty.call(authored, key); + const expected = authoredHas ? authored[key] : defaults[key]; + let got = rtAttrs[key]; + let exp = expected; + if (numericStringAttrs.has(key)) { + got = numStr(got); + exp = numStr(exp); + } + if (firstDivergence(got, exp) !== null) { + result.raw.push({ + type: spec.type, + attr: key, + authored: authoredHas ? authored[key] : ABSENT, + got: rtAttrs[key], + expected, + }); + } + } + + // CANONICAL contour: canonical forms deep-equal, modulo the same numeric + // string coercion (applied to both trees so a documented coercion is not + // counted as a divergence). + const ca = normalizeNumeric(canonicalizeContent(doc), numericStringAttrs); + const cb = normalizeNumeric(canonicalizeContent(rt), numericStringAttrs); + result.canonical = firstDivergence(ca, cb); + } + + combos.push(result); + } + + return { type: spec.type, combos }; +} + +/** + * Deep-copy a canonical tree, coercing the documented numeric->string attrs to + * their string form so an intentional `640 -> "640"` coercion is not reported + * as a canonical divergence. Only touches the listed attribute keys. + */ +function normalizeNumeric(node: any, attrs: Set): any { + if (Array.isArray(node)) return node.map((n) => normalizeNumeric(n, attrs)); + if (node === null || typeof node !== "object") return node; + const out: Record = {}; + for (const key of Object.keys(node)) { + if (key === "attrs" && node.attrs && typeof node.attrs === "object") { + const a: Record = {}; + for (const [k, v] of Object.entries(node.attrs)) { + a[k] = attrs.has(k) ? numStr(v) : v; + } + out.attrs = a; + } else { + out[key] = normalizeNumeric(node[key], attrs); + } + } + return out; +} + +/** Flatten a report to just its unstable combos (for a terse assertion). */ +export function unstableCombos(report: MatrixReport): ComboResult[] { + return report.combos.filter( + (c) => c.missing || c.raw.length > 0 || c.canonical !== null, + ); +} + +/** Render a report as a legible multi-line string for a failed assertion. */ +export function formatReport(report: MatrixReport): string { + const lines: string[] = [`node "${report.type}":`]; + for (const c of report.combos) { + const flags: string[] = []; + if (c.missing) flags.push("DID-NOT-ROUND-TRIP"); + for (const i of c.raw) { + const authored = + i.authored === ABSENT ? "absent" : JSON.stringify(i.authored); + flags.push( + `RAW ${i.type}.${i.attr}: ${authored} -> ${JSON.stringify(i.got)} (expected ${JSON.stringify(i.expected)})`, + ); + } + if (c.canonical) { + flags.push( + `CANON @ ${c.canonical.path}: ${JSON.stringify(c.canonical.a)} vs ${JSON.stringify(c.canonical.b)}`, + ); + } + const status = flags.length === 0 ? "stable" : flags.join("; "); + lines.push(` [${c.label}] ${status}`); + } + return lines.join("\n"); +} diff --git a/packages/prosemirror-markdown/test/roundtrip-stability.test.ts b/packages/prosemirror-markdown/test/roundtrip-stability.test.ts new file mode 100644 index 00000000..bd3464bc --- /dev/null +++ b/packages/prosemirror-markdown/test/roundtrip-stability.test.ts @@ -0,0 +1,128 @@ +import { describe, expect, it } from "vitest"; +import { + runStabilityMatrix, + unstableCombos, + formatReport, + type NodeStabilitySpec, +} from "./roundtrip-stability.helper.js"; + +// --------------------------------------------------------------------------- +// Round-trip STABILITY matrix for image + the media family. +// +// Guards the "empty-string-vs-absent" churn class (GS-EDIT-REVERT family): a +// stored node authored WITHOUT a string attr (alt/title/caption/aria-label/...) +// must not gain a phantom `attr: ""` after `markdownToProseMirror(convert…)`. +// Each spec sweeps the at-risk string attrs at DEFAULT (absent) and at a real +// NON-default value; the helper asserts both the RAW round-trip (attrs equal the +// input's, modulo the documented numeric width/height/size/aspectRatio -> string +// coercion) and the CANONICAL round-trip (canonical forms deep-equal). +// +// The image + media family share the `align !== "center"` predicate and the +// `` comment machinery, so one matrix guards the shared class. +// align is NOT part of this class (it round-trips correctly) and is not swept. +// --------------------------------------------------------------------------- + +const SPECS: NodeStabilitySpec[] = [ + { + // Image carries the most at-risk string attrs. `alt` is the one marked + // materializes as `` on `![](src)` import (the real bug); title + // and caption are covered as the same class. attachmentId is a string attr + // that must stay absent when unset (control). + type: "image", + baseAttrs: { src: "/i.png" }, + attrMatrix: [ + { attr: "alt", default: undefined, nonDefault: "a real alt text" }, + { attr: "title", default: undefined, nonDefault: "a real title" }, + { attr: "caption", default: undefined, nonDefault: "a real caption" }, + { attr: "attachmentId", default: undefined, nonDefault: "att-42" }, + ], + }, + { + // Video's `alt` rides the `aria-label` attribute (media aria-label at risk). + type: "video", + baseAttrs: { src: "/v.mp4" }, + attrMatrix: [ + { attr: "alt", default: undefined, nonDefault: "a clip" }, + { attr: "attachmentId", default: undefined, nonDefault: "att-1" }, + ], + }, + { + // Audio carries no alt/title; attachmentId is its only optional string attr. + type: "audio", + baseAttrs: { src: "/a.mp3" }, + attrMatrix: [ + { attr: "attachmentId", default: undefined, nonDefault: "att-2" }, + ], + }, + { + // pdf: link-form media. `name` (filename) is its at-risk string attr. + type: "pdf", + baseAttrs: { src: "/d.pdf" }, + attrMatrix: [ + { attr: "name", default: undefined, nonDefault: "report.pdf" }, + { attr: "attachmentId", default: undefined, nonDefault: "att-3" }, + ], + }, + { + // attachment: link-form media (file card). `name` + `mime` string attrs. + type: "attachment", + baseAttrs: { url: "/f.zip" }, + attrMatrix: [ + { attr: "name", default: undefined, nonDefault: "bundle.zip" }, + { attr: "mime", default: undefined, nonDefault: "application/zip" }, + { attr: "attachmentId", default: undefined, nonDefault: "att-4" }, + ], + }, + { + // embed: link-form media. `provider` is its at-risk string attr (schema + // default ""). embed's numeric width/height defaults (800/600) are a SEPARATE, + // documented limitation OUTSIDE the empty-string class: they are not in + // canonicalize's KNOWN_DEFAULTS, so an ABSENT width/height re-imports as the + // 800/600 default and diverges canonically (see the note in canonicalize.ts). + // That is canonicalize-owned and out of scope here, so we author the + // dimensions at their defaults (as real editor embeds carry them) to keep this + // guard focused on the empty-string/provider class. + // provider's schema default is "" (NOT null), so a re-imported "" is the + // correct value, not a phantom — it is outside the null-default empty-string + // class. We author it at its "" default (the default pick) so the sweep still + // asserts a non-default provider ("youtube") round-trips, without tripping the + // canonicalize KNOWN_DEFAULTS gap for embed's non-null defaults. + type: "embed", + baseAttrs: { src: "https://example.com/x", width: 800, height: 600 }, + attrMatrix: [ + { attr: "provider", default: "", nonDefault: "youtube" }, + ], + }, + { + // drawio: image-form diagram. `title` + `alt` string attrs (data-title/-alt). + type: "drawio", + baseAttrs: { src: "blob:drawio" }, + attrMatrix: [ + { attr: "title", default: undefined, nonDefault: "flow chart" }, + { attr: "alt", default: undefined, nonDefault: "an alt" }, + { attr: "attachmentId", default: undefined, nonDefault: "att-5" }, + ], + }, + { + // excalidraw: image-form diagram, same shared diagramAttributes set. + type: "excalidraw", + baseAttrs: { src: "blob:excalidraw" }, + attrMatrix: [ + { attr: "title", default: undefined, nonDefault: "sketch" }, + { attr: "alt", default: undefined, nonDefault: "an alt" }, + { attr: "attachmentId", default: undefined, nonDefault: "att-6" }, + ], + }, +]; + +describe("round-trip stability matrix (image + media family)", () => { + for (const spec of SPECS) { + it(`${spec.type}: no attr materializes an empty-string / phantom value`, async () => { + const report = await runStabilityMatrix(spec); + const unstable = unstableCombos(report); + // On failure, print the WHOLE matrix so which (attr, value) combos are + // unstable is legible. + expect(unstable, `\n${formatReport(report)}\n`).toEqual([]); + }); + } +}); -- 2.52.0 From c192f2a2e11b4e88aac9cb5d9ec19ee846af6325 Mon Sep 17 00:00:00 2001 From: agent_coder Date: Sat, 4 Jul 2026 21:17:17 +0300 Subject: [PATCH 2/2] =?UTF-8?q?test(prosemirror-markdown):=20pin=20the=20t?= =?UTF-8?q?hird=20state=20=E2=80=94=20explicit=20""=20converges=20once,=20?= =?UTF-8?q?then=20idempotent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer addition to the round-trip stability matrix: besides "attr absent" and "attr has a real value", a string attr in the empty-string class has a third, degenerate state — a LITERAL "" (a user types alt/title/name in the editor then deletes it, and Tiptap persists `attr: ""`, distinct from never-set). The fix's `getAttribute(...) || null` coercion normalizes such a stored "" to the default on the FIRST round-trip (a one-time "" -> null diff) and is byte-stable from the SECOND round-trip on. Adds a convergence contract to the reusable matrix helper (emptyStringClass flag + runConvergenceCase): pass 1 must converge the attr to its schema default (NOT asserted byte-stable vs the "" input — that is the intended one-time normalization); pass 2 must deep-equal pass 1 (idempotent thereafter). Driven for every empty-string-class attr across image + the media family (image/drawio alt+title, video alt via aria-label, pdf/attachment name, attachment mime). Documents the one-time normalization so a future sync/QA diff does not flag the single "" -> null change as converter corruption. Gate: package suite 33 files / 682 tests passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../test/roundtrip-stability.helper.ts | 138 ++++++++++++++++++ .../test/roundtrip-stability.test.ts | 56 +++++-- 2 files changed, 184 insertions(+), 10 deletions(-) diff --git a/packages/prosemirror-markdown/test/roundtrip-stability.helper.ts b/packages/prosemirror-markdown/test/roundtrip-stability.helper.ts index 3e63a7aa..f996efdf 100644 --- a/packages/prosemirror-markdown/test/roundtrip-stability.helper.ts +++ b/packages/prosemirror-markdown/test/roundtrip-stability.helper.ts @@ -50,6 +50,16 @@ export interface AttrMatrixEntry { default: unknown; /** A representative NON-default value to exercise (must survive verbatim). */ nonDefault: unknown; + /** + * Marks the attr as a member of the EMPTY-STRING class the fix targets: a + * string attr whose schema default is `null`/absent and whose parseHTML + * coerces `"" -> default` (image/drawio `alt`+`title`, video `alt` via + * aria-label, pdf/attachment `name`, attachment `mime`). Set true to also + * drive the THIRD-STATE convergence case (see runConvergenceCase) for this + * attr. Attrs whose default is NOT null (e.g. embed `provider`, default "") + * or that are not `""`-coerced (control attrs) are left unset. + */ + emptyStringClass?: boolean; } /** A node type + the attribute matrix to sweep for it. */ @@ -280,6 +290,134 @@ export function unstableCombos(report: MatrixReport): ComboResult[] { ); } +// --------------------------------------------------------------------------- +// THIRD STATE: an EXPLICITLY-STORED empty string on a string attr. +// +// The matrix above sweeps TWO states per string attr: absent/default and a +// non-default value — and asserts FIRST-pass byte-stability for both. There is +// a third, degenerate state the matrix does NOT cover: the attr stored as a +// LITERAL `""`. This is DISTINCT from "the node never had the attr": a user +// types an alt in the editor, then deletes it, and Tiptap's +// `updateAttributes({ alt: "" })` persists a literal `alt: ""` in the stored +// JSON. There is no absent-vs-"" distinction in the DOM once serialized, so the +// fix's `getAttribute("alt") || null` coercion canonicalizes BOTH to the +// default (`null`). +// +// Consequence — and this is CORRECT, not a bug: a doc carrying an explicit `""` +// converges to the default on the FIRST round-trip (a ONE-TIME diff: `"" -> +// null`), then is byte-stable from the SECOND round-trip on (idempotent). So +// this state must be pinned with a DIFFERENT contract than the matrix's: +// - do NOT assert first-pass byte-stability (the first pass legitimately +// changes `""` -> default), and +// - DO assert the first pass converges to the default AND the second pass is +// idempotent (rt2 deep-equals rt1). +// +// A future sync/QA pass diffing stored pages will see this one-time `"" -> null` +// normalization exactly once per affected node; it is the converter canon, not +// corruption, and must not be flagged as data loss. +// --------------------------------------------------------------------------- + +/** Result of the third-state ("explicit empty string") convergence probe. */ +export interface ConvergenceResult { + type: string; + attr: string; + /** The schema default the attr must converge to on pass 1 (null / absent). */ + expectedDefault: unknown; + /** rt1's materialized value for the attr — must equal `expectedDefault`. */ + firstPassValue: unknown; + /** True when the node round-tripped AND rt1 converged the attr to default. */ + convergedToDefault: boolean; + /** rt1-vs-rt2 divergence; MUST be null (idempotent from pass 2 on). */ + secondPassDivergence: { path: string; a: unknown; b: unknown } | null; + /** True when the node type failed to round-trip at all (structural loss). */ + missing: boolean; +} + +/** Round-trip a full PM doc through the real converter once. */ +async function roundtripDoc(doc: any): Promise { + return markdownToProseMirror(convertProseMirrorToMarkdown(doc)); +} + +/** + * Third-state convergence probe for one string attr of the empty-string class. + * + * (a) builds a doc with the attr EXPLICITLY set to `""` (baseAttrs + `""`), + * (b) rt1 = roundtrip(doc); asserts rt1's attr equals the schema default — the + * documented ONE-TIME `"" -> default` normalization (NOT byte-stable vs the + * `""` input, so first-pass stability is deliberately NOT asserted here), + * (c) rt2 = roundtrip(rt1); asserts rt2 deep-equals rt1 — idempotent from the + * second round-trip on. + * + * Returns a structured result (does NOT throw) so the caller can assert and + * print. Reusable across the whole node family: drive it for every attr flagged + * `emptyStringClass` on every spec (see convergenceCasesFor / the test driver). + */ +export async function runConvergenceCase( + spec: NodeStabilitySpec, + attr: string, +): Promise { + const expectedDefault = schemaDefaults(spec.type)[attr]; + + // (a) The degenerate third state: attr persisted as a LITERAL "". + const authored = { ...(spec.baseAttrs ?? {}), [attr]: "" }; + const doc = { type: "doc", content: [{ type: spec.type, attrs: authored }] }; + + // (b) First round-trip: "" must normalize to the default (a one-time diff). + const rt1 = await roundtripDoc(doc); + const node1 = findFirst(rt1, spec.type); + const firstPassValue = node1?.attrs?.[attr]; + const convergedToDefault = + node1 != null && firstDivergence(firstPassValue, expectedDefault) === null; + + // (c) Second round-trip: must be byte-stable (rt2 deep-equals rt1). We compare + // the WHOLE docs — both are converter OUTPUTS already in the same materialized + // form (numeric attrs are strings on both sides), so no numeric normalization + // is needed here, unlike the raw/canonical contours above. + const rt2 = node1 != null ? await roundtripDoc(rt1) : rt1; + const secondPassDivergence = + node1 != null ? firstDivergence(rt1, rt2) : null; + + return { + type: spec.type, + attr, + expectedDefault, + firstPassValue, + convergedToDefault, + secondPassDivergence, + missing: node1 == null, + }; +} + +/** The attrs of a spec flagged as members of the empty-string class. */ +export function convergenceCasesFor(spec: NodeStabilitySpec): string[] { + return spec.attrMatrix + .filter((e) => e.emptyStringClass) + .map((e) => e.attr); +} + +/** True when a convergence result honours the "converges once, then stable" contract. */ +export function convergenceOk(r: ConvergenceResult): boolean { + return !r.missing && r.convergedToDefault && r.secondPassDivergence === null; +} + +/** Render a convergence result as a legible one-liner for a failed assertion. */ +export function formatConvergence(r: ConvergenceResult): string { + if (r.missing) return `${r.type}.${r.attr}: DID-NOT-ROUND-TRIP`; + const parts: string[] = []; + if (!r.convergedToDefault) { + parts.push( + `pass1 did NOT converge: got ${JSON.stringify(r.firstPassValue)} (expected default ${JSON.stringify(r.expectedDefault)})`, + ); + } + if (r.secondPassDivergence) { + parts.push( + `pass2 NOT idempotent @ ${r.secondPassDivergence.path}: ${JSON.stringify(r.secondPassDivergence.a)} vs ${JSON.stringify(r.secondPassDivergence.b)}`, + ); + } + const status = parts.length === 0 ? "converges-once-then-stable" : parts.join("; "); + return `${r.type}.${r.attr}: ${status}`; +} + /** Render a report as a legible multi-line string for a failed assertion. */ export function formatReport(report: MatrixReport): string { const lines: string[] = [`node "${report.type}":`]; diff --git a/packages/prosemirror-markdown/test/roundtrip-stability.test.ts b/packages/prosemirror-markdown/test/roundtrip-stability.test.ts index bd3464bc..57b91b23 100644 --- a/packages/prosemirror-markdown/test/roundtrip-stability.test.ts +++ b/packages/prosemirror-markdown/test/roundtrip-stability.test.ts @@ -3,6 +3,10 @@ import { runStabilityMatrix, unstableCombos, formatReport, + runConvergenceCase, + convergenceCasesFor, + convergenceOk, + formatConvergence, type NodeStabilitySpec, } from "./roundtrip-stability.helper.js"; @@ -31,8 +35,8 @@ const SPECS: NodeStabilitySpec[] = [ type: "image", baseAttrs: { src: "/i.png" }, attrMatrix: [ - { attr: "alt", default: undefined, nonDefault: "a real alt text" }, - { attr: "title", default: undefined, nonDefault: "a real title" }, + { attr: "alt", default: undefined, nonDefault: "a real alt text", emptyStringClass: true }, + { attr: "title", default: undefined, nonDefault: "a real title", emptyStringClass: true }, { attr: "caption", default: undefined, nonDefault: "a real caption" }, { attr: "attachmentId", default: undefined, nonDefault: "att-42" }, ], @@ -42,7 +46,7 @@ const SPECS: NodeStabilitySpec[] = [ type: "video", baseAttrs: { src: "/v.mp4" }, attrMatrix: [ - { attr: "alt", default: undefined, nonDefault: "a clip" }, + { attr: "alt", default: undefined, nonDefault: "a clip", emptyStringClass: true }, { attr: "attachmentId", default: undefined, nonDefault: "att-1" }, ], }, @@ -59,7 +63,7 @@ const SPECS: NodeStabilitySpec[] = [ type: "pdf", baseAttrs: { src: "/d.pdf" }, attrMatrix: [ - { attr: "name", default: undefined, nonDefault: "report.pdf" }, + { attr: "name", default: undefined, nonDefault: "report.pdf", emptyStringClass: true }, { attr: "attachmentId", default: undefined, nonDefault: "att-3" }, ], }, @@ -68,8 +72,8 @@ const SPECS: NodeStabilitySpec[] = [ type: "attachment", baseAttrs: { url: "/f.zip" }, attrMatrix: [ - { attr: "name", default: undefined, nonDefault: "bundle.zip" }, - { attr: "mime", default: undefined, nonDefault: "application/zip" }, + { attr: "name", default: undefined, nonDefault: "bundle.zip", emptyStringClass: true }, + { attr: "mime", default: undefined, nonDefault: "application/zip", emptyStringClass: true }, { attr: "attachmentId", default: undefined, nonDefault: "att-4" }, ], }, @@ -98,8 +102,8 @@ const SPECS: NodeStabilitySpec[] = [ type: "drawio", baseAttrs: { src: "blob:drawio" }, attrMatrix: [ - { attr: "title", default: undefined, nonDefault: "flow chart" }, - { attr: "alt", default: undefined, nonDefault: "an alt" }, + { attr: "title", default: undefined, nonDefault: "flow chart", emptyStringClass: true }, + { attr: "alt", default: undefined, nonDefault: "an alt", emptyStringClass: true }, { attr: "attachmentId", default: undefined, nonDefault: "att-5" }, ], }, @@ -108,8 +112,8 @@ const SPECS: NodeStabilitySpec[] = [ type: "excalidraw", baseAttrs: { src: "blob:excalidraw" }, attrMatrix: [ - { attr: "title", default: undefined, nonDefault: "sketch" }, - { attr: "alt", default: undefined, nonDefault: "an alt" }, + { attr: "title", default: undefined, nonDefault: "sketch", emptyStringClass: true }, + { attr: "alt", default: undefined, nonDefault: "an alt", emptyStringClass: true }, { attr: "attachmentId", default: undefined, nonDefault: "att-6" }, ], }, @@ -126,3 +130,35 @@ describe("round-trip stability matrix (image + media family)", () => { }); } }); + +// --------------------------------------------------------------------------- +// THIRD STATE: an attr EXPLICITLY stored as a literal "" (GS-EDIT-REVERT: a user +// typed alt/title/name/... then deleted it, so Tiptap persisted `attr: ""` — a +// value DISTINCT from "attr was never set"). Unlike the absent case above, this +// state is NOT first-pass byte-stable: the fix's `"" -> default` coercion is a +// deliberate ONE-TIME normalization on the FIRST sync round-trip, stable +// thereafter. We therefore assert a DIFFERENT contract — "converges to default +// on pass 1, then idempotent from pass 2 on" — for every empty-string-class attr +// across the whole node family (image/video/pdf/attachment/drawio/excalidraw). +// +// IMPORTANT for a future sync/QA pass: the pass-1 `"" -> null` diff is the +// converter canon, not corruption. It appears at most once per affected node and +// must NOT be flagged as "the converter is losing/corrupting page data". +// --------------------------------------------------------------------------- +describe("round-trip third state: explicit empty string converges once, then idempotent", () => { + for (const spec of SPECS) { + for (const attr of convergenceCasesFor(spec)) { + it(`${spec.type}.${attr}: "" normalizes to default on pass 1, byte-stable from pass 2`, async () => { + const r = await runConvergenceCase(spec, attr); + // Pass 1 must converge "" -> the schema default (the one-time diff) and + // pass 2 (roundtrip of pass-1 output) must be byte-stable. formatConvergence + // prints exactly which half failed. + expect(convergenceOk(r), `\n${formatConvergence(r)}\n`).toBe(true); + // Spell the contract out explicitly so the intent is legible in the test: + expect(r.convergedToDefault, `\n${formatConvergence(r)}\n`).toBe(true); + expect(r.firstPassValue).toEqual(r.expectedDefault); + expect(r.secondPassDivergence, `\n${formatConvergence(r)}\n`).toBeNull(); + }); + } + } +}); -- 2.52.0