Files
gitmost/packages/prosemirror-markdown/test/highlight.test.ts
T
claude code agent 227 77f5224b55 feat(prosemirror-markdown): highlight without color as ==text== (#293 canon #7)
A `highlight` mark WITHOUT a color now serializes as the Obsidian/GFM `==text==`
syntax (closing hand-authoring gap A19); a highlight WITH a color keeps the
`<mark style="background-color: …">` HTML form (condition is deterministic on
the color attr). On the raw-HTML path (columns/spanned cells) BOTH forms stay
`<mark>` via inlineToHtml — markdown is not re-parsed inside a raw-HTML block.

Parse: `==` is not standard markdown, so the importer uses a DEDICATED marked
instance (`new Marked().use({extensions:[highlightMark]})`) rather than the
global singleton — registered once, never leaks `==` behavior to other callers.
The inline extension tokenizes `==text==` (non-empty, non-space-leading inner,
lazy so `==a== ==b==` is two marks; inner re-tokenized so nested marks survive;
`====`/`==x` fail-open to literal) into `<mark>` with no color, which the schema
parses as a color-less highlight. Inline code (`` `a == b` ``) stays code via
marked token precedence. marked 17 defaults (gfm:true, breaks:false) are
identical for the fresh instance, so tables/strike/autolinks are unaffected.

Losslessness: a LITERAL `==` in a text run would otherwise be misparsed as a
highlight on the next import, so `case "text"` backslash-escapes each `=` of a
`==` pair (marked decodes `\=` back to `=`), and this round-trips byte-stably.
The escape does NOT run for inline-code runs, and — CRITICALLY — codeBlock now
reads its child text RAW (schema `content: "text*"`) instead of routing through
`case "text"`: marked does not decode `\=` inside a fence, so escaping there
would permanently stamp backslashes into any `==` comparison (ubiquitous in
source code) and corrupt the block on the git-sync data path.

Tests: new highlight.test.ts (19 cases incl. serialize forms, colored vs plain,
column `<mark>` path, nested marks, inline-code exclusion, literal-`==` escape,
fail-open, AND a codeBlock-with-`==` regression proving no backslash corruption
+ byte-stable round-trip). Golden inline-mark matrix flipped top-level no-color
highlight to `==m==`; the kept `<mark style=…>` assertions are the colored/
raw-HTML cases.

package vitest: 559 passed; tsc clean. git-sync: 268 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-04 09:12:18 +03:00

250 lines
11 KiB
TypeScript

import { describe, expect, it } from 'vitest';
// Import both directions DIRECTLY from src (NOT the docmost-client barrel, which
// pulls in collaboration.ts and mutates the global DOM at import time), matching
// the other converter unit tests.
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
// #293 canon #7: a `highlight` mark WITHOUT a color serializes as the
// Obsidian/GFM `==text==` syntax; a highlight WITH a color KEEPS the
// `<mark style="background-color: …">` HTML form. On the raw-HTML path
// (columns / spanned cells) BOTH forms stay `<mark>` because markdown is not
// re-parsed there. This file locks the serialize form, the round-trip, and the
// literal-`==` escape that keeps a literal `==` from becoming a phantom mark.
/** Builds a ProseMirror-JSON `doc` root whose content is the given nodes. */
const doc = (...nodes: any[]) => {
  return { type: 'doc', content: nodes };
};
/**
 * Builds a ProseMirror-JSON text node.
 *
 * The `marks` key is only present when a truthy `marks` value is passed (an
 * empty array counts); otherwise the node has just `type` and `text`.
 */
const text = (t: string, marks?: any[]) => {
  if (marks) {
    return { type: 'text', text: t, marks };
  }
  return { type: 'text', text: t };
};
/** Builds a ProseMirror-JSON `paragraph` node holding the given inline nodes. */
const para = (...inline: any[]) => {
  return { type: 'paragraph', content: inline };
};
/**
 * Returns the first text node (document order, depth-first) in a PM JSON tree
 * that carries a mark of `type`, or `null` when there is none.
 *
 * A node matches when its `type` is 'text' and one of its `marks` has the
 * requested `type`. Null/undefined nodes and nodes without `content` are
 * tolerated.
 */
const firstMarkedText = (node: any, type: string): any => {
  // Explicit stack instead of recursion; children are pushed in reverse so they
  // are popped (visited) left-to-right, i.e. in pre-order.
  const pending: any[] = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current?.type === 'text') {
      const marks: any[] = current.marks || [];
      if (marks.some((m: any) => m.type === type)) {
        return current;
      }
    }
    const children: any[] = [...(current?.content || [])];
    for (let i = children.length - 1; i >= 0; i -= 1) {
      pending.push(children[i]);
    }
  }
  return null;
};
/**
 * Returns the first mark of `type` on a text node, or `undefined` when the
 * node is nullish, has no marks, or has no mark of that type.
 */
const mark = (textNode: any, type: string): any => {
  const marks: any[] = textNode?.marks || [];
  return marks.find((candidate: any) => candidate.type === type);
};
/**
 * Concatenates the text of every text node in a subtree, in document order.
 *
 * A text node contributes its `text` (or '' when that is falsy); any other
 * node contributes the concatenation of its children. Nullish nodes and nodes
 * without `content` yield ''.
 */
const allText = (node: any): string => {
  if (node?.type === 'text') {
    return node.text || '';
  }
  const pieces: string[] = [];
  (node?.content || []).forEach((child: any) => {
    pieces.push(allText(child));
  });
  return pieces.join('');
};
/** True when at least one text node in the tree carries a mark of `type`. */
const hasMark = (node: any, type: string): boolean => {
  return Boolean(firstMarkedText(node, type));
};
/**
 * Runs a PM -> MD -> PM -> MD round-trip.
 *
 * Resolves to `md1` (first export of `d`), `doc2` (the re-imported document)
 * and `md2` (export of `doc2`), so callers can compare `md2` against `md1`.
 */
const roundTrip = async (d: any) => {
  const firstExport = convertProseMirrorToMarkdown(d);
  const reimported = await markdownToProseMirror(firstExport);
  const secondExport = convertProseMirrorToMarkdown(reimported);
  return { md1: firstExport, doc2: reimported, md2: secondExport };
};
// Top-level path: a `highlight` mark with no color is exported as `==text==`
// and imported back as a highlight with no color.
describe('#293 #7: no-color highlight <-> ==text==', () => {
  it('serializes a no-color highlight as exactly ==text==', () => {
    expect(convertProseMirrorToMarkdown(doc(para(text('important', [{ type: 'highlight' }]))))).toBe(
      '==important==',
    );
  });

  it('imports ==text== as a highlight mark with NO color', async () => {
    const d = await markdownToProseMirror('==important==');
    const t = firstMarkedText(d, 'highlight');
    expect(t).toBeTruthy();
    expect(t.text).toBe('important');
    // A bare <mark> carries no background-color, so the color attr is null.
    expect(mark(t, 'highlight').attrs?.color ?? null).toBeNull();
  });

  it('is byte-stable and re-imports as a color-less highlight', async () => {
    const { md1, md2, doc2 } = await roundTrip(
      doc(para(text('a base '), text('hl', [{ type: 'highlight' }]), text(' tail'))),
    );
    expect(md1).toBe('a base ==hl== tail');
    expect(md2).toBe(md1);
    const t = firstMarkedText(doc2, 'highlight');
    // Guard before dereferencing so a lost mark fails as an assertion rather
    // than as a TypeError on `t.text`.
    expect(t).toBeTruthy();
    expect(t.text).toBe('hl');
    expect(mark(t, 'highlight').attrs?.color ?? null).toBeNull();
  });
});
// A highlight WITH a color must keep the `<mark style="background-color: …">`
// HTML form on export and keep its color through a round-trip.
describe('#293 #7: colored highlight keeps <mark style=…>', () => {
  it('serializes a colored highlight as the <mark style=…> HTML form (NOT ==)', () => {
    const out = convertProseMirrorToMarkdown(
      doc(para(text('c', [{ type: 'highlight', attrs: { color: '#ff0000' } }]))),
    );
    expect(out).toBe('<mark style="background-color: #ff0000">c</mark>');
    expect(out).not.toContain('==');
  });

  it('round-trips a colored highlight preserving its color', async () => {
    const { md1, md2, doc2 } = await roundTrip(
      doc(para(text('c', [{ type: 'highlight', attrs: { color: '#abcdef' } }]))),
    );
    expect(md1).toBe('<mark style="background-color: #abcdef">c</mark>');
    expect(md2).toBe(md1);
    const t = firstMarkedText(doc2, 'highlight');
    // Guard before dereferencing so a dropped highlight fails as an assertion
    // rather than as a TypeError on `mark(t, …).attrs`.
    expect(t).toBeTruthy();
    expect(mark(t, 'highlight').attrs?.color).toBe('#abcdef');
  });
});
// Inside a `columns` container the highlight must be written as `<mark>` HTML
// in both the colored and the color-less case.
describe('#293 #7: raw-HTML path (columns) stays <mark>, never ==', () => {
  // Wraps the given blocks in a `columns` node that holds a single `column`.
  const oneColumn = (...blocks: any[]) => {
    const column = { type: 'column', content: blocks };
    return { type: 'columns', attrs: { layout: 'two' }, content: [column] };
  };
  // A doc whose only block is a column containing the paragraph "p" with the
  // given highlight mark applied.
  const columnDoc = (highlightMark: any) => doc(oneColumn(para(text('p', [highlightMark]))));

  it('a no-color highlight inside a column serializes as <mark> (inlineToHtml), not ==', () => {
    const markdown = convertProseMirrorToMarkdown(columnDoc({ type: 'highlight' }));
    expect(markdown).toContain('<mark>p</mark>');
    // The `==` markdown syntax must NOT leak into a raw-HTML container (it would
    // survive as literal text there because columns are not re-parsed).
    expect(markdown).not.toContain('==');
  });

  it('a colored highlight inside a column keeps <mark style=…>', () => {
    const markdown = convertProseMirrorToMarkdown(
      columnDoc({ type: 'highlight', attrs: { color: '#00ff00' } }),
    );
    expect(markdown).toContain('<mark style="background-color: #00ff00">p</mark>');
  });

  it('round-trips a highlight inside a column (byte-stable, mark preserved)', async () => {
    const result = await roundTrip(columnDoc({ type: 'highlight' }));
    expect(result.md1).toContain('<mark>p</mark>');
    expect(result.md2).toBe(result.md1);
    expect(hasMark(result.doc2, 'highlight')).toBe(true);
  });
});
// A text run carrying both `bold` and a color-less `highlight` must export with
// the highlight delimiters outermost and keep both marks after re-import.
describe('#293 #7: highlight wrapping other marks', () => {
  it('serializes bold-inside-highlight as ==**x**== and round-trips both marks', async () => {
    const boldHighlighted = text('x', [{ type: 'bold' }, { type: 'highlight' }]);
    const result = await roundTrip(doc(para(boldHighlighted)));

    expect(result.md1).toBe('==**x**==');
    expect(result.md2).toBe(result.md1);

    const highlighted = firstMarkedText(result.doc2, 'highlight');
    expect(highlighted).toBeTruthy();
    // The same text node must also carry the bold mark.
    const markTypes: any[] = (highlighted.marks || []).map((m: any) => m.type);
    expect(markTypes.includes('bold')).toBe(true);
    expect(highlighted.text).toBe('x');
  });
});
// A `==` inside an inline code span must stay code: no highlight on import and
// no escaping on export.
describe('#293 #7: inline code containing == stays code, not a highlight', () => {
  it('imports `a == b` as an inline code span, not a highlight', async () => {
    const d = await markdownToProseMirror('`a == b`');
    expect(hasMark(d, 'highlight')).toBe(false);
    const codeText = firstMarkedText(d, 'code');
    expect(codeText).toBeTruthy();
    expect(codeText.text).toBe('a == b');
  });

  it('round-trips an inline code span carrying == (byte-stable, no highlight)', async () => {
    const { md1, md2, doc2 } = await roundTrip(doc(para(text('a == b', [{ type: 'code' }]))));
    expect(md1).toBe('`a == b`');
    expect(md2).toBe(md1);
    expect(hasMark(doc2, 'highlight')).toBe(false);
    const codeText = firstMarkedText(doc2, 'code');
    // Guard before dereferencing so a dropped code mark fails as an assertion
    // rather than as a TypeError on `.text`.
    expect(codeText).toBeTruthy();
    expect(codeText.text).toBe('a == b');
  });
});
// A literal `==` in plain text is exported backslash-escaped so the next import
// reads it back as text instead of inventing a highlight.
describe('#293 #7: literal == in plain prose round-trips as text (no phantom highlight)', () => {
  it('a lone literal == (a == b) is escaped and re-imports as literal text', async () => {
    const { md1, md2, doc2 } = await roundTrip(doc(para(text('a == b'))));
    // Each `=` of the pair is backslash-escaped so marked decodes it back.
    expect(md1).toBe('a \\=\\= b');
    expect(md2).toBe(md1);
    expect(hasMark(doc2, 'highlight')).toBe(false);
    expect(allText(doc2)).toBe('a == b');
  });

  it('a literal ==...== pair in prose does NOT materialize a highlight', async () => {
    const { md1, md2, doc2 } = await roundTrip(doc(para(text('x ==not hl== y'))));
    expect(md1).toBe('x \\=\\=not hl\\=\\= y');
    expect(md2).toBe(md1);
    expect(hasMark(doc2, 'highlight')).toBe(false);
    expect(allText(doc2)).toBe('x ==not hl== y');
  });

  it('a highlight over text that itself contains a literal == round-trips both', async () => {
    const { md1, md2, doc2 } = await roundTrip(
      doc(para(text('a == b', [{ type: 'highlight' }]))),
    );
    // The inner literal `==` is escaped; the highlight `==` delimiters are added
    // AFTER escaping, so the mark's own delimiters are intact.
    expect(md1).toBe('==a \\=\\= b==');
    expect(md2).toBe(md1);
    const t = firstMarkedText(doc2, 'highlight');
    // Guard before dereferencing so a lost highlight fails as an assertion
    // rather than as a TypeError on `t.text`.
    expect(t).toBeTruthy();
    expect(t.text).toBe('a == b');
  });
});
// Malformed `==` input must not crash and must stay literal text; well-formed
// adjacent highlights must each parse as their own mark.
describe('#293 #7: fail-open edges (empty / unbalanced ==)', () => {
  // Collects, in document order, the text of every text node that carries a
  // `highlight` mark.
  const highlightedRuns = (node: any): string[] => {
    const runs: string[] = [];
    if (node?.type === 'text' && (node.marks || []).some((m: any) => m.type === 'highlight')) {
      runs.push(node.text);
    }
    for (const child of node?.content || []) {
      runs.push(...highlightedRuns(child));
    }
    return runs;
  };

  it('empty ==== does not crash and stays literal (no highlight)', async () => {
    const d = await markdownToProseMirror('====');
    expect(hasMark(d, 'highlight')).toBe(false);
    expect(allText(d)).toBe('====');
  });

  it('unbalanced ==x does not crash and stays literal (no highlight)', async () => {
    const d = await markdownToProseMirror('==x');
    expect(hasMark(d, 'highlight')).toBe(false);
    expect(allText(d)).toBe('==x');
  });

  it('two highlights on one line both parse (lazy inner)', async () => {
    const d = await markdownToProseMirror('==a== ==b==');
    // Assert BOTH runs are highlighted. Checking only the first run plus
    // "text contains b" would still pass if `==b==` were left as literal text.
    expect(highlightedRuns(d)).toEqual(['a', 'b']);
    // The delimiters are consumed and the separating space stays plain text.
    expect(allText(d)).toBe('a b');
  });
});
describe('#293 #7: a codeBlock containing == is NOT escaped (literal code preserved)', () => {
  // Regression: the canon #7 `==` -> `\=\=` escape lives in `case "text"`, but
  // code-fence content is literal and marked does NOT decode `\=` inside a fence,
  // so routing code through that path would permanently stamp backslashes into a
  // `==` comparison (ubiquitous in source). codeBlock must read raw child text.

  // Builds a `codeBlock` node with a single text child and a `language` attr.
  const codeBlock = (t: string, language = '') => {
    const child = { type: 'text', text: t };
    return { type: 'codeBlock', attrs: { language }, content: [child] };
  };

  it('exports `==` in code verbatim (no \\=\\=) and round-trips byte-stably', async () => {
    const code = 'if (a == b) return c == d;';
    const exported = convertProseMirrorToMarkdown(doc(codeBlock(code, 'js')));
    expect(exported).toBe('```js\nif (a == b) return c == d;\n```');
    expect(exported).not.toContain('\\='); // no backslash corruption

    const reimported = await markdownToProseMirror(exported);
    const reimportedText = allText(reimported);
    // The code text survives with no backslash corruption and no phantom
    // highlight (marked re-adds a trailing "\n" to fence content on import,
    // which the serializer strips again — hence trimEnd here; byte-stability of
    // the markdown is asserted separately below).
    expect(reimportedText.trimEnd()).toBe(code);
    expect(reimportedText).not.toContain('\\=');
    expect(hasMark(reimported, 'highlight')).toBe(false);
    expect(convertProseMirrorToMarkdown(reimported)).toBe(exported); // byte-stable
  });

  it('a real markdown code block with == imports clean and re-exports clean', async () => {
    const fence = '```\nx == y\n```';
    const imported = await markdownToProseMirror(fence);
    const importedText = allText(imported);
    expect(importedText.trimEnd()).toBe('x == y');
    expect(importedText).not.toContain('\\=');
    expect(convertProseMirrorToMarkdown(imported)).toBe(fence); // byte-stable
  });
});