diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts index 8c17a4f1..b1ea3733 100644 --- a/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts @@ -1,5 +1,9 @@ import { describe, it, expect } from "vitest"; -import { normalizeTableColumnWidths } from "./markdown-clipboard"; +import { htmlToMarkdown } from "@docmost/editor-ext"; +import { + normalizeTableColumnWidths, + classifyClipboardSelection, +} from "./markdown-clipboard"; // normalizeTableColumnWidths mutates a DOM subtree (jsdom provides document). function root(html: string): HTMLElement { @@ -124,3 +128,171 @@ describe("normalizeTableColumnWidths", () => { ).toEqual([null, null]); }); }); + +describe("classifyClipboardSelection", () => { + it("serializes a list of 2+ items as markdown", () => { + expect( + classifyClipboardSelection([{ name: "bulletList", childCount: 2 }]), + ).toEqual({ asMarkdown: true, wrapBareRows: false }); + }); + + it("leaves a single-item list as plain text", () => { + expect( + classifyClipboardSelection([{ name: "bulletList", childCount: 1 }]), + ).toEqual({ asMarkdown: false, wrapBareRows: false }); + }); + + it("serializes a whole table without wrapping bare rows", () => { + expect( + classifyClipboardSelection([{ name: "table", childCount: 3 }]), + ).toEqual({ asMarkdown: true, wrapBareRows: false }); + }); + + it("serializes a partial cell selection (bare rows) and flags wrapping", () => { + expect( + classifyClipboardSelection([ + { name: "tableRow", childCount: 2 }, + { name: "tableRow", childCount: 2 }, + ]), + ).toEqual({ asMarkdown: true, wrapBareRows: true }); + }); + + it("leaves plain paragraphs as plain text", () => { + expect( + classifyClipboardSelection([{ name: "paragraph", childCount: 1 }]), + ).toEqual({ asMarkdown: false, wrapBareRows: false }); + }); + + it("does not 
wrap when rows are mixed with other block types", () => { + expect( + classifyClipboardSelection([ + { name: "tableRow", childCount: 2 }, + { name: "paragraph", childCount: 1 }, + ]), + ).toEqual({ asMarkdown: false, wrapBareRows: false }); + }); +}); + +// Output-level tests for the table clipboard regression: copying a table must +// yield a real GFM pipe table, NOT one-value-per-line concatenated cells. +// These exercise the actual markdown produced by htmlToMarkdown (the same +// serializer step the clipboardTextSerializer runs), so they pin the OUTPUT +// shape that the classifier-flag tests above do not cover. +describe("table clipboard markdown output (htmlToMarkdown)", () => { + // Trim each line and drop blanks so structural assertions are whitespace-robust. + function lines(md: string): string[] { + return md + .split("\n") + .map((l) => l.trim()) + .filter((l) => l.length > 0); + } + + // A GFM separator row like "| --- | --- |" (any number of columns), tolerant + // of the padding turndown emits. + function isSeparatorRow(line: string): boolean { + const compact = line.replace(/\s+/g, ""); + return /^\|(?:-{3,}\|)+$/.test(compact); + } + + // Split a pipe-delimited row into trimmed cell values. + function cells(line: string): string[] { + return line + .replace(/^\|/, "") + .replace(/\|$/, "") + .split("|") + .map((c) => c.trim()); + } + + it("serializes a header-less partial cell selection (bare rows) as a valid GFM pipe table", () => { + // Mirror the serializer's `wrapBareRows` branch exactly: bare nodes are + // wrapped in and htmlToMarkdown(div.innerHTML) is called. 
+ // See markdown-clipboard.ts clipboardTextSerializer: + // const table = document.createElement("table"); + // const tbody = document.createElement("tbody"); + // tbody.appendChild(fragment); table.appendChild(tbody); + // div.appendChild(table); + // return htmlToMarkdown(div.innerHTML); + const div = document.createElement("div"); + const table = document.createElement("table"); + const tbody = document.createElement("tbody"); + for (const [c1, c2] of [ + ["a", "b"], + ["c", "d"], + ]) { + const tr = document.createElement("tr"); + const td1 = document.createElement("td"); + td1.textContent = c1; + const td2 = document.createElement("td"); + td2.textContent = c2; + tr.appendChild(td1); + tr.appendChild(td2); + tbody.appendChild(tr); + } + table.appendChild(tbody); + div.appendChild(table); + + const md = htmlToMarkdown(div.innerHTML); + const ls = lines(md); + + // Valid GFM: a header/data separator row is present (an empty header is + // synthesized by the GFM turndown plugin for a header-less table — fine). + expect(ls.some(isSeparatorRow)).toBe(true); + // NOT the old broken "one value per line" shape: every line is pipe-delimited + // and no line is a bare cell value on its own. + expect(ls.every((l) => l.includes("|"))).toBe(true); + expect(md).not.toMatch(/^\s*(a|b|c|d)\s*$/m); + // The cell values land in real pipe-delimited data rows. + const dataRows = ls.filter((l) => !isSeparatorRow(l)).map(cells); + expect(dataRows).toContainEqual(["a", "b"]); + expect(dataRows).toContainEqual(["c", "d"]); + }); + + it("serializes a whole table with a header row as a proper GFM table (headline regression)", () => { + // Mirror the serializer's non-wrap branch: the full
node is appended + // directly (div.appendChild(fragment)) and htmlToMarkdown(div.innerHTML) runs. + const div = document.createElement("div"); + const table = document.createElement("table"); + + const thead = document.createElement("thead"); + const headerRow = document.createElement("tr"); + for (const h of ["Name", "Age"]) { + const th = document.createElement("th"); + th.textContent = h; + headerRow.appendChild(th); + } + thead.appendChild(headerRow); + table.appendChild(thead); + + const tbody = document.createElement("tbody"); + for (const [name, age] of [ + ["Alice", "30"], + ["Bob", "25"], + ]) { + const tr = document.createElement("tr"); + const td1 = document.createElement("td"); + td1.textContent = name; + const td2 = document.createElement("td"); + td2.textContent = age; + tr.appendChild(td1); + tr.appendChild(td2); + tbody.appendChild(tr); + } + table.appendChild(tbody); + div.appendChild(table); + + const md = htmlToMarkdown(div.innerHTML); + const ls = lines(md); + + // Proper GFM structure: separator row + all rows pipe-delimited. + expect(ls.some(isSeparatorRow)).toBe(true); + expect(ls.every((l) => l.includes("|"))).toBe(true); + + const rows = ls.filter((l) => !isSeparatorRow(l)).map(cells); + // Header row comes first, followed by both data rows. + expect(rows[0]).toEqual(["Name", "Age"]); + expect(rows).toContainEqual(["Alice", "30"]); + expect(rows).toContainEqual(["Bob", "25"]); + // Headline regression: the table is NOT concatenated one-value-per-line. 
+ expect(md).not.toMatch(/^\s*(Name|Age|Alice|Bob|30|25)\s*$/m); + }); +}); diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.ts index c8e36a1b..fa387569 100644 --- a/apps/client/src/features/editor/extensions/markdown-clipboard.ts +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.ts @@ -27,24 +27,36 @@ export const MarkdownClipboard = Extension.create({ key: new PluginKey("markdownClipboard"), props: { clipboardTextSerializer: (slice) => { - const listTypes = ["bulletList", "orderedList", "taskList"]; - let topLevelCount = 0; - let hasList = false; + const topLevelNodes: { name: string; childCount: number }[] = []; slice.content.forEach((node) => { - if (listTypes.includes(node.type.name)) { - hasList = true; - topLevelCount += node.childCount; - } else { - topLevelCount++; - } + topLevelNodes.push({ + name: node.type.name, + childCount: node.childCount, + }); }); - if (!hasList || topLevelCount < 2) return null; + const { asMarkdown, wrapBareRows } = + classifyClipboardSelection(topLevelNodes); + if (!asMarkdown) return null; const div = document.createElement("div"); const serializer = DOMSerializer.fromSchema(this.editor.schema); const fragment = serializer.serializeFragment(slice.content); - div.appendChild(fragment); + + if (wrapBareRows) { + // A partial table cell-selection serializes to bare nodes + // (prosemirror-tables returns the whole `table` node only when the + // entire table is selected). Bare would be foster-parented + // away by the HTML parser inside htmlToMarkdown, so wrap them in + //
first for the GFM turndown rule to detect them. + const table = document.createElement("table"); + const tbody = document.createElement("tbody"); + tbody.appendChild(fragment); + table.appendChild(tbody); + div.appendChild(table); + } else { + div.appendChild(fragment); + } return htmlToMarkdown(div.innerHTML); }, handlePaste: (view, event, slice) => { @@ -153,6 +165,55 @@ export const MarkdownClipboard = Extension.create({ }, }); +/** + * Decide whether a copied slice's plain-text clipboard payload should be + * serialized as Markdown (instead of ProseMirror's default text serializer, + * which joins block leaves with newlines — the "one value per line" bug for + * tables). + * + * Serialize as Markdown for structured content: + * - lists with 2+ total items (a single copied bullet stays literal text); + * - a whole table (top-level `table` node); + * - a partial table cell-selection, which prosemirror-tables copies as bare + * `tableRow` nodes (only a full-table selection yields a `table` node). + * + * `wrapBareRows` flags the bare-rows case so the caller wraps the serialized + * nodes in
/**
 * Node type names that count as lists. A list contributes its number of items
 * (not 1) to the size of a copied selection.
 */
const CLIPBOARD_LIST_NODE_NAMES: ReadonlySet<string> = new Set([
  "bulletList",
  "orderedList",
  "taskList",
]);

/**
 * Decide whether a copied slice's plain-text clipboard payload should be
 * serialized as Markdown instead of falling back to the default text
 * serializer (which puts every block leaf on its own line — the
 * "one value per line" shape for tables).
 *
 * Markdown is chosen for structured content:
 * - a selection containing a list whose total size is 2+ (list items plus
 *   any other top-level nodes), so a single copied bullet stays literal text;
 * - a whole table (a top-level `table` node);
 * - a partial table cell-selection, which arrives as bare top-level
 *   `tableRow` nodes and nothing else.
 *
 * `wrapBareRows` is true only for that last case. It tells the caller to wrap
 * the serialized rows in `<table><tbody>` before the HTML->Markdown step,
 * because unwrapped rows do not survive HTML parsing.
 *
 * If bare rows are mixed with any other top-level node they cannot be wrapped
 * as one table, so the selection is reported as plain text
 * (`asMarkdown: false`) rather than sent down the Markdown path where the
 * unwrapped rows would be lost.
 *
 * @param nodes Top-level nodes of the copied slice, reduced to their type
 *   name and child count. The array is only read, never mutated.
 * @returns `asMarkdown` — serialize as Markdown; `wrapBareRows` — wrap the
 *   serialized rows in a table first. `wrapBareRows` implies `asMarkdown`.
 */
export function classifyClipboardSelection(
  nodes: readonly { name: string; childCount: number }[],
): { asMarkdown: boolean; wrapBareRows: boolean } {
  let hasList = false;
  let hasTable = false;
  let selectionSize = 0;
  let bareRowCount = 0;

  for (const node of nodes) {
    if (CLIPBOARD_LIST_NODE_NAMES.has(node.name)) {
      hasList = true;
      // A list counts once per item so "one bullet" can be told apart
      // from "several bullets".
      selectionSize += node.childCount;
      continue;
    }

    selectionSize++;
    if (node.name === "table") {
      hasTable = true;
    } else if (node.name === "tableRow") {
      bareRowCount++;
    }
  }

  if (bareRowCount > 0) {
    // Bare rows are only serializable when every top-level node is a row;
    // any mix falls back to plain text so no row content is dropped.
    const wrapBareRows = bareRowCount === nodes.length;
    return { asMarkdown: wrapBareRows, wrapBareRows };
  }

  return {
    asMarkdown: hasTable || (hasList && selectionSize >= 2),
    wrapBareRows: false,
  };
}