fix(editor): copy tables to clipboard as Markdown, not newline-joined cells #297

Merged
vvzvlad merged 2 commits from fix/table-clipboard-markdown into develop 2026-07-02 22:51:28 +03:00
2 changed files with 245 additions and 12 deletions
@@ -1,5 +1,9 @@
import { describe, it, expect } from "vitest";
import { normalizeTableColumnWidths } from "./markdown-clipboard";
import { htmlToMarkdown } from "@docmost/editor-ext";
import {
normalizeTableColumnWidths,
classifyClipboardSelection,
} from "./markdown-clipboard";
// normalizeTableColumnWidths mutates a DOM subtree (jsdom provides document).
function root(html: string): HTMLElement {
@@ -124,3 +128,171 @@ describe("normalizeTableColumnWidths", () => {
).toEqual([null, null]);
});
});
describe("classifyClipboardSelection", () => {
it("serializes a list of 2+ items as markdown", () => {
expect(
classifyClipboardSelection([{ name: "bulletList", childCount: 2 }]),
).toEqual({ asMarkdown: true, wrapBareRows: false });
});
it("leaves a single-item list as plain text", () => {
expect(
classifyClipboardSelection([{ name: "bulletList", childCount: 1 }]),
).toEqual({ asMarkdown: false, wrapBareRows: false });
});
it("serializes a whole table without wrapping bare rows", () => {
expect(
classifyClipboardSelection([{ name: "table", childCount: 3 }]),
).toEqual({ asMarkdown: true, wrapBareRows: false });
});
it("serializes a partial cell selection (bare rows) and flags wrapping", () => {
expect(
classifyClipboardSelection([
{ name: "tableRow", childCount: 2 },
{ name: "tableRow", childCount: 2 },
]),
).toEqual({ asMarkdown: true, wrapBareRows: true });
});
it("leaves plain paragraphs as plain text", () => {
expect(
classifyClipboardSelection([{ name: "paragraph", childCount: 1 }]),
).toEqual({ asMarkdown: false, wrapBareRows: false });
});
it("does not wrap when rows are mixed with other block types", () => {
expect(
classifyClipboardSelection([
{ name: "tableRow", childCount: 2 },
{ name: "paragraph", childCount: 1 },
]),
).toEqual({ asMarkdown: false, wrapBareRows: false });
});
});
// Output-level tests for the table clipboard regression: copying a table must
// yield a real GFM pipe table, NOT one-value-per-line concatenated cells.
// These exercise the actual markdown produced by htmlToMarkdown (the same
// serializer step the clipboardTextSerializer runs), so they pin the OUTPUT
// shape that the classifier-flag tests above do not cover.
describe("table clipboard markdown output (htmlToMarkdown)", () => {
// Trim each line and drop blanks so structural assertions are whitespace-robust.
function lines(md: string): string[] {
return md
.split("\n")
.map((l) => l.trim())
.filter((l) => l.length > 0);
}
// A GFM separator row like "| --- | --- |" (any number of columns), tolerant
// of the padding turndown emits.
function isSeparatorRow(line: string): boolean {
const compact = line.replace(/\s+/g, "");
return /^\|(?:-{3,}\|)+$/.test(compact);
}
// Split a pipe-delimited row into trimmed cell values.
function cells(line: string): string[] {
return line
.replace(/^\|/, "")
.replace(/\|$/, "")
.split("|")
.map((c) => c.trim());
}
it("serializes a header-less partial cell selection (bare rows) as a valid GFM pipe table", () => {
// Mirror the serializer's `wrapBareRows` branch exactly: bare <tr> nodes are
// wrapped in <table><tbody> and htmlToMarkdown(div.innerHTML) is called.
// See markdown-clipboard.ts clipboardTextSerializer:
// const table = document.createElement("table");
// const tbody = document.createElement("tbody");
// tbody.appendChild(fragment); table.appendChild(tbody);
// div.appendChild(table);
// return htmlToMarkdown(div.innerHTML);
const div = document.createElement("div");
const table = document.createElement("table");
const tbody = document.createElement("tbody");
for (const [c1, c2] of [
["a", "b"],
["c", "d"],
]) {
const tr = document.createElement("tr");
const td1 = document.createElement("td");
td1.textContent = c1;
const td2 = document.createElement("td");
td2.textContent = c2;
tr.appendChild(td1);
tr.appendChild(td2);
tbody.appendChild(tr);
}
table.appendChild(tbody);
div.appendChild(table);
const md = htmlToMarkdown(div.innerHTML);
const ls = lines(md);
// Valid GFM: a header/data separator row is present (an empty header is
// synthesized by the GFM turndown plugin for a header-less table — fine).
expect(ls.some(isSeparatorRow)).toBe(true);
// NOT the old broken "one value per line" shape: every line is pipe-delimited
// and no line is a bare cell value on its own.
expect(ls.every((l) => l.includes("|"))).toBe(true);
expect(md).not.toMatch(/^\s*(a|b|c|d)\s*$/m);
// The cell values land in real pipe-delimited data rows.
const dataRows = ls.filter((l) => !isSeparatorRow(l)).map(cells);
expect(dataRows).toContainEqual(["a", "b"]);
expect(dataRows).toContainEqual(["c", "d"]);
});
it("serializes a whole table with a header row as a proper GFM table (headline regression)", () => {
// Mirror the serializer's non-wrap branch: the full <table> node is appended
// directly (div.appendChild(fragment)) and htmlToMarkdown(div.innerHTML) runs.
const div = document.createElement("div");
const table = document.createElement("table");
const thead = document.createElement("thead");
const headerRow = document.createElement("tr");
for (const h of ["Name", "Age"]) {
const th = document.createElement("th");
th.textContent = h;
headerRow.appendChild(th);
}
thead.appendChild(headerRow);
table.appendChild(thead);
const tbody = document.createElement("tbody");
for (const [name, age] of [
["Alice", "30"],
["Bob", "25"],
]) {
const tr = document.createElement("tr");
const td1 = document.createElement("td");
td1.textContent = name;
const td2 = document.createElement("td");
td2.textContent = age;
tr.appendChild(td1);
tr.appendChild(td2);
tbody.appendChild(tr);
}
table.appendChild(tbody);
div.appendChild(table);
const md = htmlToMarkdown(div.innerHTML);
const ls = lines(md);
// Proper GFM structure: separator row + all rows pipe-delimited.
expect(ls.some(isSeparatorRow)).toBe(true);
expect(ls.every((l) => l.includes("|"))).toBe(true);
const rows = ls.filter((l) => !isSeparatorRow(l)).map(cells);
// Header row comes first, followed by both data rows.
expect(rows[0]).toEqual(["Name", "Age"]);
expect(rows).toContainEqual(["Alice", "30"]);
expect(rows).toContainEqual(["Bob", "25"]);
// Headline regression: the table is NOT concatenated one-value-per-line.
expect(md).not.toMatch(/^\s*(Name|Age|Alice|Bob|30|25)\s*$/m);
});
});
@@ -27,24 +27,36 @@ export const MarkdownClipboard = Extension.create({
key: new PluginKey("markdownClipboard"),
props: {
clipboardTextSerializer: (slice) => {
const listTypes = ["bulletList", "orderedList", "taskList"];
let topLevelCount = 0;
let hasList = false;
const topLevelNodes: { name: string; childCount: number }[] = [];
slice.content.forEach((node) => {
if (listTypes.includes(node.type.name)) {
hasList = true;
topLevelCount += node.childCount;
} else {
topLevelCount++;
}
topLevelNodes.push({
name: node.type.name,
childCount: node.childCount,
});
});
if (!hasList || topLevelCount < 2) return null;
const { asMarkdown, wrapBareRows } =
classifyClipboardSelection(topLevelNodes);
if (!asMarkdown) return null;
const div = document.createElement("div");
const serializer = DOMSerializer.fromSchema(this.editor.schema);
const fragment = serializer.serializeFragment(slice.content);
div.appendChild(fragment);
if (wrapBareRows) {
// A partial table cell-selection serializes to bare <tr> nodes
// (prosemirror-tables returns the whole `table` node only when the
// entire table is selected). Bare <tr> would be foster-parented
// away by the HTML parser inside htmlToMarkdown, so wrap them in
// <table><tbody> first for the GFM turndown rule to detect them.
const table = document.createElement("table");
const tbody = document.createElement("tbody");
tbody.appendChild(fragment);
table.appendChild(tbody);
div.appendChild(table);
} else {
div.appendChild(fragment);
}
return htmlToMarkdown(div.innerHTML);
},
handlePaste: (view, event, slice) => {
@@ -153,6 +165,55 @@ export const MarkdownClipboard = Extension.create({
},
});
/**
* Decide whether a copied slice's plain-text clipboard payload should be
* serialized as Markdown (instead of ProseMirror's default text serializer,
* which joins block leaves with newlines — the "one value per line" bug for
* tables).
*
* Serialize as Markdown for structured content:
* - lists with 2+ total items (a single copied bullet stays literal text);
* - a whole table (top-level `table` node);
* - a partial table cell-selection, which prosemirror-tables copies as bare
* `tableRow` nodes (only a full-table selection yields a `table` node).
*
* `wrapBareRows` flags the bare-rows case so the caller wraps the serialized
* <tr> nodes in <table><tbody> before the HTML->Markdown step. Plain paragraphs
* return asMarkdown=false so a simple text copy stays literal, and internal
* copy/paste keeps using the richer text/html clipboard payload.
*/
export function classifyClipboardSelection(
nodes: { name: string; childCount: number }[],
): { asMarkdown: boolean; wrapBareRows: boolean } {
const listTypes = ["bulletList", "orderedList", "taskList"];
let topLevelCount = 0;
let hasList = false;
let hasTable = false;
let tableRowCount = 0;
let nonRowCount = 0;
for (const node of nodes) {
if (listTypes.includes(node.name)) {
hasList = true;
topLevelCount += node.childCount;
nonRowCount++;
} else {
if (node.name === "table") hasTable = true;
if (node.name === "tableRow") tableRowCount++;
else nonRowCount++;
topLevelCount++;
}
}
// Bare tableRow nodes at the top level only occur for a partial cell
// selection; a slice never mixes bare rows with other block types, so
// "every top-level node is a row" is a safe signal to wrap-and-serialize.
const wrapBareRows = tableRowCount > 0 && nonRowCount === 0;
const asMarkdown =
(hasList && topLevelCount >= 2) || hasTable || wrapBareRows;
return { asMarkdown, wrapBareRows };
}
/**
* Reorder/dedup the footnotes of a SELF-CONTAINED pasted markdown block to the
* canonical invariant (the live footnoteSyncPlugin never reorders an existing