fix(editor): copy tables to clipboard as Markdown, not newline-joined cells #297
@@ -1,5 +1,9 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { normalizeTableColumnWidths } from "./markdown-clipboard";
|
||||
import { htmlToMarkdown } from "@docmost/editor-ext";
|
||||
import {
|
||||
normalizeTableColumnWidths,
|
||||
classifyClipboardSelection,
|
||||
} from "./markdown-clipboard";
|
||||
|
||||
// normalizeTableColumnWidths mutates a DOM subtree (jsdom provides document).
|
||||
function root(html: string): HTMLElement {
|
||||
@@ -124,3 +128,171 @@ describe("normalizeTableColumnWidths", () => {
|
||||
).toEqual([null, null]);
|
||||
});
|
||||
});
|
||||
|
||||
// Table-driven spec for classifyClipboardSelection. Each case pairs the
// top-level node descriptors of a copied slice with the exact flags the
// classifier is expected to return for them.
describe("classifyClipboardSelection", () => {
  type NodeInfo = { name: string; childCount: number };
  type Flags = { asMarkdown: boolean; wrapBareRows: boolean };

  const scenarios: { title: string; nodes: NodeInfo[]; expected: Flags }[] = [
    {
      title: "serializes a list of 2+ items as markdown",
      nodes: [{ name: "bulletList", childCount: 2 }],
      expected: { asMarkdown: true, wrapBareRows: false },
    },
    {
      title: "leaves a single-item list as plain text",
      nodes: [{ name: "bulletList", childCount: 1 }],
      expected: { asMarkdown: false, wrapBareRows: false },
    },
    {
      title: "serializes a whole table without wrapping bare rows",
      nodes: [{ name: "table", childCount: 3 }],
      expected: { asMarkdown: true, wrapBareRows: false },
    },
    {
      title: "serializes a partial cell selection (bare rows) and flags wrapping",
      nodes: [
        { name: "tableRow", childCount: 2 },
        { name: "tableRow", childCount: 2 },
      ],
      expected: { asMarkdown: true, wrapBareRows: true },
    },
    {
      title: "leaves plain paragraphs as plain text",
      nodes: [{ name: "paragraph", childCount: 1 }],
      expected: { asMarkdown: false, wrapBareRows: false },
    },
    {
      title: "does not wrap when rows are mixed with other block types",
      nodes: [
        { name: "tableRow", childCount: 2 },
        { name: "paragraph", childCount: 1 },
      ],
      expected: { asMarkdown: false, wrapBareRows: false },
    },
  ];

  // Register one test per scenario, in declaration order.
  for (const { title, nodes, expected } of scenarios) {
    it(title, () => {
      expect(classifyClipboardSelection(nodes)).toEqual(expected);
    });
  }
});
|
||||
|
||||
// Output-level tests for the table clipboard regression: copying a table must
|
||||
// yield a real GFM pipe table, NOT one-value-per-line concatenated cells.
|
||||
// These exercise the actual markdown produced by htmlToMarkdown (the same
|
||||
// serializer step the clipboardTextSerializer runs), so they pin the OUTPUT
|
||||
// shape that the classifier-flag tests above do not cover.
|
||||
describe("table clipboard markdown output (htmlToMarkdown)", () => {
|
||||
// Break markdown text into its non-blank lines, each stripped of surrounding
// whitespace, so the structural assertions below do not depend on padding or
// on blank lines around the table.
function lines(md: string): string[] {
  const kept: string[] = [];
  for (const raw of md.split("\n")) {
    const trimmed = raw.trim();
    if (trimmed.length > 0) {
      kept.push(trimmed);
    }
  }
  return kept;
}
|
||||
|
||||
// True when `line` is a GFM delimiter (separator) row such as "| --- | --- |",
// for any number of columns. All whitespace is removed before matching, so
// padded output is accepted. Each cell must be three or more hyphens and may
// carry the optional GFM alignment colons (":---", "---:", ":---:"); without
// this, an aligned table's delimiter row would be mistaken for a data row.
function isSeparatorRow(line: string): boolean {
  const compact = line.replace(/\s+/g, "");
  return /^\|(?::?-{3,}:?\|)+$/.test(compact);
}
|
||||
|
||||
// Split one pipe-delimited table row into its trimmed cell values.
// - The line is trimmed first, so surrounding whitespace cannot hide the outer
//   pipes and produce a spurious empty first/last cell.
// - Only unescaped pipes act as column delimiters; a GFM-escaped pipe ("\|")
//   stays inside its cell and is returned as a literal "|".
function cells(line: string): string[] {
  return line
    .trim()
    .replace(/^\|/, "")
    .replace(/(?<!\\)\|$/, "")
    .split(/(?<!\\)\|/)
    .map((c) => c.trim().replace(/\\\|/g, "|"));
}
|
||||
|
||||
it("serializes a header-less partial cell selection (bare rows) as a valid GFM pipe table", () => {
  // Reproduce the `wrapBareRows` path of clipboardTextSerializer in
  // markdown-clipboard.ts: the bare <tr> elements go into a <tbody>, the
  // <tbody> into a <table>, the <table> into a scratch <div>, and the div's
  // innerHTML is handed to htmlToMarkdown.
  const grid = [
    ["a", "b"],
    ["c", "d"],
  ];

  const tbody = document.createElement("tbody");
  grid.forEach((values) => {
    const tr = document.createElement("tr");
    values.forEach((value) => {
      const td = document.createElement("td");
      td.textContent = value;
      tr.appendChild(td);
    });
    tbody.appendChild(tr);
  });

  const table = document.createElement("table");
  table.appendChild(tbody);
  const div = document.createElement("div");
  div.appendChild(table);

  const md = htmlToMarkdown(div.innerHTML);
  const ls = lines(md);

  // A delimiter row must exist for the output to be a valid GFM table. The
  // header row above it may be empty (presumably synthesized by the markdown
  // converter for a header-less table), which is acceptable here.
  expect(ls.some(isSeparatorRow)).toBe(true);

  // Guard against the old "one value per line" shape: every non-blank line
  // contains a pipe, and no line consists of a single bare cell value.
  expect(ls.every((l) => l.includes("|"))).toBe(true);
  expect(md).not.toMatch(/^\s*(a|b|c|d)\s*$/m);

  // Both source rows appear as pipe-delimited data rows.
  const dataRows = ls.filter((l) => !isSeparatorRow(l)).map(cells);
  expect(dataRows).toContainEqual(["a", "b"]);
  expect(dataRows).toContainEqual(["c", "d"]);
});
|
||||
|
||||
it("serializes a whole table with a header row as a proper GFM table (headline regression)", () => {
  // Reproduce the non-wrapping path of the serializer: a complete <table>
  // (here with <thead> and <tbody>) is attached straight to a scratch <div>
  // and the div's innerHTML is handed to htmlToMarkdown.
  const headings = ["Name", "Age"];
  const people = [
    ["Alice", "30"],
    ["Bob", "25"],
  ];

  const table = document.createElement("table");

  const thead = document.createElement("thead");
  const headerRow = document.createElement("tr");
  headings.forEach((heading) => {
    const th = document.createElement("th");
    th.textContent = heading;
    headerRow.appendChild(th);
  });
  thead.appendChild(headerRow);
  table.appendChild(thead);

  const tbody = document.createElement("tbody");
  people.forEach((values) => {
    const tr = document.createElement("tr");
    values.forEach((value) => {
      const td = document.createElement("td");
      td.textContent = value;
      tr.appendChild(td);
    });
    tbody.appendChild(tr);
  });
  table.appendChild(tbody);

  const div = document.createElement("div");
  div.appendChild(table);

  const md = htmlToMarkdown(div.innerHTML);
  const ls = lines(md);

  // Structural check: a delimiter row exists and every line is pipe-delimited.
  expect(ls.some(isSeparatorRow)).toBe(true);
  expect(ls.every((l) => l.includes("|"))).toBe(true);

  // The first non-delimiter row is the header; both data rows follow somewhere.
  const rows = ls.filter((l) => !isSeparatorRow(l)).map(cells);
  expect(rows[0]).toEqual(["Name", "Age"]);
  expect(rows).toContainEqual(["Alice", "30"]);
  expect(rows).toContainEqual(["Bob", "25"]);

  // Headline regression: no cell value may sit alone on its own line.
  expect(md).not.toMatch(/^\s*(Name|Age|Alice|Bob|30|25)\s*$/m);
});
|
||||
});
|
||||
|
||||
@@ -27,24 +27,36 @@ export const MarkdownClipboard = Extension.create({
|
||||
key: new PluginKey("markdownClipboard"),
|
||||
props: {
|
||||
// Produces the text/plain clipboard payload for a copied slice.
// Structured selections (as decided by classifyClipboardSelection) are
// rendered to HTML with the editor schema's DOMSerializer and converted to
// Markdown; for anything else this returns null, declining to serialize
// (presumably the editor then falls back to its default text serialization).
clipboardTextSerializer: (slice) => {
  // Describe each top-level node of the slice by type name and child count;
  // that is all the classifier needs.
  const topLevelNodes: { name: string; childCount: number }[] = [];
  slice.content.forEach((node) => {
    topLevelNodes.push({
      name: node.type.name,
      childCount: node.childCount,
    });
  });

  // The classifier is the single gate here. No list-only pre-check may run
  // before it, or table selections would return null and never reach the
  // Markdown path.
  const { asMarkdown, wrapBareRows } =
    classifyClipboardSelection(topLevelNodes);
  if (!asMarkdown) return null;

  const div = document.createElement("div");
  const serializer = DOMSerializer.fromSchema(this.editor.schema);
  const fragment = serializer.serializeFragment(slice.content);

  // The fragment must be attached exactly once: appending a DocumentFragment
  // moves its children, so attaching it to `div` before this branch would
  // leave nothing to wrap.
  if (wrapBareRows) {
    // A partial table cell-selection serializes to bare <tr> nodes
    // (prosemirror-tables returns the whole `table` node only when the
    // entire table is selected). Bare <tr> would be foster-parented
    // away by the HTML parser inside htmlToMarkdown, so wrap them in
    // <table><tbody> first for the GFM turndown rule to detect them.
    const table = document.createElement("table");
    const tbody = document.createElement("tbody");
    tbody.appendChild(fragment);
    table.appendChild(tbody);
    div.appendChild(table);
  } else {
    div.appendChild(fragment);
  }
  return htmlToMarkdown(div.innerHTML);
},
|
||||
handlePaste: (view, event, slice) => {
|
||||
@@ -153,6 +165,55 @@ export const MarkdownClipboard = Extension.create({
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Decide how the plain-text clipboard payload of a copied slice should be
 * produced, given a summary of the slice's top-level nodes.
 *
 * Markdown is chosen for structured content:
 *  - lists, when the selection adds up to at least two units (each list
 *    contributes its item count, every other top-level node counts as one),
 *    so a single copied bullet stays literal text;
 *  - a whole table (a top-level `table` node);
 *  - a partial table cell-selection, which arrives as bare `tableRow` nodes.
 *
 * Anything else (e.g. plain paragraphs) yields `asMarkdown: false` so a
 * simple text copy stays literal.
 *
 * @param nodes Type name and child count of each top-level node in the slice.
 *   The array is only read, never mutated, so readonly/frozen arrays are
 *   accepted.
 * @returns `asMarkdown` — whether to serialize the slice as Markdown;
 *   `wrapBareRows` — true only when every top-level node is a `tableRow`
 *   (and there is at least one), telling the caller to wrap the serialized
 *   <tr> nodes in <table><tbody> before the HTML->Markdown step.
 */
export function classifyClipboardSelection(
  nodes: readonly { name: string; childCount: number }[],
): { asMarkdown: boolean; wrapBareRows: boolean } {
  let unitCount = 0;
  let hasList = false;
  let hasTable = false;
  let rowCount = 0;

  for (const { name, childCount } of nodes) {
    switch (name) {
      case "bulletList":
      case "orderedList":
      case "taskList":
        // A list counts once per item rather than once per node.
        hasList = true;
        unitCount += childCount;
        break;
      case "tableRow":
        rowCount++;
        unitCount++;
        break;
      case "table":
        hasTable = true;
        unitCount++;
        break;
      default:
        unitCount++;
    }
  }

  // "Every top-level node is a row" is the signal for a partial cell
  // selection; rows mixed with any other node type are not wrapped.
  const wrapBareRows = rowCount > 0 && rowCount === nodes.length;
  const asMarkdown = (hasList && unitCount >= 2) || hasTable || wrapBareRows;
  return { asMarkdown, wrapBareRows };
}
|
||||
|
||||
/**
|
||||
* Reorder/dedup the footnotes of a SELF-CONTAINED pasted markdown block to the
|
||||
* canonical invariant (the live footnoteSyncPlugin never reorders an existing
|
||||
|
||||
Reference in New Issue
Block a user