diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts
index 8c17a4f1..b1ea3733 100644
--- a/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts
+++ b/apps/client/src/features/editor/extensions/markdown-clipboard.test.ts
@@ -1,5 +1,9 @@
import { describe, it, expect } from "vitest";
-import { normalizeTableColumnWidths } from "./markdown-clipboard";
+import { htmlToMarkdown } from "@docmost/editor-ext";
+import {
+ normalizeTableColumnWidths,
+ classifyClipboardSelection,
+} from "./markdown-clipboard";
// normalizeTableColumnWidths mutates a DOM subtree (jsdom provides document).
function root(html: string): HTMLElement {
@@ -124,3 +128,171 @@ describe("normalizeTableColumnWidths", () => {
).toEqual([null, null]);
});
});
+
+describe("classifyClipboardSelection", () => {
+ it("serializes a list of 2+ items as markdown", () => {
+ expect(
+ classifyClipboardSelection([{ name: "bulletList", childCount: 2 }]),
+ ).toEqual({ asMarkdown: true, wrapBareRows: false });
+ });
+
+ it("leaves a single-item list as plain text", () => {
+ expect(
+ classifyClipboardSelection([{ name: "bulletList", childCount: 1 }]),
+ ).toEqual({ asMarkdown: false, wrapBareRows: false });
+ });
+
+ it("serializes a whole table without wrapping bare rows", () => {
+ expect(
+ classifyClipboardSelection([{ name: "table", childCount: 3 }]),
+ ).toEqual({ asMarkdown: true, wrapBareRows: false });
+ });
+
+ it("serializes a partial cell selection (bare rows) and flags wrapping", () => {
+ expect(
+ classifyClipboardSelection([
+ { name: "tableRow", childCount: 2 },
+ { name: "tableRow", childCount: 2 },
+ ]),
+ ).toEqual({ asMarkdown: true, wrapBareRows: true });
+ });
+
+ it("leaves plain paragraphs as plain text", () => {
+ expect(
+ classifyClipboardSelection([{ name: "paragraph", childCount: 1 }]),
+ ).toEqual({ asMarkdown: false, wrapBareRows: false });
+ });
+
+ it("does not wrap when rows are mixed with other block types", () => {
+ expect(
+ classifyClipboardSelection([
+ { name: "tableRow", childCount: 2 },
+ { name: "paragraph", childCount: 1 },
+ ]),
+ ).toEqual({ asMarkdown: false, wrapBareRows: false });
+ });
+});
+
+// Output-level tests for the table clipboard regression: copying a table must
+// yield a real GFM pipe table, NOT one-value-per-line concatenated cells.
+// These exercise the actual markdown produced by htmlToMarkdown (the same
+// serializer step the clipboardTextSerializer runs), so they pin the OUTPUT
+// shape that the classifier-flag tests above do not cover.
+describe("table clipboard markdown output (htmlToMarkdown)", () => {
+ // Trim each line and drop blanks so structural assertions are whitespace-robust.
+ function lines(md: string): string[] {
+ return md
+ .split("\n")
+ .map((l) => l.trim())
+ .filter((l) => l.length > 0);
+ }
+
+ // A GFM separator row like "| --- | --- |" (any number of columns), tolerant
+ // of the padding turndown emits.
+ function isSeparatorRow(line: string): boolean {
+ const compact = line.replace(/\s+/g, "");
+ return /^\|(?:-{3,}\|)+$/.test(compact);
+ }
+
+ // Split a pipe-delimited row into trimmed cell values.
+ function cells(line: string): string[] {
+ return line
+ .replace(/^\|/, "")
+ .replace(/\|$/, "")
+ .split("|")
+ .map((c) => c.trim());
+ }
+
+ it("serializes a header-less partial cell selection (bare rows) as a valid GFM pipe table", () => {
+ // Mirror the serializer's `wrapBareRows` branch exactly: bare <tr> nodes are
+ // wrapped in <table><tbody> and htmlToMarkdown(div.innerHTML) is called.
+ // See markdown-clipboard.ts clipboardTextSerializer:
+ // const table = document.createElement("table");
+ // const tbody = document.createElement("tbody");
+ // tbody.appendChild(fragment); table.appendChild(tbody);
+ // div.appendChild(table);
+ // return htmlToMarkdown(div.innerHTML);
+ const div = document.createElement("div");
+ const table = document.createElement("table");
+ const tbody = document.createElement("tbody");
+ for (const [c1, c2] of [
+ ["a", "b"],
+ ["c", "d"],
+ ]) {
+ const tr = document.createElement("tr");
+ const td1 = document.createElement("td");
+ td1.textContent = c1;
+ const td2 = document.createElement("td");
+ td2.textContent = c2;
+ tr.appendChild(td1);
+ tr.appendChild(td2);
+ tbody.appendChild(tr);
+ }
+ table.appendChild(tbody);
+ div.appendChild(table);
+
+ const md = htmlToMarkdown(div.innerHTML);
+ const ls = lines(md);
+
+ // Valid GFM: a header/data separator row is present (an empty header is
+ // synthesized by the GFM turndown plugin for a header-less table — fine).
+ expect(ls.some(isSeparatorRow)).toBe(true);
+ // NOT the old broken "one value per line" shape: every line is pipe-delimited
+ // and no line is a bare cell value on its own.
+ expect(ls.every((l) => l.includes("|"))).toBe(true);
+ expect(md).not.toMatch(/^\s*(a|b|c|d)\s*$/m);
+ // The cell values land in real pipe-delimited data rows.
+ const dataRows = ls.filter((l) => !isSeparatorRow(l)).map(cells);
+ expect(dataRows).toContainEqual(["a", "b"]);
+ expect(dataRows).toContainEqual(["c", "d"]);
+ });
+
+ it("serializes a whole table with a header row as a proper GFM table (headline regression)", () => {
+ // Mirror the serializer's non-wrap branch: the full <table> node is appended
+ // directly (div.appendChild(fragment)) and htmlToMarkdown(div.innerHTML) runs.
+ const div = document.createElement("div");
+ const table = document.createElement("table");
+
+ const thead = document.createElement("thead");
+ const headerRow = document.createElement("tr");
+ for (const h of ["Name", "Age"]) {
+ const th = document.createElement("th");
+ th.textContent = h;
+ headerRow.appendChild(th);
+ }
+ thead.appendChild(headerRow);
+ table.appendChild(thead);
+
+ const tbody = document.createElement("tbody");
+ for (const [name, age] of [
+ ["Alice", "30"],
+ ["Bob", "25"],
+ ]) {
+ const tr = document.createElement("tr");
+ const td1 = document.createElement("td");
+ td1.textContent = name;
+ const td2 = document.createElement("td");
+ td2.textContent = age;
+ tr.appendChild(td1);
+ tr.appendChild(td2);
+ tbody.appendChild(tr);
+ }
+ table.appendChild(tbody);
+ div.appendChild(table);
+
+ const md = htmlToMarkdown(div.innerHTML);
+ const ls = lines(md);
+
+ // Proper GFM structure: separator row + all rows pipe-delimited.
+ expect(ls.some(isSeparatorRow)).toBe(true);
+ expect(ls.every((l) => l.includes("|"))).toBe(true);
+
+ const rows = ls.filter((l) => !isSeparatorRow(l)).map(cells);
+ // Header row comes first, followed by both data rows.
+ expect(rows[0]).toEqual(["Name", "Age"]);
+ expect(rows).toContainEqual(["Alice", "30"]);
+ expect(rows).toContainEqual(["Bob", "25"]);
+ // Headline regression: the table is NOT concatenated one-value-per-line.
+ expect(md).not.toMatch(/^\s*(Name|Age|Alice|Bob|30|25)\s*$/m);
+ });
+});
diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.ts
index c8e36a1b..fa387569 100644
--- a/apps/client/src/features/editor/extensions/markdown-clipboard.ts
+++ b/apps/client/src/features/editor/extensions/markdown-clipboard.ts
@@ -27,24 +27,36 @@ export const MarkdownClipboard = Extension.create({
key: new PluginKey("markdownClipboard"),
props: {
clipboardTextSerializer: (slice) => {
- const listTypes = ["bulletList", "orderedList", "taskList"];
- let topLevelCount = 0;
- let hasList = false;
+ const topLevelNodes: { name: string; childCount: number }[] = [];
slice.content.forEach((node) => {
- if (listTypes.includes(node.type.name)) {
- hasList = true;
- topLevelCount += node.childCount;
- } else {
- topLevelCount++;
- }
+ topLevelNodes.push({
+ name: node.type.name,
+ childCount: node.childCount,
+ });
});
- if (!hasList || topLevelCount < 2) return null;
+ const { asMarkdown, wrapBareRows } =
+ classifyClipboardSelection(topLevelNodes);
+ if (!asMarkdown) return null;
const div = document.createElement("div");
const serializer = DOMSerializer.fromSchema(this.editor.schema);
const fragment = serializer.serializeFragment(slice.content);
- div.appendChild(fragment);
+
+ if (wrapBareRows) {
+ // A partial table cell-selection serializes to bare <tr> nodes
+ // (prosemirror-tables returns the whole `table` node only when the
+ // entire table is selected). Bare <tr> would be foster-parented
+ // away by the HTML parser inside htmlToMarkdown, so wrap them in
+ // <table><tbody> first for the GFM turndown rule to detect them.
+ const table = document.createElement("table");
+ const tbody = document.createElement("tbody");
+ tbody.appendChild(fragment);
+ table.appendChild(tbody);
+ div.appendChild(table);
+ } else {
+ div.appendChild(fragment);
+ }
return htmlToMarkdown(div.innerHTML);
},
handlePaste: (view, event, slice) => {
@@ -153,6 +165,55 @@ export const MarkdownClipboard = Extension.create({
},
});
+/**
+ * Decide whether a copied slice's plain-text clipboard payload should be
+ * serialized as Markdown (instead of ProseMirror's default text serializer,
+ * which joins block leaves with newlines — the "one value per line" bug for
+ * tables).
+ *
+ * Serialize as Markdown for structured content:
+ * - lists with 2+ total items (a single copied bullet stays literal text);
+ * - a whole table (top-level `table` node);
+ * - a partial table cell-selection, which prosemirror-tables copies as bare
+ * `tableRow` nodes (only a full-table selection yields a `table` node).
+ *
+ * `wrapBareRows` flags the bare-rows case so the caller wraps the serialized
+ * <tr> nodes in <table><tbody> before the HTML->Markdown step. Plain paragraphs
+ * return asMarkdown=false so a simple text copy stays literal, and internal
+ * copy/paste keeps using the richer text/html clipboard payload.
+ */
+export function classifyClipboardSelection(
+ nodes: { name: string; childCount: number }[],
+): { asMarkdown: boolean; wrapBareRows: boolean } {
+ const listTypes = ["bulletList", "orderedList", "taskList"];
+ let topLevelCount = 0;
+ let hasList = false;
+ let hasTable = false;
+ let tableRowCount = 0;
+ let nonRowCount = 0;
+
+ for (const node of nodes) {
+ if (listTypes.includes(node.name)) {
+ hasList = true;
+ topLevelCount += node.childCount;
+ nonRowCount++;
+ } else {
+ if (node.name === "table") hasTable = true;
+ if (node.name === "tableRow") tableRowCount++;
+ else nonRowCount++;
+ topLevelCount++;
+ }
+ }
+
+ // Bare tableRow nodes at the top level only occur for a partial cell
+ // selection; a slice never mixes bare rows with other block types, so
+ // "every top-level node is a row" is a safe signal to wrap-and-serialize.
+ const wrapBareRows = tableRowCount > 0 && nonRowCount === 0;
+ const asMarkdown =
+ (hasList && topLevelCount >= 2) || hasTable || wrapBareRows;
+ return { asMarkdown, wrapBareRows };
+}
+
/**
* Reorder/dedup the footnotes of a SELF-CONTAINED pasted markdown block to the
* canonical invariant (the live footnoteSyncPlugin never reorders an existing