Files
gitmost/apps/client/src/features/editor/extensions/markdown-clipboard.ts
a c4ed4a4855 fix(footnotes): strip bare definitions on rebuild; MCP full-doc + zip-import canonicalize tests (#228)
Review #6 (approve-with-comments) follow-ups:
1. canonicalize step 7 now strips bare footnoteDefinitions at ANY depth
   (stripFootnoteDefinitionsDeep), not just footnotesList, in BOTH copies. A
   definition hand-authored outside a list (e.g. nested in a callout via a
   raw-JSON write path) was left in place while a copy was also added to the
   rebuilt list -> duplicate, idempotent, self-perpetuating. Runs only in the
   rebuild path (after the lists are stripped); the fast-path / placement-keep
   branch is untouched. Added a shared-corpus case (bare def nested in a callout)
   to pin it in both mirrors.
2. markdown-clipboard: removed the dead top-level footnoteReference check in
   canonicalizePastedFootnotes (an inline atom is never a top-level slice child;
   only the descendants scan can find it).

Test coverage:
4. New MCP binding tests (full-doc-write-canonicalize.test.mjs): update_page_json
   and copy_page_content canonicalize the persisted full doc, asserted via a new
   `replacePage` seam (symmetric to the existing `mutatePage` seam) so no live
   collab socket is needed. Routed both writers through the seam.
5. New server spec (file-import-task.service.footnote-canonicalize.spec.ts): the
   zip-import path (processGenericImport) canonicalizes footnotes — real
   markdown->HTML->JSON via a real ImportService over a temp-dir .md file, DB trx
   stubbed to capture the persisted page content. FileImportTaskService had no
   spec before.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-28 01:39:25 +03:00

299 lines
11 KiB
TypeScript

// adapted from: https://github.com/aguingand/tiptap-markdown/blob/main/src/extensions/tiptap/clipboard.js - MIT
import { Extension } from "@tiptap/core";
import { Plugin, PluginKey, TextSelection } from "@tiptap/pm/state";
import { DOMParser, DOMSerializer, Fragment, Slice } from "@tiptap/pm/model";
import { find } from "linkifyjs";
import {
markdownToHtml,
htmlToMarkdown,
canonicalizeFootnotes,
FOOTNOTES_LIST_NAME,
FOOTNOTE_REFERENCE_NAME,
} from "@docmost/editor-ext";
import type { Schema } from "@tiptap/pm/model";
export const MarkdownClipboard = Extension.create({
name: "markdownClipboard",
priority: 101,
addOptions() {
return {
transformPastedText: false,
};
},
addProseMirrorPlugins() {
return [
new Plugin({
key: new PluginKey("markdownClipboard"),
props: {
clipboardTextSerializer: (slice) => {
const listTypes = ["bulletList", "orderedList", "taskList"];
let topLevelCount = 0;
let hasList = false;
slice.content.forEach((node) => {
if (listTypes.includes(node.type.name)) {
hasList = true;
topLevelCount += node.childCount;
} else {
topLevelCount++;
}
});
if (!hasList || topLevelCount < 2) return null;
const div = document.createElement("div");
const serializer = DOMSerializer.fromSchema(this.editor.schema);
const fragment = serializer.serializeFragment(slice.content);
div.appendChild(fragment);
return htmlToMarkdown(div.innerHTML);
},
handlePaste: (view, event, slice) => {
if (!event.clipboardData) {
return false;
}
if (this.editor.isActive("codeBlock")) {
return false;
}
const text = event.clipboardData.getData("text/plain");
const html = event.clipboardData.getData("text/html");
const vscode = event.clipboardData.getData("vscode-editor-data");
const vscodeData = vscode ? JSON.parse(vscode) : undefined;
const language = vscodeData?.mode;
const isVscodeMarkdown = language === "markdown";
const isPlainTextOnly = !html && !vscode && !!text;
if (!isVscodeMarkdown && !isPlainTextOnly) {
return false;
}
if (isPlainTextOnly) {
if ((view as any).input?.shiftKey || !this.options.transformPastedText) {
return false;
}
const link = find(text, {
defaultProtocol: "http",
}).find((item) => item.isLink && item.value === text);
if (link) {
return false;
}
}
const { tr } = view.state;
const { from, to } = view.state.selection;
const parsed = markdownToHtml(text.replace(/\n+$/, ""));
const body = elementFromString(parsed);
normalizeTableColumnWidths(body);
const parsedSlice = DOMParser.fromSchema(
this.editor.schema,
).parseSlice(body, {
preserveWhitespace: true,
});
// A markdown paste builds its ProseMirror fragment directly (DOM ->
// parseSlice), bypassing the editor's footnoteSyncPlugin, which never
// reorders an existing list. So a pasted markdown block whose footnote
// definitions are out of order (or contains orphan defs) would be
// stored out of order. Canonicalize the self-contained pasted block so
// its footnotes come out reference-ordered, deduped and orphan-free
// (issue #228). See canonicalizePastedFootnotes for why this is scoped
// to whole-block pastes that carry their own footnotesList.
const contentNodes = canonicalizePastedFootnotes(
parsedSlice,
this.editor.schema,
);
tr.replaceRange(from, to, contentNodes);
const insertEnd = tr.mapping.map(from, 1);
tr.setSelection(TextSelection.near(tr.doc.resolve(Math.max(from, insertEnd - 2)), -1));
tr.setMeta('paste', true)
view.dispatch(tr);
return true;
},
// Strip trailing whitespace-only paragraphs from pasted content.
// Terminals (GNOME Terminal, etc.) often include trailing
// whitespace in their HTML clipboard data, which ProseMirror
// parses as an extra paragraph. Inside a list item this creates
// an orphan empty line that breaks the list structure.
transformPasted: (slice) => {
let { content, openStart, openEnd } = slice;
// Remove trailing paragraphs that contain only whitespace
while (content.childCount > 1) {
const lastChild = content.lastChild;
if (
lastChild?.type.name === "paragraph" &&
lastChild.textContent.trim() === ""
) {
const children = [];
for (let i = 0; i < content.childCount - 1; i++) {
children.push(content.child(i));
}
content = Fragment.from(children);
} else {
break;
}
}
if (content !== slice.content) {
return new Slice(content, openStart, Math.max(openEnd, 1));
}
return slice;
},
},
}),
];
},
});
/**
* Reorder/dedup the footnotes of a SELF-CONTAINED pasted markdown block to the
* canonical invariant (the live footnoteSyncPlugin never reorders an existing
* list, so an out-of-order pasted block would otherwise persist out of order).
*
* Scoped deliberately to whole-block pastes (openStart/openEnd === 0) that carry
* their OWN footnotesList: canonicalizeFootnotes would synthesize empty
* definitions for any reference lacking a definition, which is correct for a
* standalone block but would be wrong for a reference-only paste that REUSES a
* footnote already defined in the target document — so those are left untouched
* for the paste/sync plugins to merge. Residual: when the pasted block is merged
* into a doc that already has footnotes, ordering RELATIVE to the pre-existing
* footnotes is still governed by the sync plugin (which does not reorder).
*
* Also requires at least one footnoteReference in the selection: a definitions-ONLY
* paste (`[^a]: …` with no `[^a]` reference in the same block) has no references,
* so canonicalizeFootnotes would drop the whole list and the paste would come out
* EMPTY — losing the pasted text. Such a block is left as-is for the sync plugin.
*/
export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice {
if (slice.openStart !== 0 || slice.openEnd !== 0) return slice;
let hasFootnotesList = false;
let hasReference = false;
slice.content.forEach((node) => {
if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true;
// footnoteReference is an inline atom, never a top-level slice child here
// (this function early-returns for open slices, so children are whole
// blocks), so it is only reachable by descending.
node.descendants((child) => {
if (child.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true;
});
});
if (!hasFootnotesList) return slice;
// No reference anywhere -> a definitions-only paste; canonicalizing would strip
// the reference-less list (empty paste). Leave it untouched.
if (!hasReference) return slice;
const content = slice.content.toJSON();
if (!Array.isArray(content)) return slice;
const canonical = canonicalizeFootnotes({ type: "doc", content }) as {
content?: unknown[];
};
const fragment = Fragment.fromJSON(schema, canonical.content ?? []);
return new Slice(fragment, 0, 0);
}
function elementFromString(value) {
// add a wrapper to preserve leading and trailing whitespace
const wrappedValue = `<body>${value}</body>`;
return new window.DOMParser().parseFromString(wrappedValue, "text/html").body;
}
const DEFAULT_PASTE_COL_WIDTH_PX = 150;
function parsePixelWidth(el: Element): number | null {
const attr = el.getAttribute("width");
if (attr) {
const n = parseInt(attr, 10);
if (Number.isFinite(n) && n > 0) return n;
}
const style = el.getAttribute("style") || "";
const m = style.match(/(?:^|;)\s*width\s*:\s*([\d.]+)\s*px/i);
if (m) {
const n = parseInt(m[1], 10);
if (Number.isFinite(n) && n > 0) return n;
}
return null;
}
function getFirstRow(table: Element): Element | null {
const tbodyRow = table.querySelector(":scope > tbody > tr");
if (tbodyRow) return tbodyRow;
const theadRow = table.querySelector(":scope > thead > tr");
if (theadRow) return theadRow;
return table.querySelector(":scope > tr");
}
function deriveColumnWidths(table: Element): (number | null)[] | null {
const cols = table.querySelectorAll(":scope > colgroup > col");
if (cols.length > 0) {
const widths: (number | null)[] = [];
cols.forEach((col) => widths.push(parsePixelWidth(col)));
if (widths.some((w) => w !== null)) return widths;
}
const firstRow = getFirstRow(table);
if (!firstRow) return null;
const widths: (number | null)[] = [];
Array.from(firstRow.children)
.filter((c) => c.tagName === "TD" || c.tagName === "TH")
.forEach((cell) => {
const colspan = parseInt(cell.getAttribute("colspan") || "1", 10) || 1;
const w = parsePixelWidth(cell);
for (let i = 0; i < colspan; i++) {
widths.push(w !== null ? Math.round(w / colspan) : null);
}
});
if (widths.length === 0 || widths.every((w) => w === null)) return null;
return widths;
}
// Mirror of server normalizeTableColumnWidths (see import/utils/table-utils.ts):
// markdown source has no widths, so without this every pasted table renders
// at table-layout:fixed/100% and squashes columns to fit the editor instead of
// letting .tableWrapper's overflow-x: auto scroll.
export function normalizeTableColumnWidths(root: Element): void {
root.querySelectorAll("table").forEach((table) => {
const firstRow = getFirstRow(table);
if (!firstRow) return;
let colWidths = deriveColumnWidths(table);
if (!colWidths) {
let count = 0;
Array.from(firstRow.children)
.filter((c) => c.tagName === "TD" || c.tagName === "TH")
.forEach((cell) => {
count += parseInt(cell.getAttribute("colspan") || "1", 10) || 1;
});
if (count === 0) return;
colWidths = new Array(count).fill(DEFAULT_PASTE_COL_WIDTH_PX);
}
let col = 0;
Array.from(firstRow.children)
.filter((c) => c.tagName === "TD" || c.tagName === "TH")
.forEach((cell) => {
if (cell.getAttribute("colwidth")) {
col += parseInt(cell.getAttribute("colspan") || "1", 10) || 1;
return;
}
const colspan = parseInt(cell.getAttribute("colspan") || "1", 10) || 1;
const slice = colWidths!.slice(col, col + colspan);
col += colspan;
if (slice.length === 0 || slice.every((w) => w === null)) return;
const values = slice.map((w) => (w == null ? 100 : w));
cell.setAttribute("colwidth", values.join(","));
});
});
}