Files
gitmost/apps/server/src/collaboration/collaboration.util.ts
T
agent_coder d6d1195abd refactor(#345 step 1): server markdown EXPORT via canonical converter
Move every SERVER ProseMirror->Markdown path off the editor-ext markdown layer
(`htmlToMarkdown`, a second turndown-based converter) onto the canonical
`@docmost/prosemirror-markdown` package.

- `ExportService.exportPage` (page/space markdown export) and
  `collaboration.util.jsonToMarkdown` (used by page.controller's markdown
  responses and the AI public-share chat tool) now serialize DIRECTLY from
  ProseMirror JSON via `convertProseMirrorToMarkdown` — no HTML intermediate, no
  `<colgroup>` scrub (the converter emits GFM tables directly).

This is the SAME serializer the git-sync vault writer feeds, so an exported page
BODY is byte-identical to its vault representation: no more export-md vs vault-md
drift. The HTML export path is unchanged (still `jsonToHtml`).

Emitted markdown moves to the canonical forms: callouts `> [!type]` (not
`:::type`), inline footnotes `^[…]` (not `[^id]`), lossless images
`![alt](src) <!--img {…}-->` (editor-ext dropped width/height/align).

Fixtures-first: export-markdown.spec asserts those canonical forms and the
export==vault-by-construction equality (both call the package converter). The
one deliberate export/vault delta — export prepends the page title as an H1
while the vault carries it in frontmatter — is pinned by a test.

Test infra: declare the `@docmost/prosemirror-markdown` workspace dep; teach
jest to load its ESM build (babel-jest) and stub `@tiptap/react` (server code
imports editor-ext, whose node views reference React renderers only used in a
live browser editor — never on the server).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-05 03:20:25 +03:00

249 lines
6.5 KiB
TypeScript

import { StarterKit } from '@tiptap/starter-kit';
import { TextAlign } from '@tiptap/extension-text-align';
import { Superscript } from '@tiptap/extension-superscript';
import SubScript from '@tiptap/extension-subscript';
import { Typography } from '@tiptap/extension-typography';
import { TextStyle } from '@tiptap/extension-text-style';
import { Color } from '@tiptap/extension-color';
import { Youtube } from '@tiptap/extension-youtube';
import { TaskList, TaskItem } from '@tiptap/extension-list';
import {
Heading,
Callout,
Comment,
CustomCodeBlock,
Details,
DetailsContent,
DetailsSummary,
LinkExtension,
MathBlock,
MathInline,
TableHeader,
TableCell,
TableRow,
CustomTable,
TiptapImage,
TiptapVideo,
TiptapAudio,
TiptapPdf,
PageBreak,
TrailingNode,
Attachment,
Drawio,
Excalidraw,
Embed,
HtmlEmbed,
Mention,
Subpages,
Highlight,
Spoiler,
Indent,
UniqueID,
Columns,
Column,
Status,
addUniqueIdsToDoc,
TransclusionSource,
TransclusionReference,
FootnoteReference,
FootnotesList,
FootnoteDefinition,
PageEmbed,
} from '@docmost/editor-ext';
import { convertProseMirrorToMarkdown } from '@docmost/prosemirror-markdown';
import { generateText, getSchema, JSONContent } from '@tiptap/core';
import { generateHTML, generateJSON } from '../common/helpers/prosemirror/html';
// @tiptap/html library works best for generating prosemirror json state but not HTML
// see: https://github.com/ueberdosis/tiptap/issues/5352
// see:https://github.com/ueberdosis/tiptap/issues/4089
//import { generateJSON } from '@tiptap/html';
import { Node, Schema } from '@tiptap/pm/model';
import * as Y from 'yjs';
import { Logger } from '@nestjs/common';
export const tiptapExtensions = [
StarterKit.configure({
codeBlock: false,
link: false,
trailingNode: false,
heading: false,
}),
Heading,
UniqueID.configure({
types: ['heading', 'paragraph', 'transclusionSource'],
}),
Comment,
TextAlign.configure({ types: ['heading', 'paragraph'] }),
Indent,
TaskList,
TaskItem.configure({
nested: true,
}),
LinkExtension,
Superscript,
SubScript,
Highlight,
Spoiler,
Typography,
TrailingNode,
TextStyle,
Color,
MathInline,
MathBlock,
Details,
DetailsContent,
DetailsSummary,
CustomTable,
TableCell,
TableRow,
TableHeader,
Youtube,
TiptapImage,
TiptapVideo,
TiptapAudio,
TiptapPdf,
PageBreak,
Callout,
Attachment,
CustomCodeBlock,
Drawio,
Excalidraw,
Embed,
// Registered server-side so the node survives schema parsing/serialization.
// Authoring is gated to admins at the document WRITE paths (see
// stripHtmlEmbedNodes usage in persistence/page services), NOT here.
HtmlEmbed,
Mention,
Subpages,
Columns,
Column,
Status,
TransclusionSource,
TransclusionReference,
FootnoteReference,
FootnotesList,
FootnoteDefinition,
PageEmbed,
] as any;
export function jsonToHtml(tiptapJson: any) {
return generateHTML(tiptapJson, tiptapExtensions);
}
export function htmlToJson(html: string) {
const pmJson = generateJSON(html, tiptapExtensions);
try {
return addUniqueIdsToDoc(pmJson, tiptapExtensions);
} catch (error) {
console.warn('failed to add unique ids to doc', error);
return pmJson;
}
}
export function jsonToText(tiptapJson: JSONContent) {
return generateText(tiptapJson, tiptapExtensions);
}
export function jsonToNode(tiptapJson: JSONContent) {
const schema = getSchema(tiptapExtensions);
try {
return Node.fromJSON(schema, tiptapJson);
} catch (error) {
if (
error instanceof RangeError &&
error.message.includes('Unknown node type')
) {
Logger.warn('Stripping unknown node types from document:', error.message);
const cleanedJson = stripUnknownNodes(tiptapJson, schema);
return Node.fromJSON(schema, cleanedJson);
}
throw error;
}
}
export function getPageId(documentName: string) {
return documentName.split('.')[1];
}
export function isEmptyParagraphDoc(tiptapJson: JSONContent): boolean {
if (!tiptapJson || tiptapJson.type !== 'doc') return false;
const content = tiptapJson.content;
if (!Array.isArray(content) || content.length !== 1) return false;
const child = content[0];
if (!child || child.type !== 'paragraph') return false;
return (
!child.content ||
(Array.isArray(child.content) && child.content.length === 0)
);
}
function stripUnknownNodes(
json: JSONContent,
schema: Schema,
): JSONContent | null {
if (!json || typeof json !== 'object') return json;
// Recursively clean children first, flattening any unwrapped content
if (json.content && Array.isArray(json.content)) {
const newContent: JSONContent[] = [];
for (const child of json.content) {
const cleaned = stripUnknownNodes(child, schema);
if (Array.isArray(cleaned)) {
newContent.push(...cleaned);
} else if (cleaned) {
newContent.push(cleaned);
}
}
json.content = newContent;
}
// Check if this node is unknown AFTER processing children
if (json.type && !schema.nodes[json.type]) {
// Unwrap: return cleaned children directly instead of wrapping
return (
json.content && json.content.length > 0 ? json.content : null
) as any;
}
return json;
}
export function prosemirrorNodeToYElement(node: any): Y.XmlElement | Y.XmlText {
if (node.type === 'text') {
const ytext = new Y.XmlText();
ytext.insert(0, node.text || '');
if (node.marks?.length > 0) {
const attrs: Record<string, any> = {};
for (const mark of node.marks) {
attrs[mark.type] = mark.attrs || true;
}
ytext.format(0, node.text?.length || 0, attrs);
}
return ytext;
}
const element = new Y.XmlElement(node.type);
if (node.attrs) {
for (const [key, value] of Object.entries(node.attrs)) {
if (value !== null && value !== undefined) {
element.setAttribute(key, value as any);
}
}
}
if (node.content?.length > 0) {
const children = node.content.map(prosemirrorNodeToYElement);
element.insert(0, children);
}
return element;
}
export function jsonToMarkdown(tiptapJson: any): string {
// Direct ProseMirror JSON -> Markdown via the canonical converter
// (`@docmost/prosemirror-markdown`) — no HTML intermediate, no second
// editor-ext markdown layer. Same serializer as the page/space export and the
// git-sync vault writer, so every server PM->MD path emits identical canonical
// markdown (issue #345).
return convertProseMirrorToMarkdown(tiptapJson);
}