diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1d9ca3ad..7ea47b94 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,6 +72,14 @@ jobs: - name: Build editor-ext run: pnpm --filter @docmost/editor-ext build + # @docmost/prosemirror-markdown is the shared converter (#293/#326); its + # build/ is gitignored, and plain `pnpm -r test` does NOT honour nx + # `dependsOn: ^build`, so its consumers (mcp `pretest: tsc`, git-sync vitest + # typecheck) fail with TS2307 Cannot find module '@docmost/prosemirror-markdown' + # unless it is built first. Build it before the recursive test run. + - name: Build prosemirror-markdown + run: pnpm --filter @docmost/prosemirror-markdown build + - name: Run unit tests run: pnpm -r test diff --git a/.gitignore b/.gitignore index bbc6abc1..0f97bb35 100644 --- a/.gitignore +++ b/.gitignore @@ -4,12 +4,21 @@ data # compiled output /dist -node_modules/ +node_modules # git-sync compiled output (built in CI/Docker via `pnpm build`, never committed, # so src/ and prod can never silently diverge). packages/git-sync/build/ +# prosemirror-markdown compiled output (built in CI/Docker via `pnpm build`, +# never committed, so src/ and prod can never silently diverge). +packages/prosemirror-markdown/build/ + +# mcp compiled output (built in CI/Docker via `pnpm build`, never committed, so +# src/ and prod can never silently diverge). Matches the git-sync/ +# prosemirror-markdown convention; the package is private and rebuilt at deploy. +packages/mcp/build/ + # Logs logs *.log diff --git a/AGENTS.md b/AGENTS.md index 1a13dea6..9bedbc39 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -200,7 +200,8 @@ pnpm workspace (`pnpm@10.4.0`) orchestrated by **Nx**. 
Four workspace packages: | `apps/server` | `server` | NestJS 11 + Fastify, Kysely (Postgres), Redis | Backend API, collaboration, AI | | `apps/client` | `client` | React 18 + Vite + Mantine 8 + TanStack Query + Jotai | SPA frontend | | `packages/editor-ext` | `@docmost/editor-ext` | Tiptap/ProseMirror | Shared Tiptap node/mark extensions, imported by both the client and the server | -| `packages/mcp` | `@docmost/mcp` | MCP SDK, Tiptap, Yjs | Standalone MCP server, also bundled into the server at `/mcp`. Does **not** import `editor-ext` — it keeps its own vendored mirror of the schema in `packages/mcp/src/lib/` | +| `packages/mcp` | `@docmost/mcp` | MCP SDK, Tiptap, Yjs | Standalone MCP server, also bundled into the server at `/mcp`. Consumes the shared converter/schema from `@docmost/prosemirror-markdown` (#293) — it no longer carries its own vendored converter/schema copy | +| `packages/prosemirror-markdown` | `@docmost/prosemirror-markdown` | Tiptap, marked, jsdom | The single, canonical ProseMirror↔Markdown converter + Docmost schema mirror (#293). Consumed by `mcp` and `git-sync`; there is exactly ONE copy of the converter now | `build` targets are Nx-cached and dependency-ordered (`dependsOn: ["^build"]`), so `editor-ext` builds before the apps. `nx.json` sets `affected.defaultBase: main`. @@ -282,7 +283,7 @@ The API server is a Fastify app with a global `/api` prefix (`main.ts` excludes ### Client structure Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirrors the server domains: `page`, `space`, `comment`, `ai-chat`, `editor`, …). Conventions: - **TanStack Query** for server state (one `queries/` file per feature), **Jotai** atoms for local/shared UI state, **Mantine 8** + CSS modules (`*.module.css`) + `postcss-preset-mantine` for UI. 
-- The editor is Tiptap; shared node/mark extensions live in `packages/editor-ext` and are imported by **both the client and the server** (collaboration, import/export) — editor schema changes often need to be made in `editor-ext`, not just the client. Note `packages/mcp` does *not* depend on `editor-ext`; it carries its own mirrored copy of the schema, so keep the two in sync manually when the document schema changes. +- The editor is Tiptap; shared node/mark extensions live in `packages/editor-ext` and are imported by **both the client and the server** (collaboration, import/export) — editor schema changes often need to be made in `editor-ext`, not just the client. The ProseMirror↔Markdown converter and its Docmost schema mirror now live in a SINGLE package, `@docmost/prosemirror-markdown` (#293), consumed by both `mcp` and `git-sync` — do NOT reintroduce a per-package copy. `editor-ext` is the upstream source of the Tiptap schema; `@docmost/prosemirror-markdown`'s `docmost-schema.ts` mirrors it and a serializer-contract test (`packages/prosemirror-markdown/test/serializer-contract.test.ts`) guards the boundary (every schema node must have a converter case), so drift surfaces as a failing test rather than silent divergence. - API access goes through `apps/client/src/lib/api-client.ts` (axios). The `@` alias maps to `apps/client/src`. - Runtime config is injected at build time by `vite.config.ts` via `define` (`APP_URL`, `COLLAB_URL`, `APP_VERSION`, …) — these come from the root `.env`, not from `import.meta.env`. @@ -293,7 +294,7 @@ Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirro - The version string shown in the UI comes from `APP_VERSION` (CI/Docker) or `git describe --tags --always` (local), resolved in `vite.config.ts` — not from `package.json`. - Server TS config is permissive (`noImplicitAny: false`, `strictNullChecks: false`, `no-explicit-any` lint disabled). Follow the existing relaxed style rather than tightening types broadly. 
- Dependency versions are heavily pinned via `pnpm.overrides` and `pnpm.patchedDependencies` (`scimmy`, `yjs`) in the root `package.json`. Don't bump pinned/patched deps casually; the patches and overrides exist for compatibility/security reasons. -- **Adding/renaming/removing an MCP tool requires updating `SERVER_INSTRUCTIONS`** in `packages/mcp/src/index.ts` — the intent-routing guide MCP clients receive on initialize. This applies both to inline `server.registerTool(...)` calls in `index.ts` and to specs in `packages/mcp/src/tool-specs.ts`. Enforced by `packages/mcp/test/unit/server-instructions.test.mjs`, which fails when a registered tool is not mentioned in the guide (deliberate opt-outs go into its `EXCEPTIONS` list). Remember `packages/mcp/build/` is committed — rebuild after editing. +- **Adding/renaming/removing an MCP tool requires updating `SERVER_INSTRUCTIONS`** in `packages/mcp/src/index.ts` — the intent-routing guide MCP clients receive on initialize. This applies both to inline `server.registerTool(...)` calls in `index.ts` and to specs in `packages/mcp/src/tool-specs.ts`. Enforced by `packages/mcp/test/unit/server-instructions.test.mjs`, which fails when a registered tool is not mentioned in the guide (deliberate opt-outs go into its `EXCEPTIONS` list). `packages/mcp/build/` is gitignored and rebuilt in CI/Docker via `pnpm build` (same convention as `git-sync`/`prosemirror-markdown`) — never commit it; rebuild locally after editing to run the tests. 
## CI / release diff --git a/Dockerfile b/Dockerfile index e6daeb72..42f5a267 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,6 +38,14 @@ COPY --from=builder /app/packages/editor-ext/dist /app/packages/editor-ext/dist COPY --from=builder /app/packages/editor-ext/package.json /app/packages/editor-ext/package.json COPY --from=builder /app/packages/mcp/build /app/packages/mcp/build COPY --from=builder /app/packages/mcp/package.json /app/packages/mcp/package.json +# mcp now depends on @docmost/prosemirror-markdown (workspace:*) and eager-imports +# it at runtime (the in-app ai-chat DocmostClient loads build/index.js -> +# lib/markdown-converter.js). Ship the built package + its manifest, or the prod +# install resolves a broken workspace symlink and every ai-chat tool dies with +# ERR_MODULE_NOT_FOUND (#293/#326 step 5). (git-sync has no runtime consumer yet; +# revisit at step 6 when #119 lands.) +COPY --from=builder /app/packages/prosemirror-markdown/build /app/packages/prosemirror-markdown/build +COPY --from=builder /app/packages/prosemirror-markdown/package.json /app/packages/prosemirror-markdown/package.json # Copy root package files COPY --from=builder /app/package.json /app/package.json diff --git a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts index 82f2ecb6..cc0c48fa 100644 --- a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts +++ b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts @@ -659,13 +659,21 @@ export class AiChatToolsService { listComments: tool({ description: - 'List ALL comments on a page in one call, including RESOLVED ' + - 'threads — filter by resolvedAt when you need only open ones. ' + - 'Content is returned as Markdown.', + 'List comments on a page in one call. 
By DEFAULT only ACTIVE ' + + 'threads are returned; resolved threads (a resolved top-level ' + + 'comment and all its replies) are hidden and their count reported ' + + 'as `resolvedThreadsHidden` so you can re-query with ' + + '`includeResolved: true` to see everything. Returns ' + + '`{ items, resolvedThreadsHidden }`. Content is returned as Markdown.', inputSchema: modelFriendlyInput({ pageId: z.string().describe('The id of the page.'), + includeResolved: z + .boolean() + .optional() + .describe('default only active threads; true — include resolved'), }), - execute: async ({ pageId }) => await client.listComments(pageId), + execute: async ({ pageId, includeResolved }) => + await client.listComments(pageId, includeResolved), }), getComment: tool({ diff --git a/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts b/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts index 42bbd097..8e6ee0c2 100644 --- a/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts +++ b/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts @@ -61,7 +61,12 @@ export interface DocmostClientLike { opts?: { regex?: boolean; caseSensitive?: boolean; limit?: number }, ): Promise>; getTable(pageId: string, tableRef: string): Promise>; - listComments(pageId: string): Promise; + // Returns `{ items, resolvedThreadsHidden }`. DEFAULT (includeResolved unset/ + // false) hides resolved threads wholesale; pass true for the full feed. 
+ listComments( + pageId: string, + includeResolved?: boolean, + ): Promise<{ items: unknown[]; resolvedThreadsHidden: number }>; getComment( commentId: string, ): Promise<{ data: Record; success: boolean }>; diff --git a/packages/git-sync/package.json b/packages/git-sync/package.json index 96765ffa..fe37b436 100644 --- a/packages/git-sync/package.json +++ b/packages/git-sync/package.json @@ -20,6 +20,7 @@ }, "license": "MIT", "dependencies": { + "@docmost/prosemirror-markdown": "workspace:*", "@tiptap/core": "3.20.4", "@tiptap/extension-highlight": "3.20.4", "@tiptap/extension-image": "3.20.4", diff --git a/packages/git-sync/src/engine/pull.ts b/packages/git-sync/src/engine/pull.ts index b541c67a..3d7868d5 100644 --- a/packages/git-sync/src/engine/pull.ts +++ b/packages/git-sync/src/engine/pull.ts @@ -31,7 +31,7 @@ */ import { dirname } from "node:path"; import { sep } from "node:path"; -import { parsePageFile, serializePageFile } from "../lib/page-file.js"; +import { parsePageFile, serializePageFile } from "@docmost/prosemirror-markdown"; import type { GitSyncClient } from "./client.types.js"; import { buildVaultLayout, type PageNode } from "./layout.js"; import { diff --git a/packages/git-sync/src/engine/push.ts b/packages/git-sync/src/engine/push.ts index 63d28530..903931be 100644 --- a/packages/git-sync/src/engine/push.ts +++ b/packages/git-sync/src/engine/push.ts @@ -26,8 +26,11 @@ * the gitmost server drives the engine in-process (there is no standalone CLI * entry point). 
*/ -import { type DocmostMdMeta } from "../lib/index.js"; -import { parsePageFile, serializePageFile } from "../lib/page-file.js"; +import { + type DocmostMdMeta, + parsePageFile, + serializePageFile, +} from "@docmost/prosemirror-markdown"; import type { GitSyncClient } from "./client.types.js"; import type { DiffEntry } from "./git.js"; import { VaultGit, DEFAULT_BRANCH } from "./git.js"; diff --git a/packages/git-sync/src/engine/stabilize.ts b/packages/git-sync/src/engine/stabilize.ts index a075b634..ce1acdcf 100644 --- a/packages/git-sync/src/engine/stabilize.ts +++ b/packages/git-sync/src/engine/stabilize.ts @@ -17,7 +17,7 @@ import { markdownToProseMirror, serializeDocmostMarkdownBody, type DocmostMdMeta, -} from "../lib/index.js"; +} from "@docmost/prosemirror-markdown"; /** * Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte diff --git a/packages/git-sync/src/index.ts b/packages/git-sync/src/index.ts index a52ca8d3..8c9e87eb 100644 --- a/packages/git-sync/src/index.ts +++ b/packages/git-sync/src/index.ts @@ -8,6 +8,10 @@ */ // Pure converter (markdown <-> ProseMirror, file envelope, canonicalization). +// Re-exported from the standalone `@docmost/prosemirror-markdown` package, +// which is the single source of truth for the converter core; git-sync keeps +// only the engine (vault/git/orchestrator) and re-surfaces the converter for +// in-process consumers of the git-sync barrel. export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, @@ -16,8 +20,8 @@ export { markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, -} from "./lib/index.js"; -export type { DocmostMdMeta } from "./lib/index.js"; +} from "@docmost/prosemirror-markdown"; +export type { DocmostMdMeta } from "@docmost/prosemirror-markdown"; // Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize, // loop-guard body hash. 
@@ -123,4 +127,4 @@ export { } from "./engine/path-guard.js"; export type { PathGuardIo, VaultPathUnsafeReason } from "./engine/path-guard.js"; -export { parsePageFile, serializePageFile } from "./lib/page-file.js"; +export { parsePageFile, serializePageFile } from "@docmost/prosemirror-markdown"; diff --git a/packages/git-sync/src/lib/markdown-converter.ts b/packages/git-sync/src/lib/markdown-converter.ts deleted file mode 100644 index 013a54f3..00000000 --- a/packages/git-sync/src/lib/markdown-converter.ts +++ /dev/null @@ -1,1130 +0,0 @@ -import { encodeHtmlEmbedSource } from "./docmost-schema.js"; - -/** - * Hard cap on processNode recursion depth (see the depth guard below). - * - * Chosen well above any realistic document (the deepest legitimate nesting the - * editor can produce is far shallower) yet far below the point where the - * converter's own call stack overflows. The heaviest shape (deeply nested - * lists) costs ~5 JS frames per level and the runtime stack holds ~10k frames, - * so the measured overflow is around level ~650 (deeply nested lists); 400 - * leaves a comfortable margin while still rendering pathological-but-bounded - * docs in full (the 200-level stress fixture reaches depth ~204). - */ -const MAX_NODE_DEPTH = 400; - -/** - * Convert ProseMirror/TipTap JSON content to Markdown - * Supports all Docmost-specific node types and extensions - */ -export function convertProseMirrorToMarkdown(content: any): string { - if (!content || !content.content) return ""; - - // Escape a value interpolated into an HTML double-quoted attribute value - // (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the - // ATTRIBUTE context only the quote that delimits the value and the ampersand - // that starts an entity are special, so we escape ONLY & " (and ' for safety - // when single-quoted delimiters are used). 
We deliberately do NOT escape < or - // >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode - // </> back inside attribute values, so escaping them would corrupt the - // stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on - // every round-trip (`a < b` -> `a < b` -> `a &lt; b`). Escaping & " - // keeps the value inert against attribute-injection while staying idempotent. - // NOTE: escape ONLY & and " here. The value is always wrapped in double - // quotes, so " is the only delimiter; ' is NOT special in a double-quoted - // value, and parse5 does not decode ' back inside attribute values, so - // escaping ' would (like < >) corrupt the value and accumulate & on every - // round-trip. Escaping & and " is idempotent (parse5 decodes them back). - const escapeAttr = (value: unknown): string => - String(value) - .replace(/&/g, "&") - .replace(/"/g, """); - - // Escape a value placed as HTML element TEXT content (between tags), where - // <, >, and & are all significant. Used for text rendered inside raw-HTML - // blocks (table cells / columns) so stored characters cannot inject markup. - const escapeHtmlText = (value: unknown): string => - String(value) - .replace(/&/g, "&") - .replace(//g, ">"); - - // Percent-encode characters that would break out of a markdown URL target - // (...) — whitespace/newlines and parentheses — so a stored src stays a - // single inert token (used for image/video/youtube srcs). - const encodeMdUrl = (value: unknown): string => - String(value || "") - .replace(/\s/g, (c: string) => (c === " " ? "%20" : encodeURIComponent(c))) - .replace(/\(/g, "%28") - .replace(/\)/g, "%29"); - - // Recursion depth guard. processNode is mutually recursive (directly and via - // processListItem/processTaskItem/blockToHtml), and a pathologically nested - // document (e.g. 
tens of thousands of nested blockquotes) would otherwise - // overflow the call stack and throw a RangeError, which would abort the sync - // and prevent the page from ever being written. We track the live nesting - // depth in a closure counter (the wrapper below) so we NEVER throw: past the - // limit we stop recursing and emit the node's own text (or nothing) instead. - // Normal documents never approach MAX_NODE_DEPTH, so their output is byte- - // identical. NOTE: the wrapper signature is (node) only — several callers use - // `.map(processNode)`, which would otherwise pass the array index as a second - // argument; the wrapper ignores extra arguments so that is harmless. - let nodeDepth = 0; - - // A table cell whose content is NOT a single plain paragraph — a list, code - // block, blockquote, multiple paragraphs, etc. A GFM pipe cell can only hold - // inline content on one line, so such a cell must force the HTML form - // or its structure is flattened/lost on round trip (review #8). - const cellIsMultiBlock = (cell: any): boolean => { - const blocks = cell.content || []; - if (blocks.length > 1) return true; - const only = blocks[0]; - return only != null && only.type !== "paragraph"; - }; - - // Render a whole table as raw HTML `
` (round-trips via the schema's - // table-family parseHTML). Used when a GFM pipe table would be wrong: merged - // cells (colspan/rowspan), multi-block cells (#8), OR the table sits inside a - // raw-HTML container like a column (marked does not parse markdown inside raw - // HTML, so a GFM pipe table there becomes literal "| a | b |" text — #7). - // `blockToHtml` is referenced lazily (defined below; only called at runtime). - const tableToHtml = (tableRows: any[]): string => { - const renderHtmlCell = (cell: any): string => { - const tag = cell.type === "tableHeader" ? "th" : "td"; - const a = cell.attrs || {}; - const cellParts: string[] = []; - if ((a.colspan ?? 1) > 1) - cellParts.push(`colspan="${escapeAttr(a.colspan)}"`); - if ((a.rowspan ?? 1) > 1) - cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`); - if (a.align) cellParts.push(`align="${escapeAttr(a.align)}"`); - const open = cellParts.length - ? `<${tag} ${cellParts.join(" ")}>` - : `<${tag}>`; - const inner = (cell.content || []) - .map((block: any) => blockToHtml(block)) - .join(""); - return `${open}${inner}`; - }; - const htmlRows = tableRows - .map( - (row: any) => - `${(row.content || []).map(renderHtmlCell).join("")}`, - ) - .join(""); - return `
${htmlRows}
`; - }; - - const processNode = (node: any): string => { - if (nodeDepth >= MAX_NODE_DEPTH) { - // Bail out of deeper recursion without throwing. A text node still has - // its own content worth keeping; a container at the limit collapses to - // "" (its already-too-deep subtree is dropped) rather than overflowing. - return typeof node?.text === "string" ? node.text : ""; - } - nodeDepth++; - try { - return processNodeInner(node); - } finally { - nodeDepth--; - } - }; - - const processNodeInner = (node: any): string => { - const type = node.type; - const nodeContent = node.content || []; - - switch (type) { - case "doc": - return nodeContent.map(processNode).join("\n\n"); - - case "paragraph": - const text = nodeContent.map(processNode).join(""); - const align = node.attrs?.textAlign; - if (align && align !== "left") { - // Emit alignment as a styled `

` (review #10). The old - // `

` had NO matching import parse rule — the div was - // unwrapped and alignment lost on every round trip. A styled `

` - // round-trips: the paragraph parse rule (tag:"p") matches and the - // textAlign global-attribute parseHTML (docmost-schema) reads the style. - return `

${text}

`; - } - return text || ""; - - case "heading": - const level = node.attrs?.level || 1; - const headingText = nodeContent.map(processNode).join(""); - const headingAlign = node.attrs?.textAlign; - if (headingAlign && headingAlign !== "left") { - // Emit alignment as a styled `` so it round-trips losslessly, - // symmetric to the paragraph case above (review F5/A1). The bare - // `## text` markdown form carries NO alignment, so an aligned heading - // would silently drop textAlign on export. A styled `` re-parses: - // the heading parse rule (tag:"h1".."h6") matches and the textAlign - // global-attribute parseHTML (docmost-schema) reads the style back, - // preserving BOTH level and textAlign. escapeAttr keeps the align - // value injection-safe, exactly like the paragraph arm. - return `${headingText}`; - } - // No alignment (or the default "left"): keep the plain `## text` - // markdown form — HTML-ifying an unaligned heading would be needless - // churn, exactly as the paragraph case keeps plain text when unaligned. - return "#".repeat(level) + " " + headingText; - - case "text": - let textContent = node.text || ""; - // Apply marks (bold, italic, code, etc.) - if (node.marks) { - // The schema's `code` mark declares `excludes: "_"` — it excludes every - // other inline mark — so the editor can NEVER produce a text run that - // carries `code` together with another mark, and on import any - // co-occurring mark is always dropped (the run comes back as code-only). - // The lossless, byte-stable behavior is therefore: when a run has the - // `code` mark, emit ONLY the backtick code span and ignore every other - // mark, so md1 is already code-only and md2 === md1. Runs WITHOUT a code - // mark are rendered exactly as before. 
- const markTypes = node.marks.map((m: any) => m.type); - const hasCode = markTypes.includes("code"); - if (hasCode) { - textContent = `\`${textContent}\``; - return textContent; - } - const codeCombined = false; - for (const mark of node.marks) { - switch (mark.type) { - case "bold": - textContent = codeCombined - ? `${textContent}` - : `**${textContent}**`; - break; - case "italic": - textContent = codeCombined - ? `${textContent}` - : `*${textContent}*`; - break; - case "code": - // When combined with another mark, wrap as so the - // surrounding HTML marks can nest around it; otherwise use the - // plain backtick span. - textContent = codeCombined - ? `${textContent}` - : `\`${textContent}\``; - break; - case "link": { - const href = mark.attrs?.href || ""; - const title = mark.attrs?.title; - if (codeCombined) { - // Emit an HTML anchor so it can wrap the nested . - const safeHref = escapeAttr(href); - if (title) { - textContent = `${textContent}`; - } else { - textContent = `${textContent}`; - } - } else if (title) { - // Emit the optional markdown link title; escape an embedded - // double-quote so it cannot terminate the title string early. - const safeTitle = String(title).replace(/"/g, '\\"'); - textContent = `[${textContent}](${href} "${safeTitle}")`; - } else { - textContent = `[${textContent}](${href})`; - } - break; - } - case "strike": - textContent = codeCombined - ? `${textContent}` - : `~~${textContent}~~`; - break; - case "underline": - textContent = `${textContent}`; - break; - case "subscript": - textContent = `${textContent}`; - break; - case "superscript": - textContent = `${textContent}`; - break; - case "highlight": { - // Preserve a null/empty color as a plain highlight (a bare - // with no background-color); only emit the style when a - // color is actually set, so a plain highlight is not forced to - // yellow on export. - const color = mark.attrs?.color; - textContent = color - ? 
`${textContent}` - : `${textContent}`; - break; - } - case "textStyle": - if (mark.attrs?.color) { - textContent = `${textContent}`; - } - break; - case "spoiler": - // Markdown has no native spoiler syntax, so emit the same raw - // inline HTML the editor-ext/MCP stack uses. The schema's Spoiler - // mark parses span[data-spoiler] back on import, so the mark - // survives the PM -> MD -> PM round-trip. - textContent = `${textContent}`; - break; - case "comment": { - // Emit the inline comment anchor so highlights round-trip. The - // schema's Comment mark parses span[data-comment-id] (attrs - // commentId/resolved). - const cid = mark.attrs?.commentId; - if (cid) { - const resolvedAttr = mark.attrs?.resolved - ? ` data-resolved="true"` - : ""; - textContent = `${textContent}`; - } - break; - } - } - } - } - return textContent; - - case "codeBlock": - const language = node.attrs?.language || ""; - // Strip ALL trailing newlines so the export is idempotent: marked - // re-adds exactly one trailing "\n" on import, so trimming only one - // here would let the text grow by "\n" on each round-trip. Removing - // every trailing newline makes repeated cycles stable. - const code = nodeContent - .map(processNode) - .join("") - .replace(/\n+$/, ""); - // CommonMark: an inner ``` run inside the code would prematurely close - // a 3-backtick fence (corrupting the block on re-import). Use an outer - // fence one backtick longer than the longest backtick run in the code - // (minimum 3) so the inner fence is always content. 
- const longestBacktickRun = (code.match(/`+/g) || []).reduce( - (max: number, run: string) => Math.max(max, run.length), - 0, - ); - const fence = "`".repeat(Math.max(3, longestBacktickRun + 1)); - return fence + language + "\n" + code + "\n" + fence; - - case "bulletList": - return nodeContent - .map((item: any) => processListItem(item, "-")) - .join("\n"); - - case "orderedList": - return nodeContent - .map((item: any, index: number) => - processListItem(item, `${index + 1}.`), - ) - .join("\n"); - - case "taskList": - return nodeContent.map((item: any) => processTaskItem(item)).join("\n"); - - case "taskItem": - // Delegate to the same helper used by taskList so multi-block and - // nested task items render and indent consistently. - return processTaskItem(node); - - case "listItem": - return nodeContent.map(processNode).join("\n"); - - case "blockquote": - // Prefix EVERY line of EVERY child with "> " and separate block-level - // children with a blank ">" line so code blocks / multi-paragraph - // quotes round-trip correctly. - return nodeContent - .map((n: any) => - processNode(n) - .split("\n") - .map((line: string) => (line.length ? `> ${line}` : ">")) - .join("\n"), - ) - .join("\n>\n"); - - case "horizontalRule": - return "---"; - - case "hardBreak": - // Two trailing spaces before the newline encode a markdown hard break; - // a bare "\n" would be reimported as a soft break and lost. - return " \n"; - - case "image": { - const imgAttrs = node.attrs || {}; - // A top-level image with layout/identity attrs beyond src/alt cannot be - // expressed by markdown `![](src)` — width/height/align/size/ - // attachmentId/aspectRatio would be silently dropped on export and lost - // on re-import. Emit the SAME schema-matching used inside columns - // (imageToHtml) so those attrs survive the round-trip. A bare image - // (only src/alt, optionally a title — which has no schema attr) keeps - // the lighter markdown form so existing image round-trip tests hold. 
- const hasLayoutAttrs = - imgAttrs.width != null || - imgAttrs.height != null || - imgAttrs.align || - imgAttrs.size != null || - imgAttrs.attachmentId || - imgAttrs.aspectRatio != null || - // A caption (issue #221) cannot be expressed by markdown `![](src)`, - // so route a captioned image through imageToHtml's raw form - // (data-caption) — the same lossless form used for the other - // Docmost-specific image attrs. - imgAttrs.caption; - if (hasLayoutAttrs) { - return imageToHtml(node); - } - const imgAlt = imgAttrs.alt || ""; - // Neutralize characters that could break out of the markdown image - // URL: spaces/newlines and parentheses would terminate the (...) target - // and let a stored src inject following markdown/HTML. Percent-encode - // them so the URL stays a single inert token. - const imgSrc = encodeMdUrl(imgAttrs.src); - // A bare image (only src/alt, optionally a title) has no caption, so the - // lighter markdown form is lossless here; captioned images took the - // imageToHtml branch above. - return `![${imgAlt}](${imgSrc})`; - } - - case "video": { - // Emit the schema-matching