"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.markdownToProseMirror = markdownToProseMirror; /** * Pure markdown -> ProseMirror conversion (extracted from docmost-sync's * `packages/docmost-client/src/lib/collaboration.ts`). * * Only the PURE converter path is vendored here: `markdownToProseMirror` * (marked -> HTML -> generateJSON) plus the two pre/post processors it needs * (`preprocessCallouts`, `bridgeTaskLists`). The collaboration/websocket * write-path (Hocuspocus, Yjs, `ws`, `withPageLock`, `sanitizeForYjs`) that * lives in the same upstream file is intentionally NOT vendored — the gitmost * server writes page bodies natively through the collab gateway (plan §3.3). */ const html_1 = require("@tiptap/html"); const jsdom_1 = require("jsdom"); const docmost_schema_1 = require("./docmost-schema"); // `marked` is ESM-only. Under this package's CommonJS build TS would otherwise // downlevel a literal `import()` to `require()`, which cannot load an ESM-only // module. Indirect through `Function` so the real dynamic `import()` survives // compilation and loads ESM from CommonJS at runtime in Node (same trick as // apps/server/src/core/ai-chat/tools/docmost-client.loader.ts). const esmImport = new Function("specifier", "return import(specifier)"); // Memoize the in-flight/loaded module so the dynamic import runs at most once. let markedPromise = null; /** * Lazily load the ESM-only `marked` module (cached). * * In the built CommonJS package (Node, jest with ts-jest) the `esmImport` * Function trick performs a real dynamic `import()` of the ESM module. Under * vitest, however, the transformed module is evaluated without a dynamic-import * callback, so `new Function('return import(...)')` throws "A dynamic import * callback was not specified"; there `require('marked')` succeeds because the * test runner's loader interops ESM. We therefore try the Function import first * and fall back to `require` so BOTH runtimes resolve `marked` transparently. */ async function loadMarked() { if (!markedPromise) { markedPromise = esmImport("marked") .catch(() => { // Function-trick import is unavailable (e.g. under vitest's evaluator): // fall back to require, which the test runner can interop for ESM. // eslint-disable-next-line @typescript-eslint/no-var-requires return require("marked"); }) .catch((err) => { // Do not cache a rejected import — allow the next call to retry. markedPromise = null; throw err; }); } return (await markedPromise).marked; } // Setup DOM environment for Tiptap HTML parsing in Node.js const dom = new jsdom_1.JSDOM("
"); global.window = dom.window; global.document = dom.window.document; // @ts-ignore global.Element = dom.window.Element; /** * Hard ceiling above which we skip callout preprocessing entirely. The linear * scanner below has no quadratic blow-up, but we still cap input defensively so * a pathological multi-megabyte payload cannot tie up the event loop; in that * case the markdown is passed through verbatim (callouts are simply not * detected) rather than risking a slow scan. */ const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB /** Matches an opening callout fence: `:::type` (type captured, lower-cased). */ const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/; /** Matches a bare closing callout fence: `:::`. */ const CALLOUT_CLOSE_RE = /^:::\s*$/; /** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */ const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/; /** * Pre-process Docmost-flavoured markdown: convert `:::type ... :::` * callout blocks (the syntax our markdown export produces) into HTML * divs that the callout extension parses. The inner content is rendered * through marked as regular markdown. * * Implemented as a single linear pass over the lines (no quadratic regex * rescan). It: * - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a * `:::` line that lives inside a code fence as a callout delimiter, so a * callout body that itself contains a fenced code block with a `:::` line is * no longer corrupted; * - matches an opening `:::type` line with the next CLOSING `:::` at the SAME * nesting level, supporting NESTED callouts via a depth counter (an inner * `:::type` opens a deeper level and consumes a matching `:::`); * - emits the same `text
` * wrapper is kept inside the `
// child (the shape marked emits: `
text