Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 08222345ef | |||
| 1a7b817250 | |||
| 124f5a45a2 | |||
| b751852425 | |||
| 65d81f745a | |||
| bfbd927866 | |||
| 77f5224b55 | |||
| e2a3b5fc4d | |||
| d7d8db2102 | |||
| e814bca243 | |||
| f1ab76e879 | |||
| 6dcc19ce59 | |||
| d6d7dd82f6 |
+10
-1
@@ -4,12 +4,21 @@
|
||||
data
|
||||
# compiled output
|
||||
/dist
|
||||
node_modules/
|
||||
node_modules
|
||||
|
||||
# git-sync compiled output (built in CI/Docker via `pnpm build`, never committed,
|
||||
# so src/ and prod can never silently diverge).
|
||||
packages/git-sync/build/
|
||||
|
||||
# prosemirror-markdown compiled output (built in CI/Docker via `pnpm build`,
|
||||
# never committed, so src/ and prod can never silently diverge).
|
||||
packages/prosemirror-markdown/build/
|
||||
|
||||
# mcp compiled output (built in CI/Docker via `pnpm build`, never committed, so
|
||||
# src/ and prod can never silently diverge). Matches the git-sync/prosemirror-
|
||||
# markdown convention; the package is private and rebuilt at deploy.
|
||||
packages/mcp/build/
|
||||
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
|
||||
@@ -293,7 +293,7 @@ Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirro
|
||||
- The version string shown in the UI comes from `APP_VERSION` (CI/Docker) or `git describe --tags --always` (local), resolved in `vite.config.ts` — not from `package.json`.
|
||||
- Server TS config is permissive (`noImplicitAny: false`, `strictNullChecks: false`, `no-explicit-any` lint disabled). Follow the existing relaxed style rather than tightening types broadly.
|
||||
- Dependency versions are heavily pinned via `pnpm.overrides` and `pnpm.patchedDependencies` (`scimmy`, `yjs`) in the root `package.json`. Don't bump pinned/patched deps casually; the patches and overrides exist for compatibility/security reasons.
|
||||
- **Adding/renaming/removing an MCP tool requires updating `SERVER_INSTRUCTIONS`** in `packages/mcp/src/index.ts` — the intent-routing guide MCP clients receive on initialize. This applies both to inline `server.registerTool(...)` calls in `index.ts` and to specs in `packages/mcp/src/tool-specs.ts`. Enforced by `packages/mcp/test/unit/server-instructions.test.mjs`, which fails when a registered tool is not mentioned in the guide (deliberate opt-outs go into its `EXCEPTIONS` list). Remember `packages/mcp/build/` is committed — rebuild after editing.
|
||||
- **Adding/renaming/removing an MCP tool requires updating `SERVER_INSTRUCTIONS`** in `packages/mcp/src/index.ts` — the intent-routing guide MCP clients receive on initialize. This applies both to inline `server.registerTool(...)` calls in `index.ts` and to specs in `packages/mcp/src/tool-specs.ts`. Enforced by `packages/mcp/test/unit/server-instructions.test.mjs`, which fails when a registered tool is not mentioned in the guide (deliberate opt-outs go into its `EXCEPTIONS` list). `packages/mcp/build/` is gitignored and rebuilt in CI/Docker via `pnpm build` (same convention as `git-sync`/`prosemirror-markdown`) — never commit it; rebuild locally after editing to run the tests.
|
||||
|
||||
## CI / release
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
},
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@docmost/prosemirror-markdown": "workspace:*",
|
||||
"@tiptap/core": "3.20.4",
|
||||
"@tiptap/extension-highlight": "3.20.4",
|
||||
"@tiptap/extension-image": "3.20.4",
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
*/
|
||||
import { dirname } from "node:path";
|
||||
import { sep } from "node:path";
|
||||
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
|
||||
import { parsePageFile, serializePageFile } from "@docmost/prosemirror-markdown";
|
||||
import type { GitSyncClient } from "./client.types.js";
|
||||
import { buildVaultLayout, type PageNode } from "./layout.js";
|
||||
import {
|
||||
|
||||
@@ -26,8 +26,11 @@
|
||||
* the gitmost server drives the engine in-process (there is no standalone CLI
|
||||
* entry point).
|
||||
*/
|
||||
import { type DocmostMdMeta } from "../lib/index.js";
|
||||
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
|
||||
import {
|
||||
type DocmostMdMeta,
|
||||
parsePageFile,
|
||||
serializePageFile,
|
||||
} from "@docmost/prosemirror-markdown";
|
||||
import type { GitSyncClient } from "./client.types.js";
|
||||
import type { DiffEntry } from "./git.js";
|
||||
import { VaultGit, DEFAULT_BRANCH } from "./git.js";
|
||||
|
||||
@@ -17,7 +17,7 @@ import {
|
||||
markdownToProseMirror,
|
||||
serializeDocmostMarkdownBody,
|
||||
type DocmostMdMeta,
|
||||
} from "../lib/index.js";
|
||||
} from "@docmost/prosemirror-markdown";
|
||||
|
||||
/**
|
||||
* Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
*/
|
||||
|
||||
// Pure converter (markdown <-> ProseMirror, file envelope, canonicalization).
|
||||
// Re-exported from the standalone `@docmost/prosemirror-markdown` package,
|
||||
// which is the single source of truth for the converter core; git-sync keeps
|
||||
// only the engine (vault/git/orchestrator) and re-surfaces the converter for
|
||||
// in-process consumers of the git-sync barrel.
|
||||
export {
|
||||
serializeDocmostMarkdown,
|
||||
serializeDocmostMarkdownBody,
|
||||
@@ -16,8 +20,8 @@ export {
|
||||
markdownToProseMirror,
|
||||
canonicalizeContent,
|
||||
docsCanonicallyEqual,
|
||||
} from "./lib/index.js";
|
||||
export type { DocmostMdMeta } from "./lib/index.js";
|
||||
} from "@docmost/prosemirror-markdown";
|
||||
export type { DocmostMdMeta } from "@docmost/prosemirror-markdown";
|
||||
|
||||
// Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize,
|
||||
// loop-guard body hash.
|
||||
@@ -123,4 +127,4 @@ export {
|
||||
} from "./engine/path-guard.js";
|
||||
export type { PathGuardIo, VaultPathUnsafeReason } from "./engine/path-guard.js";
|
||||
|
||||
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
|
||||
export { parsePageFile, serializePageFile } from "@docmost/prosemirror-markdown";
|
||||
|
||||
@@ -1,365 +0,0 @@
|
||||
/**
|
||||
* Pure markdown -> ProseMirror conversion.
|
||||
*
|
||||
* The converter path is `markdownToProseMirror` (marked -> HTML ->
|
||||
* generateJSON) plus the two pre/post processors it needs (`preprocessCallouts`,
|
||||
* `bridgeTaskLists`). The gitmost server writes the resulting page bodies
|
||||
* natively through the collab gateway, so no websocket/Yjs write-path lives
|
||||
* here.
|
||||
*/
|
||||
import { generateJSON } from "@tiptap/html";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { marked } from "marked";
|
||||
import { docmostExtensions } from "./docmost-schema.js";
|
||||
|
||||
// Module-load side effect: create one jsdom document and expose its `window`,
// `document` and `Element` on the Node global object, which is the DOM
// environment Tiptap's HTML parsing is given when running under Node.js.
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
Object.assign(global, {
  window: dom.window,
  document: dom.window.document,
  Element: dom.window.Element,
});
|
||||
|
||||
/**
|
||||
* Hard ceiling above which callout preprocessing is skipped entirely and the
|
||||
* markdown is passed through verbatim (callouts are simply not detected), so a
|
||||
* pathological multi-megabyte payload cannot tie up the event loop. The limit
|
||||
* is compared against `string.length`, i.e. UTF-16 code units rather than
|
||||
* encoded bytes; `bridgeTaskLists` applies the same cap to its HTML input.
|
||||
*/
|
||||
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 * 2^20 = 4,194,304
|
||||
|
||||
/** Matches an opening callout fence: `:::type` (type captured; callers lower-case it). */
|
||||
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
|
||||
/** Matches a bare closing callout fence: `:::` (optional trailing whitespace). */
|
||||
const CALLOUT_CLOSE_RE = /^:::\s*$/;
|
||||
/**
|
||||
* Matches an Obsidian-native callout opener: `> [!type]` (type captured). An
|
||||
* optional title after the type is allowed but ignored (the Docmost callout
|
||||
* schema has no title). The body is the following contiguous blockquote lines.
|
||||
*/
|
||||
const CALLOUT_BQ_OPEN_RE = /^>\s*\[!(\w+)\]/;
|
||||
/** Matches any line that starts with `>` (a blockquote continuation line). */
|
||||
const BLOCKQUOTE_LINE_RE = /^>/;
|
||||
/** Matches a line starting with a code-fence marker (3+ backticks or tildes): group 1 = indent, group 2 = marker run. */
|
||||
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
|
||||
/**
 * Pre-process Docmost-flavoured markdown so callouts survive the
 * marked -> HTML step. Two syntaxes are recognised, and both are replaced by
 * `<div data-type="callout" data-callout-type="TYPE">…</div>` whose body has
 * been rendered through marked:
 *
 *   - `:::type … :::` blocks. An opener is paired with the closing `:::` at the
 *     SAME nesting level (a depth counter tracks inner `:::type` openers), and
 *     the body is transformed recursively so nested callouts are converted too.
 *     An opener with no matching close is emitted as a literal line.
 *   - Obsidian-style `> [!type]` openers. The body is the following contiguous
 *     `>`-prefixed lines with ONE blockquote level stripped, also transformed
 *     recursively. An ordinary blockquote does not match and is left alone.
 *
 * Fenced code regions (``` or ~~~) are tracked so that a `:::` / `> [!type]`
 * line inside a code fence is never treated as a callout delimiter. A fence is
 * closed only by a line whose marker uses the same character, is at least as
 * long as the opener, and is followed by nothing but whitespace — the
 * CommonMark rule that marked also applies. (Previously a line such as
 * "```js" inside an open fence was mistaken for its closing fence, which could
 * desynchronise this tracker from marked and rewrite `:::` lines that are
 * really code.)
 *
 * Cost note: every `:::type` opener scans forward for its close, so a long run
 * of UNTERMINATED openers re-scans the tail of the input once per opener. The
 * size cap below bounds that work.
 *
 * @param markdown Raw markdown text.
 * @returns The markdown with callouts replaced by HTML `<div>` blocks, or the
 *   input unchanged when it is longer than `MAX_CALLOUT_PREPROCESS_BYTES`.
 */
async function preprocessCallouts(markdown: string): Promise<string> {
  // Defensive cap: skip preprocessing for pathologically large inputs.
  if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return markdown;
  }

  // True when `line` closes a code fence that was opened with `marker`.
  const closesFence = (line: string, marker: string): boolean => {
    const fence = line.match(CODE_FENCE_RE);
    if (!fence) return false;
    const run = fence[2];
    return (
      run[0] === marker[0] &&
      run.length >= marker.length &&
      // A closing fence carries no info string: only whitespace may follow.
      line.slice(fence[0].length).trim() === ""
    );
  };

  // Collect the body of a `:::type` callout whose opener sits just before
  // `start`. `close` is the index of the matching `:::`, or -1 when the
  // callout is unterminated.
  const scanCalloutBody = (
    lines: string[],
    start: number,
  ): { body: string[]; close: number } => {
    const body: string[] = [];
    let depth = 1;
    let fenceMarker = ""; // non-empty while inside a code fence
    for (let j = start; j < lines.length; j++) {
      const bodyLine = lines[j];
      if (fenceMarker !== "") {
        if (closesFence(bodyLine, fenceMarker)) fenceMarker = "";
      } else {
        const fence = bodyLine.match(CODE_FENCE_RE);
        if (fence) {
          fenceMarker = fence[2];
        } else if (CALLOUT_OPEN_RE.test(bodyLine)) {
          depth++;
        } else if (CALLOUT_CLOSE_RE.test(bodyLine)) {
          depth--;
          // Matching close for THIS callout: it is not part of the body.
          if (depth === 0) return { body, close: j };
        }
      }
      body.push(bodyLine);
    }
    return { body, close: -1 };
  };

  // Render a callout body (recursively, so nested callouts are handled) and
  // wrap it in the callout div.
  const renderCallout = async (
    type: string,
    bodyLines: string[],
  ): Promise<string> => {
    const inner = await transform(bodyLines);
    const renderedInner = await marked.parse(inner);
    return `\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`;
  };

  // Transform one slice of lines, converting the callouts found at its top
  // level and copying every other line through verbatim.
  const transform = async (lines: string[]): Promise<string> => {
    const out: string[] = [];
    let fenceMarker = ""; // the exact run of backticks/tildes that opened the fence
    let i = 0;

    while (i < lines.length) {
      const line = lines[i];

      // Inside a code fence only its closing fence is significant.
      if (fenceMarker !== "") {
        out.push(line);
        if (closesFence(line, fenceMarker)) fenceMarker = "";
        i++;
        continue;
      }

      const fenceOpen = line.match(CODE_FENCE_RE);
      if (fenceOpen) {
        fenceMarker = fenceOpen[2];
        out.push(line);
        i++;
        continue;
      }

      const open = line.match(CALLOUT_OPEN_RE);
      if (open) {
        const { body, close } = scanCalloutBody(lines, i + 1);
        if (close === -1) {
          // Unterminated callout: keep the opener as literal text.
          out.push(line);
          i++;
        } else {
          out.push(await renderCallout(open[1].toLowerCase(), body));
          i = close + 1; // skip past the closing `:::`
        }
        continue;
      }

      const bqOpen = line.match(CALLOUT_BQ_OPEN_RE);
      if (bqOpen) {
        const body: string[] = [];
        let j = i + 1;
        while (j < lines.length && BLOCKQUOTE_LINE_RE.test(lines[j])) {
          body.push(lines[j].replace(/^>\s?/, ""));
          j++;
        }
        out.push(await renderCallout(bqOpen[1].toLowerCase(), body));
        i = j;
        continue;
      }

      out.push(line);
      i++;
    }

    return out.join("\n");
  };

  return transform(markdown.split("\n"));
}
|
||||
|
||||
/**
 * Rewrite checkbox lists in rendered HTML into the markup TipTap task lists
 * are parsed from: the list gets `data-type="taskList"`, each item gets
 * `data-type="taskItem"` plus `data-checked="true" | "false"`, and the
 * checkbox `<input>` elements themselves are removed.
 *
 * A `<ul>` or `<ol>` is converted only when it has at least one direct `<li>`
 * and EVERY direct `<li>` owns a checkbox — either as a direct child or as a
 * direct child of one of the `<li>`'s direct `<p>` children. Checkboxes nested
 * deeper (for example inside a child list) do not count, so a bullet item that
 * merely contains a task sub-list is left alone; that sub-list is evaluated on
 * its own. Mixed and ordinary lists are not modified.
 *
 * A qualifying `<ol>` is replaced by a `<ul>` carrying the same attributes and
 * children, so the element no longer has the `ol` tag once it is marked as a
 * task list.
 *
 * @param html Rendered HTML.
 * @returns The bridged HTML (serialised `<body>` content), or `html` unchanged
 *   when it contains no checkbox input or is longer than
 *   `MAX_CALLOUT_PREPROCESS_BYTES`.
 */
function bridgeTaskLists(html: string): string {
  // Skip the DOM parse when there is nothing to bridge (no checkbox markup)
  // or when the payload exceeds the shared defensive size cap.
  const mentionsCheckbox = /type=["']?checkbox/i.test(html);
  if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return html;
  }

  const doc = new JSDOM(html).window.document;

  // Checkboxes owned by `li` itself: direct <input type="checkbox"> children
  // and those sitting directly inside one of its direct <p> children.
  const ownCheckboxes = (li: Element): Element[] => {
    const boxes: Element[] = [];
    for (const child of Array.from(li.children)) {
      if (child.tagName === "P") {
        boxes.push(
          ...Array.from(
            child.querySelectorAll(":scope > input[type='checkbox']"),
          ),
        );
      } else if (
        child.tagName === "INPUT" &&
        child.getAttribute("type") === "checkbox"
      ) {
        boxes.push(child);
      }
    }
    return boxes;
  };

  // Snapshot the candidate lists up front; replacing an <ol> below must not
  // disturb the iteration.
  for (const list of Array.from(doc.querySelectorAll("ul, ol"))) {
    const items = Array.from(list.children).filter(
      (el) => el.tagName === "LI",
    );
    if (items.length === 0) continue;

    const boxesPerItem = items.map((li) => ownCheckboxes(li));
    if (boxesPerItem.some((boxes) => boxes.length === 0)) continue;

    // Swap a numbered checklist's <ol> for a <ul>, keeping attributes and
    // moving every child node across.
    let taskList: Element = list;
    if (list.tagName === "OL") {
      taskList = doc.createElement("ul");
      for (const { name, value } of Array.from(list.attributes)) {
        taskList.setAttribute(name, value);
      }
      for (const node of Array.from(list.childNodes)) {
        taskList.appendChild(node);
      }
      list.replaceWith(taskList);
    }
    taskList.setAttribute("data-type", "taskList");

    for (let k = 0; k < items.length; k++) {
      const li = items[k];
      const boxes = boxesPerItem[k];
      // The first checkbox decides the checked state; extras are only removed.
      const first = boxes[0] ?? null;
      li.setAttribute("data-type", "taskItem");
      const isChecked =
        first != null &&
        (first.hasAttribute("checked") || (first as any).checked);
      li.setAttribute("data-checked", isChecked ? "true" : "false");
      boxes.forEach((box) => box.remove());
    }
  }

  return doc.body.innerHTML;
}
|
||||
|
||||
/**
 * Recursively remove content-less paragraph nodes from a generated doc.
 *
 * Background (from the converter's round-trip requirements): a block-level
 * atom whose markdown form is inline — e.g. the block `image`'s
 * `![alt](src)`, or a bare media element — is wrapped in a `<p>` by marked and
 * then hoisted out of it by the schema, leaving an empty paragraph sibling that
 * would show up as a phantom blank gap on the next export.
 *
 * Only nodes with `type === "paragraph"` whose `content` is absent or an empty
 * array are dropped. Every other node is kept, and any node that has a
 * `content` array is processed recursively and returned as a shallow copy with
 * the cleaned `content`. Nodes without a `content` array (atoms, leaves, or a
 * falsy input) are returned as-is.
 *
 * Schema-validity guard: if a container had children but ALL of them were
 * empty paragraphs, the first one is kept so the container's content never
 * becomes `[]` (containers such as table cells, blockquotes, callouts and the
 * doc root require at least one block).
 *
 * @param node A ProseMirror JSON node (or any value).
 * @returns The cleaned node.
 */
function stripEmptyParagraphs(node: any): any {
  // Atom / leaf / falsy input: nothing to recurse into.
  if (!Array.isArray(node?.content)) {
    return node;
  }

  const hasNoContent = (n: any): boolean =>
    !Array.isArray(n.content) || n.content.length === 0;

  const cleanedChildren: any[] = node.content.map((child: any) =>
    stripEmptyParagraphs(child),
  );
  const kept = cleanedChildren.filter(
    (child: any) =>
      !(child && child.type === "paragraph" && hasNoContent(child)),
  );

  // Everything was an empty paragraph: keep ONE so the container stays valid.
  if (kept.length === 0 && cleanedChildren.length > 0) {
    return { ...node, content: [cleanedChildren[0]] };
  }
  return { ...node, content: kept };
}
|
||||
|
||||
/**
 * Convert markdown to a ProseMirror doc using the full Docmost schema.
 *
 * Pipeline: callout preprocessing -> marked (markdown to HTML) -> task-list
 * bridging -> `generateJSON` with `docmostExtensions` -> empty-paragraph
 * stripping.
 *
 * @param markdownContent Markdown source text.
 * @returns The generated ProseMirror JSON document.
 */
export async function markdownToProseMirror(
  markdownContent: string,
): Promise<any> {
  const html = await marked.parse(await preprocessCallouts(markdownContent));
  const generated = generateJSON(bridgeTaskLists(html), docmostExtensions);
  return stripEmptyParagraphs(generated);
}
|
||||
@@ -2,7 +2,7 @@ import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { applyPushActions, LAST_PUSHED_REF } from '../src/engine/push';
|
||||
import { bodyHash } from '../src/engine/loop-guard';
|
||||
import type { ApplyPushDeps, PushActions } from '../src/engine/push';
|
||||
import { parsePageFile, serializePageFile } from '../src/lib/page-file';
|
||||
import { parsePageFile, serializePageFile } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// The Docmost space this vault mirrors (native files carry no spaceId; the run
|
||||
// supplies it). A CREATE targets this space.
|
||||
|
||||
@@ -5,7 +5,7 @@ import type {
|
||||
MetaSide,
|
||||
RenameMoveAction,
|
||||
} from '../src/engine/push';
|
||||
import type { DocmostMdMeta } from '../src/lib/index';
|
||||
import type { DocmostMdMeta } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// FS→Docmost push #3 (SPEC §5/§6/§16). `classifyRenameMoves` is the PURE half of
|
||||
// the move/rename apply: it resolves each `{pageId, oldPath, newPath}` into the
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { computePushActions } from '../src/engine/push';
|
||||
import type { DiffEntry, MetaSide } from '../src/engine/push';
|
||||
import type { DocmostMdMeta } from '../src/lib/index';
|
||||
import type { DocmostMdMeta } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// FS→Docmost push, FIRST increment (SPEC §6). `computePushActions` is the PURE
|
||||
// half: it classifies each `git diff --name-status` row into a Docmost action by
|
||||
|
||||
@@ -8,7 +8,7 @@ import { runCycle } from "../src/engine/cycle";
|
||||
import type { CycleFs } from "../src/engine/cycle";
|
||||
import { VaultGit } from "../src/engine/git";
|
||||
import type { Settings } from "../src/engine/settings";
|
||||
import { serializeDocmostMarkdownBody } from "../src/lib/index";
|
||||
import { serializeDocmostMarkdownBody } from "@docmost/prosemirror-markdown";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import { firstDivergence } from './roundtrip-helpers';
|
||||
import { applyPullActions } from '../src/engine/pull';
|
||||
import type { PullActions, ApplyPullActionsDeps } from '../src/engine/pull';
|
||||
import type { DeletionDecision } from '../src/engine/reconcile';
|
||||
import { serializePageFile, parsePageFile } from '../src/lib/page-file';
|
||||
import { serializePageFile, parsePageFile } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// Engine-layer coverage gaps flagged by the PR #119 reviewers (test-strategy
|
||||
// report, Module 2 `src/engine`). Each block targets a specific under-covered
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { readExisting } from '../src/engine/pull';
|
||||
import { serializePageFile } from '../src/lib/page-file';
|
||||
import { serializePageFile } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// R-Pull-1 (test-strategy report §5): `readExisting` now takes injectable IO
|
||||
// (`listTracked` / `readFile`), so its parsing + skip rules are unit-testable
|
||||
|
||||
@@ -6,7 +6,7 @@ import type {
|
||||
MetaSide,
|
||||
RenameMoveAction,
|
||||
} from '../src/engine/push.js';
|
||||
import type { DocmostMdMeta } from '../src/lib/index.js';
|
||||
import type { DocmostMdMeta } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// RED-TEAM finding #4 (two facets):
|
||||
// (a) buildVaultLayout disambiguation is ORDER-DEPENDENT: which of two
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
import type { PushDeps } from '../src/engine/push';
|
||||
import type { Settings } from '../src/engine/settings';
|
||||
import { runCycle, type RunCycleDeps } from '../src/engine/cycle';
|
||||
import { serializePageFile } from '../src/lib/page-file';
|
||||
import { serializePageFile } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// Red-team confirmations for PR #119 (git-sync). Each test asserts the DESIRED
|
||||
// behavior, so it FAILS today iff the bug is real.
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
import { readFile } from 'node:fs/promises';
|
||||
import { readdirSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
convertProseMirrorToMarkdown,
|
||||
markdownToProseMirror,
|
||||
docsCanonicallyEqual,
|
||||
} from 'docmost-client';
|
||||
|
||||
// Resolve the fixture directories from this module's own URL (import.meta.url) so the suite does not depend on the process CWD.
|
||||
const here = dirname(fileURLToPath(import.meta.url));
|
||||
const CORPUS_DIR = join(here, 'fixtures', 'corpus');
|
||||
const KNOWN_LIMITATIONS_DIR = join(here, 'fixtures', 'known-limitations');
|
||||
|
||||
/**
 * Run a single document through export -> import -> export.
 *
 * @param doc ProseMirror JSON document to start from.
 * @returns `md1` (first export), `doc2` (the re-imported document) and `md2`
 *   (the export of `doc2`).
 */
async function roundTrip(doc: any) {
  const md1 = convertProseMirrorToMarkdown(doc);
  const doc2 = await markdownToProseMirror(md1);
  return { md1, md2: convertProseMirrorToMarkdown(doc2), doc2 };
}
|
||||
|
||||
describe('round-trip corpus (SPEC §11)', () => {
  // The corpus is listed synchronously while the suite is being collected, so
  // every fixture is registered as its own `it` with the file name in its title.
  const isJsonFixture = (name: string): boolean => name.endsWith('.json');
  const files = readdirSync(CORPUS_DIR).filter(isJsonFixture).sort();

  it('has a non-empty corpus', () => {
    expect(files.length).toBeGreaterThan(0);
  });

  files.forEach((name) => {
    it(`${name}: markdown byte-stable AND canonically stable`, async () => {
      const fixturePath = join(CORPUS_DIR, name);
      const doc = JSON.parse(await readFile(fixturePath, 'utf8'));
      const { md1, md2, doc2 } = await roundTrip(doc);

      // 1) Byte stability: the second export must equal the first.
      expect(md2, `${name}: markdown not byte-stable`).toBe(md1);

      // 2) Canonical stability of the re-imported document.
      const canonicallyStable = docsCanonicallyEqual(doc, doc2);
      expect(
        canonicallyStable,
        `${name}: document not canonically stable`,
      ).toBe(true);
    });
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// KNOWN CONVERTER LIMITATIONS (isolated so they do NOT make CI red).
|
||||
//
|
||||
// SPEC §11 explicitly flags images and diagrams as high round-trip risk. These
|
||||
// fixtures are kept OUT of the green corpus above and asserted with `it.fails`
|
||||
// so the documented divergence is locked in (the test FAILS if the converter
|
||||
// ever starts round-tripping them — at which point promote the fixture into
|
||||
// the corpus). The precise divergences for `image-diagrams.json` are:
|
||||
//
|
||||
// * A BLOCK-LEVEL image preceded by a paragraph is NOT byte-stable on the
|
||||
// FIRST re-export. The HTML re-parser hoists the block <img> out of its
|
||||
// line and leaves an empty paragraph behind, so `paragraph` + `![alt](src)`
|
||||
// re-imports as paragraph + empty-paragraph + image; the empty paragraph
|
||||
// adds one blank line, so export #2 grows by a one-time "\n\n" (md1 !== md2).
|
||||
// This is NOT non-convergence: the growth happens exactly ONCE. The doc
|
||||
// CONVERGES to a fixpoint after one extra `export→import→export` pass — the
|
||||
// empty paragraph is already present after the first import, so export #2
|
||||
// and export #3 are byte-identical (md2 === md3, verified).
|
||||
//
|
||||
// * drawio / excalidraw diagrams gain `data-align="center"` on the second
|
||||
// export: the schema's diagram `align` attribute has a NON-null default of
|
||||
// "center", which materializes on import; the converter only emits
|
||||
// data-align when set, so it appears on export #2 but not #1. Like the
|
||||
// image case, this is one-time and converges after one extra pass.
|
||||
//
|
||||
// * A STANDALONE block image (no preceding paragraph) IS byte-stable from
|
||||
// export #1 (md1 === md2) — but it is still NOT canonically stable: on
|
||||
// import the bare <img> is wrapped, gaining a leading EMPTY paragraph, so
|
||||
// the canonical doc differs by that spurious paragraph node even though the
|
||||
// markdown bytes match.
|
||||
//
|
||||
// Resolution (SPEC §11, "normalize-on-write"): rather than deep-fixing the
|
||||
// converter, the engine runs ONE `export→import→export` pass when writing into
|
||||
// the vault; from that fixpoint onward the form is byte-stable, so git sees no
|
||||
// phantom diff. The green corpus above avoids these one-time asymmetries by
|
||||
// pre-authoring the materialized defaults (e.g. `align: "center"` on the
|
||||
// diagrams in 06-diagrams.json) so a single pass is already at the fixpoint.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('round-trip KNOWN LIMITATIONS (SPEC §11 image/diagram risk)', () => {
  const title =
    'image-diagrams.json is NOT byte-stable on export #1 (block image hoist + diagram align default; converges after one extra pass — SPEC §11 normalize-on-write)';

  // `it.fails` passes while the body fails: the assertion below is expected to
  // fail today (documented divergence). If the converter starts round-tripping
  // this fixture the test goes red, prompting promotion into the green corpus.
  it.fails(title, async () => {
    const fixturePath = join(KNOWN_LIMITATIONS_DIR, 'image-diagrams.json');
    const doc = JSON.parse(await readFile(fixturePath, 'utf8'));
    const { md1, md2 } = await roundTrip(doc);
    expect(md2).toBe(md1);
  });
});
|
||||
@@ -8,7 +8,7 @@ import { runPush, LAST_PUSHED_REF } from '../src/engine/push';
|
||||
import type { PushDeps } from '../src/engine/push';
|
||||
import { VaultGit } from '../src/engine/git';
|
||||
import type { Settings } from '../src/engine/settings';
|
||||
import { serializeDocmostMarkdownBody } from '../src/lib/index';
|
||||
import { serializeDocmostMarkdownBody } from '@docmost/prosemirror-markdown';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest';
|
||||
import { runPush, LAST_PUSHED_REF, DOCMOST_BRANCH } from '../src/engine/push';
|
||||
import type { PushDeps } from '../src/engine/push';
|
||||
import type { Settings } from '../src/engine/settings';
|
||||
import { serializePageFile } from '../src/lib/page-file';
|
||||
import { serializePageFile } from '@docmost/prosemirror-markdown';
|
||||
|
||||
/** A native page file: `gitmost_id` frontmatter + clean body (title = filename). */
|
||||
function fileFor(pageId: string, body = 'body'): string {
|
||||
|
||||
@@ -2,8 +2,8 @@ import { describe, expect, it } from 'vitest';
|
||||
import { stabilizePageFile, type PageMeta } from '../src/engine/stabilize.js';
|
||||
// markdownToProseMirror lives in collaboration.ts; importing it mutates the
|
||||
// global DOM via jsdom at module load time (required for @tiptap/html under Node).
|
||||
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
|
||||
import { parseDocmostMarkdown } from '../src/lib/markdown-document.js';
|
||||
import { markdownToProseMirror } from '@docmost/prosemirror-markdown';
|
||||
import { parseDocmostMarkdown } from '@docmost/prosemirror-markdown';
|
||||
|
||||
// stabilize.ts (SPEC §11 normalize-on-write) was 0% covered (only the gated e2e
|
||||
// touched it). stabilizePageFile is import-testable: build a small ProseMirror
|
||||
@@ -22,16 +22,27 @@ const meta: PageMeta = {
|
||||
|
||||
describe('stabilizePageFile — normalize-on-write fixpoint (SPEC §11)', () => {
|
||||
it('reaches a byte-identical fixpoint after one extra export/import/export pass', async () => {
|
||||
// A diagram is the canonical one-pass asymmetry: drawio's `align` default of
|
||||
// "center" materializes on import, so a NAIVE export differs on the second
|
||||
// export. stabilizePageFile runs the convergence pass at write time, so the
|
||||
// written body must already be at the fixpoint: re-importing its body and
|
||||
// A diagram inside a column is the canonical one-pass asymmetry: on the
|
||||
// raw-HTML/columns path a diagram's `align` default of "center" materializes
|
||||
// on import, so a NAIVE export differs on the second export. (#293 canon #8
|
||||
// made the TOP-LEVEL diagram form — `<!--drawio …-->` — byte-stable by
|
||||
// omitting the default, so the asymmetry now lives only on the columns path
|
||||
// where the schema `<div data-type="drawio">` form is retained.)
|
||||
// stabilizePageFile runs the convergence pass at write time, so the written
|
||||
// body must already be at the fixpoint: re-importing its body and
|
||||
// re-stabilizing yields the exact same bytes.
|
||||
const content = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: 'drawio', attrs: { src: '/d.drawio' } },
|
||||
{
|
||||
type: 'columns',
|
||||
attrs: { layout: 'two_equal' },
|
||||
content: [
|
||||
{ type: 'column', content: [{ type: 'drawio', attrs: { src: '/d.drawio' } }] },
|
||||
{ type: 'column', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'side' }] }] },
|
||||
],
|
||||
},
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'outro' }] },
|
||||
],
|
||||
};
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { getSchema } from "@tiptap/core";
|
||||
|
||||
import { markdownToProseMirror } from "../src/lib/markdown-to-prosemirror";
|
||||
import { docmostExtensions } from "../src/lib/docmost-schema";
|
||||
import { markdownToProseMirror } from "@docmost/prosemirror-markdown";
|
||||
import { docmostExtensions } from "@docmost/prosemirror-markdown";
|
||||
|
||||
// REGRESSION LOCK for the stripEmptyParagraphs schema-validity guard.
|
||||
//
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,133 +0,0 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
||||
import { isInitializeRequest } from "@modelcontextprotocol/sdk/types.js";
|
||||
import { createDocmostMcpServer } from "./index.js";
|
||||
/**
 * Build a stateful Streamable-HTTP handler for the Docmost MCP server. The
 * embedding host (the gitmost NestJS server) bridges its raw Node req/res into
 * `handleRequest`. One McpServer + transport is created per MCP session and
 * kept alive between requests, keyed by the `mcp-session-id` header.
 *
 * @param config EITHER a static config object (used unchanged for every
 *   session) OR a resolver function `(req) => config` that is awaited once per
 *   session at `initialize` to bind that session to the request's identity.
 * @param options Optional settings. `options.identify(req)`, when supplied, is
 *   awaited at `initialize` and again on every later request for that session;
 *   the two results must be strictly equal (`===`) or the request is rejected
 *   with 401 (anti-session-fixation).
 * @returns `{ handleRequest }` where `handleRequest(req, res, parsedBody)`
 *   serves one HTTP request. Early rejections are written as JSON-RPC errors
 *   (400 for a missing/unknown session, 401 for identity failures).
 */
export function createMcpHttpHandler(config, options = {}) {
    // Session registries are Maps, NOT plain objects: the key is the
    // client-controlled `mcp-session-id` header, and on a plain `{}` a value
    // such as "constructor" or "__proto__" resolves to an inherited
    // Object.prototype member, which would be mistaken for a live transport.
    // One transport (and one McpServer) per MCP session, keyed by session id.
    const transports = new Map();
    // Last activity timestamp per session id, used for idle eviction.
    const lastSeen = new Map();
    // Anti-session-fixation: the opaque identity key bound to each session at
    // initialize. A later request for that session whose key differs is rejected.
    const sessionIdentity = new Map();
    // Drop every trace of a session so none of the registries can leak entries.
    const forgetSession = (sid) => {
        transports.delete(sid);
        lastSeen.delete(sid);
        sessionIdentity.delete(sid);
    };
    // Write a JSON-RPC error and end the response. Used for the 400/401 paths so
    // every early rejection is a well-formed JSON-RPC error, not a torn response.
    const sendJsonRpcError = (res, statusCode, code, message) => {
        res.statusCode = statusCode;
        res.setHeader("Content-Type", "application/json");
        res.end(JSON.stringify({
            jsonrpc: "2.0",
            error: { code, message },
            id: null,
        }));
    };
    // Idle session TTL (ms): a session with no activity for this long is evicted.
    // Defaults to 30 min; overridable via MCP_SESSION_IDLE_MS.
    const idleTtlMs = (() => {
        const parsed = parseInt(process.env.MCP_SESSION_IDLE_MS ?? "", 10);
        return Number.isFinite(parsed) && parsed > 0 ? parsed : 30 * 60 * 1000;
    })();
    // Periodically close transports idle longer than the TTL. unref() so this
    // timer never keeps the process alive.
    const sweepIntervalMs = 5 * 60 * 1000;
    const sweepTimer = setInterval(() => {
        const now = Date.now();
        for (const [sid, transport] of transports) {
            if (now - (lastSeen.get(sid) ?? 0) <= idleTtlMs)
                continue;
            // Unregister first so the session is gone even if close() fails.
            forgetSession(sid);
            // Best-effort close: the session is already unregistered, and a
            // rejection here must not surface as an unhandled rejection.
            Promise.resolve()
                .then(() => transport.close())
                .catch(() => { });
        }
    }, sweepIntervalMs);
    sweepTimer.unref();
    async function handleRequest(req, res, parsedBody) {
        const sessionId = req.headers["mcp-session-id"];
        const method = (req.method || "GET").toUpperCase();
        let transport = sessionId ? transports.get(sessionId) : undefined;
        if (method === "POST" && !transport) {
            // A new session may only be created by an initialize request without a
            // session id.
            if (sessionId || !isInitializeRequest(parsedBody)) {
                sendJsonRpcError(res, 400, -32000, "Bad Request: no valid session ID provided");
                return;
            }
            // Resolve the per-session config from the request (per-user identity) when
            // a resolver was supplied; otherwise use the static config unchanged. The
            // resolver may throw (e.g. bad credentials) — surface a clean 401, never
            // a created session.
            let sessionConfig;
            let identity;
            try {
                sessionConfig =
                    typeof config === "function" ? await config(req) : config;
                if (options.identify)
                    identity = await options.identify(req);
            }
            catch (err) {
                sendJsonRpcError(res, 401, -32001, err instanceof Error ? err.message : "Unauthorized");
                return;
            }
            transport = new StreamableHTTPServerTransport({
                sessionIdGenerator: () => randomUUID(),
                onsessioninitialized: (sid) => {
                    transports.set(sid, transport);
                    lastSeen.set(sid, Date.now());
                    // Bind the resolved identity to the new session id for anti-fixation.
                    if (identity !== undefined)
                        sessionIdentity.set(sid, identity);
                },
            });
            transport.onclose = () => {
                const sid = transport.sessionId;
                // Also drops the lastSeen entry, so sessions closed outside the
                // idle sweep do not leave a timestamp behind.
                if (sid)
                    forgetSession(sid);
            };
            const server = createDocmostMcpServer(sessionConfig);
            await server.connect(transport);
            await transport.handleRequest(req, res, parsedBody);
            return;
        }
        if (!transport) {
            sendJsonRpcError(res, 400, -32000, "Bad Request: no valid session ID provided");
            return;
        }
        // Anti-session-fixation: a request reusing an existing session id must
        // present credentials/token that resolve to the SAME identity bound at
        // initialize, otherwise reject with 401. This prevents hijacking another
        // user's established session by replaying its session id with different
        // credentials.
        if (options.identify && sessionId && sessionIdentity.has(sessionId)) {
            let presented;
            try {
                presented = await options.identify(req);
            }
            catch (err) {
                sendJsonRpcError(res, 401, -32001, err instanceof Error ? err.message : "Unauthorized");
                return;
            }
            if (presented !== sessionIdentity.get(sessionId)) {
                sendJsonRpcError(res, 401, -32001, "Credentials do not match the user that owns this MCP session.");
                return;
            }
        }
        // Routing to an existing transport: refresh its idle timestamp.
        if (sessionId)
            lastSeen.set(sessionId, Date.now());
        await transport.handleRequest(req, res, parsedBody);
    }
    return { handleRequest };
}
|
||||
@@ -1,801 +0,0 @@
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { z } from "zod";
|
||||
import { readFileSync } from "fs";
|
||||
import { fileURLToPath } from "url";
|
||||
import { dirname, join } from "path";
|
||||
import { DocmostClient } from "./client.js";
|
||||
import { parseNodeArg } from "./lib/parse-node-arg.js";
|
||||
import { SHARED_TOOL_SPECS } from "./tool-specs.js";
|
||||
// Re-export the client and its config type so embedding hosts (e.g. the gitmost
|
||||
// NestJS server) can `import('@docmost/mcp')` and construct a DocmostClient
|
||||
// directly — for the credentials variant OR the per-user getToken variant.
|
||||
export { DocmostClient } from "./client.js";
|
||||
// Re-export the zod-agnostic shared tool-spec registry so the in-app AI-SDK
|
||||
// service can read it off the loaded module (it cannot import the ESM package's
|
||||
// internals directly; it goes through loadDocmostMcp()).
|
||||
export { SHARED_TOOL_SPECS } from "./tool-specs.js";
|
||||
// Read version from package.json
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
const packageJson = JSON.parse(readFileSync(join(__dirname, "../package.json"), "utf-8"));
|
||||
const VERSION = packageJson.version;
|
||||
// Configuration for an MCP server instance is the DocmostMcpConfig union
|
||||
// (credentials OR getToken) defined and re-exported above. The factory below is
|
||||
// fully side-effect-free on import: it reads no environment variables and opens
|
||||
// no transport. The standalone stdio entrypoint (stdio.ts) and the HTTP handler
|
||||
// (http.ts) supply this config and own the process/transport lifecycle.
|
||||
// --- Modern McpServer Implementation ---
|
||||
// Editing guide surfaced to MCP clients in the initialize result so they can
|
||||
// pick the right tool by intent and avoid resending whole documents.
|
||||
//
|
||||
// MAINTENANCE RULE: when you ADD, RENAME, or REMOVE a tool (either an inline
|
||||
// server.registerTool(...) here or a spec in tool-specs.ts), you MUST update
|
||||
// this guide so the new tool is routed by intent. This is enforced by
|
||||
// test/unit/server-instructions.test.mjs, which fails when a registered tool
|
||||
// name is not mentioned below (see its EXCEPTIONS list for the rare opt-outs).
|
||||
// Exported for that test.
|
||||
// The guide is one string of five intent sections — READ, EDIT, PAGES,
// COMMENTS, HISTORY — after a one-line header; each part ends in "\n" except
// the last. It is passed as `instructions` to the McpServer constructor.
export const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent.\n" +
|
||||
"READ: find a page -> search (workspace-wide full-text); list -> list_pages / list_spaces. Locate blocks and their ids CHEAPLY -> get_outline (compact top-level map; start here, not get_page_json). One block's subtree -> get_node (by attrs.id, or \"#<index>\" for tables, which carry no id). Whole page -> get_page (Markdown, lossy; inline <span data-comment-id> tags are comment anchors — markup, not text) or get_page_json (lossless ProseMirror with block ids). Hand a huge page (with images) to an external consumer without pulling it through the model context -> stash_page (returns a short-lived anonymous URL).\n" +
|
||||
"EDIT: fix wording/typos/numbers -> edit_page_text (find/replace inside blocks, no node id needed). Change ONE block (paragraph/heading/callout/etc.) structurally -> patch_node (by attrs.id from get_outline). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Tables -> table_get / table_update_cell / table_insert_row / table_delete_row (address by \"#<index>\" from get_outline; table nodes have no attrs.id). Images -> insert_image (add from a web URL) / replace_image (swap an existing image). Footnotes -> insert_footnote. Bulk/structural rewrite -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Complex/scripted rewrite (multiple coordinated edits, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes.\n" +
|
||||
"PAGES: new -> create_page (Markdown). Rename (title only) -> rename_page. Move -> move_page. Delete -> delete_page (SOFT delete — the page goes to trash and is restorable; nothing is permanent). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Sharing -> share_page / unshare_page / list_shares; share_page makes the page PUBLICLY accessible — do it only when explicitly asked.\n" +
|
||||
"COMMENTS: create_comment is always inline and requires an EXACT selection — contiguous text from a single block, <=250 chars (fails rather than leaving an unanchored comment); reply to a thread via parentCommentId. Propose a concrete text fix for one-click human approval -> create_comment with suggestedText (the exact plain-text replacement for the selection; the selection must then be UNIQUE in the page — extend it with context if needed); prefer this over editing directly when the change is subjective or needs the author's sign-off. Manage -> list_comments, update_comment, resolve_comment (resolve/reopen, reversible — prefer over delete to close), delete_comment, check_new_comments.\n" +
|
||||
"HISTORY: review what changed -> diff_page_versions (a historyId vs current, or two versions). List saved versions -> list_page_history. Undo a bad edit -> restore_page_version (writes a past version back as current; itself revertible). Lossless markdown round-trip (download, edit, re-upload, incl. comment anchors) -> export_page_markdown / import_page_markdown.";
|
||||
// Helper to format JSON responses: wraps `data` as a single MCP text content
// item holding its pretty-printed (2-space) JSON.
// JSON.stringify returns `undefined` (not a string) for `undefined`, functions
// and symbols; fall back to the JSON literal "null" so `text` is always a
// string, as a text content item requires.
const jsonContent = (data) => ({
    content: [{ type: "text", text: JSON.stringify(data, null, 2) ?? "null" }],
});
|
||||
/**
|
||||
* Create a fully configured Docmost MCP server. Side-effect-free: it does not
|
||||
* read environment variables and does not connect any transport — the caller
|
||||
* decides how to expose it (stdio or HTTP). The client talks to Docmost over
|
||||
* REST + the collaboration WebSocket using the provided service-account
|
||||
* credentials and auto-re-authenticates.
|
||||
*/
|
||||
export function createDocmostMcpServer(config) {
|
||||
// Pass the whole config union through: the client branches internally on
|
||||
// credentials vs. getToken, so both the external /mcp (creds) and the
|
||||
// internal per-user (getToken) paths are wired here unchanged.
|
||||
const docmostClient = new DocmostClient(config);
|
||||
const server = new McpServer({
|
||||
name: "docmost-mcp",
|
||||
version: VERSION,
|
||||
}, { instructions: SERVER_INSTRUCTIONS });
|
||||
// Register a tool from the shared, zod-agnostic spec registry. The spec owns
|
||||
// the canonical name + model-facing description + (optional) schema builder;
|
||||
// only the execute body is supplied per call. buildShape is invoked with THIS
|
||||
// package's zod (v3); the in-app layer passes its own zod (v4).
|
||||
//
|
||||
// The spec's schema builder returns a plain ZodRawShape (Record<string,
|
||||
// unknown> in the shared module since it must stay zod-agnostic), so the
|
||||
// McpServer.registerTool overloads cannot infer the execute arg's shape from
|
||||
// it. We type `execute` loosely and cast the call through `any`; runtime
|
||||
// behaviour is unchanged — each execute body destructures the same fields the
|
||||
// builder declares.
|
||||
const registerShared = (spec, execute) => server.registerTool(spec.mcpName, spec.buildShape
|
||||
? { description: spec.description, inputSchema: spec.buildShape(z) }
|
||||
: { description: spec.description }, execute);
|
||||
// Tool: get_workspace
|
||||
registerShared(SHARED_TOOL_SPECS.getWorkspace, async () => {
|
||||
const workspace = await docmostClient.getWorkspace();
|
||||
return jsonContent(workspace);
|
||||
});
|
||||
// Tool: list_spaces
|
||||
registerShared(SHARED_TOOL_SPECS.listSpaces, async () => {
|
||||
const spaces = await docmostClient.getSpaces();
|
||||
return jsonContent(spaces);
|
||||
});
|
||||
// Tool: list_pages
|
||||
// INTENTIONAL per-transport divergence (not in the shared registry): this
|
||||
// transport exposes a `tree:true` mode that returns the full nested hierarchy;
|
||||
// the in-app copy keeps the same tree option but is worded for the in-app agent.
|
||||
// Kept per-layer so each side can tune its own guidance.
|
||||
server.registerTool("list_pages", {
|
||||
description: "List most recent pages in a space ordered by updatedAt (descending). " +
|
||||
"Returns a bounded list (default 50, max 100) — use search for lookups " +
|
||||
"in large spaces. Pass tree:true (with spaceId) to instead get the " +
|
||||
"space's full page hierarchy as a nested tree.",
|
||||
inputSchema: {
|
||||
spaceId: z.string().optional(),
|
||||
limit: z
|
||||
.number()
|
||||
.int()
|
||||
.min(1)
|
||||
.max(100)
|
||||
.optional()
|
||||
.describe("Max pages to return (default 50, max 100)"),
|
||||
tree: z
|
||||
.boolean()
|
||||
.optional()
|
||||
.describe("When true, return the space's full page hierarchy as a nested tree (each node has a children array) instead of the recent-by-updatedAt flat list. Requires spaceId; ignores limit."),
|
||||
},
|
||||
}, async ({ spaceId, limit, tree }) => {
|
||||
const result = await docmostClient.listPages(spaceId, limit ?? 50, tree ?? false);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: get_page
|
||||
server.registerTool("get_page", {
|
||||
description: "Get page details with content converted to Markdown. The conversion is " +
|
||||
"LOSSY (block ids, exact table/callout structure are approximated); for a " +
|
||||
"lossless representation use get_page_json. Inline <span data-comment-id> " +
|
||||
"tags in the markdown are comment highlight anchors (also present for " +
|
||||
"RESOLVED threads) — treat them as markup, not page text.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
},
|
||||
}, async ({ pageId }) => {
|
||||
const page = await docmostClient.getPage(pageId);
|
||||
return jsonContent(page);
|
||||
});
|
||||
// Tool: get_page_json
|
||||
registerShared(SHARED_TOOL_SPECS.getPageJson, async ({ pageId }) => {
|
||||
const page = await docmostClient.getPageJson(pageId);
|
||||
return jsonContent(page);
|
||||
});
|
||||
// Tool: get_outline
|
||||
registerShared(SHARED_TOOL_SPECS.getOutline, async ({ pageId }) => {
|
||||
const result = await docmostClient.getOutline(pageId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: get_node
|
||||
registerShared(SHARED_TOOL_SPECS.getNode, async ({ pageId, nodeId }) => {
|
||||
const result = await docmostClient.getNode(pageId, nodeId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_get
|
||||
server.registerTool("table_get", {
|
||||
description: "Read a table as a matrix. Returns {rows, cols, cells (text[][]), " +
|
||||
"cellIds (paragraph id per cell, or null)}. `table` = `#<index>` from " +
|
||||
"get_outline, or any block id inside the table. Use cellIds with " +
|
||||
"patch_node for rich-formatted cell edits. `cols` is the FIRST row's " +
|
||||
"width; ragged tables may vary per row, so use the per-row length of " +
|
||||
"`cells` for each row.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
},
|
||||
}, async ({ pageId, table }) => {
|
||||
const result = await docmostClient.getTable(pageId, table);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_insert_row
|
||||
// NOT in the shared registry: this transport names the table argument `table`,
|
||||
// while the in-app tool names it `tableRef` (ai-chat-tools.service.ts). Sharing
|
||||
// one buildShape would rename a public MCP parameter, so the table row/cell
|
||||
// tools stay per-transport by design.
|
||||
server.registerTool("table_insert_row", {
|
||||
description: "Insert a row of plain-text cells into a table. `table` = `#<index>` or " +
|
||||
"a block id inside it. `cells` = text per column (padded to the table's " +
|
||||
"column count; error if more cells than columns). `index` = 0-based " +
|
||||
"insert position (0 inserts before the header); omit to append at the end.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
cells: z.array(z.string()),
|
||||
index: z.number().int().optional(),
|
||||
},
|
||||
}, async ({ pageId, table, cells, index }) => {
|
||||
const result = await docmostClient.tableInsertRow(pageId, table, cells, index);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_delete_row
|
||||
// NOT shared — same `table` (here) vs `tableRef` (in-app) parameter-name
|
||||
// divergence as table_insert_row.
|
||||
server.registerTool("table_delete_row", {
|
||||
description: "Delete the row at 0-based `index` from a table (`table` = `#<index>` or " +
|
||||
"a block id inside it). Refuses to delete the table's only row. An " +
|
||||
"out-of-range `index` throws. Deleting `index` 0 removes the header row, " +
|
||||
"and the next row becomes the new header.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
index: z.number().int(),
|
||||
},
|
||||
}, async ({ pageId, table, index }) => {
|
||||
const result = await docmostClient.tableDeleteRow(pageId, table, index);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: table_update_cell
|
||||
// NOT shared — same `table` (here) vs `tableRef` (in-app) parameter-name
|
||||
// divergence as table_insert_row.
|
||||
server.registerTool("table_update_cell", {
|
||||
description: "Set the plain-text content of cell [row,col] (0-based) in a table " +
|
||||
"(`table` = `#<index>` or a block id inside it). Replaces the cell's " +
|
||||
"content with a single text paragraph; for rich formatting use patch_node " +
|
||||
"on the cell's paragraph id from table_get.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
table: z.string().min(1),
|
||||
row: z.number().int(),
|
||||
col: z.number().int(),
|
||||
text: z.string(),
|
||||
},
|
||||
}, async ({ pageId, table, row, col, text }) => {
|
||||
const result = await docmostClient.tableUpdateCell(pageId, table, row, col, text);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: create_page
|
||||
server.registerTool("create_page", {
|
||||
description: "Create a new page from Markdown in a space. Pass parentPageId to nest " +
|
||||
"it under a parent; omit it to create at the space root.",
|
||||
inputSchema: {
|
||||
title: z.string().min(1).describe("Title of the page"),
|
||||
content: z.string().min(1).describe("Markdown content"),
|
||||
spaceId: z.string().min(1),
|
||||
parentPageId: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe("Optional parent page ID to nest under"),
|
||||
},
|
||||
}, async ({ title, content, spaceId, parentPageId }) => {
|
||||
const result = await docmostClient.createPage(title, content, spaceId, parentPageId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: update_page_json
|
||||
server.registerTool("update_page_json", {
|
||||
description: "Replace a page's content with a raw ProseMirror JSON document " +
|
||||
"(lossless write: preserves the block ids, callouts, tables and " +
|
||||
"attributes you pass in). Typical flow: get_page_json -> modify the " +
|
||||
"JSON -> update_page_json. Keep existing node ids intact so heading " +
|
||||
"anchors and history stay stable. Minimal full-doc example: " +
|
||||
'{"type":"doc","content":[{"type":"paragraph","content":' +
|
||||
'[{"type":"text","text":"Hi"}]}]}. `content` may be a JSON object or a ' +
|
||||
"JSON string (both accepted), and is OPTIONAL: omit it to update only " +
|
||||
"the title (though prefer rename_page for a title-only change). " +
|
||||
"Supplying neither content nor title is an error.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1).describe("ID of the page to update"),
|
||||
content: z
|
||||
.any()
|
||||
.optional()
|
||||
.describe('ProseMirror document {"type":"doc","content":[...]} (JSON object or ' +
|
||||
"JSON string). Omit to rename only."),
|
||||
title: z.string().optional().describe("Optional new title"),
|
||||
},
|
||||
}, async ({ pageId, content, title }) => {
|
||||
// Only parse/validate the document when it was actually supplied; when it
|
||||
// is omitted, pass it straight through so the client performs a title-only
|
||||
// (or no-op) update.
|
||||
let doc;
|
||||
if (content === undefined || content === null) {
|
||||
doc = undefined;
|
||||
}
|
||||
else {
|
||||
// String -> JSON.parse (throwing on invalid); object passes through.
|
||||
doc = parseNodeArg(content, "content was a string but not valid JSON");
|
||||
}
|
||||
const result = await docmostClient.updatePageJson(pageId, doc, title);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: export_page_markdown
|
||||
server.registerTool("export_page_markdown", {
|
||||
description: "Export a page to a single self-contained, lossless Docmost-flavoured " +
|
||||
"Markdown file (custom extensions): YAML-free meta header, body with " +
|
||||
"inline comment anchors and diagrams, and a trailing comments-thread " +
|
||||
"block. Designed for a download -> edit body -> import_page_markdown " +
|
||||
"round-trip that preserves everything, including comment highlights. " +
|
||||
"Comment THREADS are preserved in the file but are not re-pushed to the " +
|
||||
"server on import.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
},
|
||||
}, async ({ pageId }) => {
|
||||
const md = await docmostClient.exportPageMarkdown(pageId);
|
||||
return { content: [{ type: "text", text: md }] };
|
||||
});
|
||||
// Tool: import_page_markdown
|
||||
registerShared(SHARED_TOOL_SPECS.importPageMarkdown, async ({ pageId, markdown }) => {
|
||||
const res = await docmostClient.importPageMarkdown(pageId, markdown);
|
||||
return jsonContent(res);
|
||||
});
|
||||
// Tool: copy_page_content
|
||||
registerShared(SHARED_TOOL_SPECS.copyPageContent, async ({ sourcePageId, targetPageId }) => {
|
||||
const result = await docmostClient.copyPageContent(sourcePageId, targetPageId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: rename_page
|
||||
server.registerTool("rename_page", {
|
||||
description: "Rename a page (change its title only) without touching or resending " +
|
||||
"its content.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1).describe("ID of the page to rename"),
|
||||
title: z.string().min(1).describe("New title"),
|
||||
},
|
||||
}, async ({ pageId, title }) => {
|
||||
const result = await docmostClient.renamePage(pageId, title);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: edit_page_text
|
||||
registerShared(SHARED_TOOL_SPECS.editPageText, async ({ pageId, edits }) => {
|
||||
const result = await docmostClient.editPageText(pageId, edits);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: stash_page — returns a resource_link (NOT embedded text) so the doc
|
||||
// body never enters the model context. Registered directly (not via
|
||||
// registerShared) because that helper only emits text content. Also returns
|
||||
// `structuredContent` carrying the full documented `{uri, sha256, size, images}`
|
||||
// shape alongside the resource_link, so MCP clients receive the blob's sha256
|
||||
// (its ETag, for integrity) and mirror counts, not just the link.
|
||||
server.registerTool(SHARED_TOOL_SPECS.stashPage.mcpName, {
|
||||
description: SHARED_TOOL_SPECS.stashPage.description,
|
||||
inputSchema: SHARED_TOOL_SPECS.stashPage.buildShape(z),
|
||||
}, async ({ pageId }) => {
|
||||
const result = await docmostClient.stashPage(pageId);
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "resource_link",
|
||||
uri: result.uri,
|
||||
name: "page.json",
|
||||
mimeType: "application/json",
|
||||
size: result.size,
|
||||
},
|
||||
],
|
||||
// Mirror the full documented result shape ({ uri, size, sha256, images })
|
||||
// as structuredContent so MCP clients get the blob's sha256 (its ETag, for
|
||||
// integrity) and the mirror counts, not just the resource_link.
|
||||
structuredContent: {
|
||||
uri: result.uri,
|
||||
sha256: result.sha256,
|
||||
size: result.size,
|
||||
images: result.images,
|
||||
},
|
||||
};
|
||||
});
|
||||
// Tool: patch_node — schema + description from the shared registry (identical
|
||||
// across both transports). The execute body keeps its own parseNodeArg
|
||||
// normalization (the model sometimes serializes `node` as a JSON string).
|
||||
registerShared(SHARED_TOOL_SPECS.patchNode, async ({ pageId, nodeId, node }) => {
|
||||
const parsedNode = parseNodeArg(node);
|
||||
const result = await docmostClient.patchNode(pageId, nodeId, parsedNode);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: insert_node — schema + description from the shared registry. As with
|
||||
// patch_node, the execute body retains parseNodeArg on the incoming node.
|
||||
registerShared(SHARED_TOOL_SPECS.insertNode, async ({ pageId, node, position, anchorNodeId, anchorText }) => {
|
||||
const parsedNode = parseNodeArg(node);
|
||||
const result = await docmostClient.insertNode(pageId, parsedNode, {
|
||||
position,
|
||||
anchorNodeId,
|
||||
anchorText,
|
||||
});
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: delete_node
|
||||
registerShared(SHARED_TOOL_SPECS.deleteNode, async ({ pageId, nodeId }) => {
|
||||
const result = await docmostClient.deleteNode(pageId, nodeId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: insert_image
|
||||
server.registerTool("insert_image", {
|
||||
description: "Download an image from a web (http/https) URL and insert it into " +
|
||||
"a page in one step. By default " +
|
||||
"appends the image at the end of the page. With replaceText, replaces the " +
|
||||
"first top-level block whose text contains that string (handy for " +
|
||||
'swapping a text placeholder like "[image: foo.png]" for the real image). ' +
|
||||
"With afterText, inserts the image right after the first block containing " +
|
||||
"that string. Preserves all other block ids.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
imageUrl: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("http(s) URL of the image to download and upload"),
|
||||
align: z.enum(["left", "center", "right"]).optional(),
|
||||
alt: z.string().optional(),
|
||||
replaceText: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe("Replace the first top-level block whose text contains this string with the image"),
|
||||
afterText: z
|
||||
.string()
|
||||
.optional()
|
||||
.describe("Insert the image right after the first top-level block whose text contains this string"),
|
||||
},
|
||||
}, async ({ pageId, imageUrl, align, alt, replaceText, afterText }) => {
|
||||
const result = await docmostClient.insertImage(pageId, imageUrl, {
|
||||
align,
|
||||
alt,
|
||||
replaceText,
|
||||
afterText,
|
||||
});
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: replace_image
|
||||
server.registerTool("replace_image", {
|
||||
description: "Replace an existing image on a page with a new image fetched from a web " +
|
||||
"(http/https) URL: uploads the new file as a NEW " +
|
||||
"attachment (fresh clean URL that renders and busts browser caches), then " +
|
||||
"repoints every image node referencing the old attachmentId (recursively, " +
|
||||
"incl. callouts/tables) via the live document, preserving comments, " +
|
||||
"alignment and alt. The old attachment is left as an unreferenced orphan " +
|
||||
"(Docmost has no API to delete a single attachment; it is removed only when " +
|
||||
"the page/space is deleted). In-place byte overwrite is avoided because some " +
|
||||
"Docmost versions corrupt the attachment (HTTP 500) on overwrite.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
attachmentId: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("attachmentId of the image currently in the page to replace"),
|
||||
imageUrl: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("http(s) URL of the new image to download"),
|
||||
align: z.enum(["left", "center", "right"]).optional(),
|
||||
alt: z.string().optional(),
|
||||
},
|
||||
}, async ({ pageId, attachmentId, imageUrl, align, alt }) => {
|
||||
const result = await docmostClient.replaceImage(pageId, attachmentId, imageUrl, {
|
||||
align,
|
||||
alt,
|
||||
});
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: share_page
|
||||
// INTENTIONAL per-transport divergence (not shared): the in-app copy adds a
|
||||
// security-confirmation framing ("only share when the user explicitly asked,
|
||||
// since this exposes the page to anyone with the link") tuned for the in-app
|
||||
// agent; this transport keeps the plain public-URL wording.
|
||||
server.registerTool("share_page", {
|
||||
description: "Make a page publicly accessible (idempotent) and return its public " +
|
||||
"URL. The URL format is <app>/share/<key>/p/<slugId>. This exposes the " +
|
||||
"page content to ANYONE with the URL — do it only when explicitly asked.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1).describe("ID of the page to share"),
|
||||
searchIndexing: z
|
||||
.boolean()
|
||||
.optional()
|
||||
.describe("Allow search engines to index the page (default true)"),
|
||||
},
|
||||
}, async ({ pageId, searchIndexing }) => {
|
||||
const result = await docmostClient.sharePage(pageId, searchIndexing ?? true);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: unshare_page
|
||||
registerShared(SHARED_TOOL_SPECS.unsharePage, async ({ pageId }) => {
|
||||
const result = await docmostClient.unsharePage(pageId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: list_shares
|
||||
registerShared(SHARED_TOOL_SPECS.listShares, async () => {
|
||||
const result = await docmostClient.listShares();
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: move_page
|
||||
server.registerTool("move_page", {
|
||||
description: "Move a page under a new parent (nesting) or to the space root.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
parentPageId: z
|
||||
.string()
|
||||
.nullable()
|
||||
.optional()
|
||||
.describe("Target parent page ID. Pass 'null' or empty string to move to root."),
|
||||
position: z
|
||||
.string()
|
||||
.min(5)
|
||||
.optional()
|
||||
.describe("fractional-index position key; min 5 chars; omit to append at the end."),
|
||||
},
|
||||
}, async ({ pageId, parentPageId, position }) => {
|
||||
const finalParentId = parentPageId === "" || parentPageId === "null" ? null : parentPageId;
|
||||
// Cheap cycle guard: a page cannot be moved directly under itself.
|
||||
// (Deeper descendant-cycle detection is intentionally out of scope.)
|
||||
if (finalParentId !== null && finalParentId === pageId) {
|
||||
throw new Error("cannot move a page under itself");
|
||||
}
|
||||
const result = await docmostClient.movePage(pageId, finalParentId || null, position);
|
||||
// Require POSITIVE confirmation: the live /pages/move success shape is
|
||||
// exactly { success: true, status: 200 }. An empty body, a 204, or any odd
|
||||
// shape lacking success === true must NOT be reported as a successful move,
|
||||
// so we surface the raw API result instead of declaring success.
|
||||
if (!(result && typeof result === "object" && result.success === true)) {
|
||||
throw new Error(`Failed to move page ${pageId}: ${JSON.stringify(result)}`);
|
||||
}
|
||||
return jsonContent({
|
||||
message: `Successfully moved page ${pageId} to parent ${finalParentId || "root"}`,
|
||||
result,
|
||||
});
|
||||
});
|
||||
// Tool: delete_page
|
||||
server.registerTool("delete_page", {
|
||||
description: "Delete a single page by ID. SOFT delete only: the page is moved to " +
|
||||
"trash and can be restored; nothing is permanently deleted.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
},
|
||||
}, async ({ pageId }) => {
|
||||
await docmostClient.deletePage(pageId);
|
||||
return {
|
||||
content: [
|
||||
{ type: "text", text: `Successfully deleted page ${pageId}` },
|
||||
],
|
||||
};
|
||||
});
|
||||
// --- Comment tools (ported from upstream PR #3 by Max Nikitin) ---
|
||||
// Tool: list_comments
// Registers the `list_comments` tool: forwards the page id to
// docmostClient.listComments and wraps whatever it returns with jsonContent.
server.registerTool("list_comments", {
    description: "List ALL comments on a page in one call (pagination is handled " +
        "internally), including RESOLVED threads — filter by resolvedAt when you " +
        "need only open ones. Content is returned as Markdown.",
    inputSchema: {
        // Reject an empty id at the schema layer, matching the `.min(1)` guard
        // the other page-scoped tools in this file apply to `pageId`.
        pageId: z.string().min(1).describe("ID of the page"),
    },
}, async ({ pageId }) => {
    const comments = await docmostClient.listComments(pageId);
    return jsonContent(comments);
});
|
||||
// Tool: create_comment
// INTENTIONAL per-transport divergence (not shared): the in-app copy tunes the
// guidance for the in-app agent (e.g. "retry with a corrected EXACT selection"
// and "Reversible via the comment UI"); this transport keeps its own wording.
server.registerTool("create_comment", {
    description: "Create a new comment on a page. The comment is ALWAYS inline and is " +
        "anchored to (highlights) its `selection` text — there are no page-level " +
        "comments. Content is provided as Markdown and automatically converted. " +
        "A top-level comment REQUIRES an exact `selection`; if the selection " +
        "cannot be found in the page the call fails (no orphan comment is left). " +
        "Replies (parentCommentId set) inherit the parent's anchor and take no " +
        "selection. You may also attach a `suggestedText` proposing a replacement " +
        "for the `selection`; a human applies (or rejects) it from the UI. When " +
        "`suggestedText` is set the `selection` MUST occur exactly once in the " +
        "page — expand it with surrounding context if it is ambiguous.",
    inputSchema: {
        // Reject an empty id at the schema layer, matching the `.min(1)` guard
        // the other page-scoped tools in this file apply to `pageId`.
        pageId: z.string().min(1).describe("ID of the page to comment on"),
        content: z.string().min(1).describe("Comment content in Markdown format"),
        selection: z
            .string()
            .min(1)
            // Enforce the 250-char cap that this field's own description documents.
            .max(250)
            .optional()
            .describe("EXACT contiguous text from a single paragraph/block to anchor the " +
                "comment on (<=250 chars). Required for a top-level comment; omit " +
                "only when replying via parentCommentId."),
        parentCommentId: z
            .string()
            .optional()
            .describe("Parent comment ID to create a reply (max 2 nesting levels)"),
        suggestedText: z
            .string()
            .min(1)
            .max(2000)
            .optional()
            .describe("Optional proposed replacement (PLAIN TEXT) for the `selection`, " +
                "applied by a human via the UI (never auto-applied). REQUIRES a " +
                "`selection`; NOT allowed on a reply. When set, the `selection` must " +
                "be UNIQUE in the page — expand it with surrounding context (still " +
                "<=250 chars) if it occurs more than once, or the call is refused."),
    },
}, async ({ pageId, content, selection, parentCommentId, suggestedText }) => {
    // A whitespace-only selection counts as missing.
    const hasSelection = Boolean(selection && selection.trim());
    if (!parentCommentId && !hasSelection) {
        throw new Error("create_comment: a 'selection' (exact text to anchor on) is required for a top-level comment; omit it only when replying via parentCommentId.");
    }
    // Past the guard above, a top-level comment always carries a selection, so
    // the only remaining suggestedText conflict is attaching it to a reply.
    // (The former "suggestedText requires a selection" branch could never run.)
    if (suggestedText !== undefined && parentCommentId) {
        throw new Error("create_comment: 'suggestedText' cannot be attached to a reply; it applies only to a top-level inline comment.");
    }
    const result = await docmostClient.createComment(pageId, content, "inline", selection, parentCommentId, suggestedText);
    return jsonContent(result);
});
|
||||
// Tool: update_comment
|
||||
server.registerTool("update_comment", {
|
||||
description: "Update an existing comment's content. Only the comment creator can " +
|
||||
"update it. Content is provided as Markdown.",
|
||||
inputSchema: {
|
||||
commentId: z.string().min(1).describe("ID of the comment to update"),
|
||||
content: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("New comment content in Markdown format"),
|
||||
},
|
||||
}, async ({ commentId, content }) => {
|
||||
const result = await docmostClient.updateComment(commentId, content);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: delete_comment
|
||||
server.registerTool("delete_comment", {
|
||||
description: "Delete a comment. Only the comment creator or space admin can delete it.",
|
||||
inputSchema: {
|
||||
commentId: z.string().min(1).describe("ID of the comment to delete"),
|
||||
},
|
||||
}, async ({ commentId }) => {
|
||||
await docmostClient.deleteComment(commentId);
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Successfully deleted comment ${commentId}`,
|
||||
},
|
||||
],
|
||||
};
|
||||
});
|
||||
// Tool: resolve_comment
|
||||
server.registerTool("resolve_comment", {
|
||||
description: "Resolve (close) or reopen a comment thread. Only top-level comments can " +
|
||||
"be resolved — the server rejects resolving a reply. Reversible: pass " +
|
||||
"resolved=false to reopen. Resolving keeps the thread and its replies " +
|
||||
"(unlike delete_comment, which permanently removes them).",
|
||||
inputSchema: {
|
||||
commentId: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("ID of the top-level comment thread to resolve or reopen"),
|
||||
resolved: z
|
||||
.boolean()
|
||||
.optional()
|
||||
.default(true)
|
||||
.describe("true (default) marks the thread resolved/closed; false reopens it"),
|
||||
},
|
||||
}, async ({ commentId, resolved }) => {
|
||||
const result = await docmostClient.resolveComment(commentId, resolved);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: check_new_comments
// Registers the `check_new_comments` tool: validates the timestamp, then
// forwards the arguments to docmostClient.checkNewComments and wraps the
// result with jsonContent.
server.registerTool("check_new_comments", {
    description: "Check for new comments across pages in a space since a given timestamp. " +
        "Optionally scope to a page subtree (folder). Returns only comments " +
        "created after the specified time.",
    inputSchema: {
        // Reject an empty id at the schema layer, in line with the `.min(1)`
        // guards used on the other id parameters in this file.
        spaceId: z.string().min(1).describe("Space ID to check for new comments"),
        since: z
            .string()
            .min(1)
            .describe("ISO 8601 timestamp — only return comments created after this time (e.g. '2026-03-10T00:00:00Z')"),
        parentPageId: z
            .string()
            .optional()
            .describe("Optional root page ID to scope the check to a subtree (folder). " +
                "Only pages under this parent will be checked."),
    },
}, async ({ spaceId, since, parentPageId }) => {
    // Reject an unparseable timestamp up front: otherwise the comparison
    // against NaN silently treats every comment as "not new" and the tool
    // returns zero results without signalling the bad input.
    if (Number.isNaN(Date.parse(since))) {
        throw new Error(`Invalid 'since' timestamp: ${JSON.stringify(since)} — expected an ISO 8601 date (e.g. '2026-03-10T00:00:00Z')`);
    }
    const result = await docmostClient.checkNewComments(spaceId, since, parentPageId);
    return jsonContent(result);
});
|
||||
// Tool: search
|
||||
// INTENTIONAL per-transport divergence (not shared): the in-app `searchPages`
|
||||
// runs a semantic + keyword hybrid (RRF) with in-process access control and a
|
||||
// different schema (limit 1-20); this transport is a plain REST full-text search
|
||||
// (limit up to 100). Different behaviour AND schema, so kept per-layer.
|
||||
server.registerTool("search", {
|
||||
description: "Full-text search for pages and content across the whole workspace. " +
|
||||
"Results are bounded by `limit` (1-100; when omitted the server applies " +
|
||||
"its own default).",
|
||||
inputSchema: {
|
||||
query: z.string().min(1).describe("Search query"),
|
||||
limit: z
|
||||
.number()
|
||||
.int()
|
||||
.min(1)
|
||||
.max(100)
|
||||
.optional()
|
||||
.describe("Max results to return (max 100)"),
|
||||
},
|
||||
}, async ({ query, limit }) => {
|
||||
// The tool exposes no spaceId filter, so pass undefined for the client's
|
||||
// optional spaceId parameter and forward limit into its correct slot.
|
||||
const result = await docmostClient.search(query, undefined, limit);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: docmost_transform
|
||||
// INTENTIONAL per-transport divergence (not shared): the in-app `transformPage`
|
||||
// deliberately omits the `deleteComments` schema field (comment-deletion
|
||||
// guardrail) and carries a much shorter description; this transport exposes the
|
||||
// full helper catalogue. Different schema, so kept per-layer.
|
||||
server.registerTool("docmost_transform", {
|
||||
description: "Edit a page by running an arbitrary JS transform `(doc, ctx) => doc` " +
|
||||
"against its LIVE ProseMirror document, with a diff preview and page " +
|
||||
"history as the safety net. By default dryRun=true: returns a diff " +
|
||||
"preview WITHOUT writing. Set dryRun=false to apply (atomic, won't " +
|
||||
"clobber concurrent edits). `doc` is the lossless ProseMirror document " +
|
||||
"({type:'doc',content:[...]}); return a new doc of the same shape. " +
|
||||
"`ctx` gives you: comments (the page's comments, each {id, content " +
|
||||
"(markdown), selection, type}); log (array; console.log pushes to it); " +
|
||||
"consume(id) (mark a comment id as consumed — those are deleted when " +
|
||||
"deleteComments=true after a successful apply); and helpers: " +
|
||||
"blockText(node) (plain text), walk(node, fn) (depth-first over all " +
|
||||
"nodes incl. callouts/tables/lists), getList(doc, predicate) (find a " +
|
||||
"node even without attrs.id), insertMarkerAfter(doc, anchor, marker, " +
|
||||
"{beforeBlock}) (insert a plain unmarked text run after anchor, " +
|
||||
"mark-safe), setCalloutRange(doc, n) (sync a [1]…[K] callout range to " +
|
||||
"[1]…[n]), noteItem(inlineNodes) (wrap inline nodes in a listItem with a " +
|
||||
"fresh id), mdToInlineNodes(markdown) (comment markdown -> inline nodes), " +
|
||||
"commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
|
||||
"comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
|
||||
"footnote numbering + the single bottom list from reference order, drop " +
|
||||
"orphans/duplicates — runs AUTOMATICALLY on the transform RESULT, so the " +
|
||||
"applied (and dryRun-previewed) doc is always footnote-canonical; a dryRun " +
|
||||
"diff may therefore show footnote tidy-ups your script did not make, and " +
|
||||
"it is idempotent after the first run), and " +
|
||||
"insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
|
||||
"marker + dedup'd definition, list derived). Footnote convention: markers are " +
|
||||
"plain '[N]' text in the body; the notes are an orderedList under a " +
|
||||
"heading whose text is 'Примечания переводчика' (that is only the DEFAULT " +
|
||||
"notesHeading — pass the notesHeading option to the helpers to use a " +
|
||||
"heading matching the page's language). The transform runs " +
|
||||
"sandboxed (no require/process/fs/network, 5s timeout) and must return a " +
|
||||
"{type:'doc'} node.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
transformJs: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("A JS function `(doc, ctx) => doc` (expression-arrow or " +
|
||||
"parenthesized function). It receives a clone of the live doc and " +
|
||||
"ctx (comments, log, consume(id), helpers: blockText/walk/getList/" +
|
||||
"insertMarkerAfter/setCalloutRange/noteItem/mdToInlineNodes/" +
|
||||
"commentsToFootnotes/canonicalizeFootnotes/insertInlineFootnote) " +
|
||||
"and must return a {type:'doc'} node."),
|
||||
dryRun: z
|
||||
.boolean()
|
||||
.optional()
|
||||
.default(true)
|
||||
.describe("Preview only (no write) when true (default)."),
|
||||
deleteComments: z
|
||||
.boolean()
|
||||
.optional()
|
||||
.default(false)
|
||||
.describe("After a successful apply, delete every comment id passed to " +
|
||||
"ctx.consume(id)."),
|
||||
},
|
||||
}, async ({ pageId, transformJs, dryRun, deleteComments }) => {
|
||||
const result = await docmostClient.transformPage(pageId, transformJs, {
|
||||
dryRun,
|
||||
deleteComments,
|
||||
});
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: insert_footnote
|
||||
server.registerTool("insert_footnote", {
|
||||
description: "Insert an AUTHOR-INLINE footnote: you specify only WHERE (anchorText) " +
|
||||
"and WHAT (text). The footnote marker is placed right after anchorText in " +
|
||||
"the body, and the bottom footnotes list + the numbering are derived " +
|
||||
"deterministically server-side. You do NOT assign a number, and you " +
|
||||
"never see or edit the footnotes list — so footnotes cannot end up out " +
|
||||
"of order, orphaned, or as a raw '[^id]' block. If a footnote with the " +
|
||||
"SAME text already exists, its number is REUSED (one definition, several " +
|
||||
"references). The write is atomic and won't clobber concurrent edits; if " +
|
||||
"anchorText is not found, nothing is written and an error is returned.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
anchorText: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("A snippet of existing body text; the footnote marker is inserted " +
|
||||
"immediately after its first occurrence (mark-safe)."),
|
||||
text: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("The footnote content as markdown (becomes the definition)."),
|
||||
},
|
||||
}, async ({ pageId, anchorText, text }) => {
|
||||
const result = await docmostClient.insertFootnote(pageId, anchorText, text);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: diff_page_versions
|
||||
registerShared(SHARED_TOOL_SPECS.diffPageVersions, async ({ pageId, from, to }) => {
|
||||
const result = await docmostClient.diffPageVersions(pageId, from, to);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: list_page_history
|
||||
registerShared(SHARED_TOOL_SPECS.listPageHistory, async ({ pageId, cursor }) => {
|
||||
const result = await docmostClient.listPageHistory(pageId, cursor);
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: restore_page_version
|
||||
registerShared(SHARED_TOOL_SPECS.restorePageVersion, async ({ historyId }) => {
|
||||
const result = await docmostClient.restorePageVersion(historyId);
|
||||
return jsonContent(result);
|
||||
});
|
||||
return server;
|
||||
}
|
||||
@@ -1,92 +0,0 @@
|
||||
import axios from "axios";
|
||||
/**
 * Exchange an API token for a collaboration token.
 *
 * POSTs an empty JSON body to `${baseUrl}/auth/collab-token` with the API token
 * as a Bearer credential and returns the token found in the response body.
 *
 * @param {string} baseUrl  Base URL that `/auth/collab-token` is appended to.
 * @param {string} apiToken Token sent in the `Authorization: Bearer` header.
 * @returns {Promise<string|undefined>} The token from `{ data: { token } }`,
 *   falling back to a flat `{ token }` body.
 * @throws {Error} For Axios failures: a plain Error whose `status` property is
 *   the HTTP status (undefined when the server never answered). The response
 *   body is appended to the message only when `process.env.DEBUG` is set.
 *   Non-Axios errors are rethrown untouched.
 */
export async function getCollabToken(baseUrl, apiToken) {
    try {
        const response = await axios.post(`${baseUrl}/auth/collab-token`, {}, {
            headers: {
                Authorization: `Bearer ${apiToken}`,
                "Content-Type": "application/json",
            },
        });
        // Response is normally wrapped in { data: { token: ... } }.
        return response.data.data?.token || response.data.token;
    }
    catch (error) {
        if (!axios.isAxiosError(error)) {
            throw error;
        }
        // The AxiosError is deliberately NOT attached as `cause`: it carries the
        // request config (including the Authorization header) and the response
        // body, which must not leak by default.
        const res = error.response;
        let message;
        if (res) {
            message = `Failed to get collab token: ${res.status} ${res.statusText}`;
            if (process.env.DEBUG) {
                message += ` - ${JSON.stringify(res.data)}`;
            }
        }
        else {
            // Network-level failure (DNS, refused connection, timeout): there is no
            // status to report, so name the transport error instead of printing
            // "undefined undefined".
            message = `Failed to get collab token: no response from server (${error.code || error.message})`;
        }
        const err = new Error(message);
        // Keep the HTTP status on the plain Error so callers can still detect a
        // 401/403 after the AxiosError has been wrapped away.
        err.status = res?.status;
        throw err;
    }
}
|
||||
/**
 * Pure cookie-parsing helper extracted from `performLogin` so the parsing logic
 * can be unit-tested without performing the login network request. Given the
 * raw `Set-Cookie` header value(s) from the login response, return the
 * `authToken` cookie's value.
 *
 * Behavior:
 *  - throws if there is no Set-Cookie header at all;
 *  - accepts the usual array of header values, and also a single header string;
 *  - matches the cookie NAME exactly (`authToken`), so a future
 *    `authTokenRefresh=...` cookie is NOT picked up (a `startsWith` would be);
 *  - skips malformed entries that have no `=` in their name/value pair. Without
 *    this, `indexOf("=") === -1` would make `slice(0, -1)` drop the last
 *    character, so e.g. `authTokenX` would be mistaken for `authToken`;
 *  - returns everything after the FIRST `=` up to the first `;`, so a base64
 *    value containing `=` padding is preserved (a naive `split("=")` would
 *    truncate it);
 *  - trims optional whitespace around the name and the value;
 *  - cookie attributes after the first `;` (Path, HttpOnly, Expires, …) are
 *    ignored;
 *  - throws if no `authToken` cookie is present.
 *
 * @param {string[]|string|undefined|null} cookies Raw Set-Cookie header value(s).
 * @returns {string} The value of the `authToken` cookie.
 * @throws {Error} When the header is missing or carries no `authToken` cookie.
 */
export function extractAuthTokenFromSetCookie(cookies) {
    if (!cookies) {
        throw new Error("No Set-Cookie header found in login response");
    }
    const headerValues = Array.isArray(cookies) ? cookies : [cookies];
    for (const header of headerValues) {
        // Only the leading `name=value` pair matters; attributes follow the first ";".
        const pair = String(header).split(";")[0];
        const eq = pair.indexOf("=");
        if (eq === -1) {
            continue; // no "=": not a usable name/value pair
        }
        if (pair.slice(0, eq).trim() === "authToken") {
            // Everything after the FIRST "=" is the value (keeps base64 "=" padding).
            return pair.slice(eq + 1).trim();
        }
    }
    throw new Error("No authToken cookie found in login response");
}
|
||||
/**
 * Log in with email/password and return the session token.
 *
 * POSTs the credentials to `${baseUrl}/auth/login` and extracts the `authToken`
 * cookie from the response's `Set-Cookie` header.
 *
 * @param {string} baseUrl  Base URL that `/auth/login` is appended to.
 * @param {string} email    Account email.
 * @param {string} password Account password.
 * @returns {Promise<string>} The `authToken` cookie value.
 * @throws Rethrows the original error after logging a short diagnostic.
 */
export async function performLogin(baseUrl, email, password) {
    try {
        const response = await axios.post(`${baseUrl}/auth/login`, { email, password });
        // Extract token from Set-Cookie header
        return extractAuthTokenFromSetCookie(response.headers["set-cookie"]);
    }
    catch (error) {
        // Avoid leaking the full server response body by default; log only the
        // HTTP status. Log the verbose body only when DEBUG is set.
        if (!axios.isAxiosError(error)) {
            console.error("Login failed:", error.message);
        }
        else if (process.env.DEBUG) {
            // With no response at all there is no body; fall back to the message.
            console.error("Login failed:", error.response?.data ?? error.message);
        }
        else {
            // A network-level failure has no response, hence no status: report the
            // transport error code (or message) rather than logging `undefined`.
            console.error("Login failed:", error.response?.status ?? error.code ?? error.message);
        }
        throw error;
    }
}
|
||||
@@ -1,743 +0,0 @@
|
||||
import { HocuspocusProvider } from "@hocuspocus/provider";
|
||||
import { TiptapTransformer } from "@hocuspocus/transformer";
|
||||
import * as Y from "yjs";
|
||||
import WebSocket from "ws";
|
||||
import { marked } from "marked";
|
||||
import { generateJSON } from "@tiptap/html";
|
||||
import { Node as PMNode } from "@tiptap/pm/model";
|
||||
import { updateYFragment } from "y-prosemirror";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||
import { withPageLock } from "./page-lock.js";
|
||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||
import { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
import { summarizeChange } from "./diff.js";
|
||||
/**
 * Create the Error reported when encoding a document to Yjs fails with an
 * opaque message such as "Unexpected content type". Both encode paths
 * (`buildYDoc` -> `toYdoc` and `applyDocToFragment` -> `updateYFragment`) share
 * this helper so the wording lives in a single place.
 *
 * @param safe  The document that was being encoded; it is passed to
 *              `findUnstorableAttr` to try to pinpoint the offending attribute.
 * @param label Name of the stage that failed (diagnostic only).
 * @param e     The value that was thrown; non-Error values are stringified.
 * @returns {Error} A descriptive error (returned, not thrown).
 */
function unstorableYjsError(safe, label, e) {
    const offender = findUnstorableAttr(safe);
    const reason = e instanceof Error ? e.message : String(e);
    let hint = " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined).";
    if (offender) {
        hint = ` Offending attribute: ${offender}.`;
    }
    return new Error(`Failed to encode document to Yjs (${label}): ${reason}.${hint}`);
}
|
||||
// Tiptap's HTML parsing expects browser globals. Under Node.js they are
// borrowed from a throwaway JSDOM window, and the `ws` implementation is
// installed as the global WebSocket.
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
Object.assign(global, {
    window: dom.window,
    document: dom.window.document,
    Element: dom.window.Element,
    WebSocket,
});
// `navigator` is intentionally not assigned: it is read-only in newer Node
// versions and already exists there.
|
||||
/**
 * Hard ceiling above which we skip callout preprocessing entirely, so a
 * pathological multi-megabyte payload cannot tie up the event loop; in that
 * case the markdown is passed through verbatim (callouts are simply not
 * detected).
 *
 * NOTE: despite the name, the limit is compared against `String.length`, i.e.
 * UTF-16 code units rather than encoded bytes. The name is kept for
 * compatibility with other references in this file.
 */
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 Mi code units
/** Matches an opening callout fence: `:::type` (type captured, lower-cased by the caller). */
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
/** Matches a bare closing callout fence: `:::`. */
const CALLOUT_CLOSE_RE = /^:::\s*$/;
/** Matches the START of a code fence (``` or ~~~), capturing the marker run. */
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/**
 * Pre-process Docmost-flavoured markdown: convert `:::type ... :::`
 * callout blocks (the syntax our markdown export produces) into HTML
 * divs that the callout extension parses. The inner content is rendered
 * through marked as regular markdown.
 *
 * How it works:
 *  1. One linear pass pairs every opening `:::type` line with its closing
 *     `:::` using a stack, so NESTED callouts pair up like brackets. Openers
 *     left on the stack are unterminated and stay literal text. Pairing up
 *     front means an unterminated opener never triggers a rescan to the end of
 *     the input (many of them used to make the scan quadratic).
 *  2. A recursive render walks the paired ranges and emits
 *     `<div data-type="callout" data-callout-type="TYPE">` around the
 *     marked-rendered body.
 *
 * Fenced code regions (```...``` and ~~~...~~~) are tracked in both steps and
 * copied through verbatim, so a `:::` line inside a code block is never treated
 * as a callout delimiter. A fence is closed only by a line holding nothing but
 * a marker run of the same character, at least as long as the opener; a line
 * such as "```js" inside an open block is content, not a closing fence.
 *
 * @param {string} markdown Source markdown.
 * @returns {Promise<string>} Markdown with callout blocks replaced by HTML divs;
 *   the input itself when it exceeds the size cap or contains no callouts.
 */
async function preprocessCallouts(markdown) {
    // Defensive cap: skip preprocessing for pathologically large inputs.
    if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
        return markdown;
    }
    const lines = markdown.split("\n");
    // True when `line` closes a code fence that was opened with `marker`.
    const closesFence = (line, marker) => {
        const run = line.match(/^\s*(`{3,}|~{3,})\s*$/);
        return run !== null && run[1][0] === marker[0] && run[1].length >= marker.length;
    };
    // Step 1: map each terminated opener's line index to { close, type }.
    const pairs = new Map();
    const openStack = [];
    let fence = "";
    for (let idx = 0; idx < lines.length; idx++) {
        const line = lines[idx];
        if (fence) {
            if (closesFence(line, fence)) {
                fence = "";
            }
            continue;
        }
        const fenceOpen = line.match(CODE_FENCE_RE);
        if (fenceOpen) {
            fence = fenceOpen[2];
            continue;
        }
        const opener = line.match(CALLOUT_OPEN_RE);
        if (opener) {
            openStack.push({ index: idx, type: opener[1].toLowerCase() });
            continue;
        }
        // A stray `:::` with nothing open is ordinary text.
        if (openStack.length > 0 && CALLOUT_CLOSE_RE.test(line)) {
            const opened = openStack.pop();
            pairs.set(opened.index, { close: idx, type: opened.type });
        }
    }
    if (pairs.size === 0) {
        return markdown; // nothing to convert
    }
    // Step 2: render lines[start, end), recursing into each paired callout body.
    const render = async (start, end) => {
        const out = [];
        let openFence = "";
        let i = start;
        while (i < end) {
            const line = lines[i];
            // Inside a code fence everything is copied through verbatim.
            if (openFence) {
                out.push(line);
                if (closesFence(line, openFence)) {
                    openFence = "";
                }
                i++;
                continue;
            }
            const fenceOpen = line.match(CODE_FENCE_RE);
            if (fenceOpen) {
                openFence = fenceOpen[2];
                out.push(line);
                i++;
                continue;
            }
            const pair = pairs.get(i);
            if (pair === undefined) {
                // Plain line, stray `:::`, or an unterminated opener: keep as-is.
                out.push(line);
                i++;
                continue;
            }
            const inner = await render(i + 1, pair.close);
            const renderedInner = await marked.parse(inner);
            out.push(`\n<div data-type="callout" data-callout-type="${pair.type}">${renderedInner}</div>\n`);
            i = pair.close + 1; // skip past the closing `:::`
        }
        return out.join("\n");
    };
    return render(0, lines.length);
}
|
||||
/**
|
||||
* Bridge marked's checkbox lists to TipTap task lists.
|
||||
*
|
||||
* marked renders GitHub task list items (`- [x] done`) as a plain
|
||||
* `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the
|
||||
* markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists
|
||||
* into the shape those extensions expect:
|
||||
* TaskList parseHTML matches `ul[data-type="taskList"]`,
|
||||
* TaskItem matches `li[data-type="taskItem"]`,
|
||||
* the checked state is read from `data-checked === "true"`.
|
||||
*
|
||||
* A list is only converted when it has at least one `<li>` and EVERY direct
|
||||
* `<li>` contains a checkbox input. Both `<ul>` and `<ol>` are considered: a
|
||||
* numbered checklist (`1. [x] a`, which marked renders as an `<ol>` of checkbox
|
||||
* `<li>`s) would otherwise lose its task state. TipTap task lists are unordered,
|
||||
* so a matching `<ol>` is emitted as `data-type="taskList"` exactly like a
|
||||
* `<ul>`. Mixed or ordinary lists (including ordinary `<ol>` lists) are left
|
||||
* untouched so they keep rendering as bullet/numbered lists. The marked `<p>`
|
||||
* wrapper is kept inside the `<li>` because TaskItem content allows paragraphs.
|
||||
*/
|
||||
function bridgeTaskLists(html) {
  // Two cheap exits before the JSDOM parse: markup with no checkbox input has
  // nothing to bridge (the common case), and oversized markup is passed through
  // verbatim, using the same cap as preprocessCallouts.
  const mentionsCheckbox = /type=["']?checkbox/i.test(html);
  if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return html;
  }

  const { document } = new JSDOM(html).window;

  const isCheckboxInput = (el) =>
    el.tagName === "INPUT" && el.getAttribute("type") === "checkbox";

  // Checkboxes that belong to this <li> itself: direct <input> children, or
  // inputs that are direct children of one of its direct <p> children (the
  // shape marked emits). Inputs deeper down, e.g. in a nested list, are not
  // counted, so a bullet item holding a task sublist is not misdetected.
  const ownCheckboxes = (li) =>
    Array.from(li.children).flatMap((child) => {
      if (isCheckboxInput(child)) {
        return [child];
      }
      if (child.tagName === "P") {
        return Array.from(child.querySelectorAll(":scope > input[type='checkbox']"));
      }
      return [];
    });

  // <ol> lists are candidates too: a numbered checklist must also become a
  // task list. Nested lists get their own turn in this loop.
  for (const list of Array.from(document.querySelectorAll("ul, ol"))) {
    const items = Array.from(list.children).filter((child) => child.tagName === "LI");
    if (items.length === 0) {
      continue;
    }
    const boxesPerItem = items.map((li) => ownCheckboxes(li));
    // Mixed and ordinary lists stay as they are.
    if (boxesPerItem.some((boxes) => boxes.length === 0)) {
      continue;
    }

    // An <ol> tagged data-type="taskList" would match both the orderedList and
    // the taskList parse rules, so a qualifying <ol> is swapped for a <ul> that
    // carries the same attributes and children.
    let taskList = list;
    if (list.tagName === "OL") {
      taskList = document.createElement("ul");
      for (const { name, value } of Array.from(list.attributes)) {
        taskList.setAttribute(name, value);
      }
      for (const node of Array.from(list.childNodes)) {
        taskList.appendChild(node);
      }
      list.replaceWith(taskList);
    }
    taskList.setAttribute("data-type", "taskList");

    for (const [position, li] of items.entries()) {
      const boxes = boxesPerItem[position];
      // The first checkbox decides the checked state; the rest are only removed.
      const firstBox = boxes[0] ?? null;
      li.setAttribute("data-type", "taskItem");
      const isChecked =
        firstBox != null && (firstBox.hasAttribute("checked") || firstBox.checked);
      li.setAttribute("data-checked", isChecked ? "true" : "false");
      // Strip every own checkbox so none leaks into the task item content.
      for (const box of boxes) {
        box.remove();
      }
    }
  }

  return document.body.innerHTML;
}
|
||||
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
|
||||
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
|
||||
// definition lines are collected into a single <section data-footnotes>.
|
||||
// Definition detection + fence handling are shared with analyzeFootnotes via
|
||||
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
|
||||
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;

/**
 * Escape a value for interpolation inside a double-quoted HTML attribute.
 *
 * `&` is escaped first so the `&` introduced by `&quot;` is not escaped again.
 *
 * @param {unknown} value - Value to embed; coerced with `String()`.
 * @returns {string} The value with `&` and `"` replaced by HTML entities.
 */
function escapeFootnoteAttr(value) {
  return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
}
|
||||
// Inline marked extension that turns a `[^id]` marker into a
// <sup data-footnote-ref> element carrying the footnote id.
const footnoteRefMarkedExtension = {
  name: "footnoteRef",
  level: "inline",
  // Position of the next candidate marker in `src`, or -1 when there is none.
  start(src) {
    return src.indexOf("[^");
  },
  // Produce a token only when the marker sits at the very start of `src`.
  tokenizer(src) {
    const found = src.match(FOOTNOTE_REF_RE);
    if (found === null || found.index !== 0) {
      return undefined;
    }
    const [raw, id] = found;
    return { type: "footnoteRef", raw, id };
  },
  renderer({ id }) {
    return `<sup data-footnote-ref data-id="${escapeFootnoteAttr(id)}"></sup>`;
  },
};

marked.use({ extensions: [footnoteRefMarkedExtension] });
|
||||
/**
|
||||
* Pull `[^id]: text` definition lines out of the body and render a single
|
||||
* <section data-footnotes> for them (or "" when there are none).
|
||||
*/
|
||||
function extractFootnotes(markdown) {
  const keptLines = [];
  // id -> text of the FIRST definition seen for that id. Later duplicates are
  // dropped (first wins); Map insertion order keeps definition order.
  const firstTextById = new Map();

  // A token is pulled out of the body only when it is a definition outside a
  // code fence; every other line stays in the body as written.
  for (const tok of lexFootnoteLines(markdown)) {
    if (tok.inFence || !tok.definition) {
      keptLines.push(tok.line);
      continue;
    }
    const { id, text } = tok.definition;
    if (!firstTextById.has(id)) {
      firstTextById.set(id, text);
    }
  }

  // No definitions at all: hand back the input untouched, with no section.
  if (firstTextById.size === 0) {
    return { body: markdown, section: "" };
  }

  let defsHtml = "";
  for (const [id, text] of firstTextById) {
    defsHtml += `<div data-footnote-def data-id="${escapeFootnoteAttr(id)}"><p>${marked.parseInline(text || "")}</p></div>`;
  }

  return {
    body: keptLines.join("\n"),
    section: `<section data-footnotes>${defsHtml}</section>`,
  };
}
|
||||
/**
|
||||
* Convert markdown to a ProseMirror doc using the full Docmost schema.
|
||||
*
|
||||
* This conversion does NOT canonicalize footnotes — it is the shared, content-
|
||||
* preserving primitive used by BOTH page write paths and COMMENT bodies
|
||||
* (createComment / updateComment). Canonicalization MUST NOT run on a comment
|
||||
* body: a comment may legitimately contain a footnote-definition line
|
||||
* (`[^1]: text`) with no matching reference, and the canonicalizer drops a
|
||||
* reference-less footnotesList — which would silently delete the comment's text.
|
||||
*
|
||||
* Page write paths that DO need the canonical footnote topology call
|
||||
* `markdownToProseMirrorCanonical` instead (markdown import, update_page markdown
|
||||
* path). Keep this function reference-loss-free.
|
||||
*/
|
||||
export async function markdownToProseMirror(markdownContent) {
  // Pipeline: callouts -> footnote split -> marked -> task-list bridge -> JSON.
  const footnotes = extractFootnotes(await preprocessCallouts(markdownContent));
  const bodyHtml = await marked.parse(footnotes.body);
  // The footnote section (possibly "") is appended after the rendered body.
  const fullHtml = bridgeTaskLists(bodyHtml + footnotes.section);
  return generateJSON(fullHtml, docmostExtensions);
}
|
||||
/**
|
||||
* Page-write variant of `markdownToProseMirror`: converts markdown then enforces
|
||||
* the canonical footnote topology. The footnote `section` markdown is emitted in
|
||||
* DEFINITION order, but numbering derives from REFERENCE order, so without this
|
||||
* the bottom list renders out of order (`1, 4, 2, 3, …`); orphan definitions and
|
||||
* duplicate lists are also normalized. Idempotent — a no-op once canonical, and a
|
||||
* no-op for footnote-free content.
|
||||
*
|
||||
* Use this ONLY for full-document PAGE writes (never for comment bodies, where it
|
||||
* would drop a reference-less footnote definition — see `markdownToProseMirror`).
|
||||
*/
|
||||
export async function markdownToProseMirrorCanonical(markdownContent) {
  // Convert first, then canonicalize the footnotes of the resulting doc.
  const converted = await markdownToProseMirror(markdownContent);
  return canonicalizeFootnotes(converted);
}
|
||||
/**
|
||||
* Build the collaboration WebSocket URL from an API base URL:
|
||||
* switch http(s)->ws(s), strip a trailing /api, mount on /collab.
|
||||
* Shared by the live read and the mutate path so both target the same socket.
|
||||
*/
|
||||
export function buildCollabWsUrl(baseUrl) {
  // URL schemes are case-insensitive, so "HTTPS://host" must become a wss URL
  // just like "https://host". `new URL` lower-cases the resulting scheme.
  const wsBase = baseUrl.replace(/^http/i, "ws");
  try {
    const url = new URL(wsBase);
    // Drop one trailing "/api" (with or without a slash), then any remaining
    // trailing slash, and mount the collaboration endpoint on what is left.
    const mountPath = url.pathname.replace(/\/api\/?$/, "").replace(/\/$/, "");
    url.pathname = `${mountPath}/collab`;
    // Query and hash of the API base URL must not leak into the ws URL.
    url.search = "";
    url.hash = "";
    return url.toString();
  } catch {
    // `baseUrl` is not an absolute URL: best-effort string handling that only
    // makes sure the result ends in "/collab".
    if (wsBase.endsWith("/collab")) {
      return wsBase;
    }
    return `${wsBase.replace(/\/$/, "")}/collab`;
  }
}
|
||||
/**
|
||||
* Encode a ProseMirror doc to a Yjs document, sanitizing it first and turning
|
||||
* the opaque yjs "Unexpected content type" failure into a descriptive error.
|
||||
*
|
||||
* `sanitizeForYjs` strips `undefined` node/mark attributes (the common cause of
|
||||
* the failure); if `toYdoc` still throws, `findUnstorableAttr` is used to point
|
||||
* at the offending attribute path.
|
||||
*/
|
||||
export function buildYDoc(doc) {
  const sanitized = sanitizeForYjs(doc);
  let encoded;
  try {
    encoded = TiptapTransformer.toYdoc(sanitized, "default", docmostExtensions);
  } catch (cause) {
    // Swap the raw encoder failure for the descriptive error built from the
    // sanitized doc and the stage name.
    throw unstorableYjsError(sanitized, "toYdoc", cause);
  }
  return encoded;
}
|
||||
/**
|
||||
* Write a new ProseMirror doc into the live Yjs fragment by STRUCTURAL DIFF,
|
||||
* preserving the Yjs identity of unchanged nodes (issue #152).
|
||||
*
|
||||
* The previous approach deleted the whole fragment and re-applied a fresh Y.Doc,
|
||||
* which discarded every Yjs node id. y-prosemirror anchors the editor selection
|
||||
* to those ids, so an open editor's cursor lost its anchor and snapped to the
|
||||
* end of the document on every agent write (most visibly on comment anchoring,
|
||||
* which changes no text at all). `updateYFragment` is exactly the routine the
|
||||
* editor itself uses to sync ProseMirror edits into Yjs: it diffs the new node
|
||||
* against the current fragment and touches only the changed children, so
|
||||
* unchanged nodes keep their ids and the live cursor stays put.
|
||||
*
|
||||
* Must run inside a single `transact` so the diff applies atomically (no remote
|
||||
* update interleaves). Keeps `buildYDoc`'s `findUnstorableAttr` diagnostic for
|
||||
* the opaque "Unexpected content type" encode failure.
|
||||
*/
|
||||
export function applyDocToFragment(ydoc, newDoc) {
  const sanitized = sanitizeForYjs(newDoc);
  const liveFragment = ydoc.getXmlFragment("default");

  // Stage 1: hydrate the ProseMirror node. It has its own try/catch so a
  // failure is reported under the "fromJSON" stage label.
  const hydrate = () => {
    try {
      return PMNode.fromJSON(docmostSchema, sanitized);
    } catch (cause) {
      throw unstorableYjsError(sanitized, "fromJSON", cause);
    }
  };
  const hydratedNode = hydrate();

  // Stage 2: diff the node into the live fragment inside one transaction.
  try {
    ydoc.transact(() => {
      const meta = { mapping: new Map(), isOMark: new Map() };
      updateYFragment(ydoc, liveFragment, hydratedNode, meta);
    });
  } catch (cause) {
    throw unstorableYjsError(sanitized, "updateYFragment", cause);
  }
}
|
||||
/**
|
||||
* Run an independent Yjs-encodability check (the same `sanitizeForYjs` + schema
|
||||
* the apply path uses) and throw the same descriptive error when the doc cannot
|
||||
* be stored. Used by the dry-run preview.
|
||||
*
|
||||
* Note: it does NOT run `updateYFragment` against the live fragment, so it is an
|
||||
* encodability GATE, not a byte-for-byte rehearsal of apply — `buildYDoc`
|
||||
* (`toYdoc`) and `applyDocToFragment` (`updateYFragment`) are two different
|
||||
* encoders that nonetheless reject the same unstorable attributes. To narrow the
|
||||
* preview/apply gap it ALSO rehearses the apply path's `PMNode.fromJSON`
|
||||
* hydration, so a doc that would only fail there (e.g. an unknown node type) is
|
||||
* rejected at preview time too (#154 review). Still cheap: no live fragment, no
|
||||
* `updateYFragment`.
|
||||
*/
|
||||
export function assertYjsEncodable(doc) {
  // Check 1: the toYdoc encoder accepts the document (throws otherwise).
  buildYDoc(doc);

  // Check 2: rehearse the hydration step that the apply path performs.
  const sanitized = sanitizeForYjs(doc);
  try {
    PMNode.fromJSON(docmostSchema, sanitized);
  } catch (cause) {
    throw unstorableYjsError(sanitized, "fromJSON", cause);
  }
}
|
||||
/** Milliseconds allowed for the initial handshake/sync before giving up. */
const CONNECT_TIMEOUT_MS = 25 * 1000;

/** Milliseconds allowed for the server to acknowledge our write before giving up. */
const PERSIST_TIMEOUT_MS = 20 * 1000;
|
||||
/**
|
||||
* Safely mutate the live content of a page over the collaboration websocket.
|
||||
*
|
||||
* This is the single safe write path for every MCP content mutation. It:
|
||||
* 1. serializes per-page writes through withPageLock (no two MCP writes on
|
||||
* the same page overlap);
|
||||
* 2. connects to Hocuspocus and waits for the initial sync so the local ydoc
|
||||
* mirrors the authoritative server doc — INCLUDING edits/comments/images
|
||||
* that are not yet in the debounced REST snapshot;
|
||||
* 3. inside onSynced, SYNCHRONOUSLY reads the live doc, runs `transform`, and
|
||||
* writes the result back — with no `await` between read and write so no
|
||||
* remote update can interleave and clobber concurrent human edits;
|
||||
* 4. waits for the server to acknowledge the write (unsyncedChanges -> 0)
|
||||
* before resolving, so the next operation observes our change.
|
||||
*
|
||||
* `transform` receives the live ProseMirror doc and returns the NEW full
|
||||
* ProseMirror doc to write, or `null` to abort with no write (a no-op). If
|
||||
* `transform` throws, the error is propagated to the caller (not swallowed).
|
||||
*
|
||||
* Resolves a `MutationResult { doc, verify }`: `doc` is the doc that was
|
||||
* written (or the live doc when the transform aborted), and `verify` is a
|
||||
* verifiable change report (text/block/mark deltas) of what actually changed.
|
||||
* The report is computed AFTER the atomic read->write, so it never widens the
|
||||
* read->write window, and it never throws (it can NEVER break a write).
|
||||
*/
|
||||
export async function mutatePageContent(pageId, collabToken, baseUrl, transform) {
  return withPageLock(pageId, () => {
    if (process.env.DEBUG) {
      console.error(`Starting realtime content mutate for page ${pageId}`);
      // Token prefix is sensitive; only log it under DEBUG.
      console.error(`Token prefix: ${collabToken ? collabToken.substring(0, 5) : "NONE"}...`);
    }
    const ydoc = new Y.Doc();
    const wsUrl = buildCollabWsUrl(baseUrl);
    if (process.env.DEBUG) {
      console.error(`Connecting to WebSocket: ${wsUrl}`);
    }

    return new Promise((resolve, reject) => {
      let provider;
      let applied = false; // onSynced may fire again on reconnect — apply once.
      let settled = false;
      // Set true on disconnect/close so a reconnect-driven unsyncedChanges->0
      // cannot be mistaken for a successful persist of our write.
      let connectionLost = false;
      let connectTimer;
      let persistTimer;
      let unsyncedHandler;
      // The value resolved on every success/abort path: a no-op report on
      // abort, the computed change report after a real write.
      let mutationResult;

      // Release timers, the unsyncedChanges listener and the provider.
      // Teardown is best-effort: a failure here must not mask the outcome.
      const cleanup = () => {
        clearTimeout(connectTimer);
        clearTimeout(persistTimer);
        if (!provider) {
          return;
        }
        if (unsyncedHandler) {
          try {
            provider.off("unsyncedChanges", unsyncedHandler);
          } catch {
            // ignored: best-effort teardown
          }
        }
        try {
          provider.destroy();
        } catch {
          // ignored: best-effort teardown
        }
      };

      // Settle the promise exactly once; later calls are no-ops.
      const finish = (err, value) => {
        if (settled) {
          return;
        }
        settled = true;
        cleanup();
        if (err) {
          reject(err);
        } else {
          resolve(value);
        }
      };

      connectTimer = setTimeout(() => {
        finish(new Error("Connection timeout to collaboration server"));
      }, CONNECT_TIMEOUT_MS);

      // Resolve once the server has acknowledged our update. The provider
      // increments unsyncedChanges when our local update is sent and
      // decrements it when the server replies with a SyncStatus(applied=true);
      // reaching 0 means the authoritative in-memory ydoc on the server now
      // contains our write.
      const waitForPersistence = () => {
        if (settled) {
          return;
        }
        // A missing provider is a failure, not a success: without it the write
        // can never have been acknowledged.
        if (!provider) {
          finish(new Error("collab provider gone before persistence"));
          return;
        }
        if (provider.unsyncedChanges === 0) {
          finish(null, mutationResult);
          return;
        }
        persistTimer = setTimeout(() => {
          finish(new Error("Timeout waiting for collaboration server to persist the update"));
        }, PERSIST_TIMEOUT_MS);
        unsyncedHandler = (data) => {
          // Only treat unsyncedChanges->0 as success while the connection is
          // still up. A transient disconnect + reconnect handshake can drive
          // the counter back to 0 without our write being re-transmitted; in
          // that case the disconnect/close error wins instead.
          if (data.number === 0 && !connectionLost) {
            finish(null, mutationResult);
          }
        };
        provider.on("unsyncedChanges", unsyncedHandler);
      };

      // Shared by onDisconnect and onClose. The flag is set BEFORE finish so a
      // racing unsyncedChanges handler sees the connection as lost and cannot
      // report a false success. `finish` is idempotent, so the close event
      // that our own cleanup()->provider.destroy() triggers is a no-op.
      const failOnConnectionLoss = () => {
        connectionLost = true;
        finish(new Error("Collaboration connection closed before the update was persisted/synced"));
      };

      provider = new HocuspocusProvider({
        url: wsUrl,
        name: `page.${pageId}`,
        document: ydoc,
        token: collabToken,
        // @ts-ignore - Required for Node.js environment
        WebSocketPolyfill: WebSocket,
        onConnect: () => {
          if (process.env.DEBUG) {
            console.error("WS Connect");
          }
        },
        onDisconnect: () => {
          if (process.env.DEBUG) {
            console.error("WS Disconnect");
          }
          failOnConnectionLoss();
        },
        onClose: () => {
          if (process.env.DEBUG) {
            console.error("WS Close");
          }
          failOnConnectionLoss();
        },
        onSynced: () => {
          if (applied || settled) {
            return;
          }
          applied = true;
          // The handshake/sync phase is over, so retire the connect timer.
          // Left armed, it could fire during the persistence wait, cutting
          // that wait short of PERSIST_TIMEOUT_MS and rejecting with a
          // misleading "Connection timeout" error.
          clearTimeout(connectTimer);
          connectTimer = undefined;
          if (process.env.DEBUG) {
            console.error("Connected and synced!");
          }

          // CRITICAL: everything between reading the live doc and writing it
          // back must stay synchronous (no await). While the JS event loop is
          // not yielded, no incoming remote update can interleave, so any
          // already-synced concurrent edits are preserved in liveDoc.
          let newDoc;
          let beforeDoc;
          try {
            let liveDoc = TiptapTransformer.fromYdoc(ydoc, "default");
            if (!liveDoc ||
              typeof liveDoc !== "object" ||
              !Array.isArray(liveDoc.content)) {
              liveDoc = { type: "doc", content: [] };
            }
            // Snapshot the before-doc for the change report. Docs are
            // JSON-serializable, so this is a safe deep clone.
            beforeDoc = JSON.parse(JSON.stringify(liveDoc));
            newDoc = transform(liveDoc);
            if (newDoc == null) {
              // Transform aborted — write nothing, return the live doc with a
              // no-op change report.
              mutationResult = {
                doc: liveDoc,
                verify: {
                  changed: false,
                  textInserted: 0,
                  textDeleted: 0,
                  blocksChanged: 0,
                  marks: {},
                  summary: "no changes (transform aborted)",
                },
              };
              finish(null, mutationResult);
              return;
            }
            // Structural diff into the live fragment (issue #152): preserves
            // the Yjs ids of unchanged nodes, so an open editor's cursor is not
            // yanked to the end of the document on every agent write.
            applyDocToFragment(ydoc, newDoc);
          } catch (e) {
            // Includes errors thrown by transform (e.g. "afterText not found",
            // "text not found"): propagate them verbatim to the caller.
            finish(e instanceof Error ? e : new Error(String(e)));
            return;
          }

          // The change report is computed AFTER the write from the JSON
          // before/after docs, so it cannot widen the read->write window.
          mutationResult = {
            doc: newDoc,
            verify: summarizeChange(beforeDoc, newDoc),
          };
          if (process.env.DEBUG) {
            console.error("Content written, waiting for server to persist...");
          }
          waitForPersistence();
        },
        onAuthenticationFailed: () => {
          finish(new Error("Authentication failed for collaboration connection"));
        },
      });
    });
  });
}
|
||||
/**
|
||||
* Replace the live content of a page over the collaboration websocket.
|
||||
* Accepts a ready ProseMirror JSON document; the caller controls whether
|
||||
* it was produced from markdown (ids regenerate) or edited in place
|
||||
* (existing block ids preserved).
|
||||
*
|
||||
* This is an intentional full replace (used by update_page / update_page_json),
|
||||
* but now runs under the per-page lock and waits for server persistence via
|
||||
* mutatePageContent.
|
||||
*/
|
||||
export async function replacePageContent(pageId, prosemirrorDoc, collabToken, baseUrl) {
  // Reject a bad document up front, before any collaboration connection is
  // opened: only a non-null object whose `type` is "doc" is accepted.
  const looksLikeDoc =
    prosemirrorDoc != null &&
    typeof prosemirrorDoc === "object" &&
    prosemirrorDoc.type === "doc";
  if (!looksLikeDoc) {
    throw new Error("replacePageContent: invalid ProseMirror document");
  }

  // Full replace: the transform ignores the live doc and returns the new one.
  const replaceWithGivenDoc = () => prosemirrorDoc;
  return await mutatePageContent(pageId, collabToken, baseUrl, replaceWithGivenDoc);
}
|
||||
/**
|
||||
* Markdown update path (kept for backwards compatibility).
|
||||
* NOTE: this re-imports the whole document — block ids are regenerated.
|
||||
* Tables and :::callout::: blocks survive thanks to the full schema.
|
||||
*/
|
||||
export async function updatePageContentRealtime(pageId, markdownContent, collabToken, baseUrl) {
  // This is a PAGE write, so the canonical conversion is used: the markdown
  // import emits the footnote list in definition order while numbering follows
  // reference order.
  const canonicalDoc = await markdownToProseMirrorCanonical(markdownContent);
  const writeCanonicalDoc = () => canonicalDoc;
  return await mutatePageContent(pageId, collabToken, baseUrl, writeCanonicalDoc);
}
|
||||
@@ -1,371 +0,0 @@
|
||||
/**
|
||||
* Inline-comment anchoring against a ProseMirror document.
|
||||
*
|
||||
* Docmost stores an inline comment's highlight as a `comment` MARK on the
|
||||
* document text (`{ type: "comment", attrs: { commentId, resolved } }`); the
|
||||
* `/comments/create` API only records the comment row + its `selection` text and
|
||||
* does NOT insert that mark, so the anchor has to be written into the page
|
||||
* content separately. This module finds where a selection lives in the document
|
||||
* and splices the comment mark across the matched range.
|
||||
*
|
||||
* Matching has to be robust because the agent supplies the selection as plain
|
||||
* text while the document stores rich inline content: a selection can span
|
||||
* several adjacent text nodes (inline code / bold / links each become their own
|
||||
* text node), and the document may use smart/typographic quotes, dash variants,
|
||||
* non-breaking spaces, or collapsed runs of whitespace that the agent typed as
|
||||
* ASCII quotes/hyphens/single spaces. We therefore normalize both sides before
|
||||
* comparing and match across maximal runs of consecutive text nodes within a
|
||||
* single block, while mapping every normalized character back to its raw index
|
||||
* so the mark lands on the exact original characters.
|
||||
*/
|
||||
// The three tables below are spelled with \u escapes so that look-alike
// characters cannot be folded into their ASCII forms by an editor, a
// normaliser, or a copy/paste.

/**
 * Typographic double-quote variants mapped to ASCII `"`:
 * U+00AB, U+00BB, U+201E, U+201C, U+201D, U+201F, U+301D, U+301E, U+FF02.
 * NOTE(review): the last entry (fullwidth quotation mark, U+FF02) is
 * reconstructed from a literal that had been folded to ASCII `"` — confirm
 * against the original source.
 */
const DOUBLE_QUOTES = "\u00AB\u00BB\u201E\u201C\u201D\u201F\u301D\u301E\uFF02";

/** Typographic single-quote/apostrophe variants mapped to ASCII `'`: U+2018, U+2019, U+201A, U+201B. */
const SINGLE_QUOTES = "\u2018\u2019\u201A\u201B";

/**
 * Dash variants mapped to ASCII `-`: en dash U+2013, em dash U+2014,
 * horizontal bar U+2015, minus sign U+2212, hyphen U+2010, non-breaking
 * hyphen U+2011, figure dash U+2012.
 * NOTE(review): U+2010 and U+2011 look identical; confirm that both are
 * intended members.
 */
const DASHES = "\u2013\u2014\u2015\u2212\u2010\u2011\u2012";

/** Guard against pathological/cyclic documents in the depth-first walk. */
const MAX_DEPTH = 200;
|
||||
/** The comment mark Docmost stores on anchored text. */
|
||||
function makeCommentMark(commentId) {
  // A freshly anchored comment is unresolved; `resolved` is written out
  // explicitly alongside `commentId` rather than being left off the mark.
  const attrs = { commentId, resolved: false };
  return { type: "comment", attrs };
}
|
||||
/** True for any character we collapse/replace with a single normal space. */
|
||||
function isWhitespaceChar(ch) {
  // `\s` covers ASCII whitespace (space, tab, newline, ...). The special
  // spaces are listed explicitly so the result does not depend on how a given
  // engine defines `\s`. They are written as \u escapes because the literal
  // characters are indistinguishable from a plain space in source code:
  //   U+00A0 no-break space        U+2007 figure space
  //   U+202F narrow no-break space U+2009 thin space
  //   U+200A hair space            U+2002 en space
  //   U+2003 em space
  return /[\s\u00A0\u2007\u202F\u2009\u200A\u2002\u2003]/.test(ch);
}
|
||||
/**
 * Normalize `s` for anchor matching.
 *
 * Smart quotes and dash variants are replaced by their ASCII forms, and every
 * run of whitespace (including the special spaces) becomes ONE space. Case is
 * preserved: anchoring is case-sensitive.
 *
 * @param s the raw string
 * @returns `{ norm, map }` where `norm` is the normalized text and `map[i]` is
 *   the index in the ORIGINAL `s` of the i-th normalized character; for a
 *   collapsed whitespace run the entry points at the run's FIRST raw character
 */
export function normalizeForMatch(s) {
  const map = [];
  let norm = "";
  let pos = 0;
  while (pos < s.length) {
    const ch = s[pos];
    if (!isWhitespaceChar(ch)) {
      // Ordinary character: emit it (or its ASCII stand-in) one-to-one.
      let ascii = ch;
      if (DOUBLE_QUOTES.includes(ch)) {
        ascii = '"';
      } else if (SINGLE_QUOTES.includes(ch)) {
        ascii = "'";
      } else if (DASHES.includes(ch)) {
        ascii = "-";
      }
      norm += ascii;
      map.push(pos);
      pos += 1;
      continue;
    }
    // Whitespace: emit a single space mapped to the run start, then skip the
    // remainder of the run.
    norm += " ";
    map.push(pos);
    do {
      pos += 1;
    } while (pos < s.length && isWhitespaceChar(s[pos]));
  }
  return { norm, map };
}
|
||||
/**
 * Locate `selection` inside ONE block's direct `content` array.
 *
 * Consecutive `text` children form a run (any other inline node, e.g. a
 * mention, ends the run). Each run and the selection are normalized with
 * normalizeForMatch, and the runs are searched in order.
 *
 * @param blockContent the block's `content` array
 * @param selection the text to anchor; leading/trailing spaces are trimmed
 *   AFTER normalization
 * @returns `{ startChild, startOffset, endChild, endOffset }` for the first
 *   run that contains the selection (offsets are raw positions inside the
 *   child text nodes, `endOffset` exclusive), or `null` when `blockContent` is
 *   not an array, the normalized selection is empty, or no run matches
 */
export function findAnchorInBlock(blockContent, selection) {
  if (!Array.isArray(blockContent)) {
    return null;
  }
  const needle = normalizeForMatch(selection).norm.trim();
  if (needle.length === 0) {
    return null;
  }
  const isTextNode = (n) => Boolean(n) && typeof n === "object" && n.type === "text";

  let cursor = 0;
  while (cursor < blockContent.length) {
    if (!isTextNode(blockContent[cursor])) {
      cursor += 1;
      continue;
    }
    // Gather the maximal run of text nodes starting at `cursor`, remembering
    // for every raw character which child it came from and at what offset.
    let rawRun = "";
    const locations = [];
    let runEnd = cursor;
    for (; runEnd < blockContent.length && isTextNode(blockContent[runEnd]); runEnd += 1) {
      const value = blockContent[runEnd].text;
      const piece = typeof value === "string" ? value : "";
      for (let offset = 0; offset < piece.length; offset += 1) {
        locations.push({ childIdx: runEnd, offset });
      }
      rawRun += piece;
    }

    const { norm, map } = normalizeForMatch(rawRun);
    const hit = norm.indexOf(needle);
    if (hit === -1) {
      // Nothing in this run: resume scanning right after it.
      cursor = runEnd;
      continue;
    }

    const afterHit = hit + needle.length;
    const rawStart = map[hit];
    // Raw index just past the match: where the next normalized character
    // starts, or the end of the run when the match reaches the run's end.
    const rawEndExclusive = afterHit < map.length ? map[afterHit] : rawRun.length;
    const first = locations[rawStart];
    const last = locations[rawEndExclusive - 1];
    return {
      startChild: first.childIdx,
      startOffset: first.offset,
      endChild: last.childIdx,
      // `last` is the final matched raw character, so the exclusive end is +1.
      endOffset: last.offset + 1,
    };
  }
  return null;
}
|
||||
/**
 * Rebuild the RAW text covered by a match returned from findAnchorInBlock.
 *
 * Every child in `startChild..endChild` is read as a text node: the first
 * contributes its text from `startOffset`, the last up to `endOffset`
 * (exclusive), and any node in between its whole text. The slicing is the same
 * per-node slicing spliceCommentMark uses for the marked fragment.
 *
 * @param blockContent the block's `content` array
 * @param match `{ startChild, startOffset, endChild, endOffset }`
 * @returns the concatenated raw characters inside the range
 */
function reconstructRawText(blockContent, match) {
  const { startChild, startOffset, endChild, endOffset } = match;
  const pieces = [];
  for (let idx = startChild; idx <= endChild; idx += 1) {
    const value = blockContent[idx].text;
    const text = typeof value === "string" ? value : "";
    const from = idx === startChild ? startOffset : 0;
    const to = idx === endChild ? endOffset : text.length;
    pieces.push(text.slice(from, to));
  }
  return pieces.join("");
}
|
||||
/**
 * Return the RAW document text that `selection` would anchor to, or `null`
 * when it cannot be anchored anywhere in `doc`.
 *
 * The walk is the same depth-first, document-order traversal used by
 * canAnchorInDoc / applyAnchorInDoc: at each node with an array `content`, try
 * findAnchorInBlock on that content first, then descend into children that
 * have their own `content`. The first match wins and its range is turned back
 * into the document's ORIGINAL characters (smart quotes, dashes, special or
 * repeated whitespace) rather than the normalized input.
 *
 * Callers store the result as the comment's `selection` so it equals the text
 * actually under the mark; the apply-suggestion check compares against that
 * text, and a selection that only matched through normalization would
 * otherwise be rejected (spurious 409).
 *
 * @param doc ProseMirror-style JSON document
 * @param selection text to anchor
 * @returns the raw anchored substring, or `null`
 */
export function getAnchoredText(doc, selection) {
  function search(node, depth) {
    const searchable =
      depth <= MAX_DEPTH &&
      Boolean(node) &&
      typeof node === "object" &&
      Array.isArray(node.content);
    if (!searchable) {
      return null;
    }
    const match = findAnchorInBlock(node.content, selection);
    if (match) {
      return reconstructRawText(node.content, match);
    }
    for (const child of node.content) {
      if (!child || typeof child !== "object" || !Array.isArray(child.content)) {
        continue;
      }
      const text = search(child, depth + 1);
      if (text !== null) {
        return text;
      }
    }
    return null;
  }
  return search(doc, 0);
}
|
||||
/**
 * Report whether `selection` can be anchored anywhere in `doc`.
 *
 * Depth-first, document order: a node's own `content` is tried first with
 * findAnchorInBlock, then each child that itself has an array `content` is
 * visited. Nodes deeper than MAX_DEPTH are not visited.
 *
 * @param doc ProseMirror-style JSON document
 * @param selection text to anchor
 * @returns true when some block matches
 */
export function canAnchorInDoc(doc, selection) {
  const hasContent = (n) => Boolean(n) && typeof n === "object" && Array.isArray(n.content);
  const anchorable = (node, depth) => {
    if (depth > MAX_DEPTH || !hasContent(node)) {
      return false;
    }
    if (findAnchorInBlock(node.content, selection)) {
      return true;
    }
    return node.content.some((child) => hasContent(child) && anchorable(child, depth + 1));
  };
  return anchorable(doc, 0);
}
|
||||
/**
 * Put a comment mark on the matched range, splitting text nodes as needed.
 *
 * `blockContent` is mutated IN PLACE: children `startChild..endChild` (all
 * text nodes, as produced by findAnchorInBlock) are replaced by up to three
 * fragments each — the unmarked text before the range, the marked text, and
 * the unmarked text after it. Empty fragments are dropped.
 *
 * @param blockContent the block's `content` array
 * @param match `{ startChild, startOffset, endChild, endOffset }`
 * @param commentId id stored in the new comment mark
 */
function spliceCommentMark(blockContent, match, commentId) {
  const { startChild, startOffset, endChild, endOffset } = match;
  const commentMark = makeCommentMark(commentId);
  const replacement = [];

  for (let idx = startChild; idx <= endChild; idx += 1) {
    const node = blockContent[idx];
    const text = typeof node.text === "string" ? node.text : "";
    const isFirst = idx === startChild;
    const isLast = idx === endChild;
    const from = isFirst ? startOffset : 0;
    const to = isLast ? endOffset : text.length;
    const ownMarks = Array.isArray(node.marks) ? node.marks : [];

    // Each fragment is a copy of ITS node, so that node's attrs/marks survive.
    const emit = (piece, marks) => {
      if (piece.length > 0) {
        replacement.push({ ...node, text: piece, marks });
      }
    };

    emit(isFirst ? text.slice(0, from) : "", [...ownMarks]);
    // An existing comment mark is removed from the marked fragment so it ends
    // up carrying exactly one comment mark: the new one.
    emit(text.slice(from, to), [
      ...ownMarks.filter((m) => !(m && m.type === "comment")),
      commentMark,
    ]);
    emit(isLast ? text.slice(to) : "", [...ownMarks]);
  }

  blockContent.splice(startChild, endChild - startChild + 1, ...replacement);
}
|
||||
/**
 * Count the occurrences of `selection` in the whole document.
 *
 * Uses the same normalization and text-run building as findAnchorInBlock, but
 * does not stop at the first hit: every non-overlapping occurrence in every
 * run of every block is counted, walking the tree depth-first like
 * canAnchorInDoc. Two occurrences inside one block therefore count as 2.
 *
 * This is the uniqueness gate for SUGGESTIONS: applying a suggestion rewrites
 * the anchored text, so an ambiguous anchor (more than one occurrence) could
 * edit the wrong place. A suggestion is only allowed when the result is
 * exactly 1. Ordinary comments keep first-occurrence anchoring and do not use
 * this.
 *
 * @param doc ProseMirror-style JSON document
 * @param selection text to look for
 * @returns total number of occurrences (0 when the normalized selection is empty)
 */
export function countAnchorMatches(doc, selection) {
  const needle = normalizeForMatch(selection).norm.trim();
  if (needle.length === 0) {
    return 0;
  }

  const isTextNode = (n) => Boolean(n) && typeof n === "object" && n.type === "text";
  const hasContent = (n) => Boolean(n) && typeof n === "object" && Array.isArray(n.content);

  // Non-overlapping occurrences of the needle in one normalized run.
  const occurrencesIn = (haystack) => {
    let hits = 0;
    let at = haystack.indexOf(needle);
    while (at !== -1) {
      hits += 1;
      at = haystack.indexOf(needle, at + needle.length);
    }
    return hits;
  };

  // Occurrences within a single block's direct content: text children are
  // accumulated into runs, and a run is closed by any non-text child or by
  // the end of the block.
  const countInBlock = (blockContent) => {
    if (!Array.isArray(blockContent)) {
      return 0;
    }
    let hits = 0;
    let rawRun = "";
    let inRun = false;
    const closeRun = () => {
      if (inRun) {
        hits += occurrencesIn(normalizeForMatch(rawRun).norm);
        rawRun = "";
        inRun = false;
      }
    };
    for (const node of blockContent) {
      if (isTextNode(node)) {
        inRun = true;
        rawRun += typeof node.text === "string" ? node.text : "";
      } else {
        closeRun();
      }
    }
    closeRun();
    return hits;
  };

  const countFrom = (node, depth) => {
    if (depth > MAX_DEPTH || !hasContent(node)) {
      return 0;
    }
    let sum = countInBlock(node.content);
    for (const child of node.content) {
      if (hasContent(child)) {
        sum += countFrom(child, depth + 1);
      }
    }
    return sum;
  };

  return countFrom(doc, 0);
}
|
||||
/**
 * Anchor a comment on the first block of `doc` whose content matches
 * `selection`.
 *
 * Traversal order is the same depth-first order as canAnchorInDoc. On the
 * first match the comment mark is spliced into that block's content IN PLACE.
 *
 * @param doc ProseMirror-style JSON document (mutated on success)
 * @param selection text to anchor
 * @param commentId id stored in the comment mark
 * @returns true when a block matched and was marked; false (document left
 *   untouched) otherwise
 */
export function applyAnchorInDoc(doc, selection, commentId) {
  const hasContent = (n) => Boolean(n) && typeof n === "object" && Array.isArray(n.content);
  function anchor(node, depth) {
    if (depth > MAX_DEPTH || !hasContent(node)) {
      return false;
    }
    const match = findAnchorInBlock(node.content, selection);
    if (match) {
      spliceCommentMark(node.content, match, commentId);
      return true;
    }
    return node.content.some((child) => hasContent(child) && anchor(child, depth + 1));
  }
  return anchor(doc, 0);
}
|
||||
@@ -1,423 +0,0 @@
|
||||
/**
|
||||
* Headless, Docmost-equivalent document diff.
|
||||
*
|
||||
* Docmost's history editor computes a change set with the exact pipeline below
|
||||
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
|
||||
* editor decorations. This module runs the SAME computation but serializes the
|
||||
* result to text + integrity counts instead of decorations, so a diff can be
|
||||
* previewed without a browser.
|
||||
*
|
||||
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
|
||||
* maintained published fork of the MIT prosemirror-recreate-steps source that
|
||||
* Docmost vendors in @docmost/editor-ext; it exposes the identical
|
||||
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
|
||||
* signature.
|
||||
*
|
||||
* If recreateTransform / the changeset throws on a pathological document pair,
|
||||
* we fall back to a coarse block-level text diff so the tool never hard-fails.
|
||||
*/
|
||||
import { Node } from "@tiptap/pm/model";
|
||||
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
|
||||
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
|
||||
import { docmostSchema } from "./docmost-schema.js";
|
||||
/**
 * Concatenate the plain text of a JSON node and all of its descendants.
 *
 * @param node a JSON node; anything that is not an object yields ""
 * @returns the node's own `text` (when it is a string) followed by the text of
 *   each entry of its `content` array, in order
 */
function plainText(node) {
  if (!node || typeof node !== "object") {
    return "";
  }
  const own = typeof node.text === "string" ? node.text : "";
  if (!Array.isArray(node.content)) {
    return own;
  }
  return node.content.reduce((acc, child) => acc + plainText(child), own);
}
|
||||
/**
 * Count the nodes of a JSON doc (the root included) for which `pred` is truthy.
 *
 * @param doc root JSON node
 * @param pred predicate called once per object node, parents before children
 * @returns number of matching nodes; non-object entries are skipped
 */
function countNodes(doc, pred) {
  const tally = (node) => {
    if (!node || typeof node !== "object") {
      return 0;
    }
    let total = pred(node) ? 1 : 0;
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        total += tally(child);
      }
    }
    return total;
  };
  return tally(doc);
}
|
||||
/**
 * Count the UNIQUE links in a JSON doc, keyed by `href`.
 *
 * One link may be split over several adjacent text nodes (for example a
 * "link+bold" node followed by a plain "link" node), so counting link-bearing
 * nodes would over-count. Instead every link mark's href is collected into a
 * Set. A link mark whose href is missing or not a string is filed under "",
 * so all such malformed links together count as one.
 *
 * @param doc root JSON node
 * @returns number of distinct hrefs found on `link` marks of text nodes
 */
function countUniqueLinks(doc) {
  const seen = new Set();
  (function collect(node) {
    if (!node || typeof node !== "object") {
      return;
    }
    if (node.type === "text" && Array.isArray(node.marks)) {
      for (const mark of node.marks) {
        if (!mark || mark.type !== "link") {
          continue;
        }
        const href = mark.attrs?.href;
        seen.add(typeof href === "string" ? href : "");
      }
    }
    if (Array.isArray(node.content)) {
      node.content.forEach(collect);
    }
  })(doc);
  return seen.size;
}
|
||||
/**
 * Count the `footnoteReference` nodes in a subtree, the root included.
 *
 * @param node root of the subtree; a non-object counts as 0
 * @returns number of nodes whose `type` is "footnoteReference"
 */
function countFootnoteRefs(node) {
  if (!node || typeof node !== "object") {
    return 0;
  }
  const self = node.type === "footnoteReference" ? 1 : 0;
  const children = Array.isArray(node.content) ? node.content : [];
  return children.reduce((sum, child) => sum + countFootnoteRefs(child), self);
}
|
||||
/**
 * List the footnote marker numbers of the document BODY, in reading order.
 *
 * The body is every top-level block before the first heading whose trimmed
 * plain text equals `notesHeading`; without such a heading it is the whole
 * document.
 *
 * Two representations are supported:
 *   - real `footnoteReference` nodes: their displayed number is not stored, so
 *     they are numbered 1..n by position. When at least one exists, this form
 *     takes precedence and text markers are ignored;
 *   - legacy `[N]` text markers: the literal N of every `[digits]` occurrence.
 *
 * @param doc JSON document
 * @param notesHeading text of the heading that starts the notes section
 * @returns array of marker numbers
 */
function footnoteMarkers(doc, notesHeading) {
  const topLevel = Array.isArray(doc?.content) ? doc.content : [];
  const startsNotes = (block) =>
    Boolean(block) && block.type === "heading" && plainText(block).trim() === notesHeading;
  const cut = topLevel.findIndex(startsNotes);
  const body = cut >= 0 ? topLevel.slice(0, cut) : topLevel;

  const refTotal = body.reduce((sum, block) => sum + countFootnoteRefs(block), 0);
  if (refTotal > 0) {
    return Array.from({ length: refTotal }, (_, i) => i + 1);
  }

  // No reference nodes: fall back to the legacy `[N]` text markers.
  const legacy = [];
  for (const block of body) {
    for (const hit of plainText(block).matchAll(/\[(\d+)\]/g)) {
      legacy.push(Number(hit[1]));
    }
  }
  return legacy;
}
|
||||
/**
 * Compute the `[old, new]` integrity tuples for two JSON docs.
 *
 * @param oldDoc the earlier document
 * @param newDoc the later document
 * @param notesHeading heading that separates body from notes (for footnotes)
 * @returns `{ images, links, tables, callouts, footnoteMarkers }`; the first
 *   four are `[oldCount, newCount]`, the last is `[oldMarkers, newMarkers]`
 */
function computeIntegrity(oldDoc, newDoc, notesHeading) {
  // Apply one measurement to both documents.
  const both = (measure) => [measure(oldDoc), measure(newDoc)];
  const nodesOfType = (type) => (doc) => countNodes(doc, (n) => n.type === type);
  return {
    images: both(nodesOfType("image")),
    links: both((doc) => countUniqueLinks(doc)),
    tables: both(nodesOfType("table")),
    callouts: both(nodesOfType("callout")),
    footnoteMarkers: both((doc) => footnoteMarkers(doc, notesHeading)),
  };
}
|
||||
/**
 * Lead text of the top-level block of `node` that contains position `pos`.
 *
 * `pos` is clamped into `[0, node.content.size]` before it is resolved. The
 * block is the ancestor at depth 1, or the root itself when the resolved
 * position has depth 0. Text longer than 80 characters is cut to 77 plus
 * "...".
 *
 * @param node document node exposing `content.size` and `resolve(pos)`
 * @param pos document position
 * @returns the (possibly shortened) block text, or "" if anything throws
 */
function blockContextAt(node, pos) {
  try {
    const clamped = Math.max(0, Math.min(pos, node.content.size));
    const resolved = node.resolve(clamped);
    // Depth 1 is the top-level block of a doc node.
    const blockDepth = resolved.depth >= 1 ? 1 : 0;
    const text = resolved.node(blockDepth).textContent || "";
    if (text.length > 80) {
      return `${text.slice(0, 77)}...`;
    }
    return text;
  } catch {
    // Context is best-effort: an unresolvable position simply has no context.
    return "";
  }
}
|
||||
/**
 * Shorten a string for the markdown summary.
 *
 * @param s the string
 * @param n maximum length (default 120)
 * @returns `s` unchanged when it has at most `n` characters; otherwise its
 *   first `n - 3` characters followed by "..."
 */
function truncate(s, n = 120) {
  if (s.length > n) {
    return `${s.slice(0, n - 3)}...`;
  }
  return s;
}
|
||||
/**
 * Coarse fallback diff: compare the documents block by block on plain text.
 *
 * Only used when the precise changeset pipeline throws, so the tool degrades
 * instead of failing. A top-level block of the old doc whose text appears
 * nowhere among the new doc's blocks is reported as a delete, and vice versa
 * as an insert; blocks whose text is blank are ignored.
 *
 * @param oldDoc the earlier JSON document
 * @param newDoc the later JSON document
 * @returns change records `{ op, block, text }` — all deletes (old-doc order)
 *   followed by all inserts (new-doc order); `block` is `text` shortened to 80
 */
function coarseDiff(oldDoc, newDoc) {
  const blockTexts = (doc) => (Array.isArray(doc?.content) ? doc.content : []).map(plainText);
  const oldTexts = blockTexts(oldDoc);
  const newTexts = blockTexts(newDoc);
  const oldSet = new Set(oldTexts);
  const newSet = new Set(newTexts);

  // Records for the texts of one side that are absent from the other side.
  const onlyIn = (texts, otherSide, op) =>
    texts
      .filter((t) => !otherSide.has(t) && t.trim() !== "")
      .map((t) => ({ op, block: truncate(t, 80), text: t }));

  return [...onlyIn(oldTexts, newSet, "delete"), ...onlyIn(newTexts, oldSet, "insert")];
}
|
||||
/**
 * Render a diff result as a human-readable, unified-style markdown summary.
 *
 * @param result `{ summary, integrity, changes }` as assembled by diffDocs
 * @param fellBack true when the coarse block-level fallback produced `changes`;
 *   adds a note saying so under the title
 * @returns the markdown text, lines joined with "\n"
 */
function renderMarkdown(result, fellBack) {
  const { summary, integrity, changes } = result;
  const pair = (tuple) => `${tuple[0]} -> ${tuple[1]}`;
  const markerList = (markers) => `[${markers.join(", ")}]`;

  const title = `# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`;
  const fallbackNote = fellBack
    ? ["", "> note: precise diff failed; coarse block-level diff shown."]
    : [];
  const integrityLines = [
    "## Integrity (old -> new)",
    `- images: ${pair(integrity.images)}`,
    `- links: ${pair(integrity.links)}`,
    `- tables: ${pair(integrity.tables)}`,
    `- callouts: ${pair(integrity.callouts)}`,
    `- footnoteMarkers: ${markerList(integrity.footnoteMarkers[0])} -> ${markerList(integrity.footnoteMarkers[1])}`,
  ];
  const changeLines =
    changes.length === 0
      ? ["(no textual changes)"]
      : changes.map((c) => {
          const sign = c.op === "insert" ? "+" : "-";
          const ctx = c.block ? ` @ ${truncate(c.block, 60)}` : "";
          return `${sign} ${truncate(c.text)}${ctx}`;
        });

  return [title, ...fallbackNote, "", ...integrityLines, "", "## Changes", ...changeLines].join("\n");
}
|
||||
/**
 * Run the precise changeset pipeline (recreateTransform -> ChangeSet.addSteps
 * -> simplifyChanges) and list the textual changes it finds. Throws whenever
 * any stage of the pipeline throws.
 *
 * @returns change records `{ op, block, text }`; for each simplified change
 *   the delete (if any) precedes the insert (if any)
 */
function collectPreciseChanges(oldDocJson, newDocJson) {
  const oldNode = Node.fromJSON(docmostSchema, oldDocJson);
  const newNode = Node.fromJSON(docmostSchema, newDocJson);
  const tr = recreateTransform(oldNode, newNode, {
    complexSteps: false,
    wordDiffs: true,
    simplifyDiff: true,
  });
  const changeSet = ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
  const simplified = simplifyChanges(changeSet.changes, newNode);

  const changes = [];
  // Record the text of `[from, to)` in `sourceNode`, skipping empty ranges and
  // ranges that contain no text.
  const record = (op, sourceNode, from, to) => {
    if (!(to > from)) {
      return;
    }
    const text = sourceNode.textBetween(from, to, "\n", " ");
    if (text.length === 0) {
      return;
    }
    changes.push({ op, block: blockContextAt(sourceNode, from), text });
  };
  for (const change of simplified) {
    // Deleted text lives in the OLD doc coordinate range [fromA, toA).
    record("delete", oldNode, change.fromA, change.toA);
    // Inserted text lives in the NEW doc coordinate range [fromB, toB).
    record("insert", newNode, change.fromB, change.toB);
  }
  return changes;
}

/**
 * Diff two ProseMirror JSON documents the way Docmost's history editor does and
 * serialize the result to text + integrity counts.
 *
 * When the precise pipeline throws on a pathological document pair, the
 * result is a coarse block-level text diff instead. The totals are always
 * derived from the FINAL change list, so nothing accumulated by a precise
 * pass that failed part-way can leak into the fallback's numbers.
 *
 * @param oldDocJson the earlier document
 * @param newDocJson the later document
 * @param notesHeading heading delimiting body from notes for footnote counting
 * @returns `{ summary: { inserted, deleted, blocksChanged }, integrity,
 *   changes, markdown }`; `inserted` / `deleted` are character counts and
 *   `blocksChanged` counts distinct (operation, block-context) pairs
 */
export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
  const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);

  let changes;
  let fellBack = false;
  try {
    changes = collectPreciseChanges(oldDocJson, newDocJson);
  } catch {
    // Pathological pair: degrade to a coarse block-level diff so the precise
    // pipeline can never make the tool fail. The markdown notes the fallback.
    fellBack = true;
    changes = coarseDiff(oldDocJson, newDocJson);
  }

  let inserted = 0;
  let deleted = 0;
  const changedBlocks = new Set();
  for (const c of changes) {
    if (c.op === "insert") {
      inserted += c.text.length;
    } else {
      deleted += c.text.length;
    }
    // Keyed by operation + block context ("i:…" / "d:…"); a change without
    // block context does not count towards blocksChanged.
    if (c.block) {
      changedBlocks.add(`${c.op[0]}:${c.block}`);
    }
  }

  const partial = {
    summary: { inserted, deleted, blocksChanged: changedBlocks.size },
    integrity,
    changes,
  };
  return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
|
||||
/**
 * Tally the marks on every `text` node of a JSON doc by `mark.type`
 * (e.g. `{ bold: 5, strike: 3, link: 2 }`).
 *
 * Counting happens in a Map and the result object is built with
 * Object.fromEntries, so a mark type that happens to be named like an
 * Object.prototype member ("constructor", "toString", "__proto__", ...) is
 * counted like any other type instead of picking up the inherited value.
 *
 * @param doc root JSON node; non-object nodes and marks without a string
 *   `type` are ignored
 * @returns plain object mapping each mark type to its number of occurrences
 */
function markCounts(doc) {
  const counts = new Map();
  const visit = (node) => {
    if (!node || typeof node !== "object") {
      return;
    }
    if (node.type === "text" && Array.isArray(node.marks)) {
      for (const mark of node.marks) {
        if (mark && typeof mark.type === "string") {
          counts.set(mark.type, (counts.get(mark.type) ?? 0) + 1);
        }
      }
    }
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        visit(child);
      }
    }
  };
  visit(doc);
  return Object.fromEntries(counts);
}
|
||||
/**
 * Build a VerifyReport for a content mutation. Never throws: on any internal
 * error it returns a minimal "changed (diff unavailable)" report, so it can
 * never break a write.
 *
 * `changed` is VALUE-based, not JSON-string-based: it is derived from the
 * actual deltas (text characters, blocks, mark counts, structural integrity
 * counts). Two value-equal docs that differ only in JSON key order therefore
 * report `changed: false` / "no content change".
 *
 * The structural delta comes from diffDocs's `integrity` tuples and makes
 * `changed` true for an image/table/callout/link count change that produces
 * no text diff (e.g. inserting an image or deleting a table).
 *
 * Mark counts are read with an own-property check, so a mark type named like
 * an Object.prototype member cannot be mistaken for an inherited value.
 *
 * @param before document JSON before the mutation
 * @param after document JSON after the mutation
 * @returns `{ changed, textInserted, textDeleted, blocksChanged, marks,
 *   summary }`, plus `structure` when a structural count changed; `marks` and
 *   `structure` map a type to `[before, after]`
 */
export function summarizeChange(before, after) {
  try {
    const diff = diffDocs(before, after);

    // Per-mark-type delta: include a type only when its count actually changed.
    const beforeMarks = markCounts(before);
    const afterMarks = markCounts(after);
    const countOf = (counts, type) =>
      Object.prototype.hasOwnProperty.call(counts, type) ? counts[type] || 0 : 0;
    const markTypes = new Set([...Object.keys(beforeMarks), ...Object.keys(afterMarks)]);
    const markDeltas = [];
    for (const type of markTypes) {
      const b = countOf(beforeMarks, type);
      const a = countOf(afterMarks, type);
      if (b !== a) {
        markDeltas.push([type, [b, a]]);
      }
    }
    // fromEntries defines own data properties, so even a "__proto__" key is
    // stored as an ordinary entry.
    const marks = Object.fromEntries(markDeltas);

    // Structural delta: count-based [old, new] tuples from diffDocs, kept only
    // when old != new.
    const structure = {};
    for (const type of ["images", "links", "tables", "callouts"]) {
      const [b, a] = diff.integrity[type];
      if (b !== a) {
        structure[type] = [b, a];
      }
    }

    const textInserted = diff.summary.inserted;
    const textDeleted = diff.summary.deleted;
    const blocksChanged = diff.summary.blocksChanged;
    const hasTextDelta = textInserted > 0 || textDeleted > 0 || blocksChanged > 0;
    const hasMarkDelta = markDeltas.length > 0;
    const hasStructureDelta = Object.keys(structure).length > 0;

    // VALUE-based decision: JSON key-order no-ops produce no delta at all.
    if (!hasTextDelta && !hasMarkDelta && !hasStructureDelta) {
      return {
        changed: false,
        textInserted: 0,
        textDeleted: 0,
        blocksChanged: 0,
        marks: {},
        summary: "no content change",
      };
    }

    const describe = ([type, [b, a]]) => `${type} ${b}→${a}`;
    const parts = [];
    // Mention text/blocks only when they changed, to avoid a misleading
    // "+0/-0 chars, 0 block(s)" prefix on a pure mark/structure change.
    if (hasTextDelta) {
      parts.push(`+${textInserted}/-${textDeleted} chars, ${blocksChanged} block(s)`);
    }
    if (hasMarkDelta) {
      parts.push(`marks: ${markDeltas.map(describe).join(", ")}`);
    }
    if (hasStructureDelta) {
      parts.push(Object.entries(structure).map(describe).join(", "));
    }

    // At least one delta exists here, so `parts` is non-empty.
    const report = {
      changed: true,
      textInserted,
      textDeleted,
      blocksChanged,
      marks,
      summary: `changed: ${parts.join("; ")}`,
    };
    if (hasStructureDelta) {
      report.structure = structure;
    }
    return report;
  } catch {
    // A pathological pair must never break a write: degrade to a minimal report.
    return {
      changed: true,
      textInserted: 0,
      textDeleted: 0,
      blocksChanged: 0,
      marks: {},
      summary: "changed (diff unavailable)",
    };
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,92 +0,0 @@
|
||||
/**
|
||||
* Filter functions to extract only relevant information from API responses
|
||||
* for better agent consumption
|
||||
*/
|
||||
/**
 * Reduce a workspace API response to the fields an agent needs.
 *
 * @param data raw workspace object
 * @returns `{ id, name, description, defaultSpaceId, createdAt, updatedAt, deletedAt }`
 */
export function filterWorkspace(data) {
  const { id, name, description, defaultSpaceId, createdAt, updatedAt, deletedAt } = data;
  return { id, name, description, defaultSpaceId, createdAt, updatedAt, deletedAt };
}
|
||||
/**
 * Reduce a space API response to the fields an agent needs.
 *
 * @param space raw space object
 * @returns `{ id, name, description, slug, visibility, createdAt, updatedAt, deletedAt }`
 */
export function filterSpace(space) {
  const { id, name, description, slug, visibility, createdAt, updatedAt, deletedAt } = space;
  return { id, name, description, slug, visibility, createdAt, updatedAt, deletedAt };
}
|
||||
/**
 * Reduce a group API response to the fields an agent needs.
 *
 * @param group raw group object
 * @returns `{ id, name, description, workspaceId, createdAt, updatedAt, deletedAt }`
 */
export function filterGroup(group) {
  const { id, name, description, workspaceId, createdAt, updatedAt, deletedAt } = group;
  return { id, name, description, workspaceId, createdAt, updatedAt, deletedAt };
}
|
||||
/**
 * Reduce a page API response to the fields an agent needs.
 *
 * @param page raw page object
 * @param content converted markdown; included as `content` whenever it is a
 *   string, the empty string included
 * @param subpages child pages; included as `subpages` (each reduced to
 *   `{ id, title }`) only when the list is non-empty
 * @returns the filtered page object
 */
export function filterPage(page, content, subpages) {
  const filtered = {
    id: page.id,
    slugId: page.slugId,
    title: page.title,
    parentPageId: page.parentPageId,
    spaceId: page.spaceId,
    isLocked: page.isLocked,
    createdAt: page.createdAt,
    updatedAt: page.updatedAt,
    deletedAt: page.deletedAt,
  };
  if (typeof content === "string") {
    filtered.content = content;
  }
  if (subpages && subpages.length > 0) {
    filtered.subpages = subpages.map(({ id, title }) => ({ id, title }));
  }
  return filtered;
}
|
||||
/**
 * Reduce a comment API response to the fields an agent needs.
 *
 * @param comment raw comment object
 * @param markdownContent converted markdown body; replaces `comment.content`
 *   unless it is null/undefined
 * @returns the filtered comment. Optional fields that are missing or falsy are
 *   reported as `null`, and `type` defaults to "page". The suggestion fields
 *   carry the proposed replacement text (if any) and, once it has been
 *   applied, when and by whom.
 */
export function filterComment(comment, markdownContent) {
  const orNull = (value) => value || null;
  const { id, pageId, creatorId, createdAt } = comment;
  return {
    id,
    pageId,
    content: markdownContent ?? comment.content,
    selection: orNull(comment.selection),
    type: comment.type || "page",
    parentCommentId: orNull(comment.parentCommentId),
    creatorId,
    creatorName: orNull(comment.creator?.name),
    createdAt,
    editedAt: orNull(comment.editedAt),
    resolvedAt: orNull(comment.resolvedAt),
    resolvedById: orNull(comment.resolvedById),
    suggestedText: orNull(comment.suggestedText),
    suggestionAppliedAt: orNull(comment.suggestionAppliedAt),
    suggestionAppliedById: orNull(comment.suggestionAppliedById),
  };
}
|
||||
/**
 * Project a search hit onto the fields returned to callers, flattening the
 * nested `space` object into `spaceId` / `spaceName`.
 *
 * @param {object} result - Raw search result.
 * @returns {object} The filtered hit; `spaceId` and `spaceName` are
 *   `undefined` when `result.space` is null/undefined.
 */
export function filterSearchResult(result) {
  const {
    id,
    title,
    parentPageId,
    createdAt,
    updatedAt,
    rank,
    highlight,
    space,
  } = result;
  return {
    id,
    title,
    parentPageId,
    createdAt,
    updatedAt,
    rank,
    highlight,
    spaceId: space?.id,
    spaceName: space?.name,
  };
}
|
||||
@@ -1,101 +0,0 @@
|
||||
/**
|
||||
* Footnote diagnostics for imported Markdown (issue #166).
|
||||
*
|
||||
* A PURE, fence-aware text scan (independent of the Markdown->ProseMirror
|
||||
* conversion path, so it reports the same problems for `create_page`,
|
||||
* `update_page` and `import_page_markdown`). It never changes the document — the
|
||||
* importer still creates the page; this only surfaces footnote problems to the
|
||||
* caller so an agent can fix its own markup instead of shipping broken footnotes.
|
||||
*
|
||||
* Detected problems:
|
||||
* - danglingReferences: a `[^id]` reference with no `[^id]:` definition.
|
||||
* - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace.
|
||||
* - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the
|
||||
* first is kept on import — first-wins; see extractFootnotes).
|
||||
* - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic:
|
||||
* the line, trimmed, starts with `|`) — footnotes in table cells often do not
|
||||
* render as expected.
|
||||
*/
|
||||
import { lexFootnoteLines, forEachFootnoteReference, } from "./footnote-lex.js";
|
||||
/**
 * Analyze the footnotes in a Markdown string.
 *
 * Walks the tokens produced by `lexFootnoteLines`, ignoring fenced lines, and
 * gathers reference ids (in first-appearance order), definition texts per id,
 * and the ids referenced on lines that look like table rows.
 *
 * @param {string} markdown - Markdown body to scan.
 * @returns {{danglingReferences: string[], emptyDefinitions: string[],
 *   duplicateDefinitions: string[], referencesInTables: string[],
 *   warnings: string[]}} The id lists plus one human-readable warning line
 *   per non-empty category.
 */
export function analyzeFootnotes(markdown) {
  // A Set keeps insertion order, so it doubles as the first-appearance list.
  const referencedIds = new Set();
  const idsInTableRows = new Set();
  // id -> every definition text seen for that id, in document order.
  const definitionTexts = new Map();

  const noteReference = (id, inTable) => {
    referencedIds.add(id);
    if (inTable) {
      idsInTableRows.add(id);
    }
  };

  for (const { line, inFence, definition } of lexFootnoteLines(markdown)) {
    if (inFence) {
      continue;
    }
    if (!definition) {
      // Table heuristic: the trimmed line starts with a pipe.
      const isTableRow = line.trimStart().startsWith("|");
      forEachFootnoteReference(line, (id) => noteReference(id, isTableRow));
      continue;
    }
    const { id, text } = definition;
    const known = definitionTexts.get(id);
    if (known) {
      known.push(text);
    } else {
      definitionTexts.set(id, [text]);
    }
    // References inside a definition's text count as references too, so a
    // `[^b]` mentioned only there is not reported as dangling.
    forEachFootnoteReference(text, (refId) => noteReference(refId, false));
  }

  const danglingReferences = [...referencedIds].filter(
    (id) => !definitionTexts.has(id),
  );
  const duplicateDefinitions = [];
  const emptyDefinitions = [];
  definitionTexts.forEach((texts, id) => {
    if (texts.length >= 2) {
      duplicateDefinitions.push(id);
    }
    // Only the first definition is inspected for emptiness.
    if ((texts[0] ?? "").trim() === "") {
      emptyDefinitions.push(id);
    }
  });
  const tableRefs = Array.from(idsInTableRows);

  const formatIds = (ids) => ids.map((id) => `[^${id}]`).join(", ");
  const warnings = [];
  if (danglingReferences.length > 0) {
    warnings.push(`Footnote reference(s) with no matching definition: ${formatIds(danglingReferences)} (each will render as an empty footnote in the editor).`);
  }
  if (emptyDefinitions.length > 0) {
    warnings.push(`Footnote definition(s) with empty text: ${formatIds(emptyDefinitions)}.`);
  }
  if (duplicateDefinitions.length > 0) {
    warnings.push(`Footnote id(s) defined more than once (only the first definition was kept): ${formatIds(duplicateDefinitions)}.`);
  }
  if (tableRefs.length > 0) {
    warnings.push(`Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${formatIds(tableRefs)}.`);
  }

  return {
    danglingReferences,
    emptyDefinitions,
    duplicateDefinitions,
    referencesInTables: tableRefs,
    warnings,
  };
}
|
||||
/**
 * Build the optional `footnoteWarnings` field for a page-write tool result.
 *
 * Intended to be spread into a result object:
 * `{ ...result, ...footnoteWarningsField(text) }`.
 *
 * @param {string} markdown - Markdown body to analyze.
 * @returns {{footnoteWarnings: string[]}|{}} An object carrying the warning
 *   lines when `analyzeFootnotes` reports any, otherwise an empty object.
 */
export function footnoteWarningsField(markdown) {
  const analysis = analyzeFootnotes(markdown);
  if (analysis.warnings.length === 0) {
    return {};
  }
  return { footnoteWarnings: analysis.warnings };
}
|
||||
@@ -1,88 +0,0 @@
|
||||
/**
|
||||
* Inline-authoring helpers for footnotes (MCP).
|
||||
*
|
||||
* These build/identify footnote DEFINITION nodes for the author-inline tool
|
||||
* (`insertInlineFootnote` in transforms.ts): a content key to de-duplicate notes
|
||||
* by text, a definition-node factory, and a fresh uuidv7-style id generator.
|
||||
*
|
||||
* Split out of `footnote-canonicalize.ts` so that module stays a pure MIRROR of
|
||||
* the editor-ext canonicalizer (compositionally symmetric to the editor-ext
|
||||
* copy, which keeps its authoring helpers in `footnote-util.ts`). The pure
|
||||
* canonicalizer has no dependency on these.
|
||||
*/
|
||||
const FOOTNOTE_DEFINITION_NAME = "footnoteDefinition";

/**
 * Deep-copy a JSON-like value.
 *
 * Uses the global `structuredClone` when the runtime provides it and falls
 * back to a JSON round-trip otherwise.
 *
 * @param {*} v - Value to copy.
 * @returns {*} An independent deep copy of `v`.
 */
function cloneJson(v) {
  const hasStructuredClone = typeof structuredClone === "function";
  return hasStructuredClone ? structuredClone(v) : JSON.parse(JSON.stringify(v));
}
|
||||
/**
 * Compute a normalized content key for a footnote definition node, used to
 * recognise two definitions as the same footnote.
 *
 * The key is built from every text node found under `defNode` (depth-first,
 * document order): each contributes its text immediately followed by its mark
 * types, sorted and comma-joined. The concatenation then has runs of
 * space/tab/CR/LF collapsed to a single space and is trimmed. Notes that read
 * the same but carry different marks therefore get different keys.
 *
 * @param {object} defNode - A ProseMirror-JSON node (typically a definition).
 * @returns {string} The content key; "" when no text nodes are present.
 */
export function footnoteContentKey(defNode) {
  const gather = (node, pieces) => {
    if (!node || typeof node !== "object") {
      return pieces;
    }
    if (node.type === "text" && typeof node.text === "string") {
      let markSignature = "";
      if (Array.isArray(node.marks)) {
        markSignature = node.marks
          .map((mark) => mark?.type)
          .filter(Boolean)
          .sort()
          .join(",");
      }
      pieces.push(`${node.text}${markSignature}`);
    }
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        gather(child, pieces);
      }
    }
    return pieces;
  };

  const assembled = gather(defNode, []).join("");
  // Whitespace is collapsed on the assembled string, so it also merges
  // whitespace that straddles two adjacent text nodes.
  return assembled.replace(/[ \t\r\n]+/g, " ").trim();
}
|
||||
/**
 * Build a footnote definition node for `id` whose single paragraph holds a
 * deep copy of the given inline nodes.
 *
 * @param {string} id - Footnote id stored in `attrs.id`.
 * @param {Array<object>} inlineNodes - Inline ProseMirror-JSON nodes; any
 *   non-array value yields an empty paragraph content array.
 * @returns {object} The new definition node.
 */
export function makeFootnoteDefinition(id, inlineNodes) {
  let paragraphContent = [];
  if (Array.isArray(inlineNodes)) {
    // Copy so the new node never shares structure with the caller's nodes.
    paragraphContent = cloneJson(inlineNodes);
  }
  const paragraph = { type: "paragraph", content: paragraphContent };
  return {
    type: FOOTNOTE_DEFINITION_NAME,
    attrs: { id },
    content: [paragraph],
  };
}
|
||||
/**
 * Generate a uuidv7-style, time-ordered id string.
 *
 * Layout: the current `Date.now()` as 12 hex digits (split 8-4), a group
 * starting with the version digit `7`, a group whose first digit is in
 * `8..b`, and 12 trailing hex digits. The non-time digits come from
 * `Math.random()`, so the id is not suitable where cryptographic
 * unpredictability matters.
 *
 * @returns {string} An id shaped `xxxxxxxx-xxxx-7xxx-[89ab]xxx-xxxxxxxxxxxx`.
 */
export function generateFootnoteId() {
  const randomHex = (count) => {
    const digits = [];
    while (digits.length < count) {
      digits.push(Math.floor(Math.random() * 16).toString(16));
    }
    return digits.join("");
  };

  const timestampHex = Date.now().toString(16).padStart(12, "0");
  const versionGroup = `7${randomHex(3)}`;
  // First digit of the fourth group is limited to 8, 9, a or b.
  const variantLead = (8 + Math.floor(Math.random() * 4)).toString(16);
  const variantGroup = `${variantLead}${randomHex(3)}`;

  return [
    timestampHex.slice(0, 8),
    timestampHex.slice(8, 12),
    versionGroup,
    variantGroup,
    randomHex(12),
  ].join("-");
}
|
||||
@@ -1,215 +0,0 @@
|
||||
/**
|
||||
* Server-side footnote canonicalizer (MCP mirror — PURE).
|
||||
*
|
||||
* `canonicalizeFootnotes(doc)` is a pure ProseMirror-JSON port of the editor's
|
||||
* `footnoteSyncPlugin` end-state, identical in behaviour to
|
||||
* `@docmost/editor-ext`'s `canonicalizeFootnotes`. It is mirrored here — rather
|
||||
* than imported from editor-ext — for the SAME reason `footnote-lex.ts` and the
|
||||
* `docmost-schema.ts` nodes are mirrored: the MCP package is deliberately
|
||||
* decoupled from the browser/React-heavy editor barrel and operates on plain
|
||||
* JSON. The editor-ext copy owns the golden test against the live plugin; this
|
||||
* copy must stay behaviourally identical (a SHARED golden corpus, exercised by
|
||||
* both test suites, pins that — see `test/unit/footnote-corpus.mjs`).
|
||||
*
|
||||
* This module is the pure MIRROR only. The inline-authoring helpers
|
||||
* (`footnoteContentKey`, `makeFootnoteDefinition`, `generateFootnoteId`) used by
|
||||
* `insertInlineFootnote` live in the sibling `footnote-authoring.ts`, so this
|
||||
* file is compositionally symmetric to the editor-ext copy.
|
||||
*
|
||||
* Why it exists: every NON-editor write path (markdown import, update_page_json,
|
||||
* docmost_transform, insert_footnote) builds ProseMirror JSON directly, so the
|
||||
* editor's footnote plugins never run and the canonical topology (sequential
|
||||
* numbering by first reference, one trailing list, no orphans, no raw `[^id]`)
|
||||
* was never enforced. Running this at the end of every write path closes that
|
||||
* gap; because it is idempotent, it is a no-op when the footnotes are already
|
||||
* canonical (no spurious mutations / git-sync churn).
|
||||
*
|
||||
* ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
|
||||
* `canonicalizeFootnotes(doc)` before writing — the current callers are
|
||||
* `markdownToProseMirrorCanonical` (page markdown import/update; the plain
|
||||
* `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
|
||||
* reference-less definition), `update_page_json`, `docmost_transform`,
|
||||
* `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
|
||||
* NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
|
||||
* and comment-vs-page nuances make a single naive wrapper unsafe).
|
||||
*/
|
||||
// ProseMirror node type names used by the footnote canonicalizer.
const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
const FOOTNOTES_LIST_NAME = "footnotesList";
const FOOTNOTE_DEFINITION_NAME = "footnoteDefinition";

/**
 * Deep-copy a JSON-like value, preferring the global `structuredClone` and
 * falling back to a JSON round-trip when it is unavailable.
 *
 * @param {*} v - Value to copy.
 * @returns {*} An independent deep copy of `v`.
 */
function cloneJson(v) {
  if (typeof structuredClone !== "function") {
    return JSON.parse(JSON.stringify(v));
  }
  return structuredClone(v);
}
|
||||
/**
 * Tell whether `node` is a paragraph with no children.
 *
 * @param {*} node - Candidate ProseMirror-JSON node.
 * @returns {boolean} True for a `paragraph` whose `content` is missing, not
 *   an array, or an empty array; false for anything else (including null).
 */
function isEmptyParagraph(node) {
  if (!node || node.type !== "paragraph") {
    return false;
  }
  return !Array.isArray(node.content) || node.content.length === 0;
}
|
||||
/**
 * Depth-first walk that appends each distinct footnote reference id to `out`
 * in the order it is first encountered.
 *
 * @param {*} node - Node to scan; non-objects are ignored.
 * @param {string[]} out - Receives ids in first-appearance order (mutated).
 * @param {Set<string>} seen - Ids already recorded (mutated).
 * @returns {void}
 */
function collectReferenceIds(node, out, seen) {
  if (!node || typeof node !== "object") {
    return;
  }
  const { type, attrs, content } = node;
  if (type === FOOTNOTE_REFERENCE_NAME) {
    const refId = attrs?.id;
    // References without an id, or with an id already seen, add nothing.
    if (refId && !seen.has(refId)) {
      seen.add(refId);
      out.push(refId);
    }
  }
  if (!Array.isArray(content)) {
    return;
  }
  content.forEach((child) => collectReferenceIds(child, out, seen));
}
|
||||
/**
 * Depth-first walk that appends every footnote definition node to `out` in
 * document order. A definition's own children are scanned as well.
 *
 * @param {*} node - Node to scan; non-objects are ignored.
 * @param {object[]} out - Receives the definition nodes themselves (mutated).
 * @returns {void}
 */
function collectDefinitions(node, out) {
  if (!node || typeof node !== "object") {
    return;
  }
  const { type, content } = node;
  if (type === FOOTNOTE_DEFINITION_NAME) {
    out.push(node);
  }
  if (!Array.isArray(content)) {
    return;
  }
  content.forEach((child) => collectDefinitions(child, out));
}
|
||||
/**
 * Build a footnote definition node for `id` containing one paragraph that has
 * no `content` key.
 *
 * @param {string} id - Footnote id stored in `attrs.id`.
 * @returns {object} The new definition node.
 */
function emptyDefinition(id) {
  const blankParagraph = { type: "paragraph" };
  return {
    type: FOOTNOTE_DEFINITION_NAME,
    attrs: { id },
    content: [blankParagraph],
  };
}
|
||||
/**
 * Deep equality over plain JSON values.
 *
 * Arrays are compared element by element in order, so a reordered array is
 * unequal. Objects are compared by own enumerable keys regardless of key
 * order. An array never equals a non-array, and non-identical primitives are
 * unequal.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {boolean} True when the two values are structurally equal.
 */
function deepEqualJson(a, b) {
  if (a === b) {
    return true;
  }
  if (a == null || b == null) {
    return false;
  }
  if (typeof a !== typeof b) {
    return false;
  }

  const aIsArray = Array.isArray(a);
  const bIsArray = Array.isArray(b);
  if (aIsArray || bIsArray) {
    if (!(aIsArray && bIsArray) || a.length !== b.length) {
      return false;
    }
    // Positional comparison: order matters.
    for (const [index, item] of a.entries()) {
      if (!deepEqualJson(item, b[index])) {
        return false;
      }
    }
    return true;
  }

  if (typeof a !== "object") {
    // Same-typed primitives that were not identical.
    return false;
  }

  const keysOfA = Object.keys(a);
  if (keysOfA.length !== Object.keys(b).length) {
    return false;
  }
  return keysOfA.every(
    (key) =>
      Object.prototype.hasOwnProperty.call(b, key) &&
      deepEqualJson(a[key], b[key]),
  );
}
|
||||
/**
 * Canonicalize the footnotes of a ProseMirror-JSON document.
 *
 * Works on a deep clone of `doc`. The result has exactly one definition per
 * referenced id, ordered by first reference, held in a single top-level
 * footnotes list; definitions whose id is never referenced are dropped, and a
 * referenced id with no definition gets an empty one. With no references at
 * all, every footnotes list is removed.
 *
 * @param {*} doc - Document to canonicalize.
 * @returns {*} A new document; or `doc` itself, untouched, when it is not an
 *   object with an array `content`.
 */
export function canonicalizeFootnotes(doc) {
  const looksLikeDoc =
    doc != null && typeof doc === "object" && Array.isArray(doc.content);
  if (!looksLikeDoc) {
    return doc;
  }
  const out = cloneJson(doc);

  // Distinct reference ids in document order; this fixes the definition order.
  const referenceIds = [];
  collectReferenceIds(out, referenceIds, new Set());

  // Every definition node, at any depth, in document order.
  const defNodes = [];
  collectDefinitions(out, defNodes);

  // First definition per id wins; later ones with the same id are ignored.
  const firstDefById = new Map();
  defNodes.forEach((def) => {
    const defId = def?.attrs?.id;
    if (defId && !firstDefById.has(defId)) {
      firstDefById.set(defId, def);
    }
  });

  // One definition per referenced id, in reference order: a shallow copy of
  // the existing node with `attrs.id` set, or a synthesized empty definition.
  const orderedDefs = referenceIds.map((id) => {
    const existing = firstDefById.get(id);
    if (!existing) {
      return emptyDefinition(id);
    }
    return { ...existing, attrs: { ...(existing.attrs ?? {}), id } };
  });

  // No references: the document must not carry any footnotes list.
  if (referenceIds.length === 0) {
    stripFootnotesListsDeep(out);
    return out;
  }

  // Already canonical (a single top-level list whose content equals the
  // ordered definitions, and no definitions elsewhere): leave the list where
  // it is instead of moving it to the end.
  const topLevelLists = out.content.filter(
    (block) => block && block.type === FOOTNOTES_LIST_NAME,
  );
  const alreadyCanonical =
    topLevelLists.length === 1 &&
    defNodes.length === orderedDefs.length &&
    deepEqualJson(topLevelLists[0].content, orderedDefs);
  if (alreadyCanonical) {
    return out;
  }

  // Rebuild: remove every list and every bare definition at any depth (their
  // nodes were already gathered above), then insert one list after the last
  // top-level block that is not an empty paragraph.
  stripFootnotesListsDeep(out);
  stripFootnoteDefinitionsDeep(out);

  const topLevel = out.content;
  let insertAt = topLevel.length;
  for (; insertAt > 0; insertAt -= 1) {
    if (!isEmptyParagraph(topLevel[insertAt - 1])) {
      break;
    }
  }
  topLevel.splice(insertAt, 0, {
    type: FOOTNOTES_LIST_NAME,
    content: orderedDefs,
  });
  return out;
}
|
||||
/**
 * Remove every footnotes-list node at any depth below `node`.
 * Mutates `node` (and its descendants) in place.
 *
 * @param {*} node - Root to clean; ignored unless it is an object with an
 *   array `content`.
 * @returns {void}
 */
function stripFootnotesListsDeep(node) {
  const hasChildren =
    node && typeof node === "object" && Array.isArray(node.content);
  if (!hasChildren) {
    return;
  }
  const kept = node.content.filter(
    (child) => !child || child.type !== FOOTNOTES_LIST_NAME,
  );
  node.content = kept;
  kept.forEach((child) => stripFootnotesListsDeep(child));
}
|
||||
/**
 * Remove every footnote-definition node at any depth below `node`.
 * Mutates `node` (and its descendants) in place.
 *
 * In `canonicalizeFootnotes` this runs in the rebuild path after the lists
 * have been stripped, so what remains to remove are definitions that sat
 * outside any list.
 *
 * @param {*} node - Root to clean; ignored unless it is an object with an
 *   array `content`.
 * @returns {void}
 */
function stripFootnoteDefinitionsDeep(node) {
  const hasChildren =
    node && typeof node === "object" && Array.isArray(node.content);
  if (!hasChildren) {
    return;
  }
  const kept = node.content.filter(
    (child) => !child || child.type !== FOOTNOTE_DEFINITION_NAME,
  );
  node.content = kept;
  kept.forEach((child) => stripFootnoteDefinitionsDeep(child));
}
|
||||
@@ -1,55 +0,0 @@
|
||||
/**
|
||||
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
|
||||
*
|
||||
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
|
||||
* definition lines and rebuilds a footnotes section) and the diagnostics
|
||||
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
|
||||
* are definitions and which lines are inert (inside a code fence). Sharing one
|
||||
* lexer makes "the analyzer sees what the importer leaves" a structural property
|
||||
* instead of two hand-kept copies that can drift (#166 review).
|
||||
*
|
||||
* NOTE: this is deliberately NOT shared with editor-ext's
|
||||
* `extractFootnoteDefinitions` — that lives in a different package and the
|
||||
* decoupling between the editor and the MCP mirror is intentional.
|
||||
*/
|
||||
/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */
export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;

/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */
export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g;

/** Opening/closing code fence marker (``` or ~~~). */
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;

/**
 * Classify every line of `markdown`, tracking fenced-code state. Pure.
 *
 * Lines are split on "\n". A line that starts (after optional whitespace)
 * with a fence marker opens a fence, or closes the open one when it uses the
 * same marker character; the marker line itself and every line inside a fence
 * are reported with `inFence: true` and no definition.
 *
 * CRLF input: splitting on "\n" leaves a trailing "\r" on each line. Because
 * `.` does not match "\r" and `$` only matches at the very end, the
 * definition regex would otherwise never match such a line, so the match is
 * made against the line without that one trailing "\r". The `line` field is
 * still the original, unmodified line.
 *
 * @param {string} markdown - Markdown source.
 * @returns {Array<{line: string, inFence: boolean,
 *   definition: {id: string, text: string}|null}>} One token per line.
 */
export function lexFootnoteLines(markdown) {
  const out = [];
  let fence = null;
  for (const line of markdown.split("\n")) {
    const fenceMatch = FENCE_RE.exec(line);
    if (fenceMatch) {
      const marker = fenceMatch[2][0];
      if (fence === null) {
        fence = marker; // opening fence
      } else if (marker === fence) {
        fence = null; // matching closing fence
      }
      out.push({ line, inFence: true, definition: null });
      continue;
    }
    if (fence !== null) {
      out.push({ line, inFence: true, definition: null });
      continue;
    }
    // Match without the CR of a CRLF ending so the captured text is clean
    // and the `$` anchor can succeed.
    const matchable = line.endsWith("\r") ? line.slice(0, -1) : line;
    const m = FOOTNOTE_DEF_RE.exec(matchable);
    out.push({
      line,
      inFence: false,
      definition: m ? { id: m[1], text: m[2] } : null,
    });
  }
  return out;
}
|
||||
/**
 * Invoke `onRef(id)` for every `[^id]` reference found in `line`, left to
 * right.
 *
 * @param {string} line - Text to scan.
 * @param {(id: string) => void} onRef - Called once per reference occurrence.
 * @returns {void}
 */
export function forEachFootnoteReference(line, onRef) {
  // The regex is global and shared, so its cursor is rewound before scanning.
  FOOTNOTE_REF_RE_G.lastIndex = 0;
  for (
    let hit = FOOTNOTE_REF_RE_G.exec(line);
    hit !== null;
    hit = FOOTNOTE_REF_RE_G.exec(line)
  ) {
    onRef(hit[1]);
  }
}
|
||||
@@ -1,110 +0,0 @@
|
||||
// Detection + collection of INTERNAL Docmost file URLs inside a ProseMirror doc.
|
||||
//
|
||||
// An internal file URL is a relative path served by Docmost's authenticated
|
||||
// attachment route (`GET /api/files/:fileId/:fileName`). It is useless to an
|
||||
// external consumer (relative + needs a Docmost session), so the stash tool
|
||||
// mirrors every such resource into the blob sandbox and rewrites its `src`.
|
||||
//
|
||||
// The criterion is "internal file URL", NOT the node TYPE: image, drawio,
|
||||
// excalidraw, video and file nodes all carry such a `src`, so a type-agnostic
|
||||
// walker covers them all. External http(s) srcs (CDNs) are left untouched.
|
||||
//
|
||||
// Mirrors editor-ext's isInternalFileUrl / normalizeFileUrl (kept as a local
|
||||
// dup so the ESM mcp package does not depend on the editor-ext build).
|
||||
/**
 * Tell whether `url` is an internal file URL: a string that, once trimmed,
 * starts with `/api/files/` or `/files/`.
 *
 * @param {*} url - Candidate value; non-strings are never internal.
 * @returns {boolean} True for an internal file URL.
 */
function isInternalFileUrl(url) {
  if (typeof url !== "string") {
    return false;
  }
  const candidate = url.trim();
  return ["/api/files/", "/files/"].some((prefix) =>
    candidate.startsWith(prefix),
  );
}
|
||||
/**
 * Normalize a file `src`: trim it and rewrite a bare `/files/...` path to the
 * `/api/files/...` form. Any other value is returned trimmed.
 *
 * @param {string} src - Source URL or path.
 * @returns {string} The normalized value.
 */
export function normalizeFileUrl(src) {
  const path = src.trim();
  return path.startsWith("/files/") ? `/api${path}` : path;
}
|
||||
/**
 * Resolve a page-content `src` into a path relative to the `/api` base
 * (e.g. `/files/<id>/<name>`), or throw.
 *
 * SECURITY (SSRF / path traversal): `src` comes from page content and must be
 * treated as attacker-controlled. A prefix check on the raw string is not
 * enough, because `..` segments such as `/api/files/../auth/whoami` are only
 * collapsed later, when the URL is resolved. This function therefore
 * canonicalizes first and checks second:
 *   1. any percent-encoded dot or slash (`%2e` / `%2f`, any case) is refused
 *      outright, since the URL parser would leave it encoded and it would
 *      slip past the canonical-path check;
 *   2. the value is resolved against a dummy base with `new URL(...)`, which
 *      normalizes `.` and `..` segments (the base host is never contacted);
 *   3. the canonical pathname must still start with `/api/files/`.
 *
 * All checks happen before any network use of the value.
 *
 * @param {string} src - Source path taken from a node's `attrs.src`.
 * @returns {string} The canonical pathname with its leading `/api` removed.
 * @throws {Error} When `src` contains an encoded dot/slash, cannot be parsed,
 *   or resolves outside `/api/files/`.
 */
export function resolveInternalFilePath(src) {
  const candidate = src.trim();

  const hasEncodedDotOrSlash = /%2e|%2f/i.test(candidate);
  if (hasEncodedDotOrSlash) {
    throw new Error(`Refusing internal file src with percent-encoded path segment: "${src}"`);
  }

  let resolved;
  try {
    // The base only exists so a relative src can be parsed and normalized.
    resolved = new URL(candidate, "http://internal.invalid");
  } catch {
    throw new Error(`Invalid internal file src: "${src}"`);
  }

  const { pathname } = resolved;
  if (!pathname.startsWith("/api/files/")) {
    throw new Error(`Refusing internal file src that escapes /api/files/: "${src}"`);
  }

  // Drop the leading "/api": the returned path is relative to that base.
  return pathname.slice("/api".length);
}
|
||||
/**
 * Collect every node whose `attrs.src` is an internal file URL.
 *
 * Walks `doc` in document (pre-order) sequence, descending into arrays and
 * into each node's `content` array. The returned entries are references to
 * the nodes themselves, not copies, so a caller can rewrite `attrs.src` on
 * them in place.
 *
 * @param {*} doc - A node, an array of nodes, or anything else (ignored).
 * @returns {object[]} Matching nodes in document order.
 */
export function collectInternalFileNodes(doc) {
  const matches = [];
  // Explicit stack; children are pushed in reverse so they pop in order.
  const pending = [doc];
  const pushReversed = (items) => {
    for (let i = items.length - 1; i >= 0; i -= 1) {
      pending.push(items[i]);
    }
  };

  while (pending.length > 0) {
    const node = pending.pop();
    if (!node || typeof node !== "object") {
      continue;
    }
    if (Array.isArray(node)) {
      pushReversed(node);
      continue;
    }
    if (node.attrs && isInternalFileUrl(node.attrs.src)) {
      matches.push(node);
    }
    if (Array.isArray(node.content)) {
      pushReversed(node.content);
    }
  }
  return matches;
}
|
||||
@@ -1,393 +0,0 @@
|
||||
/**
|
||||
* Surgical text edits on a ProseMirror document without re-importing it.
|
||||
*
|
||||
* Each edit replaces an exact substring of a block's inline text, preserving
|
||||
* every node id, mark and attribute around it. Matching works at the
|
||||
* INLINE-CONTAINER (block) level: a block's text nodes are flattened into a
|
||||
* per-character array, so a `find` may freely cross bold/italic/link
|
||||
* boundaries (separate text nodes). The replacement inherits marks from the
|
||||
* unchanged common prefix/suffix of the match, so editing plain text next to a
|
||||
* bold word keeps the bold word bold, and editing the inside of a bold word
|
||||
* keeps the inserted text bold. This is the safe alternative to a full markdown
|
||||
* re-import for small wording fixes.
|
||||
*/
|
||||
import { stripInlineMarkdown, stripBalancedWrappers } from "./text-normalize.js";
|
||||
/**
 * Placeholder code unit standing in for one opaque (non-text) inline node.
 *
 * Written as an explicit escape rather than a raw character: U+FFFC is easy
 * to lose or overlook in source, and the value must be exactly one UTF-16
 * code unit so that slot indices stay aligned with the flattened text.
 */
const ATOM_PLACEHOLDER = "\uFFFC"; // U+FFFC OBJECT REPLACEMENT CHARACTER
|
||||
/**
 * Find the start index of every valid occurrence of `needle` in a block.
 *
 * `plain` is the block's flattened text and `chars` its slot array, index for
 * index. An occurrence is valid only when none of the slots it covers is an
 * atom (a slot with a truthy `.atom`). A text slot that merely holds the
 * placeholder character has no `.atom` and matches normally.
 *
 * Valid matches do not overlap: the scan resumes after the end of each one.
 * A rejected candidate resumes one code unit later, so a valid match that
 * starts inside the rejected range is still found.
 *
 * @param {Array<object>} chars - Slot array for the block.
 * @param {string} plain - Flattened text aligned with `chars`.
 * @param {string} needle - Text to look for; a falsy needle matches nothing.
 * @returns {number[]} Ascending start indices of the valid matches.
 */
function findValidMatches(chars, plain, needle) {
  const positions = [];
  if (!needle) {
    return positions;
  }
  const span = needle.length;
  let searchFrom = 0;
  for (;;) {
    const start = plain.indexOf(needle, searchFrom);
    if (start === -1) {
      return positions;
    }
    const touchesAtom = chars
      .slice(start, start + span)
      .some((slot) => slot && slot.atom);
    if (touchesAtom) {
      searchFrom = start + 1;
    } else {
      positions.push(start);
      searchFrom = start + span;
    }
  }
}
|
||||
/**
 * Order-sensitive equality of two marks arrays.
 *
 * The same array instance is equal to itself; otherwise the arrays must have
 * the same length and each pair of entries must serialize to the same JSON
 * string (so key order inside a mark matters).
 *
 * @param {Array<object>} a - First marks array.
 * @param {Array<object>} b - Second marks array.
 * @returns {boolean} True when the arrays are equal.
 */
function marksEqual(a, b) {
  if (a === b) {
    return true;
  }
  if (a.length !== b.length) {
    return false;
  }
  for (const [index, mark] of a.entries()) {
    if (JSON.stringify(mark) !== JSON.stringify(b[index])) {
      return false;
    }
  }
  return true;
}
|
||||
/**
 * Tell whether `node` is an inline container: a node whose `content` array
 * directly holds at least one child of type "text".
 *
 * @param {*} node - Candidate node; null/undefined is allowed.
 * @returns {boolean} True when a direct text child exists.
 */
function isInlineBlock(node) {
  const children = node?.content;
  if (!Array.isArray(children)) {
    return false;
  }
  return children.some((child) => child && child.type === "text");
}
|
||||
/**
 * Flatten a block's inline content into a slot array.
 *
 * Each text child contributes one slot per UTF-16 code unit (so slot indices
 * line up with `String.prototype.indexOf` on the flattened text), all sharing
 * that child's marks array. Every other child becomes a single slot carrying
 * the placeholder character, the child's marks, and the child itself under
 * `atom`.
 *
 * @param {object} node - Block node; a missing `content` yields no slots.
 * @returns {Array<{ch: string, marks: Array<object>, atom?: object}>} Slots
 *   in document order.
 */
function flattenBlock(node) {
  const slots = [];
  for (const child of node.content || []) {
    const isText =
      child && child.type === "text" && typeof child.text === "string";
    if (!isText) {
      // Anything that is not a text node is kept whole as one opaque slot.
      slots.push({
        ch: ATOM_PLACEHOLDER,
        marks: (child && child.marks) || [],
        atom: child,
      });
      continue;
    }
    const marks = child.marks || [];
    // split("") cuts by UTF-16 code unit, not by code point.
    for (const ch of child.text.split("")) {
      slots.push({ ch, marks });
    }
  }
  return slots;
}
|
||||
/**
 * Turn a slot array back into ProseMirror inline nodes.
 *
 * Consecutive text slots with equal marks are merged into one text node; a
 * change of marks or an atom slot closes the current run. Atom slots emit
 * their original node unchanged. A text node gets a `marks` key only when its
 * marks array is non-empty.
 *
 * @param {Array<{ch: string, marks: Array<object>, atom?: object}>} chars -
 *   Slots in document order.
 * @returns {Array<object>} Inline nodes in document order.
 */
function tokenizeChars(chars) {
  const nodes = [];
  let pendingText = "";
  let pendingMarks = null;

  // Emit the run collected so far; does nothing when no text is pending.
  function emitPending() {
    if (pendingText.length === 0) {
      return;
    }
    const hasMarks = pendingMarks && pendingMarks.length > 0;
    nodes.push(
      hasMarks
        ? { type: "text", text: pendingText, marks: pendingMarks }
        : { type: "text", text: pendingText },
    );
    pendingText = "";
    pendingMarks = null;
  }

  for (const slot of chars) {
    if (slot.atom) {
      emitPending();
      nodes.push(slot.atom);
      continue;
    }
    if (pendingMarks === null) {
      pendingMarks = slot.marks;
    } else if (!marksEqual(pendingMarks, slot.marks)) {
      emitPending();
      if (pendingMarks === null) {
        pendingMarks = slot.marks;
      }
    }
    pendingText += slot.ch;
  }
  emitPending();
  return nodes;
}
|
||||
/**
 * Length of the longest common prefix of two strings, counted in UTF-16 code
 * units.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Number of leading code units the strings share.
 */
function commonPrefixLen(a, b) {
  const limit = Math.min(a.length, b.length);
  let matched = 0;
  for (; matched < limit; matched += 1) {
    if (a[matched] !== b[matched]) {
      break;
    }
  }
  return matched;
}
|
||||
/**
 * Length of the longest common suffix of two strings, counted in UTF-16 code
 * units and never exceeding `cap`.
 *
 * The cap lets a caller stop the suffix from overlapping a prefix it has
 * already matched.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @param {number} cap - Upper bound on the returned length.
 * @returns {number} Number of trailing code units the strings share, at most
 *   `cap`.
 */
function commonSuffixLen(a, b, cap) {
  const limit = Math.min(a.length, b.length, cap);
  let matched = 0;
  for (; matched < limit; matched += 1) {
    if (a[a.length - 1 - matched] !== b[b.length - 1 - matched]) {
      break;
    }
  }
  return matched;
}
|
||||
/**
 * Splice one find/replace edit into a single block's slot array.
 *
 * Only the part of the match that actually differs between `edit.find` and
 * `edit.replace` is rewritten: the shared prefix and suffix slots are copied
 * through untouched, so their marks are preserved.
 *
 * Marks for the inserted code units are chosen as follows:
 *   - if the removed region is non-empty and all of its slots carry equal
 *     marks (per `marksEqual`), the inserted text takes those marks;
 *   - otherwise it takes the marks of the nearest non-atom slot, looking left
 *     of the changed region first and then right of it, falling back to [].
 *
 * The caller is expected to pass match start offsets whose ranges contain no
 * atom slot.
 *
 * @param {Array<{ch: string, marks: Array, atom?: *}>} chars - Block slots.
 * @param {{find: string, replace: string}} edit - Text to locate / substitute.
 * @param {Iterable<number>} matchPositions - Start offsets of the matches to splice.
 * @returns {{newChars: Array, spliced: number}} Rebuilt slots and splice count.
 */
function applyEditToChars(chars, edit, matchPositions) {
    const needle = edit.find;
    const replacement = edit.replace;
    // The diff between find and replace is the same for every match.
    const headLen = commonPrefixLen(needle, replacement);
    const tailLen = commonSuffixLen(needle, replacement, Math.min(needle.length, replacement.length) - headLen);
    const insertedUnits = replacement.slice(headLen, replacement.length - tailLen).split("");

    // Marks of the closest text slot: left of `leftOf` first, then from
    // `rightFrom` onwards. Atom slots are skipped because their marks do not
    // belong on inserted text.
    const inheritFromNeighbour = (leftOf, rightFrom) => {
        let found = null;
        let k = leftOf - 1;
        while (k >= 0) {
            if (!chars[k].atom) {
                found = chars[k].marks;
                break;
            }
            k -= 1;
        }
        if (found === null) {
            k = rightFrom;
            while (k < chars.length) {
                if (!chars[k].atom) {
                    found = chars[k].marks;
                    break;
                }
                k += 1;
            }
        }
        return found === null ? [] : found;
    };

    const rebuilt = [];
    let readPos = 0;
    let applied = 0;
    for (const matchStart of matchPositions) {
        const dropFrom = matchStart + headLen;
        const dropTo = matchStart + needle.length - tailLen;
        // Everything before the changed region (shared prefix included) is kept.
        while (readPos < dropFrom) {
            rebuilt.push(chars[readPos]);
            readPos += 1;
        }
        const dropped = chars.slice(dropFrom, dropTo);
        const uniform = dropped.length > 0 &&
            dropped.every((slot) => marksEqual(slot.marks, dropped[0].marks));
        const marksForInsert = uniform
            ? dropped[0].marks
            : inheritFromNeighbour(dropFrom, dropTo);
        for (const unit of insertedUnits) {
            rebuilt.push({ ch: unit, marks: marksForInsert });
        }
        // Jump over the removed slots.
        readPos = dropTo;
        applied += 1;
    }
    // Whatever follows the last match is kept as-is.
    while (readPos < chars.length) {
        rebuilt.push(chars[readPos]);
        readPos += 1;
    }
    return { newChars: rebuilt, spliced: applied };
}
|
||||
/**
 * Apply a list of plain-text find/replace edits to a ProseMirror JSON doc.
 *
 * The input is cloned through a JSON round-trip and never mutated. Edits run
 * in order against the same working copy, so a later edit can target text an
 * earlier one produced.
 *
 * Per edit:
 *   1. A pure formatting toggle (find and replace differ, but are equal once
 *      balanced markdown wrappers are stripped from both) is refused.
 *   2. Every inline block in the tree is flattened and searched for the
 *      verbatim `find`; only when that yields nothing is the markdown-stripped
 *      form of `find` tried. `replace` is always inserted literally.
 *   3. Zero matches, or several matches without `replaceAll`, are recorded in
 *      `failed` rather than thrown.
 *   4. Otherwise the planned matches are spliced and the affected blocks are
 *      re-tokenized.
 *
 * @param {object} doc - ProseMirror document JSON.
 * @param {Array<{find: string, replace: string, replaceAll?: boolean}>} edits
 * @returns {{doc: object, results: Array<{find: string, replacements: number, normalized?: boolean}>, failed: Array<{find: string, reason: string}>}}
 *   `results` lists edits that applied (`normalized: true` when the stripped
 *   locator was used); `failed` lists edits that did not, with a reason.
 * @throws {Error} When an edit has an empty/falsy `find`.
 */
export function applyTextEdits(doc, edits) {
    const working = JSON.parse(JSON.stringify(doc));
    const results = [];
    const failed = [];

    // Sum of match counts over all blocks.
    const countAll = (perBlock) => perBlock.reduce((sum, positions) => sum + positions.length, 0);

    // Collect every inline block in document order, descending through all
    // containers (callouts, list items, table cells, blockquotes, ...).
    const gatherInlineBlocks = (root) => {
        const found = [];
        const walk = (node) => {
            if (isInlineBlock(node)) {
                found.push(node);
            }
            for (const child of node.content || []) {
                walk(child);
            }
        };
        walk(root);
        return found;
    };

    for (const edit of edits) {
        if (!edit.find) {
            throw new Error("edit.find must be a non-empty string");
        }

        // Refuse pure formatting toggles. The strict wrapper stripper is used
        // on both sides so ordinary plain-text edits (snake_case, `2 * 3`,
        // trailing-space trims) are not mistaken for formatting changes.
        const isFormattingToggle = edit.find !== edit.replace &&
            stripBalancedWrappers(edit.find) === stripBalancedWrappers(edit.replace);
        if (isFormattingToggle) {
            failed.push({
                find: edit.find,
                reason: "edit_page_text edits plain text only and cannot add or remove formatting marks (bold/italic/strike/code/link); it writes the replacement as LITERAL text. This edit looks like a formatting change (markdown markers in find/replace). To change marks, read the block with get_page_json and use patch_node (or update_page_json) to set the node's marks array.",
            });
            continue;
        }

        const blocks = gatherInlineBlocks(working);
        const blockChars = blocks.map((block) => flattenBlock(block));
        const blockPlain = blockChars.map((slots) => slots.map((slot) => slot.ch).join(""));
        // Valid (atom-free) match offsets of `needle`, per block.
        const locate = (needle) => blockChars.map((slots, b) => findValidMatches(slots, blockPlain[b], needle));

        // The verbatim locator always gets the first try.
        let effectiveFind = edit.find;
        let normalized = false;
        let validPerBlock = locate(edit.find);
        let total = countAll(validPerBlock);

        // Fallback: the markdown-stripped locator, only when verbatim found nothing.
        const stripped = stripInlineMarkdown(edit.find);
        if (total === 0 && stripped !== edit.find && stripped.length > 0) {
            const strippedPerBlock = locate(stripped);
            const strippedTotal = countAll(strippedPerBlock);
            if (strippedTotal >= 1) {
                validPerBlock = strippedPerBlock;
                total = strippedTotal;
                effectiveFind = stripped;
                normalized = true;
            }
        }

        if (total === 0) {
            // A raw substring hit (atoms included) for either locator means the
            // text is there but the match would span a non-text inline node.
            const existsAcrossAtom = blockPlain.some((plain) => plain.includes(edit.find) ||
                (stripped !== edit.find && plain.includes(stripped)));
            let reason;
            if (existsAcrossAtom) {
                reason =
                    "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes.";
            }
            else {
                reason = "text not found in the document.";
                // Hint: quote the first block containing the longest
                // whitespace-delimited token (>= 3 chars) of the locator.
                const tokenSource = stripped.length > 0 ? stripped : edit.find;
                let longestToken;
                for (const token of tokenSource.split(/\s+/)) {
                    if (token.length < 3) {
                        continue;
                    }
                    // Strict ">" keeps the earliest token among equal lengths.
                    if (longestToken === undefined || token.length > longestToken.length) {
                        longestToken = token;
                    }
                }
                if (longestToken) {
                    const hitBlock = blockPlain.find((plain) => plain.includes(longestToken));
                    if (hitBlock) {
                        // Truncate by code point so a surrogate pair is never
                        // split; add the ellipsis only when text was cut.
                        const points = Array.from(hitBlock);
                        const snippet = points.length > 120
                            ? `${points.slice(0, 120).join("")}…`
                            : hitBlock;
                        reason += ` Closest block text: "${snippet}".`;
                    }
                }
            }
            failed.push({ find: edit.find, reason });
            continue;
        }

        if (total > 1 && !edit.replaceAll) {
            failed.push({
                find: edit.find,
                reason: `matches ${total} times. Provide a longer, unique fragment or set replaceAll: true.`,
            });
            continue;
        }

        // Plan the splices: every valid match for replaceAll, otherwise just
        // the first valid match in document order.
        let plannedPerBlock;
        if (edit.replaceAll) {
            plannedPerBlock = validPerBlock.map((positions) => [...positions]);
        }
        else {
            plannedPerBlock = blockChars.map(() => []);
            const firstBlock = validPerBlock.findIndex((positions) => positions.length > 0);
            if (firstBlock !== -1) {
                plannedPerBlock[firstBlock].push(validPerBlock[firstBlock][0]);
            }
        }

        // The splice diffs against the locator that actually matched, while
        // the replacement text stays exactly as supplied.
        const effectiveEdit = {
            find: effectiveFind,
            replace: edit.replace,
            replaceAll: edit.replaceAll,
        };
        let spliced = 0;
        blocks.forEach((block, b) => {
            if (plannedPerBlock[b].length === 0) {
                return;
            }
            const outcome = applyEditToChars(blockChars[b], effectiveEdit, plannedPerBlock[b]);
            spliced += outcome.spliced;
            block.content = tokenizeChars(outcome.newChars);
        });

        // Report the caller's original `find` so results can be correlated.
        results.push(normalized
            ? { find: edit.find, replacements: spliced, normalized: true }
            : { find: edit.find, replacements: spliced });
    }

    // Safety net: remove empty text nodes anywhere in the tree, including
    // ones that may have been present in blocks no edit touched.
    const pruneEmptyText = (node) => {
        if (!Array.isArray(node.content)) {
            return;
        }
        node.content = node.content.filter((child) => child.type !== "text" || child.text !== "");
        for (const child of node.content) {
            pruneEmptyText(child);
        }
    };
    pruneEmptyText(working);

    return { doc: working, results, failed };
}
|
||||
@@ -1,835 +0,0 @@
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
*/
|
||||
export function convertProseMirrorToMarkdown(content) {
|
||||
if (!content || !content.content)
|
||||
return "";
|
||||
// Escape a value interpolated into an HTML double-quoted attribute value
|
||||
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the
|
||||
// ATTRIBUTE context only the quote that delimits the value and the ampersand
|
||||
// that starts an entity are special, so we escape ONLY & " (and ' for safety
|
||||
// when single-quoted delimiters are used). We deliberately do NOT escape < or
|
||||
// >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode
|
||||
// &lt;/&gt; back inside attribute values, so escaping them would corrupt the
|
||||
// stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on
|
||||
// every round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`). Escaping & "
|
||||
// keeps the value inert against attribute-injection while staying idempotent.
|
||||
// NOTE: escape ONLY & and " here. The value is always wrapped in double
|
||||
// quotes, so " is the only delimiter; ' is NOT special in a double-quoted
|
||||
// value, and parse5 does not decode ' back inside attribute values, so
|
||||
// escaping ' would (like < >) corrupt the value and accumulate & on every
|
||||
// round-trip. Escaping & and " is idempotent (parse5 decodes them back).
|
||||
const escapeAttr = (value) => String(value)
|
||||
.replace(/&/g, "&")
|
||||
.replace(/"/g, """);
|
||||
// Escape a value placed as HTML element TEXT content (between tags), where
|
||||
// <, >, and & are all significant. Used for text rendered inside raw-HTML
|
||||
// blocks (table cells / columns) so stored characters cannot inject markup.
|
||||
const escapeHtmlText = (value) => String(value)
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">");
|
||||
// Percent-encode characters that would break out of a markdown URL target
|
||||
// (...) — whitespace/newlines and parentheses — so a stored src stays a
|
||||
// single inert token (used for image/video/youtube srcs).
|
||||
const encodeMdUrl = (value) => String(value || "")
|
||||
.replace(/\s/g, (c) => (c === " " ? "%20" : encodeURIComponent(c)))
|
||||
.replace(/\(/g, "%28")
|
||||
.replace(/\)/g, "%29");
|
||||
const processNode = (node) => {
|
||||
const type = node.type;
|
||||
const nodeContent = node.content || [];
|
||||
switch (type) {
|
||||
case "doc":
|
||||
return nodeContent.map(processNode).join("\n\n");
|
||||
case "paragraph":
|
||||
const text = nodeContent.map(processNode).join("");
|
||||
const align = node.attrs?.textAlign;
|
||||
if (align && align !== "left") {
|
||||
return `<div align="${escapeAttr(align)}">${text}</div>`;
|
||||
}
|
||||
return text || "";
|
||||
case "heading":
|
||||
const level = node.attrs?.level || 1;
|
||||
const headingText = nodeContent.map(processNode).join("");
|
||||
return "#".repeat(level) + " " + headingText;
|
||||
case "text":
|
||||
let textContent = node.text || "";
|
||||
// Apply marks (bold, italic, code, etc.)
|
||||
if (node.marks) {
|
||||
// Markdown code spans (`...`) cannot carry inner formatting, so when a
|
||||
// run has the `code` mark alongside ANY other mark, backtick syntax
|
||||
// would leak literal ** / []() into the code text. In that case emit
|
||||
// nested HTML (<code> innermost, the other marks wrapping it as HTML)
|
||||
// so the output is at least well-formed and re-parseable.
|
||||
//
|
||||
// NOTE: this does NOT round-trip both marks. The schema's `code` mark
|
||||
// has `excludes: "_"` (it excludes every other mark), so on import the
|
||||
// co-occurring mark is always dropped — the run comes back as `code`
|
||||
// only. We keep the emission simple and accept that the other mark is
|
||||
// lost; preserving both is impossible while `code` excludes them.
|
||||
// Only use the backtick form when `code` is the sole mark.
|
||||
const markTypes = node.marks.map((m) => m.type);
|
||||
const hasCode = markTypes.includes("code");
|
||||
const codeCombined = hasCode && markTypes.length > 1;
|
||||
for (const mark of node.marks) {
|
||||
switch (mark.type) {
|
||||
case "bold":
|
||||
textContent = codeCombined
|
||||
? `<strong>${textContent}</strong>`
|
||||
: `**${textContent}**`;
|
||||
break;
|
||||
case "italic":
|
||||
textContent = codeCombined
|
||||
? `<em>${textContent}</em>`
|
||||
: `*${textContent}*`;
|
||||
break;
|
||||
case "code":
|
||||
// When combined with another mark, wrap as <code> so the
|
||||
// surrounding HTML marks can nest around it; otherwise use the
|
||||
// plain backtick span.
|
||||
textContent = codeCombined
|
||||
? `<code>${textContent}</code>`
|
||||
: `\`${textContent}\``;
|
||||
break;
|
||||
case "link": {
|
||||
const href = mark.attrs?.href || "";
|
||||
const title = mark.attrs?.title;
|
||||
if (codeCombined) {
|
||||
// Emit an HTML anchor so it can wrap the nested <code>.
|
||||
const safeHref = escapeAttr(href);
|
||||
if (title) {
|
||||
textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
|
||||
}
|
||||
else {
|
||||
textContent = `<a href="${safeHref}">${textContent}</a>`;
|
||||
}
|
||||
}
|
||||
else if (title) {
|
||||
// Emit the optional markdown link title; escape an embedded
|
||||
// double-quote so it cannot terminate the title string early.
|
||||
const safeTitle = String(title).replace(/"/g, '\\"');
|
||||
textContent = `[${textContent}](${href} "${safeTitle}")`;
|
||||
}
|
||||
else {
|
||||
textContent = `[${textContent}](${href})`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "strike":
|
||||
textContent = codeCombined
|
||||
? `<s>${textContent}</s>`
|
||||
: `~~${textContent}~~`;
|
||||
break;
|
||||
case "underline":
|
||||
textContent = `<u>${textContent}</u>`;
|
||||
break;
|
||||
case "subscript":
|
||||
textContent = `<sub>${textContent}</sub>`;
|
||||
break;
|
||||
case "superscript":
|
||||
textContent = `<sup>${textContent}</sup>`;
|
||||
break;
|
||||
case "highlight": {
|
||||
// Preserve a null/empty color as a plain highlight (a bare
|
||||
// <mark> with no background-color); only emit the style when a
|
||||
// color is actually set, so a plain highlight is not forced to
|
||||
// yellow on export.
|
||||
const color = mark.attrs?.color;
|
||||
textContent = color
|
||||
? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
|
||||
: `<mark>${textContent}</mark>`;
|
||||
break;
|
||||
}
|
||||
case "textStyle":
|
||||
if (mark.attrs?.color) {
|
||||
textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
case "comment": {
|
||||
// Emit the inline comment anchor so highlights round-trip. The
|
||||
// schema's Comment mark parses span[data-comment-id] (attrs
|
||||
// commentId/resolved).
|
||||
const cid = mark.attrs?.commentId;
|
||||
if (cid) {
|
||||
const resolvedAttr = mark.attrs?.resolved
|
||||
? ` data-resolved="true"`
|
||||
: "";
|
||||
textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "spoiler":
|
||||
// Markdown has no native spoiler syntax, so emit the same
|
||||
// lossless raw HTML the editor-ext turndown rule produces; the
|
||||
// schema's Spoiler mark parses span[data-spoiler] back on import.
|
||||
textContent = `<span data-spoiler="true">${textContent}</span>`;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return textContent;
|
||||
case "codeBlock":
|
||||
const language = node.attrs?.language || "";
|
||||
// Strip ALL trailing newlines so the export is idempotent: marked
|
||||
// re-adds exactly one trailing "\n" on import, so trimming only one
|
||||
// here would let the text grow by "\n" on each round-trip. Removing
|
||||
// every trailing newline makes repeated cycles stable.
|
||||
const code = nodeContent
|
||||
.map(processNode)
|
||||
.join("")
|
||||
.replace(/\n+$/, "");
|
||||
return "```" + language + "\n" + code + "\n```";
|
||||
case "bulletList":
|
||||
return nodeContent
|
||||
.map((item) => processListItem(item, "-"))
|
||||
.join("\n");
|
||||
case "orderedList":
|
||||
return nodeContent
|
||||
.map((item, index) => processListItem(item, `${index + 1}.`))
|
||||
.join("\n");
|
||||
case "taskList":
|
||||
return nodeContent.map((item) => processTaskItem(item)).join("\n");
|
||||
case "taskItem":
|
||||
// Delegate to the same helper used by taskList so multi-block and
|
||||
// nested task items render and indent consistently.
|
||||
return processTaskItem(node);
|
||||
case "listItem":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
case "blockquote":
|
||||
// Prefix EVERY line of EVERY child with "> " and separate block-level
|
||||
// children with a blank ">" line so code blocks / multi-paragraph
|
||||
// quotes round-trip correctly.
|
||||
return nodeContent
|
||||
.map((n) => processNode(n)
|
||||
.split("\n")
|
||||
.map((line) => (line.length ? `> ${line}` : ">"))
|
||||
.join("\n"))
|
||||
.join("\n>\n");
|
||||
case "horizontalRule":
|
||||
return "---";
|
||||
case "hardBreak":
|
||||
// Two trailing spaces before the newline encode a markdown hard break;
|
||||
// a bare "\n" would be reimported as a soft break and lost.
|
||||
return " \n";
|
||||
case "image": {
|
||||
const imgAlt = node.attrs?.alt || "";
|
||||
const imgCaption = node.attrs?.caption || "";
|
||||
if (imgCaption) {
|
||||
// ![]() can't carry a caption, so (symmetric to video) emit a raw
|
||||
// <img> wrapped in a block <div>. On import marked.parse keeps the raw
|
||||
// HTML and generateJSON runs the image extension's parseHTML, which
|
||||
// restores the caption from data-caption.
|
||||
const parts = [`src="${escapeAttr(node.attrs?.src ?? "")}"`];
|
||||
if (imgAlt)
|
||||
parts.push(`alt="${escapeAttr(imgAlt)}"`);
|
||||
parts.push(`data-caption="${escapeAttr(imgCaption)}"`);
|
||||
return `<div><img ${parts.join(" ")}></div>`;
|
||||
}
|
||||
// Neutralize characters that could break out of the markdown image
|
||||
// URL: spaces/newlines and parentheses would terminate the (...) target
|
||||
// and let a stored src inject following markdown/HTML. Percent-encode
|
||||
// them so the URL stays a single inert token.
|
||||
const imgSrc = encodeMdUrl(node.attrs?.src);
|
||||
return ``;
|
||||
}
|
||||
case "video": {
|
||||
// Emit the schema-matching <video> element so generateJSON rebuilds the
|
||||
// node with its attrs intact. The schema's parseHTML reads src/aria-label
|
||||
// from the standard attributes and the remaining attrs from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.alt)
|
||||
parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
// Wrap in a block <div> so marked treats it as a block (a bare <video>
|
||||
// is inline-level HTML and marked wraps it in <p>, leaving a spurious
|
||||
// empty paragraph beside the hoisted block atom). The wrapper has no
|
||||
// data-type, so the schema parser ignores it and just hoists the video.
|
||||
return `<div><video ${parts.join(" ")}></video></div>`;
|
||||
}
|
||||
case "youtube": {
|
||||
// Emit the schema-matching div[data-type="youtube"]; the schema reads
|
||||
// src from data-src and width/height/align from data-* attributes.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="youtube"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "table": {
|
||||
// A GFM pipe table cannot represent merged cells. If ANY cell carries
|
||||
// colspan>1 or rowspan>1, a pipe table would corrupt the grid on
|
||||
// re-import, so emit the WHOLE table as raw HTML <table> instead: the
|
||||
// schema's table family parseHTML (tag table/tr/td/th, with colspan/
|
||||
// rowspan read from the same-named HTML attrs and align via parseHTML)
|
||||
// round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
|
||||
const tableRows = nodeContent;
|
||||
if (tableRows.length === 0)
|
||||
return "";
|
||||
const hasSpan = tableRows.some((row) => (row.content || []).some((cell) => (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1));
|
||||
if (hasSpan) {
|
||||
// Render each cell's block children to HTML (marked does NOT parse
|
||||
// markdown inside a raw HTML block, so emitting markdown here would
|
||||
// leak literal ** / `` into the cell). blockToHtml mirrors the schema
|
||||
// HTML so inner formatting re-parses into the right marks/nodes.
|
||||
const renderHtmlCell = (cell) => {
|
||||
const tag = cell.type === "tableHeader" ? "th" : "td";
|
||||
const a = cell.attrs || {};
|
||||
const cellParts = [];
|
||||
if ((a.colspan ?? 1) > 1)
|
||||
cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
|
||||
if ((a.rowspan ?? 1) > 1)
|
||||
cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
|
||||
if (a.align)
|
||||
cellParts.push(`align="${escapeAttr(a.align)}"`);
|
||||
const open = cellParts.length
|
||||
? `<${tag} ${cellParts.join(" ")}>`
|
||||
: `<${tag}>`;
|
||||
const inner = (cell.content || [])
|
||||
.map((block) => blockToHtml(block))
|
||||
.join("");
|
||||
return `${open}${inner}</${tag}>`;
|
||||
};
|
||||
const htmlRows = tableRows
|
||||
.map((row) => `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`)
|
||||
.join("");
|
||||
return `<table><tbody>${htmlRows}</tbody></table>`;
|
||||
}
|
||||
// No merged cells: emit a GFM table (header row + separator) so the
|
||||
// markdown can be parsed back into a table on re-import.
|
||||
const rows = tableRows.map(processNode);
|
||||
const headerCells = tableRows[0]?.content || [];
|
||||
const columns = headerCells.length || 1;
|
||||
// Derive alignment markers (:--, :-:, --:) from each header cell.
|
||||
const markers = Array.from({ length: columns }, (_, i) => {
|
||||
const align = headerCells[i]?.attrs?.align;
|
||||
switch (align) {
|
||||
case "left":
|
||||
return ":--";
|
||||
case "center":
|
||||
return ":-:";
|
||||
case "right":
|
||||
return "--:";
|
||||
default:
|
||||
return "---";
|
||||
}
|
||||
});
|
||||
const separator = "| " + markers.join(" | ") + " |";
|
||||
return [rows[0], separator, ...rows.slice(1)].join("\n");
|
||||
}
|
||||
case "tableRow":
|
||||
return "| " + nodeContent.map(processNode).join(" | ") + " |";
|
||||
case "tableCell":
|
||||
case "tableHeader": {
|
||||
// Join multiple block children with a space (not "") so adjacent blocks
|
||||
// like a paragraph followed by a list don't collide into "line1- a".
|
||||
// Then collapse newlines and escape pipes so a cell containing "|" or a
|
||||
// line break cannot corrupt the surrounding GFM row.
|
||||
return nodeContent
|
||||
.map(processNode)
|
||||
.join(" ")
|
||||
.replace(/\r?\n/g, " ")
|
||||
.replace(/\|/g, "\\|");
|
||||
}
|
||||
case "callout":
|
||||
const calloutType = node.attrs?.type || "info";
|
||||
const calloutContent = nodeContent.map(processNode).join("\n");
|
||||
return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
|
||||
case "details":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
case "detailsSummary":
|
||||
const summaryText = nodeContent.map(processNode).join("");
|
||||
return `<details>\n<summary>${summaryText}</summary>\n`;
|
||||
case "detailsContent":
|
||||
const detailsText = nodeContent.map(processNode).join("\n");
|
||||
return `${detailsText}\n</details>`;
|
||||
case "mathInline": {
|
||||
// The schema's `text` attribute has no parseHTML, so TipTap's default
|
||||
// parser reads it from the `text` HTML attribute (NOT the element's text
|
||||
// content). Emit span[data-type="mathInline"] carrying the LaTeX in a
|
||||
// `text="..."` attribute so it round-trips. marked cannot parse $...$
|
||||
// back, so the previous form was lossy.
|
||||
const inlineMath = node.attrs?.text || "";
|
||||
return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
|
||||
}
|
||||
case "mathBlock": {
|
||||
// Same as mathInline: the LaTeX must ride in the `text` HTML attribute
|
||||
// for the schema's default parser to recover it.
|
||||
const blockMath = node.attrs?.text || "";
|
||||
return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
|
||||
}
|
||||
case "mention": {
|
||||
// Emit span[data-type="mention"] with the schema's data-* attributes so
|
||||
// generateJSON rebuilds the mention node instead of leaving "@label"
|
||||
// plain text that cannot re-parse.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`data-type="mention"`];
|
||||
if (attrs.id)
|
||||
parts.push(`data-id="${escapeAttr(attrs.id)}"`);
|
||||
if (attrs.label)
|
||||
parts.push(`data-label="${escapeAttr(attrs.label)}"`);
|
||||
if (attrs.entityType)
|
||||
parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
|
||||
if (attrs.entityId)
|
||||
parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
|
||||
if (attrs.slugId)
|
||||
parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
|
||||
if (attrs.creatorId)
|
||||
parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
|
||||
if (attrs.anchorId)
|
||||
parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
|
||||
// Keep the label as visible text content too; the schema reads attrs
|
||||
// from data-*, so the inner text is purely cosmetic and harmless.
|
||||
const mentionLabel = attrs.label || attrs.id || "";
|
||||
// The label is visible element TEXT content here (the data-* attrs above
|
||||
// carry the real values), so escape it for the text context, not attrs.
|
||||
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
|
||||
}
|
||||
case "footnoteReference": {
|
||||
// Pandoc/GFM inline marker. The number is derived (not stored), so the
|
||||
// id is the stable anchor.
|
||||
const fnId = node.attrs?.id || "";
|
||||
return fnId ? `[^${fnId}]` : "";
|
||||
}
|
||||
case "footnotesList":
|
||||
// The container renders its definitions, each on its own `[^id]: ...`
|
||||
// line. A blank line separates the body from the notes block.
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
case "footnoteDefinition": {
|
||||
const defId = node.attrs?.id || "";
|
||||
// Collapse the definition's paragraphs into a single line; multi-line
|
||||
// footnotes are a v2 refinement.
|
||||
const defText = nodeContent
|
||||
.map(processNode)
|
||||
.join(" ")
|
||||
.replace(/\s*\n+\s*/g, " ")
|
||||
.trim();
|
||||
return defId ? `[^${defId}]: ${defText}` : "";
|
||||
}
|
||||
case "attachment": {
|
||||
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
|
||||
// the schema stores name/url (plus mime/size/attachmentId). Emit the
|
||||
// schema-matching div[data-type="attachment"] with data-attachment-*
|
||||
// attrs so the node round-trips instead of degrading to a markdown link.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="attachment"`,
|
||||
`data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
|
||||
];
|
||||
if (attrs.name)
|
||||
parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.mime)
|
||||
parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "drawio":
|
||||
case "excalidraw": {
|
||||
// Emit the schema-matching div[data-type=...] carrying the diagram's
|
||||
// attrs as data-* (the schema's diagramAttributes reads src/title/alt/
|
||||
// width/height/size/aspectRatio/align/attachmentId from data-*), so the
|
||||
// diagram round-trips instead of degrading to a lossy placeholder.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="${type}"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.title != null)
|
||||
parts.push(`data-title="${escapeAttr(attrs.title)}"`);
|
||||
if (attrs.alt != null)
|
||||
parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "embed": {
|
||||
// Emit the schema-matching div[data-type="embed"]; the schema reads
|
||||
// src/provider/align/width/height from data-* attributes so the node
|
||||
// (and its provider iframe info) survives the round-trip.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="embed"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
`data-provider="${escapeAttr(attrs.provider ?? "")}"`,
|
||||
];
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "audio": {
|
||||
// Emit the schema-matching <audio> element (was emitting nothing). The
|
||||
// schema reads src from src and attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
// Wrap in a block <div> for the same reason as video: a bare <audio> is
|
||||
// inline-level HTML that marked would wrap in <p>.
|
||||
return `<div><audio ${parts.join(" ")}></audio></div>`;
|
||||
}
|
||||
case "pdf": {
|
||||
// Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
|
||||
// The schema reads src/width/height from standard attrs and name/
|
||||
// attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [
|
||||
`data-type="pdf"`,
|
||||
`src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.name)
|
||||
parts.push(`data-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
case "columns": {
|
||||
// Emit the schema-matching div[data-type="columns"] wrapper so the
|
||||
// multi-column layout survives. Without a case the children were
|
||||
// concatenated with no separator and the text merged. The schema reads
|
||||
// layout from data-layout and widthMode from data-width-mode. The whole
|
||||
// block is raw HTML, so render children via blockToHtml (NOT markdown,
|
||||
// which marked would not re-parse inside a raw HTML block).
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`data-type="columns"`];
|
||||
if (attrs.layout)
|
||||
parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
|
||||
if (attrs.widthMode && attrs.widthMode !== "normal")
|
||||
parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
|
||||
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
case "column": {
|
||||
// Emit the schema-matching div[data-type="column"]; the schema reads the
|
||||
// column width from data-width. Children are rendered as HTML so their
|
||||
// formatting survives inside this raw HTML block.
|
||||
const attrs = node.attrs || {};
|
||||
const parts = [`data-type="column"`];
|
||||
if (attrs.width)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
case "subpages":
|
||||
return "{{SUBPAGES}}";
|
||||
default:
|
||||
// Fallback: process children
|
||||
return nodeContent.map(processNode).join("");
|
||||
}
|
||||
};
|
||||
// Render inline content (text runs + their marks) to HTML. Used by the raw
|
||||
// HTML fallbacks (spanned tables, columns) where marked will NOT re-parse
|
||||
// markdown, so backtick/asterisk/bracket syntax would otherwise leak as
|
||||
// literal characters. Each mark is mirrored to the HTML the schema's parseHTML
|
||||
// accepts so it re-imports as the matching ProseMirror mark.
|
||||
const inlineToHtml = (inlineNodes) => {
  // Marks that map 1:1 onto a bare HTML element with no attributes.
  const plainMarkTags = new Map([
    ["bold", "strong"],
    ["italic", "em"],
    ["code", "code"],
    ["strike", "s"],
    ["underline", "u"],
    ["subscript", "sub"],
    ["superscript", "sup"],
  ]);

  // Wrap already-escaped `html` in the element mirroring a single mark.
  // Marks that are unknown (or lack the attrs they need) leave it untouched.
  const wrapWithMark = (html, mark) => {
    const tag = plainMarkTags.get(mark.type);
    if (tag !== undefined) {
      return `<${tag}>${html}</${tag}>`;
    }
    if (mark.type === "link") {
      return `<a href="${escapeAttr(mark.attrs?.href || "")}">${html}</a>`;
    }
    if (mark.type === "highlight") {
      return mark.attrs?.color
        ? `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${html}</mark>`
        : `<mark>${html}</mark>`;
    }
    if (mark.type === "textStyle") {
      return mark.attrs?.color
        ? `<span style="color: ${escapeAttr(mark.attrs.color)}">${html}</span>`
        : html;
    }
    if (mark.type === "comment" && mark.attrs?.commentId) {
      // Inline comment anchor, so commented text inside a raw-HTML container
      // keeps its anchor too.
      const resolvedAttr = mark.attrs?.resolved ? ` data-resolved="true"` : "";
      return `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${resolvedAttr}>${html}</span>`;
    }
    return html;
  };

  const renderInline = (n) => {
    if (n.type === "hardBreak") {
      return "<br>";
    }
    if (n.type !== "text") {
      // Non-text inline nodes are handed to processNode unchanged.
      return processNode(n);
    }
    // Marks are applied in array order: the first mark ends up innermost.
    return (n.marks || []).reduce(
      (html, mark) => wrapWithMark(html, mark),
      escapeHtmlText(n.text || ""),
    );
  };

  return (inlineNodes || []).map((n) => renderInline(n)).join("");
};
|
||||
// Emit the schema-matching <img> for an image node. Shared so the image is
|
||||
// emitted as real HTML wherever a raw-HTML container needs it (inside a column
|
||||
// or a spanned table cell), where markdown `![alt](src)` would NOT be re-parsed
|
||||
// and would survive as literal text. The Image extension reads src/alt from
|
||||
// the standard attributes; the Docmost extra attrs (width/height/align/size/
|
||||
// attachmentId/aspectRatio) are global attributes read from same-named DOM
|
||||
// attributes, so emit them by name.
|
||||
const imageToHtml = (node) => {
  const attrs = node.attrs || {};
  const whenTruthy = (value) => Boolean(value);
  const whenPresent = (value) => value != null;

  // [node attr, emitted HTML attribute, emit-when predicate] in output order.
  // Text-like attrs are skipped when empty; numeric-like ones only when
  // null/undefined, so a literal 0 is still written out.
  const optionalAttrs = [
    ["alt", "alt", whenTruthy],
    ["caption", "data-caption", whenTruthy],
    ["title", "title", whenTruthy],
    ["width", "width", whenPresent],
    ["height", "height", whenPresent],
    ["align", "align", whenTruthy],
    ["size", "data-size", whenPresent],
    ["attachmentId", "data-attachment-id", whenTruthy],
    ["aspectRatio", "data-aspect-ratio", whenPresent],
  ];

  // src is always emitted (as an empty string when missing).
  const rendered = [`src="${escapeAttr(attrs.src ?? "")}"`];
  for (const [key, htmlName, shouldEmit] of optionalAttrs) {
    if (shouldEmit(attrs[key])) {
      rendered.push(`${htmlName}="${escapeAttr(attrs[key])}"`);
    }
  }
  return `<img ${rendered.join(" ")}>`;
};
|
||||
// Emit the schema-matching div[data-type="callout"] for a callout node. The
|
||||
// schema reads the banner type from data-callout-type. Children are rendered
|
||||
// as HTML so they survive inside a raw-HTML container.
|
||||
const calloutToHtml = (node) => {
  // A missing or empty banner type falls back to "info"; always lower-cased.
  const bannerType = (node.attrs?.type || "info").toLowerCase();
  let body = "";
  for (const child of node.content || []) {
    body += blockToHtml(child);
  }
  return `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">${body}</div>`;
};
|
||||
// Emit a schema-matching <details> tree. The schema parses <details>,
|
||||
// summary[data-type="detailsSummary"], and div[data-type="detailsContent"].
|
||||
const detailsToHtml = (node) => {
  // Each child is rendered through blockToHtml and concatenated in order.
  let body = "";
  for (const child of node.content || []) {
    body += blockToHtml(child);
  }
  return `<details>${body}</details>`;
};
|
||||
const detailsSummaryToHtml = (node) => {
  // The summary holds inline content only, so render it via inlineToHtml.
  const summaryHtml = inlineToHtml(node.content || []);
  return `<summary data-type="detailsSummary">${summaryHtml}</summary>`;
};
|
||||
const detailsContentToHtml = (node) => {
  // Block children of the collapsible body, rendered as HTML in order.
  let body = "";
  for (const child of node.content || []) {
    body += blockToHtml(child);
  }
  return `<div data-type="detailsContent">${body}</div>`;
};
|
||||
// Emit the schema-matching taskList/taskItem HTML. bridgeTaskLists (in
|
||||
// collaboration.ts) recognizes ul[data-type="taskList"] with
|
||||
// li[data-type="taskItem"][data-checked]; emitting that directly here keeps
|
||||
// task lists inside columns/cells from degrading to literal "- [ ]" text.
|
||||
const taskListToHtml = (node) => {
  // One <li> per task item; the checked state travels as the literal string
  // "true"/"false" in data-checked.
  const renderItem = (item) => {
    const state = item.attrs?.checked ? "true" : "false";
    return `<li data-type="taskItem" data-checked="${state}">${blockChildrenToHtml(item)}</li>`;
  };

  let listBody = "";
  for (const item of node.content || []) {
    listBody += renderItem(item);
  }
  return `<ul data-type="taskList">${listBody}</ul>`;
};
|
||||
// Render a block node to HTML for the raw-HTML containers (spanned tables,
|
||||
// columns). marked does NOT re-parse markdown inside a raw-HTML block, so
|
||||
// EVERY block type that can appear inside a column or a spanned cell must be
|
||||
// emitted as schema-matching HTML here — never as markdown, or it would land
|
||||
// as literal text on re-import. Nodes whose processNode case already produces
|
||||
// schema-matching HTML (math/media/embed/attachment/nested columns/spanned
|
||||
// table) are delegated to processNode; the markdown-emitting cases
|
||||
// (image/blockquote/callout/details/hr/taskList) get explicit HTML here.
|
||||
const blockToHtml = (block) => {
  const children = block.content || [];
  // Shared body for bullet and ordered lists: one <li> per list item.
  const listItemsHtml = () =>
    children.map((li) => `<li>${blockChildrenToHtml(li)}</li>`).join("");

  switch (block.type) {
    case "paragraph":
      return `<p>${inlineToHtml(children)}</p>`;
    case "heading": {
      const level = block.attrs?.level || 1;
      return `<h${level}>${inlineToHtml(children)}</h${level}>`;
    }
    case "bulletList":
      return `<ul>${listItemsHtml()}</ul>`;
    case "orderedList":
      return `<ol>${listItemsHtml()}</ol>`;
    case "codeBlock": {
      const lang = block.attrs?.language || "";
      // The code itself is element TEXT content (between <code> tags), so it
      // must escape < > & — NOT the attribute escaper. The language rides in
      // a class ATTRIBUTE, so it uses escapeAttr.
      const code = escapeHtmlText(
        children
          .map(processNode)
          .join("")
          .replace(/\n+$/, ""),
      );
      const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
      return `<pre><code${cls}>${code}</code></pre>`;
    }
    case "image":
      return imageToHtml(block);
    case "blockquote":
      return `<blockquote>${children.map((c) => blockToHtml(c)).join("")}</blockquote>`;
    case "horizontalRule":
      return "<hr>";
    case "callout":
      return calloutToHtml(block);
    case "details":
      return detailsToHtml(block);
    case "detailsSummary":
      return detailsSummaryToHtml(block);
    case "detailsContent":
      return detailsContentToHtml(block);
    case "taskList":
      return taskListToHtml(block);
    case "taskItem":
      // A bare taskItem (outside a taskList) still needs a wrapping list so
      // the schema parses it; wrap it in a single-item taskList.
      return taskListToHtml({ content: [block] });
    // table (incl. spanned), columns/column, math, media, embed, attachment,
    // etc. already emit schema-matching HTML from processNode.
    case "table":
    case "columns":
    case "column":
    case "mathBlock":
    case "video":
    case "audio":
    case "pdf":
    case "youtube":
    case "embed":
    case "attachment":
    case "drawio":
    case "excalidraw":
      return processNode(block);
    default: {
      // Any still-unhandled block type: NEVER fall back to markdown inside a
      // raw-HTML block (it would become literal text). Wrap the rendered
      // children in a <div> so their content is preserved.
      //
      // Decide between "block children" and "inline children" by looking for
      // a known INLINE node rather than for "anything that is not text". The
      // old `c.type !== "text"` test treated inline atoms (hardBreak, mention,
      // mathInline, footnoteReference) as blocks, which pushed every sibling
      // text node through blockToHtml, where it rendered as an empty <div>
      // and its text was lost. ProseMirror content is either all-inline or
      // all-block, so one inline child means the whole run is inline.
      const inlineTypes = ["text", "hardBreak", "mention", "mathInline", "footnoteReference"];
      const holdsInline = children.some((c) => inlineTypes.includes(c.type));
      if (children.length && !holdsInline) {
        return `<div>${children.map((c) => blockToHtml(c)).join("")}</div>`;
      }
      return `<div>${inlineToHtml(children)}</div>`;
    }
  }
};
|
||||
// Render the block children of a list item to HTML (a listItem holds block+
|
||||
// content). Mirrors processListItem but for the HTML fallback path.
|
||||
const blockChildrenToHtml = (item) => {
  // Concatenate the HTML of every block child, in document order.
  let html = "";
  for (const child of item.content || []) {
    html += blockToHtml(child);
  }
  return html;
};
|
||||
// Indent the rendered children of a list item under a marker prefix.
|
||||
// Each child block is a (possibly multi-line) string. The very first physical
|
||||
// line of the first child carries the marker (e.g. "- " or "1. "); EVERY
|
||||
// other line — the remaining lines of the first child AND all lines of every
|
||||
// subsequent child (nested lists, code blocks, extra paragraphs) — is indented
|
||||
// to align under the marker. Without indenting these continuation lines, the
|
||||
// 2nd/3rd line of a nested child collapses to column 0 and escapes the list.
|
||||
//
|
||||
// The continuation indent MUST equal the LIST marker width, which is not the
|
||||
// same as the visible prefix width:
|
||||
// - bullet "- " -> 2 columns
|
||||
// - task "- [ ] " -> marker is still "- " (the "[ ] " is content), 2
|
||||
// - ordered "1. "/"10. " -> 3/4 columns, scaling with the number's digits
|
||||
// CommonMark anchors nested content to the marker column, so an ordered item
|
||||
// indented to only 2 columns would be re-parsed as a sibling/loose content on
|
||||
// re-import. Callers therefore pass the exact indent width to use.
|
||||
const indentItemChildren = (childStrings, prefix, indentWidth) => {
  const pad = " ".repeat(indentWidth);
  // Flatten every child into its physical lines. split() always yields at
  // least one line per child, so flat index 0 is exactly "first line of the
  // first child" — the only line that carries the marker.
  const physicalLines = childStrings.flatMap((child) => child.split("\n"));
  return physicalLines
    .map((line, position) => {
      if (position === 0) {
        return `${prefix} ${line}`;
      }
      // Continuation lines align under the marker; blank lines stay truly
      // blank so no trailing whitespace is emitted.
      return line.length ? `${pad}${line}` : "";
    })
    .join("\n");
};
|
||||
const processListItem = (item, prefix) => {
  const rendered = (item.content || []).map(processNode);
  // An empty item is just its marker. Otherwise the marker is rendered as
  // `${prefix} `, so continuation lines indent by prefix.length + 1
  // ("-" -> 2, "1." -> 3, "10." -> 4).
  return rendered.length === 0
    ? prefix
    : indentItemChildren(rendered, prefix, prefix.length + 1);
};
|
||||
const processTaskItem = (item) => {
  const marker = `- ${item.attrs?.checked ? "[x]" : "[ ]"}`;
  const rendered = (item.content || []).map(processNode);
  // An empty task item must still show its checkbox row.
  if (rendered.length === 0) {
    return marker;
  }
  // The list marker is only "- " (2 columns); the checkbox is item content,
  // so the continuation indent is a fixed 2 and NOT derived from the wider
  // visible prefix.
  return indentItemChildren(rendered, marker, 2);
};
|
||||
return processNode(content).trim();
|
||||
}
|
||||
@@ -1,104 +0,0 @@
|
||||
/**
|
||||
* Self-contained Docmost-flavoured Markdown document (custom extensions).
|
||||
*
|
||||
* A single `.md` file that packages everything needed to losslessly round-trip
|
||||
* a page through "download -> edit body -> re-upload":
|
||||
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
|
||||
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
|
||||
* - a trailing `docmost:comments` block: a one-line JSON array of comment
|
||||
* threads.
|
||||
*
|
||||
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
|
||||
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
|
||||
* importer without first stripping the blocks, the metadata cannot leak into the
|
||||
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
|
||||
* codeBlock node, so a fenced block is deliberately NOT used.)
|
||||
*
|
||||
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
|
||||
* re-pastes an exported `.md` into a page, or a page documents this very
|
||||
* format). To stay robust, parsing treats only the FINAL, document-ending
|
||||
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
|
||||
* opener whose closing `-->` sits at the very end of the file. Any earlier
|
||||
* literal occurrence is left in the body untouched.
|
||||
*
|
||||
* NOTE on comments: in this version the comment THREAD records are preserved in
|
||||
* the file but are NOT pushed back to the server on import — only the inline
|
||||
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||
* records stays with the comment tools/UI.
|
||||
*/
|
||||
// Match the leading meta block (allow leading whitespace). Capture group 1 is
|
||||
// the JSON text between the markers.
|
||||
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
|
||||
// Match a `docmost:comments` opener. Used globally to scan for the LAST opener
|
||||
// rather than end-anchoring a single regex (which would mis-capture across a
|
||||
// literal opener that appears earlier in the body).
|
||||
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
|
||||
/**
 * Serialize `value` as one-line JSON that is safe to embed inside an HTML
 * comment.
 *
 * A raw `-->` (or `--!>`) inside a JSON string would terminate the surrounding
 * comment early, letting the rest of the metadata leak into the document when
 * the file is fed to a Markdown/HTML parser. The `>` of such a sequence is
 * rewritten as the JSON escape `\u003e`, which `JSON.parse` decodes back to
 * the identical string. `>` can only occur inside JSON strings, so the escape
 * is always valid. Values JSON cannot represent (e.g. `undefined`) become
 * `null` rather than the unparseable text "undefined".
 */
function toCommentSafeJson(value) {
  const json = JSON.stringify(value) ?? "null";
  return json.replace(/--(!?)>/g, "--$1\\u003e");
}

/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. The meta block is always emitted; the comments block is always
 * emitted too (with `[]` when there are no comments) so the format stays uniform
 * and parsing stays simple.
 *
 * @param {*} meta - Page identity object, written as one-line JSON (`null`
 *   when it is undefined or otherwise not JSON-serializable).
 * @param {string|null|undefined} body - Markdown body; trimmed, nullish
 *   treated as empty.
 * @param {Array|*} comments - Comment threads; anything that is not an array
 *   is written as `[]`.
 * @returns {string} The complete file text, ending with a newline.
 */
export function serializeDocmostMarkdown(meta, body, comments) {
  const metaJson = toCommentSafeJson(meta);
  const commentsJson = toCommentSafeJson(Array.isArray(comments) ? comments : []);
  const trimmedBody = (body ?? "").trim();
  return (
    `<!-- docmost:meta\n${metaJson}\n-->\n\n` +
    `${trimmedBody}\n\n` +
    `<!-- docmost:comments\n${commentsJson}\n-->\n`
  );
}
|
||||
/**
|
||||
* Split a self-contained file back into its parts. Tolerant: if the meta or
|
||||
* comments block is missing (e.g. a hand-written plain-markdown file), the
|
||||
* corresponding value is returned as `null` and the whole input is treated as
|
||||
* the body. This never throws on a MISSING block; only a `JSON.parse` failure
|
||||
* inside a block that IS present is surfaced as a thrown Error with a clear
|
||||
* message. Robust to `\r\n` line endings.
|
||||
*/
|
||||
export function parseDocmostMarkdown(full) {
  // Work on an LF-only copy so the anchored patterns behave the same for CRLF.
  const text = (full ?? "").replace(/\r\n/g, "\n");

  // Parse the JSON payload of a block that IS present; a malformed payload is
  // reported with the block's name in the error message.
  const parseBlockJson = (label, jsonText) => {
    try {
      return JSON.parse(jsonText);
    } catch (err) {
      throw new Error(`Invalid docmost:${label} JSON block: ${err instanceof Error ? err.message : String(err)}`);
    }
  };

  // Leading meta block (start-anchored); the body begins right after it.
  const metaMatch = text.match(META_RE);
  const meta = metaMatch ? parseBlockJson("meta", metaMatch[1]) : null;
  const bodyStart = metaMatch ? (metaMatch.index ?? 0) + metaMatch[0].length : 0;

  // Scan for the LAST comments opener. Earlier literal occurrences (e.g. a
  // re-pasted export) are body text and must be left alone. The shared /g
  // regex is stateful, so rewind it before scanning.
  COMMENTS_OPEN_RE.lastIndex = 0;
  let opener = null;
  for (
    let hit = COMMENTS_OPEN_RE.exec(text);
    hit !== null;
    hit = COMMENTS_OPEN_RE.exec(text)
  ) {
    opener = hit;
  }

  let comments = null;
  let bodyStop = text.length;
  if (opener !== null) {
    const tail = text.slice(opener.index + opener[0].length);
    // The closer only counts when nothing but whitespace follows it.
    const closer = /\r?\n-->[ \t]*\r?\n?\s*$/.exec(tail);
    if (closer !== null) {
      comments = parseBlockJson("comments", tail.slice(0, closer.index));
      // Everything from the opener to the end of the document is metadata.
      bodyStop = opener.index;
    }
  }

  return { meta, body: text.slice(bodyStart, bodyStop).trim(), comments };
}
|
||||
@@ -1,821 +0,0 @@
|
||||
/**
|
||||
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
|
||||
* tree by node id.
|
||||
*
|
||||
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
|
||||
* table cells hold their children in `content` just like any other block, so a
|
||||
* single recursive walk reaches them all.
|
||||
*
|
||||
* Every exported function operates on a DEEP CLONE of the input document and
|
||||
* returns the new document. The input doc and any `newNode`/`node` argument are
|
||||
* never mutated. All functions are defensively null-safe: missing/non-array
|
||||
* `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||
*/
|
||||
import { stripInlineMarkdown } from "./text-normalize.js";
|
||||
/** Deep-clone a JSON-serializable value without mutating the original. */
|
||||
function clone(value) {
  // Prefer the platform deep-clone; fall back to a JSON round-trip where
  // structuredClone is unavailable.
  const hasStructuredClone = typeof structuredClone === "function";
  return hasStructuredClone
    ? structuredClone(value)
    : JSON.parse(JSON.stringify(value));
}
|
||||
/** True if `value` is a non-null object (and not an array). */
|
||||
function isObject(value) {
  // Reject null/undefined and arrays first; what remains only needs to be an
  // "object" by typeof.
  if (value === null || value === undefined) {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
|
||||
/** True if `node` carries the given id in `node.attrs.id`. */
|
||||
function matchesId(node, nodeId) {
  // Both the node and its attrs must be plain objects before the id is read.
  if (!isObject(node) || !isObject(node.attrs)) {
    return false;
  }
  return node.attrs.id === nodeId;
}
|
||||
/**
|
||||
* Recursively concatenate all text contained in a node.
|
||||
*
|
||||
* Text nodes contribute their `text` string; container nodes contribute the
|
||||
* joined `blockPlainText` of their `content` children. Returns "" for nullish
|
||||
* or non-object inputs.
|
||||
*/
|
||||
export function blockPlainText(node) {
  if (!isObject(node)) {
    return "";
  }
  // A node's own text (if any) comes first, followed by the text of its
  // children in document order.
  const ownText = typeof node.text === "string" ? node.text : "";
  const childText = Array.isArray(node.content)
    ? node.content.map((child) => blockPlainText(child)).join("")
    : "";
  return ownText + childText;
}
|
||||
/**
 * Truncate `text` to at most `n` UTF-16 code units, appending an ellipsis when
 * cut. Text that already fits is returned unchanged.
 *
 * The cut never lands inside a surrogate pair: if it would separate a high
 * surrogate from its low surrogate (e.g. in the middle of an emoji), the cut
 * moves back one unit so no lone surrogate is left before the ellipsis.
 */
function truncate(text, n) {
  if (text.length <= n) {
    return text;
  }
  let end = n;
  const lastKept = text.charCodeAt(end - 1);
  const firstDropped = text.charCodeAt(end);
  // charCodeAt yields NaN for out-of-range indices, which fails both range
  // checks, so n <= 0 falls through to the plain slice below.
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  if (splitsPair) {
    end -= 1;
  }
  return text.slice(0, end) + "…";
}
|
||||
/**
|
||||
* Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
|
||||
* `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
|
||||
* table cells — compactness is the point; use `getNodeByRef` to drill into a
|
||||
* specific block.
|
||||
*
|
||||
* Each entry carries `{ index, type, id, firstText }`, plus type-specific
|
||||
* extras: headings add `level`; tables add `rows`/`cols` and the first row's
|
||||
* cell texts as `header`; list blocks (types ending in "List") add `items`.
|
||||
* `firstText` is the block's plain text truncated to 100 chars. Null-safe:
|
||||
* a missing or non-object doc/content yields `[]`.
|
||||
*/
|
||||
export function buildOutline(doc) {
  if (!isObject(doc) || !Array.isArray(doc.content)) {
    return [];
  }
  const outline = [];
  for (const [index, block] of doc.content.entries()) {
    const blockIsObject = isObject(block);
    const type = blockIsObject ? block.type : undefined;
    const attrs = blockIsObject && isObject(block.attrs) ? block.attrs : null;

    // Fields common to every entry; type-specific extras are appended below.
    const entry = {
      index,
      type,
      id: attrs ? (attrs.id ?? null) : null,
      firstText: truncate(blockPlainText(block), 100),
    };

    if (type === "heading") {
      entry.level = attrs ? (attrs.level ?? null) : null;
    } else if (type === "table") {
      // Rows are the table's direct children; the FIRST row supplies both the
      // column count and the header cell texts.
      const rows = block.content;
      const firstRowCells = rows?.[0]?.content;
      entry.rows = rows?.length ?? 0;
      entry.cols = firstRowCells?.length ?? 0;
      entry.header = (firstRowCells ?? []).map((cell) =>
        truncate(blockPlainText(cell), 40),
      );
    } else if (typeof type === "string" && type.endsWith("List")) {
      entry.items = block.content?.length ?? 0;
    }

    outline.push(entry);
  }
  return outline;
}
|
||||
/**
|
||||
* Resolve a single node by reference and return `{ node, path, type }`, or
|
||||
* `null` when nothing matches.
|
||||
*
|
||||
* - `ref` of the form `#<n>` (e.g. `#2`) selects the TOP-LEVEL block at index
|
||||
* `n` in `doc.content`. This is the only way to address table/tableRow/
|
||||
* tableCell nodes, which carry no `attrs.id`.
|
||||
* - Otherwise `ref` is treated as a block id: the FIRST node anywhere in the
|
||||
* tree with `attrs.id === ref` is returned.
|
||||
*
|
||||
* `path` is the array of child indices from the doc root down to the node
|
||||
* (so a top-level block is `[index]`). The returned `node` is a DEEP CLONE,
|
||||
* so callers can mutate it without touching the input doc. Null-safe.
|
||||
*/
|
||||
export function getNodeByRef(doc, ref) {
  if (!isObject(doc)) {
    return null;
  }

  // "#<n>" addresses the n-th TOP-LEVEL block directly.
  const indexRef = typeof ref === "string" ? /^#(\d+)$/.exec(ref) : null;
  if (indexRef !== null) {
    const index = Number(indexRef[1]);
    const block = Array.isArray(doc.content) ? doc.content[index] : undefined;
    return isObject(block)
      ? { node: clone(block), path: [index], type: block.type }
      : null;
  }

  // Anything else is a block id: pre-order depth-first walk, first hit wins.
  // The root itself is never a candidate — only its descendants are.
  const locate = (siblings, trail) => {
    if (!Array.isArray(siblings)) {
      return null;
    }
    for (const [i, child] of siblings.entries()) {
      const path = trail.concat(i);
      if (matchesId(child, ref)) {
        return { node: clone(child), path, type: child.type };
      }
      const nested = isObject(child) ? locate(child.content, path) : null;
      if (nested !== null) {
        return nested;
      }
    }
    return null;
  };

  return locate(doc.content, []);
}
|
||||
/**
|
||||
* Replace EVERY node whose `attrs.id === nodeId` with a deep clone of
|
||||
* `newNode`, anywhere in the tree (including inside callouts and table cells).
|
||||
*
|
||||
* Operates on a clone of `doc`; returns `{ doc, replaced }` where `replaced`
|
||||
* is the number of nodes substituted. A fresh clone of `newNode` is used for
|
||||
* each match so they do not share references.
|
||||
*/
|
||||
export function replaceNodeById(doc, nodeId, newNode) {
  const result = clone(doc);
  let replaced = 0;

  // Visit one content array: swap direct matches in place and descend into
  // the children of every non-matching node.
  const substituteIn = (siblings) => {
    for (let i = 0; i < siblings.length; i += 1) {
      const child = siblings[i];
      if (matchesId(child, nodeId)) {
        // Each match gets its own clone so replacements share no references;
        // a freshly substituted node is never descended into.
        siblings[i] = clone(newNode);
        replaced += 1;
      } else if (isObject(child) && Array.isArray(child.content)) {
        substituteIn(child.content);
      }
    }
  };

  if (isObject(result) && Array.isArray(result.content)) {
    substituteIn(result.content);
  }
  return { doc: result, replaced };
}
|
||||
/**
|
||||
* Remove EVERY node whose `attrs.id === nodeId` from its parent `content`
|
||||
* array, anywhere in the tree (recursive, including callouts and tables).
|
||||
*
|
||||
* Operates on a clone of `doc`; returns `{ doc, deleted }` where `deleted` is
|
||||
* the number of nodes removed.
|
||||
*/
|
||||
export function deleteNodeById(doc, nodeId) {
  const result = clone(doc);
  let deleted = 0;

  // Return a new array holding the non-matching siblings, after pruning
  // matches out of each survivor's own subtree.
  const prune = (siblings) => {
    const survivors = [];
    for (let i = 0; i < siblings.length; i += 1) {
      const child = siblings[i];
      if (matchesId(child, nodeId)) {
        deleted += 1;
      } else {
        if (isObject(child) && Array.isArray(child.content)) {
          child.content = prune(child.content);
        }
        survivors.push(child);
      }
    }
    return survivors;
  };

  if (isObject(result) && Array.isArray(result.content)) {
    result.content = prune(result.content);
  }
  return { doc: result, deleted };
}
|
||||
/**
 * Report, as a clear model-actionable error, that a node-id write op did NOT
 * hit exactly one node (#159).
 *
 * - `count === 0` -> "no node found".
 * - `count > 1`  -> "ambiguous, refused": block ids get duplicated on
 *   copy/paste, so writing by id could clobber/remove every duplicate.
 *
 * This function only REPORTS. Per the surrounding convention the caller has
 * already skipped the write for any `count !== 1`, so nothing was changed.
 * Any other count (notably exactly 1) is a no-op.
 *
 * @param {string} op - Operation name used as the message prefix.
 * @param {string} verb - Verb describing the refused action ("replace", …).
 * @param {number} count - How many nodes matched.
 * @param {string} nodeId - The id that was targeted.
 * @param {string} pageId - The page the op ran against.
 * @throws {Error} When `count` is 0 or greater than 1.
 */
export function assertUnambiguousMatch(op, verb, count, nodeId, pageId) {
  let complaint = null;

  if (count === 0) {
    complaint = `${op}: no node with id "${nodeId}" found on page ${pageId}`;
  } else if (count > 1) {
    complaint = `${op}: id "${nodeId}" is ambiguous — ${count} nodes on page ${pageId} share it (block ids are duplicated on copy/paste). Refusing to ${verb} all of them; nothing was changed. Re-target with a more specific anchor.`;
  }

  if (complaint !== null) {
    throw new Error(complaint);
  }
}
|
||||
/**
 * Return a deep clone of `doc` in which no node or mark attribute has the
 * value `undefined`.
 *
 * Per the original note, Yjs throws an opaque "Unexpected content type" when
 * asked to store an `undefined` attribute value, so such keys are deleted.
 * ONLY strictly-`undefined` values are removed; `null`, `false`, `0` and `""`
 * are kept. The input is never mutated, and non-object nodes/attrs are simply
 * skipped.
 *
 * @param {object} doc - ProseMirror-style document JSON.
 * @returns {object} The sanitized clone.
 */
export function sanitizeForYjs(doc) {
  const result = clone(doc);

  // Delete the `undefined`-valued keys of one attrs bag (no-op for non-objects).
  const dropUndefined = (bag) => {
    if (!isObject(bag)) {
      return;
    }
    for (const [name, value] of Object.entries(bag)) {
      if (value === undefined) {
        delete bag[name];
      }
    }
  };

  // Clean a node's own attrs, then its marks' attrs, then its children.
  const scrub = (node) => {
    if (!isObject(node)) {
      return;
    }
    dropUndefined(node.attrs);

    const marks = Array.isArray(node.marks) ? node.marks : [];
    for (const mark of marks) {
      if (isObject(mark)) {
        dropUndefined(mark.attrs);
      }
    }

    const children = Array.isArray(node.content) ? node.content : [];
    for (const child of children) {
      scrub(child);
    }
  };

  scrub(result);
  return result;
}
|
||||
/**
 * Diagnostics helper: locate the FIRST attribute value that cannot be stored,
 * i.e. one whose type is `undefined`, `function`, `symbol` or `bigint`, in
 * any `node.attrs` or `mark.attrs`, and describe where it is.
 *
 * The description is a path relative to the document root followed by the
 * offending kind, e.g. `content[3].content[0].attrs.indent (undefined)`. The
 * root contributes no prefix of its own: its attrs are reported as `attrs.<key>`
 * and its children as `content[<i>]…`. Marks are inspected on every node
 * except the root. Search order per node is attrs, then marks, then children.
 *
 * @param {object} doc - ProseMirror-style document JSON.
 * @returns {string|null} The path description, or `null` when every attribute
 *   is storable (or `doc` is not an object).
 */
export function findUnstorableAttr(doc) {
  const UNSTORABLE_KINDS = new Set(["undefined", "function", "symbol", "bigint"]);

  // Name of the unstorable kind, or null for a storable value.
  const unstorableKind = (value) => {
    const kind = typeof value;
    return UNSTORABLE_KINDS.has(kind) ? kind : null;
  };

  // Inspect one attrs bag; `prefix` is the path of the bag itself.
  const scanAttrs = (attrs, prefix) => {
    if (!isObject(attrs)) {
      return null;
    }
    for (const [key, value] of Object.entries(attrs)) {
      const kind = unstorableKind(value);
      if (kind !== null) {
        return `${prefix}.${key} (${kind})`;
      }
    }
    return null;
  };

  // Inspect the children of `node`; `prefix` is "" for the root, or the
  // parent's path plus a trailing dot otherwise.
  const scanChildren = (node, prefix) => {
    if (!Array.isArray(node.content)) {
      return null;
    }
    for (let i = 0; i < node.content.length; i += 1) {
      const found = scanNode(node.content[i], `${prefix}content[${i}]`);
      if (found !== null) {
        return found;
      }
    }
    return null;
  };

  // Inspect a non-root node located at `path`.
  const scanNode = (node, path) => {
    if (!isObject(node)) {
      return null;
    }
    const ownHit = scanAttrs(node.attrs, `${path}.attrs`);
    if (ownHit !== null) {
      return ownHit;
    }
    if (Array.isArray(node.marks)) {
      for (let i = 0; i < node.marks.length; i += 1) {
        const markHit = scanAttrs(node.marks[i]?.attrs, `${path}.marks[${i}].attrs`);
        if (markHit !== null) {
          return markHit;
        }
      }
    }
    return scanChildren(node, `${path}.`);
  };

  if (!isObject(doc)) {
    return null;
  }
  // The root has no index, so its paths carry no leading segment.
  const rootHit = scanAttrs(doc.attrs, "attrs");
  return rootHit !== null ? rootHit : scanChildren(doc, "");
}
|
||||
/**
 * Table structural node types, each mapped to the node type it must sit
 * directly inside. `insertNodeRelative` consults this to splice rows/cells
 * into the right ancestor instead of blindly into the anchor's direct parent
 * (which would corrupt the table's nesting).
 */
const REQUIRED_CONTAINER = {
  tableRow: "table",
  tableCell: "tableRow",
  tableHeader: "tableRow",
};

/** The structural type names themselves: exactly the keys of the map above. */
const STRUCTURAL_TYPES = new Set(Object.keys(REQUIRED_CONTAINER));
|
||||
/**
 * Index of the first TOP-LEVEL block whose plain text (via `blockPlainText`)
 * contains the anchor, or -1 when there is none.
 *
 * Matching is two-phase so that an exact hit anywhere beats a normalized hit:
 *   1. look for the verbatim `anchorText` across all blocks;
 *   2. only if that found nothing, and only if `stripInlineMarkdown` actually
 *      changed the anchor into a non-empty string, look for the stripped form.
 *
 * @param {Array} content - Top-level blocks; a non-array yields -1.
 * @param {string} anchorText - Text to locate.
 * @returns {number} Matching index, or -1.
 */
function findAnchorTextIndex(content, anchorText) {
  if (!Array.isArray(content)) {
    return -1;
  }

  const firstBlockContaining = (needle) =>
    content.findIndex((block) => blockPlainText(block).includes(needle));

  // Phase 1: verbatim.
  const exactIndex = firstBlockContaining(anchorText);
  if (exactIndex !== -1) {
    return exactIndex;
  }

  // Phase 2: markdown-stripped fallback, skipped when stripping was a no-op.
  const stripped = stripInlineMarkdown(anchorText);
  const worthRetrying = stripped !== anchorText && stripped.length > 0;
  return worthRetrying ? firstBlockContaining(stripped) : -1;
}
|
||||
/**
 * Resolve an anchor to its ancestor chain: an array of `{ node, index }`
 * links running from `doc` down to (and including) the matched node. `index`
 * is the node's position in its parent's `content`; the root link has -1.
 *
 * Resolution mode:
 *   - `opts.anchorNodeId` set: depth-first search of the whole tree (the root
 *     included) for the first node accepted by `matchesId`.
 *   - else `opts.anchorText` set: only top-level blocks are considered, using
 *     `findAnchorTextIndex` (exact match first, stripped fallback second).
 *
 * @param {object} doc - Document to search; the links reference its LIVE nodes.
 * @param {object} opts - Carries `anchorNodeId` and/or `anchorText`.
 * @returns {Array<{node: object, index: number}>|null} The chain, or `null`
 *   when the anchor cannot be resolved.
 */
function findAnchorChain(doc, opts) {
  if (!isObject(doc)) {
    return null;
  }

  if (opts.anchorNodeId != null) {
    const wantedId = opts.anchorNodeId;

    // Returns the chain from `node` down to the match, built on the way back
    // up the recursion, or null when the subtree holds no match.
    const descend = (node, index) => {
      if (!isObject(node)) {
        return null;
      }
      const link = { node, index };
      if (matchesId(node, wantedId)) {
        return [link];
      }
      if (!Array.isArray(node.content)) {
        return null;
      }
      for (const [position, child] of node.content.entries()) {
        const below = descend(child, position);
        if (below !== null) {
          return [link, ...below];
        }
      }
      return null;
    };

    return descend(doc, -1);
  }

  // Text anchors only ever address a direct child of the document.
  if (opts.anchorText != null && Array.isArray(doc.content)) {
    const position = findAnchorTextIndex(doc.content, opts.anchorText);
    if (position !== -1) {
      const root = { node: doc, index: -1 };
      const block = { node: doc.content[position], index: position };
      return [root, block];
    }
  }

  return null;
}
|
||||
/**
 * Insert a deep clone of `node` into a deep clone of `doc`, positioned by
 * `opts`. Neither argument is mutated.
 *
 * `opts.position`:
 *   - "append": push onto the top-level `doc.content` (created if missing).
 *   - "after": place immediately after the anchor.
 *   - anything else (normally "before"): place immediately before the anchor.
 *
 * Anchor resolution for before/after:
 *   - `opts.anchorNodeId` given: first node accepted by `matchesId`, searched
 *     depth-first through nested `content` arrays;
 *   - otherwise `opts.anchorText` given: first TOP-LEVEL block found by
 *     `findAnchorTextIndex`.
 *   The node is spliced into the `content` array that holds the anchor.
 *
 * Structural table nodes (tableRow / tableCell / tableHeader) are special:
 * they are spliced into the DEEPEST node on the anchor's ancestor chain whose
 * type is the required container (`REQUIRED_CONTAINER`), next to the child of
 * that container which leads to the anchor. If the anchor itself is that
 * container, the node goes to the container's start ("before") or end
 * ("after").
 *
 * @param {object} doc - ProseMirror-style document JSON.
 * @param {object} node - Node to insert (cloned).
 * @param {{position?: string, anchorNodeId?: string, anchorText?: string}} opts
 * @returns {{ doc: object, inserted: boolean }} `inserted` is false when
 *   `opts` is not an object or the anchor cannot be resolved; `doc` is then
 *   just the clone.
 * @throws {Error} When a structural node is appended at the top level, or is
 *   anchored somewhere with no enclosing container of the required type.
 */
export function insertNodeRelative(doc, node, opts) {
  const result = clone(doc);
  const payload = clone(node);
  const outcome = (inserted) => ({ doc: result, inserted });

  // Without options there is nothing actionable; stay null-safe.
  if (!isObject(opts)) {
    return outcome(false);
  }

  const structural = isObject(node) && STRUCTURAL_TYPES.has(node.type);

  if (opts.position === "append") {
    // A bare row/cell at the top level would be invalid nesting.
    if (structural) {
      throw new Error(`insert_node: cannot append a ${node.type} at the top level; use ` +
        `position before/after with an anchor inside the target table`);
    }
    if (!isObject(result)) {
      return outcome(false);
    }
    if (!Array.isArray(result.content)) {
      result.content = [];
    }
    result.content.push(payload);
    return outcome(true);
  }

  const placeAfter = opts.position === "after";
  const shift = placeAfter ? 1 : 0;

  if (structural) {
    const requiredType = REQUIRED_CONTAINER[node.type];
    const chain = findAnchorChain(result, opts);
    if (chain == null) {
      return outcome(false);
    }

    // Walk from the anchor upwards to the nearest link of the required type.
    let depth = chain.length - 1;
    while (depth >= 0 &&
      !(isObject(chain[depth].node) && chain[depth].node.type === requiredType)) {
      depth -= 1;
    }
    if (depth === -1) {
      throw new Error(`insert_node: cannot insert a ${node.type} here — the anchor is not ` +
        `inside a ${requiredType}. Anchor on a cell's text or a block id ` +
        `that lives inside the target table.`);
    }

    const host = chain[depth].node;
    if (!Array.isArray(host.content)) {
      host.content = [];
    }

    let spliceAt;
    if (depth === chain.length - 1) {
      // The anchor IS the container: prepend or append inside it.
      spliceAt = placeAfter ? host.content.length : 0;
    } else {
      // Sit next to the container's child that leads down to the anchor.
      spliceAt = chain[depth + 1].index + shift;
    }
    host.content.splice(spliceAt, 0, payload);
    return outcome(true);
  }

  if (opts.anchorNodeId != null) {
    // Depth-first; stops at the first match so the payload lands exactly once.
    const spliceBesideAnchor = (siblings) => {
      for (let i = 0; i < siblings.length; i += 1) {
        const entry = siblings[i];
        if (matchesId(entry, opts.anchorNodeId)) {
          siblings.splice(i + shift, 0, payload);
          return true;
        }
        if (isObject(entry) && Array.isArray(entry.content) &&
          spliceBesideAnchor(entry.content)) {
          return true;
        }
      }
      return false;
    };

    let found = false;
    if (isObject(result) && Array.isArray(result.content)) {
      found = spliceBesideAnchor(result.content);
    }
    return outcome(found);
  }

  if (opts.anchorText != null && isObject(result) && Array.isArray(result.content)) {
    const position = findAnchorTextIndex(result.content, opts.anchorText);
    if (position !== -1) {
      result.content.splice(position + shift, 0, payload);
      return outcome(true);
    }
  }

  return outcome(false);
}
|
||||
// ===========================================================================
|
||||
// Table editing helpers
|
||||
//
|
||||
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
|
||||
// table -> { type:"table", content:[tableRow...] }
|
||||
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
|
||||
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
|
||||
// content:[paragraph...] }
|
||||
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
|
||||
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
|
||||
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
|
||||
// of the input doc (via `clone`) and never mutate their inputs.
|
||||
// ===========================================================================
|
||||
/**
 * Add every string `attrs.id` found in `node` or any of its descendants to
 * the `used` set. Seeds `makeFreshId` so that generated ids do not collide
 * with ids already in the document.
 *
 * @param {object} node - Root of the subtree to scan; non-objects are ignored.
 * @param {Set<string>} used - Receives the ids (mutated in place).
 * @returns {void}
 */
function collectIds(node, used) {
  // Explicit stack; children are pushed in reverse so nodes are still
  // processed in document (pre-)order.
  const pending = [node];

  while (pending.length > 0) {
    const current = pending.pop();
    if (!isObject(current)) {
      continue;
    }

    const id = isObject(current.attrs) ? current.attrs.id : undefined;
    if (typeof id === "string") {
      used.add(id);
    }

    if (Array.isArray(current.content)) {
      for (let i = current.content.length - 1; i >= 0; i -= 1) {
        pending.push(current.content[i]);
      }
    }
  }
}
|
||||
/**
 * Produce a random 12-character id over `a-z0-9` that is not yet in `used`,
 * register it in `used`, and return it. A colliding candidate is simply
 * re-drawn. Callers depend on uniqueness only, never on the exact string, so
 * plain `Math.random()` is sufficient (this is not a security token).
 *
 * @param {Set<string>} used - Ids already taken; the new id is added to it.
 * @returns {string} The newly reserved id.
 */
function makeFreshId(used) {
  const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
  const randomChar = () => alphabet[Math.floor(Math.random() * alphabet.length)];
  const draw = () => Array.from({ length: 12 }, randomChar).join("");

  let candidate = draw();
  while (used.has(candidate)) {
    candidate = draw();
  }

  used.add(candidate);
  return candidate;
}
|
||||
/**
 * Resolve `tableRef` inside an ALREADY-CLONED document and hand back the LIVE
 * table node (a reference into `rootClone`, so the caller may mutate it)
 * together with its index path from the root.
 *
 * Reference forms:
 *   - `"#<n>"`: the top-level block at index `n`, accepted only when its
 *     `type` is `"table"`.
 *   - anything else: depth-first search for the first descendant accepted by
 *     `matchesId`, then climb from that node (itself included) to the nearest
 *     ancestor whose `type` is `"table"`.
 *
 * Note: when an id match has no enclosing table, the scan of that sibling
 * list stops there, but the search carries on in the remaining siblings of
 * its ancestors.
 *
 * @param {object} rootClone - Document clone to search.
 * @param {string} tableRef - `"#<n>"` or a node id inside the table.
 * @returns {{ table: object, path: number[] }|null} `null` when nothing matches.
 */
function locateTable(rootClone, tableRef) {
  if (!isObject(rootClone)) {
    return null;
  }

  // Positional form.
  const positional = typeof tableRef === "string" ? /^#(\d+)$/.exec(tableRef) : null;
  if (positional) {
    const position = Number(positional[1]);
    const candidate = Array.isArray(rootClone.content)
      ? rootClone.content[position]
      : undefined;
    if (isObject(candidate) && candidate.type === "table") {
      return { table: candidate, path: [position] };
    }
    return null;
  }

  // Id form. `lineage` is the stack of `{ node, index }` links from the first
  // level below the root down to the node currently being examined.
  const lineage = [];

  // Closest table on the current lineage, deepest first.
  const nearestTable = () => {
    for (let depth = lineage.length - 1; depth >= 0; depth -= 1) {
      const { node } = lineage[depth];
      if (isObject(node) && node.type === "table") {
        return {
          table: node,
          path: lineage.slice(0, depth + 1).map((link) => link.index),
        };
      }
    }
    return null;
  };

  const descend = (parent) => {
    if (!isObject(parent) || !Array.isArray(parent.content)) {
      return null;
    }
    for (const [index, child] of parent.content.entries()) {
      lineage.push({ node: child, index });
      if (matchesId(child, tableRef)) {
        // Id found: answer with its enclosing table, or null if there is none.
        const answer = nearestTable();
        lineage.pop();
        return answer;
      }
      const below = descend(child);
      lineage.pop();
      if (below !== null) {
        return below;
      }
    }
    return null;
  };

  return descend(rootClone);
}
|
||||
/**
 * Build the single-paragraph cell content shared by all table writers.
 *
 * @param {string} id - Value for the paragraph's `attrs.id`.
 * @param {string} text - Cell text; a falsy value (e.g. `""`) yields a
 *   paragraph whose `content` is an empty array rather than an empty text node.
 * @returns {{type: string, attrs: {id: string, indent: number}, content: Array}}
 */
function makeCellParagraph(id, text) {
  const content = [];
  if (text) {
    content.push({ type: "text", text });
  }
  return { type: "paragraph", attrs: { id, indent: 0 }, content };
}
|
||||
/**
 * Read a table as a matrix of plain text plus addressing information.
 *
 * Result fields:
 *   - `rows`: number of rows in the table.
 *   - `cols`: number of cells in the FIRST row only. Tables may be ragged, so
 *     use the length of each `cells[r]` / `cellIds[r]` for a row's real width.
 *   - `cells`: `string[][]`, each cell's `blockPlainText`.
 *   - `cellIds`: `(string|null)[][]`, the `attrs.id` of each cell's FIRST
 *     child (or null), so a caller can target the cell with a node-id edit.
 *   - `path`: index path of the table inside the document.
 *
 * @param {object} doc - ProseMirror-style document JSON (not mutated).
 * @param {string} tableRef - Table reference as understood by `locateTable`.
 * @returns {{rows: number, cols: number, cells: string[][],
 *   cellIds: Array<Array<string|null>>, path: number[]}|null} `null` when the
 *   reference resolves to no table.
 */
export function readTable(doc, tableRef) {
  const located = locateTable(clone(doc), tableRef);
  if (located == null) {
    return null;
  }

  const { table, path } = located;
  const rowNodes = Array.isArray(table.content) ? table.content : [];

  // Id carried by the first child of a cell, or null when there is none.
  const firstChildId = (cellNode) => {
    const first = Array.isArray(cellNode?.content) ? cellNode.content[0] : undefined;
    if (!isObject(first) || !isObject(first.attrs)) {
      return null;
    }
    return first.attrs.id ?? null;
  };

  const cells = [];
  const cellIds = [];
  for (const rowNode of rowNodes) {
    const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : [];
    cells.push(Array.from(cellNodes, (cellNode) => blockPlainText(cellNode)));
    cellIds.push(Array.from(cellNodes, (cellNode) => firstChildId(cellNode)));
  }

  return {
    rows: rowNodes.length,
    cols: rowNodes[0]?.content?.length ?? 0,
    cells,
    cellIds,
    path,
  };
}
|
||||
/**
 * Insert a row of plain-text cells into a table.
 *
 * The new row is as wide as the WIDEST existing row (or, for a table with no
 * cells at all, as wide as the supplied `cells`); missing entries are padded
 * with empty cells (`cells[i] ?? ""`). Each new cell gets `colspan: 1`,
 * `rowspan: 1`, a paragraph with a freshly generated id, and a COPY of the
 * `colwidth` of the first row's cell in the same column when that cell defines
 * one. The array is copied rather than shared so later edits to one cell's
 * widths can never leak into the other.
 *
 * A row landing at index 0 becomes the table's first row, so its cells take
 * the type of the current first-row cell in the same column (falling back to
 * "tableCell"); at any other position the cells are plain "tableCell"s.
 *
 * @param {object} doc - ProseMirror-style document JSON (not mutated).
 * @param {string} tableRef - Table reference as understood by `locateTable`.
 * @param {string[]} cells - Cell texts, left to right.
 * @param {number} [index] - Target row index; honoured when it is an integer
 *   in `[0, rows]`, otherwise the row is appended at the end.
 * @returns {{ doc: object, inserted: boolean }} `inserted` is false only when
 *   the table cannot be located.
 * @throws {Error} When more cells are supplied than the table has columns.
 */
export function insertTableRow(doc, tableRef, cells, index) {
  const out = clone(doc);
  const located = locateTable(out, tableRef);
  if (located == null) {
    return { doc: out, inserted: false };
  }

  const { table } = located;
  if (!Array.isArray(table.content)) {
    table.content = [];
  }
  const rows = table.content.length;
  const headerRow = table.content[0];
  const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];
  const supplied = Array.isArray(cells) ? cells : [];

  // Column count is the widest existing row, so the guard below stays
  // meaningful for ragged tables; fall back to the supplied count only when
  // the table has no cells yet.
  let colCount = 0;
  for (const rowNode of table.content) {
    if (isObject(rowNode) && Array.isArray(rowNode.content)) {
      colCount = Math.max(colCount, rowNode.content.length);
    }
  }
  if (colCount === 0) {
    colCount = supplied.length;
  }
  if (supplied.length > colCount) {
    throw new Error(`table_insert_row: got ${supplied.length} cell(s) but the table has ${colCount} column(s)`);
  }

  // Resolve the landing index up front so the cell-type decision and the
  // splice agree.
  const indexIsUsable = typeof index === "number" &&
    Number.isInteger(index) &&
    index >= 0 &&
    index <= rows;
  const landingIndex = indexIsUsable ? index : rows;

  // Seed the id generator with every id already in the document so the new
  // paragraph ids are unique document-wide.
  const used = new Set();
  collectIds(out, used);

  const newCells = [];
  for (let i = 0; i < colCount; i += 1) {
    const text = supplied[i] ?? "";
    const attrs = { colspan: 1, rowspan: 1 };

    const colwidth = headerCells[i]?.attrs?.colwidth;
    if (colwidth !== undefined) {
      // Copy arrays so the new cell does not alias the first row's array.
      attrs.colwidth = Array.isArray(colwidth) ? [...colwidth] : colwidth;
    }

    const cellType = landingIndex === 0
      ? (headerCells[i]?.type ?? "tableCell")
      : "tableCell";

    newCells.push({
      type: cellType,
      attrs,
      content: [makeCellParagraph(makeFreshId(used), text)],
    });
  }

  table.content.splice(landingIndex, 0, { type: "tableRow", content: newCells });
  return { doc: out, inserted: true };
}
|
||||
/**
 * Remove the row at 0-based `index` from a table.
 *
 * @param {object} doc - ProseMirror-style document JSON (not mutated).
 * @param {string} tableRef - Table reference as understood by `locateTable`.
 * @param {number} index - Row to delete.
 * @returns {{ doc: object, deleted: boolean }} `deleted` is false only when
 *   the table cannot be located.
 * @throws {Error} When `index` is not an integer within `[0, rows)`, or when
 *   the row is the table's only row (an empty table is never produced).
 */
export function deleteTableRow(doc, tableRef, index) {
  const result = clone(doc);
  const located = locateTable(result, tableRef);
  if (located == null) {
    return { doc: result, deleted: false };
  }

  const { table } = located;
  if (!Array.isArray(table.content)) {
    table.content = [];
  }

  const rowCount = table.content.length;
  const withinRange = Number.isInteger(index) && index >= 0 && index < rowCount;
  if (!withinRange) {
    throw new Error(`table_delete_row: row index ${index} out of range (table has ${rowCount} row(s))`);
  }
  if (rowCount <= 1) {
    throw new Error("table_delete_row: refusing to delete the only row of the table");
  }

  table.content.splice(index, 1);
  return { doc: result, deleted: true };
}
|
||||
/**
 * Overwrite the content of cell `[row, col]` (both 0-based) with a single
 * plain-text paragraph.
 *
 * The cell node's own attrs (colspan / rowspan / colwidth) are left alone;
 * only its `content` is replaced. The new paragraph keeps the id of the cell's
 * existing first child when that id is a non-empty string, otherwise a fresh
 * document-unique id is generated.
 *
 * @param {object} doc - ProseMirror-style document JSON (not mutated).
 * @param {string} tableRef - Table reference as understood by `locateTable`.
 * @param {number} row - Row index.
 * @param {number} col - Column index within that row.
 * @param {string} text - New cell text.
 * @returns {{ doc: object, updated: boolean }} `updated` is false only when
 *   the table cannot be located.
 * @throws {Error} When `row` or `col` is not an integer inside the table /
 *   the addressed row.
 */
export function updateTableCell(doc, tableRef, row, col, text) {
  const result = clone(doc);
  const located = locateTable(result, tableRef);
  if (located == null) {
    return { doc: result, updated: false };
  }

  const { table } = located;
  const rowNodes = Array.isArray(table.content) ? table.content : [];
  const targetRow = rowNodes[row];
  const width = isObject(targetRow) && Array.isArray(targetRow.content)
    ? targetRow.content.length
    : 0;

  const rowInRange = Number.isInteger(row) && row >= 0 && row < rowNodes.length;
  const colInRange = Number.isInteger(col) && col >= 0 && col < width;
  if (!(rowInRange && colInRange)) {
    throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
  }

  const cell = targetRow.content[col];
  const firstChild = Array.isArray(cell?.content) ? cell.content[0] : undefined;
  let paragraphId = isObject(firstChild) && isObject(firstChild.attrs)
    ? firstChild.attrs.id
    : undefined;

  // No reusable id: mint one that is unique across the whole document.
  if (typeof paragraphId !== "string" || paragraphId.length === 0) {
    const used = new Set();
    collectIds(result, used);
    paragraphId = makeFreshId(used);
  }

  cell.content = [makeCellParagraph(paragraphId, text)];
  return { doc: result, updated: true };
}
|
||||
@@ -1,31 +0,0 @@
|
||||
/**
 * Per-page async mutex.
 *
 * Content writes for one page must never overlap: per the original note, two
 * concurrent full-document replaces would race on the live Yjs fragment. They
 * are serialized with one promise chain per `pageId`: each operation starts
 * only after the previous operation on that page has settled, whether it
 * succeeded or failed. Operations on different pages never wait on each other.
 */
const chains = new Map();

/**
 * Run `fn` once every earlier `withPageLock` call for the same page has
 * settled.
 *
 * @param {string} pageId - Key identifying the page's queue.
 * @param {Function} fn - Work to run (sync or async); invoked with one
 *   `undefined` argument.
 * @returns {Promise<*>} Settles with `fn`'s real result or rejection; the
 *   caller MUST await/handle it. Only the internal queue tail swallows errors.
 */
export function withPageLock(pageId, fn) {
  const swallow = () => { };

  // An earlier failure must not poison the queue, so its error is dropped
  // before `fn` is chained on.
  const previous = chains.has(pageId) ? chains.get(pageId) : Promise.resolve();
  const run = previous.catch(swallow).then(fn);

  // The stored tail exists purely to gate ordering, so it never rejects.
  const tail = run.catch(swallow);
  chains.set(pageId, tail);

  // Once settled, remove the entry, but only if no later call has replaced
  // it, so the map does not grow without bound.
  tail.then(() => {
    if (chains.get(pageId) === tail) {
      chains.delete(pageId);
    }
  });

  return run;
}
|
||||
@@ -1,15 +0,0 @@
|
||||
/**
 * Normalize a ProseMirror node argument that the model may have serialized as
 * a JSON string instead of passing an object.
 *
 * A string is parsed with `JSON.parse`; any other value is returned as is.
 * Per the original note this is shared by patch_node / insert_node and the
 * analogous update_page_json content parsing.
 *
 * @param {*} node - The raw argument.
 * @param {string} [errMsg] - Message for the error thrown on invalid JSON.
 * @returns {*} The parsed value for a string input, otherwise `node` itself.
 * @throws {Error} With message `errMsg` when `node` is a string that is not
 *   valid JSON; the underlying `SyntaxError` is preserved as `error.cause` so
 *   the parse position is not lost.
 */
export function parseNodeArg(node, errMsg = "node was a string but not valid JSON") {
  if (typeof node !== "string") {
    return node;
  }
  try {
    return JSON.parse(node);
  } catch (cause) {
    throw new Error(errMsg, { cause });
  }
}
|
||||
@@ -1,108 +0,0 @@
|
||||
/**
|
||||
* Locator normalization: strip inline markdown wrappers and trailing
|
||||
* decoration from a LOCATOR string so a find/anchor that the model wrote with
|
||||
* markdown (or a stray emoji) can still match the document's plain text.
|
||||
*
|
||||
* This is used ONLY as a fallback for LOCATING (after an exact match fails);
|
||||
* it is never applied to replacement text or inserted node content, so no
|
||||
* formatting is ever lost.
|
||||
*/
|
||||
/** Upper bound on unwrap passes, so pathological/nested input cannot loop forever. */
const MAX_PASSES = 8;

/**
 * Balanced inline emphasis / strikethrough / code wrappers. Strong comes
 * BEFORE emphasis so that `**x**` collapses to `x` instead of leaving a stray
 * `*x*`. Every pattern is non-greedy and captures the inner text in group 1.
 * The list is applied repeatedly until the string stops changing, which
 * handles nested wrappers such as `**_x_**`.
 */
const WRAPPER_PATTERNS = [
  /\*\*([^*]+?)\*\*/g, // **x**
  /__([^_]+?)__/g, // __x__
  /~~([^~]+?)~~/g, // ~~x~~
  /\*([^*]+?)\*/g, // *x*
  /_([^_]+?)_/g, // _x_
  /``([^`]+?)``/g, // ``x``
  /`([^`]+?)`/g, // `x`
];

/**
 * Links/images -> their visible text. The optional leading `!` makes one
 * pattern cover both `[text](url)` and `![alt](url)`.
 */
const LINK_IMAGE_RE = /!?\[([^\]]*)\]\([^)]*\)/g;

/**
 * Run ONLY the two passes shared by both normalizers:
 *   1. collapse links/images to their visible text (once);
 *   2. collapse balanced inline wrappers, repeating until the string is
 *      stable or `MAX_PASSES` passes have run.
 *
 * No decoration trimming and no empty-result guard: the transformed string is
 * returned exactly as produced.
 *
 * @param {string} s - Input text (must be a string).
 * @returns {string} The text with links, images and balanced wrappers removed.
 */
function stripWrappersAndLinks(s) {
  let current = s.replace(LINK_IMAGE_RE, "$1");

  let passesLeft = MAX_PASSES;
  while (passesLeft > 0) {
    const unwrapped = WRAPPER_PATTERNS.reduce(
      (text, pattern) => text.replace(pattern, "$1"),
      current,
    );
    if (unwrapped === current) {
      break;
    }
    current = unwrapped;
    passesLeft -= 1;
  }

  return current;
}
|
||||
/**
 * STRICT formatting detector, deliberately distinct from the lenient locator
 * normalization done by `stripInlineMarkdown`. It removes ONLY what
 * `stripWrappersAndLinks` treats as markdown markup:
 *   1. links/images: `[text](url)` -> `text`, `![alt](url)` -> `alt`;
 *   2. balanced inline `**` / `__` / `~~` / `*` / `_` / backtick wrappers
 *      (repeated until stable).
 * It does NOT trim leading/trailing whitespace, emoji or lone marker
 * characters.
 *
 * According to the original notes it exists to recognize formatting-vs-plain
 * INTENT in `applyTextEdits` (do find and replace differ purely by markdown
 * markers?). Because nothing is trimmed, a trailing-space edit or a lone `*`
 * as in `2 * 3` is left untouched; balanced runs are still collapsed, but the
 * two sides are compared symmetrically.
 *
 * Do NOT use this for LOCATING; the locator fallback relies on the lenient
 * `stripInlineMarkdown`.
 *
 * @param {*} s - Candidate text.
 * @returns {*} The stripped string; a non-string or empty string is returned
 *   unchanged.
 */
export function stripBalancedWrappers(s) {
  const hasText = typeof s === "string" && s.length > 0;
  return hasText ? stripWrappersAndLinks(s) : s;
}
|
||||
/**
 * Conservatively strip inline markdown from a locator string.
 *
 * Deterministic, fixed-order steps:
 *   1. Links/images: `[text](url)` -> `text`, `![alt](url)` -> `alt`.
 *   2. Balanced inline wrappers (strong before emphasis, code, strikethrough),
 *      applied repeatedly until stable so nested cases collapse.
 *   3. Trim leading/trailing decoration only: whitespace, leftover marker
 *      characters (`* _ ~` and the backtick) and emoji. Letters, digits and
 *      sentence punctuation (`.`, `,`, ...) are never trimmed, and interior
 *      text is untouched.
 *
 * If the result would be empty (e.g. the input was only markers such as
 * `***`), the ORIGINAL string is returned, so a locator can never normalize
 * down to "" and match everything.
 *
 * @param {*} s Locator string; non-strings and "" are returned unchanged.
 * @returns {*} The normalized locator, or `s` when normalization would empty it.
 */
export function stripInlineMarkdown(s) {
    const usable = typeof s === "string" && s.length !== 0;
    if (!usable) {
        return s;
    }
    // Decoration class: whitespace, markdown markers, and emoji
    // (Extended_Pictographic plus the VS16 / ZWJ joiners, plus the
    // regional-indicator range U+1F1E6–U+1F1FF used by flag emoji, which is
    // NOT Extended_Pictographic). The `u` flag enables the property escape.
    const DECORATION = "[\\s*_~\\x60\\p{Extended_Pictographic}\\u{1F1E6}-\\u{1F1FF}\\u{FE0F}\\u{200D}]+";
    const leadingRun = new RegExp("^" + DECORATION, "u");
    const trailingRun = new RegExp(DECORATION + "$", "u");
    // Steps 1 + 2: shared link/image and balanced-wrapper passes.
    const unwrapped = stripWrappersAndLinks(s);
    // Step 3: anchored runs only.
    const trimmed = unwrapped.replace(leadingRun, "").replace(trailingRun, "");
    // Step 4: never normalize a locator down to nothing.
    return trimmed.length === 0 ? s : trimmed;
}
|
||||
@@ -1,631 +0,0 @@
|
||||
/**
|
||||
* Pure, network-free transform primitives for a ProseMirror/TipTap document
|
||||
* tree, plus one higher-level orchestration (commentsToFootnotes).
|
||||
*
|
||||
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||
* `content` array; callouts, tables, lists all hold their children in
|
||||
* `content`, so a single recursive walk reaches them all.
|
||||
*
|
||||
* Conventions (matching node-ops.ts):
|
||||
* - functions that produce a new document deep-clone their input and return a
|
||||
* `{ doc, ... }` object; the caller's objects are never mutated.
|
||||
* - functions are defensively null-safe.
|
||||
* - `marks` arrays are preserved verbatim when fragments are split/reordered.
|
||||
*/
|
||||
import { blockPlainText } from "./node-ops.js";
|
||||
import { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
import { footnoteContentKey, makeFootnoteDefinition, generateFootnoteId, } from "./footnote-authoring.js";
|
||||
export { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
/** Deep-copy a JSON-serializable value; the input is never mutated. */
function clone(value) {
    const hasNativeClone = typeof structuredClone === "function";
    // Prefer the native deep copy; otherwise fall back to a JSON round-trip
    // for environments without structuredClone.
    return hasNativeClone
        ? structuredClone(value)
        : JSON.parse(JSON.stringify(value));
}
|
||||
/** True when `value` is a non-null, non-array object. */
function isObject(value) {
    if (value == null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
||||
/**
 * Plain text of a node. A thin pass-through to node-ops' `blockPlainText`,
 * exposed here so transform authors have a single import surface.
 *
 * @param {object} node ProseMirror JSON node.
 * @returns {string} Whatever `blockPlainText(node)` returns.
 */
export function blockText(node) {
    const text = blockPlainText(node);
    return text;
}
|
||||
/**
 * Depth-first, pre-order visit of every node in the tree: the root first, then
 * everything reachable through `content` arrays (callouts, tables, lists, ...).
 * `fn` is invoked exactly once per visited node.
 *
 * Null-safe: a nullish or non-object node (at the root or as a child) is
 * silently ignored.
 *
 * @param {*} node Root of the (sub)tree to visit.
 * @param {(node: object) => void} fn Visitor; its return value is ignored.
 */
export function walk(node, fn) {
    if (isObject(node)) {
        fn(node);
        // Read `content` only after the visitor has run, then descend.
        const children = node.content;
        if (!Array.isArray(children)) {
            return;
        }
        for (let i = 0; i < children.length; i++) {
            walk(children[i], fn);
        }
    }
}
|
||||
/**
 * Find the FIRST node (depth-first, pre-order) matching `predicate`, anywhere
 * in the tree. It searches the raw tree rather than an id index, so it works
 * even when the node carries no `attrs.id`.
 *
 * The search stops as soon as a match is found; the rest of the tree is not
 * traversed. Nullish / non-object nodes are skipped.
 *
 * Typical use: `getList(doc, n => n.type === "orderedList")`.
 *
 * @param {*} doc Root node to search from.
 * @param {(node: object) => boolean} predicate Called with each object node in
 *   reading order until it returns a truthy value.
 * @returns {object|null} The live node reference inside `doc` (NOT a clone),
 *   or null when nothing matches.
 */
export function getList(doc, predicate) {
    if (!isObject(doc)) {
        return null;
    }
    if (predicate(doc)) {
        return doc;
    }
    if (Array.isArray(doc.content)) {
        for (const child of doc.content) {
            const hit = getList(child, predicate);
            // Early exit: the first match in reading order wins.
            if (hit !== null) {
                return hit;
            }
        }
    }
    return null;
}
|
||||
/**
|
||||
* Textblocks that hold raw text but do NOT accept inline atom nodes. A
|
||||
* `footnoteReference` is `group:"inline", atom:true`; `codeBlock` is
|
||||
* `content:"text*"` (text only), so splicing a footnoteReference into it yields
|
||||
* an invalid document. (paragraph/heading/detailsSummary are `inline*` and DO
|
||||
* accept it; footnote definitions live inside a footnotesList which the
|
||||
* footnote inserter excludes via `beforeBlock`.)
|
||||
*/
|
||||
// Passed as `forbidBlockTypes` by insertInlineFootnote; matched against a node's `type`.
const INLINE_ATOM_FORBIDDEN_BLOCKS = new Set(["codeBlock"]);
|
||||
/**
 * Node types that make up the footnote-notes section: a `footnotesList` and
 * the `footnoteDefinition`s it holds. The inline footnote inserter must never
 * split into one of these subtrees (at any depth): a reference anchored inside
 * them would later be dropped as an orphan by the canonicalizer, taking the
 * existing definition's text with it.
 */
const FOOTNOTE_NOTES_SUBTREES = new Set(["footnotesList", "footnoteDefinition"]);
|
||||
/**
 * True if `node` IS, or contains at any depth (via `content`), a
 * footnotesList / footnoteDefinition. Non-object input yields false.
 */
function containsFootnoteNotes(node) {
    if (!isObject(node)) {
        return false;
    }
    if (FOOTNOTE_NOTES_SUBTREES.has(node.type)) {
        return true;
    }
    // Otherwise the answer is decided by the descendants, if there are any.
    return Array.isArray(node.content)
        && node.content.some((child) => containsFootnoteNotes(child));
}
|
||||
/**
 * Insert `marker` as a PLAIN (unmarked) text run right after the first
 * occurrence of `anchor`.
 *
 * The text run containing the END of the anchor is split at the anchor end, so
 * existing marks (links, bold, ...) on the surrounding text are preserved
 * while the inserted run carries no marks. The run is padded with one leading
 * space (`" " + marker`) so it separates visually from the preceding word.
 *
 * Matching is done against the plain text of each top-level block, so an
 * anchor spanning several text/mark runs still matches; the insertion lands in
 * the inline array that holds the anchor's final character. All the work is
 * delegated to `insertNodesAfterAnchor`.
 *
 * @param {object} doc Document root; never mutated (a clone is edited).
 * @param {string} anchor Text to locate.
 * @param {string} marker Marker text to insert after the anchor.
 * @param {object} [opts] Forwarded unchanged to `insertNodesAfterAnchor`.
 * @returns {{doc: object, inserted: boolean}} `inserted` is false when the
 *   anchor was not found in any in-scope block.
 */
export function insertMarkerAfter(doc, anchor, marker, opts = {}) {
    // Evaluated at the split point: a single unmarked, space-padded text run.
    const buildMarkerRun = () => {
        const run = { type: "text", text: " " + marker };
        return [run];
    };
    return insertNodesAfterAnchor(doc, anchor, buildMarkerRun, opts);
}
|
||||
/**
 * Mark-safe insertion CORE: split the inline text run that holds the END of
 * the first occurrence of `anchor` (preserving the surrounding marks) and
 * splice the nodes produced by `makeMiddle()` in at the split point.
 * `insertMarkerAfter` (space-padded text marker) and `insertInlineFootnote`
 * (a `footnoteReference` node that hugs the preceding word) are thin callers;
 * WHAT gets inserted is entirely decided by `makeMiddle`.
 *
 * Top-level blocks are tried in order. Within a block, a running character
 * offset is kept while walking its inline arrays; the first text run whose
 * range covers the anchor end is split. If a block matches in plain text but
 * no split is possible there (the anchor end lands in an atom, or in a
 * skipped/forbidden subtree), the search falls through to the next block.
 *
 * NOTE(review): offset tracking assumes `blockPlainText(parent)` is the plain
 * concatenation of its children's `blockPlainText` — confirm against node-ops.
 *
 * @param {object} doc Document root; never mutated (a clone is edited).
 * @param {string} anchor Text to locate; a falsy anchor inserts nothing.
 * @param {() => object[]} makeMiddle Produces the nodes to insert. They carry
 *   no copied marks.
 * @param {object} [opts]
 * @param {number} [opts.beforeBlock] Only top-level blocks with an index below
 *   this value are searched.
 * @param {Set<string>} [opts.skipSubtreeTypes] Container types never descended into.
 * @param {Set<string>} [opts.forbidBlockTypes] Textblock types that must not
 *   receive the inserted nodes (e.g. `codeBlock` for an inline atom).
 * @returns {{doc: object, inserted: boolean}}
 */
function insertNodesAfterAnchor(doc, anchor, makeMiddle, opts = {}) {
    const out = clone(doc);
    if (!isObject(out) || !Array.isArray(out.content) || !anchor) {
        return { doc: out, inserted: false };
    }
    const limit = typeof opts.beforeBlock === "number"
        ? Math.min(opts.beforeBlock, out.content.length)
        : out.content.length;
    for (let b = 0; b < limit; b++) {
        const block = out.content[b];
        if (!isObject(block))
            continue;
        // Quick reject + anchor position from a single plain-text computation.
        const anchorStart = blockPlainText(block).indexOf(anchor);
        if (anchorStart < 0)
            continue;
        const anchorEnd = anchorStart + anchor.length;
        let inserted = false;
        let offset = 0; // characters of plain text seen so far in this block
        // Recurse into inline-bearing containers (paragraph, heading, table
        // cell, callout child paragraphs, ...). A split only ever happens
        // inside an array of inline nodes.
        const visit = (container) => {
            if (inserted || !isObject(container)) {
                return;
            }
            if (!Array.isArray(container.content)) {
                // Content-less leaf (e.g. an atom that is the sole child of its
                // parent): nothing to split, but its plain text still counts
                // toward the running offset so later runs stay aligned.
                offset += blockPlainText(container).length;
                return;
            }
            // Skip a forbidden subtree entirely (e.g. footnotesList /
            // footnoteDefinition) while keeping `offset` aligned for any
            // sibling text after it within this block.
            if (opts.skipSubtreeTypes && opts.skipSubtreeTypes.has(container.type)) {
                offset += blockPlainText(container).length;
                return;
            }
            const inline = container.content;
            // An array is treated as inline when it contains text nodes.
            const hasText = inline.some((n) => isObject(n) && n.type === "text");
            if (!hasText) {
                // Not an inline array: recurse (e.g. callout -> paragraph).
                for (const child of inline) {
                    visit(child);
                    if (inserted)
                        return;
                }
                return;
            }
            // Refuse a textblock whose content spec cannot hold the inserted
            // nodes. Keep `offset` aligned, then bail so the search falls
            // through to the next candidate.
            if (opts.forbidBlockTypes && opts.forbidBlockTypes.has(container.type)) {
                offset += blockPlainText(container).length;
                return;
            }
            for (let i = 0; i < inline.length; i++) {
                const n = inline[i];
                const len = isObject(n) ? blockPlainText(n).length : 0;
                const runStart = offset;
                const runEnd = offset + len;
                // Split point: the text run with runStart < anchorEnd <= runEnd.
                if (isObject(n) &&
                    n.type === "text" &&
                    typeof n.text === "string" &&
                    anchorEnd > runStart &&
                    anchorEnd <= runEnd) {
                    const cut = anchorEnd - runStart; // split index within this run
                    const before = n.text.slice(0, cut);
                    const after = n.text.slice(cut);
                    const marks = Array.isArray(n.marks) ? n.marks : [];
                    const parts = [];
                    if (before.length > 0) {
                        parts.push({ ...n, text: before, marks: [...marks] });
                    }
                    // Caller-decided nodes; they carry no copied marks.
                    parts.push(...makeMiddle());
                    if (after.length > 0) {
                        parts.push({ ...n, text: after, marks: [...marks] });
                    }
                    inline.splice(i, 1, ...parts);
                    inserted = true;
                    return;
                }
                offset = runEnd;
            }
        };
        visit(block);
        if (inserted) {
            return { doc: out, inserted: true };
        }
        // Matched in plain text but could not split here: try the next block.
    }
    return { doc: out, inserted: false };
}
|
||||
/**
 * In the disclaimer callout, replace a `[1]…[K]` range marker with `[1]…[n]`.
 *
 * Docmost translations use a callout that states the footnote range, e.g.
 * "[1]…[5]". When the number of notes changes, this rewrites the trailing
 * number of every `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis) occurrence found
 * in a callout's text nodes.
 *
 * Each text node is processed and counted at most once, even when callouts are
 * nested and the node is therefore reachable through several callouts.
 *
 * @param {object} doc Document root; never mutated (a clone is edited).
 * @param {number|string} n New upper bound, inserted literally.
 * @returns {{doc: object, changed: number}} `changed` is the number of
 *   distinct text nodes that contained a range marker.
 */
export function setCalloutRange(doc, n) {
    const out = clone(doc);
    // "[1]" + (… or ...) + "[<digits>]"; only the last number is replaced.
    const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g;
    const rewritten = new Set();
    walk(out, (node) => {
        if (node.type !== "callout") {
            return;
        }
        walk(node, (inner) => {
            if (inner.type !== "text" ||
                typeof inner.text !== "string" ||
                rewritten.has(inner)) {
                return;
            }
            let matched = false;
            // A replacer function keeps `n` literal: no `$1<digits>` template
            // parsing is involved, and no stateful `.test()` on a /g regex.
            const next = inner.text.replace(rangeRe, (_whole, head, tail) => {
                matched = true;
                return `${head}${n}${tail}`;
            });
            if (matched) {
                inner.text = next;
                rewritten.add(inner);
            }
        });
    });
    return { doc: out, changed: rewritten.size };
}
|
||||
/**
 * Generate a short random id for a new block's `attrs.id`. Docmost uses
 * nanoid; a base36 random string is sufficient here (uniqueness within one
 * document). Not cryptographically secure.
 *
 * The id is built one character at a time so it is ALWAYS 14 characters long.
 * Slicing `Math.random().toString(36)` yields shorter strings whenever the
 * random value has a short base36 expansion (and "" for 0).
 *
 * @returns {string} 14 characters from `[0-9a-z]`.
 */
function freshId() {
    const ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz";
    const ID_LENGTH = 14;
    let id = "";
    for (let i = 0; i < ID_LENGTH; i++) {
        // Math.random() is in [0, 1), so the index is always 0..35.
        id += ALPHABET[Math.floor(Math.random() * ALPHABET.length)];
    }
    return id;
}
|
||||
/**
 * Wrap inline ProseMirror nodes in a list item:
 *   { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content: inlineNodes }] }
 * The paragraph gets a fresh random block id. The inline nodes are cloned, so
 * the result shares no references with the caller's input; a non-array input
 * produces an empty paragraph.
 *
 * @param {object[]} inlineNodes Inline content for the note.
 * @returns {object} The new `listItem` node.
 */
export function noteItem(inlineNodes) {
    const inlineCopy = Array.isArray(inlineNodes) ? clone(inlineNodes) : [];
    const paragraph = {
        type: "paragraph",
        attrs: { id: freshId() },
        content: inlineCopy,
    };
    return { type: "listItem", content: [paragraph] };
}
|
||||
/**
 * Wrap inline ProseMirror nodes in a real footnoteDefinition node keyed by id:
 *   { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", content }] }
 * (mirrors the editor-ext / docmost-schema FootnoteDefinition node).
 *
 * The node itself comes from the shared `makeFootnoteDefinition` factory
 * (footnote-authoring.ts), so the definition shape lives in one place. The
 * only addition here is a fresh block id stamped on the first child (Docmost
 * stamps one, and the canonicalizer preserves attrs as-is); that child's
 * `attrs` object is replaced wholesale.
 *
 * @param {string} id Footnote id for the definition.
 * @param {object[]} inlineNodes Inline content of the definition.
 * @returns {object} The definition node.
 */
export function footnoteDefinition(id, inlineNodes) {
    const definition = makeFootnoteDefinition(id, inlineNodes);
    const firstChild = definition.content[0];
    firstChild.attrs = { id: freshId() };
    return definition;
}
|
||||
/**
 * Replace every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder
 * in an inline content array with a real `footnoteReference` node, in reading
 * order. Mutates `inline` in place.
 *
 * A text run that contains markers is split around them; every resulting text
 * fragment is a copy of the original run carrying a copy of its marks. Nodes
 * that are not string-bearing text runs are left alone.
 *
 * @param {object[]} inline Inline content array to rewrite.
 * @param {(m: {oldNum?: number, phIdx?: number}) => string} onMarker Called
 *   once per marker with either the original `[N]` number (`oldNum`) or the
 *   placeholder index (`phIdx`); returns the footnote id for the new node.
 */
function replaceMarkersWithReferences(inline, onMarker) {
    const markerRe = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
    let i = 0;
    while (i < inline.length) {
        const node = inline[i];
        const isTextRun = isObject(node) && node.type === "text" && typeof node.text === "string";
        const hits = isTextRun ? [...node.text.matchAll(markerRe)] : [];
        if (hits.length === 0) {
            i++;
            continue;
        }
        const marks = Array.isArray(node.marks) ? node.marks : [];
        // Copy of the original run restricted to [from, to).
        const textPiece = (from, to) => ({ ...node, text: node.text.slice(from, to), marks: [...marks] });
        const pieces = [];
        let cursor = 0;
        for (const hit of hits) {
            // Only non-empty text is emitted, so no zero-length runs appear.
            if (hit.index > cursor) {
                pieces.push(textPiece(cursor, hit.index));
            }
            const oldNum = hit[1] != null ? Number(hit[1]) : undefined;
            const phIdx = hit[2] != null ? Number(hit[2]) : undefined;
            const fnId = onMarker({ oldNum, phIdx });
            pieces.push({ type: "footnoteReference", attrs: { id: fnId } });
            cursor = hit.index + hit[0].length;
        }
        if (cursor < node.text.length) {
            pieces.push(textPiece(cursor));
        }
        inline.splice(i, 1, ...pieces);
        // Continue after the pieces just spliced in.
        i += pieces.length;
    }
}
|
||||
/**
 * Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
 * ProseMirror nodes.
 *
 * A leading `комментарий: ` prefix (case-insensitive) and then a leading
 * `N. ` numeric prefix are stripped, and the text is trimmed. After that a
 * minimal bold split is applied:
 *   - with a leading `**bold lead**` run: one bold text node for the lead plus
 *     one plain text node for the ENTIRE remainder (including the whitespace
 *     that followed the lead);
 *   - otherwise: `splitInlineBold` splits out any inline `**bold**` spans.
 *
 * This keeps the conversion synchronous (the transform sandbox runs
 * synchronously) and dependency-free; the async markdownToProseMirror is
 * intentionally NOT used here.
 *
 * @param {*} markdown Comment markdown; a non-string is treated as "".
 * @returns {object[]} Inline text nodes; `[]` when nothing remains.
 */
export function mdToInlineNodes(markdown) {
    const source = typeof markdown === "string" ? markdown : "";
    const md = source
        .replace(/^\s*комментарий\s*:\s*/i, "")
        .replace(/^\s*\d+\.\s+/, "")
        .trim();
    if (md === "") {
        return [];
    }
    // Group 1: lead text; group 2: the whitespace separating it from the rest.
    const lead = /^\*\*([^*]+)\*\*(\s*)/.exec(md);
    if (!lead) {
        // No bold lead: plain text with any inline **bold** spans split out.
        return splitInlineBold(md);
    }
    const nodes = [{
            type: "text",
            text: lead[1],
            marks: [{ type: "bold" }],
        }];
    const rest = md.slice(lead[0].length);
    if (rest.length > 0) {
        // Keep the separating whitespace in front of the remainder.
        nodes.push({ type: "text", text: lead[2] + rest });
    }
    return nodes;
}
|
||||
/**
 * Split a string containing inline `**bold**` spans into text nodes, with the
 * spans bolded and the surrounding text left plain. Used as the no-lead
 * fallback in mdToInlineNodes.
 *
 * @param {string} text Source text.
 * @returns {object[]} At least one text node; when nothing is produced (e.g.
 *   an empty string) a single node holding `text` itself is returned.
 */
function splitInlineBold(text) {
    const pieces = [];
    let cursor = 0;
    for (const hit of text.matchAll(/\*\*([^*]+)\*\*/g)) {
        if (hit.index > cursor) {
            pieces.push({ type: "text", text: text.slice(cursor, hit.index) });
        }
        pieces.push({ type: "text", text: hit[1], marks: [{ type: "bold" }] });
        cursor = hit.index + hit[0].length;
    }
    // Plain tail after the last bold span (or the whole string if none matched).
    if (cursor < text.length) {
        pieces.push({ type: "text", text: text.slice(cursor) });
    }
    if (pieces.length === 0) {
        return [{ type: "text", text }];
    }
    return pieces;
}
|
||||
/**
 * Turn inline comments into real footnotes.
 *
 * For each comment that carries a `selection`:
 *   1. a transient placeholder (a NUL-delimited "\u0000FN<i>\u0000" sentinel)
 *      is inserted right after the selection text in the BODY, i.e. in a
 *      top-level block before the notes heading;
 *   2. the comment's markdown is converted to inline nodes for its definition.
 *
 * The body is then walked in reading order. Every existing `[N]` marker and
 * every placeholder is replaced by a `footnoteReference` node with a fresh id,
 * and a matching `footnoteDefinition` is collected: from the N-th item of the
 * notes orderedList for `[N]`, from the comment for a placeholder. The notes
 * orderedList is replaced by a `footnotesList` holding the definitions in
 * reading order (or removed when there are none). Finally the disclaimer
 * callout range is synced to the new note count. A callout whose text holds a
 * `[1]…[K]` range is the disclaimer and is excluded from marker replacement.
 *
 * The notes list is the first top-level `orderedList` AT OR AFTER the notes
 * heading; ordered lists in the body are ordinary content.
 *
 * @param {object} doc Document root; never mutated (a clone is edited).
 * @param {Array<{id: *, selection?: string, content?: string}>} comments
 *   Comments to anchor; entries without a `selection` are ignored.
 * @param {object} [opts]
 * @param {string} [opts.notesHeading="Примечания переводчика"] Exact (trimmed)
 *   text of the heading that opens the notes section.
 * @returns {{doc: object, consumed: Array}} `consumed` lists the ids of the
 *   comments whose selection was found and anchored.
 * @throws {Error} When the notes heading or the notes orderedList is missing,
 *   or when a body marker `[N]` has no N-th note.
 */
export function commentsToFootnotes(doc, comments, opts = {}) {
    let working = clone(doc);
    const notesHeading = opts.notesHeading ?? "Примечания переводчика";
    const isNotesHeading = (n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading;
    const isOrderedList = (n) => isObject(n) && n.type === "orderedList";
    const top = Array.isArray(working.content) ? working.content : [];
    const notesIdx = top.findIndex((n) => isNotesHeading(n));
    if (notesIdx < 0) {
        throw new Error(`heading "${notesHeading}" not found`);
    }
    // The notes orderedList lives at or after the heading.
    if (!top.slice(notesIdx).some((n) => isOrderedList(n))) {
        throw new Error("notes orderedList not found");
    }
    const consumed = [];
    const noteInlineByPh = new Map();
    (Array.isArray(comments) ? comments : []).forEach((c, i) => {
        if (!c || !c.selection)
            return;
        // Collision-proof sentinel delimited by NUL control chars, which never
        // occur in real Docmost prose - so the marker regex cannot mistake any
        // body text (e.g. "Press F1 for help", model "FN2") for a placeholder.
        // The NUL is transient: it is replaced by a footnoteReference node
        // below and never persists in a returned document.
        const ph = `\u0000FN${i}\u0000`;
        // insertMarkerAfter returns a NEW cloned doc; reassign `working`.
        const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
            beforeBlock: notesIdx,
        });
        if (!r.inserted)
            return;
        working = r.doc;
        noteInlineByPh.set(ph, mdToInlineNodes(c.content));
        consumed.push(c.id);
    });
    // Re-resolve references into the (possibly re-cloned) working doc.
    const top2 = Array.isArray(working.content) ? working.content : [];
    const notesIdx2 = top2.findIndex((n) => isNotesHeading(n));
    // Search for the notes list only at or after the heading, matching the
    // validation above: an ordered list in the body must not be taken for it.
    const searchFrom = notesIdx2 >= 0 ? notesIdx2 : 0;
    const oldListIndex = top2.findIndex((n, idx) => idx >= searchFrom && isOrderedList(n));
    const notesList2 = oldListIndex >= 0 ? top2[oldListIndex] : null;
    if (!notesList2) {
        throw new Error("notes orderedList not found");
    }
    // Inline content of each existing note (listItem -> paragraph -> inline).
    const oldNoteInline = (Array.isArray(notesList2.content)
        ? notesList2.content
        : []).map((item) => {
        const para = isObject(item) && Array.isArray(item.content)
            ? item.content.find((c) => isObject(c) && c.type === "paragraph")
            : null;
        return para && Array.isArray(para.content) ? para.content : [];
    });
    // Definitions collected in body reading order.
    const definitions = [];
    const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
    // Recursively visit inline arrays inside a block (paragraph, heading,
    // callout child paragraphs, table cells, ...), preserving reading order.
    const visitInlineArrays = (container) => {
        if (!isObject(container) || !Array.isArray(container.content))
            return;
        const hasText = container.content.some((n) => isObject(n) && n.type === "text");
        if (hasText) {
            replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
                const fnId = freshId();
                if (oldNum != null) {
                    const inline = oldNoteInline[oldNum - 1];
                    // Every existing body marker MUST map to a real note. An
                    // out-of-range marker means the document is internally
                    // inconsistent; fail loudly.
                    if (inline === undefined) {
                        throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`);
                    }
                    definitions.push(footnoteDefinition(fnId, inline));
                }
                else {
                    const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
                    definitions.push(footnoteDefinition(fnId, inline));
                }
                return fnId;
            });
        }
        else {
            for (const child of container.content)
                visitInlineArrays(child);
        }
    };
    const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
    for (let i = 0; i < notesBoundary; i++) {
        // Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a
        // footnote marker and is synced separately by setCalloutRange.
        if (isObject(top2[i]) &&
            top2[i].type === "callout" &&
            disclaimerRangeRe.test(blockText(top2[i]))) {
            continue;
        }
        visitInlineArrays(top2[i]);
    }
    // Replace the old orderedList with a real footnotesList of the collected
    // definitions (reading order). If there are no definitions, drop the list.
    if (definitions.length > 0) {
        top2[oldListIndex] = {
            type: "footnotesList",
            content: definitions,
        };
    }
    else {
        top2.splice(oldListIndex, 1);
    }
    // Sync the disclaimer callout range to the new note count.
    const synced = setCalloutRange(working, definitions.length);
    return { doc: synced.doc, consumed };
}
|
||||
/**
 * AUTHOR-INLINE footnote insertion. The caller supplies WHERE (`anchorText`)
 * and WHAT (markdown `text`); numbering and the bottom list are derived by
 * `canonicalizeFootnotes`. The caller never sees or edits `footnotesList`,
 * never assigns a number, and cannot desync — orphans, out-of-order lists and
 * raw `[^id]` markdown are structurally impossible.
 *
 * Content DEDUP (#3 in the issue): if an existing definition has the SAME
 * normalized content key, its id is REUSED (one number, one definition,
 * several references). Otherwise a new id comes from `generateFootnoteId()`
 * and a new definition is added. Conservative: only an exact key match merges.
 *
 * Mechanics: the `footnoteReference` node is inserted DIRECTLY at the anchor
 * through the shared mark-safe `insertNodesAfterAnchor` core, so it hugs the
 * preceding word with no text-sentinel round-trip. The whole document is then
 * canonicalized.
 *
 * @param {object} doc Document root; never mutated (a clone is edited).
 * @param {{anchorText?: string, text?: string}} opts Anchor and footnote markdown.
 * @returns {{doc: object, inserted: boolean, footnoteId: string, reused: boolean}}
 *   When the anchor cannot be used, `doc` is an unchanged clone of the input
 *   and `inserted` is false.
 */
export function insertInlineFootnote(doc, opts) {
    const inline = mdToInlineNodes(opts.text ?? "");
    // footnoteContentKey only reads `.content`, so key off the inline array
    // directly instead of building a throwaway definition node.
    const key = footnoteContentKey({ content: inline });
    // Content dedup: the first definition (reading order) with a usable id and
    // an identical key donates its id.
    let reusableId = null;
    if (key !== "") {
        walk(doc, (n) => {
            if (reusableId != null || n.type !== "footnoteDefinition") {
                return;
            }
            const candidate = n.attrs?.id;
            if (typeof candidate !== "string" || candidate === "") {
                return;
            }
            if (footnoteContentKey(n) === key) {
                reusableId = candidate;
            }
        });
    }
    const reused = reusableId != null;
    const footnoteId = reused ? reusableId : generateFootnoteId();
    // Guards that keep the inline atom out of the notes section and out of
    // blocks that cannot hold it:
    //   - beforeBlock bounds the search to the BODY: everything before the
    //     first top-level block that IS or CONTAINS (at any depth) a
    //     footnotesList/footnoteDefinition, so a nested list or a bare
    //     definition bounds the search too;
    //   - skipSubtreeTypes refuses to descend into a footnotesList /
    //     footnoteDefinition, so a reference is never glued inside an existing
    //     definition (the canonicalizer would drop it as an orphan and lose
    //     that definition's prose);
    //   - forbidBlockTypes refuses codeBlocks (an inline atom there is a
    //     schema-invalid doc; insert_footnote skips validateDocStructure).
    // If the only anchor match sits in such a place, the insert is refused and
    // the write aborts cleanly (inserted:false) instead of destroying content.
    const boundaryIdx = Array.isArray(doc?.content)
        ? doc.content.findIndex((n) => containsFootnoteNotes(n))
        : -1;
    const guards = {
        forbidBlockTypes: INLINE_ATOM_FORBIDDEN_BLOCKS,
        skipSubtreeTypes: FOOTNOTE_NOTES_SUBTREES,
    };
    if (boundaryIdx >= 0) {
        guards.beforeBlock = boundaryIdx;
    }
    const anchor = (opts.anchorText ?? "").trimEnd();
    const makeReference = () => [{ type: "footnoteReference", attrs: { id: footnoteId } }];
    const r = insertNodesAfterAnchor(doc, anchor, makeReference, guards);
    if (!r.inserted) {
        return { doc: clone(doc), inserted: false, footnoteId, reused };
    }
    // A reused id shares the existing definition; only a new id needs one
    // (canonicalize will order/place it).
    if (!reused) {
        appendDefinition(r.doc, makeFootnoteDefinition(footnoteId, inline));
    }
    // Derive numbering + the single bottom list deterministically.
    const canonical = canonicalizeFootnotes(r.doc);
    return { doc: canonical, inserted: true, footnoteId, reused };
}
|
||||
/**
 * Append a definition node so the canonicalizer can order/place it: into the
 * first existing footnotesList (when it has a `content` array), otherwise into
 * a new footnotesList pushed at the end of the document. Mutates `doc`.
 */
function appendDefinition(doc, defNode) {
    const firstList = getList(doc, (n) => isObject(n) && n.type === "footnotesList");
    const target = firstList && Array.isArray(firstList.content) ? firstList.content : null;
    if (target) {
        target.push(defNode);
        return;
    }
    // No usable list yet: start a trailing one.
    if (Array.isArray(doc.content)) {
        doc.content.push({ type: "footnotesList", content: [defNode] });
    }
}
|
||||
@@ -1,89 +0,0 @@
|
||||
/**
 * Pure tree-builder: turn a flat array of sidebar-style page nodes (as produced
 * by `enumerateSpacePages`) into a nested tree.
 *
 * Input: a flat array of nodes. Each node is expected to carry at least
 *   { id, slugId, title, position, parentPageId }   (extra fields are ignored).
 * Entries that are not objects, or that have a falsy `id`, are skipped.
 *
 * Output: an array of ROOT nodes, each shaped as
 *   { id, slugId, title, children? }
 * where `children` is the array of child nodes (same shape, recursively). The
 * `children` key is OMITTED entirely when a node has no children, to keep the
 * payload lean (nesting alone conveys the structure; parentPageId/position are
 * intentionally dropped from the output).
 *
 * Duplicate ids: the LAST occurrence wins for every field, including
 * `position` and `parentPageId`, and the node is emitted exactly once.
 *
 * Linking rule: a node is attached as a child of `parentPageId` only when that
 * parent id is actually present in the input AND is not the node itself.
 * Otherwise — a null/undefined `parentPageId`, a parent missing from the
 * input, or a node that names itself as parent — the node is promoted to a
 * ROOT, so it surfaces at the top level rather than disappearing. Parent
 * cycles spanning more than one node are NOT detected here.
 *
 * Ordering rule: the roots array and every `children` array are sorted
 * ascending by the node's `position` using a plain `<`/`>` comparison (NOT
 * localeCompare). Nodes with a null/undefined `position` sort last; ties keep
 * first-seen input order (Array.prototype.sort is stable).
 *
 * Pure: no I/O, no network, deterministic.
 *
 * @param {Iterable<object>} nodes Flat list of page nodes.
 * @returns {Array<object>} Sorted root nodes with nested `children`.
 */
export function buildPageTree(nodes) {
  // id -> lean output node, built up front.
  const byId = new Map();
  // id -> sort key and parent link, kept off the output. Filled in the same
  // pass as byId so duplicate ids resolve consistently (last one wins) and
  // each id is linked exactly once below.
  const metaById = new Map();
  for (const node of nodes) {
    if (!node || typeof node !== "object" || !node.id) {
      continue;
    }
    byId.set(node.id, {
      id: node.id,
      slugId: node.slugId,
      title: node.title,
    });
    metaById.set(node.id, {
      position: node.position,
      parentPageId: node.parentPageId,
    });
  }

  // Comparator on the position key: plain code-unit order, missing last.
  const byPosition = (aId, bId) => {
    const a = metaById.get(aId).position;
    const b = metaById.get(bId).position;
    const aMissing = a === undefined || a === null;
    const bMissing = b === undefined || b === null;
    if (aMissing || bMissing) {
      if (aMissing && bMissing) {
        return 0;
      }
      return aMissing ? 1 : -1;
    }
    if (a < b) {
      return -1;
    }
    if (a > b) {
      return 1;
    }
    return 0;
  };

  const roots = [];
  const childrenIdsByParent = new Map();
  // Iterate the de-duplicated map, not the raw input: walking `nodes` again
  // would link a repeated id twice and emit the same node more than once.
  for (const [id, { parentPageId }] of metaById) {
    const hasUsableParent =
      Boolean(parentPageId) && parentPageId !== id && byId.has(parentPageId);
    if (!hasUsableParent) {
      roots.push(id);
      continue;
    }
    const siblings = childrenIdsByParent.get(parentPageId) ?? [];
    siblings.push(id);
    childrenIdsByParent.set(parentPageId, siblings);
  }

  // Attach sorted children arrays; parents without children get no key at all.
  for (const [parentId, childIds] of childrenIdsByParent) {
    childIds.sort(byPosition);
    byId.get(parentId).children = childIds.map((id) => byId.get(id));
  }

  roots.sort(byPosition);
  return roots.map((id) => byId.get(id));
}
|
||||
@@ -1,40 +0,0 @@
|
||||
#!/usr/bin/env node
|
||||
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
||||
import { createDocmostMcpServer } from "./index.js";
|
||||
// Standalone stdio entrypoint: running the package as a CLI (`docmost-mcp`)
// takes its credentials from the environment and speaks the MCP protocol over
// stdin/stdout. The factory in index.ts stays side-effect-free, so every bit
// of process/transport lifecycle is handled in this file.
const {
  DOCMOST_API_URL: API_URL,
  DOCMOST_EMAIL: EMAIL,
  DOCMOST_PASSWORD: PASSWORD,
} = process.env;

// All three settings are mandatory; an unset or empty one aborts with exit
// code 1 before any server is created.
if ([API_URL, EMAIL, PASSWORD].some((setting) => !setting)) {
  console.error("Error: DOCMOST_API_URL, DOCMOST_EMAIL, and DOCMOST_PASSWORD environment variables are required.");
  process.exit(1);
}
|
||||
/**
 * Install the process-level safety nets, build the MCP server from the
 * environment credentials and attach it to a stdio transport.
 *
 * The two handlers only LOG: an error raised outside a tool call (e.g. a
 * transient ws/collab socket "error" event) must not tear the stdio server
 * down. Per-tool errors still flow through the SDK and never reach them.
 * A rejection from this function itself is left to the caller to handle.
 */
async function run() {
  const safetyNets = [
    ["unhandledRejection", "Unhandled promise rejection:"],
    ["uncaughtException", "Uncaught exception:"],
  ];
  for (const [eventName, label] of safetyNets) {
    process.on(eventName, (detail) => {
      console.error(label, detail);
    });
  }

  const credentials = { apiUrl: API_URL, email: EMAIL, password: PASSWORD };
  const mcpServer = createDocmostMcpServer(credentials);
  await mcpServer.connect(new StdioServerTransport());
}
|
||||
// A rejection from run() itself (server construction or the initial
// transport connect) is fatal: report it and exit non-zero.
const abortOnFatalError = (error) => {
  console.error("Fatal error running server:", error);
  process.exit(1);
};

run().catch(abortOnFatalError);
|
||||
@@ -1,322 +0,0 @@
|
||||
// Zod-agnostic shared tool-spec registry consumed by BOTH the zod-v3 MCP server
// (packages/mcp/src/index.ts) and the zod-v4 in-app AI-SDK service
// (apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts). Intentionally
// imports NO zod: each consumer passes its OWN zod namespace into buildShape,
// because the two packages are on different zod majors (v3 here, v4 in the
// server) and a zod schema object built with one major cannot be reused by the
// other. The builders below only touch z.string(), z.boolean(), z.any(),
// z.enum(), z.array() and z.object() plus the .min()/.optional()/.describe()
// modifiers — API available in both v3 and v4 — so a single builder works with
// either namespace. Keep any new builder inside that subset.
//
// Each entry carries:
//   mcpName     - snake_case tool name registered on the MCP server
//   inAppKey    - camelCase key used by the in-app service (equals the entry key)
//   description - model-facing text, shared verbatim by both layers
//   buildShape  - optional (z) => raw shape object; omitted for no-argument tools
//
// Only tools whose snake_case/camelCase name, input schema AND model-facing
// description are genuinely identical across both layers live here. Tools that
// diverge on purpose (security guardrails, tuned UX, "Reversible" framing on
// some write tools, different limits, hybrid-RRF search, etc.) stay defined
// per-layer and are NOT represented here.
//
// Descriptions name sibling tools in transport-neutral prose ("the page
// outline", "the page-JSON view") and never by a snake_case or camelCase
// identifier, because the two layers expose those siblings under different
// names.
//
// MAINTENANCE RULE: adding, renaming, or removing a spec here (or an inline
// registerTool in index.ts) REQUIRES updating SERVER_INSTRUCTIONS in
// packages/mcp/src/index.ts — the intent-routing guide MCP clients receive on
// initialize. Enforced by test/unit/server-instructions.test.mjs.
export const SHARED_TOOL_SPECS = {
  // --- no-argument read tools ---
  getWorkspace: {
    mcpName: 'get_workspace',
    inAppKey: 'getWorkspace',
    description: 'Fetch metadata about the current workspace (name, settings).',
  },
  listSpaces: {
    mcpName: 'list_spaces',
    inAppKey: 'listSpaces',
    description: 'List the spaces the current user can access. Returns the array of ' +
      'spaces (id, name, slug, ...).',
  },
  listShares: {
    mcpName: 'list_shares',
    inAppKey: 'listShares',
    description: 'List all public shares in the workspace with page titles and public URLs.',
  },
  // --- single-pageId read tools ---
  getPageJson: {
    mcpName: 'get_page_json',
    inAppKey: 'getPageJson',
    description: 'Get page details with the raw ProseMirror JSON content (lossless: ' +
      'includes block ids, callouts, tables, link/image attributes) plus the ' +
      'slugId used in URLs. Use the block ids it returns to make precise ' +
      'structural edits or surgical text edits without resending the page.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),
  },
  getOutline: {
    mcpName: 'get_outline',
    inAppKey: 'getOutline',
    description: "Return a COMPACT outline of a page's top-level blocks ({index, type, " +
      'id, level, firstText}; tables add rows/cols/header; lists add item ' +
      'count) WITHOUT the full document body. Use it to locate sections/tables ' +
      'and grab block ids cheaply before fetching, patching or inserting ' +
      'individual blocks.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),
  },
  // --- two-id read tool ---
  getNode: {
    mcpName: 'get_node',
    inAppKey: 'getNode',
    description: "Fetch a single node's full ProseMirror subtree (lossless) without " +
      'pulling the whole document. `nodeId` is a block id from the page ' +
      'outline or page-JSON view (works for headings/paragraphs/callouts/images), OR ' +
      '`#<index>` to fetch a top-level block by its outline index — use the ' +
      '`#<index>` form for tables/rows/cells, which carry no id.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      nodeId: z.string().min(1),
    }),
  },
  // --- node delete ---
  deleteNode: {
    mcpName: 'delete_node',
    inAppKey: 'deleteNode',
    description: 'Remove a single block by its attrs.id (from the page outline or ' +
      'page-JSON view) WITHOUT resending the whole document.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      nodeId: z.string().min(1),
    }),
  },
  // --- single-block structural write (patch / insert) ---
  //
  // CANONICAL description merges both layers: the MCP copy's "WITHOUT resending
  // the whole document" + "cheaper/safer than a full-document replace" guidance
  // AND the in-app copy's "keeps the same node id" + "Reversible via page
  // history" framing — nothing either side conveyed is dropped. Sibling tools are
  // named in transport-neutral prose ("the page-JSON view", "a full-document
  // replace") to match the rest of the registry, since the two layers expose
  // those siblings under different (snake_case vs camelCase) identifiers.
  patchNode: {
    mcpName: 'patch_node',
    inAppKey: 'patchNode',
    description: 'Replace a single content block identified by its attrs.id with a new ' +
      'ProseMirror node, WITHOUT resending the whole document; the replacement ' +
      'keeps the same node id. Get the block id from the page outline (cheap) ' +
      'or the page-JSON view, then ' +
      'pass a ProseMirror node to put in its place. Example node: a paragraph ' +
      '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
      'heading {"type":"heading","attrs":{"level":2},"content":' +
      '[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
      '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
      'JSON object or a JSON string (both accepted). Cheaper and safer than ' +
      'replacing the whole document for one-block structural edits. Reversible: ' +
      'the previous version is kept in page history.',
    buildShape: (z) => ({
      pageId: z.string().min(1).describe('ID of the page containing the block'),
      nodeId: z
        .string()
        .min(1)
        .describe('attrs.id of the block to replace (from the page outline or ' +
          'page-JSON view)'),
      node: z
        .any()
        .describe('ProseMirror node to put in place of the node with this id, e.g. ' +
          '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' +
          'JSON object or JSON string both accepted.'),
    }),
  },
  insertNode: {
    mcpName: 'insert_node',
    inAppKey: 'insertNode',
    description: 'Insert a block before/after another block (by attrs.id or anchor text) ' +
      'or append it at the end (top level). For before/after you MUST provide ' +
      'EXACTLY ONE of anchorNodeId or anchorText. Get anchor block ids from the ' +
      'page outline or the page-JSON view. Avoids resending the whole document. ' +
      'Can also insert ' +
      'table structure: to add a tableRow, pass a tableRow node with position ' +
      'before/after and anchor INSIDE the target table — anchorNodeId of any ' +
      'block/cell in it, or anchorText matching the table; to add a ' +
      'tableCell/tableHeader, use anchorNodeId of a block inside the target row ' +
      '(anchorText only resolves top-level blocks, so it cannot target a row). ' +
      "`anchorText` is matched against the block's literal rendered plain text " +
      '(no markdown); markdown/emoji are tolerated as a fallback; prefer plain ' +
      'text or anchorNodeId. Note: append is top-level only and rejects ' +
      'structural table nodes. Example node: a paragraph ' +
      '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]} or a ' +
      'heading {"type":"heading","attrs":{"level":2},"content":' +
      '[{"type":"text","text":"Title"}]}. Bold is a mark: ' +
      '{"type":"text","text":"x","marks":[{"type":"bold"}]}. The node may be a ' +
      'JSON object or a JSON string (both accepted). Reversible via page history.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      node: z
        .any()
        .describe('ProseMirror node to insert, e.g. ' +
          '{"type":"paragraph","content":[{"type":"text","text":"Hello"}]}. ' +
          'JSON object or JSON string both accepted.'),
      position: z
        .enum(['before', 'after', 'append'])
        .describe('Where to insert relative to the anchor.'),
      anchorNodeId: z
        .string()
        .optional()
        .describe('Anchor block id (for before/after).'),
      anchorText: z
        .string()
        .optional()
        .describe("Anchor text fragment (for before/after), matched against the " +
          "block's literal rendered plain text (no markdown). Markdown/emoji " +
          'are tolerated as a fallback; prefer plain text or anchorNodeId.'),
    }),
  },
  // --- share management ---
  unsharePage: {
    mcpName: 'unshare_page',
    inAppKey: 'unsharePage',
    description: 'Remove the public share of a page (revokes the public URL).',
    buildShape: (z) => ({
      pageId: z.string().min(1).describe('ID of the page to unshare'),
    }),
  },
  // --- version history ---
  diffPageVersions: {
    mcpName: 'diff_page_versions',
    inAppKey: 'diffPageVersions',
    description: 'Diff two versions of a page and return a Docmost-equivalent change set ' +
      '(inserted/deleted text, integrity counts for images/links/tables/' +
      'callouts/footnote markers, and a human-readable markdown summary). ' +
      "`from`/`to` each accept a historyId, or null/'current' for the page's " +
      'current content (defaults: from=current, to=current — pass a historyId ' +
      'from the page-history list to compare against the live page).',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      from: z
        .string()
        .optional()
        .describe("historyId, or 'current'/omit for current content"),
      to: z
        .string()
        .optional()
        .describe("historyId, or 'current'/omit for current content"),
    }),
  },
  listPageHistory: {
    mcpName: 'list_page_history',
    inAppKey: 'listPageHistory',
    description: "List a page's saved versions (Docmost auto-snapshots on every save), " +
      'newest first, cursor-paginated. Returns { items, nextCursor }; each ' +
      "item's id is the historyId to pass to the page diff or restore tools.",
    buildShape: (z) => ({
      pageId: z.string().min(1),
      cursor: z
        .string()
        .optional()
        .describe('Pagination cursor from a previous nextCursor'),
    }),
  },
  restorePageVersion: {
    mcpName: 'restore_page_version',
    inAppKey: 'restorePageVersion',
    description: 'Restore a page to a saved version: writes that version\'s content back ' +
      'as the page\'s current content (Docmost has no restore endpoint, so ' +
      'this creates a NEW history snapshot — the restore is itself revertible). ' +
      'Get the historyId from the page-history list.',
    buildShape: (z) => ({
      historyId: z.string().min(1),
    }),
  },
  // --- markdown round-trip ---
  importPageMarkdown: {
    mcpName: 'import_page_markdown',
    inAppKey: 'importPageMarkdown',
    description: "Replace a page's content from a self-contained Docmost-flavoured " +
      'Markdown file produced by the page-Markdown export tool. Restores comment ' +
      'highlight anchors and diagrams from their inline HTML. NOTE: comment ' +
      'thread records are NOT created/updated/deleted on the server by this ' +
      'tool — only the page body + inline comment marks are written; manage ' +
      'comment threads via the comment tools/UI.',
    buildShape: (z) => ({
      pageId: z.string().min(1),
      markdown: z.string().min(1),
    }),
  },
  // --- server-side content copy ---
  copyPageContent: {
    mcpName: 'copy_page_content',
    inAppKey: 'copyPageContent',
    description: "Replace targetPageId's content with a copy of sourcePageId's content, " +
      'entirely server-side — the document is NOT sent through the model. The ' +
      'target keeps its own title and slug; only its body is replaced. Ideal ' +
      "for 'make page A's content equal to B' or 'replace A with B but keep A's URL'.",
    buildShape: (z) => ({
      sourcePageId: z.string().min(1).describe('Page to copy content FROM'),
      targetPageId: z
        .string()
        .min(1)
        .describe('Page whose content is REPLACED (title/slug kept)'),
    }),
  },
  // --- surgical text edit (folds in the documented drift-bug fix) ---
  //
  // CANONICAL description is the CORRECTED in-app wording: a formatting-only
  // change is REFUSED into failed[] (not silently stripped-and-retried). The
  // stale MCP claim that "Markdown wrappers are tolerated via a strip-and-retry
  // fallback" is intentionally absent here.
  editPageText: {
    mcpName: 'edit_page_text',
    inAppKey: 'editPageText',
    description: "Surgical find/replace inside a page's text, preserving all block " +
      'ids and marks. A find MAY cross bold/italic/link boundaries; the ' +
      'replacement inherits marks from the unchanged common prefix/suffix ' +
      '(so editing plain text next to a bold word keeps it bold, and ' +
      'editing inside a bold word keeps the new text bold). Each find must ' +
      'match exactly once unless replaceAll is set. The batch applies what ' +
      'it can and returns applied[] + failed[] plus a verify change-report ' +
      '(the text/marks/structure that ACTUALLY changed — read it to confirm ' +
      'your edit landed; do not assume success); a fully-unmatched batch ' +
      'writes nothing and errors. find and replace are LITERAL text, not ' +
      'markdown. This tool edits plain text ONLY and CANNOT add or remove ' +
      'formatting marks: a formatting change — find/replace that differ only ' +
      'in markdown markers (e.g. find:"~~x~~", replace:"x"), or a replace ' +
      'containing **bold**/~~strike~~/`code` wrappers — is REFUSED into ' +
      'failed[]. To change bold/italic/strike/code/link, read the block as ' +
      'page JSON and use a structural node patch/update to set its marks. ' +
      'Examples: edits:[{find:"teh",replace:"the"}]; edits:[{find:"Hello ' +
      'world",replace:"Hello there"}] (crosses a bold boundary).',
    buildShape: (z) => ({
      // .min(1) matches every other page id in this registry: an empty id is
      // rejected at schema validation instead of reaching the handler.
      pageId: z.string().min(1).describe('ID of the page to edit'),
      edits: z
        .array(z.object({
          find: z.string().describe('Exact text to find'),
          replace: z.string().describe('Replacement text (may be empty)'),
          replaceAll: z
            .boolean()
            .optional()
            .describe('Replace every occurrence (default: must match once)'),
        }))
        .min(1)
        .describe('List of find/replace operations, applied in order'),
    }),
  },
  // --- hand a large page to an external consumer without bloating context ---
  stashPage: {
    mcpName: 'stash_page',
    inAppKey: 'stashPage',
    // Refers to the sibling read tool as "the page-JSON view", not by its
    // snake_case MCP name, which does not exist in the in-app layer.
    description: 'Serialize a whole page (the full ProseMirror JSON, as the page-JSON view ' +
      'returns) into an ephemeral in-memory blob and return ONLY a short ' +
      'anonymous URL to it — the body NEVER enters the model context, so this ' +
      'is the way to hand a large page (or its images) to an external consumer ' +
      'without truncation. Every internal file/image attachment is mirrored ' +
      'into the same sandbox and its src rewritten to a sandbox URL, so the ' +
      'consumer can fetch the images anonymously too; external http(s) images ' +
      'are left untouched. Returns { uri, size, sha256, images:{mirrored, ' +
      'failed} }. Integrity: the blob is served with ETag = its sha256, so a ' +
      'truncated/corrupted fetch is detectable. Blobs are RAM-only: they expire ' +
      'after a short TTL (~1h) and are cleared on restart — consume the URL ' +
      'within the TTL and one uptime, or re-stash. A blob is bound to the ' +
      'server instance that created it: in a multi-replica deployment without ' +
      'sticky sessions a blob stored on one instance is not retrievable via the ' +
      'sandbox URL on another (it 404s like an expired one).',
    buildShape: (z) => ({
      pageId: z.string().min(1),
    }),
  },
};
|
||||
@@ -32,6 +32,7 @@
|
||||
"author": "Moritz Krause",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@docmost/prosemirror-markdown": "workspace:*",
|
||||
"@fellow/prosemirror-recreate-transform": "^1.2.3",
|
||||
"@hocuspocus/provider": "^3.4.4",
|
||||
"@hocuspocus/transformer": "^3.4.4",
|
||||
|
||||
@@ -2,18 +2,24 @@ import { HocuspocusProvider } from "@hocuspocus/provider";
|
||||
import { TiptapTransformer } from "@hocuspocus/transformer";
|
||||
import * as Y from "yjs";
|
||||
import WebSocket from "ws";
|
||||
import { marked } from "marked";
|
||||
import { generateJSON } from "@tiptap/html";
|
||||
import { Node as PMNode } from "@tiptap/pm/model";
|
||||
import { updateYFragment } from "y-prosemirror";
|
||||
import { JSDOM } from "jsdom";
|
||||
// #293 STEP 5: the pure markdown -> ProseMirror import path is now owned by the
|
||||
// shared package (canonical `^[…]` footnotes, `$…$` math, `==` highlight, the
|
||||
// media-family md forms, comment-directive attrs, callouts and task lists all
|
||||
// handled there). MCP consumes it directly instead of maintaining its own
|
||||
// drifted marked pipeline; only the collab/yjs write glue and the footnote
|
||||
// canonicalization wrapper stay mcp-side.
|
||||
import { markdownToProseMirror } from "@docmost/prosemirror-markdown";
|
||||
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||
import { withPageLock } from "./page-lock.js";
|
||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||
import { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
import { summarizeChange, VerifyReport } from "./diff.js";
|
||||
|
||||
export { markdownToProseMirror };
|
||||
|
||||
/**
|
||||
* Build the descriptive error for an opaque Yjs encode failure ("Unexpected
|
||||
* content type"), shared by both encode paths (`buildYDoc` -> `toYdoc` and
|
||||
@@ -51,382 +57,27 @@ global.WebSocket = WebSocket;
|
||||
// global.navigator = dom.window.navigator;
|
||||
|
||||
/**
|
||||
* Hard ceiling above which we skip callout preprocessing entirely. The linear
|
||||
* scanner below has no quadratic blow-up, but we still cap input defensively so
|
||||
* a pathological multi-megabyte payload cannot tie up the event loop; in that
|
||||
* case the markdown is passed through verbatim (callouts are simply not
|
||||
* detected) rather than risking a slow scan.
|
||||
*
* Despite the `_BYTES` suffix, callers compare this value against
* `String.length` (UTF-16 code units), not an encoded byte count.
* `bridgeTaskLists` reuses the same ceiling for its HTML input.
*
* NOTE(review): `preprocessCallouts` rescans to end-of-input for every
* `:::type` opener that has no matching close, so input with many unterminated
* openers may still be super-linear — confirm before relying on the "no
* quadratic blow-up" claim above.
*/
|
||||
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
|
||||
|
||||
/**
* Matches an opening callout fence: `:::type`, optionally surrounded by
* whitespace. Group 1 captures the type word exactly as written; the caller
* lower-cases it.
*/
|
||||
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
|
||||
/** Matches a bare closing callout fence: `:::` with only trailing whitespace. */
|
||||
const CALLOUT_CLOSE_RE = /^:::\s*$/;
|
||||
/**
* Matches the start/end of a code fence (``` or ~~~). Group 1 is any leading
* whitespace; group 2 is the marker run (3+ backticks or 3+ tildes). Anything
* after the marker (e.g. an info string) is not matched and not inspected.
*/
|
||||
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
|
||||
/**
|
||||
* Pre-process Docmost-flavoured markdown: convert `:::type ... :::`
|
||||
* callout blocks (the syntax our markdown export produces) into HTML
|
||||
* divs that the callout extension parses. The inner content is rendered
|
||||
* through marked as regular markdown.
|
||||
* Page-write variant of the package's `markdownToProseMirror`: imports markdown
|
||||
* then re-runs mcp's footnote canonicalizer over the result.
|
||||
*
|
||||
* Implemented as a single linear pass over the lines (no quadratic regex
|
||||
* rescan). It:
|
||||
* - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
|
||||
* `:::` line that lives inside a code fence as a callout delimiter, so a
|
||||
* callout body that itself contains a fenced code block with a `:::` line is
|
||||
* no longer corrupted;
|
||||
* - matches an opening `:::type` line with the next CLOSING `:::` at the SAME
|
||||
* nesting level, supporting NESTED callouts via a depth counter (an inner
|
||||
* `:::type` opens a deeper level and consumes a matching `:::`);
|
||||
* - emits the same `<div data-type="callout" data-callout-type="TYPE">` output
|
||||
* (inner rendered through marked) as the previous regex implementation.
|
||||
*/
|
||||
async function preprocessCallouts(markdown: string): Promise<string> {
  // Rewrites `:::type ... :::` callout blocks into
  // `<div data-type="callout" data-callout-type="TYPE">` elements whose body
  // has been rendered through marked. Fenced code is copied through verbatim,
  // nested callouts are supported, and an opener without a matching close is
  // kept as literal text.
  //
  // NOTE(review): each unterminated opener triggers a scan to end-of-input and
  // each nesting level re-scans its body, so adversarial input is not strictly
  // linear — confirm if this path is ever exposed to untrusted payloads.

  // Oversized input is returned untouched (callouts are simply not detected).
  if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return markdown;
  }

  // The run of backticks/tildes when `text` starts with a code fence, else null.
  const fenceMarkerOf = (text: string): string | null => {
    const hit = text.match(CODE_FENCE_RE);
    return hit ? hit[2] : null;
  };

  // A fence is closed by a marker of the same character that is at least as
  // long as the one that opened it.
  const closesFence = (text: string, opener: string): boolean => {
    const marker = fenceMarkerOf(text);
    return (
      marker !== null &&
      marker.startsWith(opener[0]) &&
      marker.length >= opener.length
    );
  };

  // Index of the `:::` that closes the callout opened at `openerIdx`, or -1
  // when the callout is unterminated. `:::` lines inside fenced code are
  // ignored; nested openers raise the depth and consume their own close.
  const findCalloutClose = (lines: string[], openerIdx: number): number => {
    let depth = 1;
    let openFence: string | null = null;
    for (let k = openerIdx + 1; k < lines.length; k++) {
      const text = lines[k];
      if (openFence !== null) {
        if (closesFence(text, openFence)) {
          openFence = null;
        }
        continue;
      }
      const marker = fenceMarkerOf(text);
      if (marker !== null) {
        openFence = marker;
        continue;
      }
      if (CALLOUT_OPEN_RE.test(text)) {
        depth += 1;
        continue;
      }
      if (CALLOUT_CLOSE_RE.test(text)) {
        depth -= 1;
        if (depth === 0) {
          return k;
        }
      }
    }
    return -1;
  };

  // Converts the top-level callouts of one slice of lines; a callout body is
  // converted recursively before being rendered, so inner callouts become
  // nested divs.
  const convert = async (lines: string[]): Promise<string> => {
    const pieces: string[] = [];
    let openFence: string | null = null;
    let cursor = 0;

    while (cursor < lines.length) {
      const text = lines[cursor];

      if (openFence !== null) {
        // Inside fenced code only the matching closing fence matters.
        if (closesFence(text, openFence)) {
          openFence = null;
        }
      } else {
        const marker = fenceMarkerOf(text);
        if (marker !== null) {
          openFence = marker;
        } else {
          const opener = text.match(CALLOUT_OPEN_RE);
          const closeIdx = opener ? findCalloutClose(lines, cursor) : -1;
          if (opener && closeIdx !== -1) {
            const calloutType = opener[1].toLowerCase();
            const body = await convert(lines.slice(cursor + 1, closeIdx));
            const bodyHtml = await marked.parse(body);
            pieces.push(
              `\n<div data-type="callout" data-callout-type="${calloutType}">${bodyHtml}</div>\n`,
            );
            cursor = closeIdx + 1; // resume after the closing `:::`
            continue;
          }
        }
      }

      // Everything else (code, plain text, unterminated openers) is kept as is.
      pieces.push(text);
      cursor += 1;
    }

    return pieces.join("\n");
  };

  return convert(markdown.split("\n"));
}
|
||||
|
||||
/**
|
||||
* Bridge marked's checkbox lists to TipTap task lists.
|
||||
* Footnote layering after #293 STEP 5:
|
||||
* - The package's `markdownToProseMirror` already ASSEMBLES footnotes on import
|
||||
* (canon #2): inline `^[body]` markers become the schema's
|
||||
* `footnoteReference` + a single doc-level `footnotesList`, with ids assigned
|
||||
* sequentially (`fn-1`, `fn-2`, …) in first-reference order and identical
|
||||
* bodies merged. So the import output is ALREADY in canonical footnote
|
||||
* topology.
|
||||
* - `canonicalizeFootnotes` runs AFTER as the mcp write-path invariant shared
|
||||
* with every other full-document persist path (`update_page_json`,
|
||||
* `docmost_transform`, `insert_footnote`, …). Because the package output is
|
||||
* already canonical, this layer is a no-op here (idempotent) — it exists so
|
||||
* the page-write contract is enforced uniformly regardless of how the PM doc
|
||||
* was produced, not because the import needs fixing.
|
||||
*
|
||||
* marked renders GitHub task list items (`- [x] done`) as a plain
|
||||
* `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the
|
||||
* markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists
|
||||
* into the shape those extensions expect:
|
||||
* TaskList parseHTML matches `ul[data-type="taskList"]`,
|
||||
* TaskItem matches `li[data-type="taskItem"]`,
|
||||
* the checked state is read from `data-checked === "true"`.
|
||||
*
|
||||
* A list is only converted when it has at least one `<li>` and EVERY direct
|
||||
* `<li>` contains a checkbox input. Both `<ul>` and `<ol>` are considered: a
|
||||
* numbered checklist (`1. [x] a`, which marked renders as an `<ol>` of checkbox
|
||||
* `<li>`s) would otherwise lose its task state. TipTap task lists are unordered,
|
||||
* so a matching `<ol>` is emitted as `data-type="taskList"` exactly like a
|
||||
* `<ul>`. Mixed or ordinary lists (including ordinary `<ol>` lists) are left
|
||||
* untouched so they keep rendering as bullet/numbered lists. The marked `<p>`
|
||||
* wrapper is kept inside the `<li>` because TaskItem content allows paragraphs.
|
||||
*/
|
||||
function bridgeTaskLists(html: string): string {
  // Cheap early-out: without any checkbox input in the markup there is nothing
  // to bridge, so the JSDOM parse is skipped entirely and the input is returned
  // unchanged.
  if (!/type=["']?checkbox/i.test(html)) {
    return html;
  }
  // Defensive cap: inputs longer than MAX_CALLOUT_PREPROCESS_BYTES are passed
  // through verbatim instead of being parsed with JSDOM.
  if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
    return html;
  }

  const dom = new JSDOM(html);
  const document = dom.window.document;

  // Single predicate for "this element is a checkbox input". The `type` value
  // is compared case-insensitively so that a direct-child <input> and one
  // sitting inside a <p> are detected by exactly the same rule, in line with
  // the case-insensitive early-out regex above (raw inline HTML may spell the
  // value as `CHECKBOX`).
  const isCheckbox = (el: Element): boolean =>
    el.tagName === "INPUT" &&
    (el.getAttribute("type") ?? "").toLowerCase() === "checkbox";

  // Collect the checkbox(es) that belong to THIS <li> directly: direct child
  // inputs, or inputs that are direct children of the <li>'s direct <p> child
  // (`<li><p><input type="checkbox"> text</p></li>`). Checkboxes nested deeper
  // (e.g. inside a child <ul>/<ol>) are excluded so a bullet <li> that merely
  // contains a nested task sublist is not misdetected. ALL own checkboxes are
  // gathered so none survive into the converted item.
  const directCheckboxes = (li: Element): Element[] => {
    const found: Element[] = [];
    for (const child of Array.from(li.children)) {
      if (isCheckbox(child)) {
        found.push(child);
      } else if (child.tagName === "P") {
        found.push(...Array.from(child.children).filter(isCheckbox));
      }
    }
    return found;
  };

  // Both <ul> and <ol> are candidates: an <ol> whose every direct <li> carries
  // its own checkbox is a numbered checklist and is converted as well.
  const lists = Array.from(document.querySelectorAll("ul, ol"));
  for (const list of lists) {
    // Only DIRECT child <li> elements count; nested lists get their own
    // iteration of this loop.
    const items = Array.from(list.children).filter(
      (child) => child.tagName === "LI",
    );
    if (items.length === 0) continue;

    const itemCheckboxes = items.map((li) => directCheckboxes(li));
    // Convert only when every direct <li> carries at least one OWN checkbox.
    if (!itemCheckboxes.every((boxes) => boxes.length > 0)) continue;

    // A qualifying <ol> is renamed to a <ul> (attributes copied, children
    // moved, element replaced) so that the tag itself no longer says "ordered
    // list" once data-type="taskList" is set. Already-<ul> lists are reused.
    let target: Element = list;
    if (list.tagName === "OL") {
      const ul = document.createElement("ul");
      // Carry over existing attributes (e.g. class) so nothing is lost.
      for (const attr of Array.from(list.attributes)) {
        ul.setAttribute(attr.name, attr.value);
      }
      // Move every child node (including the collected <li>s) into the <ul>.
      while (list.firstChild) {
        ul.appendChild(list.firstChild);
      }
      list.replaceWith(ul);
      target = ul;
    }

    target.setAttribute("data-type", "taskList");
    items.forEach((li, index) => {
      const boxes = itemCheckboxes[index];
      // The first checkbox decides the checked state; extras are only removed.
      const input = boxes[0] ?? null;
      li.setAttribute("data-type", "taskItem");
      const checked =
        input != null &&
        (input.hasAttribute("checked") || (input as any).checked);
      li.setAttribute("data-checked", checked ? "true" : "false");
      // Remove ALL own checkbox inputs so none remain in the item content.
      for (const box of boxes) {
        box.remove();
      }
    });
  }

  return document.body.innerHTML;
}
|
||||
|
||||
// Footnote markdown handling, mirroring packages/editor-ext:
//   - an inline `[^id]` marker is rendered as <sup data-footnote-ref data-id="id">;
//   - `[^id]: text` definition lines are gathered into one <section data-footnotes>.
// Definition detection and fence handling come from lexFootnoteLines
// (footnote-lex.js), which analyzeFootnotes uses as well. The pattern below is
// the one the inline tokenizer matches with: `[^`, then one or more characters
// that are neither `]` nor whitespace (captured as the id), then `]`.
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
||||
|
||||
/**
 * Escape a footnote id for use inside a double-quoted HTML attribute value.
 *
 * Only `&` and `"` are rewritten (to `&amp;` and `&quot;`). The ampersand is
 * replaced first so the entities produced for quotes are not escaped again.
 *
 * @param value - the raw id; coerced with String() before escaping.
 * @returns the id with `&` and `"` replaced by their HTML entities.
 */
function escapeFootnoteAttr(value: string): string {
  return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
}
|
||||
|
||||
// marked inline extension that turns a `[^id]` marker into a footnote
// reference element.
const footnoteRefMarkedExtension = {
  name: "footnoteRef",
  level: "inline" as const,

  // Index of the first `[^` in `src`, or -1 when there is none.
  start(src: string) {
    const opener = src.match(/\[\^/);
    return opener?.index ?? -1;
  },

  // Produce a token only when the marker sits at the very start of `src`.
  tokenizer(src: string) {
    const hit = FOOTNOTE_REF_RE.exec(src);
    if (!hit || hit.index !== 0) {
      return undefined;
    }
    const [raw, id] = hit;
    return { type: "footnoteRef", raw, id };
  },

  // Emit an empty <sup> carrying the (attribute-escaped) id in data-id.
  renderer(token: any) {
    const safeId = escapeFootnoteAttr(token.id);
    return `<sup data-footnote-ref data-id="${safeId}"></sup>`;
  },
};
|
||||
|
||||
marked.use({ extensions: [footnoteRefMarkedExtension] });
|
||||
|
||||
/**
 * Split markdown into its body and a rendered footnotes section.
 *
 * Every `[^id]: text` definition line outside a code fence is removed from the
 * body; the remaining lines are re-joined with "\n". The definitions are
 * rendered into one `<section data-footnotes>`. When no definition is found
 * the input is returned untouched as `body` and `section` is "".
 */
function extractFootnotes(markdown: string): {
  body: string;
  section: string;
} {
  const keptLines: string[] = [];
  const definitions: Array<{ id: string; text: string }> = [];

  // lexFootnoteLines is the shared lexer: a `[^id]: ...` line inside a
  // ``` / ~~~ block is reported as in-fence and therefore stays in the body.
  for (const token of lexFootnoteLines(markdown)) {
    const isDefinition = !token.inFence && Boolean(token.definition);
    if (isDefinition) {
      definitions.push(token.definition);
    } else {
      keptLines.push(token.line);
    }
  }

  if (definitions.length === 0) {
    return { body: markdown, section: "" };
  }

  // Duplicate ids: the FIRST definition wins and later ones are dropped
  // (kept in step with editor-ext's extractFootnoteDefinitions). Reference
  // markers in the body are not touched. Map insertion order preserves the
  // order in which ids were first defined.
  const uniqueDefs = new Map<string, string>();
  for (const { id, text } of definitions) {
    if (uniqueDefs.has(id)) continue;
    uniqueDefs.set(id, text);
  }

  let rendered = "";
  for (const [id, text] of uniqueDefs) {
    const safeId = escapeFootnoteAttr(id);
    const bodyHtml = marked.parseInline(text || "");
    rendered += `<div data-footnote-def data-id="${safeId}"><p>${bodyHtml}</p></div>`;
  }

  return {
    body: keptLines.join("\n"),
    section: `<section data-footnotes>${rendered}</section>`,
  };
}
|
||||
|
||||
/**
 * Convert markdown into a ProseMirror document built with the Docmost
 * extension set.
 *
 * Pipeline: callout preprocessing -> footnote definition extraction ->
 * marked rendering of the body (footnotes section appended) -> task-list
 * bridging -> generateJSON.
 *
 * Footnotes are NOT canonicalized here. This is the content-preserving
 * primitive shared by page writes and comment bodies (createComment /
 * updateComment): a comment may hold a `[^1]: text` definition line with no
 * matching reference, and canonicalization would drop such a reference-less
 * list and with it the comment's text. Page write paths that need the
 * canonical footnote topology use `markdownToProseMirrorCanonical` instead.
 *
 * @param markdownContent - the markdown source to convert.
 * @returns a promise resolving to the generated ProseMirror JSON document.
 */
export async function markdownToProseMirror(
  markdownContent: string,
): Promise<any> {
  const calloutReady = await preprocessCallouts(markdownContent);
  const footnotes = extractFootnotes(calloutReady);
  const renderedBody = await marked.parse(footnotes.body);
  const taskReadyHtml = bridgeTaskLists(renderedBody + footnotes.section);
  return generateJSON(taskReadyHtml, docmostExtensions);
}
|
||||
|
||||
/**
|
||||
* Page-write variant of `markdownToProseMirror`: converts markdown then enforces
|
||||
* the canonical footnote topology. The footnote `section` markdown is emitted in
|
||||
* DEFINITION order, but numbering derives from REFERENCE order, so without this
|
||||
* the bottom list renders out of order (`1, 4, 2, 3, …`); orphan definitions and
|
||||
* duplicate lists are also normalized. Idempotent — a no-op once canonical, and a
|
||||
* no-op for footnote-free content.
|
||||
*
|
||||
* Use this ONLY for full-document PAGE writes (never for comment bodies, where it
|
||||
* would drop a reference-less footnote definition — see `markdownToProseMirror`).
|
||||
* Use this ONLY for full-document PAGE writes. Comment bodies call the package's
|
||||
* plain `markdownToProseMirror` (no canonicalization) — safe now because inline
|
||||
* `^[body]` footnotes carry their body at the reference point, so a comment can
|
||||
* no longer produce a reference-less footnote definition to be dropped.
|
||||
*/
|
||||
export async function markdownToProseMirrorCanonical(
|
||||
markdownContent: string,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Footnote diagnostics for imported Markdown (issue #166).
|
||||
* Legacy footnote diagnostics for imported Markdown (issue #166).
|
||||
*
|
||||
* A PURE, fence-aware text scan (independent of the Markdown->ProseMirror
|
||||
* conversion path, so it reports the same problems for `create_page`,
|
||||
@@ -7,11 +7,18 @@
|
||||
* importer still creates the page; this only surfaces footnote problems to the
|
||||
* caller so an agent can fix its own markup instead of shipping broken footnotes.
|
||||
*
|
||||
* SCOPE after #293 STEP 5: the canonical import form is now inline `^[body]`
|
||||
* footnotes (handled by `@docmost/prosemirror-markdown`), where these problems
|
||||
* cannot arise. This scan therefore targets the LEGACY reference-style
|
||||
* (`[^id]` / `[^id]:`) markup, which is now inert on import (left as literal
|
||||
* text). The warnings remain useful as an advisory nudge when an agent still
|
||||
* authors the old syntax, but they no longer describe what the importer builds.
|
||||
*
|
||||
* Detected problems:
|
||||
* - danglingReferences: a `[^id]` reference with no `[^id]:` definition.
|
||||
* - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace.
|
||||
* - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the
|
||||
* first is kept on import — first-wins; see extractFootnotes).
|
||||
* first would have been kept under the old first-wins import).
|
||||
* - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic:
|
||||
* the line, trimmed, starts with `|`) — footnotes in table cells often do not
|
||||
* render as expected.
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
/**
|
||||
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
|
||||
* Shared, fence-aware line lexer for legacy footnote markdown (MCP-internal).
|
||||
*
|
||||
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
|
||||
* definition lines and rebuilds a footnotes section) and the diagnostics
|
||||
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
|
||||
* are definitions and which lines are inert (inside a code fence). Sharing one
|
||||
* lexer makes "the analyzer sees what the importer leaves" a structural property
|
||||
* instead of two hand-kept copies that can drift (#166 review).
|
||||
* Since #293 STEP 5 the markdown -> ProseMirror IMPORT path lives in the shared
|
||||
* `@docmost/prosemirror-markdown` package (inline `^[body]` footnotes), so this
|
||||
* lexer no longer backs an mcp importer. It now backs ONLY the import-time
|
||||
* diagnostics (`analyzeFootnotes` in footnote-analyze.ts), which still scan the
|
||||
* raw markdown for legacy reference-style `[^id]:` definition lines and surface
|
||||
* advisory warnings (duplicate/orphan definitions) about content that is now
|
||||
* inert on import. Fence-awareness (a `[^id]:` line inside a ``` / ~~~ block is
|
||||
* NOT a definition) is the property the analyzer relies on.
|
||||
*
|
||||
* NOTE: this is deliberately NOT shared with editor-ext's
|
||||
* `extractFootnoteDefinitions` — that lives in a different package and the
|
||||
|
||||
@@ -1,903 +1,12 @@
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
* ProseMirror -> Docmost-flavoured Markdown converter.
|
||||
*
|
||||
* #293 STEP 5: the converter CORE now lives in the shared
|
||||
* `@docmost/prosemirror-markdown` package (the canonical, lossless
|
||||
* implementation carrying every git-sync fix and the #293 canon decisions).
|
||||
* MCP consumes it directly instead of keeping its own drifted copy, so the two
|
||||
* can never diverge again. This file is a thin re-export shim kept only so the
|
||||
* many existing `./markdown-converter.js` importers (client.ts, tests) do not
|
||||
* have to move.
|
||||
*/
|
||||
export function convertProseMirrorToMarkdown(content: any): string {
|
||||
if (!content || !content.content) return "";
|
||||
|
||||
// Escape a value interpolated into an HTML double-quoted attribute value
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.).
//
// Only `&` (-> `&amp;`) and `"` (-> `&quot;`) are rewritten: the value is
// always wrapped in double quotes, so `"` is the only delimiter and `&` is the
// only character that can start an entity. The ampersand is replaced first so
// the entities emitted for quotes are not escaped a second time.
//
// `<`, `>` and `'` are deliberately left as-is. The original note on this
// helper records that escaping them (as `&lt;` / `&gt;` / `&#39;`) corrupted
// stored values such as a math node's LaTeX `a < b` and made escapes pile up
// on every export/import round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`).
// NOTE(review): that round-trip behaviour depends on the HTML re-parser and
// cannot be confirmed from this file — verify before widening the escape set.
const escapeAttr = (value: unknown): string =>
  String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
|
||||
|
||||
// Escape a value placed as HTML element TEXT content (between tags), where
// `<`, `>` and `&` are all significant. Used for text rendered inside raw-HTML
// output so stored characters cannot inject markup. The ampersand is replaced
// first so the `&lt;` / `&gt;` entities produced afterwards are not escaped
// again.
const escapeHtmlText = (value: unknown): string =>
  String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
|
||||
|
||||
// Make a stored src safe to drop into a markdown URL target `(...)`:
// whitespace (including newlines) and both parentheses are percent-encoded so
// the URL stays one inert token (used for image/video/youtube srcs). A falsy
// value yields "". All three character classes are handled in one pass; none
// of the replacement texts contains whitespace or a parenthesis, so the result
// equals applying the substitutions one after another.
const encodeMdUrl = (value: unknown): string => {
  const raw = String(value || "");
  return raw.replace(/[\s()]/g, (ch: string) => {
    if (ch === "(") return "%28";
    if (ch === ")") return "%29";
    // Whitespace: a plain space becomes %20, other whitespace its UTF-8 escape.
    return encodeURIComponent(ch);
  });
};
|
||||
|
||||
const processNode = (node: any): string => {
|
||||
const type = node.type;
|
||||
const nodeContent = node.content || [];
|
||||
|
||||
switch (type) {
|
||||
case "doc":
|
||||
return nodeContent.map(processNode).join("\n\n");
|
||||
|
||||
case "paragraph":
|
||||
const text = nodeContent.map(processNode).join("");
|
||||
const align = node.attrs?.textAlign;
|
||||
if (align && align !== "left") {
|
||||
return `<div align="${escapeAttr(align)}">${text}</div>`;
|
||||
}
|
||||
return text || "";
|
||||
|
||||
case "heading":
|
||||
const level = node.attrs?.level || 1;
|
||||
const headingText = nodeContent.map(processNode).join("");
|
||||
return "#".repeat(level) + " " + headingText;
|
||||
|
||||
case "text":
|
||||
let textContent = node.text || "";
|
||||
// Apply marks (bold, italic, code, etc.)
|
||||
if (node.marks) {
|
||||
// Markdown code spans (`...`) cannot carry inner formatting, so when a
|
||||
// run has the `code` mark alongside ANY other mark, backtick syntax
|
||||
// would leak literal ** / []() into the code text. In that case emit
|
||||
// nested HTML (<code> innermost, the other marks wrapping it as HTML)
|
||||
// so the output is at least well-formed and re-parseable.
|
||||
//
|
||||
// NOTE: this does NOT round-trip both marks. The schema's `code` mark
|
||||
// has `excludes: "_"` (it excludes every other mark), so on import the
|
||||
// co-occurring mark is always dropped — the run comes back as `code`
|
||||
// only. We keep the emission simple and accept that the other mark is
|
||||
// lost; preserving both is impossible while `code` excludes them.
|
||||
// Only use the backtick form when `code` is the sole mark.
|
||||
const markTypes = node.marks.map((m: any) => m.type);
|
||||
const hasCode = markTypes.includes("code");
|
||||
const codeCombined = hasCode && markTypes.length > 1;
|
||||
for (const mark of node.marks) {
|
||||
switch (mark.type) {
|
||||
case "bold":
|
||||
textContent = codeCombined
|
||||
? `<strong>${textContent}</strong>`
|
||||
: `**${textContent}**`;
|
||||
break;
|
||||
case "italic":
|
||||
textContent = codeCombined
|
||||
? `<em>${textContent}</em>`
|
||||
: `*${textContent}*`;
|
||||
break;
|
||||
case "code":
|
||||
// When combined with another mark, wrap as <code> so the
|
||||
// surrounding HTML marks can nest around it; otherwise use the
|
||||
// plain backtick span.
|
||||
textContent = codeCombined
|
||||
? `<code>${textContent}</code>`
|
||||
: `\`${textContent}\``;
|
||||
break;
|
||||
case "link": {
|
||||
const href = mark.attrs?.href || "";
|
||||
const title = mark.attrs?.title;
|
||||
if (codeCombined) {
|
||||
// Emit an HTML anchor so it can wrap the nested <code>.
|
||||
const safeHref = escapeAttr(href);
|
||||
if (title) {
|
||||
textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
|
||||
} else {
|
||||
textContent = `<a href="${safeHref}">${textContent}</a>`;
|
||||
}
|
||||
} else if (title) {
|
||||
// Emit the optional markdown link title; escape an embedded
|
||||
// double-quote so it cannot terminate the title string early.
|
||||
const safeTitle = String(title).replace(/"/g, '\\"');
|
||||
textContent = `[${textContent}](${href} "${safeTitle}")`;
|
||||
} else {
|
||||
textContent = `[${textContent}](${href})`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "strike":
|
||||
textContent = codeCombined
|
||||
? `<s>${textContent}</s>`
|
||||
: `~~${textContent}~~`;
|
||||
break;
|
||||
case "underline":
|
||||
textContent = `<u>${textContent}</u>`;
|
||||
break;
|
||||
case "subscript":
|
||||
textContent = `<sub>${textContent}</sub>`;
|
||||
break;
|
||||
case "superscript":
|
||||
textContent = `<sup>${textContent}</sup>`;
|
||||
break;
|
||||
case "highlight": {
|
||||
// Preserve a null/empty color as a plain highlight (a bare
|
||||
// <mark> with no background-color); only emit the style when a
|
||||
// color is actually set, so a plain highlight is not forced to
|
||||
// yellow on export.
|
||||
const color = mark.attrs?.color;
|
||||
textContent = color
|
||||
? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
|
||||
: `<mark>${textContent}</mark>`;
|
||||
break;
|
||||
}
|
||||
case "textStyle":
|
||||
if (mark.attrs?.color) {
|
||||
textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
case "comment": {
|
||||
// Emit the inline comment anchor so highlights round-trip. The
|
||||
// schema's Comment mark parses span[data-comment-id] (attrs
|
||||
// commentId/resolved).
|
||||
const cid = mark.attrs?.commentId;
|
||||
if (cid) {
|
||||
const resolvedAttr = mark.attrs?.resolved
|
||||
? ` data-resolved="true"`
|
||||
: "";
|
||||
textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "spoiler":
|
||||
// Markdown has no native spoiler syntax, so emit the same
|
||||
// lossless raw HTML the editor-ext turndown rule produces; the
|
||||
// schema's Spoiler mark parses span[data-spoiler] back on import.
|
||||
textContent = `<span data-spoiler="true">${textContent}</span>`;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return textContent;
|
||||
|
||||
case "codeBlock":
|
||||
const language = node.attrs?.language || "";
|
||||
// Strip ALL trailing newlines so the export is idempotent: marked
|
||||
// re-adds exactly one trailing "\n" on import, so trimming only one
|
||||
// here would let the text grow by "\n" on each round-trip. Removing
|
||||
// every trailing newline makes repeated cycles stable.
|
||||
const code = nodeContent
|
||||
.map(processNode)
|
||||
.join("")
|
||||
.replace(/\n+$/, "");
|
||||
return "```" + language + "\n" + code + "\n```";
|
||||
|
||||
case "bulletList":
|
||||
return nodeContent
|
||||
.map((item: any) => processListItem(item, "-"))
|
||||
.join("\n");
|
||||
|
||||
case "orderedList":
|
||||
return nodeContent
|
||||
.map((item: any, index: number) =>
|
||||
processListItem(item, `${index + 1}.`),
|
||||
)
|
||||
.join("\n");
|
||||
|
||||
case "taskList":
|
||||
return nodeContent.map((item: any) => processTaskItem(item)).join("\n");
|
||||
|
||||
case "taskItem":
|
||||
// Delegate to the same helper used by taskList so multi-block and
|
||||
// nested task items render and indent consistently.
|
||||
return processTaskItem(node);
|
||||
|
||||
case "listItem":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
|
||||
case "blockquote":
|
||||
// Prefix EVERY line of EVERY child with "> " and separate block-level
|
||||
// children with a blank ">" line so code blocks / multi-paragraph
|
||||
// quotes round-trip correctly.
|
||||
return nodeContent
|
||||
.map((n: any) =>
|
||||
processNode(n)
|
||||
.split("\n")
|
||||
.map((line: string) => (line.length ? `> ${line}` : ">"))
|
||||
.join("\n"),
|
||||
)
|
||||
.join("\n>\n");
|
||||
|
||||
case "horizontalRule":
|
||||
return "---";
|
||||
|
||||
case "hardBreak":
|
||||
// Two trailing spaces before the newline encode a markdown hard break;
|
||||
// a bare "\n" would be reimported as a soft break and lost.
|
||||
return " \n";
|
||||
|
||||
case "image": {
|
||||
const imgAlt = node.attrs?.alt || "";
|
||||
const imgCaption = node.attrs?.caption || "";
|
||||
if (imgCaption) {
|
||||
// ![]() can't carry a caption, so (symmetric to video) emit a raw
|
||||
// <img> wrapped in a block <div>. On import marked.parse keeps the raw
|
||||
// HTML and generateJSON runs the image extension's parseHTML, which
|
||||
// restores the caption from data-caption.
|
||||
const parts: string[] = [`src="${escapeAttr(node.attrs?.src ?? "")}"`];
|
||||
if (imgAlt) parts.push(`alt="${escapeAttr(imgAlt)}"`);
|
||||
parts.push(`data-caption="${escapeAttr(imgCaption)}"`);
|
||||
return `<div><img ${parts.join(" ")}></div>`;
|
||||
}
|
||||
// Neutralize characters that could break out of the markdown image
|
||||
// URL: spaces/newlines and parentheses would terminate the (...) target
|
||||
// and let a stored src inject following markdown/HTML. Percent-encode
|
||||
// them so the URL stays a single inert token.
|
||||
const imgSrc = encodeMdUrl(node.attrs?.src);
|
||||
return ``;
|
||||
}
|
||||
|
||||
case "video": {
|
||||
// Emit the schema-matching <video> element so generateJSON rebuilds the
|
||||
// node with its attrs intact. The schema's parseHTML reads src/aria-label
|
||||
// from the standard attributes and the remaining attrs from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.alt) parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
// Wrap in a block <div> so marked treats it as a block (a bare <video>
|
||||
// is inline-level HTML and marked wraps it in <p>, leaving a spurious
|
||||
// empty paragraph beside the hoisted block atom). The wrapper has no
|
||||
// data-type, so the schema parser ignores it and just hoists the video.
|
||||
return `<div><video ${parts.join(" ")}></video></div>`;
|
||||
}
|
||||
|
||||
case "youtube": {
|
||||
// Emit the schema-matching div[data-type="youtube"]; the schema reads
|
||||
// src from data-src and width/height/align from data-* attributes.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="youtube"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "table": {
|
||||
// A GFM pipe table cannot represent merged cells. If ANY cell carries
|
||||
// colspan>1 or rowspan>1, a pipe table would corrupt the grid on
|
||||
// re-import, so emit the WHOLE table as raw HTML <table> instead: the
|
||||
// schema's table family parseHTML (tag table/tr/td/th, with colspan/
|
||||
// rowspan read from the same-named HTML attrs and align via parseHTML)
|
||||
// round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
|
||||
const tableRows: any[] = nodeContent;
|
||||
if (tableRows.length === 0) return "";
|
||||
const hasSpan = tableRows.some((row: any) =>
|
||||
(row.content || []).some(
|
||||
(cell: any) =>
|
||||
(cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1,
|
||||
),
|
||||
);
|
||||
|
||||
if (hasSpan) {
|
||||
// Render each cell's block children to HTML (marked does NOT parse
|
||||
// markdown inside a raw HTML block, so emitting markdown here would
|
||||
// leak literal ** / `` into the cell). blockToHtml mirrors the schema
|
||||
// HTML so inner formatting re-parses into the right marks/nodes.
|
||||
const renderHtmlCell = (cell: any): string => {
|
||||
const tag = cell.type === "tableHeader" ? "th" : "td";
|
||||
const a = cell.attrs || {};
|
||||
const cellParts: string[] = [];
|
||||
if ((a.colspan ?? 1) > 1)
|
||||
cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
|
||||
if ((a.rowspan ?? 1) > 1)
|
||||
cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
|
||||
if (a.align) cellParts.push(`align="${escapeAttr(a.align)}"`);
|
||||
const open = cellParts.length
|
||||
? `<${tag} ${cellParts.join(" ")}>`
|
||||
: `<${tag}>`;
|
||||
const inner = (cell.content || [])
|
||||
.map((block: any) => blockToHtml(block))
|
||||
.join("");
|
||||
return `${open}${inner}</${tag}>`;
|
||||
};
|
||||
const htmlRows = tableRows
|
||||
.map(
|
||||
(row: any) =>
|
||||
`<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`,
|
||||
)
|
||||
.join("");
|
||||
return `<table><tbody>${htmlRows}</tbody></table>`;
|
||||
}
|
||||
|
||||
// No merged cells: emit a GFM table (header row + separator) so the
|
||||
// markdown can be parsed back into a table on re-import.
|
||||
const rows = tableRows.map(processNode);
|
||||
const headerCells = tableRows[0]?.content || [];
|
||||
const columns = headerCells.length || 1;
|
||||
// Derive alignment markers (:--, :-:, --:) from each header cell.
|
||||
const markers = Array.from({ length: columns }, (_, i) => {
|
||||
const align = headerCells[i]?.attrs?.align;
|
||||
switch (align) {
|
||||
case "left":
|
||||
return ":--";
|
||||
case "center":
|
||||
return ":-:";
|
||||
case "right":
|
||||
return "--:";
|
||||
default:
|
||||
return "---";
|
||||
}
|
||||
});
|
||||
const separator = "| " + markers.join(" | ") + " |";
|
||||
return [rows[0], separator, ...rows.slice(1)].join("\n");
|
||||
}
|
||||
|
||||
case "tableRow":
|
||||
return "| " + nodeContent.map(processNode).join(" | ") + " |";
|
||||
|
||||
case "tableCell":
|
||||
case "tableHeader": {
|
||||
// Join multiple block children with a space (not "") so adjacent blocks
|
||||
// like a paragraph followed by a list don't collide into "line1- a".
|
||||
// Then collapse newlines and escape pipes so a cell containing "|" or a
|
||||
// line break cannot corrupt the surrounding GFM row.
|
||||
return nodeContent
|
||||
.map(processNode)
|
||||
.join(" ")
|
||||
.replace(/\r?\n/g, " ")
|
||||
.replace(/\|/g, "\\|");
|
||||
}
|
||||
|
||||
case "callout":
|
||||
const calloutType = node.attrs?.type || "info";
|
||||
const calloutContent = nodeContent.map(processNode).join("\n");
|
||||
return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
|
||||
|
||||
case "details":
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
|
||||
case "detailsSummary":
|
||||
const summaryText = nodeContent.map(processNode).join("");
|
||||
return `<details>\n<summary>${summaryText}</summary>\n`;
|
||||
|
||||
case "detailsContent":
|
||||
const detailsText = nodeContent.map(processNode).join("\n");
|
||||
return `${detailsText}\n</details>`;
|
||||
|
||||
case "mathInline": {
|
||||
// The schema's `text` attribute has no parseHTML, so TipTap's default
|
||||
// parser reads it from the `text` HTML attribute (NOT the element's text
|
||||
// content). Emit span[data-type="mathInline"] carrying the LaTeX in a
|
||||
// `text="..."` attribute so it round-trips. marked cannot parse $...$
|
||||
// back, so the previous form was lossy.
|
||||
const inlineMath = node.attrs?.text || "";
|
||||
return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
|
||||
}
|
||||
|
||||
case "mathBlock": {
|
||||
// Same as mathInline: the LaTeX must ride in the `text` HTML attribute
|
||||
// for the schema's default parser to recover it.
|
||||
const blockMath = node.attrs?.text || "";
|
||||
return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
|
||||
}
|
||||
|
||||
case "mention": {
|
||||
// Emit span[data-type="mention"] with the schema's data-* attributes so
|
||||
// generateJSON rebuilds the mention node instead of leaving "@label"
|
||||
// plain text that cannot re-parse.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="mention"`];
|
||||
if (attrs.id) parts.push(`data-id="${escapeAttr(attrs.id)}"`);
|
||||
if (attrs.label)
|
||||
parts.push(`data-label="${escapeAttr(attrs.label)}"`);
|
||||
if (attrs.entityType)
|
||||
parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
|
||||
if (attrs.entityId)
|
||||
parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
|
||||
if (attrs.slugId)
|
||||
parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
|
||||
if (attrs.creatorId)
|
||||
parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
|
||||
if (attrs.anchorId)
|
||||
parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
|
||||
// Keep the label as visible text content too; the schema reads attrs
|
||||
// from data-*, so the inner text is purely cosmetic and harmless.
|
||||
const mentionLabel = attrs.label || attrs.id || "";
|
||||
// The label is visible element TEXT content here (the data-* attrs above
|
||||
// carry the real values), so escape it for the text context, not attrs.
|
||||
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
|
||||
}
|
||||
|
||||
case "footnoteReference": {
|
||||
// Pandoc/GFM inline marker. The number is derived (not stored), so the
|
||||
// id is the stable anchor.
|
||||
const fnId = node.attrs?.id || "";
|
||||
return fnId ? `[^${fnId}]` : "";
|
||||
}
|
||||
|
||||
case "footnotesList":
|
||||
// The container renders its definitions, each on its own `[^id]: ...`
|
||||
// line. A blank line separates the body from the notes block.
|
||||
return nodeContent.map(processNode).join("\n");
|
||||
|
||||
case "footnoteDefinition": {
|
||||
const defId = node.attrs?.id || "";
|
||||
// Collapse the definition's paragraphs into a single line; multi-line
|
||||
// footnotes are a v2 refinement.
|
||||
const defText = nodeContent
|
||||
.map(processNode)
|
||||
.join(" ")
|
||||
.replace(/\s*\n+\s*/g, " ")
|
||||
.trim();
|
||||
return defId ? `[^${defId}]: ${defText}` : "";
|
||||
}
|
||||
|
||||
case "attachment": {
|
||||
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
|
||||
// the schema stores name/url (plus mime/size/attachmentId). Emit the
|
||||
// schema-matching div[data-type="attachment"] with data-attachment-*
|
||||
// attrs so the node round-trips instead of degrading to a markdown link.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="attachment"`,
|
||||
`data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
|
||||
];
|
||||
if (attrs.name)
|
||||
parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.mime)
|
||||
parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "drawio":
|
||||
case "excalidraw": {
|
||||
// Emit the schema-matching div[data-type=...] carrying the diagram's
|
||||
// attrs as data-* (the schema's diagramAttributes reads src/title/alt/
|
||||
// width/height/size/aspectRatio/align/attachmentId from data-*), so the
|
||||
// diagram round-trips instead of degrading to a lossy placeholder.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="${type}"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.title != null)
|
||||
parts.push(`data-title="${escapeAttr(attrs.title)}"`);
|
||||
if (attrs.alt != null) parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.aspectRatio != null)
|
||||
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "embed": {
|
||||
// Emit the schema-matching div[data-type="embed"]; the schema reads
|
||||
// src/provider/align/width/height from data-* attributes so the node
|
||||
// (and its provider iframe info) survives the round-trip.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="embed"`,
|
||||
`data-src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
`data-provider="${escapeAttr(attrs.provider ?? "")}"`,
|
||||
];
|
||||
if (attrs.align)
|
||||
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "audio": {
|
||||
// Emit the schema-matching <audio> element (was emitting nothing). The
|
||||
// schema reads src from src and attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
// Wrap in a block <div> for the same reason as video: a bare <audio> is
|
||||
// inline-level HTML that marked would wrap in <p>.
|
||||
return `<div><audio ${parts.join(" ")}></audio></div>`;
|
||||
}
|
||||
|
||||
case "pdf": {
|
||||
// Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
|
||||
// The schema reads src/width/height from standard attrs and name/
|
||||
// attachmentId/size from data-*.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="pdf"`,
|
||||
`src="${escapeAttr(attrs.src ?? "")}"`,
|
||||
];
|
||||
if (attrs.name) parts.push(`data-name="${escapeAttr(attrs.name)}"`);
|
||||
if (attrs.attachmentId)
|
||||
parts.push(
|
||||
`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`,
|
||||
);
|
||||
if (attrs.size != null)
|
||||
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
|
||||
if (attrs.width != null)
|
||||
parts.push(`width="${escapeAttr(attrs.width)}"`);
|
||||
if (attrs.height != null)
|
||||
parts.push(`height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "columns": {
|
||||
// Emit the schema-matching div[data-type="columns"] wrapper so the
|
||||
// multi-column layout survives. Without a case the children were
|
||||
// concatenated with no separator and the text merged. The schema reads
|
||||
// layout from data-layout and widthMode from data-width-mode. The whole
|
||||
// block is raw HTML, so render children via blockToHtml (NOT markdown,
|
||||
// which marked would not re-parse inside a raw HTML block).
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="columns"`];
|
||||
if (attrs.layout)
|
||||
parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
|
||||
if (attrs.widthMode && attrs.widthMode !== "normal")
|
||||
parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
|
||||
case "column": {
|
||||
// Emit the schema-matching div[data-type="column"]; the schema reads the
|
||||
// column width from data-width. Children are rendered as HTML so their
|
||||
// formatting survives inside this raw HTML block.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="column"`];
|
||||
if (attrs.width)
|
||||
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div ${parts.join(" ")}>${inner}</div>`;
|
||||
}
|
||||
|
||||
case "subpages":
|
||||
return "{{SUBPAGES}}";
|
||||
|
||||
default:
|
||||
// Fallback: process children
|
||||
return nodeContent.map(processNode).join("");
|
||||
}
|
||||
};
|
||||
|
||||
// Serialize inline ProseMirror content (text runs with their marks, hard
// breaks, inline atoms) to HTML. The raw-HTML fallbacks (spanned tables,
// columns) need this because marked does NOT re-parse markdown inside a raw
// HTML block, so backtick/asterisk/bracket syntax would leak as literal
// characters. Each mark is written as the element the schema's parseHTML
// accepts so it re-imports as the matching ProseMirror mark. Marks are applied
// in array order (the first mark ends up innermost); unknown mark types are
// ignored and leave the text untouched.
const inlineToHtml = (inlineNodes: any[]): string => {
  // Marks that map to a bare wrapper element with no attributes.
  const wrapperTagByMark = new Map<string, string>([
    ["bold", "strong"],
    ["italic", "em"],
    ["code", "code"],
    ["strike", "s"],
    ["underline", "u"],
    ["subscript", "sub"],
    ["superscript", "sup"],
  ]);

  // Wrap the already-escaped `html` in the element for one mark.
  const applyMark = (html: string, mark: any): string => {
    const wrapperTag = wrapperTagByMark.get(mark.type);
    if (wrapperTag !== undefined) {
      return `<${wrapperTag}>${html}</${wrapperTag}>`;
    }
    if (mark.type === "link") {
      return `<a href="${escapeAttr(mark.attrs?.href || "")}">${html}</a>`;
    }
    if (mark.type === "highlight") {
      if (!mark.attrs?.color) return `<mark>${html}</mark>`;
      return `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${html}</mark>`;
    }
    if (mark.type === "textStyle") {
      // Only the colour is emitted; a colourless textStyle adds no wrapper.
      if (!mark.attrs?.color) return html;
      return `<span style="color: ${escapeAttr(mark.attrs.color)}">${html}</span>`;
    }
    if (mark.type === "comment") {
      // Inline comment anchor inside a raw-HTML container (columns / spanned
      // table cells), so commented text there also round-trips.
      if (!mark.attrs?.commentId) return html;
      const resolvedAttr = mark.attrs?.resolved ? ` data-resolved="true"` : "";
      return `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${resolvedAttr}>${html}</span>`;
    }
    return html;
  };

  const pieces: string[] = [];
  for (const inlineNode of inlineNodes || []) {
    if (inlineNode.type === "hardBreak") {
      pieces.push("<br>");
    } else if (inlineNode.type !== "text") {
      // Inline atoms (mention, mathInline) already emit schema HTML.
      pieces.push(processNode(inlineNode));
    } else {
      const escapedText = escapeHtmlText(inlineNode.text || "");
      pieces.push((inlineNode.marks || []).reduce(applyMark, escapedText));
    }
  }
  return pieces.join("");
};
|
||||
|
||||
// Build the schema-matching <img> element for an image node. Shared so the
// image is written as real HTML wherever a raw-HTML container needs it (inside
// a column or a spanned table cell), where markdown `![alt](src)` would NOT be
// re-parsed and would survive as literal text. The Image extension reads
// src/alt from the standard attributes; the Docmost extra attrs (width/height/
// align/size/attachmentId/aspectRatio) are global attributes read from
// same-named DOM attributes, so they are emitted by name.
const imageToHtml = (node: any): string => {
  const a = node.attrs || {};
  // [HTML attribute name, value, whether to emit]. `src` is always emitted;
  // text-like attrs are emitted when truthy, numeric-like ones when non-null.
  const candidates: Array<[string, any, boolean]> = [
    ["src", a.src ?? "", true],
    ["alt", a.alt, Boolean(a.alt)],
    ["data-caption", a.caption, Boolean(a.caption)],
    ["title", a.title, Boolean(a.title)],
    ["width", a.width, a.width != null],
    ["height", a.height, a.height != null],
    ["align", a.align, Boolean(a.align)],
    ["data-size", a.size, a.size != null],
    ["data-attachment-id", a.attachmentId, Boolean(a.attachmentId)],
    ["data-aspect-ratio", a.aspectRatio, a.aspectRatio != null],
  ];
  const rendered = candidates
    .filter(([, , emit]) => emit)
    .map(([attrName, value]) => `${attrName}="${escapeAttr(value)}"`);
  return `<img ${rendered.join(" ")}>`;
};
|
||||
|
||||
// Build the schema-matching div[data-type="callout"] for a callout node. The
// schema reads the banner type from data-callout-type (lower-cased here,
// defaulting to "info"). Children go through blockToHtml so they survive
// inside a raw-HTML container.
const calloutToHtml = (node: any): string => {
  const bannerType = (node.attrs?.type || "info").toLowerCase();
  const childBlocks: any[] = node.content || [];
  const body = childBlocks.map((child) => blockToHtml(child)).join("");
  return `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">${body}</div>`;
};
|
||||
|
||||
// Build a schema-matching <details> tree. The schema parses <details>,
// summary[data-type="detailsSummary"], and div[data-type="detailsContent"];
// the summary/content children are rendered by blockToHtml.
const detailsToHtml = (node: any): string => {
  const childBlocks: any[] = node.content || [];
  return `<details>${childBlocks.map((child) => blockToHtml(child)).join("")}</details>`;
};
|
||||
// Render a detailsSummary node: its inline content inside the
// summary[data-type="detailsSummary"] element.
const detailsSummaryToHtml = (node: any): string => {
  const summaryHtml = inlineToHtml(node.content || []);
  return `<summary data-type="detailsSummary">${summaryHtml}</summary>`;
};
|
||||
// Render a detailsContent node: its block children inside the
// div[data-type="detailsContent"] wrapper.
const detailsContentToHtml = (node: any): string => {
  const childBlocks: any[] = node.content || [];
  return `<div data-type="detailsContent">${childBlocks.map((child) => blockToHtml(child)).join("")}</div>`;
};
|
||||
|
||||
// Build the schema-matching taskList/taskItem HTML. bridgeTaskLists (in
// collaboration.ts) recognizes ul[data-type="taskList"] with
// li[data-type="taskItem"][data-checked]; writing that shape directly keeps
// task lists inside columns/cells from degrading to literal "- [ ]" text.
const taskListToHtml = (node: any): string => {
  // One <li> per task item; data-checked is always "true" or "false".
  const renderItem = (taskItem: any): string => {
    const checkedState = taskItem.attrs?.checked ? "true" : "false";
    return `<li data-type="taskItem" data-checked="${checkedState}">${blockChildrenToHtml(taskItem)}</li>`;
  };
  const taskItems: any[] = node.content || [];
  return `<ul data-type="taskList">${taskItems.map((taskItem) => renderItem(taskItem)).join("")}</ul>`;
};
|
||||
|
||||
// Render a block node to HTML for the raw-HTML containers (spanned tables,
// columns). marked does NOT re-parse markdown inside a raw-HTML block, so
// EVERY block type that can appear inside a column or a spanned cell must be
// emitted as schema-matching HTML here — never as markdown, or it would land
// as literal text on re-import. Nodes whose processNode case already produces
// schema-matching HTML (math/media/embed/attachment/nested columns/spanned
// table) are delegated to processNode; the markdown-emitting cases
// (image/blockquote/callout/details/hr/taskList) get explicit HTML here.
//
// Inline nodes are accepted too: the generic fallback at the bottom maps every
// child of an unknown block back through this function, so a text/hardBreak/
// inline-atom child must render as inline HTML instead of falling into the
// fallback again (where, having no children of its own, it would render as an
// empty <div> and its text would be lost).
const blockToHtml = (block: any): string => {
  const children = block.content || [];
  switch (block.type) {
    case "text":
    case "hardBreak":
    case "mention":
    case "mathInline":
      return inlineToHtml([block]);
    case "paragraph":
      return `<p>${inlineToHtml(children)}</p>`;
    case "heading": {
      // The level becomes part of the TAG NAME, so it must be a real heading
      // level: coerce to an integer and clamp to 1..6 (missing/invalid -> 1).
      // An unchecked value would produce a tag that cannot re-import as a
      // heading, or inject arbitrary markup.
      const rawLevel = Number(block.attrs?.level);
      const level = Number.isFinite(rawLevel)
        ? Math.min(6, Math.max(1, Math.trunc(rawLevel)))
        : 1;
      return `<h${level}>${inlineToHtml(children)}</h${level}>`;
    }
    case "bulletList":
      return `<ul>${children
        .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
        .join("")}</ul>`;
    case "orderedList":
      return `<ol>${children
        .map((li: any) => `<li>${blockChildrenToHtml(li)}</li>`)
        .join("")}</ol>`;
    case "codeBlock": {
      const lang = block.attrs?.language || "";
      // The code itself is element TEXT content (between <code> tags), so it
      // must escape < > & — NOT the attribute escaper. The language rides in
      // a class ATTRIBUTE, so it uses escapeAttr.
      const code = escapeHtmlText(
        children
          .map(processNode)
          .join("")
          .replace(/\n+$/, ""),
      );
      const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
      return `<pre><code${cls}>${code}</code></pre>`;
    }
    case "image":
      return imageToHtml(block);
    case "blockquote":
      return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
    case "horizontalRule":
      return "<hr>";
    case "callout":
      return calloutToHtml(block);
    case "details":
      return detailsToHtml(block);
    case "detailsSummary":
      return detailsSummaryToHtml(block);
    case "detailsContent":
      return detailsContentToHtml(block);
    case "taskList":
      return taskListToHtml(block);
    case "taskItem":
      // A bare taskItem (outside a taskList) still needs a wrapping list so
      // the schema parses it; wrap it in a single-item taskList.
      return taskListToHtml({ content: [block] });
    // table (incl. spanned), columns/column, math, media, embed, attachment,
    // etc. already emit schema-matching HTML from processNode.
    case "table":
    case "columns":
    case "column":
    case "mathBlock":
    case "video":
    case "audio":
    case "pdf":
    case "youtube":
    case "embed":
    case "attachment":
    case "drawio":
    case "excalidraw":
      return processNode(block);
    default:
      // Any still-unhandled block type: NEVER fall back to markdown inside a
      // raw-HTML block (it would become literal text). Wrap its rendered
      // children in a <div> so their content is preserved; if it has no block
      // children, render its inline content instead. Inline children that
      // appear next to non-text siblings are handled by the inline cases at
      // the top of this switch.
      if (children.length && children.some((c: any) => c.type !== "text")) {
        return `<div>${children.map(blockToHtml).join("")}</div>`;
      }
      return `<div>${inlineToHtml(children)}</div>`;
  }
};
|
||||
|
||||
// HTML counterpart of processListItem for the raw-HTML fallback path: a list
// item holds block+ content, so each child block is rendered with blockToHtml
// and the results are concatenated with no separator.
const blockChildrenToHtml = (item: any): string => {
  const childBlocks: any[] = item.content || [];
  return childBlocks.map((child) => blockToHtml(child)).join("");
};
|
||||
|
||||
// Lay out the rendered children of one list item under its marker.
//
// Each entry of `childStrings` is a (possibly multi-line) rendered block. Only
// the very first physical line of the first child receives the marker
// (`${prefix} `, e.g. "- " or "1. "). EVERY other line — the rest of the first
// child and all lines of later children (nested lists, code blocks, extra
// paragraphs) — is indented by `indentWidth` spaces so it stays inside the
// item instead of collapsing to column 0 and escaping the list. Blank lines
// stay empty so no trailing whitespace is produced.
//
// `indentWidth` MUST be the LIST marker width, which is not always the visible
// prefix width:
//   - bullet  "- "           -> 2 columns
//   - task    "- [ ] "       -> marker is still "- " (the "[ ] " is content), 2
//   - ordered "1. "/"10. "   -> 3/4 columns, scaling with the number's digits
// CommonMark anchors nested content to the marker column, so an ordered item
// indented to only 2 columns would be re-parsed as a sibling/loose content on
// re-import. Callers therefore pass the exact width to use.
const indentItemChildren = (
  childStrings: string[],
  prefix: string,
  indentWidth: number,
): string => {
  const pad = " ".repeat(indentWidth);
  const out: string[] = [];
  // True until the marker line has been written.
  let markerPending = true;
  for (const child of childStrings) {
    for (const physicalLine of child.split("\n")) {
      if (markerPending) {
        out.push(`${prefix} ${physicalLine}`);
        markerPending = false;
        continue;
      }
      out.push(physicalLine.length ? `${pad}${physicalLine}` : "");
    }
  }
  return out.join("\n");
};
|
||||
|
||||
// Render one bullet/ordered list item as markdown. An item with no content is
// just its marker. Otherwise the rendered marker is `${prefix} ` (prefix plus
// one space), so the continuation indent is prefix.length + 1 — 2 for "-",
// 3 for "1.", 4 for "10." — because for these lists the visible prefix IS the
// list marker.
const processListItem = (item: any, prefix: string): string => {
  const renderedChildren = (item.content || []).map(processNode);
  return renderedChildren.length === 0
    ? prefix
    : indentItemChildren(renderedChildren, prefix, prefix.length + 1);
};
|
||||
|
||||
// Render one task-list item as markdown ("- [ ] ..." / "- [x] ...").
const processTaskItem = (item: any): string => {
  const marker = item.attrs?.checked ? "- [x]" : "- [ ]";
  const renderedChildren = (item.content || []).map(processNode);
  // An empty task item must still show its checkbox; without this early return
  // the layout step yields "" and the whole row disappears.
  if (renderedChildren.length === 0) {
    return marker;
  }
  // The list marker of a task item is only "- " (2 columns); the checkbox is
  // item content, NOT part of the marker. Continuation lines are therefore
  // indented by a fixed 2, never by the wider marker.length.
  return indentItemChildren(renderedChildren, marker, 2);
};
|
||||
|
||||
return processNode(content).trim();
|
||||
}
|
||||
export { convertProseMirrorToMarkdown } from "@docmost/prosemirror-markdown";
|
||||
|
||||
@@ -1,136 +1,15 @@
|
||||
/**
|
||||
* Self-contained Docmost-flavoured Markdown document (custom extensions).
|
||||
* Self-contained Docmost-flavoured Markdown document envelope (`docmost:meta` /
|
||||
* `docmost:comments` blocks).
|
||||
*
|
||||
* A single `.md` file that packages everything needed to losslessly round-trip
|
||||
* a page through "download -> edit body -> re-upload":
|
||||
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
|
||||
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
|
||||
* - a trailing `docmost:comments` block: a one-line JSON array of comment
|
||||
* threads.
|
||||
*
|
||||
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
|
||||
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
|
||||
* importer without first stripping the blocks, the metadata cannot leak into the
|
||||
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
|
||||
* codeBlock node, so a fenced block is deliberately NOT used.)
|
||||
*
|
||||
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
|
||||
* re-pastes an exported `.md` into a page, or a page documents this very
|
||||
* format). To stay robust, parsing treats only the FINAL, document-ending
|
||||
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
|
||||
* opener whose closing `-->` sits at the very end of the file. Any earlier
|
||||
* literal occurrence is left in the body untouched.
|
||||
*
|
||||
* NOTE on comments: in this version the comment THREAD records are preserved in
|
||||
* the file but are NOT pushed back to the server on import — only the inline
|
||||
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||
* records stays with the comment tools/UI.
|
||||
* #293 STEP 5: this envelope is now owned by the shared
|
||||
* `@docmost/prosemirror-markdown` package (the mcp copy was byte-identical to
|
||||
* the package's, so re-exporting is lossless). Kept as a thin shim so the
|
||||
* existing `./markdown-document.js` importers (client.ts, tests) do not move.
|
||||
*/
|
||||
|
||||
/**
 * Page-identity metadata stored as one-line JSON in the leading
 * `docmost:meta` block of a self-contained Docmost markdown file. Only
 * `version` is required; every other field is optional.
 */
export interface DocmostMdMeta {
  version: number;
  pageId?: string;
  slugId?: string;
  title?: string;
  spaceId?: string;
  // May be explicitly null as well as absent.
  parentPageId?: string | null;
}
|
||||
|
||||
// Match the leading meta block (leading whitespace allowed). Start-anchored and
// non-greedy, so the match ends at the first `-->` that begins a line. Capture
// group 1 is the JSON text between the markers.
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
// Match a `docmost:comments` opener. Used globally to scan for the LAST opener
// rather than end-anchoring a single regex (which would mis-capture across a
// literal opener that appears earlier in the body). Because of the `g` flag the
// regex carries `lastIndex` state between `exec` calls, so a scan must reset
// `lastIndex` to 0 before it starts.
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
|
||||
|
||||
/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. The meta block is always emitted; the comments block is always
 * emitted too (with `[]` when `comments` is not an array) so the format stays
 * uniform and parsing stays simple.
 *
 * Both JSON payloads sit inside HTML comments, so they must never contain a
 * comment terminator. A page title or comment text such as `a --> b` would
 * otherwise close the HTML comment early and let the rest of the metadata leak
 * into the document if the file is fed to a markdown importer unstripped.
 * Every `>` is therefore written as the JSON escape `\u003e`; `JSON.parse`
 * decodes it back to `>`, so the parsed values are unchanged.
 *
 * @param meta     Page metadata; a null/undefined value is written as `null`.
 * @param body     Markdown body; surrounding whitespace is trimmed.
 * @param comments Comment thread records; anything but an array becomes `[]`.
 * @returns The complete file text, ending with a newline.
 */
export function serializeDocmostMarkdown(
  meta: DocmostMdMeta,
  body: string,
  comments: any[],
): string {
  // `>` can only occur inside JSON string values (it is not a structural JSON
  // character), so a blanket replacement is safe and keeps the JSON valid.
  const toCommentSafeJson = (value: any): string =>
    JSON.stringify(value).replace(/>/g, "\\u003e");

  // `meta ?? null` keeps the block parseable: JSON.stringify(undefined) is
  // `undefined`, which would be written out as the literal text "undefined".
  const metaJson = toCommentSafeJson(meta ?? null);
  const commentsJson = toCommentSafeJson(Array.isArray(comments) ? comments : []);
  const trimmedBody = (body ?? "").trim();
  return (
    `<!-- docmost:meta\n${metaJson}\n-->\n\n` +
    `${trimmedBody}\n\n` +
    `<!-- docmost:comments\n${commentsJson}\n-->\n`
  );
}
|
||||
|
||||
/**
 * Split a self-contained file back into meta, body and comments.
 *
 * Tolerant of missing blocks: when the meta or comments block is absent (e.g.
 * a hand-written plain-markdown file) the corresponding value is `null` and
 * the text stays part of the body. A MISSING block never throws; only a
 * `JSON.parse` failure inside a block that IS present is surfaced, as an Error
 * naming the offending block. `\r\n` line endings are accepted.
 *
 * Only the LAST `docmost:comments` opener is considered, and only when its
 * closing `-->` ends the document; earlier literal occurrences in the body
 * (e.g. a re-pasted export) are left untouched.
 */
export function parseDocmostMarkdown(full: string): {
  meta: DocmostMdMeta | null;
  body: string;
  comments: any[] | null;
} {
  // Decode one block's JSON, turning a parse failure into a labelled Error.
  const decodeBlock = (blockName: string, jsonText: string): any => {
    try {
      return JSON.parse(jsonText);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`Invalid ${blockName} JSON block: ${reason}`);
    }
  };

  // Work on LF-only text so the anchored regexes behave the same for CRLF.
  const text = (full ?? "").replace(/\r\n/g, "\n");

  // Leading meta block: start-anchored, hence unambiguous. The body begins
  // right after it.
  let meta: DocmostMdMeta | null = null;
  let bodyStart = 0;
  const leading = text.match(META_RE);
  if (leading) {
    meta = decodeBlock("docmost:meta", leading[1]);
    bodyStart = (leading.index ?? 0) + leading[0].length;
  }

  // Walk every opener and keep the final one. The shared regex is global
  // (stateful), so rewind it before scanning.
  let opener: RegExpExecArray | null = null;
  COMMENTS_OPEN_RE.lastIndex = 0;
  for (
    let hit = COMMENTS_OPEN_RE.exec(text);
    hit !== null;
    hit = COMMENTS_OPEN_RE.exec(text)
  ) {
    opener = hit;
  }

  let comments: any[] | null = null;
  let bodyStop = text.length;
  if (opener !== null) {
    const tail = text.slice(opener.index + opener[0].length);
    // The closer must end the document (trailing whitespace allowed).
    const closer = tail.match(/\r?\n-->[ \t]*\r?\n?\s*$/);
    if (closer) {
      comments = decodeBlock("docmost:comments", tail.slice(0, closer.index));
      // Everything from the opener to the end of the document is metadata.
      bodyStop = opener.index;
    }
  }

  return { meta, body: text.slice(bodyStart, bodyStop).trim(), comments };
}
|
||||
export {
|
||||
serializeDocmostMarkdown,
|
||||
parseDocmostMarkdown,
|
||||
serializeDocmostMarkdownBody,
|
||||
} from "@docmost/prosemirror-markdown";
|
||||
export type { DocmostMdMeta } from "@docmost/prosemirror-markdown";
|
||||
|
||||
@@ -127,36 +127,54 @@ test("markdownToProseMirror: an aligned GFM table maps header alignment", async
|
||||
});
|
||||
|
||||
// Comment-body data-loss guard (#228 review #4): markdownToProseMirror is reused
|
||||
// for COMMENT bodies (createComment/updateComment), so it must NOT canonicalize —
|
||||
// a comment may legitimately carry a standalone footnote definition with no
|
||||
// matching reference, and canonicalization would drop the whole list (the text
|
||||
// would vanish). The page-write variant DOES canonicalize.
|
||||
test("markdownToProseMirror (comment path) PRESERVES a reference-less footnote definition", async () => {
|
||||
// for COMMENT bodies (createComment/updateComment), so it must NOT canonicalize.
|
||||
// Under the #293 canon, footnotes are INLINE (`^[body]`), so a comment can no
|
||||
// longer carry a reference-less definition to be dropped — but the comment path
|
||||
// must still (a) leave a legacy reference-style `[^id]:` line as harmless literal
|
||||
// TEXT (never silently deleted) and (b) preserve an inline footnote it does
|
||||
// contain (no canonicalization stripping it). The page-write variant canonicalizes.
|
||||
test("markdownToProseMirror (comment path) keeps a legacy `[^id]:` line as literal text", async () => {
|
||||
// A reference-style `[^1]:` line is not canonical footnote syntax anymore, so it
|
||||
// is not parsed into a footnote node — but its TEXT must survive verbatim (no
|
||||
// data loss on the comment write path).
|
||||
const md = "A comment.\n\n[^1]: a standalone footnote definition";
|
||||
const doc = await markdownToProseMirror(md);
|
||||
const defs = findAll(doc, "footnoteDefinition");
|
||||
assert.equal(defs.length, 1, "the footnote definition must be preserved");
|
||||
assert.equal(
|
||||
findAll(doc, "footnoteDefinition").length,
|
||||
0,
|
||||
"reference-style line is not a footnote node",
|
||||
);
|
||||
assert.match(
|
||||
JSON.stringify(doc),
|
||||
/a standalone footnote definition/,
|
||||
"the definition text must survive the comment write path",
|
||||
"the text must survive the comment write path",
|
||||
);
|
||||
});
|
||||
|
||||
test("markdownToProseMirrorCanonical (page path) DROPS a reference-less footnote definition", async () => {
|
||||
// Same input through the PAGE variant: with no reference, the canonical doc has
|
||||
// no footnotesList (this is the page-side behavior the comment path must avoid).
|
||||
const md = "A page.\n\n[^1]: a standalone footnote definition";
|
||||
const doc = await markdownToProseMirrorCanonical(md);
|
||||
assert.equal(findAll(doc, "footnotesList").length, 0);
|
||||
assert.equal(findAll(doc, "footnoteDefinition").length, 0);
|
||||
test("markdownToProseMirror (comment path) PRESERVES an inline footnote (no canonicalization)", async () => {
|
||||
// An inline `^[body]` footnote in a comment imports to a real footnote node and
|
||||
// is NOT dropped: the comment path must never canonicalize away content.
|
||||
const md = "A comment.\n\n^[an inline footnote]";
|
||||
const doc = await markdownToProseMirror(md);
|
||||
assert.equal(findAll(doc, "footnoteDefinition").length, 1);
|
||||
assert.equal(findAll(doc, "footnotesList").length, 1);
|
||||
assert.match(JSON.stringify(doc), /an inline footnote/);
|
||||
});
|
||||
|
||||
test("markdownToProseMirrorCanonical still canonicalizes a real page footnote (order)", async () => {
|
||||
// Page path must STILL canonicalize: refs b,a -> definitions reorder to b,a.
|
||||
const md = "See[^b] then[^a].\n\n[^a]: alpha\n[^b]: bravo";
|
||||
test("markdownToProseMirrorCanonical (page path) yields a single reference-ordered list", async () => {
|
||||
// Page path produces the canonical footnote topology: one trailing
|
||||
// `footnotesList`, definitions in FIRST-REFERENCE order, ids assigned
|
||||
// sequentially. Inline `^[body]` footnotes carry the body at the reference
|
||||
// point, so the bottom list is inherently reference-ordered.
|
||||
const md = "See^[bravo] then^[alpha].";
|
||||
const doc = await markdownToProseMirrorCanonical(md);
|
||||
const defs = findAll(doc, "footnoteDefinition").map((d) => d.attrs.id);
|
||||
assert.deepEqual(defs, ["b", "a"]);
|
||||
const defs = findAll(doc, "footnoteDefinition");
|
||||
assert.deepEqual(
|
||||
defs.map((d) => d.attrs.id),
|
||||
["fn-1", "fn-2"],
|
||||
);
|
||||
assert.equal(findAll(doc, "footnotesList").length, 1);
|
||||
// Bodies stay in reference order (bravo referenced before alpha).
|
||||
assert.match(JSON.stringify(defs[0]), /bravo/);
|
||||
assert.match(JSON.stringify(defs[1]), /alpha/);
|
||||
});
|
||||
|
||||
@@ -210,13 +210,17 @@ test("drawio round-trips through export and import", () => {
|
||||
],
|
||||
};
|
||||
|
||||
// #293 canon #8: the media family (image/video/audio/drawio/excalidraw)
|
||||
// serializes to the markdown image form `![](src)` plus a trailing
|
||||
// discriminator comment `<!--drawio {json}-->` carrying the non-src attrs.
|
||||
const body = convertProseMirrorToMarkdown(doc);
|
||||
assert.match(body, /data-type="drawio"/);
|
||||
assert.match(body, /data-src="https:\/\/example\/diagram\.xml"/);
|
||||
assert.match(body, /!\[\]\(https:\/\/example\/diagram\.xml\)/);
|
||||
assert.match(body, /<!--drawio \{"attachmentId":"att-7"\}-->/);
|
||||
|
||||
return markdownToProseMirror(body).then((rebuilt) => {
|
||||
const diagram = find(rebuilt, "drawio");
|
||||
assert.ok(diagram, "expected a drawio node after import");
|
||||
assert.equal(diagram.attrs.src, "https://example/diagram.xml");
|
||||
assert.equal(diagram.attrs.attachmentId, "att-7");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -253,22 +253,23 @@ test("insertInlineFootnote: anchor in body BEFORE a nested list still inserts",
|
||||
assert.equal(findAll(r.doc, "footnotesList").length, 1);
|
||||
});
|
||||
|
||||
test("markdown import (page path): out-of-order definitions render as a reference-ordered list", async () => {
|
||||
// References appear b, a, c in the body; definitions are written in a, b, c
|
||||
// order (the import order). The PAGE import path (markdownToProseMirrorCanonical)
|
||||
// canonicalizes so the bottom list follows REFERENCE order — numbers read 1, 2,
|
||||
// 3 down the list. (The non-canonicalizing markdownToProseMirror, used for
|
||||
// comment bodies, would keep the import order; see collaboration.test.mjs.)
|
||||
const md = [
|
||||
"See[^b] then[^a] then[^c].",
|
||||
"",
|
||||
"[^a]: alpha",
|
||||
"[^b]: bravo",
|
||||
"[^c]: charlie",
|
||||
].join("\n");
|
||||
test("markdown import (page path): inline footnotes render as a reference-ordered list", async () => {
|
||||
// Inline `^[body]` footnotes carry their body at the reference point, so the
|
||||
// PAGE import path (markdownToProseMirrorCanonical) materializes the bottom
|
||||
// list in REFERENCE order — numbers read 1, 2, 3 down the list — with ids
|
||||
// assigned sequentially (fn-1, fn-2, fn-3).
|
||||
const md = "See^[bravo] then^[alpha] then^[charlie].";
|
||||
const json = await markdownToProseMirrorCanonical(md);
|
||||
assert.deepEqual(defIds(json), ["b", "a", "c"]);
|
||||
assert.deepEqual(defIds(json), ["fn-1", "fn-2", "fn-3"]);
|
||||
assert.equal(findAll(json, "footnotesList").length, 1);
|
||||
// Bodies materialize in reference order (bravo, alpha, charlie).
|
||||
const defsJson = JSON.stringify(findAll(json, "footnoteDefinition"));
|
||||
assert.ok(
|
||||
defsJson.indexOf("bravo") <
|
||||
defsJson.indexOf("alpha") &&
|
||||
defsJson.indexOf("alpha") < defsJson.indexOf("charlie"),
|
||||
"definitions follow reference order",
|
||||
);
|
||||
});
|
||||
|
||||
test("generateFootnoteId: valid uuidv7 shape (version 7, variant 8..b) and unique", () => {
|
||||
|
||||
@@ -49,15 +49,19 @@ const footnoteDoc = {
|
||||
],
|
||||
};
|
||||
|
||||
test("JSON -> Markdown emits pandoc footnote syntax", () => {
|
||||
test("JSON -> Markdown emits canonical inline footnote syntax (#293 canon #2)", () => {
|
||||
// Canonical markdown form is Pandoc/Obsidian INLINE footnotes: the note body is
|
||||
// written at the reference point as `^[body]`. There is NO `[^id]` reference
|
||||
// marker and NO trailing `[^id]: …` definition list; the schema id never
|
||||
// reaches markdown.
|
||||
const md = convertProseMirrorToMarkdown(footnoteDoc);
|
||||
assert.match(md, /\[\^fn1\]/);
|
||||
assert.match(md, /\[\^fn2\]/);
|
||||
assert.match(md, /\[\^fn1\]: First note\./);
|
||||
assert.match(md, /\[\^fn2\]: Second note\./);
|
||||
assert.match(md, /\^\[First note\.\]/);
|
||||
assert.match(md, /\^\[Second note\.\]/);
|
||||
assert.doesNotMatch(md, /\[\^/); // no reference-style markers
|
||||
assert.doesNotMatch(md, /^\[\^.+\]:/m); // no bottom definition lines
|
||||
});
|
||||
|
||||
test("Markdown -> JSON rebuilds footnote nodes", async () => {
|
||||
test("Markdown -> JSON rebuilds footnote nodes with sequential fn-N ids", async () => {
|
||||
const md = convertProseMirrorToMarkdown(footnoteDoc);
|
||||
const json = await markdownToProseMirror(md);
|
||||
|
||||
@@ -65,42 +69,39 @@ test("Markdown -> JSON rebuilds footnote nodes", async () => {
|
||||
const list = findAll(json, "footnotesList");
|
||||
const defs = findAll(json, "footnoteDefinition");
|
||||
|
||||
// Structure is preserved; ids are (re)assigned sequentially in first-reference
|
||||
// order by the importer (fn-1, fn-2, …) — the concrete id is never carried in
|
||||
// markdown, so it is derived on import.
|
||||
assert.equal(refs.length, 2);
|
||||
assert.deepEqual(
|
||||
refs.map((r) => r.attrs.id),
|
||||
["fn1", "fn2"],
|
||||
["fn-1", "fn-2"],
|
||||
);
|
||||
assert.equal(list.length, 1);
|
||||
assert.equal(defs.length, 2);
|
||||
assert.deepEqual(
|
||||
defs.map((d) => d.attrs.id),
|
||||
["fn1", "fn2"],
|
||||
["fn-1", "fn-2"],
|
||||
);
|
||||
});
|
||||
|
||||
test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
|
||||
test("JSON -> MD -> JSON is byte-stable and preserves footnote body text", async () => {
|
||||
const md = convertProseMirrorToMarkdown(footnoteDoc);
|
||||
const json = await markdownToProseMirror(md);
|
||||
const md2 = convertProseMirrorToMarkdown(json);
|
||||
|
||||
// The second markdown serialization carries the same markers + definitions.
|
||||
assert.match(md2, /\[\^fn1\]/);
|
||||
assert.match(md2, /\[\^fn2\]/);
|
||||
assert.match(md2, /\[\^fn1\]: First note\./);
|
||||
assert.match(md2, /\[\^fn2\]: Second note\./);
|
||||
// The round trip is byte-stable (ids are not written to markdown, so the
|
||||
// concrete import id cannot perturb the output) and the bodies survive.
|
||||
assert.equal(md2, md);
|
||||
assert.match(md2, /\^\[First note\.\]/);
|
||||
assert.match(md2, /\^\[Second note\.\]/);
|
||||
});
|
||||
|
||||
test("repeated references REUSE one footnote; duplicate definitions are first-wins (#166)", async () => {
|
||||
// Reuse semantics: many `[^d]` references + several `[^d]:` definitions import
|
||||
// as ONE footnote — the references all keep id "d" (reuse), and only the FIRST
|
||||
// definition is kept (first-wins). Deterministic and stable across re-imports.
|
||||
const md = [
|
||||
"See[^d] one[^d] two[^d].",
|
||||
"",
|
||||
"[^d]: first",
|
||||
"[^d]: second",
|
||||
"[^d]: third",
|
||||
].join("\n");
|
||||
test("identical footnote bodies MERGE to one shared definition (#293 canon #2)", async () => {
|
||||
// Two references whose bodies are byte-identical import as ONE definition
|
||||
// shared by both references (dedup on the exact body text). Two DIFFERENT
|
||||
// bodies stay distinct. Deterministic and stable across re-imports.
|
||||
const md = "See^[same] and^[same], but^[other].";
|
||||
|
||||
const idsOf = async () => {
|
||||
const json = await markdownToProseMirror(md);
|
||||
@@ -120,11 +121,11 @@ test("repeated references REUSE one footnote; duplicate definitions are first-wi
|
||||
|
||||
// Stable across runs.
|
||||
assert.deepEqual(a, b);
|
||||
// Reuse: all three reference markers stay "d".
|
||||
assert.deepEqual(a.refs, ["d", "d", "d"]);
|
||||
// First-wins: a single definition "d" with the FIRST text.
|
||||
assert.deepEqual(a.defIds, ["d"]);
|
||||
assert.equal(a.defText, "first");
|
||||
// Merge: the two "same" references share fn-1; the "other" reference is fn-2.
|
||||
assert.deepEqual(a.refs, ["fn-1", "fn-1", "fn-2"]);
|
||||
// One definition per unique body, in first-reference order.
|
||||
assert.deepEqual(a.defIds, ["fn-1", "fn-2"]);
|
||||
assert.equal(a.defText, "same|other");
|
||||
});
|
||||
|
||||
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
|
||||
|
||||
@@ -70,7 +70,7 @@ test("hardBreak -> trailing two-spaces+newline", () => {
|
||||
assert.equal(convertProseMirrorToMarkdown(input), "line1 \nline2");
|
||||
});
|
||||
|
||||
test("table cell with two block children joined by a space (and a pipe escaped)", () => {
|
||||
test("table cell with two block children falls back to a raw HTML table", () => {
|
||||
const input = doc({
|
||||
type: "table",
|
||||
content: [
|
||||
@@ -86,11 +86,12 @@ test("table cell with two block children joined by a space (and a pipe escaped)"
|
||||
],
|
||||
});
|
||||
|
||||
// Single-column header row + separator. The cell joins its two paragraphs
|
||||
// with a space ("a|b c") then escapes the pipe -> "a\|b c".
|
||||
// A pipe-table cell cannot represent two block children, so the canonical
|
||||
// converter emits the whole table as raw HTML (lossless) rather than lossily
|
||||
// flattening the paragraphs into one cell.
|
||||
assert.equal(
|
||||
convertProseMirrorToMarkdown(input),
|
||||
"| a\\|b c |\n| --- |",
|
||||
"<table><tbody><tr><td><p>a|b</p><p>c</p></td></tr></tbody></table>",
|
||||
);
|
||||
});
|
||||
|
||||
@@ -108,20 +109,20 @@ test("code block trailing newline trimmed", () => {
|
||||
);
|
||||
});
|
||||
|
||||
test("textAlign value: delimiting double-quote escaped (attribute-safe, idempotent; < > left literal/inert)", () => {
|
||||
test("textAlign is carried in a trailing attached-comment directive (JSON-encoded, safe)", () => {
|
||||
const input = doc({
|
||||
type: "paragraph",
|
||||
attrs: { textAlign: 'right"><b' },
|
||||
content: [text("body")],
|
||||
});
|
||||
|
||||
// Attribute values escape only & and " so the value cannot break out of the
|
||||
// quoted attribute. < and > are left literal: parse5/jsdom does NOT decode
|
||||
// </> inside attribute values, so escaping them would corrupt the value
|
||||
// and accumulate on every round-trip. The literal < > are inert inside quotes.
|
||||
// #293 canon #9: paragraph textAlign has no native markdown syntax, so it is
|
||||
// attached as a trailing `<!--attrs {json}-->` comment on the block. The value
|
||||
// is JSON-encoded, so a hostile value (`"`, `<`, `>`) is carried verbatim and
|
||||
// inert — it cannot break out of the comment.
|
||||
assert.equal(
|
||||
convertProseMirrorToMarkdown(input),
|
||||
'<div align="right"><b">body</div>',
|
||||
'body <!--attrs {"textAlign":"right\\"><b"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
@@ -150,10 +151,10 @@ test("empty task item still emits its marker", () => {
|
||||
assert.equal(convertProseMirrorToMarkdown(input), "- [ ]\n- [x]");
|
||||
});
|
||||
|
||||
// Image captions (issue #221). An image WITHOUT a caption stays the lossy-free
|
||||
// ``; WITH a caption it is emitted as a raw <img data-caption>
|
||||
// wrapped in a block <div> (symmetric to video) so the round-trip md -> html ->
|
||||
// json restores the caption via the image extension's parseHTML.
|
||||
// Image captions (issue #221 / #293 canon #8). An image WITHOUT a caption stays
|
||||
// the plain `![alt](src)`; WITH a caption (or any other non-src attr) the extra
|
||||
// attrs ride in a trailing `<!--img {json}-->` discriminator comment on the
|
||||
// markdown image form, so the round-trip md -> json restores them.
|
||||
test("image without a caption emits plain ", () => {
|
||||
const input = doc({
|
||||
type: "image",
|
||||
@@ -162,24 +163,24 @@ test("image without a caption emits plain ", () => {
|
||||
assert.equal(convertProseMirrorToMarkdown(input), "");
|
||||
});
|
||||
|
||||
test("image with a caption emits a raw <img data-caption> in a block div", () => {
|
||||
test("image with a caption emits  plus an <!--img--> directive", () => {
|
||||
const input = doc({
|
||||
type: "image",
|
||||
attrs: { src: "/files/a.png", alt: "cat", caption: "A grey cat" },
|
||||
});
|
||||
assert.equal(
|
||||
convertProseMirrorToMarkdown(input),
|
||||
'<div><img src="/files/a.png" alt="cat" data-caption="A grey cat"></div>',
|
||||
' <!--img {"caption":"A grey cat"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
test("image caption escapes & and \" in the data-caption attribute", () => {
|
||||
test("image caption is JSON-encoded in the <!--img--> directive (& and \" safe)", () => {
|
||||
const input = doc({
|
||||
type: "image",
|
||||
attrs: { src: "/files/a.png", caption: 'Tom & "Jerry"' },
|
||||
});
|
||||
assert.equal(
|
||||
convertProseMirrorToMarkdown(input),
|
||||
'<div><img src="/files/a.png" data-caption="Tom & "Jerry""></div>',
|
||||
' <!--img {"caption":"Tom & \\"Jerry\\""}-->',
|
||||
);
|
||||
});
|
||||
|
||||
@@ -55,8 +55,10 @@ test("round-trip: drawio diagram survives with src, title, dimensions, align, at
|
||||
},
|
||||
"drawio",
|
||||
);
|
||||
// The converter must emit the schema-matching div[data-type="drawio"].
|
||||
assert.match(md, /data-type="drawio"/);
|
||||
// #293 canon #8: the media family serializes to the markdown image form plus a
|
||||
// trailing discriminator comment carrying the non-src attrs.
|
||||
assert.match(md, /^!\[\]\(\/api\/files\/d\.drawio\)/);
|
||||
assert.match(md, /<!--drawio \{.*"attachmentId":"dz1".*\}-->/);
|
||||
assert.equal(found.length, 1, "drawio node must survive the round-trip");
|
||||
const a = found[0].attrs;
|
||||
assert.equal(a.src, "/api/files/d.drawio");
|
||||
@@ -123,13 +125,19 @@ test("round-trip: pdf preserves width/height (standard attrs) plus name", async
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Escaping: a src containing a double quote must survive the attribute-quoted
|
||||
// HTML emission (escapeAttr) and re-parse to the exact original value, with no
|
||||
// node loss and no HTML injection.
|
||||
// Escaping: a src containing a double quote must survive the markdown image form
|
||||
// with no node loss and no injection. In the `![](src)` link the URL is
|
||||
// normalized (a raw `"` percent-encodes to `%22`) on import — a semantically
|
||||
// equivalent, IDEMPOTENT normalization (it does not drift on further round
|
||||
// trips), not data loss.
|
||||
// ---------------------------------------------------------------------------
|
||||
test("round-trip: a src containing a double quote is escaped and recovered intact", async () => {
|
||||
test("round-trip: a src containing a double quote is normalized (idempotent) and survives", async () => {
|
||||
const tricky = 'https://e.com/x?a="b"&c=1';
|
||||
const normalized = "https://e.com/x?a=%22b%22&c=1";
|
||||
const { found } = await roundtrip({ type: "youtube", attrs: { src: tricky } }, "youtube");
|
||||
assert.equal(found.length, 1, "node must survive a quote-bearing src");
|
||||
assert.equal(found[0].attrs.src, tricky, "the exact src is recovered");
|
||||
assert.equal(found[0].attrs.src, normalized, "the quote is percent-encoded in the URL");
|
||||
// Idempotent: a second round trip from the normalized node is byte-stable.
|
||||
const again = await roundtrip({ type: "youtube", attrs: { src: normalized } }, "youtube");
|
||||
assert.equal(again.found[0].attrs.src, normalized);
|
||||
});
|
||||
|
||||
@@ -25,19 +25,15 @@ const findAll = (node, type, acc = []) => {
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DATA-LOSS: atom block nodes with no converter case serialize to "" and the
|
||||
// whole block disappears from markdown export.
|
||||
//
|
||||
// markdown-converter.ts has a `default` branch (~line 601) that renders a node
|
||||
// as `nodeContent.map(processNode).join("")`. For a leaf/atom node (no
|
||||
// content) that yields "" — so the node (and ALL its attributes) is dropped.
|
||||
// `htmlEmbed` and `pageBreak` are both block atoms in docmost-schema.ts with no
|
||||
// case in the converter, so they vanish on markdown export.
|
||||
//
|
||||
// These tests assert the CURRENT (buggy) behavior and name it, so that when a
|
||||
// converter case is added the failing assertion flags the test for an update.
|
||||
// #293 canon: atom block nodes with no NATIVE markdown syntax are preserved via
|
||||
// dedicated converter forms (they used to serialize to "" and vanish — the old
|
||||
// mcp converter's data-loss gap, now fixed by consuming the shared package):
|
||||
// - htmlEmbed -> a raw `<div data-type="htmlEmbed" data-source=… data-height=…>`
|
||||
// block (source base64-encoded so arbitrary HTML is inert);
|
||||
// - pageBreak -> a standalone `<!--pagebreak-->` machinery comment (#5).
|
||||
// Both survive markdown export AND a full PM -> markdown -> PM round-trip.
|
||||
// ---------------------------------------------------------------------------
|
||||
test("DATA-LOSS: an htmlEmbed block is silently dropped from markdown export (no converter case)", () => {
|
||||
test("htmlEmbed block survives markdown export (source + height preserved)", () => {
|
||||
const input = doc(
|
||||
para(text("before")),
|
||||
{ type: "htmlEmbed", attrs: { source: "<b>hi</b>", height: 200 } },
|
||||
@@ -45,32 +41,31 @@ test("DATA-LOSS: an htmlEmbed block is silently dropped from markdown export (no
|
||||
);
|
||||
const md = convertProseMirrorToMarkdown(input);
|
||||
|
||||
// BUG: the htmlEmbed block, including its `source` and `height` attrs, is
|
||||
// gone — only the surrounding paragraphs survive. If a future fix adds an
|
||||
// htmlEmbed case, update this test to assert the block (or a placeholder)
|
||||
// survives instead.
|
||||
assert.equal(md, "before\n\n\n\nafter", "htmlEmbed currently disappears");
|
||||
assert.ok(!md.includes("<b>hi</b>"), "the embed source is NOT preserved (data-loss)");
|
||||
assert.match(md, /data-type="htmlEmbed"/);
|
||||
assert.match(md, /data-height="200"/);
|
||||
// The raw source is base64-encoded in data-source (not emitted verbatim), so
|
||||
// the surrounding markdown cannot be corrupted by hostile embed HTML.
|
||||
assert.match(md, /data-source="[^"]+"/);
|
||||
assert.ok(md.includes("before") && md.includes("after"));
|
||||
});
|
||||
|
||||
test("DATA-LOSS: an htmlEmbed does NOT round-trip (PM -> markdown -> PM loses the node)", async () => {
|
||||
test("htmlEmbed round-trips PM -> markdown -> PM (node + source recovered)", async () => {
|
||||
const input = doc(
|
||||
para(text("x")),
|
||||
{ type: "htmlEmbed", attrs: { source: "<i>raw</i>", height: 120 } },
|
||||
);
|
||||
const out = await markdownToProseMirror(convertProseMirrorToMarkdown(input));
|
||||
assert.equal(
|
||||
findAll(out, "htmlEmbed").length,
|
||||
0,
|
||||
"htmlEmbed is lost across a markdown round-trip (known data-loss gap)",
|
||||
);
|
||||
const embeds = findAll(out, "htmlEmbed");
|
||||
assert.equal(embeds.length, 1, "htmlEmbed survives the markdown round-trip");
|
||||
assert.equal(embeds[0].attrs.source, "<i>raw</i>", "source recovered intact");
|
||||
});
|
||||
|
||||
test("DATA-LOSS: a pageBreak block is silently dropped from markdown export (no converter case)", () => {
|
||||
test("pageBreak block survives markdown export and round-trips", async () => {
|
||||
const input = doc(para(text("a")), { type: "pageBreak" }, para(text("b")));
|
||||
const md = convertProseMirrorToMarkdown(input);
|
||||
// BUG: pageBreak (a block atom with no converter case) disappears.
|
||||
assert.equal(md, "a\n\n\n\nb", "pageBreak currently disappears");
|
||||
assert.match(md, /<!--pagebreak-->/);
|
||||
const out = await markdownToProseMirror(md);
|
||||
assert.equal(findAll(out, "pageBreak").length, 1);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -165,3 +165,67 @@ test("import: a colored mention span keeps the mention node", async () => {
|
||||
const out = await markdownToProseMirror('<span data-type="mention" data-id="u1" data-label="Alice" style="color: blue">@Alice</span>');
|
||||
assert.equal(findNodes(out, "mention").length, 1, "mention node must survive a colored span");
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #293 STEP 5 canon safety net. These assert STRUCTURE/content preservation
|
||||
// (format-agnostic: the node/mark and its value survive PM -> markdown -> PM,
|
||||
// and the markdown is idempotent), NOT the exact markdown bytes — so they stay
|
||||
// valid regardless of the concrete canonical spelling. They cover the node/mark
|
||||
// types whose canonical markdown form changed in #293 (highlight-without-color,
|
||||
// textAlign, subpages, inline footnotes) and complement the existing math /
|
||||
// media / mention / column round-trips above.
|
||||
// ---------------------------------------------------------------------------
|
||||
test("round-trip: highlight WITHOUT a color survives as a highlight mark (==)", async () => {
  // A highlight mark whose color attribute is explicitly null.
  const colorlessHighlight = { type: "highlight", attrs: { color: null } };
  const input = doc(para(text("hi", [colorlessHighlight])));

  const md = convertProseMirrorToMarkdown(input);
  const out = await roundtrip(input);

  // Look for the original text node still carrying a highlight mark.
  const carriesHighlight = (node) =>
    (node.marks || []).some((mark) => mark.type === "highlight");
  const survivor = findNodes(out, "text").find(
    (node) => node.text === "hi" && carriesHighlight(node),
  );
  assert.ok(survivor, "the highlight mark must survive a color-less round-trip");

  // Re-serializing the round-tripped document yields the same markdown.
  assert.equal(convertProseMirrorToMarkdown(out), md);
});
|
||||
|
||||
test("round-trip: paragraph textAlign survives via the attached-comment directive", async () => {
  // A single centered paragraph is the whole document.
  const centeredParagraph = {
    type: "paragraph",
    attrs: { textAlign: "center" },
    content: [text("mid")],
  };
  const input = doc(centeredParagraph);

  const md = convertProseMirrorToMarkdown(input);
  const out = await roundtrip(input);

  // At least one paragraph must come back with textAlign === "center".
  const restored = findNodes(out, "paragraph").some(
    (node) => node.attrs && node.attrs.textAlign === "center",
  );
  assert.ok(restored, "textAlign must be restored on the paragraph");

  // Serializing the round-tripped document reproduces the first markdown.
  assert.equal(convertProseMirrorToMarkdown(out), md, "textAlign round-trip is idempotent");
});
|
||||
|
||||
test("round-trip: subpages atom survives", async () => {
  // The document consists of nothing but a bare `subpages` node.
  const out = await roundtrip(doc({ type: "subpages" }));
  const survivors = findNodes(out, "subpages");
  assert.equal(survivors.length, 1, "subpages node must survive");
});
|
||||
|
||||
test("round-trip: inline footnote survives with body text (canonical structure)", async () => {
  // One reference in the body plus its matching definition in a footnotes list.
  const footnoteId = "fnA";
  const claim = para(text("Claim"), {
    type: "footnoteReference",
    attrs: { id: footnoteId },
  });
  const definition = {
    type: "footnoteDefinition",
    attrs: { id: footnoteId },
    content: [para(text("the evidence"))],
  };
  const input = doc(claim, { type: "footnotesList", content: [definition] });

  const md = convertProseMirrorToMarkdown(input);
  const out = await roundtrip(input);

  // Exactly one node of each footnote type comes back.
  for (const type of ["footnoteReference", "footnotesList", "footnoteDefinition"]) {
    assert.equal(findNodes(out, type).length, 1);
  }
  assert.match(JSON.stringify(out), /the evidence/, "footnote body survives");

  // Byte-stable (the schema id is never written to markdown).
  assert.equal(convertProseMirrorToMarkdown(out), md);
});
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"name": "@docmost/prosemirror-markdown",
|
||||
"version": "0.1.0",
|
||||
"description": "Pure ProseMirror <-> Markdown converter + schema mirror (headless, framework-free).",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"main": "./build/index.js",
|
||||
"types": "./build/index.d.ts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./build/index.d.ts",
|
||||
"default": "./build/index.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"watch": "tsc --watch",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest"
|
||||
},
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@tiptap/core": "3.20.4",
|
||||
"@tiptap/extension-highlight": "3.20.4",
|
||||
"@tiptap/extension-image": "3.20.4",
|
||||
"@tiptap/extension-subscript": "3.20.4",
|
||||
"@tiptap/extension-superscript": "3.20.4",
|
||||
"@tiptap/extension-task-item": "3.20.4",
|
||||
"@tiptap/extension-task-list": "3.20.4",
|
||||
"@tiptap/html": "3.20.4",
|
||||
"@tiptap/pm": "3.20.4",
|
||||
"@tiptap/starter-kit": "3.20.4",
|
||||
"jsdom": "25.0.0",
|
||||
"marked": "17.0.5",
|
||||
"zod": "4.3.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@docmost/editor-ext": "workspace:*",
|
||||
"@types/jsdom": "^21.1.7",
|
||||
"@types/node": "^20.0.0",
|
||||
"fast-check": "^4.8.0",
|
||||
"typescript": "^5.0.0",
|
||||
"vitest": "4.1.6"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
/**
|
||||
* Public surface of `@docmost/prosemirror-markdown`.
|
||||
*
|
||||
* A headless, framework-free ProseMirror <-> Markdown converter plus the
|
||||
* Docmost schema mirror. Everything lives under `lib/` (the converter core);
|
||||
* this top-level barrel simply re-exports that surface so the package entry is
|
||||
* the converter surface.
|
||||
*/
|
||||
export * from "./lib/index.js";
|
||||
@@ -0,0 +1,124 @@
|
||||
/**
|
||||
* Attached-comment convention (#293 canon).
|
||||
*
|
||||
* Some block-level attributes have no native markdown syntax (paragraph/heading
|
||||
* `textAlign` — #9; image/media attrs — #4/#8). Rather than HTML-wrapping the
|
||||
* whole block (the old `<div align>` / `<p style>` forms, which the maintainer
|
||||
* had to patch repeatedly and which did not round-trip cleanly), we ATTACH a
|
||||
* compact HTML comment at the END of the block's rendered line:
|
||||
*
|
||||
* Some paragraph text <!--attrs {"textAlign":"center"}-->
|
||||
*
|
||||
* The comment is invisible in any markdown renderer and is dropped by the
|
||||
* DOM/generateJSON import stage, so it can never leak into the document body.
|
||||
* The importer intercepts it BEFORE that stage (see markdown-to-prosemirror's
|
||||
* applyAttachedComments) and re-applies the encoded attributes to the node.
|
||||
*
|
||||
* This module holds the two PURE, reusable primitives of the convention so the
|
||||
* serializer, the parser, and future decisions (#4 image, #8 media) share ONE
|
||||
* implementation:
|
||||
* - `attachedCommentFor(name, json)` — build the comment string.
|
||||
* - `parseAttachedComment(data)` — parse a comment node's data back.
|
||||
*/
|
||||
|
||||
/**
 * Shape returned when an attached comment has been parsed successfully.
 */
export interface AttachedComment {
  /** The leading name token of the comment. */
  name: string;
  /**
   * The decoded JSON object payload. It is an empty object when the comment
   * carried no JSON body.
   */
  attrs: { [key: string]: unknown };
}
|
||||
|
||||
/**
|
||||
* Grammar of an attached comment's DATA (the text between `<!--` and `-->`):
|
||||
* a leading name token (`attrs`, `img`, …) optionally followed by whitespace
|
||||
* and a single JSON object. The name deliberately does NOT allow `:` so the
|
||||
* file-level envelope comments (`docmost:meta` / `docmost:comments`) never match
|
||||
* and stay inert here.
*
* Capture group 1 is the name: an ASCII letter followed by any run of word
* characters or hyphens. Capture group 2 is optional; when present it must be
* separated from the name by whitespace, starts at `{`, and (being greedy)
* runs through the final `}` of the data. Whitespace is tolerated before the
* name and after the last token. The pattern has no `g`/`y` flag, so `exec`
* keeps no state between calls.
|
||||
*/
|
||||
const ATTACHED_COMMENT_RE = /^\s*([A-Za-z][\w-]*)(?:\s+(\{[\s\S]*\}))?\s*$/;
|
||||
|
||||
/**
 * Render the ATTACHED form of a machinery comment: `<!--name {compact-json}-->`.
 *
 * The payload comes from `escapeCommentJson`: compact `JSON.stringify` output
 * in which every `--` pair is rewritten as the JSON unicode escapes
 * `\u002d\u002d`. A string value may legitimately contain `--`, and left as-is
 * it could close the HTML comment early (`-->`). The escape does not change
 * the decoded value, because `JSON.parse` on the reading side turns
 * `\u002d` back into a hyphen.
 *
 * @param name - leading name token of the comment (e.g. `attrs`, `img`).
 * @param json - attribute object to encode as the comment payload.
 * @returns the complete comment string, always including a JSON body.
 */
export function attachedCommentFor(name: string, json: object): string {
  const payload = escapeCommentJson(json);
  return `<!--${name} ${payload}-->`;
}
|
||||
|
||||
/**
 * Serialize `json` compactly and neutralize every `--` pair so the result can
 * be embedded in an HTML comment without closing it early.
 *
 * Both comment builders (`attachedCommentFor` and `standaloneCommentFor`) go
 * through this one helper so the two forms stay in sync.
 *
 * Why the escape is lossless: each hyphen of a `--` pair becomes the JSON
 * unicode escape `\u002d`, which `JSON.parse` decodes back to `-`. In
 * `JSON.stringify` output a `--` pair can only sit inside a string (structural
 * JSON never produces one), so a blanket replacement over the whole payload is
 * safe. Pairs are consumed left to right without overlap, so an odd run such
 * as `---` becomes two escapes plus one bare `-` and still decodes to `---`.
 *
 * @param json - the object to serialize.
 * @returns compact JSON text containing no `--` sequence.
 */
function escapeCommentJson(json: object): string {
  const compact = JSON.stringify(json);
  // split/join replaces each non-overlapping `--`, scanning left to right.
  return compact.split("--").join("\\u002d\\u002d");
}
|
||||
|
||||
/**
 * Render the STANDALONE form of a machinery comment (#293 canon #5), used for
 * a block node that occupies its OWN line, e.g. `<!--pagebreak-->` or
 * `<!--subpages-->`.
 *
 * The grammar matches the attached form (`<!--name {JSON?}-->`); the only
 * difference is that the JSON body is omitted when there is nothing to carry:
 *   - `standaloneCommentFor("pagebreak")`  -> `<!--pagebreak-->`
 *   - `standaloneCommentFor("subpages")`   -> `<!--subpages-->`
 *   - `standaloneCommentFor("subpages", {recursive:true})`
 *                                          -> `<!--subpages {"recursive":true}-->`
 *
 * When a body is emitted it is produced by `escapeCommentJson`, the same
 * encoder the attached form uses, so the two encoders cannot diverge.
 *
 * @param name - name token of the comment.
 * @param attrs - optional attributes; a falsy value or an object without own
 *   enumerable keys yields the name-only form.
 * @returns the complete comment string.
 */
export function standaloneCommentFor(name: string, attrs?: object | null): string {
  const nameOnly = `<!--${name}-->`;
  if (!attrs) return nameOnly;
  if (Object.keys(attrs).length === 0) return nameOnly;

  const payload = escapeCommentJson(attrs);
  return `<!--${name} ${payload}-->`;
}
|
||||
|
||||
/**
 * Decode the DATA of a comment node (the text between `<!--` and `-->`) into
 * `{ name, attrs }`.
 *
 * Fail-open by design (maintainer spec): `null` is returned, so the caller can
 * ignore the comment and keep default attributes, when
 *   - the data does not match `ATTACHED_COMMENT_RE` (e.g. no name token),
 *   - the JSON body is malformed, or
 *   - the JSON body is valid but not a plain object (array, primitive, null).
 * Unknown keys inside a valid object are returned untouched; filtering them is
 * the caller's job.
 *
 * @param data - raw comment data.
 * @returns the parsed comment, or `null` when it is not a well-formed attached
 *   comment. A name-only comment yields an empty `attrs` object.
 */
export function parseAttachedComment(data: string): AttachedComment | null {
  const match = ATTACHED_COMMENT_RE.exec(data);
  if (match === null) return null;

  const [, name, body] = match;
  if (body === undefined) {
    // Name-only marker: valid, but carries no attributes.
    return { name, attrs: {} };
  }

  let payload: unknown;
  try {
    payload = JSON.parse(body);
  } catch {
    return null; // fail-open: malformed JSON -> ignore the comment
  }

  const isPlainObject =
    typeof payload === "object" && payload !== null && !Array.isArray(payload);
  // fail-open: anything other than a plain object is ignored
  return isPlainObject ? { name, attrs: payload as Record<string, unknown> } : null;
}
|
||||
+8
-3
@@ -45,9 +45,11 @@
|
||||
* converter coercing numeric `width`/`height` to strings, which is outside
|
||||
* canonicalize's scope.
|
||||
*
|
||||
* NOTE: `image` has NO non-null align default — its `align` defaults to `null`
|
||||
* (docmost-schema.ts L174), so it is already handled by the null-drop rule and
|
||||
* is intentionally NOT listed here.
|
||||
* NOTE: `image` align now defaults to `"center"` — unified with editor-ext
|
||||
* (#293 canon #4). It is listed below so a canonical image drops `align` when
|
||||
* it equals "center" (absent ≡ default), exactly like the diagram/media nodes.
|
||||
* A null align is likewise dropped by the null-drop rule and re-imports as the
|
||||
* "center" default, so bare `![](src)` images stay canonically clean.
|
||||
*/
|
||||
const KNOWN_DEFAULTS: Record<string, Record<string, unknown>> = {
|
||||
// mark types
|
||||
@@ -62,6 +64,9 @@ const KNOWN_DEFAULTS: Record<string, Record<string, unknown>> = {
|
||||
orderedList: {
|
||||
start: 1,
|
||||
},
|
||||
image: {
|
||||
align: "center",
|
||||
},
|
||||
drawio: {
|
||||
align: "center",
|
||||
},
|
||||
+16
-1
@@ -256,7 +256,22 @@ const DocmostAttributes = Extension.create({
|
||||
{
|
||||
types: ["image"],
|
||||
attributes: {
|
||||
align: { default: null },
|
||||
// #293 canon #4: the image `align` default is unified to "center"
|
||||
// (matching editor-ext, the source of real user documents) so an
|
||||
// editor-authored image — which is always align="center" — serializes
|
||||
// as the clean `![](src)` form with NO attached comment, and only a
|
||||
// genuinely non-default alignment (left/right) emits an `<!--img-->`
|
||||
// comment. The DOM attribute name stays `align` (imageToHtml already
|
||||
// round-trips it as align="…"); only the DEFAULT value changed from
|
||||
// null to "center". parseHTML reads the `align` attribute so a bare
|
||||
// <img> with no align falls back to "center", and <img align="left">
|
||||
// reads "left".
|
||||
align: {
|
||||
default: "center",
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("align") || "center",
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.align && attrs.align !== "center" ? { align: attrs.align } : {},
|
||||
},
|
||||
// imageToHtml emits these Docmost-specific image attrs as data-*; map
|
||||
// them back explicitly so a top-level image (or one inside a column)
|
||||
// round-trips them. Without a parseHTML the default reads the bare
|
||||
@@ -0,0 +1,61 @@
|
||||
/**
|
||||
* #293 canon #2: inline footnotes `^[text]`.
|
||||
*
|
||||
* Shared, side-effect-free helpers used by BOTH the serializer
|
||||
* (markdown-converter.ts) and the importer (markdown-to-prosemirror.ts) so the
|
||||
* two directions cannot drift.
|
||||
*
|
||||
* The canonical markdown form is Pandoc/Obsidian inline footnotes: the note body
|
||||
* is written AT the reference point as `^[body]`; there is no separate
|
||||
* `[^id]: …` definition line and no bottom `<section>` list in the markdown. On
|
||||
* import the body is re-assembled into the schema's doc-level
|
||||
* `footnotesList`/`footnoteDefinition` so the editor sees the usual three-node
|
||||
* footnote model, while identical bodies MERGE to a single definition shared by
|
||||
* every reference. Ids are assigned by the importer's assembleFootnotes pass
|
||||
* (dedup on the EXACT body text -> sequential `fn-N`), NOT derived from a hash,
|
||||
* so two DIFFERENT bodies can never collide onto one definition (F1). The id is
|
||||
* never written to markdown (`^[body]` carries only text), so the round trip
|
||||
* stays byte-stable regardless of the concrete id.
|
||||
*/
|
||||
|
||||
/**
 * Break an ENCODED footnote body into the markdown source of each paragraph.
 *
 * The input is scanned left to right. A backslash that has a following
 * character is always consumed together with that character:
 *   - backslash + `n` is the paragraph separator (closes the current
 *     paragraph, contributes no text);
 *   - any other pair (`\\`, `\[`, `\$`, …) is copied through verbatim, so a
 *     later inline parse can decode it.
 * Because `\\` is consumed as a pair, the `n` in `\\n` is an ordinary letter
 * and does not split. A lone trailing backslash is copied as-is.
 *
 * @param encoded the encoded body text
 * @returns one string per paragraph; always at least one element (an empty
 *          input yields `[""]`)
 */
export function splitFootnoteParagraphs(encoded: string): string[] {
  const out: string[] = [];
  let buf = "";

  for (let pos = 0; pos < encoded.length; pos++) {
    const ch = encoded[pos];
    const hasFollower = pos + 1 < encoded.length;

    if (ch !== "\\" || !hasFollower) {
      buf += ch;
      continue;
    }

    const follower = encoded[pos + 1];
    pos++; // the backslash and its follower are consumed as one unit

    if (follower === "n") {
      // Separator: flush the paragraph collected so far.
      out.push(buf);
      buf = "";
    } else {
      // Escaped pair kept exactly as written.
      buf += ch + follower;
    }
  }

  out.push(buf);
  return out;
}
|
||||
@@ -19,6 +19,25 @@ export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
|
||||
|
||||
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
|
||||
|
||||
// The Docmost tiptap schema mirror. Exposed so consumers (and the sync
|
||||
// engine's schema-validity regression tests) can build the exact ProseMirror
|
||||
// schema the converter targets.
|
||||
export { docmostExtensions } from "./docmost-schema.js";
|
||||
|
||||
// Schema-adjacent sanitizers used by consumers (mcp) so the single canonical,
|
||||
// alias-aware / allowlist implementations live ONLY here (no drifting copies).
|
||||
export { clampCalloutType, sanitizeCssColor } from "./docmost-schema.js";
|
||||
|
||||
// Attached-comment convention (#293 canon #9/#4/#8): the reusable primitives
|
||||
// the serializer/parser use to encode attrs that have no native markdown syntax
|
||||
// as trailing `<!--name {json}-->` comments.
|
||||
export {
|
||||
attachedCommentFor,
|
||||
standaloneCommentFor,
|
||||
parseAttachedComment,
|
||||
} from "./attached-comment.js";
|
||||
export type { AttachedComment } from "./attached-comment.js";
|
||||
|
||||
export {
|
||||
canonicalizeContent,
|
||||
docsCanonicallyEqual,
|
||||
+646
-281
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,78 @@
|
||||
/**
|
||||
* Shared inline-math boundary rule (#293 canon #6).
|
||||
*
|
||||
* Pandoc's inline-math rule lives here because it is used in TWO directions that
|
||||
* MUST agree byte-for-byte on which `$…$` spans are math:
|
||||
*
|
||||
* - the IMPORT tokenizer (markdown-to-prosemirror.ts) that turns `$LaTeX$`
|
||||
* into a `mathInline` node, and
|
||||
* - the EXPORT escaper (markdown-converter.ts) that backslash-escapes a
|
||||
* would-be-math `$…$` span sitting in PROSE text so it re-imports as literal
|
||||
* text instead of silently materializing a phantom math node.
|
||||
*
|
||||
* Defining the rule ONCE guarantees the two directions never drift: a span the
|
||||
* tokenizer would match is EXACTLY a span the escaper neutralizes, so a prose
|
||||
* `$x$` round-trips as literal text and math `$x^2$` round-trips as math.
|
||||
*
|
||||
* The rule (currency-safe, from pandoc): an opening `$` is NOT followed by
|
||||
* whitespace; the closing `$` is NOT preceded by whitespace AND NOT immediately
|
||||
* followed by a digit; the inner run is non-empty, single-line, and may embed an
|
||||
* escaped `\$` (which never counts as the closer). Under this rule `$5`,
|
||||
* `$5 and $10`, `price is $5`, `a $5 b $6 c` all stay literal (no VALID closing
|
||||
* `$` exists — the `$` before a space-preceded amount fails the "not preceded by
|
||||
* whitespace" test, and a lone `$` has no closer), while `$x^2$` is math.
|
||||
*/
|
||||
|
||||
// Core pattern (unanchored). Escaping note for the string form:
|
||||
// \\$ -> a literal `$`
|
||||
// (?!\s) -> opening `$` NOT followed by whitespace (also forces a
|
||||
// non-empty inner: the next char must exist and be non-space)
|
||||
// (?:\\\\\\$|[^$\n])+? -> inner: shortest run of either an escaped `\$`
|
||||
// (consumed as a unit so it is never the closer) or any char
|
||||
// that is neither an unescaped `$` nor a newline
|
||||
// (?<!\s) -> the char before the closing `$` is NOT whitespace
|
||||
// \\$ -> closing `$`
|
||||
// (?![0-9]) -> closing `$` NOT immediately followed by a digit (currency)
|
||||
// NOTE(review): the character class `[^$\n]` also accepts a backslash, so when
// no later `$` exists the engine can backtrack out of the `\$` alternative,
// take the backslash alone, and use the following `$` as the closer (e.g.
// `$a\$` appears to match with inner `a\`). Confirm whether that is intended
// before relying on "an escaped `\$` never counts as the closer".
export const INLINE_MATH_SOURCE =
|
||||
"\\$(?!\\s)((?:\\\\\\$|[^$\\n])+?)(?<!\\s)\\$(?![0-9])";
|
||||
|
||||
/**
 * Build a fresh global (`g`) RegExp from `INLINE_MATH_SOURCE`, for the
 * export-side prose escaper. A new instance is created on every call.
 */
export const inlineMathGlobalRe = (): RegExp => {
  const flags = "g";
  return new RegExp(INLINE_MATH_SOURCE, flags);
};
|
||||
|
||||
/**
 * Build a fresh RegExp that matches `INLINE_MATH_SOURCE` only at the start of
 * the input (`^` prefix, no flags), for the import-side marked tokenizer.
 */
export const inlineMathAnchoredRe = (): RegExp => {
  const anchored = `^${INLINE_MATH_SOURCE}`;
  return new RegExp(anchored);
};
|
||||
|
||||
/**
 * Decode the inner LaTeX captured by the tokenizer: every backslash-dollar
 * pair (`\$`) is replaced by a bare `$`; all other text is left unchanged.
 */
export const decodeInlineMathLatex = (inner: string): string => {
  const escapedDollar = "\\$";
  return inner.split(escapedDollar).join("$");
};
|
||||
|
||||
/**
 * Encode LaTeX for the `$…$` inline form: every `$` is prefixed with a
 * backslash so a literal dollar sign cannot close the span early.
 */
export const encodeInlineMathLatex = (latex: string): string => {
  const escapedDollar = "\\$";
  return latex.split("$").join(escapedDollar);
};
|
||||
|
||||
/**
 * Decide whether a `mathInline` node's LaTeX may be written as `$LaTeX$`
 * instead of the schema-HTML `<span>` fallback.
 *
 * Returns `true` only when ALL of the following hold:
 *   - the string is non-empty;
 *   - it neither starts nor ends with whitespace;
 *   - it contains no CR or LF;
 *   - it contains no backslash-dollar pair (`\$`);
 *   - it does not end with a backslash.
 *
 * The last two exclusions keep the `$` -> `\$` escaping unambiguous on decode.
 * A digit immediately AFTER the closing `$` cannot be detected from the LaTeX
 * alone and is not checked here.
 */
export const inlineMathSerializable = (latex: string): boolean => {
  if (latex.length === 0) return false;

  const startsWithSpace = /^\s/.test(latex);
  const endsWithSpace = /\s$/.test(latex);
  if (startsWithSpace || endsWithSpace) return false;

  if (/[\r\n]/.test(latex)) return false;
  if (latex.includes("\\$")) return false;

  // A trailing backslash would escape the closing `$`.
  return !latex.endsWith("\\");
};
|
||||
|
||||
/**
 * Escape a value for use inside an HTML double-quoted attribute.
 *
 * Only `&` and `"` are rewritten (to `&amp;` and `&quot;`). `&` is handled
 * first so the ampersand introduced by `&quot;` is not escaped a second time.
 */
export const escapeMathAttr = (value: string): string =>
  value.replace(/&/g, "&amp;").replace(/"/g, "&quot;");
|
||||
@@ -0,0 +1,172 @@
|
||||
/**
|
||||
* Shared schema-HTML builders for the media/discriminator family (#293 canon
|
||||
* #8).
|
||||
*
|
||||
* Canon #8 gives ten node types (youtube/video/audio/drawio/excalidraw —
|
||||
* image-form; pdf/attachment/embed — link-form; pageEmbed/transclusionReference
|
||||
* — standalone) a readable markdown TOP-LEVEL form (`![](src)`/`[text](src)`/a
|
||||
* bare comment) plus a discriminator `<!--name {…}-->` comment. But TWO other
|
||||
* paths still need the RAW SCHEMA-HTML form of each node:
|
||||
*
|
||||
* 1. The serializer's raw-HTML/columns path (`blockToHtml`): a comment node is
|
||||
* dropped by the DOM parse stage that reads a raw-HTML block back, so inside
|
||||
* a column/cell these nodes MUST stay schema HTML or they vanish (data loss).
|
||||
* 2. The importer's `applyCommentDirectives`: to materialize the discriminator
|
||||
* comment it rebuilds the SAME schema element the raw-HTML path emits, then
|
||||
* swaps it in for the `<img>`/`<a>`/comment.
|
||||
*
|
||||
* Keeping these builders in ONE module means the serializer's raw-HTML path and
|
||||
* the importer's materialization can never drift: both call the same function.
|
||||
* Each builder reproduces BYTE-FOR-BYTE the schema HTML the top-level
|
||||
* `processNode` cases previously returned (so existing columns/raw-HTML goldens
|
||||
* stay green), and each output round-trips through the matching schema parseHTML
|
||||
* in docmost-schema.ts.
|
||||
*/
|
||||
|
||||
/**
 * Escape a value interpolated into an HTML double-quoted attribute value.
 *
 * The value is stringified with `String(...)`, then ONLY `&` and `"` are
 * rewritten (to `&amp;` and `&quot;`). `&` is handled first so the ampersand
 * introduced by `&quot;` is not escaped a second time. `<`, `>` and `'` are
 * deliberately left alone.
 */
const escapeAttr = (value: unknown): string =>
  String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
|
||||
|
||||
/**
 * Schema HTML for an uploaded video player: `<div><video …></video></div>`.
 *
 * The wrapper `<div>` carries no data-type; per the module's convention it is
 * there to force block treatment so the inline `<video>` is not wrapped in a
 * `<p>`. Attribute order is fixed: src, aria-label, data-attachment-id, width,
 * height, data-size, data-align, data-aspect-ratio.
 *
 * `src` is always emitted (empty when missing); `alt`, `attachmentId` and
 * `align` only when truthy; the remaining attrs whenever they are not
 * null/undefined.
 */
export function videoToHtml(attrs: Record<string, any>): string {
  const pieces: string[] = [];
  // Append `name="escaped value"` when the attr's presence test passed.
  const emit = (present: boolean, name: string, value: unknown): void => {
    if (present) pieces.push(`${name}="${escapeAttr(value)}"`);
  };

  emit(true, "src", attrs.src ?? "");
  emit(Boolean(attrs.alt), "aria-label", attrs.alt);
  emit(Boolean(attrs.attachmentId), "data-attachment-id", attrs.attachmentId);
  emit(attrs.width != null, "width", attrs.width);
  emit(attrs.height != null, "height", attrs.height);
  emit(attrs.size != null, "data-size", attrs.size);
  emit(Boolean(attrs.align), "data-align", attrs.align);
  emit(attrs.aspectRatio != null, "data-aspect-ratio", attrs.aspectRatio);

  return `<div><video ${pieces.join(" ")}></video></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for a YouTube embed: `<div data-type="youtube" data-src="…">`.
 *
 * `data-src` is always emitted (empty when missing). `data-width` and
 * `data-height` follow when not null/undefined, then `data-align` when truthy.
 */
export function youtubeToHtml(attrs: Record<string, any>): string {
  // [include?, attribute name, raw value] in emit order.
  const optional: Array<[boolean, string, unknown]> = [
    [attrs.width != null, "data-width", attrs.width],
    [attrs.height != null, "data-height", attrs.height],
    [Boolean(attrs.align), "data-align", attrs.align],
  ];
  const rendered = optional
    .filter(([include]) => include)
    .map(([, name, value]) => `${name}="${escapeAttr(value)}"`);

  const parts = [
    `data-type="youtube"`,
    `data-src="${escapeAttr(attrs.src ?? "")}"`,
    ...rendered,
  ];
  return `<div ${parts.join(" ")}></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for an uploaded audio player: `<div><audio …></audio></div>`.
 *
 * `src` is always emitted (empty when missing), then `data-attachment-id`
 * when truthy and `data-size` when not null/undefined.
 */
export function audioToHtml(attrs: Record<string, any>): string {
  let attrText = `src="${escapeAttr(attrs.src ?? "")}"`;
  if (attrs.attachmentId) {
    attrText += ` data-attachment-id="${escapeAttr(attrs.attachmentId)}"`;
  }
  if (attrs.size != null) {
    attrText += ` data-size="${escapeAttr(attrs.size)}"`;
  }
  return `<div><audio ${attrText}></audio></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for a draw.io / excalidraw diagram:
 * `<div data-type="drawio|excalidraw" data-src="…" …></div>`.
 *
 * `type` is interpolated into `data-type` as given (not escaped). `data-src`
 * is always emitted (empty when missing). The rest follow in fixed order:
 * title, alt, width, height, size, aspectRatio (each when not null/undefined),
 * then align and attachmentId (each when truthy).
 */
export function diagramToHtml(
  type: "drawio" | "excalidraw",
  attrs: Record<string, any>,
): string {
  // [include?, attribute name, raw value] in emit order.
  const optional: Array<[boolean, string, unknown]> = [
    [attrs.title != null, "data-title", attrs.title],
    [attrs.alt != null, "data-alt", attrs.alt],
    [attrs.width != null, "data-width", attrs.width],
    [attrs.height != null, "data-height", attrs.height],
    [attrs.size != null, "data-size", attrs.size],
    [attrs.aspectRatio != null, "data-aspect-ratio", attrs.aspectRatio],
    [Boolean(attrs.align), "data-align", attrs.align],
    [Boolean(attrs.attachmentId), "data-attachment-id", attrs.attachmentId],
  ];
  const rendered = optional
    .filter(([include]) => include)
    .map(([, name, value]) => `${name}="${escapeAttr(value)}"`);

  const parts = [
    `data-type="${type}"`,
    `data-src="${escapeAttr(attrs.src ?? "")}"`,
    ...rendered,
  ];
  return `<div ${parts.join(" ")}></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for a generic provider embed:
 * `<div data-type="embed" data-src="…" data-provider="…" …></div>`.
 *
 * `data-src` and `data-provider` are always emitted (empty when missing),
 * followed by `data-align` when truthy and `data-width` / `data-height` when
 * not null/undefined.
 */
export function embedToHtml(attrs: Record<string, any>): string {
  const pieces: string[] = [`data-type="embed"`];
  // Append `name="escaped value"` when the attr's presence test passed.
  const emit = (present: boolean, name: string, value: unknown): void => {
    if (present) pieces.push(`${name}="${escapeAttr(value)}"`);
  };

  emit(true, "data-src", attrs.src ?? "");
  emit(true, "data-provider", attrs.provider ?? "");
  emit(Boolean(attrs.align), "data-align", attrs.align);
  emit(attrs.width != null, "data-width", attrs.width);
  emit(attrs.height != null, "data-height", attrs.height);

  return `<div ${pieces.join(" ")}></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for an uploaded file attachment:
 * `<div data-type="attachment" data-attachment-url="…" …></div>`.
 *
 * `data-attachment-url` is always emitted (empty when `url` is missing), then
 * name and mime when truthy, size when not null/undefined, and the attachment
 * id when truthy.
 */
export function attachmentToHtml(attrs: Record<string, any>): string {
  // [include?, attribute name, raw value] in emit order.
  const optional: Array<[boolean, string, unknown]> = [
    [Boolean(attrs.name), "data-attachment-name", attrs.name],
    [Boolean(attrs.mime), "data-attachment-mime", attrs.mime],
    [attrs.size != null, "data-attachment-size", attrs.size],
    [Boolean(attrs.attachmentId), "data-attachment-id", attrs.attachmentId],
  ];
  const rendered = optional
    .filter(([include]) => include)
    .map(([, name, value]) => `${name}="${escapeAttr(value)}"`);

  const parts = [
    `data-type="attachment"`,
    `data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
    ...rendered,
  ];
  return `<div ${parts.join(" ")}></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for an embedded PDF viewer: `<div data-type="pdf" src="…" …></div>`.
 *
 * `src` is always emitted (empty when missing), then `data-name` and
 * `data-attachment-id` when truthy, and `data-size`, `width`, `height` when
 * not null/undefined.
 */
export function pdfToHtml(attrs: Record<string, any>): string {
  const pieces: string[] = [`data-type="pdf"`];
  // Append `name="escaped value"` when the attr's presence test passed.
  const emit = (present: boolean, name: string, value: unknown): void => {
    if (present) pieces.push(`${name}="${escapeAttr(value)}"`);
  };

  emit(true, "src", attrs.src ?? "");
  emit(Boolean(attrs.name), "data-name", attrs.name);
  emit(Boolean(attrs.attachmentId), "data-attachment-id", attrs.attachmentId);
  emit(attrs.size != null, "data-size", attrs.size);
  emit(attrs.width != null, "width", attrs.width);
  emit(attrs.height != null, "height", attrs.height);

  return `<div ${pieces.join(" ")}></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for a whole-page live embed: `<div data-type="pageEmbed" …></div>`.
 *
 * `data-source-page-id` is added only when `sourcePageId` is truthy.
 */
export function pageEmbedToHtml(attrs: Record<string, any>): string {
  const sourceAttr = attrs.sourcePageId
    ? ` data-source-page-id="${escapeAttr(attrs.sourcePageId)}"`
    : "";
  return `<div data-type="pageEmbed"${sourceAttr}></div>`;
}
|
||||
|
||||
/**
 * Schema HTML for a live transclusion reference:
 * `<div data-type="transclusionReference" …></div>`.
 *
 * `data-source-page-id` and `data-transclusion-id` are each added only when
 * the corresponding attr is truthy, in that order.
 */
export function transclusionReferenceToHtml(attrs: Record<string, any>): string {
  // [attribute name, raw value] in emit order; falsy values are skipped.
  const candidates: Array<[string, unknown]> = [
    ["data-source-page-id", attrs.sourcePageId],
    ["data-transclusion-id", attrs.transclusionId],
  ];
  const rendered = candidates
    .filter(([, value]) => Boolean(value))
    .map(([name, value]) => `${name}="${escapeAttr(value)}"`);

  return `<div ${[`data-type="transclusionReference"`, ...rendered].join(" ")}></div>`;
}
|
||||
+14
-7
@@ -6,7 +6,8 @@ import { canonicalizeContent, docsCanonicallyEqual } from 'docmost-client';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gaps NOT covered by canonicalize.test.ts (test-strategy report §2 diff):
|
||||
// - the *.align family (drawio/excalidraw/video/youtube/embed): a "center"
|
||||
// - the *.align family (drawio/excalidraw/video/youtube/embed AND image, whose
|
||||
// align default is unified to "center" per #293 canon #4): a "center"
|
||||
// default is dropped, a non-default value is kept;
|
||||
// - comment.resolved: TRUE is PRESERVED (only resolved:false is normalized);
|
||||
// - link.target / link.rel NON-default values are kept;
|
||||
@@ -39,21 +40,27 @@ describe('canonicalizeContent — *.align default family', () => {
|
||||
});
|
||||
}
|
||||
|
||||
it('image align is NOT in KNOWN_DEFAULTS: a non-null align survives, null is dropped', () => {
|
||||
// image.align defaults to null, so it is handled by the null-drop rule and
|
||||
// a real value ("left") must be kept (no spurious default match).
|
||||
it('image align default is now "center" (#293 canon #4): center/null dropped, left kept', () => {
|
||||
// A real non-default value ("left") must be kept.
|
||||
const kept = canonicalizeContent({
|
||||
type: 'image',
|
||||
attrs: { id: 'i-1', src: '/a.png', align: 'left' },
|
||||
});
|
||||
expect(kept.attrs).toEqual({ src: '/a.png', align: 'left' });
|
||||
// An image with align:"center" must KEEP it (center is NOT a default for
|
||||
// image, only for the diagram/media family) — guards against over-matching.
|
||||
// #293 canon #4 unified the image align default to "center" (matching
|
||||
// editor-ext), so a center image now DROPS align exactly like the diagram/
|
||||
// media family — bare `![](src)` images stay canonically clean.
|
||||
const center = canonicalizeContent({
|
||||
type: 'image',
|
||||
attrs: { id: 'i-2', src: '/b.png', align: 'center' },
|
||||
});
|
||||
expect(center.attrs).toEqual({ src: '/b.png', align: 'center' });
|
||||
expect(center.attrs).toEqual({ src: '/b.png' });
|
||||
// A null align is likewise dropped (null-drop rule) and re-imports as center.
|
||||
const nullAlign = canonicalizeContent({
|
||||
type: 'image',
|
||||
attrs: { id: 'i-3', src: '/c.png', align: null },
|
||||
});
|
||||
expect(nullAlign.attrs).toEqual({ src: '/c.png' });
|
||||
});
|
||||
});
|
||||
|
||||
+19
-21
@@ -32,15 +32,16 @@ describe('diagram round-trip (docmost-schema diagramAttributes)', () => {
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
const md2 = convertProseMirrorToMarkdown(doc2);
|
||||
|
||||
// Exact serialized form: numbers render as bare data-* values; attribute
|
||||
// order follows the converter's emit order (src, then width/height/size/
|
||||
// aspect-ratio/align, then attachment-id).
|
||||
// #293 canon #8 (image-form): src is the markdown target; every OTHER
|
||||
// non-default attr rides in the ALWAYS-emitted `drawio` discriminator comment
|
||||
// (numerics stringified, stable key order width/height/size/aspectRatio then
|
||||
// attachmentId). align="center" is the schema default, so it is OMITTED.
|
||||
expect(md1).toBe(
|
||||
'<div data-type="drawio" data-src="/d.drawio" data-width="640" data-height="480" data-size="1234" data-aspect-ratio="1.777" data-align="center" data-attachment-id="att-1"></div>',
|
||||
'<!--drawio {"width":"640","height":"480","size":"1234","aspectRatio":"1.777","attachmentId":"att-1"}-->',
|
||||
);
|
||||
|
||||
// A second export reproduces the first byte-for-byte (drawio align default
|
||||
// is already "center", so nothing new materializes on import).
|
||||
// A second export reproduces the first byte-for-byte: align="center"
|
||||
// re-materializes as the schema default on import and is omitted again.
|
||||
expect(md2).toBe(md1);
|
||||
|
||||
// Re-import coerces every numeric attr to a STRING because parseHTML reads
|
||||
@@ -64,10 +65,10 @@ describe('diagram round-trip (docmost-schema diagramAttributes)', () => {
|
||||
});
|
||||
|
||||
// SPEC case 2: minimal excalidraw atom with ONLY string attrs (no align, no
|
||||
// numeric attrs). Locks the one-time export divergence (align='center'
|
||||
// default materializes only on import) plus escapeAttr of title/alt through
|
||||
// the data-title/data-alt path.
|
||||
it('excalidraw materializes align default only on import and escapes title/alt', async () => {
|
||||
// numeric attrs). #293 canon #8 image-form: title/alt ride in the comment JSON
|
||||
// (JSON-encoded, NOT HTML-escaped) and align='center' is omitted as the
|
||||
// schema default — so the one-time divergence the OLD div-form had is GONE.
|
||||
it('excalidraw round-trips title/alt via the discriminator comment (byte-stable, align default omitted)', async () => {
|
||||
const input = doc({
|
||||
type: 'excalidraw',
|
||||
attrs: {
|
||||
@@ -81,21 +82,18 @@ describe('diagram round-trip (docmost-schema diagramAttributes)', () => {
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
const md2 = convertProseMirrorToMarkdown(doc2);
|
||||
|
||||
// First export: no align emitted (the input doc carries no align), and the
|
||||
// " in title becomes &quot;, the & in alt becomes &amp; via escapeAttr.
|
||||
// #293 canon #8: src in the target; title/alt in the ALWAYS-emitted
|
||||
// `excalidraw` comment as compact JSON (the " in title is JSON-escaped as \",
|
||||
// the & in alt stays literal — JSON, not HTML). No align emitted (default).
|
||||
expect(md1).toBe(
|
||||
'<div data-type="excalidraw" data-src="/e.excalidraw" data-title="My &quot;Diagram&quot;" data-alt="a&amp;b"></div>',
|
||||
'<!--excalidraw {"title":"My \\"Diagram\\"","alt":"a&b"}-->',
|
||||
);
|
||||
|
||||
// Second export: align='center' has now materialized (the schema's
|
||||
// diagramAttributes default), so md2 gains a data-align="center" suffix and
|
||||
// is NOT byte-equal to md1. This one-time divergence is the diagram quirk.
|
||||
expect(md2).toBe(
|
||||
'<div data-type="excalidraw" data-src="/e.excalidraw" data-title="My &quot;Diagram&quot;" data-alt="a&amp;b" data-align="center"></div>',
|
||||
);
|
||||
expect(md2).not.toBe(md1);
|
||||
// Byte-stable: align='center' re-materializes as the schema default on import
|
||||
// and is omitted again on export #2, so md2 === md1 (no diagram quirk now).
|
||||
expect(md2).toBe(md1);
|
||||
|
||||
// Re-import decodes the escaped entities back to the original characters.
|
||||
// Re-import decodes the JSON payload back to the original characters.
|
||||
const attrs2 = doc2.content[0].attrs;
|
||||
expect(attrs2.title).toBe('My "Diagram"');
|
||||
expect(attrs2.alt).toBe('a&b');
|
||||
@@ -0,0 +1,509 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
// Import the converters DIRECTLY from src (NOT the docmost-client barrel, which
|
||||
// mutates the global DOM at import time), matching the other converter tests.
|
||||
import { convertProseMirrorToMarkdown } from "../src/lib/markdown-converter.js";
|
||||
import { markdownToProseMirror } from "../src/lib/markdown-to-prosemirror.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tiny builders (mirror the other converter tests).
|
||||
// ---------------------------------------------------------------------------
|
||||
// Root document wrapping the given top-level nodes.
const doc = (...nodes: any[]) => {
  return { type: "doc", content: nodes };
};

// Paragraph holding the given inline content.
const P = (...content: any[]) => {
  return { type: "paragraph", content };
};

// Text node; the `marks` key is present only when marks are supplied.
const T = (text: string, marks?: any[]) => {
  const node: any = { type: "text", text };
  if (marks) node.marks = marks;
  return node;
};

// Footnote reference pointing at definition `id`.
const ref = (id: string) => {
  const attrs = { id };
  return { type: "footnoteReference", attrs };
};

// Doc-level footnotes list holding the given definitions.
const list = (...defs: any[]) => {
  return { type: "footnotesList", content: defs };
};

// Footnote definition `id` whose body is the given paragraphs.
const def = (id: string, ...paras: any[]) => {
  const attrs = { id };
  return { type: "footnoteDefinition", attrs, content: paras };
};
|
||||
|
||||
// Return the first node whose `type` matches, visiting the tree depth-first in
// document order (a node before its children, children left to right), or
// `undefined` when nothing matches. Non-object entries are skipped.
function findNode(n: any, type: string): any {
  const pending: any[] = [n];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== "object") continue;
    if (current.type === type) return current;
    if (Array.isArray(current.content)) {
      // Push children in reverse so the leftmost child is popped first.
      for (let i = current.content.length - 1; i >= 0; i--) {
        pending.push(current.content[i]);
      }
    }
  }
  return undefined;
}
|
||||
// Gather every node whose `type` matches, in depth-first document order.
// Matches are appended to `out` (a fresh array by default), which is returned.
function findAll(n: any, type: string, out: any[] = []): any[] {
  if (n && typeof n === "object") {
    if (n.type === type) out.push(n);
    const kids: any[] = Array.isArray(n.content) ? n.content : [];
    for (const kid of kids) {
      findAll(kid, type, out);
    }
  }
  return out;
}
|
||||
// Join the text of every `text` node under `n`, in document order. A text node
// with a missing/empty `text` contributes "", as does any non-object input or
// a node without a `content` array.
function allText(n: any): string {
  if (!n || typeof n !== "object") return "";
  if (n.type === "text") return n.text || "";
  if (!Array.isArray(n.content)) return "";

  let joined = "";
  for (const child of n.content) {
    joined += allText(child);
  }
  return joined;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// basic: `^[body]` at the reference point, byte-stable round trip.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: basic", () => {
|
||||
it("serializes a ref + def to `text^[a note]` and re-imports losslessly", async () => {
|
||||
const d = doc(P(T("text"), ref("fn1")), list(def("fn1", P(T("a note")))));
|
||||
const md = convertProseMirrorToMarkdown(d);
|
||||
expect(md).toBe("text^[a note]");
|
||||
|
||||
const back = await markdownToProseMirror(md);
|
||||
const r = findNode(back, "footnoteReference");
|
||||
const l = findNode(back, "footnotesList");
|
||||
const dfn = findNode(back, "footnoteDefinition");
|
||||
expect(r).toBeDefined();
|
||||
expect(l).toBeDefined();
|
||||
expect(dfn).toBeDefined();
|
||||
// The note body rode along, not just the wrapper.
|
||||
expect(allText(dfn)).toBe("a note");
|
||||
// The reference points at the matching definition (derived id).
|
||||
expect(r.attrs.id).toBe(dfn.attrs.id);
|
||||
// Ids are assigned sequentially by the import post-pass (F1), not hashed.
|
||||
expect(r.attrs.id).toBe("fn-1");
|
||||
|
||||
// Byte-stable: re-export equals the first export.
|
||||
const md2 = convertProseMirrorToMarkdown(back);
|
||||
expect(md2).toBe(md);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// bracket balancing (MANDATORY): a `[link](url)` inside the body is captured
|
||||
// whole and survives as a link mark in the definition.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: bracket balancing", () => {
  it("captures a full balanced `[link](url)` body and keeps the link", async () => {
    // Definition body = plain text + a linked run + plain text, so the
    // serialized note contains an inner `[…](…)` pair.
    const leadIn = "note with a ";
    const linkMark = { type: "link", attrs: { href: "https://x" } };
    const anchorParagraph = P(T("x"), ref("fn1"));
    const noteParagraph = P(T(leadIn), T("link", [linkMark]), T(" inside"));
    const source = doc(anchorParagraph, list(def("fn1", noteParagraph)));

    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toBe("x^[note with a [link](https://x) inside]");

    const reimported = await markdownToProseMirror(exported);
    const definition = findNode(reimported, "footnoteDefinition");
    expect(allText(definition)).toBe("note with a link inside");

    // If the parser had stopped at the first inner `]`, no text node inside the
    // definition would still carry a link mark.
    const carriesLink = (textNode: any) =>
      (textNode.marks || []).some((m: any) => m.type === "link");
    const linkedRun = findAll(definition, "text").find((t: any) => carriesLink(t));
    expect(linkedRun).toBeDefined();
    expect(linkedRun.text).toBe("link");
    expect(linkedRun.marks[0].attrs.href).toBe("https://x");

    // Second export must equal the first one byte for byte.
    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toBe(exported);
  });

  it("escapes a STRAY unbalanced `]`/`[` in body text and round-trips it", async () => {
    const strayBody = "a ] and [ stray";
    const anchorParagraph = P(T("x"), ref("fn1"));
    const source = doc(anchorParagraph, list(def("fn1", P(T(strayBody)))));

    const exported = convertProseMirrorToMarkdown(source);
    // Each lone bracket gets a backslash, keeping the outer `^[…]` parseable.
    expect(exported).toBe("x^[a \\] and \\[ stray]");

    const reimported = await markdownToProseMirror(exported);
    const definition = findNode(reimported, "footnoteDefinition");
    expect(allText(definition)).toBe("a ] and [ stray");

    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toBe(exported);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// multi-paragraph body -> literal `\n` separator.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: multi-paragraph body", () => {
  it("joins two paragraphs with a literal `\\n` and re-splits them", async () => {
    const anchorParagraph = P(T("x"), ref("fn1"));
    const twoParagraphNote = def("fn1", P(T("para one")), P(T("para two")));
    const source = doc(anchorParagraph, list(twoParagraphNote));

    const exported = convertProseMirrorToMarkdown(source);
    // Paragraphs are joined by the two characters backslash + n ...
    expect(exported).toBe("x^[para one\\npara two]");
    expect(exported.includes("\\n")).toBe(true);
    // ... and never by an actual line break inside the footnote.
    expect(exported.includes("\n")).toBe(false);

    const reimported = await markdownToProseMirror(exported);
    const definition = findNode(reimported, "footnoteDefinition");
    const paragraphs = (definition.content || []).filter(
      (child: any) => child.type === "paragraph",
    );
    expect(paragraphs.length).toBe(2);
    expect(allText(paragraphs[0])).toBe("para one");
    expect(allText(paragraphs[1])).toBe("para two");

    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toBe(exported);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// real backslash-n escaping (MANDATORY): a literal `\n` in the body text is
|
||||
// emitted as `\\n` and round-trips to the literal text, NOT a paragraph break.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: real backslash-n escaping", () => {
  it("escapes a literal `\\n` as `\\\\n` and keeps it a single paragraph", async () => {
    // The note text holds a genuine backslash followed by the letter n.
    const noteText = "path C:\\new here";
    const anchorParagraph = P(T("x"), ref("fn1"));
    const source = doc(anchorParagraph, list(def("fn1", P(T(noteText)))));

    const exported = convertProseMirrorToMarkdown(source);
    // On export the backslash itself is escaped, yielding `\\n` in the markdown.
    expect(exported).toBe("x^[path C:\\\\new here]");

    const reimported = await markdownToProseMirror(exported);
    const definition = findNode(reimported, "footnoteDefinition");
    const paragraphs = (definition.content || []).filter(
      (child: any) => child.type === "paragraph",
    );
    // Still one paragraph: the escaped sequence was not taken as a separator.
    expect(paragraphs.length).toBe(1);
    expect(allText(definition)).toBe("path C:\\new here");

    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toBe(exported);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// dedup / multiple refs.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: dedup", () => {
  it("two refs to the SAME def emit `^[same]` twice and MERGE on parse", async () => {
    const doubleRefParagraph = P(T("a"), ref("fn1"), T(" b"), ref("fn1"));
    const source = doc(doubleRefParagraph, list(def("fn1", P(T("same text")))));

    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toBe("a^[same text] b^[same text]");

    const reimported = await markdownToProseMirror(exported);
    // Expect two reference nodes but a single merged definition, all on one id.
    const references = findAll(reimported, "footnoteReference");
    const definitions = findAll(reimported, "footnoteDefinition");
    expect(references.length).toBe(2);
    expect(definitions.length).toBe(1);
    const [firstRef, secondRef] = references;
    expect(firstRef.attrs.id).toBe(secondRef.attrs.id);
    expect(firstRef.attrs.id).toBe(definitions[0].attrs.id);

    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toBe(exported);
  });

  it("two `^[identical]` in SOURCE merge to one definition", async () => {
    const parsed = await markdownToProseMirror("a^[note] b^[note]");
    const references = findAll(parsed, "footnoteReference");
    const definitions = findAll(parsed, "footnoteDefinition");
    expect(references.length).toBe(2);
    expect(definitions.length).toBe(1);
    expect(references[0].attrs.id).toBe(definitions[0].attrs.id);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// footnote inside a column -> raw-HTML `<sup data-fn-text>` form (NOT `^[…]`).
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: inside a column", () => {
  it("carries the body on `<sup data-fn-text>` and round-trips", async () => {
    // One column whose paragraph holds the footnote reference.
    const column = {
      type: "column",
      attrs: { width: "50%" },
      content: [P(T("col "), ref("fn1"))],
    };
    const columns = { type: "columns", content: [column] };
    const source = doc(columns, list(def("fn1", P(T("colnote")))));

    const exported = convertProseMirrorToMarkdown(source);
    // In the raw-HTML form the note text is an attribute on the sup; the
    // `^[…]` markdown syntax must not appear.
    expect(exported).toContain('data-fn-text="colnote"');
    expect(exported).not.toContain("^[");

    const reimported = await markdownToProseMirror(exported);
    // After import the reference is still within the column node, and a
    // definition with the note text exists in the document.
    const importedColumn = findNode(reimported, "column");
    expect(findNode(importedColumn, "footnoteReference")).toBeDefined();
    const definition = findNode(reimported, "footnoteDefinition");
    expect(allText(definition)).toBe("colnote");
    const reference = findNode(reimported, "footnoteReference");
    expect(reference.attrs.id).toBe(definition.attrs.id);

    // Only the footnote part is asserted on re-export (the surrounding columns
    // node applies its own layout/width normalization, unrelated to footnotes).
    // The sup must keep the body on data-fn-text and must carry NO id (F1);
    // the id is assigned by the import post-pass.
    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toContain('data-fn-text="colnote"');
    expect(reexported).not.toContain("data-id=");
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// orphan definition: a def with no reference is not silently lost.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: orphan definition", () => {
  it("appends an unreferenced definition as its own `^[body]` line", async () => {
    // No paragraph references `fnX`, so the definition is an orphan.
    const prose = P(T("body text"));
    const orphan = def("fnX", P(T("orphan note")));
    const source = doc(prose, list(orphan));

    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toBe("body text\n\n^[orphan note]");

    const reimported = await markdownToProseMirror(exported);
    const definition = findNode(reimported, "footnoteDefinition");
    expect(definition).toBeDefined();
    expect(allText(definition)).toBe("orphan note");
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// no backward compat: `[^id]` / `[^id]: def` stay literal (no footnote node).
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: no backward compat for the reference form", () => {
  it("does not parse `[^1]` into a footnote node", async () => {
    const parsed = await markdownToProseMirror("see [^1] here");
    for (const nodeType of ["footnoteReference", "footnotesList"]) {
      expect(findNode(parsed, nodeType)).toBeUndefined();
    }
    // The bracketed marker is kept verbatim in the document text.
    expect(allText(parsed)).toContain("[^1]");
  });

  it("does not parse a `[^1]: def` definition line into a footnote node", async () => {
    const parsed = await markdownToProseMirror("text\n\n[^1]: a definition");
    for (const nodeType of ["footnoteReference", "footnoteDefinition", "footnotesList"]) {
      expect(findNode(parsed, nodeType)).toBeUndefined();
    }
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// literal `^[` in prose must NOT materialize a phantom footnote on re-import.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: literal `^[` in prose", () => {
  it("escapes a literal `^[…]` in text so it stays text, byte-stable", async () => {
    const proseText = "see ^[not a note] here";
    const source = doc(P(T(proseText)));

    const exported = convertProseMirrorToMarkdown(source);
    // A backslash is inserted between `^` and `[`, so the exported text no
    // longer contains the footnote opener.
    expect(exported).toBe("see ^\\[not a note] here");

    const reimported = await markdownToProseMirror(exported);
    expect(findNode(reimported, "footnoteReference")).toBeUndefined();
    expect(allText(reimported)).toBe("see ^[not a note] here");

    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toBe(exported);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// fail-open: unbalanced `^[` and empty `^[]` do not crash.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: fail-open", () => {
  it("leaves an unbalanced `^[` as literal text, no crash", async () => {
    const parsed = await markdownToProseMirror("dangling ^[ open bracket");
    expect(findNode(parsed, "footnoteReference")).toBeUndefined();
    expect(allText(parsed)).toContain("^[ open bracket");
  });

  it("treats `^[]` as a footnote with an empty body, no crash", async () => {
    const emptyNoteMarkdown = "empty^[]";
    const parsed = await markdownToProseMirror(emptyNoteMarkdown);
    const reference = findNode(parsed, "footnoteReference");
    const definition = findNode(parsed, "footnoteDefinition");
    expect(reference).toBeDefined();
    expect(definition).toBeDefined();
    expect(allText(definition)).toBe("");
    // Exporting the parsed document must give back the same `^[]` markdown.
    expect(convertProseMirrorToMarkdown(parsed)).toBe("empty^[]");
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// F1 (CRITICAL): DIFFERENT bodies must NEVER merge — dedup keys on exact text,
|
||||
// not a 32-bit hash (the old djb2 hash collided `"sgrs rj"` / `"a gtkfr"`).
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: distinct bodies never merge (F1)", () => {
  it("keeps the hash-colliding pair `sgrs rj` / `a gtkfr` as two distinct defs", async () => {
    // Under the old djb2 hash both of these different bodies mapped to
    // fn-16myybs, and the second body was silently dropped. Dedup on exact
    // text must keep them as two separate definitions.
    const twoRefParagraph = P(T("x"), ref("fnA"), T(" y"), ref("fnB"));
    const notes = list(def("fnA", P(T("sgrs rj"))), def("fnB", P(T("a gtkfr"))));
    const source = doc(twoRefParagraph, notes);

    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toBe("x^[sgrs rj] y^[a gtkfr]");

    const reimported = await markdownToProseMirror(exported);
    const definitions = findAll(reimported, "footnoteDefinition");
    const references = findAll(reimported, "footnoteReference");
    // Neither body is lost: two definitions with the two original texts.
    expect(definitions.length).toBe(2);
    const sortedBodies = definitions.map(allText).sort();
    expect(sortedBodies).toEqual(["a gtkfr", "sgrs rj"]);
    // The two references carry different ids, and so do the two definitions.
    expect(references.length).toBe(2);
    expect(references[0].attrs.id).not.toBe(references[1].attrs.id);
    const definitionIds = new Set(definitions.map((x: any) => x.attrs.id));
    expect(definitionIds.size).toBe(2);

    const reexported = convertProseMirrorToMarkdown(reimported);
    expect(reexported).toBe(exported);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// F2 (CRITICAL): a body ending in `\` (or `\` before `]`) must survive `^[…]`.
|
||||
// Each must round-trip BYTE-STABLE across 3 iterations, footnote intact.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: raw backslash bodies survive (F2)", () => {
  // Each entry: a note body containing raw backslashes and the exact markdown
  // the exporter is expected to produce for it.
  const cases: Array<{ name: string; body: string; expectMd: string }> = [
    {
      name: "trailing backslash (Windows path)",
      body: "C:\\dir\\",
      expectMd: "x^[C:\\\\dir\\\\]",
    },
    {
      name: "backslash before a literal bracket",
      body: "a \\] b",
      expectMd: "x^[a \\\\\\] b]",
    },
    {
      name: "regex with trailing backslash",
      body: "re\\gex\\",
      expectMd: "x^[re\\\\gex\\\\]",
    },
  ];

  cases.forEach(({ name, body, expectMd }) => {
    it(`round-trips ${name} byte-stable x3 with the backslash preserved`, async () => {
      const source = doc(P(T("x"), ref("fn1")), list(def("fn1", P(T(body)))));
      let current = convertProseMirrorToMarkdown(source);
      expect(current).toBe(expectMd);

      // Import + export three times; every pass must keep the definition,
      // its exact text, and the exact markdown.
      let remaining = 3;
      while (remaining > 0) {
        const reimported = await markdownToProseMirror(current);
        const definition = findNode(reimported, "footnoteDefinition");
        expect(definition).toBeDefined();
        // Note text must equal the input body character for character.
        expect(allText(definition)).toBe(body);
        const reexported = convertProseMirrorToMarkdown(reimported);
        expect(reexported).toBe(current);
        current = reexported;
        remaining -= 1;
      }
    });
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// F4: assembleFootnotes must not emit a DUPLICATE <section data-footnotes> when
|
||||
// the HTML already carries one (a footnote list that landed in a column).
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: no duplicate footnotes section (F4)", () => {
  it("produces exactly one footnotesList when a column footnote is present", async () => {
    const column = {
      type: "column",
      attrs: { width: "50%" },
      content: [P(T("c "), ref("fn1"))],
    };
    const source = doc(
      { type: "columns", content: [column] },
      list(def("fn1", P(T("colnote")))),
    );

    const exported = convertProseMirrorToMarkdown(source);
    const reimported = await markdownToProseMirror(exported);
    // A second, duplicated list would make this count 2.
    const lists = findAll(reimported, "footnotesList");
    expect(lists.length).toBe(1);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// N1 (data-loss): NESTED inline footnotes must round-trip — the assembly pass
|
||||
// runs to a FIXED POINT so an inner `^[…]` spawned by parseInline is also
|
||||
// assigned an id, built into a def, and stripped (no dangling ref, no lost body).
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: nested footnotes (N1)", () => {
  it("keeps `^[outer ^[inner] tail]` as TWO defs, inner preserved, byte-stable", async () => {
    const original = "text ^[outer ^[inner] tail] end";
    const parsed = await markdownToProseMirror(original);
    const definitions = findAll(parsed, "footnoteDefinition");
    const references = findAll(parsed, "footnoteReference");
    // One definition and one reference for each nesting level.
    expect(definitions.length).toBe(2);
    expect(references.length).toBe(2);
    expect(new Set(definitions.map((d: any) => d.attrs.id)).size).toBe(2);
    const sortedBodies = definitions.map(allText).sort();
    expect(sortedBodies).toEqual(["inner", "outer tail"]);
    // Inside the outer definition there must be a reference whose id is the
    // inner definition's id.
    const outerDef = definitions.find((d: any) => allText(d).includes("outer"));
    const innerDef = definitions.find((d: any) => allText(d) === "inner");
    const referenceInOuter = findNode(outerDef, "footnoteReference");
    expect(referenceInOuter).toBeDefined();
    expect(referenceInOuter.attrs.id).toBe(innerDef.attrs.id);
    // Export, then import + export once more: all three markdown strings match.
    const secondPass = convertProseMirrorToMarkdown(parsed);
    expect(secondPass).toBe(original);
    const thirdPass = convertProseMirrorToMarkdown(await markdownToProseMirror(secondPass));
    expect(thirdPass).toBe(secondPass);
  });

  it("round-trips a 3-level nest `^[a ^[b ^[c] d] e]` (three defs)", async () => {
    const original = "z ^[a ^[b ^[c] d] e] z";
    const parsed = await markdownToProseMirror(original);
    const definitions = findAll(parsed, "footnoteDefinition");
    expect(definitions.length).toBe(3);
    expect(new Set(definitions.map((d: any) => d.attrs.id)).size).toBe(3);
    expect(definitions.map(allText).sort()).toEqual(["a e", "b d", "c"]);
    const secondPass = convertProseMirrorToMarkdown(parsed);
    expect(secondPass).toBe(original);
    const thirdPass = convertProseMirrorToMarkdown(await markdownToProseMirror(secondPass));
    expect(thirdPass).toBe(secondPass);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// N2: a generated id must never collide with an id already present in a REUSED
|
||||
// footnotes section (the counter is seeded past the max existing `fn-N`).
|
||||
// ---------------------------------------------------------------------------
|
||||
describe("inline footnote: generated ids never collide with a reused section (N2)", () => {
  it("seeds the counter past an existing `fn-1` def in a legacy section", async () => {
    // Input = a new inline `^[…]` note followed by a raw-HTML
    // `<section data-footnotes>` that already holds a def with id `fn-1`.
    // The inline note must not be given `fn-1` as well.
    const input = [
      "text^[new note]\n\n",
      '<section data-footnotes><div data-footnote-def data-id="fn-1">',
      "<p>existing note</p></div></section>",
    ].join("");
    const parsed = await markdownToProseMirror(input);
    const definitions = findAll(parsed, "footnoteDefinition");
    // Two definitions with different ids, gathered in one footnotes list.
    expect(definitions.length).toBe(2);
    const idOf = (d: any) => d.attrs.id;
    expect(new Set(definitions.map(idOf)).size).toBe(2);
    expect(findAll(parsed, "footnotesList").length).toBe(1);
    expect(definitions.map(allText).sort()).toEqual(["existing note", "new note"]);
    // `fn-1` is kept for the legacy def; the new def gets the next number.
    expect(definitions.map(idOf)).toContain("fn-1");
    expect(definitions.map(idOf)).toContain("fn-2");
  });
});
|
||||
@@ -0,0 +1,249 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import both directions DIRECTLY from src (NOT the docmost-client barrel, which
|
||||
// pulls in collaboration.ts and mutates the global DOM at import time), matching
|
||||
// the other converter unit tests.
|
||||
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
|
||||
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
|
||||
|
||||
// #293 canon #7: a `highlight` mark WITHOUT a color serializes as the
|
||||
// Obsidian/GFM `==text==` syntax; a highlight WITH a color KEEPS the
|
||||
// `<mark style="background-color: …">` HTML form. On the raw-HTML path
|
||||
// (columns / spanned cells) BOTH forms stay `<mark>` because markdown is not
|
||||
// re-parsed there. This file locks the serialize form, the round-trip, and the
|
||||
// literal-`==` escape that keeps a literal `==` from becoming a phantom mark.
|
||||
|
||||
// Minimal ProseMirror-JSON builders used by the tests below.

/** Wraps the given block nodes in a `doc` node. */
const doc = (...nodes: any[]) => {
  return { type: 'doc', content: nodes };
};

/** Builds a text node; the `marks` key is only present when marks are passed. */
const text = (t: string, marks?: any[]) => {
  if (marks) {
    return { type: 'text', text: t, marks };
  }
  return { type: 'text', text: t };
};

/** Wraps the given inline nodes in a `paragraph` node. */
const para = (...inline: any[]) => {
  return { type: 'paragraph', content: inline };
};
|
||||
|
||||
/**
 * Depth-first search for a text node that has a mark whose `type` matches.
 * Returns the first such node in document order, or `null` when none exists
 * (including when `node` itself is null/undefined).
 */
const firstMarkedText = (node: any, type: string): any => {
  const isMatch =
    node?.type === 'text' && (node.marks || []).some((m: any) => m.type === type);
  if (isMatch) return node;

  const children = node?.content || [];
  for (const child of children) {
    const found = firstMarkedText(child, type);
    if (found) {
      return found;
    }
  }
  return null;
};
|
||||
/** Returns the mark of the given `type` on a text node, or `undefined` if absent. */
const mark = (textNode: any, type: string): any => {
  const marks = textNode?.marks || [];
  return marks.find((m: any) => m.type === type);
};
|
||||
/**
 * Joins the text of every text node under `node`, in document order.
 * A text node with a missing/empty `text` contributes an empty string.
 */
const allText = (node: any): string => {
  const isTextNode = node?.type === 'text';
  if (!isTextNode) {
    const children = node?.content || [];
    return children.map((child: any) => allText(child)).join('');
  }
  return node.text || '';
};
|
||||
/** True when at least one text node under `node` carries a mark of `type`. */
const hasMark = (node: any, type: string): boolean => {
  return Boolean(firstMarkedText(node, type));
};
|
||||
|
||||
/**
 * Exports `d` to markdown, imports that markdown, then exports again.
 * Returns the first markdown (`md1`), the re-imported document (`doc2`) and
 * the second markdown (`md2`) so tests can compare the two exports.
 */
const roundTrip = async (d: any) => {
  const firstExport = convertProseMirrorToMarkdown(d);
  const reimported = await markdownToProseMirror(firstExport);
  const secondExport = convertProseMirrorToMarkdown(reimported);
  return { md1: firstExport, doc2: reimported, md2: secondExport };
};
|
||||
|
||||
describe('#293 #7: no-color highlight <-> ==text==', () => {
  it('serializes a no-color highlight as exactly ==text==', () => {
    const source = doc(para(text('important', [{ type: 'highlight' }])));
    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toBe('==important==');
  });

  it('imports ==text== as a highlight mark with NO color', async () => {
    const parsed = await markdownToProseMirror('==important==');
    const highlighted = firstMarkedText(parsed, 'highlight');
    expect(highlighted).toBeTruthy();
    expect(highlighted.text).toBe('important');
    // No color was given, so a missing or null `color` attr both count as null.
    expect(mark(highlighted, 'highlight').attrs?.color ?? null).toBeNull();
  });

  it('is byte-stable and re-imports as a color-less highlight', async () => {
    const source = doc(
      para(text('a base '), text('hl', [{ type: 'highlight' }]), text(' tail')),
    );
    const { md1, md2, doc2 } = await roundTrip(source);
    expect(md1).toBe('a base ==hl== tail');
    expect(md2).toBe(md1);
    const highlighted = firstMarkedText(doc2, 'highlight');
    expect(highlighted.text).toBe('hl');
    expect(mark(highlighted, 'highlight').attrs?.color ?? null).toBeNull();
  });
});
|
||||
|
||||
describe('#293 #7: colored highlight keeps <mark style=…>', () => {
  it('serializes a colored highlight as the <mark style=…> HTML form (NOT ==)', () => {
    const redHighlight = { type: 'highlight', attrs: { color: '#ff0000' } };
    const exported = convertProseMirrorToMarkdown(doc(para(text('c', [redHighlight]))));
    expect(exported).toBe('<mark style="background-color: #ff0000">c</mark>');
    expect(exported).not.toContain('==');
  });

  it('round-trips a colored highlight preserving its color', async () => {
    const coloredHighlight = { type: 'highlight', attrs: { color: '#abcdef' } };
    const source = doc(para(text('c', [coloredHighlight])));
    const { md1, md2, doc2 } = await roundTrip(source);
    expect(md1).toBe('<mark style="background-color: #abcdef">c</mark>');
    expect(md2).toBe(md1);
    const highlighted = firstMarkedText(doc2, 'highlight');
    expect(mark(highlighted, 'highlight').attrs?.color).toBe('#abcdef');
  });
});
|
||||
|
||||
describe('#293 #7: raw-HTML path (columns) stays <mark>, never ==', () => {
  // Builds a `columns` node (layout "two") holding a single column with `blocks`.
  const oneColumn = (...blocks: any[]) => {
    const column = { type: 'column', content: blocks };
    return { type: 'columns', attrs: { layout: 'two' }, content: [column] };
  };

  it('a no-color highlight inside a column serializes as <mark> (inlineToHtml), not ==', () => {
    const source = doc(oneColumn(para(text('p', [{ type: 'highlight' }]))));
    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toContain('<mark>p</mark>');
    // Columns are not re-parsed as markdown, so a `==` emitted here would come
    // back as literal text; it must not appear in the output at all.
    expect(exported).not.toContain('==');
  });

  it('a colored highlight inside a column keeps <mark style=…>', () => {
    const greenHighlight = { type: 'highlight', attrs: { color: '#00ff00' } };
    const source = doc(oneColumn(para(text('p', [greenHighlight]))));
    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toContain('<mark style="background-color: #00ff00">p</mark>');
  });

  it('round-trips a highlight inside a column (byte-stable, mark preserved)', async () => {
    const source = doc(oneColumn(para(text('p', [{ type: 'highlight' }]))));
    const { md1, md2, doc2 } = await roundTrip(source);
    expect(md1).toContain('<mark>p</mark>');
    expect(md2).toBe(md1);
    expect(hasMark(doc2, 'highlight')).toBe(true);
  });
});
|
||||
|
||||
describe('#293 #7: highlight wrapping other marks', () => {
  it('serializes bold-inside-highlight as ==**x**== and round-trips both marks', async () => {
    // One text node carrying both marks, bold listed first.
    const boldAndHighlight = [{ type: 'bold' }, { type: 'highlight' }];
    const source = doc(para(text('x', boldAndHighlight)));
    const { md1, md2, doc2 } = await roundTrip(source);
    expect(md1).toBe('==**x**==');
    expect(md2).toBe(md1);
    const highlighted = firstMarkedText(doc2, 'highlight');
    expect(highlighted).toBeTruthy();
    const isBold = (highlighted.marks || []).some((m: any) => m.type === 'bold');
    expect(isBold).toBe(true);
    expect(highlighted.text).toBe('x');
  });
});
|
||||
|
||||
describe('#293 #7: inline code containing == stays code, not a highlight', () => {
  it('imports `a == b` as an inline code span, not a highlight', async () => {
    const parsed = await markdownToProseMirror('`a == b`');
    expect(hasMark(parsed, 'highlight')).toBe(false);
    const codeRun = firstMarkedText(parsed, 'code');
    expect(codeRun).toBeTruthy();
    expect(codeRun.text).toBe('a == b');
  });

  it('round-trips an inline code span carrying == (byte-stable, no highlight)', async () => {
    const source = doc(para(text('a == b', [{ type: 'code' }])));
    const { md1, md2, doc2 } = await roundTrip(source);
    expect(md1).toBe('`a == b`');
    expect(md2).toBe(md1);
    expect(hasMark(doc2, 'highlight')).toBe(false);
    expect(firstMarkedText(doc2, 'code').text).toBe('a == b');
  });
});
|
||||
|
||||
describe('#293 #7: literal == in plain prose round-trips as text (no phantom highlight)', () => {
  it('a lone literal == (a == b) is escaped and re-imports as literal text', async () => {
    const source = doc(para(text('a == b')));
    const { md1, md2, doc2 } = await roundTrip(source);
    // Both `=` characters get their own backslash in the exported markdown.
    expect(md1).toBe('a \\=\\= b');
    expect(md2).toBe(md1);
    expect(hasMark(doc2, 'highlight')).toBe(false);
    expect(allText(doc2)).toBe('a == b');
  });

  it('a literal ==...== pair in prose does NOT materialize a highlight', async () => {
    const source = doc(para(text('x ==not hl== y')));
    const { md1, md2, doc2 } = await roundTrip(source);
    expect(md1).toBe('x \\=\\=not hl\\=\\= y');
    expect(md2).toBe(md1);
    expect(hasMark(doc2, 'highlight')).toBe(false);
    expect(allText(doc2)).toBe('x ==not hl== y');
  });

  it('a highlight over text that itself contains a literal == round-trips both', async () => {
    const source = doc(para(text('a == b', [{ type: 'highlight' }])));
    const { md1, md2, doc2 } = await roundTrip(source);
    // Expected output: the `==` inside the text is backslash-escaped, while
    // the surrounding highlight delimiters stay as plain `==`.
    expect(md1).toBe('==a \\=\\= b==');
    expect(md2).toBe(md1);
    const highlighted = firstMarkedText(doc2, 'highlight');
    expect(highlighted.text).toBe('a == b');
  });
});
|
||||
|
||||
describe('#293 #7: fail-open edges (empty / unbalanced ==)', () => {
  it('empty ==== does not crash and stays literal (no highlight)', async () => {
    const d = await markdownToProseMirror('====');
    expect(hasMark(d, 'highlight')).toBe(false);
    expect(allText(d)).toBe('====');
  });

  it('unbalanced ==x does not crash and stays literal (no highlight)', async () => {
    const d = await markdownToProseMirror('==x');
    expect(hasMark(d, 'highlight')).toBe(false);
    expect(allText(d)).toBe('==x');
  });

  it('two highlights on one line both parse (lazy inner)', async () => {
    const d = await markdownToProseMirror('==a== ==b==');
    // Lazy matching: the first highlight must end at the first closing `==`.
    const first = firstMarkedText(d, 'highlight');
    expect(first.text).toBe('a');

    // Collect the text of EVERY highlighted run, in document order. Checking
    // only that the document text contains "a" and "b" would also pass if the
    // second `==b==` were left as literal text, so assert on the marks.
    const highlightedRuns: string[] = [];
    const collectHighlighted = (node: any): void => {
      const isHighlightedText =
        node?.type === 'text' && (node.marks || []).some((m: any) => m.type === 'highlight');
      if (isHighlightedText) highlightedRuns.push(node.text);
      for (const child of node?.content || []) collectHighlighted(child);
    };
    collectHighlighted(d);
    expect(highlightedRuns).toEqual(['a', 'b']);
  });
});
|
||||
|
||||
describe('#293 #7: a codeBlock containing == is NOT escaped (literal code preserved)', () => {
  // Regression guard. The canon #7 `==` -> `\=\=` escape lives in `case "text"`,
  // but fence content is literal and marked does NOT decode `\=` inside a fence.
  // Sending code through that path would permanently stamp backslashes into
  // every `==` comparison, so codeBlock must read the raw child text.

  // Builds a codeBlock node with one text child; `language` defaults to ''.
  const codeBlock = (t: string, language = '') => {
    const child = { type: 'text', text: t };
    return { type: 'codeBlock', attrs: { language }, content: [child] };
  };

  it('exports `==` in code verbatim (no \\=\\=) and round-trips byte-stably', async () => {
    const source = doc(codeBlock('if (a == b) return c == d;', 'js'));
    const exported = convertProseMirrorToMarkdown(source);
    expect(exported).toBe('```js\nif (a == b) return c == d;\n```');
    expect(exported).not.toContain('\\='); // no escaping backslash was added
    const reimported = await markdownToProseMirror(exported);
    // Text is compared after trimEnd because marked re-adds a trailing "\n" to
    // fence content on import (the serializer strips it again on export).
    // Exact equality of the markdown is asserted at the end.
    expect(allText(reimported).trimEnd()).toBe('if (a == b) return c == d;');
    expect(allText(reimported)).not.toContain('\\=');
    expect(hasMark(reimported, 'highlight')).toBe(false);
    expect(convertProseMirrorToMarkdown(reimported)).toBe(exported); // same bytes
  });

  it('a real markdown code block with == imports clean and re-exports clean', async () => {
    const fenced = '```\nx == y\n```';
    const parsed = await markdownToProseMirror(fenced);
    expect(allText(parsed).trimEnd()).toBe('x == y');
    expect(allText(parsed)).not.toContain('\\=');
    expect(convertProseMirrorToMarkdown(parsed)).toBe(fenced); // same bytes
  });
});
|
||||
@@ -0,0 +1,265 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import DIRECTLY from src (matching the other converter unit tests), not the
|
||||
// docmost-client barrel.
|
||||
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
|
||||
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #293 canon #4: images ALWAYS serialize as `![alt](src)`. Non-default attrs
|
||||
// ride along in an attached `<!--img {…}-->` comment on the SAME line, which the
|
||||
// importer materializes back onto the <img> before generateJSON drops it. An
|
||||
// attr equal to the schema default is NOT emitted. The image align default is
|
||||
// unified to "center" (matching editor-ext), so bare/center images stay clean
|
||||
// and only a genuinely non-default alignment (left/right) emits a comment.
|
||||
//
|
||||
// In raw-HTML contexts (inside a column / spanned cell) the prior `<img …>` form
|
||||
// is kept; comments are dropped by the DOM parse stage there.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Wraps the given nodes in a ProseMirror-JSON `doc` root. */
const doc = (...nodes: any[]) => {
  return { type: 'doc', content: nodes };
};

/**
 * Builds a doc holding a single image node. `src` is pre-filled with
 * '/i.png'; any key in `attrs` (including `src`) overrides the pre-fill.
 */
const image = (attrs: any) => {
  const merged = { src: '/i.png', ...attrs };
  return doc({ type: 'image', attrs: merged });
};
|
||||
|
||||
/**
 * Depth-first, document-order search for the first node whose `type` is
 * 'image' in a ProseMirror-JSON tree.
 *
 * @param node root of the (sub)tree to search; non-object values are ignored
 * @returns the first image node found, or null when there is none
 */
function findImage(node: any): any | null {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original left-to-right order.
  const pending: any[] = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current === null || typeof current !== 'object') continue;
    if (current.type === 'image') return current;
    if (Array.isArray(current.content)) {
      for (let i = current.content.length - 1; i >= 0; i -= 1) {
        pending.push(current.content[i]);
      }
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Runs one export -> import cycle on a ProseMirror-JSON doc.
 *
 * @param source the doc to export
 * @returns `md`   — the markdown produced by the export,
 *          `back` — the doc produced by re-importing that markdown,
 *          `img`  — the first image node inside `back` (null when absent)
 */
async function roundTrip(source: any): Promise<{ md: string; img: any; back: any }> {
  const exported = convertProseMirrorToMarkdown(source);
  const reimported = await markdownToProseMirror(exported);
  const firstImage = findImage(reimported);
  return { md: exported, img: firstImage, back: reimported };
}
|
||||
|
||||
// Image serialization suite. Top-level images are expected to export as
// `![alt](src)`, optionally followed by ONE space and an attached
// `<!--img {…}-->` comment carrying the non-default attrs. Every fixture built
// by `image()` uses src '/i.png'.
describe('#293 canon #4 — image serialization + attached img-comment', () => {
  it('a bare image (src only) emits `![]()` with NO comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({}));
    expect(md).toBe('![](/i.png)');
    expect(md).not.toContain('<!--img');
    expect(img).toBeTruthy();
    expect(img.attrs.src).toBe('/i.png');
    // align falls back to the unified "center" default on import.
    expect(img.attrs.align).toBe('center');
  });

  it('src + alt emits `![alt](src)` with NO comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ alt: 'схема' }));
    expect(md).toBe('![схема](/i.png)');
    expect(md).not.toContain('<!--img');
    expect(img.attrs.alt).toBe('схема');
  });

  it('alt with markdown-ACTIVE punctuation is escaped and round-trips byte-stable (F1)', async () => {
    // The alt sits in the `![alt]` label, re-parsed as CommonMark inline on
    // import; without escaping, a bracket/emphasis in a realistic description
    // would make the image node VANISH or collapse emphasis. Assert the image
    // survives with the exact alt AND the markdown is byte-stable on re-export.
    for (const alt of [
      'a]b[c', 'Figure [1]', 'the *new* logo', 'x_y_z', 'see ![img', 'a & b',
      // Canon inline-extension triggers this same package introduces (F5): math
      // `$`, highlight `==`, footnote `^[` — an unescaped one turns the alt into
      // a math/highlight/footnote node on import.
      'x $A$ y', '5$ and 10$', 'use ==bold==', '^[fn]', 'cost $5 == price',
    ]) {
      const md1 = convertProseMirrorToMarkdown(image({ alt }));
      const back = await markdownToProseMirror(md1);
      const img = findImage(back);
      expect(img).toBeTruthy(); // image node did NOT vanish
      expect(img.attrs.alt).toBe(alt); // exact alt preserved
      expect(convertProseMirrorToMarkdown(back)).toBe(md1); // byte-stable
    }
  });

  it('align "center" (the default) emits a bare image, NO comment, round-trips to center', async () => {
    const { md, img } = await roundTrip(image({ align: 'center' }));
    expect(md).toBe('![](/i.png)');
    expect(md).not.toContain('<!--img');
    expect(img.attrs.align).toBe('center');
  });

  it('a null align emits a bare image and re-imports as the "center" default', async () => {
    const { md, img } = await roundTrip(image({ align: null }));
    expect(md).toBe('![](/i.png)');
    expect(img.attrs.align).toBe('center');
  });

  it('align "left" emits an img-comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ align: 'left' }));
    expect(md).toBe('![](/i.png) <!--img {"align":"left"}-->');
    expect(img.attrs.align).toBe('left');
  });

  it('align "right" emits an img-comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ align: 'right' }));
    expect(md).toBe('![](/i.png) <!--img {"align":"right"}-->');
    expect(img.attrs.align).toBe('right');
  });

  it('width alone emits a single-key comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ width: '420' }));
    expect(md).toBe('![](/i.png) <!--img {"width":"420"}-->');
    expect(img.attrs.width).toBe('420');
  });

  it('height alone emits a single-key comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ height: '300' }));
    expect(md).toBe('![](/i.png) <!--img {"height":"300"}-->');
    expect(img.attrs.height).toBe('300');
  });

  it('size alone emits a single-key comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ size: '48' }));
    expect(md).toBe('![](/i.png) <!--img {"size":"48"}-->');
    expect(img.attrs.size).toBe('48');
  });

  it('aspectRatio alone emits a single-key comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ aspectRatio: '1.777' }));
    expect(md).toBe('![](/i.png) <!--img {"aspectRatio":"1.777"}-->');
    expect(img.attrs.aspectRatio).toBe('1.777');
  });

  it('attachmentId (the file link — data-loss critical) rides in the comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ attachmentId: 'att-777' }));
    expect(md).toBe('![](/i.png) <!--img {"attachmentId":"att-777"}-->');
    expect(img.attrs.attachmentId).toBe('att-777');
  });

  it('caption rides in the comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ caption: 'Рис. 1' }));
    expect(md).toBe('![](/i.png) <!--img {"caption":"Рис. 1"}-->');
    expect(img.attrs.caption).toBe('Рис. 1');
  });

  it('title rides in the comment and round-trips', async () => {
    const { md, img } = await roundTrip(image({ title: 'a tooltip' }));
    expect(md).toBe('![](/i.png) <!--img {"title":"a tooltip"}-->');
    expect(img.attrs.title).toBe('a tooltip');
  });

  it('multiple attrs at once appear in the stable key order and round-trip', async () => {
    const { md, img } = await roundTrip(
      image({
        alt: 'схема',
        width: '420',
        height: '300',
        align: 'left',
        size: '48',
        aspectRatio: '1.5',
        attachmentId: 'att-1',
        caption: 'Рис. 1',
        title: 'tip',
      }),
    );
    // Stable order: width, height, align, size, aspectRatio, attachmentId, caption, title.
    expect(md).toBe(
      '![схема](/i.png) <!--img {"width":"420","height":"300","align":"left","size":"48","aspectRatio":"1.5","attachmentId":"att-1","caption":"Рис. 1","title":"tip"}-->',
    );
    expect(img.attrs.width).toBe('420');
    expect(img.attrs.height).toBe('300');
    expect(img.attrs.align).toBe('left');
    expect(img.attrs.size).toBe('48');
    expect(img.attrs.aspectRatio).toBe('1.5');
    expect(img.attrs.attachmentId).toBe('att-1');
    expect(img.attrs.caption).toBe('Рис. 1');
    expect(img.attrs.title).toBe('tip');
  });

  // MANDATORY (#293 canon #4): a caption containing the comment-closing `-->`
  // must be encoded so it can never break the HTML comment; JSON.parse restores
  // it byte-exact on import.
  it('a caption containing `-->` is escaped, does not break the comment, and round-trips byte-exact', async () => {
    const caption = 'see --> here';
    const { md, img } = await roundTrip(image({ caption }));
    // The `--` pair is defused as the JSON unicode escape, so the literal
    // caption text is NOT present verbatim and the comment cannot close early.
    expect(md).toContain('\\u002d\\u002d');
    expect(md).not.toContain('see --> here');
    // The comment still closes exactly once, at the very end.
    expect(md.endsWith('-->')).toBe(true);
    // Restored byte-exact on re-import.
    expect(img.attrs.caption).toBe('see --> here');
  });

  // A whole raw comment-closer as the caption is the adversarial edge.
  it('a caption that IS `-->` round-trips byte-exact', async () => {
    const { img } = await roundTrip(image({ caption: '-->' }));
    expect(img.attrs.caption).toBe('-->');
  });

  it('an image INSIDE a column keeps the raw <img> form (no img-comment) and round-trips', async () => {
    const source = doc({
      type: 'columns',
      attrs: { layout: 'two' },
      content: [
        {
          type: 'column',
          content: [
            {
              type: 'image',
              attrs: { src: '/i.png', alt: 'c', width: '320', align: 'left', attachmentId: 'att-9' },
            },
          ],
        },
        { type: 'column', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'r' }] }] },
      ],
    });
    const md = convertProseMirrorToMarkdown(source);
    expect(md).toContain('<img');
    expect(md).not.toContain('<!--img');
    const back = await markdownToProseMirror(md);
    const img = findImage(back);
    expect(img).toBeTruthy();
    expect(img.attrs.width).toBe('320');
    expect(img.attrs.align).toBe('left');
    expect(img.attrs.attachmentId).toBe('att-9');
  });

  // ---- Fail-open behavior ---------------------------------------------------

  it('malformed JSON in an img-comment is ignored; the image keeps default attrs (no crash)', async () => {
    // The comment must be attached to a real image for the fail-open path to
    // be exercised; the payload `{bad` is not valid JSON.
    const back = await markdownToProseMirror('![](/i.png) <!--img {bad-->');
    const img = findImage(back);
    expect(img).toBeTruthy();
    expect(img.attrs.width).toBeNull();
    expect(img.attrs.align).toBe('center'); // default
  });

  it('a STANDALONE img-comment (no adjacent <img>) is inert — no image materialized', async () => {
    const back = await markdownToProseMirror('<!--img {"width":10}-->');
    expect(findImage(back)).toBeNull();
  });

  it('unknown keys in a valid img-comment are ignored; the image is otherwise default', async () => {
    const back = await markdownToProseMirror('![](/i.png) <!--img {"zzz":1}-->');
    const img = findImage(back);
    expect(img).toBeTruthy();
    expect(img.attrs.width).toBeNull();
    expect(img.attrs.align).toBe('center');
    expect((img.attrs as any).zzz).toBeUndefined();
  });

  it('NUMERIC sizing attrs serialize as strings and round-trip byte-stably', async () => {
    // The import side reads DOM attributes back as strings, so a numeric source
    // value must be stringified in the payload or the first round-trip churns
    // `420 -> "420"` (a spurious one-time git diff). Assert the emitted string
    // form AND that a second export is byte-identical to the first.
    const d = image({ width: 420, height: 200, size: 80, aspectRatio: 1.5 });
    const md1 = convertProseMirrorToMarkdown(d);
    expect(md1).toBe(
      '![](/i.png) <!--img {"width":"420","height":"200","size":"80","aspectRatio":"1.5"}-->',
    );
    const back = await markdownToProseMirror(md1);
    const md2 = convertProseMirrorToMarkdown(back);
    expect(md2).toBe(md1); // byte-stable: no 420 -> "420" churn
  });
});
|
||||
@@ -0,0 +1,194 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import DIRECTLY from src (NOT the docmost-client barrel, which pulls in
|
||||
// collaboration.ts and mutates the global DOM at import time), matching the
|
||||
// other converter unit tests.
|
||||
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
|
||||
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
|
||||
import { standaloneCommentFor } from '../src/lib/attached-comment.js';
|
||||
|
||||
// #293 canon decision #5: `subpages` and `pageBreak` serialize as STANDALONE
|
||||
// HTML comments on their own line —
|
||||
// <!--subpages--> <!--subpages {"recursive":true}--> <!--pagebreak-->
|
||||
// — invisible in any markdown renderer, yet round-tripping (the importer
|
||||
// materializes them back into the block atom before generateJSON drops the
|
||||
// comment). Position determines legality: they are honored ONLY standalone; a
|
||||
// comment attached after visible text is INERT. Inside a raw-HTML container
|
||||
// (columns/cells) the DOM parse stage discards comment nodes, so there the
|
||||
// schema `<div data-type="...">` form is emitted instead. These tests assert the
|
||||
// EXACT emitted markdown and a lossless round trip (non-vacuous).
|
||||
|
||||
/** Wraps the given nodes in a ProseMirror-JSON `doc` root. */
const doc = (...nodes: any[]) => {
  return { type: 'doc', content: nodes };
};

/** Builds a plain text node carrying the string `t`. */
const text = (t: string) => {
  return { type: 'text', text: t };
};

/** Builds a paragraph whose content is the given inline nodes. */
const para = (...inline: any[]) => {
  return { type: 'paragraph', content: inline };
};
|
||||
|
||||
/**
 * Gathers the `type` of every node in a ProseMirror-JSON tree.
 *
 * @param n   root of the (sub)tree; non-object values contribute nothing
 * @param set accumulator the types are added to (a fresh Set by default)
 * @returns the same `set` instance, now holding every truthy `type` seen
 */
const collectTypes = (n: any, set = new Set<string>()): Set<string> => {
  const isNode = Boolean(n) && typeof n === 'object';
  if (isNode) {
    if (n.type) set.add(n.type);
    const children: any[] = Array.isArray(n.content) ? n.content : [];
    for (const child of children) {
      collectTypes(child, set);
    }
  }
  return set;
};
|
||||
|
||||
/**
 * Depth-first, document-order search for the first node of a given type in a
 * ProseMirror-JSON tree (used below to assert on that node's attributes).
 *
 * @param n    root of the (sub)tree; non-object values are ignored
 * @param type the node `type` to look for
 * @returns the first matching node, or undefined when there is none
 */
const findNode = (n: any, type: string): any => {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original left-to-right order.
  const stack: any[] = [n];
  while (stack.length > 0) {
    const current = stack.pop();
    if (current === null || typeof current !== 'object') continue;
    if (current.type === type) return current;
    if (Array.isArray(current.content)) {
      for (let i = current.content.length - 1; i >= 0; i -= 1) {
        stack.push(current.content[i]);
      }
    }
  }
  return undefined;
};
|
||||
|
||||
// Unit checks on the comment builder itself, independent of the converter.
describe('standaloneCommentFor primitive (#293 #5)', () => {
  it('emits a name-only comment when there are no attrs', () => {
    // Omitted, empty and null attrs must all collapse to the bare form.
    const pageBreak = standaloneCommentFor('pagebreak');
    const omitted = standaloneCommentFor('subpages');
    const empty = standaloneCommentFor('subpages', {});
    const nullish = standaloneCommentFor('subpages', null);

    expect(pageBreak).toBe('<!--pagebreak-->');
    expect(omitted).toBe('<!--subpages-->');
    expect(empty).toBe('<!--subpages-->');
    expect(nullish).toBe('<!--subpages-->');
  });

  it('emits a compact JSON body when attrs are present', () => {
    const withAttrs = standaloneCommentFor('subpages', { recursive: true });
    expect(withAttrs).toBe('<!--subpages {"recursive":true}-->');
  });

  it('shares the attached encoder `--` escaping (payload cannot close early)', () => {
    const OPEN = '<!--';
    const CLOSE = '-->';
    const comment = standaloneCommentFor('subpages', { note: 'a--b' });
    expect(comment).toBe('<!--subpages {"note":"a\\u002d\\u002db"}-->');

    // Everything between the delimiters must be free of `--`, otherwise the
    // comment could terminate before its real closer.
    const payload = comment.slice(OPEN.length, -CLOSE.length);
    expect(payload).not.toContain('--');
  });
});
|
||||
|
||||
// Top-level `subpages` nodes: exact exported comment plus import/re-export.
describe('subpages standalone serialization (#293 #5)', () => {
  it('default subpages -> exactly <!--subpages-->', () => {
    const exported = convertProseMirrorToMarkdown(doc({ type: 'subpages' }));
    expect(exported).toBe('<!--subpages-->');
  });

  it('markdown <!--subpages--> -> a subpages node, byte-stable re-export', async () => {
    const source = '<!--subpages-->';
    const imported = await markdownToProseMirror(source);
    const types = collectTypes(imported);

    expect(types.has('subpages')).toBe(true);
    expect(convertProseMirrorToMarkdown(imported)).toBe(source);
  });

  it('recursive subpages -> <!--subpages {"recursive":true}--> and round-trips recursive:true', async () => {
    const recursiveDoc = doc({ type: 'subpages', attrs: { recursive: true } });
    const exported = convertProseMirrorToMarkdown(recursiveDoc);
    expect(exported).toBe('<!--subpages {"recursive":true}-->');

    const imported = await markdownToProseMirror(exported);
    const subpages = findNode(imported, 'subpages');
    expect(subpages).toBeTruthy();
    expect(subpages.attrs.recursive).toBe(true);

    // A second export identical to the first closes the loop.
    expect(convertProseMirrorToMarkdown(imported)).toBe(exported);
  });
});
|
||||
|
||||
// Top-level `pageBreak` nodes: exact exported comment plus import/re-export.
describe('pageBreak standalone serialization (#293 #5)', () => {
  it('pageBreak -> exactly <!--pagebreak-->', () => {
    const exported = convertProseMirrorToMarkdown(doc({ type: 'pageBreak' }));
    expect(exported).toBe('<!--pagebreak-->');
  });

  it('markdown <!--pagebreak--> round-trips to a pageBreak node, byte-stable', async () => {
    const source = '<!--pagebreak-->';
    const imported = await markdownToProseMirror(source);
    const types = collectTypes(imported);

    expect(types.has('pageBreak')).toBe(true);
    expect(convertProseMirrorToMarkdown(imported)).toBe(source);
  });
});
|
||||
|
||||
describe('subpages inside a column uses the div-form, not a comment (#293 #5)', () => {
  // Columns are emitted as raw HTML, and the DOM parse stage drops comment
  // nodes, so a comment in there would be lost. The schema div-form is
  // required inside a column instead.
  const subpagesColumn = {
    type: 'column',
    attrs: { width: '50%' },
    content: [{ type: 'subpages' }],
  };
  const textColumn = {
    type: 'column',
    attrs: { width: '50%' },
    content: [para(text('side'))],
  };
  const columnsDoc = doc({ type: 'columns', content: [subpagesColumn, textColumn] });

  it('serializes the column subpages as <div data-type="subpages">, not <!--subpages-->', () => {
    const exported = convertProseMirrorToMarkdown(columnsDoc);
    expect(exported).toContain('data-type="subpages"');
    // The standalone comment form must be absent from the raw-HTML column.
    expect(exported).not.toContain('<!--subpages-->');
  });

  it('round-trips back to a subpages node still inside a column', async () => {
    const exported = convertProseMirrorToMarkdown(columnsDoc);
    const imported = await markdownToProseMirror(exported);

    // findNode returns the FIRST column, which is the one holding subpages.
    const firstColumn = findNode(imported, 'column');
    expect(firstColumn).toBeTruthy();
    expect(collectTypes(firstColumn).has('subpages')).toBe(true);
  });
});
|
||||
|
||||
// Inputs that must NOT materialize a subpages node.
describe('position legality / fail-open (#293 #5)', () => {
  it('an ATTACHED <!--subpages--> after paragraph text is INERT (no subpages node)', async () => {
    const imported = await markdownToProseMirror('para text <!--subpages-->');
    const types = collectTypes(imported);
    expect(types.has('subpages')).toBe(false);

    // No comment marker may leak into the imported document.
    const serialized = JSON.stringify(imported);
    expect(serialized).not.toContain('<!--');
  });

  it('a malformed <!--subpages {bad--> is INERT (no crash, no subpages node)', async () => {
    const imported = await markdownToProseMirror('<!--subpages {bad-->');
    const types = collectTypes(imported);
    expect(types.has('subpages')).toBe(false);
  });
});
|
||||
|
||||
describe('multi-node document order across standalone comments (#293 #5)', () => {
  // The riskiest part of the parser change: a LEADING standalone comment is
  // parsed at document level (outside <body>) and has to be put back into the
  // body in document order, interleaved with the real block content. A
  // MID-document comment (the pageBreak below) exercises the in-body branch.
  // These tests pin the ordering that was previously only checked by hand.

  /** Types of the doc's direct children, in order ([] when there is no content). */
  const topTypes = (d: any) => {
    const children = d.content || [];
    return children.map((n: any) => n.type);
  };

  it('leading + mid + trailing standalone comments keep document order', async () => {
    const original = doc(
      { type: 'subpages' }, // leading -> parsed at document level
      para(text('a')),
      { type: 'pageBreak' }, // mid -> parsed in-body
      para(text('b')),
    );
    const exported = convertProseMirrorToMarkdown(original);
    expect(exported).toBe('<!--subpages-->\n\na\n\n<!--pagebreak-->\n\nb');

    const imported = await markdownToProseMirror(exported);
    // Exact order is asserted, not mere membership.
    const expectedOrder = ['subpages', 'paragraph', 'pageBreak', 'paragraph'];
    expect(topTypes(imported)).toEqual(expectedOrder);
    // Re-export must be byte-identical.
    expect(convertProseMirrorToMarkdown(imported)).toBe(exported);
  });

  it('two leading standalone comments keep their relative order', async () => {
    const original = doc({ type: 'subpages' }, { type: 'pageBreak' }, para(text('x')));
    const exported = convertProseMirrorToMarkdown(original);
    expect(exported).toBe('<!--subpages-->\n\n<!--pagebreak-->\n\nx');

    const imported = await markdownToProseMirror(exported);
    expect(topTypes(imported)).toEqual(['subpages', 'pageBreak', 'paragraph']);
    expect(convertProseMirrorToMarkdown(imported)).toBe(exported);
  });

  it('a trailing standalone comment stays last', async () => {
    const original = doc(para(text('x')), { type: 'subpages' });
    const exported = convertProseMirrorToMarkdown(original);
    expect(exported).toBe('x\n\n<!--subpages-->');

    const imported = await markdownToProseMirror(exported);
    expect(topTypes(imported)).toEqual(['paragraph', 'subpages']);
    expect(convertProseMirrorToMarkdown(imported)).toBe(exported);
  });
});
|
||||
+39
-33
@@ -36,29 +36,30 @@ async function roundTrip(node: any): Promise<{ md1: string; doc2: any; md2: stri
|
||||
// existing documented `it.fails` bugs in markdown-roundtrip.property.test.ts).
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('pageBreak data loss (no converter case — SPEC §11 divergence)', () => {
|
||||
it('exports a pageBreak node to the schema-matching block div', () => {
|
||||
// FIXED: a standalone pageBreak now emits the block-level HTML div so the
|
||||
// node survives instead of being erased to "".
|
||||
it('exports a pageBreak node to the standalone comment (#293 #5)', () => {
|
||||
// #293 canon #5: a standalone pageBreak now serializes as the readable,
|
||||
// renderer-invisible comment `<!--pagebreak-->` (re-materialized on import),
|
||||
// instead of the earlier raw <div> block.
|
||||
expect(convertProseMirrorToMarkdown(doc({ type: 'pageBreak' }))).toBe(
|
||||
'<div data-type="pageBreak"></div>',
|
||||
'<!--pagebreak-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('keeps a pageBreak sitting BETWEEN two paragraphs on export', () => {
|
||||
// FIXED: with surrounding content the divider is emitted as its own block
|
||||
// With surrounding content the divider is emitted as its own comment line
|
||||
// between the two paragraphs (joined by the doc "\n\n"), no longer dropped.
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc(para(text('before')), { type: 'pageBreak' }, para(text('after'))),
|
||||
);
|
||||
expect(out).toBe(
|
||||
'before\n\n<div data-type="pageBreak"></div>\n\nafter',
|
||||
'before\n\n<!--pagebreak-->\n\nafter',
|
||||
);
|
||||
expect(out).toContain('pageBreak');
|
||||
expect(out).toContain('<!--pagebreak-->');
|
||||
});
|
||||
|
||||
// FIXED: a pageBreak node now survives an export -> import -> export cycle
|
||||
// because the FIRST export emits the schema-matching block div, which marked
|
||||
// passes through and generateJSON rebuilds into a pageBreak node again.
|
||||
// because the FIRST export emits the standalone comment, which the importer
|
||||
// materializes back into a pageBreak node again.
|
||||
it('a pageBreak node round-trips (export -> import yields a pageBreak)', async () => {
|
||||
const { md1, doc2 } = await roundTrip({ type: 'pageBreak' });
|
||||
expect(md1).not.toBe('');
|
||||
@@ -68,18 +69,18 @@ describe('pageBreak data loss (no converter case — SPEC §11 divergence)', ()
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 2. subpages round-trip (`case "subpages"` emits the schema-matching div).
|
||||
// 2. subpages round-trip (#293 #5 standalone comment).
|
||||
//
|
||||
// It used to emit the literal `{{SUBPAGES}}`, which has no markdown/HTML meaning,
|
||||
// so on re-import the subpages BLOCK came back as a plain PARAGRAPH carrying the
|
||||
// literal string (the embed rendered as visible "{{SUBPAGES}}" text on the page
|
||||
// after a sync — data loss). It now emits `<div data-type="subpages">` like the
|
||||
// other embed nodes, so the schema's parseHTML rebuilds the subpages node.
|
||||
// after a sync — data loss). Per canon #5 it now emits the standalone comment
|
||||
// `<!--subpages-->`, which the importer materializes back into a subpages node.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('subpages round-trip (schema-matching div)', () => {
|
||||
it('emits the subpages div and re-imports as a subpages node (no literal leak)', async () => {
|
||||
describe('subpages round-trip (standalone comment #293 #5)', () => {
|
||||
it('emits the subpages comment and re-imports as a subpages node (no literal leak)', async () => {
|
||||
const { md1, doc2 } = await roundTrip({ type: 'subpages' });
|
||||
expect(md1).toBe('<div data-type="subpages"></div>');
|
||||
expect(md1).toBe('<!--subpages-->');
|
||||
|
||||
const collect = (n: any): string[] => [
|
||||
n.type,
|
||||
@@ -477,22 +478,24 @@ describe('converter gap coverage — documented round-trip data loss (specs 12
|
||||
expect(docsCanonicallyEqual(d, doc2)).toBe(false);
|
||||
});
|
||||
|
||||
// 14. The image emitter drops the title attribute (silently lost on round-trip).
|
||||
it('an image title attribute is dropped on export and lost on re-import', async () => {
|
||||
// 14. #293 canon #4: the image title now round-trips via the attached
|
||||
// `<!--img {…}-->` comment (previously silently dropped).
|
||||
it('an image title attribute round-trips via the attached img-comment', async () => {
|
||||
const d = doc({
|
||||
type: 'image',
|
||||
attrs: { src: '/i.png', alt: 'a', title: 't"q' },
|
||||
});
|
||||
const md1 = convertProseMirrorToMarkdown(d);
|
||||
expect(md1).toBe(''); // no title, no quotes
|
||||
// The quote in the title is JSON-escaped inside the comment payload.
|
||||
expect(md1).toBe(' <!--img {"title":"t\\"q"}-->');
|
||||
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
const img = (doc2.content || []).find((n: any) => n.type === 'image');
|
||||
expect(img).toBeTruthy();
|
||||
expect(img.attrs?.title).toBeNull(); // the original 't"q' was dropped
|
||||
expect(img.attrs?.title).toBe('t"q'); // restored byte-exact
|
||||
expect(img.attrs?.src).toBe('/i.png');
|
||||
expect(img.attrs?.alt).toBe('a');
|
||||
expect(docsCanonicallyEqual(d, doc2)).toBe(false);
|
||||
expect(docsCanonicallyEqual(d, doc2)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -506,8 +509,10 @@ describe('converter gap coverage — raw-HTML container round-trips (specs 15–
|
||||
attrs: { src: '/i.png', alt: 'cap', width: 320, align: 'center' },
|
||||
}),
|
||||
);
|
||||
// #293 canon #4: image align default is unified to "center", so a center
|
||||
// image inside a column no longer emits a redundant align="center".
|
||||
expect(md1).toBe(
|
||||
'<div data-type="columns" data-layout="two"><div data-type="column"><img src="/i.png" alt="cap" width="320" align="center"></div></div>',
|
||||
'<div data-type="columns" data-layout="two"><div data-type="column"><img src="/i.png" alt="cap" width="320"></div></div>',
|
||||
);
|
||||
expect(md2).toBe(md1);
|
||||
expect(colChildOf(doc2)?.type).toBe('image');
|
||||
@@ -786,11 +791,11 @@ describe('converter gap coverage — raw-HTML container round-trips (specs 15–
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
// 30. heading.textAlign round-trip (A1). The paragraph case already exports a
|
||||
// non-default alignment as a styled `<p style="text-align:…">` that re-parses
|
||||
// losslessly; headings used to emit only the bare `## text` form, silently
|
||||
// DROPPING textAlign on export. The heading case is now symmetric: an aligned
|
||||
// heading exports as `<hN style="text-align:…">` and re-parses back to a heading
|
||||
// 30. heading.textAlign round-trip (A1). Bare `## text` markdown carries no
|
||||
// alignment, so an aligned heading used to silently DROP textAlign on export.
|
||||
// Per #293 canon #9 an aligned heading now keeps the readable `## text` form and
|
||||
// ATTACHES a trailing `<!--attrs {"textAlign":…}-->` comment (replacing the old
|
||||
// `<hN style="text-align:…">` HTML form). It re-parses back to a heading
|
||||
// carrying BOTH the level and the textAlign, so the round-trip is lossless; an
|
||||
// UNaligned heading still emits the bare `## text` markdown form (no churn).
|
||||
// ===========================================================================
|
||||
@@ -801,21 +806,22 @@ const alignedHeading = (level: number, align: string, ...inline: any[]) => ({
|
||||
});
|
||||
|
||||
describe('heading.textAlign round-trip (A1)', () => {
|
||||
it('an aligned heading exports as <hN style="text-align:…"> (not bare ##)', () => {
|
||||
it('an aligned heading keeps "## text" and attaches a <!--attrs--> comment (#293 #9)', () => {
|
||||
expect(convertProseMirrorToMarkdown(doc(alignedHeading(2, 'center', text('Title'))))).toBe(
|
||||
'<h2 style="text-align:center">Title</h2>',
|
||||
'## Title <!--attrs {"textAlign":"center"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('survives export -> import -> export losslessly (level AND textAlign preserved)', async () => {
|
||||
const input = alignedHeading(2, 'center', text('Title'));
|
||||
const { md1, doc2, md2 } = await roundTrip(input);
|
||||
// Export direction: a styled <hN>, injection-safe via escapeAttr.
|
||||
expect(md1).toBe('<h2 style="text-align:center">Title</h2>');
|
||||
// Export direction: `## Title` plus the attached alignment comment (#293 #9).
|
||||
expect(md1).toBe('## Title <!--attrs {"textAlign":"center"}-->');
|
||||
// Import direction: re-parses to a heading node with the level AND textAlign
|
||||
// (the raw <hN style> HTML block flows through marked -> generateJSON, where
|
||||
// the heading parse rule matches and the textAlign global attr reads the
|
||||
// style back). Byte-stable second export closes the loop.
|
||||
// (marked keeps the comment inside the <h2>; applyAttachedComments re-expresses
|
||||
// it as an inline style before generateJSON, where the heading parse rule
|
||||
// matches and the textAlign global attr reads it back). Byte-stable second
|
||||
// export closes the loop.
|
||||
const h = doc2.content[0];
|
||||
expect(h.type).toBe('heading');
|
||||
expect(h.attrs.level).toBe(2);
|
||||
+73
-56
@@ -51,43 +51,49 @@ describe('columns / column (raw-HTML layout wrapper)', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('embed / audio / pdf (previously emitted nothing — invisible regression)', () => {
|
||||
it('embed emits div[data-type="embed"] with src/provider', () => {
|
||||
describe('embed / audio / pdf top-level md-form + discriminator (#293 #8)', () => {
|
||||
it('embed emits link-form [provider](src) + bare discriminator (defaults omitted)', () => {
|
||||
// provider is the visible link text; align/width/height are all at their
|
||||
// schema defaults (center/800/600), so the comment is name-only.
|
||||
expect(c({ type: 'embed', attrs: { src: 'https://x.com/e', provider: 'iframe' } })).toBe(
|
||||
'<div data-type="embed" data-src="https://x.com/e" data-provider="iframe"></div>',
|
||||
'[iframe](https://x.com/e)<!--embed-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('audio emits a div-wrapped <audio> with src', () => {
|
||||
expect(c({ type: 'audio', attrs: { src: '/a.mp3' } })).toBe(
|
||||
'<div><audio src="/a.mp3"></audio></div>',
|
||||
);
|
||||
it('audio emits image-form  + bare discriminator', () => {
|
||||
expect(c({ type: 'audio', attrs: { src: '/a.mp3' } })).toBe('<!--audio-->');
|
||||
});
|
||||
|
||||
it('pdf emits div[data-type="pdf"] with src and name', () => {
|
||||
it('pdf emits link-form [name](src) + bare discriminator', () => {
|
||||
expect(c({ type: 'pdf', attrs: { src: '/d.pdf', name: 'd.pdf' } })).toBe(
|
||||
'<div data-type="pdf" src="/d.pdf" data-name="d.pdf"></div>',
|
||||
'[d.pdf](/d.pdf)<!--pdf-->',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('drawio / excalidraw data-align asymmetry (SPEC §11)', () => {
|
||||
it('drawio: data-align is ABSENT when align is unset', () => {
|
||||
describe('drawio / excalidraw align emission in the discriminator comment (#293 #8)', () => {
|
||||
it('drawio: NO align key when align is unset (bare discriminator)', () => {
|
||||
const out = c({ type: 'drawio', attrs: { src: '/d.drawio' } });
|
||||
expect(out).toBe('<div data-type="drawio" data-src="/d.drawio"></div>');
|
||||
expect(out).not.toContain('data-align');
|
||||
expect(out).toBe('<!--drawio-->');
|
||||
expect(out).not.toContain('align');
|
||||
});
|
||||
|
||||
it('drawio: data-align is PRESENT for a non-default align', () => {
|
||||
it('drawio: an "align" key IS present for a non-default align', () => {
|
||||
expect(c({ type: 'drawio', attrs: { src: '/d.drawio', align: 'right' } })).toBe(
|
||||
'<div data-type="drawio" data-src="/d.drawio" data-align="right"></div>',
|
||||
'<!--drawio {"align":"right"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('excalidraw: data-align is ABSENT when align is unset', () => {
|
||||
it('drawio: the default align "center" is OMITTED (byte-stable image-form parity)', () => {
|
||||
const out = c({ type: 'drawio', attrs: { src: '/d.drawio', align: 'center' } });
|
||||
expect(out).toBe('<!--drawio-->');
|
||||
expect(out).not.toContain('align');
|
||||
});
|
||||
|
||||
it('excalidraw: NO align key when align is unset (bare discriminator)', () => {
|
||||
const out = c({ type: 'excalidraw', attrs: { src: '/e.excalidraw' } });
|
||||
expect(out).toBe('<div data-type="excalidraw" data-src="/e.excalidraw"></div>');
|
||||
expect(out).not.toContain('data-align');
|
||||
expect(out).toBe('<!--excalidraw-->');
|
||||
expect(out).not.toContain('align');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -97,7 +103,9 @@ describe('inline-mark matrix (underline/sub/sup/highlight±color/textStyle/comme
|
||||
[[{ type: 'underline' }], '<u>m</u>'],
|
||||
[[{ type: 'subscript' }], '<sub>m</sub>'],
|
||||
[[{ type: 'superscript' }], '<sup>m</sup>'],
|
||||
[[{ type: 'highlight' }], '<mark>m</mark>'],
|
||||
// #293 canon #7: a no-color highlight now serializes as `==m==` (Obsidian
|
||||
// syntax); only a COLORED highlight keeps the `<mark style=…>` HTML form.
|
||||
[[{ type: 'highlight' }], '==m=='],
|
||||
[
|
||||
[{ type: 'highlight', attrs: { color: '#ff0000' } }],
|
||||
'<mark style="background-color: #ff0000">m</mark>',
|
||||
@@ -129,27 +137,26 @@ describe('inline-mark matrix (underline/sub/sup/highlight±color/textStyle/comme
|
||||
});
|
||||
});
|
||||
|
||||
describe('paragraph.textAlign -> <p style="text-align:...">', () => {
|
||||
it('non-default alignment emits an HTML <p style="text-align:...">', () => {
|
||||
// #7 fix: a non-default paragraph alignment now round-trips. It is exported
|
||||
// as an HTML `<p style="text-align:center">` (the schema's paragraph
|
||||
// parseHTML reads `style="text-align"` back onto `textAlign` on import), so
|
||||
// the alignment survives instead of collapsing to bare text. (The old
|
||||
// `<div align="center">` form was NOT re-parsed onto the paragraph and was
|
||||
// therefore lossy.)
|
||||
describe('paragraph.textAlign -> attached <!--attrs--> comment (#293 #9)', () => {
|
||||
it('non-default alignment emits a trailing <!--attrs {"textAlign":…}--> comment', () => {
|
||||
// #293 canon #9: a non-default paragraph alignment now round-trips as an
|
||||
// ATTACHED HTML comment at the END of the block line instead of the old
|
||||
// `<p style="text-align:center">` wrapper (which the maintainer had to patch
|
||||
// A14->A15->A16). The importer's applyAttachedComments step reads the comment
|
||||
// back onto `textAlign` before the DOM stage drops it.
|
||||
expect(c({ type: 'paragraph', attrs: { textAlign: 'center' }, content: [text('x')] })).toBe(
|
||||
'<p style="text-align:center">x</p>',
|
||||
'x <!--attrs {"textAlign":"center"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('textAlign "left" (the default) is NOT wrapped', () => {
|
||||
it('textAlign "left" (the default) emits NO comment', () => {
|
||||
expect(c({ type: 'paragraph', attrs: { textAlign: 'left' }, content: [text('x')] })).toBe('x');
|
||||
});
|
||||
});
|
||||
|
||||
describe('subpages token + unknown-in-container fallback', () => {
|
||||
it('subpages emits the schema-matching div (round-trips, unlike the old {{SUBPAGES}} literal)', () => {
|
||||
expect(c({ type: 'subpages' })).toBe('<div data-type="subpages"></div>');
|
||||
it('subpages emits the standalone comment (#293 #5, unlike the old {{SUBPAGES}} literal)', () => {
|
||||
expect(c({ type: 'subpages' })).toBe('<!--subpages-->');
|
||||
});
|
||||
|
||||
it('an unknown block inside a raw-HTML container is wrapped in <div> (never markdown)', () => {
|
||||
@@ -177,13 +184,20 @@ describe('subpages token + unknown-in-container fallback', () => {
|
||||
|
||||
describe('escaping idempotence (SPEC §11 phantom-diff guard)', () => {
|
||||
it('escapeAttr escapes ONLY & and " in an attribute context, and is idempotent', () => {
|
||||
// The mathBlock `text` attr goes through escapeAttr. & -> &amp;, " -> &quot;.
|
||||
const once = c({ type: 'mathBlock', attrs: { text: 'a & "b"' } });
|
||||
expect(once).toBe(
|
||||
// #293 canon #6: a TOP-LEVEL mathBlock now serializes as a `$$` fence, so
|
||||
// to exercise the schema-HTML `text` attr (which DOES go through escapeAttr)
|
||||
// we wrap the math in a COLUMN — the raw-HTML path keeps the `<div>` form.
|
||||
const col = (child: any) => ({
|
||||
type: 'columns',
|
||||
content: [{ type: 'column', content: [child] }],
|
||||
});
|
||||
// & -> &amp;, " -> &quot; in the attribute context.
|
||||
const once = c(col({ type: 'mathBlock', attrs: { text: 'a & "b"' } }));
|
||||
expect(once).toContain(
|
||||
'<div data-type="mathBlock" data-katex="true" text="a &amp; &quot;b&quot;"></div>',
|
||||
);
|
||||
// < and > are deliberately NOT escaped (would accumulate on round-trips).
|
||||
const angled = c({ type: 'mathBlock', attrs: { text: 'a < b > c' } });
|
||||
const angled = c(col({ type: 'mathBlock', attrs: { text: 'a < b > c' } }));
|
||||
expect(angled).toContain('text="a < b > c"');
|
||||
expect(angled).not.toContain('&lt;');
|
||||
expect(angled).not.toContain('&gt;');
|
||||
@@ -248,7 +262,10 @@ describe('empty / single-column tables', () => {
|
||||
// orderedList and a hardBreak inside a column.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('media / attachment / container full-attribute golden coverage', () => {
|
||||
it('video: emits all optional attrs in source order (alt->aria-label, attachmentId/size/align/aspectRatio->data-*)', () => {
|
||||
it('video: emits all optional attrs in the comment JSON in stable order (align center omitted)', () => {
|
||||
// #293 canon #8 image-form: src in the target, all OTHER non-default attrs in
|
||||
// the comment JSON (stable order alt/attachmentId/width/height/size/
|
||||
// aspectRatio; align="center" is the default and is omitted).
|
||||
expect(
|
||||
c({
|
||||
type: 'video',
|
||||
@@ -264,50 +281,49 @@ describe('media / attachment / container full-attribute golden coverage', () =>
|
||||
},
|
||||
}),
|
||||
).toBe(
|
||||
'<div><video src="/v.mp4" aria-label="clip" data-attachment-id="att-1" width="640" height="480" data-size="1234" data-align="center" data-aspect-ratio="1.777"></video></div>',
|
||||
'<!--video {"alt":"clip","attachmentId":"att-1","width":"640","height":"480","size":"1234","aspectRatio":"1.777"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('video: with only src, every optional guard takes its false branch (src-only <video>, no data-type on wrapper)', () => {
|
||||
expect(c({ type: 'video', attrs: { src: '/v.mp4' } })).toBe(
|
||||
'<div><video src="/v.mp4"></video></div>',
|
||||
);
|
||||
it('video: with only src, the discriminator is still emitted name-only (bare <!--video-->)', () => {
|
||||
expect(c({ type: 'video', attrs: { src: '/v.mp4' } })).toBe('![](/v.mp4)<!--video-->');
|
||||
});
|
||||
|
||||
it('youtube + embed: each emits its full optional attr set in source order', () => {
|
||||
// (a) youtube: width/height/align all present -> data-* in order.
|
||||
it('youtube + embed: each emits its full optional attr set in the discriminator comment', () => {
|
||||
// (a) youtube (image-form): width/height/align(right) in the comment JSON.
|
||||
expect(
|
||||
c({
|
||||
type: 'youtube',
|
||||
attrs: { src: 'https://youtu.be/abc', width: 560, height: 315, align: 'right' },
|
||||
}),
|
||||
).toBe(
|
||||
'<div data-type="youtube" data-src="https://youtu.be/abc" data-width="560" data-height="315" data-align="right"></div>',
|
||||
'<!--youtube {"width":"560","height":"315","align":"right"}-->',
|
||||
);
|
||||
// (b) embed: align/width/height optional branches after src+provider.
|
||||
// (b) embed (link-form): provider is the visible text; a non-default align/
|
||||
// width/height (left/600/400 — the defaults are center/800/600) ride in JSON.
|
||||
expect(
|
||||
c({
|
||||
type: 'embed',
|
||||
attrs: { src: 'https://x.com/e', provider: 'iframe', align: 'left', width: 600, height: 400 },
|
||||
}),
|
||||
).toBe(
|
||||
'<div data-type="embed" data-src="https://x.com/e" data-provider="iframe" data-align="left" data-width="600" data-height="400"></div>',
|
||||
'[iframe](https://x.com/e)<!--embed {"align":"left","width":"600","height":"400"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('audio: emits data-attachment-id then data-size after src when both are set', () => {
|
||||
it('audio: emits attachmentId then size in the comment JSON when both are set', () => {
|
||||
expect(c({ type: 'audio', attrs: { src: '/a.mp3', attachmentId: 'att-7', size: 9001 } })).toBe(
|
||||
'<div><audio src="/a.mp3" data-attachment-id="att-7" data-size="9001"></audio></div>',
|
||||
'<!--audio {"attachmentId":"att-7","size":"9001"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('audio: with attachmentId but no size, data-size is suppressed (size != null false branch)', () => {
|
||||
it('audio: with attachmentId but no size, the size key is suppressed (size != null false branch)', () => {
|
||||
expect(c({ type: 'audio', attrs: { src: '/a.mp3', attachmentId: 'att-7' } })).toBe(
|
||||
'<div><audio src="/a.mp3" data-attachment-id="att-7"></audio></div>',
|
||||
'<!--audio {"attachmentId":"att-7"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('pdf: emits the full optional attr set in order (data-name, data-attachment-id, data-size, width, height)', () => {
|
||||
it('pdf: emits the full optional attr set in the comment JSON (attachmentId, size, width, height)', () => {
|
||||
expect(
|
||||
c({
|
||||
type: 'pdf',
|
||||
@@ -321,11 +337,11 @@ describe('media / attachment / container full-attribute golden coverage', () =>
|
||||
},
|
||||
}),
|
||||
).toBe(
|
||||
'<div data-type="pdf" src="/d.pdf" data-name="d.pdf" data-attachment-id="att-9" data-size="2048" width="800" height="600"></div>',
|
||||
'[d.pdf](/d.pdf)<!--pdf {"attachmentId":"att-9","size":"2048","width":"800","height":"600"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('attachment: emits data-attachment-name/mime/size/id in order after the always-present url', () => {
|
||||
it('attachment: emits mime/size/attachmentId in the comment JSON after the [name](url) target', () => {
|
||||
expect(
|
||||
c({
|
||||
type: 'attachment',
|
||||
@@ -338,13 +354,14 @@ describe('media / attachment / container full-attribute golden coverage', () =>
|
||||
},
|
||||
}),
|
||||
).toBe(
|
||||
'<div data-type="attachment" data-attachment-url="/f.zip" data-attachment-name="f.zip" data-attachment-mime="application/zip" data-attachment-size="512" data-attachment-id="att-3"></div>',
|
||||
'[f.zip](/f.zip)<!--attachment {"mime":"application/zip","size":"512","attachmentId":"att-3"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('attachment: with only a url, no spurious data-attachment-name/mime/size/id appear (all guards false)', () => {
|
||||
it('attachment: with only a url, the link text is empty and the discriminator is name-only', () => {
|
||||
// name is null -> empty visible text `[]`; no mime/size/id -> bare comment.
|
||||
expect(c({ type: 'attachment', attrs: { url: '/f.zip' } })).toBe(
|
||||
'<div data-type="attachment" data-attachment-url="/f.zip"></div>',
|
||||
'[](/f.zip)<!--attachment-->',
|
||||
);
|
||||
});
|
||||
|
||||
+19
-23
@@ -390,27 +390,26 @@ describe('convertProseMirrorToMarkdown', () => {
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('math', () => {
|
||||
it('inline math carries LaTeX in a text attr WITHOUT escaping < or >', () => {
|
||||
it('inline math serializes as $LaTeX$ (Obsidian-native), no HTML escaping', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc(para({ type: 'mathInline', attrs: { text: 'a < b' } })),
|
||||
);
|
||||
// < and > must NOT be HTML-escaped (idempotency); only & and " would be.
|
||||
expect(out).toBe(
|
||||
'<span data-type="mathInline" data-katex="true" text="a < b"></span>',
|
||||
);
|
||||
// #293 canon #6: readable `$…$` form; the LaTeX is verbatim (no HTML
|
||||
// attribute escaping of < or & in the fence form).
|
||||
expect(out).toBe('$a < b$');
|
||||
expect(out).not.toContain('&lt;');
|
||||
expect(out).not.toContain('<span');
|
||||
});
|
||||
|
||||
it('block math carries LaTeX in a text attr WITHOUT escaping < or >', () => {
|
||||
it('block math serializes as a $$ fence on its own lines', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({ type: 'mathBlock', attrs: { text: 'x > y & z' } }),
|
||||
);
|
||||
// & IS escaped (entity-significant), but < and > are NOT.
|
||||
expect(out).toBe(
|
||||
'<div data-type="mathBlock" data-katex="true" text="x > y &amp; z"></div>',
|
||||
);
|
||||
expect(out).not.toContain('&lt;');
|
||||
expect(out).not.toContain('&gt;');
|
||||
// #293 canon #6: `$$\n<latex>\n$$`. The LaTeX is verbatim inside the fence
|
||||
// (plain markdown, so & is NOT entity-escaped as it would be in an attr).
|
||||
expect(out).toBe('$$\nx > y & z\n$$');
|
||||
expect(out).not.toContain('&amp;');
|
||||
expect(out).not.toContain('<div');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -430,33 +429,31 @@ describe('convertProseMirrorToMarkdown', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('attachment emits div with schema data-attachment-* attrs', () => {
|
||||
it('attachment emits link-form [name](url) + discriminator comment (#293 #8)', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'attachment',
|
||||
attrs: { url: '/files/x.zip', name: 'x.zip', mime: 'application/zip', size: 99 },
|
||||
}),
|
||||
);
|
||||
// #293 canon #8: url is the markdown target, name is the visible link text,
|
||||
// and every other attr rides in the ALWAYS-emitted `attachment` comment.
|
||||
expect(out).toBe(
|
||||
'<div data-type="attachment" data-attachment-url="/files/x.zip" ' +
|
||||
'data-attachment-name="x.zip" data-attachment-mime="application/zip" ' +
|
||||
'data-attachment-size="99"></div>',
|
||||
'[x.zip](/files/x.zip)<!--attachment {"mime":"application/zip","size":"99"}-->',
|
||||
);
|
||||
});
|
||||
|
||||
it('video emits a <div>-wrapped <video> with schema attrs', () => {
|
||||
it('video emits image-form ![](src) + discriminator comment (#293 #8)', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'video',
|
||||
attrs: { src: '/v.mp4', alt: 'clip', width: 640 },
|
||||
}),
|
||||
);
|
||||
expect(out).toBe(
|
||||
'<div><video src="/v.mp4" aria-label="clip" width="640"></video></div>',
|
||||
);
|
||||
expect(out).toBe('<!--video {"alt":"clip","width":"640"}-->');
|
||||
});
|
||||
|
||||
it('youtube emits a div[data-type="youtube"] with data-src', () => {
|
||||
it('youtube emits image-form ![](src) + discriminator comment (#293 #8)', () => {
|
||||
const out = convertProseMirrorToMarkdown(
|
||||
doc({
|
||||
type: 'youtube',
|
||||
@@ -464,8 +461,7 @@ describe('convertProseMirrorToMarkdown', () => {
|
||||
}),
|
||||
);
|
||||
expect(out).toBe(
|
||||
'<div data-type="youtube" data-src="https://youtu.be/abc" ' +
|
||||
'data-width="560" data-height="315"></div>',
|
||||
'<!--youtube {"width":"560","height":"315"}-->',
|
||||
);
|
||||
});
|
||||
});
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user