/** * Pure, network-free transform primitives for a ProseMirror/TipTap document * tree, plus one higher-level orchestration (commentsToFootnotes). * * A ProseMirror node here is a plain JSON object of the shape produced by * Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the * `content` array; callouts, tables, lists all hold their children in * `content`, so a single recursive walk reaches them all. * * Conventions (matching node-ops.ts): * - functions that produce a new document deep-clone their input and return a * `{ doc, ... }` object; the caller's objects are never mutated. * - functions are defensively null-safe. * - `marks` arrays are preserved verbatim when fragments are split/reordered. */ import { blockPlainText } from "./node-ops.js"; /** Deep-clone a JSON-serializable value without mutating the original. */ function clone(value: T): T { if (typeof structuredClone === "function") { return structuredClone(value); } // Fallback for environments without structuredClone. return JSON.parse(JSON.stringify(value)) as T; } /** True if `value` is a non-null object (and not an array). */ function isObject(value: any): value is Record { return value != null && typeof value === "object" && !Array.isArray(value); } /** * Plain text of a node (re-export of node-ops' blockPlainText so transform * authors have a single import surface). Recurses through nested content. */ export function blockText(node: any): string { return blockPlainText(node); } /** * Depth-first visit of every node in the tree, including the root and the * nested content of callouts, tables, lists, etc. `fn` is called once per node. * Null-safe: a nullish or non-object node is ignored. */ export function walk(node: any, fn: (node: any) => void): void { if (!isObject(node)) return; fn(node); if (Array.isArray(node.content)) { for (const child of node.content) { walk(child, fn); } } } /** * Find the FIRST node (depth-first) matching `predicate`, anywhere in the tree. * Works even when the node carries no `attrs.id` (it searches the raw tree, not * an id index). Returns the live node reference inside `doc` (NOT a clone), or * null when nothing matches. Typical use: `getList(doc, n => n.type === * "orderedList")`. */ export function getList( doc: any, predicate: (node: any) => boolean, ): any | null { let found: any | null = null; walk(doc, (node) => { if (found == null && predicate(node)) { found = node; } }); return found; } /** Options for insertMarkerAfter. */ export interface InsertMarkerOptions { /** * Limit the search to TOP-LEVEL blocks with index < beforeBlock. Used to keep * footnote markers in the body and out of the notes section. */ beforeBlock?: number; } /** * Insert `marker` as a PLAIN (unmarked) text run right after the first * occurrence of `anchor`. * * The text run that contains the END of the anchor is SPLIT at the anchor end, * so all existing marks (links, bold, ...) on the surrounding text are * preserved, while the inserted marker run carries NO marks. The marker is * inserted as a leading-space-padded run (`" " + marker`) so it visually * separates from the preceding word. * * The anchor is matched against the concatenated plain text of each top-level * block (so an anchor that spans several text/mark runs still matches). The * insertion happens inside the inline content array that holds the anchor's * final character. * * Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is * false when the anchor text was not found in any in-scope block. */ export function insertMarkerAfter( doc: any, anchor: string, marker: string, opts: InsertMarkerOptions = {}, ): { doc: any; inserted: boolean } { const out = clone(doc); if (!isObject(out) || !Array.isArray(out.content) || !anchor) { return { doc: out, inserted: false }; } const limit = typeof opts.beforeBlock === "number" ? Math.min(opts.beforeBlock, out.content.length) : out.content.length; for (let b = 0; b < limit; b++) { const block = out.content[b]; if (!isObject(block)) continue; // Quick reject: skip blocks whose plain text cannot contain the anchor. if (!blockPlainText(block).includes(anchor)) continue; // Walk the inline content arrays inside this block, tracking a running // character offset so we can locate the inline array + text run that holds // the END of the anchor's first occurrence. let inserted = false; let offset = 0; // characters of plain text seen so far in this block const anchorEnd = (() => blockPlainText(block).indexOf(anchor) + anchor.length)(); // Recurse into inline-bearing containers (paragraph, heading, table cell, // callout child paragraphs, ...). We only split inside an array of inline // nodes (text/inline atoms); the FIRST array whose cumulative range covers // anchorEnd receives the split + marker. const visit = (container: any): void => { if (inserted || !isObject(container) || !Array.isArray(container.content)) { return; } const inline = container.content; // Detect whether this array is an inline array (contains text nodes). const hasText = inline.some( (n: any) => isObject(n) && n.type === "text", ); if (hasText) { for (let i = 0; i < inline.length; i++) { const n = inline[i]; const len = isObject(n) ? blockPlainText(n).length : 0; const runStart = offset; const runEnd = offset + len; // The run that contains the anchor end (anchorEnd lands inside this // run, i.e. runStart < anchorEnd <= runEnd) is the split point. if ( !inserted && isObject(n) && n.type === "text" && typeof n.text === "string" && anchorEnd > runStart && anchorEnd <= runEnd ) { const cut = anchorEnd - runStart; // split index within this text run const before = n.text.slice(0, cut); const after = n.text.slice(cut); const marks = Array.isArray(n.marks) ? n.marks : []; const parts: any[] = []; if (before.length > 0) { parts.push({ ...n, text: before, marks: [...marks] }); } // Marker is a PLAIN run: no marks copied. Leading space separates it. parts.push({ type: "text", text: " " + marker }); if (after.length > 0) { parts.push({ ...n, text: after, marks: [...marks] }); } inline.splice(i, 1, ...parts); inserted = true; return; } offset = runEnd; } } else { // Not an inline array: recurse into children (e.g. callout -> paragraph). for (const child of inline) { visit(child); if (inserted) return; } } }; visit(block); if (inserted) { return { doc: out, inserted: true }; } // If the block matched in plain text but we could not split (e.g. anchor // lands inside an atom), fall through to the next block rather than failing. } return { doc: out, inserted: false }; } /** * In the disclaimer callout, replace a `[1]…[K]` range marker with `[1]…[n]`. * * Docmost translations use a callout that states the footnote range, e.g. * "[1]…[5]". When the number of notes changes, this rewrites the trailing * number of any `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis) occurrence found in a * callout's text nodes to `[1]…[n]`. Operates on a clone; returns * `{ doc, changed }` where `changed` is the number of text nodes rewritten. */ export function setCalloutRange( doc: any, n: number, ): { doc: any; changed: number } { const out = clone(doc); let changed = 0; // Match "[1]" + (… or ...) + "[]"; rewrite the last number to n. const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g; walk(out, (node) => { if (node.type === "callout") { walk(node, (inner) => { if ( inner.type === "text" && typeof inner.text === "string" && rangeRe.test(inner.text) ) { rangeRe.lastIndex = 0; inner.text = inner.text.replace(rangeRe, `$1${n}$2`); changed++; } rangeRe.lastIndex = 0; }); } }); return { doc: out, changed }; } /** * Generate a short random id for a new block's `attrs.id`. Docmost uses nanoid; * a base36 random string is sufficient here (uniqueness within one document). */ function freshId(): string { return ( Math.random().toString(36).slice(2, 12) + Math.random().toString(36).slice(2, 6) ); } /** * Wrap inline ProseMirror nodes in a list item: * { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content: inlineNodes }] } * with a fresh random block id on the paragraph. The inline nodes are cloned so * the result shares no references with the caller's input. */ export function noteItem(inlineNodes: any[]): any { const content = Array.isArray(inlineNodes) ? clone(inlineNodes) : []; return { type: "listItem", content: [ { type: "paragraph", attrs: { id: freshId() }, content, }, ], }; } /** * Convert a comment's markdown (e.g. `**Lead.** body...`) into inline * ProseMirror nodes. * * A leading `комментарий: ` (case-insensitive) or `N. ` numeric prefix is * stripped first. Then a minimal bold-split is applied: a leading * `**bold lead**` run becomes a text node with a bold mark, and the remainder * becomes a plain text node. This keeps the conversion synchronous (the * transform sandbox runs synchronously) and dependency-free; the existing * async markdownToProseMirror is intentionally NOT used here. */ export function mdToInlineNodes(markdown: string): any[] { let md = typeof markdown === "string" ? markdown : ""; // Strip a leading "комментарий: " prefix (case-insensitive) or a "N. " prefix. md = md.replace(/^\s*комментарий\s*:\s*/i, ""); md = md.replace(/^\s*\d+\.\s+/, ""); md = md.trim(); if (md === "") return []; const nodes: any[] = []; // Leading bold lead: **...** at the very start. const leadMatch = /^\*\*([^*]+)\*\*\s*/.exec(md); if (leadMatch) { const leadText = leadMatch[1]; nodes.push({ type: "text", text: leadText, marks: [{ type: "bold" }], }); const rest = md.slice(leadMatch[0].length); if (rest.length > 0) { // Preserve the separating space that followed the bold lead. const sep = /^\*\*[^*]+\*\*(\s*)/.exec(md); const spacing = sep ? sep[1] : ""; nodes.push({ type: "text", text: spacing + rest }); } return nodes; } // No bold lead: emit the whole thing as a single plain text node, with any // remaining **bold** spans split out inline. return splitInlineBold(md); } /** * Split a string with inline `**bold**` spans into text nodes, bolding the * spans. Used as the no-lead fallback in mdToInlineNodes. */ function splitInlineBold(text: string): any[] { const nodes: any[] = []; const re = /\*\*([^*]+)\*\*/g; let last = 0; let m: RegExpExecArray | null; while ((m = re.exec(text)) !== null) { if (m.index > last) { nodes.push({ type: "text", text: text.slice(last, m.index) }); } nodes.push({ type: "text", text: m[1], marks: [{ type: "bold" }] }); last = m.index + m[0].length; } if (last < text.length) { nodes.push({ type: "text", text: text.slice(last) }); } return nodes.length > 0 ? nodes : [{ type: "text", text }]; } /** Options for commentsToFootnotes. */ export interface CommentsToFootnotesOptions { /** Heading text under which the notes orderedList lives. */ notesHeading?: string; } /** A comment shape as returned by DocmostClient.listComments. */ export interface FootnoteComment { id: string; content: string; selection?: string | null; [k: string]: any; } /** * Turn inline comments into numbered footnotes. * * For each inline comment that carries a `selection`: * 1. insert a placeholder marker (a NUL-delimited "\u0000FN\u0000" * sentinel) right after the selection text in the BODY (before the * notes heading); * 2. build a note list item from the comment's markdown content. * * Then RENUMBER every footnote marker in the body by reading order: existing * `[N]` markers and the new "\u0000FN\u0000" placeholders are both replaced by a * sequential `[seq]`, and the notes orderedList is reordered so each note lines * up with its marker's reading-order position. Finally the disclaimer callout * range is synced to the new note count. * * Returns `{ doc, consumed }` where `consumed` lists the ids of comments that * were successfully anchored (their selection was found and a placeholder * inserted). Operates on a clone of `doc`. */ export function commentsToFootnotes( doc: any, comments: FootnoteComment[], opts: CommentsToFootnotesOptions = {}, ): { doc: any; consumed: string[] } { let working = clone(doc); const notesHeading = opts.notesHeading ?? "Примечания переводчика"; const top: any[] = Array.isArray(working.content) ? working.content : []; const notesIdx = top.findIndex( (n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading, ); if (notesIdx < 0) { throw new Error(`heading "${notesHeading}" not found`); } // The notes orderedList lives at or after the heading. const notesList = top .slice(notesIdx) .find((n) => isObject(n) && n.type === "orderedList"); if (!notesList) { throw new Error("notes orderedList not found"); } const consumed: string[] = []; const noteByPh = new Map(); (Array.isArray(comments) ? comments : []).forEach((c, i) => { if (!c || !c.selection) return; // Collision-proof sentinel delimited by NUL control chars, which never occur // in real Docmost prose — so the renumber regex below cannot mistake any body // text (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is // transient: the placeholder round-trips within this function (insertMarkerAfter // inserts it, the renumber pass replaces it with "[N]"), so it never persists // in a returned/pushed document. const ph = `\u0000FN${i}\u0000`; // insertMarkerAfter returns a NEW cloned doc; reassign `working` and refresh // the `top` / `notesList` references that point into it. const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, { beforeBlock: notesIdx, }); if (!r.inserted) return; working = r.doc; noteByPh.set(ph, noteItem(mdToInlineNodes(c.content))); consumed.push(c.id); }); // Re-resolve references into the (possibly re-cloned) working doc. const top2: any[] = Array.isArray(working.content) ? working.content : []; const notesList2 = top2 .slice(notesIdx) .find((n) => isObject(n) && n.type === "orderedList"); if (!notesList2) { throw new Error("notes orderedList not found"); } const oldNotes: any[] = Array.isArray(notesList2.content) ? notesList2.content : []; const newNotes: any[] = []; let seq = 0; // Match either an existing "[N]" marker or a NUL-delimited "\u0000FN\u0000" // placeholder, in reading order across the body (blocks before the notes heading). const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g; // Same range regex setCalloutRange uses to detect the disclaimer callout's // "[1]…[K]" range; used here to decide whether a top-level callout is the // disclaimer (skip) or an ordinary callout (renumber normally). const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/; for (let i = 0; i < notesIdx; i++) { // Skip ONLY the disclaimer callout: its "[1]…[K]" range is NOT a footnote // marker and is synced separately by setCalloutRange. Renumbering it here // would consume note slots and corrupt the sequence. Other top-level // callouts may carry legitimate "[N]" body markers and are renumbered. if ( isObject(top2[i]) && top2[i].type === "callout" && disclaimerRangeRe.test(blockText(top2[i])) ) { continue; } walk(top2[i], (node) => { if (node.type !== "text" || typeof node.text !== "string") return; node.text = node.text.replace(re, (_m: string, oldNum: string, phIdx: string) => { if (oldNum != null) { const note = oldNotes[Number(oldNum) - 1]; // Every existing body marker MUST map to a real note. An out-of-range // marker means the document is internally inconsistent; fail loudly // rather than silently dropping the note and desyncing the callout. if (note === undefined) { throw new Error( `footnote [${oldNum}] has no matching note (notes list has ${oldNotes.length} items); document is inconsistent`, ); } newNotes.push(note); } else { newNotes.push(noteByPh.get(`\u0000FN${phIdx}\u0000`)); } return `[${++seq}]`; }); }); } // Reorder the notes list IN PLACE on `working` first, THEN sync the callout // range. setCalloutRange clones `working`, so the reordered notes (mutated // before the clone) are carried into its result automatically. No null-filter // here: marker count and note count must stay exactly equal (the out-of-range // guard above guarantees no undefined entry is ever pushed). notesList2.content = newNotes; const synced = setCalloutRange(working, notesList2.content.length); return { doc: synced.doc, consumed }; }