gitmost/packages/mcp/build/lib/transforms.js

/**
 * Pure, network-free transform primitives for a ProseMirror/TipTap document
 * tree, plus two higher-level orchestrations (commentsToFootnotes and
 * insertInlineFootnote).
 *
 * A ProseMirror node here is a plain JSON object of the shape produced by
 * Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
 * `content` array; callouts, tables, lists all hold their children in
 * `content`, so a single recursive walk reaches them all.
 *
 * Conventions (matching node-ops.ts):
 *  - functions that produce a new document deep-clone their input and return a
 *    `{ doc, ... }` object; the caller's objects are never mutated.
 *  - functions are defensively null-safe.
 *  - `marks` arrays are preserved verbatim when fragments are split/reordered.
 */
import { blockPlainText } from "./node-ops.js";
import { canonicalizeFootnotes } from "./footnote-canonicalize.js";
import { footnoteContentKey, makeFootnoteDefinition, generateFootnoteId, } from "./footnote-authoring.js";
export { canonicalizeFootnotes } from "./footnote-canonicalize.js";
/**
 * Produce a deep copy of a JSON-serializable value; the input is never touched.
 * Uses the platform's structuredClone when that global exists and falls back
 * to a JSON round-trip otherwise.
 */
function clone(value) {
    const hasNativeClone = typeof structuredClone === "function";
    return hasNativeClone
        ? structuredClone(value)
        : JSON.parse(JSON.stringify(value));
}
/** Report whether `value` is a plain-ish object: not null/undefined and not an array. */
function isObject(value) {
    if (value == null || Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
/**
 * Plain text of a node. This is a thin pass-through to node-ops'
 * blockPlainText, exposed under this name so transform authors have a single
 * import surface.
 *
 * `node` is forwarded unchanged and the helper's result is returned as-is; how
 * nested content and nullish input are handled is decided by blockPlainText in
 * node-ops, not here.
 */
export function blockText(node) {
    return blockPlainText(node);
}
/**
 * Pre-order, depth-first traversal of a node tree. `fn` is invoked exactly once
 * for the root and for every descendant reachable through `content` arrays
 * (callouts, tables, lists, ... all keep their children there).
 * Anything that is not a non-array object (null, strings, arrays) is skipped
 * silently, so the traversal is safe on malformed input.
 */
export function walk(node, fn) {
    if (!isObject(node)) {
        return;
    }
    // Parent first, then its children in document order.
    fn(node);
    const children = node.content;
    if (!Array.isArray(children)) {
        return;
    }
    for (let idx = 0; idx < children.length; idx++) {
        walk(children[idx], fn);
    }
}
/**
 * Return the FIRST node, in depth-first order, for which `predicate` is true.
 * The search runs over the raw tree, so nodes without an `attrs.id` are found
 * too. The result is the live node inside `doc` (NOT a copy), or null when no
 * node matches. Once a match is recorded the predicate is not called again.
 * Typical use: `getList(doc, n => n.type === "orderedList")`.
 */
export function getList(doc, predicate) {
    let match = null;
    walk(doc, (candidate) => {
        if (match !== null) {
            return;
        }
        if (predicate(candidate)) {
            match = candidate;
        }
    });
    return match;
}
/**
 * Insert `marker` as a PLAIN (unmarked) text run immediately after the first
 * occurrence of `anchor`.
 *
 * The actual work is delegated to insertNodesAfterAnchor, which splits the text
 * run holding the END of the anchor so the marks on the surrounding text stay
 * intact. What this wrapper contributes is the inserted node: a single text run
 * `" " + marker` with no marks, the leading space separating it visually from
 * the preceding word.
 *
 * `opts` is forwarded unchanged (e.g. `beforeBlock` to restrict the search to
 * the leading top-level blocks). Returns the delegate's `{ doc, inserted }`;
 * `inserted` is false when the anchor was not found in any in-scope block.
 */
export function insertMarkerAfter(doc, anchor, marker, opts = {}) {
    const buildMarkerRun = () => {
        const run = { type: "text", text: " " + marker };
        return [run];
    };
    return insertNodesAfterAnchor(doc, anchor, buildMarkerRun, opts);
}
/**
 * Mark-safe insertion CORE shared by `insertMarkerAfter` (plain text marker)
 * and `insertInlineFootnote` (a `footnoteReference` node).
 *
 * For each top-level block (optionally only those before `opts.beforeBlock`)
 * the block's plain text is searched for the first occurrence of `anchor`. The
 * text run that holds the END of that occurrence is split at the anchor end and
 * the nodes returned by `makeMiddle()` are spliced in at the split point. Both
 * halves of the split run keep a copy of the run's marks; the inserted nodes
 * are used exactly as `makeMiddle` returned them.
 *
 * If a block matches textually but the anchor end does not fall inside a text
 * run (e.g. it lands in an inline atom), the next block is tried instead.
 *
 * Operates on a clone of `doc` and returns `{ doc, inserted }`. `inserted` is
 * false (and the returned clone is unmodified) when `doc` has no `content`
 * array, `anchor` is empty, or no in-scope block could take the insertion.
 */
function insertNodesAfterAnchor(doc, anchor, makeMiddle, opts = {}) {
    const out = clone(doc);
    if (!isObject(out) || !Array.isArray(out.content) || !anchor) {
        return { doc: out, inserted: false };
    }
    // `opts?.` keeps an explicit null options bag from crashing the lookup.
    const beforeBlock = opts?.beforeBlock;
    const limit = typeof beforeBlock === "number"
        ? Math.min(beforeBlock, out.content.length)
        : out.content.length;
    for (let b = 0; b < limit; b++) {
        const block = out.content[b];
        if (!isObject(block)) {
            continue;
        }
        // Flatten the block ONCE: the same lookup both rejects blocks that cannot
        // contain the anchor and yields the offset where the anchor ends.
        const anchorStart = blockPlainText(block).indexOf(anchor);
        if (anchorStart < 0) {
            continue;
        }
        const anchorEnd = anchorStart + anchor.length;
        let inserted = false;
        let offset = 0; // characters of plain text consumed so far in this block
        // Descend through containers (callout -> paragraph, table -> row -> cell,
        // ...) until an inline array is reached, tracking the running offset so the
        // run covering `anchorEnd` can be located.
        const visit = (container) => {
            if (inserted || !isObject(container) || !Array.isArray(container.content)) {
                return;
            }
            const inline = container.content;
            // An array counts as inline content when it holds at least one text node.
            const hasText = inline.some((n) => isObject(n) && n.type === "text");
            if (!hasText) {
                // Not an inline array: recurse into the children in document order.
                for (const child of inline) {
                    visit(child);
                    if (inserted) {
                        return;
                    }
                }
                return;
            }
            for (let i = 0; i < inline.length; i++) {
                const n = inline[i];
                const len = isObject(n) ? blockPlainText(n).length : 0;
                const runStart = offset;
                const runEnd = offset + len;
                // Split point: a text run with runStart < anchorEnd <= runEnd.
                if (isObject(n) &&
                    n.type === "text" &&
                    typeof n.text === "string" &&
                    anchorEnd > runStart &&
                    anchorEnd <= runEnd) {
                    const cut = anchorEnd - runStart; // split index within this run
                    const before = n.text.slice(0, cut);
                    const after = n.text.slice(cut);
                    const marks = Array.isArray(n.marks) ? n.marks : [];
                    const parts = [];
                    if (before.length > 0) {
                        parts.push({ ...n, text: before, marks: [...marks] });
                    }
                    // Caller-decided nodes (space-padded marker run, or a node that
                    // hugs the word); no marks are copied onto them.
                    parts.push(...makeMiddle());
                    if (after.length > 0) {
                        parts.push({ ...n, text: after, marks: [...marks] });
                    }
                    inline.splice(i, 1, ...parts);
                    inserted = true;
                    return;
                }
                offset = runEnd;
            }
        };
        visit(block);
        if (inserted) {
            return { doc: out, inserted: true };
        }
        // Matched in plain text but no splittable run was found: try the next block.
    }
    return { doc: out, inserted: false };
}
/**
 * In the disclaimer callout, replace a `[1]…[K]` range marker with `[1]…[n]`.
 *
 * Docmost translations use a callout that states the footnote range, e.g.
 * "[1]…[5]". When the number of notes changes, this rewrites the trailing
 * number of every `[1]…[K]` (or `[1]...[K]`, ASCII ellipsis) occurrence found
 * in the text nodes of any callout to `[1]…[n]`. Text outside callouts is left
 * alone.
 *
 * Operates on a clone; returns `{ doc, changed }` where `changed` is the number
 * of text nodes that contained at least one range marker. Each text node is
 * processed and counted once, even when it sits inside nested callouts, and it
 * is counted even if its range already ended in `n`.
 */
export function setCalloutRange(doc, n) {
    const out = clone(doc);
    let changed = 0;
    // Match "[1]" + (… or ...) + "[<digits>]"; only the last number is replaced.
    const rangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/g;
    // A text node under nested callouts is reachable from every enclosing
    // callout; remember what was handled so it is rewritten and counted once.
    const handled = new Set();
    walk(out, (node) => {
        if (node.type !== "callout") {
            return;
        }
        walk(node, (inner) => {
            if (inner.type !== "text" ||
                typeof inner.text !== "string" ||
                handled.has(inner)) {
                return;
            }
            handled.add(inner);
            let matched = false;
            // A replacer callback (rather than a "$1…$2" pattern string) keeps the
            // inserted value literal and avoids any lastIndex bookkeeping on the
            // global regex.
            const rewritten = inner.text.replace(rangeRe, (_whole, head, tail) => {
                matched = true;
                return `${head}${n}${tail}`;
            });
            if (matched) {
                inner.text = rewritten;
                changed++;
            }
        });
    });
    return { doc: out, changed };
}
/**
 * Generate a short random id for a new block's `attrs.id`. Docmost uses nanoid;
 * a base36 random string is sufficient here (uniqueness within one document).
 *
 * The id is always exactly 14 characters drawn from [0-9a-z]. Characters are
 * picked one at a time because slicing `Math.random().toString(36)` yields a
 * shorter (possibly empty) string whenever the random value has a short base36
 * expansion. Not suitable for anything security-sensitive (uses Math.random).
 */
function freshId() {
    const alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
    const length = 14;
    let id = "";
    for (let i = 0; i < length; i++) {
        // Math.random() is in [0, 1), so the index is always within the alphabet.
        id += alphabet[Math.floor(Math.random() * alphabet.length)];
    }
    return id;
}
/**
 * Build a list item around inline ProseMirror nodes:
 *   { type:"listItem", content:[{ type:"paragraph", attrs:{id}, content: inlineNodes }] }
 * The paragraph gets a fresh random block id. The inline nodes are deep-cloned,
 * so the result shares no references with the caller's array; a non-array
 * argument yields an empty paragraph.
 */
export function noteItem(inlineNodes) {
    let inlineCopy = [];
    if (Array.isArray(inlineNodes)) {
        inlineCopy = clone(inlineNodes);
    }
    const paragraph = {
        type: "paragraph",
        attrs: { id: freshId() },
        content: inlineCopy,
    };
    return { type: "listItem", content: [paragraph] };
}
/**
 * Build a footnoteDefinition node keyed by `id` around inline ProseMirror nodes:
 *   { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", attrs:{id}, content }] }
 * (mirrors the editor-ext / docmost-schema FootnoteDefinition node). The inner
 * paragraph carries its own fresh random block id, distinct from the footnote
 * id. The inline nodes are deep-cloned; a non-array argument yields an empty
 * paragraph.
 */
export function footnoteDefinition(id, inlineNodes) {
    let inlineCopy = [];
    if (Array.isArray(inlineNodes)) {
        inlineCopy = clone(inlineNodes);
    }
    const paragraph = {
        type: "paragraph",
        attrs: { id: freshId() },
        content: inlineCopy,
    };
    return {
        type: "footnoteDefinition",
        attrs: { id },
        content: [paragraph],
    };
}
/**
 * Swap every `[N]` body marker and every `\u0000FN<i>\u0000` comment placeholder
 * found in the text runs of `inline` for a `footnoteReference` node, keeping
 * reading order.
 *
 * For each marker `onMarker({ oldNum, phIdx })` is called (`oldNum` is the
 * number of a `[N]` marker, `phIdx` the index of a placeholder; the other one
 * is undefined) and its return value becomes the reference's `attrs.id`. The
 * text around the markers is kept as separate text runs that copy the original
 * run's attributes and marks. Non-text nodes and text runs without markers are
 * left as they are.
 *
 * Mutates `inline` in place; returns nothing.
 */
function replaceMarkersWithReferences(inline, onMarker) {
    const markerRe = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
    let idx = 0;
    while (idx < inline.length) {
        const run = inline[idx];
        const isTextRun = isObject(run) && run.type === "text" && typeof run.text === "string";
        const hits = isTextRun ? [...run.text.matchAll(markerRe)] : [];
        if (hits.length === 0) {
            idx++;
            continue;
        }
        const marks = Array.isArray(run.marks) ? run.marks : [];
        // Text fragment [from, to) of the current run, with its own marks array.
        const textPiece = (from, to) => ({
            ...run,
            text: run.text.slice(from, to),
            marks: [...marks],
        });
        const pieces = [];
        let cursor = 0;
        for (const hit of hits) {
            if (hit.index > cursor) {
                pieces.push(textPiece(cursor, hit.index));
            }
            const oldNum = hit[1] != null ? Number(hit[1]) : undefined;
            const phIdx = hit[2] != null ? Number(hit[2]) : undefined;
            const fnId = onMarker({ oldNum, phIdx });
            pieces.push({ type: "footnoteReference", attrs: { id: fnId } });
            cursor = hit.index + hit[0].length;
        }
        if (cursor < run.text.length) {
            pieces.push(textPiece(cursor, run.text.length));
        }
        inline.splice(idx, 1, ...pieces);
        // Continue after the pieces just inserted; they need no second look.
        idx += pieces.length;
    }
}
/**
 * Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
 * ProseMirror text nodes. Synchronous and dependency-free on purpose (the
 * transform sandbox runs synchronously), so the async markdownToProseMirror is
 * intentionally NOT used here.
 *
 * Steps:
 *  1. A non-string input is treated as "".
 *  2. A leading `комментарий: ` prefix (case-insensitive) and then a leading
 *     `N. ` numeric prefix are stripped, and the result is trimmed. An empty
 *     result yields `[]`.
 *  3. If the text starts with a `**bold lead**`, that lead becomes a bold text
 *     node and everything after it (including the whitespace that followed the
 *     lead) becomes ONE plain text node; later `**` spans in that remainder are
 *     not interpreted.
 *  4. Otherwise the text is handed to splitInlineBold, which bolds any inline
 *     `**...**` spans.
 */
export function mdToInlineNodes(markdown) {
    const raw = typeof markdown === "string" ? markdown : "";
    const md = raw
        .replace(/^\s*комментарий\s*:\s*/i, "")
        .replace(/^\s*\d+\.\s+/, "")
        .trim();
    if (md.length === 0) {
        return [];
    }
    // Bold lead at the very start; group 2 captures the whitespace after it.
    const lead = /^\*\*([^*]+)\*\*(\s*)/.exec(md);
    if (lead === null) {
        return splitInlineBold(md);
    }
    const [whole, leadText, spacing] = lead;
    const result = [
        { type: "text", text: leadText, marks: [{ type: "bold" }] },
    ];
    const rest = md.slice(whole.length);
    if (rest.length > 0) {
        // Re-attach the separating whitespace in front of the remainder.
        result.push({ type: "text", text: spacing + rest });
    }
    return result;
}
/**
 * Split a string with inline `**bold**` spans into text nodes, bolding the
 * spans. Used as the no-lead fallback in mdToInlineNodes.
 *
 * Text outside the spans becomes plain (unmarked) text nodes; a string with no
 * spans becomes a single plain text node. An empty or non-string input yields
 * `[]`, because an empty text node is not a valid ProseMirror node, so none is
 * ever emitted.
 */
function splitInlineBold(text) {
    if (typeof text !== "string" || text === "") {
        return [];
    }
    const nodes = [];
    let last = 0;
    for (const m of text.matchAll(/\*\*([^*]+)\*\*/g)) {
        if (m.index > last) {
            nodes.push({ type: "text", text: text.slice(last, m.index) });
        }
        nodes.push({ type: "text", text: m[1], marks: [{ type: "bold" }] });
        last = m.index + m[0].length;
    }
    // Trailing plain text (or the whole string when there was no span).
    if (last < text.length) {
        nodes.push({ type: "text", text: text.slice(last) });
    }
    return nodes;
}
/**
 * Turn inline comments into real footnotes.
 *
 * For each comment that carries a non-empty string `selection`:
 *   1. a transient NUL-delimited "\u0000FN<i>\u0000" placeholder is inserted right
 *      after the first occurrence of the selection text in the BODY (the
 *      top-level blocks before the notes heading);
 *   2. the comment's markdown `content` is converted to inline nodes that will
 *      become the footnote's definition.
 *
 * The body is then walked in reading order. Every existing `[N]` marker and
 * every placeholder is replaced by a `footnoteReference` node with a fresh id,
 * and a matching `footnoteDefinition` is collected (from item N of the old
 * notes list, or from the comment's content). The notes `orderedList` (the
 * first one AT OR AFTER the notes heading) is replaced by a `footnotesList`
 * holding those definitions in reading order, or removed when there are none.
 * Finally the disclaimer callout range is synced to the definition count.
 *
 * `opts.notesHeading` overrides the heading text that starts the notes section
 * (default "Примечания переводчика").
 *
 * Throws when the notes heading is missing, when no orderedList exists at or
 * after it, or when a body `[N]` marker has no matching item in the notes list.
 *
 * Returns `{ doc, consumed }` where `consumed` lists the ids of the comments
 * that were successfully anchored (selection found, placeholder inserted).
 * Operates on a clone of `doc`.
 *
 * NOTE(review): the placeholder is inserted via insertMarkerAfter, which pads it
 * with a leading space, so a space remains in front of each reference created
 * from a comment.
 */
export function commentsToFootnotes(doc, comments, opts = {}) {
    let working = clone(doc);
    const notesHeading = opts?.notesHeading ?? "Примечания переводчика";
    const topLevelOf = (d) => (isObject(d) && Array.isArray(d.content) ? d.content : []);
    const isNotesHeading = (n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading;
    // The notes list is the first orderedList at or after the heading; ordered
    // lists that belong to the body (before the heading) must never be picked.
    const findNotesListIndex = (blocks, fromIdx) => blocks.findIndex((n, idx) => idx >= fromIdx && isObject(n) && n.type === "orderedList");
    const top = topLevelOf(working);
    const notesIdx = top.findIndex(isNotesHeading);
    if (notesIdx < 0) {
        throw new Error(`heading "${notesHeading}" not found`);
    }
    if (findNotesListIndex(top, notesIdx) < 0) {
        throw new Error("notes orderedList not found");
    }
    const consumed = [];
    const noteInlineByPh = new Map();
    (Array.isArray(comments) ? comments : []).forEach((c, i) => {
        // Comments without a usable selection cannot be anchored; skip them.
        if (!c || typeof c.selection !== "string" || c.selection === "") {
            return;
        }
        // Collision-proof sentinel delimited by NUL control chars, which never occur
        // in real Docmost prose - so the marker regex cannot mistake any body text
        // (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
        // transient: it is replaced by a footnoteReference node below and never
        // persists in a returned document.
        const ph = `\u0000FN${i}\u0000`;
        // insertMarkerAfter returns a NEW cloned doc; reassign `working`.
        const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
            beforeBlock: notesIdx,
        });
        if (!r.inserted) {
            return;
        }
        working = r.doc;
        noteInlineByPh.set(ph, mdToInlineNodes(c.content));
        consumed.push(c.id);
    });
    // Re-resolve references into the (possibly re-cloned) working doc.
    const top2 = topLevelOf(working);
    const notesIdx2 = top2.findIndex(isNotesHeading);
    const oldListIndex = findNotesListIndex(top2, Math.max(notesIdx2, 0));
    if (oldListIndex < 0) {
        throw new Error("notes orderedList not found");
    }
    const notesList2 = top2[oldListIndex];
    // Inline content of each existing note (listItem -> paragraph -> inline).
    const oldNoteInline = (Array.isArray(notesList2.content)
        ? notesList2.content
        : []).map((item) => {
        const para = isObject(item) && Array.isArray(item.content)
            ? item.content.find((c) => isObject(c) && c.type === "paragraph")
            : null;
        return para && Array.isArray(para.content) ? para.content : [];
    });
    // Walk the body in reading order, turning each "[N]" / placeholder marker into
    // a real footnoteReference node and collecting its definition inline content.
    const definitions = [];
    const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
    // Recursively visit inline arrays inside a block (paragraph, heading, callout
    // child paragraphs, table cells, ...), preserving document reading order.
    const visitInlineArrays = (container) => {
        if (!isObject(container) || !Array.isArray(container.content)) {
            return;
        }
        const hasText = container.content.some((n) => isObject(n) && n.type === "text");
        if (!hasText) {
            for (const child of container.content) {
                visitInlineArrays(child);
            }
            return;
        }
        replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
            const fnId = freshId();
            if (oldNum != null) {
                const inline = oldNoteInline[oldNum - 1];
                // Every existing body marker MUST map to a real note. An out-of-range
                // marker means the document is internally inconsistent; fail loudly.
                if (inline === undefined) {
                    throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`);
                }
                definitions.push(footnoteDefinition(fnId, inline));
            }
            else {
                const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
                definitions.push(footnoteDefinition(fnId, inline));
            }
            return fnId;
        });
    };
    const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
    for (let i = 0; i < notesBoundary; i++) {
        // Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a footnote
        // marker and is synced separately by setCalloutRange.
        if (isObject(top2[i]) &&
            top2[i].type === "callout" &&
            disclaimerRangeRe.test(blockText(top2[i]))) {
            continue;
        }
        visitInlineArrays(top2[i]);
    }
    // Replace the old orderedList with a real footnotesList of the collected
    // definitions (reading order). If there are no definitions, drop the list.
    if (definitions.length > 0) {
        top2[oldListIndex] = {
            type: "footnotesList",
            content: definitions,
        };
    }
    else {
        top2.splice(oldListIndex, 1);
    }
    // Sync the disclaimer callout range to the new note count.
    const synced = setCalloutRange(working, definitions.length);
    return { doc: synced.doc, consumed };
}
/**
 * AUTHOR-INLINE footnote insertion. The caller supplies WHERE
 * (`opts.anchorText`) and WHAT (`opts.text`, markdown); numbering and the
 * bottom list are derived by `canonicalizeFootnotes`, so the caller never
 * assigns a number or edits `footnotesList` directly.
 *
 * Content DEDUP: if an existing `footnoteDefinition` with a non-empty string id
 * has the SAME content key (footnoteContentKey) as the new text, its id is
 * REUSED - the new reference points at it and no definition is added. Otherwise
 * a fresh id is minted (generateFootnoteId) and a new definition is appended.
 * Text with an empty content key is never deduplicated.
 *
 * Mechanics: the `footnoteReference` node is inserted DIRECTLY after the anchor
 * through the shared mark-safe `insertNodesAfterAnchor` core (trailing
 * whitespace of the anchor is ignored), so it hugs the preceding word. The
 * whole document is then canonicalized.
 *
 * Returns `{ doc, inserted, footnoteId, reused }`. `doc` is always a copy of
 * the input, never the caller's object. When the anchor is not found (or no
 * anchor is given) the copy is unmodified and `inserted` is false; in that case
 * `footnoteId` is not referenced anywhere in the returned document unless
 * `reused` is true.
 */
export function insertInlineFootnote(doc, opts = {}) {
    const { text, anchorText } = opts ?? {};
    const inline = mdToInlineNodes(text ?? "");
    const key = footnoteContentKey(makeFootnoteDefinition("", inline));
    // Content dedup: reuse an existing definition's id when its key matches.
    let footnoteId = null;
    let reused = false;
    if (key !== "") {
        walk(doc, (n) => {
            if (footnoteId == null &&
                isObject(n) &&
                n.type === "footnoteDefinition" &&
                n.attrs &&
                typeof n.attrs.id === "string" &&
                n.attrs.id !== "" &&
                footnoteContentKey(n) === key) {
                footnoteId = n.attrs.id;
                reused = true;
            }
        });
    }
    if (footnoteId == null) {
        footnoteId = generateFootnoteId();
    }
    // Insert the footnoteReference node directly after the anchor (mark-safe
    // split); it hugs the preceding word with no leading space.
    const r = insertNodesAfterAnchor(doc, (anchorText ?? "").trimEnd(), () => [{ type: "footnoteReference", attrs: { id: footnoteId } }]);
    if (!r.inserted) {
        // `r.doc` is already an untouched deep copy of `doc`; hand it back instead
        // of cloning the whole document a second time.
        return { doc: r.doc, inserted: false, footnoteId, reused };
    }
    let working = r.doc;
    // Add a NEW definition (canonicalize will order/place it); a reused id needs
    // no new definition (the existing one is shared).
    if (!reused) {
        appendDefinition(working, makeFootnoteDefinition(footnoteId, inline));
    }
    // Derive numbering + the single bottom list deterministically.
    working = canonicalizeFootnotes(working);
    return { doc: working, inserted: true, footnoteId, reused };
}
/**
 * Attach `defNode` to `doc` (mutating it) so the canonicalizer can later
 * order/place it. The node is pushed onto the first `footnotesList` found in
 * the tree when that list has a `content` array; otherwise a new trailing
 * `footnotesList` holding just this node is appended to `doc.content`. When
 * `doc.content` is not an array nothing is added.
 */
function appendDefinition(doc, defNode) {
    const firstList = getList(doc, (n) => isObject(n) && n.type === "footnotesList");
    const listItems = firstList ? firstList.content : null;
    if (Array.isArray(listItems)) {
        listItems.push(defNode);
        return;
    }
    if (!Array.isArray(doc.content)) {
        return;
    }
    doc.content.push({ type: "footnotesList", content: [defNode] });
}