// gitmost/packages/mcp/build/lib/json-edit.js

/**
 * Surgical text edits on a ProseMirror document without re-importing it.
 *
 * Each edit replaces an exact substring of a block's inline text, preserving
 * every node id, mark and attribute around it. Matching works at the
 * INLINE-CONTAINER (block) level: a block's text nodes are flattened into a
 * per-character array, so a `find` may freely cross bold/italic/link
 * boundaries (separate text nodes). The replacement inherits marks from the
 * unchanged common prefix/suffix of the match, so editing plain text next to a
 * bold word keeps the bold word bold, and editing the inside of a bold word
 * keeps the inserted text bold. This is the safe alternative to a full markdown
 * re-import for small wording fixes.
 */
/**
 * Placeholder code unit standing in for one opaque (non-text) inline node.
 *
 * The value is U+FFFC (OBJECT REPLACEMENT CHARACTER), written as an escape
 * sequence on purpose: the raw character is invisible in most editors and is
 * easily dropped by tooling, which would leave an empty string here. Each
 * non-text inline node must contribute exactly one code unit to a block's
 * flattened text so that string offsets line up with slot indices.
 */
const ATOM_PLACEHOLDER = "\uFFFC";
/**
 * Locate every usable occurrence of `needle` inside one flattened block.
 *
 * `chars` is the block's slot array and `plain` is expected to be the string
 * obtained by joining each slot's `ch`, so that offset i in `plain` refers to
 * `chars[i]`. An occurrence covering slots [start, start + needle.length) is
 * usable only when none of those slots is an atom (a non-text inline node).
 * Atom slots can therefore never take part in a match, whereas a genuine text
 * slot that happens to hold the same code unit as the atom placeholder
 * matches like any other character (it carries no `.atom`).
 *
 * Accepted matches never overlap: after a hit the search continues from the
 * end of that hit. After a rejected candidate the search continues just one
 * code unit further along, so a usable occurrence that begins right behind
 * the atom is still picked up.
 *
 * @param {Array<{ch: string, marks: Array, atom?: object}>} chars Slot array.
 * @param {string} plain Concatenation of every slot's `ch`.
 * @param {string} needle Text to look for; an empty needle yields no matches.
 * @returns {number[]} Ascending start offsets of the usable occurrences.
 */
function findValidMatches(chars, plain, needle) {
    const hits = [];
    if (!needle)
        return hits;
    let from = 0;
    while (true) {
        const at = plain.indexOf(needle, from);
        if (at === -1)
            break;
        const stop = at + needle.length;
        const touchesAtom = chars.slice(at, stop).some((slot) => slot && slot.atom);
        if (touchesAtom) {
            // Rejected: retry from the very next code unit.
            from = at + 1;
            continue;
        }
        hits.push(at);
        // Accepted: jump past it so accepted matches never overlap.
        from = stop;
    }
    return hits;
}
/**
 * Compare two marks arrays for deep equality, position by position.
 *
 * The arrays are equal when they are the same array, or when they have the
 * same length and every pair of elements at the same index serialises to the
 * same JSON text. Element order matters.
 *
 * @param {Array} a First marks array.
 * @param {Array} b Second marks array.
 * @returns {boolean} True when both arrays describe the same marks in the same order.
 */
function marksEqual(a, b) {
    if (a === b)
        return true;
    const count = a.length;
    if (count !== b.length)
        return false;
    let index = 0;
    while (index < count) {
        const left = JSON.stringify(a[index]);
        const right = JSON.stringify(b[index]);
        if (left !== right)
            return false;
        index += 1;
    }
    return true;
}
/**
 * Tell whether `node` is an inline container, i.e. whether its own `content`
 * array holds at least one child of type "text". Only direct children count;
 * text nested deeper does not make the outer node a block.
 *
 * @param {object|null|undefined} node Candidate node.
 * @returns {boolean} True when `node.content` is an array with a text child.
 */
function isInlineBlock(node) {
    const children = node?.content;
    if (!Array.isArray(children))
        return false;
    for (const child of children) {
        if (child && child.type === "text")
            return true;
    }
    return false;
}
/**
 * Expand a block's inline children into one slot per UTF-16 code unit.
 *
 * A text child (type "text" with a string `text`) yields one `{ ch, marks }`
 * slot per code unit, all sharing that child's marks array (or a fresh empty
 * array when it has none). Every other child yields exactly one slot whose
 * `ch` is the atom placeholder and whose `atom` is the child itself, so the
 * child can be put back untouched later.
 *
 * @param {{content?: Array}} node Block whose `content` is flattened.
 * @returns {Array<{ch: string, marks: Array, atom?: *}>} Slots in document order.
 */
function flattenBlock(node) {
    const slots = [];
    const children = node.content || [];
    for (const child of children) {
        const isText = Boolean(child) && child.type === "text" && typeof child.text === "string";
        if (!isText) {
            // Opaque inline node: a single slot that carries the node along.
            slots.push({
                ch: ATOM_PLACEHOLDER,
                marks: (child && child.marks) || [],
                atom: child,
            });
            continue;
        }
        const marks = child.marks || [];
        // split("") cuts at UTF-16 code units, which keeps slot indices
        // aligned with String.indexOf offsets.
        for (const ch of child.text.split("")) {
            slots.push({ ch, marks });
        }
    }
    return slots;
}
/**
 * Rebuild inline nodes from a slot array.
 *
 * Consecutive text slots whose marks compare equal (via marksEqual) are merged
 * into a single text node; the node takes the marks array of the first slot of
 * its run and omits `marks` altogether when that array is empty. Atom slots
 * are emitted as their original node and always end the current text run.
 * Empty text nodes are never produced.
 *
 * @param {Array<{ch: string, marks: Array, atom?: object}>} chars Slot array.
 * @returns {Array<object>} Inline nodes in slot order.
 */
function tokenizeChars(chars) {
    const nodes = [];
    let pendingText = "";
    let pendingMarks = null;
    // Emit the pending run as a text node (when it has any text) and reset it.
    function emitPending() {
        if (pendingText === "")
            return;
        const hasMarks = pendingMarks && pendingMarks.length > 0;
        nodes.push(hasMarks
            ? { type: "text", text: pendingText, marks: pendingMarks }
            : { type: "text", text: pendingText });
        pendingText = "";
        pendingMarks = null;
    }
    for (let i = 0; i < chars.length; i += 1) {
        const slot = chars[i];
        if (slot.atom) {
            emitPending();
            nodes.push(slot.atom);
            continue;
        }
        const marksChanged = pendingMarks !== null && !marksEqual(pendingMarks, slot.marks);
        if (marksChanged)
            emitPending();
        if (pendingMarks === null)
            pendingMarks = slot.marks;
        pendingText += slot.ch;
    }
    emitPending();
    return nodes;
}
/**
 * Count how many leading UTF-16 code units two strings share.
 *
 * @param {string} a First string.
 * @param {string} b Second string.
 * @returns {number} Length of the longest common prefix.
 */
function commonPrefixLen(a, b) {
    const limit = Math.min(a.length, b.length);
    let shared = 0;
    while (shared < limit) {
        if (a[shared] !== b[shared])
            break;
        shared += 1;
    }
    return shared;
}
/**
 * Count how many trailing UTF-16 code units two strings share, never
 * reporting more than `cap`. The cap lets the caller stop the suffix from
 * overlapping a prefix it has already claimed.
 *
 * @param {string} a First string.
 * @param {string} b Second string.
 * @param {number} cap Upper bound on the returned length.
 * @returns {number} Length of the longest common suffix, at most `cap`.
 */
function commonSuffixLen(a, b, cap) {
    const limit = Math.min(a.length, b.length, cap);
    let shared = 0;
    let aPos = a.length - 1;
    let bPos = b.length - 1;
    while (shared < limit && a[aPos] === b[bPos]) {
        aPos -= 1;
        bPos -= 1;
        shared += 1;
    }
    return shared;
}
/**
 * Splice one edit into a block's slot array at the given match offsets.
 *
 * Only the part of the match that actually differs between `edit.find` and
 * `edit.replace` is touched: the shared prefix and shared suffix slots are
 * copied through unchanged, so they keep their marks. The inserted code units
 * get their marks as follows:
 *  - if the removed slots are non-empty and all carry equal marks, those marks;
 *  - otherwise the marks of the nearest non-atom slot to the left of the
 *    changed region, else of the nearest non-atom slot at or after its end,
 *    else no marks.
 *
 * The caller is expected to pass ascending, non-overlapping offsets of
 * atom-free matches, so a match range never covers an atom slot.
 *
 * @param {Array<{ch: string, marks: Array, atom?: object}>} chars Original slots (not modified).
 * @param {{find: string, replace: string}} edit The edit to apply.
 * @param {number[]} matchPositions Start offsets of the matches to replace.
 * @returns {{newChars: Array, spliced: number}} Rebuilt slots and the number of matches processed.
 */
function applyEditToChars(chars, edit, matchPositions) {
    const { find, replace } = edit;
    // The diff between find and replace is the same for every match.
    const keptHead = commonPrefixLen(find, replace);
    const keptTail = commonSuffixLen(find, replace, Math.min(find.length, replace.length) - keptHead);
    const insertedUnits = replace.slice(keptHead, replace.length - keptTail).split("");
    const newChars = [];
    let cursor = 0;
    // Copy original slots through, advancing the cursor up to (not including) `limit`.
    const copyUntil = (limit) => {
        while (cursor < limit) {
            newChars.push(chars[cursor]);
            cursor += 1;
        }
    };
    // Decide which marks the inserted text receives for the region [from, to).
    const marksForRegion = (from, to) => {
        const removed = chars.slice(from, to);
        const uniform = removed.length > 0 &&
            removed.every((slot) => marksEqual(slot.marks, removed[0].marks));
        if (uniform)
            return removed[0].marks;
        // Fall back to the closest text slot: left side first, then right.
        // Atom slots are skipped because their marks belong to the atom.
        let found = null;
        for (let i = from - 1; i >= 0; i -= 1) {
            if (chars[i].atom)
                continue;
            found = chars[i].marks;
            break;
        }
        if (found === null) {
            for (let i = to; i < chars.length; i += 1) {
                if (chars[i].atom)
                    continue;
                found = chars[i].marks;
                break;
            }
        }
        return found === null ? [] : found;
    };
    let spliced = 0;
    for (const matchStart of matchPositions) {
        const regionStart = matchStart + keptHead;
        const regionEnd = matchStart + find.length - keptTail;
        // Everything before the changed region, shared prefix included.
        copyUntil(regionStart);
        const marks = marksForRegion(regionStart, regionEnd);
        for (const ch of insertedUnits) {
            newChars.push({ ch, marks });
        }
        // Jump over the slots being replaced.
        cursor = regionEnd;
        spliced += 1;
    }
    // Whatever follows the last match.
    copyUntil(chars.length);
    return { newChars, spliced };
}
/**
 * Apply a list of find/replace text edits to a ProseMirror document.
 *
 * The document is deep-copied through a JSON round trip first; all work is
 * done on that copy and the input is left untouched. Edits run in the given
 * order against the same copy, so a later edit may target text written by an
 * earlier one.
 *
 * For each edit, every inline block in the tree is searched for atom-free
 * occurrences of `edit.find`:
 *  - no occurrence: the edit is recorded in `failed`, with a reason that says
 *    whether the text is absent or only present across a non-text inline node;
 *  - more than one occurrence without `edit.replaceAll`: recorded in `failed`
 *    as ambiguous;
 *  - otherwise every occurrence is replaced and the edit is recorded in
 *    `results` with its replacement count.
 *
 * Finally, empty text nodes anywhere in the copy are removed.
 *
 * @param {object} doc ProseMirror document as plain JSON.
 * @param {Array<{find: string, replace: string, replaceAll?: boolean}>} edits
 *   Edits to apply, in order. `replace` is assumed to be a string.
 * @returns {{doc: object, results: Array<{find: string, replacements: number}>,
 *   failed: Array<{find: string, reason: string}>}} The edited copy plus the
 *   per-edit outcome lists.
 * @throws {Error} When an edit has an empty (falsy) `find`.
 */
export function applyTextEdits(doc, edits) {
    const working = JSON.parse(JSON.stringify(doc));
    const results = [];
    const failed = [];
    // Depth-first walk collecting every inline block in document order, so
    // nested containers (list items, table cells, blockquotes, ...) are covered.
    const gatherBlocks = (node, found) => {
        if (isInlineBlock(node))
            found.push(node);
        for (const child of node.content || [])
            gatherBlocks(child, found);
        return found;
    };
    // Remove empty text nodes throughout the subtree rooted at `node`.
    const dropEmptyText = (node) => {
        if (!Array.isArray(node.content))
            return;
        node.content = node.content.filter((child) => child.type !== "text" || child.text !== "");
        for (const child of node.content)
            dropEmptyText(child);
    };
    for (const edit of edits) {
        const { find } = edit;
        if (!find)
            throw new Error("edit.find must be a non-empty string");
        // Blocks are re-collected for every edit because earlier edits
        // replace block content.
        const scans = gatherBlocks(working, []).map((block) => {
            const chars = flattenBlock(block);
            const plain = chars.map((slot) => slot.ch).join("");
            return { block, chars, plain, hits: findValidMatches(chars, plain, find) };
        });
        const total = scans.reduce((sum, scan) => sum + scan.hits.length, 0);
        if (total === 0) {
            // A raw substring hit (atoms included) means the text exists but
            // only across an atom; report that rather than a plain miss.
            const blockedByAtom = scans.some((scan) => scan.plain.indexOf(find) !== -1);
            const reason = blockedByAtom
                ? "match crosses a non-text inline node (image/break/mention); use update_page_json for structural changes."
                : "text not found in the document.";
            failed.push({ find, reason });
            continue;
        }
        if (total > 1 && !edit.replaceAll) {
            failed.push({
                find,
                reason: `matches ${total} times. Provide a longer, unique fragment or set replaceAll: true.`,
            });
            continue;
        }
        // From here on either replaceAll is set or there is exactly one match,
        // so every valid position is spliced in both cases.
        let replacements = 0;
        for (const { block, chars, hits } of scans) {
            if (hits.length === 0)
                continue;
            const { newChars, spliced } = applyEditToChars(chars, edit, hits);
            replacements += spliced;
            block.content = tokenizeChars(newChars);
        }
        results.push({ find, replacements });
    }
    // Safety net: ProseMirror forbids empty text nodes, and blocks no edit
    // touched may still carry one in from the input.
    dropEmptyText(working);
    return { doc: working, results, failed };
}