/**
 * Inline-comment anchoring against a ProseMirror document.
 *
 * Docmost stores an inline comment's highlight as a `comment` MARK on the
 * document text (`{ type: "comment", attrs: { commentId, resolved } }`); the
 * `/comments/create` API only records the comment row + its `selection` text and
 * does NOT insert that mark, so the anchor has to be written into the page
 * content separately. This module finds where a selection lives in the document
 * and splices the comment mark across the matched range.
 *
 * Matching has to be robust because the agent supplies the selection as plain
 * text while the document stores rich inline content: a selection can span
 * several adjacent text nodes (inline code / bold / links each become their own
 * text node), and the document may use smart/typographic quotes, dash variants,
 * non-breaking spaces, or collapsed runs of whitespace that the agent typed as
 * ASCII quotes/hyphens/single spaces. We therefore normalize both sides before
 * comparing and match across maximal runs of consecutive text nodes within a
 * single block, while mapping every normalized character back to its raw index
 * so the mark lands on the exact original characters.
 *
 * Every typographic character below is written as a `\u` escape on purpose:
 * literal smart quotes / special spaces are visually indistinguishable from
 * their ASCII forms and are easily destroyed by editors or Unicode-normalizing
 * tooling, which would silently change what this module matches.
 */

/**
 * Typographic double-quote variants mapped to ASCII `"`:
 * U+00AB, U+00BB, U+201E, U+201C, U+201D, U+201F, U+301D, U+301E, U+FF02.
 */
const DOUBLE_QUOTES = "\u00AB\u00BB\u201E\u201C\u201D\u201F\u301D\u301E\uFF02";

/**
 * Typographic single-quote/apostrophe variants mapped to ASCII `'`:
 * U+2018, U+2019, U+201A, U+201B.
 */
const SINGLE_QUOTES = "\u2018\u2019\u201A\u201B";

/**
 * Dash variants mapped to ASCII `-`:
 * U+2013, U+2014, U+2015, U+2212, U+2010, U+2011, U+2012.
 */
const DASHES = "\u2013\u2014\u2015\u2212\u2010\u2011\u2012";

/** Guard against pathological/cyclic documents in the depth-first walk. */
const MAX_DEPTH = 200;

/** Lookup table: typographic character -> the ASCII character it folds to. */
const ASCII_FOLD = new Map([
  ...[...DOUBLE_QUOTES].map((ch) => [ch, '"']),
  ...[...SINGLE_QUOTES].map((ch) => [ch, "'"]),
  ...[...DASHES].map((ch) => [ch, "-"]),
]);

/**
 * Special spaces treated as whitespace in addition to whatever `\s` matches:
 * no-break, figure, narrow no-break, thin, hair, en and em space. Listed
 * explicitly so the behaviour does not depend on the engine's `\s` definition.
 */
const SPECIAL_SPACES = new Set([
  "\u00A0", // no-break space
  "\u2007", // figure space
  "\u202F", // narrow no-break space
  "\u2009", // thin space
  "\u200A", // hair space
  "\u2002", // en space
  "\u2003", // em space
]);

/** Hoisted so the per-character whitespace test does not rebuild a regex. */
const WHITESPACE_RE = /\s/;

/**
 * Build the comment mark Docmost stores on anchored text.
 *
 * @param {string} commentId - Id of the comment the mark points at.
 * @returns {{type: string, attrs: {commentId: string, resolved: boolean}}}
 *   A freshly allocated mark object (never shared between callers).
 */
function makeCommentMark(commentId) {
  // The comment mark schema declares both commentId and resolved; include
  // resolved:false for completeness so the stored mark matches the editor's.
  return { type: "comment", attrs: { commentId, resolved: false } };
}

/**
 * True for any character we collapse/replace with a single normal space.
 *
 * @param {string} ch - A single UTF-16 code unit taken from the input string.
 * @returns {boolean}
 */
function isWhitespaceChar(ch) {
  return WHITESPACE_RE.test(ch) || SPECIAL_SPACES.has(ch);
}

/** True when `node` is a ProseMirror JSON text node (`type === "text"`). */
function isTextNode(node) {
  return Boolean(node) && typeof node === "object" && node.type === "text";
}

/**
 * Normalize a string for matching and return both the normalized text and a
 * `map` where `map[i]` is the index into the ORIGINAL `s` of the i-th
 * normalized character.
 *
 * Rules: map smart quotes / dashes / special spaces to their ASCII forms,
 * collapse any run of whitespace to a SINGLE space (whose map entry points at
 * the FIRST raw whitespace char of the run), and DO NOT lowercase (anchoring is
 * case-sensitive to match the exact document text).
 *
 * @param {string} s - Raw text. Any non-string value is treated as empty
 *   rather than throwing, so callers can pass an absent selection safely.
 * @returns {{norm: string, map: number[]}} `norm.length === map.length`.
 */
export function normalizeForMatch(s) {
  const map = [];
  if (typeof s !== "string") {
    return { norm: "", map };
  }

  const pieces = [];
  let i = 0;
  while (i < s.length) {
    const ch = s[i];
    if (isWhitespaceChar(ch)) {
      // Collapse the whole whitespace run to one space mapped to the run start.
      pieces.push(" ");
      map.push(i);
      do {
        i++;
      } while (i < s.length && isWhitespaceChar(s[i]));
      continue;
    }
    pieces.push(ASCII_FOLD.has(ch) ? ASCII_FOLD.get(ch) : ch);
    map.push(i);
    i++;
  }
  return { norm: pieces.join(""), map };
}

/**
 * Translate an index into a run's concatenated raw text into the child node
 * that holds that character and the offset inside that child's text.
 *
 * @param {{childIdx: number, start: number, length: number}[]} segments -
 *   Non-empty text nodes of the run, in document order.
 * @param {number} rawIndex - Index into the concatenated run text.
 * @returns {{childIdx: number, offset: number} | null}
 */
function locateInRun(segments, rawIndex) {
  for (const seg of segments) {
    // Segments are contiguous and ordered, so the first one ending after
    // rawIndex is the one containing it.
    if (rawIndex < seg.start + seg.length) {
      return { childIdx: seg.childIdx, offset: rawIndex - seg.start };
    }
  }
  return null;
}

/**
 * Find a selection inside a SINGLE block's direct `content` array.
 *
 * Builds maximal runs of consecutive `text` nodes (any non-text inline node,
 * e.g. a mention, breaks the run), normalizes each run and the selection the
 * same way, then searches each run for the normalized selection.
 *
 * @param {Array<object>} blockContent - The block's direct children.
 * @param {string} selection - Plain-text selection to locate.
 * @returns {{startChild: number, startOffset: number, endChild: number,
 *   endOffset: number} | null} Child/offset range of the FIRST match
 *   (`endOffset` is exclusive), or `null` when nothing matches, when
 *   `blockContent` is not an array, or when `selection` is not a string or
 *   is blank after normalization.
 */
export function findAnchorInBlock(blockContent, selection) {
  if (!Array.isArray(blockContent) || typeof selection !== "string") {
    return null;
  }

  // Trim leading/trailing spaces on the NORMALIZED selection only.
  const needle = normalizeForMatch(selection).norm.trim();
  if (needle.length === 0) {
    return null;
  }

  let i = 0;
  while (i < blockContent.length) {
    if (!isTextNode(blockContent[i])) {
      i++;
      continue;
    }

    // Accumulate a maximal run of consecutive text nodes.
    const segments = [];
    const texts = [];
    let runLength = 0;
    let j = i;
    while (j < blockContent.length && isTextNode(blockContent[j])) {
      const raw = blockContent[j].text;
      const text = typeof raw === "string" ? raw : "";
      if (text.length > 0) {
        segments.push({ childIdx: j, start: runLength, length: text.length });
        texts.push(text);
        runLength += text.length;
      }
      j++;
    }

    // Try to match within this run.
    const { norm, map } = normalizeForMatch(texts.join(""));
    const idx = norm.indexOf(needle);
    if (idx !== -1) {
      const rawStart = map[idx];
      // The raw end is where the NEXT normalized character begins; when the
      // match reaches the end of the run, it is the end of the raw text.
      const afterIdx = idx + needle.length;
      const rawEndExclusive = afterIdx < map.length ? map[afterIdx] : runLength;
      const first = locateInRun(segments, rawStart);
      // rawEndExclusive points at the raw char AFTER the match; the last matched
      // raw char is at rawEndExclusive-1, so endOffset is its offset + 1.
      const last = locateInRun(segments, rawEndExclusive - 1);
      return {
        startChild: first.childIdx,
        startOffset: first.offset,
        endChild: last.childIdx,
        endOffset: last.offset + 1,
      };
    }

    // No match in this run: continue scanning AFTER it.
    i = j;
  }
  return null;
}

/**
 * Depth-first, document-order search for the first place `selection` can be
 * anchored. At each node with an array `content`, first try to match within
 * that node's own content, then recurse into children that themselves have a
 * `content` array. Shared by canAnchorInDoc and applyAnchorInDoc so both
 * always agree on which block is chosen.
 *
 * @returns {{content: Array<object>, match: object} | null} The matching
 *   block's `content` array (by reference) and the match range, or `null`.
 */
function findFirstAnchor(doc, selection) {
  const visit = (node, depth) => {
    if (depth > MAX_DEPTH || !node || typeof node !== "object") {
      return null;
    }
    const { content } = node;
    if (!Array.isArray(content)) {
      return null;
    }
    const match = findAnchorInBlock(content, selection);
    if (match) {
      return { content, match };
    }
    for (const child of content) {
      if (child && typeof child === "object" && Array.isArray(child.content)) {
        const hit = visit(child, depth + 1);
        if (hit) {
          return hit;
        }
      }
    }
    return null;
  };
  return visit(doc, 0);
}

/**
 * Check whether `selection` can be anchored anywhere in `doc`.
 *
 * @param {object} doc - ProseMirror JSON document (or any node).
 * @param {string} selection - Plain-text selection to locate.
 * @returns {boolean} `true` when some block contains the selection; `false`
 *   otherwise, including when `selection` is missing or not a string.
 */
export function canAnchorInDoc(doc, selection) {
  return findFirstAnchor(doc, selection) !== null;
}

/**
 * Split the matched text nodes and splice the comment mark across the range.
 * `blockContent` is mutated IN PLACE. `match.startChild..endChild` are all text
 * nodes (guaranteed by findAnchorInBlock building runs of text nodes).
 *
 * @param {Array<object>} blockContent - The block's direct children.
 * @param {{startChild: number, startOffset: number, endChild: number,
 *   endOffset: number}} match - Range returned by findAnchorInBlock.
 * @param {string} commentId - Id stored on the inserted comment mark.
 */
function spliceCommentMark(blockContent, match, commentId) {
  const { startChild, startOffset, endChild, endOffset } = match;
  const fragments = [];

  for (let k = startChild; k <= endChild; k++) {
    const node = blockContent[k];
    const text = typeof node.text === "string" ? node.text : "";
    const isFirst = k === startChild;
    const isLast = k === endChild;

    const before = isFirst ? text.slice(0, startOffset) : "";
    const marked = text.slice(isFirst ? startOffset : 0, isLast ? endOffset : text.length);
    const after = isLast ? text.slice(endOffset) : "";

    // Process per-node so each node's OWN marks/attrs are preserved.
    const ownMarks = Array.isArray(node.marks) ? node.marks : [];

    if (before.length > 0) {
      fragments.push({ ...node, text: before, marks: [...ownMarks] });
    }
    if (marked.length > 0) {
      // Drop any pre-existing comment mark from the marked fragment so it ends
      // up with exactly one comment mark (the new one) rather than two.
      const baseMarks = ownMarks.filter((m) => !(m && m.type === "comment"));
      // A fresh mark object per fragment: sharing one instance would let an
      // in-place edit of one fragment's mark attrs leak into the others.
      fragments.push({
        ...node,
        text: marked,
        marks: [...baseMarks, makeCommentMark(commentId)],
      });
    }
    if (after.length > 0) {
      fragments.push({ ...node, text: after, marks: [...ownMarks] });
    }
  }

  blockContent.splice(startChild, endChild - startChild + 1, ...fragments);
}

/**
 * Depth-first (same order as canAnchorInDoc) over `doc`; on the FIRST block
 * whose content matches `selection`, splice the comment mark across the matched
 * range in place and return true. Returns false (and does NOT mutate) when no
 * block matches.
 *
 * @param {object} doc - ProseMirror JSON document; mutated on success.
 * @param {string} selection - Plain-text selection to anchor.
 * @param {string} commentId - Id stored on the inserted comment mark.
 * @returns {boolean} Whether the mark was inserted.
 */
export function applyAnchorInDoc(doc, selection, commentId) {
  const hit = findFirstAnchor(doc, selection);
  if (!hit) {
    return false;
  }
  spliceCommentMark(hit.content, hit.match, commentId);
  return true;
}