Must-fix:
- insertInlineFootnote could glue a footnoteReference inside an EXISTING definition (nested footnotesList, or a bare footnoteDefinition with no list wrapper), which canonicalize then dropped as an orphan — silently losing the definition's prose. Now: (a) the body/notes boundary is computed from the first top-level block that IS or CONTAINS (recursively) a footnotesList/footnoteDefinition, not just a top-level list; and (b) the insertNodesAfterAnchor core skips footnotesList/footnoteDefinition subtrees entirely (skipSubtreeTypes), so an anchor whose only match is inside a definition -> inserted:false (clean abort, no write). Added tests: nested-definition, bare-definition, and body-before-nested-list-still-inserts.
- editor-ext footnote-canonicalize header listed `markdownToProseMirror` among the canonicalizing MCP paths; it is the NON-canonicalizing primitive. Replaced with `markdownToProseMirrorCanonical` (+ note that the plain primitive is for comment bodies) and added copy_page_content.
- Client paste: canonicalizePastedFootnotes now skips a definitions-ONLY paste (no footnoteReference anywhere) — canonicalizing it would strip the reference-less list and yield an EMPTY paste. Added a test.

Suggestions:
- docmost_transform now runs validateDocStructure/validateDocUrls on the RAW transform output BEFORE canonicalizeFootnotes (mirrors updatePageJson), so a too-deep doc gives the intended max-depth error instead of a stack overflow.
- docmost_transform tool description now states the RESULT is footnote-canonical (dryRun diff may show tidy-ups; idempotent after first run).
- insertFootnote: dropped the dead `result ? … : undefined` ternaries and the `as any` casts (result is always set by the time we return; the not-found path throws and aborts mutatePage). `const r = result!;`.
Tests / architecture:
- Added a LIVE-plugin golden case: the real footnoteSyncPlugin leaves a list with non-empty content after it in place, and canonicalize agrees (placement parity is now a driven property, not a hand-set expected).
- Added generateFootnoteId uuidv7 shape + uniqueness test.
- Item 9: added the ENFORCEMENT-RULE comments at the server parseProsemirrorContent and the MCP canonicalizer header (any NEW full-doc persist path MUST canonicalize; fragments/append/prepend and comment bodies MUST NOT). Kept per-call-site over a brittle grep CI test (the replace-vs-fragment + comment-vs-page nuance makes a single wrapper unsafe).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
199 lines
8.3 KiB
JavaScript
199 lines
8.3 KiB
JavaScript
/**
 * Server-side footnote canonicalizer (MCP mirror — PURE).
 *
 * `canonicalizeFootnotes(doc)` is a pure ProseMirror-JSON port of the editor's
 * `footnoteSyncPlugin` end-state, identical in behaviour to
 * `@docmost/editor-ext`'s `canonicalizeFootnotes`. It is mirrored here — rather
 * than imported from editor-ext — for the SAME reason `footnote-lex.ts` and the
 * `docmost-schema.ts` nodes are mirrored: the MCP package is deliberately
 * decoupled from the browser/React-heavy editor barrel and operates on plain
 * JSON. The editor-ext copy owns the golden test against the live plugin; this
 * copy must stay behaviourally identical (a SHARED golden corpus, exercised by
 * both test suites, pins that — see `test/unit/footnote-corpus.mjs`).
 *
 * This module is the pure MIRROR only. The inline-authoring helpers
 * (`footnoteContentKey`, `makeFootnoteDefinition`, `generateFootnoteId`) used by
 * `insertInlineFootnote` live in the sibling `footnote-authoring.ts`, so this
 * file is compositionally symmetric to the editor-ext copy.
 *
 * Why it exists: every NON-editor write path (markdown import, update_page_json,
 * docmost_transform, insert_footnote) builds ProseMirror JSON directly, so the
 * editor's footnote plugins never run and the canonical topology (sequential
 * numbering by first reference, one trailing list, no orphans, no raw `[^id]`)
 * was never enforced. Running this at the end of every write path closes that
 * gap; because it is idempotent, it is a no-op when the footnotes are already
 * canonical (no spurious mutations / git-sync churn).
 *
 * ENFORCEMENT RULE (#228): any NEW FULL-document persist path MUST call
 * `canonicalizeFootnotes(doc)` before writing — the current callers are
 * `markdownToProseMirrorCanonical` (page markdown import/update; the plain
 * `markdownToProseMirror` used for COMMENT bodies must NOT, or it would drop a
 * reference-less definition), `update_page_json`, `docmost_transform`,
 * `insert_footnote`, and `copy_page_content`. Append/prepend FRAGMENT writes MUST
 * NOT canonicalize. This is deliberately per-call-site (the replace-vs-fragment
 * and comment-vs-page nuances make a single naive wrapper unsafe).
 */
|
|
/** Node `type` of an inline footnote marker placed in body text. */
const FOOTNOTE_REFERENCE_NAME = "footnoteReference";

/** Node `type` of the container block that holds the footnote definitions. */
const FOOTNOTES_LIST_NAME = "footnotesList";

/** Node `type` of a single footnote body entry. */
const FOOTNOTE_DEFINITION_NAME = "footnoteDefinition";
|
|
/**
 * Deep-copy a plain JSON value.
 *
 * Uses the runtime's `structuredClone` when it is available and falls back to a
 * JSON stringify/parse round-trip otherwise.
 *
 * @param {*} v - The value to copy.
 * @returns {*} An independent deep copy of `v`.
 */
function cloneJson(v) {
    const hasNativeClone = typeof structuredClone === "function";
    return hasNativeClone ? structuredClone(v) : JSON.parse(JSON.stringify(v));
}
|
|
/**
 * Tell whether a node is a paragraph with no children.
 *
 * A paragraph counts as empty when its `content` is either not an array at all
 * or an array of length zero.
 *
 * @param {*} node - Candidate node (may be null/undefined).
 * @returns {boolean} `true` only for an empty paragraph node.
 */
function isEmptyParagraph(node) {
    if (!node || node.type !== "paragraph") {
        return false;
    }
    const { content } = node;
    return !(Array.isArray(content) && content.length > 0);
}
|
|
/**
 * Walk a node tree depth-first (pre-order) and record each distinct footnote
 * reference id the first time it is encountered.
 *
 * @param {*} node - Current node; non-objects are ignored.
 * @param {Array} out - Receives the ids in first-seen order (mutated).
 * @param {Set} seen - Ids already recorded (mutated).
 * @returns {void}
 */
function collectReferenceIds(node, out, seen) {
    const isNode = Boolean(node) && typeof node === "object";
    if (!isNode) {
        return;
    }

    if (node.type === FOOTNOTE_REFERENCE_NAME) {
        const refId = node.attrs?.id;
        // A reference without a usable id is skipped; repeats are recorded once.
        const isFirstSighting = Boolean(refId) && !seen.has(refId);
        if (isFirstSighting) {
            seen.add(refId);
            out.push(refId);
        }
    }

    const children = node.content;
    if (!Array.isArray(children)) {
        return;
    }
    for (let i = 0; i < children.length; i += 1) {
        collectReferenceIds(children[i], out, seen);
    }
}
|
|
/**
 * Walk a node tree depth-first (pre-order) and gather every footnote definition
 * node, in document order.
 *
 * The walk continues into a definition's own children, so a definition nested
 * inside another definition is gathered as well.
 *
 * @param {*} node - Current node; non-objects are ignored.
 * @param {Array} out - Receives the definition nodes themselves, not copies (mutated).
 * @returns {void}
 */
function collectDefinitions(node, out) {
    const isNode = Boolean(node) && typeof node === "object";
    if (!isNode) {
        return;
    }

    if (node.type === FOOTNOTE_DEFINITION_NAME) {
        out.push(node);
    }

    const children = node.content;
    if (!Array.isArray(children)) {
        return;
    }
    for (let i = 0; i < children.length; i += 1) {
        collectDefinitions(children[i], out);
    }
}
|
|
/**
 * Build a placeholder footnote definition for the given id: a definition node
 * whose only child is a paragraph with no content.
 *
 * @param {*} id - The footnote id stored in the node's `attrs`.
 * @returns {{type: string, attrs: {id: *}, content: Array}} A fresh definition node.
 */
function emptyDefinition(id) {
    const blankParagraph = { type: "paragraph" };
    const attrs = { id };
    return { type: FOOTNOTE_DEFINITION_NAME, attrs, content: [blankParagraph] };
}
|
|
/**
 * Structural equality for plain JSON values.
 *
 * Arrays are matched element-by-element at the same index, so ordering matters;
 * objects are matched by own enumerable key set and per-key value, so key
 * ordering does not. The positional array rule is what makes a reordered
 * `footnotesList.content` compare UNEQUAL, which in turn triggers the canonical
 * rebuild instead of leaving the list untouched.
 *
 * Values that are not strictly identical and are neither arrays nor objects
 * (for example two `NaN`s) compare unequal.
 *
 * @param {*} a - Left-hand value.
 * @param {*} b - Right-hand value.
 * @returns {boolean} `true` when the two values are structurally equal.
 */
function deepEqualJson(a, b) {
    if (a === b) {
        return true;
    }
    const comparable = a != null && b != null && typeof a === typeof b;
    if (!comparable) {
        return false;
    }

    const leftIsList = Array.isArray(a);
    const rightIsList = Array.isArray(b);
    if (leftIsList || rightIsList) {
        // An array never equals a non-array, nor an array of another length.
        if (!(leftIsList && rightIsList) || a.length !== b.length) {
            return false;
        }
        let index = 0;
        while (index < a.length) {
            if (!deepEqualJson(a[index], b[index])) {
                return false;
            }
            index += 1;
        }
        return true;
    }

    if (typeof a !== "object") {
        return false;
    }

    const leftKeys = Object.keys(a);
    if (leftKeys.length !== Object.keys(b).length) {
        return false;
    }
    return leftKeys.every((key) => Object.prototype.hasOwnProperty.call(b, key) && deepEqualJson(a[key], b[key]));
}
|
|
/**
 * Canonicalize the footnotes of a ProseMirror-JSON document.
 *
 * The input is never mutated: all work happens on a deep clone, which is what
 * gets returned. A value that is not an object with an array `content` is
 * returned as-is (same reference).
 *
 * Resulting topology:
 *  - no reference anywhere -> every `footnotesList` is removed, at any depth;
 *  - otherwise exactly one list is (re)built holding one definition per distinct
 *    referenced id, ordered by first reference. An existing definition is
 *    reused (the first one per id wins); a missing one is synthesized empty;
 *    definitions whose id is never referenced are left out of the list.
 *
 * See the file header and the editor-ext twin for the full contract.
 *
 * @param {*} doc - ProseMirror document JSON.
 * @returns {*} The canonicalized clone, or `doc` itself when it is not doc-like.
 */
export function canonicalizeFootnotes(doc) {
    const isDocLike = doc != null && typeof doc === "object" && Array.isArray(doc.content);
    if (!isDocLike) {
        return doc;
    }
    const out = cloneJson(doc);

    // Distinct reference ids in document order (deep — references can sit inside
    // callouts, tables, list items, ...). This is the ordering/numbering truth.
    const referenceIds = [];
    collectReferenceIds(out, referenceIds, new Set());

    // Nothing is referenced -> there must be NO list at all, at any depth.
    if (referenceIds.length === 0) {
        stripFootnotesListsDeep(out);
        return out;
    }

    // Every definition node in document order (deep).
    const defNodes = [];
    collectDefinitions(out, defNodes);

    // First definition per id wins. A later duplicate carries the SAME id, so it
    // could never be referenced separately and is left out.
    const firstDefById = new Map();
    defNodes.forEach((def) => {
        const defId = def?.attrs?.id;
        if (defId && !firstDefById.has(defId)) {
            firstDefById.set(defId, def);
        }
    });

    // One definition per referenced id, in REFERENCE order. A shallow copy of the
    // existing node is enough: `out` is already a deep clone and the old lists
    // get cut below.
    const orderedDefs = referenceIds.map((id) => {
        const existing = firstDefById.get(id);
        return existing
            ? { ...existing, attrs: { ...(existing.attrs ?? {}), id } }
            : emptyDefinition(id);
    });

    // Placement parity with the live plugin: a document that ALREADY holds the
    // single correct list keeps it exactly where it is. Cutting and re-appending
    // it would silently reorder whatever content follows the list.
    const topLevelLists = out.content.filter((block) => block && block.type === FOOTNOTES_LIST_NAME);
    const alreadyCanonical = topLevelLists.length === 1 &&
        defNodes.length === orderedDefs.length &&
        deepEqualJson(topLevelLists[0].content, orderedDefs);
    if (alreadyCanonical) {
        return out;
    }

    // Rebuild. Lists are stripped at ANY depth because definitions were gathered
    // recursively: a nested list left behind would duplicate the definitions
    // that were just copied into the new list.
    stripFootnotesListsDeep(out);

    // The new list goes right after the last top-level block that is not an
    // empty paragraph, i.e. ahead of any run of trailing empty paragraphs.
    const blocks = out.content;
    let insertAt = blocks.length;
    for (; insertAt > 0; insertAt -= 1) {
        if (!isEmptyParagraph(blocks[insertAt - 1])) {
            break;
        }
    }
    blocks.splice(insertAt, 0, { type: FOOTNOTES_LIST_NAME, content: orderedDefs });
    return out;
}
|
|
/**
 * Delete every `footnotesList` node found anywhere beneath `node`.
 *
 * Works in place: each visited node that has an array `content` gets that array
 * replaced by a filtered copy, so only ever pass a clone you own.
 *
 * @param {*} node - Root of the subtree to clean; values without an array `content` are ignored.
 * @returns {void}
 */
function stripFootnotesListsDeep(node) {
    const hasChildren = Boolean(node) && typeof node === "object" && Array.isArray(node.content);
    if (!hasChildren) {
        return;
    }
    // Falsy children are kept as-is; only real footnotesList nodes are dropped.
    const survivors = node.content.filter((child) => !child || child.type !== FOOTNOTES_LIST_NAME);
    node.content = survivors;
    survivors.forEach((child) => stripFootnotesListsDeep(child));
}
|