fix(footnotes): strip bare definitions on rebuild; MCP full-doc + zip-import canonicalize tests (#228)

Review #6 (approve-with-comments) follow-ups:
1. canonicalize step 7 now strips bare footnoteDefinitions at ANY depth
   (stripFootnoteDefinitionsDeep), not just footnotesList, in BOTH copies. A
   definition hand-authored outside a list (e.g. nested in a callout via a
   raw-JSON write path) was left in place while a copy was also added to the
   rebuilt list -> duplicate, idempotent, self-perpetuating. Runs only in the
   rebuild path (after the lists are stripped); the fast-path / placement-keep
   branch is untouched. Added a shared-corpus case (bare def nested in a callout)
   to pin it in both mirrors.
2. markdown-clipboard: removed the dead top-level footnoteReference check in
   canonicalizePastedFootnotes (an inline atom is never a top-level slice child;
   only the descendants scan can find it).

Test coverage:
4. New MCP binding tests (full-doc-write-canonicalize.test.mjs): update_page_json
   and copy_page_content canonicalize the persisted full doc, asserted via a new
   `replacePage` seam (symmetric to the existing `mutatePage` seam) so no live
   collab socket is needed. Routed both writers through the seam.
5. New server spec (file-import-task.service.footnote-canonicalize.spec.ts): the
   zip-import path (processGenericImport) canonicalizes footnotes — real
   markdown->HTML->JSON via a real ImportService over a temp-dir .md file, DB trx
   stubbed to capture the persisted page content. FileImportTaskService had no
   spec before.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
a
2026-06-28 01:39:25 +03:00
parent 9c1f952b2f
commit c4ed4a4855
10 changed files with 515 additions and 25 deletions

View File

@@ -147,14 +147,17 @@ export function canonicalizeFootnotes<T = any>(doc: T): T {
return out;
}
// 7) Otherwise rebuild: strip every footnotesList at ANY depth (collectDefinitions
// gathers defs recursively, so a list nested in a callout/blockquote would
// otherwise have its defs copied into the new list while the original
// survives — duplicates) and re-insert exactly one after the last meaningful
// (non-empty paragraph) top-level block, so it coexists with a trailing-node
// empty paragraph. This both repairs a non-canonical doc and (in the import
// case) physically reorders the list into reference order.
// 7) Otherwise rebuild: strip every footnotesList AND every bare
// footnoteDefinition at ANY depth (collectDefinitions gathers defs
// recursively, so a list nested in a callout/blockquote — or a bare
// definition outside any list — would otherwise have its defs copied into the
// rebuilt list while the original survives in place → duplicates) and
// re-insert exactly one list after the last meaningful (non-empty paragraph)
// top-level block, so it coexists with a trailing-node empty paragraph. This
// both repairs a non-canonical doc and (in the import case) physically
// reorders the list into reference order.
stripFootnotesListsDeep(out);
stripFootnoteDefinitionsDeep(out);
const top: any[] = out.content;
let insertAt = top.length;
while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1])) insertAt--;
@@ -172,6 +175,21 @@ function stripFootnotesListsDeep(node: any): void {
for (const child of node.content) stripFootnotesListsDeep(child);
}
/**
* Remove every BARE `footnoteDefinition` node at ANY depth (mutates the given
* clone). Runs only in the rebuild path AFTER the lists are stripped, so it
* targets definitions that were sitting outside a list (e.g. hand-authored via a
* raw-JSON write path and nested in a callout); their content was already copied
* into the rebuilt list, so leaving the originals would duplicate them.
*/
function stripFootnoteDefinitionsDeep(node: any): void {
if (!node || typeof node !== 'object' || !Array.isArray(node.content)) return;
node.content = node.content.filter(
(c: any) => !(c && c.type === FOOTNOTE_DEFINITION_NAME),
);
for (const child of node.content) stripFootnoteDefinitionsDeep(child);
}
/**
* Deep equality over plain JSON: arrays are compared POSITIONALLY
* (order-SENSITIVE), object keys order-insensitively. The array order-sensitivity

View File

@@ -7,12 +7,13 @@
* Both the editor-ext copy and the MCP mirror of `canonicalizeFootnotes` are run
* against this corpus by their respective test suites, which turns "the two
* pure copies behave identically" into a checkable property without coupling the
* packages at build time. When you change one corpus, change the other.
* packages. When you change one corpus, change the other.
*
* Coverage includes (besides ordering/orphan/reuse/dedup/synth/merge): a single
* canonical list with NON-EMPTY content after it (must NOT be repositioned —
* plugin placement parity, must-fix #2) and a reference nested inside a callout
* (the recursive collection, test-coverage #14).
* plugin placement parity, must-fix #2), a reference nested inside a callout
* (the recursive collection, test-coverage #14), and a BARE footnoteDefinition
* nested in a callout (rebuild must strip the original so it is not duplicated).
*/
export interface FootnoteCorpusCase {
name: string;
@@ -1145,6 +1146,97 @@ export const FOOTNOTE_CORPUS: FootnoteCorpusCase[] = [
]
}
},
{
"name": "bare footnoteDefinition nested in a callout is collected, NOT duplicated",
"input": {
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "see "
},
{
"type": "footnoteReference",
"attrs": {
"id": "a"
}
}
]
},
{
"type": "callout",
"content": [
{
"type": "footnoteDefinition",
"attrs": {
"id": "a"
},
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "note A"
}
]
}
]
}
]
}
]
},
"expected": {
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "see "
},
{
"type": "footnoteReference",
"attrs": {
"id": "a"
}
}
]
},
{
"type": "callout",
"content": []
},
{
"type": "footnotesList",
"content": [
{
"type": "footnoteDefinition",
"attrs": {
"id": "a"
},
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "note A"
}
]
}
]
}
]
}
]
}
},
{
"name": "no footnotes at all is unchanged",
"input": {

View File

@@ -1071,7 +1071,7 @@ export class DocmostClient {
// Write the BODY first, then the title (#159 split-brain): a failed body
// write (e.g. persist timeout) must not leave a new title over the old body.
const collabToken = await this.getCollabTokenWithReauth();
const mutation = await replacePageContent(pageId, doc, collabToken, this.apiUrl);
const mutation = await this.replacePage(pageId, doc, collabToken, this.apiUrl);
// Body persisted successfully — now it is safe to set the title.
if (title) {
await this.client.post("/pages/update", { pageId, title });
@@ -1142,6 +1142,15 @@ export class DocmostClient {
mutatePage(pageId, collabToken, apiUrl, transform) {
return mutatePageContent(pageId, collabToken, apiUrl, transform);
}
/**
* Full-document write seam over collaboration.replacePageContent. Production
* just delegates; it exists as an overridable method so the full-doc write
* tools (update_page_json, copy_page_content) can have their footnote-
* canonicalization binding unit-tested without a live Hocuspocus collab socket.
*/
replacePage(pageId, doc, collabToken, apiUrl) {
return replacePageContent(pageId, doc, collabToken, apiUrl);
}
/**
* Export a page to a single self-contained Docmost-flavoured markdown file:
* meta block + body (with inline comment anchors + diagrams) + comment
@@ -1270,7 +1279,7 @@ export class DocmostClient {
// to the target (parity with the other full-doc write paths).
const canonical = canonicalizeFootnotes(content);
const collabToken = await this.getCollabTokenWithReauth();
const mutation = await replacePageContent(targetPageId, canonical, collabToken, this.apiUrl);
const mutation = await this.replacePage(targetPageId, canonical, collabToken, this.apiUrl);
return {
success: true,
sourcePageId,

View File

@@ -174,12 +174,15 @@ export function canonicalizeFootnotes(doc) {
deepEqualJson(topLevelLists[0].content, orderedDefs)) {
return out;
}
// 7) Otherwise rebuild: strip every footnotesList at ANY depth (collectDefinitions
// gathers defs recursively, so a list nested in a callout/blockquote would
// otherwise have its defs copied into the new list while the original
// survives — duplicates) and re-insert exactly one after the last meaningful
// (non-empty paragraph) top-level block.
// 7) Otherwise rebuild: strip every footnotesList AND every bare
// footnoteDefinition at ANY depth (collectDefinitions gathers defs
// recursively, so a list nested in a callout/blockquote — or a bare
// definition outside any list — would otherwise have its defs copied into the
// rebuilt list while the original survives in place → duplicates) and
// re-insert exactly one list after the last meaningful (non-empty paragraph)
// top-level block.
stripFootnotesListsDeep(out);
stripFootnoteDefinitionsDeep(out);
const top = out.content;
let insertAt = top.length;
while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1]))
@@ -196,3 +199,17 @@ function stripFootnotesListsDeep(node) {
for (const child of node.content)
stripFootnotesListsDeep(child);
}
/**
* Remove every BARE `footnoteDefinition` node at ANY depth (mutates the given
* clone). Runs only in the rebuild path AFTER the lists are stripped, so it
* targets definitions that were sitting outside a list (e.g. hand-authored via a
* raw-JSON write path and nested in a callout); their content was already copied
* into the rebuilt list, so leaving the originals would duplicate them.
*/
function stripFootnoteDefinitionsDeep(node) {
if (!node || typeof node !== "object" || !Array.isArray(node.content))
return;
node.content = node.content.filter((c) => !(c && c.type === FOOTNOTE_DEFINITION_NAME));
for (const child of node.content)
stripFootnoteDefinitionsDeep(child);
}

View File

@@ -1356,7 +1356,7 @@ export class DocmostClient {
// Write the BODY first, then the title (#159 split-brain): a failed body
// write (e.g. persist timeout) must not leave a new title over the old body.
const collabToken = await this.getCollabTokenWithReauth();
const mutation = await replacePageContent(
const mutation = await this.replacePage(
pageId,
doc,
collabToken,
@@ -1451,6 +1451,21 @@ export class DocmostClient {
return mutatePageContent(pageId, collabToken, apiUrl, transform);
}
/**
* Full-document write seam over collaboration.replacePageContent. Production
* just delegates; it exists as an overridable method so the full-doc write
* tools (update_page_json, copy_page_content) can have their footnote-
* canonicalization binding unit-tested without a live Hocuspocus collab socket.
*/
protected replacePage(
pageId: string,
doc: any,
collabToken: string,
apiUrl: string,
): Promise<{ doc?: any; verify?: any }> {
return replacePageContent(pageId, doc, collabToken, apiUrl);
}
/**
* Export a page to a single self-contained Docmost-flavoured markdown file:
* meta block + body (with inline comment anchors + diagrams) + comment
@@ -1594,7 +1609,7 @@ export class DocmostClient {
const canonical = canonicalizeFootnotes(content);
const collabToken = await this.getCollabTokenWithReauth();
const mutation = await replacePageContent(
const mutation = await this.replacePage(
targetPageId,
canonical,
collabToken,

View File

@@ -183,12 +183,15 @@ export function canonicalizeFootnotes<T = any>(doc: T): T {
return out;
}
// 7) Otherwise rebuild: strip every footnotesList at ANY depth (collectDefinitions
// gathers defs recursively, so a list nested in a callout/blockquote would
// otherwise have its defs copied into the new list while the original
// survives — duplicates) and re-insert exactly one after the last meaningful
// (non-empty paragraph) top-level block.
// 7) Otherwise rebuild: strip every footnotesList AND every bare
// footnoteDefinition at ANY depth (collectDefinitions gathers defs
// recursively, so a list nested in a callout/blockquote — or a bare
// definition outside any list — would otherwise have its defs copied into the
// rebuilt list while the original survives in place → duplicates) and
// re-insert exactly one list after the last meaningful (non-empty paragraph)
// top-level block.
stripFootnotesListsDeep(out);
stripFootnoteDefinitionsDeep(out);
const top: any[] = out.content;
let insertAt = top.length;
while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1])) insertAt--;
@@ -205,3 +208,18 @@ function stripFootnotesListsDeep(node: any): void {
);
for (const child of node.content) stripFootnotesListsDeep(child);
}
/**
* Remove every BARE `footnoteDefinition` node at ANY depth (mutates the given
* clone). Runs only in the rebuild path AFTER the lists are stripped, so it
* targets definitions that were sitting outside a list (e.g. hand-authored via a
* raw-JSON write path and nested in a callout); their content was already copied
* into the rebuilt list, so leaving the originals would duplicate them.
*/
function stripFootnoteDefinitionsDeep(node: any): void {
if (!node || typeof node !== "object" || !Array.isArray(node.content)) return;
node.content = node.content.filter(
(c: any) => !(c && c.type === FOOTNOTE_DEFINITION_NAME),
);
for (const child of node.content) stripFootnoteDefinitionsDeep(child);
}

View File

@@ -0,0 +1,78 @@
// Footnote-canonicalization binding tests for the MCP FULL-document write tools
// (issue #228, review #4): update_page_json and copy_page_content must persist a
// footnote-canonical doc. These override the `replacePage` seam (symmetric to the
// `mutatePage` seam used by the insert-footnote-wrapper test) to capture the
// persisted doc WITHOUT a live Hocuspocus collab socket. Symmetric to the
// server-side focus specs for createPage / updatePageContent('replace').
import { test } from "node:test";
import assert from "node:assert/strict";
import { DocmostClient } from "../../build/client.js";
const para = (...c) => ({ type: "paragraph", content: c });
const ref = (id) => ({ type: "footnoteReference", attrs: { id } });
const def = (id, text) => ({
type: "footnoteDefinition",
attrs: { id },
content: [{ type: "paragraph", content: [{ type: "text", text }] }],
});
const list = (...d) => ({ type: "footnotesList", content: d });
function findAll(node, type, acc = []) {
if (!node || typeof node !== "object") return acc;
if (node.type === type) acc.push(node);
if (Array.isArray(node.content)) for (const c of node.content) findAll(c, type, acc);
return acc;
}
const defIds = (doc) => findAll(doc, "footnoteDefinition").map((d) => d.attrs.id);
function makeClient(sourceDoc) {
const calls = { replaced: [] };
class TestClient extends DocmostClient {
async ensureAuthenticated() {}
async getCollabTokenWithReauth() {
return "collab-token";
}
async getPageRaw(pageId) {
return { id: pageId, slugId: "s", title: "P", spaceId: "sp", content: sourceDoc };
}
async replacePage(pageId, doc, token, apiUrl) {
calls.replaced.push({ pageId, doc });
return { doc, verify: { ok: true } };
}
}
const client = new TestClient("http://127.0.0.1:1/api", "e@x.com", "pw");
return { client, calls };
}
test("update_page_json canonicalizes the persisted full doc (out-of-order -> reference order)", async () => {
const { client, calls } = makeClient();
const outOfOrder = {
type: "doc",
content: [
para({ type: "text", text: "x" }, ref("b"), ref("a")),
list(def("a", "A"), def("b", "B")),
],
};
await client.updatePageJson("p1", outOfOrder);
assert.equal(calls.replaced.length, 1);
// Definitions reordered to reference order [b, a] before persisting.
assert.deepEqual(defIds(calls.replaced[0].doc), ["b", "a"]);
assert.equal(findAll(calls.replaced[0].doc, "footnotesList").length, 1);
});
test("copy_page_content canonicalizes the persisted copy (orphan definition dropped)", async () => {
const sourceDoc = {
type: "doc",
content: [
para({ type: "text", text: "x" }, ref("a")),
list(def("a", "A"), def("orphan", "O")),
],
};
const { client, calls } = makeClient(sourceDoc);
const res = await client.copyPageContent("src", "dst");
assert.equal(calls.replaced.length, 1);
assert.equal(calls.replaced[0].pageId, "dst");
// The orphan definition is dropped by canonicalization before the copy lands.
assert.deepEqual(defIds(calls.replaced[0].doc), ["a"]);
assert.equal(res.success, true);
});

View File

@@ -1130,6 +1130,97 @@ export const FOOTNOTE_CORPUS = [
]
}
},
{
"name": "bare footnoteDefinition nested in a callout is collected, NOT duplicated",
"input": {
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "see "
},
{
"type": "footnoteReference",
"attrs": {
"id": "a"
}
}
]
},
{
"type": "callout",
"content": [
{
"type": "footnoteDefinition",
"attrs": {
"id": "a"
},
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "note A"
}
]
}
]
}
]
}
]
},
"expected": {
"type": "doc",
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "see "
},
{
"type": "footnoteReference",
"attrs": {
"id": "a"
}
}
]
},
{
"type": "callout",
"content": []
},
{
"type": "footnotesList",
"content": [
{
"type": "footnoteDefinition",
"attrs": {
"id": "a"
},
"content": [
{
"type": "paragraph",
"content": [
{
"type": "text",
"text": "note A"
}
]
}
]
}
]
}
]
}
},
{
"name": "no footnotes at all is unchanged",
"input": {