diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.ts index c28d2690..c8e36a1b 100644 --- a/apps/client/src/features/editor/extensions/markdown-clipboard.ts +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.ts @@ -179,7 +179,9 @@ export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice let hasReference = false; slice.content.forEach((node) => { if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true; - if (node.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true; + // footnoteReference is an inline atom, never a top-level slice child here + // (this function early-returns for open slices, so children are whole + // blocks), so it is only reachable by descending. node.descendants((child) => { if (child.type.name === FOOTNOTE_REFERENCE_NAME) hasReference = true; }); diff --git a/apps/server/src/integrations/import/services/file-import-task.service.footnote-canonicalize.spec.ts b/apps/server/src/integrations/import/services/file-import-task.service.footnote-canonicalize.spec.ts new file mode 100644 index 00000000..08ecce15 --- /dev/null +++ b/apps/server/src/integrations/import/services/file-import-task.service.footnote-canonicalize.spec.ts @@ -0,0 +1,150 @@ +// Importing FileImportTaskService transitively loads import-formatter.ts, which +// imports the ESM-only @sindresorhus/slugify package (not in jest's transform +// allowlist). slugify is irrelevant to the path under test, so it is mocked out +// to keep the module graph loadable under ts-jest (mirrors the import.service spec). +jest.mock('@sindresorhus/slugify', () => ({ + __esModule: true, + default: (input: string) => String(input), +})); +// import-attachment.service.ts (loaded transitively for DI typing) imports the +// ESM-only `p-limit` / `image-dimensions`; neither is exercised on the path under +// test, so stub them so the module graph loads under ts-jest. +jest.mock('p-limit', () => ({ + __esModule: true, + default: () => (fn: any) => fn(), +})); +jest.mock('image-dimensions', () => ({ + __esModule: true, + imageDimensionsFromData: () => undefined, +})); + +import { promises as fs } from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { FileImportTaskService } from './file-import-task.service'; +import { ImportService } from './import.service'; + +/** + * Binding test for issue #228 / review #5: FileImportTaskService.processGenericImport + * is a NON-editor write path (markdownToHtml -> processHTML -> JSON, never runs + * footnoteSyncPlugin), so it canonicalizes footnotes before persisting. This pins + * that binding — the same one import.service has a spec for — which previously had + * NO spec at all. + * + * The markdown -> HTML -> ProseMirror conversion is REAL (a real ImportService, + * its createYdoc stubbed); the filesystem is a real temp dir with one .md file; + * the DB transaction is stubbed to capture the persisted page content. + */ + +// Out-of-order references (c, a, b), a REUSED reference ([^a] twice), and an +// ORPHAN definition ([^z], never referenced). +const MARKDOWN = [ + '# Title', + '', + 'Body refs [^c] and [^a] and [^b] and again [^a].', + '', + '[^a]: note A', + '[^b]: note B', + '[^c]: note C', + '[^z]: orphan note', +].join('\n'); + +function footnoteListIds(content: any): string[] { + const list = (content?.content ?? []).find( + (n: any) => n.type === 'footnotesList', + ); + return (list?.content ?? []) + .filter((n: any) => n.type === 'footnoteDefinition') + .map((n: any) => n.attrs?.id); +} + +// A permissive chainable stub for the spaces lookup (selectFrom(...).select(...) +// .where(...).executeTakeFirst()). +function chainable(result: any): any { + const proxy: any = new Proxy(function () {}, { + get: (_t, prop) => { + if (prop === 'executeTakeFirst') return async () => result; + if (prop === 'execute') return async () => []; + return () => proxy; + }, + }); + return proxy; +} + +describe('FileImportTaskService.processGenericImport — footnote canonicalization (#228)', () => { + it('orders footnotes by first reference, dedupes reuse, and drops orphans on zip import', async () => { + const extractDir = await fs.mkdtemp(path.join(os.tmpdir(), 'fit-canon-')); + await fs.writeFile(path.join(extractDir, 'note.md'), MARKDOWN, 'utf-8'); + + // Real ImportService for the html -> JSON conversion; stub the yjs encode. + const importService = new ImportService( + {} as any, + {} as any, + {} as any, + {} as any, + ); + jest + .spyOn(importService as any, 'createYdoc') + .mockResolvedValue(Buffer.from([]) as any); + + let captured: any = null; + const trx = { + insertInto: (table: string) => ({ + values: (v: any) => { + if (table === 'pages') captured = v; + return { execute: async () => {} }; + }, + }), + }; + const db: any = { + selectFrom: () => chainable({ slug: 'space-slug' }), + transaction: () => ({ execute: (fn: any) => fn(trx) }), + }; + + const importAttachmentService = { + processAttachments: async ({ html }: any) => html, + }; + const backlinkRepo = { insertBacklink: jest.fn() }; + const eventEmitter = { emit: jest.fn() }; + const auditService = { logBatchWithContext: jest.fn() }; + + const pageService = { nextPagePosition: async () => 'a0' }; + + const service = new FileImportTaskService( + {} as any, // storageService + importService as any, + pageService as any, + backlinkRepo as any, + db, + importAttachmentService as any, + eventEmitter as any, + auditService as any, + ); + + const fileTask: any = { + id: 'task-1', + source: 'generic', + spaceId: 'space-1', + workspaceId: 'ws-1', + creatorId: 'user-1', + }; + + try { + await service.processGenericImport({ extractDir, fileTask }); + + expect(captured).toBeTruthy(); + const content = captured.content; + // Reference order is c, a, b (NOT the markdown definition order a, b, c). + expect(footnoteListIds(content)).toEqual(['c', 'a', 'b']); + // Orphan [^z] dropped; reused [^a] collapses to one definition; one list. + expect(footnoteListIds(content)).not.toContain('z'); + const lists = (content.content ?? []).filter( + (n: any) => n.type === 'footnotesList', + ); + expect(lists).toHaveLength(1); + expect(footnoteListIds(content).filter((id) => id === 'a')).toHaveLength(1); + } finally { + await fs.rm(extractDir, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts index 21719eba..f7a05f94 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts @@ -147,14 +147,17 @@ export function canonicalizeFootnotes(doc: T): T { return out; } - // 7) Otherwise rebuild: strip every footnotesList at ANY depth (collectDefinitions - // gathers defs recursively, so a list nested in a callout/blockquote would - // otherwise have its defs copied into the new list while the original - // survives — duplicates) and re-insert exactly one after the last meaningful - // (non-empty paragraph) top-level block, so it coexists with a trailing-node - // empty paragraph. This both repairs a non-canonical doc and (in the import - // case) physically reorders the list into reference order. + // 7) Otherwise rebuild: strip every footnotesList AND every bare + // footnoteDefinition at ANY depth (collectDefinitions gathers defs + // recursively, so a list nested in a callout/blockquote — or a bare + // definition outside any list — would otherwise have its defs copied into the + // rebuilt list while the original survives in place → duplicates) and + // re-insert exactly one list after the last meaningful (non-empty paragraph) + // top-level block, so it coexists with a trailing-node empty paragraph. This + // both repairs a non-canonical doc and (in the import case) physically + // reorders the list into reference order. stripFootnotesListsDeep(out); + stripFootnoteDefinitionsDeep(out); const top: any[] = out.content; let insertAt = top.length; while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1])) insertAt--; @@ -172,6 +175,21 @@ function stripFootnotesListsDeep(node: any): void { for (const child of node.content) stripFootnotesListsDeep(child); } +/** + * Remove every BARE `footnoteDefinition` node at ANY depth (mutates the given + * clone). Runs only in the rebuild path AFTER the lists are stripped, so it + * targets definitions that were sitting outside a list (e.g. hand-authored via a + * raw-JSON write path and nested in a callout); their content was already copied + * into the rebuilt list, so leaving the originals would duplicate them. + */ +function stripFootnoteDefinitionsDeep(node: any): void { + if (!node || typeof node !== 'object' || !Array.isArray(node.content)) return; + node.content = node.content.filter( + (c: any) => !(c && c.type === FOOTNOTE_DEFINITION_NAME), + ); + for (const child of node.content) stripFootnoteDefinitionsDeep(child); +} + /** * Deep equality over plain JSON: arrays are compared POSITIONALLY * (order-SENSITIVE), object keys order-insensitively. The array order-sensitivity diff --git a/packages/editor-ext/src/lib/footnote/footnote-corpus.ts b/packages/editor-ext/src/lib/footnote/footnote-corpus.ts index e8521b74..dd5f41d0 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-corpus.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-corpus.ts @@ -7,12 +7,13 @@ * Both the editor-ext copy and the MCP mirror of `canonicalizeFootnotes` are run * against this corpus by their respective test suites, which turns "the two * pure copies behave identically" into a checkable property without coupling the - * packages at build time. When you change one corpus, change the other. + * packages. When you change one corpus, change the other. * * Coverage includes (besides ordering/orphan/reuse/dedup/synth/merge): a single * canonical list with NON-EMPTY content after it (must NOT be repositioned — - * plugin placement parity, must-fix #2) and a reference nested inside a callout - * (the recursive collection, test-coverage #14). + * plugin placement parity, must-fix #2), a reference nested inside a callout + * (the recursive collection, test-coverage #14), and a BARE footnoteDefinition + * nested in a callout (rebuild must strip the original so it is not duplicated). */ export interface FootnoteCorpusCase { name: string; @@ -1145,6 +1146,97 @@ export const FOOTNOTE_CORPUS: FootnoteCorpusCase[] = [ ] } }, + { + "name": "bare footnoteDefinition nested in a callout is collected, NOT duplicated", + "input": { + "type": "doc", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "see " + }, + { + "type": "footnoteReference", + "attrs": { + "id": "a" + } + } + ] + }, + { + "type": "callout", + "content": [ + { + "type": "footnoteDefinition", + "attrs": { + "id": "a" + }, + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "note A" + } + ] + } + ] + } + ] + } + ] + }, + "expected": { + "type": "doc", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "see " + }, + { + "type": "footnoteReference", + "attrs": { + "id": "a" + } + } + ] + }, + { + "type": "callout", + "content": [] + }, + { + "type": "footnotesList", + "content": [ + { + "type": "footnoteDefinition", + "attrs": { + "id": "a" + }, + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "note A" + } + ] + } + ] + } + ] + } + ] + } + }, { "name": "no footnotes at all is unchanged", "input": { diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index 422b0e6c..082f8e68 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -1071,7 +1071,7 @@ export class DocmostClient { // Write the BODY first, then the title (#159 split-brain): a failed body // write (e.g. persist timeout) must not leave a new title over the old body. const collabToken = await this.getCollabTokenWithReauth(); - const mutation = await replacePageContent(pageId, doc, collabToken, this.apiUrl); + const mutation = await this.replacePage(pageId, doc, collabToken, this.apiUrl); // Body persisted successfully — now it is safe to set the title. if (title) { await this.client.post("/pages/update", { pageId, title }); @@ -1142,6 +1142,15 @@ export class DocmostClient { mutatePage(pageId, collabToken, apiUrl, transform) { return mutatePageContent(pageId, collabToken, apiUrl, transform); } + /** + * Full-document write seam over collaboration.replacePageContent. Production + * just delegates; it exists as an overridable method so the full-doc write + * tools (update_page_json, copy_page_content) can have their footnote- + * canonicalization binding unit-tested without a live Hocuspocus collab socket. + */ + replacePage(pageId, doc, collabToken, apiUrl) { + return replacePageContent(pageId, doc, collabToken, apiUrl); + } /** * Export a page to a single self-contained Docmost-flavoured markdown file: * meta block + body (with inline comment anchors + diagrams) + comment @@ -1270,7 +1279,7 @@ export class DocmostClient { // to the target (parity with the other full-doc write paths). const canonical = canonicalizeFootnotes(content); const collabToken = await this.getCollabTokenWithReauth(); - const mutation = await replacePageContent(targetPageId, canonical, collabToken, this.apiUrl); + const mutation = await this.replacePage(targetPageId, canonical, collabToken, this.apiUrl); return { success: true, sourcePageId, diff --git a/packages/mcp/build/lib/footnote-canonicalize.js b/packages/mcp/build/lib/footnote-canonicalize.js index 1df76154..d2d91400 100644 --- a/packages/mcp/build/lib/footnote-canonicalize.js +++ b/packages/mcp/build/lib/footnote-canonicalize.js @@ -174,12 +174,15 @@ export function canonicalizeFootnotes(doc) { deepEqualJson(topLevelLists[0].content, orderedDefs)) { return out; } - // 7) Otherwise rebuild: strip every footnotesList at ANY depth (collectDefinitions - // gathers defs recursively, so a list nested in a callout/blockquote would - // otherwise have its defs copied into the new list while the original - // survives — duplicates) and re-insert exactly one after the last meaningful - // (non-empty paragraph) top-level block. + // 7) Otherwise rebuild: strip every footnotesList AND every bare + // footnoteDefinition at ANY depth (collectDefinitions gathers defs + // recursively, so a list nested in a callout/blockquote — or a bare + // definition outside any list — would otherwise have its defs copied into the + // rebuilt list while the original survives in place → duplicates) and + // re-insert exactly one list after the last meaningful (non-empty paragraph) + // top-level block. stripFootnotesListsDeep(out); + stripFootnoteDefinitionsDeep(out); const top = out.content; let insertAt = top.length; while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1])) @@ -196,3 +199,17 @@ function stripFootnotesListsDeep(node) { for (const child of node.content) stripFootnotesListsDeep(child); } +/** + * Remove every BARE `footnoteDefinition` node at ANY depth (mutates the given + * clone). Runs only in the rebuild path AFTER the lists are stripped, so it + * targets definitions that were sitting outside a list (e.g. hand-authored via a + * raw-JSON write path and nested in a callout); their content was already copied + * into the rebuilt list, so leaving the originals would duplicate them. + */ +function stripFootnoteDefinitionsDeep(node) { + if (!node || typeof node !== "object" || !Array.isArray(node.content)) + return; + node.content = node.content.filter((c) => !(c && c.type === FOOTNOTE_DEFINITION_NAME)); + for (const child of node.content) + stripFootnoteDefinitionsDeep(child); +} diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 85a5a3a4..181c7e79 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1356,7 +1356,7 @@ export class DocmostClient { // Write the BODY first, then the title (#159 split-brain): a failed body // write (e.g. persist timeout) must not leave a new title over the old body. const collabToken = await this.getCollabTokenWithReauth(); - const mutation = await replacePageContent( + const mutation = await this.replacePage( pageId, doc, collabToken, @@ -1451,6 +1451,21 @@ export class DocmostClient { return mutatePageContent(pageId, collabToken, apiUrl, transform); } + /** + * Full-document write seam over collaboration.replacePageContent. Production + * just delegates; it exists as an overridable method so the full-doc write + * tools (update_page_json, copy_page_content) can have their footnote- + * canonicalization binding unit-tested without a live Hocuspocus collab socket. + */ + protected replacePage( + pageId: string, + doc: any, + collabToken: string, + apiUrl: string, + ): Promise<{ doc?: any; verify?: any }> { + return replacePageContent(pageId, doc, collabToken, apiUrl); + } + /** * Export a page to a single self-contained Docmost-flavoured markdown file: * meta block + body (with inline comment anchors + diagrams) + comment @@ -1594,7 +1609,7 @@ export class DocmostClient { const canonical = canonicalizeFootnotes(content); const collabToken = await this.getCollabTokenWithReauth(); - const mutation = await replacePageContent( + const mutation = await this.replacePage( targetPageId, canonical, collabToken, diff --git a/packages/mcp/src/lib/footnote-canonicalize.ts b/packages/mcp/src/lib/footnote-canonicalize.ts index b4ae6e03..c83d41e4 100644 --- a/packages/mcp/src/lib/footnote-canonicalize.ts +++ b/packages/mcp/src/lib/footnote-canonicalize.ts @@ -183,12 +183,15 @@ export function canonicalizeFootnotes(doc: T): T { return out; } - // 7) Otherwise rebuild: strip every footnotesList at ANY depth (collectDefinitions - // gathers defs recursively, so a list nested in a callout/blockquote would - // otherwise have its defs copied into the new list while the original - // survives — duplicates) and re-insert exactly one after the last meaningful - // (non-empty paragraph) top-level block. + // 7) Otherwise rebuild: strip every footnotesList AND every bare + // footnoteDefinition at ANY depth (collectDefinitions gathers defs + // recursively, so a list nested in a callout/blockquote — or a bare + // definition outside any list — would otherwise have its defs copied into the + // rebuilt list while the original survives in place → duplicates) and + // re-insert exactly one list after the last meaningful (non-empty paragraph) + // top-level block. stripFootnotesListsDeep(out); + stripFootnoteDefinitionsDeep(out); const top: any[] = out.content; let insertAt = top.length; while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1])) insertAt--; @@ -205,3 +208,18 @@ function stripFootnotesListsDeep(node: any): void { ); for (const child of node.content) stripFootnotesListsDeep(child); } + +/** + * Remove every BARE `footnoteDefinition` node at ANY depth (mutates the given + * clone). Runs only in the rebuild path AFTER the lists are stripped, so it + * targets definitions that were sitting outside a list (e.g. hand-authored via a + * raw-JSON write path and nested in a callout); their content was already copied + * into the rebuilt list, so leaving the originals would duplicate them. + */ +function stripFootnoteDefinitionsDeep(node: any): void { + if (!node || typeof node !== "object" || !Array.isArray(node.content)) return; + node.content = node.content.filter( + (c: any) => !(c && c.type === FOOTNOTE_DEFINITION_NAME), + ); + for (const child of node.content) stripFootnoteDefinitionsDeep(child); +} diff --git a/packages/mcp/test/mock/full-doc-write-canonicalize.test.mjs b/packages/mcp/test/mock/full-doc-write-canonicalize.test.mjs new file mode 100644 index 00000000..8fcdf4a2 --- /dev/null +++ b/packages/mcp/test/mock/full-doc-write-canonicalize.test.mjs @@ -0,0 +1,78 @@ +// Footnote-canonicalization binding tests for the MCP FULL-document write tools +// (issue #228, review #4): update_page_json and copy_page_content must persist a +// footnote-canonical doc. These override the `replacePage` seam (symmetric to the +// `mutatePage` seam used by the insert-footnote-wrapper test) to capture the +// persisted doc WITHOUT a live Hocuspocus collab socket. Symmetric to the +// server-side focus specs for createPage / updatePageContent('replace'). +import { test } from "node:test"; +import assert from "node:assert/strict"; +import { DocmostClient } from "../../build/client.js"; + +const para = (...c) => ({ type: "paragraph", content: c }); +const ref = (id) => ({ type: "footnoteReference", attrs: { id } }); +const def = (id, text) => ({ + type: "footnoteDefinition", + attrs: { id }, + content: [{ type: "paragraph", content: [{ type: "text", text }] }], +}); +const list = (...d) => ({ type: "footnotesList", content: d }); + +function findAll(node, type, acc = []) { + if (!node || typeof node !== "object") return acc; + if (node.type === type) acc.push(node); + if (Array.isArray(node.content)) for (const c of node.content) findAll(c, type, acc); + return acc; +} +const defIds = (doc) => findAll(doc, "footnoteDefinition").map((d) => d.attrs.id); + +function makeClient(sourceDoc) { + const calls = { replaced: [] }; + class TestClient extends DocmostClient { + async ensureAuthenticated() {} + async getCollabTokenWithReauth() { + return "collab-token"; + } + async getPageRaw(pageId) { + return { id: pageId, slugId: "s", title: "P", spaceId: "sp", content: sourceDoc }; + } + async replacePage(pageId, doc, token, apiUrl) { + calls.replaced.push({ pageId, doc }); + return { doc, verify: { ok: true } }; + } + } + const client = new TestClient("http://127.0.0.1:1/api", "e@x.com", "pw"); + return { client, calls }; +} + +test("update_page_json canonicalizes the persisted full doc (out-of-order -> reference order)", async () => { + const { client, calls } = makeClient(); + const outOfOrder = { + type: "doc", + content: [ + para({ type: "text", text: "x" }, ref("b"), ref("a")), + list(def("a", "A"), def("b", "B")), + ], + }; + await client.updatePageJson("p1", outOfOrder); + assert.equal(calls.replaced.length, 1); + // Definitions reordered to reference order [b, a] before persisting. + assert.deepEqual(defIds(calls.replaced[0].doc), ["b", "a"]); + assert.equal(findAll(calls.replaced[0].doc, "footnotesList").length, 1); +}); + +test("copy_page_content canonicalizes the persisted copy (orphan definition dropped)", async () => { + const sourceDoc = { + type: "doc", + content: [ + para({ type: "text", text: "x" }, ref("a")), + list(def("a", "A"), def("orphan", "O")), + ], + }; + const { client, calls } = makeClient(sourceDoc); + const res = await client.copyPageContent("src", "dst"); + assert.equal(calls.replaced.length, 1); + assert.equal(calls.replaced[0].pageId, "dst"); + // The orphan definition is dropped by canonicalization before the copy lands. + assert.deepEqual(defIds(calls.replaced[0].doc), ["a"]); + assert.equal(res.success, true); +}); diff --git a/packages/mcp/test/unit/footnote-corpus.mjs b/packages/mcp/test/unit/footnote-corpus.mjs index 3a213491..648281f4 100644 --- a/packages/mcp/test/unit/footnote-corpus.mjs +++ b/packages/mcp/test/unit/footnote-corpus.mjs @@ -1130,6 +1130,97 @@ export const FOOTNOTE_CORPUS = [ ] } }, + { + "name": "bare footnoteDefinition nested in a callout is collected, NOT duplicated", + "input": { + "type": "doc", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "see " + }, + { + "type": "footnoteReference", + "attrs": { + "id": "a" + } + } + ] + }, + { + "type": "callout", + "content": [ + { + "type": "footnoteDefinition", + "attrs": { + "id": "a" + }, + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "note A" + } + ] + } + ] + } + ] + } + ] + }, + "expected": { + "type": "doc", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "see " + }, + { + "type": "footnoteReference", + "attrs": { + "id": "a" + } + } + ] + }, + { + "type": "callout", + "content": [] + }, + { + "type": "footnotesList", + "content": [ + { + "type": "footnoteDefinition", + "attrs": { + "id": "a" + }, + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "note A" + } + ] + } + ] + } + ] + } + ] + } + }, { "name": "no footnotes at all is unchanged", "input": {