diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts new file mode 100644 index 00000000..65d10481 --- /dev/null +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.canonicalize.test.ts @@ -0,0 +1,142 @@ +import { describe, it, expect } from "vitest"; +import { Editor } from "@tiptap/core"; +import { Document } from "@tiptap/extension-document"; +import { Paragraph } from "@tiptap/extension-paragraph"; +import { Text } from "@tiptap/extension-text"; +import { Node as PMNode, Fragment, Slice } from "@tiptap/pm/model"; +import { + FootnoteReference, + FootnotesList, + FootnoteDefinition, + FOOTNOTE_REFERENCE_NAME, + FOOTNOTE_DEFINITION_NAME, + FOOTNOTES_LIST_NAME, +} from "@docmost/editor-ext"; +import { canonicalizePastedFootnotes } from "./markdown-clipboard"; + +/** + * A markdown paste builds its ProseMirror fragment via DOM -> parseSlice and is + * applied with a manual transaction (handlePaste returns true), so it bypasses + * the editor's footnoteSyncPlugin — which never reorders an existing list. These + * tests pin canonicalizePastedFootnotes, the focused hook that makes a pasted + * out-of-order markdown footnote block come out canonical (issue #228). + */ + +const extensions = [ + Document, + Paragraph, + Text, + FootnoteReference, + FootnotesList, + FootnoteDefinition, +]; + +function makeSchema() { + const editor = new Editor({ extensions, content: { type: "doc", content: [] } }); + const { schema } = editor; + return { editor, schema }; +} + +/** List footnote def ids of the (single) footnotesList in a slice, in order. */ +function listIds(slice: Slice): string[] { + const out: string[] = []; + slice.content.forEach((node: PMNode) => { + if (node.type.name === FOOTNOTES_LIST_NAME) { + node.content.forEach((def: PMNode) => { + if (def.type.name === FOOTNOTE_DEFINITION_NAME) out.push(def.attrs.id); + }); + } + }); + return out; +} + +function hasList(slice: Slice): boolean { + let found = false; + slice.content.forEach((n: PMNode) => { + if (n.type.name === FOOTNOTES_LIST_NAME) found = true; + }); + return found; +} + +describe("canonicalizePastedFootnotes", () => { + it("reorders a pasted block to reference order, dedups reuse, drops orphans", () => { + const { editor, schema } = makeSchema(); + // Body references c, a, b (and again a => reuse); definitions a, b, c, z + // (z is an orphan) — the exact shape a markdown paste produces. + const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.text("body "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "c" }), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [ + schema.nodes.paragraph.create(null, [schema.text("note A")]), + ]), + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [ + schema.nodes.paragraph.create(null, [schema.text("note B")]), + ]), + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "c" }, [ + schema.nodes.paragraph.create(null, [schema.text("note C")]), + ]), + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "z" }, [ + schema.nodes.paragraph.create(null, [schema.text("orphan")]), + ]), + ]), + ]), + 0, + 0, + ); + + const out = canonicalizePastedFootnotes(slice, schema); + // Reference order, orphan z dropped, reused a appears once. + expect(listIds(out)).toEqual(["c", "a", "b"]); + editor.destroy(); + }); + + it("leaves a reference-ONLY paste untouched (no synthesized definitions)", () => { + // A paste that reuses an id defined in the TARGET doc must NOT gain a + // synthesized empty definition here — it carries no footnotesList of its own. + const { editor, schema } = makeSchema(); + const slice = new Slice( + Fragment.from( + schema.nodes.paragraph.create(null, [ + schema.text("see "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + ]), + ), + 0, + 0, + ); + const out = canonicalizePastedFootnotes(slice, schema); + expect(hasList(out)).toBe(false); + expect(out).toBe(slice); // returned unchanged (same reference) + editor.destroy(); + }); + + it("leaves an open (partial) slice untouched even if it carries a list", () => { + // An open slice (openStart/openEnd > 0) is a partial selection, not a + // standalone block, so it is returned as-is BEFORE any footnote handling. + const { editor, schema } = makeSchema(); + const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [ + schema.nodes.paragraph.create(null, [schema.text("A")]), + ]), + ]), + ]), + 1, + 1, + ); + const out = canonicalizePastedFootnotes(slice, schema); + expect(out).toBe(slice); + editor.destroy(); + }); +}); diff --git a/apps/client/src/features/editor/extensions/markdown-clipboard.ts b/apps/client/src/features/editor/extensions/markdown-clipboard.ts index bebb567a..89b7c22e 100644 --- a/apps/client/src/features/editor/extensions/markdown-clipboard.ts +++ b/apps/client/src/features/editor/extensions/markdown-clipboard.ts @@ -3,7 +3,13 @@ import { Extension } from "@tiptap/core"; import { Plugin, PluginKey, TextSelection } from "@tiptap/pm/state"; import { DOMParser, DOMSerializer, Fragment, Slice } from "@tiptap/pm/model"; import { find } from "linkifyjs"; -import { markdownToHtml, htmlToMarkdown } from "@docmost/editor-ext"; +import { + markdownToHtml, + htmlToMarkdown, + canonicalizeFootnotes, + FOOTNOTES_LIST_NAME, +} from "@docmost/editor-ext"; +import type { Schema } from "@tiptap/pm/model"; export const MarkdownClipboard = Extension.create({ name: "markdownClipboard", @@ -83,12 +89,25 @@ export const MarkdownClipboard = Extension.create({ const body = elementFromString(parsed); normalizeTableColumnWidths(body); - const contentNodes = DOMParser.fromSchema( + const parsedSlice = DOMParser.fromSchema( this.editor.schema, ).parseSlice(body, { preserveWhitespace: true, }); + // A markdown paste builds its ProseMirror fragment directly (DOM -> + // parseSlice), bypassing the editor's footnoteSyncPlugin, which never + // reorders an existing list. So a pasted markdown block whose footnote + // definitions are out of order (or contains orphan defs) would be + // stored out of order. Canonicalize the self-contained pasted block so + // its footnotes come out reference-ordered, deduped and orphan-free + // (issue #228). See canonicalizePastedFootnotes for why this is scoped + // to whole-block pastes that carry their own footnotesList. + const contentNodes = canonicalizePastedFootnotes( + parsedSlice, + this.editor.schema, + ); + tr.replaceRange(from, to, contentNodes); const insertEnd = tr.mapping.map(from, 1); tr.setSelection(TextSelection.near(tr.doc.resolve(Math.max(from, insertEnd - 2)), -1)); @@ -133,6 +152,39 @@ export const MarkdownClipboard = Extension.create({ }, }); +/** + * Reorder/dedup the footnotes of a SELF-CONTAINED pasted markdown block to the + * canonical invariant (the live footnoteSyncPlugin never reorders an existing + * list, so an out-of-order pasted block would otherwise persist out of order). + * + * Scoped deliberately to whole-block pastes (openStart/openEnd === 0) that carry + * their OWN footnotesList: canonicalizeFootnotes would synthesize empty + * definitions for any reference lacking a definition, which is correct for a + * standalone block but would be wrong for a reference-only paste that REUSES a + * footnote already defined in the target document — so those are left untouched + * for the paste/sync plugins to merge. Residual: when the pasted block is merged + * into a doc that already has footnotes, ordering RELATIVE to the pre-existing + * footnotes is still governed by the sync plugin (which does not reorder). + */ +export function canonicalizePastedFootnotes(slice: Slice, schema: Schema): Slice { + if (slice.openStart !== 0 || slice.openEnd !== 0) return slice; + + let hasFootnotesList = false; + slice.content.forEach((node) => { + if (node.type.name === FOOTNOTES_LIST_NAME) hasFootnotesList = true; + }); + if (!hasFootnotesList) return slice; + + const content = slice.content.toJSON(); + if (!Array.isArray(content)) return slice; + + const canonical = canonicalizeFootnotes({ type: "doc", content }) as { + content?: unknown[]; + }; + const fragment = Fragment.fromJSON(schema, canonical.content ?? []); + return new Slice(fragment, 0, 0); +} + function elementFromString(value) { // add a wrapper to preserve leading and trailing whitespace const wrappedValue = `${value}`; diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts index aeb59eff..97133e01 100644 --- a/apps/server/src/core/page/services/page.service.ts +++ b/apps/server/src/core/page/services/page.service.ts @@ -52,7 +52,7 @@ import { INTERNAL_LINK_REGEX, extractPageSlugId, } from '../../../integrations/export/utils'; -import { markdownToHtml } from '@docmost/editor-ext'; +import { markdownToHtml, canonicalizeFootnotes } from '@docmost/editor-ext'; import { WatcherService } from '../../watcher/watcher.service'; import { sql } from 'kysely'; import { TransclusionService } from '../transclusion/transclusion.service'; @@ -1301,6 +1301,14 @@ export class PageService { } } + // markdown/html are converted via markdownToHtml -> htmlToJson and json may + // be written programmatically (API createPage/updatePageContent) — none of + // these run the editor's footnoteSyncPlugin, so footnotes keep the source's + // physical order, orphans survive, and reused references aren't collapsed. + // Canonicalize to the editor's invariant before persisting (issue #228). + // Pure + idempotent + shape-safe: a doc with no footnotes is unchanged. + prosemirrorJson = canonicalizeFootnotes(prosemirrorJson); + try { jsonToNode(prosemirrorJson); } catch (err) { diff --git a/apps/server/src/integrations/import/services/file-import-task.service.ts b/apps/server/src/integrations/import/services/file-import-task.service.ts index 218c75ca..7666e9b7 100644 --- a/apps/server/src/integrations/import/services/file-import-task.service.ts +++ b/apps/server/src/integrations/import/services/file-import-task.service.ts @@ -18,7 +18,7 @@ import { generateSlugId } from '../../../common/helpers'; import { v7 } from 'uuid'; import { generateJitteredKeyBetween } from 'fractional-indexing-jittered'; import { FileTask, InsertablePage } from '@docmost/db/types/entity.types'; -import { markdownToHtml } from '@docmost/editor-ext'; +import { markdownToHtml, canonicalizeFootnotes } from '@docmost/editor-ext'; import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils'; import { formatImportHtml } from '../utils/import-formatter'; import { @@ -496,9 +496,16 @@ export class FileImportTaskService { await this.importService.processHTML(html), ); - const { title, prosemirrorJson } = + const { title, prosemirrorJson: extractedJson } = this.importService.extractTitleAndRemoveHeading(pmState); + // Canonicalize footnote topology on this non-editor write path + // (markdownToHtml/processHTML never runs footnoteSyncPlugin), so a + // zip-imported page's footnotes are reference-ordered, deduped, and + // orphan-free like the editor's invariant (issue #228). Pure + + // idempotent + shape-safe; a footnote-free doc is unchanged. + const prosemirrorJson = canonicalizeFootnotes(extractedJson); + const insertablePage: InsertablePage = { id: page.id, slugId: page.slugId, diff --git a/apps/server/src/integrations/import/services/import.service.footnote-canonicalize.spec.ts b/apps/server/src/integrations/import/services/import.service.footnote-canonicalize.spec.ts new file mode 100644 index 00000000..e53b17a1 --- /dev/null +++ b/apps/server/src/integrations/import/services/import.service.footnote-canonicalize.spec.ts @@ -0,0 +1,139 @@ +// Importing ImportService transitively loads import-formatter.ts, which imports +// the ESM-only @sindresorhus/slugify package (not in jest's transform +// allowlist). slugify is irrelevant to the path under test, so it is mocked out +// to keep the module graph loadable under ts-jest. +jest.mock('@sindresorhus/slugify', () => ({ + __esModule: true, + default: (input: string) => String(input), +})); + +import { ImportService } from './import.service'; +import { canonicalizeFootnotes } from '@docmost/editor-ext'; + +/** + * Integration-ish test for the USER-FACING markdown import path + * (`ImportService.importPage`). It exercises the REAL markdown -> HTML -> JSON + * conversion and asserts that the stored page content has its footnotes + * canonicalized — the gap that issue #228 fixes: the import path builds + * ProseMirror JSON directly (never running the editor's footnoteSyncPlugin), so + * before this wiring the stored footnotes kept the markdown's physical + * definition order (out of order vs. references), retained orphan definitions, + * and did not collapse reused references. + * + * The DB/ydoc side-effects are stubbed: `getNewPagePosition` (DB query) and + * `createYdoc` (Yjs encode) are spied, and `pageRepo.insertPage` captures the + * persisted `content`. Everything between markdown and persistence is REAL. + */ + +// Out-of-order references (c, a, b), a REUSED reference ([^a] twice -> one +// footnote), and an ORPHAN definition ([^z], never referenced). +const MARKDOWN = [ + '# Title', + '', + 'Body refs [^c] and [^a] and [^b] and again [^a].', + '', + '[^a]: note A', + '[^b]: note B', + '[^c]: note C', + '[^z]: orphan note', +].join('\n'); + +function makeFile(filename: string, contents: string) { + return { + filename, + toBuffer: async () => Buffer.from(contents), + } as any; +} + +function makeService() { + let captured: any = null; + const pageRepo = { + insertPage: jest.fn(async (values: any) => { + captured = values; + return { id: 'page-id', slugId: 'slug-id' }; + }), + }; + const service = new ImportService( + pageRepo as any, + {} as any, + {} as any, + {} as any, + ); + jest.spyOn(service as any, 'getNewPagePosition').mockResolvedValue('a0'); + jest + .spyOn(service as any, 'createYdoc') + .mockResolvedValue(Buffer.from([]) as any); + return { service, pageRepo, getCaptured: () => captured }; +} + +/** List the footnote-definition ids of the (single) footnotesList, in order. */ +function footnoteListIds(content: any): string[] { + const list = (content.content ?? []).find( + (n: any) => n.type === 'footnotesList', + ); + if (!list) return []; + return (list.content ?? []) + .filter((n: any) => n.type === 'footnoteDefinition') + .map((n: any) => n.attrs?.id); +} + +function definitionText(content: any, id: string): string | undefined { + const list = (content.content ?? []).find( + (n: any) => n.type === 'footnotesList', + ); + const def = (list?.content ?? []).find( + (n: any) => n.type === 'footnoteDefinition' && n.attrs?.id === id, + ); + return def?.content?.[0]?.content?.[0]?.text; +} + +describe('ImportService.importPage — footnote canonicalization (#228)', () => { + it('orders footnotes by first reference, dedupes reuse, and drops orphans', async () => { + const { service, getCaptured } = makeService(); + + await service.importPage( + Promise.resolve(makeFile('note.md', MARKDOWN)), + 'user-id', + 'space-id', + 'workspace-id', + ); + + const content = getCaptured().content; + expect(content).toBeTruthy(); + + // Reference order is c, a, b (NOT the markdown definition order a, b, c). + expect(footnoteListIds(content)).toEqual(['c', 'a', 'b']); + + // Definitions preserved and attached to the right ids. + expect(definitionText(content, 'c')).toBe('note C'); + expect(definitionText(content, 'a')).toBe('note A'); + expect(definitionText(content, 'b')).toBe('note B'); + + // Orphan definition [^z] is dropped. + expect(footnoteListIds(content)).not.toContain('z'); + + // Reused [^a] yields exactly ONE definition, and exactly one list. + const lists = (content.content ?? []).filter( + (n: any) => n.type === 'footnotesList', + ); + expect(lists).toHaveLength(1); + expect(footnoteListIds(content).filter((id) => id === 'a')).toHaveLength(1); + }); + + it('is idempotent: canonicalizing the stored output again is a no-op', async () => { + const { service, getCaptured } = makeService(); + await service.importPage( + Promise.resolve(makeFile('note.md', MARKDOWN)), + 'user-id', + 'space-id', + 'workspace-id', + ); + const stored = getCaptured().content; + + // The stored content is already canonical; running the canonicalizer a second + // time must not change it (safe to wire into every write path). + const second = canonicalizeFootnotes(stored); + expect(second).toEqual(stored); + expect(footnoteListIds(second)).toEqual(['c', 'a', 'b']); + }); +}); diff --git a/apps/server/src/integrations/import/services/import.service.ts b/apps/server/src/integrations/import/services/import.service.ts index 19bffe8d..c2057a73 100644 --- a/apps/server/src/integrations/import/services/import.service.ts +++ b/apps/server/src/integrations/import/services/import.service.ts @@ -17,7 +17,7 @@ import { import { generateJitteredKeyBetween } from 'fractional-indexing-jittered'; import { TiptapTransformer } from '@hocuspocus/transformer'; import * as Y from 'yjs'; -import { markdownToHtml } from '@docmost/editor-ext'; +import { markdownToHtml, canonicalizeFootnotes } from '@docmost/editor-ext'; import { FileTaskStatus, FileTaskType, @@ -85,7 +85,13 @@ export class ImportService { const extracted = this.extractTitleAndRemoveHeading(prosemirrorState); const title = extracted.title; - const prosemirrorJson = extracted.prosemirrorJson; + // Imported markdown/HTML is built via markdownToHtml -> htmlToJson, which + // never runs the editor's footnoteSyncPlugin, so the footnote topology keeps + // the source's PHYSICAL definition order (out of order vs. references), + // retains orphan definitions, and is not deduped. Canonicalize before + // persisting so the stored page matches the editor's invariant (issue #228). + // Pure + idempotent + shape-safe: a doc with no footnotes is unchanged. + const prosemirrorJson = canonicalizeFootnotes(extracted.prosemirrorJson); const pageTitle = title || fileName;