fix(footnotes): canonicalize footnotes on server import + markdown paste (#228)

The footnote canonicalizer was wired into the MCP and editor-ext write paths
but NOT into the server's user-facing markdown/HTML import paths, so importing
or pasting markdown with out-of-order, reused, or orphan footnotes did not
canonicalize -- the exact trigger bug #228 fixes was still reproduced on
import. markdownToHtml -> htmlToJson builds ProseMirror JSON directly and never
runs the editor's footnoteSyncPlugin, and that plugin does not reorder an
existing list, so the stored footnotes kept the source's physical definition
order, retained orphans, and did not collapse reused references.

Wire canonicalizeFootnotes (already exported from @docmost/editor-ext) into
every server markdown/HTML -> page-JSON seam, before persisting:
  - ImportService.importPage (REST single-file .md/.html import)
  - FileImportTaskService (zip import worker)
  - PageService.parseProsemirrorContent (API createPage / updatePageContent)

Also hook the client markdown paste: handlePaste applies a manual transaction
(returns true), bypassing transformPasted/footnoteSyncPlugin, so a pasted
out-of-order markdown footnote block would persist out of order.
canonicalizePastedFootnotes reorders a self-contained pasted block (one that
carries its own footnotesList) to reference order, deduped and orphan-free; it
is deliberately scoped to whole-block pastes so a reference-only paste that
reuses a footnote already defined in the target doc is left untouched.

canonicalizeFootnotes is pure, idempotent and shape-safe (a doc with no
footnotes is unchanged), so it is safe on every write path.

Residual: when a pasted block merges into a doc that already has footnotes,
ordering relative to the pre-existing footnotes is still governed by the live
sync plugin (which does not reorder across the boundary).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
a
2026-06-27 17:10:41 +03:00
parent 30cb9d293c
commit fa929c9e86
6 changed files with 361 additions and 7 deletions

View File

@@ -18,7 +18,7 @@ import { generateSlugId } from '../../../common/helpers';
import { v7 } from 'uuid';
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
import { markdownToHtml } from '@docmost/editor-ext';
import { markdownToHtml, canonicalizeFootnotes } from '@docmost/editor-ext';
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
import { formatImportHtml } from '../utils/import-formatter';
import {
@@ -496,9 +496,16 @@ export class FileImportTaskService {
await this.importService.processHTML(html),
);
const { title, prosemirrorJson } =
const { title, prosemirrorJson: extractedJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
// Canonicalize footnote topology on this non-editor write path
// (markdownToHtml/processHTML never runs footnoteSyncPlugin), so a
// zip-imported page's footnotes are reference-ordered, deduped, and
// orphan-free like the editor's invariant (issue #228). Pure +
// idempotent + shape-safe; a footnote-free doc is unchanged.
const prosemirrorJson = canonicalizeFootnotes(extractedJson);
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,

View File

@@ -0,0 +1,139 @@
// Importing ImportService transitively loads import-formatter.ts, which imports
// the ESM-only @sindresorhus/slugify package (not in jest's transform
// allowlist). slugify is irrelevant to the path under test, so it is mocked out
// to keep the module graph loadable under ts-jest.
jest.mock('@sindresorhus/slugify', () => ({
__esModule: true,
default: (input: string) => String(input),
}));
import { ImportService } from './import.service';
import { canonicalizeFootnotes } from '@docmost/editor-ext';
/**
* Integration-ish test for the USER-FACING markdown import path
* (`ImportService.importPage`). It exercises the REAL markdown -> HTML -> JSON
* conversion and asserts that the stored page content has its footnotes
* canonicalized — the gap that issue #228 fixes: the import path builds
* ProseMirror JSON directly (never running the editor's footnoteSyncPlugin), so
* before this wiring the stored footnotes kept the markdown's physical
* definition order (out of order vs. references), retained orphan definitions,
* and did not collapse reused references.
*
* The DB/ydoc side-effects are stubbed: `getNewPagePosition` (DB query) and
* `createYdoc` (Yjs encode) are spied, and `pageRepo.insertPage` captures the
* persisted `content`. Everything between markdown and persistence is REAL.
*/
// Out-of-order references (c, a, b), a REUSED reference ([^a] twice -> one
// footnote), and an ORPHAN definition ([^z], never referenced).
const MARKDOWN = [
'# Title',
'',
'Body refs [^c] and [^a] and [^b] and again [^a].',
'',
'[^a]: note A',
'[^b]: note B',
'[^c]: note C',
'[^z]: orphan note',
].join('\n');
function makeFile(filename: string, contents: string) {
return {
filename,
toBuffer: async () => Buffer.from(contents),
} as any;
}
function makeService() {
let captured: any = null;
const pageRepo = {
insertPage: jest.fn(async (values: any) => {
captured = values;
return { id: 'page-id', slugId: 'slug-id' };
}),
};
const service = new ImportService(
pageRepo as any,
{} as any,
{} as any,
{} as any,
);
jest.spyOn(service as any, 'getNewPagePosition').mockResolvedValue('a0');
jest
.spyOn(service as any, 'createYdoc')
.mockResolvedValue(Buffer.from([]) as any);
return { service, pageRepo, getCaptured: () => captured };
}
/** List the footnote-definition ids of the (single) footnotesList, in order. */
function footnoteListIds(content: any): string[] {
const list = (content.content ?? []).find(
(n: any) => n.type === 'footnotesList',
);
if (!list) return [];
return (list.content ?? [])
.filter((n: any) => n.type === 'footnoteDefinition')
.map((n: any) => n.attrs?.id);
}
function definitionText(content: any, id: string): string | undefined {
const list = (content.content ?? []).find(
(n: any) => n.type === 'footnotesList',
);
const def = (list?.content ?? []).find(
(n: any) => n.type === 'footnoteDefinition' && n.attrs?.id === id,
);
return def?.content?.[0]?.content?.[0]?.text;
}
describe('ImportService.importPage — footnote canonicalization (#228)', () => {
it('orders footnotes by first reference, dedupes reuse, and drops orphans', async () => {
const { service, getCaptured } = makeService();
await service.importPage(
Promise.resolve(makeFile('note.md', MARKDOWN)),
'user-id',
'space-id',
'workspace-id',
);
const content = getCaptured().content;
expect(content).toBeTruthy();
// Reference order is c, a, b (NOT the markdown definition order a, b, c).
expect(footnoteListIds(content)).toEqual(['c', 'a', 'b']);
// Definitions preserved and attached to the right ids.
expect(definitionText(content, 'c')).toBe('note C');
expect(definitionText(content, 'a')).toBe('note A');
expect(definitionText(content, 'b')).toBe('note B');
// Orphan definition [^z] is dropped.
expect(footnoteListIds(content)).not.toContain('z');
// Reused [^a] yields exactly ONE definition, and exactly one list.
const lists = (content.content ?? []).filter(
(n: any) => n.type === 'footnotesList',
);
expect(lists).toHaveLength(1);
expect(footnoteListIds(content).filter((id) => id === 'a')).toHaveLength(1);
});
it('is idempotent: canonicalizing the stored output again is a no-op', async () => {
const { service, getCaptured } = makeService();
await service.importPage(
Promise.resolve(makeFile('note.md', MARKDOWN)),
'user-id',
'space-id',
'workspace-id',
);
const stored = getCaptured().content;
// The stored content is already canonical; running the canonicalizer a second
// time must not change it (safe to wire into every write path).
const second = canonicalizeFootnotes(stored);
expect(second).toEqual(stored);
expect(footnoteListIds(second)).toEqual(['c', 'a', 'b']);
});
});

View File

@@ -17,7 +17,7 @@ import {
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { TiptapTransformer } from '@hocuspocus/transformer';
import * as Y from 'yjs';
import { markdownToHtml } from '@docmost/editor-ext';
import { markdownToHtml, canonicalizeFootnotes } from '@docmost/editor-ext';
import {
FileTaskStatus,
FileTaskType,
@@ -85,7 +85,13 @@ export class ImportService {
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
const title = extracted.title;
const prosemirrorJson = extracted.prosemirrorJson;
// Imported markdown/HTML is built via markdownToHtml -> htmlToJson, which
// never runs the editor's footnoteSyncPlugin, so the footnote topology keeps
// the source's PHYSICAL definition order (out of order vs. references),
// retains orphan definitions, and is not deduped. Canonicalize before
// persisting so the stored page matches the editor's invariant (issue #228).
// Pure + idempotent + shape-safe: a doc with no footnotes is unchanged.
const prosemirrorJson = canonicalizeFootnotes(extracted.prosemirrorJson);
const pageTitle = title || fileName;