feat(footnotes): inline authoring + deterministic server-side canonicalization
Make footnotes author-inline: the agent/tool inserts a footnote at its point of use (anchor + text) and the numbering plus the bottom list are DERIVED deterministically server-side. The agent has no access to footnotesList and cannot desync — out-of-order lists, orphan definitions, and raw trailing [^id] blocks become structurally impossible. editor-ext: - canonicalizeFootnotes(docJSON) -> docJSON: a pure, EditorView-free port of footnoteSyncPlugin's end-state. Distinct reference ids in document order are the source of truth; exactly one trailing footnotesList holds one definition per referenced id in reference order (reusing the existing node or synthesizing an empty one); orphans dropped; duplicate definitions resolved deterministically (first wins, never lost); idempotent. - Unit tests + a golden parity suite: on every editor-reachable steady state the live footnoteSyncPlugin's JSON is a canonicalize no-op (byte-for-byte parity), and the canonicalizer additionally repairs the out-of-order list a non-editor write produces. mcp: - footnote-canonicalize.ts: behavioural mirror of the editor-ext canonicalizer (the MCP package is intentionally decoupled from the editor barrel, like footnote-lex/docmost-schema), plus footnoteContentKey for content dedup. - Auto-canonicalize on EVERY write path: markdownToProseMirror (fixes import ordering), update_page_json, and after every docmost_transform. Idempotent, so it is a no-op when footnotes are already canonical. - insert_footnote tool + insertInlineFootnote: anchor + markdown text -> a mark-safe footnoteReference and a content-dedup'd definition; the list and numbering are derived. Same-content footnotes reuse one number/definition. - canonicalizeFootnotes + insertInlineFootnote exposed as docmost_transform sandbox helpers. Tests: editor-ext 157 green; MCP 325 green; server + client tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,327 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { Editor, getSchema } from '@tiptap/core';
|
||||
import { Document } from '@tiptap/extension-document';
|
||||
import { Paragraph } from '@tiptap/extension-paragraph';
|
||||
import { Text } from '@tiptap/extension-text';
|
||||
import { FootnoteReference } from './footnote-reference';
|
||||
import { FootnotesList } from './footnotes-list';
|
||||
import { FootnoteDefinition } from './footnote-definition';
|
||||
import { canonicalizeFootnotes } from './footnote-canonicalize';
|
||||
import {
|
||||
collectReferenceIds,
|
||||
computeFootnoteNumbers,
|
||||
FOOTNOTE_REFERENCE_NAME,
|
||||
FOOTNOTES_LIST_NAME,
|
||||
FOOTNOTE_DEFINITION_NAME,
|
||||
} from './footnote-util';
|
||||
import { Node as PMNode } from '@tiptap/pm/model';
|
||||
|
||||
const extensions = [
|
||||
Document,
|
||||
Paragraph,
|
||||
Text,
|
||||
FootnoteReference,
|
||||
FootnotesList,
|
||||
FootnoteDefinition,
|
||||
];
|
||||
|
||||
/** Build a footnoteReference JSON node pointing at `id`. */
const ref = (id: string) => {
  return { type: FOOTNOTE_REFERENCE_NAME, attrs: { id } };
};

/**
 * Build a footnoteDefinition JSON node for `id`. A truthy `text` becomes a
 * single text paragraph; otherwise the definition holds one empty paragraph.
 */
const def = (id: string, text?: string) => {
  const body = text
    ? { type: 'paragraph', content: [{ type: 'text', text }] }
    : { type: 'paragraph' };
  return { type: FOOTNOTE_DEFINITION_NAME, attrs: { id }, content: [body] };
};

/** Wrap the given definition nodes in a footnotesList JSON node. */
const list = (...defs: any[]) => {
  return { type: FOOTNOTES_LIST_NAME, content: defs };
};

/** Wrap the given inline nodes in a paragraph JSON node. */
const para = (...inline: any[]) => {
  return { type: 'paragraph', content: inline };
};
|
||||
|
||||
/**
 * Gather every node whose `type` equals `type`, in document (pre-order) order.
 * Matches are appended to `acc`, which is also the return value.
 */
function findAll(node: any, type: string, acc: any[] = []): any[] {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (and therefore visited) in their original order.
  const pending: any[] = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== 'object') continue;
    if (current.type === type) acc.push(current);
    const kids = current.content;
    if (Array.isArray(kids)) {
      for (let i = kids.length - 1; i >= 0; i -= 1) pending.push(kids[i]);
    }
  }
  return acc;
}
|
||||
|
||||
/** Ids of all footnote definitions in `doc`, in their physical (document) order. */
function defOrder(doc: any): string[] {
  const ids: string[] = [];
  for (const definition of findAll(doc, FOOTNOTE_DEFINITION_NAME)) {
    ids.push(definition.attrs.id);
  }
  return ids;
}
|
||||
|
||||
// ProseMirror schema derived from the test extension set; used to hydrate JSON.
const schema = getSchema(extensions);

/**
 * Distinct reference ids in document order, computed by hydrating the JSON
 * into a ProseMirror node and delegating to the shared `collectReferenceIds`.
 */
function refOrder(doc: any): string[] {
  const hydrated = PMNode.fromJSON(schema, doc);
  return collectReferenceIds(hydrated);
}
|
||||
|
||||
// Unit tests for the pure-JSON canonicalizer: ordering, orphan/duplicate
// handling, synthesis, list merging/placement, idempotence and purity.
describe('canonicalizeFootnotes (pure JSON)', () => {
  it('orders definitions by FIRST reference (out-of-order list -> 1..N)', () => {
    // References appear b, a, d, c; the bottom list is in a different (import)
    // order. The canonical list must follow reference order so reading it top to
    // bottom yields numbers 1..N.
    const doc = {
      type: 'doc',
      content: [
        para(
          { type: 'text', text: 'x' },
          ref('b'),
          ref('a'),
          ref('d'),
          ref('c'),
        ),
        list(def('a', 'A'), def('c', 'C'), def('b', 'B'), def('d', 'D')),
      ],
    };

    const out = canonicalizeFootnotes(doc);
    expect(defOrder(out)).toEqual(['b', 'a', 'd', 'c']);
    // The physical definition order now matches reference order, so the derived
    // numbers (1..N) run sequentially down the list.
    expect(refOrder(out)).toEqual(['b', 'a', 'd', 'c']);
    const numbers = computeFootnoteNumbers(PMNode.fromJSON(schema, out));
    expect(numbers.get('b')).toBe(1);
    expect(numbers.get('a')).toBe(2);
    expect(numbers.get('d')).toBe(3);
    expect(numbers.get('c')).toBe(4);
  });

  it('numbers run 1..N down the canonical list', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'x' }, ref('b'), ref('a'), ref('c')),
        list(def('a', 'A'), def('c', 'C'), def('b', 'B')),
      ],
    };
    const out = canonicalizeFootnotes(doc);
    // Definition order == reference order == 1,2,3 reading down.
    expect(defOrder(out)).toEqual(['b', 'a', 'c']);
    // Actually assert the numbering the test title promises: walking the
    // physical list top to bottom must yield 1, 2, 3.
    const numbers = computeFootnoteNumbers(PMNode.fromJSON(schema, out));
    expect(defOrder(out).map((id) => numbers.get(id))).toEqual([1, 2, 3]);
  });

  it('drops an orphan definition (no matching reference)', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'x' }, ref('a')),
        list(def('a', 'A'), def('orphan', 'O')),
      ],
    };
    const out = canonicalizeFootnotes(doc);
    expect(defOrder(out)).toEqual(['a']);
    expect(findAll(out, FOOTNOTE_DEFINITION_NAME)).toHaveLength(1);
  });

  it('with NO references, removes the footnotesList entirely', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'plain' }),
        list(def('orphan', 'O')),
      ],
    };
    const out = canonicalizeFootnotes(doc);
    expect(findAll(out, FOOTNOTES_LIST_NAME)).toHaveLength(0);
    expect(findAll(out, FOOTNOTE_DEFINITION_NAME)).toHaveLength(0);
  });

  it('reuse: repeated references collapse to ONE definition/number', () => {
    const doc = {
      type: 'doc',
      content: [
        para(ref('d'), { type: 'text', text: ' a ' }, ref('d'), ref('d')),
        list(def('d', 'shared')),
      ],
    };
    const out = canonicalizeFootnotes(doc);
    // One definition; the three references keep id "d".
    expect(defOrder(out)).toEqual(['d']);
    expect(
      findAll(out, FOOTNOTE_REFERENCE_NAME).map((r) => r.attrs.id),
    ).toEqual(['d', 'd', 'd']);
  });

  it('duplicate definitions: first wins, the rest are dropped (never resurface as orphans)', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'x' }, ref('d')),
        list(def('d', 'first'), def('d', 'second'), def('d', 'third')),
      ],
    };
    const out = canonicalizeFootnotes(doc);
    const defs = findAll(out, FOOTNOTE_DEFINITION_NAME);
    expect(defs.map((d) => d.attrs.id)).toEqual(['d']);
    expect(defs[0].content[0].content[0].text).toBe('first');
  });

  it('synthesizes an empty definition for a reference that has none', () => {
    const doc = {
      type: 'doc',
      content: [para({ type: 'text', text: 'x' }, ref('missing'))],
    };
    const out = canonicalizeFootnotes(doc);
    expect(defOrder(out)).toEqual(['missing']);
    const list0 = findAll(out, FOOTNOTES_LIST_NAME);
    expect(list0).toHaveLength(1);
    // The synthesized definition must really be EMPTY: one bare paragraph.
    const [synthesized] = findAll(out, FOOTNOTE_DEFINITION_NAME);
    expect(synthesized.content).toEqual([{ type: 'paragraph' }]);
  });

  it('merges multiple footnotesList nodes into one', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'a' }, ref('x'), ref('y')),
        list(def('x', 'X')),
        para({ type: 'text', text: 'tail' }),
        list(def('y', 'Y')),
      ],
    };
    const out = canonicalizeFootnotes(doc);
    expect(findAll(out, FOOTNOTES_LIST_NAME)).toHaveLength(1);
    expect(defOrder(out)).toEqual(['x', 'y']);
  });

  it('places the single list before trailing empty paragraphs', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'x' }, ref('a')),
        list(def('a', 'A')),
        { type: 'paragraph' },
      ],
    };
    const out = canonicalizeFootnotes(doc);
    const last = out.content[out.content.length - 1];
    expect(last.type).toBe('paragraph');
    expect(out.content[out.content.length - 2].type).toBe(FOOTNOTES_LIST_NAME);
  });

  it('is idempotent: canonicalize(canonicalize(x)) === canonicalize(x)', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'x' }, ref('b'), ref('a')),
        list(def('a', 'A'), def('b', 'B'), def('orphan', 'O')),
      ],
    };
    const once = canonicalizeFootnotes(doc);
    const twice = canonicalizeFootnotes(once);
    expect(twice).toEqual(once);
  });

  it('does not mutate its input', () => {
    const doc = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'x' }, ref('a')),
        list(def('orphan', 'O')),
      ],
    };
    const snapshot = JSON.parse(JSON.stringify(doc));
    canonicalizeFootnotes(doc);
    expect(doc).toEqual(snapshot);
  });
});
|
||||
|
||||
/**
 * GOLDEN PARITY against the live `footnoteSyncPlugin`. The server canonicalizer
 * must produce EXACTLY what the editor keeps. For every editor-reachable steady
 * state (the list is already reference-ordered there), driving a real editor to
 * convergence and then running `canonicalizeFootnotes` on its JSON must be a
 * byte-for-byte no-op — proving the server output is identical to the editor's.
 */
describe('canonicalizeFootnotes golden parity with footnoteSyncPlugin', () => {
  function makeEditor(content: any) {
    return new Editor({ extensions, content });
  }

  /** Load `content`, fire one local edit so the sync plugin converges, return JSON. */
  function pluginSteadyState(content: any): any {
    const editor = makeEditor(content);
    try {
      // A local doc change triggers footnoteSyncPlugin.appendTransaction.
      editor.commands.insertContentAt(1, ' ');
      return editor.state.doc.toJSON();
    } finally {
      // Always release the editor, even when the triggering command throws,
      // so a failing case cannot leak an editor into later tests.
      editor.destroy();
    }
  }

  const corpus: Array<{ name: string; content: any }> = [
    {
      name: 'plain ref + def',
      content: {
        type: 'doc',
        content: [para({ type: 'text', text: 'a' }, ref('x')), list(def('x', 'X'))],
      },
    },
    {
      name: 'two refs, two defs in reference order',
      content: {
        type: 'doc',
        content: [
          para({ type: 'text', text: 'a' }, ref('x'), { type: 'text', text: 'b' }, ref('y')),
          list(def('x', 'X'), def('y', 'Y')),
        ],
      },
    },
    {
      name: 'orphan definition gets removed',
      content: {
        type: 'doc',
        content: [para({ type: 'text', text: 'a' }, ref('x')), list(def('x', 'X'), def('orphan', 'O'))],
      },
    },
    {
      name: 'reference missing its definition (synth empty)',
      content: {
        type: 'doc',
        content: [para({ type: 'text', text: 'a' }, ref('x'))],
      },
    },
    {
      name: 'reuse: repeated references, one definition',
      content: {
        type: 'doc',
        content: [
          para(ref('d'), { type: 'text', text: ' a ' }, ref('d'), ref('d')),
          list(def('d', 'shared')),
        ],
      },
    },
    {
      name: 'no footnotes at all',
      content: {
        type: 'doc',
        content: [para({ type: 'text', text: 'just text' })],
      },
    },
  ];

  for (const { name, content } of corpus) {
    it(`steady state is a canonicalize no-op: ${name}`, () => {
      const steady = pluginSteadyState(content);
      expect(canonicalizeFootnotes(steady)).toEqual(steady);
    });
  }

  it('the canonicalizer and the editor agree on reference order and definition set', () => {
    const content = {
      type: 'doc',
      content: [
        para({ type: 'text', text: 'a' }, ref('x'), { type: 'text', text: 'b' }, ref('y')),
        list(def('y', 'Y'), def('x', 'X')), // physically reversed
      ],
    };
    const steady = pluginSteadyState(content);
    const canon = canonicalizeFootnotes(content);
    // Same reference order and same DEFINITION SET (ids) in both, even though the
    // physical list order may differ (the plugin preserves node identity, the
    // canonicalizer reorders). Numbering — derived from reference order — matches.
    expect(refOrder(steady)).toEqual(['x', 'y']);
    expect(defOrder(canon)).toEqual(['x', 'y']);
    expect(new Set(defOrder(steady))).toEqual(new Set(defOrder(canon)));
  });
});
|
||||
190
packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts
Normal file
190
packages/editor-ext/src/lib/footnote/footnote-canonicalize.ts
Normal file
@@ -0,0 +1,190 @@
|
||||
import {
|
||||
FOOTNOTE_REFERENCE_NAME,
|
||||
FOOTNOTES_LIST_NAME,
|
||||
FOOTNOTE_DEFINITION_NAME,
|
||||
deriveFootnoteId,
|
||||
} from './footnote-util';
|
||||
|
||||
/**
 * Server-side, EditorView-free port of the footnote integrity invariant that
 * `footnoteSyncPlugin` maintains in the live editor. Where the plugin is an
 * `appendTransaction` that only runs inside a ProseMirror `EditorView`, this is
 * a PURE function over ProseMirror JSON: `canonicalizeFootnotes(doc) -> doc`.
 *
 * It exists because every NON-editor write path (the MCP `markdownToProseMirror`
 * importer, `update_page_json`, `docmost_transform`, the future git-sync writer)
 * builds ProseMirror JSON directly via `TiptapTransformer`/`updateYFragment`,
 * which NEVER runs the editor's plugins — so the canonical footnote topology was
 * never enforced on those writes. That is the root cause of the symptom in the
 * issue: footnotes rendered out of order (`1, 4, 2, 3, …`), a raw trailing
 * `[^id]: …` block, and orphan definitions, all of which are simply the result
 * of content written PAST the canonicalizer.
 *
 * The desired end-state (identical to the plugin's) is:
 *
 *  1. Reference ids in DOCUMENT ORDER are the single source of truth for which
 *     definitions exist and in what order (numbering is derived from this, see
 *     `computeFootnoteNumbers`). Repeated references that share an id are REUSE
 *     (one footnote, one number, one definition) — never re-id'd.
 *  2. Exactly ONE `footnotesList`, holding one definition per referenced id in
 *     REFERENCE order, reusing the existing definition node (content preserved)
 *     or synthesizing an empty one when missing. The list sits after the last
 *     meaningful block (only trailing empty paragraphs may follow it). Every
 *     top-level `footnotesList` AND every stray top-level `footnoteDefinition`
 *     is removed before the single list is rebuilt, so a harvested definition
 *     is never left behind as a duplicate outside the list.
 *  3. Orphan definitions (no matching reference) are dropped.
 *  4. Duplicate DEFINITIONS (two nodes sharing an id) are resolved
 *     deterministically: the first keeps the id; each later duplicate is re-id'd
 *     via `deriveFootnoteId` (never random) so it is never silently lost — and,
 *     lacking a matching reference, it then falls under the orphan policy and is
 *     dropped. This matches the editor's never-lose-by-collision rule and the
 *     importer's first-wins rule (both converge to "one definition per id").
 *  5. Idempotent: a document that already satisfies the invariant is returned
 *     structurally unchanged, so re-running the canonicalizer — or running it on
 *     a write that the editor already canonicalized — is a no-op. This is what
 *     makes it safe to wire into EVERY write path without spurious mutations /
 *     git-sync churn.
 *
 * Divergence from the live plugin (intentional): the plugin preserves the
 * PHYSICAL order of existing definition nodes to keep their Yjs/CRDT subtree
 * identity stable across collaborators (numbering is decoration-derived, so the
 * displayed numbers are correct regardless of physical order). This function has
 * no live CRDT to protect, so it physically REORDERS the list into reference
 * order — which is exactly the repair the out-of-order import needs. On every
 * editor-reachable steady state (where the list is already reference-ordered) the
 * two agree byte-for-byte; see the golden test.
 *
 * Pure: deep-clones its input, never mutates the caller's object, and is
 * deterministic (no `Math.random`/`Date.now`).
 *
 * NOTE(review): the MCP package keeps a behavioural mirror of this function; the
 * stray top-level definition handling should be applied there too — confirm.
 *
 * @param doc ProseMirror document JSON. Anything that is not an object with an
 *   array `content` is returned as-is.
 * @returns A canonicalized deep copy of `doc` (or `doc` itself when it is not a
 *   document-shaped object).
 */
export function canonicalizeFootnotes<T = any>(doc: T): T {
  if (
    doc == null ||
    typeof doc !== 'object' ||
    !Array.isArray((doc as any).content)
  ) {
    return doc;
  }
  const out = cloneJson(doc) as any;

  // 1) Distinct reference ids in document order (deep — references can live in
  //    callouts, tables, list items, ...). This is the ordering/numbering truth.
  const referenceIds: string[] = [];
  const seenRefIds = new Set<string>();
  collectReferenceIds(out, referenceIds, seenRefIds);

  // 2) Every definition node in document order (deep — defs normally live inside
  //    one or more `footnotesList` blocks, but we tolerate stray placements).
  const defNodes: any[] = [];
  collectDefinitions(out, defNodes);

  // 3) Resolve the id topology deterministically. The first definition for an id
  //    keeps it; a later duplicate is re-id'd to a fresh derived id (never lost),
  //    which — having no matching reference — is dropped as an orphan in step 4.
  const taken = new Set<string>(referenceIds);
  for (const d of defNodes) {
    const id = d?.attrs?.id;
    if (id) taken.add(id);
  }
  const occurrenceOf = new Map<string, number>();
  const seenDefIds = new Set<string>();
  // finalId -> definition node (the node reference inside `out`).
  const defByFinalId = new Map<string, any>();
  for (const d of defNodes) {
    const origId = d?.attrs?.id;
    if (!origId) continue;
    if (!seenDefIds.has(origId)) {
      seenDefIds.add(origId);
      defByFinalId.set(origId, d);
    } else {
      const next = (occurrenceOf.get(origId) ?? 1) + 1;
      occurrenceOf.set(origId, next);
      const newId = deriveFootnoteId(origId, next, taken);
      taken.add(newId);
      defByFinalId.set(newId, d);
    }
  }

  // 4) Build the ordered definition list: one per referenced id, in REFERENCE
  //    order, reusing the existing node (content preserved, id normalized) or
  //    synthesizing an empty definition. Definitions whose final id is NOT
  //    referenced are orphans and are simply never added.
  const orderedDefs: any[] = [];
  for (const id of referenceIds) {
    const existing = defByFinalId.get(id);
    if (existing) {
      // Clone again: a harvested node may still be reachable elsewhere in `out`
      // (e.g. nested in a container), and the list must not share that object.
      const node = cloneJson(existing);
      node.attrs = { ...(node.attrs ?? {}), id };
      orderedDefs.push(node);
    } else {
      orderedDefs.push(emptyDefinition(id));
    }
  }

  // 5) Strip every existing top-level footnotesList AND every stray top-level
  //    footnoteDefinition; we rebuild a single list. Step 2 harvests stray
  //    definitions, so leaving them in place would duplicate a referenced
  //    definition (once in the list, once outside it) or let an orphan survive.
  //    NOTE(review): lists/definitions nested inside another container are left
  //    in place — removing them could empty that container; confirm whether the
  //    schema permits such nesting at all.
  const top: any[] = out.content.filter(
    (n: any) =>
      !(
        n &&
        (n.type === FOOTNOTES_LIST_NAME || n.type === FOOTNOTE_DEFINITION_NAME)
      ),
  );

  // 6) No references -> there must be NO list at all.
  if (referenceIds.length === 0) {
    out.content = top;
    return out;
  }

  // 7) Insert exactly one footnotesList after the last meaningful (non-empty
  //    paragraph) block, so it coexists with a trailing-node empty paragraph.
  let insertAt = top.length;
  while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1])) insertAt--;
  top.splice(insertAt, 0, { type: FOOTNOTES_LIST_NAME, content: orderedDefs });
  out.content = top;
  return out;
}
|
||||
|
||||
/**
 * Create a brand-new footnote definition for `id` whose body is a single
 * empty paragraph. Used when a referenced id has no definition yet.
 */
function emptyDefinition(id: string): any {
  const blankParagraph = { type: 'paragraph' };
  const definition = {
    type: FOOTNOTE_DEFINITION_NAME,
    attrs: { id },
    content: [blankParagraph],
  };
  return definition;
}
|
||||
|
||||
/**
 * True when `node` is a paragraph with no children: its `content` is either
 * not an array at all or an empty array.
 */
function isEmptyParagraph(node: any): boolean {
  if (!node || node.type !== 'paragraph') return false;
  const children = node.content;
  return !Array.isArray(children) || children.length === 0;
}
|
||||
|
||||
/**
 * Append each DISTINCT footnoteReference id under `node` to `out`, in document
 * order of first appearance. `seen` carries the ids already recorded and is
 * updated in step with `out`.
 */
function collectReferenceIds(
  node: any,
  out: string[],
  seen: Set<string>,
): void {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are visited in their original order.
  const stack: any[] = [node];
  while (stack.length > 0) {
    const current = stack.pop();
    if (!current || typeof current !== 'object') continue;
    if (current.type === FOOTNOTE_REFERENCE_NAME) {
      const refId = current?.attrs?.id;
      if (refId && !seen.has(refId)) {
        seen.add(refId);
        out.push(refId);
      }
    }
    const children = current.content;
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i -= 1) stack.push(children[i]);
    }
  }
}
|
||||
|
||||
/**
 * Append every footnoteDefinition node found under `node` (including `node`
 * itself) to `out`, in document order. The nodes are pushed by reference.
 */
function collectDefinitions(node: any, out: any[]): void {
  const walkable = Boolean(node) && typeof node === 'object';
  if (!walkable) return;

  if (node.type === FOOTNOTE_DEFINITION_NAME) {
    out.push(node);
  }

  const children = node.content;
  if (!Array.isArray(children)) return;
  children.forEach((child: any) => collectDefinitions(child, out));
}
|
||||
|
||||
/**
 * Deep-copy a JSON-shaped value.
 *
 * Uses `structuredClone` when the runtime provides it, and the JSON round-trip
 * otherwise. `structuredClone` throws on values it cannot clone (functions, for
 * example) whereas the JSON round-trip silently drops them, so a failed
 * structured clone falls back to the JSON path. Both runtimes then accept the
 * same inputs and a stray non-JSON value cannot abort canonicalization.
 *
 * @param v Value to copy; expected to be plain JSON data.
 * @returns A deep copy that shares no object references with `v`.
 */
function cloneJson<T>(v: T): T {
  if (typeof structuredClone === 'function') {
    try {
      return structuredClone(v);
    } catch {
      // Not structured-cloneable: fall through to the JSON round-trip, which
      // keeps only the JSON-representable parts of the value.
    }
  }
  return JSON.parse(JSON.stringify(v)) as T;
}
|
||||
@@ -4,3 +4,4 @@ export * from "./footnotes-list";
|
||||
export * from "./footnote-definition";
|
||||
export * from "./footnote-numbering";
|
||||
export * from "./footnote-sync";
|
||||
export * from "./footnote-canonicalize";
|
||||
|
||||
@@ -17,7 +17,7 @@ import { applyTextEdits, } from "./lib/json-edit.js";
|
||||
import { getCollabToken, performLogin } from "./lib/auth-utils.js";
|
||||
import { diffDocs, summarizeChange } from "./lib/diff.js";
|
||||
import { applyAnchorInDoc, canAnchorInDoc } from "./lib/comment-anchor.js";
|
||||
import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, } from "./lib/transforms.js";
|
||||
import { blockText, walk, getList, insertMarkerAfter, setCalloutRange, noteItem, mdToInlineNodes, commentsToFootnotes, canonicalizeFootnotes, insertInlineFootnote, } from "./lib/transforms.js";
|
||||
import vm from "node:vm";
|
||||
// Supported image types, kept as two lookup tables so both a local file
|
||||
// extension and a remote Content-Type can be mapped to the same canonical set.
|
||||
@@ -1063,6 +1063,11 @@ export class DocmostClient {
|
||||
// the markdown link path (which TipTap sanitizes), raw JSON could otherwise
|
||||
// inject javascript:/data: link hrefs or media srcs straight into the doc.
|
||||
this.validateDocUrls(doc);
|
||||
// Canonicalize footnotes (idempotent): an agent-authored JSON doc cannot
|
||||
// leave footnotes out of order, orphaned, or in multiple lists — the bottom
|
||||
// list + numbering are always derived from reference order. No-op when the
|
||||
// footnotes are already canonical.
|
||||
doc = canonicalizeFootnotes(doc);
|
||||
// Write the BODY first, then the title (#159 split-brain): a failed body
|
||||
// write (e.g. persist timeout) must not leave a new title over the old body.
|
||||
const collabToken = await this.getCollabTokenWithReauth();
|
||||
@@ -1079,6 +1084,49 @@ export class DocmostClient {
|
||||
verify: mutation.verify,
|
||||
};
|
||||
}
|
||||
/**
|
||||
* AUTHOR-INLINE footnote insertion. The agent supplies only WHERE
|
||||
* (`anchorText`, a snippet of body text to attach the marker after) and WHAT
|
||||
* (`text`, the footnote content as markdown). Numbering and the bottom
|
||||
* `footnotesList` are derived deterministically server-side
|
||||
* (`insertInlineFootnote` -> `canonicalizeFootnotes`): the agent never sees,
|
||||
* assigns, or edits a footnote number or the list, so it CANNOT desync.
|
||||
*
|
||||
* Content DEDUP: when an existing definition has the same content, its id is
|
||||
* reused (one number, one definition, several references). The write is atomic
|
||||
* via `mutatePageContent` (single-writer, page-locked); if the anchor text is
|
||||
* not found the transform aborts with a clear error and no write happens.
|
||||
*/
|
||||
async insertFootnote(pageId, anchorText, text) {
|
||||
await this.ensureAuthenticated();
|
||||
if (!anchorText || !anchorText.trim()) {
|
||||
throw new Error("insert_footnote: anchorText is required");
|
||||
}
|
||||
if (text == null || `${text}`.trim() === "") {
|
||||
throw new Error("insert_footnote: text is required");
|
||||
}
|
||||
const collabToken = await this.getCollabTokenWithReauth();
|
||||
let result = null;
|
||||
const mutation = await mutatePageContent(pageId, collabToken, this.apiUrl, (liveDoc) => {
|
||||
const r = insertInlineFootnote(liveDoc, { anchorText, text });
|
||||
if (!r.inserted) {
|
||||
throw new Error(`insert_footnote: anchor text not found: ${JSON.stringify(anchorText.slice(0, 80))}`);
|
||||
}
|
||||
result = { footnoteId: r.footnoteId, reused: r.reused };
|
||||
return r.doc;
|
||||
});
|
||||
return {
|
||||
success: true,
|
||||
modified: true,
|
||||
pageId,
|
||||
footnoteId: result ? result.footnoteId : undefined,
|
||||
reused: result ? result.reused : undefined,
|
||||
message: result && result.reused
|
||||
? "Footnote inserted (reused an existing same-content definition)."
|
||||
: "Footnote inserted.",
|
||||
verify: mutation.verify,
|
||||
};
|
||||
}
|
||||
/**
|
||||
* Export a page to a single self-contained Docmost-flavoured markdown file:
|
||||
* meta block + body (with inline comment anchors + diagrams) + comment
|
||||
@@ -2422,6 +2470,8 @@ export class DocmostClient {
|
||||
noteItem,
|
||||
mdToInlineNodes,
|
||||
commentsToFootnotes,
|
||||
canonicalizeFootnotes,
|
||||
insertInlineFootnote,
|
||||
},
|
||||
};
|
||||
// Captured oldDoc / newDoc for the diff (set inside runTransform).
|
||||
@@ -2455,13 +2505,18 @@ export class DocmostClient {
|
||||
if (typeof fn !== "function") {
|
||||
throw new Error("transform must evaluate to a function (doc, ctx) => doc");
|
||||
}
|
||||
const result = vm.runInNewContext("f(d, c)", { f: fn, d: sandbox.doc, c: ctx }, { timeout: 5000 });
|
||||
if (!result ||
|
||||
typeof result !== "object" ||
|
||||
result.type !== "doc" ||
|
||||
!Array.isArray(result.content)) {
|
||||
const raw = vm.runInNewContext("f(d, c)", { f: fn, d: sandbox.doc, c: ctx }, { timeout: 5000 });
|
||||
if (!raw ||
|
||||
typeof raw !== "object" ||
|
||||
raw.type !== "doc" ||
|
||||
!Array.isArray(raw.content)) {
|
||||
throw new Error('transform must return a ProseMirror doc node ({ type:"doc", content:[...] })');
|
||||
}
|
||||
// Auto-canonicalize footnotes after the transform (idempotent): no write
|
||||
// path can leave footnotes out of order / orphaned / in a raw `[^id]`
|
||||
// block. In a dryRun preview this may surface footnote edits the script
|
||||
// author did not write (the canonicalizer tidied them) — that is expected.
|
||||
const result = canonicalizeFootnotes(raw);
|
||||
// Validate the returned doc before it can be written.
|
||||
this.validateDocStructure(result);
|
||||
this.validateDocUrls(result);
|
||||
|
||||
@@ -637,8 +637,12 @@ export function createDocmostMcpServer(config) {
|
||||
"mark-safe), setCalloutRange(doc, n) (sync a [1]…[K] callout range to " +
|
||||
"[1]…[n]), noteItem(inlineNodes) (wrap inline nodes in a listItem with a " +
|
||||
"fresh id), mdToInlineNodes(markdown) (comment markdown -> inline nodes), " +
|
||||
"and commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
|
||||
"comments into numbered footnotes). Footnote convention: markers are " +
|
||||
"commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
|
||||
"comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
|
||||
"footnote numbering + the single bottom list from reference order, drop " +
|
||||
"orphans/duplicates — runs automatically after every transform too), and " +
|
||||
"insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
|
||||
"marker + dedup'd definition, list derived). Footnote convention: markers are " +
|
||||
"plain '[N]' text in the body; the notes are an orderedList under a " +
|
||||
"heading whose text is 'Примечания переводчика'. The transform runs " +
|
||||
"sandboxed (no require/process/fs/network, 5s timeout) and must return a " +
|
||||
@@ -672,6 +676,33 @@ export function createDocmostMcpServer(config) {
|
||||
});
|
||||
return jsonContent(result);
|
||||
});
|
||||
// Tool: insert_footnote
// Registers the author-inline footnote tool; the handler forwards the three
// validated inputs to DocmostClient.insertFootnote and returns its result as JSON.
server.registerTool("insert_footnote", {
    description: [
        "Insert an AUTHOR-INLINE footnote: you specify only WHERE (anchorText) ",
        "and WHAT (text). The footnote marker is placed right after anchorText in ",
        "the body, and the bottom footnotes list + the numbering are derived ",
        "deterministically server-side. You do NOT assign a number, and you ",
        "never see or edit the footnotes list — so footnotes cannot end up out ",
        "of order, orphaned, or as a raw '[^id]' block. If a footnote with the ",
        "SAME text already exists, its number is REUSED (one definition, several ",
        "references). The write is atomic and won't clobber concurrent edits; if ",
        "anchorText is not found, nothing is written and an error is returned.",
    ].join(""),
    inputSchema: {
        pageId: z.string().min(1),
        anchorText: z
            .string()
            .min(1)
            .describe([
                "A snippet of existing body text; the footnote marker is inserted ",
                "immediately after its first occurrence (mark-safe).",
            ].join("")),
        text: z
            .string()
            .min(1)
            .describe("The footnote content as markdown (becomes the definition)."),
    },
}, async ({ pageId, anchorText, text }) => {
    return jsonContent(await docmostClient.insertFootnote(pageId, anchorText, text));
});
|
||||
// Tool: diff_page_versions
|
||||
registerShared(SHARED_TOOL_SPECS.diffPageVersions, async ({ pageId, from, to }) => {
|
||||
const result = await docmostClient.diffPageVersions(pageId, from, to);
|
||||
|
||||
@@ -11,6 +11,7 @@ import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||
import { withPageLock } from "./page-lock.js";
|
||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||
import { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
import { summarizeChange } from "./diff.js";
|
||||
/**
|
||||
* Build the descriptive error for an opaque Yjs encode failure ("Unexpected
|
||||
@@ -349,7 +350,12 @@ export async function markdownToProseMirror(markdownContent) {
|
||||
const { body, section } = extractFootnotes(withCallouts);
|
||||
const html = (await marked.parse(body)) + section;
|
||||
const bridged = bridgeTaskLists(html);
|
||||
return generateJSON(bridged, docmostExtensions);
|
||||
const json = generateJSON(bridged, docmostExtensions);
|
||||
// Canonicalize footnotes on EVERY import: the section above is built in
|
||||
// definition order, but numbering is derived from REFERENCE order — so without
|
||||
// this the bottom list renders out of order (`1, 4, 2, 3, …`). Idempotent, so
|
||||
// it is a no-op when the footnotes are already canonical.
|
||||
return canonicalizeFootnotes(json);
|
||||
}
|
||||
/**
|
||||
* Build the collaboration WebSocket URL from an API base URL:
|
||||
|
||||
226
packages/mcp/build/lib/footnote-canonicalize.js
Normal file
226
packages/mcp/build/lib/footnote-canonicalize.js
Normal file
@@ -0,0 +1,226 @@
|
||||
/**
|
||||
* Server-side footnote canonicalizer + inline authoring helper (MCP mirror).
|
||||
*
|
||||
* `canonicalizeFootnotes(doc)` is a pure ProseMirror-JSON port of the editor's
|
||||
* `footnoteSyncPlugin` end-state, identical in behaviour to
|
||||
* `@docmost/editor-ext`'s `canonicalizeFootnotes`. It is mirrored here — rather
|
||||
* than imported from editor-ext — for the SAME reason `footnote-lex.ts` and the
|
||||
* `docmost-schema.ts` nodes are mirrored: the MCP package is deliberately
|
||||
* decoupled from the browser/React-heavy editor barrel and operates on plain
|
||||
* JSON. The editor-ext copy owns the golden test against the live plugin; this
|
||||
* copy must stay behaviourally identical.
|
||||
*
|
||||
* Why it exists: every NON-editor write path (markdown import, update_page_json,
|
||||
* docmost_transform, insert_footnote) builds ProseMirror JSON directly, so the
|
||||
* editor's footnote plugins never run and the canonical topology (sequential
|
||||
* numbering by first reference, one trailing list, no orphans, no raw `[^id]`)
|
||||
* was never enforced. Running this at the end of every write path closes that
|
||||
* gap; because it is idempotent, it is a no-op when the footnotes are already
|
||||
* canonical (no spurious mutations / git-sync churn).
|
||||
*/
|
||||
// ProseMirror node `type` strings this module matches on / emits.
// Inline marker node placed in the body at the point of use.
const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
|
||||
// Container node that holds the footnote definitions.
const FOOTNOTES_LIST_NAME = "footnotesList";
|
||||
// One definition node (carries `attrs.id`).
const FOOTNOTE_DEFINITION_NAME = "footnoteDefinition";
|
||||
/**
 * Deep-copy a JSON-serializable value so the copy can be mutated freely.
 * Uses the runtime's `structuredClone` when it exists and falls back to a
 * JSON round-trip otherwise.
 */
function cloneJson(v) {
    const hasNativeClone = typeof structuredClone === "function";
    return hasNativeClone ? structuredClone(v) : JSON.parse(JSON.stringify(v));
}
|
||||
/**
 * Derive a replacement id for a duplicate footnote definition.
 *
 * The first candidate is `<originalId>__<occurrence>`. While a candidate is
 * already present in `taken`, the next one is formed by appending
 * `suffix(1)`, `suffix(2)`, ... to that base. The result depends only on the
 * three arguments (no randomness, no clock).
 *
 * @param originalId  Id shared by the colliding definitions.
 * @param occurrence  Ordinal of this duplicate (the caller passes 2 for the
 *                    first duplicate, 3 for the next, ...).
 * @param taken       Ids that must not be returned. This function only reads
 *                    it; the caller is responsible for recording the result.
 * @returns An id that is not a member of `taken`.
 */
export function deriveFootnoteId(originalId, occurrence, taken) {
    const base = `${originalId}__${occurrence}`;
    if (!taken.has(base))
        return base;
    for (let attempt = 1;; attempt += 1) {
        const candidate = `${base}${suffix(attempt)}`;
        if (!taken.has(candidate))
            return candidate;
    }
}
|
||||
/**
 * Encode a positive counter as lowercase letters in the range 'b'..'z'
 * (bijective base-25: 1 -> "b", 25 -> "z", 26 -> "bb").
 * Returns the empty string when `n` is not greater than zero.
 */
function suffix(n) {
    const letters = [];
    for (let rest = n; rest > 0; rest = Math.floor((rest - 1) / 25)) {
        // 98 is the char code of 'b'; the least-significant letter is produced first.
        letters.unshift(String.fromCharCode(98 + ((rest - 1) % 25)));
    }
    return letters.join("");
}
|
||||
/**
 * True when `node` is a `paragraph` node with no children: its `content` is
 * either not an array or an empty array. Any falsy `node` yields false.
 */
function isEmptyParagraph(node) {
    if (!node)
        return false;
    if (node.type !== "paragraph")
        return false;
    const hasChildren = Array.isArray(node.content) && node.content.length !== 0;
    return !hasChildren;
}
|
||||
/**
 * Append to `out` every distinct footnote-reference id found under `node`,
 * in document (pre-order, left-to-right) order.
 *
 * @param node  Root of the ProseMirror-JSON subtree to scan; non-objects are ignored.
 * @param out   Array that receives the ids, first occurrence only.
 * @param seen  Set of ids already emitted; updated alongside `out`.
 */
function collectReferenceIds(node, out, seen) {
    // Explicit stack instead of recursion; children are pushed in reverse so
    // they are popped (visited) in their original order.
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (!current || typeof current !== "object")
            continue;
        if (current.type === FOOTNOTE_REFERENCE_NAME) {
            const id = current?.attrs?.id;
            if (id && !seen.has(id)) {
                seen.add(id);
                out.push(id);
            }
        }
        const children = current.content;
        if (Array.isArray(children)) {
            for (let i = children.length - 1; i >= 0; i--)
                pending.push(children[i]);
        }
    }
}
|
||||
/**
 * Append to `out` every footnote-definition node found under `node`
 * (including `node` itself), in document (pre-order, left-to-right) order.
 * The nodes are pushed by reference, not copied.
 */
function collectDefinitions(node, out) {
    // Explicit stack instead of recursion; children are pushed in reverse so
    // they are popped (visited) in their original order.
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (!current || typeof current !== "object")
            continue;
        if (current.type === FOOTNOTE_DEFINITION_NAME)
            out.push(current);
        const children = current.content;
        if (Array.isArray(children)) {
            for (let i = children.length - 1; i >= 0; i--)
                pending.push(children[i]);
        }
    }
}
|
||||
/**
 * Build a placeholder footnote definition for `id`: a definition node whose
 * only child is a paragraph with no content.
 */
function emptyDefinition(id) {
    const blankParagraph = { type: "paragraph" };
    return {
        type: FOOTNOTE_DEFINITION_NAME,
        attrs: { id },
        content: [blankParagraph],
    };
}
|
||||
/**
 * Rebuild the footnotes list of a ProseMirror-JSON document from its
 * references.
 *
 * Steps, all performed on a deep copy of `doc`:
 *  1. Collect the distinct reference ids in document order.
 *  2. Collect every definition node; the first definition seen for an id keeps
 *     that id, later definitions with the same id are filed under an id from
 *     `deriveFootnoteId`.
 *  3. Build one definition per reference id, in reference order: a copy of the
 *     definition filed under that id, or an empty definition when none exists.
 *  4. Remove every top-level footnotes list and, if there is at least one
 *     reference, insert a single list holding the definitions from step 3
 *     before any run of trailing empty paragraphs.
 *
 * @param doc  Document node. Anything that is not an object with an array
 *             `content` is returned as-is (same reference, not copied).
 * @returns The canonicalized copy; the input is not mutated.
 */
export function canonicalizeFootnotes(doc) {
    const isDocLike = doc != null && typeof doc === "object" && Array.isArray(doc.content);
    if (!isDocLike)
        return doc;

    const out = cloneJson(doc);

    const referenceIds = [];
    collectReferenceIds(out, referenceIds, new Set());

    const defNodes = [];
    collectDefinitions(out, defNodes);

    // Every id already in use by a reference or a definition; derived ids
    // must avoid all of them.
    const taken = new Set(referenceIds);
    defNodes.forEach((def) => {
        const id = def?.attrs?.id;
        if (id)
            taken.add(id);
    });

    const duplicateCount = new Map();
    const firstSeen = new Set();
    const defByFinalId = new Map();
    for (const def of defNodes) {
        const origId = def?.attrs?.id;
        if (!origId)
            continue;
        if (firstSeen.has(origId)) {
            // Duplicate: the first duplicate is occurrence 2, the next 3, ...
            const occurrence = (duplicateCount.get(origId) ?? 1) + 1;
            duplicateCount.set(origId, occurrence);
            const derivedId = deriveFootnoteId(origId, occurrence, taken);
            taken.add(derivedId);
            defByFinalId.set(derivedId, def);
            continue;
        }
        firstSeen.add(origId);
        defByFinalId.set(origId, def);
    }

    // One definition per referenced id, in reference order.
    const orderedDefs = referenceIds.map((id) => {
        const existing = defByFinalId.get(id);
        if (!existing)
            return emptyDefinition(id);
        const copy = cloneJson(existing);
        copy.attrs = { ...(copy.attrs ?? {}), id };
        return copy;
    });

    const top = out.content.filter((n) => !(n && n.type === FOOTNOTES_LIST_NAME));
    if (referenceIds.length > 0) {
        // Keep trailing empty paragraphs after the list.
        let insertAt = top.length;
        while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1]))
            insertAt -= 1;
        top.splice(insertAt, 0, { type: FOOTNOTES_LIST_NAME, content: orderedDefs });
    }
    out.content = top;
    return out;
}
|
||||
/**
 * Normalized content key used to decide whether two footnote DEFINITIONS hold
 * the same content.
 *
 * The key is built from every text node under `defNode`, in document order.
 * A text node without marks contributes its text. A text node with marks
 * contributes its text followed by a mark signature: the node's mark type
 * names, sorted and comma-joined, wrapped in the control characters U+0001 and
 * U+0002. The delimiters keep the signature from running into the text, so a
 * bold "x" and a plain "xbold" produce different keys.
 *
 * After joining, runs of spaces/tabs/CR/LF are collapsed to a single space and
 * the result is trimmed. A definition with no text yields "".
 *
 * Only mark TYPES are part of the key; mark attributes (for example a link's
 * target) are not.
 *
 * @param defNode  A definition node (or any ProseMirror-JSON subtree).
 * @returns The comparison key. It is meant for equality checks only.
 */
export function footnoteContentKey(defNode) {
    const parts = [];
    const visit = (n) => {
        if (!n || typeof n !== "object")
            return;
        if (n.type === "text" && typeof n.text === "string") {
            // map/filter produce a fresh array, so sort() does not touch n.marks.
            const marks = Array.isArray(n.marks)
                ? n.marks.map((m) => m?.type).filter(Boolean).sort().join(",")
                : "";
            // Delimit the signature so it can never be confused with body text.
            parts.push(marks === "" ? n.text : `${n.text}\u0001${marks}\u0002`);
        }
        if (Array.isArray(n.content)) {
            for (const c of n.content)
                visit(c);
        }
    };
    visit(defNode);
    return parts
        .join("")
        .replace(/[ \t\r\n]+/g, " ")
        .trim();
}
|
||||
/**
 * Wrap inline nodes in a footnote definition for `id`.
 *
 * @param id           Value stored in the definition's `attrs.id`.
 * @param inlineNodes  Inline ProseMirror-JSON nodes; they are deep-copied.
 *                     A non-array value is treated as no content.
 * @returns A definition node containing a single paragraph with that content.
 */
export function makeFootnoteDefinition(id, inlineNodes) {
    let content = [];
    if (Array.isArray(inlineNodes))
        content = cloneJson(inlineNodes);
    const paragraph = { type: "paragraph", content };
    return {
        type: FOOTNOTE_DEFINITION_NAME,
        attrs: { id },
        content: [paragraph],
    };
}
|
||||
/**
 * Produce a uuid-shaped id (8-4-4-4-12 lowercase hex groups).
 *
 * The first two groups are the current `Date.now()` value in hex, left-padded
 * to 12 digits, so ids created later sort later. The third group starts with
 * the version digit 7, the fourth with a variant digit in 8..b. All remaining
 * digits come from `Math.random()`.
 */
export function generateFootnoteId() {
    const hexDigits = (count) => {
        let digits = "";
        while (digits.length < count)
            digits += Math.floor(Math.random() * 16).toString(16);
        return digits;
    };
    const timeHex = Date.now().toString(16).padStart(12, "0");
    const versionGroup = "7" + hexDigits(3);
    const variantNibble = (8 + Math.floor(Math.random() * 4)).toString(16);
    const variantGroup = variantNibble + hexDigits(3);
    return [
        timeHex.slice(0, 8),
        timeHex.slice(8, 12),
        versionGroup,
        variantGroup,
        hexDigits(12),
    ].join("-");
}
|
||||
@@ -14,6 +14,8 @@
|
||||
* - `marks` arrays are preserved verbatim when fragments are split/reordered.
|
||||
*/
|
||||
import { blockPlainText } from "./node-ops.js";
|
||||
import { canonicalizeFootnotes, footnoteContentKey, makeFootnoteDefinition, generateFootnoteId, } from "./footnote-canonicalize.js";
|
||||
export { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
/** Deep-clone a JSON-serializable value without mutating the original. */
|
||||
function clone(value) {
|
||||
if (typeof structuredClone === "function") {
|
||||
@@ -471,3 +473,121 @@ export function commentsToFootnotes(doc, comments, opts = {}) {
|
||||
const synced = setCalloutRange(working, definitions.length);
|
||||
return { doc: synced.doc, consumed };
|
||||
}
|
||||
/**
 * Temporary marker text ("IFN" wrapped in NUL characters) inserted after the
 * anchor and then swapped for a real footnoteReference node. The NUL
 * delimiters are assumed never to occur in real prose.
 */
|
||||
const INLINE_FOOTNOTE_SENTINEL = "\u0000IFN\u0000";
|
||||
/**
 * Insert a footnote at its point of use and re-derive the footnotes list.
 *
 * Flow:
 *  1. `opts.text` (markdown) is converted to inline nodes and a content key is
 *     computed for it.
 *  2. If the key is non-empty, the document is walked for the first
 *     footnoteDefinition with a non-empty string id and the same content key;
 *     its id is reused. Otherwise a new id is generated.
 *  3. A sentinel is inserted after `opts.anchorText` (trailing whitespace
 *     trimmed) via `insertMarkerAfter`. If that reports no insertion, a clone
 *     of the input is returned with `inserted: false`.
 *  4. The sentinel is replaced by a footnoteReference node, a new definition
 *     is appended unless an id was reused, and the result is passed through
 *     `canonicalizeFootnotes`.
 *
 * @param doc   ProseMirror-JSON document.
 * @param opts  `{ anchorText, text }`; missing values are treated as "".
 * @returns `{ doc, inserted, footnoteId, reused }`. `footnoteId` is set even
 *          when nothing was inserted.
 */
export function insertInlineFootnote(doc, opts) {
    const inline = mdToInlineNodes(opts.text ?? "");
    const key = footnoteContentKey(makeFootnoteDefinition("", inline));

    // Content dedup: remember the id of the first definition whose key matches.
    let matchedId = null;
    const considerNode = (n) => {
        if (matchedId != null)
            return;
        if (!isObject(n) || n.type !== "footnoteDefinition")
            return;
        const id = n.attrs ? n.attrs.id : undefined;
        if (typeof id !== "string" || id === "")
            return;
        if (footnoteContentKey(n) === key)
            matchedId = id;
    };
    if (key !== "")
        walk(doc, considerNode);

    const reused = matchedId != null;
    const footnoteId = reused ? matchedId : generateFootnoteId();

    // Mark the insertion point with a sentinel.
    const anchor = (opts.anchorText ?? "").trimEnd();
    const marked = insertMarkerAfter(doc, anchor, INLINE_FOOTNOTE_SENTINEL);
    if (!marked.inserted) {
        return { doc: clone(doc), inserted: false, footnoteId, reused };
    }

    const working = marked.doc;
    replaceSentinelWithReference(working, footnoteId);
    // A reused id already has its definition; only a fresh id needs one.
    if (!reused) {
        appendDefinition(working, makeFootnoteDefinition(footnoteId, inline));
    }
    // Numbering and the single bottom list are derived here.
    return { doc: canonicalizeFootnotes(working), inserted: true, footnoteId, reused };
}
|
||||
/**
 * Swap the first text node containing the inline-footnote sentinel for a
 * footnoteReference node carrying `footnoteId`. Mutates `doc` in place.
 *
 * Text before the sentinel (minus one trailing space, if present) and text
 * after it are kept as separate text nodes that copy the original node's
 * other properties and marks. Empty pieces are dropped. At each container the
 * direct children are checked before descending into them; the search stops
 * after the first replacement.
 */
function replaceSentinelWithReference(doc, footnoteId) {
    const holdsSentinel = (n) => isObject(n) &&
        n.type === "text" &&
        typeof n.text === "string" &&
        n.text.includes(INLINE_FOOTNOTE_SENTINEL);

    // Returns true once a replacement has been made somewhere under `container`.
    const replaceWithin = (container) => {
        if (!isObject(container) || !Array.isArray(container.content))
            return false;
        const siblings = container.content;
        const at = siblings.findIndex(holdsSentinel);
        if (at !== -1) {
            const node = siblings[at];
            const cut = node.text.indexOf(INLINE_FOOTNOTE_SENTINEL);
            // Drop a single trailing space so the reference hugs the preceding word.
            const before = node.text.slice(0, cut).replace(/ $/, "");
            const after = node.text.slice(cut + INLINE_FOOTNOTE_SENTINEL.length);
            const marks = Array.isArray(node.marks) ? node.marks : [];
            const replacement = [{ type: "footnoteReference", attrs: { id: footnoteId } }];
            if (before.length > 0)
                replacement.unshift({ ...node, text: before, marks: [...marks] });
            if (after.length > 0)
                replacement.push({ ...node, text: after, marks: [...marks] });
            siblings.splice(at, 1, ...replacement);
            return true;
        }
        return siblings.some((child) => replaceWithin(child));
    };
    replaceWithin(doc);
}
|
||||
/**
 * Add `defNode` to the document so a later canonicalization pass can place
 * it. Mutates `doc` in place.
 *
 * If `getList` finds a footnotesList whose `content` is an array, the
 * definition is pushed onto it. Otherwise, when `doc.content` is an array, a
 * new footnotesList holding only `defNode` is pushed at the end of the
 * document. If neither applies, nothing happens.
 */
function appendDefinition(doc, defNode) {
    const list = getList(doc, (n) => isObject(n) && n.type === "footnotesList");
    const target = list && Array.isArray(list.content) ? list.content : null;
    if (target) {
        target.push(defNode);
        return;
    }
    if (Array.isArray(doc.content)) {
        doc.content.push({ type: "footnotesList", content: [defNode] });
    }
}
|
||||
|
||||
@@ -60,6 +60,8 @@ import {
|
||||
noteItem,
|
||||
mdToInlineNodes,
|
||||
commentsToFootnotes,
|
||||
canonicalizeFootnotes,
|
||||
insertInlineFootnote,
|
||||
} from "./lib/transforms.js";
|
||||
import vm from "node:vm";
|
||||
|
||||
@@ -1344,6 +1346,12 @@ export class DocmostClient {
|
||||
// inject javascript:/data: link hrefs or media srcs straight into the doc.
|
||||
this.validateDocUrls(doc);
|
||||
|
||||
// Canonicalize footnotes (idempotent): an agent-authored JSON doc cannot
|
||||
// leave footnotes out of order, orphaned, or in multiple lists — the bottom
|
||||
// list + numbering are always derived from reference order. No-op when the
|
||||
// footnotes are already canonical.
|
||||
doc = canonicalizeFootnotes(doc);
|
||||
|
||||
// Write the BODY first, then the title (#159 split-brain): a failed body
|
||||
// write (e.g. persist timeout) must not leave a new title over the old body.
|
||||
const collabToken = await this.getCollabTokenWithReauth();
|
||||
@@ -1368,6 +1376,59 @@ export class DocmostClient {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Insert a footnote into a page at its point of use.
 *
 * The caller supplies only the anchor (`anchorText`, existing body text the
 * marker is attached after) and the footnote content (`text`, markdown). The
 * live document is transformed by `insertInlineFootnote` inside
 * `mutatePageContent`; when the anchor is not found the transform throws, so
 * the error propagates to the caller instead of a document being returned.
 *
 * @param pageId      Page to modify.
 * @param anchorText  Must contain at least one non-whitespace character.
 * @param text        Must contain at least one non-whitespace character.
 * @returns A result object with `success`, `modified`, `pageId`, the
 *          `footnoteId` used, whether an existing definition was `reused`,
 *          a human-readable `message`, and the mutation's `verify` report.
 * @throws Error when `anchorText` or `text` is blank, or when the anchor text
 *         is not found in the document.
 */
async insertFootnote(pageId: string, anchorText: string, text: string) {
  await this.ensureAuthenticated();

  const anchorIsBlank = !anchorText || !anchorText.trim();
  if (anchorIsBlank) {
    throw new Error("insert_footnote: anchorText is required");
  }
  const textIsBlank = text == null || `${text}`.trim() === "";
  if (textIsBlank) {
    throw new Error("insert_footnote: text is required");
  }

  const collabToken = await this.getCollabTokenWithReauth();

  // Filled in by the transform callback once the footnote has been inserted.
  let captured: { footnoteId: string; reused: boolean } | null = null;
  const applyInsertion = (liveDoc: any) => {
    const outcome = insertInlineFootnote(liveDoc, { anchorText, text });
    if (!outcome.inserted) {
      // Only the first 80 characters of the anchor are echoed back.
      const shown = JSON.stringify(anchorText.slice(0, 80));
      throw new Error(`insert_footnote: anchor text not found: ${shown}`);
    }
    captured = { footnoteId: outcome.footnoteId, reused: outcome.reused };
    return outcome.doc;
  };
  const mutation = await mutatePageContent(
    pageId,
    collabToken,
    this.apiUrl,
    applyInsertion,
  );

  // The assignment happens inside a closure, which the checker cannot follow.
  const info = captured as any;
  return {
    success: true,
    modified: true,
    pageId,
    footnoteId: info ? info.footnoteId : undefined,
    reused: info ? info.reused : undefined,
    message:
      info && info.reused
        ? "Footnote inserted (reused an existing same-content definition)."
        : "Footnote inserted.",
    verify: mutation.verify,
  };
}
|
||||
|
||||
/**
|
||||
* Export a page to a single self-contained Docmost-flavoured markdown file:
|
||||
* meta block + body (with inline comment anchors + diagrams) + comment
|
||||
@@ -2986,6 +3047,8 @@ export class DocmostClient {
|
||||
noteItem,
|
||||
mdToInlineNodes,
|
||||
commentsToFootnotes,
|
||||
canonicalizeFootnotes,
|
||||
insertInlineFootnote,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -3022,21 +3085,26 @@ export class DocmostClient {
|
||||
"transform must evaluate to a function (doc, ctx) => doc",
|
||||
);
|
||||
}
|
||||
const result = vm.runInNewContext(
|
||||
const raw = vm.runInNewContext(
|
||||
"f(d, c)",
|
||||
{ f: fn, d: sandbox.doc, c: ctx },
|
||||
{ timeout: 5000 },
|
||||
);
|
||||
if (
|
||||
!result ||
|
||||
typeof result !== "object" ||
|
||||
result.type !== "doc" ||
|
||||
!Array.isArray(result.content)
|
||||
!raw ||
|
||||
typeof raw !== "object" ||
|
||||
raw.type !== "doc" ||
|
||||
!Array.isArray(raw.content)
|
||||
) {
|
||||
throw new Error(
|
||||
'transform must return a ProseMirror doc node ({ type:"doc", content:[...] })',
|
||||
);
|
||||
}
|
||||
// Auto-canonicalize footnotes after the transform (idempotent): no write
|
||||
// path can leave footnotes out of order / orphaned / in a raw `[^id]`
|
||||
// block. In a dryRun preview this may surface footnote edits the script
|
||||
// author did not write (the canonicalizer tidied them) — that is expected.
|
||||
const result = canonicalizeFootnotes(raw);
|
||||
// Validate the returned doc before it can be written.
|
||||
this.validateDocStructure(result);
|
||||
this.validateDocUrls(result);
|
||||
|
||||
@@ -892,8 +892,12 @@ server.registerTool(
|
||||
"mark-safe), setCalloutRange(doc, n) (sync a [1]…[K] callout range to " +
|
||||
"[1]…[n]), noteItem(inlineNodes) (wrap inline nodes in a listItem with a " +
|
||||
"fresh id), mdToInlineNodes(markdown) (comment markdown -> inline nodes), " +
|
||||
"and commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
|
||||
"comments into numbered footnotes). Footnote convention: markers are " +
|
||||
"commentsToFootnotes(doc, comments, {notesHeading}) (turn inline " +
|
||||
"comments into numbered footnotes), canonicalizeFootnotes(doc) (derive " +
|
||||
"footnote numbering + the single bottom list from reference order, drop " +
|
||||
"orphans/duplicates — runs automatically after every transform too), and " +
|
||||
"insertInlineFootnote(doc, {anchorText, text}) (author-inline footnote: " +
|
||||
"marker + dedup'd definition, list derived). Footnote convention: markers are " +
|
||||
"plain '[N]' text in the body; the notes are an orderedList under a " +
|
||||
"heading whose text is 'Примечания переводчика'. The transform runs " +
|
||||
"sandboxed (no require/process/fs/network, 5s timeout) and must return a " +
|
||||
@@ -934,6 +938,41 @@ server.registerTool(
|
||||
},
|
||||
);
|
||||
|
||||
// Tool: insert_footnote
|
||||
server.registerTool(
|
||||
"insert_footnote",
|
||||
{
|
||||
description:
|
||||
"Insert an AUTHOR-INLINE footnote: you specify only WHERE (anchorText) " +
|
||||
"and WHAT (text). The footnote marker is placed right after anchorText in " +
|
||||
"the body, and the bottom footnotes list + the numbering are derived " +
|
||||
"deterministically server-side. You do NOT assign a number, and you " +
|
||||
"never see or edit the footnotes list — so footnotes cannot end up out " +
|
||||
"of order, orphaned, or as a raw '[^id]' block. If a footnote with the " +
|
||||
"SAME text already exists, its number is REUSED (one definition, several " +
|
||||
"references). The write is atomic and won't clobber concurrent edits; if " +
|
||||
"anchorText is not found, nothing is written and an error is returned.",
|
||||
inputSchema: {
|
||||
pageId: z.string().min(1),
|
||||
anchorText: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe(
|
||||
"A snippet of existing body text; the footnote marker is inserted " +
|
||||
"immediately after its first occurrence (mark-safe).",
|
||||
),
|
||||
text: z
|
||||
.string()
|
||||
.min(1)
|
||||
.describe("The footnote content as markdown (becomes the definition)."),
|
||||
},
|
||||
},
|
||||
async ({ pageId, anchorText, text }) => {
|
||||
const result = await docmostClient.insertFootnote(pageId, anchorText, text);
|
||||
return jsonContent(result);
|
||||
},
|
||||
);
|
||||
|
||||
// Tool: diff_page_versions
|
||||
registerShared(
|
||||
SHARED_TOOL_SPECS.diffPageVersions,
|
||||
|
||||
@@ -11,6 +11,7 @@ import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||
import { withPageLock } from "./page-lock.js";
|
||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||
import { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
import { summarizeChange, VerifyReport } from "./diff.js";
|
||||
|
||||
/**
|
||||
@@ -400,7 +401,12 @@ export async function markdownToProseMirror(
|
||||
const { body, section } = extractFootnotes(withCallouts);
|
||||
const html = (await marked.parse(body)) + section;
|
||||
const bridged = bridgeTaskLists(html);
|
||||
return generateJSON(bridged, docmostExtensions);
|
||||
const json = generateJSON(bridged, docmostExtensions);
|
||||
// Canonicalize footnotes on EVERY import: the section above is built in
|
||||
// definition order, but numbering is derived from REFERENCE order — so without
|
||||
// this the bottom list renders out of order (`1, 4, 2, 3, …`). Idempotent, so
|
||||
// it is a no-op when the footnotes are already canonical.
|
||||
return canonicalizeFootnotes(json);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
243
packages/mcp/src/lib/footnote-canonicalize.ts
Normal file
243
packages/mcp/src/lib/footnote-canonicalize.ts
Normal file
@@ -0,0 +1,243 @@
|
||||
/**
|
||||
* Server-side footnote canonicalizer + inline authoring helper (MCP mirror).
|
||||
*
|
||||
* `canonicalizeFootnotes(doc)` is a pure ProseMirror-JSON port of the editor's
|
||||
* `footnoteSyncPlugin` end-state, identical in behaviour to
|
||||
* `@docmost/editor-ext`'s `canonicalizeFootnotes`. It is mirrored here — rather
|
||||
* than imported from editor-ext — for the SAME reason `footnote-lex.ts` and the
|
||||
* `docmost-schema.ts` nodes are mirrored: the MCP package is deliberately
|
||||
* decoupled from the browser/React-heavy editor barrel and operates on plain
|
||||
* JSON. The editor-ext copy owns the golden test against the live plugin; this
|
||||
* copy must stay behaviourally identical.
|
||||
*
|
||||
* Why it exists: every NON-editor write path (markdown import, update_page_json,
|
||||
* docmost_transform, insert_footnote) builds ProseMirror JSON directly, so the
|
||||
* editor's footnote plugins never run and the canonical topology (sequential
|
||||
* numbering by first reference, one trailing list, no orphans, no raw `[^id]`)
|
||||
* was never enforced. Running this at the end of every write path closes that
|
||||
* gap; because it is idempotent, it is a no-op when the footnotes are already
|
||||
* canonical (no spurious mutations / git-sync churn).
|
||||
*/
|
||||
|
||||
// ProseMirror node `type` strings this module matches on / emits.
// Inline marker node placed in the body at the point of use.
const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
|
||||
// Container node that holds the footnote definitions.
const FOOTNOTES_LIST_NAME = "footnotesList";
|
||||
// One definition node (carries `attrs.id`).
const FOOTNOTE_DEFINITION_NAME = "footnoteDefinition";
|
||||
|
||||
/**
 * Deep-copy a JSON-serializable value so the copy can be mutated freely.
 * Uses the runtime's `structuredClone` when it exists and falls back to a
 * JSON round-trip otherwise.
 */
function cloneJson<T>(v: T): T {
  const hasNativeClone = typeof structuredClone === "function";
  return hasNativeClone
    ? structuredClone(v)
    : (JSON.parse(JSON.stringify(v)) as T);
}
|
||||
|
||||
/**
 * Derive a replacement id for a duplicate footnote definition.
 *
 * The first candidate is `<originalId>__<occurrence>`. While a candidate is
 * already present in `taken`, the next one is formed by appending
 * `suffix(1)`, `suffix(2)`, ... to that base. The result depends only on the
 * three arguments (no randomness, no clock).
 *
 * @param originalId  Id shared by the colliding definitions.
 * @param occurrence  Ordinal of this duplicate (the caller passes 2 for the
 *                    first duplicate, 3 for the next, ...).
 * @param taken       Ids that must not be returned. This function only reads
 *                    it; the caller is responsible for recording the result.
 * @returns An id that is not a member of `taken`.
 */
export function deriveFootnoteId(
  originalId: string,
  occurrence: number,
  taken: Set<string> | ReadonlySet<string>,
): string {
  const base = `${originalId}__${occurrence}`;
  if (!taken.has(base)) return base;
  for (let attempt = 1; ; attempt += 1) {
    const candidate = `${base}${suffix(attempt)}`;
    if (!taken.has(candidate)) return candidate;
  }
}
|
||||
|
||||
/**
 * Encode a positive counter as lowercase letters in the range 'b'..'z'
 * (bijective base-25: 1 -> "b", 25 -> "z", 26 -> "bb").
 * Returns the empty string when `n` is not greater than zero.
 */
function suffix(n: number): string {
  const letters: string[] = [];
  for (let rest = n; rest > 0; rest = Math.floor((rest - 1) / 25)) {
    // 98 is the char code of 'b'; the least-significant letter is produced first.
    letters.unshift(String.fromCharCode(98 + ((rest - 1) % 25)));
  }
  return letters.join("");
}
|
||||
|
||||
/**
 * True when `node` is a `paragraph` node with no children: its `content` is
 * either not an array or an empty array. Any falsy `node` yields false.
 */
function isEmptyParagraph(node: any): boolean {
  if (!node) return false;
  if (node.type !== "paragraph") return false;
  const hasChildren =
    Array.isArray(node.content) && node.content.length !== 0;
  return !hasChildren;
}
|
||||
|
||||
/**
 * Append to `out` every distinct footnote-reference id found under `node`,
 * in document (pre-order, left-to-right) order.
 *
 * @param node  Root of the ProseMirror-JSON subtree to scan; non-objects are ignored.
 * @param out   Array that receives the ids, first occurrence only.
 * @param seen  Set of ids already emitted; updated alongside `out`.
 */
function collectReferenceIds(node: any, out: string[], seen: Set<string>): void {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original order.
  const pending: any[] = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== "object") continue;
    if (current.type === FOOTNOTE_REFERENCE_NAME) {
      const id = current?.attrs?.id;
      if (id && !seen.has(id)) {
        seen.add(id);
        out.push(id);
      }
    }
    const children = current.content;
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i--) pending.push(children[i]);
    }
  }
}
|
||||
|
||||
/**
 * Append to `out` every footnote-definition node found under `node`
 * (including `node` itself), in document (pre-order, left-to-right) order.
 * The nodes are pushed by reference, not copied.
 */
function collectDefinitions(node: any, out: any[]): void {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped (visited) in their original order.
  const pending: any[] = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== "object") continue;
    if (current.type === FOOTNOTE_DEFINITION_NAME) out.push(current);
    const children = current.content;
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i--) pending.push(children[i]);
    }
  }
}
|
||||
|
||||
/**
 * Build a placeholder footnote definition for `id`: a definition node whose
 * only child is a paragraph with no content.
 */
function emptyDefinition(id: string): any {
  const blankParagraph = { type: "paragraph" };
  return {
    type: FOOTNOTE_DEFINITION_NAME,
    attrs: { id },
    content: [blankParagraph],
  };
}
|
||||
|
||||
/**
 * Rebuild the footnotes list of a ProseMirror-JSON document from its
 * references.
 *
 * Steps, all performed on a deep copy of `doc`:
 *  1. Collect the distinct reference ids in document order.
 *  2. Collect every definition node; the first definition seen for an id keeps
 *     that id, later definitions with the same id are filed under an id from
 *     `deriveFootnoteId`.
 *  3. Build one definition per reference id, in reference order: a copy of the
 *     definition filed under that id, or an empty definition when none exists.
 *  4. Remove every top-level footnotes list and, if there is at least one
 *     reference, insert a single list holding the definitions from step 3
 *     before any run of trailing empty paragraphs.
 *
 * @param doc  Document node. Anything that is not an object with an array
 *             `content` is returned as-is (same reference, not copied).
 * @returns The canonicalized copy; the input is not mutated.
 */
export function canonicalizeFootnotes<T = any>(doc: T): T {
  const isDocLike =
    doc != null &&
    typeof doc === "object" &&
    Array.isArray((doc as any).content);
  if (!isDocLike) return doc;

  const out = cloneJson(doc) as any;

  const referenceIds: string[] = [];
  collectReferenceIds(out, referenceIds, new Set<string>());

  const defNodes: any[] = [];
  collectDefinitions(out, defNodes);

  // Every id already in use by a reference or a definition; derived ids must
  // avoid all of them.
  const taken = new Set<string>(referenceIds);
  defNodes.forEach((def) => {
    const id = def?.attrs?.id;
    if (id) taken.add(id);
  });

  const duplicateCount = new Map<string, number>();
  const firstSeen = new Set<string>();
  const defByFinalId = new Map<string, any>();
  for (const def of defNodes) {
    const origId = def?.attrs?.id;
    if (!origId) continue;
    if (firstSeen.has(origId)) {
      // Duplicate: the first duplicate is occurrence 2, the next 3, ...
      const occurrence = (duplicateCount.get(origId) ?? 1) + 1;
      duplicateCount.set(origId, occurrence);
      const derivedId = deriveFootnoteId(origId, occurrence, taken);
      taken.add(derivedId);
      defByFinalId.set(derivedId, def);
      continue;
    }
    firstSeen.add(origId);
    defByFinalId.set(origId, def);
  }

  // One definition per referenced id, in reference order.
  const orderedDefs: any[] = referenceIds.map((id) => {
    const existing = defByFinalId.get(id);
    if (!existing) return emptyDefinition(id);
    const copy = cloneJson(existing);
    copy.attrs = { ...(copy.attrs ?? {}), id };
    return copy;
  });

  const top: any[] = out.content.filter(
    (n: any) => !(n && n.type === FOOTNOTES_LIST_NAME),
  );
  if (referenceIds.length > 0) {
    // Keep trailing empty paragraphs after the list.
    let insertAt = top.length;
    while (insertAt > 0 && isEmptyParagraph(top[insertAt - 1])) insertAt -= 1;
    top.splice(insertAt, 0, { type: FOOTNOTES_LIST_NAME, content: orderedDefs });
  }
  out.content = top;
  return out;
}
|
||||
|
||||
/**
 * Normalized content key used to decide whether two footnote DEFINITIONS hold
 * the same content.
 *
 * The key is built from every text node under `defNode`, in document order.
 * A text node without marks contributes its text. A text node with marks
 * contributes its text followed by a mark signature: the node's mark type
 * names, sorted and comma-joined, wrapped in the control characters U+0001 and
 * U+0002. The delimiters keep the signature from running into the text, so a
 * bold "x" and a plain "xbold" produce different keys.
 *
 * After joining, runs of spaces/tabs/CR/LF are collapsed to a single space and
 * the result is trimmed. A definition with no text yields "".
 *
 * Only mark TYPES are part of the key; mark attributes (for example a link's
 * target) are not.
 *
 * @param defNode  A definition node (or any ProseMirror-JSON subtree).
 * @returns The comparison key. It is meant for equality checks only.
 */
export function footnoteContentKey(defNode: any): string {
  const parts: string[] = [];
  const visit = (n: any): void => {
    if (!n || typeof n !== "object") return;
    if (n.type === "text" && typeof n.text === "string") {
      // map/filter produce a fresh array, so sort() does not touch n.marks.
      const marks = Array.isArray(n.marks)
        ? n.marks.map((m: any) => m?.type).filter(Boolean).sort().join(",")
        : "";
      // Delimit the signature so it can never be confused with body text.
      parts.push(marks === "" ? n.text : `${n.text}\u0001${marks}\u0002`);
    }
    if (Array.isArray(n.content)) {
      for (const c of n.content) visit(c);
    }
  };
  visit(defNode);
  return parts
    .join("")
    .replace(/[ \t\r\n]+/g, " ")
    .trim();
}
|
||||
|
||||
/**
 * Wrap inline ProseMirror nodes in a footnoteDefinition carrying the given id.
 *
 * The inline nodes are passed through `cloneJson` (presumably a deep JSON copy —
 * confirm against its definition) and placed in a single paragraph. A non-array
 * `inlineNodes` yields a definition whose paragraph has empty content.
 *
 * @param id The footnote id stored in `attrs.id`.
 * @param inlineNodes Inline nodes forming the footnote text.
 * @returns A `{ type, attrs: { id }, content: [paragraph] }` JSON node.
 */
export function makeFootnoteDefinition(id: string, inlineNodes: any[]): any {
  const paragraph = {
    type: "paragraph",
    content: Array.isArray(inlineNodes) ? cloneJson(inlineNodes) : [],
  };
  return { type: FOOTNOTE_DEFINITION_NAME, attrs: { id }, content: [paragraph] };
}
|
||||
|
||||
/**
 * Produce a uuidv7-shaped, time-ordered id for a genuinely new inline footnote
 * (stated to match editor-ext's `generateFootnoteId`).
 *
 * Layout: `tttttttt-tttt-7rrr-vrrr-rrrrrrrrrrrr`, where `t` is the current
 * `Date.now()` as 12 zero-padded hex digits, `7` is the version digit, `v` is a
 * variant digit in 8..b, and `r` are hex digits drawn from `Math.random()`.
 *
 * @returns The id as a lowercase, hyphen-separated hex string.
 */
export function generateFootnoteId(): string {
  // `count` random hex digits, one Math.random() draw per digit.
  const hexDigits = (count: number): string => {
    let digits = "";
    for (let k = 0; k < count; k += 1) {
      digits += Math.floor(Math.random() * 16).toString(16);
    }
    return digits;
  };

  const stamp = Date.now().toString(16).padStart(12, "0");
  const versionGroup = `7${hexDigits(3)}`;
  // Top two bits of the variant digit are fixed to 10 -> one of 8, 9, a, b.
  const variantLead = (8 + Math.floor(Math.random() * 4)).toString(16);
  const variantGroup = variantLead + hexDigits(3);

  return [
    stamp.slice(0, 8),
    stamp.slice(8, 12),
    versionGroup,
    variantGroup,
    hexDigits(12),
  ].join("-");
}
|
||||
@@ -15,6 +15,14 @@
|
||||
*/
|
||||
|
||||
import { blockPlainText } from "./node-ops.js";
|
||||
import {
|
||||
canonicalizeFootnotes,
|
||||
footnoteContentKey,
|
||||
makeFootnoteDefinition,
|
||||
generateFootnoteId,
|
||||
} from "./footnote-canonicalize.js";
|
||||
|
||||
export { canonicalizeFootnotes } from "./footnote-canonicalize.js";
|
||||
|
||||
/** Deep-clone a JSON-serializable value without mutating the original. */
|
||||
function clone<T>(value: T): T {
|
||||
@@ -559,3 +567,151 @@ export function commentsToFootnotes(
|
||||
|
||||
return { doc: synced.doc, consumed };
|
||||
}
|
||||
|
||||
/**
 * Input for `insertInlineFootnote`: where the footnote goes and what it says.
 */
export interface InsertInlineFootnoteOptions {
  /**
   * Body text that the footnote marker is placed after. Trailing whitespace is
   * trimmed before the anchor is looked up.
   */
  anchorText: string;
  /** The footnote's content, written as markdown and converted to inline nodes. */
  text: string;
}
|
||||
|
||||
/**
 * Outcome of `insertInlineFootnote`.
 */
export interface InsertInlineFootnoteResult {
  /** The resulting document (a copy of the input when nothing was inserted). */
  doc: any;
  /** `false` when the anchor text could not be found, i.e. nothing was written. */
  inserted: boolean;
  /** Id chosen for the footnote — either freshly generated or taken from a reused definition. */
  footnoteId: string;
  /** `true` when an existing definition with the same content key supplied the id. */
  reused: boolean;
}
|
||||
|
||||
/** A NUL-delimited sentinel that cannot occur in real prose. */
|
||||
const INLINE_FOOTNOTE_SENTINEL = "\u0000IFN\u0000";
|
||||
|
||||
/**
 * Insert a footnote at its point of use. The caller says WHERE (`anchorText`)
 * and WHAT (markdown `text`); the bottom list and its ordering come from
 * `canonicalizeFootnotes`, so the caller never touches `footnotesList`.
 *
 * Steps:
 *  1. Convert `opts.text` to inline nodes and compute its content key.
 *  2. Content dedup: when the key is non-empty, the first `footnoteDefinition`
 *     visited by `walk` that has a non-empty string id and the same key lends
 *     its id to the new reference (`reused: true`). Otherwise a fresh id is
 *     generated.
 *  3. Place a sentinel after the anchor (trailing whitespace trimmed) via
 *     `insertMarkerAfter`, then swap the sentinel for a `footnoteReference`.
 *  4. Unless an id was reused, append a new definition for the id.
 *  5. Canonicalize the whole document.
 *
 * When the anchor is not found the result carries a `clone` of the input with
 * `inserted: false`; `footnoteId` and `reused` still report what step 2 chose.
 *
 * NOTE(review): the marked document is edited in place after
 * `insertMarkerAfter`, which presumably returns a copy rather than `doc`
 * itself — confirm, since the input is otherwise mutated.
 *
 * @param doc The ProseMirror document JSON.
 * @param opts Anchor text and markdown footnote text.
 * @returns The resulting document plus insertion / reuse details.
 */
export function insertInlineFootnote(
  doc: any,
  opts: InsertInlineFootnoteOptions,
): InsertInlineFootnoteResult {
  const noteInline = mdToInlineNodes(opts.text ?? "");
  const wantedKey = footnoteContentKey(makeFootnoteDefinition("", noteInline));

  // Look for an existing definition with identical content; the first hit wins.
  const match: { id: string | null } = { id: null };
  if (wantedKey !== "") {
    walk(doc, (node) => {
      if (match.id != null) return;
      if (!isObject(node) || node.type !== "footnoteDefinition") return;
      const attrs = node.attrs;
      if (!attrs || typeof attrs.id !== "string" || attrs.id === "") return;
      if (footnoteContentKey(node) === wantedKey) match.id = attrs.id;
    });
  }
  const reused = match.id != null;
  const footnoteId = match.id ?? generateFootnoteId();

  // Drop a sentinel marker right after the anchor text.
  const anchor = (opts.anchorText ?? "").trimEnd();
  const marked = insertMarkerAfter(doc, anchor, INLINE_FOOTNOTE_SENTINEL);
  if (!marked.inserted) {
    return { doc: clone(doc), inserted: false, footnoteId, reused };
  }

  // Turn the sentinel into a real reference node.
  const staged = marked.doc;
  replaceSentinelWithReference(staged, footnoteId);

  // A reused id already has its definition; only a new id needs one added.
  if (!reused) {
    appendDefinition(staged, makeFootnoteDefinition(footnoteId, noteInline));
  }

  // Ordering and the single bottom list are derived here.
  return { doc: canonicalizeFootnotes(staged), inserted: true, footnoteId, reused };
}
|
||||
|
||||
/**
 * Swap the first text run containing the inline-footnote sentinel for a
 * `footnoteReference` node. Mutates `doc` in place and stops after one
 * replacement.
 *
 * Within each container the direct children are checked first; only when none
 * of them holds the sentinel are the children searched recursively, in order.
 * Text on either side of the sentinel is kept as separate text nodes that copy
 * the original run (with a copied `marks` array); a single space directly
 * before the sentinel is removed so the reference sits against the preceding
 * word.
 *
 * @param doc Document (or subtree) JSON to search.
 * @param footnoteId Id written to the reference's `attrs.id`.
 */
function replaceSentinelWithReference(doc: any, footnoteId: string): void {
  const holdsSentinel = (node: any) =>
    isObject(node) &&
    node.type === "text" &&
    typeof node.text === "string" &&
    node.text.includes(INLINE_FOOTNOTE_SENTINEL);

  // Returns true once the replacement has been made somewhere under `container`.
  const rewriteIn = (container: any): boolean => {
    if (!isObject(container) || !Array.isArray(container.content)) return false;
    const siblings = container.content;

    const at = siblings.findIndex(holdsSentinel);
    if (at === -1) {
      // Not at this level: descend, stopping at the first subtree that succeeds.
      return siblings.some((child: any) => rewriteIn(child));
    }

    const run = siblings[at];
    const cut = run.text.indexOf(INLINE_FOOTNOTE_SENTINEL);
    // Strip exactly one trailing space ahead of the sentinel.
    const head = run.text.slice(0, cut).replace(/ $/, "");
    const tail = run.text.slice(cut + INLINE_FOOTNOTE_SENTINEL.length);
    const runMarks = Array.isArray(run.marks) ? run.marks : [];

    const pieces: any[] = [{ type: "footnoteReference", attrs: { id: footnoteId } }];
    if (head.length > 0) pieces.unshift({ ...run, text: head, marks: [...runMarks] });
    if (tail.length > 0) pieces.push({ ...run, text: tail, marks: [...runMarks] });

    siblings.splice(at, 1, ...pieces);
    return true;
  };

  rewriteIn(doc);
}
|
||||
|
||||
/**
 * Add a definition node to the document so the canonicalizer can later order
 * and place it. Mutates `doc` in place.
 *
 * The node is pushed onto the content of the `footnotesList` that `getList`
 * locates, when there is one with an array `content`. Otherwise a new
 * `footnotesList` holding just this definition is pushed onto `doc.content`;
 * if `doc.content` is not an array nothing happens.
 *
 * @param doc Document JSON to modify.
 * @param defNode The footnoteDefinition node to add.
 */
function appendDefinition(doc: any, defNode: any): void {
  const list = getList(doc, (node) => isObject(node) && node.type === "footnotesList");
  const target = list && Array.isArray(list.content) ? list.content : null;
  if (target) {
    target.push(defNode);
    return;
  }
  if (!Array.isArray(doc.content)) return;
  doc.content.push({ type: "footnotesList", content: [defNode] });
}
|
||||
|
||||
200
packages/mcp/test/unit/footnote-canonicalize.test.mjs
Normal file
200
packages/mcp/test/unit/footnote-canonicalize.test.mjs
Normal file
@@ -0,0 +1,200 @@
|
||||
import { test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import {
|
||||
canonicalizeFootnotes,
|
||||
footnoteContentKey,
|
||||
} from "../../build/lib/footnote-canonicalize.js";
|
||||
import { insertInlineFootnote } from "../../build/lib/transforms.js";
|
||||
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
|
||||
|
||||
// Depth-first collection of every node of the given type (parents before
// children), appended to `acc`, which is also the return value.
function findAll(node, type, acc = []) {
  if (node && typeof node === "object") {
    if (node.type === type) acc.push(node);
    const children = Array.isArray(node.content) ? node.content : [];
    children.forEach((child) => {
      findAll(child, type, acc);
    });
  }
  return acc;
}
|
||||
// Ids of all footnote definitions, in document order.
const defIds = (doc) => {
  const definitions = findAll(doc, "footnoteDefinition");
  return definitions.map((definition) => definition.attrs.id);
};
// Ids of all footnote references, in document order (repeats included).
const refIds = (doc) => {
  const references = findAll(doc, "footnoteReference");
  return references.map((reference) => reference.attrs.id);
};
|
||||
|
||||
// Tiny builders for the document JSON used by the tests below.
const ref = (id) => {
  return { type: "footnoteReference", attrs: { id } };
};
const def = (id, text) => {
  const body = { type: "paragraph", content: [{ type: "text", text }] };
  return { type: "footnoteDefinition", attrs: { id }, content: [body] };
};
const para = (...inline) => {
  return { type: "paragraph", content: inline };
};
const list = (...defs) => {
  return { type: "footnotesList", content: defs };
};
|
||||
|
||||
test("canonicalize orders definitions by first reference (out-of-order -> 1..N)", () => {
  // The body references b, a, d, c while the stored list reads a, c, b, d.
  const body = para(
    { type: "text", text: "x" },
    ...["b", "a", "d", "c"].map((id) => ref(id)),
  );
  const stored = list(def("a", "A"), def("c", "C"), def("b", "B"), def("d", "D"));

  const result = canonicalizeFootnotes({ type: "doc", content: [body, stored] });

  assert.deepEqual(defIds(result), ["b", "a", "d", "c"]);
  assert.equal(findAll(result, "footnotesList").length, 1);
});
|
||||
|
||||
test("canonicalize drops orphan definitions", () => {
  // "orphan" has a definition but nothing in the body points at it.
  const body = para({ type: "text", text: "x" }, ref("a"));
  const stored = list(def("a", "A"), def("orphan", "O"));

  const result = canonicalizeFootnotes({ type: "doc", content: [body, stored] });

  assert.deepEqual(defIds(result), ["a"]);
});
|
||||
|
||||
test("canonicalize: no references -> no list", () => {
  // A list with a definition but no reference anywhere in the body.
  const body = para({ type: "text", text: "x" });
  const stored = list(def("o", "O"));

  const result = canonicalizeFootnotes({ type: "doc", content: [body, stored] });

  const lists = findAll(result, "footnotesList");
  assert.equal(lists.length, 0);
});
|
||||
|
||||
test("canonicalize: duplicate definitions -> first wins, rest dropped", () => {
  // Two definitions share id "d"; only one reference exists.
  const body = para({ type: "text", text: "x" }, ref("d"));
  const stored = list(def("d", "first"), def("d", "second"));

  const result = canonicalizeFootnotes({ type: "doc", content: [body, stored] });
  const serialized = JSON.stringify(result);

  assert.deepEqual(defIds(result), ["d"]);
  assert.match(serialized, /"first"/);
  assert.doesNotMatch(serialized, /"second"/);
});
|
||||
|
||||
test("canonicalize is idempotent", () => {
  // Start from a non-canonical doc: wrong order plus an orphan.
  const body = para({ type: "text", text: "x" }, ref("b"), ref("a"));
  const stored = list(def("a", "A"), def("b", "B"), def("orphan", "O"));

  const firstPass = canonicalizeFootnotes({ type: "doc", content: [body, stored] });
  const secondPass = canonicalizeFootnotes(firstPass);

  // Re-running on canonical output must change nothing.
  assert.deepEqual(secondPass, firstPass);
});
|
||||
|
||||
test("canonicalize does not mutate its input", () => {
  const input = {
    type: "doc",
    content: [para({ type: "text", text: "x" }, ref("a")), list(def("o", "O"))],
  };
  // Independent copy taken before the call, for comparison afterwards.
  const before = JSON.parse(JSON.stringify(input));

  canonicalizeFootnotes(input);

  assert.deepEqual(input, before);
});
|
||||
|
||||
test("footnoteContentKey: same text -> same key; formatting differs -> different key", () => {
  const plain = def("x", "hello world");
  // Same words with extra leading/trailing/internal whitespace: the key
  // collapses whitespace runs and trims, so this must match `plain`.
  const sameText = def("y", "  hello \n  world ");
  const bold = {
    type: "footnoteDefinition",
    attrs: { id: "z" },
    content: [
      {
        type: "paragraph",
        content: [
          { type: "text", text: "hello world", marks: [{ type: "bold" }] },
        ],
      },
    ],
  };
  assert.equal(footnoteContentKey(plain), footnoteContentKey(sameText));
  assert.notEqual(footnoteContentKey(plain), footnoteContentKey(bold));
});
|
||||
|
||||
test("insertInlineFootnote: places a reference at the anchor and derives the list", () => {
  const doc = {
    type: "doc",
    content: [para({ type: "text", text: "The sky is blue today." })],
  };
  const r = insertInlineFootnote(doc, {
    anchorText: "blue",
    text: "Rayleigh scattering.",
  });
  assert.equal(r.inserted, true);
  assert.equal(r.reused, false);
  assert.equal(refIds(r.doc).length, 1);
  assert.deepEqual(defIds(r.doc), [r.footnoteId]);
  assert.equal(findAll(r.doc, "footnotesList").length, 1);

  // The marker hugs the anchor word: the node right before the reference is
  // text ending in "blue", with no space left between the word and the marker.
  const host = findAll(r.doc, "paragraph").find((p) =>
    (p.content ?? []).some((n) => n && n.type === "footnoteReference"),
  );
  assert.ok(host, "expected a paragraph containing the reference");
  const at = host.content.findIndex((n) => n && n.type === "footnoteReference");
  assert.ok(at > 0, "expected text before the reference");
  const before = host.content[at - 1];
  assert.equal(before.type, "text");
  assert.ok(before.text.endsWith("blue"));
});
|
||||
|
||||
test("insertInlineFootnote: content dedup -> same text reuses one definition, two refs", () => {
  const start = {
    type: "doc",
    content: [para({ type: "text", text: "Alpha and beta and gamma." })],
  };
  const note = "shared note";

  const first = insertInlineFootnote(start, { anchorText: "Alpha", text: note });
  const second = insertInlineFootnote(first.doc, { anchorText: "beta", text: note });

  assert.equal(second.reused, true);
  assert.equal(second.footnoteId, first.footnoteId);
  // A single definition serves both references.
  const sharedId = first.footnoteId;
  assert.deepEqual(defIds(second.doc), [sharedId]);
  assert.deepEqual(refIds(second.doc), [sharedId, sharedId]);
});
|
||||
|
||||
test("insertInlineFootnote: distinct text -> two definitions numbered by reference order", () => {
  const start = {
    type: "doc",
    content: [para({ type: "text", text: "First point, second point." })],
  };

  const first = insertInlineFootnote(start, { anchorText: "First", text: "note one" });
  const second = insertInlineFootnote(first.doc, { anchorText: "second", text: "note two" });

  assert.equal(second.reused, false);
  // The derived list must follow the order in which the body references appear.
  const definitionOrder = defIds(second.doc);
  assert.deepEqual(definitionOrder, refIds(second.doc));
  assert.equal(definitionOrder.length, 2);
});
|
||||
|
||||
test("insertInlineFootnote: anchor not found -> inserted:false, no write", () => {
  const start = {
    type: "doc",
    content: [para({ type: "text", text: "nothing to anchor on" })],
  };

  // "ZZZ" does not occur in the body, so there is nowhere to place a marker.
  const outcome = insertInlineFootnote(start, { anchorText: "ZZZ", text: "x" });

  assert.equal(outcome.inserted, false);
  const references = findAll(outcome.doc, "footnoteReference");
  assert.equal(references.length, 0);
});
|
||||
|
||||
test("markdown import: out-of-order definitions render as a reference-ordered list", async () => {
  // The body cites b, a, c; the definitions are written a, b, c. The imported
  // document's bottom list is expected to follow the citation order.
  const body = "See[^b] then[^a] then[^c].";
  const definitions = ["[^a]: alpha", "[^b]: bravo", "[^c]: charlie"];
  const md = [body, "", ...definitions].join("\n");

  const imported = await markdownToProseMirror(md);

  assert.deepEqual(defIds(imported), ["b", "a", "c"]);
  assert.equal(findAll(imported, "footnotesList").length, 1);
});
|
||||
Reference in New Issue
Block a user