refactor(footnotes): address PR #169 review
- footnote-sync: remove the now-dead `refReids` (CollisionPlan field, local, return, the 6a consumer loop) — references are never re-id'd under reuse, so it was dead structure on the hot reconciliation path. Rewrite the stale comments (plugin header, step 0, refOccurrences field) that still described the old "duplicates re-id'd so both survive" model to the reuse model. - Shared footnote lexer: new packages/mcp/src/lib/footnote-lex.ts (lexFootnoteLines + forEachFootnoteReference). extractFootnotes (collaboration) and analyzeFootnotes now consume the SAME fence-aware lexer, so "the analyzer sees exactly what the importer keeps/strips" is structural, not comment-kept. Removed the duplicated DEF_RE/fence machine from both consumers. - Tests: new mock test for the footnoteWarnings plumbing on createPage (problems -> field present; clean -> omitted); new paste-reuse case for TWO colliding pasted definitions (reservation -> distinct ids). Updated the derive-id golden test header (no MCP copy / parity test anymore). - CHANGELOG: [Unreleased] entries for footnote reuse (Changed, supersedes 0.93.0) and footnoteWarnings (Added). editor-ext 129, MCP 301, server roundtrip 2; client+server tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -133,6 +133,70 @@ describe("footnotePastePlugin — reuse-aware id remap", () => {
|
||||
editor.destroy();
|
||||
});
|
||||
|
||||
it("re-ids TWO colliding pasted definitions to DISTINCT ids (reservation works)", () => {
|
||||
// Existing doc has footnotes "a" and "b". Paste a slice that defines BOTH —
|
||||
// each must get its own fresh id; the reservation (existing.add(newId)) keeps
|
||||
// the second from deriving onto the first's new id.
|
||||
const editor = new Editor({
|
||||
extensions,
|
||||
content: {
|
||||
type: "doc",
|
||||
content: [
|
||||
{
|
||||
type: "paragraph",
|
||||
content: [
|
||||
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } },
|
||||
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "b" } },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: FOOTNOTES_LIST_NAME,
|
||||
content: [
|
||||
{
|
||||
type: FOOTNOTE_DEFINITION_NAME,
|
||||
attrs: { id: "a" },
|
||||
content: [{ type: "paragraph", content: [{ type: "text", text: "A" }] }],
|
||||
},
|
||||
{
|
||||
type: FOOTNOTE_DEFINITION_NAME,
|
||||
attrs: { id: "b" },
|
||||
content: [{ type: "paragraph", content: [{ type: "text", text: "B" }] }],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const { schema } = editor;
|
||||
const slice = new Slice(
|
||||
Fragment.fromArray([
|
||||
schema.nodes.paragraph.create(null, [
|
||||
schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }),
|
||||
schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }),
|
||||
]),
|
||||
schema.nodes[FOOTNOTES_LIST_NAME].create(null, [
|
||||
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [
|
||||
schema.nodes.paragraph.create(null, [schema.text("pasted A")]),
|
||||
]),
|
||||
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [
|
||||
schema.nodes.paragraph.create(null, [schema.text("pasted B")]),
|
||||
]),
|
||||
]),
|
||||
]),
|
||||
0,
|
||||
0,
|
||||
);
|
||||
const out = paste(editor, slice);
|
||||
const ids = sliceFootnoteIds(out);
|
||||
const distinct = new Set(ids.map((x) => x.id));
|
||||
// Two ids, both remapped off the originals, and distinct from each other.
|
||||
expect(distinct.size).toBe(2);
|
||||
expect(distinct.has("a")).toBe(false);
|
||||
expect(distinct.has("b")).toBe(false);
|
||||
expect([...distinct].sort()).toEqual(["a__2", "b__2"]);
|
||||
editor.destroy();
|
||||
});
|
||||
|
||||
it("leaves the slice untouched when no pasted definition collides", () => {
|
||||
const editor = makeEditorWithFootnoteA();
|
||||
const { schema } = editor;
|
||||
|
||||
@@ -29,9 +29,9 @@ interface DefOccurrence {
|
||||
|
||||
interface FootnoteScan {
|
||||
/**
|
||||
* Every reference occurrence in document order (NOT de-duplicated). Needed so
|
||||
* that duplicate ids — which would otherwise be silently collapsed — can be
|
||||
* detected and (together with their definitions) re-id'd instead of dropped.
|
||||
* Every reference occurrence in document order (NOT de-duplicated). Repeated
|
||||
* ids are kept so the FIRST appearance fixes definition order; later repeats
|
||||
* are reuse (same footnote) and are never re-id'd.
|
||||
*/
|
||||
refOccurrences: RefOccurrence[];
|
||||
/**
|
||||
@@ -67,15 +67,13 @@ function scan(doc: ProseMirrorNode): FootnoteScan {
|
||||
}
|
||||
|
||||
/**
|
||||
* Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the
|
||||
* concrete reference re-id edits that must be applied to the body so the doc no
|
||||
* longer contains two footnotes sharing a single id.
|
||||
* Result of resolving the footnote id topology: the distinct reference order and
|
||||
* one definition node per id.
|
||||
*
|
||||
* The overriding invariant is that NO definition is ever dropped here: every
|
||||
* definition occurrence ends up with a unique id and therefore survives the
|
||||
* canonical rebuild. Repeated references that share an id are REUSE (one
|
||||
* footnote) and are left untouched; only duplicate DEFINITIONS are re-id'd, so a
|
||||
* pasted/merged second `[^d]:` survives as its own (then orphaned) footnote.
|
||||
* References are NEVER re-id'd here — repeated ids are REUSE (one footnote). Only
|
||||
* duplicate DEFINITIONS are re-id'd; lacking a matching reference, a re-id'd
|
||||
* duplicate is then dropped by the orphan policy. No definition is ever dropped
|
||||
* for COLLIDING — only for being an orphan.
|
||||
*/
|
||||
interface CollisionPlan {
|
||||
/**
|
||||
@@ -86,12 +84,6 @@ interface CollisionPlan {
|
||||
referenceIds: string[];
|
||||
/** id -> definition node, after duplicate definitions were re-id'd. One per id. */
|
||||
definitions: Map<string, ProseMirrorNode>;
|
||||
/**
|
||||
* Body reference re-id edits. ALWAYS EMPTY under reuse semantics (references
|
||||
* are never re-id'd); retained so the downstream consumer stays a harmless
|
||||
* no-op rather than needing removal.
|
||||
*/
|
||||
refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>;
|
||||
/** True when a duplicate definition required a re-id. */
|
||||
changed: boolean;
|
||||
}
|
||||
@@ -123,13 +115,6 @@ interface CollisionPlan {
|
||||
*/
|
||||
function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
||||
const definitions = new Map<string, ProseMirrorNode>();
|
||||
// References are never re-id'd under reuse semantics, so this stays empty; it
|
||||
// is retained so the CollisionPlan shape (and its no-op consumer) is unchanged.
|
||||
const refReids: Array<{
|
||||
pos: number;
|
||||
node: ProseMirrorNode;
|
||||
newId: string;
|
||||
}> = [];
|
||||
const referenceIds: string[] = [];
|
||||
const seenRefIds = new Set<string>();
|
||||
let changed = false;
|
||||
@@ -173,7 +158,7 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
||||
}
|
||||
}
|
||||
|
||||
return { referenceIds, definitions, refReids, changed };
|
||||
return { referenceIds, definitions, changed };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -209,14 +194,13 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
||||
* ping-pong forever (list moved to end -> trailing paragraph appended -> list
|
||||
* no longer last -> moved again ...).
|
||||
*
|
||||
* Duplicate-id collisions (two references and/or two definitions sharing one
|
||||
* id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a
|
||||
* reference+definition pair) are resolved up front by resolveCollisions(): the
|
||||
* duplicates are re-id'd to fresh unique ids so BOTH survive as distinct
|
||||
* footnotes. This guarantees the overriding invariant — no footnoteDefinition is
|
||||
* ever silently deleted by this automatic (addToHistory:false) transaction. A
|
||||
* The id topology is resolved up front by resolveCollisions() (#166): repeated
|
||||
* references sharing an id are REUSE — one footnote, never re-id'd — while a
|
||||
* duplicate DEFINITION (from pasting/duplicating a definition, or a collab merge)
|
||||
* is re-id'd to a fresh unique id. No footnoteDefinition is ever silently deleted
|
||||
* by this automatic (addToHistory:false) transaction because of a COLLISION; a
|
||||
* definition is only ever removed when it has NO matching reference (orphan
|
||||
* policy), never because its id collided with another.
|
||||
* policy) — which is also what then drops a re-id'd duplicate definition.
|
||||
*/
|
||||
export function footnoteSyncPlugin(
|
||||
isRemoteTransaction?: (tr: Transaction) => boolean,
|
||||
@@ -247,18 +231,16 @@ export function footnoteSyncPlugin(
|
||||
|
||||
const info = scan(doc);
|
||||
|
||||
// 0) Resolve duplicate-id collisions (two references and/or two
|
||||
// definitions sharing one id) by re-id'ing duplicates to fresh unique
|
||||
// ids. This is the critical defense: the old last-wins Map silently
|
||||
// dropped all but the last definition for a shared id; here EVERY
|
||||
// definition survives with a unique id, and duplicate references are
|
||||
// paired with duplicate definitions so two same-id imports/pastes yield
|
||||
// two distinct footnotes instead of one.
|
||||
// 0) Resolve the id topology (#166): repeated references that share an id
|
||||
// are REUSE — collapsed to one entry in `referenceIds`, never re-id'd —
|
||||
// while a duplicate DEFINITION is re-id'd to a fresh deterministic id
|
||||
// (and, lacking a matching reference, removed by the orphan policy
|
||||
// below). No definition is dropped for COLLIDING, only for being orphan.
|
||||
const plan = resolveCollisions(info);
|
||||
const referenceIds = plan.referenceIds;
|
||||
|
||||
// The set of ids that must have a definition, in reference order (after
|
||||
// collision re-id). De-duplicated already by resolveCollisions.
|
||||
// The set of ids that must have a definition, in reference order.
|
||||
// De-duplicated already by resolveCollisions.
|
||||
const referenceIdSet = new Set(referenceIds);
|
||||
|
||||
// 1) For each definition occurrence, compute the id it should END UP with
|
||||
@@ -361,21 +343,15 @@ export function footnoteSyncPlugin(
|
||||
|
||||
// 6) Apply the targeted, minimal mutations in ONE transaction. We never
|
||||
// delete-and-recreate an unchanged definition subtree; we only:
|
||||
// (a) re-id specific colliding references and definitions (attr-only),
|
||||
// (a) re-id colliding definitions (attr-only),
|
||||
// (b) delete genuine orphan definitions and extra/empty lists,
|
||||
// (c) insert genuinely-missing empty definitions and migrate defs out
|
||||
// of extra lists into the primary list,
|
||||
// (d) create the primary list if references exist but none does yet.
|
||||
// References are never re-id'd (reuse), so there is no reference edit.
|
||||
const tr = newState.tr;
|
||||
|
||||
// 6a) Re-id colliding references (inline atoms: attr-only, size-stable).
|
||||
for (const reid of plan.refReids) {
|
||||
tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, {
|
||||
...reid.node.attrs,
|
||||
id: reid.newId,
|
||||
});
|
||||
}
|
||||
// 6b) Re-id colliding definitions IN PLACE (attr-only). This preserves the
|
||||
// 6a) Re-id colliding definitions IN PLACE (attr-only). This preserves the
|
||||
// definition's content subtree — never delete+recreate it.
|
||||
for (const reid of defReidsToApply) {
|
||||
tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, {
|
||||
|
||||
@@ -4,16 +4,12 @@ import { deriveFootnoteId } from "./footnote-util";
|
||||
/**
|
||||
* GOLDEN TABLE for `deriveFootnoteId` (and its private alphabetic `suffix`).
|
||||
*
|
||||
* deriveFootnoteId is DELIBERATELY duplicated in
|
||||
* packages/mcp/src/lib/collaboration.ts
|
||||
* and the two copies MUST stay byte-for-byte equivalent in behavior so the same
|
||||
* markdown imported through the editor and through the MCP path yields identical
|
||||
* footnote ids. This table is the SHARED contract: the parity test
|
||||
* packages/mcp/test/unit/derive-id-parity.test.mjs
|
||||
* pins the exact SAME (input -> expected) pairs against the COMPILED mcp build.
|
||||
* If either copy drifts, one of the two tests goes red.
|
||||
*
|
||||
* Keep this constant in sync with GOLDEN in the mcp parity test.
|
||||
* `deriveFootnoteId` lives ONLY in editor-ext now — it is used by
|
||||
* `resolveCollisions` (re-id of a duplicate definition) and `footnotePastePlugin`
|
||||
* (re-id of a pasted colliding definition). The MCP/marked import paths no longer
|
||||
* derive ids (duplicate definitions there are first-wins-dropped, #166), so there
|
||||
* is no cross-package copy and no parity test to keep in sync. This table pins the
|
||||
* deterministic scheme so a future change to it is a conscious one.
|
||||
*/
|
||||
export const DERIVE_GOLDEN: Array<{
|
||||
originalId: string;
|
||||
|
||||
@@ -10,6 +10,7 @@ import { JSDOM } from "jsdom";
|
||||
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||
import { withPageLock } from "./page-lock.js";
|
||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||
import { summarizeChange } from "./diff.js";
|
||||
/**
|
||||
* Build the descriptive error for an opaque Yjs encode failure ("Unexpected
|
||||
@@ -280,7 +281,8 @@ function bridgeTaskLists(html) {
|
||||
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
|
||||
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
|
||||
// definition lines are collected into a single <section data-footnotes>.
|
||||
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||
// Definition detection + fence handling are shared with analyzeFootnotes via
|
||||
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
|
||||
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
||||
function escapeFootnoteAttr(value) {
|
||||
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
||||
@@ -308,28 +310,17 @@ marked.use({ extensions: [footnoteRefMarkedExtension] });
|
||||
* <section data-footnotes> for them (or "" when there are none).
|
||||
*/
|
||||
function extractFootnotes(markdown) {
|
||||
const lines = markdown.split("\n");
|
||||
const bodyLines = [];
|
||||
const defs = [];
|
||||
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
|
||||
// block is preserved verbatim and not treated as a footnote definition.
|
||||
let fence = null;
|
||||
for (const line of lines) {
|
||||
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
|
||||
if (fenceMatch) {
|
||||
const marker = fenceMatch[2][0];
|
||||
if (fence === null)
|
||||
fence = marker;
|
||||
else if (marker === fence)
|
||||
fence = null;
|
||||
bodyLines.push(line);
|
||||
continue;
|
||||
}
|
||||
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
|
||||
if (m)
|
||||
defs.push({ id: m[1], text: m[2] });
|
||||
// Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
|
||||
// block is inert and stays in the body verbatim; only real definition lines
|
||||
// are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics
|
||||
// match exactly what import keeps/strips (#166).
|
||||
for (const tok of lexFootnoteLines(markdown)) {
|
||||
if (!tok.inFence && tok.definition)
|
||||
defs.push(tok.definition);
|
||||
else
|
||||
bodyLines.push(line);
|
||||
bodyLines.push(tok.line);
|
||||
}
|
||||
if (defs.length === 0)
|
||||
return { body: markdown, section: "" };
|
||||
|
||||
@@ -16,24 +16,11 @@
|
||||
* the line, trimmed, starts with `|`) — footnotes in table cells often do not
|
||||
* render as expected.
|
||||
*/
|
||||
/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */
|
||||
const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||
/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */
|
||||
const REF_RE_G = /\[\^([^\]\s]+)\]/g;
|
||||
/** Opening/closing fence marker (``` or ~~~). */
|
||||
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
|
||||
function forEachReference(line, onRef) {
|
||||
REF_RE_G.lastIndex = 0;
|
||||
let m;
|
||||
while ((m = REF_RE_G.exec(line)) !== null)
|
||||
onRef(m[1]);
|
||||
}
|
||||
import { lexFootnoteLines, forEachFootnoteReference, } from "./footnote-lex.js";
|
||||
/**
|
||||
* Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
|
||||
*/
|
||||
export function analyzeFootnotes(markdown) {
|
||||
const lines = markdown.split("\n");
|
||||
// Distinct reference ids in first-appearance order, plus the set of ids seen
|
||||
// inside a table row.
|
||||
const refIds = [];
|
||||
@@ -49,24 +36,13 @@ export function analyzeFootnotes(markdown) {
|
||||
};
|
||||
// Definition texts per id, in first-appearance order of the id.
|
||||
const defTextsById = new Map();
|
||||
let fence = null;
|
||||
for (const line of lines) {
|
||||
const fenceMatch = FENCE_RE.exec(line);
|
||||
if (fenceMatch) {
|
||||
const marker = fenceMatch[2][0];
|
||||
if (fence === null)
|
||||
fence = marker;
|
||||
else if (marker === fence)
|
||||
fence = null;
|
||||
// Same lexer the importer uses, so the analysis matches exactly what import
|
||||
// keeps/strips (#166): fenced lines are inert, definition lines are pulled.
|
||||
for (const tok of lexFootnoteLines(markdown)) {
|
||||
if (tok.inFence)
|
||||
continue;
|
||||
}
|
||||
// Footnote syntax shown inside a code fence is not real markup.
|
||||
if (fence !== null)
|
||||
continue;
|
||||
const defM = DEF_RE.exec(line);
|
||||
if (defM) {
|
||||
const id = defM[1];
|
||||
const text = defM[2];
|
||||
if (tok.definition) {
|
||||
const { id, text } = tok.definition;
|
||||
const arr = defTextsById.get(id);
|
||||
if (arr)
|
||||
arr.push(text);
|
||||
@@ -74,11 +50,11 @@ export function analyzeFootnotes(markdown) {
|
||||
defTextsById.set(id, [text]);
|
||||
// A definition's TEXT can itself reference another footnote (`[^a]: see
|
||||
// [^b]`); count those so such a `[^b]` is not falsely reported dangling.
|
||||
forEachReference(text, (rid) => addRef(rid, false));
|
||||
forEachFootnoteReference(text, (rid) => addRef(rid, false));
|
||||
continue;
|
||||
}
|
||||
const inTable = line.trimStart().startsWith("|");
|
||||
forEachReference(line, (id) => addRef(id, inTable));
|
||||
const inTable = tok.line.trimStart().startsWith("|");
|
||||
forEachFootnoteReference(tok.line, (id) => addRef(id, inTable));
|
||||
}
|
||||
const danglingReferences = refIds.filter((id) => !defTextsById.has(id));
|
||||
const duplicateDefinitions = [];
|
||||
|
||||
55
packages/mcp/build/lib/footnote-lex.js
Normal file
55
packages/mcp/build/lib/footnote-lex.js
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
|
||||
*
|
||||
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
|
||||
* definition lines and rebuilds a footnotes section) and the diagnostics
|
||||
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
|
||||
* are definitions and which lines are inert (inside a code fence). Sharing one
|
||||
* lexer makes "the analyzer sees what the importer leaves" a structural property
|
||||
* instead of two hand-kept copies that can drift (#166 review).
|
||||
*
|
||||
* NOTE: this is deliberately NOT shared with editor-ext's
|
||||
* `extractFootnoteDefinitions` — that lives in a different package and the
|
||||
* decoupling between the editor and the MCP mirror is intentional.
|
||||
*/
|
||||
/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */
|
||||
export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||
/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */
|
||||
export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g;
|
||||
/** Opening/closing code fence marker (``` or ~~~). */
|
||||
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
/** Classify every line of `markdown`, tracking fenced-code state. Pure. */
|
||||
export function lexFootnoteLines(markdown) {
|
||||
const out = [];
|
||||
let fence = null;
|
||||
for (const line of markdown.split("\n")) {
|
||||
const fenceMatch = FENCE_RE.exec(line);
|
||||
if (fenceMatch) {
|
||||
const marker = fenceMatch[2][0];
|
||||
if (fence === null)
|
||||
fence = marker; // opening fence
|
||||
else if (marker === fence)
|
||||
fence = null; // matching closing fence
|
||||
out.push({ line, inFence: true, definition: null });
|
||||
continue;
|
||||
}
|
||||
if (fence !== null) {
|
||||
out.push({ line, inFence: true, definition: null });
|
||||
continue;
|
||||
}
|
||||
const m = FOOTNOTE_DEF_RE.exec(line);
|
||||
out.push({
|
||||
line,
|
||||
inFence: false,
|
||||
definition: m ? { id: m[1], text: m[2] } : null,
|
||||
});
|
||||
}
|
||||
return out;
|
||||
}
|
||||
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
|
||||
export function forEachFootnoteReference(line, onRef) {
|
||||
FOOTNOTE_REF_RE_G.lastIndex = 0;
|
||||
let m;
|
||||
while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null)
|
||||
onRef(m[1]);
|
||||
}
|
||||
@@ -10,6 +10,7 @@ import { JSDOM } from "jsdom";
|
||||
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||
import { withPageLock } from "./page-lock.js";
|
||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||
import { summarizeChange, VerifyReport } from "./diff.js";
|
||||
|
||||
/**
|
||||
@@ -316,7 +317,8 @@ function bridgeTaskLists(html: string): string {
|
||||
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
|
||||
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
|
||||
// definition lines are collected into a single <section data-footnotes>.
|
||||
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||
// Definition detection + fence handling are shared with analyzeFootnotes via
|
||||
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
|
||||
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
||||
|
||||
function escapeFootnoteAttr(value: string): string {
|
||||
@@ -353,24 +355,15 @@ function extractFootnotes(markdown: string): {
|
||||
body: string;
|
||||
section: string;
|
||||
} {
|
||||
const lines = markdown.split("\n");
|
||||
const bodyLines: string[] = [];
|
||||
const defs: Array<{ id: string; text: string }> = [];
|
||||
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
|
||||
// block is preserved verbatim and not treated as a footnote definition.
|
||||
let fence: string | null = null;
|
||||
for (const line of lines) {
|
||||
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
|
||||
if (fenceMatch) {
|
||||
const marker = fenceMatch[2][0];
|
||||
if (fence === null) fence = marker;
|
||||
else if (marker === fence) fence = null;
|
||||
bodyLines.push(line);
|
||||
continue;
|
||||
}
|
||||
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
|
||||
if (m) defs.push({ id: m[1], text: m[2] });
|
||||
else bodyLines.push(line);
|
||||
// Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
|
||||
// block is inert and stays in the body verbatim; only real definition lines
|
||||
// are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics
|
||||
// match exactly what import keeps/strips (#166).
|
||||
for (const tok of lexFootnoteLines(markdown)) {
|
||||
if (!tok.inFence && tok.definition) defs.push(tok.definition);
|
||||
else bodyLines.push(tok.line);
|
||||
}
|
||||
if (defs.length === 0) return { body: markdown, section: "" };
|
||||
|
||||
|
||||
@@ -17,12 +17,10 @@
|
||||
* render as expected.
|
||||
*/
|
||||
|
||||
/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */
|
||||
const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||
/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */
|
||||
const REF_RE_G = /\[\^([^\]\s]+)\]/g;
|
||||
/** Opening/closing fence marker (``` or ~~~). */
|
||||
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
import {
|
||||
lexFootnoteLines,
|
||||
forEachFootnoteReference,
|
||||
} from "./footnote-lex.js";
|
||||
|
||||
export interface FootnoteDiagnostics {
|
||||
/** Reference ids (distinct, document order) with no matching definition. */
|
||||
@@ -37,19 +35,10 @@ export interface FootnoteDiagnostics {
|
||||
warnings: string[];
|
||||
}
|
||||
|
||||
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
|
||||
function forEachReference(line: string, onRef: (id: string) => void): void {
|
||||
REF_RE_G.lastIndex = 0;
|
||||
let m: RegExpExecArray | null;
|
||||
while ((m = REF_RE_G.exec(line)) !== null) onRef(m[1]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
|
||||
*/
|
||||
export function analyzeFootnotes(markdown: string): FootnoteDiagnostics {
|
||||
const lines = markdown.split("\n");
|
||||
|
||||
// Distinct reference ids in first-appearance order, plus the set of ids seen
|
||||
// inside a table row.
|
||||
const refIds: string[] = [];
|
||||
@@ -66,33 +55,22 @@ export function analyzeFootnotes(markdown: string): FootnoteDiagnostics {
|
||||
// Definition texts per id, in first-appearance order of the id.
|
||||
const defTextsById = new Map<string, string[]>();
|
||||
|
||||
let fence: string | null = null;
|
||||
for (const line of lines) {
|
||||
const fenceMatch = FENCE_RE.exec(line);
|
||||
if (fenceMatch) {
|
||||
const marker = fenceMatch[2][0];
|
||||
if (fence === null) fence = marker;
|
||||
else if (marker === fence) fence = null;
|
||||
continue;
|
||||
}
|
||||
// Footnote syntax shown inside a code fence is not real markup.
|
||||
if (fence !== null) continue;
|
||||
|
||||
const defM = DEF_RE.exec(line);
|
||||
if (defM) {
|
||||
const id = defM[1];
|
||||
const text = defM[2];
|
||||
// Same lexer the importer uses, so the analysis matches exactly what import
|
||||
// keeps/strips (#166): fenced lines are inert, definition lines are pulled.
|
||||
for (const tok of lexFootnoteLines(markdown)) {
|
||||
if (tok.inFence) continue;
|
||||
if (tok.definition) {
|
||||
const { id, text } = tok.definition;
|
||||
const arr = defTextsById.get(id);
|
||||
if (arr) arr.push(text);
|
||||
else defTextsById.set(id, [text]);
|
||||
// A definition's TEXT can itself reference another footnote (`[^a]: see
|
||||
// [^b]`); count those so such a `[^b]` is not falsely reported dangling.
|
||||
forEachReference(text, (rid) => addRef(rid, false));
|
||||
forEachFootnoteReference(text, (rid) => addRef(rid, false));
|
||||
continue;
|
||||
}
|
||||
|
||||
const inTable = line.trimStart().startsWith("|");
|
||||
forEachReference(line, (id) => addRef(id, inTable));
|
||||
const inTable = tok.line.trimStart().startsWith("|");
|
||||
forEachFootnoteReference(tok.line, (id) => addRef(id, inTable));
|
||||
}
|
||||
|
||||
const danglingReferences = refIds.filter((id) => !defTextsById.has(id));
|
||||
|
||||
71
packages/mcp/src/lib/footnote-lex.ts
Normal file
71
packages/mcp/src/lib/footnote-lex.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
|
||||
*
|
||||
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
|
||||
* definition lines and rebuilds a footnotes section) and the diagnostics
|
||||
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
|
||||
* are definitions and which lines are inert (inside a code fence). Sharing one
|
||||
* lexer makes "the analyzer sees what the importer leaves" a structural property
|
||||
* instead of two hand-kept copies that can drift (#166 review).
|
||||
*
|
||||
* NOTE: this is deliberately NOT shared with editor-ext's
|
||||
* `extractFootnoteDefinitions` — that lives in a different package and the
|
||||
* decoupling between the editor and the MCP mirror is intentional.
|
||||
*/
|
||||
|
||||
/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */
|
||||
export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||
/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */
|
||||
export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g;
|
||||
/** Opening/closing code fence marker (``` or ~~~). */
|
||||
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
|
||||
export interface FootnoteLine {
|
||||
/** The raw line, verbatim. */
|
||||
line: string;
|
||||
/**
|
||||
* True for a code-fence marker line AND every line inside a fence — footnote
|
||||
* syntax on such lines is inert (example text, not real markup). The importer
|
||||
* keeps these in the body; the analyzer skips them.
|
||||
*/
|
||||
inFence: boolean;
|
||||
/** The parsed definition, when this is a `[^id]: text` line OUTSIDE any fence. */
|
||||
definition: { id: string; text: string } | null;
|
||||
}
|
||||
|
||||
/** Classify every line of `markdown`, tracking fenced-code state. Pure. */
|
||||
export function lexFootnoteLines(markdown: string): FootnoteLine[] {
|
||||
const out: FootnoteLine[] = [];
|
||||
let fence: string | null = null;
|
||||
for (const line of markdown.split("\n")) {
|
||||
const fenceMatch = FENCE_RE.exec(line);
|
||||
if (fenceMatch) {
|
||||
const marker = fenceMatch[2][0];
|
||||
if (fence === null) fence = marker; // opening fence
|
||||
else if (marker === fence) fence = null; // matching closing fence
|
||||
out.push({ line, inFence: true, definition: null });
|
||||
continue;
|
||||
}
|
||||
if (fence !== null) {
|
||||
out.push({ line, inFence: true, definition: null });
|
||||
continue;
|
||||
}
|
||||
const m = FOOTNOTE_DEF_RE.exec(line);
|
||||
out.push({
|
||||
line,
|
||||
inFence: false,
|
||||
definition: m ? { id: m[1], text: m[2] } : null,
|
||||
});
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
|
||||
export function forEachFootnoteReference(
|
||||
line: string,
|
||||
onRef: (id: string) => void,
|
||||
): void {
|
||||
FOOTNOTE_REF_RE_G.lastIndex = 0;
|
||||
let m: RegExpExecArray | null;
|
||||
while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null) onRef(m[1]);
|
||||
}
|
||||
110
packages/mcp/test/mock/footnote-warnings.test.mjs
Normal file
110
packages/mcp/test/mock/footnote-warnings.test.mjs
Normal file
@@ -0,0 +1,110 @@
|
||||
// Mock-HTTP test for the footnoteWarnings plumbing (#166). createPage is the
|
||||
// representative path that is fully plain-HTTP (import + getPage) and so is
|
||||
// mockable here; updatePage / importPageMarkdown attach footnoteWarnings with the
|
||||
// IDENTICAL wiring (`analyzeFootnotes(...)` + spread-when-non-empty) but run their
|
||||
// mutation over the Hocuspocus collab WebSocket, which this plain-HTTP harness
|
||||
// does not stand up. The analyzer itself is unit-tested in footnote-analyze.test.
|
||||
import { test, after } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import http from "node:http";
|
||||
import { DocmostClient } from "../../build/client.js";
|
||||
|
||||
function readBody(req) {
|
||||
return new Promise((resolve) => {
|
||||
let raw = "";
|
||||
req.on("data", (c) => (raw += c));
|
||||
req.on("end", () => resolve(raw));
|
||||
});
|
||||
}
|
||||
|
||||
function sendJson(res, status, obj, extraHeaders = {}) {
|
||||
res.writeHead(status, { "Content-Type": "application/json", ...extraHeaders });
|
||||
res.end(JSON.stringify(obj));
|
||||
}
|
||||
|
||||
const openServers = [];
|
||||
function spawn(handler) {
|
||||
return new Promise((resolve) => {
|
||||
const server = http.createServer(handler);
|
||||
openServers.push(server);
|
||||
server.listen(0, "127.0.0.1", () => {
|
||||
const { port } = server.address();
|
||||
resolve(`http://127.0.0.1:${port}/api`);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
after(async () => {
|
||||
await Promise.all(
|
||||
openServers.map((s) => new Promise((r) => s.close(r))),
|
||||
);
|
||||
});
|
||||
|
||||
// A handler that imports a page, lets getPage read it back, and 404s everything
|
||||
// else (listSidebarPages fails gracefully inside getPage).
|
||||
function pageHandler() {
|
||||
return async (req, res) => {
|
||||
await readBody(req);
|
||||
if (req.url === "/api/auth/login") {
|
||||
sendJson(res, 200, { success: true }, {
|
||||
"Set-Cookie": "authToken=t; Path=/; HttpOnly",
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (req.url === "/api/pages/import") {
|
||||
sendJson(res, 200, { data: { id: "new-1" } });
|
||||
return;
|
||||
}
|
||||
if (req.url === "/api/pages/update") {
|
||||
// The title-restore step after import.
|
||||
sendJson(res, 200, { data: { id: "new-1" } });
|
||||
return;
|
||||
}
|
||||
if (req.url === "/api/pages/info") {
|
||||
sendJson(res, 200, {
|
||||
data: {
|
||||
id: "new-1",
|
||||
slugId: "slug-1",
|
||||
title: "T",
|
||||
spaceId: "sp-1",
|
||||
content: { type: "doc", content: [] },
|
||||
},
|
||||
});
|
||||
return;
|
||||
}
|
||||
sendJson(res, 404, { message: "not found" });
|
||||
};
|
||||
}
|
||||
|
||||
test("createPage attaches footnoteWarnings when the content has footnote problems", async () => {
|
||||
const baseURL = await spawn(pageHandler());
|
||||
const client = new DocmostClient(baseURL, "user@example.com", "pw");
|
||||
// A dangling reference + a duplicate definition + a table marker.
|
||||
const content = [
|
||||
"Intro[^missing] and| cell[^t] |.",
|
||||
"",
|
||||
"[^d]: one",
|
||||
"[^d]: two",
|
||||
"[^t]: in table",
|
||||
].join("\n");
|
||||
const result = await client.createPage("T", content, "sp-1");
|
||||
assert.ok(Array.isArray(result.footnoteWarnings), "footnoteWarnings present");
|
||||
const joined = result.footnoteWarnings.join("\n");
|
||||
assert.match(joined, /no matching definition/); // dangling [^missing]
|
||||
assert.match(joined, /defined more than once/); // duplicate [^d]
|
||||
// The page itself is still returned.
|
||||
assert.equal(result.success, true);
|
||||
});
|
||||
|
||||
test("createPage omits footnoteWarnings when the content is clean", async () => {
|
||||
const baseURL = await spawn(pageHandler());
|
||||
const client = new DocmostClient(baseURL, "user@example.com", "pw");
|
||||
const content = ["A[^a] and reuse[^a].", "", "[^a]: fine"].join("\n");
|
||||
const result = await client.createPage("T", content, "sp-1");
|
||||
assert.equal(
|
||||
"footnoteWarnings" in result,
|
||||
false,
|
||||
"no footnoteWarnings field on clean input",
|
||||
);
|
||||
assert.equal(result.success, true);
|
||||
});
|
||||
Reference in New Issue
Block a user