diff --git a/CHANGELOG.md b/CHANGELOG.md index efb96a72..9ab0ca99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,9 +20,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `UPDATE users SET is_agent = true WHERE email = ''`. Never flag a human or shared account, or its normal edits get mis-attributed as AI. See the AI-agent block in `.env.example`. (#143) +- **Footnote import diagnostics.** The MCP page-write tools (`create_page`, + `update_page`, `import_page_markdown`) now return a `footnoteWarnings` array + flagging dangling references, empty or duplicate definitions, and `[^id]` + markers inside table rows, so an agent can fix its own markup. The page is + still created; the field is omitted when there are no problems. (#166) ### Changed +- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the + same id are ONE footnote — one number, one definition, several back-references + — instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are + first-wins on import (the rest are dropped and reported via `footnoteWarnings`), + and a reference with no definition yields a single empty footnote rather than + one per occurrence. This supersedes the 0.93.0 "survive duplicate-id + definitions" behavior for the import path. (#166) + - **Public share AI: default per-workspace hourly assistant cap lowered 300 → 100.** The limiter falls back to this default whenever `SHARE_AI_WORKSPACE_MAX_PER_HOUR` is unset, so a `0.93.0` deployment that diff --git a/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts index 5790faf8..bd4057f9 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts @@ -133,6 +133,70 @@ describe("footnotePastePlugin — reuse-aware id remap", () => { editor.destroy(); }); + it("re-ids TWO colliding pasted definitions to DISTINCT ids (reservation works)", () => { + // Existing doc has footnotes "a" and "b". Paste a slice that defines BOTH — + // each must get its own fresh id; the reservation (existing.add(newId)) keeps + // the second from deriving onto the first's new id. + const editor = new Editor({ + extensions, + content: { + type: "doc", + content: [ + { + type: "paragraph", + content: [ + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "b" } }, + ], + }, + { + type: FOOTNOTES_LIST_NAME, + content: [ + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "a" }, + content: [{ type: "paragraph", content: [{ type: "text", text: "A" }] }], + }, + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "b" }, + content: [{ type: "paragraph", content: [{ type: "text", text: "B" }] }], + }, + ], + }, + ], + }, + }); + const { schema } = editor; + const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [ + schema.nodes.paragraph.create(null, [schema.text("pasted A")]), + ]), + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [ + schema.nodes.paragraph.create(null, [schema.text("pasted B")]), + ]), + ]), + ]), + 0, + 0, + ); + const out = paste(editor, slice); + const ids = sliceFootnoteIds(out); + const distinct = new Set(ids.map((x) => x.id)); + // Two ids, both remapped off the originals, and distinct from each other. + expect(distinct.size).toBe(2); + expect(distinct.has("a")).toBe(false); + expect(distinct.has("b")).toBe(false); + expect([...distinct].sort()).toEqual(["a__2", "b__2"]); + editor.destroy(); + }); + it("leaves the slice untouched when no pasted definition collides", () => { const editor = makeEditorWithFootnoteA(); const { schema } = editor; diff --git a/packages/editor-ext/src/lib/footnote/footnote-sync.ts b/packages/editor-ext/src/lib/footnote/footnote-sync.ts index e861ed0e..d0891e1a 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-sync.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-sync.ts @@ -29,9 +29,9 @@ interface DefOccurrence { interface FootnoteScan { /** - * Every reference occurrence in document order (NOT de-duplicated). Needed so - * that duplicate ids — which would otherwise be silently collapsed — can be - * detected and (together with their definitions) re-id'd instead of dropped. + * Every reference occurrence in document order (NOT de-duplicated). Repeated + * ids are kept so the FIRST appearance fixes definition order; later repeats + * are reuse (same footnote) and are never re-id'd. */ refOccurrences: RefOccurrence[]; /** @@ -67,15 +67,13 @@ function scan(doc: ProseMirrorNode): FootnoteScan { } /** - * Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the - * concrete reference re-id edits that must be applied to the body so the doc no - * longer contains two footnotes sharing a single id. + * Result of resolving the footnote id topology: the distinct reference order and + * one definition node per id. * - * The overriding invariant is that NO definition is ever dropped here: every - * definition occurrence ends up with a unique id and therefore survives the - * canonical rebuild. Repeated references that share an id are REUSE (one - * footnote) and are left untouched; only duplicate DEFINITIONS are re-id'd, so a - * pasted/merged second `[^d]:` survives as its own (then orphaned) footnote. + * References are NEVER re-id'd here — repeated ids are REUSE (one footnote). Only + * duplicate DEFINITIONS are re-id'd; lacking a matching reference, a re-id'd + * duplicate is then dropped by the orphan policy. No definition is ever dropped + * for COLLIDING — only for being an orphan. */ interface CollisionPlan { /** @@ -86,12 +84,6 @@ interface CollisionPlan { referenceIds: string[]; /** id -> definition node, after duplicate definitions were re-id'd. One per id. */ definitions: Map; - /** - * Body reference re-id edits. ALWAYS EMPTY under reuse semantics (references - * are never re-id'd); retained so the downstream consumer stays a harmless - * no-op rather than needing removal. - */ - refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>; /** True when a duplicate definition required a re-id. */ changed: boolean; } @@ -123,13 +115,6 @@ interface CollisionPlan { */ function resolveCollisions(scan: FootnoteScan): CollisionPlan { const definitions = new Map(); - // References are never re-id'd under reuse semantics, so this stays empty; it - // is retained so the CollisionPlan shape (and its no-op consumer) is unchanged. - const refReids: Array<{ - pos: number; - node: ProseMirrorNode; - newId: string; - }> = []; const referenceIds: string[] = []; const seenRefIds = new Set(); let changed = false; @@ -173,7 +158,7 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { } } - return { referenceIds, definitions, refReids, changed }; + return { referenceIds, definitions, changed }; } /** @@ -209,14 +194,13 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { * ping-pong forever (list moved to end -> trailing paragraph appended -> list * no longer last -> moved again ...). * - * Duplicate-id collisions (two references and/or two definitions sharing one - * id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a - * reference+definition pair) are resolved up front by resolveCollisions(): the - * duplicates are re-id'd to fresh unique ids so BOTH survive as distinct - * footnotes. This guarantees the overriding invariant — no footnoteDefinition is - * ever silently deleted by this automatic (addToHistory:false) transaction. A + * The id topology is resolved up front by resolveCollisions() (#166): repeated + * references sharing an id are REUSE — one footnote, never re-id'd — while a + * duplicate DEFINITION (from pasting/duplicating a definition, or a collab merge) + * is re-id'd to a fresh unique id. No footnoteDefinition is ever silently deleted + * by this automatic (addToHistory:false) transaction because of a COLLISION; a * definition is only ever removed when it has NO matching reference (orphan - * policy), never because its id collided with another. + * policy) — which is also what then drops a re-id'd duplicate definition. */ export function footnoteSyncPlugin( isRemoteTransaction?: (tr: Transaction) => boolean, @@ -247,18 +231,16 @@ export function footnoteSyncPlugin( const info = scan(doc); - // 0) Resolve duplicate-id collisions (two references and/or two - // definitions sharing one id) by re-id'ing duplicates to fresh unique - // ids. This is the critical defense: the old last-wins Map silently - // dropped all but the last definition for a shared id; here EVERY - // definition survives with a unique id, and duplicate references are - // paired with duplicate definitions so two same-id imports/pastes yield - // two distinct footnotes instead of one. + // 0) Resolve the id topology (#166): repeated references that share an id + // are REUSE — collapsed to one entry in `referenceIds`, never re-id'd — + // while a duplicate DEFINITION is re-id'd to a fresh deterministic id + // (and, lacking a matching reference, removed by the orphan policy + // below). No definition is dropped for COLLIDING, only for being orphan. const plan = resolveCollisions(info); const referenceIds = plan.referenceIds; - // The set of ids that must have a definition, in reference order (after - // collision re-id). De-duplicated already by resolveCollisions. + // The set of ids that must have a definition, in reference order. + // De-duplicated already by resolveCollisions. const referenceIdSet = new Set(referenceIds); // 1) For each definition occurrence, compute the id it should END UP with @@ -361,21 +343,15 @@ export function footnoteSyncPlugin( // 6) Apply the targeted, minimal mutations in ONE transaction. We never // delete-and-recreate an unchanged definition subtree; we only: - // (a) re-id specific colliding references and definitions (attr-only), + // (a) re-id colliding definitions (attr-only), // (b) delete genuine orphan definitions and extra/empty lists, // (c) insert genuinely-missing empty definitions and migrate defs out // of extra lists into the primary list, // (d) create the primary list if references exist but none does yet. + // References are never re-id'd (reuse), so there is no reference edit. const tr = newState.tr; - // 6a) Re-id colliding references (inline atoms: attr-only, size-stable). - for (const reid of plan.refReids) { - tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, { - ...reid.node.attrs, - id: reid.newId, - }); - } - // 6b) Re-id colliding definitions IN PLACE (attr-only). This preserves the + // 6a) Re-id colliding definitions IN PLACE (attr-only). This preserves the // definition's content subtree — never delete+recreate it. for (const reid of defReidsToApply) { tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, { diff --git a/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts b/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts index 279c2b8c..07acab01 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts @@ -4,16 +4,12 @@ import { deriveFootnoteId } from "./footnote-util"; /** * GOLDEN TABLE for `deriveFootnoteId` (and its private alphabetic `suffix`). * - * deriveFootnoteId is DELIBERATELY duplicated in - * packages/mcp/src/lib/collaboration.ts - * and the two copies MUST stay byte-for-byte equivalent in behavior so the same - * markdown imported through the editor and through the MCP path yields identical - * footnote ids. This table is the SHARED contract: the parity test - * packages/mcp/test/unit/derive-id-parity.test.mjs - * pins the exact SAME (input -> expected) pairs against the COMPILED mcp build. - * If either copy drifts, one of the two tests goes red. - * - * Keep this constant in sync with GOLDEN in the mcp parity test. + * `deriveFootnoteId` lives ONLY in editor-ext now — it is used by + * `resolveCollisions` (re-id of a duplicate definition) and `footnotePastePlugin` + * (re-id of a pasted colliding definition). The MCP/marked import paths no longer + * derive ids (duplicate definitions there are first-wins-dropped, #166), so there + * is no cross-package copy and no parity test to keep in sync. This table pins the + * deterministic scheme so a future change to it is a conscious one. */ export const DERIVE_GOLDEN: Array<{ originalId: string; diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index dc4ef79c..87f0ef8a 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -10,6 +10,7 @@ import { JSDOM } from "jsdom"; import { docmostExtensions, docmostSchema } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; +import { lexFootnoteLines } from "./footnote-lex.js"; import { summarizeChange } from "./diff.js"; /** * Build the descriptive error for an opaque Yjs encode failure ("Unexpected @@ -280,7 +281,8 @@ function bridgeTaskLists(html) { // Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline // marker becomes , and `[^id]: text` // definition lines are collected into a single
. -const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +// Definition detection + fence handling are shared with analyzeFootnotes via +// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's. const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/; function escapeFootnoteAttr(value) { return String(value).replace(/&/g, "&").replace(/"/g, """); @@ -308,28 +310,17 @@ marked.use({ extensions: [footnoteRefMarkedExtension] }); *
for them (or "" when there are none). */ function extractFootnotes(markdown) { - const lines = markdown.split("\n"); const bodyLines = []; const defs = []; - // Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code - // block is preserved verbatim and not treated as a footnote definition. - let fence = null; - for (const line of lines) { - const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) - fence = marker; - else if (marker === fence) - fence = null; - bodyLines.push(line); - continue; - } - const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null; - if (m) - defs.push({ id: m[1], text: m[2] }); + // Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code + // block is inert and stays in the body verbatim; only real definition lines + // are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics + // match exactly what import keeps/strips (#166). + for (const tok of lexFootnoteLines(markdown)) { + if (!tok.inFence && tok.definition) + defs.push(tok.definition); else - bodyLines.push(line); + bodyLines.push(tok.line); } if (defs.length === 0) return { body: markdown, section: "" }; diff --git a/packages/mcp/build/lib/footnote-analyze.js b/packages/mcp/build/lib/footnote-analyze.js index 919674b3..598148cd 100644 --- a/packages/mcp/build/lib/footnote-analyze.js +++ b/packages/mcp/build/lib/footnote-analyze.js @@ -16,24 +16,11 @@ * the line, trimmed, starts with `|`) — footnotes in table cells often do not * render as expected. */ -/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */ -const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; -/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ -const REF_RE_G = /\[\^([^\]\s]+)\]/g; -/** Opening/closing fence marker (``` or ~~~). */ -const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; -/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ -function forEachReference(line, onRef) { - REF_RE_G.lastIndex = 0; - let m; - while ((m = REF_RE_G.exec(line)) !== null) - onRef(m[1]); -} +import { lexFootnoteLines, forEachFootnoteReference, } from "./footnote-lex.js"; /** * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. */ export function analyzeFootnotes(markdown) { - const lines = markdown.split("\n"); // Distinct reference ids in first-appearance order, plus the set of ids seen // inside a table row. const refIds = []; @@ -49,24 +36,13 @@ export function analyzeFootnotes(markdown) { }; // Definition texts per id, in first-appearance order of the id. const defTextsById = new Map(); - let fence = null; - for (const line of lines) { - const fenceMatch = FENCE_RE.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) - fence = marker; - else if (marker === fence) - fence = null; + // Same lexer the importer uses, so the analysis matches exactly what import + // keeps/strips (#166): fenced lines are inert, definition lines are pulled. + for (const tok of lexFootnoteLines(markdown)) { + if (tok.inFence) continue; - } - // Footnote syntax shown inside a code fence is not real markup. - if (fence !== null) - continue; - const defM = DEF_RE.exec(line); - if (defM) { - const id = defM[1]; - const text = defM[2]; + if (tok.definition) { + const { id, text } = tok.definition; const arr = defTextsById.get(id); if (arr) arr.push(text); @@ -74,11 +50,11 @@ export function analyzeFootnotes(markdown) { defTextsById.set(id, [text]); // A definition's TEXT can itself reference another footnote (`[^a]: see // [^b]`); count those so such a `[^b]` is not falsely reported dangling. - forEachReference(text, (rid) => addRef(rid, false)); + forEachFootnoteReference(text, (rid) => addRef(rid, false)); continue; } - const inTable = line.trimStart().startsWith("|"); - forEachReference(line, (id) => addRef(id, inTable)); + const inTable = tok.line.trimStart().startsWith("|"); + forEachFootnoteReference(tok.line, (id) => addRef(id, inTable)); } const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); const duplicateDefinitions = []; diff --git a/packages/mcp/build/lib/footnote-lex.js b/packages/mcp/build/lib/footnote-lex.js new file mode 100644 index 00000000..3c22d149 --- /dev/null +++ b/packages/mcp/build/lib/footnote-lex.js @@ -0,0 +1,55 @@ +/** + * Shared, fence-aware line lexer for footnote markdown (MCP-internal). + * + * Both the importer (`extractFootnotes` in collaboration.ts, which strips + * definition lines and rebuilds a footnotes section) and the diagnostics + * (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines + * are definitions and which lines are inert (inside a code fence). Sharing one + * lexer makes "the analyzer sees what the importer leaves" a structural property + * instead of two hand-kept copies that can drift (#166 review). + * + * NOTE: this is deliberately NOT shared with editor-ext's + * `extractFootnoteDefinitions` — that lives in a different package and the + * decoupling between the editor and the MCP mirror is intentional. + */ +/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */ +export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */ +export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing code fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +/** Classify every line of `markdown`, tracking fenced-code state. Pure. */ +export function lexFootnoteLines(markdown) { + const out = []; + let fence = null; + for (const line of markdown.split("\n")) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) + fence = marker; // opening fence + else if (marker === fence) + fence = null; // matching closing fence + out.push({ line, inFence: true, definition: null }); + continue; + } + if (fence !== null) { + out.push({ line, inFence: true, definition: null }); + continue; + } + const m = FOOTNOTE_DEF_RE.exec(line); + out.push({ + line, + inFence: false, + definition: m ? { id: m[1], text: m[2] } : null, + }); + } + return out; +} +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ +export function forEachFootnoteReference(line, onRef) { + FOOTNOTE_REF_RE_G.lastIndex = 0; + let m; + while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null) + onRef(m[1]); +} diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index 178ff71b..aec82aa1 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -10,6 +10,7 @@ import { JSDOM } from "jsdom"; import { docmostExtensions, docmostSchema } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; +import { lexFootnoteLines } from "./footnote-lex.js"; import { summarizeChange, VerifyReport } from "./diff.js"; /** @@ -316,7 +317,8 @@ function bridgeTaskLists(html: string): string { // Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline // marker becomes , and `[^id]: text` // definition lines are collected into a single
. -const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +// Definition detection + fence handling are shared with analyzeFootnotes via +// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's. const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/; function escapeFootnoteAttr(value: string): string { @@ -353,24 +355,15 @@ function extractFootnotes(markdown: string): { body: string; section: string; } { - const lines = markdown.split("\n"); const bodyLines: string[] = []; const defs: Array<{ id: string; text: string }> = []; - // Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code - // block is preserved verbatim and not treated as a footnote definition. - let fence: string | null = null; - for (const line of lines) { - const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) fence = marker; - else if (marker === fence) fence = null; - bodyLines.push(line); - continue; - } - const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null; - if (m) defs.push({ id: m[1], text: m[2] }); - else bodyLines.push(line); + // Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code + // block is inert and stays in the body verbatim; only real definition lines + // are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics + // match exactly what import keeps/strips (#166). + for (const tok of lexFootnoteLines(markdown)) { + if (!tok.inFence && tok.definition) defs.push(tok.definition); + else bodyLines.push(tok.line); } if (defs.length === 0) return { body: markdown, section: "" }; diff --git a/packages/mcp/src/lib/footnote-analyze.ts b/packages/mcp/src/lib/footnote-analyze.ts index 97264dbc..e6e0d2b9 100644 --- a/packages/mcp/src/lib/footnote-analyze.ts +++ b/packages/mcp/src/lib/footnote-analyze.ts @@ -17,12 +17,10 @@ * render as expected. */ -/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */ -const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; -/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ -const REF_RE_G = /\[\^([^\]\s]+)\]/g; -/** Opening/closing fence marker (``` or ~~~). */ -const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +import { + lexFootnoteLines, + forEachFootnoteReference, +} from "./footnote-lex.js"; export interface FootnoteDiagnostics { /** Reference ids (distinct, document order) with no matching definition. */ @@ -37,19 +35,10 @@ export interface FootnoteDiagnostics { warnings: string[]; } -/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ -function forEachReference(line: string, onRef: (id: string) => void): void { - REF_RE_G.lastIndex = 0; - let m: RegExpExecArray | null; - while ((m = REF_RE_G.exec(line)) !== null) onRef(m[1]); -} - /** * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. */ export function analyzeFootnotes(markdown: string): FootnoteDiagnostics { - const lines = markdown.split("\n"); - // Distinct reference ids in first-appearance order, plus the set of ids seen // inside a table row. const refIds: string[] = []; @@ -66,33 +55,22 @@ export function analyzeFootnotes(markdown: string): FootnoteDiagnostics { // Definition texts per id, in first-appearance order of the id. const defTextsById = new Map(); - let fence: string | null = null; - for (const line of lines) { - const fenceMatch = FENCE_RE.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) fence = marker; - else if (marker === fence) fence = null; - continue; - } - // Footnote syntax shown inside a code fence is not real markup. - if (fence !== null) continue; - - const defM = DEF_RE.exec(line); - if (defM) { - const id = defM[1]; - const text = defM[2]; + // Same lexer the importer uses, so the analysis matches exactly what import + // keeps/strips (#166): fenced lines are inert, definition lines are pulled. + for (const tok of lexFootnoteLines(markdown)) { + if (tok.inFence) continue; + if (tok.definition) { + const { id, text } = tok.definition; const arr = defTextsById.get(id); if (arr) arr.push(text); else defTextsById.set(id, [text]); // A definition's TEXT can itself reference another footnote (`[^a]: see // [^b]`); count those so such a `[^b]` is not falsely reported dangling. - forEachReference(text, (rid) => addRef(rid, false)); + forEachFootnoteReference(text, (rid) => addRef(rid, false)); continue; } - - const inTable = line.trimStart().startsWith("|"); - forEachReference(line, (id) => addRef(id, inTable)); + const inTable = tok.line.trimStart().startsWith("|"); + forEachFootnoteReference(tok.line, (id) => addRef(id, inTable)); } const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); diff --git a/packages/mcp/src/lib/footnote-lex.ts b/packages/mcp/src/lib/footnote-lex.ts new file mode 100644 index 00000000..30da676b --- /dev/null +++ b/packages/mcp/src/lib/footnote-lex.ts @@ -0,0 +1,71 @@ +/** + * Shared, fence-aware line lexer for footnote markdown (MCP-internal). + * + * Both the importer (`extractFootnotes` in collaboration.ts, which strips + * definition lines and rebuilds a footnotes section) and the diagnostics + * (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines + * are definitions and which lines are inert (inside a code fence). Sharing one + * lexer makes "the analyzer sees what the importer leaves" a structural property + * instead of two hand-kept copies that can drift (#166 review). + * + * NOTE: this is deliberately NOT shared with editor-ext's + * `extractFootnoteDefinitions` — that lives in a different package and the + * decoupling between the editor and the MCP mirror is intentional. + */ + +/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */ +export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */ +export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing code fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; + +export interface FootnoteLine { + /** The raw line, verbatim. */ + line: string; + /** + * True for a code-fence marker line AND every line inside a fence — footnote + * syntax on such lines is inert (example text, not real markup). The importer + * keeps these in the body; the analyzer skips them. + */ + inFence: boolean; + /** The parsed definition, when this is a `[^id]: text` line OUTSIDE any fence. */ + definition: { id: string; text: string } | null; +} + +/** Classify every line of `markdown`, tracking fenced-code state. Pure. */ +export function lexFootnoteLines(markdown: string): FootnoteLine[] { + const out: FootnoteLine[] = []; + let fence: string | null = null; + for (const line of markdown.split("\n")) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) fence = marker; // opening fence + else if (marker === fence) fence = null; // matching closing fence + out.push({ line, inFence: true, definition: null }); + continue; + } + if (fence !== null) { + out.push({ line, inFence: true, definition: null }); + continue; + } + const m = FOOTNOTE_DEF_RE.exec(line); + out.push({ + line, + inFence: false, + definition: m ? { id: m[1], text: m[2] } : null, + }); + } + return out; +} + +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ +export function forEachFootnoteReference( + line: string, + onRef: (id: string) => void, +): void { + FOOTNOTE_REF_RE_G.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null) onRef(m[1]); +} diff --git a/packages/mcp/test/mock/footnote-warnings.test.mjs b/packages/mcp/test/mock/footnote-warnings.test.mjs new file mode 100644 index 00000000..2f8e0b7d --- /dev/null +++ b/packages/mcp/test/mock/footnote-warnings.test.mjs @@ -0,0 +1,110 @@ +// Mock-HTTP test for the footnoteWarnings plumbing (#166). createPage is the +// representative path that is fully plain-HTTP (import + getPage) and so is +// mockable here; updatePage / importPageMarkdown attach footnoteWarnings with the +// IDENTICAL wiring (`analyzeFootnotes(...)` + spread-when-non-empty) but run their +// mutation over the Hocuspocus collab WebSocket, which this plain-HTTP harness +// does not stand up. The analyzer itself is unit-tested in footnote-analyze.test. +import { test, after } from "node:test"; +import assert from "node:assert/strict"; +import http from "node:http"; +import { DocmostClient } from "../../build/client.js"; + +function readBody(req) { + return new Promise((resolve) => { + let raw = ""; + req.on("data", (c) => (raw += c)); + req.on("end", () => resolve(raw)); + }); +} + +function sendJson(res, status, obj, extraHeaders = {}) { + res.writeHead(status, { "Content-Type": "application/json", ...extraHeaders }); + res.end(JSON.stringify(obj)); +} + +const openServers = []; +function spawn(handler) { + return new Promise((resolve) => { + const server = http.createServer(handler); + openServers.push(server); + server.listen(0, "127.0.0.1", () => { + const { port } = server.address(); + resolve(`http://127.0.0.1:${port}/api`); + }); + }); +} + +after(async () => { + await Promise.all( + openServers.map((s) => new Promise((r) => s.close(r))), + ); +}); + +// A handler that imports a page, lets getPage read it back, and 404s everything +// else (listSidebarPages fails gracefully inside getPage). +function pageHandler() { + return async (req, res) => { + await readBody(req); + if (req.url === "/api/auth/login") { + sendJson(res, 200, { success: true }, { + "Set-Cookie": "authToken=t; Path=/; HttpOnly", + }); + return; + } + if (req.url === "/api/pages/import") { + sendJson(res, 200, { data: { id: "new-1" } }); + return; + } + if (req.url === "/api/pages/update") { + // The title-restore step after import. + sendJson(res, 200, { data: { id: "new-1" } }); + return; + } + if (req.url === "/api/pages/info") { + sendJson(res, 200, { + data: { + id: "new-1", + slugId: "slug-1", + title: "T", + spaceId: "sp-1", + content: { type: "doc", content: [] }, + }, + }); + return; + } + sendJson(res, 404, { message: "not found" }); + }; +} + +test("createPage attaches footnoteWarnings when the content has footnote problems", async () => { + const baseURL = await spawn(pageHandler()); + const client = new DocmostClient(baseURL, "user@example.com", "pw"); + // A dangling reference + a duplicate definition + a table marker. + const content = [ + "Intro[^missing] and| cell[^t] |.", + "", + "[^d]: one", + "[^d]: two", + "[^t]: in table", + ].join("\n"); + const result = await client.createPage("T", content, "sp-1"); + assert.ok(Array.isArray(result.footnoteWarnings), "footnoteWarnings present"); + const joined = result.footnoteWarnings.join("\n"); + assert.match(joined, /no matching definition/); // dangling [^missing] + assert.match(joined, /defined more than once/); // duplicate [^d] + // The page itself is still returned. + assert.equal(result.success, true); +}); + +test("createPage omits footnoteWarnings when the content is clean", async () => { + const baseURL = await spawn(pageHandler()); + const client = new DocmostClient(baseURL, "user@example.com", "pw"); + const content = ["A[^a] and reuse[^a].", "", "[^a]: fine"].join("\n"); + const result = await client.createPage("T", content, "sp-1"); + assert.equal( + "footnoteWarnings" in result, + false, + "no footnoteWarnings field on clean input", + ); + assert.equal(result.success, true); +});