refactor(footnotes): address PR #169 review

- footnote-sync: remove the now-dead `refReids` (CollisionPlan field, local,
  return, the 6a consumer loop) — references are never re-id'd under reuse, so it
  was dead structure on the hot reconciliation path. Rewrite the stale comments
  (plugin header, step 0, refOccurrences field) that still described the old
  "duplicates re-id'd so both survive" model to the reuse model.
- Shared footnote lexer: new packages/mcp/src/lib/footnote-lex.ts
  (lexFootnoteLines + forEachFootnoteReference). extractFootnotes (collaboration)
  and analyzeFootnotes now consume the SAME fence-aware lexer, so "the analyzer
  sees exactly what the importer keeps/strips" is structural, not comment-kept.
  Removed the duplicated DEF_RE/fence machine from both consumers.
- Tests: new mock test for the footnoteWarnings plumbing on createPage (problems
  -> field present; clean -> omitted); new paste-reuse case for TWO colliding
  pasted definitions (reservation -> distinct ids). Updated the derive-id golden
  test header (no MCP copy / parity test anymore).
- CHANGELOG: [Unreleased] entries for footnote reuse (Changed, supersedes 0.93.0)
  and footnoteWarnings (Added).

editor-ext 129, MCP 301, server roundtrip 2; client+server tsc clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-24 16:16:30 +03:00
parent 17e683a311
commit a0cc625dfe
11 changed files with 389 additions and 166 deletions

View File

@@ -10,6 +10,7 @@ import { JSDOM } from "jsdom";
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
import { withPageLock } from "./page-lock.js";
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
import { lexFootnoteLines } from "./footnote-lex.js";
import { summarizeChange } from "./diff.js";
/**
* Build the descriptive error for an opaque Yjs encode failure ("Unexpected
@@ -280,7 +281,8 @@ function bridgeTaskLists(html) {
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
// definition lines are collected into a single <section data-footnotes>.
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
// Definition detection + fence handling are shared with analyzeFootnotes via
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
function escapeFootnoteAttr(value) {
return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
@@ -308,28 +310,17 @@ marked.use({ extensions: [footnoteRefMarkedExtension] });
* <section data-footnotes> for them (or "" when there are none).
*/
function extractFootnotes(markdown) {
const lines = markdown.split("\n");
const bodyLines = [];
const defs = [];
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
// block is preserved verbatim and not treated as a footnote definition.
let fence = null;
for (const line of lines) {
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
if (fenceMatch) {
const marker = fenceMatch[2][0];
if (fence === null)
fence = marker;
else if (marker === fence)
fence = null;
bodyLines.push(line);
continue;
}
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
if (m)
defs.push({ id: m[1], text: m[2] });
// Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
// block is inert and stays in the body verbatim; only real definition lines
// are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics
// match exactly what import keeps/strips (#166).
for (const tok of lexFootnoteLines(markdown)) {
if (!tok.inFence && tok.definition)
defs.push(tok.definition);
else
bodyLines.push(line);
bodyLines.push(tok.line);
}
if (defs.length === 0)
return { body: markdown, section: "" };

View File

@@ -16,24 +16,11 @@
* the line, trimmed, starts with `|`) — footnotes in table cells often do not
* render as expected.
*/
/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */
const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */
const REF_RE_G = /\[\^([^\]\s]+)\]/g;
/** Opening/closing fence marker (``` or ~~~). */
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
function forEachReference(line, onRef) {
REF_RE_G.lastIndex = 0;
let m;
while ((m = REF_RE_G.exec(line)) !== null)
onRef(m[1]);
}
import { lexFootnoteLines, forEachFootnoteReference, } from "./footnote-lex.js";
/**
* Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
*/
export function analyzeFootnotes(markdown) {
const lines = markdown.split("\n");
// Distinct reference ids in first-appearance order, plus the set of ids seen
// inside a table row.
const refIds = [];
@@ -49,24 +36,13 @@ export function analyzeFootnotes(markdown) {
};
// Definition texts per id, in first-appearance order of the id.
const defTextsById = new Map();
let fence = null;
for (const line of lines) {
const fenceMatch = FENCE_RE.exec(line);
if (fenceMatch) {
const marker = fenceMatch[2][0];
if (fence === null)
fence = marker;
else if (marker === fence)
fence = null;
// Same lexer the importer uses, so the analysis matches exactly what import
// keeps/strips (#166): fenced lines are inert, definition lines are pulled.
for (const tok of lexFootnoteLines(markdown)) {
if (tok.inFence)
continue;
}
// Footnote syntax shown inside a code fence is not real markup.
if (fence !== null)
continue;
const defM = DEF_RE.exec(line);
if (defM) {
const id = defM[1];
const text = defM[2];
if (tok.definition) {
const { id, text } = tok.definition;
const arr = defTextsById.get(id);
if (arr)
arr.push(text);
@@ -74,11 +50,11 @@ export function analyzeFootnotes(markdown) {
defTextsById.set(id, [text]);
// A definition's TEXT can itself reference another footnote (`[^a]: see
// [^b]`); count those so such a `[^b]` is not falsely reported dangling.
forEachReference(text, (rid) => addRef(rid, false));
forEachFootnoteReference(text, (rid) => addRef(rid, false));
continue;
}
const inTable = line.trimStart().startsWith("|");
forEachReference(line, (id) => addRef(id, inTable));
const inTable = tok.line.trimStart().startsWith("|");
forEachFootnoteReference(tok.line, (id) => addRef(id, inTable));
}
const danglingReferences = refIds.filter((id) => !defTextsById.has(id));
const duplicateDefinitions = [];

View File

@@ -0,0 +1,55 @@
/**
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
*
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
* definition lines and rebuilds a footnotes section) and the diagnostics
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
* are definitions and which lines are inert (inside a code fence). Sharing one
* lexer makes "the analyzer sees what the importer leaves" a structural property
* instead of two hand-kept copies that can drift (#166 review).
*
* NOTE: this is deliberately NOT shared with editor-ext's
* `extractFootnoteDefinitions` — that lives in a different package and the
* decoupling between the editor and the MCP mirror is intentional.
*/
/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */
export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */
export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g;
/** Opening/closing code fence marker (``` or ~~~). */
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/**
 * Classify every line of `markdown`, tracking fenced-code state. Pure.
 *
 * A fence is opened by any line matching FENCE_RE. It is closed only by a line
 * whose marker run uses the SAME character, is AT LEAST AS LONG as the opening
 * run, and is followed by nothing but whitespace (the CommonMark rule, which is
 * also what the Markdown renderer applies). Anything else that merely looks
 * like a fence while a fence is open — a shorter run, or a run followed by an
 * info string such as "```js" — is ordinary code-block content and must not
 * end the fence, otherwise `[^id]: ...` lines shown inside the code block would
 * be mistaken for real definitions.
 *
 * @param {string} markdown Markdown source; split on "\n".
 * @returns {Array<{line: string, inFence: boolean, definition: ({id: string, text: string}|null)}>}
 *   One entry per input line, in order. `inFence` is true for fence marker
 *   lines and for every line inside a fence; `definition` is only ever set on
 *   a `[^id]: text` line outside any fence.
 */
export function lexFootnoteLines(markdown) {
    const out = [];
    // The currently open fence ({ marker, length }), or null outside a fence.
    let fence = null;
    for (const line of markdown.split("\n")) {
        const fenceMatch = FENCE_RE.exec(line);
        if (fenceMatch) {
            const run = fenceMatch[2];
            if (fence === null) {
                // Opening fence: remember its character and length.
                fence = { marker: run[0], length: run.length };
            }
            else if (run[0] === fence.marker &&
                run.length >= fence.length &&
                line.slice(fenceMatch[0].length).trim() === "") {
                // Matching closing fence (same char, long enough, no info string).
                fence = null;
            }
            // Marker lines — and fence look-alikes inside an open fence — are inert.
            out.push({ line, inFence: true, definition: null });
            continue;
        }
        if (fence !== null) {
            out.push({ line, inFence: true, definition: null });
            continue;
        }
        const m = FOOTNOTE_DEF_RE.exec(line);
        out.push({
            line,
            inFence: false,
            definition: m ? { id: m[1], text: m[2] } : null,
        });
    }
    return out;
}
/**
 * Scan a line for every `[^id]` reference, invoking `onRef(id)` for each, in
 * left-to-right order.
 *
 * The scan runs on a private copy of FOOTNOTE_REF_RE_G. The exported regex is
 * global and therefore stateful (`lastIndex`); scanning with the shared object
 * meant that an `onRef` callback which itself called this function reset the
 * outer scan's position and made it start over forever. A per-call copy keeps
 * every scan independent and leaves the exported regex untouched.
 *
 * @param {string} line Text to scan (a body line or a definition's text).
 * @param {(id: string) => void} onRef Called once per reference with its id.
 * @returns {void}
 */
export function forEachFootnoteReference(line, onRef) {
    // `new RegExp(regex)` copies source + flags and starts at lastIndex 0.
    const scanner = new RegExp(FOOTNOTE_REF_RE_G);
    let m;
    while ((m = scanner.exec(line)) !== null)
        onRef(m[1]);
}

View File

@@ -10,6 +10,7 @@ import { JSDOM } from "jsdom";
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
import { withPageLock } from "./page-lock.js";
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
import { lexFootnoteLines } from "./footnote-lex.js";
import { summarizeChange, VerifyReport } from "./diff.js";
/**
@@ -316,7 +317,8 @@ function bridgeTaskLists(html: string): string {
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
// definition lines are collected into a single <section data-footnotes>.
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
// Definition detection + fence handling are shared with analyzeFootnotes via
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
function escapeFootnoteAttr(value: string): string {
@@ -353,24 +355,15 @@ function extractFootnotes(markdown: string): {
body: string;
section: string;
} {
const lines = markdown.split("\n");
const bodyLines: string[] = [];
const defs: Array<{ id: string; text: string }> = [];
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
// block is preserved verbatim and not treated as a footnote definition.
let fence: string | null = null;
for (const line of lines) {
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
if (fenceMatch) {
const marker = fenceMatch[2][0];
if (fence === null) fence = marker;
else if (marker === fence) fence = null;
bodyLines.push(line);
continue;
}
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
if (m) defs.push({ id: m[1], text: m[2] });
else bodyLines.push(line);
// Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
// block is inert and stays in the body verbatim; only real definition lines
// are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics
// match exactly what import keeps/strips (#166).
for (const tok of lexFootnoteLines(markdown)) {
if (!tok.inFence && tok.definition) defs.push(tok.definition);
else bodyLines.push(tok.line);
}
if (defs.length === 0) return { body: markdown, section: "" };

View File

@@ -17,12 +17,10 @@
* render as expected.
*/
/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */
const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */
const REF_RE_G = /\[\^([^\]\s]+)\]/g;
/** Opening/closing fence marker (``` or ~~~). */
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
import {
lexFootnoteLines,
forEachFootnoteReference,
} from "./footnote-lex.js";
export interface FootnoteDiagnostics {
/** Reference ids (distinct, document order) with no matching definition. */
@@ -37,19 +35,10 @@ export interface FootnoteDiagnostics {
warnings: string[];
}
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
function forEachReference(line: string, onRef: (id: string) => void): void {
REF_RE_G.lastIndex = 0;
let m: RegExpExecArray | null;
while ((m = REF_RE_G.exec(line)) !== null) onRef(m[1]);
}
/**
* Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
*/
export function analyzeFootnotes(markdown: string): FootnoteDiagnostics {
const lines = markdown.split("\n");
// Distinct reference ids in first-appearance order, plus the set of ids seen
// inside a table row.
const refIds: string[] = [];
@@ -66,33 +55,22 @@ export function analyzeFootnotes(markdown: string): FootnoteDiagnostics {
// Definition texts per id, in first-appearance order of the id.
const defTextsById = new Map<string, string[]>();
let fence: string | null = null;
for (const line of lines) {
const fenceMatch = FENCE_RE.exec(line);
if (fenceMatch) {
const marker = fenceMatch[2][0];
if (fence === null) fence = marker;
else if (marker === fence) fence = null;
continue;
}
// Footnote syntax shown inside a code fence is not real markup.
if (fence !== null) continue;
const defM = DEF_RE.exec(line);
if (defM) {
const id = defM[1];
const text = defM[2];
// Same lexer the importer uses, so the analysis matches exactly what import
// keeps/strips (#166): fenced lines are inert, definition lines are pulled.
for (const tok of lexFootnoteLines(markdown)) {
if (tok.inFence) continue;
if (tok.definition) {
const { id, text } = tok.definition;
const arr = defTextsById.get(id);
if (arr) arr.push(text);
else defTextsById.set(id, [text]);
// A definition's TEXT can itself reference another footnote (`[^a]: see
// [^b]`); count those so such a `[^b]` is not falsely reported dangling.
forEachReference(text, (rid) => addRef(rid, false));
forEachFootnoteReference(text, (rid) => addRef(rid, false));
continue;
}
const inTable = line.trimStart().startsWith("|");
forEachReference(line, (id) => addRef(id, inTable));
const inTable = tok.line.trimStart().startsWith("|");
forEachFootnoteReference(tok.line, (id) => addRef(id, inTable));
}
const danglingReferences = refIds.filter((id) => !defTextsById.has(id));

View File

@@ -0,0 +1,71 @@
/**
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
*
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
* definition lines and rebuilds a footnotes section) and the diagnostics
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
* are definitions and which lines are inert (inside a code fence). Sharing one
* lexer makes "the analyzer sees what the importer leaves" a structural property
* instead of two hand-kept copies that can drift (#166 review).
*
* NOTE: this is deliberately NOT shared with editor-ext's
* `extractFootnoteDefinitions` — that lives in a different package and the
* decoupling between the editor and the MCP mirror is intentional.
*/
/**
 * A footnote DEFINITION line: `[^id]: text`. The pattern is anchored, so the
 * `[^` must be the very first characters of the line. Group 1 is the id (no `]`
 * and no whitespace); group 2 is the text after the colon and any spaces/tabs.
 */
export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
/**
 * Every footnote REFERENCE `[^id]` in a line (global; group 1 is the id).
 * Because of the `g` flag this object carries `lastIndex` between `exec` calls:
 * reset it (or work on a copy) before scanning a new string.
 */
export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g;
/**
 * Opening/closing code fence marker (``` or ~~~). Group 1 is the leading
 * whitespace; group 2 is the run of three or more backticks or tildes.
 */
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/**
 * One classified line of a Markdown document, as produced by
 * `lexFootnoteLines`. Lines are emitted in document order, one entry per line
 * of the "\n"-split input.
 */
export interface FootnoteLine {
  /** The raw line, verbatim. */
  line: string;
  /**
   * True for a code-fence marker line AND every line inside a fence — footnote
   * syntax on such lines is inert (example text, not real markup). The importer
   * keeps these in the body; the analyzer skips them.
   */
  inFence: boolean;
  /**
   * The parsed definition, when this is a `[^id]: text` line OUTSIDE any fence.
   * Always null when `inFence` is true.
   */
  definition: { id: string; text: string } | null;
}
/**
 * Classify every line of `markdown`, tracking fenced-code state. Pure.
 *
 * A fence is opened by any line matching FENCE_RE. It is closed only by a line
 * whose marker run uses the SAME character, is AT LEAST AS LONG as the opening
 * run, and is followed by nothing but whitespace (the CommonMark rule, which is
 * also what the Markdown renderer applies). Anything else that merely looks
 * like a fence while a fence is open — a shorter run, or a run followed by an
 * info string such as "```js" — is ordinary code-block content and must not
 * end the fence, otherwise `[^id]: ...` lines shown inside the code block would
 * be mistaken for real definitions.
 *
 * @param markdown Markdown source; split on "\n".
 * @returns One entry per input line, in order. `inFence` is true for fence
 *   marker lines and for every line inside a fence; `definition` is only ever
 *   set on a `[^id]: text` line outside any fence.
 */
export function lexFootnoteLines(markdown: string): FootnoteLine[] {
  const out: FootnoteLine[] = [];
  // The currently open fence, or null outside a fence.
  let fence: { marker: string; length: number } | null = null;
  for (const line of markdown.split("\n")) {
    const fenceMatch = FENCE_RE.exec(line);
    if (fenceMatch) {
      const run = fenceMatch[2];
      if (fence === null) {
        // Opening fence: remember its character and length.
        fence = { marker: run[0], length: run.length };
      } else if (
        run[0] === fence.marker &&
        run.length >= fence.length &&
        line.slice(fenceMatch[0].length).trim() === ""
      ) {
        // Matching closing fence (same char, long enough, no info string).
        fence = null;
      }
      // Marker lines — and fence look-alikes inside an open fence — are inert.
      out.push({ line, inFence: true, definition: null });
      continue;
    }
    if (fence !== null) {
      out.push({ line, inFence: true, definition: null });
      continue;
    }
    const m = FOOTNOTE_DEF_RE.exec(line);
    out.push({
      line,
      inFence: false,
      definition: m ? { id: m[1], text: m[2] } : null,
    });
  }
  return out;
}
/**
 * Scan a line for every `[^id]` reference, invoking `onRef(id)` for each, in
 * left-to-right order.
 *
 * The scan runs on a private copy of FOOTNOTE_REF_RE_G. The exported regex is
 * global and therefore stateful (`lastIndex`); scanning with the shared object
 * meant that an `onRef` callback which itself called this function reset the
 * outer scan's position and made it start over forever. A per-call copy keeps
 * every scan independent and leaves the exported regex untouched.
 *
 * @param line Text to scan (a body line or a definition's text).
 * @param onRef Called once per reference with its id.
 */
export function forEachFootnoteReference(
  line: string,
  onRef: (id: string) => void,
): void {
  // `new RegExp(regex)` copies source + flags and starts at lastIndex 0.
  const scanner = new RegExp(FOOTNOTE_REF_RE_G);
  let m: RegExpExecArray | null;
  while ((m = scanner.exec(line)) !== null) onRef(m[1]);
}

View File

@@ -0,0 +1,110 @@
// Mock-HTTP test for the footnoteWarnings plumbing (#166). createPage is the
// representative path that is fully plain-HTTP (import + getPage) and so is
// mockable here; updatePage / importPageMarkdown attach footnoteWarnings with the
// IDENTICAL wiring (`analyzeFootnotes(...)` + spread-when-non-empty) but run their
// mutation over the Hocuspocus collab WebSocket, which this plain-HTTP harness
// does not stand up. The analyzer itself is unit-tested in footnote-analyze.test.
import { test, after } from "node:test";
import assert from "node:assert/strict";
import http from "node:http";
import { DocmostClient } from "../../build/client.js";
/**
 * Drain a request stream and resolve with its whole body decoded as UTF-8.
 *
 * The raw chunks are collected and decoded ONCE at the end: appending each
 * Buffer chunk to a string decodes it on its own, which corrupts any
 * multi-byte character that happens to be split across two chunks.
 *
 * @param {import("node:events").EventEmitter} req Emits "data" / "end" / "error"
 *   (an http.IncomingMessage in these tests).
 * @returns {Promise<string>} The body text; "" for an empty body. Rejects with
 *   the stream's error if the request fails before it ends.
 */
function readBody(req) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    req.on("data", (chunk) => {
      // Chunks are strings only if an encoding was set on the stream upstream.
      chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
    });
    req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")));
    req.on("error", reject);
  });
}
/**
 * Write a complete JSON response: status line, headers, serialized body.
 *
 * @param {{writeHead: Function, end: Function}} res Response to write to.
 * @param {number} status HTTP status code.
 * @param {*} obj Value serialized with JSON.stringify as the body.
 * @param {Object} [extraHeaders] Extra headers; they win over the default
 *   Content-Type when the same key is supplied.
 */
function sendJson(res, status, obj, extraHeaders = {}) {
  const headers = { "Content-Type": "application/json", ...extraHeaders };
  res.writeHead(status, headers);
  // Serialize only after the head is written, as the original did.
  const payload = JSON.stringify(obj);
  res.end(payload);
}
// Every server started by spawn(), kept so the suite can close them afterwards.
const openServers = [];
/**
 * Start a throwaway HTTP server on an OS-assigned loopback port.
 *
 * @param {Function} handler Request listener passed to http.createServer.
 * @returns {Promise<string>} The server's API base URL,
 *   `http://127.0.0.1:<port>/api`, once it is listening.
 */
async function spawn(handler) {
  const server = http.createServer(handler);
  openServers.push(server);
  // Port 0 lets the OS pick a free port; wait until the socket is bound.
  await new Promise((listening) => {
    server.listen(0, "127.0.0.1", listening);
  });
  const { port } = server.address();
  return `http://127.0.0.1:${port}/api`;
}
// Suite teardown: close every server spawn() opened and wait for all of them.
after(async () => {
  const closings = openServers.map(
    (server) =>
      new Promise((closed) => {
        server.close(closed);
      }),
  );
  await Promise.all(closings);
});
// Builds the request listener for the fake Docmost API used by these tests. It
// answers login, page import, the post-import title update and the page read
// back; every other URL gets a 404 (listSidebarPages fails gracefully inside
// getPage).
function pageHandler() {
  // The import and update endpoints both answer with the new page's id.
  const createdPage = () => ({ data: { id: "new-1" } });
  return async (req, res) => {
    // Drain the request body first; its content is not inspected.
    await readBody(req);
    switch (req.url) {
      case "/api/auth/login":
        sendJson(res, 200, { success: true }, {
          "Set-Cookie": "authToken=t; Path=/; HttpOnly",
        });
        break;
      case "/api/pages/import":
        sendJson(res, 200, createdPage());
        break;
      case "/api/pages/update":
        // The title-restore step after import.
        sendJson(res, 200, createdPage());
        break;
      case "/api/pages/info":
        sendJson(res, 200, {
          data: {
            id: "new-1",
            slugId: "slug-1",
            title: "T",
            spaceId: "sp-1",
            content: { type: "doc", content: [] },
          },
        });
        break;
      default:
        sendJson(res, 404, { message: "not found" });
    }
  };
}
// createPage must surface the analyzer's findings as `footnoteWarnings` while
// still returning the created page.
test("createPage attaches footnoteWarnings when the content has footnote problems", async () => {
  const baseURL = await spawn(pageHandler());
  const client = new DocmostClient(baseURL, "user@example.com", "pw");
  // A dangling reference + a duplicate definition + a marker in a table row.
  // The table row is on its OWN line and starts with `|`: the analyzer only
  // treats a line as a table row when its trimmed text starts with `|`, so a
  // `|` in the middle of a prose line would not exercise that case.
  const content = [
    "Intro[^missing].",
    "",
    "| cell[^t] |",
    "",
    "[^d]: one",
    "[^d]: two",
    "[^t]: in table",
  ].join("\n");
  const result = await client.createPage("T", content, "sp-1");
  assert.ok(Array.isArray(result.footnoteWarnings), "footnoteWarnings present");
  const joined = result.footnoteWarnings.join("\n");
  assert.match(joined, /no matching definition/); // dangling [^missing]
  assert.match(joined, /defined more than once/); // duplicate [^d]
  // The page itself is still returned.
  assert.equal(result.success, true);
});
// Clean input: the result must not carry a `footnoteWarnings` key at all.
test("createPage omits footnoteWarnings when the content is clean", async () => {
  const apiBase = await spawn(pageHandler());
  const docmost = new DocmostClient(apiBase, "user@example.com", "pw");
  // One footnote, referenced twice and defined once.
  const markdown = ["A[^a] and reuse[^a].", "", "[^a]: fine"].join("\n");
  const result = await docmost.createPage("T", markdown, "sp-1");
  const hasWarningsField = "footnoteWarnings" in result;
  assert.equal(hasWarningsField, false, "no footnoteWarnings field on clean input");
  assert.equal(result.success, true);
});