Compare commits
5 Commits
fix/ai-cha
...
fix/footno
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0e8af13122 | ||
|
|
b9056e2bee | ||
|
|
a0cc625dfe | ||
|
|
17e683a311 | ||
|
|
13cac155c1 |
13
CHANGELOG.md
13
CHANGELOG.md
@@ -20,9 +20,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
`UPDATE users SET is_agent = true WHERE email = '<mcp-account>'`. Never flag a
|
`UPDATE users SET is_agent = true WHERE email = '<mcp-account>'`. Never flag a
|
||||||
human or shared account, or its normal edits get mis-attributed as AI. See the
|
human or shared account, or its normal edits get mis-attributed as AI. See the
|
||||||
AI-agent block in `.env.example`. (#143)
|
AI-agent block in `.env.example`. (#143)
|
||||||
|
- **Footnote import diagnostics.** The MCP page-write tools (`create_page`,
|
||||||
|
`update_page`, `import_page_markdown`) now return a `footnoteWarnings` array
|
||||||
|
flagging dangling references, empty or duplicate definitions, and `[^id]`
|
||||||
|
markers inside table rows, so an agent can fix its own markup. The page is
|
||||||
|
still created; the field is omitted when there are no problems. (#166)
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
|
- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the
|
||||||
|
same id are ONE footnote — one number, one definition, several back-references
|
||||||
|
— instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are
|
||||||
|
first-wins on import (the rest are dropped and reported via `footnoteWarnings`),
|
||||||
|
and a reference with no definition yields a single empty footnote rather than
|
||||||
|
one per occurrence. This supersedes the 0.93.0 "survive duplicate-id
|
||||||
|
definitions" behavior for the import path. (#166)
|
||||||
|
|
||||||
- **Public share AI: default per-workspace hourly assistant cap lowered
|
- **Public share AI: default per-workspace hourly assistant cap lowered
|
||||||
300 → 100.** The limiter falls back to this default whenever
|
300 → 100.** The limiter falls back to this default whenever
|
||||||
`SHARE_AI_WORKSPACE_MAX_PER_HOUR` is unset, so a `0.93.0` deployment that
|
`SHARE_AI_WORKSPACE_MAX_PER_HOUR` is unset, so a `0.93.0` deployment that
|
||||||
|
|||||||
@@ -159,6 +159,9 @@ export class AiChatController {
|
|||||||
// we also drop it on response `finish` so it never lingers after the stream
|
// we also drop it on response `finish` so it never lingers after the stream
|
||||||
// completes normally (the AI SDK pipes the response fire-and-forget, so we
|
// completes normally (the AI SDK pipes the response fire-and-forget, so we
|
||||||
// cannot simply remove it once `stream()` returns).
|
// cannot simply remove it once `stream()` returns).
|
||||||
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: wall-clock at
|
||||||
|
// which a Safari disconnect is observed, measured from request receipt.
|
||||||
|
const reqStartedAt = Date.now();
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
const onClose = (): void => {
|
const onClose = (): void => {
|
||||||
// A genuine disconnect leaves the response unfinished (unlike a normal
|
// A genuine disconnect leaves the response unfinished (unlike a normal
|
||||||
@@ -167,7 +170,8 @@ export class AiChatController {
|
|||||||
// so log it here before aborting the agent loop.
|
// so log it here before aborting the agent loop.
|
||||||
if (!res.raw.writableEnded) {
|
if (!res.raw.writableEnded) {
|
||||||
this.logger.warn(
|
this.logger.warn(
|
||||||
'AI chat stream: client disconnected before completion; aborting turn',
|
`AI chat stream: client disconnected before completion; aborting turn ` +
|
||||||
|
`(elapsed=${Date.now() - reqStartedAt}ms since request received)`,
|
||||||
);
|
);
|
||||||
controller.abort();
|
controller.abort();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -380,6 +380,15 @@ export class AiChatService {
|
|||||||
const capturedSteps: StepLike[] = [];
|
const capturedSteps: StepLike[] = [];
|
||||||
let inProgressText = '';
|
let inProgressText = '';
|
||||||
|
|
||||||
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure
|
||||||
|
// first-chunk latency, the model-silent gap right before a disconnect, and
|
||||||
|
// how many SSE heartbeats were written, so a Safari drop can be classified
|
||||||
|
// (idle-gap vs hard wall-clock cap vs slow first chunk).
|
||||||
|
const streamStartedAt = Date.now();
|
||||||
|
let firstModelChunkAt: number | undefined;
|
||||||
|
let lastModelChunkAt = streamStartedAt;
|
||||||
|
let heartbeatsSent = 0;
|
||||||
|
|
||||||
// NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous
|
// NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous
|
||||||
// failure here (or in pipe below) would skip the terminal callbacks, so the
|
// failure here (or in pipe below) would skip the terminal callbacks, so the
|
||||||
// catch releases the leased external clients to avoid a connection leak.
|
// catch releases the leased external clients to avoid a connection leak.
|
||||||
@@ -404,6 +413,12 @@ export class AiChatService {
|
|||||||
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
|
prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system),
|
||||||
abortSignal: signal,
|
abortSignal: signal,
|
||||||
onChunk: ({ chunk }) => {
|
onChunk: ({ chunk }) => {
|
||||||
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model
|
||||||
|
// output chunk means the stream is actively emitting bytes; track first
|
||||||
|
// + most-recent activity timestamps.
|
||||||
|
const now = Date.now();
|
||||||
|
firstModelChunkAt ??= now;
|
||||||
|
lastModelChunkAt = now;
|
||||||
// 'text-delta' is the assistant's prose; tool-call args are separate chunk
|
// 'text-delta' is the assistant's prose; tool-call args are separate chunk
|
||||||
// types — so this mirrors exactly what streams to the client.
|
// types — so this mirrors exactly what streams to the client.
|
||||||
if (chunk.type === 'text-delta') inProgressText += chunk.text;
|
if (chunk.type === 'text-delta') inProgressText += chunk.text;
|
||||||
@@ -415,6 +430,14 @@ export class AiChatService {
|
|||||||
inProgressText = '';
|
inProgressText = '';
|
||||||
},
|
},
|
||||||
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
|
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
|
||||||
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
|
||||||
|
// baseline for Safari comparison.
|
||||||
|
const diagNow = Date.now();
|
||||||
|
this.logger.log(
|
||||||
|
`AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||||
|
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||||
|
`heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
|
||||||
|
);
|
||||||
await persistAssistant({
|
await persistAssistant({
|
||||||
text,
|
text,
|
||||||
toolCalls: serializeSteps(steps),
|
toolCalls: serializeSteps(steps),
|
||||||
@@ -464,6 +487,14 @@ export class AiChatService {
|
|||||||
const e = error as { stack?: string };
|
const e = error as { stack?: string };
|
||||||
const errorText = describeProviderError(error, String(error));
|
const errorText = describeProviderError(error, String(error));
|
||||||
this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
|
this.logger.error(`AI chat stream error: ${errorText}`, e?.stack);
|
||||||
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of
|
||||||
|
// an error-terminated stream.
|
||||||
|
const diagNow = Date.now();
|
||||||
|
this.logger.warn(
|
||||||
|
`AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||||
|
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||||
|
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
|
||||||
|
);
|
||||||
// Persist the PARTIAL answer streamed before the failure (text + any
|
// Persist the PARTIAL answer streamed before the failure (text + any
|
||||||
// finished tool steps) WITH the error in metadata, so the turn shows what
|
// finished tool steps) WITH the error in metadata, so the turn shows what
|
||||||
// the user already saw plus the cause — not just a bare error.
|
// the user already saw plus the cause — not just a bare error.
|
||||||
@@ -488,6 +519,15 @@ export class AiChatService {
|
|||||||
`AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
|
`AI chat stream aborted (chat ${chatId}) after ${steps.length} ` +
|
||||||
`step(s), ${partialChars} chars partial text; persisting partial turn.`,
|
`step(s), ${partialChars} chars partial text; persisting partial turn.`,
|
||||||
);
|
);
|
||||||
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key
|
||||||
|
// line — classifies the Safari drop.
|
||||||
|
const diagNow = Date.now();
|
||||||
|
this.logger.warn(
|
||||||
|
`AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` +
|
||||||
|
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||||
|
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
|
||||||
|
`steps=${steps.length}`,
|
||||||
|
);
|
||||||
await persistAssistant(
|
await persistAssistant(
|
||||||
buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),
|
buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'),
|
||||||
);
|
);
|
||||||
@@ -566,7 +606,11 @@ export class AiChatService {
|
|||||||
// headers are sent, and is guarded for response-likes that lack it.
|
// headers are sent, and is guarded for response-likes that lack it.
|
||||||
res.raw.flushHeaders?.();
|
res.raw.flushHeaders?.();
|
||||||
// Heartbeat: keep the SSE stream progressing during silent tool/think gaps (Safari/proxy idle timeout).
|
// Heartbeat: keep the SSE stream progressing during silent tool/think gaps (Safari/proxy idle timeout).
|
||||||
startSseHeartbeat(res.raw);
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: count beats so a disconnect log can show
|
||||||
|
// how many pings were written before Safari dropped.
|
||||||
|
startSseHeartbeat(res.raw, 15_000, () => {
|
||||||
|
heartbeatsSent += 1;
|
||||||
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// Synchronous failure before/while wiring the stream: the terminal
|
// Synchronous failure before/while wiring the stream: the terminal
|
||||||
// callbacks will not run, so release the leased external clients here and
|
// callbacks will not run, so release the leased external clients here and
|
||||||
|
|||||||
@@ -28,15 +28,24 @@ import type { ServerResponse } from 'node:http';
|
|||||||
* the response finishes or the socket closes. The interval is unref()'d so it
|
* the response finishes or the socket closes. The interval is unref()'d so it
|
||||||
* never keeps the process alive, and writes are guarded so we never write to an
|
* never keeps the process alive, and writes are guarded so we never write to an
|
||||||
* already-ended/destroyed socket.
|
* already-ended/destroyed socket.
|
||||||
|
*
|
||||||
|
* `onBeat` is an OPTIONAL diagnostic hook invoked once after each heartbeat that
|
||||||
|
* was actually written (only when the write did not throw). It is purely for
|
||||||
|
* telemetry/counters and never affects the heartbeat behavior.
|
||||||
*/
|
*/
|
||||||
export function startSseHeartbeat(
|
export function startSseHeartbeat(
|
||||||
res: ServerResponse,
|
res: ServerResponse,
|
||||||
intervalMs = 15_000,
|
intervalMs = 15_000,
|
||||||
|
onBeat?: () => void,
|
||||||
): () => void {
|
): () => void {
|
||||||
const timer = setInterval(() => {
|
const timer = setInterval(() => {
|
||||||
if (res.writableEnded || res.destroyed) return;
|
if (res.writableEnded || res.destroyed) return;
|
||||||
try {
|
try {
|
||||||
res.write(': ping\n\n');
|
res.write(': ping\n\n');
|
||||||
|
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Notify the
|
||||||
|
// optional hook only after a successful write, so beat counters reflect
|
||||||
|
// pings that actually reached the socket.
|
||||||
|
onBeat?.();
|
||||||
} catch {
|
} catch {
|
||||||
// Socket vanished between the guard and the write; nothing to do.
|
// Socket vanished between the guard and the write; nothing to do.
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,10 +55,11 @@ describe("footnote markdown round-trip", () => {
|
|||||||
expect(html).not.toContain("data-footnote-def");
|
expect(html).not.toContain("data-footnote-def");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("extractFootnoteDefinitions de-duplicates colliding ids and rewrites markers", () => {
|
it("extractFootnoteDefinitions keeps the FIRST duplicate definition and reuses markers", () => {
|
||||||
// Two definitions share id `d`, and the body has two `[^d]` markers. The
|
// Two definitions share id `d`, and the body has two `[^d]` markers. Under
|
||||||
// output must keep BOTH definitions with DISTINCT ids and rewrite the second
|
// the import model (#166) duplicate definition ids are FIRST-WINS: only the
|
||||||
// marker so the (reference, definition) pairing stays 1:1.
|
// first definition is kept; markers are NEVER rewritten, so the two `[^d]`
|
||||||
|
// references reuse the single footnote.
|
||||||
const md = [
|
const md = [
|
||||||
"See here[^d] and there[^d].",
|
"See here[^d] and there[^d].",
|
||||||
"",
|
"",
|
||||||
@@ -68,30 +69,23 @@ describe("footnote markdown round-trip", () => {
|
|||||||
|
|
||||||
const { body, section } = extractFootnoteDefinitions(md);
|
const { body, section } = extractFootnoteDefinitions(md);
|
||||||
|
|
||||||
// Pull out the def ids from the section in order.
|
|
||||||
const defIds = Array.from(
|
const defIds = Array.from(
|
||||||
section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
|
section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
|
||||||
).map((m) => m[1]);
|
).map((m) => m[1]);
|
||||||
expect(defIds.length).toBe(2);
|
expect(defIds).toEqual(["d"]); // first-wins: one definition
|
||||||
expect(new Set(defIds).size).toBe(2); // distinct
|
|
||||||
expect(defIds[0]).toBe("d"); // first definition keeps the id
|
|
||||||
|
|
||||||
// Both definition texts survive.
|
|
||||||
expect(section).toContain("first");
|
expect(section).toContain("first");
|
||||||
expect(section).toContain("second");
|
expect(section).not.toContain("second"); // duplicate dropped
|
||||||
|
|
||||||
// The body still has two markers, now pointing at the two distinct ids.
|
// Both markers stay `[^d]` (reuse) — no `d__2` minting.
|
||||||
const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
|
const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
|
||||||
(m) => m[1],
|
(m) => m[1],
|
||||||
);
|
);
|
||||||
expect(refIds.length).toBe(2);
|
expect(refIds).toEqual(["d", "d"]);
|
||||||
expect(refIds.sort()).toEqual(defIds.sort());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("extractFootnoteDefinitions dedups DETERMINISTICALLY (same input -> same ids)", () => {
|
it("extractFootnoteDefinitions is DETERMINISTIC and stable (same input -> same output)", () => {
|
||||||
// The derived id must be a pure function of the input markdown so importing
|
// The output must be a pure function of the input markdown so importing the
|
||||||
// the same source twice (or via the editor and the MCP mirror) yields
|
// same source twice (or via the editor and the MCP mirror) is identical.
|
||||||
// identical ids — never random/time-based.
|
|
||||||
const md = [
|
const md = [
|
||||||
"See[^d] one[^d] two[^d].",
|
"See[^d] one[^d] two[^d].",
|
||||||
"",
|
"",
|
||||||
@@ -113,15 +107,13 @@ describe("footnote markdown round-trip", () => {
|
|||||||
|
|
||||||
const a = run();
|
const a = run();
|
||||||
const b = run();
|
const b = run();
|
||||||
// Identical across runs (this is what would FAIL on the random-id version).
|
expect(a).toEqual(b);
|
||||||
expect(a.defIds).toEqual(b.defIds);
|
// First-wins: one kept definition `d`; all three reuse markers stay `d`.
|
||||||
expect(a.refIds).toEqual(b.refIds);
|
expect(a.defIds).toEqual(["d"]);
|
||||||
// Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
|
expect(a.refIds).toEqual(["d", "d", "d"]);
|
||||||
expect(a.defIds).toEqual(["d", "d__2", "d__3"]);
|
|
||||||
expect(a.refIds.sort()).toEqual(a.defIds.sort());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("markdownToHtml with duplicate ids renders two distinct footnote defs", async () => {
|
it("markdownToHtml with a reused id renders ONE shared footnote def", async () => {
|
||||||
const md = [
|
const md = [
|
||||||
"See here[^d] and there[^d].",
|
"See here[^d] and there[^d].",
|
||||||
"",
|
"",
|
||||||
@@ -132,9 +124,8 @@ describe("footnote markdown round-trip", () => {
|
|||||||
const defIds = Array.from(
|
const defIds = Array.from(
|
||||||
html.matchAll(/data-footnote-def data-id="([^"]+)"/g),
|
html.matchAll(/data-footnote-def data-id="([^"]+)"/g),
|
||||||
).map((m) => m[1]);
|
).map((m) => m[1]);
|
||||||
expect(defIds.length).toBe(2);
|
expect(defIds).toEqual(["d"]); // one shared definition
|
||||||
expect(new Set(defIds).size).toBe(2);
|
|
||||||
expect(html).toContain("first");
|
expect(html).toContain("first");
|
||||||
expect(html).toContain("second");
|
expect(html).not.toContain("second");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
226
packages/editor-ext/src/lib/footnote/footnote-paste.test.ts
Normal file
226
packages/editor-ext/src/lib/footnote/footnote-paste.test.ts
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { Editor } from "@tiptap/core";
|
||||||
|
import { Document } from "@tiptap/extension-document";
|
||||||
|
import { Paragraph } from "@tiptap/extension-paragraph";
|
||||||
|
import { Text } from "@tiptap/extension-text";
|
||||||
|
import { Node as PMNode, Fragment, Slice } from "@tiptap/pm/model";
|
||||||
|
import { FootnoteReference } from "./footnote-reference";
|
||||||
|
import { FootnotesList } from "./footnotes-list";
|
||||||
|
import { FootnoteDefinition } from "./footnote-definition";
|
||||||
|
import { footnotePastePlugin } from "./footnote-sync";
|
||||||
|
import {
|
||||||
|
FOOTNOTE_REFERENCE_NAME,
|
||||||
|
FOOTNOTE_DEFINITION_NAME,
|
||||||
|
FOOTNOTES_LIST_NAME,
|
||||||
|
} from "./footnote-util";
|
||||||
|
|
||||||
|
// transformPasted reuse semantics (#166): a pasted reference to an id that
|
||||||
|
// already exists must KEEP the id (reuse → resolves to the existing footnote);
|
||||||
|
// only a pasted DEFINITION that collides is re-id'd (it would otherwise clobber
|
||||||
|
// the existing definition's text), and its paired references follow it.
|
||||||
|
|
||||||
|
const extensions = [
|
||||||
|
Document,
|
||||||
|
Paragraph,
|
||||||
|
Text,
|
||||||
|
FootnoteReference,
|
||||||
|
FootnotesList,
|
||||||
|
FootnoteDefinition,
|
||||||
|
];
|
||||||
|
|
||||||
|
/** An editor whose doc already contains footnote "a" (ref + definition). */
|
||||||
|
function makeEditorWithFootnoteA() {
|
||||||
|
return new Editor({
|
||||||
|
extensions,
|
||||||
|
content: {
|
||||||
|
type: "doc",
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: "paragraph",
|
||||||
|
content: [
|
||||||
|
{ type: "text", text: "x" },
|
||||||
|
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: FOOTNOTES_LIST_NAME,
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: FOOTNOTE_DEFINITION_NAME,
|
||||||
|
attrs: { id: "a" },
|
||||||
|
content: [
|
||||||
|
{ type: "paragraph", content: [{ type: "text", text: "note A" }] },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Run footnotePastePlugin's transformPasted against the editor's current doc. */
|
||||||
|
function paste(editor: Editor, slice: Slice): Slice {
|
||||||
|
const plugin = footnotePastePlugin();
|
||||||
|
return plugin.props!.transformPasted!(slice, editor.view);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Collect the ids of footnote refs/defs in a slice, in order (single DFS). */
|
||||||
|
function sliceFootnoteIds(slice: Slice): Array<{ kind: string; id: string }> {
|
||||||
|
const out: Array<{ kind: string; id: string }> = [];
|
||||||
|
const walk = (frag: Fragment) => {
|
||||||
|
frag.forEach((node: PMNode) => {
|
||||||
|
if (node.type.name === FOOTNOTE_REFERENCE_NAME)
|
||||||
|
out.push({ kind: "ref", id: node.attrs.id });
|
||||||
|
if (node.type.name === FOOTNOTE_DEFINITION_NAME)
|
||||||
|
out.push({ kind: "def", id: node.attrs.id });
|
||||||
|
walk(node.content);
|
||||||
|
});
|
||||||
|
};
|
||||||
|
walk(slice.content);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("footnotePastePlugin — reuse-aware id remap", () => {
|
||||||
|
it("keeps a pasted lone reference to an existing id (reuse, no remap)", () => {
|
||||||
|
const editor = makeEditorWithFootnoteA();
|
||||||
|
const { schema } = editor;
|
||||||
|
// Paste: a paragraph containing only a reference to the existing id "a".
|
||||||
|
const slice = new Slice(
|
||||||
|
Fragment.from(
|
||||||
|
schema.nodes.paragraph.create(null, [
|
||||||
|
schema.text("see "),
|
||||||
|
schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }),
|
||||||
|
]),
|
||||||
|
),
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
const out = paste(editor, slice);
|
||||||
|
// The reference keeps id "a" so it reuses the existing footnote.
|
||||||
|
expect(sliceFootnoteIds(out)).toEqual([{ kind: "ref", id: "a" }]);
|
||||||
|
editor.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("re-ids a pasted DEFINITION (and its paired reference) that collides", () => {
|
||||||
|
const editor = makeEditorWithFootnoteA();
|
||||||
|
const { schema } = editor;
|
||||||
|
// Paste: a reference AND a definition both carrying the existing id "a". The
|
||||||
|
// definition would clobber the existing one, so both are remapped together.
|
||||||
|
const slice = new Slice(
|
||||||
|
Fragment.fromArray([
|
||||||
|
schema.nodes.paragraph.create(null, [
|
||||||
|
schema.text("dup "),
|
||||||
|
schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }),
|
||||||
|
]),
|
||||||
|
schema.nodes[FOOTNOTES_LIST_NAME].create(null, [
|
||||||
|
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [
|
||||||
|
schema.nodes.paragraph.create(null, [schema.text("pasted note")]),
|
||||||
|
]),
|
||||||
|
]),
|
||||||
|
]),
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
const out = paste(editor, slice);
|
||||||
|
const ids = sliceFootnoteIds(out);
|
||||||
|
// Both the pasted ref and def were remapped to the SAME fresh id (paired),
|
||||||
|
// and it is the deterministic derived id (not "a").
|
||||||
|
const remappedIds = new Set(ids.map((x) => x.id));
|
||||||
|
expect(remappedIds.size).toBe(1);
|
||||||
|
expect(remappedIds.has("a")).toBe(false);
|
||||||
|
expect([...remappedIds][0]).toBe("a__2");
|
||||||
|
editor.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("re-ids TWO colliding pasted definitions to DISTINCT ids (reservation works)", () => {
|
||||||
|
// Existing doc has footnotes "a" and "b". Paste a slice that defines BOTH —
|
||||||
|
// each must get its own fresh id; the reservation (existing.add(newId)) keeps
|
||||||
|
// the second from deriving onto the first's new id.
|
||||||
|
const editor = new Editor({
|
||||||
|
extensions,
|
||||||
|
content: {
|
||||||
|
type: "doc",
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: "paragraph",
|
||||||
|
content: [
|
||||||
|
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } },
|
||||||
|
{ type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "b" } },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: FOOTNOTES_LIST_NAME,
|
||||||
|
content: [
|
||||||
|
{
|
||||||
|
type: FOOTNOTE_DEFINITION_NAME,
|
||||||
|
attrs: { id: "a" },
|
||||||
|
content: [{ type: "paragraph", content: [{ type: "text", text: "A" }] }],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
type: FOOTNOTE_DEFINITION_NAME,
|
||||||
|
attrs: { id: "b" },
|
||||||
|
content: [{ type: "paragraph", content: [{ type: "text", text: "B" }] }],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const { schema } = editor;
|
||||||
|
const slice = new Slice(
|
||||||
|
Fragment.fromArray([
|
||||||
|
schema.nodes.paragraph.create(null, [
|
||||||
|
schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }),
|
||||||
|
schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }),
|
||||||
|
]),
|
||||||
|
schema.nodes[FOOTNOTES_LIST_NAME].create(null, [
|
||||||
|
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [
|
||||||
|
schema.nodes.paragraph.create(null, [schema.text("pasted A")]),
|
||||||
|
]),
|
||||||
|
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [
|
||||||
|
schema.nodes.paragraph.create(null, [schema.text("pasted B")]),
|
||||||
|
]),
|
||||||
|
]),
|
||||||
|
]),
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
const out = paste(editor, slice);
|
||||||
|
const ids = sliceFootnoteIds(out);
|
||||||
|
const distinct = new Set(ids.map((x) => x.id));
|
||||||
|
// Two ids, both remapped off the originals, and distinct from each other.
|
||||||
|
expect(distinct.size).toBe(2);
|
||||||
|
expect(distinct.has("a")).toBe(false);
|
||||||
|
expect(distinct.has("b")).toBe(false);
|
||||||
|
expect([...distinct].sort()).toEqual(["a__2", "b__2"]);
|
||||||
|
editor.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("leaves the slice untouched when no pasted definition collides", () => {
|
||||||
|
const editor = makeEditorWithFootnoteA();
|
||||||
|
const { schema } = editor;
|
||||||
|
// A pasted reference+definition for a BRAND-NEW id "b" — no collision.
|
||||||
|
const slice = new Slice(
|
||||||
|
Fragment.fromArray([
|
||||||
|
schema.nodes.paragraph.create(null, [
|
||||||
|
schema.text("new "),
|
||||||
|
schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }),
|
||||||
|
]),
|
||||||
|
schema.nodes[FOOTNOTES_LIST_NAME].create(null, [
|
||||||
|
schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [
|
||||||
|
schema.nodes.paragraph.create(null, [schema.text("note B")]),
|
||||||
|
]),
|
||||||
|
]),
|
||||||
|
]),
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
const out = paste(editor, slice);
|
||||||
|
expect(sliceFootnoteIds(out)).toEqual([
|
||||||
|
{ kind: "ref", id: "b" },
|
||||||
|
{ kind: "def", id: "b" },
|
||||||
|
]);
|
||||||
|
editor.destroy();
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -29,9 +29,9 @@ interface DefOccurrence {
|
|||||||
|
|
||||||
interface FootnoteScan {
|
interface FootnoteScan {
|
||||||
/**
|
/**
|
||||||
* Every reference occurrence in document order (NOT de-duplicated). Needed so
|
* Every reference occurrence in document order (NOT de-duplicated). Repeated
|
||||||
* that duplicate ids — which would otherwise be silently collapsed — can be
|
* ids are kept so the FIRST appearance fixes definition order; later repeats
|
||||||
* detected and (together with their definitions) re-id'd instead of dropped.
|
* are reuse (same footnote) and are never re-id'd.
|
||||||
*/
|
*/
|
||||||
refOccurrences: RefOccurrence[];
|
refOccurrences: RefOccurrence[];
|
||||||
/**
|
/**
|
||||||
@@ -67,77 +67,66 @@ function scan(doc: ProseMirrorNode): FootnoteScan {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the
|
* Result of resolving the footnote id topology: the distinct reference order and
|
||||||
* concrete reference re-id edits that must be applied to the body so the doc no
|
* one definition node per id.
|
||||||
* longer contains two footnotes sharing a single id.
|
|
||||||
*
|
*
|
||||||
* The overriding invariant is that NO definition is ever dropped here: every
|
* References are NEVER re-id'd here — repeated ids are REUSE (one footnote). Only
|
||||||
* definition occurrence ends up with a unique id and therefore survives the
|
* duplicate DEFINITIONS are re-id'd; lacking a matching reference, a re-id'd
|
||||||
* canonical rebuild. Duplicate references are likewise re-id'd (and paired with
|
* duplicate is then dropped by the orphan policy. No definition is ever dropped
|
||||||
* a duplicate definition when one exists) so importing/pasting `[^d]` twice with
|
* for COLLIDING — only for being an orphan.
|
||||||
* two `[^d]:` definitions yields TWO distinct footnotes rather than one.
|
|
||||||
*/
|
*/
|
||||||
interface CollisionPlan {
|
interface CollisionPlan {
|
||||||
/**
|
/**
|
||||||
* Reference ids in document order, de-duplicated AFTER re-id. This is the
|
* Distinct reference ids in document order (first appearance). Repeated ids
|
||||||
* source of truth for definition order/numbering, exactly as before — only
|
* are reuse and collapse to a single entry. Source of truth for definition
|
||||||
* now collisions have been resolved so it no longer hides duplicates.
|
* order/numbering.
|
||||||
*/
|
*/
|
||||||
referenceIds: string[];
|
referenceIds: string[];
|
||||||
/** id -> definition node, after duplicates were re-id'd. One entry per id. */
|
/** id -> definition node, after duplicate definitions were re-id'd. One per id. */
|
||||||
definitions: Map<string, ProseMirrorNode>;
|
definitions: Map<string, ProseMirrorNode>;
|
||||||
/**
|
/** True when a duplicate definition required a re-id. */
|
||||||
* Body reference re-id edits to apply (position of a reference node -> the
|
|
||||||
* fresh id it must carry). Empty when there are no colliding references.
|
|
||||||
*/
|
|
||||||
refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>;
|
|
||||||
/** True when any collision required a re-id (refs and/or defs). */
|
|
||||||
changed: boolean;
|
changed: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resolve duplicate-id collisions among references and definitions WITHOUT ever
|
* Resolve the footnote id topology WITHOUT ever dropping a definition.
|
||||||
* dropping a definition.
|
|
||||||
*
|
*
|
||||||
* Strategy:
|
* Reference REUSE (Pandoc semantics, #166): repeated `[^a]` references that share
|
||||||
* - Walk references in document order. The FIRST reference for an id keeps it.
|
* an id are the SAME footnote — they get one number and one definition and are
|
||||||
* Any later reference sharing that id is a duplicate and gets a fresh unique
|
* NEVER re-id'd. So the reference walk only records the FIRST occurrence of each
|
||||||
* id; if a still-unclaimed duplicate definition with the original id exists,
|
* id (de-duplicating in document order); later occurrences are reuse and produce
|
||||||
* it is re-id'd to the SAME fresh id so the (ref, def) pair stays matched.
|
* no mutation at all.
|
||||||
* - Walk definitions in document order. The FIRST definition for an id keeps
|
|
||||||
* it; later duplicates that were not already claimed by a duplicate reference
|
|
||||||
* get their own fresh unique id (surviving as a distinct footnote/orphan).
|
|
||||||
*
|
*
|
||||||
* Re-id determinism: every fresh id is DERIVED from document state via
|
* Duplicate DEFINITIONS (two `[^d]:` nodes sharing an id reaching the LIVE editor
|
||||||
* deriveFootnoteId (e.g. `X__2`, `X__3`, collision-bumped against the set of ids
|
* via paste/collab merge) keep the never-lose policy: the first keeps the id, and
|
||||||
* already present) — NEVER random/time-based. Because the sync plugin runs
|
* each later duplicate is re-id'd to a DETERMINISTIC fresh id (deriveFootnoteId:
|
||||||
* identically on every collaborating client, a deterministic re-id is the only
|
* `X__2`, `X__3`, collision-bumped) so it survives as a distinct footnote — which,
|
||||||
* way they can converge on the SAME ids; a random id (the previous
|
* having no matching reference, then falls under the normal orphan policy. It is
|
||||||
* implementation) made two clients editing the same duplicate-id document mint
|
* only ever dropped for lacking a reference, never for colliding. The IMPORT
|
||||||
* DIFFERENT ids for the same duplicate, causing permanent Yjs divergence.
|
* paths (footnote.marked.ts / MCP extractFootnotes) instead apply first-wins +
|
||||||
|
* drop + warn for duplicate definitions; that divergence is intentional — import
|
||||||
|
* is an agent-authored artifact we sanitize, the editor is live user data we must
|
||||||
|
* not lose.
|
||||||
|
*
|
||||||
|
* Re-id determinism: every fresh id is DERIVED from document state, NEVER
|
||||||
|
* random/time-based, because the sync plugin runs identically on every
|
||||||
|
* collaborating client and a random id would make two clients mint DIFFERENT ids
|
||||||
|
* for the same duplicate, causing permanent Yjs divergence.
|
||||||
*/
|
*/
|
||||||
function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
||||||
const definitions = new Map<string, ProseMirrorNode>();
|
const definitions = new Map<string, ProseMirrorNode>();
|
||||||
const refReids: Array<{
|
|
||||||
pos: number;
|
|
||||||
node: ProseMirrorNode;
|
|
||||||
newId: string;
|
|
||||||
}> = [];
|
|
||||||
const referenceIds: string[] = [];
|
const referenceIds: string[] = [];
|
||||||
const seenRefIds = new Set<string>();
|
const seenRefIds = new Set<string>();
|
||||||
let changed = false;
|
let changed = false;
|
||||||
|
|
||||||
// `taken` is the set of every id that must be avoided when minting a derived
|
// `taken` is the set of every id to avoid when minting a derived id for a
|
||||||
// id: all original reference + definition ids in the document PLUS every id we
|
// duplicate definition: all original reference + definition ids PLUS every id
|
||||||
// mint during this pass. It is pure document state, so the derivation stays
|
// minted in this pass. Pure document state, so the derivation is deterministic
|
||||||
// deterministic across clients. Per-original occurrence counters make the k-th
|
// across clients.
|
||||||
// duplicate of `X` deterministically become `X__2`, `X__3`, ...
|
|
||||||
const taken = new Set<string>();
|
const taken = new Set<string>();
|
||||||
for (const occ of scan.refOccurrences) taken.add(occ.id);
|
for (const occ of scan.refOccurrences) taken.add(occ.id);
|
||||||
for (const occ of scan.defOccurrences) taken.add(occ.id);
|
for (const occ of scan.defOccurrences) taken.add(occ.id);
|
||||||
const occurrenceOf = new Map<string, number>();
|
const occurrenceOf = new Map<string, number>();
|
||||||
// Mint a deterministic unique id for a duplicate of `originalId`. The first
|
|
||||||
// duplicate is occurrence 2 (the keeper is occurrence 1), then 3, 4, ...
|
|
||||||
const mintId = (originalId: string): string => {
|
const mintId = (originalId: string): string => {
|
||||||
const next = (occurrenceOf.get(originalId) ?? 1) + 1;
|
const next = (occurrenceOf.get(originalId) ?? 1) + 1;
|
||||||
occurrenceOf.set(originalId, next);
|
occurrenceOf.set(originalId, next);
|
||||||
@@ -146,70 +135,30 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
|||||||
return id;
|
return id;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Bucket definition occurrences by their original id so a duplicate reference
|
// References: record each DISTINCT id once, in first-appearance order. Repeated
|
||||||
// can claim a matching (as-yet-unclaimed) duplicate definition and re-id the
|
// ids are reuse — nothing to mint, nothing to re-id.
|
||||||
// pair together. defByOriginalId[id] is consumed front-to-back.
|
|
||||||
const defByOriginalId = new Map<string, DefOccurrence[]>();
|
|
||||||
for (const occ of scan.defOccurrences) {
|
|
||||||
const arr = defByOriginalId.get(occ.id);
|
|
||||||
if (arr) arr.push(occ);
|
|
||||||
else defByOriginalId.set(occ.id, [occ]);
|
|
||||||
}
|
|
||||||
// The FIRST definition for each id is the canonical keeper of that id.
|
|
||||||
const claimed = new Set<DefOccurrence>();
|
|
||||||
|
|
||||||
for (const ref of scan.refOccurrences) {
|
for (const ref of scan.refOccurrences) {
|
||||||
if (!seenRefIds.has(ref.id)) {
|
if (!seenRefIds.has(ref.id)) {
|
||||||
// First reference with this id keeps it.
|
|
||||||
seenRefIds.add(ref.id);
|
seenRefIds.add(ref.id);
|
||||||
referenceIds.push(ref.id);
|
referenceIds.push(ref.id);
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Duplicate reference: assign a deterministic derived id. Pair it with the
|
|
||||||
// next unclaimed duplicate definition (NOT the first keeper) carrying the
|
|
||||||
// same original id, if one exists, so the (ref, def) pairing is preserved
|
|
||||||
// 1:1.
|
|
||||||
const newId = mintId(ref.id);
|
|
||||||
refReids.push({ pos: ref.pos, node: ref.node, newId });
|
|
||||||
seenRefIds.add(newId);
|
|
||||||
referenceIds.push(newId);
|
|
||||||
changed = true;
|
|
||||||
|
|
||||||
const candidates = defByOriginalId.get(ref.id) ?? [];
|
|
||||||
// Skip the first occurrence (it keeps the original id); pick the first
|
|
||||||
// duplicate not already claimed.
|
|
||||||
for (let i = 1; i < candidates.length; i++) {
|
|
||||||
const cand = candidates[i];
|
|
||||||
if (!claimed.has(cand)) {
|
|
||||||
claimed.add(cand);
|
|
||||||
definitions.set(newId, cand.node);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now place every definition under a unique id. The first occurrence of each
|
// Definitions: the first occurrence of each id keeps it; a later duplicate is
|
||||||
// original id keeps it; remaining duplicates either were paired with a
|
// re-id'd deterministically so it is never silently dropped (never-lose).
|
||||||
// duplicate reference above (already placed) or get a fresh standalone id.
|
|
||||||
const seenDefIds = new Set<string>();
|
const seenDefIds = new Set<string>();
|
||||||
for (const occ of scan.defOccurrences) {
|
for (const occ of scan.defOccurrences) {
|
||||||
if (claimed.has(occ)) continue; // already placed against a duplicate ref id
|
|
||||||
if (!seenDefIds.has(occ.id)) {
|
if (!seenDefIds.has(occ.id)) {
|
||||||
seenDefIds.add(occ.id);
|
seenDefIds.add(occ.id);
|
||||||
definitions.set(occ.id, occ.node);
|
definitions.set(occ.id, occ.node);
|
||||||
} else {
|
} else {
|
||||||
// Duplicate definition with no duplicate reference to pair with: keep it
|
|
||||||
// with a deterministic derived id so it is NEVER silently dropped. (It
|
|
||||||
// becomes an orphan and is then subject to the normal orphan policy — but
|
|
||||||
// only ever because it has no matching reference, never because it
|
|
||||||
// collided.)
|
|
||||||
const newId = mintId(occ.id);
|
const newId = mintId(occ.id);
|
||||||
definitions.set(newId, occ.node);
|
definitions.set(newId, occ.node);
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return { referenceIds, definitions, refReids, changed };
|
return { referenceIds, definitions, changed };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -245,14 +194,13 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan {
|
|||||||
* ping-pong forever (list moved to end -> trailing paragraph appended -> list
|
* ping-pong forever (list moved to end -> trailing paragraph appended -> list
|
||||||
* no longer last -> moved again ...).
|
* no longer last -> moved again ...).
|
||||||
*
|
*
|
||||||
* Duplicate-id collisions (two references and/or two definitions sharing one
|
* The id topology is resolved up front by resolveCollisions() (#166): repeated
|
||||||
* id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a
|
* references sharing an id are REUSE — one footnote, never re-id'd — while a
|
||||||
* reference+definition pair) are resolved up front by resolveCollisions(): the
|
* duplicate DEFINITION (from pasting/duplicating a definition, or a collab merge)
|
||||||
* duplicates are re-id'd to fresh unique ids so BOTH survive as distinct
|
* is re-id'd to a fresh unique id. No footnoteDefinition is ever silently deleted
|
||||||
* footnotes. This guarantees the overriding invariant — no footnoteDefinition is
|
* by this automatic (addToHistory:false) transaction because of a COLLISION; a
|
||||||
* ever silently deleted by this automatic (addToHistory:false) transaction. A
|
|
||||||
* definition is only ever removed when it has NO matching reference (orphan
|
* definition is only ever removed when it has NO matching reference (orphan
|
||||||
* policy), never because its id collided with another.
|
* policy) — which is also what then drops a re-id'd duplicate definition.
|
||||||
*/
|
*/
|
||||||
export function footnoteSyncPlugin(
|
export function footnoteSyncPlugin(
|
||||||
isRemoteTransaction?: (tr: Transaction) => boolean,
|
isRemoteTransaction?: (tr: Transaction) => boolean,
|
||||||
@@ -283,18 +231,16 @@ export function footnoteSyncPlugin(
|
|||||||
|
|
||||||
const info = scan(doc);
|
const info = scan(doc);
|
||||||
|
|
||||||
// 0) Resolve duplicate-id collisions (two references and/or two
|
// 0) Resolve the id topology (#166): repeated references that share an id
|
||||||
// definitions sharing one id) by re-id'ing duplicates to fresh unique
|
// are REUSE — collapsed to one entry in `referenceIds`, never re-id'd —
|
||||||
// ids. This is the critical defense: the old last-wins Map silently
|
// while a duplicate DEFINITION is re-id'd to a fresh deterministic id
|
||||||
// dropped all but the last definition for a shared id; here EVERY
|
// (and, lacking a matching reference, removed by the orphan policy
|
||||||
// definition survives with a unique id, and duplicate references are
|
// below). No definition is dropped for COLLIDING, only for being orphan.
|
||||||
// paired with duplicate definitions so two same-id imports/pastes yield
|
|
||||||
// two distinct footnotes instead of one.
|
|
||||||
const plan = resolveCollisions(info);
|
const plan = resolveCollisions(info);
|
||||||
const referenceIds = plan.referenceIds;
|
const referenceIds = plan.referenceIds;
|
||||||
|
|
||||||
// The set of ids that must have a definition, in reference order (after
|
// The set of ids that must have a definition, in reference order.
|
||||||
// collision re-id). De-duplicated already by resolveCollisions.
|
// De-duplicated already by resolveCollisions.
|
||||||
const referenceIdSet = new Set(referenceIds);
|
const referenceIdSet = new Set(referenceIds);
|
||||||
|
|
||||||
// 1) For each definition occurrence, compute the id it should END UP with
|
// 1) For each definition occurrence, compute the id it should END UP with
|
||||||
@@ -397,21 +343,15 @@ export function footnoteSyncPlugin(
|
|||||||
|
|
||||||
// 6) Apply the targeted, minimal mutations in ONE transaction. We never
|
// 6) Apply the targeted, minimal mutations in ONE transaction. We never
|
||||||
// delete-and-recreate an unchanged definition subtree; we only:
|
// delete-and-recreate an unchanged definition subtree; we only:
|
||||||
// (a) re-id specific colliding references and definitions (attr-only),
|
// (a) re-id colliding definitions (attr-only),
|
||||||
// (b) delete genuine orphan definitions and extra/empty lists,
|
// (b) delete genuine orphan definitions and extra/empty lists,
|
||||||
// (c) insert genuinely-missing empty definitions and migrate defs out
|
// (c) insert genuinely-missing empty definitions and migrate defs out
|
||||||
// of extra lists into the primary list,
|
// of extra lists into the primary list,
|
||||||
// (d) create the primary list if references exist but none does yet.
|
// (d) create the primary list if references exist but none does yet.
|
||||||
|
// References are never re-id'd (reuse), so there is no reference edit.
|
||||||
const tr = newState.tr;
|
const tr = newState.tr;
|
||||||
|
|
||||||
// 6a) Re-id colliding references (inline atoms: attr-only, size-stable).
|
// 6a) Re-id colliding definitions IN PLACE (attr-only). This preserves the
|
||||||
for (const reid of plan.refReids) {
|
|
||||||
tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, {
|
|
||||||
...reid.node.attrs,
|
|
||||||
id: reid.newId,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
// 6b) Re-id colliding definitions IN PLACE (attr-only). This preserves the
|
|
||||||
// definition's content subtree — never delete+recreate it.
|
// definition's content subtree — never delete+recreate it.
|
||||||
for (const reid of defReidsToApply) {
|
for (const reid of defReidsToApply) {
|
||||||
tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, {
|
tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, {
|
||||||
@@ -546,13 +486,17 @@ export const footnotePastePluginKey = new PluginKey("footnotePaste");
|
|||||||
* Without this, pasting a reference+definition pair copied from elsewhere — or
|
* Without this, pasting a reference+definition pair copied from elsewhere — or
|
||||||
* duplicating one in place — would merge with (or clobber) the existing footnote
|
* duplicating one in place — would merge with (or clobber) the existing footnote
|
||||||
* of the same id. The schema-sync plugin already guarantees no definition is
|
* of the same id. The schema-sync plugin already guarantees no definition is
|
||||||
* ever silently deleted after the fact (it re-id's collisions), but regenerating
|
* ever silently deleted after the fact (it re-id's duplicate definitions), but
|
||||||
* at paste time keeps the pasted footnote cleanly separate from the start and
|
* regenerating at paste time keeps the pasted footnote cleanly separate from the
|
||||||
* avoids any transient merge.
|
* start and avoids any transient merge.
|
||||||
*
|
*
|
||||||
* Only COLLIDING ids are remapped: a self-paste of a lone reference whose id is
|
* REUSE-aware (#166): only a colliding DEFINITION forces a remap. Pasting a lone
|
||||||
* not present elsewhere is left untouched (so it still resolves to its existing
|
* reference whose id already exists is REUSE — it must keep the id so it resolves
|
||||||
* definition).
|
* to the existing footnote (one number, shared definition). So we remap an id
|
||||||
|
* only when the pasted slice itself carries a `footnoteDefinition` for it (which
|
||||||
|
* would otherwise clobber the existing definition's text); the matching pasted
|
||||||
|
* references are remapped along with it to stay paired. A self-paste of just a
|
||||||
|
* reference is left untouched.
|
||||||
*/
|
*/
|
||||||
export function footnotePastePlugin(): Plugin {
|
export function footnotePastePlugin(): Plugin {
|
||||||
return new Plugin({
|
return new Plugin({
|
||||||
@@ -572,31 +516,35 @@ export function footnotePastePlugin(): Plugin {
|
|||||||
});
|
});
|
||||||
if (existing.size === 0) return slice;
|
if (existing.size === 0) return slice;
|
||||||
|
|
||||||
// Build a remap (old id -> fresh id) for every COLLIDING id found in the
|
// Ids the pasted slice DEFINES (carries a footnoteDefinition for). Only
|
||||||
// pasted slice, shared by references and definitions so a pasted pair
|
// these can clobber an existing footnote's text, so only these force a
|
||||||
// stays matched. A paste is a distinct local user action (not a
|
// remap; a pasted reference to an already-existing id is reuse and keeps
|
||||||
// shared-state convergence point), so determinism is not strictly
|
// its id.
|
||||||
// required here — but we derive the new id deterministically anyway
|
const sliceDefIds = new Set<string>();
|
||||||
// (deriveFootnoteId against the current doc's id set) for consistency
|
const collectDefIds = (node: ProseMirrorNode) => {
|
||||||
// with the sync/import paths and to keep Math.random off this code path.
|
if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
|
||||||
const remap = new Map<string, string>();
|
|
||||||
const collectColliding = (node: ProseMirrorNode) => {
|
|
||||||
if (
|
|
||||||
node.type.name === FOOTNOTE_REFERENCE_NAME ||
|
|
||||||
node.type.name === FOOTNOTE_DEFINITION_NAME
|
|
||||||
) {
|
|
||||||
const id = node.attrs.id;
|
const id = node.attrs.id;
|
||||||
if (id && existing.has(id) && !remap.has(id)) {
|
if (id) sliceDefIds.add(id);
|
||||||
const newId = deriveFootnoteId(id, 2, existing);
|
|
||||||
remap.set(id, newId);
|
|
||||||
// Reserve it so a second colliding id deriving to the same base
|
|
||||||
// bumps instead of clashing.
|
|
||||||
existing.add(newId);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
node.descendants(collectColliding);
|
node.descendants(collectDefIds);
|
||||||
};
|
};
|
||||||
slice.content.descendants(collectColliding);
|
slice.content.descendants(collectDefIds);
|
||||||
|
|
||||||
|
// Build a remap (old id -> fresh id) for every colliding id the slice
|
||||||
|
// DEFINES, shared by references and definitions so a pasted pair stays
|
||||||
|
// matched. The new id is derived deterministically (deriveFootnoteId
|
||||||
|
// against the current doc's id set) for consistency with the sync/import
|
||||||
|
// paths and to keep Math.random off this code path.
|
||||||
|
const remap = new Map<string, string>();
|
||||||
|
for (const id of sliceDefIds) {
|
||||||
|
if (existing.has(id) && !remap.has(id)) {
|
||||||
|
const newId = deriveFootnoteId(id, 2, existing);
|
||||||
|
remap.set(id, newId);
|
||||||
|
// Reserve it so a second colliding id deriving to the same base
|
||||||
|
// bumps instead of clashing.
|
||||||
|
existing.add(newId);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (remap.size === 0) return slice;
|
if (remap.size === 0) return slice;
|
||||||
|
|
||||||
// Rewrite the colliding ids throughout the slice.
|
// Rewrite the colliding ids throughout the slice.
|
||||||
|
|||||||
@@ -4,16 +4,12 @@ import { deriveFootnoteId } from "./footnote-util";
|
|||||||
/**
|
/**
|
||||||
* GOLDEN TABLE for `deriveFootnoteId` (and its private alphabetic `suffix`).
|
* GOLDEN TABLE for `deriveFootnoteId` (and its private alphabetic `suffix`).
|
||||||
*
|
*
|
||||||
* deriveFootnoteId is DELIBERATELY duplicated in
|
* `deriveFootnoteId` lives ONLY in editor-ext now — it is used by
|
||||||
* packages/mcp/src/lib/collaboration.ts
|
* `resolveCollisions` (re-id of a duplicate definition) and `footnotePastePlugin`
|
||||||
* and the two copies MUST stay byte-for-byte equivalent in behavior so the same
|
* (re-id of a pasted colliding definition). The MCP/marked import paths no longer
|
||||||
* markdown imported through the editor and through the MCP path yields identical
|
* derive ids (duplicate definitions there are first-wins-dropped, #166), so there
|
||||||
* footnote ids. This table is the SHARED contract: the parity test
|
* is no cross-package copy and no parity test to keep in sync. This table pins the
|
||||||
* packages/mcp/test/unit/derive-id-parity.test.mjs
|
* deterministic scheme so a future change to it is a conscious one.
|
||||||
* pins the exact SAME (input -> expected) pairs against the COMPILED mcp build.
|
|
||||||
* If either copy drifts, one of the two tests goes red.
|
|
||||||
*
|
|
||||||
* Keep this constant in sync with GOLDEN in the mcp parity test.
|
|
||||||
*/
|
*/
|
||||||
export const DERIVE_GOLDEN: Array<{
|
export const DERIVE_GOLDEN: Array<{
|
||||||
originalId: string;
|
originalId: string;
|
||||||
@@ -56,7 +52,7 @@ function singleLetterSuffixes(): string[] {
|
|||||||
return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i));
|
return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i));
|
||||||
}
|
}
|
||||||
|
|
||||||
describe("deriveFootnoteId golden table (cross-package drift guard)", () => {
|
describe("deriveFootnoteId golden table (deterministic-scheme pin)", () => {
|
||||||
for (const row of DERIVE_GOLDEN) {
|
for (const row of DERIVE_GOLDEN) {
|
||||||
it(`derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) === "${row.expected}" — ${row.why}`, () => {
|
it(`derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) === "${row.expected}" — ${row.why}`, () => {
|
||||||
const got = deriveFootnoteId(
|
const got = deriveFootnoteId(
|
||||||
|
|||||||
@@ -62,10 +62,11 @@ export function generateFootnoteId(): string {
|
|||||||
* `taken` is consulted but NOT mutated here; the caller adds the returned id to
|
* `taken` is consulted but NOT mutated here; the caller adds the returned id to
|
||||||
* its own seen-set before requesting the next derived id.
|
* its own seen-set before requesting the next derived id.
|
||||||
*
|
*
|
||||||
* NOTE: this implementation is intentionally duplicated in
|
* Used only inside editor-ext now (resolveCollisions for a re-id'd duplicate
|
||||||
* packages/mcp/src/lib/collaboration.ts (deriveFootnoteId)
|
* DEFINITION, and footnotePastePlugin). The MCP/marked import paths no longer
|
||||||
* and MUST stay in sync with it so markdown imported through either path yields
|
* derive ids — duplicate definitions there are first-wins-dropped (#166) — so
|
||||||
* identical ids.
|
* there is no cross-package copy to keep in sync. The golden table in
|
||||||
|
* footnote-util.derive-id.test.ts pins the scheme.
|
||||||
*/
|
*/
|
||||||
export function deriveFootnoteId(
|
export function deriveFootnoteId(
|
||||||
originalId: string,
|
originalId: string,
|
||||||
|
|||||||
@@ -307,13 +307,12 @@ describe("footnote sync plugin (orphans)", () => {
|
|||||||
editor.destroy();
|
editor.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
it("two definitions sharing an id (with two matching references) BOTH survive the first edit (no data loss)", () => {
|
it("repeated references REUSE one footnote; a duplicate definition is dropped (first-wins)", () => {
|
||||||
// Reproduces the verified data-loss bug: two footnoteDefinition nodes share
|
// Reuse semantics (#166): two references with id "d" are the SAME footnote
|
||||||
// id "d", and there are two references with id "d". The OLD code built the
|
// (one number, shared definition) — they are NEVER re-id'd. Two definitions
|
||||||
// definitions Map last-wins and emitted exactly one definition for the
|
// sharing id "d" are first-wins: the first keeps "d", the second is re-id'd
|
||||||
// de-duplicated reference, so the very first keystroke's sync transaction
|
// to a deterministic orphan id and then dropped by the orphan policy (it has
|
||||||
// deleted the whole list and rebuilt it from one definition — silently
|
// no matching reference). So the result is ONE reused footnote on "first".
|
||||||
// destroying "first" and keeping only "second".
|
|
||||||
const editor = makeEditor({
|
const editor = makeEditor({
|
||||||
type: "doc",
|
type: "doc",
|
||||||
content: [
|
content: [
|
||||||
@@ -351,8 +350,8 @@ describe("footnote sync plugin (orphans)", () => {
|
|||||||
editor.commands.insertContentAt(1, " ");
|
editor.commands.insertContentAt(1, " ");
|
||||||
|
|
||||||
const doc = editor.state.doc;
|
const doc = editor.state.doc;
|
||||||
// BOTH definitions survive.
|
// One shared definition survives (first-wins); the duplicate is dropped.
|
||||||
expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2);
|
expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(1);
|
||||||
const defTexts: string[] = [];
|
const defTexts: string[] = [];
|
||||||
const defIds: string[] = [];
|
const defIds: string[] = [];
|
||||||
doc.descendants((node) => {
|
doc.descendants((node) => {
|
||||||
@@ -361,27 +360,23 @@ describe("footnote sync plugin (orphans)", () => {
|
|||||||
defTexts.push(node.textContent);
|
defTexts.push(node.textContent);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
// No content was lost: both "first" and "second" are still present.
|
expect(defTexts).toEqual(["first"]);
|
||||||
expect(defTexts.sort()).toEqual(["first", "second"]);
|
expect(defIds).toEqual(["d"]);
|
||||||
// The colliding ids were made distinct.
|
// Both references keep id "d" (reuse — not re-id'd).
|
||||||
expect(new Set(defIds).size).toBe(2);
|
|
||||||
// Each definition's id matches exactly one reference (1:1 pairing).
|
|
||||||
const refIds: string[] = [];
|
const refIds: string[] = [];
|
||||||
doc.descendants((node) => {
|
doc.descendants((node) => {
|
||||||
if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id);
|
if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id);
|
||||||
});
|
});
|
||||||
expect(refIds.sort()).toEqual(defIds.sort());
|
expect(refIds).toEqual(["d", "d"]);
|
||||||
editor.destroy();
|
editor.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
it("re-ids colliding duplicates DETERMINISTICALLY (two clients converge to identical ids)", () => {
|
it("reuse outcome is DETERMINISTIC across clients (Yjs convergence)", () => {
|
||||||
// Cross-client determinism guard. Two collaborating clients each see the
|
// Cross-client determinism guard. Two collaborating clients each see the
|
||||||
// SAME duplicate-id document and each make a local edit. The sync plugin
|
// SAME document and make a local edit; the sync plugin runs identically, so
|
||||||
// runs identically on every client, so it MUST mint the SAME new ids on both
|
// the resolved state MUST be identical (else they diverge over Yjs). Under
|
||||||
// — otherwise the two clients diverge permanently over Yjs (duplicated
|
// reuse the three "d" references collapse to one footnote and the duplicate
|
||||||
// footnotes). This is exactly the blocker the previous random-id
|
// definitions are dropped (first-wins) — deterministically on every client.
|
||||||
// (generateFootnoteId / Math.random) implementation caused: it would mint
|
|
||||||
// DIFFERENT ids on each client and this assertion would fail.
|
|
||||||
const duplicateDoc = {
|
const duplicateDoc = {
|
||||||
type: "doc",
|
type: "doc",
|
||||||
content: [
|
content: [
|
||||||
@@ -435,30 +430,28 @@ describe("footnote sync plugin (orphans)", () => {
|
|||||||
editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs
|
editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs
|
||||||
const refIds: string[] = [];
|
const refIds: string[] = [];
|
||||||
const defIds: string[] = [];
|
const defIds: string[] = [];
|
||||||
|
const defTexts: string[] = [];
|
||||||
editor.state.doc.descendants((node) => {
|
editor.state.doc.descendants((node) => {
|
||||||
if (node.type.name === FOOTNOTE_REFERENCE_NAME)
|
if (node.type.name === FOOTNOTE_REFERENCE_NAME)
|
||||||
refIds.push(node.attrs.id);
|
refIds.push(node.attrs.id);
|
||||||
if (node.type.name === FOOTNOTE_DEFINITION_NAME)
|
if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
|
||||||
defIds.push(node.attrs.id);
|
defIds.push(node.attrs.id);
|
||||||
|
defTexts.push(node.textContent);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
editor.destroy();
|
editor.destroy();
|
||||||
return { refIds, defIds };
|
return { refIds, defIds, defTexts };
|
||||||
};
|
};
|
||||||
|
|
||||||
const clientA = idsAfterLocalEdit();
|
const clientA = idsAfterLocalEdit();
|
||||||
const clientB = idsAfterLocalEdit();
|
const clientB = idsAfterLocalEdit();
|
||||||
|
|
||||||
// Both clients computed IDENTICAL ids (the property that makes Yjs converge).
|
// Both clients resolved to IDENTICAL state (the Yjs-convergence property).
|
||||||
expect(clientA.refIds).toEqual(clientB.refIds);
|
expect(clientA).toEqual(clientB);
|
||||||
expect(clientA.defIds).toEqual(clientB.defIds);
|
// Reuse: the three references stay "d"; one definition survives (first-wins).
|
||||||
|
expect(clientA.refIds).toEqual(["d", "d", "d"]);
|
||||||
// And the ids are deterministic-derived (not random uuid-style): the keeper
|
expect(clientA.defIds).toEqual(["d"]);
|
||||||
// keeps "d", the duplicates become "d__2", "d__3".
|
expect(clientA.defTexts).toEqual(["one"]);
|
||||||
expect(new Set(clientA.refIds)).toEqual(new Set(["d", "d__2", "d__3"]));
|
|
||||||
// Every definition survived with a unique id, 1:1 with the references.
|
|
||||||
expect(clientA.defIds.length).toBe(3);
|
|
||||||
expect(new Set(clientA.defIds).size).toBe(3);
|
|
||||||
expect([...clientA.refIds].sort()).toEqual([...clientA.defIds].sort());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("removes an orphan definition with no matching reference", () => {
|
it("removes an orphan definition with no matching reference", () => {
|
||||||
|
|||||||
@@ -13,36 +13,33 @@ function bodyMarkers(body: string): string[] {
|
|||||||
return [...body.matchAll(/\[\^([^\]\s]+)\]/g)].map((m) => m[1]);
|
return [...body.matchAll(/\[\^([^\]\s]+)\]/g)].map((m) => m[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
describe("extractFootnoteDefinitions: more definitions than markers (orphans)", () => {
|
describe("extractFootnoteDefinitions: duplicate definition ids (first-wins)", () => {
|
||||||
// Body has ONE `[^d]` reference marker but THREE `[^d]:` definitions. The
|
// Body has ONE `[^d]` reference but THREE `[^d]:` definitions. Under the
|
||||||
// surplus definitions have no marker to pair with — they must NOT be silently
|
// import model (#166) a duplicate definition id is FIRST-WINS: only the first
|
||||||
// merged into one footnote (the editor's last-wins sync would otherwise drop
|
// definition is kept; the rest are DROPPED (and surfaced by analyzeFootnotes,
|
||||||
// two of them). The dedup gives each colliding definition a deterministic
|
// not silently re-id'd into orphan footnotes as before). Reference markers are
|
||||||
// derived id so all three survive as distinct footnoteDefinition nodes.
|
// never rewritten, so repeated references would reuse the single footnote.
|
||||||
const md = ["See[^d].", "", "[^d]: a", "[^d]: b", "[^d]: c"].join("\n");
|
const md = ["See[^d].", "", "[^d]: a", "[^d]: b", "[^d]: c"].join("\n");
|
||||||
|
|
||||||
it("emits 3 DISTINCT definition ids: d, d__2, d__3 (derived scheme, in order)", () => {
|
it("keeps only the FIRST definition for the id (first-wins)", () => {
|
||||||
const { section } = extractFootnoteDefinitions(md);
|
const { section } = extractFootnoteDefinitions(md);
|
||||||
const ids = defIds(section);
|
const ids = defIds(section);
|
||||||
expect(ids).toEqual(["d", "d__2", "d__3"]);
|
expect(ids).toEqual(["d"]);
|
||||||
// All distinct: nothing was merged away.
|
|
||||||
expect(new Set(ids).size).toBe(3);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("preserves each definition's text against its (possibly derived) id", () => {
|
it("keeps the first definition's text and drops the duplicates", () => {
|
||||||
const { section } = extractFootnoteDefinitions(md);
|
const { section } = extractFootnoteDefinitions(md);
|
||||||
// First definition keeps the original id and its text.
|
|
||||||
expect(section).toContain('data-footnote-def data-id="d"><p>a</p>');
|
expect(section).toContain('data-footnote-def data-id="d"><p>a</p>');
|
||||||
// The two surplus definitions survive as orphans with derived ids.
|
// No derived `d__2` / `d__3` ids are emitted anymore.
|
||||||
expect(section).toContain('data-footnote-def data-id="d__2"><p>b</p>');
|
expect(section).not.toContain("d__2");
|
||||||
expect(section).toContain('data-footnote-def data-id="d__3"><p>c</p>');
|
expect(section).not.toContain("d__3");
|
||||||
|
// The dropped duplicate texts are not in the section.
|
||||||
|
expect(section).not.toContain("<p>b</p>");
|
||||||
|
expect(section).not.toContain("<p>c</p>");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("leaves the SINGLE body marker as [^d] (no surplus marker to rewrite)", () => {
|
it("leaves the SINGLE body marker as [^d] (markers are never rewritten)", () => {
|
||||||
const { body } = extractFootnoteDefinitions(md);
|
const { body } = extractFootnoteDefinitions(md);
|
||||||
// There is exactly one reference marker and it is untouched: the keeper
|
|
||||||
// definition pairs with it. The orphan defs have no marker, so the body is
|
|
||||||
// unchanged except for the stripped definition lines.
|
|
||||||
expect(bodyMarkers(body)).toEqual(["d"]);
|
expect(bodyMarkers(body)).toEqual(["d"]);
|
||||||
expect(body).toContain("See[^d].");
|
expect(body).toContain("See[^d].");
|
||||||
// The definition lines themselves were pulled OUT of the body.
|
// The definition lines themselves were pulled OUT of the body.
|
||||||
@@ -55,9 +52,21 @@ describe("extractFootnoteDefinitions: more definitions than markers (orphans)",
|
|||||||
const { section } = extractFootnoteDefinitions(md);
|
const { section } = extractFootnoteDefinitions(md);
|
||||||
expect(section.startsWith("<section data-footnotes>")).toBe(true);
|
expect(section.startsWith("<section data-footnotes>")).toBe(true);
|
||||||
expect(section.endsWith("</section>")).toBe(true);
|
expect(section.endsWith("</section>")).toBe(true);
|
||||||
// Exactly three definition divs.
|
// Exactly one definition div (first-wins).
|
||||||
expect(
|
expect([...section.matchAll(/<div data-footnote-def/g)]).toHaveLength(1);
|
||||||
[...section.matchAll(/<div data-footnote-def/g)],
|
});
|
||||||
).toHaveLength(3);
|
});
|
||||||
|
|
||||||
|
describe("extractFootnoteDefinitions: reuse (repeated references, one definition)", () => {
|
||||||
|
// Pandoc semantics: many `[^a]` references + one `[^a]:` definition = one
|
||||||
|
// footnote, shared. Markers are left intact so the editor numbers them as one.
|
||||||
|
const md = ["A[^a] B[^a] C[^a].", "", "[^a]: shared note"].join("\n");
|
||||||
|
|
||||||
|
it("emits exactly one definition and leaves every reference marker as [^a]", () => {
|
||||||
|
const { section, body } = extractFootnoteDefinitions(md);
|
||||||
|
expect(defIds(section)).toEqual(["a"]);
|
||||||
|
expect(section).toContain('data-footnote-def data-id="a"><p>shared note</p>');
|
||||||
|
// All three reference markers stay `a` (no `a__2`/`a__3` minting).
|
||||||
|
expect(bodyMarkers(body)).toEqual(["a", "a", "a"]);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import { marked } from "marked";
|
import { marked } from "marked";
|
||||||
import { deriveFootnoteId } from "../../footnote/footnote-util";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline.
|
* Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline.
|
||||||
@@ -13,8 +12,12 @@ import { deriveFootnoteId } from "../../footnote/footnote-util";
|
|||||||
* single <section data-footnotes> with one <div data-footnote-def> per
|
* single <section data-footnotes> with one <div data-footnote-def> per
|
||||||
* definition, so the round-trip rebuilds footnotesList + footnoteDefinition.
|
* definition, so the round-trip rebuilds footnotesList + footnoteDefinition.
|
||||||
*
|
*
|
||||||
* Only definitions that have a matching reference are emitted (and vice-versa
|
* Every FIRST definition line is emitted — duplicate ids are first-wins (the
|
||||||
* the sync plugin fills any gaps on the editor side), keeping the output valid.
|
* rest are dropped, and surfaced via analyzeFootnotes), and reference markers are
|
||||||
|
* left untouched so repeated `[^a]` references reuse the one footnote (#166).
|
||||||
|
* Orphan definitions (no matching reference) are still emitted here; the editor's
|
||||||
|
* sync plugin reconciles the final reference/definition set (drops orphans,
|
||||||
|
* synthesizes a single empty definition for a reference that lacks one).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const DEFINITION_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
const DEFINITION_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||||
@@ -53,10 +56,6 @@ function escapeAttr(value: string): string {
|
|||||||
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
||||||
}
|
}
|
||||||
|
|
||||||
function escapeRegExp(value: string): string {
|
|
||||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract `[^id]: text` definition lines from the markdown body, returning the
|
* Extract `[^id]: text` definition lines from the markdown body, returning the
|
||||||
* cleaned body plus a rendered <section data-footnotes> (empty string when no
|
* cleaned body plus a rendered <section data-footnotes> (empty string when no
|
||||||
@@ -101,70 +100,32 @@ export function extractFootnoteDefinitions(markdown: string): {
|
|||||||
return { body: markdown, section: "" };
|
return { body: markdown, section: "" };
|
||||||
}
|
}
|
||||||
|
|
||||||
// De-duplicate colliding definition ids. Two definitions sharing an id (e.g.
|
// Duplicate definition ids (e.g. `[^d]: first` / `[^d]: second`): FIRST WINS,
|
||||||
// `[^d]: first` / `[^d]: second`) would otherwise collapse into one footnote
|
// the rest are DROPPED. Reference markers are left UNTOUCHED so repeated `[^a]`
|
||||||
// downstream (the editor's last-wins sync). Rename each colliding id to a
|
// references reuse the single footnote (Pandoc semantics, #166). This differs
|
||||||
// DETERMINISTIC derived one AND rewrite the corresponding `[^id]` reference
|
// from the live editor's never-lose policy (resolveCollisions re-ids a
|
||||||
// marker so the (reference, definition) pairing stays 1:1. The FIRST
|
// duplicate definition into an orphan) on purpose: an import is an
|
||||||
// definition keeps the id and pairs with the FIRST `[^id]` marker; the Nth
|
// agent-authored artifact we sanitize, and the dropped duplicate is surfaced
|
||||||
// duplicate gets the derived id `${id}__${N}` and rewrites the Nth `[^id]`
|
// to the caller via analyzeFootnotes' `duplicateDefinitions` warning instead.
|
||||||
// marker. If there are fewer markers than definitions, the surplus definition
|
const firstById = new Map<string, string>(); // id -> first definition text
|
||||||
// keeps a derived (orphan) id so it is never silently merged away.
|
|
||||||
//
|
|
||||||
// The id is derived (deriveFootnoteId), NOT random: importing the same
|
|
||||||
// markdown through two paths (here and the MCP mirror) must yield identical
|
|
||||||
// ids, and re-importing the same markdown twice must be stable.
|
|
||||||
let dedupedBody = bodyLines.join("\n");
|
|
||||||
// Every original definition id is reserved up front so a derived id can never
|
|
||||||
// collide with an unrelated original id present in the document.
|
|
||||||
const taken = new Set<string>(definitions.map((d) => d.id));
|
|
||||||
const seenDefIds = new Map<string, number>(); // original id -> how many seen
|
|
||||||
for (const def of definitions) {
|
for (const def of definitions) {
|
||||||
const originalId = def.id;
|
if (!firstById.has(def.id)) firstById.set(def.id, def.text);
|
||||||
const count = seenDefIds.get(originalId) ?? 0;
|
|
||||||
seenDefIds.set(originalId, count + 1);
|
|
||||||
if (count === 0) continue; // first definition keeps its id
|
|
||||||
|
|
||||||
// count is the 0-based number of PRIOR occurrences; this is occurrence
|
|
||||||
// (count + 1), i.e. 2 for the first duplicate, 3 for the next, ...
|
|
||||||
const newId = deriveFootnoteId(originalId, count + 1, taken);
|
|
||||||
taken.add(newId);
|
|
||||||
def.id = newId;
|
|
||||||
|
|
||||||
// Rewrite the NEXT still-unrewritten `[^originalId]` marker that does not
|
|
||||||
// belong to the keeper definition. After a prior duplicate rewrote its
|
|
||||||
// marker (to `[^someNewId]`), it no longer matches `[^originalId]`, so the
|
|
||||||
// remaining matches are: index 0 = the keeper's marker (left alone), index 1
|
|
||||||
// = this duplicate's marker. Rewrite index 1.
|
|
||||||
let occurrence = 0;
|
|
||||||
let rewritten = false;
|
|
||||||
const re = new RegExp(`\\[\\^${escapeRegExp(originalId)}\\]`, "g");
|
|
||||||
dedupedBody = dedupedBody.replace(re, (match) => {
|
|
||||||
const idx = occurrence++;
|
|
||||||
if (!rewritten && idx === 1) {
|
|
||||||
rewritten = true;
|
|
||||||
return `[^${newId}]`;
|
|
||||||
}
|
|
||||||
return match;
|
|
||||||
});
|
|
||||||
// If there was no second marker (more definitions than references), the
|
|
||||||
// duplicate simply survives as an orphan with its fresh id — no body change.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const defsHtml = definitions
|
const defsHtml = [...firstById.entries()]
|
||||||
.map((d) => {
|
.map(([id, text]) => {
|
||||||
// Render the definition text as inline markdown so emphasis/links inside
|
// Render the definition text as inline markdown so emphasis/links inside
|
||||||
// a footnote survive the round-trip; wrap in a paragraph (the node's
|
// a footnote survive the round-trip; wrap in a paragraph (the node's
|
||||||
// content is paragraph+).
|
// content is paragraph+).
|
||||||
const inner = marked.parseInline(d.text || "");
|
const inner = marked.parseInline(text || "");
|
||||||
return `<div data-footnote-def data-id="${escapeAttr(
|
return `<div data-footnote-def data-id="${escapeAttr(
|
||||||
d.id,
|
id,
|
||||||
)}"><p>${inner}</p></div>`;
|
)}"><p>${inner}</p></div>`;
|
||||||
})
|
})
|
||||||
.join("");
|
.join("");
|
||||||
|
|
||||||
return {
|
return {
|
||||||
body: dedupedBody,
|
body: bodyLines.join("\n"),
|
||||||
section: `<section data-footnotes>${defsHtml}</section>`,
|
section: `<section data-footnotes>${defsHtml}</section>`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import WebSocket from "ws";
|
|||||||
import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
|
import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js";
|
||||||
import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js";
|
import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js";
|
||||||
import { docmostExtensions } from "./lib/docmost-schema.js";
|
import { docmostExtensions } from "./lib/docmost-schema.js";
|
||||||
|
import { footnoteWarningsField } from "./lib/footnote-analyze.js";
|
||||||
import { buildPageTree } from "./lib/tree.js";
|
import { buildPageTree } from "./lib/tree.js";
|
||||||
import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js";
|
import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js";
|
||||||
import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js";
|
import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js";
|
||||||
@@ -566,7 +567,9 @@ export class DocmostClient {
|
|||||||
// Always fetch subpages to provide context to the agent
|
// Always fetch subpages to provide context to the agent
|
||||||
let subpages = [];
|
let subpages = [];
|
||||||
try {
|
try {
|
||||||
subpages = await this.listSidebarPages(resultData.spaceId, pageId);
|
// `pageId` may be a slugId, but the sidebar-pages endpoint requires the
|
||||||
|
// UUID; `resultData.id` holds the resolved UUID returned by getPageRaw.
|
||||||
|
subpages = await this.listSidebarPages(resultData.spaceId, resultData.id);
|
||||||
}
|
}
|
||||||
catch (e) {
|
catch (e) {
|
||||||
console.warn("Failed to fetch subpages:", e);
|
console.warn("Failed to fetch subpages:", e);
|
||||||
@@ -814,7 +817,10 @@ export class DocmostClient {
|
|||||||
if (title) {
|
if (title) {
|
||||||
await this.client.post("/pages/update", { pageId: newPageId, title });
|
await this.client.post("/pages/update", { pageId: newPageId, title });
|
||||||
}
|
}
|
||||||
return this.getPage(newPageId);
|
const page = await this.getPage(newPageId);
|
||||||
|
// Surface non-fatal footnote problems (dangling refs, empty/duplicate
|
||||||
|
// definitions, markers in tables) so the agent can fix its markup (#166).
|
||||||
|
return { ...page, ...footnoteWarningsField(content) };
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Update a page's content from markdown and optionally its title.
|
* Update a page's content from markdown and optionally its title.
|
||||||
@@ -850,6 +856,8 @@ export class DocmostClient {
|
|||||||
message: "Page updated successfully.",
|
message: "Page updated successfully.",
|
||||||
pageId: pageId,
|
pageId: pageId,
|
||||||
verify: mutation.verify,
|
verify: mutation.verify,
|
||||||
|
// Non-fatal footnote diagnostics (#166); omitted when there are none.
|
||||||
|
...footnoteWarningsField(content),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@@ -1119,6 +1127,11 @@ export class DocmostClient {
|
|||||||
if (meta?.pageId && meta.pageId !== pageId) {
|
if (meta?.pageId && meta.pageId !== pageId) {
|
||||||
result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
|
result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
|
||||||
}
|
}
|
||||||
|
// Non-fatal footnote diagnostics (#166), analyzed on the BODY (the part after
|
||||||
|
// the docmost:meta / docmost:comments blocks) — so a `[^x]`-like token inside
|
||||||
|
// those JSON blocks never produces a false warning, while real markers in the
|
||||||
|
// body do. `body` comes from parseDocmostMarkdown(fullMarkdown) above.
|
||||||
|
Object.assign(result, footnoteWarningsField(body));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
@@ -2422,9 +2435,9 @@ export class DocmostClient {
|
|||||||
const raw = await this.getPageRaw(pageId);
|
const raw = await this.getPageRaw(pageId);
|
||||||
const current = raw.content || { type: "doc", content: [] };
|
const current = raw.content || { type: "doc", content: [] };
|
||||||
runTransform(current);
|
runTransform(current);
|
||||||
// Exercise the same Yjs encoder the apply path uses, so the preview
|
// Run an independent Yjs-encodability check (same sanitize + schema as the
|
||||||
// fails with the SAME descriptive error when the doc is not encodable
|
// apply path), so the preview fails with the same descriptive error when
|
||||||
// instead of returning a misleadingly-green diff.
|
// the doc is not encodable instead of returning a misleadingly-green diff.
|
||||||
assertYjsEncodable(newDoc);
|
assertYjsEncodable(newDoc);
|
||||||
return {
|
return {
|
||||||
pushed: false,
|
pushed: false,
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import { JSDOM } from "jsdom";
|
|||||||
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||||
import { withPageLock } from "./page-lock.js";
|
import { withPageLock } from "./page-lock.js";
|
||||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||||
|
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||||
import { summarizeChange } from "./diff.js";
|
import { summarizeChange } from "./diff.js";
|
||||||
/**
|
/**
|
||||||
* Build the descriptive error for an opaque Yjs encode failure ("Unexpected
|
* Build the descriptive error for an opaque Yjs encode failure ("Unexpected
|
||||||
@@ -280,49 +281,12 @@ function bridgeTaskLists(html) {
|
|||||||
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
|
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
|
||||||
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
|
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
|
||||||
// definition lines are collected into a single <section data-footnotes>.
|
// definition lines are collected into a single <section data-footnotes>.
|
||||||
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
// Definition detection + fence handling are shared with analyzeFootnotes via
|
||||||
|
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
|
||||||
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
||||||
function escapeFootnoteAttr(value) {
|
function escapeFootnoteAttr(value) {
|
||||||
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
||||||
}
|
}
|
||||||
function escapeFootnoteRegExp(value) {
|
|
||||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
|
|
||||||
* an original id `X` during definition dedup.
|
|
||||||
*
|
|
||||||
* EXACT MIRROR of editor-ext `deriveFootnoteId`
|
|
||||||
* (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
|
|
||||||
* STAY IN SYNC: the same markdown imported through the editor and through this
|
|
||||||
* MCP path has to produce identical ids, and the sync plugin (which re-ids on
|
|
||||||
* every collaborating client) relies on the same scheme to converge. NEVER use
|
|
||||||
* Math.random()/Date.now()/uuid here — a random id would diverge across clients.
|
|
||||||
*
|
|
||||||
* Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
|
|
||||||
* with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
|
|
||||||
* `taken` (the set of ids already present / already minted — pure doc state).
|
|
||||||
*/
|
|
||||||
function deriveFootnoteId(originalId, occurrence, taken) {
|
|
||||||
let candidate = `${originalId}__${occurrence}`;
|
|
||||||
let n = 0;
|
|
||||||
while (taken.has(candidate)) {
|
|
||||||
n += 1;
|
|
||||||
candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
|
|
||||||
}
|
|
||||||
return candidate;
|
|
||||||
}
|
|
||||||
/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
|
|
||||||
function footnoteSuffix(n) {
|
|
||||||
let out = "";
|
|
||||||
let x = n;
|
|
||||||
while (x > 0) {
|
|
||||||
const rem = (x - 1) % 25;
|
|
||||||
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
|
|
||||||
x = Math.floor((x - 1) / 25);
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
const footnoteRefMarkedExtension = {
|
const footnoteRefMarkedExtension = {
|
||||||
name: "footnoteRef",
|
name: "footnoteRef",
|
||||||
level: "inline",
|
level: "inline",
|
||||||
@@ -346,68 +310,36 @@ marked.use({ extensions: [footnoteRefMarkedExtension] });
|
|||||||
* <section data-footnotes> for them (or "" when there are none).
|
* <section data-footnotes> for them (or "" when there are none).
|
||||||
*/
|
*/
|
||||||
function extractFootnotes(markdown) {
|
function extractFootnotes(markdown) {
|
||||||
const lines = markdown.split("\n");
|
|
||||||
const bodyLines = [];
|
const bodyLines = [];
|
||||||
const defs = [];
|
const defs = [];
|
||||||
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
|
// Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
|
||||||
// block is preserved verbatim and not treated as a footnote definition.
|
// block is inert and stays in the body verbatim; only real definition lines
|
||||||
let fence = null;
|
// are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics
|
||||||
for (const line of lines) {
|
// match exactly what import keeps/strips (#166).
|
||||||
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
|
for (const tok of lexFootnoteLines(markdown)) {
|
||||||
if (fenceMatch) {
|
if (!tok.inFence && tok.definition)
|
||||||
const marker = fenceMatch[2][0];
|
defs.push(tok.definition);
|
||||||
if (fence === null)
|
|
||||||
fence = marker;
|
|
||||||
else if (marker === fence)
|
|
||||||
fence = null;
|
|
||||||
bodyLines.push(line);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
|
|
||||||
if (m)
|
|
||||||
defs.push({ id: m[1], text: m[2] });
|
|
||||||
else
|
else
|
||||||
bodyLines.push(line);
|
bodyLines.push(tok.line);
|
||||||
}
|
}
|
||||||
if (defs.length === 0)
|
if (defs.length === 0)
|
||||||
return { body: markdown, section: "" };
|
return { body: markdown, section: "" };
|
||||||
// De-duplicate colliding definition ids (mirror of editor-ext
|
// Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of
|
||||||
// extractFootnoteDefinitions). Two definitions sharing an id would otherwise
|
// editor-ext extractFootnoteDefinitions). Reference markers are left untouched
|
||||||
// collapse into one footnote downstream; rename each colliding id to a
|
// so repeated `[^a]` references reuse the single footnote (Pandoc semantics,
|
||||||
// DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
|
// #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes
|
||||||
// marker so the (reference, definition) pairing stays 1:1. Determinism lets
|
// (`duplicateDefinitions`), not silently lost. MUST stay in sync with the
|
||||||
// the same markdown imported here and via the editor produce identical ids.
|
// editor-ext mirror.
|
||||||
let dedupedBody = bodyLines.join("\n");
|
const firstById = new Map(); // id -> first definition text
|
||||||
const taken = new Set(defs.map((d) => d.id));
|
|
||||||
const seenDefIds = new Map();
|
|
||||||
for (const def of defs) {
|
for (const def of defs) {
|
||||||
const originalId = def.id;
|
if (!firstById.has(def.id))
|
||||||
const count = seenDefIds.get(originalId) ?? 0;
|
firstById.set(def.id, def.text);
|
||||||
seenDefIds.set(originalId, count + 1);
|
|
||||||
if (count === 0)
|
|
||||||
continue; // first definition keeps its id
|
|
||||||
const newId = deriveFootnoteId(originalId, count + 1, taken);
|
|
||||||
taken.add(newId);
|
|
||||||
def.id = newId;
|
|
||||||
// Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
|
|
||||||
// index 1 = this duplicate's marker. Rewrite index 1.
|
|
||||||
let occurrence = 0;
|
|
||||||
let rewritten = false;
|
|
||||||
const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
|
|
||||||
dedupedBody = dedupedBody.replace(re, (match) => {
|
|
||||||
const idx = occurrence++;
|
|
||||||
if (!rewritten && idx === 1) {
|
|
||||||
rewritten = true;
|
|
||||||
return `[^${newId}]`;
|
|
||||||
}
|
|
||||||
return match;
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
const inner = defs
|
const inner = [...firstById.entries()]
|
||||||
.map((d) => `<div data-footnote-def data-id="${escapeFootnoteAttr(d.id)}"><p>${marked.parseInline(d.text || "")}</p></div>`)
|
.map(([id, text]) => `<div data-footnote-def data-id="${escapeFootnoteAttr(id)}"><p>${marked.parseInline(text || "")}</p></div>`)
|
||||||
.join("");
|
.join("");
|
||||||
return {
|
return {
|
||||||
body: dedupedBody,
|
body: bodyLines.join("\n"),
|
||||||
section: `<section data-footnotes>${inner}</section>`,
|
section: `<section data-footnotes>${inner}</section>`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
101
packages/mcp/build/lib/footnote-analyze.js
Normal file
101
packages/mcp/build/lib/footnote-analyze.js
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
/**
|
||||||
|
* Footnote diagnostics for imported Markdown (issue #166).
|
||||||
|
*
|
||||||
|
* A PURE, fence-aware text scan (independent of the Markdown->ProseMirror
|
||||||
|
* conversion path, so it reports the same problems for `create_page`,
|
||||||
|
* `update_page` and `import_page_markdown`). It never changes the document — the
|
||||||
|
* importer still creates the page; this only surfaces footnote problems to the
|
||||||
|
* caller so an agent can fix its own markup instead of shipping broken footnotes.
|
||||||
|
*
|
||||||
|
* Detected problems:
|
||||||
|
* - danglingReferences: a `[^id]` reference with no `[^id]:` definition.
|
||||||
|
* - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace.
|
||||||
|
* - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the
|
||||||
|
* first is kept on import — first-wins; see extractFootnotes).
|
||||||
|
* - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic:
|
||||||
|
* the line, trimmed, starts with `|`) — footnotes in table cells often do not
|
||||||
|
* render as expected.
|
||||||
|
*/
|
||||||
|
import { lexFootnoteLines, forEachFootnoteReference, } from "./footnote-lex.js";
|
||||||
|
/**
|
||||||
|
* Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
|
||||||
|
*/
|
||||||
|
export function analyzeFootnotes(markdown) {
|
||||||
|
// Distinct reference ids in first-appearance order, plus the set of ids seen
|
||||||
|
// inside a table row.
|
||||||
|
const refIds = [];
|
||||||
|
const refIdSet = new Set();
|
||||||
|
const referencesInTables = new Set();
|
||||||
|
const addRef = (id, inTable) => {
|
||||||
|
if (!refIdSet.has(id)) {
|
||||||
|
refIdSet.add(id);
|
||||||
|
refIds.push(id);
|
||||||
|
}
|
||||||
|
if (inTable)
|
||||||
|
referencesInTables.add(id);
|
||||||
|
};
|
||||||
|
// Definition texts per id, in first-appearance order of the id.
|
||||||
|
const defTextsById = new Map();
|
||||||
|
// Same lexer the importer uses, so the analysis matches exactly what import
|
||||||
|
// keeps/strips (#166): fenced lines are inert, definition lines are pulled.
|
||||||
|
for (const tok of lexFootnoteLines(markdown)) {
|
||||||
|
if (tok.inFence)
|
||||||
|
continue;
|
||||||
|
if (tok.definition) {
|
||||||
|
const { id, text } = tok.definition;
|
||||||
|
const arr = defTextsById.get(id);
|
||||||
|
if (arr)
|
||||||
|
arr.push(text);
|
||||||
|
else
|
||||||
|
defTextsById.set(id, [text]);
|
||||||
|
// A definition's TEXT can itself reference another footnote (`[^a]: see
|
||||||
|
// [^b]`); count those so such a `[^b]` is not falsely reported dangling.
|
||||||
|
forEachFootnoteReference(text, (rid) => addRef(rid, false));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const inTable = tok.line.trimStart().startsWith("|");
|
||||||
|
forEachFootnoteReference(tok.line, (id) => addRef(id, inTable));
|
||||||
|
}
|
||||||
|
const danglingReferences = refIds.filter((id) => !defTextsById.has(id));
|
||||||
|
const duplicateDefinitions = [];
|
||||||
|
const emptyDefinitions = [];
|
||||||
|
for (const [id, texts] of defTextsById) {
|
||||||
|
if (texts.length >= 2)
|
||||||
|
duplicateDefinitions.push(id);
|
||||||
|
// First-wins: the kept definition is the first one; flag it if it is blank.
|
||||||
|
if ((texts[0] ?? "").trim().length === 0)
|
||||||
|
emptyDefinitions.push(id);
|
||||||
|
}
|
||||||
|
const tableRefs = [...referencesInTables];
|
||||||
|
const warnings = [];
|
||||||
|
const list = (ids) => ids.map((id) => `[^${id}]`).join(", ");
|
||||||
|
if (danglingReferences.length > 0) {
|
||||||
|
warnings.push(`Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`);
|
||||||
|
}
|
||||||
|
if (emptyDefinitions.length > 0) {
|
||||||
|
warnings.push(`Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`);
|
||||||
|
}
|
||||||
|
if (duplicateDefinitions.length > 0) {
|
||||||
|
warnings.push(`Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`);
|
||||||
|
}
|
||||||
|
if (tableRefs.length > 0) {
|
||||||
|
warnings.push(`Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`);
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
danglingReferences,
|
||||||
|
emptyDefinitions,
|
||||||
|
duplicateDefinitions,
|
||||||
|
referencesInTables: tableRefs,
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* The optional `footnoteWarnings` field for a page-write tool result: present
|
||||||
|
* (with the warning lines) only when `markdown` has footnote problems, omitted
|
||||||
|
* otherwise. One helper so all three call sites (create/update/import) attach the
|
||||||
|
* field identically. Spread into the result: `{ ...result, ...footnoteWarningsField(text) }`.
|
||||||
|
*/
|
||||||
|
export function footnoteWarningsField(markdown) {
|
||||||
|
const { warnings } = analyzeFootnotes(markdown);
|
||||||
|
return warnings.length > 0 ? { footnoteWarnings: warnings } : {};
|
||||||
|
}
|
||||||
55
packages/mcp/build/lib/footnote-lex.js
Normal file
55
packages/mcp/build/lib/footnote-lex.js
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
/**
|
||||||
|
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
|
||||||
|
*
|
||||||
|
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
|
||||||
|
* definition lines and rebuilds a footnotes section) and the diagnostics
|
||||||
|
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
|
||||||
|
* are definitions and which lines are inert (inside a code fence). Sharing one
|
||||||
|
* lexer makes "the analyzer sees what the importer leaves" a structural property
|
||||||
|
* instead of two hand-kept copies that can drift (#166 review).
|
||||||
|
*
|
||||||
|
* NOTE: this is deliberately NOT shared with editor-ext's
|
||||||
|
* `extractFootnoteDefinitions` — that lives in a different package and the
|
||||||
|
* decoupling between the editor and the MCP mirror is intentional.
|
||||||
|
*/
|
||||||
|
/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */
|
||||||
|
export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||||
|
/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */
|
||||||
|
export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g;
|
||||||
|
/** Opening/closing code fence marker (``` or ~~~). */
|
||||||
|
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||||
|
/** Classify every line of `markdown`, tracking fenced-code state. Pure. */
|
||||||
|
export function lexFootnoteLines(markdown) {
|
||||||
|
const out = [];
|
||||||
|
let fence = null;
|
||||||
|
for (const line of markdown.split("\n")) {
|
||||||
|
const fenceMatch = FENCE_RE.exec(line);
|
||||||
|
if (fenceMatch) {
|
||||||
|
const marker = fenceMatch[2][0];
|
||||||
|
if (fence === null)
|
||||||
|
fence = marker; // opening fence
|
||||||
|
else if (marker === fence)
|
||||||
|
fence = null; // matching closing fence
|
||||||
|
out.push({ line, inFence: true, definition: null });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (fence !== null) {
|
||||||
|
out.push({ line, inFence: true, definition: null });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const m = FOOTNOTE_DEF_RE.exec(line);
|
||||||
|
out.push({
|
||||||
|
line,
|
||||||
|
inFence: false,
|
||||||
|
definition: m ? { id: m[1], text: m[2] } : null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
|
||||||
|
export function forEachFootnoteReference(line, onRef) {
|
||||||
|
FOOTNOTE_REF_RE_G.lastIndex = 0;
|
||||||
|
let m;
|
||||||
|
while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null)
|
||||||
|
onRef(m[1]);
|
||||||
|
}
|
||||||
@@ -23,6 +23,7 @@ import {
|
|||||||
MutationResult,
|
MutationResult,
|
||||||
} from "./lib/collaboration.js";
|
} from "./lib/collaboration.js";
|
||||||
import { docmostExtensions } from "./lib/docmost-schema.js";
|
import { docmostExtensions } from "./lib/docmost-schema.js";
|
||||||
|
import { footnoteWarningsField } from "./lib/footnote-analyze.js";
|
||||||
import { buildPageTree } from "./lib/tree.js";
|
import { buildPageTree } from "./lib/tree.js";
|
||||||
import {
|
import {
|
||||||
serializeDocmostMarkdown,
|
serializeDocmostMarkdown,
|
||||||
@@ -1054,7 +1055,10 @@ export class DocmostClient {
|
|||||||
await this.client.post("/pages/update", { pageId: newPageId, title });
|
await this.client.post("/pages/update", { pageId: newPageId, title });
|
||||||
}
|
}
|
||||||
|
|
||||||
return this.getPage(newPageId);
|
const page = await this.getPage(newPageId);
|
||||||
|
// Surface non-fatal footnote problems (dangling refs, empty/duplicate
|
||||||
|
// definitions, markers in tables) so the agent can fix its markup (#166).
|
||||||
|
return { ...page, ...footnoteWarningsField(content) };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -1101,6 +1105,8 @@ export class DocmostClient {
|
|||||||
message: "Page updated successfully.",
|
message: "Page updated successfully.",
|
||||||
pageId: pageId,
|
pageId: pageId,
|
||||||
verify: mutation.verify,
|
verify: mutation.verify,
|
||||||
|
// Non-fatal footnote diagnostics (#166); omitted when there are none.
|
||||||
|
...footnoteWarningsField(content),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1416,6 +1422,11 @@ export class DocmostClient {
|
|||||||
if (meta?.pageId && meta.pageId !== pageId) {
|
if (meta?.pageId && meta.pageId !== pageId) {
|
||||||
result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
|
result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`;
|
||||||
}
|
}
|
||||||
|
// Non-fatal footnote diagnostics (#166), analyzed on the BODY (the part after
|
||||||
|
// the docmost:meta / docmost:comments blocks) — so a `[^x]`-like token inside
|
||||||
|
// those JSON blocks never produces a false warning, while real markers in the
|
||||||
|
// body do. `body` comes from parseDocmostMarkdown(fullMarkdown) above.
|
||||||
|
Object.assign(result, footnoteWarningsField(body));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import { JSDOM } from "jsdom";
|
|||||||
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
import { docmostExtensions, docmostSchema } from "./docmost-schema.js";
|
||||||
import { withPageLock } from "./page-lock.js";
|
import { withPageLock } from "./page-lock.js";
|
||||||
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js";
|
||||||
|
import { lexFootnoteLines } from "./footnote-lex.js";
|
||||||
import { summarizeChange, VerifyReport } from "./diff.js";
|
import { summarizeChange, VerifyReport } from "./diff.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -316,58 +317,14 @@ function bridgeTaskLists(html: string): string {
|
|||||||
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
|
// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
|
||||||
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
|
// marker becomes <sup data-footnote-ref data-id="id">, and `[^id]: text`
|
||||||
// definition lines are collected into a single <section data-footnotes>.
|
// definition lines are collected into a single <section data-footnotes>.
|
||||||
const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
// Definition detection + fence handling are shared with analyzeFootnotes via
|
||||||
|
// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's.
|
||||||
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
|
||||||
|
|
||||||
function escapeFootnoteAttr(value: string): string {
|
function escapeFootnoteAttr(value: string): string {
|
||||||
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
return String(value).replace(/&/g, "&").replace(/"/g, """);
|
||||||
}
|
}
|
||||||
|
|
||||||
function escapeFootnoteRegExp(value: string): string {
|
|
||||||
return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
|
|
||||||
* an original id `X` during definition dedup.
|
|
||||||
*
|
|
||||||
* EXACT MIRROR of editor-ext `deriveFootnoteId`
|
|
||||||
* (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
|
|
||||||
* STAY IN SYNC: the same markdown imported through the editor and through this
|
|
||||||
* MCP path has to produce identical ids, and the sync plugin (which re-ids on
|
|
||||||
* every collaborating client) relies on the same scheme to converge. NEVER use
|
|
||||||
* Math.random()/Date.now()/uuid here — a random id would diverge across clients.
|
|
||||||
*
|
|
||||||
* Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
|
|
||||||
* with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
|
|
||||||
* `taken` (the set of ids already present / already minted — pure doc state).
|
|
||||||
*/
|
|
||||||
function deriveFootnoteId(
|
|
||||||
originalId: string,
|
|
||||||
occurrence: number,
|
|
||||||
taken: Set<string>,
|
|
||||||
): string {
|
|
||||||
let candidate = `${originalId}__${occurrence}`;
|
|
||||||
let n = 0;
|
|
||||||
while (taken.has(candidate)) {
|
|
||||||
n += 1;
|
|
||||||
candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
|
|
||||||
}
|
|
||||||
return candidate;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */
|
|
||||||
function footnoteSuffix(n: number): string {
|
|
||||||
let out = "";
|
|
||||||
let x = n;
|
|
||||||
while (x > 0) {
|
|
||||||
const rem = (x - 1) % 25;
|
|
||||||
out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
|
|
||||||
x = Math.floor((x - 1) / 25);
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
const footnoteRefMarkedExtension = {
|
const footnoteRefMarkedExtension = {
|
||||||
name: "footnoteRef",
|
name: "footnoteRef",
|
||||||
level: "inline" as const,
|
level: "inline" as const,
|
||||||
@@ -398,69 +355,39 @@ function extractFootnotes(markdown: string): {
|
|||||||
body: string;
|
body: string;
|
||||||
section: string;
|
section: string;
|
||||||
} {
|
} {
|
||||||
const lines = markdown.split("\n");
|
|
||||||
const bodyLines: string[] = [];
|
const bodyLines: string[] = [];
|
||||||
const defs: Array<{ id: string; text: string }> = [];
|
const defs: Array<{ id: string; text: string }> = [];
|
||||||
// Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
|
// Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code
|
||||||
// block is preserved verbatim and not treated as a footnote definition.
|
// block is inert and stays in the body verbatim; only real definition lines
|
||||||
let fence: string | null = null;
|
// are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics
|
||||||
for (const line of lines) {
|
// match exactly what import keeps/strips (#166).
|
||||||
const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
|
for (const tok of lexFootnoteLines(markdown)) {
|
||||||
if (fenceMatch) {
|
if (!tok.inFence && tok.definition) defs.push(tok.definition);
|
||||||
const marker = fenceMatch[2][0];
|
else bodyLines.push(tok.line);
|
||||||
if (fence === null) fence = marker;
|
|
||||||
else if (marker === fence) fence = null;
|
|
||||||
bodyLines.push(line);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
|
|
||||||
if (m) defs.push({ id: m[1], text: m[2] });
|
|
||||||
else bodyLines.push(line);
|
|
||||||
}
|
}
|
||||||
if (defs.length === 0) return { body: markdown, section: "" };
|
if (defs.length === 0) return { body: markdown, section: "" };
|
||||||
|
|
||||||
// De-duplicate colliding definition ids (mirror of editor-ext
|
// Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of
|
||||||
// extractFootnoteDefinitions). Two definitions sharing an id would otherwise
|
// editor-ext extractFootnoteDefinitions). Reference markers are left untouched
|
||||||
// collapse into one footnote downstream; rename each colliding id to a
|
// so repeated `[^a]` references reuse the single footnote (Pandoc semantics,
|
||||||
// DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
|
// #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes
|
||||||
// marker so the (reference, definition) pairing stays 1:1. Determinism lets
|
// (`duplicateDefinitions`), not silently lost. MUST stay in sync with the
|
||||||
// the same markdown imported here and via the editor produce identical ids.
|
// editor-ext mirror.
|
||||||
let dedupedBody = bodyLines.join("\n");
|
const firstById = new Map<string, string>(); // id -> first definition text
|
||||||
const taken = new Set<string>(defs.map((d) => d.id));
|
|
||||||
const seenDefIds = new Map<string, number>();
|
|
||||||
for (const def of defs) {
|
for (const def of defs) {
|
||||||
const originalId = def.id;
|
if (!firstById.has(def.id)) firstById.set(def.id, def.text);
|
||||||
const count = seenDefIds.get(originalId) ?? 0;
|
|
||||||
seenDefIds.set(originalId, count + 1);
|
|
||||||
if (count === 0) continue; // first definition keeps its id
|
|
||||||
const newId = deriveFootnoteId(originalId, count + 1, taken);
|
|
||||||
taken.add(newId);
|
|
||||||
def.id = newId;
|
|
||||||
// Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
|
|
||||||
// index 1 = this duplicate's marker. Rewrite index 1.
|
|
||||||
let occurrence = 0;
|
|
||||||
let rewritten = false;
|
|
||||||
const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
|
|
||||||
dedupedBody = dedupedBody.replace(re, (match) => {
|
|
||||||
const idx = occurrence++;
|
|
||||||
if (!rewritten && idx === 1) {
|
|
||||||
rewritten = true;
|
|
||||||
return `[^${newId}]`;
|
|
||||||
}
|
|
||||||
return match;
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const inner = defs
|
const inner = [...firstById.entries()]
|
||||||
.map(
|
.map(
|
||||||
(d) =>
|
([id, text]) =>
|
||||||
`<div data-footnote-def data-id="${escapeFootnoteAttr(
|
`<div data-footnote-def data-id="${escapeFootnoteAttr(
|
||||||
d.id,
|
id,
|
||||||
)}"><p>${marked.parseInline(d.text || "")}</p></div>`,
|
)}"><p>${marked.parseInline(text || "")}</p></div>`,
|
||||||
)
|
)
|
||||||
.join("");
|
.join("");
|
||||||
return {
|
return {
|
||||||
body: dedupedBody,
|
body: bodyLines.join("\n"),
|
||||||
section: `<section data-footnotes>${inner}</section>`,
|
section: `<section data-footnotes>${inner}</section>`,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
129
packages/mcp/src/lib/footnote-analyze.ts
Normal file
129
packages/mcp/src/lib/footnote-analyze.ts
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
/**
|
||||||
|
* Footnote diagnostics for imported Markdown (issue #166).
|
||||||
|
*
|
||||||
|
* A PURE, fence-aware text scan (independent of the Markdown->ProseMirror
|
||||||
|
* conversion path, so it reports the same problems for `create_page`,
|
||||||
|
* `update_page` and `import_page_markdown`). It never changes the document — the
|
||||||
|
* importer still creates the page; this only surfaces footnote problems to the
|
||||||
|
* caller so an agent can fix its own markup instead of shipping broken footnotes.
|
||||||
|
*
|
||||||
|
* Detected problems:
|
||||||
|
* - danglingReferences: a `[^id]` reference with no `[^id]:` definition.
|
||||||
|
* - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace.
|
||||||
|
* - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the
|
||||||
|
* first is kept on import — first-wins; see extractFootnotes).
|
||||||
|
* - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic:
|
||||||
|
* the line, trimmed, starts with `|`) — footnotes in table cells often do not
|
||||||
|
* render as expected.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
lexFootnoteLines,
|
||||||
|
forEachFootnoteReference,
|
||||||
|
} from "./footnote-lex.js";
|
||||||
|
|
||||||
|
export interface FootnoteDiagnostics {
|
||||||
|
/** Reference ids (distinct, document order) with no matching definition. */
|
||||||
|
danglingReferences: string[];
|
||||||
|
/** Definition ids whose first (kept) text is empty/whitespace. */
|
||||||
|
emptyDefinitions: string[];
|
||||||
|
/** Ids defined by two or more `[^id]:` lines (only the first is kept). */
|
||||||
|
duplicateDefinitions: string[];
|
||||||
|
/** Reference ids found inside a GFM table row (heuristic). */
|
||||||
|
referencesInTables: string[];
|
||||||
|
/** Human-readable warning lines for the tool result (one per problem class). */
|
||||||
|
warnings: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyze the footnotes in a Markdown string. Pure; safe to call on any body.
|
||||||
|
*/
|
||||||
|
export function analyzeFootnotes(markdown: string): FootnoteDiagnostics {
|
||||||
|
// Distinct reference ids in first-appearance order, plus the set of ids seen
|
||||||
|
// inside a table row.
|
||||||
|
const refIds: string[] = [];
|
||||||
|
const refIdSet = new Set<string>();
|
||||||
|
const referencesInTables = new Set<string>();
|
||||||
|
const addRef = (id: string, inTable: boolean) => {
|
||||||
|
if (!refIdSet.has(id)) {
|
||||||
|
refIdSet.add(id);
|
||||||
|
refIds.push(id);
|
||||||
|
}
|
||||||
|
if (inTable) referencesInTables.add(id);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Definition texts per id, in first-appearance order of the id.
|
||||||
|
const defTextsById = new Map<string, string[]>();
|
||||||
|
|
||||||
|
// Same lexer the importer uses, so the analysis matches exactly what import
|
||||||
|
// keeps/strips (#166): fenced lines are inert, definition lines are pulled.
|
||||||
|
for (const tok of lexFootnoteLines(markdown)) {
|
||||||
|
if (tok.inFence) continue;
|
||||||
|
if (tok.definition) {
|
||||||
|
const { id, text } = tok.definition;
|
||||||
|
const arr = defTextsById.get(id);
|
||||||
|
if (arr) arr.push(text);
|
||||||
|
else defTextsById.set(id, [text]);
|
||||||
|
// A definition's TEXT can itself reference another footnote (`[^a]: see
|
||||||
|
// [^b]`); count those so such a `[^b]` is not falsely reported dangling.
|
||||||
|
forEachFootnoteReference(text, (rid) => addRef(rid, false));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const inTable = tok.line.trimStart().startsWith("|");
|
||||||
|
forEachFootnoteReference(tok.line, (id) => addRef(id, inTable));
|
||||||
|
}
|
||||||
|
|
||||||
|
const danglingReferences = refIds.filter((id) => !defTextsById.has(id));
|
||||||
|
const duplicateDefinitions: string[] = [];
|
||||||
|
const emptyDefinitions: string[] = [];
|
||||||
|
for (const [id, texts] of defTextsById) {
|
||||||
|
if (texts.length >= 2) duplicateDefinitions.push(id);
|
||||||
|
// First-wins: the kept definition is the first one; flag it if it is blank.
|
||||||
|
if ((texts[0] ?? "").trim().length === 0) emptyDefinitions.push(id);
|
||||||
|
}
|
||||||
|
const tableRefs = [...referencesInTables];
|
||||||
|
|
||||||
|
const warnings: string[] = [];
|
||||||
|
const list = (ids: string[]) => ids.map((id) => `[^${id}]`).join(", ");
|
||||||
|
if (danglingReferences.length > 0) {
|
||||||
|
warnings.push(
|
||||||
|
`Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (emptyDefinitions.length > 0) {
|
||||||
|
warnings.push(
|
||||||
|
`Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (duplicateDefinitions.length > 0) {
|
||||||
|
warnings.push(
|
||||||
|
`Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if (tableRefs.length > 0) {
|
||||||
|
warnings.push(
|
||||||
|
`Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
danglingReferences,
|
||||||
|
emptyDefinitions,
|
||||||
|
duplicateDefinitions,
|
||||||
|
referencesInTables: tableRefs,
|
||||||
|
warnings,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The optional `footnoteWarnings` field for a page-write tool result: present
|
||||||
|
* (with the warning lines) only when `markdown` has footnote problems, omitted
|
||||||
|
* otherwise. One helper so all three call sites (create/update/import) attach the
|
||||||
|
* field identically. Spread into the result: `{ ...result, ...footnoteWarningsField(text) }`.
|
||||||
|
*/
|
||||||
|
export function footnoteWarningsField(markdown: string): {
|
||||||
|
footnoteWarnings?: string[];
|
||||||
|
} {
|
||||||
|
const { warnings } = analyzeFootnotes(markdown);
|
||||||
|
return warnings.length > 0 ? { footnoteWarnings: warnings } : {};
|
||||||
|
}
|
||||||
71
packages/mcp/src/lib/footnote-lex.ts
Normal file
71
packages/mcp/src/lib/footnote-lex.ts
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
/**
|
||||||
|
* Shared, fence-aware line lexer for footnote markdown (MCP-internal).
|
||||||
|
*
|
||||||
|
* Both the importer (`extractFootnotes` in collaboration.ts, which strips
|
||||||
|
* definition lines and rebuilds a footnotes section) and the diagnostics
|
||||||
|
* (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines
|
||||||
|
* are definitions and which lines are inert (inside a code fence). Sharing one
|
||||||
|
* lexer makes "the analyzer sees what the importer leaves" a structural property
|
||||||
|
* instead of two hand-kept copies that can drift (#166 review).
|
||||||
|
*
|
||||||
|
* NOTE: this is deliberately NOT shared with editor-ext's
|
||||||
|
* `extractFootnoteDefinitions` — that lives in a different package and the
|
||||||
|
* decoupling between the editor and the MCP mirror is intentional.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */
|
||||||
|
export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
|
||||||
|
/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */
|
||||||
|
export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g;
|
||||||
|
/** Opening/closing code fence marker (``` or ~~~). */
|
||||||
|
const FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||||
|
|
||||||
|
export interface FootnoteLine {
|
||||||
|
/** The raw line, verbatim. */
|
||||||
|
line: string;
|
||||||
|
/**
|
||||||
|
* True for a code-fence marker line AND every line inside a fence — footnote
|
||||||
|
* syntax on such lines is inert (example text, not real markup). The importer
|
||||||
|
* keeps these in the body; the analyzer skips them.
|
||||||
|
*/
|
||||||
|
inFence: boolean;
|
||||||
|
/** The parsed definition, when this is a `[^id]: text` line OUTSIDE any fence. */
|
||||||
|
definition: { id: string; text: string } | null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Classify every line of `markdown`, tracking fenced-code state. Pure. */
|
||||||
|
export function lexFootnoteLines(markdown: string): FootnoteLine[] {
|
||||||
|
const out: FootnoteLine[] = [];
|
||||||
|
let fence: string | null = null;
|
||||||
|
for (const line of markdown.split("\n")) {
|
||||||
|
const fenceMatch = FENCE_RE.exec(line);
|
||||||
|
if (fenceMatch) {
|
||||||
|
const marker = fenceMatch[2][0];
|
||||||
|
if (fence === null) fence = marker; // opening fence
|
||||||
|
else if (marker === fence) fence = null; // matching closing fence
|
||||||
|
out.push({ line, inFence: true, definition: null });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (fence !== null) {
|
||||||
|
out.push({ line, inFence: true, definition: null });
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const m = FOOTNOTE_DEF_RE.exec(line);
|
||||||
|
out.push({
|
||||||
|
line,
|
||||||
|
inFence: false,
|
||||||
|
definition: m ? { id: m[1], text: m[2] } : null,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */
|
||||||
|
export function forEachFootnoteReference(
|
||||||
|
line: string,
|
||||||
|
onRef: (id: string) => void,
|
||||||
|
): void {
|
||||||
|
FOOTNOTE_REF_RE_G.lastIndex = 0;
|
||||||
|
let m: RegExpExecArray | null;
|
||||||
|
while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null) onRef(m[1]);
|
||||||
|
}
|
||||||
110
packages/mcp/test/mock/footnote-warnings.test.mjs
Normal file
110
packages/mcp/test/mock/footnote-warnings.test.mjs
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
// Mock-HTTP test for the footnoteWarnings plumbing (#166). createPage is the
|
||||||
|
// representative path that is fully plain-HTTP (import + getPage) and so is
|
||||||
|
// mockable here; updatePage / importPageMarkdown attach footnoteWarnings with the
|
||||||
|
// IDENTICAL wiring (`analyzeFootnotes(...)` + spread-when-non-empty) but run their
|
||||||
|
// mutation over the Hocuspocus collab WebSocket, which this plain-HTTP harness
|
||||||
|
// does not stand up. The analyzer itself is unit-tested in footnote-analyze.test.
|
||||||
|
import { test, after } from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import http from "node:http";
|
||||||
|
import { DocmostClient } from "../../build/client.js";
|
||||||
|
|
||||||
|
function readBody(req) {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
let raw = "";
|
||||||
|
req.on("data", (c) => (raw += c));
|
||||||
|
req.on("end", () => resolve(raw));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function sendJson(res, status, obj, extraHeaders = {}) {
|
||||||
|
res.writeHead(status, { "Content-Type": "application/json", ...extraHeaders });
|
||||||
|
res.end(JSON.stringify(obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
const openServers = [];
|
||||||
|
function spawn(handler) {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const server = http.createServer(handler);
|
||||||
|
openServers.push(server);
|
||||||
|
server.listen(0, "127.0.0.1", () => {
|
||||||
|
const { port } = server.address();
|
||||||
|
resolve(`http://127.0.0.1:${port}/api`);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
after(async () => {
|
||||||
|
await Promise.all(
|
||||||
|
openServers.map((s) => new Promise((r) => s.close(r))),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// A handler that imports a page, lets getPage read it back, and 404s everything
|
||||||
|
// else (listSidebarPages fails gracefully inside getPage).
|
||||||
|
function pageHandler() {
|
||||||
|
return async (req, res) => {
|
||||||
|
await readBody(req);
|
||||||
|
if (req.url === "/api/auth/login") {
|
||||||
|
sendJson(res, 200, { success: true }, {
|
||||||
|
"Set-Cookie": "authToken=t; Path=/; HttpOnly",
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (req.url === "/api/pages/import") {
|
||||||
|
sendJson(res, 200, { data: { id: "new-1" } });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (req.url === "/api/pages/update") {
|
||||||
|
// The title-restore step after import.
|
||||||
|
sendJson(res, 200, { data: { id: "new-1" } });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (req.url === "/api/pages/info") {
|
||||||
|
sendJson(res, 200, {
|
||||||
|
data: {
|
||||||
|
id: "new-1",
|
||||||
|
slugId: "slug-1",
|
||||||
|
title: "T",
|
||||||
|
spaceId: "sp-1",
|
||||||
|
content: { type: "doc", content: [] },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sendJson(res, 404, { message: "not found" });
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
test("createPage attaches footnoteWarnings when the content has footnote problems", async () => {
|
||||||
|
const baseURL = await spawn(pageHandler());
|
||||||
|
const client = new DocmostClient(baseURL, "user@example.com", "pw");
|
||||||
|
// A dangling reference + a duplicate definition + a table marker.
|
||||||
|
const content = [
|
||||||
|
"Intro[^missing] and| cell[^t] |.",
|
||||||
|
"",
|
||||||
|
"[^d]: one",
|
||||||
|
"[^d]: two",
|
||||||
|
"[^t]: in table",
|
||||||
|
].join("\n");
|
||||||
|
const result = await client.createPage("T", content, "sp-1");
|
||||||
|
assert.ok(Array.isArray(result.footnoteWarnings), "footnoteWarnings present");
|
||||||
|
const joined = result.footnoteWarnings.join("\n");
|
||||||
|
assert.match(joined, /no matching definition/); // dangling [^missing]
|
||||||
|
assert.match(joined, /defined more than once/); // duplicate [^d]
|
||||||
|
// The page itself is still returned.
|
||||||
|
assert.equal(result.success, true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("createPage omits footnoteWarnings when the content is clean", async () => {
|
||||||
|
const baseURL = await spawn(pageHandler());
|
||||||
|
const client = new DocmostClient(baseURL, "user@example.com", "pw");
|
||||||
|
const content = ["A[^a] and reuse[^a].", "", "[^a]: fine"].join("\n");
|
||||||
|
const result = await client.createPage("T", content, "sp-1");
|
||||||
|
assert.equal(
|
||||||
|
"footnoteWarnings" in result,
|
||||||
|
false,
|
||||||
|
"no footnoteWarnings field on clean input",
|
||||||
|
);
|
||||||
|
assert.equal(result.success, true);
|
||||||
|
});
|
||||||
@@ -1,134 +0,0 @@
|
|||||||
import { test } from "node:test";
|
|
||||||
import assert from "node:assert/strict";
|
|
||||||
|
|
||||||
import { markdownToProseMirror } from "../../build/lib/collaboration.js";
|
|
||||||
|
|
||||||
/**
|
|
||||||
* CROSS-PACKAGE DRIFT GUARD for the footnote id derivation scheme.
|
|
||||||
*
|
|
||||||
* `deriveFootnoteId` is duplicated in two places that MUST behave identically:
|
|
||||||
* - packages/editor-ext/src/lib/footnote/footnote-util.ts (exported)
|
|
||||||
* - packages/mcp/src/lib/collaboration.ts (internal helper)
|
|
||||||
* so the same markdown imported through the editor and through the MCP path
|
|
||||||
* derives identical footnote ids.
|
|
||||||
*
|
|
||||||
* The mcp copy is NOT exported from the compiled build (it is an internal helper
|
|
||||||
* of collaboration.js), and production source must not be modified to export it.
|
|
||||||
* So this test exercises the REAL compiled `deriveFootnoteId` *indirectly*, the
|
|
||||||
* same way production does: through `markdownToProseMirror`, which runs
|
|
||||||
* extractFootnotes -> deriveFootnoteId during duplicate-id dedup. We craft the
|
|
||||||
* `taken` set via literal pre-existing definition ids and read back the derived
|
|
||||||
* footnoteDefinition ids.
|
|
||||||
*
|
|
||||||
* GOLDEN below mirrors DERIVE_GOLDEN in
|
|
||||||
* packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts
|
|
||||||
* (asserted there by a DIRECT call). Same (originalId, occurrence, taken) ->
|
|
||||||
* same expected id. If the two copies drift, one of the two suites goes red.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** The 25 single-letter suffixes the scheme uses (n=1..25): b, c, ..., z. */
|
|
||||||
function singleLetterSuffixes() {
|
|
||||||
return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Identical matrix + expected values to the editor-ext golden table.
|
|
||||||
const GOLDEN = [
|
|
||||||
{ originalId: "d", occurrence: 2, taken: [], expected: "d__2" },
|
|
||||||
{ originalId: "d", occurrence: 3, taken: [], expected: "d__3" },
|
|
||||||
{ originalId: "d", occurrence: 2, taken: ["d__2"], expected: "d__2b" },
|
|
||||||
{ originalId: "d", occurrence: 2, taken: ["d__2", "d__2b"], expected: "d__2c" },
|
|
||||||
{
|
|
||||||
originalId: "d",
|
|
||||||
occurrence: 2,
|
|
||||||
taken: ["d__2", "d__2b", "d__2c", "d__2d"],
|
|
||||||
expected: "d__2e",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
originalId: "d",
|
|
||||||
occurrence: 2,
|
|
||||||
taken: ["d__2", ...singleLetterSuffixes().map((s) => `d__2${s}`)],
|
|
||||||
expected: "d__2bb",
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
/** Recursively collect every node of `type`. */
|
|
||||||
function findAll(node, type, acc = []) {
|
|
||||||
if (!node || typeof node !== "object") return acc;
|
|
||||||
if (node.type === type) acc.push(node);
|
|
||||||
if (Array.isArray(node.content)) for (const c of node.content) findAll(c, type, acc);
|
|
||||||
return acc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Build markdown that drives the real `deriveFootnoteId(originalId, occurrence,
|
|
||||||
* taken)`:
|
|
||||||
* - `occurrence` duplicate definitions of `[^originalId]` so the dedup walk
|
|
||||||
* reaches the requested occurrence (occurrence=2 -> 1 keeper + 1 duplicate;
|
|
||||||
* occurrence=3 -> keeper + 2 duplicates, of which the LAST is the one whose
|
|
||||||
* id we read);
|
|
||||||
* - one literal pre-existing definition for every id in `taken`, each with its
|
|
||||||
* own reference marker so it is a real (non-orphan) definition. Those ids are
|
|
||||||
* reserved up-front in the dedup `taken` set, exactly forcing the bump.
|
|
||||||
*
|
|
||||||
* Returns the derived id of the FINAL duplicate of `originalId`.
|
|
||||||
*/
|
|
||||||
async function deriveViaMarkdown(originalId, occurrence, takenIds) {
|
|
||||||
// References: one [^originalId] per definition (keeper + duplicates) so each
|
|
||||||
// duplicate has a marker to pair with, plus one marker per taken id.
|
|
||||||
const dupCount = occurrence; // keeper + (occurrence-1) duplicates = `occurrence` defs
|
|
||||||
const refMarkers = [];
|
|
||||||
for (let i = 0; i < dupCount; i++) refMarkers.push(`[^${originalId}]`);
|
|
||||||
for (const id of takenIds) refMarkers.push(`[^${id}]`);
|
|
||||||
const refLine = `Body ${refMarkers.join(" ")}.`;
|
|
||||||
|
|
||||||
// Definitions: `occurrence` copies of [^originalId]: ... then the taken ids.
|
|
||||||
const defLines = [];
|
|
||||||
for (let i = 0; i < dupCount; i++) {
|
|
||||||
defLines.push(`[^${originalId}]: copy ${i}`);
|
|
||||||
}
|
|
||||||
for (const id of takenIds) {
|
|
||||||
defLines.push(`[^${id}]: reserved ${id}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const md = [refLine, "", ...defLines].join("\n");
|
|
||||||
const json = await markdownToProseMirror(md);
|
|
||||||
const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
|
|
||||||
|
|
||||||
// The derived id we want is the one that is neither the keeper (originalId),
|
|
||||||
// nor any reserved taken id, nor a lower-occurrence derived id. For
|
|
||||||
// occurrence=2 that is the single bumped id; for occurrence=3 it is the
|
|
||||||
// highest `${originalId}__3...` id. Compute it generically: among the def ids
|
|
||||||
// that start with `${originalId}__${occurrence}`, the expected one is present.
|
|
||||||
return { defIds, json };
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const row of GOLDEN) {
|
|
||||||
test(`parity: derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) -> "${row.expected}"`, async () => {
|
|
||||||
const { defIds } = await deriveViaMarkdown(
|
|
||||||
row.originalId,
|
|
||||||
row.occurrence,
|
|
||||||
row.taken,
|
|
||||||
);
|
|
||||||
// The real compiled deriveFootnoteId must have minted exactly the golden id.
|
|
||||||
assert.ok(
|
|
||||||
defIds.includes(row.expected),
|
|
||||||
`expected derived id "${row.expected}" among def ids ${JSON.stringify(defIds)}`,
|
|
||||||
);
|
|
||||||
// And every id is distinct: nothing collapsed.
|
|
||||||
assert.equal(new Set(defIds).size, defIds.length, "all def ids distinct");
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
test("parity: the simple keeper+two-duplicate case mints d, d__2, d__3", async () => {
|
|
||||||
// The canonical no-collision path, asserted as a whole set for clarity.
|
|
||||||
const md = [
|
|
||||||
"See[^d] one[^d] two[^d].",
|
|
||||||
"",
|
|
||||||
"[^d]: first",
|
|
||||||
"[^d]: second",
|
|
||||||
"[^d]: third",
|
|
||||||
].join("\n");
|
|
||||||
const json = await markdownToProseMirror(md);
|
|
||||||
const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
|
|
||||||
assert.deepEqual([...defIds].sort(), ["d", "d__2", "d__3"]);
|
|
||||||
});
|
|
||||||
106
packages/mcp/test/unit/footnote-analyze.test.mjs
Normal file
106
packages/mcp/test/unit/footnote-analyze.test.mjs
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
import { test } from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
|
||||||
|
import { analyzeFootnotes } from "../../build/lib/footnote-analyze.js";
|
||||||
|
|
||||||
|
test("clean footnotes produce no diagnostics", () => {
|
||||||
|
const md = ["A[^a] and B[^b].", "", "[^a]: first", "[^b]: second"].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
assert.deepEqual(d.danglingReferences, []);
|
||||||
|
assert.deepEqual(d.emptyDefinitions, []);
|
||||||
|
assert.deepEqual(d.duplicateDefinitions, []);
|
||||||
|
assert.deepEqual(d.referencesInTables, []);
|
||||||
|
assert.deepEqual(d.warnings, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("reuse (repeated references to one definition) is NOT a warning", () => {
|
||||||
|
const md = ["A[^a] B[^a] C[^a].", "", "[^a]: shared"].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
assert.deepEqual(d.danglingReferences, []);
|
||||||
|
assert.deepEqual(d.warnings, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("dangling reference (no definition) is reported", () => {
|
||||||
|
const md = ["See[^missing] and[^a].", "", "[^a]: defined"].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
assert.deepEqual(d.danglingReferences, ["missing"]);
|
||||||
|
assert.equal(d.warnings.length, 1);
|
||||||
|
assert.match(d.warnings[0], /no matching definition/);
|
||||||
|
assert.match(d.warnings[0], /\[\^missing\]/);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("empty definition text is reported", () => {
|
||||||
|
const md = ["See[^a].", "", "[^a]: "].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
assert.deepEqual(d.emptyDefinitions, ["a"]);
|
||||||
|
assert.match(d.warnings.join("\n"), /empty text/);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("duplicate definition id is reported (first-wins)", () => {
|
||||||
|
const md = ["See[^d].", "", "[^d]: first", "[^d]: second"].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
assert.deepEqual(d.duplicateDefinitions, ["d"]);
|
||||||
|
assert.match(d.warnings.join("\n"), /defined more than once/);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("reference inside a GFM table row is reported (heuristic)", () => {
|
||||||
|
const md = [
|
||||||
|
"| Col |",
|
||||||
|
"| --- |",
|
||||||
|
"| cell[^t] |",
|
||||||
|
"",
|
||||||
|
"[^t]: table note",
|
||||||
|
].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
assert.deepEqual(d.referencesInTables, ["t"]);
|
||||||
|
assert.match(d.warnings.join("\n"), /table/);
|
||||||
|
// It is defined, so it is NOT also dangling.
|
||||||
|
assert.deepEqual(d.danglingReferences, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("footnote syntax inside a code fence is ignored", () => {
|
||||||
|
const md = [
|
||||||
|
"Intro.",
|
||||||
|
"",
|
||||||
|
"```",
|
||||||
|
"Example[^demo]",
|
||||||
|
"[^demo]: not a real definition",
|
||||||
|
"```",
|
||||||
|
"",
|
||||||
|
"Outro[^a].",
|
||||||
|
"",
|
||||||
|
"[^a]: real",
|
||||||
|
].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
// `[^demo]` lives only in the fenced block, so it is neither a reference nor a
|
||||||
|
// dangling one, and `[^demo]:` is not counted as a definition.
|
||||||
|
assert.deepEqual(d.danglingReferences, []);
|
||||||
|
assert.deepEqual(d.duplicateDefinitions, []);
|
||||||
|
assert.deepEqual(d.warnings, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("a reference that only appears inside a definition's text is not dangling", () => {
|
||||||
|
// `[^b]` is referenced from within [^a]'s text and has its own definition.
|
||||||
|
const md = ["See[^a].", "", "[^a]: see also [^b]", "[^b]: the other"].join(
|
||||||
|
"\n",
|
||||||
|
);
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
assert.deepEqual(d.danglingReferences, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("multiple problem classes accumulate distinct warnings", () => {
|
||||||
|
const md = [
|
||||||
|
"Ref[^x] and[^dup].",
|
||||||
|
"",
|
||||||
|
"[^dup]: one",
|
||||||
|
"[^dup]: two",
|
||||||
|
"[^empty]:",
|
||||||
|
].join("\n");
|
||||||
|
const d = analyzeFootnotes(md);
|
||||||
|
// x has no definition; dup is defined twice; empty is empty AND has no ref.
|
||||||
|
assert.ok(d.danglingReferences.includes("x"));
|
||||||
|
assert.deepEqual(d.duplicateDefinitions, ["dup"]);
|
||||||
|
assert.deepEqual(d.emptyDefinitions, ["empty"]);
|
||||||
|
// One warning line per problem class present.
|
||||||
|
assert.ok(d.warnings.length >= 3);
|
||||||
|
});
|
||||||
63
packages/mcp/test/unit/footnote-warnings-import.test.mjs
Normal file
63
packages/mcp/test/unit/footnote-warnings-import.test.mjs
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
import { test } from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
|
||||||
|
import {
|
||||||
|
analyzeFootnotes,
|
||||||
|
footnoteWarningsField,
|
||||||
|
} from "../../build/lib/footnote-analyze.js";
|
||||||
|
import {
|
||||||
|
serializeDocmostMarkdown,
|
||||||
|
parseDocmostMarkdown,
|
||||||
|
} from "../../build/lib/markdown-document.js";
|
||||||
|
|
||||||
|
// Pins the footnoteWarnings PLUMBING contract (#169 review): the field is
|
||||||
|
// present only on problems and omitted on clean input, AND `import_page_markdown`
|
||||||
|
// analyzes the BODY (after the docmost:meta / docmost:comments blocks) — so a
|
||||||
|
// footnote-like token inside those JSON blocks never warns, while a real marker
|
||||||
|
// in the body does. importPageMarkdown does exactly
|
||||||
|
// `footnoteWarningsField(parseDocmostMarkdown(full).body)` over a collab socket
|
||||||
|
// this harness does not stand up, so we test the same pure composition directly.
|
||||||
|
|
||||||
|
test("footnoteWarningsField is present on problems and omitted on clean input", () => {
|
||||||
|
const problem = footnoteWarningsField("See[^missing].\n\n[^a]: defined");
|
||||||
|
assert.ok(Array.isArray(problem.footnoteWarnings));
|
||||||
|
assert.match(problem.footnoteWarnings.join("\n"), /no matching definition/);
|
||||||
|
|
||||||
|
const clean = footnoteWarningsField("A[^a] and reuse[^a].\n\n[^a]: fine");
|
||||||
|
assert.deepEqual(clean, {}); // no key at all on clean input
|
||||||
|
});
|
||||||
|
|
||||||
|
test("import analyzes the BODY only — tokens inside meta/comments never warn", () => {
|
||||||
|
// meta + comments JSON carry `[^metaonly]` / `[^commentonly]`-looking text; the
|
||||||
|
// BODY has a genuinely dangling `[^bodyref]`.
|
||||||
|
const full = serializeDocmostMarkdown(
|
||||||
|
{ pageId: "p1", note: "front-matter mentions [^metaonly] in text" },
|
||||||
|
"Body with a dangling[^bodyref] marker.",
|
||||||
|
[{ id: "c1", content: "a comment that says [^commentonly]" }],
|
||||||
|
);
|
||||||
|
|
||||||
|
const { body } = parseDocmostMarkdown(full);
|
||||||
|
// Sanity: the meta/comments markers are NOT in the parsed body.
|
||||||
|
assert.ok(!body.includes("[^metaonly]"));
|
||||||
|
assert.ok(!body.includes("[^commentonly]"));
|
||||||
|
|
||||||
|
const field = footnoteWarningsField(body);
|
||||||
|
const joined = (field.footnoteWarnings ?? []).join("\n");
|
||||||
|
// ONLY the body's dangling reference is flagged.
|
||||||
|
assert.match(joined, /\[\^bodyref\]/);
|
||||||
|
assert.ok(!joined.includes("metaonly"));
|
||||||
|
assert.ok(!joined.includes("commentonly"));
|
||||||
|
|
||||||
|
// Cross-check against analyzeFootnotes directly (same composition the importer uses).
|
||||||
|
assert.deepEqual(analyzeFootnotes(body).danglingReferences, ["bodyref"]);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("import on a clean body yields no footnoteWarnings field", () => {
|
||||||
|
const full = serializeDocmostMarkdown(
|
||||||
|
{ pageId: "p1" },
|
||||||
|
"Clean body[^a] reusing[^a].\n\n[^a]: ok",
|
||||||
|
[],
|
||||||
|
);
|
||||||
|
const { body } = parseDocmostMarkdown(full);
|
||||||
|
assert.deepEqual(footnoteWarningsField(body), {});
|
||||||
|
});
|
||||||
@@ -90,11 +90,10 @@ test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
|
|||||||
assert.match(md2, /\[\^fn2\]: Second note\./);
|
assert.match(md2, /\[\^fn2\]: Second note\./);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => {
|
test("repeated references REUSE one footnote; duplicate definitions are first-wins (#166)", async () => {
|
||||||
// The MCP import must derive duplicate ids deterministically (NOT random) so
|
// Reuse semantics: many `[^d]` references + several `[^d]:` definitions import
|
||||||
// the same markdown imported here and via the editor produces identical ids,
|
// as ONE footnote — the references all keep id "d" (reuse), and only the FIRST
|
||||||
// and re-importing is stable. This is the test that would FAIL on the old
|
// definition is kept (first-wins). Deterministic and stable across re-imports.
|
||||||
// Math.random()/Date.now() implementation.
|
|
||||||
const md = [
|
const md = [
|
||||||
"See[^d] one[^d] two[^d].",
|
"See[^d] one[^d] two[^d].",
|
||||||
"",
|
"",
|
||||||
@@ -106,21 +105,26 @@ test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)",
|
|||||||
const idsOf = async () => {
|
const idsOf = async () => {
|
||||||
const json = await markdownToProseMirror(md);
|
const json = await markdownToProseMirror(md);
|
||||||
const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
|
const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
|
||||||
const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
|
const defs = findAll(json, "footnoteDefinition");
|
||||||
return { refs, defs };
|
return {
|
||||||
|
refs,
|
||||||
|
defIds: defs.map((d) => d.attrs.id),
|
||||||
|
defText: defs
|
||||||
|
.map((d) => JSON.stringify(d).match(/"text":"([^"]*)"/)?.[1])
|
||||||
|
.join("|"),
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
const a = await idsOf();
|
const a = await idsOf();
|
||||||
const b = await idsOf();
|
const b = await idsOf();
|
||||||
|
|
||||||
// Identical across runs.
|
// Stable across runs.
|
||||||
assert.deepEqual(a.refs, b.refs);
|
assert.deepEqual(a, b);
|
||||||
assert.deepEqual(a.defs, b.defs);
|
// Reuse: all three reference markers stay "d".
|
||||||
// Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
|
assert.deepEqual(a.refs, ["d", "d", "d"]);
|
||||||
assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]);
|
// First-wins: a single definition "d" with the FIRST text.
|
||||||
// 1:1 reference <-> definition pairing, all distinct.
|
assert.deepEqual(a.defIds, ["d"]);
|
||||||
assert.equal(new Set(a.defs).size, 3);
|
assert.equal(a.defText, "first");
|
||||||
assert.deepEqual([...a.refs].sort(), [...a.defs].sort());
|
|
||||||
});
|
});
|
||||||
|
|
||||||
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
|
test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
|
||||||
|
|||||||
Reference in New Issue
Block a user